1 /* Loop transformation code generation
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dberlin@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
24 #include "coretypes.h"
31 #include "basic-block.h"
32 #include "diagnostic.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
39 #include "tree-chrec.h"
40 #include "tree-data-ref.h"
41 #include "tree-pass.h"
42 #include "tree-scalar-evolution.h"
46 /* This loop nest code generation is based on non-singular matrix
49 A little terminology and a general sketch of the algorithm. See "A singular
50 loop transformation framework based on non-singular matrices" by Wei Li and
51 Keshav Pingali for formal proofs that the various statements below are
54 A loop iteration space represents the points traversed by the loop. A point in the
55 iteration space can be represented by a vector of size <loop depth>. You can
56 therefore represent the iteration space as an integral combinations of a set
59 A loop iteration space is dense if every integer point between the loop
60 bounds is a point in the iteration space. Every loop with a step of 1
61 therefore has a dense iteration space.
63 for i = 1 to 3, step 1 is a dense iteration space.
65 A loop iteration space is sparse if it is not dense. That is, the iteration
66 space skips integer points that are within the loop bounds.
68 for i = 1 to 3, step 2 is a sparse iteration space, because the integer point
71 Dense source spaces are easy to transform, because they don't skip any
72 points to begin with. Thus we can compute the exact bounds of the target
73 space using min/max and floor/ceil.
75 For a dense source space, we take the transformation matrix, decompose it
76 into a lower triangular part (H) and a unimodular part (U).
77 We then compute the auxiliary space from the unimodular part (source loop
78 nest . U = auxiliary space) , which has two important properties:
79 1. It traverses the iterations in the same lexicographic order as the source
81 2. It is a dense space when the source is a dense space (even if the target
82 space is going to be sparse).
84 Given the auxiliary space, we use the lower triangular part to compute the
85 bounds in the target space by simple matrix multiplication.
86 The gaps in the target space (IE the new loop step sizes) will be the
87 diagonals of the H matrix.
89 Sparse source spaces require another step, because you can't directly compute
90 the exact bounds of the auxiliary and target space from the sparse space.
91 Rather than try to come up with a separate algorithm to handle sparse source
92 spaces directly, we just find a legal transformation matrix that gives you
93 the sparse source space, from a dense space, and then transform the dense
96 For a regular sparse space, you can represent the source space as an integer
97 lattice, and the base space of that lattice will always be dense. Thus, we
98 effectively use the lattice to figure out the transformation from the lattice
99 base space, to the sparse iteration space (IE what transform was applied to
100 the dense space to make it sparse). We then compose this transform with the
101 transformation matrix specified by the user (since our matrix transformations
102 are closed under composition, this is okay). We can then use the base space
103 (which is dense) plus the composed transformation matrix, to compute the rest
104 of the transform using the dense space algorithm above.
106 In other words, our sparse source space (B) is decomposed into a dense base
107 space (A), and a matrix (L) that transforms A into B, such that A.L = B.
108 We then compute the composition of L and the user transformation matrix (T),
109 so that T is now a transform from A to the result, instead of from B to the
111 IE A.(LT) = result instead of B.T = result
112 Since A is now a dense source space, we can use the dense source space
113 algorithm above to compute the result of applying transform (LT) to A.
115 Fourier-Motzkin elimination is used to compute the bounds of the base space
119 DEF_VEC_ALLOC_P(int,heap
);
121 static bool perfect_nestify (struct loops
*,
122 struct loop
*, VEC(tree
,heap
) *,
123 VEC(tree
,heap
) *, VEC(int,heap
) *,
125 /* Lattice stuff that is internal to the code generation algorithm. */
129 /* Lattice base matrix. */
131 /* Lattice dimension. */
133 /* Origin vector for the coefficients. */
134 lambda_vector origin
;
135 /* Origin matrix for the invariants. */
136 lambda_matrix origin_invariants
;
137 /* Number of invariants. */
141 #define LATTICE_BASE(T) ((T)->base)
142 #define LATTICE_DIMENSION(T) ((T)->dimension)
143 #define LATTICE_ORIGIN(T) ((T)->origin)
144 #define LATTICE_ORIGIN_INVARIANTS(T) ((T)->origin_invariants)
145 #define LATTICE_INVARIANTS(T) ((T)->invariants)
147 static bool lle_equal (lambda_linear_expression
, lambda_linear_expression
,
149 static lambda_lattice
lambda_lattice_new (int, int);
150 static lambda_lattice
lambda_lattice_compute_base (lambda_loopnest
);
152 static tree
find_induction_var_from_exit_cond (struct loop
*);
154 /* Create a new lambda body vector. */
157 lambda_body_vector_new (int size
)
159 lambda_body_vector ret
;
161 ret
= ggc_alloc (sizeof (*ret
));
162 LBV_COEFFICIENTS (ret
) = lambda_vector_new (size
);
163 LBV_SIZE (ret
) = size
;
164 LBV_DENOMINATOR (ret
) = 1;
168 /* Compute the new coefficients for the vector based on the
169 *inverse* of the transformation matrix. */
172 lambda_body_vector_compute_new (lambda_trans_matrix transform
,
173 lambda_body_vector vect
)
175 lambda_body_vector temp
;
178 /* Make sure the matrix is square. */
179 gcc_assert (LTM_ROWSIZE (transform
) == LTM_COLSIZE (transform
));
181 depth
= LTM_ROWSIZE (transform
);
183 temp
= lambda_body_vector_new (depth
);
184 LBV_DENOMINATOR (temp
) =
185 LBV_DENOMINATOR (vect
) * LTM_DENOMINATOR (transform
);
186 lambda_vector_matrix_mult (LBV_COEFFICIENTS (vect
), depth
,
187 LTM_MATRIX (transform
), depth
,
188 LBV_COEFFICIENTS (temp
));
189 LBV_SIZE (temp
) = LBV_SIZE (vect
);
193 /* Print out a lambda body vector. */
196 print_lambda_body_vector (FILE * outfile
, lambda_body_vector body
)
198 print_lambda_vector (outfile
, LBV_COEFFICIENTS (body
), LBV_SIZE (body
));
201 /* Return TRUE if two linear expressions are equal. */
204 lle_equal (lambda_linear_expression lle1
, lambda_linear_expression lle2
,
205 int depth
, int invariants
)
209 if (lle1
== NULL
|| lle2
== NULL
)
211 if (LLE_CONSTANT (lle1
) != LLE_CONSTANT (lle2
))
213 if (LLE_DENOMINATOR (lle1
) != LLE_DENOMINATOR (lle2
))
215 for (i
= 0; i
< depth
; i
++)
216 if (LLE_COEFFICIENTS (lle1
)[i
] != LLE_COEFFICIENTS (lle2
)[i
])
218 for (i
= 0; i
< invariants
; i
++)
219 if (LLE_INVARIANT_COEFFICIENTS (lle1
)[i
] !=
220 LLE_INVARIANT_COEFFICIENTS (lle2
)[i
])
225 /* Create a new linear expression with dimension DIM, and total number
226 of invariants INVARIANTS. */
228 lambda_linear_expression
229 lambda_linear_expression_new (int dim
, int invariants
)
231 lambda_linear_expression ret
;
233 ret
= ggc_alloc_cleared (sizeof (*ret
));
235 LLE_COEFFICIENTS (ret
) = lambda_vector_new (dim
);
236 LLE_CONSTANT (ret
) = 0;
237 LLE_INVARIANT_COEFFICIENTS (ret
) = lambda_vector_new (invariants
);
238 LLE_DENOMINATOR (ret
) = 1;
239 LLE_NEXT (ret
) = NULL
;
244 /* Print out a linear expression EXPR, with SIZE coefficients, to OUTFILE.
245 The starting letter used for variable names is START. */
248 print_linear_expression (FILE * outfile
, lambda_vector expr
, int size
,
253 for (i
= 0; i
< size
; i
++)
260 fprintf (outfile
, "-");
263 else if (expr
[i
] > 0)
264 fprintf (outfile
, " + ");
266 fprintf (outfile
, " - ");
267 if (abs (expr
[i
]) == 1)
268 fprintf (outfile
, "%c", start
+ i
);
270 fprintf (outfile
, "%d%c", abs (expr
[i
]), start
+ i
);
275 /* Print out a lambda linear expression structure, EXPR, to OUTFILE. The
276 depth/number of coefficients is given by DEPTH, the number of invariants is
277 given by INVARIANTS, and the character to start variable names with is given
281 print_lambda_linear_expression (FILE * outfile
,
282 lambda_linear_expression expr
,
283 int depth
, int invariants
, char start
)
285 fprintf (outfile
, "\tLinear expression: ");
286 print_linear_expression (outfile
, LLE_COEFFICIENTS (expr
), depth
, start
);
287 fprintf (outfile
, " constant: %d ", LLE_CONSTANT (expr
));
288 fprintf (outfile
, " invariants: ");
289 print_linear_expression (outfile
, LLE_INVARIANT_COEFFICIENTS (expr
),
291 fprintf (outfile
, " denominator: %d\n", LLE_DENOMINATOR (expr
));
294 /* Print a lambda loop structure LOOP to OUTFILE. The depth/number of
295 coefficients is given by DEPTH, the number of invariants is
296 given by INVARIANTS, and the character to start variable names with is given
300 print_lambda_loop (FILE * outfile
, lambda_loop loop
, int depth
,
301 int invariants
, char start
)
304 lambda_linear_expression expr
;
308 expr
= LL_LINEAR_OFFSET (loop
);
309 step
= LL_STEP (loop
);
310 fprintf (outfile
, " step size = %d \n", step
);
314 fprintf (outfile
, " linear offset: \n");
315 print_lambda_linear_expression (outfile
, expr
, depth
, invariants
,
319 fprintf (outfile
, " lower bound: \n");
320 for (expr
= LL_LOWER_BOUND (loop
); expr
!= NULL
; expr
= LLE_NEXT (expr
))
321 print_lambda_linear_expression (outfile
, expr
, depth
, invariants
, start
);
322 fprintf (outfile
, " upper bound: \n");
323 for (expr
= LL_UPPER_BOUND (loop
); expr
!= NULL
; expr
= LLE_NEXT (expr
))
324 print_lambda_linear_expression (outfile
, expr
, depth
, invariants
, start
);
327 /* Create a new loop nest structure with DEPTH loops, and INVARIANTS as the
328 number of invariants. */
331 lambda_loopnest_new (int depth
, int invariants
)
334 ret
= ggc_alloc (sizeof (*ret
));
336 LN_LOOPS (ret
) = ggc_alloc_cleared (depth
* sizeof (lambda_loop
));
337 LN_DEPTH (ret
) = depth
;
338 LN_INVARIANTS (ret
) = invariants
;
343 /* Print a lambda loopnest structure, NEST, to OUTFILE. The starting
344 character to use for loop names is given by START. */
347 print_lambda_loopnest (FILE * outfile
, lambda_loopnest nest
, char start
)
350 for (i
= 0; i
< LN_DEPTH (nest
); i
++)
352 fprintf (outfile
, "Loop %c\n", start
+ i
);
353 print_lambda_loop (outfile
, LN_LOOPS (nest
)[i
], LN_DEPTH (nest
),
354 LN_INVARIANTS (nest
), 'i');
355 fprintf (outfile
, "\n");
359 /* Allocate a new lattice structure of DEPTH x DEPTH, with INVARIANTS number
362 static lambda_lattice
363 lambda_lattice_new (int depth
, int invariants
)
366 ret
= ggc_alloc (sizeof (*ret
));
367 LATTICE_BASE (ret
) = lambda_matrix_new (depth
, depth
);
368 LATTICE_ORIGIN (ret
) = lambda_vector_new (depth
);
369 LATTICE_ORIGIN_INVARIANTS (ret
) = lambda_matrix_new (depth
, invariants
);
370 LATTICE_DIMENSION (ret
) = depth
;
371 LATTICE_INVARIANTS (ret
) = invariants
;
375 /* Compute the lattice base for NEST. The lattice base is essentially a
376 non-singular transform from a dense base space to a sparse iteration space.
377 We use it so that we don't have to specially handle the case of a sparse
378 iteration space in other parts of the algorithm. As a result, this routine
379 only does something interesting (IE produce a matrix that isn't the
380 identity matrix) if NEST is a sparse space. */
382 static lambda_lattice
383 lambda_lattice_compute_base (lambda_loopnest nest
)
386 int depth
, invariants
;
391 lambda_linear_expression expression
;
393 depth
= LN_DEPTH (nest
);
394 invariants
= LN_INVARIANTS (nest
);
396 ret
= lambda_lattice_new (depth
, invariants
);
397 base
= LATTICE_BASE (ret
);
398 for (i
= 0; i
< depth
; i
++)
400 loop
= LN_LOOPS (nest
)[i
];
402 step
= LL_STEP (loop
);
403 /* If we have a step of 1, then the base is one, and the
404 origin and invariant coefficients are 0. */
407 for (j
= 0; j
< depth
; j
++)
410 LATTICE_ORIGIN (ret
)[i
] = 0;
411 for (j
= 0; j
< invariants
; j
++)
412 LATTICE_ORIGIN_INVARIANTS (ret
)[i
][j
] = 0;
416 /* Otherwise, we need the lower bound expression (which must
417 be an affine function) to determine the base. */
418 expression
= LL_LOWER_BOUND (loop
);
419 gcc_assert (expression
&& !LLE_NEXT (expression
)
420 && LLE_DENOMINATOR (expression
) == 1);
422 /* The lower triangular portion of the base is going to be the
423 coefficient times the step */
424 for (j
= 0; j
< i
; j
++)
425 base
[i
][j
] = LLE_COEFFICIENTS (expression
)[j
]
426 * LL_STEP (LN_LOOPS (nest
)[j
]);
428 for (j
= i
+ 1; j
< depth
; j
++)
431 /* Origin for this loop is the constant of the lower bound
433 LATTICE_ORIGIN (ret
)[i
] = LLE_CONSTANT (expression
);
435 /* Coefficient for the invariants are equal to the invariant
436 coefficients in the expression. */
437 for (j
= 0; j
< invariants
; j
++)
438 LATTICE_ORIGIN_INVARIANTS (ret
)[i
][j
] =
439 LLE_INVARIANT_COEFFICIENTS (expression
)[j
];
445 /* Compute the greatest common denominator of two numbers (A and B) using
446 Euclid's algorithm. */
467 /* Compute the greatest common denominator of a VECTOR of SIZE numbers. */
470 gcd_vector (lambda_vector vector
, int size
)
478 for (i
= 1; i
< size
; i
++)
479 gcd1
= gcd (gcd1
, vector
[i
]);
/* Compute the least common multiple of two numbers A and B .  */

static int
lcm (int a, int b)
{
  return (abs (a) * abs (b) / gcd (a, b));
}
492 /* Perform Fourier-Motzkin elimination to calculate the bounds of the
494 Fourier-Motzkin is a way of reducing systems of linear inequalities so that
495 it is easy to calculate the answer and bounds.
496 A sketch of how it works:
497 Given a system of linear inequalities, ai * xj >= bk, you can always
498 rewrite the constraints so they are all of the form
499 a <= x, or x <= b, or x >= constant for some x in x1 ... xj (and some b
500 in b1 ... bk, and some a in a1...ai)
501 You can then eliminate this x from the non-constant inequalities by
502 rewriting these as a <= b, x >= constant, and delete the x variable.
503 You can then repeat this for any remaining x variables, and then we have
504 an easy to use variable <= constant (or no variables at all) form that we
505 can construct our bounds from.
507 In our case, each time we eliminate, we construct part of the bound from
508 the ith variable, then delete the ith variable.
510 Remember the constant are in our vector a, our coefficient matrix is A,
511 and our invariant coefficient matrix is B.
513 SIZE is the size of the matrices being passed.
514 DEPTH is the loop nest depth.
515 INVARIANTS is the number of loop invariants.
516 A, B, and a are the coefficient matrix, invariant coefficient, and a
517 vector of constants, respectively. */
519 static lambda_loopnest
520 compute_nest_using_fourier_motzkin (int size
,
528 int multiple
, f1
, f2
;
530 lambda_linear_expression expression
;
532 lambda_loopnest auxillary_nest
;
533 lambda_matrix swapmatrix
, A1
, B1
;
534 lambda_vector swapvector
, a1
;
537 A1
= lambda_matrix_new (128, depth
);
538 B1
= lambda_matrix_new (128, invariants
);
539 a1
= lambda_vector_new (128);
541 auxillary_nest
= lambda_loopnest_new (depth
, invariants
);
543 for (i
= depth
- 1; i
>= 0; i
--)
545 loop
= lambda_loop_new ();
546 LN_LOOPS (auxillary_nest
)[i
] = loop
;
549 for (j
= 0; j
< size
; j
++)
553 /* Any linear expression in the matrix with a coefficient less
554 than 0 becomes part of the new lower bound. */
555 expression
= lambda_linear_expression_new (depth
, invariants
);
557 for (k
= 0; k
< i
; k
++)
558 LLE_COEFFICIENTS (expression
)[k
] = A
[j
][k
];
560 for (k
= 0; k
< invariants
; k
++)
561 LLE_INVARIANT_COEFFICIENTS (expression
)[k
] = -1 * B
[j
][k
];
563 LLE_DENOMINATOR (expression
) = -1 * A
[j
][i
];
564 LLE_CONSTANT (expression
) = -1 * a
[j
];
566 /* Ignore if identical to the existing lower bound. */
567 if (!lle_equal (LL_LOWER_BOUND (loop
),
568 expression
, depth
, invariants
))
570 LLE_NEXT (expression
) = LL_LOWER_BOUND (loop
);
571 LL_LOWER_BOUND (loop
) = expression
;
575 else if (A
[j
][i
] > 0)
577 /* Any linear expression with a coefficient greater than 0
578 becomes part of the new upper bound. */
579 expression
= lambda_linear_expression_new (depth
, invariants
);
580 for (k
= 0; k
< i
; k
++)
581 LLE_COEFFICIENTS (expression
)[k
] = -1 * A
[j
][k
];
583 for (k
= 0; k
< invariants
; k
++)
584 LLE_INVARIANT_COEFFICIENTS (expression
)[k
] = B
[j
][k
];
586 LLE_DENOMINATOR (expression
) = A
[j
][i
];
587 LLE_CONSTANT (expression
) = a
[j
];
589 /* Ignore if identical to the existing upper bound. */
590 if (!lle_equal (LL_UPPER_BOUND (loop
),
591 expression
, depth
, invariants
))
593 LLE_NEXT (expression
) = LL_UPPER_BOUND (loop
);
594 LL_UPPER_BOUND (loop
) = expression
;
600 /* This portion creates a new system of linear inequalities by deleting
601 the i'th variable, reducing the system by one variable. */
603 for (j
= 0; j
< size
; j
++)
605 /* If the coefficient for the i'th variable is 0, then we can just
606 eliminate the variable straightaway. Otherwise, we have to
607 multiply through by the coefficients we are eliminating. */
610 lambda_vector_copy (A
[j
], A1
[newsize
], depth
);
611 lambda_vector_copy (B
[j
], B1
[newsize
], invariants
);
615 else if (A
[j
][i
] > 0)
617 for (k
= 0; k
< size
; k
++)
621 multiple
= lcm (A
[j
][i
], A
[k
][i
]);
622 f1
= multiple
/ A
[j
][i
];
623 f2
= -1 * multiple
/ A
[k
][i
];
625 lambda_vector_add_mc (A
[j
], f1
, A
[k
], f2
,
627 lambda_vector_add_mc (B
[j
], f1
, B
[k
], f2
,
628 B1
[newsize
], invariants
);
629 a1
[newsize
] = f1
* a
[j
] + f2
* a
[k
];
651 return auxillary_nest
;
654 /* Compute the loop bounds for the auxiliary space NEST.
655 Input system used is Ax <= b. TRANS is the unimodular transformation.
656 Given the original nest, this function will
657 1. Convert the nest into matrix form, which consists of a matrix for the
658 coefficients, a matrix for the
659 invariant coefficients, and a vector for the constants.
660 2. Use the matrix form to calculate the lattice base for the nest (which is
662 3. Compose the dense space transform with the user specified transform, to
663 get a transform we can easily calculate transformed bounds for.
664 4. Multiply the composed transformation matrix times the matrix form of the
666 5. Transform the newly created matrix (from step 4) back into a loop nest
667 using fourier motzkin elimination to figure out the bounds. */
669 static lambda_loopnest
670 lambda_compute_auxillary_space (lambda_loopnest nest
,
671 lambda_trans_matrix trans
)
673 lambda_matrix A
, B
, A1
, B1
;
675 lambda_matrix invertedtrans
;
676 int depth
, invariants
, size
;
679 lambda_linear_expression expression
;
680 lambda_lattice lattice
;
682 depth
= LN_DEPTH (nest
);
683 invariants
= LN_INVARIANTS (nest
);
685 /* Unfortunately, we can't know the number of constraints we'll have
686 ahead of time, but this should be enough even in ridiculous loop nest
687 cases. We must not go over this limit. */
688 A
= lambda_matrix_new (128, depth
);
689 B
= lambda_matrix_new (128, invariants
);
690 a
= lambda_vector_new (128);
692 A1
= lambda_matrix_new (128, depth
);
693 B1
= lambda_matrix_new (128, invariants
);
694 a1
= lambda_vector_new (128);
696 /* Store the bounds in the equation matrix A, constant vector a, and
697 invariant matrix B, so that we have Ax <= a + B.
698 This requires a little equation rearranging so that everything is on the
699 correct side of the inequality. */
701 for (i
= 0; i
< depth
; i
++)
703 loop
= LN_LOOPS (nest
)[i
];
705 /* First we do the lower bound. */
706 if (LL_STEP (loop
) > 0)
707 expression
= LL_LOWER_BOUND (loop
);
709 expression
= LL_UPPER_BOUND (loop
);
711 for (; expression
!= NULL
; expression
= LLE_NEXT (expression
))
713 /* Fill in the coefficient. */
714 for (j
= 0; j
< i
; j
++)
715 A
[size
][j
] = LLE_COEFFICIENTS (expression
)[j
];
717 /* And the invariant coefficient. */
718 for (j
= 0; j
< invariants
; j
++)
719 B
[size
][j
] = LLE_INVARIANT_COEFFICIENTS (expression
)[j
];
721 /* And the constant. */
722 a
[size
] = LLE_CONSTANT (expression
);
724 /* Convert (2x+3y+2+b)/4 <= z to 2x+3y-4z <= -2-b. IE put all
725 constants and single variables on */
726 A
[size
][i
] = -1 * LLE_DENOMINATOR (expression
);
728 for (j
= 0; j
< invariants
; j
++)
732 /* Need to increase matrix sizes above. */
733 gcc_assert (size
<= 127);
737 /* Then do the exact same thing for the upper bounds. */
738 if (LL_STEP (loop
) > 0)
739 expression
= LL_UPPER_BOUND (loop
);
741 expression
= LL_LOWER_BOUND (loop
);
743 for (; expression
!= NULL
; expression
= LLE_NEXT (expression
))
745 /* Fill in the coefficient. */
746 for (j
= 0; j
< i
; j
++)
747 A
[size
][j
] = LLE_COEFFICIENTS (expression
)[j
];
749 /* And the invariant coefficient. */
750 for (j
= 0; j
< invariants
; j
++)
751 B
[size
][j
] = LLE_INVARIANT_COEFFICIENTS (expression
)[j
];
753 /* And the constant. */
754 a
[size
] = LLE_CONSTANT (expression
);
756 /* Convert z <= (2x+3y+2+b)/4 to -2x-3y+4z <= 2+b. */
757 for (j
= 0; j
< i
; j
++)
759 A
[size
][i
] = LLE_DENOMINATOR (expression
);
761 /* Need to increase matrix sizes above. */
762 gcc_assert (size
<= 127);
767 /* Compute the lattice base x = base * y + origin, where y is the
769 lattice
= lambda_lattice_compute_base (nest
);
771 /* Ax <= a + B then becomes ALy <= a+B - A*origin. L is the lattice base */
774 lambda_matrix_mult (A
, LATTICE_BASE (lattice
), A1
, size
, depth
, depth
);
776 /* a1 = a - A * origin constant. */
777 lambda_matrix_vector_mult (A
, size
, depth
, LATTICE_ORIGIN (lattice
), a1
);
778 lambda_vector_add_mc (a
, 1, a1
, -1, a1
, size
);
780 /* B1 = B - A * origin invariant. */
781 lambda_matrix_mult (A
, LATTICE_ORIGIN_INVARIANTS (lattice
), B1
, size
, depth
,
783 lambda_matrix_add_mc (B
, 1, B1
, -1, B1
, size
, invariants
);
785 /* Now compute the auxiliary space bounds by first inverting U, multiplying
786 it by A1, then performing fourier motzkin. */
788 invertedtrans
= lambda_matrix_new (depth
, depth
);
790 /* Compute the inverse of U. */
791 lambda_matrix_inverse (LTM_MATRIX (trans
),
792 invertedtrans
, depth
);
795 lambda_matrix_mult (A1
, invertedtrans
, A
, size
, depth
, depth
);
797 return compute_nest_using_fourier_motzkin (size
, depth
, invariants
,
801 /* Compute the loop bounds for the target space, using the bounds of
802 the auxiliary nest AUXILLARY_NEST, and the triangular matrix H.
803 The target space loop bounds are computed by multiplying the triangular
804 matrix H by the auxiliary nest, to get the new loop bounds. The sign of
805 the loop steps (positive or negative) is then used to swap the bounds if
806 the loop counts downwards.
807 Return the target loopnest. */
809 static lambda_loopnest
810 lambda_compute_target_space (lambda_loopnest auxillary_nest
,
811 lambda_trans_matrix H
, lambda_vector stepsigns
)
813 lambda_matrix inverse
, H1
;
814 int determinant
, i
, j
;
818 lambda_loopnest target_nest
;
819 int depth
, invariants
;
820 lambda_matrix target
;
822 lambda_loop auxillary_loop
, target_loop
;
823 lambda_linear_expression expression
, auxillary_expr
, target_expr
, tmp_expr
;
825 depth
= LN_DEPTH (auxillary_nest
);
826 invariants
= LN_INVARIANTS (auxillary_nest
);
828 inverse
= lambda_matrix_new (depth
, depth
);
829 determinant
= lambda_matrix_inverse (LTM_MATRIX (H
), inverse
, depth
);
831 /* H1 is H excluding its diagonal. */
832 H1
= lambda_matrix_new (depth
, depth
);
833 lambda_matrix_copy (LTM_MATRIX (H
), H1
, depth
, depth
);
835 for (i
= 0; i
< depth
; i
++)
838 /* Computes the linear offsets of the loop bounds. */
839 target
= lambda_matrix_new (depth
, depth
);
840 lambda_matrix_mult (H1
, inverse
, target
, depth
, depth
, depth
);
842 target_nest
= lambda_loopnest_new (depth
, invariants
);
844 for (i
= 0; i
< depth
; i
++)
847 /* Get a new loop structure. */
848 target_loop
= lambda_loop_new ();
849 LN_LOOPS (target_nest
)[i
] = target_loop
;
851 /* Computes the gcd of the coefficients of the linear part. */
852 gcd1
= gcd_vector (target
[i
], i
);
854 /* Include the denominator in the GCD. */
855 gcd1
= gcd (gcd1
, determinant
);
857 /* Now divide through by the gcd. */
858 for (j
= 0; j
< i
; j
++)
859 target
[i
][j
] = target
[i
][j
] / gcd1
;
861 expression
= lambda_linear_expression_new (depth
, invariants
);
862 lambda_vector_copy (target
[i
], LLE_COEFFICIENTS (expression
), depth
);
863 LLE_DENOMINATOR (expression
) = determinant
/ gcd1
;
864 LLE_CONSTANT (expression
) = 0;
865 lambda_vector_clear (LLE_INVARIANT_COEFFICIENTS (expression
),
867 LL_LINEAR_OFFSET (target_loop
) = expression
;
870 /* For each loop, compute the new bounds from H. */
871 for (i
= 0; i
< depth
; i
++)
873 auxillary_loop
= LN_LOOPS (auxillary_nest
)[i
];
874 target_loop
= LN_LOOPS (target_nest
)[i
];
875 LL_STEP (target_loop
) = LTM_MATRIX (H
)[i
][i
];
876 factor
= LTM_MATRIX (H
)[i
][i
];
878 /* First we do the lower bound. */
879 auxillary_expr
= LL_LOWER_BOUND (auxillary_loop
);
881 for (; auxillary_expr
!= NULL
;
882 auxillary_expr
= LLE_NEXT (auxillary_expr
))
884 target_expr
= lambda_linear_expression_new (depth
, invariants
);
885 lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr
),
886 depth
, inverse
, depth
,
887 LLE_COEFFICIENTS (target_expr
));
888 lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr
),
889 LLE_COEFFICIENTS (target_expr
), depth
,
892 LLE_CONSTANT (target_expr
) = LLE_CONSTANT (auxillary_expr
) * factor
;
893 lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr
),
894 LLE_INVARIANT_COEFFICIENTS (target_expr
),
896 lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr
),
897 LLE_INVARIANT_COEFFICIENTS (target_expr
),
899 LLE_DENOMINATOR (target_expr
) = LLE_DENOMINATOR (auxillary_expr
);
901 if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr
), depth
))
903 LLE_CONSTANT (target_expr
) = LLE_CONSTANT (target_expr
)
905 lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
907 LLE_INVARIANT_COEFFICIENTS
908 (target_expr
), invariants
,
910 LLE_DENOMINATOR (target_expr
) =
911 LLE_DENOMINATOR (target_expr
) * determinant
;
913 /* Find the gcd and divide by it here, rather than doing it
914 at the tree level. */
915 gcd1
= gcd_vector (LLE_COEFFICIENTS (target_expr
), depth
);
916 gcd2
= gcd_vector (LLE_INVARIANT_COEFFICIENTS (target_expr
),
918 gcd1
= gcd (gcd1
, gcd2
);
919 gcd1
= gcd (gcd1
, LLE_CONSTANT (target_expr
));
920 gcd1
= gcd (gcd1
, LLE_DENOMINATOR (target_expr
));
921 for (j
= 0; j
< depth
; j
++)
922 LLE_COEFFICIENTS (target_expr
)[j
] /= gcd1
;
923 for (j
= 0; j
< invariants
; j
++)
924 LLE_INVARIANT_COEFFICIENTS (target_expr
)[j
] /= gcd1
;
925 LLE_CONSTANT (target_expr
) /= gcd1
;
926 LLE_DENOMINATOR (target_expr
) /= gcd1
;
927 /* Ignore if identical to existing bound. */
928 if (!lle_equal (LL_LOWER_BOUND (target_loop
), target_expr
, depth
,
931 LLE_NEXT (target_expr
) = LL_LOWER_BOUND (target_loop
);
932 LL_LOWER_BOUND (target_loop
) = target_expr
;
935 /* Now do the upper bound. */
936 auxillary_expr
= LL_UPPER_BOUND (auxillary_loop
);
938 for (; auxillary_expr
!= NULL
;
939 auxillary_expr
= LLE_NEXT (auxillary_expr
))
941 target_expr
= lambda_linear_expression_new (depth
, invariants
);
942 lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr
),
943 depth
, inverse
, depth
,
944 LLE_COEFFICIENTS (target_expr
));
945 lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr
),
946 LLE_COEFFICIENTS (target_expr
), depth
,
948 LLE_CONSTANT (target_expr
) = LLE_CONSTANT (auxillary_expr
) * factor
;
949 lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr
),
950 LLE_INVARIANT_COEFFICIENTS (target_expr
),
952 lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr
),
953 LLE_INVARIANT_COEFFICIENTS (target_expr
),
955 LLE_DENOMINATOR (target_expr
) = LLE_DENOMINATOR (auxillary_expr
);
957 if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr
), depth
))
959 LLE_CONSTANT (target_expr
) = LLE_CONSTANT (target_expr
)
961 lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
963 LLE_INVARIANT_COEFFICIENTS
964 (target_expr
), invariants
,
966 LLE_DENOMINATOR (target_expr
) =
967 LLE_DENOMINATOR (target_expr
) * determinant
;
969 /* Find the gcd and divide by it here, instead of at the
971 gcd1
= gcd_vector (LLE_COEFFICIENTS (target_expr
), depth
);
972 gcd2
= gcd_vector (LLE_INVARIANT_COEFFICIENTS (target_expr
),
974 gcd1
= gcd (gcd1
, gcd2
);
975 gcd1
= gcd (gcd1
, LLE_CONSTANT (target_expr
));
976 gcd1
= gcd (gcd1
, LLE_DENOMINATOR (target_expr
));
977 for (j
= 0; j
< depth
; j
++)
978 LLE_COEFFICIENTS (target_expr
)[j
] /= gcd1
;
979 for (j
= 0; j
< invariants
; j
++)
980 LLE_INVARIANT_COEFFICIENTS (target_expr
)[j
] /= gcd1
;
981 LLE_CONSTANT (target_expr
) /= gcd1
;
982 LLE_DENOMINATOR (target_expr
) /= gcd1
;
983 /* Ignore if equal to existing bound. */
984 if (!lle_equal (LL_UPPER_BOUND (target_loop
), target_expr
, depth
,
987 LLE_NEXT (target_expr
) = LL_UPPER_BOUND (target_loop
);
988 LL_UPPER_BOUND (target_loop
) = target_expr
;
992 for (i
= 0; i
< depth
; i
++)
994 target_loop
= LN_LOOPS (target_nest
)[i
];
995 /* If necessary, exchange the upper and lower bounds and negate
997 if (stepsigns
[i
] < 0)
999 LL_STEP (target_loop
) *= -1;
1000 tmp_expr
= LL_LOWER_BOUND (target_loop
);
1001 LL_LOWER_BOUND (target_loop
) = LL_UPPER_BOUND (target_loop
);
1002 LL_UPPER_BOUND (target_loop
) = tmp_expr
;
1008 /* Compute the step signs of TRANS, using TRANS and stepsigns. Return the new
1011 static lambda_vector
1012 lambda_compute_step_signs (lambda_trans_matrix trans
, lambda_vector stepsigns
)
1014 lambda_matrix matrix
, H
;
1016 lambda_vector newsteps
;
1017 int i
, j
, factor
, minimum_column
;
1020 matrix
= LTM_MATRIX (trans
);
1021 size
= LTM_ROWSIZE (trans
);
1022 H
= lambda_matrix_new (size
, size
);
1024 newsteps
= lambda_vector_new (size
);
1025 lambda_vector_copy (stepsigns
, newsteps
, size
);
1027 lambda_matrix_copy (matrix
, H
, size
, size
);
1029 for (j
= 0; j
< size
; j
++)
1033 for (i
= j
; i
< size
; i
++)
1035 lambda_matrix_col_negate (H
, size
, i
);
1036 while (lambda_vector_first_nz (row
, size
, j
+ 1) < size
)
1038 minimum_column
= lambda_vector_min_nz (row
, size
, j
);
1039 lambda_matrix_col_exchange (H
, size
, j
, minimum_column
);
1042 newsteps
[j
] = newsteps
[minimum_column
];
1043 newsteps
[minimum_column
] = temp
;
1045 for (i
= j
+ 1; i
< size
; i
++)
1047 factor
= row
[i
] / row
[j
];
1048 lambda_matrix_col_add (H
, size
, j
, i
, -1 * factor
);
1055 /* Transform NEST according to TRANS, and return the new loopnest.
1057 1. Computing a lattice base for the transformation
1058 2. Composing the dense base with the specified transformation (TRANS)
1059 3. Decomposing the combined transformation into a lower triangular portion,
1060 and a unimodular portion.
1061 4. Computing the auxiliary nest using the unimodular portion.
1062 5. Computing the target nest using the auxiliary nest and the lower
1063 triangular portion. */
1066 lambda_loopnest_transform (lambda_loopnest nest
, lambda_trans_matrix trans
)
1068 lambda_loopnest auxillary_nest
, target_nest
;
1070 int depth
, invariants
;
1072 lambda_lattice lattice
;
1073 lambda_trans_matrix trans1
, H
, U
;
1075 lambda_linear_expression expression
;
1076 lambda_vector origin
;
1077 lambda_matrix origin_invariants
;
1078 lambda_vector stepsigns
;
1081 depth
= LN_DEPTH (nest
);
1082 invariants
= LN_INVARIANTS (nest
);
1084 /* Keep track of the signs of the loop steps. */
1085 stepsigns
= lambda_vector_new (depth
);
1086 for (i
= 0; i
< depth
; i
++)
1088 if (LL_STEP (LN_LOOPS (nest
)[i
]) > 0)
1094 /* Compute the lattice base. */
1095 lattice
= lambda_lattice_compute_base (nest
);
1096 trans1
= lambda_trans_matrix_new (depth
, depth
);
1098 /* Multiply the transformation matrix by the lattice base. */
1100 lambda_matrix_mult (LTM_MATRIX (trans
), LATTICE_BASE (lattice
),
1101 LTM_MATRIX (trans1
), depth
, depth
, depth
);
1103 /* Compute the Hermite normal form for the new transformation matrix. */
1104 H
= lambda_trans_matrix_new (depth
, depth
);
1105 U
= lambda_trans_matrix_new (depth
, depth
);
1106 lambda_matrix_hermite (LTM_MATRIX (trans1
), depth
, LTM_MATRIX (H
),
1109 /* Compute the auxiliary loop nest's space from the unimodular
1111 auxillary_nest
= lambda_compute_auxillary_space (nest
, U
);
1113 /* Compute the loop step signs from the old step signs and the
1114 transformation matrix. */
1115 stepsigns
= lambda_compute_step_signs (trans1
, stepsigns
);
1117 /* Compute the target loop nest space from the auxiliary nest and
1118 the lower triangular matrix H. */
1119 target_nest
= lambda_compute_target_space (auxillary_nest
, H
, stepsigns
);
1120 origin
= lambda_vector_new (depth
);
1121 origin_invariants
= lambda_matrix_new (depth
, invariants
);
1122 lambda_matrix_vector_mult (LTM_MATRIX (trans
), depth
, depth
,
1123 LATTICE_ORIGIN (lattice
), origin
);
1124 lambda_matrix_mult (LTM_MATRIX (trans
), LATTICE_ORIGIN_INVARIANTS (lattice
),
1125 origin_invariants
, depth
, depth
, invariants
);
1127 for (i
= 0; i
< depth
; i
++)
1129 loop
= LN_LOOPS (target_nest
)[i
];
1130 expression
= LL_LINEAR_OFFSET (loop
);
1131 if (lambda_vector_zerop (LLE_COEFFICIENTS (expression
), depth
))
1134 f
= LLE_DENOMINATOR (expression
);
1136 LLE_CONSTANT (expression
) += f
* origin
[i
];
1138 for (j
= 0; j
< invariants
; j
++)
1139 LLE_INVARIANT_COEFFICIENTS (expression
)[j
] +=
1140 f
* origin_invariants
[i
][j
];
1147 /* Convert a gcc tree expression EXPR to a lambda linear expression, and
1148 return the new expression. DEPTH is the depth of the loopnest.
1149 OUTERINDUCTIONVARS is an array of the induction variables for outer loops
1150 in this nest. INVARIANTS is the array of invariants for the loop. EXTRA
1151 is the amount we have to add/subtract from the expression because of the
1152 type of comparison it is used in. */
1154 static lambda_linear_expression
1155 gcc_tree_to_linear_expression (int depth
, tree expr
,
1156 VEC(tree
,heap
) *outerinductionvars
,
1157 VEC(tree
,heap
) *invariants
, int extra
)
1159 lambda_linear_expression lle
= NULL
;
1160 switch (TREE_CODE (expr
))
1164 lle
= lambda_linear_expression_new (depth
, 2 * depth
);
1165 LLE_CONSTANT (lle
) = TREE_INT_CST_LOW (expr
);
1167 LLE_CONSTANT (lle
) += extra
;
1169 LLE_DENOMINATOR (lle
) = 1;
1176 for (i
= 0; VEC_iterate (tree
, outerinductionvars
, i
, iv
); i
++)
1179 if (SSA_NAME_VAR (iv
) == SSA_NAME_VAR (expr
))
1181 lle
= lambda_linear_expression_new (depth
, 2 * depth
);
1182 LLE_COEFFICIENTS (lle
)[i
] = 1;
1184 LLE_CONSTANT (lle
) = extra
;
1186 LLE_DENOMINATOR (lle
) = 1;
1189 for (i
= 0; VEC_iterate (tree
, invariants
, i
, invar
); i
++)
1192 if (SSA_NAME_VAR (invar
) == SSA_NAME_VAR (expr
))
1194 lle
= lambda_linear_expression_new (depth
, 2 * depth
);
1195 LLE_INVARIANT_COEFFICIENTS (lle
)[i
] = 1;
1197 LLE_CONSTANT (lle
) = extra
;
1198 LLE_DENOMINATOR (lle
) = 1;
1210 /* Return the depth of the loopnest NEST */
1213 depth_of_nest (struct loop
*nest
)
1225 /* Return true if OP is invariant in LOOP and all outer loops. */
1228 invariant_in_loop_and_outer_loops (struct loop
*loop
, tree op
)
1230 if (is_gimple_min_invariant (op
))
1232 if (loop
->depth
== 0)
1234 if (!expr_invariant_in_loop_p (loop
, op
))
1237 && !invariant_in_loop_and_outer_loops (loop
->outer
, op
))
1242 /* Generate a lambda loop from a gcc loop LOOP. Return the new lambda loop,
1243 or NULL if it could not be converted.
1244 DEPTH is the depth of the loop.
1245 INVARIANTS is a pointer to the array of loop invariants.
1246 The induction variable for this loop should be stored in the parameter
1248 OUTERINDUCTIONVARS is an array of induction variables for outer loops. */
1251 gcc_loop_to_lambda_loop (struct loop
*loop
, int depth
,
1252 VEC(tree
,heap
) ** invariants
,
1253 tree
* ourinductionvar
,
1254 VEC(tree
,heap
) * outerinductionvars
,
1255 VEC(tree
,heap
) ** lboundvars
,
1256 VEC(tree
,heap
) ** uboundvars
,
1257 VEC(int,heap
) ** steps
)
1261 tree access_fn
, inductionvar
;
1263 lambda_loop lloop
= NULL
;
1264 lambda_linear_expression lbound
, ubound
;
1268 tree lboundvar
, uboundvar
, uboundresult
;
1271 /* Find out induction var and exit condition. */
1272 inductionvar
= find_induction_var_from_exit_cond (loop
);
1273 exit_cond
= get_loop_exit_condition (loop
);
1275 if (inductionvar
== NULL
|| exit_cond
== NULL
)
1277 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1279 "Unable to convert loop: Cannot determine exit condition or induction variable for loop.\n");
1283 test
= TREE_OPERAND (exit_cond
, 0);
1285 if (SSA_NAME_DEF_STMT (inductionvar
) == NULL_TREE
)
1288 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1290 "Unable to convert loop: Cannot find PHI node for induction variable\n");
1295 phi
= SSA_NAME_DEF_STMT (inductionvar
);
1296 if (TREE_CODE (phi
) != PHI_NODE
)
1298 uses
= STMT_USE_OPS (phi
);
1303 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1305 "Unable to convert loop: Cannot find PHI node for induction variable\n");
1310 phi
= USE_OP (uses
, 0);
1311 phi
= SSA_NAME_DEF_STMT (phi
);
1312 if (TREE_CODE (phi
) != PHI_NODE
)
1315 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1317 "Unable to convert loop: Cannot find PHI node for induction variable\n");
1323 /* The induction variable name/version we want to put in the array is the
1324 result of the induction variable phi node. */
1325 *ourinductionvar
= PHI_RESULT (phi
);
1326 access_fn
= instantiate_parameters
1327 (loop
, analyze_scalar_evolution (loop
, PHI_RESULT (phi
)));
1328 if (access_fn
== chrec_dont_know
)
1330 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1332 "Unable to convert loop: Access function for induction variable phi is unknown\n");
1337 step
= evolution_part_in_loop_num (access_fn
, loop
->num
);
1338 if (!step
|| step
== chrec_dont_know
)
1340 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1342 "Unable to convert loop: Cannot determine step of loop.\n");
1346 if (TREE_CODE (step
) != INTEGER_CST
)
1349 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1351 "Unable to convert loop: Step of loop is not integer.\n");
1355 stepint
= TREE_INT_CST_LOW (step
);
1357 /* Only want phis for induction vars, which will have two
1359 if (PHI_NUM_ARGS (phi
) != 2)
1361 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1363 "Unable to convert loop: PHI node for induction variable has >2 arguments\n");
1367 /* Another induction variable check. One argument's source should be
1368 in the loop, one outside the loop. */
1369 if (flow_bb_inside_loop_p (loop
, PHI_ARG_EDGE (phi
, 0)->src
)
1370 && flow_bb_inside_loop_p (loop
, PHI_ARG_EDGE (phi
, 1)->src
))
1373 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1375 "Unable to convert loop: PHI edges both inside loop, or both outside loop.\n");
1380 if (flow_bb_inside_loop_p (loop
, PHI_ARG_EDGE (phi
, 0)->src
))
1382 lboundvar
= PHI_ARG_DEF (phi
, 1);
1383 lbound
= gcc_tree_to_linear_expression (depth
, lboundvar
,
1384 outerinductionvars
, *invariants
,
1389 lboundvar
= PHI_ARG_DEF (phi
, 0);
1390 lbound
= gcc_tree_to_linear_expression (depth
, lboundvar
,
1391 outerinductionvars
, *invariants
,
1398 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1400 "Unable to convert loop: Cannot convert lower bound to linear expression\n");
1404 /* One part of the test may be a loop invariant tree. */
1405 VEC_reserve (tree
, heap
, *invariants
, 1);
1406 if (TREE_CODE (TREE_OPERAND (test
, 1)) == SSA_NAME
1407 && invariant_in_loop_and_outer_loops (loop
, TREE_OPERAND (test
, 1)))
1408 VEC_quick_push (tree
, *invariants
, TREE_OPERAND (test
, 1));
1409 else if (TREE_CODE (TREE_OPERAND (test
, 0)) == SSA_NAME
1410 && invariant_in_loop_and_outer_loops (loop
, TREE_OPERAND (test
, 0)))
1411 VEC_quick_push (tree
, *invariants
, TREE_OPERAND (test
, 0));
1413 /* The non-induction variable part of the test is the upper bound variable.
1415 if (TREE_OPERAND (test
, 0) == inductionvar
)
1416 uboundvar
= TREE_OPERAND (test
, 1);
1418 uboundvar
= TREE_OPERAND (test
, 0);
1421 /* We only size the vectors assuming we have, at max, 2 times as many
1422 invariants as we do loops (one for each bound).
1423 This is just an arbitrary number, but it has to be matched against the
1425 gcc_assert (VEC_length (tree
, *invariants
) <= (unsigned int) (2 * depth
));
1428 /* We might have some leftover. */
1429 if (TREE_CODE (test
) == LT_EXPR
)
1430 extra
= -1 * stepint
;
1431 else if (TREE_CODE (test
) == NE_EXPR
)
1432 extra
= -1 * stepint
;
1433 else if (TREE_CODE (test
) == GT_EXPR
)
1434 extra
= -1 * stepint
;
1435 else if (TREE_CODE (test
) == EQ_EXPR
)
1436 extra
= 1 * stepint
;
1438 ubound
= gcc_tree_to_linear_expression (depth
, uboundvar
,
1440 *invariants
, extra
);
1441 uboundresult
= build (PLUS_EXPR
, TREE_TYPE (uboundvar
), uboundvar
,
1442 build_int_cst (TREE_TYPE (uboundvar
), extra
));
1443 VEC_safe_push (tree
, heap
, *uboundvars
, uboundresult
);
1444 VEC_safe_push (tree
, heap
, *lboundvars
, lboundvar
);
1445 VEC_safe_push (int, heap
, *steps
, stepint
);
1448 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1450 "Unable to convert loop: Cannot convert upper bound to linear expression\n");
1454 lloop
= lambda_loop_new ();
1455 LL_STEP (lloop
) = stepint
;
1456 LL_LOWER_BOUND (lloop
) = lbound
;
1457 LL_UPPER_BOUND (lloop
) = ubound
;
1461 /* Given a LOOP, find the induction variable it is testing against in the exit
1462 condition. Return the induction variable if found, NULL otherwise. */
1465 find_induction_var_from_exit_cond (struct loop
*loop
)
1467 tree expr
= get_loop_exit_condition (loop
);
1470 if (expr
== NULL_TREE
)
1472 if (TREE_CODE (expr
) != COND_EXPR
)
1474 test
= TREE_OPERAND (expr
, 0);
1475 if (!COMPARISON_CLASS_P (test
))
1478 /* Find the side that is invariant in this loop. The ivar must be the other
1481 if (expr_invariant_in_loop_p (loop
, TREE_OPERAND (test
, 0)))
1482 ivarop
= TREE_OPERAND (test
, 1);
1483 else if (expr_invariant_in_loop_p (loop
, TREE_OPERAND (test
, 1)))
1484 ivarop
= TREE_OPERAND (test
, 0);
1488 if (TREE_CODE (ivarop
) != SSA_NAME
)
1493 DEF_VEC_P(lambda_loop
);
1494 DEF_VEC_ALLOC_P(lambda_loop
,heap
);
1496 /* Generate a lambda loopnest from a gcc loopnest LOOP_NEST.
1497 Return the new loop nest.
1498 INDUCTIONVARS is a pointer to an array of induction variables for the
1499 loopnest that will be filled in during this process.
1500 INVARIANTS is a pointer to an array of invariants that will be filled in
1501 during this process. */
1504 gcc_loopnest_to_lambda_loopnest (struct loops
*currloops
,
1505 struct loop
* loop_nest
,
1506 VEC(tree
,heap
) **inductionvars
,
1507 VEC(tree
,heap
) **invariants
,
1508 bool need_perfect_nest
)
1510 lambda_loopnest ret
= NULL
;
1514 VEC(lambda_loop
,heap
) *loops
= NULL
;
1515 VEC(tree
,heap
) *uboundvars
= NULL
;
1516 VEC(tree
,heap
) *lboundvars
= NULL
;
1517 VEC(int,heap
) *steps
= NULL
;
1518 lambda_loop newloop
;
1519 tree inductionvar
= NULL
;
1521 depth
= depth_of_nest (loop_nest
);
1525 newloop
= gcc_loop_to_lambda_loop (temp
, depth
, invariants
,
1526 &inductionvar
, *inductionvars
,
1527 &lboundvars
, &uboundvars
,
1531 VEC_safe_push (tree
, heap
, *inductionvars
, inductionvar
);
1532 VEC_safe_push (lambda_loop
, heap
, loops
, newloop
);
1535 if (need_perfect_nest
)
1537 if (!perfect_nestify (currloops
, loop_nest
,
1538 lboundvars
, uboundvars
, steps
, *inductionvars
))
1542 "Not a perfect loop nest and couldn't convert to one.\n");
1547 "Successfully converted loop nest to perfect loop nest.\n");
1549 ret
= lambda_loopnest_new (depth
, 2 * depth
);
1550 for (i
= 0; VEC_iterate (lambda_loop
, loops
, i
, newloop
); i
++)
1551 LN_LOOPS (ret
)[i
] = newloop
;
1553 VEC_free (lambda_loop
, heap
, loops
);
1554 VEC_free (tree
, heap
, uboundvars
);
1555 VEC_free (tree
, heap
, lboundvars
);
1556 VEC_free (int, heap
, steps
);
1561 /* Convert a lambda body vector LBV to a gcc tree, and return the new tree.
1562 STMTS_TO_INSERT is a pointer to a tree where the statements we need to be
1563 inserted for us are stored. INDUCTION_VARS is the array of induction
1564 variables for the loop this LBV is from. TYPE is the tree type to use for
1565 the variables and trees involved. */
1568 lbv_to_gcc_expression (lambda_body_vector lbv
,
1569 tree type
, VEC(tree
,heap
) *induction_vars
,
1570 tree
*stmts_to_insert
)
1572 tree stmts
, stmt
, resvar
, name
;
1575 tree_stmt_iterator tsi
;
1577 /* Create a statement list and a linear expression temporary. */
1578 stmts
= alloc_stmt_list ();
1579 resvar
= create_tmp_var (type
, "lbvtmp");
1580 add_referenced_tmp_var (resvar
);
1583 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
, integer_zero_node
);
1584 name
= make_ssa_name (resvar
, stmt
);
1585 TREE_OPERAND (stmt
, 0) = name
;
1586 tsi
= tsi_last (stmts
);
1587 tsi_link_after (&tsi
, stmt
, TSI_CONTINUE_LINKING
);
1589 for (i
= 0; VEC_iterate (tree
, induction_vars
, i
, iv
); i
++)
1591 if (LBV_COEFFICIENTS (lbv
)[i
] != 0)
1596 /* newname = coefficient * induction_variable */
1597 coeffmult
= build_int_cst (type
, LBV_COEFFICIENTS (lbv
)[i
]);
1598 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
,
1599 fold (build (MULT_EXPR
, type
, iv
, coeffmult
)));
1601 newname
= make_ssa_name (resvar
, stmt
);
1602 TREE_OPERAND (stmt
, 0) = newname
;
1604 tsi
= tsi_last (stmts
);
1605 tsi_link_after (&tsi
, stmt
, TSI_CONTINUE_LINKING
);
1607 /* name = name + newname */
1608 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
,
1609 build (PLUS_EXPR
, type
, name
, newname
));
1610 name
= make_ssa_name (resvar
, stmt
);
1611 TREE_OPERAND (stmt
, 0) = name
;
1613 tsi
= tsi_last (stmts
);
1614 tsi_link_after (&tsi
, stmt
, TSI_CONTINUE_LINKING
);
1619 /* Handle any denominator that occurs. */
1620 if (LBV_DENOMINATOR (lbv
) != 1)
1622 tree denominator
= build_int_cst (type
, LBV_DENOMINATOR (lbv
));
1623 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
,
1624 build (CEIL_DIV_EXPR
, type
, name
, denominator
));
1625 name
= make_ssa_name (resvar
, stmt
);
1626 TREE_OPERAND (stmt
, 0) = name
;
1628 tsi
= tsi_last (stmts
);
1629 tsi_link_after (&tsi
, stmt
, TSI_CONTINUE_LINKING
);
1631 *stmts_to_insert
= stmts
;
1635 /* Convert a linear expression from coefficient and constant form to a
1637 Return the tree that represents the final value of the expression.
1638 LLE is the linear expression to convert.
1639 OFFSET is the linear offset to apply to the expression.
1640 TYPE is the tree type to use for the variables and math.
1641 INDUCTION_VARS is a vector of induction variables for the loops.
1642 INVARIANTS is a vector of the loop nest invariants.
1643 WRAP specifies what tree code to wrap the results in, if there is more than
1644 one (it is either MAX_EXPR, or MIN_EXPR).
1645 STMTS_TO_INSERT Is a pointer to the statement list we fill in with
1646 statements that need to be inserted for the linear expression. */
1649 lle_to_gcc_expression (lambda_linear_expression lle
,
1650 lambda_linear_expression offset
,
1652 VEC(tree
,heap
) *induction_vars
,
1653 VEC(tree
,heap
) *invariants
,
1654 enum tree_code wrap
, tree
*stmts_to_insert
)
1656 tree stmts
, stmt
, resvar
, name
;
1658 tree_stmt_iterator tsi
;
1660 VEC(tree
,heap
) *results
= NULL
;
1662 gcc_assert (wrap
== MAX_EXPR
|| wrap
== MIN_EXPR
);
1664 /* Create a statement list and a linear expression temporary. */
1665 stmts
= alloc_stmt_list ();
1666 resvar
= create_tmp_var (type
, "lletmp");
1667 add_referenced_tmp_var (resvar
);
1669 /* Build up the linear expressions, and put the variable representing the
1670 result in the results array. */
1671 for (; lle
!= NULL
; lle
= LLE_NEXT (lle
))
1673 /* Start at name = 0. */
1674 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
, integer_zero_node
);
1675 name
= make_ssa_name (resvar
, stmt
);
1676 TREE_OPERAND (stmt
, 0) = name
;
1678 tsi
= tsi_last (stmts
);
1679 tsi_link_after (&tsi
, stmt
, TSI_CONTINUE_LINKING
);
1681 /* First do the induction variables.
1682 at the end, name = name + all the induction variables added
1684 for (i
= 0; VEC_iterate (tree
, induction_vars
, i
, iv
); i
++)
1686 if (LLE_COEFFICIENTS (lle
)[i
] != 0)
1692 /* mult = induction variable * coefficient. */
1693 if (LLE_COEFFICIENTS (lle
)[i
] == 1)
1695 mult
= VEC_index (tree
, induction_vars
, i
);
1699 coeff
= build_int_cst (type
,
1700 LLE_COEFFICIENTS (lle
)[i
]);
1701 mult
= fold (build (MULT_EXPR
, type
, iv
, coeff
));
1704 /* newname = mult */
1705 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
, mult
);
1706 newname
= make_ssa_name (resvar
, stmt
);
1707 TREE_OPERAND (stmt
, 0) = newname
;
1709 tsi
= tsi_last (stmts
);
1710 tsi_link_after (&tsi
, stmt
, TSI_CONTINUE_LINKING
);
1712 /* name = name + newname */
1713 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
,
1714 build (PLUS_EXPR
, type
, name
, newname
));
1715 name
= make_ssa_name (resvar
, stmt
);
1716 TREE_OPERAND (stmt
, 0) = name
;
1718 tsi
= tsi_last (stmts
);
1719 tsi_link_after (&tsi
, stmt
, TSI_CONTINUE_LINKING
);
1723 /* Handle our invariants.
1724 At the end, we have name = name + result of adding all multiplied
1726 for (i
= 0; VEC_iterate (tree
, invariants
, i
, invar
); i
++)
1728 if (LLE_INVARIANT_COEFFICIENTS (lle
)[i
] != 0)
1733 int invcoeff
= LLE_INVARIANT_COEFFICIENTS (lle
)[i
];
1734 /* mult = invariant * coefficient */
1741 coeff
= build_int_cst (type
, invcoeff
);
1742 mult
= fold (build (MULT_EXPR
, type
, invar
, coeff
));
1745 /* newname = mult */
1746 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
, mult
);
1747 newname
= make_ssa_name (resvar
, stmt
);
1748 TREE_OPERAND (stmt
, 0) = newname
;
1750 tsi
= tsi_last (stmts
);
1751 tsi_link_after (&tsi
, stmt
, TSI_CONTINUE_LINKING
);
1753 /* name = name + newname */
1754 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
,
1755 build (PLUS_EXPR
, type
, name
, newname
));
1756 name
= make_ssa_name (resvar
, stmt
);
1757 TREE_OPERAND (stmt
, 0) = name
;
1759 tsi
= tsi_last (stmts
);
1760 tsi_link_after (&tsi
, stmt
, TSI_CONTINUE_LINKING
);
1764 /* Now handle the constant.
1765 name = name + constant. */
1766 if (LLE_CONSTANT (lle
) != 0)
1768 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
,
1769 build (PLUS_EXPR
, type
, name
,
1770 build_int_cst (type
, LLE_CONSTANT (lle
))));
1771 name
= make_ssa_name (resvar
, stmt
);
1772 TREE_OPERAND (stmt
, 0) = name
;
1774 tsi
= tsi_last (stmts
);
1775 tsi_link_after (&tsi
, stmt
, TSI_CONTINUE_LINKING
);
1778 /* Now handle the offset.
1779 name = name + linear offset. */
1780 if (LLE_CONSTANT (offset
) != 0)
1782 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
,
1783 build (PLUS_EXPR
, type
, name
,
1784 build_int_cst (type
, LLE_CONSTANT (offset
))));
1785 name
= make_ssa_name (resvar
, stmt
);
1786 TREE_OPERAND (stmt
, 0) = name
;
1788 tsi
= tsi_last (stmts
);
1789 tsi_link_after (&tsi
, stmt
, TSI_CONTINUE_LINKING
);
1792 /* Handle any denominator that occurs. */
1793 if (LLE_DENOMINATOR (lle
) != 1)
1795 stmt
= build_int_cst (type
, LLE_DENOMINATOR (lle
));
1796 stmt
= build (wrap
== MAX_EXPR
? CEIL_DIV_EXPR
: FLOOR_DIV_EXPR
,
1798 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
, stmt
);
1800 /* name = {ceil, floor}(name/denominator) */
1801 name
= make_ssa_name (resvar
, stmt
);
1802 TREE_OPERAND (stmt
, 0) = name
;
1803 tsi
= tsi_last (stmts
);
1804 tsi_link_after (&tsi
, stmt
, TSI_CONTINUE_LINKING
);
1806 VEC_safe_push (tree
, heap
, results
, name
);
1809 /* Again, out of laziness, we don't handle this case yet. It's not
1810 hard, it just hasn't occurred. */
1811 gcc_assert (VEC_length (tree
, results
) <= 2);
1813 /* We may need to wrap the results in a MAX_EXPR or MIN_EXPR. */
1814 if (VEC_length (tree
, results
) > 1)
1816 tree op1
= VEC_index (tree
, results
, 0);
1817 tree op2
= VEC_index (tree
, results
, 1);
1818 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
,
1819 build (wrap
, type
, op1
, op2
));
1820 name
= make_ssa_name (resvar
, stmt
);
1821 TREE_OPERAND (stmt
, 0) = name
;
1822 tsi
= tsi_last (stmts
);
1823 tsi_link_after (&tsi
, stmt
, TSI_CONTINUE_LINKING
);
1826 VEC_free (tree
, heap
, results
);
1828 *stmts_to_insert
= stmts
;
1832 /* Transform a lambda loopnest NEW_LOOPNEST, which had TRANSFORM applied to
1833 it, back into gcc code. This changes the
1834 loops, their induction variables, and their bodies, so that they
1835 match the transformed loopnest.
1836 OLD_LOOPNEST is the loopnest before we've replaced it with the new
1838 OLD_IVS is a vector of induction variables from the old loopnest.
1839 INVARIANTS is a vector of loop invariants from the old loopnest.
1840 NEW_LOOPNEST is the new lambda loopnest to replace OLD_LOOPNEST with.
1841 TRANSFORM is the matrix transform that was applied to OLD_LOOPNEST to get
1845 lambda_loopnest_to_gcc_loopnest (struct loop
*old_loopnest
,
1846 VEC(tree
,heap
) *old_ivs
,
1847 VEC(tree
,heap
) *invariants
,
1848 lambda_loopnest new_loopnest
,
1849 lambda_trans_matrix transform
)
1854 VEC(tree
,heap
) *new_ivs
= NULL
;
1857 block_stmt_iterator bsi
;
1861 transform
= lambda_trans_matrix_inverse (transform
);
1862 fprintf (dump_file
, "Inverse of transformation matrix:\n");
1863 print_lambda_trans_matrix (dump_file
, transform
);
1865 depth
= depth_of_nest (old_loopnest
);
1866 temp
= old_loopnest
;
1870 lambda_loop newloop
;
1873 tree ivvar
, ivvarinced
, exitcond
, stmts
;
1874 enum tree_code testtype
;
1875 tree newupperbound
, newlowerbound
;
1876 lambda_linear_expression offset
;
1881 oldiv
= VEC_index (tree
, old_ivs
, i
);
1882 type
= TREE_TYPE (oldiv
);
1884 /* First, build the new induction variable temporary */
1886 ivvar
= create_tmp_var (type
, "lnivtmp");
1887 add_referenced_tmp_var (ivvar
);
1889 VEC_safe_push (tree
, heap
, new_ivs
, ivvar
);
1891 newloop
= LN_LOOPS (new_loopnest
)[i
];
1893 /* Linear offset is a bit tricky to handle. Punt on the unhandled
1895 offset
= LL_LINEAR_OFFSET (newloop
);
1897 gcc_assert (LLE_DENOMINATOR (offset
) == 1 &&
1898 lambda_vector_zerop (LLE_COEFFICIENTS (offset
), depth
));
1900 /* Now build the new lower bounds, and insert the statements
1901 necessary to generate it on the loop preheader. */
1902 newlowerbound
= lle_to_gcc_expression (LL_LOWER_BOUND (newloop
),
1903 LL_LINEAR_OFFSET (newloop
),
1906 invariants
, MAX_EXPR
, &stmts
);
1907 bsi_insert_on_edge (loop_preheader_edge (temp
), stmts
);
1908 bsi_commit_edge_inserts ();
1909 /* Build the new upper bound and insert its statements in the
1910 basic block of the exit condition */
1911 newupperbound
= lle_to_gcc_expression (LL_UPPER_BOUND (newloop
),
1912 LL_LINEAR_OFFSET (newloop
),
1915 invariants
, MIN_EXPR
, &stmts
);
1916 exit
= temp
->single_exit
;
1917 exitcond
= get_loop_exit_condition (temp
);
1918 bb
= bb_for_stmt (exitcond
);
1919 bsi
= bsi_start (bb
);
1920 bsi_insert_after (&bsi
, stmts
, BSI_NEW_STMT
);
1922 /* Create the new iv. */
1924 standard_iv_increment_position (temp
, &bsi
, &insert_after
);
1925 create_iv (newlowerbound
,
1926 build_int_cst (type
, LL_STEP (newloop
)),
1927 ivvar
, temp
, &bsi
, insert_after
, &ivvar
,
1930 /* Unfortunately, the incremented ivvar that create_iv inserted may not
1931 dominate the block containing the exit condition.
1932 So we simply create our own incremented iv to use in the new exit
1933 test, and let redundancy elimination sort it out. */
1934 inc_stmt
= build (PLUS_EXPR
, type
,
1935 ivvar
, build_int_cst (type
, LL_STEP (newloop
)));
1936 inc_stmt
= build (MODIFY_EXPR
, void_type_node
, SSA_NAME_VAR (ivvar
),
1938 ivvarinced
= make_ssa_name (SSA_NAME_VAR (ivvar
), inc_stmt
);
1939 TREE_OPERAND (inc_stmt
, 0) = ivvarinced
;
1940 bsi
= bsi_for_stmt (exitcond
);
1941 bsi_insert_before (&bsi
, inc_stmt
, BSI_SAME_STMT
);
1943 /* Replace the exit condition with the new upper bound
1946 testtype
= LL_STEP (newloop
) >= 0 ? LE_EXPR
: GE_EXPR
;
1948 /* We want to build a conditional where true means exit the loop, and
1949 false means continue the loop.
1950 So swap the testtype if this isn't the way things are.*/
1952 if (exit
->flags
& EDGE_FALSE_VALUE
)
1953 testtype
= swap_tree_comparison (testtype
);
1955 COND_EXPR_COND (exitcond
) = build (testtype
,
1957 newupperbound
, ivvarinced
);
1958 update_stmt (exitcond
);
1959 VEC_replace (tree
, new_ivs
, i
, ivvar
);
1965 /* Rewrite uses of the old ivs so that they are now specified in terms of
1968 for (i
= 0; VEC_iterate (tree
, old_ivs
, i
, oldiv
); i
++)
1970 imm_use_iterator imm_iter
;
1971 use_operand_p imm_use
;
1973 tree oldiv_stmt
= SSA_NAME_DEF_STMT (oldiv
);
1975 gcc_assert (TREE_CODE (oldiv_stmt
) == PHI_NODE
1976 || NUM_DEFS (STMT_DEF_OPS (oldiv_stmt
)) == 1);
1977 if (TREE_CODE (oldiv_stmt
) == PHI_NODE
)
1978 oldiv_def
= PHI_RESULT (oldiv_stmt
);
1980 oldiv_def
= DEF_OP (STMT_DEF_OPS (oldiv_stmt
), 0);
1982 FOR_EACH_IMM_USE_SAFE (imm_use
, imm_iter
, oldiv_def
)
1984 tree stmt
= USE_STMT (imm_use
);
1985 use_operand_p use_p
;
1987 gcc_assert (TREE_CODE (stmt
) != PHI_NODE
);
1988 FOR_EACH_SSA_USE_OPERAND (use_p
, stmt
, iter
, SSA_OP_USE
)
1990 if (USE_FROM_PTR (use_p
) == oldiv
)
1993 lambda_body_vector lbv
, newlbv
;
1994 /* Compute the new expression for the induction
1996 depth
= VEC_length (tree
, new_ivs
);
1997 lbv
= lambda_body_vector_new (depth
);
1998 LBV_COEFFICIENTS (lbv
)[i
] = 1;
2000 newlbv
= lambda_body_vector_compute_new (transform
, lbv
);
2002 newiv
= lbv_to_gcc_expression (newlbv
, TREE_TYPE (oldiv
),
2004 bsi
= bsi_for_stmt (stmt
);
2005 /* Insert the statements to build that
2007 bsi_insert_before (&bsi
, stmts
, BSI_SAME_STMT
);
2008 propagate_value (use_p
, newiv
);
2015 VEC_free (tree
, heap
, new_ivs
);
2018 /* Returns true when the vector V is lexicographically positive, in
2019 other words, when the first nonzero element is positive. */
2022 lambda_vector_lexico_pos (lambda_vector v
,
2026 for (i
= 0; i
< n
; i
++)
2039 /* Return TRUE if this is not interesting statement from the perspective of
2040 determining if we have a perfect loop nest. */
2043 not_interesting_stmt (tree stmt
)
2045 /* Note that COND_EXPR's aren't interesting because if they were exiting the
2046 loop, we would have already failed the number of exits tests. */
2047 if (TREE_CODE (stmt
) == LABEL_EXPR
2048 || TREE_CODE (stmt
) == GOTO_EXPR
2049 || TREE_CODE (stmt
) == COND_EXPR
)
2054 /* Return TRUE if PHI uses DEF for it's in-the-loop edge for LOOP. */
2057 phi_loop_edge_uses_def (struct loop
*loop
, tree phi
, tree def
)
2060 for (i
= 0; i
< PHI_NUM_ARGS (phi
); i
++)
2061 if (flow_bb_inside_loop_p (loop
, PHI_ARG_EDGE (phi
, i
)->src
))
2062 if (PHI_ARG_DEF (phi
, i
) == def
)
2067 /* Return TRUE if STMT is a use of PHI_RESULT. */
2070 stmt_uses_phi_result (tree stmt
, tree phi_result
)
2072 use_optype uses
= STMT_USE_OPS (stmt
);
2074 /* This is conservatively true, because we only want SIMPLE bumpers
2075 of the form x +- constant for our pass. */
2076 if (NUM_USES (uses
) != 1)
2078 if (USE_OP (uses
, 0) == phi_result
)
2084 /* STMT is a bumper stmt for LOOP if the version it defines is used in the
2085 in-loop-edge in a phi node, and the operand it uses is the result of that
2088 i_3 = PHI (0, i_29); */
2091 stmt_is_bumper_for_loop (struct loop
*loop
, tree stmt
)
2095 def_optype defs
= STMT_DEF_OPS (stmt
);
2096 imm_use_iterator iter
;
2097 use_operand_p use_p
;
2099 if (NUM_DEFS (defs
) != 1)
2101 def
= DEF_OP (defs
, 0);
2102 FOR_EACH_IMM_USE_FAST (use_p
, iter
, def
)
2104 use
= USE_STMT (use_p
);
2105 if (TREE_CODE (use
) == PHI_NODE
)
2107 if (phi_loop_edge_uses_def (loop
, use
, def
))
2108 if (stmt_uses_phi_result (stmt
, PHI_RESULT (use
)))
2116 /* Return true if LOOP is a perfect loop nest.
2117 Perfect loop nests are those loop nests where all code occurs in the
2118 innermost loop body.
2119 If S is a program statement, then
2128 is not a perfect loop nest because of S1.
2136 is a perfect loop nest.
2138 Since we don't have high level loops anymore, we basically have to walk our
2139 statements and ignore those that are there because the loop needs them (IE
2140 the induction variable increment, and jump back to the top of the loop). */
2143 perfect_nest_p (struct loop
*loop
)
2151 bbs
= get_loop_body (loop
);
2152 exit_cond
= get_loop_exit_condition (loop
);
2153 for (i
= 0; i
< loop
->num_nodes
; i
++)
2155 if (bbs
[i
]->loop_father
== loop
)
2157 block_stmt_iterator bsi
;
2158 for (bsi
= bsi_start (bbs
[i
]); !bsi_end_p (bsi
); bsi_next (&bsi
))
2160 tree stmt
= bsi_stmt (bsi
);
2161 if (stmt
== exit_cond
2162 || not_interesting_stmt (stmt
)
2163 || stmt_is_bumper_for_loop (loop
, stmt
))
2171 /* See if the inner loops are perfectly nested as well. */
2173 return perfect_nest_p (loop
->inner
);
2177 /* Replace the USES of tree X in STMT with tree Y */
2180 replace_uses_of_x_with_y (tree stmt
, tree x
, tree y
)
2182 use_optype uses
= STMT_USE_OPS (stmt
);
2184 for (i
= 0; i
< NUM_USES (uses
); i
++)
2186 if (USE_OP (uses
, i
) == x
)
2187 SET_USE_OP (uses
, i
, y
);
2191 /* Return TRUE if STMT uses tree OP in it's uses. */
2194 stmt_uses_op (tree stmt
, tree op
)
2196 use_optype uses
= STMT_USE_OPS (stmt
);
2198 for (i
= 0; i
< NUM_USES (uses
); i
++)
2200 if (USE_OP (uses
, i
) == op
)
2206 /* Return TRUE if LOOP is an imperfect nest that we can convert to a perfect
2207 one. LOOPIVS is a vector of induction variables, one per loop.
2208 ATM, we only handle imperfect nests of depth 2, where all of the statements
2209 occur after the inner loop. */
2212 can_convert_to_perfect_nest (struct loop
*loop
,
2213 VEC(tree
,heap
) *loopivs
)
2216 tree exit_condition
, phi
;
2218 block_stmt_iterator bsi
;
2219 basic_block exitdest
;
2221 /* Can't handle triply nested+ loops yet. */
2222 if (!loop
->inner
|| loop
->inner
->inner
)
2225 /* We only handle moving the after-inner-body statements right now, so make
2226 sure all the statements we need to move are located in that position. */
2227 bbs
= get_loop_body (loop
);
2228 exit_condition
= get_loop_exit_condition (loop
);
2229 for (i
= 0; i
< loop
->num_nodes
; i
++)
2231 if (bbs
[i
]->loop_father
== loop
)
2233 for (bsi
= bsi_start (bbs
[i
]); !bsi_end_p (bsi
); bsi_next (&bsi
))
2236 tree stmt
= bsi_stmt (bsi
);
2239 if (stmt
== exit_condition
2240 || not_interesting_stmt (stmt
)
2241 || stmt_is_bumper_for_loop (loop
, stmt
))
2243 /* If the statement uses inner loop ivs, we == screwed. */
2244 for (j
= 1; VEC_iterate (tree
, loopivs
, j
, iv
); j
++)
2245 if (stmt_uses_op (stmt
, iv
))
2248 /* If the bb of a statement we care about isn't dominated by
2249 the header of the inner loop, then we are also screwed. */
2250 if (!dominated_by_p (CDI_DOMINATORS
,
2252 loop
->inner
->header
))
2258 /* We also need to make sure the loop exit only has simple copy phis in it,
2259 otherwise we don't know how to transform it into a perfect nest right
2261 exitdest
= loop
->single_exit
->dest
;
2263 for (phi
= phi_nodes (exitdest
); phi
; phi
= PHI_CHAIN (phi
))
2264 if (PHI_NUM_ARGS (phi
) != 1)
2275 /* Transform the loop nest into a perfect nest, if possible.
2276 LOOPS is the current struct loops *
2277 LOOP is the loop nest to transform into a perfect nest
2278 LBOUNDS are the lower bounds for the loops to transform
2279 UBOUNDS are the upper bounds for the loops to transform
2280 STEPS is the STEPS for the loops to transform.
2281 LOOPIVS is the induction variables for the loops to transform.
2283 Basically, for the case of
2285 FOR (i = 0; i < 50; i++)
2287 FOR (j =0; j < 50; j++)
2294 This function will transform it into a perfect loop nest by splitting the
2295 outer loop into two loops, like so:
2297 FOR (i = 0; i < 50; i++)
2299 FOR (j = 0; j < 50; j++)
2305 FOR (i = 0; i < 50; i ++)
2310 Return FALSE if we can't make this loop into a perfect nest. */
/* NOTE(review): this region looks like a damaged extraction.  The embedded
   original line numbers jump (2334 -> 2337, 2420 -> 2423, 2427 -> 2432, ...),
   so braces, guard bodies, and several statements appear to have been
   dropped, and statements are wrapped mid-expression.  Do not edit logic
   here without first comparing against the upstream GCC lambda-code.c.  */
2312 perfect_nestify (struct loops
*loops
,
2314 VEC(tree
,heap
) *lbounds
,
2315 VEC(tree
,heap
) *ubounds
,
2316 VEC(int,heap
) *steps
,
2317 VEC(tree
,heap
) *loopivs
)
2320 tree exit_condition
;
2321 tree then_label
, else_label
, cond_stmt
;
2322 basic_block preheaderbb
, headerbb
, bodybb
, latchbb
, olddest
;
2324 block_stmt_iterator bsi
;
2327 struct loop
*newloop
;
2331 tree oldivvar
, ivvar
, ivvarinced
;
2332 VEC(tree
,heap
) *phis
= NULL
;
/* Give up unless the nest can be converted; the 'return false' body for
   this guard is not visible in this extraction.  */
2334 if (!can_convert_to_perfect_nest (loop
, loopivs
))
2337 /* Create the new loop */
2339 olddest
= loop
->single_exit
->dest
;
/* Split the single exit edge so PREHEADERBB becomes the preheader of the
   new loop; HEADERBB is a fresh empty block placed before the exit block.  */
2340 preheaderbb
= loop_split_edge_with (loop
->single_exit
, NULL
);
2341 headerbb
= create_empty_bb (EXIT_BLOCK_PTR
->prev_bb
);
2343 /* Push the exit phi nodes that we are moving. */
/* Each phi contributes two stack entries: its result, then its arg 0.  */
2344 for (phi
= phi_nodes (olddest
); phi
; phi
= PHI_CHAIN (phi
))
2346 VEC_reserve (tree
, heap
, phis
, 2);
2347 VEC_quick_push (tree
, phis
, PHI_RESULT (phi
));
2348 VEC_quick_push (tree
, phis
, PHI_ARG_DEF (phi
, 0));
2350 e
= redirect_edge_and_branch (single_succ_edge (preheaderbb
), headerbb
);
2352 /* Remove the exit phis from the old basic block. Make sure to set
2353 PHI_RESULT to null so it doesn't get released. */
2354 while (phi_nodes (olddest
) != NULL
)
2356 SET_PHI_RESULT (phi_nodes (olddest
), NULL
);
2357 remove_phi_node (phi_nodes (olddest
), NULL
);
2360 /* and add them back to the new basic block. */
/* Pops mirror the pushes above in reverse: arg 0 (DEF) first, then the
   phi result (PHINAME).  */
2361 while (VEC_length (tree
, phis
) != 0)
2365 def
= VEC_pop (tree
, phis
);
2366 phiname
= VEC_pop (tree
, phis
);
2367 phi
= create_phi_node (phiname
, preheaderbb
);
2368 add_phi_arg (phi
, def
, single_pred_edge (preheaderbb
));
2370 flush_pending_stmts (e
);
2371 VEC_free (tree
, heap
, phis
);
/* Build the skeleton of the new loop: body and latch blocks, plus the
   exit test in BODYBB that either continues to LATCHBB (true edge) or
   leaves to OLDDEST (false edge).  */
2373 bodybb
= create_empty_bb (EXIT_BLOCK_PTR
->prev_bb
);
2374 latchbb
= create_empty_bb (EXIT_BLOCK_PTR
->prev_bb
);
2375 make_edge (headerbb
, bodybb
, EDGE_FALLTHRU
);
2376 then_label
= build1 (GOTO_EXPR
, void_type_node
, tree_block_label (latchbb
));
2377 else_label
= build1 (GOTO_EXPR
, void_type_node
, tree_block_label (olddest
));
/* The NE_EXPR operands (original lines 2380-2381) are not visible here --
   presumably the new iv against a bound; confirm against upstream.  */
2378 cond_stmt
= build (COND_EXPR
, void_type_node
,
2379 build (NE_EXPR
, boolean_type_node
,
2382 then_label
, else_label
);
2383 bsi
= bsi_start (bodybb
);
2384 bsi_insert_after (&bsi
, cond_stmt
, BSI_NEW_STMT
);
2385 e
= make_edge (bodybb
, olddest
, EDGE_FALSE_VALUE
);
2386 make_edge (bodybb
, latchbb
, EDGE_TRUE_VALUE
);
2387 make_edge (latchbb
, headerbb
, EDGE_FALLTHRU
);
2389 /* Update the loop structures. */
2390 newloop
= duplicate_loop (loops
, loop
, olddest
->loop_father
);
2391 newloop
->header
= headerbb
;
2392 newloop
->latch
= latchbb
;
2393 newloop
->single_exit
= e
;
2394 add_bb_to_loop (latchbb
, newloop
);
2395 add_bb_to_loop (bodybb
, newloop
);
2396 add_bb_to_loop (headerbb
, newloop
);
/* Keep the dominator tree consistent with the edges created above.  */
2397 set_immediate_dominator (CDI_DOMINATORS
, bodybb
, headerbb
);
2398 set_immediate_dominator (CDI_DOMINATORS
, headerbb
, preheaderbb
);
2399 set_immediate_dominator (CDI_DOMINATORS
, preheaderbb
,
2400 loop
->single_exit
->src
);
2401 set_immediate_dominator (CDI_DOMINATORS
, latchbb
, bodybb
);
2402 set_immediate_dominator (CDI_DOMINATORS
, olddest
, bodybb
);
2403 /* Create the new iv. */
2404 ivvar
= create_tmp_var (integer_type_node
, "perfectiv");
2405 add_referenced_tmp_var (ivvar
);
2406 standard_iv_increment_position (newloop
, &bsi
, &insert_after
);
/* The new iv starts at LBOUNDS[0] and steps by STEPS[0]; CREATE_IV also
   hands back the incremented name in IVVARINCED.  */
2407 create_iv (VEC_index (tree
, lbounds
, 0),
2408 build_int_cst (integer_type_node
, VEC_index (int, steps
, 0)),
2409 ivvar
, newloop
, &bsi
, insert_after
, &ivvar
, &ivvarinced
);
2411 /* Create the new upper bound. This may be not just a variable, so we copy
2412 it to one just in case. */
2414 exit_condition
= get_loop_exit_condition (newloop
);
2415 uboundvar
= create_tmp_var (integer_type_node
, "uboundvar");
2416 add_referenced_tmp_var (uboundvar
);
2417 stmt
= build (MODIFY_EXPR
, void_type_node
, uboundvar
,
2418 VEC_index (tree
, ubounds
, 0));
2419 uboundvar
= make_ssa_name (uboundvar
, stmt
);
2420 TREE_OPERAND (stmt
, 0) = uboundvar
;
/* Two alternative insertions of STMT appear below; the conditional that
   chose between them (original lines 2421-2424) seems to have been lost
   in extraction.  */
2423 bsi_insert_after (&bsi
, stmt
, BSI_SAME_STMT
);
2425 bsi_insert_before (&bsi
, stmt
, BSI_SAME_STMT
);
/* Rewrite the exit test as a GE_EXPR; its operands (original lines
   2428-2431) are not visible in this extraction.  */
2427 COND_EXPR_COND (exit_condition
) = build (GE_EXPR
,
2432 bbs
= get_loop_body (loop
);
2433 /* Now replace the induction variable in the moved statements with the
2434 correct loop induction variable. */
2435 oldivvar
= VEC_index (tree
, loopivs
, 0);
2436 for (i
= 0; i
< loop
->num_nodes
; i
++)
2438 block_stmt_iterator tobsi
= bsi_last (bodybb
);
2439 if (bbs
[i
]->loop_father
== loop
)
2441 /* Note that the bsi only needs to be explicitly incremented
2442 when we don't move something, since it is automatically
2443 incremented when we do. */
2444 for (bsi
= bsi_start (bbs
[i
]); !bsi_end_p (bsi
);)
2447 tree n
, stmt
= bsi_stmt (bsi
);
/* Skip the exit test, uninteresting statements, and iv bumpers; the
   guard's body (original lines 2452-2456) is not visible here.  */
2449 if (stmt
== exit_condition
2450 || not_interesting_stmt (stmt
)
2451 || stmt_is_bumper_for_loop (loop
, stmt
))
/* Retarget the statement at the new iv and move it into BODYBB.  */
2457 replace_uses_of_x_with_y (stmt
, oldivvar
, ivvar
);
2458 bsi_move_before (&bsi
, &tobsi
);
2460 /* If the statement has any virtual operands, they may
2461 need to be rewired because the original loop may
2462 still reference them. */
2463 FOR_EACH_SSA_TREE_OPERAND (n
, stmt
, i
, SSA_OP_ALL_VIRTUALS
)
2464 mark_sym_for_renaming (SSA_NAME_VAR (n
));
/* Report success only if the nest is now actually perfect.  */
2470 return perfect_nest_p (loop
);
2473 /* Return true if TRANS is a legal transformation matrix that respects
2474 the dependence vectors in DISTS and DIRS. The conservative answer
2477 "Wolfe proves that a unimodular transformation represented by the
2478 matrix T is legal when applied to a loop nest with a set of
2479 lexicographically non-negative distance vectors RDG if and only if
2480 for each vector d in RDG, (T.d >= 0) is lexicographically positive.
2481 i.e.: if and only if it transforms the lexicographically positive
2482 distance vectors to lexicographically positive vectors. Note that
2483 a unimodular matrix must transform the zero vector (and only it) to
2484 the zero vector." S.Muchnick. */
2487 lambda_transform_legal_p (lambda_trans_matrix trans
,
2489 varray_type dependence_relations
)
2492 lambda_vector distres
;
2493 struct data_dependence_relation
*ddr
;
2495 gcc_assert (LTM_COLSIZE (trans
) == nb_loops
2496 && LTM_ROWSIZE (trans
) == nb_loops
);
2498 /* When there is an unknown relation in the dependence_relations, we
2499 know that it is no worth looking at this loop nest: give up. */
2500 ddr
= (struct data_dependence_relation
*)
2501 VARRAY_GENERIC_PTR (dependence_relations
, 0);
2504 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
2507 distres
= lambda_vector_new (nb_loops
);
2509 /* For each distance vector in the dependence graph. */
2510 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (dependence_relations
); i
++)
2512 ddr
= (struct data_dependence_relation
*)
2513 VARRAY_GENERIC_PTR (dependence_relations
, i
);
2515 /* Don't care about relations for which we know that there is no
2516 dependence, nor about read-read (aka. output-dependences):
2517 these data accesses can happen in any order. */
2518 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
2519 || (DR_IS_READ (DDR_A (ddr
)) && DR_IS_READ (DDR_B (ddr
))))
2522 /* Conservatively answer: "this transformation is not valid". */
2523 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
2526 /* If the dependence could not be captured by a distance vector,
2527 conservatively answer that the transform is not valid. */
2528 if (DDR_DIST_VECT (ddr
) == NULL
)
2531 /* Compute trans.dist_vect */
2532 lambda_matrix_vector_mult (LTM_MATRIX (trans
), nb_loops
, nb_loops
,
2533 DDR_DIST_VECT (ddr
), distres
);
2535 if (!lambda_vector_lexico_pos (distres
, nb_loops
))