1 /* Loop transformation code generation
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dberlin@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
24 #include "coretypes.h"
31 #include "basic-block.h"
32 #include "diagnostic.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
39 #include "tree-chrec.h"
40 #include "tree-data-ref.h"
41 #include "tree-pass.h"
42 #include "tree-scalar-evolution.h"
46 /* This loop nest code generation is based on non-singular matrix
49 A little terminology and a general sketch of the algorithm. See "A singular
50 loop transformation framework based on non-singular matrices" by Wei Li and
51 Keshav Pingali for formal proofs that the various statements below are
54 A loop iteration space are the points traversed by the loop. A point in the
55 iteration space can be represented by a vector of size <loop depth>. You can
56 therefore represent the iteration space as a integral combinations of a set
59 A loop iteration space is dense if every integer point between the loop
60 bounds is a point in the iteration space. Every loop with a step of 1
61 therefore has a dense iteration space.
63 for i = 1 to 3, step 1 is a dense iteration space.
65 A loop iteration space is sparse if it is not dense. That is, the iteration
66 space skips integer points that are within the loop bounds.
68 for i = 1 to 3, step 2 is a sparse iteration space, because the integer point
71 Dense source spaces are easy to transform, because they don't skip any
72 points to begin with. Thus we can compute the exact bounds of the target
73 space using min/max and floor/ceil.
75 For a dense source space, we take the transformation matrix, decompose it
76 into a lower triangular part (H) and a unimodular part (U).
77 We then compute the auxiliary space from the unimodular part (source loop
78 nest . U = auxiliary space) , which has two important properties:
79 1. It traverses the iterations in the same lexicographic order as the source
81 2. It is a dense space when the source is a dense space (even if the target
82 space is going to be sparse).
84 Given the auxiliary space, we use the lower triangular part to compute the
85 bounds in the target space by simple matrix multiplication.
86 The gaps in the target space (IE the new loop step sizes) will be the
87 diagonals of the H matrix.
89 Sparse source spaces require another step, because you can't directly compute
90 the exact bounds of the auxiliary and target space from the sparse space.
91 Rather than try to come up with a separate algorithm to handle sparse source
92 spaces directly, we just find a legal transformation matrix that gives you
93 the sparse source space, from a dense space, and then transform the dense
96 For a regular sparse space, you can represent the source space as an integer
97 lattice, and the base space of that lattice will always be dense. Thus, we
98 effectively use the lattice to figure out the transformation from the lattice
99 base space, to the sparse iteration space (IE what transform was applied to
100 the dense space to make it sparse). We then compose this transform with the
101 transformation matrix specified by the user (since our matrix transformations
102 are closed under composition, this is okay). We can then use the base space
103 (which is dense) plus the composed transformation matrix, to compute the rest
104 of the transform using the dense space algorithm above.
106 In other words, our sparse source space (B) is decomposed into a dense base
107 space (A), and a matrix (L) that transforms A into B, such that A.L = B.
108 We then compute the composition of L and the user transformation matrix (T),
109 so that T is now a transform from A to the result, instead of from B to the
111 IE A.(LT) = result instead of B.T = result
112 Since A is now a dense source space, we can use the dense source space
113 algorithm above to compute the result of applying transform (LT) to A.
115 Fourier-Motzkin elimination is used to compute the bounds of the base space
118 /* Lattice stuff that is internal to the code generation algorithm. */
122 /* Lattice base matrix. */
124 /* Lattice dimension. */
126 /* Origin vector for the coefficients. */
127 lambda_vector origin
;
128 /* Origin matrix for the invariants. */
129 lambda_matrix origin_invariants
;
130 /* Number of invariants. */
134 #define LATTICE_BASE(T) ((T)->base)
135 #define LATTICE_DIMENSION(T) ((T)->dimension)
136 #define LATTICE_ORIGIN(T) ((T)->origin)
137 #define LATTICE_ORIGIN_INVARIANTS(T) ((T)->origin_invariants)
138 #define LATTICE_INVARIANTS(T) ((T)->invariants)
140 static bool lle_equal (lambda_linear_expression
, lambda_linear_expression
,
142 static lambda_lattice
lambda_lattice_new (int, int);
143 static lambda_lattice
lambda_lattice_compute_base (lambda_loopnest
);
145 static tree
find_induction_var_from_exit_cond (struct loop
*);
147 /* Create a new lambda body vector. */
150 lambda_body_vector_new (int size
)
152 lambda_body_vector ret
;
154 ret
= ggc_alloc (sizeof (*ret
));
155 LBV_COEFFICIENTS (ret
) = lambda_vector_new (size
);
156 LBV_SIZE (ret
) = size
;
157 LBV_DENOMINATOR (ret
) = 1;
161 /* Compute the new coefficients for the vector based on the
162 *inverse* of the transformation matrix. */
165 lambda_body_vector_compute_new (lambda_trans_matrix transform
,
166 lambda_body_vector vect
)
168 lambda_body_vector temp
;
171 /* Make sure the matrix is square. */
172 gcc_assert (LTM_ROWSIZE (transform
) == LTM_COLSIZE (transform
));
174 depth
= LTM_ROWSIZE (transform
);
176 temp
= lambda_body_vector_new (depth
);
177 LBV_DENOMINATOR (temp
) =
178 LBV_DENOMINATOR (vect
) * LTM_DENOMINATOR (transform
);
179 lambda_vector_matrix_mult (LBV_COEFFICIENTS (vect
), depth
,
180 LTM_MATRIX (transform
), depth
,
181 LBV_COEFFICIENTS (temp
));
182 LBV_SIZE (temp
) = LBV_SIZE (vect
);
186 /* Print out a lambda body vector. */
189 print_lambda_body_vector (FILE * outfile
, lambda_body_vector body
)
191 print_lambda_vector (outfile
, LBV_COEFFICIENTS (body
), LBV_SIZE (body
));
194 /* Return TRUE if two linear expressions are equal. */
197 lle_equal (lambda_linear_expression lle1
, lambda_linear_expression lle2
,
198 int depth
, int invariants
)
202 if (lle1
== NULL
|| lle2
== NULL
)
204 if (LLE_CONSTANT (lle1
) != LLE_CONSTANT (lle2
))
206 if (LLE_DENOMINATOR (lle1
) != LLE_DENOMINATOR (lle2
))
208 for (i
= 0; i
< depth
; i
++)
209 if (LLE_COEFFICIENTS (lle1
)[i
] != LLE_COEFFICIENTS (lle2
)[i
])
211 for (i
= 0; i
< invariants
; i
++)
212 if (LLE_INVARIANT_COEFFICIENTS (lle1
)[i
] !=
213 LLE_INVARIANT_COEFFICIENTS (lle2
)[i
])
218 /* Create a new linear expression with dimension DIM, and total number
219 of invariants INVARIANTS. */
221 lambda_linear_expression
222 lambda_linear_expression_new (int dim
, int invariants
)
224 lambda_linear_expression ret
;
226 ret
= ggc_alloc_cleared (sizeof (*ret
));
228 LLE_COEFFICIENTS (ret
) = lambda_vector_new (dim
);
229 LLE_CONSTANT (ret
) = 0;
230 LLE_INVARIANT_COEFFICIENTS (ret
) = lambda_vector_new (invariants
);
231 LLE_DENOMINATOR (ret
) = 1;
232 LLE_NEXT (ret
) = NULL
;
237 /* Print out a linear expression EXPR, with SIZE coefficients, to OUTFILE.
238 The starting letter used for variable names is START. */
241 print_linear_expression (FILE * outfile
, lambda_vector expr
, int size
,
246 for (i
= 0; i
< size
; i
++)
253 fprintf (outfile
, "-");
256 else if (expr
[i
] > 0)
257 fprintf (outfile
, " + ");
259 fprintf (outfile
, " - ");
260 if (abs (expr
[i
]) == 1)
261 fprintf (outfile
, "%c", start
+ i
);
263 fprintf (outfile
, "%d%c", abs (expr
[i
]), start
+ i
);
268 /* Print out a lambda linear expression structure, EXPR, to OUTFILE. The
269 depth/number of coefficients is given by DEPTH, the number of invariants is
270 given by INVARIANTS, and the character to start variable names with is given
274 print_lambda_linear_expression (FILE * outfile
,
275 lambda_linear_expression expr
,
276 int depth
, int invariants
, char start
)
278 fprintf (outfile
, "\tLinear expression: ");
279 print_linear_expression (outfile
, LLE_COEFFICIENTS (expr
), depth
, start
);
280 fprintf (outfile
, " constant: %d ", LLE_CONSTANT (expr
));
281 fprintf (outfile
, " invariants: ");
282 print_linear_expression (outfile
, LLE_INVARIANT_COEFFICIENTS (expr
),
284 fprintf (outfile
, " denominator: %d\n", LLE_DENOMINATOR (expr
));
287 /* Print a lambda loop structure LOOP to OUTFILE. The depth/number of
288 coefficients is given by DEPTH, the number of invariants is
289 given by INVARIANTS, and the character to start variable names with is given
293 print_lambda_loop (FILE * outfile
, lambda_loop loop
, int depth
,
294 int invariants
, char start
)
297 lambda_linear_expression expr
;
301 expr
= LL_LINEAR_OFFSET (loop
);
302 step
= LL_STEP (loop
);
303 fprintf (outfile
, " step size = %d \n", step
);
307 fprintf (outfile
, " linear offset: \n");
308 print_lambda_linear_expression (outfile
, expr
, depth
, invariants
,
312 fprintf (outfile
, " lower bound: \n");
313 for (expr
= LL_LOWER_BOUND (loop
); expr
!= NULL
; expr
= LLE_NEXT (expr
))
314 print_lambda_linear_expression (outfile
, expr
, depth
, invariants
, start
);
315 fprintf (outfile
, " upper bound: \n");
316 for (expr
= LL_UPPER_BOUND (loop
); expr
!= NULL
; expr
= LLE_NEXT (expr
))
317 print_lambda_linear_expression (outfile
, expr
, depth
, invariants
, start
);
320 /* Create a new loop nest structure with DEPTH loops, and INVARIANTS as the
321 number of invariants. */
324 lambda_loopnest_new (int depth
, int invariants
)
327 ret
= ggc_alloc (sizeof (*ret
));
329 LN_LOOPS (ret
) = ggc_alloc_cleared (depth
* sizeof (lambda_loop
));
330 LN_DEPTH (ret
) = depth
;
331 LN_INVARIANTS (ret
) = invariants
;
336 /* Print a lambda loopnest structure, NEST, to OUTFILE. The starting
337 character to use for loop names is given by START. */
340 print_lambda_loopnest (FILE * outfile
, lambda_loopnest nest
, char start
)
343 for (i
= 0; i
< LN_DEPTH (nest
); i
++)
345 fprintf (outfile
, "Loop %c\n", start
+ i
);
346 print_lambda_loop (outfile
, LN_LOOPS (nest
)[i
], LN_DEPTH (nest
),
347 LN_INVARIANTS (nest
), 'i');
348 fprintf (outfile
, "\n");
352 /* Allocate a new lattice structure of DEPTH x DEPTH, with INVARIANTS number
355 static lambda_lattice
356 lambda_lattice_new (int depth
, int invariants
)
359 ret
= ggc_alloc (sizeof (*ret
));
360 LATTICE_BASE (ret
) = lambda_matrix_new (depth
, depth
);
361 LATTICE_ORIGIN (ret
) = lambda_vector_new (depth
);
362 LATTICE_ORIGIN_INVARIANTS (ret
) = lambda_matrix_new (depth
, invariants
);
363 LATTICE_DIMENSION (ret
) = depth
;
364 LATTICE_INVARIANTS (ret
) = invariants
;
368 /* Compute the lattice base for NEST. The lattice base is essentially a
369 non-singular transform from a dense base space to a sparse iteration space.
370 We use it so that we don't have to specially handle the case of a sparse
371 iteration space in other parts of the algorithm. As a result, this routine
372 only does something interesting (IE produce a matrix that isn't the
373 identity matrix) if NEST is a sparse space. */
375 static lambda_lattice
376 lambda_lattice_compute_base (lambda_loopnest nest
)
379 int depth
, invariants
;
384 lambda_linear_expression expression
;
386 depth
= LN_DEPTH (nest
);
387 invariants
= LN_INVARIANTS (nest
);
389 ret
= lambda_lattice_new (depth
, invariants
);
390 base
= LATTICE_BASE (ret
);
391 for (i
= 0; i
< depth
; i
++)
393 loop
= LN_LOOPS (nest
)[i
];
395 step
= LL_STEP (loop
);
396 /* If we have a step of 1, then the base is one, and the
397 origin and invariant coefficients are 0. */
400 for (j
= 0; j
< depth
; j
++)
403 LATTICE_ORIGIN (ret
)[i
] = 0;
404 for (j
= 0; j
< invariants
; j
++)
405 LATTICE_ORIGIN_INVARIANTS (ret
)[i
][j
] = 0;
409 /* Otherwise, we need the lower bound expression (which must
410 be an affine function) to determine the base. */
411 expression
= LL_LOWER_BOUND (loop
);
412 gcc_assert (expression
&& LLE_NEXT (expression
)
413 && LLE_DENOMINATOR (expression
) == 1);
415 /* The lower triangular portion of the base is going to be the
416 coefficient times the step */
417 for (j
= 0; j
< i
; j
++)
418 base
[i
][j
] = LLE_COEFFICIENTS (expression
)[j
]
419 * LL_STEP (LN_LOOPS (nest
)[j
]);
421 for (j
= i
+ 1; j
< depth
; j
++)
424 /* Origin for this loop is the constant of the lower bound
426 LATTICE_ORIGIN (ret
)[i
] = LLE_CONSTANT (expression
);
428 /* Coefficient for the invariants are equal to the invariant
429 coefficients in the expression. */
430 for (j
= 0; j
< invariants
; j
++)
431 LATTICE_ORIGIN_INVARIANTS (ret
)[i
][j
] =
432 LLE_INVARIANT_COEFFICIENTS (expression
)[j
];
438 /* Compute the greatest common denominator of two numbers (A and B) using
439 Euclid's algorithm. */
460 /* Compute the greatest common denominator of a VECTOR of SIZE numbers. */
463 gcd_vector (lambda_vector vector
, int size
)
471 for (i
= 1; i
< size
; i
++)
472 gcd1
= gcd (gcd1
, vector
[i
]);
477 /* Compute the least common multiple of two numbers A and B . */
482 return (abs (a
) * abs (b
) / gcd (a
, b
));
485 /* Compute the loop bounds for the auxiliary space NEST.
486 Input system used is Ax <= b. TRANS is the unimodular transformation. */
488 static lambda_loopnest
489 lambda_compute_auxillary_space (lambda_loopnest nest
,
490 lambda_trans_matrix trans
)
492 lambda_matrix A
, B
, A1
, B1
, temp0
;
493 lambda_vector a
, a1
, temp1
;
494 lambda_matrix invertedtrans
;
495 int determinant
, depth
, invariants
, size
, newsize
;
497 lambda_loopnest auxillary_nest
;
499 lambda_linear_expression expression
;
500 lambda_lattice lattice
;
502 int multiple
, f1
, f2
;
504 depth
= LN_DEPTH (nest
);
505 invariants
= LN_INVARIANTS (nest
);
507 /* Unfortunately, we can't know the number of constraints we'll have
508 ahead of time, but this should be enough even in ridiculous loop nest
509 cases. We abort if we go over this limit. */
510 A
= lambda_matrix_new (128, depth
);
511 B
= lambda_matrix_new (128, invariants
);
512 a
= lambda_vector_new (128);
514 A1
= lambda_matrix_new (128, depth
);
515 B1
= lambda_matrix_new (128, invariants
);
516 a1
= lambda_vector_new (128);
518 /* Store the bounds in the equation matrix A, constant vector a, and
519 invariant matrix B, so that we have Ax <= a + B.
520 This requires a little equation rearranging so that everything is on the
521 correct side of the inequality. */
523 for (i
= 0; i
< depth
; i
++)
525 loop
= LN_LOOPS (nest
)[i
];
527 /* First we do the lower bound. */
528 if (LL_STEP (loop
) > 0)
529 expression
= LL_LOWER_BOUND (loop
);
531 expression
= LL_UPPER_BOUND (loop
);
533 for (; expression
!= NULL
; expression
= LLE_NEXT (expression
))
535 /* Fill in the coefficient. */
536 for (j
= 0; j
< i
; j
++)
537 A
[size
][j
] = LLE_COEFFICIENTS (expression
)[j
];
539 /* And the invariant coefficient. */
540 for (j
= 0; j
< invariants
; j
++)
541 B
[size
][j
] = LLE_INVARIANT_COEFFICIENTS (expression
)[j
];
543 /* And the constant. */
544 a
[size
] = LLE_CONSTANT (expression
);
546 /* Convert (2x+3y+2+b)/4 <= z to 2x+3y-4z <= -2-b. IE put all
547 constants and single variables on */
548 A
[size
][i
] = -1 * LLE_DENOMINATOR (expression
);
550 for (j
= 0; j
< invariants
; j
++)
554 /* Need to increase matrix sizes above. */
555 gcc_assert (size
<= 127);
559 /* Then do the exact same thing for the upper bounds. */
560 if (LL_STEP (loop
) > 0)
561 expression
= LL_UPPER_BOUND (loop
);
563 expression
= LL_LOWER_BOUND (loop
);
565 for (; expression
!= NULL
; expression
= LLE_NEXT (expression
))
567 /* Fill in the coefficient. */
568 for (j
= 0; j
< i
; j
++)
569 A
[size
][j
] = LLE_COEFFICIENTS (expression
)[j
];
571 /* And the invariant coefficient. */
572 for (j
= 0; j
< invariants
; j
++)
573 B
[size
][j
] = LLE_INVARIANT_COEFFICIENTS (expression
)[j
];
575 /* And the constant. */
576 a
[size
] = LLE_CONSTANT (expression
);
578 /* Convert z <= (2x+3y+2+b)/4 to -2x-3y+4z <= 2+b. */
579 for (j
= 0; j
< i
; j
++)
581 A
[size
][i
] = LLE_DENOMINATOR (expression
);
583 /* Need to increase matrix sizes above. */
584 gcc_assert (size
<= 127);
589 /* Compute the lattice base x = base * y + origin, where y is the
591 lattice
= lambda_lattice_compute_base (nest
);
593 /* Ax <= a + B then becomes ALy <= a+B - A*origin. L is the lattice base */
596 lambda_matrix_mult (A
, LATTICE_BASE (lattice
), A1
, size
, depth
, depth
);
598 /* a1 = a - A * origin constant. */
599 lambda_matrix_vector_mult (A
, size
, depth
, LATTICE_ORIGIN (lattice
), a1
);
600 lambda_vector_add_mc (a
, 1, a1
, -1, a1
, size
);
602 /* B1 = B - A * origin invariant. */
603 lambda_matrix_mult (A
, LATTICE_ORIGIN_INVARIANTS (lattice
), B1
, size
, depth
,
605 lambda_matrix_add_mc (B
, 1, B1
, -1, B1
, size
, invariants
);
607 /* Now compute the auxiliary space bounds by first inverting U, multiplying
608 it by A1, then performing fourier motzkin. */
610 invertedtrans
= lambda_matrix_new (depth
, depth
);
612 /* Compute the inverse of U. */
613 determinant
= lambda_matrix_inverse (LTM_MATRIX (trans
),
614 invertedtrans
, depth
);
617 lambda_matrix_mult (A1
, invertedtrans
, A
, size
, depth
, depth
);
619 /* Perform Fourier-Motzkin elimination to calculate the bounds of the
621 Fourier-Motzkin is a way of reducing systems of linear inequality so that
622 it is easy to calculate the answer and bounds.
623 A sketch of how it works:
624 Given a system of linear inequalities, ai * xj >= bk, you can always
625 rewrite the constraints so they are all of the form
626 a <= x, or x <= b, or x >= constant for some x in x1 ... xj (and some b
627 in b1 ... bk, and some a in a1...ai)
628 You can then eliminate this x from the non-constant inequalities by
629 rewriting these as a <= b, x >= constant, and delete the x variable.
630 You can then repeat this for any remaining x variables, and then we have
631 an easy to use variable <= constant (or no variables at all) form that we
632 can construct our bounds from.
634 In our case, each time we eliminate, we construct part of the bound from
635 the ith variable, then delete the ith variable.
637 Remember the constant are in our vector a, our coefficient matrix is A,
638 and our invariant coefficient matrix is B */
640 /* Swap B and B1, and a1 and a */
649 auxillary_nest
= lambda_loopnest_new (depth
, invariants
);
651 for (i
= depth
- 1; i
>= 0; i
--)
653 loop
= lambda_loop_new ();
654 LN_LOOPS (auxillary_nest
)[i
] = loop
;
657 for (j
= 0; j
< size
; j
++)
662 expression
= lambda_linear_expression_new (depth
, invariants
);
664 for (k
= 0; k
< i
; k
++)
665 LLE_COEFFICIENTS (expression
)[k
] = A
[j
][k
];
666 for (k
= 0; k
< invariants
; k
++)
667 LLE_INVARIANT_COEFFICIENTS (expression
)[k
] = -1 * B
[j
][k
];
668 LLE_DENOMINATOR (expression
) = -1 * A
[j
][i
];
669 LLE_CONSTANT (expression
) = -1 * a
[j
];
670 /* Ignore if identical to the existing lower bound. */
671 if (!lle_equal (LL_LOWER_BOUND (loop
),
672 expression
, depth
, invariants
))
674 LLE_NEXT (expression
) = LL_LOWER_BOUND (loop
);
675 LL_LOWER_BOUND (loop
) = expression
;
679 else if (A
[j
][i
] > 0)
682 expression
= lambda_linear_expression_new (depth
, invariants
);
683 for (k
= 0; k
< i
; k
++)
684 LLE_COEFFICIENTS (expression
)[k
] = -1 * A
[j
][k
];
685 LLE_CONSTANT (expression
) = a
[j
];
687 for (k
= 0; k
< invariants
; k
++)
688 LLE_INVARIANT_COEFFICIENTS (expression
)[k
] = B
[j
][k
];
690 LLE_DENOMINATOR (expression
) = A
[j
][i
];
691 /* Ignore if identical to the existing upper bound. */
692 if (!lle_equal (LL_UPPER_BOUND (loop
),
693 expression
, depth
, invariants
))
695 LLE_NEXT (expression
) = LL_UPPER_BOUND (loop
);
696 LL_UPPER_BOUND (loop
) = expression
;
701 /* creates a new system by deleting the i'th variable. */
703 for (j
= 0; j
< size
; j
++)
707 lambda_vector_copy (A
[j
], A1
[newsize
], depth
);
708 lambda_vector_copy (B
[j
], B1
[newsize
], invariants
);
712 else if (A
[j
][i
] > 0)
714 for (k
= 0; k
< size
; k
++)
718 multiple
= lcm (A
[j
][i
], A
[k
][i
]);
719 f1
= multiple
/ A
[j
][i
];
720 f2
= -1 * multiple
/ A
[k
][i
];
722 lambda_vector_add_mc (A
[j
], f1
, A
[k
], f2
,
724 lambda_vector_add_mc (B
[j
], f1
, B
[k
], f2
,
725 B1
[newsize
], invariants
);
726 a1
[newsize
] = f1
* a
[j
] + f2
* a
[k
];
748 return auxillary_nest
;
751 /* Compute the loop bounds for the target space, using the bounds of
752 the auxiliary nest AUXILLARY_NEST, and the triangular matrix H. This is
753 done by matrix multiplication and then transformation of the new matrix
754 back into linear expression form.
755 Return the target loopnest. */
757 static lambda_loopnest
758 lambda_compute_target_space (lambda_loopnest auxillary_nest
,
759 lambda_trans_matrix H
, lambda_vector stepsigns
)
761 lambda_matrix inverse
, H1
;
762 int determinant
, i
, j
;
766 lambda_loopnest target_nest
;
767 int depth
, invariants
;
768 lambda_matrix target
;
770 lambda_loop auxillary_loop
, target_loop
;
771 lambda_linear_expression expression
, auxillary_expr
, target_expr
, tmp_expr
;
773 depth
= LN_DEPTH (auxillary_nest
);
774 invariants
= LN_INVARIANTS (auxillary_nest
);
776 inverse
= lambda_matrix_new (depth
, depth
);
777 determinant
= lambda_matrix_inverse (LTM_MATRIX (H
), inverse
, depth
);
779 /* H1 is H excluding its diagonal. */
780 H1
= lambda_matrix_new (depth
, depth
);
781 lambda_matrix_copy (LTM_MATRIX (H
), H1
, depth
, depth
);
783 for (i
= 0; i
< depth
; i
++)
786 /* Computes the linear offsets of the loop bounds. */
787 target
= lambda_matrix_new (depth
, depth
);
788 lambda_matrix_mult (H1
, inverse
, target
, depth
, depth
, depth
);
790 target_nest
= lambda_loopnest_new (depth
, invariants
);
792 for (i
= 0; i
< depth
; i
++)
795 /* Get a new loop structure. */
796 target_loop
= lambda_loop_new ();
797 LN_LOOPS (target_nest
)[i
] = target_loop
;
799 /* Computes the gcd of the coefficients of the linear part. */
800 gcd1
= gcd_vector (target
[i
], i
);
802 /* Include the denominator in the GCD */
803 gcd1
= gcd (gcd1
, determinant
);
805 /* Now divide through by the gcd */
806 for (j
= 0; j
< i
; j
++)
807 target
[i
][j
] = target
[i
][j
] / gcd1
;
809 expression
= lambda_linear_expression_new (depth
, invariants
);
810 lambda_vector_copy (target
[i
], LLE_COEFFICIENTS (expression
), depth
);
811 LLE_DENOMINATOR (expression
) = determinant
/ gcd1
;
812 LLE_CONSTANT (expression
) = 0;
813 lambda_vector_clear (LLE_INVARIANT_COEFFICIENTS (expression
),
815 LL_LINEAR_OFFSET (target_loop
) = expression
;
818 /* For each loop, compute the new bounds from H */
819 for (i
= 0; i
< depth
; i
++)
821 auxillary_loop
= LN_LOOPS (auxillary_nest
)[i
];
822 target_loop
= LN_LOOPS (target_nest
)[i
];
823 LL_STEP (target_loop
) = LTM_MATRIX (H
)[i
][i
];
824 factor
= LTM_MATRIX (H
)[i
][i
];
826 /* First we do the lower bound. */
827 auxillary_expr
= LL_LOWER_BOUND (auxillary_loop
);
829 for (; auxillary_expr
!= NULL
;
830 auxillary_expr
= LLE_NEXT (auxillary_expr
))
832 target_expr
= lambda_linear_expression_new (depth
, invariants
);
833 lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr
),
834 depth
, inverse
, depth
,
835 LLE_COEFFICIENTS (target_expr
));
836 lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr
),
837 LLE_COEFFICIENTS (target_expr
), depth
,
840 LLE_CONSTANT (target_expr
) = LLE_CONSTANT (auxillary_expr
) * factor
;
841 lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr
),
842 LLE_INVARIANT_COEFFICIENTS (target_expr
),
844 lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr
),
845 LLE_INVARIANT_COEFFICIENTS (target_expr
),
847 LLE_DENOMINATOR (target_expr
) = LLE_DENOMINATOR (auxillary_expr
);
849 if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr
), depth
))
851 LLE_CONSTANT (target_expr
) = LLE_CONSTANT (target_expr
)
853 lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
855 LLE_INVARIANT_COEFFICIENTS
856 (target_expr
), invariants
,
858 LLE_DENOMINATOR (target_expr
) =
859 LLE_DENOMINATOR (target_expr
) * determinant
;
861 /* Find the gcd and divide by it here, rather than doing it
862 at the tree level. */
863 gcd1
= gcd_vector (LLE_COEFFICIENTS (target_expr
), depth
);
864 gcd2
= gcd_vector (LLE_INVARIANT_COEFFICIENTS (target_expr
),
866 gcd1
= gcd (gcd1
, gcd2
);
867 gcd1
= gcd (gcd1
, LLE_CONSTANT (target_expr
));
868 gcd1
= gcd (gcd1
, LLE_DENOMINATOR (target_expr
));
869 for (j
= 0; j
< depth
; j
++)
870 LLE_COEFFICIENTS (target_expr
)[j
] /= gcd1
;
871 for (j
= 0; j
< invariants
; j
++)
872 LLE_INVARIANT_COEFFICIENTS (target_expr
)[j
] /= gcd1
;
873 LLE_CONSTANT (target_expr
) /= gcd1
;
874 LLE_DENOMINATOR (target_expr
) /= gcd1
;
875 /* Ignore if identical to existing bound. */
876 if (!lle_equal (LL_LOWER_BOUND (target_loop
), target_expr
, depth
,
879 LLE_NEXT (target_expr
) = LL_LOWER_BOUND (target_loop
);
880 LL_LOWER_BOUND (target_loop
) = target_expr
;
883 /* Now do the upper bound. */
884 auxillary_expr
= LL_UPPER_BOUND (auxillary_loop
);
886 for (; auxillary_expr
!= NULL
;
887 auxillary_expr
= LLE_NEXT (auxillary_expr
))
889 target_expr
= lambda_linear_expression_new (depth
, invariants
);
890 lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr
),
891 depth
, inverse
, depth
,
892 LLE_COEFFICIENTS (target_expr
));
893 lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr
),
894 LLE_COEFFICIENTS (target_expr
), depth
,
896 LLE_CONSTANT (target_expr
) = LLE_CONSTANT (auxillary_expr
) * factor
;
897 lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr
),
898 LLE_INVARIANT_COEFFICIENTS (target_expr
),
900 lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr
),
901 LLE_INVARIANT_COEFFICIENTS (target_expr
),
903 LLE_DENOMINATOR (target_expr
) = LLE_DENOMINATOR (auxillary_expr
);
905 if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr
), depth
))
907 LLE_CONSTANT (target_expr
) = LLE_CONSTANT (target_expr
)
909 lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
911 LLE_INVARIANT_COEFFICIENTS
912 (target_expr
), invariants
,
914 LLE_DENOMINATOR (target_expr
) =
915 LLE_DENOMINATOR (target_expr
) * determinant
;
917 /* Find the gcd and divide by it here, instead of at the
919 gcd1
= gcd_vector (LLE_COEFFICIENTS (target_expr
), depth
);
920 gcd2
= gcd_vector (LLE_INVARIANT_COEFFICIENTS (target_expr
),
922 gcd1
= gcd (gcd1
, gcd2
);
923 gcd1
= gcd (gcd1
, LLE_CONSTANT (target_expr
));
924 gcd1
= gcd (gcd1
, LLE_DENOMINATOR (target_expr
));
925 for (j
= 0; j
< depth
; j
++)
926 LLE_COEFFICIENTS (target_expr
)[j
] /= gcd1
;
927 for (j
= 0; j
< invariants
; j
++)
928 LLE_INVARIANT_COEFFICIENTS (target_expr
)[j
] /= gcd1
;
929 LLE_CONSTANT (target_expr
) /= gcd1
;
930 LLE_DENOMINATOR (target_expr
) /= gcd1
;
931 /* Ignore if equal to existing bound. */
932 if (!lle_equal (LL_UPPER_BOUND (target_loop
), target_expr
, depth
,
935 LLE_NEXT (target_expr
) = LL_UPPER_BOUND (target_loop
);
936 LL_UPPER_BOUND (target_loop
) = target_expr
;
940 for (i
= 0; i
< depth
; i
++)
942 target_loop
= LN_LOOPS (target_nest
)[i
];
943 /* If necessary, exchange the upper and lower bounds and negate
945 if (stepsigns
[i
] < 0)
947 LL_STEP (target_loop
) *= -1;
948 tmp_expr
= LL_LOWER_BOUND (target_loop
);
949 LL_LOWER_BOUND (target_loop
) = LL_UPPER_BOUND (target_loop
);
950 LL_UPPER_BOUND (target_loop
) = tmp_expr
;
956 /* Compute the step signs of TRANS, using TRANS and stepsigns. Return the new
960 lambda_compute_step_signs (lambda_trans_matrix trans
, lambda_vector stepsigns
)
962 lambda_matrix matrix
, H
;
964 lambda_vector newsteps
;
965 int i
, j
, factor
, minimum_column
;
968 matrix
= LTM_MATRIX (trans
);
969 size
= LTM_ROWSIZE (trans
);
970 H
= lambda_matrix_new (size
, size
);
972 newsteps
= lambda_vector_new (size
);
973 lambda_vector_copy (stepsigns
, newsteps
, size
);
975 lambda_matrix_copy (matrix
, H
, size
, size
);
977 for (j
= 0; j
< size
; j
++)
981 for (i
= j
; i
< size
; i
++)
983 lambda_matrix_col_negate (H
, size
, i
);
984 while (lambda_vector_first_nz (row
, size
, j
+ 1) < size
)
986 minimum_column
= lambda_vector_min_nz (row
, size
, j
);
987 lambda_matrix_col_exchange (H
, size
, j
, minimum_column
);
990 newsteps
[j
] = newsteps
[minimum_column
];
991 newsteps
[minimum_column
] = temp
;
993 for (i
= j
+ 1; i
< size
; i
++)
995 factor
= row
[i
] / row
[j
];
996 lambda_matrix_col_add (H
, size
, j
, i
, -1 * factor
);
1003 /* Transform NEST according to TRANS, and return the new loopnest.
1005 1. Computing a lattice base for the transformation
1006 2. Composing the dense base with the specified transformation (TRANS)
1007 3. Decomposing the combined transformation into a lower triangular portion,
1008 and a unimodular portion.
1009 4. Computing the auxillary nest using the unimodular portion.
1010 5. Computing the target nest using the auxillary nest and the lower
1011 triangular portion. */
1014 lambda_loopnest_transform (lambda_loopnest nest
, lambda_trans_matrix trans
)
1016 lambda_loopnest auxillary_nest
, target_nest
;
1018 int depth
, invariants
;
1020 lambda_lattice lattice
;
1021 lambda_trans_matrix trans1
, H
, U
;
1023 lambda_linear_expression expression
;
1024 lambda_vector origin
;
1025 lambda_matrix origin_invariants
;
1026 lambda_vector stepsigns
;
1029 depth
= LN_DEPTH (nest
);
1030 invariants
= LN_INVARIANTS (nest
);
1032 /* Keep track of the signs of the loop steps. */
1033 stepsigns
= lambda_vector_new (depth
);
1034 for (i
= 0; i
< depth
; i
++)
1036 if (LL_STEP (LN_LOOPS (nest
)[i
]) > 0)
1042 /* Compute the lattice base. */
1043 lattice
= lambda_lattice_compute_base (nest
);
1044 trans1
= lambda_trans_matrix_new (depth
, depth
);
1046 /* Multiply the transformation matrix by the lattice base. */
1048 lambda_matrix_mult (LTM_MATRIX (trans
), LATTICE_BASE (lattice
),
1049 LTM_MATRIX (trans1
), depth
, depth
, depth
);
1051 /* Compute the Hermite normal form for the new transformation matrix. */
1052 H
= lambda_trans_matrix_new (depth
, depth
);
1053 U
= lambda_trans_matrix_new (depth
, depth
);
1054 lambda_matrix_hermite (LTM_MATRIX (trans1
), depth
, LTM_MATRIX (H
),
1057 /* Compute the auxiliary loop nest's space from the unimodular
1059 auxillary_nest
= lambda_compute_auxillary_space (nest
, U
);
1061 /* Compute the loop step signs from the old step signs and the
1062 transformation matrix. */
1063 stepsigns
= lambda_compute_step_signs (trans1
, stepsigns
);
1065 /* Compute the target loop nest space from the auxiliary nest and
1066 the lower triangular matrix H. */
1067 target_nest
= lambda_compute_target_space (auxillary_nest
, H
, stepsigns
);
1068 origin
= lambda_vector_new (depth
);
1069 origin_invariants
= lambda_matrix_new (depth
, invariants
);
1070 lambda_matrix_vector_mult (LTM_MATRIX (trans
), depth
, depth
,
1071 LATTICE_ORIGIN (lattice
), origin
);
1072 lambda_matrix_mult (LTM_MATRIX (trans
), LATTICE_ORIGIN_INVARIANTS (lattice
),
1073 origin_invariants
, depth
, depth
, invariants
);
1075 for (i
= 0; i
< depth
; i
++)
1077 loop
= LN_LOOPS (target_nest
)[i
];
1078 expression
= LL_LINEAR_OFFSET (loop
);
1079 if (lambda_vector_zerop (LLE_COEFFICIENTS (expression
), depth
))
1082 f
= LLE_DENOMINATOR (expression
);
1084 LLE_CONSTANT (expression
) += f
* origin
[i
];
1086 for (j
= 0; j
< invariants
; j
++)
1087 LLE_INVARIANT_COEFFICIENTS (expression
)[j
] +=
1088 f
* origin_invariants
[i
][j
];
1095 /* Convert a gcc tree expression EXPR to a lambda linear expression, and
1096 return the new expression. DEPTH is the depth of the loopnest.
1097 OUTERINDUCTIONVARS is an array of the induction variables for outer loops
1098 in this nest. INVARIANTS is the array of invariants for the loop. EXTRA
1099 is the amount we have to add/subtract from the expression because of the
1100 type of comparison it is used in. */
1102 static lambda_linear_expression
1103 gcc_tree_to_linear_expression (int depth
, tree expr
,
1104 VEC(tree
) *outerinductionvars
,
1105 VEC(tree
) *invariants
, int extra
)
1107 lambda_linear_expression lle
= NULL
;
1108 switch (TREE_CODE (expr
))
1112 lle
= lambda_linear_expression_new (depth
, 2 * depth
);
1113 LLE_CONSTANT (lle
) = TREE_INT_CST_LOW (expr
);
1115 LLE_CONSTANT (lle
) = extra
;
1117 LLE_DENOMINATOR (lle
) = 1;
1124 for (i
= 0; VEC_iterate (tree
, outerinductionvars
, i
, iv
); i
++)
1127 if (SSA_NAME_VAR (iv
) == SSA_NAME_VAR (expr
))
1129 lle
= lambda_linear_expression_new (depth
, 2 * depth
);
1130 LLE_COEFFICIENTS (lle
)[i
] = 1;
1132 LLE_CONSTANT (lle
) = extra
;
1134 LLE_DENOMINATOR (lle
) = 1;
1137 for (i
= 0; VEC_iterate (tree
, invariants
, i
, invar
); i
++)
1140 if (SSA_NAME_VAR (invar
) == SSA_NAME_VAR (expr
))
1142 lle
= lambda_linear_expression_new (depth
, 2 * depth
);
1143 LLE_INVARIANT_COEFFICIENTS (lle
)[i
] = 1;
1145 LLE_CONSTANT (lle
) = extra
;
1146 LLE_DENOMINATOR (lle
) = 1;
1158 /* Return true if OP is invariant in LOOP and all outer loops. */
1161 invariant_in_loop (struct loop
*loop
, tree op
)
1163 if (loop
->depth
== 0)
1165 if (TREE_CODE (op
) == SSA_NAME
)
1167 if (TREE_CODE (SSA_NAME_VAR (op
)) == PARM_DECL
1168 && IS_EMPTY_STMT (SSA_NAME_DEF_STMT (op
)))
1170 if (IS_EMPTY_STMT (SSA_NAME_DEF_STMT (op
)))
1173 if (!invariant_in_loop (loop
->outer
, op
))
1175 return !flow_bb_inside_loop_p (loop
,
1176 bb_for_stmt (SSA_NAME_DEF_STMT (op
)));
1181 /* Generate a lambda loop from a gcc loop LOOP. Return the new lambda loop,
1182 or NULL if it could not be converted.
1183 DEPTH is the depth of the loop.
1184 INVARIANTS is a pointer to the array of loop invariants.
1185 The induction variable for this loop should be stored in the parameter
1187 OUTERINDUCTIONVARS is an array of induction variables for outer loops. */
1190 gcc_loop_to_lambda_loop (struct loop
*loop
, int depth
,
1191 VEC (tree
) ** invariants
,
1192 tree
* ourinductionvar
,
1193 VEC (tree
) * outerinductionvars
)
1197 tree access_fn
, inductionvar
;
1199 lambda_loop lloop
= NULL
;
1200 lambda_linear_expression lbound
, ubound
;
1207 /* Find out induction var and set the pointer so that the caller can
1208 append it to the outerinductionvars array later. */
1210 inductionvar
= find_induction_var_from_exit_cond (loop
);
1211 *ourinductionvar
= inductionvar
;
1213 exit_cond
= get_loop_exit_condition (loop
);
1215 if (inductionvar
== NULL
|| exit_cond
== NULL
)
1217 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1219 "Unable to convert loop: Cannot determine exit condition or induction variable for loop.\n");
1223 test
= TREE_OPERAND (exit_cond
, 0);
1225 if (SSA_NAME_DEF_STMT (inductionvar
) == NULL_TREE
)
1228 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1230 "Unable to convert loop: Cannot find PHI node for induction variable\n");
1235 phi
= SSA_NAME_DEF_STMT (inductionvar
);
1236 if (TREE_CODE (phi
) != PHI_NODE
)
1238 get_stmt_operands (phi
);
1239 uses
= STMT_USE_OPS (phi
);
1244 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1246 "Unable to convert loop: Cannot find PHI node for induction variable\n");
1251 phi
= USE_OP (uses
, 0);
1252 phi
= SSA_NAME_DEF_STMT (phi
);
1253 if (TREE_CODE (phi
) != PHI_NODE
)
1256 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1258 "Unable to convert loop: Cannot find PHI node for induction variable\n");
1264 access_fn
= instantiate_parameters
1265 (loop
, analyze_scalar_evolution (loop
, PHI_RESULT (phi
)));
1268 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1270 "Unable to convert loop: Access function for induction variable phi is NULL\n");
1275 step
= evolution_part_in_loop_num (access_fn
, loop
->num
);
1276 if (!step
|| step
== chrec_dont_know
)
1278 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1280 "Unable to convert loop: Cannot determine step of loop.\n");
1284 if (TREE_CODE (step
) != INTEGER_CST
)
1287 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1289 "Unable to convert loop: Step of loop is not integer.\n");
1293 stepint
= TREE_INT_CST_LOW (step
);
1295 /* Only want phis for induction vars, which will have two
1297 if (PHI_NUM_ARGS (phi
) != 2)
1299 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1301 "Unable to convert loop: PHI node for induction variable has >2 arguments\n");
1305 /* Another induction variable check. One argument's source should be
1306 in the loop, one outside the loop. */
1307 if (flow_bb_inside_loop_p (loop
, PHI_ARG_EDGE (phi
, 0)->src
)
1308 && flow_bb_inside_loop_p (loop
, PHI_ARG_EDGE (phi
, 1)->src
))
1311 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1313 "Unable to convert loop: PHI edges both inside loop, or both outside loop.\n");
1318 if (flow_bb_inside_loop_p (loop
, PHI_ARG_EDGE (phi
, 0)->src
))
1320 lbound
= gcc_tree_to_linear_expression (depth
, PHI_ARG_DEF (phi
, 1),
1321 outerinductionvars
, *invariants
,
1324 lbound
= gcc_tree_to_linear_expression (depth
, PHI_ARG_DEF (phi
, 0),
1325 outerinductionvars
, *invariants
,
1330 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1332 "Unable to convert loop: Cannot convert lower bound to linear expression\n");
1336 /* One part of the test may be a loop invariant tree. */
1337 if (TREE_CODE (TREE_OPERAND (test
, 1)) == SSA_NAME
1338 && invariant_in_loop (loop
, TREE_OPERAND (test
, 1)))
1339 VEC_safe_push (tree
, *invariants
, TREE_OPERAND (test
, 1));
1340 else if (TREE_CODE (TREE_OPERAND (test
, 0)) == SSA_NAME
1341 && invariant_in_loop (loop
, TREE_OPERAND (test
, 0)))
1342 VEC_safe_push (tree
, *invariants
, TREE_OPERAND (test
, 0));
1344 /* The non-induction variable part of the test is the upper bound variable.
1346 if (TREE_OPERAND (test
, 0) == inductionvar
)
1347 uboundvar
= TREE_OPERAND (test
, 1);
1349 uboundvar
= TREE_OPERAND (test
, 0);
1352 /* We only size the vectors assuming we have, at max, 2 times as many
1353 invariants as we do loops (one for each bound).
1354 This is just an arbitrary number, but it has to be matched against the
1356 gcc_assert (VEC_length (tree
, *invariants
) <= (unsigned int) (2 * depth
));
1359 /* We might have some leftover. */
1360 if (TREE_CODE (test
) == LT_EXPR
)
1361 extra
= -1 * stepint
;
1362 else if (TREE_CODE (test
) == NE_EXPR
)
1363 extra
= -1 * stepint
;
1364 else if (TREE_CODE (test
) == GT_EXPR
)
1365 extra
= -1 * stepint
;
1367 ubound
= gcc_tree_to_linear_expression (depth
,
1370 *invariants
, extra
);
1374 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1376 "Unable to convert loop: Cannot convert upper bound to linear expression\n");
1380 lloop
= lambda_loop_new ();
1381 LL_STEP (lloop
) = stepint
;
1382 LL_LOWER_BOUND (lloop
) = lbound
;
1383 LL_UPPER_BOUND (lloop
) = ubound
;
1387 /* Given a LOOP, find the induction variable it is testing against in the exit
1388 condition. Return the induction variable if found, NULL otherwise. */
1391 find_induction_var_from_exit_cond (struct loop
*loop
)
1393 tree expr
= get_loop_exit_condition (loop
);
1396 if (expr
== NULL_TREE
)
1398 if (TREE_CODE (expr
) != COND_EXPR
)
1400 test
= TREE_OPERAND (expr
, 0);
1401 if (TREE_CODE_CLASS (TREE_CODE (test
)) != '<')
1403 /* This is a guess. We say that for a <,!=,<= b, a is the induction
1405 For >, >=, we guess b is the induction variable.
1406 If we are wrong, it'll fail the rest of the induction variable tests, and
1407 everything will be fine anyway. */
1408 switch (TREE_CODE (test
))
1413 ivarop
= TREE_OPERAND (test
, 0);
1417 ivarop
= TREE_OPERAND (test
, 1);
1422 if (TREE_CODE (ivarop
) != SSA_NAME
)
1427 DEF_VEC_GC_P(lambda_loop
);
1428 /* Generate a lambda loopnest from a gcc loopnest LOOP_NEST.
1429 Return the new loop nest.
1430 INDUCTIONVARS is a pointer to an array of induction variables for the
1431 loopnest that will be filled in during this process.
1432 INVARIANTS is a pointer to an array of invariants that will be filled in
1433 during this process. */
1436 gcc_loopnest_to_lambda_loopnest (struct loop
* loop_nest
,
1437 VEC (tree
) **inductionvars
,
1438 VEC (tree
) **invariants
)
1440 lambda_loopnest ret
;
1444 VEC (lambda_loop
) *loops
;
1445 lambda_loop newloop
;
1446 tree inductionvar
= NULL
;
1454 loops
= VEC_alloc (lambda_loop
, 1);
1455 *inductionvars
= VEC_alloc (tree
, 1);
1456 *invariants
= VEC_alloc (tree
, 1);
1460 newloop
= gcc_loop_to_lambda_loop (temp
, depth
, invariants
,
1461 &inductionvar
, *inductionvars
);
1464 VEC_safe_push (tree
, *inductionvars
, inductionvar
);
1465 VEC_safe_push (lambda_loop
, loops
, newloop
);
1469 ret
= lambda_loopnest_new (depth
, 2 * depth
);
1470 for (i
= 0; VEC_iterate (lambda_loop
, loops
, i
, newloop
); i
++)
1471 LN_LOOPS (ret
)[i
] = newloop
;
1477 /* Convert a lambda body vector LBV to a gcc tree, and return the new tree.
1478 STMTS_TO_INSERT is a pointer to a tree where the statements we need to be
1479 inserted for us are stored. INDUCTION_VARS is the array of induction
1480 variables for the loop this LBV is from. */
1483 lbv_to_gcc_expression (lambda_body_vector lbv
,
1484 VEC (tree
) *induction_vars
, tree
* stmts_to_insert
)
1486 tree stmts
, stmt
, resvar
, name
;
1488 tree_stmt_iterator tsi
;
1490 /* Create a statement list and a linear expression temporary. */
1491 stmts
= alloc_stmt_list ();
1492 resvar
= create_tmp_var (integer_type_node
, "lletmp");
1493 add_referenced_tmp_var (resvar
);
1496 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
, integer_zero_node
);
1497 name
= make_ssa_name (resvar
, stmt
);
1498 TREE_OPERAND (stmt
, 0) = name
;
1499 tsi
= tsi_last (stmts
);
1500 tsi_link_after (&tsi
, stmt
, TSI_CONTINUE_LINKING
);
1502 for (i
= 0; i
< VEC_length (tree
,induction_vars
) ; i
++)
1504 if (LBV_COEFFICIENTS (lbv
)[i
] != 0)
1508 /* newname = coefficient * induction_variable */
1509 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
,
1510 fold (build (MULT_EXPR
, integer_type_node
,
1511 VEC_index (tree
, induction_vars
, i
),
1512 build_int_cst (integer_type_node
,
1513 LBV_COEFFICIENTS (lbv
)[i
]))));
1514 newname
= make_ssa_name (resvar
, stmt
);
1515 TREE_OPERAND (stmt
, 0) = newname
;
1516 tsi
= tsi_last (stmts
);
1517 tsi_link_after (&tsi
, stmt
, TSI_CONTINUE_LINKING
);
1518 /* name = name + newname */
1519 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
,
1520 build (PLUS_EXPR
, integer_type_node
, name
, newname
));
1521 name
= make_ssa_name (resvar
, stmt
);
1522 TREE_OPERAND (stmt
, 0) = name
;
1523 tsi
= tsi_last (stmts
);
1524 tsi_link_after (&tsi
, stmt
, TSI_CONTINUE_LINKING
);
1528 /* Handle any denominator that occurs. */
1529 if (LBV_DENOMINATOR (lbv
) != 1)
1531 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
,
1532 build (CEIL_DIV_EXPR
, integer_type_node
,
1533 name
, build_int_cst (integer_type_node
,
1534 LBV_DENOMINATOR (lbv
))));
1535 name
= make_ssa_name (resvar
, stmt
);
1536 TREE_OPERAND (stmt
, 0) = name
;
1537 tsi
= tsi_last (stmts
);
1538 tsi_link_after (&tsi
, stmt
, TSI_CONTINUE_LINKING
);
1540 *stmts_to_insert
= stmts
;
1544 /* Convert a linear expression from coefficient and constant form to a
1546 Return the tree that represents the final value of the expression.
1547 LLE is the linear expression to convert.
1548 OFFSET is the linear offset to apply to the expression.
1549 INDUCTION_VARS is a vector of induction variables for the loops.
1550 INVARIANTS is a vector of the loop nest invariants.
1551 WRAP specifies what tree code to wrap the results in, if there is more than
1552 one (it is either MAX_EXPR, or MIN_EXPR).
1553 STMTS_TO_INSERT Is a pointer to the statement list we fill in with
1554 statements that need to be inserted for the linear expression. */
1557 lle_to_gcc_expression (lambda_linear_expression lle
,
1558 lambda_linear_expression offset
,
1559 VEC(tree
) *induction_vars
,
1560 VEC(tree
) *invariants
,
1561 enum tree_code wrap
, tree
* stmts_to_insert
)
1563 tree stmts
, stmt
, resvar
, name
;
1565 tree_stmt_iterator tsi
;
1569 /* Create a statement list and a linear expression temporary. */
1570 stmts
= alloc_stmt_list ();
1571 resvar
= create_tmp_var (integer_type_node
, "lletmp");
1572 add_referenced_tmp_var (resvar
);
1573 results
= VEC_alloc (tree
, 1);
1575 /* Build up the linear expressions, and put the variable representing the
1576 result in the results array. */
1577 for (; lle
!= NULL
; lle
= LLE_NEXT (lle
))
1579 /* Start at name = 0. */
1580 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
, integer_zero_node
);
1581 name
= make_ssa_name (resvar
, stmt
);
1582 TREE_OPERAND (stmt
, 0) = name
;
1583 tsi
= tsi_last (stmts
);
1584 tsi_link_after (&tsi
, stmt
, TSI_CONTINUE_LINKING
);
1586 /* First do the induction variables.
1587 at the end, name = name + all the induction variables added
1589 for (i
= 0; i
< VEC_length (tree
,induction_vars
); i
++)
1591 if (LLE_COEFFICIENTS (lle
)[i
] != 0)
1597 /* mult = induction variable * coefficient. */
1598 if (LLE_COEFFICIENTS (lle
)[i
] == 1)
1600 mult
= VEC_index (tree
, induction_vars
, i
);
1604 coeff
= build_int_cst (integer_type_node
,
1605 LLE_COEFFICIENTS (lle
)[i
]);
1606 mult
= fold (build (MULT_EXPR
, integer_type_node
,
1607 VEC_index (tree
, induction_vars
, i
),
1611 /* newname = mult */
1612 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
, mult
);
1613 newname
= make_ssa_name (resvar
, stmt
);
1614 TREE_OPERAND (stmt
, 0) = newname
;
1615 tsi
= tsi_last (stmts
);
1616 tsi_link_after (&tsi
, stmt
, TSI_CONTINUE_LINKING
);
1618 /* name = name + newname */
1619 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
,
1620 build (PLUS_EXPR
, integer_type_node
,
1622 name
= make_ssa_name (resvar
, stmt
);
1623 TREE_OPERAND (stmt
, 0) = name
;
1624 tsi
= tsi_last (stmts
);
1625 tsi_link_after (&tsi
, stmt
, TSI_CONTINUE_LINKING
);
1629 /* Handle our invariants.
1630 At the end, we have name = name + result of adding all multiplied
1632 for (i
= 0; i
< VEC_length (tree
, invariants
); i
++)
1634 if (LLE_INVARIANT_COEFFICIENTS (lle
)[i
] != 0)
1640 /* mult = invariant * coefficient */
1641 if (LLE_INVARIANT_COEFFICIENTS (lle
)[i
] == 1)
1643 mult
= VEC_index (tree
, invariants
, i
);
1647 coeff
= build_int_cst (integer_type_node
,
1648 LLE_INVARIANT_COEFFICIENTS (lle
)[i
]);
1649 mult
= fold (build (MULT_EXPR
, integer_type_node
,
1650 VEC_index (tree
, invariants
, i
),
1654 /* newname = mult */
1655 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
, mult
);
1656 newname
= make_ssa_name (resvar
, stmt
);
1657 TREE_OPERAND (stmt
, 0) = newname
;
1658 tsi
= tsi_last (stmts
);
1659 tsi_link_after (&tsi
, stmt
, TSI_CONTINUE_LINKING
);
1661 /* name = name + newname */
1662 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
,
1663 build (PLUS_EXPR
, integer_type_node
,
1665 name
= make_ssa_name (resvar
, stmt
);
1666 TREE_OPERAND (stmt
, 0) = name
;
1667 tsi
= tsi_last (stmts
);
1668 tsi_link_after (&tsi
, stmt
, TSI_CONTINUE_LINKING
);
1672 /* Now handle the constant.
1673 name = name + constant. */
1674 if (LLE_CONSTANT (lle
) != 0)
1676 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
,
1677 build (PLUS_EXPR
, integer_type_node
,
1678 name
, build_int_cst (integer_type_node
,
1679 LLE_CONSTANT (lle
))));
1680 name
= make_ssa_name (resvar
, stmt
);
1681 TREE_OPERAND (stmt
, 0) = name
;
1682 tsi
= tsi_last (stmts
);
1683 tsi_link_after (&tsi
, stmt
, TSI_CONTINUE_LINKING
);
1686 /* Now handle the offset.
1687 name = name + linear offset. */
1688 if (LLE_CONSTANT (offset
) != 0)
1690 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
,
1691 build (PLUS_EXPR
, integer_type_node
,
1692 name
, build_int_cst (integer_type_node
,
1693 LLE_CONSTANT (offset
))));
1694 name
= make_ssa_name (resvar
, stmt
);
1695 TREE_OPERAND (stmt
, 0) = name
;
1696 tsi
= tsi_last (stmts
);
1697 tsi_link_after (&tsi
, stmt
, TSI_CONTINUE_LINKING
);
1700 /* Handle any denominator that occurs. */
1701 if (LLE_DENOMINATOR (lle
) != 1)
1703 if (wrap
== MAX_EXPR
)
1704 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
,
1705 build (CEIL_DIV_EXPR
, integer_type_node
,
1706 name
, build_int_cst (integer_type_node
,
1707 LLE_DENOMINATOR (lle
))));
1708 else if (wrap
== MIN_EXPR
)
1709 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
,
1710 build (FLOOR_DIV_EXPR
, integer_type_node
,
1711 name
, build_int_cst (integer_type_node
,
1712 LLE_DENOMINATOR (lle
))));
1716 /* name = {ceil, floor}(name/denominator) */
1717 name
= make_ssa_name (resvar
, stmt
);
1718 TREE_OPERAND (stmt
, 0) = name
;
1719 tsi
= tsi_last (stmts
);
1720 tsi_link_after (&tsi
, stmt
, TSI_CONTINUE_LINKING
);
1722 VEC_safe_push (tree
, results
, name
);
1725 /* Again, out of laziness, we don't handle this case yet. It's not
1726 hard, it just hasn't occurred. */
1727 gcc_assert (VEC_length (tree
, results
) <= 2);
1729 /* We may need to wrap the results in a MAX_EXPR or MIN_EXPR. */
1730 if (VEC_length (tree
, results
) > 1)
1732 tree op1
= VEC_index (tree
, results
, 0);
1733 tree op2
= VEC_index (tree
, results
, 1);
1734 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
,
1735 build (wrap
, integer_type_node
, op1
, op2
));
1736 name
= make_ssa_name (resvar
, stmt
);
1737 TREE_OPERAND (stmt
, 0) = name
;
1738 tsi
= tsi_last (stmts
);
1739 tsi_link_after (&tsi
, stmt
, TSI_CONTINUE_LINKING
);
1742 *stmts_to_insert
= stmts
;
1746 /* Transform a lambda loopnest NEW_LOOPNEST, which had TRANSFORM applied to
1747 it, back into gcc code. This changes the
1748 loops, their induction variables, and their bodies, so that they
1749 match the transformed loopnest.
1750 OLD_LOOPNEST is the loopnest before we've replaced it with the new
1752 OLD_IVS is a vector of induction variables from the old loopnest.
1753 INVARIANTS is a vector of loop invariants from the old loopnest.
1754 NEW_LOOPNEST is the new lambda loopnest to replace OLD_LOOPNEST with.
1755 TRANSFORM is the matrix transform that was applied to OLD_LOOPNEST to get
1758 lambda_loopnest_to_gcc_loopnest (struct loop
*old_loopnest
,
1760 VEC(tree
) *invariants
,
1761 lambda_loopnest new_loopnest
,
1762 lambda_trans_matrix transform
)
1769 block_stmt_iterator bsi
;
1774 transform
= lambda_trans_matrix_inverse (transform
);
1775 fprintf (dump_file
, "Inverse of transformation matrix:\n");
1776 print_lambda_trans_matrix (dump_file
, transform
);
1778 temp
= old_loopnest
;
1779 new_ivs
= VEC_alloc (tree
, 1);
1785 temp
= old_loopnest
;
1789 lambda_loop newloop
;
1791 tree ivvar
, ivvarinced
, exitcond
, stmts
;
1792 enum tree_code testtype
;
1793 tree newupperbound
, newlowerbound
;
1794 lambda_linear_expression offset
;
1795 /* First, build the new induction variable temporary */
1797 ivvar
= create_tmp_var (integer_type_node
, "lnivtmp");
1798 add_referenced_tmp_var (ivvar
);
1800 VEC_safe_push (tree
, new_ivs
, ivvar
);
1802 newloop
= LN_LOOPS (new_loopnest
)[i
];
1804 /* Linear offset is a bit tricky to handle. Punt on the unhandled
1806 offset
= LL_LINEAR_OFFSET (newloop
);
1808 gcc_assert (LLE_DENOMINATOR (offset
) == 1 &&
1809 lambda_vector_zerop (LLE_COEFFICIENTS (offset
), depth
));
1811 /* Now build the new lower bounds, and insert the statements
1812 necessary to generate it on the loop preheader. */
1813 newlowerbound
= lle_to_gcc_expression (LL_LOWER_BOUND (newloop
),
1814 LL_LINEAR_OFFSET (newloop
),
1816 invariants
, MAX_EXPR
, &stmts
);
1817 bsi_insert_on_edge (loop_preheader_edge (temp
), stmts
);
1818 bsi_commit_edge_inserts (NULL
);
1819 /* Build the new upper bound and insert its statements in the
1820 basic block of the exit condition */
1821 newupperbound
= lle_to_gcc_expression (LL_UPPER_BOUND (newloop
),
1822 LL_LINEAR_OFFSET (newloop
),
1824 invariants
, MIN_EXPR
, &stmts
);
1825 exitcond
= get_loop_exit_condition (temp
);
1826 bb
= bb_for_stmt (exitcond
);
1827 bsi
= bsi_start (bb
);
1828 bsi_insert_after (&bsi
, stmts
, BSI_NEW_STMT
);
1830 /* Create the new iv, and insert it's increment on the latch
1833 bb
= temp
->latch
->pred
->src
;
1834 bsi
= bsi_last (bb
);
1835 create_iv (newlowerbound
,
1836 build_int_cst (integer_type_node
, LL_STEP (newloop
)),
1837 ivvar
, temp
, &bsi
, false, &ivvar
,
1840 /* Replace the exit condition with the new upper bound
1842 testtype
= LL_STEP (newloop
) >= 0 ? LE_EXPR
: GE_EXPR
;
1843 COND_EXPR_COND (exitcond
) = build (testtype
,
1845 ivvarinced
, newupperbound
);
1846 modify_stmt (exitcond
);
1847 VEC_replace (tree
, new_ivs
, i
, ivvar
);
1853 /* Go through the loop and make iv replacements. */
1854 bbs
= get_loop_body (old_loopnest
);
1855 for (i
= 0; i
< old_loopnest
->num_nodes
; i
++)
1856 for (bsi
= bsi_start (bbs
[i
]); !bsi_end_p (bsi
); bsi_next (&bsi
))
1858 tree stmt
= bsi_stmt (bsi
);
1862 get_stmt_operands (stmt
);
1863 uses
= STMT_USE_OPS (stmt
);
1864 for (j
= 0; j
< NUM_USES (uses
); j
++)
1867 use_operand_p use
= USE_OP_PTR (uses
, j
);
1868 for (k
= 0; k
< VEC_length (tree
, old_ivs
); k
++)
1870 tree oldiv
= VEC_index (tree
, old_ivs
, k
);
1871 if (USE_FROM_PTR (use
) == oldiv
)
1874 lambda_body_vector lbv
;
1876 /* Compute the new expression for the induction
1878 depth
= VEC_length (tree
, new_ivs
);
1879 lbv
= lambda_body_vector_new (depth
);
1880 LBV_COEFFICIENTS (lbv
)[k
] = 1;
1881 lbv
= lambda_body_vector_compute_new (transform
, lbv
);
1882 newiv
= lbv_to_gcc_expression (lbv
, new_ivs
, &stmts
);
1884 /* Insert the statements to build that
1886 bsi_insert_before (&bsi
, stmts
, BSI_SAME_STMT
);
1888 /* Replace the use with the result of that
1893 "Replacing induction variable use of ");
1894 print_generic_stmt (dump_file
, USE_FROM_PTR (use
), 0);
1895 fprintf (dump_file
, " with ");
1896 print_generic_stmt (dump_file
, newiv
, 0);
1897 fprintf (dump_file
, "\n");
1899 SET_USE (use
, newiv
);
1907 /* Returns true when the vector V is lexicographically positive, in
1908 other words, when the first non zero element is positive. */
1911 lambda_vector_lexico_pos (lambda_vector v
, unsigned n
)
1914 for (i
= 0; i
< n
; i
++)
1926 /* Return true if TRANS is a legal transformation matrix that respects
1927 the dependence vectors in DISTS and DIRS. The conservative answer
1930 "Wolfe proves that a unimodular transformation represented by the
1931 matrix T is legal when applied to a loop nest with a set of
1932 lexicographically non-negative distance vectors RDG if and only if
1933 for each vector d in RDG, (T.d >= 0) is lexicographically positive.
1934 i.e.: if and only if it transforms the lexicographically positive
1935 distance vectors to lexicographically positive vectors. Note that
1936 a unimodular matrix must transform the zero vector (and only it) to
1937 the zero vector." S.Muchnick. */
1940 lambda_transform_legal_p (lambda_trans_matrix trans
,
1941 int nb_loops
, varray_type dependence_relations
)
1944 lambda_vector distres
;
1945 struct data_dependence_relation
*ddr
;
1947 #if defined ENABLE_CHECKING
1948 gcc_assert (LTM_COLSIZE (trans
) == nb_loops
1949 && LTM_ROWSIZE (trans
) == nb_loops
);
1952 /* When there is an unknown relation in the dependence_relations, we
1953 know that it is no worth looking at this loop nest: give up. */
1954 ddr
= (struct data_dependence_relation
*)
1955 VARRAY_GENERIC_PTR (dependence_relations
, 0);
1958 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
1961 distres
= lambda_vector_new (nb_loops
);
1963 /* For each distance vector in the dependence graph. */
1964 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (dependence_relations
); i
++)
1966 ddr
= (struct data_dependence_relation
*)
1967 VARRAY_GENERIC_PTR (dependence_relations
, i
);
1969 /* Don't care about relations for which we know that there is no
1970 dependence, nor about read-read (aka. output-dependences):
1971 these data accesses can happen in any order. */
1972 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
1973 || (DR_IS_READ (DDR_A (ddr
)) && DR_IS_READ (DDR_B (ddr
))))
1975 /* Conservatively answer: "this transformation is not valid". */
1976 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
1979 /* Compute trans.dist_vect */
1980 lambda_matrix_vector_mult (LTM_MATRIX (trans
), nb_loops
, nb_loops
,
1981 DDR_DIST_VECT (ddr
), distres
);
1983 if (!lambda_vector_lexico_pos (distres
, nb_loops
))