1 /* Loop transformation code generation
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dberlin@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
24 #include "coretypes.h"
31 #include "basic-block.h"
32 #include "diagnostic.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
39 #include "tree-chrec.h"
40 #include "tree-data-ref.h"
41 #include "tree-pass.h"
42 #include "tree-scalar-evolution.h"
46 /* This loop nest code generation is based on non-singular matrix
49 A little terminology and a general sketch of the algorithm. See "A singular
50 loop transformatrion framework based on non-singular matrices" by Wei Li and
51 Keshav Pingali for formal proofs that the various statements below are
54 A loop iteration space are the points traversed by the loop. A point in the
55 iteration space can be represented by a vector of size <loop depth>. You can
56 therefore represent the iteration space as a integral combinations of a set
59 A loop iteration space is dense if every integer point between the loop
60 bounds is a point in the iteration space. Every loop with a step of 1
61 therefore has a dense iteration space.
63 for i = 1 to 3, step 1 is a dense iteration space.
65 A loop iteration space is sparse if it is not dense. That is, the iteration
66 space skips integer points that are within the loop bounds.
68 for i = 1 to 3, step 2 is a sparse iteration space, because the integer point
71 Dense source spaces are easy to transform, because they don't skip any
72 points to begin with. Thus we can compute the exact bounds of the target
73 space using min/max and floor/ceil.
75 For a dense source space, we take the transformation matrix, decompose it
76 into a lower triangular part (H) and a unimodular part (U).
77 We then compute the auxillary space from the unimodular part (source loop
78 nest . U = auxillary space) , which has two important properties:
79 1. It traverses the iterations in the same lexicographic order as the source
81 2. It is a dense space when the source is a dense space (even if the target
82 space is going to be sparse).
84 Given the auxillary space, we use the lower triangular part to compute the
85 bounds in the target space by simple matrix multiplication.
86 The gaps in the target space (IE the new loop step sizes) will be the
87 diagonals of the H matrix.
89 Sparse source spaces require another step, because you can't directly compute
90 the exact bounds of the auxillary and target space from the sparse space.
91 Rather than try to come up with a separate algorithm to handle sparse source
92 spaces directly, we just find a legal transformation matrix that gives you
93 the sparse source space, from a dense space, and then transform the dense
96 For a regular sparse space, you can represent the source space as an integer
97 lattice, and the base space of that lattice will always be dense. Thus, we
98 effectively use the lattice to figure out the transformation from the lattice
99 base space, to the sparse iteration space (IE what transform was applied to
100 the dense space to make it sparse). We then compose this transform with the
101 transformation matrix specified by the user (since our matrix transformations
102 are closed under composition, this is okay). We can then use the base space
103 (which is dense) plus the composed transformation matrix, to compute the rest
104 of the transform using the dense space algorithm above.
106 In other words, our sparse source space (B) is decomposed into a dense base
107 space (A), and a matrix (L) that transforms A into B, such that A.L = B.
108 We then compute the composition of L and the user transformation matrix (T),
109 so that T is now a transform from A to the result, instead of from B to the
111 IE A.(LT) = result instead of B.T = result
112 Since A is now a dense source space, we can use the dense source space
113 algorithm above to compute the result of applying transform (LT) to A.
115 Fourier-Motzkin elimination is used to compute the bounds of the base space
118 /* Lattice stuff that is internal to the code generation algorithm. */
122 /* Lattice base matrix. */
124 /* Lattice dimension. */
126 /* Origin vector for the coefficients. */
127 lambda_vector origin
;
128 /* Origin matrix for the invariants. */
129 lambda_matrix origin_invariants
;
130 /* Number of invariants. */
134 #define LATTICE_BASE(T) ((T)->base)
135 #define LATTICE_DIMENSION(T) ((T)->dimension)
136 #define LATTICE_ORIGIN(T) ((T)->origin)
137 #define LATTICE_ORIGIN_INVARIANTS(T) ((T)->origin_invariants)
138 #define LATTICE_INVARIANTS(T) ((T)->invariants)
140 static bool lle_equal (lambda_linear_expression
, lambda_linear_expression
,
142 static lambda_lattice
lambda_lattice_new (int, int);
143 static lambda_lattice
lambda_lattice_compute_base (lambda_loopnest
);
145 static tree
find_induction_var_from_exit_cond (struct loop
*);
147 /* Create a new lambda body vector. */
150 lambda_body_vector_new (int size
)
152 lambda_body_vector ret
;
154 ret
= ggc_alloc (sizeof (*ret
));
155 LBV_COEFFICIENTS (ret
) = lambda_vector_new (size
);
156 LBV_SIZE (ret
) = size
;
157 LBV_DENOMINATOR (ret
) = 1;
161 /* Compute the new coefficients for the vector based on the
162 *inverse* of the transformation matrix. */
165 lambda_body_vector_compute_new (lambda_trans_matrix transform
,
166 lambda_body_vector vect
)
168 lambda_body_vector temp
;
171 /* Make sure the matrix is square. */
172 if (LTM_ROWSIZE (transform
) != LTM_COLSIZE (transform
))
175 depth
= LTM_ROWSIZE (transform
);
177 temp
= lambda_body_vector_new (depth
);
178 LBV_DENOMINATOR (temp
) =
179 LBV_DENOMINATOR (vect
) * LTM_DENOMINATOR (transform
);
180 lambda_vector_matrix_mult (LBV_COEFFICIENTS (vect
), depth
,
181 LTM_MATRIX (transform
), depth
,
182 LBV_COEFFICIENTS (temp
));
183 LBV_SIZE (temp
) = LBV_SIZE (vect
);
187 /* Print out a lambda body vector. */
190 print_lambda_body_vector (FILE * outfile
, lambda_body_vector body
)
192 print_lambda_vector (outfile
, LBV_COEFFICIENTS (body
), LBV_SIZE (body
));
195 /* Return TRUE if two linear expressions are equal. */
198 lle_equal (lambda_linear_expression lle1
, lambda_linear_expression lle2
,
199 int depth
, int invariants
)
203 if (lle1
== NULL
|| lle2
== NULL
)
205 if (LLE_CONSTANT (lle1
) != LLE_CONSTANT (lle2
))
207 if (LLE_DENOMINATOR (lle1
) != LLE_DENOMINATOR (lle2
))
209 for (i
= 0; i
< depth
; i
++)
210 if (LLE_COEFFICIENTS (lle1
)[i
] != LLE_COEFFICIENTS (lle2
)[i
])
212 for (i
= 0; i
< invariants
; i
++)
213 if (LLE_INVARIANT_COEFFICIENTS (lle1
)[i
] !=
214 LLE_INVARIANT_COEFFICIENTS (lle2
)[i
])
219 /* Create a new linear expression with dimension DIM, and total number
220 of invariants INVARIANTS. */
222 lambda_linear_expression
223 lambda_linear_expression_new (int dim
, int invariants
)
225 lambda_linear_expression ret
;
227 ret
= ggc_alloc_cleared (sizeof (*ret
));
229 LLE_COEFFICIENTS (ret
) = lambda_vector_new (dim
);
230 LLE_CONSTANT (ret
) = 0;
231 LLE_INVARIANT_COEFFICIENTS (ret
) = lambda_vector_new (invariants
);
232 LLE_DENOMINATOR (ret
) = 1;
233 LLE_NEXT (ret
) = NULL
;
238 /* Print out a linear expression EXPR, with SIZE coefficients, to OUTFILE.
239 The starting letter used for variable names is START. */
242 print_linear_expression (FILE * outfile
, lambda_vector expr
, int size
,
247 for (i
= 0; i
< size
; i
++)
254 fprintf (outfile
, "-");
257 else if (expr
[i
] > 0)
258 fprintf (outfile
, " + ");
260 fprintf (outfile
, " - ");
261 if (abs (expr
[i
]) == 1)
262 fprintf (outfile
, "%c", start
+ i
);
264 fprintf (outfile
, "%d%c", abs (expr
[i
]), start
+ i
);
269 /* Print out a lambda linear expression structure, EXPR, to OUTFILE. The
270 depth/number of coefficients is given by DEPTH, the number of invariants is
271 given by INVARIANTS, and the character to start variable names with is given
275 print_lambda_linear_expression (FILE * outfile
,
276 lambda_linear_expression expr
,
277 int depth
, int invariants
, char start
)
279 fprintf (outfile
, "\tLinear expression: ");
280 print_linear_expression (outfile
, LLE_COEFFICIENTS (expr
), depth
, start
);
281 fprintf (outfile
, " constant: %d ", LLE_CONSTANT (expr
));
282 fprintf (outfile
, " invariants: ");
283 print_linear_expression (outfile
, LLE_INVARIANT_COEFFICIENTS (expr
),
285 fprintf (outfile
, " denominator: %d\n", LLE_DENOMINATOR (expr
));
288 /* Print a lambda loop structure LOOP to OUTFILE. The depth/number of
289 coefficients is given by DEPTH, the number of invariants is
290 given by INVARIANTS, and the character to start variable names with is given
294 print_lambda_loop (FILE * outfile
, lambda_loop loop
, int depth
,
295 int invariants
, char start
)
298 lambda_linear_expression expr
;
303 expr
= LL_LINEAR_OFFSET (loop
);
304 step
= LL_STEP (loop
);
305 fprintf (outfile
, " step size = %d \n", step
);
309 fprintf (outfile
, " linear offset: \n");
310 print_lambda_linear_expression (outfile
, expr
, depth
, invariants
,
314 fprintf (outfile
, " lower bound: \n");
315 for (expr
= LL_LOWER_BOUND (loop
); expr
!= NULL
; expr
= LLE_NEXT (expr
))
316 print_lambda_linear_expression (outfile
, expr
, depth
, invariants
, start
);
317 fprintf (outfile
, " upper bound: \n");
318 for (expr
= LL_UPPER_BOUND (loop
); expr
!= NULL
; expr
= LLE_NEXT (expr
))
319 print_lambda_linear_expression (outfile
, expr
, depth
, invariants
, start
);
322 /* Create a new loop nest structure with DEPTH loops, and INVARIANTS as the
323 number of invariants. */
326 lambda_loopnest_new (int depth
, int invariants
)
329 ret
= ggc_alloc (sizeof (*ret
));
331 LN_LOOPS (ret
) = ggc_alloc_cleared (depth
* sizeof (lambda_loop
));
332 LN_DEPTH (ret
) = depth
;
333 LN_INVARIANTS (ret
) = invariants
;
338 /* Print a lambda loopnest structure, NEST, to OUTFILE. The starting
339 character to use for loop names is given by START. */
342 print_lambda_loopnest (FILE * outfile
, lambda_loopnest nest
, char start
)
345 for (i
= 0; i
< LN_DEPTH (nest
); i
++)
347 fprintf (outfile
, "Loop %c\n", start
+ i
);
348 print_lambda_loop (outfile
, LN_LOOPS (nest
)[i
], LN_DEPTH (nest
),
349 LN_INVARIANTS (nest
), 'i');
350 fprintf (outfile
, "\n");
354 /* Allocate a new lattice structure of DEPTH x DEPTH, with INVARIANTS number
357 static lambda_lattice
358 lambda_lattice_new (int depth
, int invariants
)
361 ret
= ggc_alloc (sizeof (*ret
));
362 LATTICE_BASE (ret
) = lambda_matrix_new (depth
, depth
);
363 LATTICE_ORIGIN (ret
) = lambda_vector_new (depth
);
364 LATTICE_ORIGIN_INVARIANTS (ret
) = lambda_matrix_new (depth
, invariants
);
365 LATTICE_DIMENSION (ret
) = depth
;
366 LATTICE_INVARIANTS (ret
) = invariants
;
370 /* Compute the lattice base for NEST. The lattice base is essentially a
371 non-singular transform from a dense base space to a sparse iteration space.
372 We use it so that we don't have to specially handle the case of a sparse
373 iteration space in other parts of the algorithm. As a result, this routine
374 only does something interesting (IE produce a matrix that isn't the
375 identity matrix) if NEST is a sparse space. */
377 static lambda_lattice
378 lambda_lattice_compute_base (lambda_loopnest nest
)
381 int depth
, invariants
;
386 lambda_linear_expression expression
;
388 depth
= LN_DEPTH (nest
);
389 invariants
= LN_INVARIANTS (nest
);
391 ret
= lambda_lattice_new (depth
, invariants
);
392 base
= LATTICE_BASE (ret
);
393 for (i
= 0; i
< depth
; i
++)
395 loop
= LN_LOOPS (nest
)[i
];
398 step
= LL_STEP (loop
);
399 /* If we have a step of 1, then the base is one, and the
400 origin and invariant coefficients are 0. */
403 for (j
= 0; j
< depth
; j
++)
406 LATTICE_ORIGIN (ret
)[i
] = 0;
407 for (j
= 0; j
< invariants
; j
++)
408 LATTICE_ORIGIN_INVARIANTS (ret
)[i
][j
] = 0;
412 /* Otherwise, we need the lower bound expression (which must
413 be an affine function) to determine the base. */
414 expression
= LL_LOWER_BOUND (loop
);
416 || LLE_NEXT (expression
) || LLE_DENOMINATOR (expression
) != 1)
419 /* The lower triangular portion of the base is going to be the
420 coefficient times the step */
421 for (j
= 0; j
< i
; j
++)
422 base
[i
][j
] = LLE_COEFFICIENTS (expression
)[j
]
423 * LL_STEP (LN_LOOPS (nest
)[j
]);
425 for (j
= i
+ 1; j
< depth
; j
++)
428 /* Origin for this loop is the constant of the lower bound
430 LATTICE_ORIGIN (ret
)[i
] = LLE_CONSTANT (expression
);
432 /* Coefficient for the invariants are equal to the invariant
433 coefficients in the expression. */
434 for (j
= 0; j
< invariants
; j
++)
435 LATTICE_ORIGIN_INVARIANTS (ret
)[i
][j
] =
436 LLE_INVARIANT_COEFFICIENTS (expression
)[j
];
442 /* Compute the greatest common denominator of two numbers (A and B) using
443 Euclid's algorithm. */
464 /* Compute the greatest common denominator of a VECTOR of SIZE numbers. */
467 gcd_vector (lambda_vector vector
, int size
)
475 for (i
= 1; i
< size
; i
++)
476 gcd1
= gcd (gcd1
, vector
[i
]);
481 /* Compute the least common multiple of two numbers A and B . */
486 return (abs (a
) * abs (b
) / gcd (a
, b
));
489 /* Compute the loop bounds for the auxiliary space NEST.
490 Input system used is Ax <= b. TRANS is the unimodular transformation. */
492 static lambda_loopnest
493 lambda_compute_auxillary_space (lambda_loopnest nest
,
494 lambda_trans_matrix trans
)
496 lambda_matrix A
, B
, A1
, B1
, temp0
;
497 lambda_vector a
, a1
, temp1
;
498 lambda_matrix invertedtrans
;
499 int determinant
, depth
, invariants
, size
, newsize
;
501 lambda_loopnest auxillary_nest
;
503 lambda_linear_expression expression
;
504 lambda_lattice lattice
;
506 int multiple
, f1
, f2
;
508 depth
= LN_DEPTH (nest
);
509 invariants
= LN_INVARIANTS (nest
);
511 /* Unfortunately, we can't know the number of constraints we'll have
512 ahead of time, but this should be enough even in ridiculous loop nest
513 cases. We abort if we go over this limit. */
514 A
= lambda_matrix_new (128, depth
);
515 B
= lambda_matrix_new (128, invariants
);
516 a
= lambda_vector_new (128);
518 A1
= lambda_matrix_new (128, depth
);
519 B1
= lambda_matrix_new (128, invariants
);
520 a1
= lambda_vector_new (128);
522 /* Store the bounds in the equation matrix A, constant vector a, and
523 invariant matrix B, so that we have Ax <= a + B.
524 This requires a little equation rearranging so that everything is on the
525 correct side of the inequality. */
527 for (i
= 0; i
< depth
; i
++)
529 loop
= LN_LOOPS (nest
)[i
];
531 /* First we do the lower bound. */
532 if (LL_STEP (loop
) > 0)
533 expression
= LL_LOWER_BOUND (loop
);
535 expression
= LL_UPPER_BOUND (loop
);
537 for (; expression
!= NULL
; expression
= LLE_NEXT (expression
))
539 /* Fill in the coefficient. */
540 for (j
= 0; j
< i
; j
++)
541 A
[size
][j
] = LLE_COEFFICIENTS (expression
)[j
];
543 /* And the invariant coefficient. */
544 for (j
= 0; j
< invariants
; j
++)
545 B
[size
][j
] = LLE_INVARIANT_COEFFICIENTS (expression
)[j
];
547 /* And the constant. */
548 a
[size
] = LLE_CONSTANT (expression
);
550 /* Convert (2x+3y+2+b)/4 <= z to 2x+3y-4z <= -2-b. IE put all
551 constants and single variables on */
552 A
[size
][i
] = -1 * LLE_DENOMINATOR (expression
);
554 for (j
= 0; j
< invariants
; j
++)
558 /* Need to increase matrix sizes above. */
563 /* Then do the exact same thing for the upper bounds. */
564 if (LL_STEP (loop
) > 0)
565 expression
= LL_UPPER_BOUND (loop
);
567 expression
= LL_LOWER_BOUND (loop
);
569 for (; expression
!= NULL
; expression
= LLE_NEXT (expression
))
571 /* Fill in the coefficient. */
572 for (j
= 0; j
< i
; j
++)
573 A
[size
][j
] = LLE_COEFFICIENTS (expression
)[j
];
575 /* And the invariant coefficient. */
576 for (j
= 0; j
< invariants
; j
++)
577 B
[size
][j
] = LLE_INVARIANT_COEFFICIENTS (expression
)[j
];
579 /* And the constant. */
580 a
[size
] = LLE_CONSTANT (expression
);
582 /* Convert z <= (2x+3y+2+b)/4 to -2x-3y+4z <= 2+b. */
583 for (j
= 0; j
< i
; j
++)
585 A
[size
][i
] = LLE_DENOMINATOR (expression
);
587 /* Need to increase matrix sizes above. */
593 /* Compute the lattice base x = base * y + origin, where y is the
595 lattice
= lambda_lattice_compute_base (nest
);
597 /* Ax <= a + B then becomes ALy <= a+B - A*origin. L is the lattice base */
600 lambda_matrix_mult (A
, LATTICE_BASE (lattice
), A1
, size
, depth
, depth
);
602 /* a1 = a - A * origin constant. */
603 lambda_matrix_vector_mult (A
, size
, depth
, LATTICE_ORIGIN (lattice
), a1
);
604 lambda_vector_add_mc (a
, 1, a1
, -1, a1
, size
);
606 /* B1 = B - A * origin invariant. */
607 lambda_matrix_mult (A
, LATTICE_ORIGIN_INVARIANTS (lattice
), B1
, size
, depth
,
609 lambda_matrix_add_mc (B
, 1, B1
, -1, B1
, size
, invariants
);
611 /* Now compute the auxiliary space bounds by first inverting U, multiplying
612 it by A1, then performing fourier motzkin. */
614 invertedtrans
= lambda_matrix_new (depth
, depth
);
616 /* Compute the inverse of U. */
617 determinant
= lambda_matrix_inverse (LTM_MATRIX (trans
),
618 invertedtrans
, depth
);
621 lambda_matrix_mult (A1
, invertedtrans
, A
, size
, depth
, depth
);
623 /* Perform Fourier-Motzkin elimination to calculate the bounds of the
625 Fourier-Motzkin is a way of reducing systems of linear inequality so that
626 it is easy to calculate the answer and bounds.
627 A sketch of how it works:
628 Given a system of linear inequalities, ai * xj >= bk, you can always
629 rewrite the constraints so they are all of the form
630 a <= x, or x <= b, or x >= constant for some x in x1 ... xj (and some b
631 in b1 ... bk, and some a in a1...ai)
632 You can then eliminate this x from the non-constant inequalities by
633 rewriting these as a <= b, x >= constant, and delete the x variable.
634 You can then repeat this for any remaining x variables, and then we have
635 an easy to use variable <= constant (or no variables at all) form that we
636 can construct our bounds from.
638 In our case, each time we eliminate, we construct part of the bound from
639 the ith variable, then delete the ith variable.
641 Remember the constant are in our vector a, our coefficient matrix is A,
642 and our invariant coefficient matrix is B */
644 /* Swap B and B1, and a1 and a */
653 auxillary_nest
= lambda_loopnest_new (depth
, invariants
);
655 for (i
= depth
- 1; i
>= 0; i
--)
657 loop
= lambda_loop_new ();
658 LN_LOOPS (auxillary_nest
)[i
] = loop
;
661 for (j
= 0; j
< size
; j
++)
666 expression
= lambda_linear_expression_new (depth
, invariants
);
668 for (k
= 0; k
< i
; k
++)
669 LLE_COEFFICIENTS (expression
)[k
] = A
[j
][k
];
670 for (k
= 0; k
< invariants
; k
++)
671 LLE_INVARIANT_COEFFICIENTS (expression
)[k
] = -1 * B
[j
][k
];
672 LLE_DENOMINATOR (expression
) = -1 * A
[j
][i
];
673 LLE_CONSTANT (expression
) = -1 * a
[j
];
674 /* Ignore if identical to the existing lower bound. */
675 if (!lle_equal (LL_LOWER_BOUND (loop
),
676 expression
, depth
, invariants
))
678 LLE_NEXT (expression
) = LL_LOWER_BOUND (loop
);
679 LL_LOWER_BOUND (loop
) = expression
;
683 else if (A
[j
][i
] > 0)
686 expression
= lambda_linear_expression_new (depth
, invariants
);
687 for (k
= 0; k
< i
; k
++)
688 LLE_COEFFICIENTS (expression
)[k
] = -1 * A
[j
][k
];
689 LLE_CONSTANT (expression
) = a
[j
];
691 for (k
= 0; k
< invariants
; k
++)
692 LLE_INVARIANT_COEFFICIENTS (expression
)[k
] = B
[j
][k
];
694 LLE_DENOMINATOR (expression
) = A
[j
][i
];
695 /* Ignore if identical to the existing upper bound. */
696 if (!lle_equal (LL_UPPER_BOUND (loop
),
697 expression
, depth
, invariants
))
699 LLE_NEXT (expression
) = LL_UPPER_BOUND (loop
);
700 LL_UPPER_BOUND (loop
) = expression
;
705 /* creates a new system by deleting the i'th variable. */
707 for (j
= 0; j
< size
; j
++)
711 lambda_vector_copy (A
[j
], A1
[newsize
], depth
);
712 lambda_vector_copy (B
[j
], B1
[newsize
], invariants
);
716 else if (A
[j
][i
] > 0)
718 for (k
= 0; k
< size
; k
++)
722 multiple
= lcm (A
[j
][i
], A
[k
][i
]);
723 f1
= multiple
/ A
[j
][i
];
724 f2
= -1 * multiple
/ A
[k
][i
];
726 lambda_vector_add_mc (A
[j
], f1
, A
[k
], f2
,
728 lambda_vector_add_mc (B
[j
], f1
, B
[k
], f2
,
729 B1
[newsize
], invariants
);
730 a1
[newsize
] = f1
* a
[j
] + f2
* a
[k
];
752 return auxillary_nest
;
755 /* Compute the loop bounds for the target space, using the bounds of
756 the auxillary nest AUXILLARY_NEST, and the triangular matrix H. This is
757 done by matrix multiplication and then transformation of the new matrix
758 back into linear expression form.
759 Return the target loopnest. */
761 static lambda_loopnest
762 lambda_compute_target_space (lambda_loopnest auxillary_nest
,
763 lambda_trans_matrix H
, lambda_vector stepsigns
)
765 lambda_matrix inverse
, H1
;
766 int determinant
, i
, j
;
770 lambda_loopnest target_nest
;
771 int depth
, invariants
;
772 lambda_matrix target
;
774 lambda_loop auxillary_loop
, target_loop
;
775 lambda_linear_expression expression
, auxillary_expr
, target_expr
, tmp_expr
;
777 depth
= LN_DEPTH (auxillary_nest
);
778 invariants
= LN_INVARIANTS (auxillary_nest
);
780 inverse
= lambda_matrix_new (depth
, depth
);
781 determinant
= lambda_matrix_inverse (LTM_MATRIX (H
), inverse
, depth
);
783 /* H1 is H excluding its diagonal. */
784 H1
= lambda_matrix_new (depth
, depth
);
785 lambda_matrix_copy (LTM_MATRIX (H
), H1
, depth
, depth
);
787 for (i
= 0; i
< depth
; i
++)
790 /* Computes the linear offsets of the loop bounds. */
791 target
= lambda_matrix_new (depth
, depth
);
792 lambda_matrix_mult (H1
, inverse
, target
, depth
, depth
, depth
);
794 target_nest
= lambda_loopnest_new (depth
, invariants
);
796 for (i
= 0; i
< depth
; i
++)
799 /* Get a new loop structure. */
800 target_loop
= lambda_loop_new ();
801 LN_LOOPS (target_nest
)[i
] = target_loop
;
803 /* Computes the gcd of the coefficients of the linear part. */
804 gcd1
= gcd_vector (target
[i
], i
);
806 /* Include the denominator in the GCD */
807 gcd1
= gcd (gcd1
, determinant
);
809 /* Now divide through by the gcd */
810 for (j
= 0; j
< i
; j
++)
811 target
[i
][j
] = target
[i
][j
] / gcd1
;
813 expression
= lambda_linear_expression_new (depth
, invariants
);
814 lambda_vector_copy (target
[i
], LLE_COEFFICIENTS (expression
), depth
);
815 LLE_DENOMINATOR (expression
) = determinant
/ gcd1
;
816 LLE_CONSTANT (expression
) = 0;
817 lambda_vector_clear (LLE_INVARIANT_COEFFICIENTS (expression
),
819 LL_LINEAR_OFFSET (target_loop
) = expression
;
822 /* For each loop, compute the new bounds from H */
823 for (i
= 0; i
< depth
; i
++)
825 auxillary_loop
= LN_LOOPS (auxillary_nest
)[i
];
826 target_loop
= LN_LOOPS (target_nest
)[i
];
827 LL_STEP (target_loop
) = LTM_MATRIX (H
)[i
][i
];
828 factor
= LTM_MATRIX (H
)[i
][i
];
830 /* First we do the lower bound. */
831 auxillary_expr
= LL_LOWER_BOUND (auxillary_loop
);
833 for (; auxillary_expr
!= NULL
;
834 auxillary_expr
= LLE_NEXT (auxillary_expr
))
836 target_expr
= lambda_linear_expression_new (depth
, invariants
);
837 lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr
),
838 depth
, inverse
, depth
,
839 LLE_COEFFICIENTS (target_expr
));
840 lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr
),
841 LLE_COEFFICIENTS (target_expr
), depth
,
844 LLE_CONSTANT (target_expr
) = LLE_CONSTANT (auxillary_expr
) * factor
;
845 lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr
),
846 LLE_INVARIANT_COEFFICIENTS (target_expr
),
848 lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr
),
849 LLE_INVARIANT_COEFFICIENTS (target_expr
),
851 LLE_DENOMINATOR (target_expr
) = LLE_DENOMINATOR (auxillary_expr
);
853 if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr
), depth
))
855 LLE_CONSTANT (target_expr
) = LLE_CONSTANT (target_expr
)
857 lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
859 LLE_INVARIANT_COEFFICIENTS
860 (target_expr
), invariants
,
862 LLE_DENOMINATOR (target_expr
) =
863 LLE_DENOMINATOR (target_expr
) * determinant
;
865 /* Find the gcd and divide by it here, rather than doing it
866 at the tree level. */
867 gcd1
= gcd_vector (LLE_COEFFICIENTS (target_expr
), depth
);
868 gcd2
= gcd_vector (LLE_INVARIANT_COEFFICIENTS (target_expr
),
870 gcd1
= gcd (gcd1
, gcd2
);
871 gcd1
= gcd (gcd1
, LLE_CONSTANT (target_expr
));
872 gcd1
= gcd (gcd1
, LLE_DENOMINATOR (target_expr
));
873 for (j
= 0; j
< depth
; j
++)
874 LLE_COEFFICIENTS (target_expr
)[j
] /= gcd1
;
875 for (j
= 0; j
< invariants
; j
++)
876 LLE_INVARIANT_COEFFICIENTS (target_expr
)[j
] /= gcd1
;
877 LLE_CONSTANT (target_expr
) /= gcd1
;
878 LLE_DENOMINATOR (target_expr
) /= gcd1
;
879 /* Ignore if identical to existing bound. */
880 if (!lle_equal (LL_LOWER_BOUND (target_loop
), target_expr
, depth
,
883 LLE_NEXT (target_expr
) = LL_LOWER_BOUND (target_loop
);
884 LL_LOWER_BOUND (target_loop
) = target_expr
;
887 /* Now do the upper bound. */
888 auxillary_expr
= LL_UPPER_BOUND (auxillary_loop
);
890 for (; auxillary_expr
!= NULL
;
891 auxillary_expr
= LLE_NEXT (auxillary_expr
))
893 target_expr
= lambda_linear_expression_new (depth
, invariants
);
894 lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr
),
895 depth
, inverse
, depth
,
896 LLE_COEFFICIENTS (target_expr
));
897 lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr
),
898 LLE_COEFFICIENTS (target_expr
), depth
,
900 LLE_CONSTANT (target_expr
) = LLE_CONSTANT (auxillary_expr
) * factor
;
901 lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr
),
902 LLE_INVARIANT_COEFFICIENTS (target_expr
),
904 lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr
),
905 LLE_INVARIANT_COEFFICIENTS (target_expr
),
907 LLE_DENOMINATOR (target_expr
) = LLE_DENOMINATOR (auxillary_expr
);
909 if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr
), depth
))
911 LLE_CONSTANT (target_expr
) = LLE_CONSTANT (target_expr
)
913 lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
915 LLE_INVARIANT_COEFFICIENTS
916 (target_expr
), invariants
,
918 LLE_DENOMINATOR (target_expr
) =
919 LLE_DENOMINATOR (target_expr
) * determinant
;
921 /* Find the gcd and divide by it here, instead of at the
923 gcd1
= gcd_vector (LLE_COEFFICIENTS (target_expr
), depth
);
924 gcd2
= gcd_vector (LLE_INVARIANT_COEFFICIENTS (target_expr
),
926 gcd1
= gcd (gcd1
, gcd2
);
927 gcd1
= gcd (gcd1
, LLE_CONSTANT (target_expr
));
928 gcd1
= gcd (gcd1
, LLE_DENOMINATOR (target_expr
));
929 for (j
= 0; j
< depth
; j
++)
930 LLE_COEFFICIENTS (target_expr
)[j
] /= gcd1
;
931 for (j
= 0; j
< invariants
; j
++)
932 LLE_INVARIANT_COEFFICIENTS (target_expr
)[j
] /= gcd1
;
933 LLE_CONSTANT (target_expr
) /= gcd1
;
934 LLE_DENOMINATOR (target_expr
) /= gcd1
;
935 /* Ignore if equal to existing bound. */
936 if (!lle_equal (LL_UPPER_BOUND (target_loop
), target_expr
, depth
,
939 LLE_NEXT (target_expr
) = LL_UPPER_BOUND (target_loop
);
940 LL_UPPER_BOUND (target_loop
) = target_expr
;
944 for (i
= 0; i
< depth
; i
++)
946 target_loop
= LN_LOOPS (target_nest
)[i
];
947 /* If necessary, exchange the upper and lower bounds and negate
949 if (stepsigns
[i
] < 0)
951 LL_STEP (target_loop
) *= -1;
952 tmp_expr
= LL_LOWER_BOUND (target_loop
);
953 LL_LOWER_BOUND (target_loop
) = LL_UPPER_BOUND (target_loop
);
954 LL_UPPER_BOUND (target_loop
) = tmp_expr
;
960 /* Compute the step signs of TRANS, using TRANS and stepsigns. Return the new
964 lambda_compute_step_signs (lambda_trans_matrix trans
, lambda_vector stepsigns
)
966 lambda_matrix matrix
, H
;
968 lambda_vector newsteps
;
969 int i
, j
, factor
, minimum_column
;
972 matrix
= LTM_MATRIX (trans
);
973 size
= LTM_ROWSIZE (trans
);
974 H
= lambda_matrix_new (size
, size
);
976 newsteps
= lambda_vector_new (size
);
977 lambda_vector_copy (stepsigns
, newsteps
, size
);
979 lambda_matrix_copy (matrix
, H
, size
, size
);
981 for (j
= 0; j
< size
; j
++)
985 for (i
= j
; i
< size
; i
++)
987 lambda_matrix_col_negate (H
, size
, i
);
988 while (lambda_vector_first_nz (row
, size
, j
+ 1) < size
)
990 minimum_column
= lambda_vector_min_nz (row
, size
, j
);
991 lambda_matrix_col_exchange (H
, size
, j
, minimum_column
);
994 newsteps
[j
] = newsteps
[minimum_column
];
995 newsteps
[minimum_column
] = temp
;
997 for (i
= j
+ 1; i
< size
; i
++)
999 factor
= row
[i
] / row
[j
];
1000 lambda_matrix_col_add (H
, size
, j
, i
, -1 * factor
);
1007 /* Transform NEST according to TRANS, and return the new loopnest.
1009 1. Computing a lattice base for the transformation
1010 2. Composing the dense base with the specified transformation (TRANS)
1011 3. Decomposing the combined transformation into a lower triangular portion,
1012 and a unimodular portion.
1013 4. Computing the auxillary nest using the unimodular portion.
1014 5. Computing the target nest using the auxillary nest and the lower
1015 triangular portion. */
1018 lambda_loopnest_transform (lambda_loopnest nest
, lambda_trans_matrix trans
)
1020 lambda_loopnest auxillary_nest
, target_nest
;
1022 int depth
, invariants
;
1024 lambda_lattice lattice
;
1025 lambda_trans_matrix trans1
, H
, U
;
1027 lambda_linear_expression expression
;
1028 lambda_vector origin
;
1029 lambda_matrix origin_invariants
;
1030 lambda_vector stepsigns
;
1033 depth
= LN_DEPTH (nest
);
1034 invariants
= LN_INVARIANTS (nest
);
1036 /* Keep track of the signs of the loop steps. */
1037 stepsigns
= lambda_vector_new (depth
);
1038 for (i
= 0; i
< depth
; i
++)
1040 if (LL_STEP (LN_LOOPS (nest
)[i
]) > 0)
1046 /* Compute the lattice base. */
1047 lattice
= lambda_lattice_compute_base (nest
);
1048 trans1
= lambda_trans_matrix_new (depth
, depth
);
1050 /* Multiply the transformation matrix by the lattice base. */
1052 lambda_matrix_mult (LTM_MATRIX (trans
), LATTICE_BASE (lattice
),
1053 LTM_MATRIX (trans1
), depth
, depth
, depth
);
1055 /* Compute the Hermite normal form for the new transformation matrix. */
1056 H
= lambda_trans_matrix_new (depth
, depth
);
1057 U
= lambda_trans_matrix_new (depth
, depth
);
1058 lambda_matrix_hermite (LTM_MATRIX (trans1
), depth
, LTM_MATRIX (H
),
1061 /* Compute the auxiliary loop nest's space from the unimodular
1063 auxillary_nest
= lambda_compute_auxillary_space (nest
, U
);
1065 /* Compute the loop step signs from the old step signs and the
1066 transformation matrix. */
1067 stepsigns
= lambda_compute_step_signs (trans1
, stepsigns
);
1069 /* Compute the target loop nest space from the auxiliary nest and
1070 the lower triangular matrix H. */
1071 target_nest
= lambda_compute_target_space (auxillary_nest
, H
, stepsigns
);
1072 origin
= lambda_vector_new (depth
);
1073 origin_invariants
= lambda_matrix_new (depth
, invariants
);
1074 lambda_matrix_vector_mult (LTM_MATRIX (trans
), depth
, depth
,
1075 LATTICE_ORIGIN (lattice
), origin
);
1076 lambda_matrix_mult (LTM_MATRIX (trans
), LATTICE_ORIGIN_INVARIANTS (lattice
),
1077 origin_invariants
, depth
, depth
, invariants
);
1079 for (i
= 0; i
< depth
; i
++)
1081 loop
= LN_LOOPS (target_nest
)[i
];
1082 expression
= LL_LINEAR_OFFSET (loop
);
1083 if (lambda_vector_zerop (LLE_COEFFICIENTS (expression
), depth
))
1086 f
= LLE_DENOMINATOR (expression
);
1088 LLE_CONSTANT (expression
) += f
* origin
[i
];
1090 for (j
= 0; j
< invariants
; j
++)
1091 LLE_INVARIANT_COEFFICIENTS (expression
)[j
] +=
1092 f
* origin_invariants
[i
][j
];
1099 /* Convert a gcc tree expression EXPR to a lambda linear expression, and
1100 return the new expression. DEPTH is the depth of the loopnest.
1101 OUTERINDUCTIONVARS is an array of the induction variables for outer loops
1102 in this nest. INVARIANTS is the array of invariants for the loop. EXTRA
1103 is the amount we have to add/subtract from the expression because of the
1104 type of comparison it is used in. */
1106 static lambda_linear_expression
1107 gcc_tree_to_linear_expression (int depth
, tree expr
,
1108 VEC(tree
) *outerinductionvars
,
1109 VEC(tree
) *invariants
, int extra
)
1111 lambda_linear_expression lle
= NULL
;
1112 switch (TREE_CODE (expr
))
1116 lle
= lambda_linear_expression_new (depth
, 2 * depth
);
1117 LLE_CONSTANT (lle
) = TREE_INT_CST_LOW (expr
);
1119 LLE_CONSTANT (lle
) = extra
;
1121 LLE_DENOMINATOR (lle
) = 1;
1128 for (i
= 0; VEC_iterate (tree
, outerinductionvars
, i
, iv
); i
++)
1131 if (SSA_NAME_VAR (iv
) == SSA_NAME_VAR (expr
))
1133 lle
= lambda_linear_expression_new (depth
, 2 * depth
);
1134 LLE_COEFFICIENTS (lle
)[i
] = 1;
1136 LLE_CONSTANT (lle
) = extra
;
1138 LLE_DENOMINATOR (lle
) = 1;
1141 for (i
= 0; VEC_iterate (tree
, invariants
, i
, invar
); i
++)
1144 if (SSA_NAME_VAR (invar
) == SSA_NAME_VAR (expr
))
1146 lle
= lambda_linear_expression_new (depth
, 2 * depth
);
1147 LLE_INVARIANT_COEFFICIENTS (lle
)[i
] = 1;
1149 LLE_CONSTANT (lle
) = extra
;
1150 LLE_DENOMINATOR (lle
) = 1;
1162 /* Return true if OP is invariant in LOOP and all outer loops. */
1165 invariant_in_loop (struct loop
*loop
, tree op
)
1167 if (loop
->depth
== 0)
1169 if (TREE_CODE (op
) == SSA_NAME
)
1171 if (TREE_CODE (SSA_NAME_VAR (op
)) == PARM_DECL
1172 && IS_EMPTY_STMT (SSA_NAME_DEF_STMT (op
)))
1174 if (IS_EMPTY_STMT (SSA_NAME_DEF_STMT (op
)))
1177 if (!invariant_in_loop (loop
->outer
, op
))
1179 return !flow_bb_inside_loop_p (loop
,
1180 bb_for_stmt (SSA_NAME_DEF_STMT (op
)));
1185 /* Generate a lambda loop from a gcc loop LOOP. Return the new lambda loop,
1186 or NULL if it could not be converted.
1187 DEPTH is the depth of the loop.
1188 INVARIANTS is a pointer to the array of loop invariants.
1189 The induction variable for this loop should be stored in the parameter
1191 OUTERINDUCTIONVARS is an array of induction variables for outer loops. */
1194 gcc_loop_to_lambda_loop (struct loop
*loop
, int depth
,
1195 VEC (tree
) ** invariants
,
1196 tree
* ourinductionvar
,
1197 VEC (tree
) * outerinductionvars
)
1201 tree access_fn
, inductionvar
;
1203 lambda_loop lloop
= NULL
;
1204 lambda_linear_expression lbound
, ubound
;
1211 /* Find out induction var and set the pointer so that the caller can
1212 append it to the outerinductionvars array later. */
1214 inductionvar
= find_induction_var_from_exit_cond (loop
);
1215 *ourinductionvar
= inductionvar
;
1217 exit_cond
= get_loop_exit_condition (loop
);
1219 if (inductionvar
== NULL
|| exit_cond
== NULL
)
1221 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1223 "Unable to convert loop: Cannot determine exit condition or induction variable for loop.\n");
1227 test
= TREE_OPERAND (exit_cond
, 0);
1228 if (TREE_CODE (test
) != LE_EXPR
1229 && TREE_CODE (test
) != LT_EXPR
&& TREE_CODE (test
) != NE_EXPR
)
1232 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1235 "Unable to convert loop: Loop exit test uses unhandled test condition:");
1236 print_generic_stmt (dump_file
, test
, 0);
1237 fprintf (dump_file
, "\n");
1241 if (SSA_NAME_DEF_STMT (inductionvar
) == NULL_TREE
)
1244 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1246 "Unable to convert loop: Cannot find PHI node for induction variable\n");
1251 phi
= SSA_NAME_DEF_STMT (inductionvar
);
1252 if (TREE_CODE (phi
) != PHI_NODE
)
1254 get_stmt_operands (phi
);
1255 uses
= STMT_USE_OPS (phi
);
1260 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1262 "Unable to convert loop: Cannot find PHI node for induction variable\n");
1267 phi
= USE_OP (uses
, 0);
1268 phi
= SSA_NAME_DEF_STMT (phi
);
1269 if (TREE_CODE (phi
) != PHI_NODE
)
1272 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1274 "Unable to convert loop: Cannot find PHI node for induction variable\n");
1280 access_fn
= instantiate_parameters
1281 (loop
, analyze_scalar_evolution (loop
, PHI_RESULT (phi
)));
1284 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1286 "Unable to convert loop: Access function for induction variable phi is NULL\n");
1291 step
= evolution_part_in_loop_num (access_fn
, loop
->num
);
1292 if (!step
|| step
== chrec_dont_know
)
1294 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1296 "Unable to convert loop: Cannot determine step of loop.\n");
1300 if (TREE_CODE (step
) != INTEGER_CST
)
1303 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1305 "Unable to convert loop: Step of loop is not integer.\n");
1309 stepint
= TREE_INT_CST_LOW (step
);
1311 /* Only want phis for induction vars, which will have two
1313 if (PHI_NUM_ARGS (phi
) != 2)
1315 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1317 "Unable to convert loop: PHI node for induction variable has >2 arguments\n");
1321 /* Another induction variable check. One argument's source should be
1322 in the loop, one outside the loop. */
1323 if (flow_bb_inside_loop_p (loop
, PHI_ARG_EDGE (phi
, 0)->src
)
1324 && flow_bb_inside_loop_p (loop
, PHI_ARG_EDGE (phi
, 1)->src
))
1327 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1329 "Unable to convert loop: PHI edges both inside loop, or both outside loop.\n");
1334 if (flow_bb_inside_loop_p (loop
, PHI_ARG_EDGE (phi
, 0)->src
))
1336 lbound
= gcc_tree_to_linear_expression (depth
, PHI_ARG_DEF (phi
, 1),
1337 outerinductionvars
, *invariants
,
1340 lbound
= gcc_tree_to_linear_expression (depth
, PHI_ARG_DEF (phi
, 0),
1341 outerinductionvars
, *invariants
,
1346 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1348 "Unable to convert loop: Cannot convert lower bound to linear expression\n");
1352 if (TREE_CODE (TREE_OPERAND (test
, 1)) == SSA_NAME
)
1353 if (invariant_in_loop (loop
, TREE_OPERAND (test
, 1)))
1354 VEC_safe_push (tree
, *invariants
, TREE_OPERAND (test
, 1));
1356 /* We only size the vectors assuming we have, at max, 2 times as many
1357 invariants as we do loops (one for each bound).
1358 This is just an arbitrary number, but it has to be matched against the
1360 if (VEC_length (tree
, *invariants
) > (unsigned int) (2 * depth
))
1363 /* We might have some leftover. */
1364 if (TREE_CODE (test
) == LT_EXPR
)
1365 extra
= -1 * stepint
;
1366 else if (TREE_CODE (test
) == NE_EXPR
)
1367 extra
= -1 * stepint
;
1369 ubound
= gcc_tree_to_linear_expression (depth
,
1370 TREE_OPERAND (test
, 1),
1372 *invariants
, extra
);
1376 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1378 "Unable to convert loop: Cannot convert upper bound to linear expression\n");
1382 lloop
= lambda_loop_new ();
1383 LL_STEP (lloop
) = stepint
;
1384 LL_LOWER_BOUND (lloop
) = lbound
;
1385 LL_UPPER_BOUND (lloop
) = ubound
;
1389 /* Given a LOOP, find the induction variable it is testing against in the exit
1390 condition. Return the induction variable if found, NULL otherwise. */
1393 find_induction_var_from_exit_cond (struct loop
*loop
)
1395 tree expr
= get_loop_exit_condition (loop
);
1397 if (expr
== NULL_TREE
)
1399 if (TREE_CODE (expr
) != COND_EXPR
)
1401 test
= TREE_OPERAND (expr
, 0);
1402 if (TREE_CODE_CLASS (TREE_CODE (test
)) != '<')
1404 if (TREE_CODE (TREE_OPERAND (test
, 0)) != SSA_NAME
)
1406 return TREE_OPERAND (test
, 0);
1409 DEF_VEC_P(lambda_loop
);
1410 /* Generate a lambda loopnest from a gcc loopnest LOOP_NEST.
1411 Return the new loop nest.
1412 INDUCTIONVARS is a pointer to an array of induction variables for the
1413 loopnest that will be filled in during this process.
1414 INVARIANTS is a pointer to an array of invariants that will be filled in
1415 during this process. */
1418 gcc_loopnest_to_lambda_loopnest (struct loop
* loop_nest
,
1419 VEC (tree
) **inductionvars
,
1420 VEC (tree
) **invariants
)
1422 lambda_loopnest ret
;
1426 VEC (lambda_loop
) *loops
;
1427 lambda_loop newloop
;
1428 tree inductionvar
= NULL
;
1436 loops
= VEC_alloc (lambda_loop
, 1);
1437 *inductionvars
= VEC_alloc (tree
, 1);
1438 *invariants
= VEC_alloc (tree
, 1);
1442 newloop
= gcc_loop_to_lambda_loop (temp
, depth
, invariants
,
1443 &inductionvar
, *inductionvars
);
1446 VEC_safe_push (tree
, *inductionvars
, inductionvar
);
1447 VEC_safe_push (lambda_loop
, loops
, newloop
);
1451 ret
= lambda_loopnest_new (depth
, 2 * depth
);
1452 for (i
= 0; VEC_iterate (lambda_loop
, loops
, i
, newloop
); i
++)
1453 LN_LOOPS (ret
)[i
] = newloop
;
1459 /* Convert a lambda body vector LBV to a gcc tree, and return the new tree.
1460 STMTS_TO_INSERT is a pointer to a tree where the statements we need to be
1461 inserted for us are stored. INDUCTION_VARS is the array of induction
1462 variables for the loop this LBV is from. */
1465 lbv_to_gcc_expression (lambda_body_vector lbv
,
1466 VEC (tree
) *induction_vars
, tree
* stmts_to_insert
)
1468 tree stmts
, stmt
, resvar
, name
;
1470 tree_stmt_iterator tsi
;
1472 /* Create a statement list and a linear expression temporary. */
1473 stmts
= alloc_stmt_list ();
1474 resvar
= create_tmp_var (integer_type_node
, "lletmp");
1475 add_referenced_tmp_var (resvar
);
1478 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
, integer_zero_node
);
1479 name
= make_ssa_name (resvar
, stmt
);
1480 TREE_OPERAND (stmt
, 0) = name
;
1481 tsi
= tsi_last (stmts
);
1482 tsi_link_after (&tsi
, stmt
, TSI_CONTINUE_LINKING
);
1484 for (i
= 0; i
< VEC_length (tree
,induction_vars
) ; i
++)
1486 if (LBV_COEFFICIENTS (lbv
)[i
] != 0)
1490 /* newname = coefficient * induction_variable */
1491 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
,
1492 fold (build (MULT_EXPR
, integer_type_node
,
1493 VEC_index (tree
, induction_vars
, i
),
1494 build_int_cst (integer_type_node
,
1495 LBV_COEFFICIENTS (lbv
)[i
]))));
1496 newname
= make_ssa_name (resvar
, stmt
);
1497 TREE_OPERAND (stmt
, 0) = newname
;
1498 tsi
= tsi_last (stmts
);
1499 tsi_link_after (&tsi
, stmt
, TSI_CONTINUE_LINKING
);
1500 /* name = name + newname */
1501 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
,
1502 build (PLUS_EXPR
, integer_type_node
, name
, newname
));
1503 name
= make_ssa_name (resvar
, stmt
);
1504 TREE_OPERAND (stmt
, 0) = name
;
1505 tsi
= tsi_last (stmts
);
1506 tsi_link_after (&tsi
, stmt
, TSI_CONTINUE_LINKING
);
1510 /* Handle any denominator that occurs. */
1511 if (LBV_DENOMINATOR (lbv
) != 1)
1513 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
,
1514 build (CEIL_DIV_EXPR
, integer_type_node
,
1515 name
, build_int_cst (integer_type_node
,
1516 LBV_DENOMINATOR (lbv
))));
1517 name
= make_ssa_name (resvar
, stmt
);
1518 TREE_OPERAND (stmt
, 0) = name
;
1519 tsi
= tsi_last (stmts
);
1520 tsi_link_after (&tsi
, stmt
, TSI_CONTINUE_LINKING
);
1522 *stmts_to_insert
= stmts
;
1526 /* Convert a linear expression from coefficient and constant form to a
1528 Return the tree that represents the final value of the expression.
1529 LLE is the linear expression to convert.
1530 OFFSET is the linear offset to apply to the expression.
1531 INDUCTION_VARS is a vector of induction variables for the loops.
1532 INVARIANTS is a vector of the loop nest invariants.
1533 WRAP specifies what tree code to wrap the results in, if there is more than
1534 one (it is either MAX_EXPR, or MIN_EXPR).
1535 STMTS_TO_INSERT Is a pointer to the statement list we fill in with
1536 statements that need to be inserted for the linear expression. */
1539 lle_to_gcc_expression (lambda_linear_expression lle
,
1540 lambda_linear_expression offset
,
1541 VEC(tree
) *induction_vars
,
1542 VEC(tree
) *invariants
,
1543 enum tree_code wrap
, tree
* stmts_to_insert
)
1545 tree stmts
, stmt
, resvar
, name
;
1547 tree_stmt_iterator tsi
;
1551 /* Create a statement list and a linear expression temporary. */
1552 stmts
= alloc_stmt_list ();
1553 resvar
= create_tmp_var (integer_type_node
, "lletmp");
1554 add_referenced_tmp_var (resvar
);
1555 results
= VEC_alloc (tree
, 1);
1557 /* Build up the linear expressions, and put the variable representing the
1558 result in the results array. */
1559 for (; lle
!= NULL
; lle
= LLE_NEXT (lle
))
1561 /* Start at name = 0. */
1562 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
, integer_zero_node
);
1563 name
= make_ssa_name (resvar
, stmt
);
1564 TREE_OPERAND (stmt
, 0) = name
;
1565 tsi
= tsi_last (stmts
);
1566 tsi_link_after (&tsi
, stmt
, TSI_CONTINUE_LINKING
);
1568 /* First do the induction variables.
1569 at the end, name = name + all the induction variables added
1571 for (i
= 0; i
< VEC_length (tree
,induction_vars
); i
++)
1573 if (LLE_COEFFICIENTS (lle
)[i
] != 0)
1579 /* mult = induction variable * coefficient. */
1580 if (LLE_COEFFICIENTS (lle
)[i
] == 1)
1582 mult
= VEC_index (tree
, induction_vars
, i
);
1586 coeff
= build_int_cst (integer_type_node
,
1587 LLE_COEFFICIENTS (lle
)[i
]);
1588 mult
= fold (build (MULT_EXPR
, integer_type_node
,
1589 VEC_index (tree
, induction_vars
, i
),
1593 /* newname = mult */
1594 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
, mult
);
1595 newname
= make_ssa_name (resvar
, stmt
);
1596 TREE_OPERAND (stmt
, 0) = newname
;
1597 tsi
= tsi_last (stmts
);
1598 tsi_link_after (&tsi
, stmt
, TSI_CONTINUE_LINKING
);
1600 /* name = name + newname */
1601 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
,
1602 build (PLUS_EXPR
, integer_type_node
,
1604 name
= make_ssa_name (resvar
, stmt
);
1605 TREE_OPERAND (stmt
, 0) = name
;
1606 tsi
= tsi_last (stmts
);
1607 tsi_link_after (&tsi
, stmt
, TSI_CONTINUE_LINKING
);
1611 /* Handle our invariants.
1612 At the end, we have name = name + result of adding all multiplied
1614 for (i
= 0; i
< VEC_length (tree
, invariants
); i
++)
1616 if (LLE_INVARIANT_COEFFICIENTS (lle
)[i
] != 0)
1622 /* mult = invariant * coefficient */
1623 if (LLE_INVARIANT_COEFFICIENTS (lle
)[i
] == 1)
1625 mult
= VEC_index (tree
, invariants
, i
);
1629 coeff
= build_int_cst (integer_type_node
,
1630 LLE_INVARIANT_COEFFICIENTS (lle
)[i
]);
1631 mult
= fold (build (MULT_EXPR
, integer_type_node
,
1632 VEC_index (tree
, invariants
, i
),
1636 /* newname = mult */
1637 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
, mult
);
1638 newname
= make_ssa_name (resvar
, stmt
);
1639 TREE_OPERAND (stmt
, 0) = newname
;
1640 tsi
= tsi_last (stmts
);
1641 tsi_link_after (&tsi
, stmt
, TSI_CONTINUE_LINKING
);
1643 /* name = name + newname */
1644 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
,
1645 build (PLUS_EXPR
, integer_type_node
,
1647 name
= make_ssa_name (resvar
, stmt
);
1648 TREE_OPERAND (stmt
, 0) = name
;
1649 tsi
= tsi_last (stmts
);
1650 tsi_link_after (&tsi
, stmt
, TSI_CONTINUE_LINKING
);
1654 /* Now handle the constant.
1655 name = name + constant. */
1656 if (LLE_CONSTANT (lle
) != 0)
1658 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
,
1659 build (PLUS_EXPR
, integer_type_node
,
1660 name
, build_int_cst (integer_type_node
,
1661 LLE_CONSTANT (lle
))));
1662 name
= make_ssa_name (resvar
, stmt
);
1663 TREE_OPERAND (stmt
, 0) = name
;
1664 tsi
= tsi_last (stmts
);
1665 tsi_link_after (&tsi
, stmt
, TSI_CONTINUE_LINKING
);
1668 /* Now handle the offset.
1669 name = name + linear offset. */
1670 if (LLE_CONSTANT (offset
) != 0)
1672 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
,
1673 build (PLUS_EXPR
, integer_type_node
,
1674 name
, build_int_cst (integer_type_node
,
1675 LLE_CONSTANT (offset
))));
1676 name
= make_ssa_name (resvar
, stmt
);
1677 TREE_OPERAND (stmt
, 0) = name
;
1678 tsi
= tsi_last (stmts
);
1679 tsi_link_after (&tsi
, stmt
, TSI_CONTINUE_LINKING
);
1682 /* Handle any denominator that occurs. */
1683 if (LLE_DENOMINATOR (lle
) != 1)
1685 if (wrap
== MAX_EXPR
)
1686 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
,
1687 build (CEIL_DIV_EXPR
, integer_type_node
,
1688 name
, build_int_cst (integer_type_node
,
1689 LLE_DENOMINATOR (lle
))));
1690 else if (wrap
== MIN_EXPR
)
1691 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
,
1692 build (FLOOR_DIV_EXPR
, integer_type_node
,
1693 name
, build_int_cst (integer_type_node
,
1694 LLE_DENOMINATOR (lle
))));
1698 /* name = {ceil, floor}(name/denominator) */
1699 name
= make_ssa_name (resvar
, stmt
);
1700 TREE_OPERAND (stmt
, 0) = name
;
1701 tsi
= tsi_last (stmts
);
1702 tsi_link_after (&tsi
, stmt
, TSI_CONTINUE_LINKING
);
1704 VEC_safe_push (tree
, results
, name
);
1707 /* Again, out of laziness, we don't handle this case yet. It's not
1708 hard, it just hasn't occurred. */
1709 if (VEC_length (tree
, results
) > 2)
1712 /* We may need to wrap the results in a MAX_EXPR or MIN_EXPR. */
1713 if (VEC_length (tree
, results
) > 1)
1715 tree op1
= VEC_index (tree
, results
, 0);
1716 tree op2
= VEC_index (tree
, results
, 1);
1717 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
,
1718 build (wrap
, integer_type_node
, op1
, op2
));
1719 name
= make_ssa_name (resvar
, stmt
);
1720 TREE_OPERAND (stmt
, 0) = name
;
1721 tsi
= tsi_last (stmts
);
1722 tsi_link_after (&tsi
, stmt
, TSI_CONTINUE_LINKING
);
1725 *stmts_to_insert
= stmts
;
1729 /* Transform a lambda loopnest NEW_LOOPNEST, which had TRANSFORM applied to
1730 it, back into gcc code. This changes the
1731 loops, their induction variables, and their bodies, so that they
1732 match the transformed loopnest.
1733 OLD_LOOPNEST is the loopnest before we've replaced it with the new
1735 OLD_IVS is a vector of induction variables from the old loopnest.
1736 INVARIANTS is a vector of loop invariants from the old loopnest.
1737 NEW_LOOPNEST is the new lambda loopnest to replace OLD_LOOPNEST with.
1738 TRANSFORM is the matrix transform that was applied to OLD_LOOPNEST to get
1741 lambda_loopnest_to_gcc_loopnest (struct loop
*old_loopnest
,
1743 VEC(tree
) *invariants
,
1744 lambda_loopnest new_loopnest
,
1745 lambda_trans_matrix transform
)
1752 block_stmt_iterator bsi
;
1757 transform
= lambda_trans_matrix_inverse (transform
);
1758 fprintf (dump_file
, "Inverse of transformation matrix:\n");
1759 print_lambda_trans_matrix (dump_file
, transform
);
1761 temp
= old_loopnest
;
1762 new_ivs
= VEC_alloc (tree
, 1);
1768 temp
= old_loopnest
;
1772 lambda_loop newloop
;
1774 tree ivvar
, ivvarinced
, exitcond
, stmts
;
1775 enum tree_code testtype
;
1776 tree newupperbound
, newlowerbound
;
1777 lambda_linear_expression offset
;
1778 /* First, build the new induction variable temporary */
1780 ivvar
= create_tmp_var (integer_type_node
, "lnivtmp");
1781 add_referenced_tmp_var (ivvar
);
1783 VEC_safe_push (tree
, new_ivs
, ivvar
);
1785 newloop
= LN_LOOPS (new_loopnest
)[i
];
1787 /* Linear offset is a bit tricky to handle. Punt on the unhandled
1789 offset
= LL_LINEAR_OFFSET (newloop
);
1791 if (LLE_DENOMINATOR (offset
) != 1
1792 || !lambda_vector_zerop (LLE_COEFFICIENTS (offset
), depth
))
1795 /* Now build the new lower bounds, and insert the statements
1796 necessary to generate it on the loop preheader. */
1797 newlowerbound
= lle_to_gcc_expression (LL_LOWER_BOUND (newloop
),
1798 LL_LINEAR_OFFSET (newloop
),
1800 invariants
, MAX_EXPR
, &stmts
);
1801 bsi_insert_on_edge (loop_preheader_edge (temp
), stmts
);
1802 bsi_commit_edge_inserts (NULL
);
1803 /* Build the new upper bound and insert its statements in the
1804 basic block of the exit condition */
1805 newupperbound
= lle_to_gcc_expression (LL_UPPER_BOUND (newloop
),
1806 LL_LINEAR_OFFSET (newloop
),
1808 invariants
, MIN_EXPR
, &stmts
);
1809 exitcond
= get_loop_exit_condition (temp
);
1810 bb
= bb_for_stmt (exitcond
);
1811 bsi
= bsi_start (bb
);
1812 bsi_insert_after (&bsi
, stmts
, BSI_NEW_STMT
);
1814 /* Create the new iv, and insert it's increment on the latch
1817 bb
= temp
->latch
->pred
->src
;
1818 bsi
= bsi_last (bb
);
1819 create_iv (newlowerbound
,
1820 build_int_cst (integer_type_node
, LL_STEP (newloop
)),
1821 ivvar
, temp
, &bsi
, false, &ivvar
,
1824 /* Replace the exit condition with the new upper bound
1826 testtype
= LL_STEP (newloop
) >= 0 ? LE_EXPR
: GE_EXPR
;
1827 COND_EXPR_COND (exitcond
) = build (testtype
,
1829 ivvarinced
, newupperbound
);
1830 modify_stmt (exitcond
);
1831 VEC_replace (tree
, new_ivs
, i
, ivvar
);
1837 /* Go through the loop and make iv replacements. */
1838 bbs
= get_loop_body (old_loopnest
);
1839 for (i
= 0; i
< old_loopnest
->num_nodes
; i
++)
1840 for (bsi
= bsi_start (bbs
[i
]); !bsi_end_p (bsi
); bsi_next (&bsi
))
1842 tree stmt
= bsi_stmt (bsi
);
1846 get_stmt_operands (stmt
);
1847 uses
= STMT_USE_OPS (stmt
);
1848 for (j
= 0; j
< NUM_USES (uses
); j
++)
1851 use_operand_p use
= USE_OP_PTR (uses
, j
);
1852 for (k
= 0; k
< VEC_length (tree
, old_ivs
); k
++)
1854 tree oldiv
= VEC_index (tree
, old_ivs
, k
);
1855 if (USE_FROM_PTR (use
) == oldiv
)
1858 lambda_body_vector lbv
;
1860 /* Compute the new expression for the induction
1862 depth
= VEC_length (tree
, new_ivs
);
1863 lbv
= lambda_body_vector_new (depth
);
1864 LBV_COEFFICIENTS (lbv
)[k
] = 1;
1865 lbv
= lambda_body_vector_compute_new (transform
, lbv
);
1866 newiv
= lbv_to_gcc_expression (lbv
, new_ivs
, &stmts
);
1868 /* Insert the statements to build that
1870 bsi_insert_before (&bsi
, stmts
, BSI_SAME_STMT
);
1872 /* Replace the use with the result of that
1877 "Replacing induction variable use of ");
1878 print_generic_stmt (dump_file
, USE_FROM_PTR (use
), 0);
1879 fprintf (dump_file
, " with ");
1880 print_generic_stmt (dump_file
, newiv
, 0);
1881 fprintf (dump_file
, "\n");
1883 SET_USE (use
, newiv
);
1891 /* Returns true when the vector V is lexicographically positive, in
1892 other words, when the first non zero element is positive. */
1895 lambda_vector_lexico_pos (lambda_vector v
, unsigned n
)
1898 for (i
= 0; i
< n
; i
++)
1910 /* Return true if TRANS is a legal transformation matrix that respects
1911 the dependence vectors in DISTS and DIRS. The conservative answer
1914 "Wolfe proves that a unimodular transformation represented by the
1915 matrix T is legal when applied to a loop nest with a set of
1916 lexicographically non-negative distance vectors RDG if and only if
1917 for each vector d in RDG, (T.d >= 0) is lexicographically positive.
1918 ie.: if and only if it transforms the lexicographically positive
1919 distance vectors to lexicographically positive vectors. Note that
1920 a unimodular matrix must transform the zero vector (and only it) to
1921 the zero vector." S.Muchnick. */
1924 lambda_transform_legal_p (lambda_trans_matrix trans
,
1925 int nb_loops
, varray_type dependence_relations
)
1928 lambda_vector distres
;
1929 struct data_dependence_relation
*ddr
;
1931 #if defined ENABLE_CHECKING
1932 if (LTM_COLSIZE (trans
) != nb_loops
|| LTM_ROWSIZE (trans
) != nb_loops
)
1936 /* When there is an unknown relation in the dependence_relations, we
1937 know that it is no worth looking at this loop nest: give up. */
1938 ddr
= (struct data_dependence_relation
*)
1939 VARRAY_GENERIC_PTR (dependence_relations
, 0);
1942 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
1945 distres
= lambda_vector_new (nb_loops
);
1947 /* For each distance vector in the dependence graph. */
1948 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (dependence_relations
); i
++)
1950 ddr
= (struct data_dependence_relation
*)
1951 VARRAY_GENERIC_PTR (dependence_relations
, i
);
1953 /* Don't care about relations for which we know that there is no
1954 dependence, nor about read-read (aka. output-dependences):
1955 these data accesses can happen in any order. */
1956 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
1957 || (DR_IS_READ (DDR_A (ddr
)) && DR_IS_READ (DDR_B (ddr
))))
1959 /* Conservatively answer: "this transformation is not valid". */
1960 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
1963 /* Compute trans.dist_vect */
1964 lambda_matrix_vector_mult (LTM_MATRIX (trans
), nb_loops
, nb_loops
,
1965 DDR_DIST_VECT (ddr
), distres
);
1967 if (!lambda_vector_lexico_pos (distres
, nb_loops
))