* arm.h (REVERSE_CONDITION): Define.
[official-gcc.git] / gcc / lambda-code.c
blob664092797ea757672e5288c44ad40e0a305a6ee7
1 /* Loop transformation code generation
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dberlin@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "errors.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "rtl.h"
31 #include "basic-block.h"
32 #include "diagnostic.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
35 #include "timevar.h"
36 #include "cfgloop.h"
37 #include "expr.h"
38 #include "optabs.h"
39 #include "tree-chrec.h"
40 #include "tree-data-ref.h"
41 #include "tree-pass.h"
42 #include "tree-scalar-evolution.h"
43 #include "vec.h"
44 #include "lambda.h"
46 /* This loop nest code generation is based on non-singular matrix
47 math.
49 A little terminology and a general sketch of the algorithm. See "A singular
50 loop transformatrion framework based on non-singular matrices" by Wei Li and
51 Keshav Pingali for formal proofs that the various statements below are
52 correct.
54 A loop iteration space are the points traversed by the loop. A point in the
55 iteration space can be represented by a vector of size <loop depth>. You can
56 therefore represent the iteration space as a integral combinations of a set
57 of basis vectors.
59 A loop iteration space is dense if every integer point between the loop
60 bounds is a point in the iteration space. Every loop with a step of 1
61 therefore has a dense iteration space.
63 for i = 1 to 3, step 1 is a dense iteration space.
65 A loop iteration space is sparse if it is not dense. That is, the iteration
66 space skips integer points that are within the loop bounds.
68 for i = 1 to 3, step 2 is a sparse iteration space, because the integer point
69 2 is skipped.
71 Dense source spaces are easy to transform, because they don't skip any
72 points to begin with. Thus we can compute the exact bounds of the target
73 space using min/max and floor/ceil.
75 For a dense source space, we take the transformation matrix, decompose it
76 into a lower triangular part (H) and a unimodular part (U).
77 We then compute the auxillary space from the unimodular part (source loop
78 nest . U = auxillary space) , which has two important properties:
79 1. It traverses the iterations in the same lexicographic order as the source
80 space.
81 2. It is a dense space when the source is a dense space (even if the target
82 space is going to be sparse).
84 Given the auxillary space, we use the lower triangular part to compute the
85 bounds in the target space by simple matrix multiplication.
86 The gaps in the target space (IE the new loop step sizes) will be the
87 diagonals of the H matrix.
89 Sparse source spaces require another step, because you can't directly compute
90 the exact bounds of the auxillary and target space from the sparse space.
91 Rather than try to come up with a separate algorithm to handle sparse source
92 spaces directly, we just find a legal transformation matrix that gives you
93 the sparse source space, from a dense space, and then transform the dense
94 space.
96 For a regular sparse space, you can represent the source space as an integer
97 lattice, and the base space of that lattice will always be dense. Thus, we
98 effectively use the lattice to figure out the transformation from the lattice
99 base space, to the sparse iteration space (IE what transform was applied to
100 the dense space to make it sparse). We then compose this transform with the
101 transformation matrix specified by the user (since our matrix transformations
102 are closed under composition, this is okay). We can then use the base space
103 (which is dense) plus the composed transformation matrix, to compute the rest
104 of the transform using the dense space algorithm above.
106 In other words, our sparse source space (B) is decomposed into a dense base
107 space (A), and a matrix (L) that transforms A into B, such that A.L = B.
108 We then compute the composition of L and the user transformation matrix (T),
109 so that T is now a transform from A to the result, instead of from B to the
110 result.
111 IE A.(LT) = result instead of B.T = result
112 Since A is now a dense source space, we can use the dense source space
113 algorithm above to compute the result of applying transform (LT) to A.
115 Fourier-Motzkin elimination is used to compute the bounds of the base space
116 of the lattice. */
118 /* Lattice stuff that is internal to the code generation algorithm. */
120 typedef struct
122 /* Lattice base matrix. */
123 lambda_matrix base;
124 /* Lattice dimension. */
125 int dimension;
126 /* Origin vector for the coefficients. */
127 lambda_vector origin;
128 /* Origin matrix for the invariants. */
129 lambda_matrix origin_invariants;
130 /* Number of invariants. */
131 int invariants;
132 } *lambda_lattice;
134 #define LATTICE_BASE(T) ((T)->base)
135 #define LATTICE_DIMENSION(T) ((T)->dimension)
136 #define LATTICE_ORIGIN(T) ((T)->origin)
137 #define LATTICE_ORIGIN_INVARIANTS(T) ((T)->origin_invariants)
138 #define LATTICE_INVARIANTS(T) ((T)->invariants)
140 static bool lle_equal (lambda_linear_expression, lambda_linear_expression,
141 int, int);
142 static lambda_lattice lambda_lattice_new (int, int);
143 static lambda_lattice lambda_lattice_compute_base (lambda_loopnest);
145 static tree find_induction_var_from_exit_cond (struct loop *);
147 /* Create a new lambda body vector. */
149 lambda_body_vector
150 lambda_body_vector_new (int size)
152 lambda_body_vector ret;
154 ret = ggc_alloc (sizeof (*ret));
155 LBV_COEFFICIENTS (ret) = lambda_vector_new (size);
156 LBV_SIZE (ret) = size;
157 LBV_DENOMINATOR (ret) = 1;
158 return ret;
161 /* Compute the new coefficients for the vector based on the
162 *inverse* of the transformation matrix. */
164 lambda_body_vector
165 lambda_body_vector_compute_new (lambda_trans_matrix transform,
166 lambda_body_vector vect)
168 lambda_body_vector temp;
169 int depth;
171 /* Make sure the matrix is square. */
172 if (LTM_ROWSIZE (transform) != LTM_COLSIZE (transform))
173 abort ();
175 depth = LTM_ROWSIZE (transform);
177 temp = lambda_body_vector_new (depth);
178 LBV_DENOMINATOR (temp) =
179 LBV_DENOMINATOR (vect) * LTM_DENOMINATOR (transform);
180 lambda_vector_matrix_mult (LBV_COEFFICIENTS (vect), depth,
181 LTM_MATRIX (transform), depth,
182 LBV_COEFFICIENTS (temp));
183 LBV_SIZE (temp) = LBV_SIZE (vect);
184 return temp;
187 /* Print out a lambda body vector. */
189 void
190 print_lambda_body_vector (FILE * outfile, lambda_body_vector body)
192 print_lambda_vector (outfile, LBV_COEFFICIENTS (body), LBV_SIZE (body));
195 /* Return TRUE if two linear expressions are equal. */
197 static bool
198 lle_equal (lambda_linear_expression lle1, lambda_linear_expression lle2,
199 int depth, int invariants)
201 int i;
203 if (lle1 == NULL || lle2 == NULL)
204 return false;
205 if (LLE_CONSTANT (lle1) != LLE_CONSTANT (lle2))
206 return false;
207 if (LLE_DENOMINATOR (lle1) != LLE_DENOMINATOR (lle2))
208 return false;
209 for (i = 0; i < depth; i++)
210 if (LLE_COEFFICIENTS (lle1)[i] != LLE_COEFFICIENTS (lle2)[i])
211 return false;
212 for (i = 0; i < invariants; i++)
213 if (LLE_INVARIANT_COEFFICIENTS (lle1)[i] !=
214 LLE_INVARIANT_COEFFICIENTS (lle2)[i])
215 return false;
216 return true;
219 /* Create a new linear expression with dimension DIM, and total number
220 of invariants INVARIANTS. */
222 lambda_linear_expression
223 lambda_linear_expression_new (int dim, int invariants)
225 lambda_linear_expression ret;
227 ret = ggc_alloc_cleared (sizeof (*ret));
229 LLE_COEFFICIENTS (ret) = lambda_vector_new (dim);
230 LLE_CONSTANT (ret) = 0;
231 LLE_INVARIANT_COEFFICIENTS (ret) = lambda_vector_new (invariants);
232 LLE_DENOMINATOR (ret) = 1;
233 LLE_NEXT (ret) = NULL;
235 return ret;
238 /* Print out a linear expression EXPR, with SIZE coefficients, to OUTFILE.
239 The starting letter used for variable names is START. */
241 static void
242 print_linear_expression (FILE * outfile, lambda_vector expr, int size,
243 char start)
245 int i;
246 bool first = true;
247 for (i = 0; i < size; i++)
249 if (expr[i] != 0)
251 if (first)
253 if (expr[i] < 0)
254 fprintf (outfile, "-");
255 first = false;
257 else if (expr[i] > 0)
258 fprintf (outfile, " + ");
259 else
260 fprintf (outfile, " - ");
261 if (abs (expr[i]) == 1)
262 fprintf (outfile, "%c", start + i);
263 else
264 fprintf (outfile, "%d%c", abs (expr[i]), start + i);
269 /* Print out a lambda linear expression structure, EXPR, to OUTFILE. The
270 depth/number of coefficients is given by DEPTH, the number of invariants is
271 given by INVARIANTS, and the character to start variable names with is given
272 by START. */
274 void
275 print_lambda_linear_expression (FILE * outfile,
276 lambda_linear_expression expr,
277 int depth, int invariants, char start)
279 fprintf (outfile, "\tLinear expression: ");
280 print_linear_expression (outfile, LLE_COEFFICIENTS (expr), depth, start);
281 fprintf (outfile, " constant: %d ", LLE_CONSTANT (expr));
282 fprintf (outfile, " invariants: ");
283 print_linear_expression (outfile, LLE_INVARIANT_COEFFICIENTS (expr),
284 invariants, 'A');
285 fprintf (outfile, " denominator: %d\n", LLE_DENOMINATOR (expr));
288 /* Print a lambda loop structure LOOP to OUTFILE. The depth/number of
289 coefficients is given by DEPTH, the number of invariants is
290 given by INVARIANTS, and the character to start variable names with is given
291 by START. */
293 void
294 print_lambda_loop (FILE * outfile, lambda_loop loop, int depth,
295 int invariants, char start)
297 int step;
298 lambda_linear_expression expr;
300 if (!loop)
301 abort ();
303 expr = LL_LINEAR_OFFSET (loop);
304 step = LL_STEP (loop);
305 fprintf (outfile, " step size = %d \n", step);
307 if (expr)
309 fprintf (outfile, " linear offset: \n");
310 print_lambda_linear_expression (outfile, expr, depth, invariants,
311 start);
314 fprintf (outfile, " lower bound: \n");
315 for (expr = LL_LOWER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
316 print_lambda_linear_expression (outfile, expr, depth, invariants, start);
317 fprintf (outfile, " upper bound: \n");
318 for (expr = LL_UPPER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
319 print_lambda_linear_expression (outfile, expr, depth, invariants, start);
322 /* Create a new loop nest structure with DEPTH loops, and INVARIANTS as the
323 number of invariants. */
325 lambda_loopnest
326 lambda_loopnest_new (int depth, int invariants)
328 lambda_loopnest ret;
329 ret = ggc_alloc (sizeof (*ret));
331 LN_LOOPS (ret) = ggc_alloc_cleared (depth * sizeof (lambda_loop));
332 LN_DEPTH (ret) = depth;
333 LN_INVARIANTS (ret) = invariants;
335 return ret;
338 /* Print a lambda loopnest structure, NEST, to OUTFILE. The starting
339 character to use for loop names is given by START. */
341 void
342 print_lambda_loopnest (FILE * outfile, lambda_loopnest nest, char start)
344 int i;
345 for (i = 0; i < LN_DEPTH (nest); i++)
347 fprintf (outfile, "Loop %c\n", start + i);
348 print_lambda_loop (outfile, LN_LOOPS (nest)[i], LN_DEPTH (nest),
349 LN_INVARIANTS (nest), 'i');
350 fprintf (outfile, "\n");
354 /* Allocate a new lattice structure of DEPTH x DEPTH, with INVARIANTS number
355 of invariants. */
357 static lambda_lattice
358 lambda_lattice_new (int depth, int invariants)
360 lambda_lattice ret;
361 ret = ggc_alloc (sizeof (*ret));
362 LATTICE_BASE (ret) = lambda_matrix_new (depth, depth);
363 LATTICE_ORIGIN (ret) = lambda_vector_new (depth);
364 LATTICE_ORIGIN_INVARIANTS (ret) = lambda_matrix_new (depth, invariants);
365 LATTICE_DIMENSION (ret) = depth;
366 LATTICE_INVARIANTS (ret) = invariants;
367 return ret;
370 /* Compute the lattice base for NEST. The lattice base is essentially a
371 non-singular transform from a dense base space to a sparse iteration space.
372 We use it so that we don't have to specially handle the case of a sparse
373 iteration space in other parts of the algorithm. As a result, this routine
374 only does something interesting (IE produce a matrix that isn't the
375 identity matrix) if NEST is a sparse space. */
377 static lambda_lattice
378 lambda_lattice_compute_base (lambda_loopnest nest)
380 lambda_lattice ret;
381 int depth, invariants;
382 lambda_matrix base;
384 int i, j, step;
385 lambda_loop loop;
386 lambda_linear_expression expression;
388 depth = LN_DEPTH (nest);
389 invariants = LN_INVARIANTS (nest);
391 ret = lambda_lattice_new (depth, invariants);
392 base = LATTICE_BASE (ret);
393 for (i = 0; i < depth; i++)
395 loop = LN_LOOPS (nest)[i];
396 if (!loop)
397 abort ();
398 step = LL_STEP (loop);
399 /* If we have a step of 1, then the base is one, and the
400 origin and invariant coefficients are 0. */
401 if (step == 1)
403 for (j = 0; j < depth; j++)
404 base[i][j] = 0;
405 base[i][i] = 1;
406 LATTICE_ORIGIN (ret)[i] = 0;
407 for (j = 0; j < invariants; j++)
408 LATTICE_ORIGIN_INVARIANTS (ret)[i][j] = 0;
410 else
412 /* Otherwise, we need the lower bound expression (which must
413 be an affine function) to determine the base. */
414 expression = LL_LOWER_BOUND (loop);
415 if (!expression
416 || LLE_NEXT (expression) || LLE_DENOMINATOR (expression) != 1)
417 abort ();
419 /* The lower triangular portion of the base is going to be the
420 coefficient times the step */
421 for (j = 0; j < i; j++)
422 base[i][j] = LLE_COEFFICIENTS (expression)[j]
423 * LL_STEP (LN_LOOPS (nest)[j]);
424 base[i][i] = step;
425 for (j = i + 1; j < depth; j++)
426 base[i][j] = 0;
428 /* Origin for this loop is the constant of the lower bound
429 expression. */
430 LATTICE_ORIGIN (ret)[i] = LLE_CONSTANT (expression);
432 /* Coefficient for the invariants are equal to the invariant
433 coefficients in the expression. */
434 for (j = 0; j < invariants; j++)
435 LATTICE_ORIGIN_INVARIANTS (ret)[i][j] =
436 LLE_INVARIANT_COEFFICIENTS (expression)[j];
439 return ret;
442 /* Compute the greatest common denominator of two numbers (A and B) using
443 Euclid's algorithm. */
445 static int
446 gcd (int a, int b)
449 int x, y, z;
451 x = abs (a);
452 y = abs (b);
454 while (x > 0)
456 z = y % x;
457 y = x;
458 x = z;
461 return (y);
464 /* Compute the greatest common denominator of a VECTOR of SIZE numbers. */
466 static int
467 gcd_vector (lambda_vector vector, int size)
469 int i;
470 int gcd1 = 0;
472 if (size > 0)
474 gcd1 = vector[0];
475 for (i = 1; i < size; i++)
476 gcd1 = gcd (gcd1, vector[i]);
478 return gcd1;
481 /* Compute the least common multiple of two numbers A and B . */
483 static int
484 lcm (int a, int b)
486 return (abs (a) * abs (b) / gcd (a, b));
489 /* Compute the loop bounds for the auxiliary space NEST.
490 Input system used is Ax <= b. TRANS is the unimodular transformation. */
492 static lambda_loopnest
493 lambda_compute_auxillary_space (lambda_loopnest nest,
494 lambda_trans_matrix trans)
496 lambda_matrix A, B, A1, B1, temp0;
497 lambda_vector a, a1, temp1;
498 lambda_matrix invertedtrans;
499 int determinant, depth, invariants, size, newsize;
500 int i, j, k;
501 lambda_loopnest auxillary_nest;
502 lambda_loop loop;
503 lambda_linear_expression expression;
504 lambda_lattice lattice;
506 int multiple, f1, f2;
508 depth = LN_DEPTH (nest);
509 invariants = LN_INVARIANTS (nest);
511 /* Unfortunately, we can't know the number of constraints we'll have
512 ahead of time, but this should be enough even in ridiculous loop nest
513 cases. We abort if we go over this limit. */
514 A = lambda_matrix_new (128, depth);
515 B = lambda_matrix_new (128, invariants);
516 a = lambda_vector_new (128);
518 A1 = lambda_matrix_new (128, depth);
519 B1 = lambda_matrix_new (128, invariants);
520 a1 = lambda_vector_new (128);
522 /* Store the bounds in the equation matrix A, constant vector a, and
523 invariant matrix B, so that we have Ax <= a + B.
524 This requires a little equation rearranging so that everything is on the
525 correct side of the inequality. */
526 size = 0;
527 for (i = 0; i < depth; i++)
529 loop = LN_LOOPS (nest)[i];
531 /* First we do the lower bound. */
532 if (LL_STEP (loop) > 0)
533 expression = LL_LOWER_BOUND (loop);
534 else
535 expression = LL_UPPER_BOUND (loop);
537 for (; expression != NULL; expression = LLE_NEXT (expression))
539 /* Fill in the coefficient. */
540 for (j = 0; j < i; j++)
541 A[size][j] = LLE_COEFFICIENTS (expression)[j];
543 /* And the invariant coefficient. */
544 for (j = 0; j < invariants; j++)
545 B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
547 /* And the constant. */
548 a[size] = LLE_CONSTANT (expression);
550 /* Convert (2x+3y+2+b)/4 <= z to 2x+3y-4z <= -2-b. IE put all
551 constants and single variables on */
552 A[size][i] = -1 * LLE_DENOMINATOR (expression);
553 a[size] *= -1;
554 for (j = 0; j < invariants; j++)
555 B[size][j] *= -1;
557 size++;
558 /* Need to increase matrix sizes above. */
559 if (size > 127)
560 abort ();
563 /* Then do the exact same thing for the upper bounds. */
564 if (LL_STEP (loop) > 0)
565 expression = LL_UPPER_BOUND (loop);
566 else
567 expression = LL_LOWER_BOUND (loop);
569 for (; expression != NULL; expression = LLE_NEXT (expression))
571 /* Fill in the coefficient. */
572 for (j = 0; j < i; j++)
573 A[size][j] = LLE_COEFFICIENTS (expression)[j];
575 /* And the invariant coefficient. */
576 for (j = 0; j < invariants; j++)
577 B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
579 /* And the constant. */
580 a[size] = LLE_CONSTANT (expression);
582 /* Convert z <= (2x+3y+2+b)/4 to -2x-3y+4z <= 2+b. */
583 for (j = 0; j < i; j++)
584 A[size][j] *= -1;
585 A[size][i] = LLE_DENOMINATOR (expression);
586 size++;
587 /* Need to increase matrix sizes above. */
588 if (size > 127)
589 abort ();
593 /* Compute the lattice base x = base * y + origin, where y is the
594 base space. */
595 lattice = lambda_lattice_compute_base (nest);
597 /* Ax <= a + B then becomes ALy <= a+B - A*origin. L is the lattice base */
599 /* A1 = A * L */
600 lambda_matrix_mult (A, LATTICE_BASE (lattice), A1, size, depth, depth);
602 /* a1 = a - A * origin constant. */
603 lambda_matrix_vector_mult (A, size, depth, LATTICE_ORIGIN (lattice), a1);
604 lambda_vector_add_mc (a, 1, a1, -1, a1, size);
606 /* B1 = B - A * origin invariant. */
607 lambda_matrix_mult (A, LATTICE_ORIGIN_INVARIANTS (lattice), B1, size, depth,
608 invariants);
609 lambda_matrix_add_mc (B, 1, B1, -1, B1, size, invariants);
611 /* Now compute the auxiliary space bounds by first inverting U, multiplying
612 it by A1, then performing fourier motzkin. */
614 invertedtrans = lambda_matrix_new (depth, depth);
616 /* Compute the inverse of U. */
617 determinant = lambda_matrix_inverse (LTM_MATRIX (trans),
618 invertedtrans, depth);
620 /* A = A1 inv(U). */
621 lambda_matrix_mult (A1, invertedtrans, A, size, depth, depth);
623 /* Perform Fourier-Motzkin elimination to calculate the bounds of the
624 auxillary nest.
625 Fourier-Motzkin is a way of reducing systems of linear inequality so that
626 it is easy to calculate the answer and bounds.
627 A sketch of how it works:
628 Given a system of linear inequalities, ai * xj >= bk, you can always
629 rewrite the constraints so they are all of the form
630 a <= x, or x <= b, or x >= constant for some x in x1 ... xj (and some b
631 in b1 ... bk, and some a in a1...ai)
632 You can then eliminate this x from the non-constant inequalities by
633 rewriting these as a <= b, x >= constant, and delete the x variable.
634 You can then repeat this for any remaining x variables, and then we have
635 an easy to use variable <= constant (or no variables at all) form that we
636 can construct our bounds from.
638 In our case, each time we eliminate, we construct part of the bound from
639 the ith variable, then delete the ith variable.
641 Remember the constant are in our vector a, our coefficient matrix is A,
642 and our invariant coefficient matrix is B */
644 /* Swap B and B1, and a1 and a */
645 temp0 = B1;
646 B1 = B;
647 B = temp0;
649 temp1 = a1;
650 a1 = a;
651 a = temp1;
653 auxillary_nest = lambda_loopnest_new (depth, invariants);
655 for (i = depth - 1; i >= 0; i--)
657 loop = lambda_loop_new ();
658 LN_LOOPS (auxillary_nest)[i] = loop;
659 LL_STEP (loop) = 1;
661 for (j = 0; j < size; j++)
663 if (A[j][i] < 0)
665 /* Lower bound. */
666 expression = lambda_linear_expression_new (depth, invariants);
668 for (k = 0; k < i; k++)
669 LLE_COEFFICIENTS (expression)[k] = A[j][k];
670 for (k = 0; k < invariants; k++)
671 LLE_INVARIANT_COEFFICIENTS (expression)[k] = -1 * B[j][k];
672 LLE_DENOMINATOR (expression) = -1 * A[j][i];
673 LLE_CONSTANT (expression) = -1 * a[j];
674 /* Ignore if identical to the existing lower bound. */
675 if (!lle_equal (LL_LOWER_BOUND (loop),
676 expression, depth, invariants))
678 LLE_NEXT (expression) = LL_LOWER_BOUND (loop);
679 LL_LOWER_BOUND (loop) = expression;
683 else if (A[j][i] > 0)
685 /* Upper bound. */
686 expression = lambda_linear_expression_new (depth, invariants);
687 for (k = 0; k < i; k++)
688 LLE_COEFFICIENTS (expression)[k] = -1 * A[j][k];
689 LLE_CONSTANT (expression) = a[j];
691 for (k = 0; k < invariants; k++)
692 LLE_INVARIANT_COEFFICIENTS (expression)[k] = B[j][k];
694 LLE_DENOMINATOR (expression) = A[j][i];
695 /* Ignore if identical to the existing upper bound. */
696 if (!lle_equal (LL_UPPER_BOUND (loop),
697 expression, depth, invariants))
699 LLE_NEXT (expression) = LL_UPPER_BOUND (loop);
700 LL_UPPER_BOUND (loop) = expression;
705 /* creates a new system by deleting the i'th variable. */
706 newsize = 0;
707 for (j = 0; j < size; j++)
709 if (A[j][i] == 0)
711 lambda_vector_copy (A[j], A1[newsize], depth);
712 lambda_vector_copy (B[j], B1[newsize], invariants);
713 a1[newsize] = a[j];
714 newsize++;
716 else if (A[j][i] > 0)
718 for (k = 0; k < size; k++)
720 if (A[k][i] < 0)
722 multiple = lcm (A[j][i], A[k][i]);
723 f1 = multiple / A[j][i];
724 f2 = -1 * multiple / A[k][i];
726 lambda_vector_add_mc (A[j], f1, A[k], f2,
727 A1[newsize], depth);
728 lambda_vector_add_mc (B[j], f1, B[k], f2,
729 B1[newsize], invariants);
730 a1[newsize] = f1 * a[j] + f2 * a[k];
731 newsize++;
737 temp0 = A;
738 A = A1;
739 A1 = temp0;
741 temp0 = B;
742 B = B1;
743 B1 = temp0;
745 temp1 = a;
746 a = a1;
747 a1 = temp1;
749 size = newsize;
752 return auxillary_nest;
755 /* Compute the loop bounds for the target space, using the bounds of
756 the auxillary nest AUXILLARY_NEST, and the triangular matrix H. This is
757 done by matrix multiplication and then transformation of the new matrix
758 back into linear expression form.
759 Return the target loopnest. */
761 static lambda_loopnest
762 lambda_compute_target_space (lambda_loopnest auxillary_nest,
763 lambda_trans_matrix H, lambda_vector stepsigns)
765 lambda_matrix inverse, H1;
766 int determinant, i, j;
767 int gcd1, gcd2;
768 int factor;
770 lambda_loopnest target_nest;
771 int depth, invariants;
772 lambda_matrix target;
774 lambda_loop auxillary_loop, target_loop;
775 lambda_linear_expression expression, auxillary_expr, target_expr, tmp_expr;
777 depth = LN_DEPTH (auxillary_nest);
778 invariants = LN_INVARIANTS (auxillary_nest);
780 inverse = lambda_matrix_new (depth, depth);
781 determinant = lambda_matrix_inverse (LTM_MATRIX (H), inverse, depth);
783 /* H1 is H excluding its diagonal. */
784 H1 = lambda_matrix_new (depth, depth);
785 lambda_matrix_copy (LTM_MATRIX (H), H1, depth, depth);
787 for (i = 0; i < depth; i++)
788 H1[i][i] = 0;
790 /* Computes the linear offsets of the loop bounds. */
791 target = lambda_matrix_new (depth, depth);
792 lambda_matrix_mult (H1, inverse, target, depth, depth, depth);
794 target_nest = lambda_loopnest_new (depth, invariants);
796 for (i = 0; i < depth; i++)
799 /* Get a new loop structure. */
800 target_loop = lambda_loop_new ();
801 LN_LOOPS (target_nest)[i] = target_loop;
803 /* Computes the gcd of the coefficients of the linear part. */
804 gcd1 = gcd_vector (target[i], i);
806 /* Include the denominator in the GCD */
807 gcd1 = gcd (gcd1, determinant);
809 /* Now divide through by the gcd */
810 for (j = 0; j < i; j++)
811 target[i][j] = target[i][j] / gcd1;
813 expression = lambda_linear_expression_new (depth, invariants);
814 lambda_vector_copy (target[i], LLE_COEFFICIENTS (expression), depth);
815 LLE_DENOMINATOR (expression) = determinant / gcd1;
816 LLE_CONSTANT (expression) = 0;
817 lambda_vector_clear (LLE_INVARIANT_COEFFICIENTS (expression),
818 invariants);
819 LL_LINEAR_OFFSET (target_loop) = expression;
822 /* For each loop, compute the new bounds from H */
823 for (i = 0; i < depth; i++)
825 auxillary_loop = LN_LOOPS (auxillary_nest)[i];
826 target_loop = LN_LOOPS (target_nest)[i];
827 LL_STEP (target_loop) = LTM_MATRIX (H)[i][i];
828 factor = LTM_MATRIX (H)[i][i];
830 /* First we do the lower bound. */
831 auxillary_expr = LL_LOWER_BOUND (auxillary_loop);
833 for (; auxillary_expr != NULL;
834 auxillary_expr = LLE_NEXT (auxillary_expr))
836 target_expr = lambda_linear_expression_new (depth, invariants);
837 lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
838 depth, inverse, depth,
839 LLE_COEFFICIENTS (target_expr));
840 lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
841 LLE_COEFFICIENTS (target_expr), depth,
842 factor);
844 LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
845 lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
846 LLE_INVARIANT_COEFFICIENTS (target_expr),
847 invariants);
848 lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
849 LLE_INVARIANT_COEFFICIENTS (target_expr),
850 invariants, factor);
851 LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
853 if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
855 LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
856 * determinant;
857 lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
858 (target_expr),
859 LLE_INVARIANT_COEFFICIENTS
860 (target_expr), invariants,
861 determinant);
862 LLE_DENOMINATOR (target_expr) =
863 LLE_DENOMINATOR (target_expr) * determinant;
865 /* Find the gcd and divide by it here, rather than doing it
866 at the tree level. */
867 gcd1 = gcd_vector (LLE_COEFFICIENTS (target_expr), depth);
868 gcd2 = gcd_vector (LLE_INVARIANT_COEFFICIENTS (target_expr),
869 invariants);
870 gcd1 = gcd (gcd1, gcd2);
871 gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
872 gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
873 for (j = 0; j < depth; j++)
874 LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
875 for (j = 0; j < invariants; j++)
876 LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
877 LLE_CONSTANT (target_expr) /= gcd1;
878 LLE_DENOMINATOR (target_expr) /= gcd1;
879 /* Ignore if identical to existing bound. */
880 if (!lle_equal (LL_LOWER_BOUND (target_loop), target_expr, depth,
881 invariants))
883 LLE_NEXT (target_expr) = LL_LOWER_BOUND (target_loop);
884 LL_LOWER_BOUND (target_loop) = target_expr;
887 /* Now do the upper bound. */
888 auxillary_expr = LL_UPPER_BOUND (auxillary_loop);
890 for (; auxillary_expr != NULL;
891 auxillary_expr = LLE_NEXT (auxillary_expr))
893 target_expr = lambda_linear_expression_new (depth, invariants);
894 lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
895 depth, inverse, depth,
896 LLE_COEFFICIENTS (target_expr));
897 lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
898 LLE_COEFFICIENTS (target_expr), depth,
899 factor);
900 LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
901 lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
902 LLE_INVARIANT_COEFFICIENTS (target_expr),
903 invariants);
904 lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
905 LLE_INVARIANT_COEFFICIENTS (target_expr),
906 invariants, factor);
907 LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
909 if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
911 LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
912 * determinant;
913 lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
914 (target_expr),
915 LLE_INVARIANT_COEFFICIENTS
916 (target_expr), invariants,
917 determinant);
918 LLE_DENOMINATOR (target_expr) =
919 LLE_DENOMINATOR (target_expr) * determinant;
921 /* Find the gcd and divide by it here, instead of at the
922 tree level. */
923 gcd1 = gcd_vector (LLE_COEFFICIENTS (target_expr), depth);
924 gcd2 = gcd_vector (LLE_INVARIANT_COEFFICIENTS (target_expr),
925 invariants);
926 gcd1 = gcd (gcd1, gcd2);
927 gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
928 gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
929 for (j = 0; j < depth; j++)
930 LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
931 for (j = 0; j < invariants; j++)
932 LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
933 LLE_CONSTANT (target_expr) /= gcd1;
934 LLE_DENOMINATOR (target_expr) /= gcd1;
935 /* Ignore if equal to existing bound. */
936 if (!lle_equal (LL_UPPER_BOUND (target_loop), target_expr, depth,
937 invariants))
939 LLE_NEXT (target_expr) = LL_UPPER_BOUND (target_loop);
940 LL_UPPER_BOUND (target_loop) = target_expr;
944 for (i = 0; i < depth; i++)
946 target_loop = LN_LOOPS (target_nest)[i];
947 /* If necessary, exchange the upper and lower bounds and negate
948 the step size. */
949 if (stepsigns[i] < 0)
951 LL_STEP (target_loop) *= -1;
952 tmp_expr = LL_LOWER_BOUND (target_loop);
953 LL_LOWER_BOUND (target_loop) = LL_UPPER_BOUND (target_loop);
954 LL_UPPER_BOUND (target_loop) = tmp_expr;
957 return target_nest;
960 /* Compute the step signs of TRANS, using TRANS and stepsigns. Return the new
961 result. */
963 static lambda_vector
964 lambda_compute_step_signs (lambda_trans_matrix trans, lambda_vector stepsigns)
966 lambda_matrix matrix, H;
967 int size;
968 lambda_vector newsteps;
969 int i, j, factor, minimum_column;
970 int temp;
972 matrix = LTM_MATRIX (trans);
973 size = LTM_ROWSIZE (trans);
974 H = lambda_matrix_new (size, size);
976 newsteps = lambda_vector_new (size);
977 lambda_vector_copy (stepsigns, newsteps, size);
979 lambda_matrix_copy (matrix, H, size, size);
981 for (j = 0; j < size; j++)
983 lambda_vector row;
984 row = H[j];
985 for (i = j; i < size; i++)
986 if (row[i] < 0)
987 lambda_matrix_col_negate (H, size, i);
988 while (lambda_vector_first_nz (row, size, j + 1) < size)
990 minimum_column = lambda_vector_min_nz (row, size, j);
991 lambda_matrix_col_exchange (H, size, j, minimum_column);
993 temp = newsteps[j];
994 newsteps[j] = newsteps[minimum_column];
995 newsteps[minimum_column] = temp;
997 for (i = j + 1; i < size; i++)
999 factor = row[i] / row[j];
1000 lambda_matrix_col_add (H, size, j, i, -1 * factor);
1004 return newsteps;
1007 /* Transform NEST according to TRANS, and return the new loopnest.
1008 This involves
1009 1. Computing a lattice base for the transformation
1010 2. Composing the dense base with the specified transformation (TRANS)
1011 3. Decomposing the combined transformation into a lower triangular portion,
1012 and a unimodular portion.
1013 4. Computing the auxillary nest using the unimodular portion.
1014 5. Computing the target nest using the auxillary nest and the lower
1015 triangular portion. */
1017 lambda_loopnest
1018 lambda_loopnest_transform (lambda_loopnest nest, lambda_trans_matrix trans)
1020 lambda_loopnest auxillary_nest, target_nest;
1022 int depth, invariants;
1023 int i, j;
1024 lambda_lattice lattice;
1025 lambda_trans_matrix trans1, H, U;
1026 lambda_loop loop;
1027 lambda_linear_expression expression;
1028 lambda_vector origin;
1029 lambda_matrix origin_invariants;
1030 lambda_vector stepsigns;
1031 int f;
1033 depth = LN_DEPTH (nest);
1034 invariants = LN_INVARIANTS (nest);
1036 /* Keep track of the signs of the loop steps. */
1037 stepsigns = lambda_vector_new (depth);
1038 for (i = 0; i < depth; i++)
1040 if (LL_STEP (LN_LOOPS (nest)[i]) > 0)
1041 stepsigns[i] = 1;
1042 else
1043 stepsigns[i] = -1;
1046 /* Compute the lattice base. */
1047 lattice = lambda_lattice_compute_base (nest);
1048 trans1 = lambda_trans_matrix_new (depth, depth);
1050 /* Multiply the transformation matrix by the lattice base. */
1052 lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_BASE (lattice),
1053 LTM_MATRIX (trans1), depth, depth, depth);
1055 /* Compute the Hermite normal form for the new transformation matrix. */
1056 H = lambda_trans_matrix_new (depth, depth);
1057 U = lambda_trans_matrix_new (depth, depth);
1058 lambda_matrix_hermite (LTM_MATRIX (trans1), depth, LTM_MATRIX (H),
1059 LTM_MATRIX (U));
1061 /* Compute the auxiliary loop nest's space from the unimodular
1062 portion. */
1063 auxillary_nest = lambda_compute_auxillary_space (nest, U);
1065 /* Compute the loop step signs from the old step signs and the
1066 transformation matrix. */
1067 stepsigns = lambda_compute_step_signs (trans1, stepsigns);
1069 /* Compute the target loop nest space from the auxiliary nest and
1070 the lower triangular matrix H. */
1071 target_nest = lambda_compute_target_space (auxillary_nest, H, stepsigns);
1072 origin = lambda_vector_new (depth);
1073 origin_invariants = lambda_matrix_new (depth, invariants);
1074 lambda_matrix_vector_mult (LTM_MATRIX (trans), depth, depth,
1075 LATTICE_ORIGIN (lattice), origin);
1076 lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_ORIGIN_INVARIANTS (lattice),
1077 origin_invariants, depth, depth, invariants);
1079 for (i = 0; i < depth; i++)
1081 loop = LN_LOOPS (target_nest)[i];
1082 expression = LL_LINEAR_OFFSET (loop);
1083 if (lambda_vector_zerop (LLE_COEFFICIENTS (expression), depth))
1084 f = 1;
1085 else
1086 f = LLE_DENOMINATOR (expression);
1088 LLE_CONSTANT (expression) += f * origin[i];
1090 for (j = 0; j < invariants; j++)
1091 LLE_INVARIANT_COEFFICIENTS (expression)[j] +=
1092 f * origin_invariants[i][j];
1095 return target_nest;
1099 /* Convert a gcc tree expression EXPR to a lambda linear expression, and
1100 return the new expression. DEPTH is the depth of the loopnest.
1101 OUTERINDUCTIONVARS is an array of the induction variables for outer loops
1102 in this nest. INVARIANTS is the array of invariants for the loop. EXTRA
1103 is the amount we have to add/subtract from the expression because of the
1104 type of comparison it is used in. */
1106 static lambda_linear_expression
1107 gcc_tree_to_linear_expression (int depth, tree expr,
1108 VEC(tree) *outerinductionvars,
1109 VEC(tree) *invariants, int extra)
1111 lambda_linear_expression lle = NULL;
1112 switch (TREE_CODE (expr))
1114 case INTEGER_CST:
1116 lle = lambda_linear_expression_new (depth, 2 * depth);
1117 LLE_CONSTANT (lle) = TREE_INT_CST_LOW (expr);
1118 if (extra != 0)
1119 LLE_CONSTANT (lle) = extra;
1121 LLE_DENOMINATOR (lle) = 1;
1123 break;
1124 case SSA_NAME:
1126 tree iv, invar;
1127 size_t i;
1128 for (i = 0; VEC_iterate (tree, outerinductionvars, i, iv); i++)
1129 if (iv != NULL)
1131 if (SSA_NAME_VAR (iv) == SSA_NAME_VAR (expr))
1133 lle = lambda_linear_expression_new (depth, 2 * depth);
1134 LLE_COEFFICIENTS (lle)[i] = 1;
1135 if (extra != 0)
1136 LLE_CONSTANT (lle) = extra;
1138 LLE_DENOMINATOR (lle) = 1;
1141 for (i = 0; VEC_iterate (tree, invariants, i, invar); i++)
1142 if (invar != NULL)
1144 if (SSA_NAME_VAR (invar) == SSA_NAME_VAR (expr))
1146 lle = lambda_linear_expression_new (depth, 2 * depth);
1147 LLE_INVARIANT_COEFFICIENTS (lle)[i] = 1;
1148 if (extra != 0)
1149 LLE_CONSTANT (lle) = extra;
1150 LLE_DENOMINATOR (lle) = 1;
1154 break;
1155 default:
1156 return NULL;
1159 return lle;
1162 /* Return true if OP is invariant in LOOP and all outer loops. */
1164 static bool
1165 invariant_in_loop (struct loop *loop, tree op)
1167 if (loop->depth == 0)
1168 return true;
1169 if (TREE_CODE (op) == SSA_NAME)
1171 if (TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL
1172 && IS_EMPTY_STMT (SSA_NAME_DEF_STMT (op)))
1173 return true;
1174 if (IS_EMPTY_STMT (SSA_NAME_DEF_STMT (op)))
1175 return false;
1176 if (loop->outer)
1177 if (!invariant_in_loop (loop->outer, op))
1178 return false;
1179 return !flow_bb_inside_loop_p (loop,
1180 bb_for_stmt (SSA_NAME_DEF_STMT (op)));
1182 return false;
1185 /* Generate a lambda loop from a gcc loop LOOP. Return the new lambda loop,
1186 or NULL if it could not be converted.
1187 DEPTH is the depth of the loop.
1188 INVARIANTS is a pointer to the array of loop invariants.
1189 The induction variable for this loop should be stored in the parameter
1190 OURINDUCTIONVAR.
1191 OUTERINDUCTIONVARS is an array of induction variables for outer loops. */
1193 static lambda_loop
1194 gcc_loop_to_lambda_loop (struct loop *loop, int depth,
1195 VEC (tree) ** invariants,
1196 tree * ourinductionvar,
1197 VEC (tree) * outerinductionvars)
1199 tree phi;
1200 tree exit_cond;
1201 tree access_fn, inductionvar;
1202 tree step;
1203 lambda_loop lloop = NULL;
1204 lambda_linear_expression lbound, ubound;
1205 tree test;
1206 int stepint;
1207 int extra = 0;
1209 use_optype uses;
1211 /* Find out induction var and set the pointer so that the caller can
1212 append it to the outerinductionvars array later. */
1214 inductionvar = find_induction_var_from_exit_cond (loop);
1215 *ourinductionvar = inductionvar;
1217 exit_cond = get_loop_exit_condition (loop);
1219 if (inductionvar == NULL || exit_cond == NULL)
1221 if (dump_file && (dump_flags & TDF_DETAILS))
1222 fprintf (dump_file,
1223 "Unable to convert loop: Cannot determine exit condition or induction variable for loop.\n");
1224 return NULL;
1227 test = TREE_OPERAND (exit_cond, 0);
1228 if (TREE_CODE (test) != LE_EXPR
1229 && TREE_CODE (test) != LT_EXPR && TREE_CODE (test) != NE_EXPR)
1232 if (dump_file && (dump_flags & TDF_DETAILS))
1234 fprintf (dump_file,
1235 "Unable to convert loop: Loop exit test uses unhandled test condition:");
1236 print_generic_stmt (dump_file, test, 0);
1237 fprintf (dump_file, "\n");
1239 return NULL;
1241 if (SSA_NAME_DEF_STMT (inductionvar) == NULL_TREE)
1244 if (dump_file && (dump_flags & TDF_DETAILS))
1245 fprintf (dump_file,
1246 "Unable to convert loop: Cannot find PHI node for induction variable\n");
1248 return NULL;
1251 phi = SSA_NAME_DEF_STMT (inductionvar);
1252 if (TREE_CODE (phi) != PHI_NODE)
1254 get_stmt_operands (phi);
1255 uses = STMT_USE_OPS (phi);
1257 if (!uses)
1260 if (dump_file && (dump_flags & TDF_DETAILS))
1261 fprintf (dump_file,
1262 "Unable to convert loop: Cannot find PHI node for induction variable\n");
1264 return NULL;
1267 phi = USE_OP (uses, 0);
1268 phi = SSA_NAME_DEF_STMT (phi);
1269 if (TREE_CODE (phi) != PHI_NODE)
1272 if (dump_file && (dump_flags & TDF_DETAILS))
1273 fprintf (dump_file,
1274 "Unable to convert loop: Cannot find PHI node for induction variable\n");
1275 return NULL;
1280 access_fn = instantiate_parameters
1281 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1282 if (!access_fn)
1284 if (dump_file && (dump_flags & TDF_DETAILS))
1285 fprintf (dump_file,
1286 "Unable to convert loop: Access function for induction variable phi is NULL\n");
1288 return NULL;
1291 step = evolution_part_in_loop_num (access_fn, loop->num);
1292 if (!step || step == chrec_dont_know)
1294 if (dump_file && (dump_flags & TDF_DETAILS))
1295 fprintf (dump_file,
1296 "Unable to convert loop: Cannot determine step of loop.\n");
1298 return NULL;
1300 if (TREE_CODE (step) != INTEGER_CST)
1303 if (dump_file && (dump_flags & TDF_DETAILS))
1304 fprintf (dump_file,
1305 "Unable to convert loop: Step of loop is not integer.\n");
1306 return NULL;
1309 stepint = TREE_INT_CST_LOW (step);
1311 /* Only want phis for induction vars, which will have two
1312 arguments. */
1313 if (PHI_NUM_ARGS (phi) != 2)
1315 if (dump_file && (dump_flags & TDF_DETAILS))
1316 fprintf (dump_file,
1317 "Unable to convert loop: PHI node for induction variable has >2 arguments\n");
1318 return NULL;
1321 /* Another induction variable check. One argument's source should be
1322 in the loop, one outside the loop. */
1323 if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src)
1324 && flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 1)->src))
1327 if (dump_file && (dump_flags & TDF_DETAILS))
1328 fprintf (dump_file,
1329 "Unable to convert loop: PHI edges both inside loop, or both outside loop.\n");
1331 return NULL;
1334 if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src))
1336 lbound = gcc_tree_to_linear_expression (depth, PHI_ARG_DEF (phi, 1),
1337 outerinductionvars, *invariants,
1339 else
1340 lbound = gcc_tree_to_linear_expression (depth, PHI_ARG_DEF (phi, 0),
1341 outerinductionvars, *invariants,
1343 if (!lbound)
1346 if (dump_file && (dump_flags & TDF_DETAILS))
1347 fprintf (dump_file,
1348 "Unable to convert loop: Cannot convert lower bound to linear expression\n");
1350 return NULL;
1352 if (TREE_CODE (TREE_OPERAND (test, 1)) == SSA_NAME)
1353 if (invariant_in_loop (loop, TREE_OPERAND (test, 1)))
1354 VEC_safe_push (tree, *invariants, TREE_OPERAND (test, 1));
1356 /* We only size the vectors assuming we have, at max, 2 times as many
1357 invariants as we do loops (one for each bound).
1358 This is just an arbitrary number, but it has to be matched against the
1359 code below. */
1360 if (VEC_length (tree, *invariants) > (unsigned int) (2 * depth))
1361 abort ();
1363 /* We might have some leftover. */
1364 if (TREE_CODE (test) == LT_EXPR)
1365 extra = -1 * stepint;
1366 else if (TREE_CODE (test) == NE_EXPR)
1367 extra = -1 * stepint;
1369 ubound = gcc_tree_to_linear_expression (depth,
1370 TREE_OPERAND (test, 1),
1371 outerinductionvars,
1372 *invariants, extra);
1373 if (!ubound)
1376 if (dump_file && (dump_flags & TDF_DETAILS))
1377 fprintf (dump_file,
1378 "Unable to convert loop: Cannot convert upper bound to linear expression\n");
1379 return NULL;
1382 lloop = lambda_loop_new ();
1383 LL_STEP (lloop) = stepint;
1384 LL_LOWER_BOUND (lloop) = lbound;
1385 LL_UPPER_BOUND (lloop) = ubound;
1386 return lloop;
1389 /* Given a LOOP, find the induction variable it is testing against in the exit
1390 condition. Return the induction variable if found, NULL otherwise. */
1392 static tree
1393 find_induction_var_from_exit_cond (struct loop *loop)
1395 tree expr = get_loop_exit_condition (loop);
1396 tree test;
1397 if (expr == NULL_TREE)
1398 return NULL_TREE;
1399 if (TREE_CODE (expr) != COND_EXPR)
1400 return NULL_TREE;
1401 test = TREE_OPERAND (expr, 0);
1402 if (TREE_CODE_CLASS (TREE_CODE (test)) != '<')
1403 return NULL_TREE;
1404 if (TREE_CODE (TREE_OPERAND (test, 0)) != SSA_NAME)
1405 return NULL_TREE;
1406 return TREE_OPERAND (test, 0);
1409 DEF_VEC_P(lambda_loop);
1410 /* Generate a lambda loopnest from a gcc loopnest LOOP_NEST.
1411 Return the new loop nest.
1412 INDUCTIONVARS is a pointer to an array of induction variables for the
1413 loopnest that will be filled in during this process.
1414 INVARIANTS is a pointer to an array of invariants that will be filled in
1415 during this process. */
1417 lambda_loopnest
1418 gcc_loopnest_to_lambda_loopnest (struct loop * loop_nest,
1419 VEC (tree) **inductionvars,
1420 VEC (tree) **invariants)
1422 lambda_loopnest ret;
1423 struct loop *temp;
1424 int depth = 0;
1425 size_t i;
1426 VEC (lambda_loop) *loops;
1427 lambda_loop newloop;
1428 tree inductionvar = NULL;
1430 temp = loop_nest;
1431 while (temp)
1433 depth++;
1434 temp = temp->inner;
1436 loops = VEC_alloc (lambda_loop, 1);
1437 *inductionvars = VEC_alloc (tree, 1);
1438 *invariants = VEC_alloc (tree, 1);
1439 temp = loop_nest;
1440 while (temp)
1442 newloop = gcc_loop_to_lambda_loop (temp, depth, invariants,
1443 &inductionvar, *inductionvars);
1444 if (!newloop)
1445 return NULL;
1446 VEC_safe_push (tree, *inductionvars, inductionvar);
1447 VEC_safe_push (lambda_loop, loops, newloop);
1448 temp = temp->inner;
1451 ret = lambda_loopnest_new (depth, 2 * depth);
1452 for (i = 0; VEC_iterate (lambda_loop, loops, i, newloop); i++)
1453 LN_LOOPS (ret)[i] = newloop;
1455 return ret;
1459 /* Convert a lambda body vector LBV to a gcc tree, and return the new tree.
1460 STMTS_TO_INSERT is a pointer to a tree where the statements we need to be
1461 inserted for us are stored. INDUCTION_VARS is the array of induction
1462 variables for the loop this LBV is from. */
1464 static tree
1465 lbv_to_gcc_expression (lambda_body_vector lbv,
1466 VEC (tree) *induction_vars, tree * stmts_to_insert)
1468 tree stmts, stmt, resvar, name;
1469 size_t i;
1470 tree_stmt_iterator tsi;
1472 /* Create a statement list and a linear expression temporary. */
1473 stmts = alloc_stmt_list ();
1474 resvar = create_tmp_var (integer_type_node, "lletmp");
1475 add_referenced_tmp_var (resvar);
1477 /* Start at 0. */
1478 stmt = build (MODIFY_EXPR, void_type_node, resvar, integer_zero_node);
1479 name = make_ssa_name (resvar, stmt);
1480 TREE_OPERAND (stmt, 0) = name;
1481 tsi = tsi_last (stmts);
1482 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1484 for (i = 0; i < VEC_length (tree ,induction_vars) ; i++)
1486 if (LBV_COEFFICIENTS (lbv)[i] != 0)
1488 tree newname;
1490 /* newname = coefficient * induction_variable */
1491 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1492 fold (build (MULT_EXPR, integer_type_node,
1493 VEC_index (tree, induction_vars, i),
1494 build_int_cst (integer_type_node,
1495 LBV_COEFFICIENTS (lbv)[i]))));
1496 newname = make_ssa_name (resvar, stmt);
1497 TREE_OPERAND (stmt, 0) = newname;
1498 tsi = tsi_last (stmts);
1499 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1500 /* name = name + newname */
1501 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1502 build (PLUS_EXPR, integer_type_node, name, newname));
1503 name = make_ssa_name (resvar, stmt);
1504 TREE_OPERAND (stmt, 0) = name;
1505 tsi = tsi_last (stmts);
1506 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1510 /* Handle any denominator that occurs. */
1511 if (LBV_DENOMINATOR (lbv) != 1)
1513 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1514 build (CEIL_DIV_EXPR, integer_type_node,
1515 name, build_int_cst (integer_type_node,
1516 LBV_DENOMINATOR (lbv))));
1517 name = make_ssa_name (resvar, stmt);
1518 TREE_OPERAND (stmt, 0) = name;
1519 tsi = tsi_last (stmts);
1520 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1522 *stmts_to_insert = stmts;
1523 return name;
1526 /* Convert a linear expression from coefficient and constant form to a
1527 gcc tree.
1528 Return the tree that represents the final value of the expression.
1529 LLE is the linear expression to convert.
1530 OFFSET is the linear offset to apply to the expression.
1531 INDUCTION_VARS is a vector of induction variables for the loops.
1532 INVARIANTS is a vector of the loop nest invariants.
1533 WRAP specifies what tree code to wrap the results in, if there is more than
1534 one (it is either MAX_EXPR, or MIN_EXPR).
1535 STMTS_TO_INSERT Is a pointer to the statement list we fill in with
1536 statements that need to be inserted for the linear expression. */
1538 static tree
1539 lle_to_gcc_expression (lambda_linear_expression lle,
1540 lambda_linear_expression offset,
1541 VEC(tree) *induction_vars,
1542 VEC(tree) *invariants,
1543 enum tree_code wrap, tree * stmts_to_insert)
1545 tree stmts, stmt, resvar, name;
1546 size_t i;
1547 tree_stmt_iterator tsi;
1548 VEC(tree) *results;
1550 name = NULL_TREE;
1551 /* Create a statement list and a linear expression temporary. */
1552 stmts = alloc_stmt_list ();
1553 resvar = create_tmp_var (integer_type_node, "lletmp");
1554 add_referenced_tmp_var (resvar);
1555 results = VEC_alloc (tree, 1);
1557 /* Build up the linear expressions, and put the variable representing the
1558 result in the results array. */
1559 for (; lle != NULL; lle = LLE_NEXT (lle))
1561 /* Start at name = 0. */
1562 stmt = build (MODIFY_EXPR, void_type_node, resvar, integer_zero_node);
1563 name = make_ssa_name (resvar, stmt);
1564 TREE_OPERAND (stmt, 0) = name;
1565 tsi = tsi_last (stmts);
1566 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1568 /* First do the induction variables.
1569 at the end, name = name + all the induction variables added
1570 together. */
1571 for (i = 0; i < VEC_length (tree ,induction_vars); i++)
1573 if (LLE_COEFFICIENTS (lle)[i] != 0)
1575 tree newname;
1576 tree mult;
1577 tree coeff;
1579 /* mult = induction variable * coefficient. */
1580 if (LLE_COEFFICIENTS (lle)[i] == 1)
1582 mult = VEC_index (tree, induction_vars, i);
1584 else
1586 coeff = build_int_cst (integer_type_node,
1587 LLE_COEFFICIENTS (lle)[i]);
1588 mult = fold (build (MULT_EXPR, integer_type_node,
1589 VEC_index (tree, induction_vars, i),
1590 coeff));
1593 /* newname = mult */
1594 stmt = build (MODIFY_EXPR, void_type_node, resvar, mult);
1595 newname = make_ssa_name (resvar, stmt);
1596 TREE_OPERAND (stmt, 0) = newname;
1597 tsi = tsi_last (stmts);
1598 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1600 /* name = name + newname */
1601 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1602 build (PLUS_EXPR, integer_type_node,
1603 name, newname));
1604 name = make_ssa_name (resvar, stmt);
1605 TREE_OPERAND (stmt, 0) = name;
1606 tsi = tsi_last (stmts);
1607 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1611 /* Handle our invariants.
1612 At the end, we have name = name + result of adding all multiplied
1613 invariants. */
1614 for (i = 0; i < VEC_length (tree, invariants); i++)
1616 if (LLE_INVARIANT_COEFFICIENTS (lle)[i] != 0)
1618 tree newname;
1619 tree mult;
1620 tree coeff;
1622 /* mult = invariant * coefficient */
1623 if (LLE_INVARIANT_COEFFICIENTS (lle)[i] == 1)
1625 mult = VEC_index (tree, invariants, i);
1627 else
1629 coeff = build_int_cst (integer_type_node,
1630 LLE_INVARIANT_COEFFICIENTS (lle)[i]);
1631 mult = fold (build (MULT_EXPR, integer_type_node,
1632 VEC_index (tree, invariants, i),
1633 coeff));
1636 /* newname = mult */
1637 stmt = build (MODIFY_EXPR, void_type_node, resvar, mult);
1638 newname = make_ssa_name (resvar, stmt);
1639 TREE_OPERAND (stmt, 0) = newname;
1640 tsi = tsi_last (stmts);
1641 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1643 /* name = name + newname */
1644 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1645 build (PLUS_EXPR, integer_type_node,
1646 name, newname));
1647 name = make_ssa_name (resvar, stmt);
1648 TREE_OPERAND (stmt, 0) = name;
1649 tsi = tsi_last (stmts);
1650 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1654 /* Now handle the constant.
1655 name = name + constant. */
1656 if (LLE_CONSTANT (lle) != 0)
1658 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1659 build (PLUS_EXPR, integer_type_node,
1660 name, build_int_cst (integer_type_node,
1661 LLE_CONSTANT (lle))));
1662 name = make_ssa_name (resvar, stmt);
1663 TREE_OPERAND (stmt, 0) = name;
1664 tsi = tsi_last (stmts);
1665 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1668 /* Now handle the offset.
1669 name = name + linear offset. */
1670 if (LLE_CONSTANT (offset) != 0)
1672 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1673 build (PLUS_EXPR, integer_type_node,
1674 name, build_int_cst (integer_type_node,
1675 LLE_CONSTANT (offset))));
1676 name = make_ssa_name (resvar, stmt);
1677 TREE_OPERAND (stmt, 0) = name;
1678 tsi = tsi_last (stmts);
1679 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1682 /* Handle any denominator that occurs. */
1683 if (LLE_DENOMINATOR (lle) != 1)
1685 if (wrap == MAX_EXPR)
1686 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1687 build (CEIL_DIV_EXPR, integer_type_node,
1688 name, build_int_cst (integer_type_node,
1689 LLE_DENOMINATOR (lle))));
1690 else if (wrap == MIN_EXPR)
1691 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1692 build (FLOOR_DIV_EXPR, integer_type_node,
1693 name, build_int_cst (integer_type_node,
1694 LLE_DENOMINATOR (lle))));
1695 else
1696 abort ();
1698 /* name = {ceil, floor}(name/denominator) */
1699 name = make_ssa_name (resvar, stmt);
1700 TREE_OPERAND (stmt, 0) = name;
1701 tsi = tsi_last (stmts);
1702 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1704 VEC_safe_push (tree, results, name);
1707 /* Again, out of laziness, we don't handle this case yet. It's not
1708 hard, it just hasn't occurred. */
1709 if (VEC_length (tree, results) > 2)
1710 abort ();
1712 /* We may need to wrap the results in a MAX_EXPR or MIN_EXPR. */
1713 if (VEC_length (tree, results) > 1)
1715 tree op1 = VEC_index (tree, results, 0);
1716 tree op2 = VEC_index (tree, results, 1);
1717 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1718 build (wrap, integer_type_node, op1, op2));
1719 name = make_ssa_name (resvar, stmt);
1720 TREE_OPERAND (stmt, 0) = name;
1721 tsi = tsi_last (stmts);
1722 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1725 *stmts_to_insert = stmts;
1726 return name;
1729 /* Transform a lambda loopnest NEW_LOOPNEST, which had TRANSFORM applied to
1730 it, back into gcc code. This changes the
1731 loops, their induction variables, and their bodies, so that they
1732 match the transformed loopnest.
1733 OLD_LOOPNEST is the loopnest before we've replaced it with the new
1734 loopnest.
1735 OLD_IVS is a vector of induction variables from the old loopnest.
1736 INVARIANTS is a vector of loop invariants from the old loopnest.
1737 NEW_LOOPNEST is the new lambda loopnest to replace OLD_LOOPNEST with.
1738 TRANSFORM is the matrix transform that was applied to OLD_LOOPNEST to get
1739 NEW_LOOPNEST. */
1740 void
1741 lambda_loopnest_to_gcc_loopnest (struct loop *old_loopnest,
1742 VEC(tree) *old_ivs,
1743 VEC(tree) *invariants,
1744 lambda_loopnest new_loopnest,
1745 lambda_trans_matrix transform)
1748 struct loop *temp;
1749 size_t i = 0;
1750 size_t depth = 0;
1751 VEC(tree) *new_ivs;
1752 block_stmt_iterator bsi;
1753 basic_block *bbs;
1755 if (dump_file)
1757 transform = lambda_trans_matrix_inverse (transform);
1758 fprintf (dump_file, "Inverse of transformation matrix:\n");
1759 print_lambda_trans_matrix (dump_file, transform);
1761 temp = old_loopnest;
1762 new_ivs = VEC_alloc (tree, 1);
1763 while (temp)
1765 temp = temp->inner;
1766 depth++;
1768 temp = old_loopnest;
1770 while (temp)
1772 lambda_loop newloop;
1773 basic_block bb;
1774 tree ivvar, ivvarinced, exitcond, stmts;
1775 enum tree_code testtype;
1776 tree newupperbound, newlowerbound;
1777 lambda_linear_expression offset;
1778 /* First, build the new induction variable temporary */
1780 ivvar = create_tmp_var (integer_type_node, "lnivtmp");
1781 add_referenced_tmp_var (ivvar);
1783 VEC_safe_push (tree, new_ivs, ivvar);
1785 newloop = LN_LOOPS (new_loopnest)[i];
1787 /* Linear offset is a bit tricky to handle. Punt on the unhandled
1788 cases for now. */
1789 offset = LL_LINEAR_OFFSET (newloop);
1791 if (LLE_DENOMINATOR (offset) != 1
1792 || !lambda_vector_zerop (LLE_COEFFICIENTS (offset), depth))
1793 abort ();
1795 /* Now build the new lower bounds, and insert the statements
1796 necessary to generate it on the loop preheader. */
1797 newlowerbound = lle_to_gcc_expression (LL_LOWER_BOUND (newloop),
1798 LL_LINEAR_OFFSET (newloop),
1799 new_ivs,
1800 invariants, MAX_EXPR, &stmts);
1801 bsi_insert_on_edge (loop_preheader_edge (temp), stmts);
1802 bsi_commit_edge_inserts (NULL);
1803 /* Build the new upper bound and insert its statements in the
1804 basic block of the exit condition */
1805 newupperbound = lle_to_gcc_expression (LL_UPPER_BOUND (newloop),
1806 LL_LINEAR_OFFSET (newloop),
1807 new_ivs,
1808 invariants, MIN_EXPR, &stmts);
1809 exitcond = get_loop_exit_condition (temp);
1810 bb = bb_for_stmt (exitcond);
1811 bsi = bsi_start (bb);
1812 bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
1814 /* Create the new iv, and insert it's increment on the latch
1815 block. */
1817 bb = temp->latch->pred->src;
1818 bsi = bsi_last (bb);
1819 create_iv (newlowerbound,
1820 build_int_cst (integer_type_node, LL_STEP (newloop)),
1821 ivvar, temp, &bsi, false, &ivvar,
1822 &ivvarinced);
1824 /* Replace the exit condition with the new upper bound
1825 comparison. */
1826 testtype = LL_STEP (newloop) >= 0 ? LE_EXPR : GE_EXPR;
1827 COND_EXPR_COND (exitcond) = build (testtype,
1828 boolean_type_node,
1829 ivvarinced, newupperbound);
1830 modify_stmt (exitcond);
1831 VEC_replace (tree, new_ivs, i, ivvar);
1833 i++;
1834 temp = temp->inner;
1837 /* Go through the loop and make iv replacements. */
1838 bbs = get_loop_body (old_loopnest);
1839 for (i = 0; i < old_loopnest->num_nodes; i++)
1840 for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
1842 tree stmt = bsi_stmt (bsi);
1843 use_optype uses;
1844 size_t j;
1846 get_stmt_operands (stmt);
1847 uses = STMT_USE_OPS (stmt);
1848 for (j = 0; j < NUM_USES (uses); j++)
1850 size_t k;
1851 use_operand_p use = USE_OP_PTR (uses, j);
1852 for (k = 0; k < VEC_length (tree, old_ivs); k++)
1854 tree oldiv = VEC_index (tree, old_ivs, k);
1855 if (USE_FROM_PTR (use) == oldiv)
1857 tree newiv, stmts;
1858 lambda_body_vector lbv;
1860 /* Compute the new expression for the induction
1861 variable. */
1862 depth = VEC_length (tree, new_ivs);
1863 lbv = lambda_body_vector_new (depth);
1864 LBV_COEFFICIENTS (lbv)[k] = 1;
1865 lbv = lambda_body_vector_compute_new (transform, lbv);
1866 newiv = lbv_to_gcc_expression (lbv, new_ivs, &stmts);
1868 /* Insert the statements to build that
1869 expression. */
1870 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
1872 /* Replace the use with the result of that
1873 expression. */
1874 if (dump_file)
1876 fprintf (dump_file,
1877 "Replacing induction variable use of ");
1878 print_generic_stmt (dump_file, USE_FROM_PTR (use), 0);
1879 fprintf (dump_file, " with ");
1880 print_generic_stmt (dump_file, newiv, 0);
1881 fprintf (dump_file, "\n");
1883 SET_USE (use, newiv);
1891 /* Returns true when the vector V is lexicographically positive, in
1892 other words, when the first non zero element is positive. */
1894 static bool
1895 lambda_vector_lexico_pos (lambda_vector v, unsigned n)
1897 unsigned i;
1898 for (i = 0; i < n; i++)
1900 if (v[i] == 0)
1901 continue;
1902 if (v[i] < 0)
1903 return false;
1904 if (v[i] > 0)
1905 return true;
1907 return true;
1910 /* Return true if TRANS is a legal transformation matrix that respects
1911 the dependence vectors in DISTS and DIRS. The conservative answer
1912 is false.
1914 "Wolfe proves that a unimodular transformation represented by the
1915 matrix T is legal when applied to a loop nest with a set of
1916 lexicographically non-negative distance vectors RDG if and only if
1917 for each vector d in RDG, (T.d >= 0) is lexicographically positive.
1918 ie.: if and only if it transforms the lexicographically positive
1919 distance vectors to lexicographically positive vectors. Note that
1920 a unimodular matrix must transform the zero vector (and only it) to
1921 the zero vector." S.Muchnick. */
1923 bool
1924 lambda_transform_legal_p (lambda_trans_matrix trans,
1925 int nb_loops, varray_type dependence_relations)
1927 unsigned int i;
1928 lambda_vector distres;
1929 struct data_dependence_relation *ddr;
1931 #if defined ENABLE_CHECKING
1932 if (LTM_COLSIZE (trans) != nb_loops || LTM_ROWSIZE (trans) != nb_loops)
1933 abort ();
1934 #endif
1936 /* When there is an unknown relation in the dependence_relations, we
1937 know that it is no worth looking at this loop nest: give up. */
1938 ddr = (struct data_dependence_relation *)
1939 VARRAY_GENERIC_PTR (dependence_relations, 0);
1940 if (ddr == NULL)
1941 return true;
1942 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1943 return false;
1945 distres = lambda_vector_new (nb_loops);
1947 /* For each distance vector in the dependence graph. */
1948 for (i = 0; i < VARRAY_ACTIVE_SIZE (dependence_relations); i++)
1950 ddr = (struct data_dependence_relation *)
1951 VARRAY_GENERIC_PTR (dependence_relations, i);
1953 /* Don't care about relations for which we know that there is no
1954 dependence, nor about read-read (aka. output-dependences):
1955 these data accesses can happen in any order. */
1956 if (DDR_ARE_DEPENDENT (ddr) == chrec_known
1957 || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
1958 continue;
1959 /* Conservatively answer: "this transformation is not valid". */
1960 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1961 return false;
1963 /* Compute trans.dist_vect */
1964 lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
1965 DDR_DIST_VECT (ddr), distres);
1967 if (!lambda_vector_lexico_pos (distres, nb_loops))
1968 return false;
1971 return true;