* config/darwin.c (machopic_validate_stub_or_non_lazy_ptr): Mark
[official-gcc.git] / gcc / lambda-code.c
blob21bea190574275193190c3493638523ff926a043
1 /* Loop transformation code generation
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dberlin@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "errors.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "rtl.h"
31 #include "basic-block.h"
32 #include "diagnostic.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
35 #include "timevar.h"
36 #include "cfgloop.h"
37 #include "expr.h"
38 #include "optabs.h"
39 #include "tree-chrec.h"
40 #include "tree-data-ref.h"
41 #include "tree-pass.h"
42 #include "tree-scalar-evolution.h"
43 #include "vec.h"
44 #include "lambda.h"
46 /* This loop nest code generation is based on non-singular matrix
47 math.
49 A little terminology and a general sketch of the algorithm. See "A singular
50 loop transformation framework based on non-singular matrices" by Wei Li and
51 Keshav Pingali for formal proofs that the various statements below are
52 correct.
54 A loop iteration space are the points traversed by the loop. A point in the
55 iteration space can be represented by a vector of size <loop depth>. You can
56 therefore represent the iteration space as a integral combinations of a set
57 of basis vectors.
59 A loop iteration space is dense if every integer point between the loop
60 bounds is a point in the iteration space. Every loop with a step of 1
61 therefore has a dense iteration space.
63 for i = 1 to 3, step 1 is a dense iteration space.
65 A loop iteration space is sparse if it is not dense. That is, the iteration
66 space skips integer points that are within the loop bounds.
68 for i = 1 to 3, step 2 is a sparse iteration space, because the integer point
69 2 is skipped.
71 Dense source spaces are easy to transform, because they don't skip any
72 points to begin with. Thus we can compute the exact bounds of the target
73 space using min/max and floor/ceil.
75 For a dense source space, we take the transformation matrix, decompose it
76 into a lower triangular part (H) and a unimodular part (U).
77 We then compute the auxiliary space from the unimodular part (source loop
78 nest . U = auxiliary space) , which has two important properties:
79 1. It traverses the iterations in the same lexicographic order as the source
80 space.
81 2. It is a dense space when the source is a dense space (even if the target
82 space is going to be sparse).
84 Given the auxiliary space, we use the lower triangular part to compute the
85 bounds in the target space by simple matrix multiplication.
86 The gaps in the target space (IE the new loop step sizes) will be the
87 diagonals of the H matrix.
89 Sparse source spaces require another step, because you can't directly compute
90 the exact bounds of the auxiliary and target space from the sparse space.
91 Rather than try to come up with a separate algorithm to handle sparse source
92 spaces directly, we just find a legal transformation matrix that gives you
93 the sparse source space, from a dense space, and then transform the dense
94 space.
96 For a regular sparse space, you can represent the source space as an integer
97 lattice, and the base space of that lattice will always be dense. Thus, we
98 effectively use the lattice to figure out the transformation from the lattice
99 base space, to the sparse iteration space (IE what transform was applied to
100 the dense space to make it sparse). We then compose this transform with the
101 transformation matrix specified by the user (since our matrix transformations
102 are closed under composition, this is okay). We can then use the base space
103 (which is dense) plus the composed transformation matrix, to compute the rest
104 of the transform using the dense space algorithm above.
106 In other words, our sparse source space (B) is decomposed into a dense base
107 space (A), and a matrix (L) that transforms A into B, such that A.L = B.
108 We then compute the composition of L and the user transformation matrix (T),
109 so that T is now a transform from A to the result, instead of from B to the
110 result.
111 IE A.(LT) = result instead of B.T = result
112 Since A is now a dense source space, we can use the dense source space
113 algorithm above to compute the result of applying transform (LT) to A.
115 Fourier-Motzkin elimination is used to compute the bounds of the base space
116 of the lattice. */
118 /* Lattice stuff that is internal to the code generation algorithm. */
120 typedef struct
122 /* Lattice base matrix. */
123 lambda_matrix base;
124 /* Lattice dimension. */
125 int dimension;
126 /* Origin vector for the coefficients. */
127 lambda_vector origin;
128 /* Origin matrix for the invariants. */
129 lambda_matrix origin_invariants;
130 /* Number of invariants. */
131 int invariants;
132 } *lambda_lattice;
134 #define LATTICE_BASE(T) ((T)->base)
135 #define LATTICE_DIMENSION(T) ((T)->dimension)
136 #define LATTICE_ORIGIN(T) ((T)->origin)
137 #define LATTICE_ORIGIN_INVARIANTS(T) ((T)->origin_invariants)
138 #define LATTICE_INVARIANTS(T) ((T)->invariants)
140 static bool lle_equal (lambda_linear_expression, lambda_linear_expression,
141 int, int);
142 static lambda_lattice lambda_lattice_new (int, int);
143 static lambda_lattice lambda_lattice_compute_base (lambda_loopnest);
145 static tree find_induction_var_from_exit_cond (struct loop *);
147 /* Create a new lambda body vector. */
149 lambda_body_vector
150 lambda_body_vector_new (int size)
152 lambda_body_vector ret;
154 ret = ggc_alloc (sizeof (*ret));
155 LBV_COEFFICIENTS (ret) = lambda_vector_new (size);
156 LBV_SIZE (ret) = size;
157 LBV_DENOMINATOR (ret) = 1;
158 return ret;
161 /* Compute the new coefficients for the vector based on the
162 *inverse* of the transformation matrix. */
164 lambda_body_vector
165 lambda_body_vector_compute_new (lambda_trans_matrix transform,
166 lambda_body_vector vect)
168 lambda_body_vector temp;
169 int depth;
171 /* Make sure the matrix is square. */
172 gcc_assert (LTM_ROWSIZE (transform) == LTM_COLSIZE (transform));
174 depth = LTM_ROWSIZE (transform);
176 temp = lambda_body_vector_new (depth);
177 LBV_DENOMINATOR (temp) =
178 LBV_DENOMINATOR (vect) * LTM_DENOMINATOR (transform);
179 lambda_vector_matrix_mult (LBV_COEFFICIENTS (vect), depth,
180 LTM_MATRIX (transform), depth,
181 LBV_COEFFICIENTS (temp));
182 LBV_SIZE (temp) = LBV_SIZE (vect);
183 return temp;
186 /* Print out a lambda body vector. */
188 void
189 print_lambda_body_vector (FILE * outfile, lambda_body_vector body)
191 print_lambda_vector (outfile, LBV_COEFFICIENTS (body), LBV_SIZE (body));
194 /* Return TRUE if two linear expressions are equal. */
196 static bool
197 lle_equal (lambda_linear_expression lle1, lambda_linear_expression lle2,
198 int depth, int invariants)
200 int i;
202 if (lle1 == NULL || lle2 == NULL)
203 return false;
204 if (LLE_CONSTANT (lle1) != LLE_CONSTANT (lle2))
205 return false;
206 if (LLE_DENOMINATOR (lle1) != LLE_DENOMINATOR (lle2))
207 return false;
208 for (i = 0; i < depth; i++)
209 if (LLE_COEFFICIENTS (lle1)[i] != LLE_COEFFICIENTS (lle2)[i])
210 return false;
211 for (i = 0; i < invariants; i++)
212 if (LLE_INVARIANT_COEFFICIENTS (lle1)[i] !=
213 LLE_INVARIANT_COEFFICIENTS (lle2)[i])
214 return false;
215 return true;
218 /* Create a new linear expression with dimension DIM, and total number
219 of invariants INVARIANTS. */
221 lambda_linear_expression
222 lambda_linear_expression_new (int dim, int invariants)
224 lambda_linear_expression ret;
226 ret = ggc_alloc_cleared (sizeof (*ret));
228 LLE_COEFFICIENTS (ret) = lambda_vector_new (dim);
229 LLE_CONSTANT (ret) = 0;
230 LLE_INVARIANT_COEFFICIENTS (ret) = lambda_vector_new (invariants);
231 LLE_DENOMINATOR (ret) = 1;
232 LLE_NEXT (ret) = NULL;
234 return ret;
237 /* Print out a linear expression EXPR, with SIZE coefficients, to OUTFILE.
238 The starting letter used for variable names is START. */
240 static void
241 print_linear_expression (FILE * outfile, lambda_vector expr, int size,
242 char start)
244 int i;
245 bool first = true;
246 for (i = 0; i < size; i++)
248 if (expr[i] != 0)
250 if (first)
252 if (expr[i] < 0)
253 fprintf (outfile, "-");
254 first = false;
256 else if (expr[i] > 0)
257 fprintf (outfile, " + ");
258 else
259 fprintf (outfile, " - ");
260 if (abs (expr[i]) == 1)
261 fprintf (outfile, "%c", start + i);
262 else
263 fprintf (outfile, "%d%c", abs (expr[i]), start + i);
268 /* Print out a lambda linear expression structure, EXPR, to OUTFILE. The
269 depth/number of coefficients is given by DEPTH, the number of invariants is
270 given by INVARIANTS, and the character to start variable names with is given
271 by START. */
273 void
274 print_lambda_linear_expression (FILE * outfile,
275 lambda_linear_expression expr,
276 int depth, int invariants, char start)
278 fprintf (outfile, "\tLinear expression: ");
279 print_linear_expression (outfile, LLE_COEFFICIENTS (expr), depth, start);
280 fprintf (outfile, " constant: %d ", LLE_CONSTANT (expr));
281 fprintf (outfile, " invariants: ");
282 print_linear_expression (outfile, LLE_INVARIANT_COEFFICIENTS (expr),
283 invariants, 'A');
284 fprintf (outfile, " denominator: %d\n", LLE_DENOMINATOR (expr));
287 /* Print a lambda loop structure LOOP to OUTFILE. The depth/number of
288 coefficients is given by DEPTH, the number of invariants is
289 given by INVARIANTS, and the character to start variable names with is given
290 by START. */
292 void
293 print_lambda_loop (FILE * outfile, lambda_loop loop, int depth,
294 int invariants, char start)
296 int step;
297 lambda_linear_expression expr;
299 gcc_assert (loop);
301 expr = LL_LINEAR_OFFSET (loop);
302 step = LL_STEP (loop);
303 fprintf (outfile, " step size = %d \n", step);
305 if (expr)
307 fprintf (outfile, " linear offset: \n");
308 print_lambda_linear_expression (outfile, expr, depth, invariants,
309 start);
312 fprintf (outfile, " lower bound: \n");
313 for (expr = LL_LOWER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
314 print_lambda_linear_expression (outfile, expr, depth, invariants, start);
315 fprintf (outfile, " upper bound: \n");
316 for (expr = LL_UPPER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
317 print_lambda_linear_expression (outfile, expr, depth, invariants, start);
320 /* Create a new loop nest structure with DEPTH loops, and INVARIANTS as the
321 number of invariants. */
323 lambda_loopnest
324 lambda_loopnest_new (int depth, int invariants)
326 lambda_loopnest ret;
327 ret = ggc_alloc (sizeof (*ret));
329 LN_LOOPS (ret) = ggc_alloc_cleared (depth * sizeof (lambda_loop));
330 LN_DEPTH (ret) = depth;
331 LN_INVARIANTS (ret) = invariants;
333 return ret;
336 /* Print a lambda loopnest structure, NEST, to OUTFILE. The starting
337 character to use for loop names is given by START. */
339 void
340 print_lambda_loopnest (FILE * outfile, lambda_loopnest nest, char start)
342 int i;
343 for (i = 0; i < LN_DEPTH (nest); i++)
345 fprintf (outfile, "Loop %c\n", start + i);
346 print_lambda_loop (outfile, LN_LOOPS (nest)[i], LN_DEPTH (nest),
347 LN_INVARIANTS (nest), 'i');
348 fprintf (outfile, "\n");
352 /* Allocate a new lattice structure of DEPTH x DEPTH, with INVARIANTS number
353 of invariants. */
355 static lambda_lattice
356 lambda_lattice_new (int depth, int invariants)
358 lambda_lattice ret;
359 ret = ggc_alloc (sizeof (*ret));
360 LATTICE_BASE (ret) = lambda_matrix_new (depth, depth);
361 LATTICE_ORIGIN (ret) = lambda_vector_new (depth);
362 LATTICE_ORIGIN_INVARIANTS (ret) = lambda_matrix_new (depth, invariants);
363 LATTICE_DIMENSION (ret) = depth;
364 LATTICE_INVARIANTS (ret) = invariants;
365 return ret;
368 /* Compute the lattice base for NEST. The lattice base is essentially a
369 non-singular transform from a dense base space to a sparse iteration space.
370 We use it so that we don't have to specially handle the case of a sparse
371 iteration space in other parts of the algorithm. As a result, this routine
372 only does something interesting (IE produce a matrix that isn't the
373 identity matrix) if NEST is a sparse space. */
375 static lambda_lattice
376 lambda_lattice_compute_base (lambda_loopnest nest)
378 lambda_lattice ret;
379 int depth, invariants;
380 lambda_matrix base;
382 int i, j, step;
383 lambda_loop loop;
384 lambda_linear_expression expression;
386 depth = LN_DEPTH (nest);
387 invariants = LN_INVARIANTS (nest);
389 ret = lambda_lattice_new (depth, invariants);
390 base = LATTICE_BASE (ret);
391 for (i = 0; i < depth; i++)
393 loop = LN_LOOPS (nest)[i];
394 gcc_assert (loop);
395 step = LL_STEP (loop);
396 /* If we have a step of 1, then the base is one, and the
397 origin and invariant coefficients are 0. */
398 if (step == 1)
400 for (j = 0; j < depth; j++)
401 base[i][j] = 0;
402 base[i][i] = 1;
403 LATTICE_ORIGIN (ret)[i] = 0;
404 for (j = 0; j < invariants; j++)
405 LATTICE_ORIGIN_INVARIANTS (ret)[i][j] = 0;
407 else
409 /* Otherwise, we need the lower bound expression (which must
410 be an affine function) to determine the base. */
411 expression = LL_LOWER_BOUND (loop);
412 gcc_assert (expression && LLE_NEXT (expression)
413 && LLE_DENOMINATOR (expression) == 1);
415 /* The lower triangular portion of the base is going to be the
416 coefficient times the step */
417 for (j = 0; j < i; j++)
418 base[i][j] = LLE_COEFFICIENTS (expression)[j]
419 * LL_STEP (LN_LOOPS (nest)[j]);
420 base[i][i] = step;
421 for (j = i + 1; j < depth; j++)
422 base[i][j] = 0;
424 /* Origin for this loop is the constant of the lower bound
425 expression. */
426 LATTICE_ORIGIN (ret)[i] = LLE_CONSTANT (expression);
428 /* Coefficient for the invariants are equal to the invariant
429 coefficients in the expression. */
430 for (j = 0; j < invariants; j++)
431 LATTICE_ORIGIN_INVARIANTS (ret)[i][j] =
432 LLE_INVARIANT_COEFFICIENTS (expression)[j];
435 return ret;
438 /* Compute the greatest common denominator of two numbers (A and B) using
439 Euclid's algorithm. */
441 static int
442 gcd (int a, int b)
445 int x, y, z;
447 x = abs (a);
448 y = abs (b);
450 while (x > 0)
452 z = y % x;
453 y = x;
454 x = z;
457 return (y);
460 /* Compute the greatest common denominator of a VECTOR of SIZE numbers. */
462 static int
463 gcd_vector (lambda_vector vector, int size)
465 int i;
466 int gcd1 = 0;
468 if (size > 0)
470 gcd1 = vector[0];
471 for (i = 1; i < size; i++)
472 gcd1 = gcd (gcd1, vector[i]);
474 return gcd1;
477 /* Compute the least common multiple of two numbers A and B . */
479 static int
480 lcm (int a, int b)
482 return (abs (a) * abs (b) / gcd (a, b));
485 /* Compute the loop bounds for the auxiliary space NEST.
486 Input system used is Ax <= b. TRANS is the unimodular transformation. */
488 static lambda_loopnest
489 lambda_compute_auxillary_space (lambda_loopnest nest,
490 lambda_trans_matrix trans)
492 lambda_matrix A, B, A1, B1, temp0;
493 lambda_vector a, a1, temp1;
494 lambda_matrix invertedtrans;
495 int determinant, depth, invariants, size, newsize;
496 int i, j, k;
497 lambda_loopnest auxillary_nest;
498 lambda_loop loop;
499 lambda_linear_expression expression;
500 lambda_lattice lattice;
502 int multiple, f1, f2;
504 depth = LN_DEPTH (nest);
505 invariants = LN_INVARIANTS (nest);
507 /* Unfortunately, we can't know the number of constraints we'll have
508 ahead of time, but this should be enough even in ridiculous loop nest
509 cases. We abort if we go over this limit. */
510 A = lambda_matrix_new (128, depth);
511 B = lambda_matrix_new (128, invariants);
512 a = lambda_vector_new (128);
514 A1 = lambda_matrix_new (128, depth);
515 B1 = lambda_matrix_new (128, invariants);
516 a1 = lambda_vector_new (128);
518 /* Store the bounds in the equation matrix A, constant vector a, and
519 invariant matrix B, so that we have Ax <= a + B.
520 This requires a little equation rearranging so that everything is on the
521 correct side of the inequality. */
522 size = 0;
523 for (i = 0; i < depth; i++)
525 loop = LN_LOOPS (nest)[i];
527 /* First we do the lower bound. */
528 if (LL_STEP (loop) > 0)
529 expression = LL_LOWER_BOUND (loop);
530 else
531 expression = LL_UPPER_BOUND (loop);
533 for (; expression != NULL; expression = LLE_NEXT (expression))
535 /* Fill in the coefficient. */
536 for (j = 0; j < i; j++)
537 A[size][j] = LLE_COEFFICIENTS (expression)[j];
539 /* And the invariant coefficient. */
540 for (j = 0; j < invariants; j++)
541 B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
543 /* And the constant. */
544 a[size] = LLE_CONSTANT (expression);
546 /* Convert (2x+3y+2+b)/4 <= z to 2x+3y-4z <= -2-b. IE put all
547 constants and single variables on */
548 A[size][i] = -1 * LLE_DENOMINATOR (expression);
549 a[size] *= -1;
550 for (j = 0; j < invariants; j++)
551 B[size][j] *= -1;
553 size++;
554 /* Need to increase matrix sizes above. */
555 gcc_assert (size <= 127);
559 /* Then do the exact same thing for the upper bounds. */
560 if (LL_STEP (loop) > 0)
561 expression = LL_UPPER_BOUND (loop);
562 else
563 expression = LL_LOWER_BOUND (loop);
565 for (; expression != NULL; expression = LLE_NEXT (expression))
567 /* Fill in the coefficient. */
568 for (j = 0; j < i; j++)
569 A[size][j] = LLE_COEFFICIENTS (expression)[j];
571 /* And the invariant coefficient. */
572 for (j = 0; j < invariants; j++)
573 B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
575 /* And the constant. */
576 a[size] = LLE_CONSTANT (expression);
578 /* Convert z <= (2x+3y+2+b)/4 to -2x-3y+4z <= 2+b. */
579 for (j = 0; j < i; j++)
580 A[size][j] *= -1;
581 A[size][i] = LLE_DENOMINATOR (expression);
582 size++;
583 /* Need to increase matrix sizes above. */
584 gcc_assert (size <= 127);
589 /* Compute the lattice base x = base * y + origin, where y is the
590 base space. */
591 lattice = lambda_lattice_compute_base (nest);
593 /* Ax <= a + B then becomes ALy <= a+B - A*origin. L is the lattice base */
595 /* A1 = A * L */
596 lambda_matrix_mult (A, LATTICE_BASE (lattice), A1, size, depth, depth);
598 /* a1 = a - A * origin constant. */
599 lambda_matrix_vector_mult (A, size, depth, LATTICE_ORIGIN (lattice), a1);
600 lambda_vector_add_mc (a, 1, a1, -1, a1, size);
602 /* B1 = B - A * origin invariant. */
603 lambda_matrix_mult (A, LATTICE_ORIGIN_INVARIANTS (lattice), B1, size, depth,
604 invariants);
605 lambda_matrix_add_mc (B, 1, B1, -1, B1, size, invariants);
607 /* Now compute the auxiliary space bounds by first inverting U, multiplying
608 it by A1, then performing fourier motzkin. */
610 invertedtrans = lambda_matrix_new (depth, depth);
612 /* Compute the inverse of U. */
613 determinant = lambda_matrix_inverse (LTM_MATRIX (trans),
614 invertedtrans, depth);
616 /* A = A1 inv(U). */
617 lambda_matrix_mult (A1, invertedtrans, A, size, depth, depth);
619 /* Perform Fourier-Motzkin elimination to calculate the bounds of the
620 auxillary nest.
621 Fourier-Motzkin is a way of reducing systems of linear inequality so that
622 it is easy to calculate the answer and bounds.
623 A sketch of how it works:
624 Given a system of linear inequalities, ai * xj >= bk, you can always
625 rewrite the constraints so they are all of the form
626 a <= x, or x <= b, or x >= constant for some x in x1 ... xj (and some b
627 in b1 ... bk, and some a in a1...ai)
628 You can then eliminate this x from the non-constant inequalities by
629 rewriting these as a <= b, x >= constant, and delete the x variable.
630 You can then repeat this for any remaining x variables, and then we have
631 an easy to use variable <= constant (or no variables at all) form that we
632 can construct our bounds from.
634 In our case, each time we eliminate, we construct part of the bound from
635 the ith variable, then delete the ith variable.
637 Remember the constant are in our vector a, our coefficient matrix is A,
638 and our invariant coefficient matrix is B */
640 /* Swap B and B1, and a1 and a */
641 temp0 = B1;
642 B1 = B;
643 B = temp0;
645 temp1 = a1;
646 a1 = a;
647 a = temp1;
649 auxillary_nest = lambda_loopnest_new (depth, invariants);
651 for (i = depth - 1; i >= 0; i--)
653 loop = lambda_loop_new ();
654 LN_LOOPS (auxillary_nest)[i] = loop;
655 LL_STEP (loop) = 1;
657 for (j = 0; j < size; j++)
659 if (A[j][i] < 0)
661 /* Lower bound. */
662 expression = lambda_linear_expression_new (depth, invariants);
664 for (k = 0; k < i; k++)
665 LLE_COEFFICIENTS (expression)[k] = A[j][k];
666 for (k = 0; k < invariants; k++)
667 LLE_INVARIANT_COEFFICIENTS (expression)[k] = -1 * B[j][k];
668 LLE_DENOMINATOR (expression) = -1 * A[j][i];
669 LLE_CONSTANT (expression) = -1 * a[j];
670 /* Ignore if identical to the existing lower bound. */
671 if (!lle_equal (LL_LOWER_BOUND (loop),
672 expression, depth, invariants))
674 LLE_NEXT (expression) = LL_LOWER_BOUND (loop);
675 LL_LOWER_BOUND (loop) = expression;
679 else if (A[j][i] > 0)
681 /* Upper bound. */
682 expression = lambda_linear_expression_new (depth, invariants);
683 for (k = 0; k < i; k++)
684 LLE_COEFFICIENTS (expression)[k] = -1 * A[j][k];
685 LLE_CONSTANT (expression) = a[j];
687 for (k = 0; k < invariants; k++)
688 LLE_INVARIANT_COEFFICIENTS (expression)[k] = B[j][k];
690 LLE_DENOMINATOR (expression) = A[j][i];
691 /* Ignore if identical to the existing upper bound. */
692 if (!lle_equal (LL_UPPER_BOUND (loop),
693 expression, depth, invariants))
695 LLE_NEXT (expression) = LL_UPPER_BOUND (loop);
696 LL_UPPER_BOUND (loop) = expression;
701 /* creates a new system by deleting the i'th variable. */
702 newsize = 0;
703 for (j = 0; j < size; j++)
705 if (A[j][i] == 0)
707 lambda_vector_copy (A[j], A1[newsize], depth);
708 lambda_vector_copy (B[j], B1[newsize], invariants);
709 a1[newsize] = a[j];
710 newsize++;
712 else if (A[j][i] > 0)
714 for (k = 0; k < size; k++)
716 if (A[k][i] < 0)
718 multiple = lcm (A[j][i], A[k][i]);
719 f1 = multiple / A[j][i];
720 f2 = -1 * multiple / A[k][i];
722 lambda_vector_add_mc (A[j], f1, A[k], f2,
723 A1[newsize], depth);
724 lambda_vector_add_mc (B[j], f1, B[k], f2,
725 B1[newsize], invariants);
726 a1[newsize] = f1 * a[j] + f2 * a[k];
727 newsize++;
733 temp0 = A;
734 A = A1;
735 A1 = temp0;
737 temp0 = B;
738 B = B1;
739 B1 = temp0;
741 temp1 = a;
742 a = a1;
743 a1 = temp1;
745 size = newsize;
748 return auxillary_nest;
751 /* Compute the loop bounds for the target space, using the bounds of
752 the auxiliary nest AUXILLARY_NEST, and the triangular matrix H. This is
753 done by matrix multiplication and then transformation of the new matrix
754 back into linear expression form.
755 Return the target loopnest. */
757 static lambda_loopnest
758 lambda_compute_target_space (lambda_loopnest auxillary_nest,
759 lambda_trans_matrix H, lambda_vector stepsigns)
761 lambda_matrix inverse, H1;
762 int determinant, i, j;
763 int gcd1, gcd2;
764 int factor;
766 lambda_loopnest target_nest;
767 int depth, invariants;
768 lambda_matrix target;
770 lambda_loop auxillary_loop, target_loop;
771 lambda_linear_expression expression, auxillary_expr, target_expr, tmp_expr;
773 depth = LN_DEPTH (auxillary_nest);
774 invariants = LN_INVARIANTS (auxillary_nest);
776 inverse = lambda_matrix_new (depth, depth);
777 determinant = lambda_matrix_inverse (LTM_MATRIX (H), inverse, depth);
779 /* H1 is H excluding its diagonal. */
780 H1 = lambda_matrix_new (depth, depth);
781 lambda_matrix_copy (LTM_MATRIX (H), H1, depth, depth);
783 for (i = 0; i < depth; i++)
784 H1[i][i] = 0;
786 /* Computes the linear offsets of the loop bounds. */
787 target = lambda_matrix_new (depth, depth);
788 lambda_matrix_mult (H1, inverse, target, depth, depth, depth);
790 target_nest = lambda_loopnest_new (depth, invariants);
792 for (i = 0; i < depth; i++)
795 /* Get a new loop structure. */
796 target_loop = lambda_loop_new ();
797 LN_LOOPS (target_nest)[i] = target_loop;
799 /* Computes the gcd of the coefficients of the linear part. */
800 gcd1 = gcd_vector (target[i], i);
802 /* Include the denominator in the GCD */
803 gcd1 = gcd (gcd1, determinant);
805 /* Now divide through by the gcd */
806 for (j = 0; j < i; j++)
807 target[i][j] = target[i][j] / gcd1;
809 expression = lambda_linear_expression_new (depth, invariants);
810 lambda_vector_copy (target[i], LLE_COEFFICIENTS (expression), depth);
811 LLE_DENOMINATOR (expression) = determinant / gcd1;
812 LLE_CONSTANT (expression) = 0;
813 lambda_vector_clear (LLE_INVARIANT_COEFFICIENTS (expression),
814 invariants);
815 LL_LINEAR_OFFSET (target_loop) = expression;
818 /* For each loop, compute the new bounds from H */
819 for (i = 0; i < depth; i++)
821 auxillary_loop = LN_LOOPS (auxillary_nest)[i];
822 target_loop = LN_LOOPS (target_nest)[i];
823 LL_STEP (target_loop) = LTM_MATRIX (H)[i][i];
824 factor = LTM_MATRIX (H)[i][i];
826 /* First we do the lower bound. */
827 auxillary_expr = LL_LOWER_BOUND (auxillary_loop);
829 for (; auxillary_expr != NULL;
830 auxillary_expr = LLE_NEXT (auxillary_expr))
832 target_expr = lambda_linear_expression_new (depth, invariants);
833 lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
834 depth, inverse, depth,
835 LLE_COEFFICIENTS (target_expr));
836 lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
837 LLE_COEFFICIENTS (target_expr), depth,
838 factor);
840 LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
841 lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
842 LLE_INVARIANT_COEFFICIENTS (target_expr),
843 invariants);
844 lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
845 LLE_INVARIANT_COEFFICIENTS (target_expr),
846 invariants, factor);
847 LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
849 if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
851 LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
852 * determinant;
853 lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
854 (target_expr),
855 LLE_INVARIANT_COEFFICIENTS
856 (target_expr), invariants,
857 determinant);
858 LLE_DENOMINATOR (target_expr) =
859 LLE_DENOMINATOR (target_expr) * determinant;
861 /* Find the gcd and divide by it here, rather than doing it
862 at the tree level. */
863 gcd1 = gcd_vector (LLE_COEFFICIENTS (target_expr), depth);
864 gcd2 = gcd_vector (LLE_INVARIANT_COEFFICIENTS (target_expr),
865 invariants);
866 gcd1 = gcd (gcd1, gcd2);
867 gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
868 gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
869 for (j = 0; j < depth; j++)
870 LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
871 for (j = 0; j < invariants; j++)
872 LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
873 LLE_CONSTANT (target_expr) /= gcd1;
874 LLE_DENOMINATOR (target_expr) /= gcd1;
875 /* Ignore if identical to existing bound. */
876 if (!lle_equal (LL_LOWER_BOUND (target_loop), target_expr, depth,
877 invariants))
879 LLE_NEXT (target_expr) = LL_LOWER_BOUND (target_loop);
880 LL_LOWER_BOUND (target_loop) = target_expr;
883 /* Now do the upper bound. */
884 auxillary_expr = LL_UPPER_BOUND (auxillary_loop);
886 for (; auxillary_expr != NULL;
887 auxillary_expr = LLE_NEXT (auxillary_expr))
889 target_expr = lambda_linear_expression_new (depth, invariants);
890 lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
891 depth, inverse, depth,
892 LLE_COEFFICIENTS (target_expr));
893 lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
894 LLE_COEFFICIENTS (target_expr), depth,
895 factor);
896 LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
897 lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
898 LLE_INVARIANT_COEFFICIENTS (target_expr),
899 invariants);
900 lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
901 LLE_INVARIANT_COEFFICIENTS (target_expr),
902 invariants, factor);
903 LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
905 if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
907 LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
908 * determinant;
909 lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
910 (target_expr),
911 LLE_INVARIANT_COEFFICIENTS
912 (target_expr), invariants,
913 determinant);
914 LLE_DENOMINATOR (target_expr) =
915 LLE_DENOMINATOR (target_expr) * determinant;
917 /* Find the gcd and divide by it here, instead of at the
918 tree level. */
919 gcd1 = gcd_vector (LLE_COEFFICIENTS (target_expr), depth);
920 gcd2 = gcd_vector (LLE_INVARIANT_COEFFICIENTS (target_expr),
921 invariants);
922 gcd1 = gcd (gcd1, gcd2);
923 gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
924 gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
925 for (j = 0; j < depth; j++)
926 LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
927 for (j = 0; j < invariants; j++)
928 LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
929 LLE_CONSTANT (target_expr) /= gcd1;
930 LLE_DENOMINATOR (target_expr) /= gcd1;
931 /* Ignore if equal to existing bound. */
932 if (!lle_equal (LL_UPPER_BOUND (target_loop), target_expr, depth,
933 invariants))
935 LLE_NEXT (target_expr) = LL_UPPER_BOUND (target_loop);
936 LL_UPPER_BOUND (target_loop) = target_expr;
940 for (i = 0; i < depth; i++)
942 target_loop = LN_LOOPS (target_nest)[i];
943 /* If necessary, exchange the upper and lower bounds and negate
944 the step size. */
945 if (stepsigns[i] < 0)
947 LL_STEP (target_loop) *= -1;
948 tmp_expr = LL_LOWER_BOUND (target_loop);
949 LL_LOWER_BOUND (target_loop) = LL_UPPER_BOUND (target_loop);
950 LL_UPPER_BOUND (target_loop) = tmp_expr;
953 return target_nest;
956 /* Compute the step signs of TRANS, using TRANS and stepsigns. Return the new
957 result. */
959 static lambda_vector
960 lambda_compute_step_signs (lambda_trans_matrix trans, lambda_vector stepsigns)
962 lambda_matrix matrix, H;
963 int size;
964 lambda_vector newsteps;
965 int i, j, factor, minimum_column;
966 int temp;
968 matrix = LTM_MATRIX (trans);
969 size = LTM_ROWSIZE (trans);
970 H = lambda_matrix_new (size, size);
972 newsteps = lambda_vector_new (size);
973 lambda_vector_copy (stepsigns, newsteps, size);
975 lambda_matrix_copy (matrix, H, size, size);
977 for (j = 0; j < size; j++)
979 lambda_vector row;
980 row = H[j];
981 for (i = j; i < size; i++)
982 if (row[i] < 0)
983 lambda_matrix_col_negate (H, size, i);
984 while (lambda_vector_first_nz (row, size, j + 1) < size)
986 minimum_column = lambda_vector_min_nz (row, size, j);
987 lambda_matrix_col_exchange (H, size, j, minimum_column);
989 temp = newsteps[j];
990 newsteps[j] = newsteps[minimum_column];
991 newsteps[minimum_column] = temp;
993 for (i = j + 1; i < size; i++)
995 factor = row[i] / row[j];
996 lambda_matrix_col_add (H, size, j, i, -1 * factor);
1000 return newsteps;
1003 /* Transform NEST according to TRANS, and return the new loopnest.
1004 This involves
1005 1. Computing a lattice base for the transformation
1006 2. Composing the dense base with the specified transformation (TRANS)
1007 3. Decomposing the combined transformation into a lower triangular portion,
1008 and a unimodular portion.
1009 4. Computing the auxillary nest using the unimodular portion.
1010 5. Computing the target nest using the auxillary nest and the lower
1011 triangular portion. */
1013 lambda_loopnest
1014 lambda_loopnest_transform (lambda_loopnest nest, lambda_trans_matrix trans)
1016 lambda_loopnest auxillary_nest, target_nest;
1018 int depth, invariants;
1019 int i, j;
1020 lambda_lattice lattice;
1021 lambda_trans_matrix trans1, H, U;
1022 lambda_loop loop;
1023 lambda_linear_expression expression;
1024 lambda_vector origin;
1025 lambda_matrix origin_invariants;
1026 lambda_vector stepsigns;
1027 int f;
1029 depth = LN_DEPTH (nest);
1030 invariants = LN_INVARIANTS (nest);
1032 /* Keep track of the signs of the loop steps. */
1033 stepsigns = lambda_vector_new (depth);
1034 for (i = 0; i < depth; i++)
1036 if (LL_STEP (LN_LOOPS (nest)[i]) > 0)
1037 stepsigns[i] = 1;
1038 else
1039 stepsigns[i] = -1;
1042 /* Compute the lattice base. */
1043 lattice = lambda_lattice_compute_base (nest);
1044 trans1 = lambda_trans_matrix_new (depth, depth);
1046 /* Multiply the transformation matrix by the lattice base. */
1048 lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_BASE (lattice),
1049 LTM_MATRIX (trans1), depth, depth, depth);
1051 /* Compute the Hermite normal form for the new transformation matrix. */
1052 H = lambda_trans_matrix_new (depth, depth);
1053 U = lambda_trans_matrix_new (depth, depth);
1054 lambda_matrix_hermite (LTM_MATRIX (trans1), depth, LTM_MATRIX (H),
1055 LTM_MATRIX (U));
1057 /* Compute the auxiliary loop nest's space from the unimodular
1058 portion. */
1059 auxillary_nest = lambda_compute_auxillary_space (nest, U);
1061 /* Compute the loop step signs from the old step signs and the
1062 transformation matrix. */
1063 stepsigns = lambda_compute_step_signs (trans1, stepsigns);
1065 /* Compute the target loop nest space from the auxiliary nest and
1066 the lower triangular matrix H. */
1067 target_nest = lambda_compute_target_space (auxillary_nest, H, stepsigns);
1068 origin = lambda_vector_new (depth);
1069 origin_invariants = lambda_matrix_new (depth, invariants);
1070 lambda_matrix_vector_mult (LTM_MATRIX (trans), depth, depth,
1071 LATTICE_ORIGIN (lattice), origin);
1072 lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_ORIGIN_INVARIANTS (lattice),
1073 origin_invariants, depth, depth, invariants);
1075 for (i = 0; i < depth; i++)
1077 loop = LN_LOOPS (target_nest)[i];
1078 expression = LL_LINEAR_OFFSET (loop);
1079 if (lambda_vector_zerop (LLE_COEFFICIENTS (expression), depth))
1080 f = 1;
1081 else
1082 f = LLE_DENOMINATOR (expression);
1084 LLE_CONSTANT (expression) += f * origin[i];
1086 for (j = 0; j < invariants; j++)
1087 LLE_INVARIANT_COEFFICIENTS (expression)[j] +=
1088 f * origin_invariants[i][j];
1091 return target_nest;
1095 /* Convert a gcc tree expression EXPR to a lambda linear expression, and
1096 return the new expression. DEPTH is the depth of the loopnest.
1097 OUTERINDUCTIONVARS is an array of the induction variables for outer loops
1098 in this nest. INVARIANTS is the array of invariants for the loop. EXTRA
1099 is the amount we have to add/subtract from the expression because of the
1100 type of comparison it is used in. */
1102 static lambda_linear_expression
1103 gcc_tree_to_linear_expression (int depth, tree expr,
1104 VEC(tree) *outerinductionvars,
1105 VEC(tree) *invariants, int extra)
1107 lambda_linear_expression lle = NULL;
1108 switch (TREE_CODE (expr))
1110 case INTEGER_CST:
1112 lle = lambda_linear_expression_new (depth, 2 * depth);
1113 LLE_CONSTANT (lle) = TREE_INT_CST_LOW (expr);
1114 if (extra != 0)
1115 LLE_CONSTANT (lle) = extra;
1117 LLE_DENOMINATOR (lle) = 1;
1119 break;
1120 case SSA_NAME:
1122 tree iv, invar;
1123 size_t i;
1124 for (i = 0; VEC_iterate (tree, outerinductionvars, i, iv); i++)
1125 if (iv != NULL)
1127 if (SSA_NAME_VAR (iv) == SSA_NAME_VAR (expr))
1129 lle = lambda_linear_expression_new (depth, 2 * depth);
1130 LLE_COEFFICIENTS (lle)[i] = 1;
1131 if (extra != 0)
1132 LLE_CONSTANT (lle) = extra;
1134 LLE_DENOMINATOR (lle) = 1;
1137 for (i = 0; VEC_iterate (tree, invariants, i, invar); i++)
1138 if (invar != NULL)
1140 if (SSA_NAME_VAR (invar) == SSA_NAME_VAR (expr))
1142 lle = lambda_linear_expression_new (depth, 2 * depth);
1143 LLE_INVARIANT_COEFFICIENTS (lle)[i] = 1;
1144 if (extra != 0)
1145 LLE_CONSTANT (lle) = extra;
1146 LLE_DENOMINATOR (lle) = 1;
1150 break;
1151 default:
1152 return NULL;
1155 return lle;
1158 /* Return true if OP is invariant in LOOP and all outer loops. */
1160 static bool
1161 invariant_in_loop (struct loop *loop, tree op)
1163 if (loop->depth == 0)
1164 return true;
1165 if (TREE_CODE (op) == SSA_NAME)
1167 if (TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL
1168 && IS_EMPTY_STMT (SSA_NAME_DEF_STMT (op)))
1169 return true;
1170 if (IS_EMPTY_STMT (SSA_NAME_DEF_STMT (op)))
1171 return false;
1172 if (loop->outer)
1173 if (!invariant_in_loop (loop->outer, op))
1174 return false;
1175 return !flow_bb_inside_loop_p (loop,
1176 bb_for_stmt (SSA_NAME_DEF_STMT (op)));
1178 return false;
1181 /* Generate a lambda loop from a gcc loop LOOP. Return the new lambda loop,
1182 or NULL if it could not be converted.
1183 DEPTH is the depth of the loop.
1184 INVARIANTS is a pointer to the array of loop invariants.
1185 The induction variable for this loop should be stored in the parameter
1186 OURINDUCTIONVAR.
1187 OUTERINDUCTIONVARS is an array of induction variables for outer loops. */
1189 static lambda_loop
1190 gcc_loop_to_lambda_loop (struct loop *loop, int depth,
1191 VEC (tree) ** invariants,
1192 tree * ourinductionvar,
1193 VEC (tree) * outerinductionvars)
1195 tree phi;
1196 tree exit_cond;
1197 tree access_fn, inductionvar;
1198 tree step;
1199 lambda_loop lloop = NULL;
1200 lambda_linear_expression lbound, ubound;
1201 tree test;
1202 int stepint;
1203 int extra = 0;
1204 tree uboundvar;
1205 use_optype uses;
1207 /* Find out induction var and set the pointer so that the caller can
1208 append it to the outerinductionvars array later. */
1210 inductionvar = find_induction_var_from_exit_cond (loop);
1211 *ourinductionvar = inductionvar;
1213 exit_cond = get_loop_exit_condition (loop);
1215 if (inductionvar == NULL || exit_cond == NULL)
1217 if (dump_file && (dump_flags & TDF_DETAILS))
1218 fprintf (dump_file,
1219 "Unable to convert loop: Cannot determine exit condition or induction variable for loop.\n");
1220 return NULL;
1223 test = TREE_OPERAND (exit_cond, 0);
1225 if (SSA_NAME_DEF_STMT (inductionvar) == NULL_TREE)
1228 if (dump_file && (dump_flags & TDF_DETAILS))
1229 fprintf (dump_file,
1230 "Unable to convert loop: Cannot find PHI node for induction variable\n");
1232 return NULL;
1235 phi = SSA_NAME_DEF_STMT (inductionvar);
1236 if (TREE_CODE (phi) != PHI_NODE)
1238 get_stmt_operands (phi);
1239 uses = STMT_USE_OPS (phi);
1241 if (!uses)
1244 if (dump_file && (dump_flags & TDF_DETAILS))
1245 fprintf (dump_file,
1246 "Unable to convert loop: Cannot find PHI node for induction variable\n");
1248 return NULL;
1251 phi = USE_OP (uses, 0);
1252 phi = SSA_NAME_DEF_STMT (phi);
1253 if (TREE_CODE (phi) != PHI_NODE)
1256 if (dump_file && (dump_flags & TDF_DETAILS))
1257 fprintf (dump_file,
1258 "Unable to convert loop: Cannot find PHI node for induction variable\n");
1259 return NULL;
1264 access_fn = instantiate_parameters
1265 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1266 if (!access_fn)
1268 if (dump_file && (dump_flags & TDF_DETAILS))
1269 fprintf (dump_file,
1270 "Unable to convert loop: Access function for induction variable phi is NULL\n");
1272 return NULL;
1275 step = evolution_part_in_loop_num (access_fn, loop->num);
1276 if (!step || step == chrec_dont_know)
1278 if (dump_file && (dump_flags & TDF_DETAILS))
1279 fprintf (dump_file,
1280 "Unable to convert loop: Cannot determine step of loop.\n");
1282 return NULL;
1284 if (TREE_CODE (step) != INTEGER_CST)
1287 if (dump_file && (dump_flags & TDF_DETAILS))
1288 fprintf (dump_file,
1289 "Unable to convert loop: Step of loop is not integer.\n");
1290 return NULL;
1293 stepint = TREE_INT_CST_LOW (step);
1295 /* Only want phis for induction vars, which will have two
1296 arguments. */
1297 if (PHI_NUM_ARGS (phi) != 2)
1299 if (dump_file && (dump_flags & TDF_DETAILS))
1300 fprintf (dump_file,
1301 "Unable to convert loop: PHI node for induction variable has >2 arguments\n");
1302 return NULL;
1305 /* Another induction variable check. One argument's source should be
1306 in the loop, one outside the loop. */
1307 if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src)
1308 && flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 1)->src))
1311 if (dump_file && (dump_flags & TDF_DETAILS))
1312 fprintf (dump_file,
1313 "Unable to convert loop: PHI edges both inside loop, or both outside loop.\n");
1315 return NULL;
1318 if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src))
1320 lbound = gcc_tree_to_linear_expression (depth, PHI_ARG_DEF (phi, 1),
1321 outerinductionvars, *invariants,
1323 else
1324 lbound = gcc_tree_to_linear_expression (depth, PHI_ARG_DEF (phi, 0),
1325 outerinductionvars, *invariants,
1327 if (!lbound)
1330 if (dump_file && (dump_flags & TDF_DETAILS))
1331 fprintf (dump_file,
1332 "Unable to convert loop: Cannot convert lower bound to linear expression\n");
1334 return NULL;
1336 /* One part of the test may be a loop invariant tree. */
1337 if (TREE_CODE (TREE_OPERAND (test, 1)) == SSA_NAME
1338 && invariant_in_loop (loop, TREE_OPERAND (test, 1)))
1339 VEC_safe_push (tree, *invariants, TREE_OPERAND (test, 1));
1340 else if (TREE_CODE (TREE_OPERAND (test, 0)) == SSA_NAME
1341 && invariant_in_loop (loop, TREE_OPERAND (test, 0)))
1342 VEC_safe_push (tree, *invariants, TREE_OPERAND (test, 0));
1344 /* The non-induction variable part of the test is the upper bound variable.
1346 if (TREE_OPERAND (test, 0) == inductionvar)
1347 uboundvar = TREE_OPERAND (test, 1);
1348 else
1349 uboundvar = TREE_OPERAND (test, 0);
1352 /* We only size the vectors assuming we have, at max, 2 times as many
1353 invariants as we do loops (one for each bound).
1354 This is just an arbitrary number, but it has to be matched against the
1355 code below. */
1356 gcc_assert (VEC_length (tree, *invariants) <= (unsigned int) (2 * depth));
1359 /* We might have some leftover. */
1360 if (TREE_CODE (test) == LT_EXPR)
1361 extra = -1 * stepint;
1362 else if (TREE_CODE (test) == NE_EXPR)
1363 extra = -1 * stepint;
1364 else if (TREE_CODE (test) == GT_EXPR)
1365 extra = -1 * stepint;
1367 ubound = gcc_tree_to_linear_expression (depth,
1368 uboundvar,
1369 outerinductionvars,
1370 *invariants, extra);
1371 if (!ubound)
1374 if (dump_file && (dump_flags & TDF_DETAILS))
1375 fprintf (dump_file,
1376 "Unable to convert loop: Cannot convert upper bound to linear expression\n");
1377 return NULL;
1380 lloop = lambda_loop_new ();
1381 LL_STEP (lloop) = stepint;
1382 LL_LOWER_BOUND (lloop) = lbound;
1383 LL_UPPER_BOUND (lloop) = ubound;
1384 return lloop;
1387 /* Given a LOOP, find the induction variable it is testing against in the exit
1388 condition. Return the induction variable if found, NULL otherwise. */
1390 static tree
1391 find_induction_var_from_exit_cond (struct loop *loop)
1393 tree expr = get_loop_exit_condition (loop);
1394 tree ivarop;
1395 tree test;
1396 if (expr == NULL_TREE)
1397 return NULL_TREE;
1398 if (TREE_CODE (expr) != COND_EXPR)
1399 return NULL_TREE;
1400 test = TREE_OPERAND (expr, 0);
1401 if (TREE_CODE_CLASS (TREE_CODE (test)) != '<')
1402 return NULL_TREE;
1403 /* This is a guess. We say that for a <,!=,<= b, a is the induction
1404 variable.
1405 For >, >=, we guess b is the induction variable.
1406 If we are wrong, it'll fail the rest of the induction variable tests, and
1407 everything will be fine anyway. */
1408 switch (TREE_CODE (test))
1410 case LT_EXPR:
1411 case LE_EXPR:
1412 case NE_EXPR:
1413 ivarop = TREE_OPERAND (test, 0);
1414 break;
1415 case GT_EXPR:
1416 case GE_EXPR:
1417 ivarop = TREE_OPERAND (test, 1);
1418 break;
1419 default:
1420 gcc_unreachable();
1422 if (TREE_CODE (ivarop) != SSA_NAME)
1423 return NULL_TREE;
1424 return ivarop;
1427 DEF_VEC_GC_P(lambda_loop);
1428 /* Generate a lambda loopnest from a gcc loopnest LOOP_NEST.
1429 Return the new loop nest.
1430 INDUCTIONVARS is a pointer to an array of induction variables for the
1431 loopnest that will be filled in during this process.
1432 INVARIANTS is a pointer to an array of invariants that will be filled in
1433 during this process. */
1435 lambda_loopnest
1436 gcc_loopnest_to_lambda_loopnest (struct loop * loop_nest,
1437 VEC (tree) **inductionvars,
1438 VEC (tree) **invariants)
1440 lambda_loopnest ret;
1441 struct loop *temp;
1442 int depth = 0;
1443 size_t i;
1444 VEC (lambda_loop) *loops;
1445 lambda_loop newloop;
1446 tree inductionvar = NULL;
1448 temp = loop_nest;
1449 while (temp)
1451 depth++;
1452 temp = temp->inner;
1454 loops = VEC_alloc (lambda_loop, 1);
1455 *inductionvars = VEC_alloc (tree, 1);
1456 *invariants = VEC_alloc (tree, 1);
1457 temp = loop_nest;
1458 while (temp)
1460 newloop = gcc_loop_to_lambda_loop (temp, depth, invariants,
1461 &inductionvar, *inductionvars);
1462 if (!newloop)
1463 return NULL;
1464 VEC_safe_push (tree, *inductionvars, inductionvar);
1465 VEC_safe_push (lambda_loop, loops, newloop);
1466 temp = temp->inner;
1469 ret = lambda_loopnest_new (depth, 2 * depth);
1470 for (i = 0; VEC_iterate (lambda_loop, loops, i, newloop); i++)
1471 LN_LOOPS (ret)[i] = newloop;
1473 return ret;
1477 /* Convert a lambda body vector LBV to a gcc tree, and return the new tree.
1478 STMTS_TO_INSERT is a pointer to a tree where the statements we need to be
1479 inserted for us are stored. INDUCTION_VARS is the array of induction
1480 variables for the loop this LBV is from. */
1482 static tree
1483 lbv_to_gcc_expression (lambda_body_vector lbv,
1484 VEC (tree) *induction_vars, tree * stmts_to_insert)
1486 tree stmts, stmt, resvar, name;
1487 size_t i;
1488 tree_stmt_iterator tsi;
1490 /* Create a statement list and a linear expression temporary. */
1491 stmts = alloc_stmt_list ();
1492 resvar = create_tmp_var (integer_type_node, "lletmp");
1493 add_referenced_tmp_var (resvar);
1495 /* Start at 0. */
1496 stmt = build (MODIFY_EXPR, void_type_node, resvar, integer_zero_node);
1497 name = make_ssa_name (resvar, stmt);
1498 TREE_OPERAND (stmt, 0) = name;
1499 tsi = tsi_last (stmts);
1500 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1502 for (i = 0; i < VEC_length (tree ,induction_vars) ; i++)
1504 if (LBV_COEFFICIENTS (lbv)[i] != 0)
1506 tree newname;
1508 /* newname = coefficient * induction_variable */
1509 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1510 fold (build (MULT_EXPR, integer_type_node,
1511 VEC_index (tree, induction_vars, i),
1512 build_int_cst (integer_type_node,
1513 LBV_COEFFICIENTS (lbv)[i]))));
1514 newname = make_ssa_name (resvar, stmt);
1515 TREE_OPERAND (stmt, 0) = newname;
1516 tsi = tsi_last (stmts);
1517 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1518 /* name = name + newname */
1519 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1520 build (PLUS_EXPR, integer_type_node, name, newname));
1521 name = make_ssa_name (resvar, stmt);
1522 TREE_OPERAND (stmt, 0) = name;
1523 tsi = tsi_last (stmts);
1524 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1528 /* Handle any denominator that occurs. */
1529 if (LBV_DENOMINATOR (lbv) != 1)
1531 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1532 build (CEIL_DIV_EXPR, integer_type_node,
1533 name, build_int_cst (integer_type_node,
1534 LBV_DENOMINATOR (lbv))));
1535 name = make_ssa_name (resvar, stmt);
1536 TREE_OPERAND (stmt, 0) = name;
1537 tsi = tsi_last (stmts);
1538 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1540 *stmts_to_insert = stmts;
1541 return name;
1544 /* Convert a linear expression from coefficient and constant form to a
1545 gcc tree.
1546 Return the tree that represents the final value of the expression.
1547 LLE is the linear expression to convert.
1548 OFFSET is the linear offset to apply to the expression.
1549 INDUCTION_VARS is a vector of induction variables for the loops.
1550 INVARIANTS is a vector of the loop nest invariants.
1551 WRAP specifies what tree code to wrap the results in, if there is more than
1552 one (it is either MAX_EXPR, or MIN_EXPR).
1553 STMTS_TO_INSERT Is a pointer to the statement list we fill in with
1554 statements that need to be inserted for the linear expression. */
1556 static tree
1557 lle_to_gcc_expression (lambda_linear_expression lle,
1558 lambda_linear_expression offset,
1559 VEC(tree) *induction_vars,
1560 VEC(tree) *invariants,
1561 enum tree_code wrap, tree * stmts_to_insert)
1563 tree stmts, stmt, resvar, name;
1564 size_t i;
1565 tree_stmt_iterator tsi;
1566 VEC(tree) *results;
1568 name = NULL_TREE;
1569 /* Create a statement list and a linear expression temporary. */
1570 stmts = alloc_stmt_list ();
1571 resvar = create_tmp_var (integer_type_node, "lletmp");
1572 add_referenced_tmp_var (resvar);
1573 results = VEC_alloc (tree, 1);
1575 /* Build up the linear expressions, and put the variable representing the
1576 result in the results array. */
1577 for (; lle != NULL; lle = LLE_NEXT (lle))
1579 /* Start at name = 0. */
1580 stmt = build (MODIFY_EXPR, void_type_node, resvar, integer_zero_node);
1581 name = make_ssa_name (resvar, stmt);
1582 TREE_OPERAND (stmt, 0) = name;
1583 tsi = tsi_last (stmts);
1584 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1586 /* First do the induction variables.
1587 at the end, name = name + all the induction variables added
1588 together. */
1589 for (i = 0; i < VEC_length (tree ,induction_vars); i++)
1591 if (LLE_COEFFICIENTS (lle)[i] != 0)
1593 tree newname;
1594 tree mult;
1595 tree coeff;
1597 /* mult = induction variable * coefficient. */
1598 if (LLE_COEFFICIENTS (lle)[i] == 1)
1600 mult = VEC_index (tree, induction_vars, i);
1602 else
1604 coeff = build_int_cst (integer_type_node,
1605 LLE_COEFFICIENTS (lle)[i]);
1606 mult = fold (build (MULT_EXPR, integer_type_node,
1607 VEC_index (tree, induction_vars, i),
1608 coeff));
1611 /* newname = mult */
1612 stmt = build (MODIFY_EXPR, void_type_node, resvar, mult);
1613 newname = make_ssa_name (resvar, stmt);
1614 TREE_OPERAND (stmt, 0) = newname;
1615 tsi = tsi_last (stmts);
1616 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1618 /* name = name + newname */
1619 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1620 build (PLUS_EXPR, integer_type_node,
1621 name, newname));
1622 name = make_ssa_name (resvar, stmt);
1623 TREE_OPERAND (stmt, 0) = name;
1624 tsi = tsi_last (stmts);
1625 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1629 /* Handle our invariants.
1630 At the end, we have name = name + result of adding all multiplied
1631 invariants. */
1632 for (i = 0; i < VEC_length (tree, invariants); i++)
1634 if (LLE_INVARIANT_COEFFICIENTS (lle)[i] != 0)
1636 tree newname;
1637 tree mult;
1638 tree coeff;
1640 /* mult = invariant * coefficient */
1641 if (LLE_INVARIANT_COEFFICIENTS (lle)[i] == 1)
1643 mult = VEC_index (tree, invariants, i);
1645 else
1647 coeff = build_int_cst (integer_type_node,
1648 LLE_INVARIANT_COEFFICIENTS (lle)[i]);
1649 mult = fold (build (MULT_EXPR, integer_type_node,
1650 VEC_index (tree, invariants, i),
1651 coeff));
1654 /* newname = mult */
1655 stmt = build (MODIFY_EXPR, void_type_node, resvar, mult);
1656 newname = make_ssa_name (resvar, stmt);
1657 TREE_OPERAND (stmt, 0) = newname;
1658 tsi = tsi_last (stmts);
1659 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1661 /* name = name + newname */
1662 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1663 build (PLUS_EXPR, integer_type_node,
1664 name, newname));
1665 name = make_ssa_name (resvar, stmt);
1666 TREE_OPERAND (stmt, 0) = name;
1667 tsi = tsi_last (stmts);
1668 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1672 /* Now handle the constant.
1673 name = name + constant. */
1674 if (LLE_CONSTANT (lle) != 0)
1676 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1677 build (PLUS_EXPR, integer_type_node,
1678 name, build_int_cst (integer_type_node,
1679 LLE_CONSTANT (lle))));
1680 name = make_ssa_name (resvar, stmt);
1681 TREE_OPERAND (stmt, 0) = name;
1682 tsi = tsi_last (stmts);
1683 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1686 /* Now handle the offset.
1687 name = name + linear offset. */
1688 if (LLE_CONSTANT (offset) != 0)
1690 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1691 build (PLUS_EXPR, integer_type_node,
1692 name, build_int_cst (integer_type_node,
1693 LLE_CONSTANT (offset))));
1694 name = make_ssa_name (resvar, stmt);
1695 TREE_OPERAND (stmt, 0) = name;
1696 tsi = tsi_last (stmts);
1697 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1700 /* Handle any denominator that occurs. */
1701 if (LLE_DENOMINATOR (lle) != 1)
1703 if (wrap == MAX_EXPR)
1704 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1705 build (CEIL_DIV_EXPR, integer_type_node,
1706 name, build_int_cst (integer_type_node,
1707 LLE_DENOMINATOR (lle))));
1708 else if (wrap == MIN_EXPR)
1709 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1710 build (FLOOR_DIV_EXPR, integer_type_node,
1711 name, build_int_cst (integer_type_node,
1712 LLE_DENOMINATOR (lle))));
1713 else
1714 gcc_unreachable();
1716 /* name = {ceil, floor}(name/denominator) */
1717 name = make_ssa_name (resvar, stmt);
1718 TREE_OPERAND (stmt, 0) = name;
1719 tsi = tsi_last (stmts);
1720 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1722 VEC_safe_push (tree, results, name);
1725 /* Again, out of laziness, we don't handle this case yet. It's not
1726 hard, it just hasn't occurred. */
1727 gcc_assert (VEC_length (tree, results) <= 2);
1729 /* We may need to wrap the results in a MAX_EXPR or MIN_EXPR. */
1730 if (VEC_length (tree, results) > 1)
1732 tree op1 = VEC_index (tree, results, 0);
1733 tree op2 = VEC_index (tree, results, 1);
1734 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1735 build (wrap, integer_type_node, op1, op2));
1736 name = make_ssa_name (resvar, stmt);
1737 TREE_OPERAND (stmt, 0) = name;
1738 tsi = tsi_last (stmts);
1739 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1742 *stmts_to_insert = stmts;
1743 return name;
1746 /* Transform a lambda loopnest NEW_LOOPNEST, which had TRANSFORM applied to
1747 it, back into gcc code. This changes the
1748 loops, their induction variables, and their bodies, so that they
1749 match the transformed loopnest.
1750 OLD_LOOPNEST is the loopnest before we've replaced it with the new
1751 loopnest.
1752 OLD_IVS is a vector of induction variables from the old loopnest.
1753 INVARIANTS is a vector of loop invariants from the old loopnest.
1754 NEW_LOOPNEST is the new lambda loopnest to replace OLD_LOOPNEST with.
1755 TRANSFORM is the matrix transform that was applied to OLD_LOOPNEST to get
1756 NEW_LOOPNEST. */
1757 void
1758 lambda_loopnest_to_gcc_loopnest (struct loop *old_loopnest,
1759 VEC(tree) *old_ivs,
1760 VEC(tree) *invariants,
1761 lambda_loopnest new_loopnest,
1762 lambda_trans_matrix transform)
1765 struct loop *temp;
1766 size_t i = 0;
1767 size_t depth = 0;
1768 VEC(tree) *new_ivs;
1769 block_stmt_iterator bsi;
1770 basic_block *bbs;
1772 if (dump_file)
1774 transform = lambda_trans_matrix_inverse (transform);
1775 fprintf (dump_file, "Inverse of transformation matrix:\n");
1776 print_lambda_trans_matrix (dump_file, transform);
1778 temp = old_loopnest;
1779 new_ivs = VEC_alloc (tree, 1);
1780 while (temp)
1782 temp = temp->inner;
1783 depth++;
1785 temp = old_loopnest;
1787 while (temp)
1789 lambda_loop newloop;
1790 basic_block bb;
1791 tree ivvar, ivvarinced, exitcond, stmts;
1792 enum tree_code testtype;
1793 tree newupperbound, newlowerbound;
1794 lambda_linear_expression offset;
1795 /* First, build the new induction variable temporary */
1797 ivvar = create_tmp_var (integer_type_node, "lnivtmp");
1798 add_referenced_tmp_var (ivvar);
1800 VEC_safe_push (tree, new_ivs, ivvar);
1802 newloop = LN_LOOPS (new_loopnest)[i];
1804 /* Linear offset is a bit tricky to handle. Punt on the unhandled
1805 cases for now. */
1806 offset = LL_LINEAR_OFFSET (newloop);
1808 gcc_assert (LLE_DENOMINATOR (offset) == 1 &&
1809 lambda_vector_zerop (LLE_COEFFICIENTS (offset), depth));
1811 /* Now build the new lower bounds, and insert the statements
1812 necessary to generate it on the loop preheader. */
1813 newlowerbound = lle_to_gcc_expression (LL_LOWER_BOUND (newloop),
1814 LL_LINEAR_OFFSET (newloop),
1815 new_ivs,
1816 invariants, MAX_EXPR, &stmts);
1817 bsi_insert_on_edge (loop_preheader_edge (temp), stmts);
1818 bsi_commit_edge_inserts (NULL);
1819 /* Build the new upper bound and insert its statements in the
1820 basic block of the exit condition */
1821 newupperbound = lle_to_gcc_expression (LL_UPPER_BOUND (newloop),
1822 LL_LINEAR_OFFSET (newloop),
1823 new_ivs,
1824 invariants, MIN_EXPR, &stmts);
1825 exitcond = get_loop_exit_condition (temp);
1826 bb = bb_for_stmt (exitcond);
1827 bsi = bsi_start (bb);
1828 bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
1830 /* Create the new iv, and insert it's increment on the latch
1831 block. */
1833 bb = temp->latch->pred->src;
1834 bsi = bsi_last (bb);
1835 create_iv (newlowerbound,
1836 build_int_cst (integer_type_node, LL_STEP (newloop)),
1837 ivvar, temp, &bsi, false, &ivvar,
1838 &ivvarinced);
1840 /* Replace the exit condition with the new upper bound
1841 comparison. */
1842 testtype = LL_STEP (newloop) >= 0 ? LE_EXPR : GE_EXPR;
1843 COND_EXPR_COND (exitcond) = build (testtype,
1844 boolean_type_node,
1845 ivvarinced, newupperbound);
1846 modify_stmt (exitcond);
1847 VEC_replace (tree, new_ivs, i, ivvar);
1849 i++;
1850 temp = temp->inner;
1853 /* Go through the loop and make iv replacements. */
1854 bbs = get_loop_body (old_loopnest);
1855 for (i = 0; i < old_loopnest->num_nodes; i++)
1856 for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
1858 tree stmt = bsi_stmt (bsi);
1859 use_optype uses;
1860 size_t j;
1862 get_stmt_operands (stmt);
1863 uses = STMT_USE_OPS (stmt);
1864 for (j = 0; j < NUM_USES (uses); j++)
1866 size_t k;
1867 use_operand_p use = USE_OP_PTR (uses, j);
1868 for (k = 0; k < VEC_length (tree, old_ivs); k++)
1870 tree oldiv = VEC_index (tree, old_ivs, k);
1871 if (USE_FROM_PTR (use) == oldiv)
1873 tree newiv, stmts;
1874 lambda_body_vector lbv;
1876 /* Compute the new expression for the induction
1877 variable. */
1878 depth = VEC_length (tree, new_ivs);
1879 lbv = lambda_body_vector_new (depth);
1880 LBV_COEFFICIENTS (lbv)[k] = 1;
1881 lbv = lambda_body_vector_compute_new (transform, lbv);
1882 newiv = lbv_to_gcc_expression (lbv, new_ivs, &stmts);
1884 /* Insert the statements to build that
1885 expression. */
1886 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
1888 /* Replace the use with the result of that
1889 expression. */
1890 if (dump_file)
1892 fprintf (dump_file,
1893 "Replacing induction variable use of ");
1894 print_generic_stmt (dump_file, USE_FROM_PTR (use), 0);
1895 fprintf (dump_file, " with ");
1896 print_generic_stmt (dump_file, newiv, 0);
1897 fprintf (dump_file, "\n");
1899 SET_USE (use, newiv);
1907 /* Returns true when the vector V is lexicographically positive, in
1908 other words, when the first non zero element is positive. */
1910 static bool
1911 lambda_vector_lexico_pos (lambda_vector v, unsigned n)
1913 unsigned i;
1914 for (i = 0; i < n; i++)
1916 if (v[i] == 0)
1917 continue;
1918 if (v[i] < 0)
1919 return false;
1920 if (v[i] > 0)
1921 return true;
1923 return true;
1926 /* Return true if TRANS is a legal transformation matrix that respects
1927 the dependence vectors in DISTS and DIRS. The conservative answer
1928 is false.
1930 "Wolfe proves that a unimodular transformation represented by the
1931 matrix T is legal when applied to a loop nest with a set of
1932 lexicographically non-negative distance vectors RDG if and only if
1933 for each vector d in RDG, (T.d >= 0) is lexicographically positive.
1934 i.e.: if and only if it transforms the lexicographically positive
1935 distance vectors to lexicographically positive vectors. Note that
1936 a unimodular matrix must transform the zero vector (and only it) to
1937 the zero vector." S.Muchnick. */
1939 bool
1940 lambda_transform_legal_p (lambda_trans_matrix trans,
1941 int nb_loops, varray_type dependence_relations)
1943 unsigned int i;
1944 lambda_vector distres;
1945 struct data_dependence_relation *ddr;
1947 #if defined ENABLE_CHECKING
1948 gcc_assert (LTM_COLSIZE (trans) == nb_loops
1949 && LTM_ROWSIZE (trans) == nb_loops);
1950 #endif
1952 /* When there is an unknown relation in the dependence_relations, we
1953 know that it is no worth looking at this loop nest: give up. */
1954 ddr = (struct data_dependence_relation *)
1955 VARRAY_GENERIC_PTR (dependence_relations, 0);
1956 if (ddr == NULL)
1957 return true;
1958 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1959 return false;
1961 distres = lambda_vector_new (nb_loops);
1963 /* For each distance vector in the dependence graph. */
1964 for (i = 0; i < VARRAY_ACTIVE_SIZE (dependence_relations); i++)
1966 ddr = (struct data_dependence_relation *)
1967 VARRAY_GENERIC_PTR (dependence_relations, i);
1969 /* Don't care about relations for which we know that there is no
1970 dependence, nor about read-read (aka. output-dependences):
1971 these data accesses can happen in any order. */
1972 if (DDR_ARE_DEPENDENT (ddr) == chrec_known
1973 || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
1974 continue;
1975 /* Conservatively answer: "this transformation is not valid". */
1976 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1977 return false;
1979 /* Compute trans.dist_vect */
1980 lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
1981 DDR_DIST_VECT (ddr), distres);
1983 if (!lambda_vector_lexico_pos (distres, nb_loops))
1984 return false;
1987 return true;