PR tree-optimization/43833
[official-gcc/alias-decl.git] / gcc / lambda-code.c
blobfff6ff800f358c5f51a31492abfc066ae6b0e8fa
1 /* Loop transformation code generation
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Daniel Berlin <dberlin@dberlin.org>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "ggc.h"
27 #include "tree.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "obstack.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
35 #include "timevar.h"
36 #include "cfgloop.h"
37 #include "expr.h"
38 #include "optabs.h"
39 #include "tree-chrec.h"
40 #include "tree-data-ref.h"
41 #include "tree-pass.h"
42 #include "tree-scalar-evolution.h"
43 #include "vec.h"
44 #include "lambda.h"
45 #include "vecprim.h"
46 #include "pointer-set.h"
48 /* This loop nest code generation is based on non-singular matrix
49 math.
51 A little terminology and a general sketch of the algorithm. See "A singular
52 loop transformation framework based on non-singular matrices" by Wei Li and
53 Keshav Pingali for formal proofs that the various statements below are
54 correct.
56 A loop iteration space represents the points traversed by the loop. A point in the
57 iteration space can be represented by a vector of size <loop depth>. You can
58 therefore represent the iteration space as an integral combinations of a set
59 of basis vectors.
61 A loop iteration space is dense if every integer point between the loop
62 bounds is a point in the iteration space. Every loop with a step of 1
63 therefore has a dense iteration space.
65 for i = 1 to 3, step 1 is a dense iteration space.
67 A loop iteration space is sparse if it is not dense. That is, the iteration
68 space skips integer points that are within the loop bounds.
70 for i = 1 to 3, step 2 is a sparse iteration space, because the integer point
71 2 is skipped.
73 Dense source spaces are easy to transform, because they don't skip any
74 points to begin with. Thus we can compute the exact bounds of the target
75 space using min/max and floor/ceil.
77 For a dense source space, we take the transformation matrix, decompose it
78 into a lower triangular part (H) and a unimodular part (U).
79 We then compute the auxiliary space from the unimodular part (source loop
80 nest . U = auxiliary space) , which has two important properties:
81 1. It traverses the iterations in the same lexicographic order as the source
82 space.
83 2. It is a dense space when the source is a dense space (even if the target
84 space is going to be sparse).
86 Given the auxiliary space, we use the lower triangular part to compute the
87 bounds in the target space by simple matrix multiplication.
88 The gaps in the target space (IE the new loop step sizes) will be the
89 diagonals of the H matrix.
91 Sparse source spaces require another step, because you can't directly compute
92 the exact bounds of the auxiliary and target space from the sparse space.
93 Rather than try to come up with a separate algorithm to handle sparse source
94 spaces directly, we just find a legal transformation matrix that gives you
95 the sparse source space, from a dense space, and then transform the dense
96 space.
98 For a regular sparse space, you can represent the source space as an integer
99 lattice, and the base space of that lattice will always be dense. Thus, we
100 effectively use the lattice to figure out the transformation from the lattice
101 base space, to the sparse iteration space (IE what transform was applied to
102 the dense space to make it sparse). We then compose this transform with the
103 transformation matrix specified by the user (since our matrix transformations
104 are closed under composition, this is okay). We can then use the base space
105 (which is dense) plus the composed transformation matrix, to compute the rest
106 of the transform using the dense space algorithm above.
108 In other words, our sparse source space (B) is decomposed into a dense base
109 space (A), and a matrix (L) that transforms A into B, such that A.L = B.
110 We then compute the composition of L and the user transformation matrix (T),
111 so that T is now a transform from A to the result, instead of from B to the
112 result.
113 IE A.(LT) = result instead of B.T = result
114 Since A is now a dense source space, we can use the dense source space
115 algorithm above to compute the result of applying transform (LT) to A.
117 Fourier-Motzkin elimination is used to compute the bounds of the base space
118 of the lattice. */
120 static bool perfect_nestify (struct loop *, VEC(tree,heap) *,
121 VEC(tree,heap) *, VEC(int,heap) *,
122 VEC(tree,heap) *);
123 /* Lattice stuff that is internal to the code generation algorithm. */
125 typedef struct lambda_lattice_s
127 /* Lattice base matrix. */
128 lambda_matrix base;
129 /* Lattice dimension. */
130 int dimension;
131 /* Origin vector for the coefficients. */
132 lambda_vector origin;
133 /* Origin matrix for the invariants. */
134 lambda_matrix origin_invariants;
135 /* Number of invariants. */
136 int invariants;
137 } *lambda_lattice;
139 #define LATTICE_BASE(T) ((T)->base)
140 #define LATTICE_DIMENSION(T) ((T)->dimension)
141 #define LATTICE_ORIGIN(T) ((T)->origin)
142 #define LATTICE_ORIGIN_INVARIANTS(T) ((T)->origin_invariants)
143 #define LATTICE_INVARIANTS(T) ((T)->invariants)
145 static bool lle_equal (lambda_linear_expression, lambda_linear_expression,
146 int, int);
147 static lambda_lattice lambda_lattice_new (int, int, struct obstack *);
148 static lambda_lattice lambda_lattice_compute_base (lambda_loopnest,
149 struct obstack *);
151 static bool can_convert_to_perfect_nest (struct loop *);
153 /* Create a new lambda loop in LAMBDA_OBSTACK. */
155 static lambda_loop
156 lambda_loop_new (struct obstack * lambda_obstack)
158 lambda_loop result = (lambda_loop)
159 obstack_alloc (lambda_obstack, sizeof (struct lambda_loop_s));
160 memset (result, 0, sizeof (struct lambda_loop_s));
161 return result;
164 /* Create a new lambda body vector. */
166 lambda_body_vector
167 lambda_body_vector_new (int size, struct obstack * lambda_obstack)
169 lambda_body_vector ret;
171 ret = (lambda_body_vector) obstack_alloc (lambda_obstack,
172 sizeof (*ret));
173 LBV_COEFFICIENTS (ret) = lambda_vector_new (size);
174 LBV_SIZE (ret) = size;
175 LBV_DENOMINATOR (ret) = 1;
176 return ret;
179 /* Compute the new coefficients for the vector based on the
180 *inverse* of the transformation matrix. */
182 lambda_body_vector
183 lambda_body_vector_compute_new (lambda_trans_matrix transform,
184 lambda_body_vector vect,
185 struct obstack * lambda_obstack)
187 lambda_body_vector temp;
188 int depth;
190 /* Make sure the matrix is square. */
191 gcc_assert (LTM_ROWSIZE (transform) == LTM_COLSIZE (transform));
193 depth = LTM_ROWSIZE (transform);
195 temp = lambda_body_vector_new (depth, lambda_obstack);
196 LBV_DENOMINATOR (temp) =
197 LBV_DENOMINATOR (vect) * LTM_DENOMINATOR (transform);
198 lambda_vector_matrix_mult (LBV_COEFFICIENTS (vect), depth,
199 LTM_MATRIX (transform), depth,
200 LBV_COEFFICIENTS (temp));
201 LBV_SIZE (temp) = LBV_SIZE (vect);
202 return temp;
205 /* Print out a lambda body vector. */
207 void
208 print_lambda_body_vector (FILE * outfile, lambda_body_vector body)
210 print_lambda_vector (outfile, LBV_COEFFICIENTS (body), LBV_SIZE (body));
213 /* Return TRUE if two linear expressions are equal. */
215 static bool
216 lle_equal (lambda_linear_expression lle1, lambda_linear_expression lle2,
217 int depth, int invariants)
219 int i;
221 if (lle1 == NULL || lle2 == NULL)
222 return false;
223 if (LLE_CONSTANT (lle1) != LLE_CONSTANT (lle2))
224 return false;
225 if (LLE_DENOMINATOR (lle1) != LLE_DENOMINATOR (lle2))
226 return false;
227 for (i = 0; i < depth; i++)
228 if (LLE_COEFFICIENTS (lle1)[i] != LLE_COEFFICIENTS (lle2)[i])
229 return false;
230 for (i = 0; i < invariants; i++)
231 if (LLE_INVARIANT_COEFFICIENTS (lle1)[i] !=
232 LLE_INVARIANT_COEFFICIENTS (lle2)[i])
233 return false;
234 return true;
237 /* Create a new linear expression with dimension DIM, and total number
238 of invariants INVARIANTS. */
240 lambda_linear_expression
241 lambda_linear_expression_new (int dim, int invariants,
242 struct obstack * lambda_obstack)
244 lambda_linear_expression ret;
246 ret = (lambda_linear_expression)obstack_alloc (lambda_obstack,
247 sizeof (*ret));
248 LLE_COEFFICIENTS (ret) = lambda_vector_new (dim);
249 LLE_CONSTANT (ret) = 0;
250 LLE_INVARIANT_COEFFICIENTS (ret) = lambda_vector_new (invariants);
251 LLE_DENOMINATOR (ret) = 1;
252 LLE_NEXT (ret) = NULL;
254 return ret;
257 /* Print out a linear expression EXPR, with SIZE coefficients, to OUTFILE.
258 The starting letter used for variable names is START. */
260 static void
261 print_linear_expression (FILE * outfile, lambda_vector expr, int size,
262 char start)
264 int i;
265 bool first = true;
266 for (i = 0; i < size; i++)
268 if (expr[i] != 0)
270 if (first)
272 if (expr[i] < 0)
273 fprintf (outfile, "-");
274 first = false;
276 else if (expr[i] > 0)
277 fprintf (outfile, " + ");
278 else
279 fprintf (outfile, " - ");
280 if (abs (expr[i]) == 1)
281 fprintf (outfile, "%c", start + i);
282 else
283 fprintf (outfile, "%d%c", abs (expr[i]), start + i);
288 /* Print out a lambda linear expression structure, EXPR, to OUTFILE. The
289 depth/number of coefficients is given by DEPTH, the number of invariants is
290 given by INVARIANTS, and the character to start variable names with is given
291 by START. */
293 void
294 print_lambda_linear_expression (FILE * outfile,
295 lambda_linear_expression expr,
296 int depth, int invariants, char start)
298 fprintf (outfile, "\tLinear expression: ");
299 print_linear_expression (outfile, LLE_COEFFICIENTS (expr), depth, start);
300 fprintf (outfile, " constant: %d ", LLE_CONSTANT (expr));
301 fprintf (outfile, " invariants: ");
302 print_linear_expression (outfile, LLE_INVARIANT_COEFFICIENTS (expr),
303 invariants, 'A');
304 fprintf (outfile, " denominator: %d\n", LLE_DENOMINATOR (expr));
307 /* Print a lambda loop structure LOOP to OUTFILE. The depth/number of
308 coefficients is given by DEPTH, the number of invariants is
309 given by INVARIANTS, and the character to start variable names with is given
310 by START. */
312 void
313 print_lambda_loop (FILE * outfile, lambda_loop loop, int depth,
314 int invariants, char start)
316 int step;
317 lambda_linear_expression expr;
319 gcc_assert (loop);
321 expr = LL_LINEAR_OFFSET (loop);
322 step = LL_STEP (loop);
323 fprintf (outfile, " step size = %d \n", step);
325 if (expr)
327 fprintf (outfile, " linear offset: \n");
328 print_lambda_linear_expression (outfile, expr, depth, invariants,
329 start);
332 fprintf (outfile, " lower bound: \n");
333 for (expr = LL_LOWER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
334 print_lambda_linear_expression (outfile, expr, depth, invariants, start);
335 fprintf (outfile, " upper bound: \n");
336 for (expr = LL_UPPER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
337 print_lambda_linear_expression (outfile, expr, depth, invariants, start);
340 /* Create a new loop nest structure with DEPTH loops, and INVARIANTS as the
341 number of invariants. */
343 lambda_loopnest
344 lambda_loopnest_new (int depth, int invariants,
345 struct obstack * lambda_obstack)
347 lambda_loopnest ret;
348 ret = (lambda_loopnest)obstack_alloc (lambda_obstack, sizeof (*ret));
350 LN_LOOPS (ret) = (lambda_loop *)
351 obstack_alloc (lambda_obstack, depth * sizeof(LN_LOOPS(ret)));
352 LN_DEPTH (ret) = depth;
353 LN_INVARIANTS (ret) = invariants;
355 return ret;
358 /* Print a lambda loopnest structure, NEST, to OUTFILE. The starting
359 character to use for loop names is given by START. */
361 void
362 print_lambda_loopnest (FILE * outfile, lambda_loopnest nest, char start)
364 int i;
365 for (i = 0; i < LN_DEPTH (nest); i++)
367 fprintf (outfile, "Loop %c\n", start + i);
368 print_lambda_loop (outfile, LN_LOOPS (nest)[i], LN_DEPTH (nest),
369 LN_INVARIANTS (nest), 'i');
370 fprintf (outfile, "\n");
374 /* Allocate a new lattice structure of DEPTH x DEPTH, with INVARIANTS number
375 of invariants. */
377 static lambda_lattice
378 lambda_lattice_new (int depth, int invariants, struct obstack * lambda_obstack)
380 lambda_lattice ret
381 = (lambda_lattice)obstack_alloc (lambda_obstack, sizeof (*ret));
382 LATTICE_BASE (ret) = lambda_matrix_new (depth, depth, lambda_obstack);
383 LATTICE_ORIGIN (ret) = lambda_vector_new (depth);
384 LATTICE_ORIGIN_INVARIANTS (ret) = lambda_matrix_new (depth, invariants,
385 lambda_obstack);
386 LATTICE_DIMENSION (ret) = depth;
387 LATTICE_INVARIANTS (ret) = invariants;
388 return ret;
391 /* Compute the lattice base for NEST. The lattice base is essentially a
392 non-singular transform from a dense base space to a sparse iteration space.
393 We use it so that we don't have to specially handle the case of a sparse
394 iteration space in other parts of the algorithm. As a result, this routine
395 only does something interesting (IE produce a matrix that isn't the
396 identity matrix) if NEST is a sparse space. */
398 static lambda_lattice
399 lambda_lattice_compute_base (lambda_loopnest nest,
400 struct obstack * lambda_obstack)
402 lambda_lattice ret;
403 int depth, invariants;
404 lambda_matrix base;
406 int i, j, step;
407 lambda_loop loop;
408 lambda_linear_expression expression;
410 depth = LN_DEPTH (nest);
411 invariants = LN_INVARIANTS (nest);
413 ret = lambda_lattice_new (depth, invariants, lambda_obstack);
414 base = LATTICE_BASE (ret);
415 for (i = 0; i < depth; i++)
417 loop = LN_LOOPS (nest)[i];
418 gcc_assert (loop);
419 step = LL_STEP (loop);
420 /* If we have a step of 1, then the base is one, and the
421 origin and invariant coefficients are 0. */
422 if (step == 1)
424 for (j = 0; j < depth; j++)
425 base[i][j] = 0;
426 base[i][i] = 1;
427 LATTICE_ORIGIN (ret)[i] = 0;
428 for (j = 0; j < invariants; j++)
429 LATTICE_ORIGIN_INVARIANTS (ret)[i][j] = 0;
431 else
433 /* Otherwise, we need the lower bound expression (which must
434 be an affine function) to determine the base. */
435 expression = LL_LOWER_BOUND (loop);
436 gcc_assert (expression && !LLE_NEXT (expression)
437 && LLE_DENOMINATOR (expression) == 1);
439 /* The lower triangular portion of the base is going to be the
440 coefficient times the step */
441 for (j = 0; j < i; j++)
442 base[i][j] = LLE_COEFFICIENTS (expression)[j]
443 * LL_STEP (LN_LOOPS (nest)[j]);
444 base[i][i] = step;
445 for (j = i + 1; j < depth; j++)
446 base[i][j] = 0;
448 /* Origin for this loop is the constant of the lower bound
449 expression. */
450 LATTICE_ORIGIN (ret)[i] = LLE_CONSTANT (expression);
452 /* Coefficient for the invariants are equal to the invariant
453 coefficients in the expression. */
454 for (j = 0; j < invariants; j++)
455 LATTICE_ORIGIN_INVARIANTS (ret)[i][j] =
456 LLE_INVARIANT_COEFFICIENTS (expression)[j];
459 return ret;
462 /* Compute the least common multiple of two numbers A and B . */
465 least_common_multiple (int a, int b)
467 return (abs (a) * abs (b) / gcd (a, b));
470 /* Perform Fourier-Motzkin elimination to calculate the bounds of the
471 auxiliary nest.
472 Fourier-Motzkin is a way of reducing systems of linear inequalities so that
473 it is easy to calculate the answer and bounds.
474 A sketch of how it works:
475 Given a system of linear inequalities, ai * xj >= bk, you can always
476 rewrite the constraints so they are all of the form
477 a <= x, or x <= b, or x >= constant for some x in x1 ... xj (and some b
478 in b1 ... bk, and some a in a1...ai)
479 You can then eliminate this x from the non-constant inequalities by
480 rewriting these as a <= b, x >= constant, and delete the x variable.
481 You can then repeat this for any remaining x variables, and then we have
482 an easy to use variable <= constant (or no variables at all) form that we
483 can construct our bounds from.
485 In our case, each time we eliminate, we construct part of the bound from
486 the ith variable, then delete the ith variable.
488 Remember the constant are in our vector a, our coefficient matrix is A,
489 and our invariant coefficient matrix is B.
491 SIZE is the size of the matrices being passed.
492 DEPTH is the loop nest depth.
493 INVARIANTS is the number of loop invariants.
494 A, B, and a are the coefficient matrix, invariant coefficient, and a
495 vector of constants, respectively. */
497 static lambda_loopnest
498 compute_nest_using_fourier_motzkin (int size,
499 int depth,
500 int invariants,
501 lambda_matrix A,
502 lambda_matrix B,
503 lambda_vector a,
504 struct obstack * lambda_obstack)
507 int multiple, f1, f2;
508 int i, j, k;
509 lambda_linear_expression expression;
510 lambda_loop loop;
511 lambda_loopnest auxillary_nest;
512 lambda_matrix swapmatrix, A1, B1;
513 lambda_vector swapvector, a1;
514 int newsize;
516 A1 = lambda_matrix_new (128, depth, lambda_obstack);
517 B1 = lambda_matrix_new (128, invariants, lambda_obstack);
518 a1 = lambda_vector_new (128);
520 auxillary_nest = lambda_loopnest_new (depth, invariants, lambda_obstack);
522 for (i = depth - 1; i >= 0; i--)
524 loop = lambda_loop_new (lambda_obstack);
525 LN_LOOPS (auxillary_nest)[i] = loop;
526 LL_STEP (loop) = 1;
528 for (j = 0; j < size; j++)
530 if (A[j][i] < 0)
532 /* Any linear expression in the matrix with a coefficient less
533 than 0 becomes part of the new lower bound. */
534 expression = lambda_linear_expression_new (depth, invariants,
535 lambda_obstack);
537 for (k = 0; k < i; k++)
538 LLE_COEFFICIENTS (expression)[k] = A[j][k];
540 for (k = 0; k < invariants; k++)
541 LLE_INVARIANT_COEFFICIENTS (expression)[k] = -1 * B[j][k];
543 LLE_DENOMINATOR (expression) = -1 * A[j][i];
544 LLE_CONSTANT (expression) = -1 * a[j];
546 /* Ignore if identical to the existing lower bound. */
547 if (!lle_equal (LL_LOWER_BOUND (loop),
548 expression, depth, invariants))
550 LLE_NEXT (expression) = LL_LOWER_BOUND (loop);
551 LL_LOWER_BOUND (loop) = expression;
555 else if (A[j][i] > 0)
557 /* Any linear expression with a coefficient greater than 0
558 becomes part of the new upper bound. */
559 expression = lambda_linear_expression_new (depth, invariants,
560 lambda_obstack);
561 for (k = 0; k < i; k++)
562 LLE_COEFFICIENTS (expression)[k] = -1 * A[j][k];
564 for (k = 0; k < invariants; k++)
565 LLE_INVARIANT_COEFFICIENTS (expression)[k] = B[j][k];
567 LLE_DENOMINATOR (expression) = A[j][i];
568 LLE_CONSTANT (expression) = a[j];
570 /* Ignore if identical to the existing upper bound. */
571 if (!lle_equal (LL_UPPER_BOUND (loop),
572 expression, depth, invariants))
574 LLE_NEXT (expression) = LL_UPPER_BOUND (loop);
575 LL_UPPER_BOUND (loop) = expression;
581 /* This portion creates a new system of linear inequalities by deleting
582 the i'th variable, reducing the system by one variable. */
583 newsize = 0;
584 for (j = 0; j < size; j++)
586 /* If the coefficient for the i'th variable is 0, then we can just
587 eliminate the variable straightaway. Otherwise, we have to
588 multiply through by the coefficients we are eliminating. */
589 if (A[j][i] == 0)
591 lambda_vector_copy (A[j], A1[newsize], depth);
592 lambda_vector_copy (B[j], B1[newsize], invariants);
593 a1[newsize] = a[j];
594 newsize++;
596 else if (A[j][i] > 0)
598 for (k = 0; k < size; k++)
600 if (A[k][i] < 0)
602 multiple = least_common_multiple (A[j][i], A[k][i]);
603 f1 = multiple / A[j][i];
604 f2 = -1 * multiple / A[k][i];
606 lambda_vector_add_mc (A[j], f1, A[k], f2,
607 A1[newsize], depth);
608 lambda_vector_add_mc (B[j], f1, B[k], f2,
609 B1[newsize], invariants);
610 a1[newsize] = f1 * a[j] + f2 * a[k];
611 newsize++;
617 swapmatrix = A;
618 A = A1;
619 A1 = swapmatrix;
621 swapmatrix = B;
622 B = B1;
623 B1 = swapmatrix;
625 swapvector = a;
626 a = a1;
627 a1 = swapvector;
629 size = newsize;
632 return auxillary_nest;
635 /* Compute the loop bounds for the auxiliary space NEST.
636 Input system used is Ax <= b. TRANS is the unimodular transformation.
637 Given the original nest, this function will
638 1. Convert the nest into matrix form, which consists of a matrix for the
639 coefficients, a matrix for the
640 invariant coefficients, and a vector for the constants.
641 2. Use the matrix form to calculate the lattice base for the nest (which is
642 a dense space)
643 3. Compose the dense space transform with the user specified transform, to
644 get a transform we can easily calculate transformed bounds for.
645 4. Multiply the composed transformation matrix times the matrix form of the
646 loop.
647 5. Transform the newly created matrix (from step 4) back into a loop nest
648 using Fourier-Motzkin elimination to figure out the bounds. */
650 static lambda_loopnest
651 lambda_compute_auxillary_space (lambda_loopnest nest,
652 lambda_trans_matrix trans,
653 struct obstack * lambda_obstack)
655 lambda_matrix A, B, A1, B1;
656 lambda_vector a, a1;
657 lambda_matrix invertedtrans;
658 int depth, invariants, size;
659 int i, j;
660 lambda_loop loop;
661 lambda_linear_expression expression;
662 lambda_lattice lattice;
664 depth = LN_DEPTH (nest);
665 invariants = LN_INVARIANTS (nest);
667 /* Unfortunately, we can't know the number of constraints we'll have
668 ahead of time, but this should be enough even in ridiculous loop nest
669 cases. We must not go over this limit. */
670 A = lambda_matrix_new (128, depth, lambda_obstack);
671 B = lambda_matrix_new (128, invariants, lambda_obstack);
672 a = lambda_vector_new (128);
674 A1 = lambda_matrix_new (128, depth, lambda_obstack);
675 B1 = lambda_matrix_new (128, invariants, lambda_obstack);
676 a1 = lambda_vector_new (128);
678 /* Store the bounds in the equation matrix A, constant vector a, and
679 invariant matrix B, so that we have Ax <= a + B.
680 This requires a little equation rearranging so that everything is on the
681 correct side of the inequality. */
682 size = 0;
683 for (i = 0; i < depth; i++)
685 loop = LN_LOOPS (nest)[i];
687 /* First we do the lower bound. */
688 if (LL_STEP (loop) > 0)
689 expression = LL_LOWER_BOUND (loop);
690 else
691 expression = LL_UPPER_BOUND (loop);
693 for (; expression != NULL; expression = LLE_NEXT (expression))
695 /* Fill in the coefficient. */
696 for (j = 0; j < i; j++)
697 A[size][j] = LLE_COEFFICIENTS (expression)[j];
699 /* And the invariant coefficient. */
700 for (j = 0; j < invariants; j++)
701 B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
703 /* And the constant. */
704 a[size] = LLE_CONSTANT (expression);
706 /* Convert (2x+3y+2+b)/4 <= z to 2x+3y-4z <= -2-b. IE put all
707 constants and single variables on */
708 A[size][i] = -1 * LLE_DENOMINATOR (expression);
709 a[size] *= -1;
710 for (j = 0; j < invariants; j++)
711 B[size][j] *= -1;
713 size++;
714 /* Need to increase matrix sizes above. */
715 gcc_assert (size <= 127);
719 /* Then do the exact same thing for the upper bounds. */
720 if (LL_STEP (loop) > 0)
721 expression = LL_UPPER_BOUND (loop);
722 else
723 expression = LL_LOWER_BOUND (loop);
725 for (; expression != NULL; expression = LLE_NEXT (expression))
727 /* Fill in the coefficient. */
728 for (j = 0; j < i; j++)
729 A[size][j] = LLE_COEFFICIENTS (expression)[j];
731 /* And the invariant coefficient. */
732 for (j = 0; j < invariants; j++)
733 B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
735 /* And the constant. */
736 a[size] = LLE_CONSTANT (expression);
738 /* Convert z <= (2x+3y+2+b)/4 to -2x-3y+4z <= 2+b. */
739 for (j = 0; j < i; j++)
740 A[size][j] *= -1;
741 A[size][i] = LLE_DENOMINATOR (expression);
742 size++;
743 /* Need to increase matrix sizes above. */
744 gcc_assert (size <= 127);
749 /* Compute the lattice base x = base * y + origin, where y is the
750 base space. */
751 lattice = lambda_lattice_compute_base (nest, lambda_obstack);
753 /* Ax <= a + B then becomes ALy <= a+B - A*origin. L is the lattice base */
755 /* A1 = A * L */
756 lambda_matrix_mult (A, LATTICE_BASE (lattice), A1, size, depth, depth);
758 /* a1 = a - A * origin constant. */
759 lambda_matrix_vector_mult (A, size, depth, LATTICE_ORIGIN (lattice), a1);
760 lambda_vector_add_mc (a, 1, a1, -1, a1, size);
762 /* B1 = B - A * origin invariant. */
763 lambda_matrix_mult (A, LATTICE_ORIGIN_INVARIANTS (lattice), B1, size, depth,
764 invariants);
765 lambda_matrix_add_mc (B, 1, B1, -1, B1, size, invariants);
767 /* Now compute the auxiliary space bounds by first inverting U, multiplying
768 it by A1, then performing Fourier-Motzkin. */
770 invertedtrans = lambda_matrix_new (depth, depth, lambda_obstack);
772 /* Compute the inverse of U. */
773 lambda_matrix_inverse (LTM_MATRIX (trans),
774 invertedtrans, depth, lambda_obstack);
776 /* A = A1 inv(U). */
777 lambda_matrix_mult (A1, invertedtrans, A, size, depth, depth);
779 return compute_nest_using_fourier_motzkin (size, depth, invariants,
780 A, B1, a1, lambda_obstack);
783 /* Compute the loop bounds for the target space, using the bounds of
784 the auxiliary nest AUXILLARY_NEST, and the triangular matrix H.
785 The target space loop bounds are computed by multiplying the triangular
786 matrix H by the auxiliary nest, to get the new loop bounds. The sign of
787 the loop steps (positive or negative) is then used to swap the bounds if
788 the loop counts downwards.
789 Return the target loopnest. */
791 static lambda_loopnest
792 lambda_compute_target_space (lambda_loopnest auxillary_nest,
793 lambda_trans_matrix H, lambda_vector stepsigns,
794 struct obstack * lambda_obstack)
796 lambda_matrix inverse, H1;
797 int determinant, i, j;
798 int gcd1, gcd2;
799 int factor;
801 lambda_loopnest target_nest;
802 int depth, invariants;
803 lambda_matrix target;
805 lambda_loop auxillary_loop, target_loop;
806 lambda_linear_expression expression, auxillary_expr, target_expr, tmp_expr;
808 depth = LN_DEPTH (auxillary_nest);
809 invariants = LN_INVARIANTS (auxillary_nest);
811 inverse = lambda_matrix_new (depth, depth, lambda_obstack);
812 determinant = lambda_matrix_inverse (LTM_MATRIX (H), inverse, depth,
813 lambda_obstack);
815 /* H1 is H excluding its diagonal. */
816 H1 = lambda_matrix_new (depth, depth, lambda_obstack);
817 lambda_matrix_copy (LTM_MATRIX (H), H1, depth, depth);
819 for (i = 0; i < depth; i++)
820 H1[i][i] = 0;
822 /* Computes the linear offsets of the loop bounds. */
823 target = lambda_matrix_new (depth, depth, lambda_obstack);
824 lambda_matrix_mult (H1, inverse, target, depth, depth, depth);
826 target_nest = lambda_loopnest_new (depth, invariants, lambda_obstack);
828 for (i = 0; i < depth; i++)
831 /* Get a new loop structure. */
832 target_loop = lambda_loop_new (lambda_obstack);
833 LN_LOOPS (target_nest)[i] = target_loop;
835 /* Computes the gcd of the coefficients of the linear part. */
836 gcd1 = lambda_vector_gcd (target[i], i);
838 /* Include the denominator in the GCD. */
839 gcd1 = gcd (gcd1, determinant);
841 /* Now divide through by the gcd. */
842 for (j = 0; j < i; j++)
843 target[i][j] = target[i][j] / gcd1;
845 expression = lambda_linear_expression_new (depth, invariants,
846 lambda_obstack);
847 lambda_vector_copy (target[i], LLE_COEFFICIENTS (expression), depth);
848 LLE_DENOMINATOR (expression) = determinant / gcd1;
849 LLE_CONSTANT (expression) = 0;
850 lambda_vector_clear (LLE_INVARIANT_COEFFICIENTS (expression),
851 invariants);
852 LL_LINEAR_OFFSET (target_loop) = expression;
855 /* For each loop, compute the new bounds from H. */
856 for (i = 0; i < depth; i++)
858 auxillary_loop = LN_LOOPS (auxillary_nest)[i];
859 target_loop = LN_LOOPS (target_nest)[i];
860 LL_STEP (target_loop) = LTM_MATRIX (H)[i][i];
861 factor = LTM_MATRIX (H)[i][i];
863 /* First we do the lower bound. */
864 auxillary_expr = LL_LOWER_BOUND (auxillary_loop);
866 for (; auxillary_expr != NULL;
867 auxillary_expr = LLE_NEXT (auxillary_expr))
869 target_expr = lambda_linear_expression_new (depth, invariants,
870 lambda_obstack);
871 lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
872 depth, inverse, depth,
873 LLE_COEFFICIENTS (target_expr));
874 lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
875 LLE_COEFFICIENTS (target_expr), depth,
876 factor);
878 LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
879 lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
880 LLE_INVARIANT_COEFFICIENTS (target_expr),
881 invariants);
882 lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
883 LLE_INVARIANT_COEFFICIENTS (target_expr),
884 invariants, factor);
885 LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
887 if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
889 LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
890 * determinant;
891 lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
892 (target_expr),
893 LLE_INVARIANT_COEFFICIENTS
894 (target_expr), invariants,
895 determinant);
896 LLE_DENOMINATOR (target_expr) =
897 LLE_DENOMINATOR (target_expr) * determinant;
899 /* Find the gcd and divide by it here, rather than doing it
900 at the tree level. */
901 gcd1 = lambda_vector_gcd (LLE_COEFFICIENTS (target_expr), depth);
902 gcd2 = lambda_vector_gcd (LLE_INVARIANT_COEFFICIENTS (target_expr),
903 invariants);
904 gcd1 = gcd (gcd1, gcd2);
905 gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
906 gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
907 for (j = 0; j < depth; j++)
908 LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
909 for (j = 0; j < invariants; j++)
910 LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
911 LLE_CONSTANT (target_expr) /= gcd1;
912 LLE_DENOMINATOR (target_expr) /= gcd1;
913 /* Ignore if identical to existing bound. */
914 if (!lle_equal (LL_LOWER_BOUND (target_loop), target_expr, depth,
915 invariants))
917 LLE_NEXT (target_expr) = LL_LOWER_BOUND (target_loop);
918 LL_LOWER_BOUND (target_loop) = target_expr;
921 /* Now do the upper bound. */
922 auxillary_expr = LL_UPPER_BOUND (auxillary_loop);
924 for (; auxillary_expr != NULL;
925 auxillary_expr = LLE_NEXT (auxillary_expr))
927 target_expr = lambda_linear_expression_new (depth, invariants,
928 lambda_obstack);
929 lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
930 depth, inverse, depth,
931 LLE_COEFFICIENTS (target_expr));
932 lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
933 LLE_COEFFICIENTS (target_expr), depth,
934 factor);
935 LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
936 lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
937 LLE_INVARIANT_COEFFICIENTS (target_expr),
938 invariants);
939 lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
940 LLE_INVARIANT_COEFFICIENTS (target_expr),
941 invariants, factor);
942 LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
944 if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
946 LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
947 * determinant;
948 lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
949 (target_expr),
950 LLE_INVARIANT_COEFFICIENTS
951 (target_expr), invariants,
952 determinant);
953 LLE_DENOMINATOR (target_expr) =
954 LLE_DENOMINATOR (target_expr) * determinant;
956 /* Find the gcd and divide by it here, instead of at the
957 tree level. */
958 gcd1 = lambda_vector_gcd (LLE_COEFFICIENTS (target_expr), depth);
959 gcd2 = lambda_vector_gcd (LLE_INVARIANT_COEFFICIENTS (target_expr),
960 invariants);
961 gcd1 = gcd (gcd1, gcd2);
962 gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
963 gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
964 for (j = 0; j < depth; j++)
965 LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
966 for (j = 0; j < invariants; j++)
967 LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
968 LLE_CONSTANT (target_expr) /= gcd1;
969 LLE_DENOMINATOR (target_expr) /= gcd1;
970 /* Ignore if equal to existing bound. */
971 if (!lle_equal (LL_UPPER_BOUND (target_loop), target_expr, depth,
972 invariants))
974 LLE_NEXT (target_expr) = LL_UPPER_BOUND (target_loop);
975 LL_UPPER_BOUND (target_loop) = target_expr;
979 for (i = 0; i < depth; i++)
981 target_loop = LN_LOOPS (target_nest)[i];
982 /* If necessary, exchange the upper and lower bounds and negate
983 the step size. */
984 if (stepsigns[i] < 0)
986 LL_STEP (target_loop) *= -1;
987 tmp_expr = LL_LOWER_BOUND (target_loop);
988 LL_LOWER_BOUND (target_loop) = LL_UPPER_BOUND (target_loop);
989 LL_UPPER_BOUND (target_loop) = tmp_expr;
992 return target_nest;
995 /* Compute the step signs of TRANS, using TRANS and stepsigns. Return the new
996 result. */
998 static lambda_vector
999 lambda_compute_step_signs (lambda_trans_matrix trans,
1000 lambda_vector stepsigns,
1001 struct obstack * lambda_obstack)
1003 lambda_matrix matrix, H;
1004 int size;
1005 lambda_vector newsteps;
1006 int i, j, factor, minimum_column;
1007 int temp;
1009 matrix = LTM_MATRIX (trans);
1010 size = LTM_ROWSIZE (trans);
1011 H = lambda_matrix_new (size, size, lambda_obstack);
1013 newsteps = lambda_vector_new (size);
1014 lambda_vector_copy (stepsigns, newsteps, size);
1016 lambda_matrix_copy (matrix, H, size, size);
1018 for (j = 0; j < size; j++)
1020 lambda_vector row;
1021 row = H[j];
1022 for (i = j; i < size; i++)
1023 if (row[i] < 0)
1024 lambda_matrix_col_negate (H, size, i);
1025 while (lambda_vector_first_nz (row, size, j + 1) < size)
1027 minimum_column = lambda_vector_min_nz (row, size, j);
1028 lambda_matrix_col_exchange (H, size, j, minimum_column);
1030 temp = newsteps[j];
1031 newsteps[j] = newsteps[minimum_column];
1032 newsteps[minimum_column] = temp;
1034 for (i = j + 1; i < size; i++)
1036 factor = row[i] / row[j];
1037 lambda_matrix_col_add (H, size, j, i, -1 * factor);
1041 return newsteps;
1044 /* Transform NEST according to TRANS, and return the new loopnest.
1045 This involves
1046 1. Computing a lattice base for the transformation
1047 2. Composing the dense base with the specified transformation (TRANS)
1048 3. Decomposing the combined transformation into a lower triangular portion,
1049 and a unimodular portion.
1050 4. Computing the auxiliary nest using the unimodular portion.
1051 5. Computing the target nest using the auxiliary nest and the lower
1052 triangular portion. */
1054 lambda_loopnest
1055 lambda_loopnest_transform (lambda_loopnest nest, lambda_trans_matrix trans,
1056 struct obstack * lambda_obstack)
1058 lambda_loopnest auxillary_nest, target_nest;
1060 int depth, invariants;
1061 int i, j;
1062 lambda_lattice lattice;
1063 lambda_trans_matrix trans1, H, U;
1064 lambda_loop loop;
1065 lambda_linear_expression expression;
1066 lambda_vector origin;
1067 lambda_matrix origin_invariants;
1068 lambda_vector stepsigns;
1069 int f;
1071 depth = LN_DEPTH (nest);
1072 invariants = LN_INVARIANTS (nest);
1074 /* Keep track of the signs of the loop steps. */
1075 stepsigns = lambda_vector_new (depth);
1076 for (i = 0; i < depth; i++)
1078 if (LL_STEP (LN_LOOPS (nest)[i]) > 0)
1079 stepsigns[i] = 1;
1080 else
1081 stepsigns[i] = -1;
1084 /* Compute the lattice base. */
1085 lattice = lambda_lattice_compute_base (nest, lambda_obstack);
1086 trans1 = lambda_trans_matrix_new (depth, depth, lambda_obstack);
1088 /* Multiply the transformation matrix by the lattice base. */
1090 lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_BASE (lattice),
1091 LTM_MATRIX (trans1), depth, depth, depth);
1093 /* Compute the Hermite normal form for the new transformation matrix. */
1094 H = lambda_trans_matrix_new (depth, depth, lambda_obstack);
1095 U = lambda_trans_matrix_new (depth, depth, lambda_obstack);
1096 lambda_matrix_hermite (LTM_MATRIX (trans1), depth, LTM_MATRIX (H),
1097 LTM_MATRIX (U));
1099 /* Compute the auxiliary loop nest's space from the unimodular
1100 portion. */
1101 auxillary_nest = lambda_compute_auxillary_space (nest, U,
1102 lambda_obstack);
1104 /* Compute the loop step signs from the old step signs and the
1105 transformation matrix. */
1106 stepsigns = lambda_compute_step_signs (trans1, stepsigns,
1107 lambda_obstack);
1109 /* Compute the target loop nest space from the auxiliary nest and
1110 the lower triangular matrix H. */
1111 target_nest = lambda_compute_target_space (auxillary_nest, H, stepsigns,
1112 lambda_obstack);
1113 origin = lambda_vector_new (depth);
1114 origin_invariants = lambda_matrix_new (depth, invariants, lambda_obstack);
1115 lambda_matrix_vector_mult (LTM_MATRIX (trans), depth, depth,
1116 LATTICE_ORIGIN (lattice), origin);
1117 lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_ORIGIN_INVARIANTS (lattice),
1118 origin_invariants, depth, depth, invariants);
1120 for (i = 0; i < depth; i++)
1122 loop = LN_LOOPS (target_nest)[i];
1123 expression = LL_LINEAR_OFFSET (loop);
1124 if (lambda_vector_zerop (LLE_COEFFICIENTS (expression), depth))
1125 f = 1;
1126 else
1127 f = LLE_DENOMINATOR (expression);
1129 LLE_CONSTANT (expression) += f * origin[i];
1131 for (j = 0; j < invariants; j++)
1132 LLE_INVARIANT_COEFFICIENTS (expression)[j] +=
1133 f * origin_invariants[i][j];
1136 return target_nest;
1140 /* Convert a gcc tree expression EXPR to a lambda linear expression, and
1141 return the new expression. DEPTH is the depth of the loopnest.
1142 OUTERINDUCTIONVARS is an array of the induction variables for outer loops
1143 in this nest. INVARIANTS is the array of invariants for the loop. EXTRA
1144 is the amount we have to add/subtract from the expression because of the
1145 type of comparison it is used in. */
1147 static lambda_linear_expression
1148 gcc_tree_to_linear_expression (int depth, tree expr,
1149 VEC(tree,heap) *outerinductionvars,
1150 VEC(tree,heap) *invariants, int extra,
1151 struct obstack * lambda_obstack)
1153 lambda_linear_expression lle = NULL;
1154 switch (TREE_CODE (expr))
1156 case INTEGER_CST:
1158 lle = lambda_linear_expression_new (depth, 2 * depth, lambda_obstack);
1159 LLE_CONSTANT (lle) = TREE_INT_CST_LOW (expr);
1160 if (extra != 0)
1161 LLE_CONSTANT (lle) += extra;
1163 LLE_DENOMINATOR (lle) = 1;
1165 break;
1166 case SSA_NAME:
1168 tree iv, invar;
1169 size_t i;
1170 for (i = 0; VEC_iterate (tree, outerinductionvars, i, iv); i++)
1171 if (iv != NULL)
1173 if (SSA_NAME_VAR (iv) == SSA_NAME_VAR (expr))
1175 lle = lambda_linear_expression_new (depth, 2 * depth,
1176 lambda_obstack);
1177 LLE_COEFFICIENTS (lle)[i] = 1;
1178 if (extra != 0)
1179 LLE_CONSTANT (lle) = extra;
1181 LLE_DENOMINATOR (lle) = 1;
1184 for (i = 0; VEC_iterate (tree, invariants, i, invar); i++)
1185 if (invar != NULL)
1187 if (SSA_NAME_VAR (invar) == SSA_NAME_VAR (expr))
1189 lle = lambda_linear_expression_new (depth, 2 * depth,
1190 lambda_obstack);
1191 LLE_INVARIANT_COEFFICIENTS (lle)[i] = 1;
1192 if (extra != 0)
1193 LLE_CONSTANT (lle) = extra;
1194 LLE_DENOMINATOR (lle) = 1;
1198 break;
1199 default:
1200 return NULL;
1203 return lle;
1206 /* Return the depth of the loopnest NEST */
1208 static int
1209 depth_of_nest (struct loop *nest)
1211 size_t depth = 0;
1212 while (nest)
1214 depth++;
1215 nest = nest->inner;
1217 return depth;
1221 /* Return true if OP is invariant in LOOP and all outer loops. */
1223 static bool
1224 invariant_in_loop_and_outer_loops (struct loop *loop, tree op)
1226 if (is_gimple_min_invariant (op))
1227 return true;
1228 if (loop_depth (loop) == 0)
1229 return true;
1230 if (!expr_invariant_in_loop_p (loop, op))
1231 return false;
1232 if (!invariant_in_loop_and_outer_loops (loop_outer (loop), op))
1233 return false;
1234 return true;
1237 /* Generate a lambda loop from a gcc loop LOOP. Return the new lambda loop,
1238 or NULL if it could not be converted.
1239 DEPTH is the depth of the loop.
1240 INVARIANTS is a pointer to the array of loop invariants.
1241 The induction variable for this loop should be stored in the parameter
1242 OURINDUCTIONVAR.
1243 OUTERINDUCTIONVARS is an array of induction variables for outer loops. */
1245 static lambda_loop
1246 gcc_loop_to_lambda_loop (struct loop *loop, int depth,
1247 VEC(tree,heap) ** invariants,
1248 tree * ourinductionvar,
1249 VEC(tree,heap) * outerinductionvars,
1250 VEC(tree,heap) ** lboundvars,
1251 VEC(tree,heap) ** uboundvars,
1252 VEC(int,heap) ** steps,
1253 struct obstack * lambda_obstack)
1255 gimple phi;
1256 gimple exit_cond;
1257 tree access_fn, inductionvar;
1258 tree step;
1259 lambda_loop lloop = NULL;
1260 lambda_linear_expression lbound, ubound;
1261 tree test_lhs, test_rhs;
1262 int stepint;
1263 int extra = 0;
1264 tree lboundvar, uboundvar, uboundresult;
1266 /* Find out induction var and exit condition. */
1267 inductionvar = find_induction_var_from_exit_cond (loop);
1268 exit_cond = get_loop_exit_condition (loop);
1270 if (inductionvar == NULL || exit_cond == NULL)
1272 if (dump_file && (dump_flags & TDF_DETAILS))
1273 fprintf (dump_file,
1274 "Unable to convert loop: Cannot determine exit condition or induction variable for loop.\n");
1275 return NULL;
1278 if (SSA_NAME_DEF_STMT (inductionvar) == NULL)
1281 if (dump_file && (dump_flags & TDF_DETAILS))
1282 fprintf (dump_file,
1283 "Unable to convert loop: Cannot find PHI node for induction variable\n");
1285 return NULL;
1288 phi = SSA_NAME_DEF_STMT (inductionvar);
1289 if (gimple_code (phi) != GIMPLE_PHI)
1291 tree op = SINGLE_SSA_TREE_OPERAND (phi, SSA_OP_USE);
1292 if (!op)
1295 if (dump_file && (dump_flags & TDF_DETAILS))
1296 fprintf (dump_file,
1297 "Unable to convert loop: Cannot find PHI node for induction variable\n");
1299 return NULL;
1302 phi = SSA_NAME_DEF_STMT (op);
1303 if (gimple_code (phi) != GIMPLE_PHI)
1305 if (dump_file && (dump_flags & TDF_DETAILS))
1306 fprintf (dump_file,
1307 "Unable to convert loop: Cannot find PHI node for induction variable\n");
1308 return NULL;
1312 /* The induction variable name/version we want to put in the array is the
1313 result of the induction variable phi node. */
1314 *ourinductionvar = PHI_RESULT (phi);
1315 access_fn = instantiate_parameters
1316 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1317 if (access_fn == chrec_dont_know)
1319 if (dump_file && (dump_flags & TDF_DETAILS))
1320 fprintf (dump_file,
1321 "Unable to convert loop: Access function for induction variable phi is unknown\n");
1323 return NULL;
1326 step = evolution_part_in_loop_num (access_fn, loop->num);
1327 if (!step || step == chrec_dont_know)
1329 if (dump_file && (dump_flags & TDF_DETAILS))
1330 fprintf (dump_file,
1331 "Unable to convert loop: Cannot determine step of loop.\n");
1333 return NULL;
1335 if (TREE_CODE (step) != INTEGER_CST)
1338 if (dump_file && (dump_flags & TDF_DETAILS))
1339 fprintf (dump_file,
1340 "Unable to convert loop: Step of loop is not integer.\n");
1341 return NULL;
1344 stepint = TREE_INT_CST_LOW (step);
1346 /* Only want phis for induction vars, which will have two
1347 arguments. */
1348 if (gimple_phi_num_args (phi) != 2)
1350 if (dump_file && (dump_flags & TDF_DETAILS))
1351 fprintf (dump_file,
1352 "Unable to convert loop: PHI node for induction variable has >2 arguments\n");
1353 return NULL;
1356 /* Another induction variable check. One argument's source should be
1357 in the loop, one outside the loop. */
1358 if (flow_bb_inside_loop_p (loop, gimple_phi_arg_edge (phi, 0)->src)
1359 && flow_bb_inside_loop_p (loop, gimple_phi_arg_edge (phi, 1)->src))
1362 if (dump_file && (dump_flags & TDF_DETAILS))
1363 fprintf (dump_file,
1364 "Unable to convert loop: PHI edges both inside loop, or both outside loop.\n");
1366 return NULL;
1369 if (flow_bb_inside_loop_p (loop, gimple_phi_arg_edge (phi, 0)->src))
1371 lboundvar = PHI_ARG_DEF (phi, 1);
1372 lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1373 outerinductionvars, *invariants,
1374 0, lambda_obstack);
1376 else
1378 lboundvar = PHI_ARG_DEF (phi, 0);
1379 lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1380 outerinductionvars, *invariants,
1381 0, lambda_obstack);
1384 if (!lbound)
1387 if (dump_file && (dump_flags & TDF_DETAILS))
1388 fprintf (dump_file,
1389 "Unable to convert loop: Cannot convert lower bound to linear expression\n");
1391 return NULL;
1393 /* One part of the test may be a loop invariant tree. */
1394 VEC_reserve (tree, heap, *invariants, 1);
1395 test_lhs = gimple_cond_lhs (exit_cond);
1396 test_rhs = gimple_cond_rhs (exit_cond);
1398 if (TREE_CODE (test_rhs) == SSA_NAME
1399 && invariant_in_loop_and_outer_loops (loop, test_rhs))
1400 VEC_quick_push (tree, *invariants, test_rhs);
1401 else if (TREE_CODE (test_lhs) == SSA_NAME
1402 && invariant_in_loop_and_outer_loops (loop, test_lhs))
1403 VEC_quick_push (tree, *invariants, test_lhs);
1405 /* The non-induction variable part of the test is the upper bound variable.
1407 if (test_lhs == inductionvar)
1408 uboundvar = test_rhs;
1409 else
1410 uboundvar = test_lhs;
1412 /* We only size the vectors assuming we have, at max, 2 times as many
1413 invariants as we do loops (one for each bound).
1414 This is just an arbitrary number, but it has to be matched against the
1415 code below. */
1416 gcc_assert (VEC_length (tree, *invariants) <= (unsigned int) (2 * depth));
1419 /* We might have some leftover. */
1420 if (gimple_cond_code (exit_cond) == LT_EXPR)
1421 extra = -1 * stepint;
1422 else if (gimple_cond_code (exit_cond) == NE_EXPR)
1423 extra = -1 * stepint;
1424 else if (gimple_cond_code (exit_cond) == GT_EXPR)
1425 extra = -1 * stepint;
1426 else if (gimple_cond_code (exit_cond) == EQ_EXPR)
1427 extra = 1 * stepint;
1429 ubound = gcc_tree_to_linear_expression (depth, uboundvar,
1430 outerinductionvars,
1431 *invariants, extra, lambda_obstack);
1432 uboundresult = build2 (PLUS_EXPR, TREE_TYPE (uboundvar), uboundvar,
1433 build_int_cst (TREE_TYPE (uboundvar), extra));
1434 VEC_safe_push (tree, heap, *uboundvars, uboundresult);
1435 VEC_safe_push (tree, heap, *lboundvars, lboundvar);
1436 VEC_safe_push (int, heap, *steps, stepint);
1437 if (!ubound)
1439 if (dump_file && (dump_flags & TDF_DETAILS))
1440 fprintf (dump_file,
1441 "Unable to convert loop: Cannot convert upper bound to linear expression\n");
1442 return NULL;
1445 lloop = lambda_loop_new (lambda_obstack);
1446 LL_STEP (lloop) = stepint;
1447 LL_LOWER_BOUND (lloop) = lbound;
1448 LL_UPPER_BOUND (lloop) = ubound;
1449 return lloop;
1452 /* Given a LOOP, find the induction variable it is testing against in the exit
1453 condition. Return the induction variable if found, NULL otherwise. */
1455 tree
1456 find_induction_var_from_exit_cond (struct loop *loop)
1458 gimple expr = get_loop_exit_condition (loop);
1459 tree ivarop;
1460 tree test_lhs, test_rhs;
1461 if (expr == NULL)
1462 return NULL_TREE;
1463 if (gimple_code (expr) != GIMPLE_COND)
1464 return NULL_TREE;
1465 test_lhs = gimple_cond_lhs (expr);
1466 test_rhs = gimple_cond_rhs (expr);
1468 /* Find the side that is invariant in this loop. The ivar must be the other
1469 side. */
1471 if (expr_invariant_in_loop_p (loop, test_lhs))
1472 ivarop = test_rhs;
1473 else if (expr_invariant_in_loop_p (loop, test_rhs))
1474 ivarop = test_lhs;
1475 else
1476 return NULL_TREE;
1478 if (TREE_CODE (ivarop) != SSA_NAME)
1479 return NULL_TREE;
1480 return ivarop;
1483 DEF_VEC_P(lambda_loop);
1484 DEF_VEC_ALLOC_P(lambda_loop,heap);
1486 /* Generate a lambda loopnest from a gcc loopnest LOOP_NEST.
1487 Return the new loop nest.
1488 INDUCTIONVARS is a pointer to an array of induction variables for the
1489 loopnest that will be filled in during this process.
1490 INVARIANTS is a pointer to an array of invariants that will be filled in
1491 during this process. */
1493 lambda_loopnest
1494 gcc_loopnest_to_lambda_loopnest (struct loop *loop_nest,
1495 VEC(tree,heap) **inductionvars,
1496 VEC(tree,heap) **invariants,
1497 struct obstack * lambda_obstack)
1499 lambda_loopnest ret = NULL;
1500 struct loop *temp = loop_nest;
1501 int depth = depth_of_nest (loop_nest);
1502 size_t i;
1503 VEC(lambda_loop,heap) *loops = NULL;
1504 VEC(tree,heap) *uboundvars = NULL;
1505 VEC(tree,heap) *lboundvars = NULL;
1506 VEC(int,heap) *steps = NULL;
1507 lambda_loop newloop;
1508 tree inductionvar = NULL;
1509 bool perfect_nest = perfect_nest_p (loop_nest);
1511 if (!perfect_nest && !can_convert_to_perfect_nest (loop_nest))
1512 goto fail;
1514 while (temp)
1516 newloop = gcc_loop_to_lambda_loop (temp, depth, invariants,
1517 &inductionvar, *inductionvars,
1518 &lboundvars, &uboundvars,
1519 &steps, lambda_obstack);
1520 if (!newloop)
1521 goto fail;
1523 VEC_safe_push (tree, heap, *inductionvars, inductionvar);
1524 VEC_safe_push (lambda_loop, heap, loops, newloop);
1525 temp = temp->inner;
1528 if (!perfect_nest)
1530 if (!perfect_nestify (loop_nest, lboundvars, uboundvars, steps,
1531 *inductionvars))
1533 if (dump_file)
1534 fprintf (dump_file,
1535 "Not a perfect loop nest and couldn't convert to one.\n");
1536 goto fail;
1538 else if (dump_file)
1539 fprintf (dump_file,
1540 "Successfully converted loop nest to perfect loop nest.\n");
1543 ret = lambda_loopnest_new (depth, 2 * depth, lambda_obstack);
1545 for (i = 0; VEC_iterate (lambda_loop, loops, i, newloop); i++)
1546 LN_LOOPS (ret)[i] = newloop;
1548 fail:
1549 VEC_free (lambda_loop, heap, loops);
1550 VEC_free (tree, heap, uboundvars);
1551 VEC_free (tree, heap, lboundvars);
1552 VEC_free (int, heap, steps);
1554 return ret;
1557 /* Convert a lambda body vector LBV to a gcc tree, and return the new tree.
1558 STMTS_TO_INSERT is a pointer to a tree where the statements we need to be
1559 inserted for us are stored. INDUCTION_VARS is the array of induction
1560 variables for the loop this LBV is from. TYPE is the tree type to use for
1561 the variables and trees involved. */
1563 static tree
1564 lbv_to_gcc_expression (lambda_body_vector lbv,
1565 tree type, VEC(tree,heap) *induction_vars,
1566 gimple_seq *stmts_to_insert)
1568 int k;
1569 tree resvar;
1570 tree expr = build_linear_expr (type, LBV_COEFFICIENTS (lbv), induction_vars);
1572 k = LBV_DENOMINATOR (lbv);
1573 gcc_assert (k != 0);
1574 if (k != 1)
1575 expr = fold_build2 (CEIL_DIV_EXPR, type, expr, build_int_cst (type, k));
1577 resvar = create_tmp_var (type, "lbvtmp");
1578 add_referenced_var (resvar);
1579 return force_gimple_operand (fold (expr), stmts_to_insert, true, resvar);
1582 /* Convert a linear expression from coefficient and constant form to a
1583 gcc tree.
1584 Return the tree that represents the final value of the expression.
1585 LLE is the linear expression to convert.
1586 OFFSET is the linear offset to apply to the expression.
1587 TYPE is the tree type to use for the variables and math.
1588 INDUCTION_VARS is a vector of induction variables for the loops.
1589 INVARIANTS is a vector of the loop nest invariants.
1590 WRAP specifies what tree code to wrap the results in, if there is more than
1591 one (it is either MAX_EXPR, or MIN_EXPR).
1592 STMTS_TO_INSERT Is a pointer to the statement list we fill in with
1593 statements that need to be inserted for the linear expression. */
1595 static tree
1596 lle_to_gcc_expression (lambda_linear_expression lle,
1597 lambda_linear_expression offset,
1598 tree type,
1599 VEC(tree,heap) *induction_vars,
1600 VEC(tree,heap) *invariants,
1601 enum tree_code wrap, gimple_seq *stmts_to_insert)
1603 int k;
1604 tree resvar;
1605 tree expr = NULL_TREE;
1606 VEC(tree,heap) *results = NULL;
1608 gcc_assert (wrap == MAX_EXPR || wrap == MIN_EXPR);
1610 /* Build up the linear expressions. */
1611 for (; lle != NULL; lle = LLE_NEXT (lle))
1613 expr = build_linear_expr (type, LLE_COEFFICIENTS (lle), induction_vars);
1614 expr = fold_build2 (PLUS_EXPR, type, expr,
1615 build_linear_expr (type,
1616 LLE_INVARIANT_COEFFICIENTS (lle),
1617 invariants));
1619 k = LLE_CONSTANT (lle);
1620 if (k)
1621 expr = fold_build2 (PLUS_EXPR, type, expr, build_int_cst (type, k));
1623 k = LLE_CONSTANT (offset);
1624 if (k)
1625 expr = fold_build2 (PLUS_EXPR, type, expr, build_int_cst (type, k));
1627 k = LLE_DENOMINATOR (lle);
1628 if (k != 1)
1629 expr = fold_build2 (wrap == MAX_EXPR ? CEIL_DIV_EXPR : FLOOR_DIV_EXPR,
1630 type, expr, build_int_cst (type, k));
1632 expr = fold (expr);
1633 VEC_safe_push (tree, heap, results, expr);
1636 gcc_assert (expr);
1638 /* We may need to wrap the results in a MAX_EXPR or MIN_EXPR. */
1639 if (VEC_length (tree, results) > 1)
1641 size_t i;
1642 tree op;
1644 expr = VEC_index (tree, results, 0);
1645 for (i = 1; VEC_iterate (tree, results, i, op); i++)
1646 expr = fold_build2 (wrap, type, expr, op);
1649 VEC_free (tree, heap, results);
1651 resvar = create_tmp_var (type, "lletmp");
1652 add_referenced_var (resvar);
1653 return force_gimple_operand (fold (expr), stmts_to_insert, true, resvar);
1656 /* Remove the induction variable defined at IV_STMT. */
1658 void
1659 remove_iv (gimple iv_stmt)
1661 gimple_stmt_iterator si = gsi_for_stmt (iv_stmt);
1663 if (gimple_code (iv_stmt) == GIMPLE_PHI)
1665 unsigned i;
1667 for (i = 0; i < gimple_phi_num_args (iv_stmt); i++)
1669 gimple stmt;
1670 imm_use_iterator imm_iter;
1671 tree arg = gimple_phi_arg_def (iv_stmt, i);
1672 bool used = false;
1674 if (TREE_CODE (arg) != SSA_NAME)
1675 continue;
1677 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, arg)
1678 if (stmt != iv_stmt && !is_gimple_debug (stmt))
1679 used = true;
1681 if (!used)
1682 remove_iv (SSA_NAME_DEF_STMT (arg));
1685 remove_phi_node (&si, true);
1687 else
1689 gsi_remove (&si, true);
1690 release_defs (iv_stmt);
1694 /* Transform a lambda loopnest NEW_LOOPNEST, which had TRANSFORM applied to
1695 it, back into gcc code. This changes the
1696 loops, their induction variables, and their bodies, so that they
1697 match the transformed loopnest.
1698 OLD_LOOPNEST is the loopnest before we've replaced it with the new
1699 loopnest.
1700 OLD_IVS is a vector of induction variables from the old loopnest.
1701 INVARIANTS is a vector of loop invariants from the old loopnest.
1702 NEW_LOOPNEST is the new lambda loopnest to replace OLD_LOOPNEST with.
1703 TRANSFORM is the matrix transform that was applied to OLD_LOOPNEST to get
1704 NEW_LOOPNEST. */
1706 void
1707 lambda_loopnest_to_gcc_loopnest (struct loop *old_loopnest,
1708 VEC(tree,heap) *old_ivs,
1709 VEC(tree,heap) *invariants,
1710 VEC(gimple,heap) **remove_ivs,
1711 lambda_loopnest new_loopnest,
1712 lambda_trans_matrix transform,
1713 struct obstack * lambda_obstack)
1715 struct loop *temp;
1716 size_t i = 0;
1717 unsigned j;
1718 size_t depth = 0;
1719 VEC(tree,heap) *new_ivs = NULL;
1720 tree oldiv;
1721 gimple_stmt_iterator bsi;
1723 transform = lambda_trans_matrix_inverse (transform, lambda_obstack);
1725 if (dump_file)
1727 fprintf (dump_file, "Inverse of transformation matrix:\n");
1728 print_lambda_trans_matrix (dump_file, transform);
1730 depth = depth_of_nest (old_loopnest);
1731 temp = old_loopnest;
1733 while (temp)
1735 lambda_loop newloop;
1736 basic_block bb;
1737 edge exit;
1738 tree ivvar, ivvarinced;
1739 gimple exitcond;
1740 gimple_seq stmts;
1741 enum tree_code testtype;
1742 tree newupperbound, newlowerbound;
1743 lambda_linear_expression offset;
1744 tree type;
1745 bool insert_after;
1746 gimple inc_stmt;
1748 oldiv = VEC_index (tree, old_ivs, i);
1749 type = TREE_TYPE (oldiv);
1751 /* First, build the new induction variable temporary */
1753 ivvar = create_tmp_var (type, "lnivtmp");
1754 add_referenced_var (ivvar);
1756 VEC_safe_push (tree, heap, new_ivs, ivvar);
1758 newloop = LN_LOOPS (new_loopnest)[i];
1760 /* Linear offset is a bit tricky to handle. Punt on the unhandled
1761 cases for now. */
1762 offset = LL_LINEAR_OFFSET (newloop);
1764 gcc_assert (LLE_DENOMINATOR (offset) == 1 &&
1765 lambda_vector_zerop (LLE_COEFFICIENTS (offset), depth));
1767 /* Now build the new lower bounds, and insert the statements
1768 necessary to generate it on the loop preheader. */
1769 stmts = NULL;
1770 newlowerbound = lle_to_gcc_expression (LL_LOWER_BOUND (newloop),
1771 LL_LINEAR_OFFSET (newloop),
1772 type,
1773 new_ivs,
1774 invariants, MAX_EXPR, &stmts);
1776 if (stmts)
1778 gsi_insert_seq_on_edge (loop_preheader_edge (temp), stmts);
1779 gsi_commit_edge_inserts ();
1781 /* Build the new upper bound and insert its statements in the
1782 basic block of the exit condition */
1783 stmts = NULL;
1784 newupperbound = lle_to_gcc_expression (LL_UPPER_BOUND (newloop),
1785 LL_LINEAR_OFFSET (newloop),
1786 type,
1787 new_ivs,
1788 invariants, MIN_EXPR, &stmts);
1789 exit = single_exit (temp);
1790 exitcond = get_loop_exit_condition (temp);
1791 bb = gimple_bb (exitcond);
1792 bsi = gsi_after_labels (bb);
1793 if (stmts)
1794 gsi_insert_seq_before (&bsi, stmts, GSI_NEW_STMT);
1796 /* Create the new iv. */
1798 standard_iv_increment_position (temp, &bsi, &insert_after);
1799 create_iv (newlowerbound,
1800 build_int_cst (type, LL_STEP (newloop)),
1801 ivvar, temp, &bsi, insert_after, &ivvar,
1802 NULL);
1804 /* Unfortunately, the incremented ivvar that create_iv inserted may not
1805 dominate the block containing the exit condition.
1806 So we simply create our own incremented iv to use in the new exit
1807 test, and let redundancy elimination sort it out. */
1808 inc_stmt = gimple_build_assign_with_ops (PLUS_EXPR, SSA_NAME_VAR (ivvar),
1809 ivvar,
1810 build_int_cst (type, LL_STEP (newloop)));
1812 ivvarinced = make_ssa_name (SSA_NAME_VAR (ivvar), inc_stmt);
1813 gimple_assign_set_lhs (inc_stmt, ivvarinced);
1814 bsi = gsi_for_stmt (exitcond);
1815 gsi_insert_before (&bsi, inc_stmt, GSI_SAME_STMT);
1817 /* Replace the exit condition with the new upper bound
1818 comparison. */
1820 testtype = LL_STEP (newloop) >= 0 ? LE_EXPR : GE_EXPR;
1822 /* We want to build a conditional where true means exit the loop, and
1823 false means continue the loop.
1824 So swap the testtype if this isn't the way things are.*/
1826 if (exit->flags & EDGE_FALSE_VALUE)
1827 testtype = swap_tree_comparison (testtype);
1829 gimple_cond_set_condition (exitcond, testtype, newupperbound, ivvarinced);
1830 update_stmt (exitcond);
1831 VEC_replace (tree, new_ivs, i, ivvar);
1833 i++;
1834 temp = temp->inner;
1837 /* Rewrite uses of the old ivs so that they are now specified in terms of
1838 the new ivs. */
1840 for (i = 0; VEC_iterate (tree, old_ivs, i, oldiv); i++)
1842 imm_use_iterator imm_iter;
1843 use_operand_p use_p;
1844 tree oldiv_def;
1845 gimple oldiv_stmt = SSA_NAME_DEF_STMT (oldiv);
1846 gimple stmt;
1848 if (gimple_code (oldiv_stmt) == GIMPLE_PHI)
1849 oldiv_def = PHI_RESULT (oldiv_stmt);
1850 else
1851 oldiv_def = SINGLE_SSA_TREE_OPERAND (oldiv_stmt, SSA_OP_DEF);
1852 gcc_assert (oldiv_def != NULL_TREE);
1854 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, oldiv_def)
1856 tree newiv;
1857 gimple_seq stmts;
1858 lambda_body_vector lbv, newlbv;
1860 if (is_gimple_debug (stmt))
1861 continue;
1863 /* Compute the new expression for the induction
1864 variable. */
1865 depth = VEC_length (tree, new_ivs);
1866 lbv = lambda_body_vector_new (depth, lambda_obstack);
1867 LBV_COEFFICIENTS (lbv)[i] = 1;
1869 newlbv = lambda_body_vector_compute_new (transform, lbv,
1870 lambda_obstack);
1872 stmts = NULL;
1873 newiv = lbv_to_gcc_expression (newlbv, TREE_TYPE (oldiv),
1874 new_ivs, &stmts);
1876 if (stmts && gimple_code (stmt) != GIMPLE_PHI)
1878 bsi = gsi_for_stmt (stmt);
1879 gsi_insert_seq_before (&bsi, stmts, GSI_SAME_STMT);
1882 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1883 propagate_value (use_p, newiv);
1885 if (stmts && gimple_code (stmt) == GIMPLE_PHI)
1886 for (j = 0; j < gimple_phi_num_args (stmt); j++)
1887 if (gimple_phi_arg_def (stmt, j) == newiv)
1888 gsi_insert_seq_on_edge (gimple_phi_arg_edge (stmt, j), stmts);
1890 update_stmt (stmt);
1893 /* Remove the now unused induction variable. */
1894 VEC_safe_push (gimple, heap, *remove_ivs, oldiv_stmt);
1896 VEC_free (tree, heap, new_ivs);
1899 /* Return TRUE if this is not interesting statement from the perspective of
1900 determining if we have a perfect loop nest. */
1902 static bool
1903 not_interesting_stmt (gimple stmt)
1905 /* Note that COND_EXPR's aren't interesting because if they were exiting the
1906 loop, we would have already failed the number of exits tests. */
1907 if (gimple_code (stmt) == GIMPLE_LABEL
1908 || gimple_code (stmt) == GIMPLE_GOTO
1909 || gimple_code (stmt) == GIMPLE_COND
1910 || is_gimple_debug (stmt))
1911 return true;
1912 return false;
1915 /* Return TRUE if PHI uses DEF for it's in-the-loop edge for LOOP. */
1917 static bool
1918 phi_loop_edge_uses_def (struct loop *loop, gimple phi, tree def)
1920 unsigned i;
1921 for (i = 0; i < gimple_phi_num_args (phi); i++)
1922 if (flow_bb_inside_loop_p (loop, gimple_phi_arg_edge (phi, i)->src))
1923 if (PHI_ARG_DEF (phi, i) == def)
1924 return true;
1925 return false;
1928 /* Return TRUE if STMT is a use of PHI_RESULT. */
1930 static bool
1931 stmt_uses_phi_result (gimple stmt, tree phi_result)
1933 tree use = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1935 /* This is conservatively true, because we only want SIMPLE bumpers
1936 of the form x +- constant for our pass. */
1937 return (use == phi_result);
1940 /* STMT is a bumper stmt for LOOP if the version it defines is used in the
1941 in-loop-edge in a phi node, and the operand it uses is the result of that
1942 phi node.
1943 I.E. i_29 = i_3 + 1
1944 i_3 = PHI (0, i_29); */
1946 static bool
1947 stmt_is_bumper_for_loop (struct loop *loop, gimple stmt)
1949 gimple use;
1950 tree def;
1951 imm_use_iterator iter;
1952 use_operand_p use_p;
1954 def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
1955 if (!def)
1956 return false;
1958 FOR_EACH_IMM_USE_FAST (use_p, iter, def)
1960 use = USE_STMT (use_p);
1961 if (gimple_code (use) == GIMPLE_PHI)
1963 if (phi_loop_edge_uses_def (loop, use, def))
1964 if (stmt_uses_phi_result (stmt, PHI_RESULT (use)))
1965 return true;
1968 return false;
1972 /* Return true if LOOP is a perfect loop nest.
1973 Perfect loop nests are those loop nests where all code occurs in the
1974 innermost loop body.
1975 If S is a program statement, then
1977 i.e.
1978 DO I = 1, 20
1980 DO J = 1, 20
1982 END DO
1983 END DO
1984 is not a perfect loop nest because of S1.
1986 DO I = 1, 20
1987 DO J = 1, 20
1990 END DO
1991 END DO
1992 is a perfect loop nest.
1994 Since we don't have high level loops anymore, we basically have to walk our
1995 statements and ignore those that are there because the loop needs them (IE
1996 the induction variable increment, and jump back to the top of the loop). */
1998 bool
1999 perfect_nest_p (struct loop *loop)
2001 basic_block *bbs;
2002 size_t i;
2003 gimple exit_cond;
2005 /* Loops at depth 0 are perfect nests. */
2006 if (!loop->inner)
2007 return true;
2009 bbs = get_loop_body (loop);
2010 exit_cond = get_loop_exit_condition (loop);
2012 for (i = 0; i < loop->num_nodes; i++)
2014 if (bbs[i]->loop_father == loop)
2016 gimple_stmt_iterator bsi;
2018 for (bsi = gsi_start_bb (bbs[i]); !gsi_end_p (bsi); gsi_next (&bsi))
2020 gimple stmt = gsi_stmt (bsi);
2022 if (gimple_code (stmt) == GIMPLE_COND
2023 && exit_cond != stmt)
2024 goto non_perfectly_nested;
2026 if (stmt == exit_cond
2027 || not_interesting_stmt (stmt)
2028 || stmt_is_bumper_for_loop (loop, stmt))
2029 continue;
2031 non_perfectly_nested:
2032 free (bbs);
2033 return false;
2038 free (bbs);
2040 return perfect_nest_p (loop->inner);
2043 /* Replace the USES of X in STMT, or uses with the same step as X with Y.
2044 YINIT is the initial value of Y, REPLACEMENTS is a hash table to
2045 avoid creating duplicate temporaries and FIRSTBSI is statement
2046 iterator where new temporaries should be inserted at the beginning
2047 of body basic block. */
2049 static void
2050 replace_uses_equiv_to_x_with_y (struct loop *loop, gimple stmt, tree x,
2051 int xstep, tree y, tree yinit,
2052 htab_t replacements,
2053 gimple_stmt_iterator *firstbsi)
2055 ssa_op_iter iter;
2056 use_operand_p use_p;
2058 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
2060 tree use = USE_FROM_PTR (use_p);
2061 tree step = NULL_TREE;
2062 tree scev, init, val, var;
2063 gimple setstmt;
2064 struct tree_map *h, in;
2065 void **loc;
2067 /* Replace uses of X with Y right away. */
2068 if (use == x)
2070 SET_USE (use_p, y);
2071 continue;
2074 scev = instantiate_parameters (loop,
2075 analyze_scalar_evolution (loop, use));
2077 if (scev == NULL || scev == chrec_dont_know)
2078 continue;
2080 step = evolution_part_in_loop_num (scev, loop->num);
2081 if (step == NULL
2082 || step == chrec_dont_know
2083 || TREE_CODE (step) != INTEGER_CST
2084 || int_cst_value (step) != xstep)
2085 continue;
2087 /* Use REPLACEMENTS hash table to cache already created
2088 temporaries. */
2089 in.hash = htab_hash_pointer (use);
2090 in.base.from = use;
2091 h = (struct tree_map *) htab_find_with_hash (replacements, &in, in.hash);
2092 if (h != NULL)
2094 SET_USE (use_p, h->to);
2095 continue;
2098 /* USE which has the same step as X should be replaced
2099 with a temporary set to Y + YINIT - INIT. */
2100 init = initial_condition_in_loop_num (scev, loop->num);
2101 gcc_assert (init != NULL && init != chrec_dont_know);
2102 if (TREE_TYPE (use) == TREE_TYPE (y))
2104 val = fold_build2 (MINUS_EXPR, TREE_TYPE (y), init, yinit);
2105 val = fold_build2 (PLUS_EXPR, TREE_TYPE (y), y, val);
2106 if (val == y)
2108 /* If X has the same type as USE, the same step
2109 and same initial value, it can be replaced by Y. */
2110 SET_USE (use_p, y);
2111 continue;
2114 else
2116 val = fold_build2 (MINUS_EXPR, TREE_TYPE (y), y, yinit);
2117 val = fold_convert (TREE_TYPE (use), val);
2118 val = fold_build2 (PLUS_EXPR, TREE_TYPE (use), val, init);
2121 /* Create a temporary variable and insert it at the beginning
2122 of the loop body basic block, right after the PHI node
2123 which sets Y. */
2124 var = create_tmp_var (TREE_TYPE (use), "perfecttmp");
2125 add_referenced_var (var);
2126 val = force_gimple_operand_gsi (firstbsi, val, false, NULL,
2127 true, GSI_SAME_STMT);
2128 setstmt = gimple_build_assign (var, val);
2129 var = make_ssa_name (var, setstmt);
2130 gimple_assign_set_lhs (setstmt, var);
2131 gsi_insert_before (firstbsi, setstmt, GSI_SAME_STMT);
2132 update_stmt (setstmt);
2133 SET_USE (use_p, var);
2134 h = GGC_NEW (struct tree_map);
2135 h->hash = in.hash;
2136 h->base.from = use;
2137 h->to = var;
2138 loc = htab_find_slot_with_hash (replacements, h, in.hash, INSERT);
2139 gcc_assert ((*(struct tree_map **)loc) == NULL);
2140 *(struct tree_map **) loc = h;
2144 /* Return true if STMT is an exit PHI for LOOP */
2146 static bool
2147 exit_phi_for_loop_p (struct loop *loop, gimple stmt)
2149 if (gimple_code (stmt) != GIMPLE_PHI
2150 || gimple_phi_num_args (stmt) != 1
2151 || gimple_bb (stmt) != single_exit (loop)->dest)
2152 return false;
2154 return true;
2157 /* Return true if STMT can be put back into the loop INNER, by
2158 copying it to the beginning of that loop and changing the uses. */
2160 static bool
2161 can_put_in_inner_loop (struct loop *inner, gimple stmt)
2163 imm_use_iterator imm_iter;
2164 use_operand_p use_p;
2166 gcc_assert (is_gimple_assign (stmt));
2167 if (gimple_vuse (stmt)
2168 || !stmt_invariant_in_loop_p (inner, stmt))
2169 return false;
2171 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_assign_lhs (stmt))
2173 if (!exit_phi_for_loop_p (inner, USE_STMT (use_p)))
2175 basic_block immbb = gimple_bb (USE_STMT (use_p));
2177 if (!flow_bb_inside_loop_p (inner, immbb))
2178 return false;
2181 return true;
2184 /* Return true if STMT can be put *after* the inner loop of LOOP. */
2186 static bool
2187 can_put_after_inner_loop (struct loop *loop, gimple stmt)
2189 imm_use_iterator imm_iter;
2190 use_operand_p use_p;
2192 if (gimple_vuse (stmt))
2193 return false;
2195 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_assign_lhs (stmt))
2197 if (!exit_phi_for_loop_p (loop, USE_STMT (use_p)))
2199 basic_block immbb = gimple_bb (USE_STMT (use_p));
2201 if (!dominated_by_p (CDI_DOMINATORS,
2202 immbb,
2203 loop->inner->header)
2204 && !can_put_in_inner_loop (loop->inner, stmt))
2205 return false;
2208 return true;
2211 /* Return true when the induction variable IV is simple enough to be
2212 re-synthesized. */
2214 static bool
2215 can_duplicate_iv (tree iv, struct loop *loop)
2217 tree scev = instantiate_parameters
2218 (loop, analyze_scalar_evolution (loop, iv));
2220 if (!automatically_generated_chrec_p (scev))
2222 tree step = evolution_part_in_loop_num (scev, loop->num);
2224 if (step && step != chrec_dont_know && TREE_CODE (step) == INTEGER_CST)
2225 return true;
2228 return false;
2231 /* If this is a scalar operation that can be put back into the inner
2232 loop, or after the inner loop, through copying, then do so. This
2233 works on the theory that any amount of scalar code we have to
2234 reduplicate into or after the loops is less expensive that the win
2235 we get from rearranging the memory walk the loop is doing so that
2236 it has better cache behavior. */
2238 static bool
2239 cannot_convert_modify_to_perfect_nest (gimple stmt, struct loop *loop)
2241 use_operand_p use_a, use_b;
2242 imm_use_iterator imm_iter;
2243 ssa_op_iter op_iter, op_iter1;
2244 tree op0 = gimple_assign_lhs (stmt);
2246 /* The statement should not define a variable used in the inner
2247 loop. */
2248 if (TREE_CODE (op0) == SSA_NAME
2249 && !can_duplicate_iv (op0, loop))
2250 FOR_EACH_IMM_USE_FAST (use_a, imm_iter, op0)
2251 if (gimple_bb (USE_STMT (use_a))->loop_father == loop->inner)
2252 return true;
2254 FOR_EACH_SSA_USE_OPERAND (use_a, stmt, op_iter, SSA_OP_USE)
2256 gimple node;
2257 tree op = USE_FROM_PTR (use_a);
2259 /* The variables should not be used in both loops. */
2260 if (!can_duplicate_iv (op, loop))
2261 FOR_EACH_IMM_USE_FAST (use_b, imm_iter, op)
2262 if (gimple_bb (USE_STMT (use_b))->loop_father == loop->inner)
2263 return true;
2265 /* The statement should not use the value of a scalar that was
2266 modified in the loop. */
2267 node = SSA_NAME_DEF_STMT (op);
2268 if (gimple_code (node) == GIMPLE_PHI)
2269 FOR_EACH_PHI_ARG (use_b, node, op_iter1, SSA_OP_USE)
2271 tree arg = USE_FROM_PTR (use_b);
2273 if (TREE_CODE (arg) == SSA_NAME)
2275 gimple arg_stmt = SSA_NAME_DEF_STMT (arg);
2277 if (gimple_bb (arg_stmt)
2278 && (gimple_bb (arg_stmt)->loop_father == loop->inner))
2279 return true;
2284 return false;
2286 /* Return true when BB contains statements that can harm the transform
2287 to a perfect loop nest. */
2289 static bool
2290 cannot_convert_bb_to_perfect_nest (basic_block bb, struct loop *loop)
2292 gimple_stmt_iterator bsi;
2293 gimple exit_condition = get_loop_exit_condition (loop);
2295 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2297 gimple stmt = gsi_stmt (bsi);
2299 if (stmt == exit_condition
2300 || not_interesting_stmt (stmt)
2301 || stmt_is_bumper_for_loop (loop, stmt))
2302 continue;
2304 if (is_gimple_assign (stmt))
2306 if (cannot_convert_modify_to_perfect_nest (stmt, loop))
2307 return true;
2309 if (can_duplicate_iv (gimple_assign_lhs (stmt), loop))
2310 continue;
2312 if (can_put_in_inner_loop (loop->inner, stmt)
2313 || can_put_after_inner_loop (loop, stmt))
2314 continue;
2317 /* If the bb of a statement we care about isn't dominated by the
2318 header of the inner loop, then we can't handle this case
2319 right now. This test ensures that the statement comes
2320 completely *after* the inner loop. */
2321 if (!dominated_by_p (CDI_DOMINATORS,
2322 gimple_bb (stmt),
2323 loop->inner->header))
2324 return true;
2327 return false;
2331 /* Return TRUE if LOOP is an imperfect nest that we can convert to a
2332 perfect one. At the moment, we only handle imperfect nests of
2333 depth 2, where all of the statements occur after the inner loop. */
2335 static bool
2336 can_convert_to_perfect_nest (struct loop *loop)
2338 basic_block *bbs;
2339 size_t i;
2340 gimple_stmt_iterator si;
2342 /* Can't handle triply nested+ loops yet. */
2343 if (!loop->inner || loop->inner->inner)
2344 return false;
2346 bbs = get_loop_body (loop);
2347 for (i = 0; i < loop->num_nodes; i++)
2348 if (bbs[i]->loop_father == loop
2349 && cannot_convert_bb_to_perfect_nest (bbs[i], loop))
2350 goto fail;
2352 /* We also need to make sure the loop exit only has simple copy phis in it,
2353 otherwise we don't know how to transform it into a perfect nest. */
2354 for (si = gsi_start_phis (single_exit (loop)->dest);
2355 !gsi_end_p (si);
2356 gsi_next (&si))
2357 if (gimple_phi_num_args (gsi_stmt (si)) != 1)
2358 goto fail;
2360 free (bbs);
2361 return true;
2363 fail:
2364 free (bbs);
2365 return false;
2369 DEF_VEC_I(source_location);
2370 DEF_VEC_ALLOC_I(source_location,heap);
2372 /* Transform the loop nest into a perfect nest, if possible.
2373 LOOP is the loop nest to transform into a perfect nest
2374 LBOUNDS are the lower bounds for the loops to transform
2375 UBOUNDS are the upper bounds for the loops to transform
2376 STEPS is the STEPS for the loops to transform.
2377 LOOPIVS is the induction variables for the loops to transform.
2379 Basically, for the case of
2381 FOR (i = 0; i < 50; i++)
2383 FOR (j =0; j < 50; j++)
2385 <whatever>
2387 <some code>
2390 This function will transform it into a perfect loop nest by splitting the
2391 outer loop into two loops, like so:
2393 FOR (i = 0; i < 50; i++)
2395 FOR (j = 0; j < 50; j++)
2397 <whatever>
2401 FOR (i = 0; i < 50; i ++)
2403 <some code>
2406 Return FALSE if we can't make this loop into a perfect nest. */
2408 static bool
2409 perfect_nestify (struct loop *loop,
2410 VEC(tree,heap) *lbounds,
2411 VEC(tree,heap) *ubounds,
2412 VEC(int,heap) *steps,
2413 VEC(tree,heap) *loopivs)
2415 basic_block *bbs;
2416 gimple exit_condition;
2417 gimple cond_stmt;
2418 basic_block preheaderbb, headerbb, bodybb, latchbb, olddest;
2419 int i;
2420 gimple_stmt_iterator bsi, firstbsi;
2421 bool insert_after;
2422 edge e;
2423 struct loop *newloop;
2424 gimple phi;
2425 tree uboundvar;
2426 gimple stmt;
2427 tree oldivvar, ivvar, ivvarinced;
2428 VEC(tree,heap) *phis = NULL;
2429 VEC(source_location,heap) *locations = NULL;
2430 htab_t replacements = NULL;
2432 /* Create the new loop. */
2433 olddest = single_exit (loop)->dest;
2434 preheaderbb = split_edge (single_exit (loop));
2435 headerbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2437 /* Push the exit phi nodes that we are moving. */
2438 for (bsi = gsi_start_phis (olddest); !gsi_end_p (bsi); gsi_next (&bsi))
2440 phi = gsi_stmt (bsi);
2441 VEC_reserve (tree, heap, phis, 2);
2442 VEC_reserve (source_location, heap, locations, 1);
2443 VEC_quick_push (tree, phis, PHI_RESULT (phi));
2444 VEC_quick_push (tree, phis, PHI_ARG_DEF (phi, 0));
2445 VEC_quick_push (source_location, locations,
2446 gimple_phi_arg_location (phi, 0));
2448 e = redirect_edge_and_branch (single_succ_edge (preheaderbb), headerbb);
2450 /* Remove the exit phis from the old basic block. */
2451 for (bsi = gsi_start_phis (olddest); !gsi_end_p (bsi); )
2452 remove_phi_node (&bsi, false);
2454 /* and add them back to the new basic block. */
2455 while (VEC_length (tree, phis) != 0)
2457 tree def;
2458 tree phiname;
2459 source_location locus;
2460 def = VEC_pop (tree, phis);
2461 phiname = VEC_pop (tree, phis);
2462 locus = VEC_pop (source_location, locations);
2463 phi = create_phi_node (phiname, preheaderbb);
2464 add_phi_arg (phi, def, single_pred_edge (preheaderbb), locus);
2466 flush_pending_stmts (e);
2467 VEC_free (tree, heap, phis);
2469 bodybb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2470 latchbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2471 make_edge (headerbb, bodybb, EDGE_FALLTHRU);
2472 cond_stmt = gimple_build_cond (NE_EXPR, integer_one_node, integer_zero_node,
2473 NULL_TREE, NULL_TREE);
2474 bsi = gsi_start_bb (bodybb);
2475 gsi_insert_after (&bsi, cond_stmt, GSI_NEW_STMT);
2476 e = make_edge (bodybb, olddest, EDGE_FALSE_VALUE);
2477 make_edge (bodybb, latchbb, EDGE_TRUE_VALUE);
2478 make_edge (latchbb, headerbb, EDGE_FALLTHRU);
2480 /* Update the loop structures. */
2481 newloop = duplicate_loop (loop, olddest->loop_father);
2482 newloop->header = headerbb;
2483 newloop->latch = latchbb;
2484 add_bb_to_loop (latchbb, newloop);
2485 add_bb_to_loop (bodybb, newloop);
2486 add_bb_to_loop (headerbb, newloop);
2487 set_immediate_dominator (CDI_DOMINATORS, bodybb, headerbb);
2488 set_immediate_dominator (CDI_DOMINATORS, headerbb, preheaderbb);
2489 set_immediate_dominator (CDI_DOMINATORS, preheaderbb,
2490 single_exit (loop)->src);
2491 set_immediate_dominator (CDI_DOMINATORS, latchbb, bodybb);
2492 set_immediate_dominator (CDI_DOMINATORS, olddest,
2493 recompute_dominator (CDI_DOMINATORS, olddest));
2494 /* Create the new iv. */
2495 oldivvar = VEC_index (tree, loopivs, 0);
2496 ivvar = create_tmp_var (TREE_TYPE (oldivvar), "perfectiv");
2497 add_referenced_var (ivvar);
2498 standard_iv_increment_position (newloop, &bsi, &insert_after);
2499 create_iv (VEC_index (tree, lbounds, 0),
2500 build_int_cst (TREE_TYPE (oldivvar), VEC_index (int, steps, 0)),
2501 ivvar, newloop, &bsi, insert_after, &ivvar, &ivvarinced);
2503 /* Create the new upper bound. This may be not just a variable, so we copy
2504 it to one just in case. */
2506 exit_condition = get_loop_exit_condition (newloop);
2507 uboundvar = create_tmp_var (TREE_TYPE (VEC_index (tree, ubounds, 0)),
2508 "uboundvar");
2509 add_referenced_var (uboundvar);
2510 stmt = gimple_build_assign (uboundvar, VEC_index (tree, ubounds, 0));
2511 uboundvar = make_ssa_name (uboundvar, stmt);
2512 gimple_assign_set_lhs (stmt, uboundvar);
2514 if (insert_after)
2515 gsi_insert_after (&bsi, stmt, GSI_SAME_STMT);
2516 else
2517 gsi_insert_before (&bsi, stmt, GSI_SAME_STMT);
2518 update_stmt (stmt);
2519 gimple_cond_set_condition (exit_condition, GE_EXPR, uboundvar, ivvarinced);
2520 update_stmt (exit_condition);
2521 replacements = htab_create_ggc (20, tree_map_hash,
2522 tree_map_eq, NULL);
2523 bbs = get_loop_body_in_dom_order (loop);
2524 /* Now move the statements, and replace the induction variable in the moved
2525 statements with the correct loop induction variable. */
2526 oldivvar = VEC_index (tree, loopivs, 0);
2527 firstbsi = gsi_start_bb (bodybb);
2528 for (i = loop->num_nodes - 1; i >= 0 ; i--)
2530 gimple_stmt_iterator tobsi = gsi_last_bb (bodybb);
2531 if (bbs[i]->loop_father == loop)
2533 /* If this is true, we are *before* the inner loop.
2534 If this isn't true, we are *after* it.
2536 The only time can_convert_to_perfect_nest returns true when we
2537 have statements before the inner loop is if they can be moved
2538 into the inner loop.
2540 The only time can_convert_to_perfect_nest returns true when we
2541 have statements after the inner loop is if they can be moved into
2542 the new split loop. */
2544 if (dominated_by_p (CDI_DOMINATORS, loop->inner->header, bbs[i]))
2546 gimple_stmt_iterator header_bsi
2547 = gsi_after_labels (loop->inner->header);
2549 for (bsi = gsi_start_bb (bbs[i]); !gsi_end_p (bsi);)
2551 gimple stmt = gsi_stmt (bsi);
2553 if (stmt == exit_condition
2554 || not_interesting_stmt (stmt)
2555 || stmt_is_bumper_for_loop (loop, stmt))
2557 gsi_next (&bsi);
2558 continue;
2561 gsi_move_before (&bsi, &header_bsi);
2564 else
2566 /* Note that the bsi only needs to be explicitly incremented
2567 when we don't move something, since it is automatically
2568 incremented when we do. */
2569 for (bsi = gsi_start_bb (bbs[i]); !gsi_end_p (bsi);)
2571 gimple stmt = gsi_stmt (bsi);
2573 if (stmt == exit_condition
2574 || not_interesting_stmt (stmt)
2575 || stmt_is_bumper_for_loop (loop, stmt))
2577 gsi_next (&bsi);
2578 continue;
2581 replace_uses_equiv_to_x_with_y
2582 (loop, stmt, oldivvar, VEC_index (int, steps, 0), ivvar,
2583 VEC_index (tree, lbounds, 0), replacements, &firstbsi);
2585 gsi_move_before (&bsi, &tobsi);
2587 /* If the statement has any virtual operands, they may
2588 need to be rewired because the original loop may
2589 still reference them. */
2590 if (gimple_vuse (stmt))
2591 mark_sym_for_renaming (gimple_vop (cfun));
2598 free (bbs);
2599 htab_delete (replacements);
2600 return perfect_nest_p (loop);
2603 /* Return true if TRANS is a legal transformation matrix that respects
2604 the dependence vectors in DISTS and DIRS. The conservative answer
2605 is false.
2607 "Wolfe proves that a unimodular transformation represented by the
2608 matrix T is legal when applied to a loop nest with a set of
2609 lexicographically non-negative distance vectors RDG if and only if
2610 for each vector d in RDG, (T.d >= 0) is lexicographically positive.
2611 i.e.: if and only if it transforms the lexicographically positive
2612 distance vectors to lexicographically positive vectors. Note that
2613 a unimodular matrix must transform the zero vector (and only it) to
2614 the zero vector." S.Muchnick. */
2616 bool
2617 lambda_transform_legal_p (lambda_trans_matrix trans,
2618 int nb_loops,
2619 VEC (ddr_p, heap) *dependence_relations)
2621 unsigned int i, j;
2622 lambda_vector distres;
2623 struct data_dependence_relation *ddr;
2625 gcc_assert (LTM_COLSIZE (trans) == nb_loops
2626 && LTM_ROWSIZE (trans) == nb_loops);
2628 /* When there are no dependences, the transformation is correct. */
2629 if (VEC_length (ddr_p, dependence_relations) == 0)
2630 return true;
2632 ddr = VEC_index (ddr_p, dependence_relations, 0);
2633 if (ddr == NULL)
2634 return true;
2636 /* When there is an unknown relation in the dependence_relations, we
2637 know that it is no worth looking at this loop nest: give up. */
2638 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2639 return false;
2641 distres = lambda_vector_new (nb_loops);
2643 /* For each distance vector in the dependence graph. */
2644 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
2646 /* Don't care about relations for which we know that there is no
2647 dependence, nor about read-read (aka. output-dependences):
2648 these data accesses can happen in any order. */
2649 if (DDR_ARE_DEPENDENT (ddr) == chrec_known
2650 || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
2651 continue;
2653 /* Conservatively answer: "this transformation is not valid". */
2654 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2655 return false;
2657 /* If the dependence could not be captured by a distance vector,
2658 conservatively answer that the transform is not valid. */
2659 if (DDR_NUM_DIST_VECTS (ddr) == 0)
2660 return false;
2662 /* Compute trans.dist_vect */
2663 for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
2665 lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
2666 DDR_DIST_VECT (ddr, j), distres);
2668 if (!lambda_vector_lexico_pos (distres, nb_loops))
2669 return false;
2672 return true;
2676 /* Collects parameters from affine function ACCESS_FUNCTION, and push
2677 them in PARAMETERS. */
2679 static void
2680 lambda_collect_parameters_from_af (tree access_function,
2681 struct pointer_set_t *param_set,
2682 VEC (tree, heap) **parameters)
2684 if (access_function == NULL)
2685 return;
2687 if (TREE_CODE (access_function) == SSA_NAME
2688 && pointer_set_contains (param_set, access_function) == 0)
2690 pointer_set_insert (param_set, access_function);
2691 VEC_safe_push (tree, heap, *parameters, access_function);
2693 else
2695 int i, num_operands = tree_operand_length (access_function);
2697 for (i = 0; i < num_operands; i++)
2698 lambda_collect_parameters_from_af (TREE_OPERAND (access_function, i),
2699 param_set, parameters);
2703 /* Collects parameters from DATAREFS, and push them in PARAMETERS. */
2705 void
2706 lambda_collect_parameters (VEC (data_reference_p, heap) *datarefs,
2707 VEC (tree, heap) **parameters)
2709 unsigned i, j;
2710 struct pointer_set_t *parameter_set = pointer_set_create ();
2711 data_reference_p data_reference;
2713 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, data_reference); i++)
2714 for (j = 0; j < DR_NUM_DIMENSIONS (data_reference); j++)
2715 lambda_collect_parameters_from_af (DR_ACCESS_FN (data_reference, j),
2716 parameter_set, parameters);
2717 pointer_set_destroy (parameter_set);
2720 /* Translates BASE_EXPR to vector CY. AM is needed for inferring
2721 indexing positions in the data access vector. CST is the analyzed
2722 integer constant. */
2724 static bool
2725 av_for_af_base (tree base_expr, lambda_vector cy, struct access_matrix *am,
2726 int cst)
2728 bool result = true;
2730 switch (TREE_CODE (base_expr))
2732 case INTEGER_CST:
2733 /* Constant part. */
2734 cy[AM_CONST_COLUMN_INDEX (am)] += int_cst_value (base_expr) * cst;
2735 return true;
2737 case SSA_NAME:
2739 int param_index =
2740 access_matrix_get_index_for_parameter (base_expr, am);
2742 if (param_index >= 0)
2744 cy[param_index] = cst + cy[param_index];
2745 return true;
2748 return false;
2751 case PLUS_EXPR:
2752 return av_for_af_base (TREE_OPERAND (base_expr, 0), cy, am, cst)
2753 && av_for_af_base (TREE_OPERAND (base_expr, 1), cy, am, cst);
2755 case MINUS_EXPR:
2756 return av_for_af_base (TREE_OPERAND (base_expr, 0), cy, am, cst)
2757 && av_for_af_base (TREE_OPERAND (base_expr, 1), cy, am, -1 * cst);
2759 case MULT_EXPR:
2760 if (TREE_CODE (TREE_OPERAND (base_expr, 0)) == INTEGER_CST)
2761 result = av_for_af_base (TREE_OPERAND (base_expr, 1),
2762 cy, am, cst *
2763 int_cst_value (TREE_OPERAND (base_expr, 0)));
2764 else if (TREE_CODE (TREE_OPERAND (base_expr, 1)) == INTEGER_CST)
2765 result = av_for_af_base (TREE_OPERAND (base_expr, 0),
2766 cy, am, cst *
2767 int_cst_value (TREE_OPERAND (base_expr, 1)));
2768 else
2769 result = false;
2771 return result;
2773 case NEGATE_EXPR:
2774 return av_for_af_base (TREE_OPERAND (base_expr, 0), cy, am, -1 * cst);
2776 default:
2777 return false;
2780 return result;
2783 /* Translates ACCESS_FUN to vector CY. AM is needed for inferring
2784 indexing positions in the data access vector. */
2786 static bool
2787 av_for_af (tree access_fun, lambda_vector cy, struct access_matrix *am)
2789 switch (TREE_CODE (access_fun))
2791 case POLYNOMIAL_CHREC:
2793 tree left = CHREC_LEFT (access_fun);
2794 tree right = CHREC_RIGHT (access_fun);
2795 unsigned var;
2797 if (TREE_CODE (right) != INTEGER_CST)
2798 return false;
2800 var = am_vector_index_for_loop (am, CHREC_VARIABLE (access_fun));
2801 cy[var] = int_cst_value (right);
2803 if (TREE_CODE (left) == POLYNOMIAL_CHREC)
2804 return av_for_af (left, cy, am);
2805 else
2806 return av_for_af_base (left, cy, am, 1);
2809 case INTEGER_CST:
2810 /* Constant part. */
2811 return av_for_af_base (access_fun, cy, am, 1);
2813 default:
2814 return false;
2818 /* Initializes the access matrix for DATA_REFERENCE. */
2820 static bool
2821 build_access_matrix (data_reference_p data_reference,
2822 VEC (tree, heap) *parameters,
2823 VEC (loop_p, heap) *nest,
2824 struct obstack * lambda_obstack)
2826 struct access_matrix *am = (struct access_matrix *)
2827 obstack_alloc(lambda_obstack, sizeof (struct access_matrix));
2828 unsigned i, ndim = DR_NUM_DIMENSIONS (data_reference);
2829 unsigned nivs = VEC_length (loop_p, nest);
2830 unsigned lambda_nb_columns;
2832 AM_LOOP_NEST (am) = nest;
2833 AM_NB_INDUCTION_VARS (am) = nivs;
2834 AM_PARAMETERS (am) = parameters;
2836 lambda_nb_columns = AM_NB_COLUMNS (am);
2837 AM_MATRIX (am) = VEC_alloc (lambda_vector, gc, ndim);
2839 for (i = 0; i < ndim; i++)
2841 lambda_vector access_vector = lambda_vector_new (lambda_nb_columns);
2842 tree access_function = DR_ACCESS_FN (data_reference, i);
2844 if (!av_for_af (access_function, access_vector, am))
2845 return false;
2847 VEC_quick_push (lambda_vector, AM_MATRIX (am), access_vector);
2850 DR_ACCESS_MATRIX (data_reference) = am;
2851 return true;
2854 /* Returns false when one of the access matrices cannot be built. */
2856 bool
2857 lambda_compute_access_matrices (VEC (data_reference_p, heap) *datarefs,
2858 VEC (tree, heap) *parameters,
2859 VEC (loop_p, heap) *nest,
2860 struct obstack * lambda_obstack)
2862 data_reference_p dataref;
2863 unsigned ix;
2865 for (ix = 0; VEC_iterate (data_reference_p, datarefs, ix, dataref); ix++)
2866 if (!build_access_matrix (dataref, parameters, nest, lambda_obstack))
2867 return false;
2869 return true;