Fix last entry v2.
[official-gcc.git] / gcc / tree-data-ref.c
blob89a0039355f0f55f2cc536f0c5f75a631bd30f41
1 /* Data references and dependences detectors.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass walks a given loop structure searching for array
22 references. The information about the array accesses is recorded
23 in DATA_REFERENCE structures.
25 The basic test for determining the dependences is:
26 given two access functions chrec1 and chrec2 to a same array, and
27 x and y two vectors from the iteration domain, the same element of
28 the array is accessed twice at iterations x and y if and only if:
29 | chrec1 (x) == chrec2 (y).
31 The goals of this analysis are:
33 - to determine the independence: the relation between two
34 independent accesses is qualified with the chrec_known (this
35 information allows a loop parallelization),
37 - when two data references access the same data, to qualify the
38 dependence relation with classic dependence representations:
40 - distance vectors
41 - direction vectors
42 - loop carried level dependence
43 - polyhedron dependence
44 or with the chains of recurrences based representation,
46 - to define a knowledge base for storing the data dependence
47 information,
49 - to define an interface to access this data.
52 Definitions:
54 - subscript: given two array accesses a subscript is the tuple
55 composed of the access functions for a given dimension. Example:
56 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57 (f1, g1), (f2, g2), (f3, g3).
59 - Diophantine equation: an equation whose coefficients and
60 solutions are integer constants, for example the equation
61 | 3*x + 2*y = 1
62 has an integer solution x = 1 and y = -1.
64 References:
66 - "Advanced Compilation for High Performance Computing" by Randy
67 Allen and Ken Kennedy.
68 http://citeseer.ist.psu.edu/goff91practical.html
70 - "Loop Transformations for Restructuring Compilers - The Foundations"
71 by Utpal Banerjee.
76 #include "config.h"
77 #include "system.h"
78 #include "coretypes.h"
79 #include "tm.h"
80 #include "ggc.h"
81 #include "tree.h"
83 /* These RTL headers are needed for basic-block.h. */
84 #include "rtl.h"
85 #include "basic-block.h"
86 #include "diagnostic.h"
87 #include "tree-flow.h"
88 #include "tree-dump.h"
89 #include "timevar.h"
90 #include "cfgloop.h"
91 #include "tree-chrec.h"
92 #include "tree-data-ref.h"
93 #include "tree-scalar-evolution.h"
94 #include "tree-pass.h"
95 #include "langhooks.h"
97 static struct datadep_stats
99 int num_dependence_tests;
100 int num_dependence_dependent;
101 int num_dependence_independent;
102 int num_dependence_undetermined;
104 int num_subscript_tests;
105 int num_subscript_undetermined;
106 int num_same_subscript_function;
108 int num_ziv;
109 int num_ziv_independent;
110 int num_ziv_dependent;
111 int num_ziv_unimplemented;
113 int num_siv;
114 int num_siv_independent;
115 int num_siv_dependent;
116 int num_siv_unimplemented;
118 int num_miv;
119 int num_miv_independent;
120 int num_miv_dependent;
121 int num_miv_unimplemented;
122 } dependence_stats;
124 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
125 struct data_reference *,
126 struct data_reference *,
127 struct loop *);
128 /* Returns true iff A divides B. */
130 static inline bool
131 tree_fold_divides_p (const_tree a, const_tree b)
133 gcc_assert (TREE_CODE (a) == INTEGER_CST);
134 gcc_assert (TREE_CODE (b) == INTEGER_CST);
135 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a, 0));
138 /* Returns true iff A divides B. */
140 static inline bool
141 int_divides_p (int a, int b)
143 return ((b % a) == 0);
148 /* Dump into FILE all the data references from DATAREFS. */
150 void
151 dump_data_references (FILE *file, VEC (data_reference_p, heap) *datarefs)
153 unsigned int i;
154 struct data_reference *dr;
156 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
157 dump_data_reference (file, dr);
160 /* Dump into FILE all the dependence relations from DDRS. */
162 void
163 dump_data_dependence_relations (FILE *file,
164 VEC (ddr_p, heap) *ddrs)
166 unsigned int i;
167 struct data_dependence_relation *ddr;
169 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
170 dump_data_dependence_relation (file, ddr);
173 /* Dump function for a DATA_REFERENCE structure. */
175 void
176 dump_data_reference (FILE *outf,
177 struct data_reference *dr)
179 unsigned int i;
181 fprintf (outf, "(Data Ref: \n stmt: ");
182 print_generic_stmt (outf, DR_STMT (dr), 0);
183 fprintf (outf, " ref: ");
184 print_generic_stmt (outf, DR_REF (dr), 0);
185 fprintf (outf, " base_object: ");
186 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
188 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
190 fprintf (outf, " Access function %d: ", i);
191 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
193 fprintf (outf, ")\n");
196 /* Dumps the affine function described by FN to the file OUTF. */
198 static void
199 dump_affine_function (FILE *outf, affine_fn fn)
201 unsigned i;
202 tree coef;
204 print_generic_expr (outf, VEC_index (tree, fn, 0), TDF_SLIM);
205 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
207 fprintf (outf, " + ");
208 print_generic_expr (outf, coef, TDF_SLIM);
209 fprintf (outf, " * x_%u", i);
213 /* Dumps the conflict function CF to the file OUTF. */
215 static void
216 dump_conflict_function (FILE *outf, conflict_function *cf)
218 unsigned i;
220 if (cf->n == NO_DEPENDENCE)
221 fprintf (outf, "no dependence\n");
222 else if (cf->n == NOT_KNOWN)
223 fprintf (outf, "not known\n");
224 else
226 for (i = 0; i < cf->n; i++)
228 fprintf (outf, "[");
229 dump_affine_function (outf, cf->fns[i]);
230 fprintf (outf, "]\n");
235 /* Dump function for a SUBSCRIPT structure. */
237 void
238 dump_subscript (FILE *outf, struct subscript *subscript)
240 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
242 fprintf (outf, "\n (subscript \n");
243 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
244 dump_conflict_function (outf, cf);
245 if (CF_NONTRIVIAL_P (cf))
247 tree last_iteration = SUB_LAST_CONFLICT (subscript);
248 fprintf (outf, " last_conflict: ");
249 print_generic_stmt (outf, last_iteration, 0);
252 cf = SUB_CONFLICTS_IN_B (subscript);
253 fprintf (outf, " iterations_that_access_an_element_twice_in_B: ");
254 dump_conflict_function (outf, cf);
255 if (CF_NONTRIVIAL_P (cf))
257 tree last_iteration = SUB_LAST_CONFLICT (subscript);
258 fprintf (outf, " last_conflict: ");
259 print_generic_stmt (outf, last_iteration, 0);
262 fprintf (outf, " (Subscript distance: ");
263 print_generic_stmt (outf, SUB_DISTANCE (subscript), 0);
264 fprintf (outf, " )\n");
265 fprintf (outf, " )\n");
268 /* Print the classic direction vector DIRV to OUTF. */
270 void
271 print_direction_vector (FILE *outf,
272 lambda_vector dirv,
273 int length)
275 int eq;
277 for (eq = 0; eq < length; eq++)
279 enum data_dependence_direction dir = dirv[eq];
281 switch (dir)
283 case dir_positive:
284 fprintf (outf, " +");
285 break;
286 case dir_negative:
287 fprintf (outf, " -");
288 break;
289 case dir_equal:
290 fprintf (outf, " =");
291 break;
292 case dir_positive_or_equal:
293 fprintf (outf, " +=");
294 break;
295 case dir_positive_or_negative:
296 fprintf (outf, " +-");
297 break;
298 case dir_negative_or_equal:
299 fprintf (outf, " -=");
300 break;
301 case dir_star:
302 fprintf (outf, " *");
303 break;
304 default:
305 fprintf (outf, "indep");
306 break;
309 fprintf (outf, "\n");
312 /* Print a vector of direction vectors. */
314 void
315 print_dir_vectors (FILE *outf, VEC (lambda_vector, heap) *dir_vects,
316 int length)
318 unsigned j;
319 lambda_vector v;
321 for (j = 0; VEC_iterate (lambda_vector, dir_vects, j, v); j++)
322 print_direction_vector (outf, v, length);
325 /* Print a vector of distance vectors. */
327 void
328 print_dist_vectors (FILE *outf, VEC (lambda_vector, heap) *dist_vects,
329 int length)
331 unsigned j;
332 lambda_vector v;
334 for (j = 0; VEC_iterate (lambda_vector, dist_vects, j, v); j++)
335 print_lambda_vector (outf, v, length);
338 /* Debug version. */
340 void
341 debug_data_dependence_relation (struct data_dependence_relation *ddr)
343 dump_data_dependence_relation (stderr, ddr);
346 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
348 void
349 dump_data_dependence_relation (FILE *outf,
350 struct data_dependence_relation *ddr)
352 struct data_reference *dra, *drb;
354 dra = DDR_A (ddr);
355 drb = DDR_B (ddr);
356 fprintf (outf, "(Data Dep: \n");
357 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
358 fprintf (outf, " (don't know)\n");
360 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
361 fprintf (outf, " (no dependence)\n");
363 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
365 unsigned int i;
366 struct loop *loopi;
368 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
370 fprintf (outf, " access_fn_A: ");
371 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
372 fprintf (outf, " access_fn_B: ");
373 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
374 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
377 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
378 fprintf (outf, " loop nest: (");
379 for (i = 0; VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
380 fprintf (outf, "%d ", loopi->num);
381 fprintf (outf, ")\n");
383 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
385 fprintf (outf, " distance_vector: ");
386 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
387 DDR_NB_LOOPS (ddr));
390 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
392 fprintf (outf, " direction_vector: ");
393 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
394 DDR_NB_LOOPS (ddr));
398 fprintf (outf, ")\n");
401 /* Dump function for a DATA_DEPENDENCE_DIRECTION structure. */
403 void
404 dump_data_dependence_direction (FILE *file,
405 enum data_dependence_direction dir)
407 switch (dir)
409 case dir_positive:
410 fprintf (file, "+");
411 break;
413 case dir_negative:
414 fprintf (file, "-");
415 break;
417 case dir_equal:
418 fprintf (file, "=");
419 break;
421 case dir_positive_or_negative:
422 fprintf (file, "+-");
423 break;
425 case dir_positive_or_equal:
426 fprintf (file, "+=");
427 break;
429 case dir_negative_or_equal:
430 fprintf (file, "-=");
431 break;
433 case dir_star:
434 fprintf (file, "*");
435 break;
437 default:
438 break;
442 /* Dumps the distance and direction vectors in FILE. DDRS contains
443 the dependence relations, and VECT_SIZE is the size of the
444 dependence vectors, or in other words the number of loops in the
445 considered nest. */
447 void
448 dump_dist_dir_vectors (FILE *file, VEC (ddr_p, heap) *ddrs)
450 unsigned int i, j;
451 struct data_dependence_relation *ddr;
452 lambda_vector v;
454 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
455 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
457 for (j = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), j, v); j++)
459 fprintf (file, "DISTANCE_V (");
460 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
461 fprintf (file, ")\n");
464 for (j = 0; VEC_iterate (lambda_vector, DDR_DIR_VECTS (ddr), j, v); j++)
466 fprintf (file, "DIRECTION_V (");
467 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
468 fprintf (file, ")\n");
472 fprintf (file, "\n\n");
475 /* Dumps the data dependence relations DDRS in FILE. */
477 void
478 dump_ddrs (FILE *file, VEC (ddr_p, heap) *ddrs)
480 unsigned int i;
481 struct data_dependence_relation *ddr;
483 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
484 dump_data_dependence_relation (file, ddr);
486 fprintf (file, "\n\n");
489 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
490 will be ssizetype. */
492 void
493 split_constant_offset (tree exp, tree *var, tree *off)
495 tree type = TREE_TYPE (exp), otype;
496 tree var0, var1;
497 tree off0, off1;
498 enum tree_code code;
500 *var = exp;
501 STRIP_NOPS (exp);
502 otype = TREE_TYPE (exp);
503 code = TREE_CODE (exp);
505 switch (code)
507 case INTEGER_CST:
508 *var = build_int_cst (type, 0);
509 *off = fold_convert (ssizetype, exp);
510 return;
512 case POINTER_PLUS_EXPR:
513 code = PLUS_EXPR;
514 /* FALLTHROUGH */
515 case PLUS_EXPR:
516 case MINUS_EXPR:
517 split_constant_offset (TREE_OPERAND (exp, 0), &var0, &off0);
518 split_constant_offset (TREE_OPERAND (exp, 1), &var1, &off1);
519 *var = fold_convert (type, fold_build2 (TREE_CODE (exp), otype,
520 var0, var1));
521 *off = size_binop (code, off0, off1);
522 return;
524 case MULT_EXPR:
525 off1 = TREE_OPERAND (exp, 1);
526 if (TREE_CODE (off1) != INTEGER_CST)
527 break;
529 split_constant_offset (TREE_OPERAND (exp, 0), &var0, &off0);
530 *var = fold_convert (type, fold_build2 (MULT_EXPR, otype,
531 var0, off1));
532 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, off1));
533 return;
535 case ADDR_EXPR:
537 tree op, base, poffset;
538 HOST_WIDE_INT pbitsize, pbitpos;
539 enum machine_mode pmode;
540 int punsignedp, pvolatilep;
542 op = TREE_OPERAND (exp, 0);
543 if (!handled_component_p (op))
544 break;
546 base = get_inner_reference (op, &pbitsize, &pbitpos, &poffset,
547 &pmode, &punsignedp, &pvolatilep, false);
549 if (pbitpos % BITS_PER_UNIT != 0)
550 break;
551 base = build_fold_addr_expr (base);
552 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
554 if (poffset)
556 split_constant_offset (poffset, &poffset, &off1);
557 off0 = size_binop (PLUS_EXPR, off0, off1);
558 if (POINTER_TYPE_P (TREE_TYPE (base)))
559 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base),
560 base, fold_convert (sizetype, poffset));
561 else
562 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
563 fold_convert (TREE_TYPE (base), poffset));
566 var0 = fold_convert (type, base);
568 /* If variable length types are involved, punt, otherwise casts
569 might be converted into ARRAY_REFs in gimplify_conversion.
570 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
571 possibly no longer appears in current GIMPLE, might resurface.
572 This perhaps could run
573 if (TREE_CODE (var0) == NOP_EXPR
574 || TREE_CODE (var0) == CONVERT_EXPR)
576 gimplify_conversion (&var0);
577 // Attempt to fill in any within var0 found ARRAY_REF's
578 // element size from corresponding op embedded ARRAY_REF,
579 // if unsuccessful, just punt.
580 } */
581 while (POINTER_TYPE_P (type))
582 type = TREE_TYPE (type);
583 if (int_size_in_bytes (type) < 0)
584 break;
586 *var = var0;
587 *off = off0;
588 return;
591 case SSA_NAME:
593 tree def_stmt = SSA_NAME_DEF_STMT (exp);
594 if (TREE_CODE (def_stmt) == GIMPLE_MODIFY_STMT)
596 tree def_stmt_rhs = GIMPLE_STMT_OPERAND (def_stmt, 1);
598 if (!TREE_SIDE_EFFECTS (def_stmt_rhs)
599 && EXPR_P (def_stmt_rhs)
600 && !REFERENCE_CLASS_P (def_stmt_rhs)
601 && !get_call_expr_in (def_stmt_rhs))
603 split_constant_offset (def_stmt_rhs, &var0, &off0);
604 var0 = fold_convert (type, var0);
605 *var = var0;
606 *off = off0;
607 return;
610 break;
613 default:
614 break;
617 *off = ssize_int (0);
620 /* Returns the address ADDR of an object in a canonical shape (without nop
621 casts, and with type of pointer to the object). */
623 static tree
624 canonicalize_base_object_address (tree addr)
626 tree orig = addr;
628 STRIP_NOPS (addr);
630 /* The base address may be obtained by casting from integer, in that case
631 keep the cast. */
632 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
633 return orig;
635 if (TREE_CODE (addr) != ADDR_EXPR)
636 return addr;
638 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
641 /* Analyzes the behavior of the memory reference DR in the innermost loop that
642 contains it. */
644 void
645 dr_analyze_innermost (struct data_reference *dr)
647 tree stmt = DR_STMT (dr);
648 struct loop *loop = loop_containing_stmt (stmt);
649 tree ref = DR_REF (dr);
650 HOST_WIDE_INT pbitsize, pbitpos;
651 tree base, poffset;
652 enum machine_mode pmode;
653 int punsignedp, pvolatilep;
654 affine_iv base_iv, offset_iv;
655 tree init, dinit, step;
657 if (dump_file && (dump_flags & TDF_DETAILS))
658 fprintf (dump_file, "analyze_innermost: ");
660 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
661 &pmode, &punsignedp, &pvolatilep, false);
662 gcc_assert (base != NULL_TREE);
664 if (pbitpos % BITS_PER_UNIT != 0)
666 if (dump_file && (dump_flags & TDF_DETAILS))
667 fprintf (dump_file, "failed: bit offset alignment.\n");
668 return;
671 base = build_fold_addr_expr (base);
672 if (!simple_iv (loop, stmt, base, &base_iv, false))
674 if (dump_file && (dump_flags & TDF_DETAILS))
675 fprintf (dump_file, "failed: evolution of base is not affine.\n");
676 return;
678 if (!poffset)
680 offset_iv.base = ssize_int (0);
681 offset_iv.step = ssize_int (0);
683 else if (!simple_iv (loop, stmt, poffset, &offset_iv, false))
685 if (dump_file && (dump_flags & TDF_DETAILS))
686 fprintf (dump_file, "failed: evolution of offset is not affine.\n");
687 return;
690 init = ssize_int (pbitpos / BITS_PER_UNIT);
691 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
692 init = size_binop (PLUS_EXPR, init, dinit);
693 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
694 init = size_binop (PLUS_EXPR, init, dinit);
696 step = size_binop (PLUS_EXPR,
697 fold_convert (ssizetype, base_iv.step),
698 fold_convert (ssizetype, offset_iv.step));
700 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
702 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
703 DR_INIT (dr) = init;
704 DR_STEP (dr) = step;
706 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
708 if (dump_file && (dump_flags & TDF_DETAILS))
709 fprintf (dump_file, "success.\n");
712 /* Determines the base object and the list of indices of memory reference
713 DR, analyzed in loop nest NEST. */
715 static void
716 dr_analyze_indices (struct data_reference *dr, struct loop *nest)
718 tree stmt = DR_STMT (dr);
719 struct loop *loop = loop_containing_stmt (stmt);
720 VEC (tree, heap) *access_fns = NULL;
721 tree ref = unshare_expr (DR_REF (dr)), aref = ref, op;
722 tree base, off, access_fn;
724 while (handled_component_p (aref))
726 if (TREE_CODE (aref) == ARRAY_REF)
728 op = TREE_OPERAND (aref, 1);
729 access_fn = analyze_scalar_evolution (loop, op);
730 access_fn = resolve_mixers (nest, access_fn);
731 VEC_safe_push (tree, heap, access_fns, access_fn);
733 TREE_OPERAND (aref, 1) = build_int_cst (TREE_TYPE (op), 0);
736 aref = TREE_OPERAND (aref, 0);
739 if (INDIRECT_REF_P (aref))
741 op = TREE_OPERAND (aref, 0);
742 access_fn = analyze_scalar_evolution (loop, op);
743 access_fn = resolve_mixers (nest, access_fn);
744 base = initial_condition (access_fn);
745 split_constant_offset (base, &base, &off);
746 access_fn = chrec_replace_initial_condition (access_fn,
747 fold_convert (TREE_TYPE (base), off));
749 TREE_OPERAND (aref, 0) = base;
750 VEC_safe_push (tree, heap, access_fns, access_fn);
753 DR_BASE_OBJECT (dr) = ref;
754 DR_ACCESS_FNS (dr) = access_fns;
757 /* Extracts the alias analysis information from the memory reference DR. */
759 static void
760 dr_analyze_alias (struct data_reference *dr)
762 tree stmt = DR_STMT (dr);
763 tree ref = DR_REF (dr);
764 tree base = get_base_address (ref), addr, smt = NULL_TREE;
765 ssa_op_iter it;
766 tree op;
767 bitmap vops;
769 if (DECL_P (base))
770 smt = base;
771 else if (INDIRECT_REF_P (base))
773 addr = TREE_OPERAND (base, 0);
774 if (TREE_CODE (addr) == SSA_NAME)
776 smt = symbol_mem_tag (SSA_NAME_VAR (addr));
777 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
781 DR_SYMBOL_TAG (dr) = smt;
782 if (smt && var_can_have_subvars (smt))
783 DR_SUBVARS (dr) = get_subvars_for_var (smt);
785 vops = BITMAP_ALLOC (NULL);
786 FOR_EACH_SSA_TREE_OPERAND (op, stmt, it, SSA_OP_VIRTUAL_USES)
788 bitmap_set_bit (vops, DECL_UID (SSA_NAME_VAR (op)));
791 DR_VOPS (dr) = vops;
794 /* Returns true if the address of DR is invariant. */
796 static bool
797 dr_address_invariant_p (struct data_reference *dr)
799 unsigned i;
800 tree idx;
802 for (i = 0; VEC_iterate (tree, DR_ACCESS_FNS (dr), i, idx); i++)
803 if (tree_contains_chrecs (idx, NULL))
804 return false;
806 return true;
809 /* Frees data reference DR. */
811 static void
812 free_data_ref (data_reference_p dr)
814 BITMAP_FREE (DR_VOPS (dr));
815 VEC_free (tree, heap, DR_ACCESS_FNS (dr));
816 free (dr);
819 /* Analyzes memory reference MEMREF accessed in STMT. The reference
820 is read if IS_READ is true, write otherwise. Returns the
821 data_reference description of MEMREF. NEST is the outermost loop of the
822 loop nest in that the reference should be analyzed. */
824 struct data_reference *
825 create_data_ref (struct loop *nest, tree memref, tree stmt, bool is_read)
827 struct data_reference *dr;
829 if (dump_file && (dump_flags & TDF_DETAILS))
831 fprintf (dump_file, "Creating dr for ");
832 print_generic_expr (dump_file, memref, TDF_SLIM);
833 fprintf (dump_file, "\n");
836 dr = XCNEW (struct data_reference);
837 DR_STMT (dr) = stmt;
838 DR_REF (dr) = memref;
839 DR_IS_READ (dr) = is_read;
841 dr_analyze_innermost (dr);
842 dr_analyze_indices (dr, nest);
843 dr_analyze_alias (dr);
845 if (dump_file && (dump_flags & TDF_DETAILS))
847 fprintf (dump_file, "\tbase_address: ");
848 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
849 fprintf (dump_file, "\n\toffset from base address: ");
850 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
851 fprintf (dump_file, "\n\tconstant offset from base address: ");
852 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
853 fprintf (dump_file, "\n\tstep: ");
854 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
855 fprintf (dump_file, "\n\taligned to: ");
856 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
857 fprintf (dump_file, "\n\tbase_object: ");
858 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
859 fprintf (dump_file, "\n\tsymbol tag: ");
860 print_generic_expr (dump_file, DR_SYMBOL_TAG (dr), TDF_SLIM);
861 fprintf (dump_file, "\n");
864 return dr;
867 /* Returns true if FNA == FNB. */
869 static bool
870 affine_function_equal_p (affine_fn fna, affine_fn fnb)
872 unsigned i, n = VEC_length (tree, fna);
874 if (n != VEC_length (tree, fnb))
875 return false;
877 for (i = 0; i < n; i++)
878 if (!operand_equal_p (VEC_index (tree, fna, i),
879 VEC_index (tree, fnb, i), 0))
880 return false;
882 return true;
885 /* If all the functions in CF are the same, returns one of them,
886 otherwise returns NULL. */
888 static affine_fn
889 common_affine_function (conflict_function *cf)
891 unsigned i;
892 affine_fn comm;
894 if (!CF_NONTRIVIAL_P (cf))
895 return NULL;
897 comm = cf->fns[0];
899 for (i = 1; i < cf->n; i++)
900 if (!affine_function_equal_p (comm, cf->fns[i]))
901 return NULL;
903 return comm;
906 /* Returns the base of the affine function FN. */
908 static tree
909 affine_function_base (affine_fn fn)
911 return VEC_index (tree, fn, 0);
914 /* Returns true if FN is a constant. */
916 static bool
917 affine_function_constant_p (affine_fn fn)
919 unsigned i;
920 tree coef;
922 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
923 if (!integer_zerop (coef))
924 return false;
926 return true;
929 /* Returns true if FN is the zero constant function. */
931 static bool
932 affine_function_zero_p (affine_fn fn)
934 return (integer_zerop (affine_function_base (fn))
935 && affine_function_constant_p (fn));
938 /* Applies operation OP on affine functions FNA and FNB, and returns the
939 result. */
941 static affine_fn
942 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
944 unsigned i, n, m;
945 affine_fn ret;
946 tree coef;
948 if (VEC_length (tree, fnb) > VEC_length (tree, fna))
950 n = VEC_length (tree, fna);
951 m = VEC_length (tree, fnb);
953 else
955 n = VEC_length (tree, fnb);
956 m = VEC_length (tree, fna);
959 ret = VEC_alloc (tree, heap, m);
960 for (i = 0; i < n; i++)
961 VEC_quick_push (tree, ret,
962 fold_build2 (op, integer_type_node,
963 VEC_index (tree, fna, i),
964 VEC_index (tree, fnb, i)));
966 for (; VEC_iterate (tree, fna, i, coef); i++)
967 VEC_quick_push (tree, ret,
968 fold_build2 (op, integer_type_node,
969 coef, integer_zero_node));
970 for (; VEC_iterate (tree, fnb, i, coef); i++)
971 VEC_quick_push (tree, ret,
972 fold_build2 (op, integer_type_node,
973 integer_zero_node, coef));
975 return ret;
978 /* Returns the sum of affine functions FNA and FNB. */
980 static affine_fn
981 affine_fn_plus (affine_fn fna, affine_fn fnb)
983 return affine_fn_op (PLUS_EXPR, fna, fnb);
986 /* Returns the difference of affine functions FNA and FNB. */
988 static affine_fn
989 affine_fn_minus (affine_fn fna, affine_fn fnb)
991 return affine_fn_op (MINUS_EXPR, fna, fnb);
994 /* Frees affine function FN. */
996 static void
997 affine_fn_free (affine_fn fn)
999 VEC_free (tree, heap, fn);
1002 /* Determine for each subscript in the data dependence relation DDR
1003 the distance. */
1005 static void
1006 compute_subscript_distance (struct data_dependence_relation *ddr)
1008 conflict_function *cf_a, *cf_b;
1009 affine_fn fn_a, fn_b, diff;
1011 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1013 unsigned int i;
1015 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1017 struct subscript *subscript;
1019 subscript = DDR_SUBSCRIPT (ddr, i);
1020 cf_a = SUB_CONFLICTS_IN_A (subscript);
1021 cf_b = SUB_CONFLICTS_IN_B (subscript);
1023 fn_a = common_affine_function (cf_a);
1024 fn_b = common_affine_function (cf_b);
1025 if (!fn_a || !fn_b)
1027 SUB_DISTANCE (subscript) = chrec_dont_know;
1028 return;
1030 diff = affine_fn_minus (fn_a, fn_b);
1032 if (affine_function_constant_p (diff))
1033 SUB_DISTANCE (subscript) = affine_function_base (diff);
1034 else
1035 SUB_DISTANCE (subscript) = chrec_dont_know;
1037 affine_fn_free (diff);
1042 /* Returns the conflict function for "unknown". */
1044 static conflict_function *
1045 conflict_fn_not_known (void)
1047 conflict_function *fn = XCNEW (conflict_function);
1048 fn->n = NOT_KNOWN;
1050 return fn;
1053 /* Returns the conflict function for "independent". */
1055 static conflict_function *
1056 conflict_fn_no_dependence (void)
1058 conflict_function *fn = XCNEW (conflict_function);
1059 fn->n = NO_DEPENDENCE;
1061 return fn;
1064 /* Returns true if the address of OBJ is invariant in LOOP. */
1066 static bool
1067 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1069 while (handled_component_p (obj))
1071 if (TREE_CODE (obj) == ARRAY_REF)
1073 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1074 need to check the stride and the lower bound of the reference. */
1075 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1076 loop->num)
1077 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1078 loop->num))
1079 return false;
1081 else if (TREE_CODE (obj) == COMPONENT_REF)
1083 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1084 loop->num))
1085 return false;
1087 obj = TREE_OPERAND (obj, 0);
1090 if (!INDIRECT_REF_P (obj))
1091 return true;
1093 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1094 loop->num);
1097 /* Returns true if A and B are accesses to different objects, or to different
1098 fields of the same object. */
1100 static bool
1101 disjoint_objects_p (tree a, tree b)
1103 tree base_a, base_b;
1104 VEC (tree, heap) *comp_a = NULL, *comp_b = NULL;
1105 bool ret;
1107 base_a = get_base_address (a);
1108 base_b = get_base_address (b);
1110 if (DECL_P (base_a)
1111 && DECL_P (base_b)
1112 && base_a != base_b)
1113 return true;
1115 if (!operand_equal_p (base_a, base_b, 0))
1116 return false;
1118 /* Compare the component references of A and B. We must start from the inner
1119 ones, so record them to the vector first. */
1120 while (handled_component_p (a))
1122 VEC_safe_push (tree, heap, comp_a, a);
1123 a = TREE_OPERAND (a, 0);
1125 while (handled_component_p (b))
1127 VEC_safe_push (tree, heap, comp_b, b);
1128 b = TREE_OPERAND (b, 0);
1131 ret = false;
1132 while (1)
1134 if (VEC_length (tree, comp_a) == 0
1135 || VEC_length (tree, comp_b) == 0)
1136 break;
1138 a = VEC_pop (tree, comp_a);
1139 b = VEC_pop (tree, comp_b);
1141 /* Real and imaginary part of a variable do not alias. */
1142 if ((TREE_CODE (a) == REALPART_EXPR
1143 && TREE_CODE (b) == IMAGPART_EXPR)
1144 || (TREE_CODE (a) == IMAGPART_EXPR
1145 && TREE_CODE (b) == REALPART_EXPR))
1147 ret = true;
1148 break;
1151 if (TREE_CODE (a) != TREE_CODE (b))
1152 break;
1154 /* Nothing to do for ARRAY_REFs, as the indices of array_refs in
1155 DR_BASE_OBJECT are always zero. */
1156 if (TREE_CODE (a) == ARRAY_REF)
1157 continue;
1158 else if (TREE_CODE (a) == COMPONENT_REF)
1160 if (operand_equal_p (TREE_OPERAND (a, 1), TREE_OPERAND (b, 1), 0))
1161 continue;
1163 /* Different fields of unions may overlap. */
1164 base_a = TREE_OPERAND (a, 0);
1165 if (TREE_CODE (TREE_TYPE (base_a)) == UNION_TYPE)
1166 break;
1168 /* Different fields of structures cannot. */
1169 ret = true;
1170 break;
1172 else
1173 break;
1176 VEC_free (tree, heap, comp_a);
1177 VEC_free (tree, heap, comp_b);
1179 return ret;
1182 /* Returns false if we can prove that data references A and B do not alias,
1183 true otherwise. */
1185 static bool
1186 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b)
1188 const_tree addr_a = DR_BASE_ADDRESS (a);
1189 const_tree addr_b = DR_BASE_ADDRESS (b);
1190 const_tree type_a, type_b;
1191 const_tree decl_a = NULL_TREE, decl_b = NULL_TREE;
1193 /* If the sets of virtual operands are disjoint, the memory references do not
1194 alias. */
1195 if (!bitmap_intersect_p (DR_VOPS (a), DR_VOPS (b)))
1196 return false;
1198 /* If the accessed objects are disjoint, the memory references do not
1199 alias. */
1200 if (disjoint_objects_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b)))
1201 return false;
1203 if (!addr_a || !addr_b)
1204 return true;
1206 /* If the references are based on different static objects, they cannot alias
1207 (PTA should be able to disambiguate such accesses, but often it fails to,
1208 since currently we cannot distinguish between pointer and offset in pointer
1209 arithmetics). */
1210 if (TREE_CODE (addr_a) == ADDR_EXPR
1211 && TREE_CODE (addr_b) == ADDR_EXPR)
1212 return TREE_OPERAND (addr_a, 0) == TREE_OPERAND (addr_b, 0);
1214 /* An instruction writing through a restricted pointer is "independent" of any
1215 instruction reading or writing through a different restricted pointer,
1216 in the same block/scope. */
1218 type_a = TREE_TYPE (addr_a);
1219 type_b = TREE_TYPE (addr_b);
1220 gcc_assert (POINTER_TYPE_P (type_a) && POINTER_TYPE_P (type_b));
1222 if (TREE_CODE (addr_a) == SSA_NAME)
1223 decl_a = SSA_NAME_VAR (addr_a);
1224 if (TREE_CODE (addr_b) == SSA_NAME)
1225 decl_b = SSA_NAME_VAR (addr_b);
1227 if (TYPE_RESTRICT (type_a) && TYPE_RESTRICT (type_b)
1228 && (!DR_IS_READ (a) || !DR_IS_READ (b))
1229 && decl_a && DECL_P (decl_a)
1230 && decl_b && DECL_P (decl_b)
1231 && decl_a != decl_b
1232 && TREE_CODE (DECL_CONTEXT (decl_a)) == FUNCTION_DECL
1233 && DECL_CONTEXT (decl_a) == DECL_CONTEXT (decl_b))
1234 return false;
1236 return true;
1239 /* Initialize a data dependence relation between data accesses A and
1240 B. NB_LOOPS is the number of loops surrounding the references: the
1241 size of the classic distance/direction vectors. */
1243 static struct data_dependence_relation *
1244 initialize_data_dependence_relation (struct data_reference *a,
1245 struct data_reference *b,
1246 VEC (loop_p, heap) *loop_nest)
1248 struct data_dependence_relation *res;
1249 unsigned int i;
1251 res = XNEW (struct data_dependence_relation);
1252 DDR_A (res) = a;
1253 DDR_B (res) = b;
1254 DDR_LOOP_NEST (res) = NULL;
1255 DDR_REVERSED_P (res) = false;
1256 DDR_SUBSCRIPTS (res) = NULL;
1257 DDR_DIR_VECTS (res) = NULL;
1258 DDR_DIST_VECTS (res) = NULL;
1260 if (a == NULL || b == NULL)
1262 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1263 return res;
1266 /* If the data references do not alias, then they are independent. */
1267 if (!dr_may_alias_p (a, b))
1269 DDR_ARE_DEPENDENT (res) = chrec_known;
1270 return res;
1273 /* If the references do not access the same object, we do not know
1274 whether they alias or not. */
1275 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1277 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1278 return res;
1281 /* If the base of the object is not invariant in the loop nest, we cannot
1282 analyze it. TODO -- in fact, it would suffice to record that there may
1283 be arbitrary dependences in the loops where the base object varies. */
1284 if (!object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1285 DR_BASE_OBJECT (a)))
1287 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1288 return res;
1291 gcc_assert (DR_NUM_DIMENSIONS (a) == DR_NUM_DIMENSIONS (b));
1293 DDR_AFFINE_P (res) = true;
1294 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1295 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1296 DDR_LOOP_NEST (res) = loop_nest;
1297 DDR_INNER_LOOP (res) = 0;
1299 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1301 struct subscript *subscript;
1303 subscript = XNEW (struct subscript);
1304 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1305 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1306 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1307 SUB_DISTANCE (subscript) = chrec_dont_know;
1308 VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1311 return res;
1314 /* Frees memory used by the conflict function F. */
1316 static void
1317 free_conflict_function (conflict_function *f)
1319 unsigned i;
1321 if (CF_NONTRIVIAL_P (f))
1323 for (i = 0; i < f->n; i++)
1324 affine_fn_free (f->fns[i]);
1326 free (f);
1329 /* Frees memory used by SUBSCRIPTS. */
1331 static void
1332 free_subscripts (VEC (subscript_p, heap) *subscripts)
1334 unsigned i;
1335 subscript_p s;
1337 for (i = 0; VEC_iterate (subscript_p, subscripts, i, s); i++)
1339 free_conflict_function (s->conflicting_iterations_in_a);
1340 free_conflict_function (s->conflicting_iterations_in_b);
1342 VEC_free (subscript_p, heap, subscripts);
1345 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1346 description. */
1348 static inline void
1349 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1350 tree chrec)
1352 if (dump_file && (dump_flags & TDF_DETAILS))
1354 fprintf (dump_file, "(dependence classified: ");
1355 print_generic_expr (dump_file, chrec, 0);
1356 fprintf (dump_file, ")\n");
1359 DDR_ARE_DEPENDENT (ddr) = chrec;
1360 free_subscripts (DDR_SUBSCRIPTS (ddr));
1361 DDR_SUBSCRIPTS (ddr) = NULL;
1364 /* The dependence relation DDR cannot be represented by a distance
1365 vector. */
1367 static inline void
1368 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1370 if (dump_file && (dump_flags & TDF_DETAILS))
1371 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1373 DDR_AFFINE_P (ddr) = false;
1378 /* This section contains the classic Banerjee tests. */
1380 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1381 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1383 static inline bool
1384 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1386 return (evolution_function_is_constant_p (chrec_a)
1387 && evolution_function_is_constant_p (chrec_b));
1390 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1391 variable, i.e., if the SIV (Single Index Variable) test is true. */
1393 static bool
1394 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1396 if ((evolution_function_is_constant_p (chrec_a)
1397 && evolution_function_is_univariate_p (chrec_b))
1398 || (evolution_function_is_constant_p (chrec_b)
1399 && evolution_function_is_univariate_p (chrec_a)))
1400 return true;
1402 if (evolution_function_is_univariate_p (chrec_a)
1403 && evolution_function_is_univariate_p (chrec_b))
1405 switch (TREE_CODE (chrec_a))
1407 case POLYNOMIAL_CHREC:
1408 switch (TREE_CODE (chrec_b))
1410 case POLYNOMIAL_CHREC:
1411 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1412 return false;
1414 default:
1415 return true;
1418 default:
1419 return true;
1423 return false;
1426 /* Creates a conflict function with N dimensions. The affine functions
1427 in each dimension follow. */
1429 static conflict_function *
1430 conflict_fn (unsigned n, ...)
1432 unsigned i;
1433 conflict_function *ret = XCNEW (conflict_function);
1434 va_list ap;
1436 gcc_assert (0 < n && n <= MAX_DIM);
1437 va_start(ap, n);
1439 ret->n = n;
1440 for (i = 0; i < n; i++)
1441 ret->fns[i] = va_arg (ap, affine_fn);
1442 va_end(ap);
1444 return ret;
1447 /* Returns constant affine function with value CST. */
1449 static affine_fn
1450 affine_fn_cst (tree cst)
1452 affine_fn fn = VEC_alloc (tree, heap, 1);
1453 VEC_quick_push (tree, fn, cst);
1454 return fn;
1457 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1459 static affine_fn
1460 affine_fn_univar (tree cst, unsigned dim, tree coef)
1462 affine_fn fn = VEC_alloc (tree, heap, dim + 1);
1463 unsigned i;
1465 gcc_assert (dim > 0);
1466 VEC_quick_push (tree, fn, cst);
1467 for (i = 1; i < dim; i++)
1468 VEC_quick_push (tree, fn, integer_zero_node);
1469 VEC_quick_push (tree, fn, coef);
1470 return fn;
1473 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1474 *OVERLAPS_B are initialized to the functions that describe the
1475 relation between the elements accessed twice by CHREC_A and
1476 CHREC_B. For k >= 0, the following property is verified:
1478 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1480 static void
1481 analyze_ziv_subscript (tree chrec_a,
1482 tree chrec_b,
1483 conflict_function **overlaps_a,
1484 conflict_function **overlaps_b,
1485 tree *last_conflicts)
1487 tree difference;
1488 dependence_stats.num_ziv++;
1490 if (dump_file && (dump_flags & TDF_DETAILS))
1491 fprintf (dump_file, "(analyze_ziv_subscript \n");
1493 chrec_a = chrec_convert (integer_type_node, chrec_a, NULL_TREE);
1494 chrec_b = chrec_convert (integer_type_node, chrec_b, NULL_TREE);
1495 difference = chrec_fold_minus (integer_type_node, chrec_a, chrec_b);
1497 switch (TREE_CODE (difference))
1499 case INTEGER_CST:
1500 if (integer_zerop (difference))
1502 /* The difference is equal to zero: the accessed index
1503 overlaps for each iteration in the loop. */
1504 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1505 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1506 *last_conflicts = chrec_dont_know;
1507 dependence_stats.num_ziv_dependent++;
1509 else
1511 /* The accesses do not overlap. */
1512 *overlaps_a = conflict_fn_no_dependence ();
1513 *overlaps_b = conflict_fn_no_dependence ();
1514 *last_conflicts = integer_zero_node;
1515 dependence_stats.num_ziv_independent++;
1517 break;
1519 default:
1520 /* We're not sure whether the indexes overlap. For the moment,
1521 conservatively answer "don't know". */
1522 if (dump_file && (dump_flags & TDF_DETAILS))
1523 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1525 *overlaps_a = conflict_fn_not_known ();
1526 *overlaps_b = conflict_fn_not_known ();
1527 *last_conflicts = chrec_dont_know;
1528 dependence_stats.num_ziv_unimplemented++;
1529 break;
1532 if (dump_file && (dump_flags & TDF_DETAILS))
1533 fprintf (dump_file, ")\n");
1536 /* Sets NIT to the estimated number of executions of the statements in
1537 LOOP. If CONSERVATIVE is true, we must be sure that NIT is at least as
1538 large as the number of iterations. If we have no reliable estimate,
1539 the function returns false, otherwise returns true. */
1541 bool
1542 estimated_loop_iterations (struct loop *loop, bool conservative,
1543 double_int *nit)
1545 estimate_numbers_of_iterations_loop (loop);
1546 if (conservative)
1548 if (!loop->any_upper_bound)
1549 return false;
1551 *nit = loop->nb_iterations_upper_bound;
1553 else
1555 if (!loop->any_estimate)
1556 return false;
1558 *nit = loop->nb_iterations_estimate;
1561 return true;
1564 /* Similar to estimated_loop_iterations, but returns the estimate only
1565 if it fits to HOST_WIDE_INT. If this is not the case, or the estimate
1566 on the number of iterations of LOOP could not be derived, returns -1. */
1568 HOST_WIDE_INT
1569 estimated_loop_iterations_int (struct loop *loop, bool conservative)
1571 double_int nit;
1572 HOST_WIDE_INT hwi_nit;
1574 if (!estimated_loop_iterations (loop, conservative, &nit))
1575 return -1;
1577 if (!double_int_fits_in_shwi_p (nit))
1578 return -1;
1579 hwi_nit = double_int_to_shwi (nit);
1581 return hwi_nit < 0 ? -1 : hwi_nit;
1584 /* Similar to estimated_loop_iterations, but returns the estimate as a tree,
1585 and only if it fits to the int type. If this is not the case, or the
1586 estimate on the number of iterations of LOOP could not be derived, returns
1587 chrec_dont_know. */
1589 static tree
1590 estimated_loop_iterations_tree (struct loop *loop, bool conservative)
1592 double_int nit;
1593 tree type;
1595 if (!estimated_loop_iterations (loop, conservative, &nit))
1596 return chrec_dont_know;
1598 type = lang_hooks.types.type_for_size (INT_TYPE_SIZE, true);
1599 if (!double_int_fits_to_tree_p (type, nit))
1600 return chrec_dont_know;
1602 return double_int_to_tree (type, nit);
1605 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1606 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1607 *OVERLAPS_B are initialized to the functions that describe the
1608 relation between the elements accessed twice by CHREC_A and
1609 CHREC_B. For k >= 0, the following property is verified:
1611 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1613 static void
1614 analyze_siv_subscript_cst_affine (tree chrec_a,
1615 tree chrec_b,
1616 conflict_function **overlaps_a,
1617 conflict_function **overlaps_b,
1618 tree *last_conflicts)
1620 bool value0, value1, value2;
1621 tree difference, tmp;
1623 chrec_a = chrec_convert (integer_type_node, chrec_a, NULL_TREE);
1624 chrec_b = chrec_convert (integer_type_node, chrec_b, NULL_TREE);
1625 difference = chrec_fold_minus
1626 (integer_type_node, initial_condition (chrec_b), chrec_a);
1628 if (!chrec_is_positive (initial_condition (difference), &value0))
1630 if (dump_file && (dump_flags & TDF_DETAILS))
1631 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1633 dependence_stats.num_siv_unimplemented++;
1634 *overlaps_a = conflict_fn_not_known ();
1635 *overlaps_b = conflict_fn_not_known ();
1636 *last_conflicts = chrec_dont_know;
1637 return;
1639 else
1641 if (value0 == false)
1643 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1645 if (dump_file && (dump_flags & TDF_DETAILS))
1646 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1648 *overlaps_a = conflict_fn_not_known ();
1649 *overlaps_b = conflict_fn_not_known ();
1650 *last_conflicts = chrec_dont_know;
1651 dependence_stats.num_siv_unimplemented++;
1652 return;
1654 else
1656 if (value1 == true)
1658 /* Example:
1659 chrec_a = 12
1660 chrec_b = {10, +, 1}
1663 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1665 HOST_WIDE_INT numiter;
1666 struct loop *loop = get_chrec_loop (chrec_b);
1668 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1669 tmp = fold_build2 (EXACT_DIV_EXPR, integer_type_node,
1670 fold_build1 (ABS_EXPR,
1671 integer_type_node,
1672 difference),
1673 CHREC_RIGHT (chrec_b));
1674 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1675 *last_conflicts = integer_one_node;
1678 /* Perform weak-zero siv test to see if overlap is
1679 outside the loop bounds. */
1680 numiter = estimated_loop_iterations_int (loop, false);
1682 if (numiter >= 0
1683 && compare_tree_int (tmp, numiter) > 0)
1685 free_conflict_function (*overlaps_a);
1686 free_conflict_function (*overlaps_b);
1687 *overlaps_a = conflict_fn_no_dependence ();
1688 *overlaps_b = conflict_fn_no_dependence ();
1689 *last_conflicts = integer_zero_node;
1690 dependence_stats.num_siv_independent++;
1691 return;
1693 dependence_stats.num_siv_dependent++;
1694 return;
1697 /* When the step does not divide the difference, there are
1698 no overlaps. */
1699 else
1701 *overlaps_a = conflict_fn_no_dependence ();
1702 *overlaps_b = conflict_fn_no_dependence ();
1703 *last_conflicts = integer_zero_node;
1704 dependence_stats.num_siv_independent++;
1705 return;
1709 else
1711 /* Example:
1712 chrec_a = 12
1713 chrec_b = {10, +, -1}
1715 In this case, chrec_a will not overlap with chrec_b. */
1716 *overlaps_a = conflict_fn_no_dependence ();
1717 *overlaps_b = conflict_fn_no_dependence ();
1718 *last_conflicts = integer_zero_node;
1719 dependence_stats.num_siv_independent++;
1720 return;
1724 else
1726 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1728 if (dump_file && (dump_flags & TDF_DETAILS))
1729 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1731 *overlaps_a = conflict_fn_not_known ();
1732 *overlaps_b = conflict_fn_not_known ();
1733 *last_conflicts = chrec_dont_know;
1734 dependence_stats.num_siv_unimplemented++;
1735 return;
1737 else
1739 if (value2 == false)
1741 /* Example:
1742 chrec_a = 3
1743 chrec_b = {10, +, -1}
1745 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1747 HOST_WIDE_INT numiter;
1748 struct loop *loop = get_chrec_loop (chrec_b);
1750 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1751 tmp = fold_build2 (EXACT_DIV_EXPR,
1752 integer_type_node, difference,
1753 CHREC_RIGHT (chrec_b));
1754 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1755 *last_conflicts = integer_one_node;
1757 /* Perform weak-zero siv test to see if overlap is
1758 outside the loop bounds. */
1759 numiter = estimated_loop_iterations_int (loop, false);
1761 if (numiter >= 0
1762 && compare_tree_int (tmp, numiter) > 0)
1764 free_conflict_function (*overlaps_a);
1765 free_conflict_function (*overlaps_b);
1766 *overlaps_a = conflict_fn_no_dependence ();
1767 *overlaps_b = conflict_fn_no_dependence ();
1768 *last_conflicts = integer_zero_node;
1769 dependence_stats.num_siv_independent++;
1770 return;
1772 dependence_stats.num_siv_dependent++;
1773 return;
1776 /* When the step does not divide the difference, there
1777 are no overlaps. */
1778 else
1780 *overlaps_a = conflict_fn_no_dependence ();
1781 *overlaps_b = conflict_fn_no_dependence ();
1782 *last_conflicts = integer_zero_node;
1783 dependence_stats.num_siv_independent++;
1784 return;
1787 else
1789 /* Example:
1790 chrec_a = 3
1791 chrec_b = {4, +, 1}
1793 In this case, chrec_a will not overlap with chrec_b. */
1794 *overlaps_a = conflict_fn_no_dependence ();
1795 *overlaps_b = conflict_fn_no_dependence ();
1796 *last_conflicts = integer_zero_node;
1797 dependence_stats.num_siv_independent++;
1798 return;
1805 /* Helper recursive function for initializing the matrix A. Returns
1806 the initial value of CHREC. */
1808 static int
1809 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
1811 gcc_assert (chrec);
1813 if (TREE_CODE (chrec) != POLYNOMIAL_CHREC)
1814 return int_cst_value (chrec);
1816 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
1817 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
1820 #define FLOOR_DIV(x,y) ((x) / (y))
1822 /* Solves the special case of the Diophantine equation:
1823 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
1825 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
1826 number of iterations that loops X and Y run. The overlaps will be
1827 constructed as evolutions in dimension DIM. */
1829 static void
1830 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
1831 affine_fn *overlaps_a,
1832 affine_fn *overlaps_b,
1833 tree *last_conflicts, int dim)
1835 if (((step_a > 0 && step_b > 0)
1836 || (step_a < 0 && step_b < 0)))
1838 int step_overlaps_a, step_overlaps_b;
1839 int gcd_steps_a_b, last_conflict, tau2;
1841 gcd_steps_a_b = gcd (step_a, step_b);
1842 step_overlaps_a = step_b / gcd_steps_a_b;
1843 step_overlaps_b = step_a / gcd_steps_a_b;
1845 if (niter > 0)
1847 tau2 = FLOOR_DIV (niter, step_overlaps_a);
1848 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
1849 last_conflict = tau2;
1850 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
1852 else
1853 *last_conflicts = chrec_dont_know;
1855 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
1856 build_int_cst (NULL_TREE,
1857 step_overlaps_a));
1858 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
1859 build_int_cst (NULL_TREE,
1860 step_overlaps_b));
1863 else
1865 *overlaps_a = affine_fn_cst (integer_zero_node);
1866 *overlaps_b = affine_fn_cst (integer_zero_node);
1867 *last_conflicts = integer_zero_node;
1871 /* Solves the special case of a Diophantine equation where CHREC_A is
1872 an affine bivariate function, and CHREC_B is an affine univariate
1873 function. For example,
1875 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
1877 has the following overlapping functions:
1879 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
1880 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
1881 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
1883 FORNOW: This is a specialized implementation for a case occurring in
1884 a common benchmark. Implement the general algorithm. */
1886 static void
1887 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
1888 conflict_function **overlaps_a,
1889 conflict_function **overlaps_b,
1890 tree *last_conflicts)
1892 bool xz_p, yz_p, xyz_p;
1893 int step_x, step_y, step_z;
1894 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
1895 affine_fn overlaps_a_xz, overlaps_b_xz;
1896 affine_fn overlaps_a_yz, overlaps_b_yz;
1897 affine_fn overlaps_a_xyz, overlaps_b_xyz;
1898 affine_fn ova1, ova2, ovb;
1899 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
1901 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
1902 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
1903 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
1905 niter_x =
1906 estimated_loop_iterations_int (get_chrec_loop (CHREC_LEFT (chrec_a)),
1907 false);
1908 niter_y = estimated_loop_iterations_int (get_chrec_loop (chrec_a), false);
1909 niter_z = estimated_loop_iterations_int (get_chrec_loop (chrec_b), false);
1911 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
1913 if (dump_file && (dump_flags & TDF_DETAILS))
1914 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
1916 *overlaps_a = conflict_fn_not_known ();
1917 *overlaps_b = conflict_fn_not_known ();
1918 *last_conflicts = chrec_dont_know;
1919 return;
1922 niter = MIN (niter_x, niter_z);
1923 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
1924 &overlaps_a_xz,
1925 &overlaps_b_xz,
1926 &last_conflicts_xz, 1);
1927 niter = MIN (niter_y, niter_z);
1928 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
1929 &overlaps_a_yz,
1930 &overlaps_b_yz,
1931 &last_conflicts_yz, 2);
1932 niter = MIN (niter_x, niter_z);
1933 niter = MIN (niter_y, niter);
1934 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
1935 &overlaps_a_xyz,
1936 &overlaps_b_xyz,
1937 &last_conflicts_xyz, 3);
1939 xz_p = !integer_zerop (last_conflicts_xz);
1940 yz_p = !integer_zerop (last_conflicts_yz);
1941 xyz_p = !integer_zerop (last_conflicts_xyz);
1943 if (xz_p || yz_p || xyz_p)
1945 ova1 = affine_fn_cst (integer_zero_node);
1946 ova2 = affine_fn_cst (integer_zero_node);
1947 ovb = affine_fn_cst (integer_zero_node);
1948 if (xz_p)
1950 affine_fn t0 = ova1;
1951 affine_fn t2 = ovb;
1953 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
1954 ovb = affine_fn_plus (ovb, overlaps_b_xz);
1955 affine_fn_free (t0);
1956 affine_fn_free (t2);
1957 *last_conflicts = last_conflicts_xz;
1959 if (yz_p)
1961 affine_fn t0 = ova2;
1962 affine_fn t2 = ovb;
1964 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
1965 ovb = affine_fn_plus (ovb, overlaps_b_yz);
1966 affine_fn_free (t0);
1967 affine_fn_free (t2);
1968 *last_conflicts = last_conflicts_yz;
1970 if (xyz_p)
1972 affine_fn t0 = ova1;
1973 affine_fn t2 = ova2;
1974 affine_fn t4 = ovb;
1976 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
1977 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
1978 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
1979 affine_fn_free (t0);
1980 affine_fn_free (t2);
1981 affine_fn_free (t4);
1982 *last_conflicts = last_conflicts_xyz;
1984 *overlaps_a = conflict_fn (2, ova1, ova2);
1985 *overlaps_b = conflict_fn (1, ovb);
1987 else
1989 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1990 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1991 *last_conflicts = integer_zero_node;
1994 affine_fn_free (overlaps_a_xz);
1995 affine_fn_free (overlaps_b_xz);
1996 affine_fn_free (overlaps_a_yz);
1997 affine_fn_free (overlaps_b_yz);
1998 affine_fn_free (overlaps_a_xyz);
1999 affine_fn_free (overlaps_b_xyz);
2002 /* Determines the overlapping elements due to accesses CHREC_A and
2003 CHREC_B, that are affine functions. This function cannot handle
2004 symbolic evolution functions, ie. when initial conditions are
2005 parameters, because it uses lambda matrices of integers. */
2007 static void
2008 analyze_subscript_affine_affine (tree chrec_a,
2009 tree chrec_b,
2010 conflict_function **overlaps_a,
2011 conflict_function **overlaps_b,
2012 tree *last_conflicts)
2014 unsigned nb_vars_a, nb_vars_b, dim;
2015 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2016 lambda_matrix A, U, S;
2018 if (eq_evolutions_p (chrec_a, chrec_b))
2020 /* The accessed index overlaps for each iteration in the
2021 loop. */
2022 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2023 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2024 *last_conflicts = chrec_dont_know;
2025 return;
2027 if (dump_file && (dump_flags & TDF_DETAILS))
2028 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2030 /* For determining the initial intersection, we have to solve a
2031 Diophantine equation. This is the most time consuming part.
2033 For answering to the question: "Is there a dependence?" we have
2034 to prove that there exists a solution to the Diophantine
2035 equation, and that the solution is in the iteration domain,
2036 i.e. the solution is positive or zero, and that the solution
2037 happens before the upper bound loop.nb_iterations. Otherwise
2038 there is no dependence. This function outputs a description of
2039 the iterations that hold the intersections. */
2041 nb_vars_a = nb_vars_in_chrec (chrec_a);
2042 nb_vars_b = nb_vars_in_chrec (chrec_b);
2044 dim = nb_vars_a + nb_vars_b;
2045 U = lambda_matrix_new (dim, dim);
2046 A = lambda_matrix_new (dim, 1);
2047 S = lambda_matrix_new (dim, 1);
2049 init_a = initialize_matrix_A (A, chrec_a, 0, 1);
2050 init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
2051 gamma = init_b - init_a;
2053 /* Don't do all the hard work of solving the Diophantine equation
2054 when we already know the solution: for example,
2055 | {3, +, 1}_1
2056 | {3, +, 4}_2
2057 | gamma = 3 - 3 = 0.
2058 Then the first overlap occurs during the first iterations:
2059 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2061 if (gamma == 0)
2063 if (nb_vars_a == 1 && nb_vars_b == 1)
2065 HOST_WIDE_INT step_a, step_b;
2066 HOST_WIDE_INT niter, niter_a, niter_b;
2067 affine_fn ova, ovb;
2069 niter_a = estimated_loop_iterations_int (get_chrec_loop (chrec_a),
2070 false);
2071 niter_b = estimated_loop_iterations_int (get_chrec_loop (chrec_b),
2072 false);
2073 niter = MIN (niter_a, niter_b);
2074 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2075 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2077 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2078 &ova, &ovb,
2079 last_conflicts, 1);
2080 *overlaps_a = conflict_fn (1, ova);
2081 *overlaps_b = conflict_fn (1, ovb);
2084 else if (nb_vars_a == 2 && nb_vars_b == 1)
2085 compute_overlap_steps_for_affine_1_2
2086 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2088 else if (nb_vars_a == 1 && nb_vars_b == 2)
2089 compute_overlap_steps_for_affine_1_2
2090 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2092 else
2094 if (dump_file && (dump_flags & TDF_DETAILS))
2095 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2096 *overlaps_a = conflict_fn_not_known ();
2097 *overlaps_b = conflict_fn_not_known ();
2098 *last_conflicts = chrec_dont_know;
2100 goto end_analyze_subs_aa;
2103 /* U.A = S */
2104 lambda_matrix_right_hermite (A, dim, 1, S, U);
2106 if (S[0][0] < 0)
2108 S[0][0] *= -1;
2109 lambda_matrix_row_negate (U, dim, 0);
2111 gcd_alpha_beta = S[0][0];
2113 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2114 but that is a quite strange case. Instead of ICEing, answer
2115 don't know. */
2116 if (gcd_alpha_beta == 0)
2118 *overlaps_a = conflict_fn_not_known ();
2119 *overlaps_b = conflict_fn_not_known ();
2120 *last_conflicts = chrec_dont_know;
2121 goto end_analyze_subs_aa;
2124 /* The classic "gcd-test". */
2125 if (!int_divides_p (gcd_alpha_beta, gamma))
2127 /* The "gcd-test" has determined that there is no integer
2128 solution, i.e. there is no dependence. */
2129 *overlaps_a = conflict_fn_no_dependence ();
2130 *overlaps_b = conflict_fn_no_dependence ();
2131 *last_conflicts = integer_zero_node;
2134 /* Both access functions are univariate. This includes SIV and MIV cases. */
2135 else if (nb_vars_a == 1 && nb_vars_b == 1)
2137 /* Both functions should have the same evolution sign. */
2138 if (((A[0][0] > 0 && -A[1][0] > 0)
2139 || (A[0][0] < 0 && -A[1][0] < 0)))
2141 /* The solutions are given by:
2143 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2144 | [u21 u22] [y0]
2146 For a given integer t. Using the following variables,
2148 | i0 = u11 * gamma / gcd_alpha_beta
2149 | j0 = u12 * gamma / gcd_alpha_beta
2150 | i1 = u21
2151 | j1 = u22
2153 the solutions are:
2155 | x0 = i0 + i1 * t,
2156 | y0 = j0 + j1 * t. */
2157 HOST_WIDE_INT i0, j0, i1, j1;
2159 i0 = U[0][0] * gamma / gcd_alpha_beta;
2160 j0 = U[0][1] * gamma / gcd_alpha_beta;
2161 i1 = U[1][0];
2162 j1 = U[1][1];
2164 if ((i1 == 0 && i0 < 0)
2165 || (j1 == 0 && j0 < 0))
2167 /* There is no solution.
2168 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2169 falls in here, but for the moment we don't look at the
2170 upper bound of the iteration domain. */
2171 *overlaps_a = conflict_fn_no_dependence ();
2172 *overlaps_b = conflict_fn_no_dependence ();
2173 *last_conflicts = integer_zero_node;
2174 goto end_analyze_subs_aa;
2177 if (i1 > 0 && j1 > 0)
2179 HOST_WIDE_INT niter_a = estimated_loop_iterations_int
2180 (get_chrec_loop (chrec_a), false);
2181 HOST_WIDE_INT niter_b = estimated_loop_iterations_int
2182 (get_chrec_loop (chrec_b), false);
2183 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2185 /* (X0, Y0) is a solution of the Diophantine equation:
2186 "chrec_a (X0) = chrec_b (Y0)". */
2187 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2188 CEIL (-j0, j1));
2189 HOST_WIDE_INT x0 = i1 * tau1 + i0;
2190 HOST_WIDE_INT y0 = j1 * tau1 + j0;
2192 /* (X1, Y1) is the smallest positive solution of the eq
2193 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2194 first conflict occurs. */
2195 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2196 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2197 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2199 if (niter > 0)
2201 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2202 FLOOR_DIV (niter - j0, j1));
2203 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2205 /* If the overlap occurs outside of the bounds of the
2206 loop, there is no dependence. */
2207 if (x1 > niter || y1 > niter)
2209 *overlaps_a = conflict_fn_no_dependence ();
2210 *overlaps_b = conflict_fn_no_dependence ();
2211 *last_conflicts = integer_zero_node;
2212 goto end_analyze_subs_aa;
2214 else
2215 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2217 else
2218 *last_conflicts = chrec_dont_know;
2220 *overlaps_a
2221 = conflict_fn (1,
2222 affine_fn_univar (build_int_cst (NULL_TREE, x1),
2224 build_int_cst (NULL_TREE, i1)));
2225 *overlaps_b
2226 = conflict_fn (1,
2227 affine_fn_univar (build_int_cst (NULL_TREE, y1),
2229 build_int_cst (NULL_TREE, j1)));
2231 else
2233 /* FIXME: For the moment, the upper bound of the
2234 iteration domain for i and j is not checked. */
2235 if (dump_file && (dump_flags & TDF_DETAILS))
2236 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2237 *overlaps_a = conflict_fn_not_known ();
2238 *overlaps_b = conflict_fn_not_known ();
2239 *last_conflicts = chrec_dont_know;
2242 else
2244 if (dump_file && (dump_flags & TDF_DETAILS))
2245 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2246 *overlaps_a = conflict_fn_not_known ();
2247 *overlaps_b = conflict_fn_not_known ();
2248 *last_conflicts = chrec_dont_know;
2251 else
2253 if (dump_file && (dump_flags & TDF_DETAILS))
2254 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2255 *overlaps_a = conflict_fn_not_known ();
2256 *overlaps_b = conflict_fn_not_known ();
2257 *last_conflicts = chrec_dont_know;
2260 end_analyze_subs_aa:
2261 if (dump_file && (dump_flags & TDF_DETAILS))
2263 fprintf (dump_file, " (overlaps_a = ");
2264 dump_conflict_function (dump_file, *overlaps_a);
2265 fprintf (dump_file, ")\n (overlaps_b = ");
2266 dump_conflict_function (dump_file, *overlaps_b);
2267 fprintf (dump_file, ")\n");
2268 fprintf (dump_file, ")\n");
2272 /* Returns true when analyze_subscript_affine_affine can be used for
2273 determining the dependence relation between chrec_a and chrec_b,
2274 that contain symbols. This function modifies chrec_a and chrec_b
2275 such that the analysis result is the same, and such that they don't
2276 contain symbols, and then can safely be passed to the analyzer.
2278 Example: The analysis of the following tuples of evolutions produce
2279 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2280 vs. {0, +, 1}_1
2282 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2283 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2286 static bool
2287 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2289 tree diff, type, left_a, left_b, right_b;
2291 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2292 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2293 /* FIXME: For the moment not handled. Might be refined later. */
2294 return false;
2296 type = chrec_type (*chrec_a);
2297 left_a = CHREC_LEFT (*chrec_a);
2298 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL_TREE);
2299 diff = chrec_fold_minus (type, left_a, left_b);
2301 if (!evolution_function_is_constant_p (diff))
2302 return false;
2304 if (dump_file && (dump_flags & TDF_DETAILS))
2305 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2307 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2308 diff, CHREC_RIGHT (*chrec_a));
2309 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL_TREE);
2310 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2311 build_int_cst (type, 0),
2312 right_b);
2313 return true;
2316 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2317 *OVERLAPS_B are initialized to the functions that describe the
2318 relation between the elements accessed twice by CHREC_A and
2319 CHREC_B. For k >= 0, the following property is verified:
2321 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2323 static void
2324 analyze_siv_subscript (tree chrec_a,
2325 tree chrec_b,
2326 conflict_function **overlaps_a,
2327 conflict_function **overlaps_b,
2328 tree *last_conflicts)
2330 dependence_stats.num_siv++;
2332 if (dump_file && (dump_flags & TDF_DETAILS))
2333 fprintf (dump_file, "(analyze_siv_subscript \n");
2335 if (evolution_function_is_constant_p (chrec_a)
2336 && evolution_function_is_affine_p (chrec_b))
2337 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2338 overlaps_a, overlaps_b, last_conflicts);
2340 else if (evolution_function_is_affine_p (chrec_a)
2341 && evolution_function_is_constant_p (chrec_b))
2342 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2343 overlaps_b, overlaps_a, last_conflicts);
2345 else if (evolution_function_is_affine_p (chrec_a)
2346 && evolution_function_is_affine_p (chrec_b))
2348 if (!chrec_contains_symbols (chrec_a)
2349 && !chrec_contains_symbols (chrec_b))
2351 analyze_subscript_affine_affine (chrec_a, chrec_b,
2352 overlaps_a, overlaps_b,
2353 last_conflicts);
2355 if (CF_NOT_KNOWN_P (*overlaps_a)
2356 || CF_NOT_KNOWN_P (*overlaps_b))
2357 dependence_stats.num_siv_unimplemented++;
2358 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2359 || CF_NO_DEPENDENCE_P (*overlaps_b))
2360 dependence_stats.num_siv_independent++;
2361 else
2362 dependence_stats.num_siv_dependent++;
2364 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2365 &chrec_b))
2367 analyze_subscript_affine_affine (chrec_a, chrec_b,
2368 overlaps_a, overlaps_b,
2369 last_conflicts);
2371 if (CF_NOT_KNOWN_P (*overlaps_a)
2372 || CF_NOT_KNOWN_P (*overlaps_b))
2373 dependence_stats.num_siv_unimplemented++;
2374 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2375 || CF_NO_DEPENDENCE_P (*overlaps_b))
2376 dependence_stats.num_siv_independent++;
2377 else
2378 dependence_stats.num_siv_dependent++;
2380 else
2381 goto siv_subscript_dontknow;
2384 else
2386 siv_subscript_dontknow:;
2387 if (dump_file && (dump_flags & TDF_DETAILS))
2388 fprintf (dump_file, "siv test failed: unimplemented.\n");
2389 *overlaps_a = conflict_fn_not_known ();
2390 *overlaps_b = conflict_fn_not_known ();
2391 *last_conflicts = chrec_dont_know;
2392 dependence_stats.num_siv_unimplemented++;
2395 if (dump_file && (dump_flags & TDF_DETAILS))
2396 fprintf (dump_file, ")\n");
2399 /* Returns false if we can prove that the greatest common divisor of the steps
2400 of CHREC does not divide CST, false otherwise. */
2402 static bool
2403 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2405 HOST_WIDE_INT cd = 0, val;
2406 tree step;
2408 if (!host_integerp (cst, 0))
2409 return true;
2410 val = tree_low_cst (cst, 0);
2412 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2414 step = CHREC_RIGHT (chrec);
2415 if (!host_integerp (step, 0))
2416 return true;
2417 cd = gcd (cd, tree_low_cst (step, 0));
2418 chrec = CHREC_LEFT (chrec);
2421 return val % cd == 0;
2424 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2425 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2426 functions that describe the relation between the elements accessed
2427 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2428 is verified:
2430 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2432 static void
2433 analyze_miv_subscript (tree chrec_a,
2434 tree chrec_b,
2435 conflict_function **overlaps_a,
2436 conflict_function **overlaps_b,
2437 tree *last_conflicts,
2438 struct loop *loop_nest)
2440 /* FIXME: This is a MIV subscript, not yet handled.
2441 Example: (A[{1, +, 1}_1] vs. A[{1, +, 1}_2]) that comes from
2442 (A[i] vs. A[j]).
2444 In the SIV test we had to solve a Diophantine equation with two
2445 variables. In the MIV case we have to solve a Diophantine
2446 equation with 2*n variables (if the subscript uses n IVs).
2448 tree difference;
2449 dependence_stats.num_miv++;
2450 if (dump_file && (dump_flags & TDF_DETAILS))
2451 fprintf (dump_file, "(analyze_miv_subscript \n");
2453 chrec_a = chrec_convert (integer_type_node, chrec_a, NULL_TREE);
2454 chrec_b = chrec_convert (integer_type_node, chrec_b, NULL_TREE);
2455 difference = chrec_fold_minus (integer_type_node, chrec_a, chrec_b);
2457 if (eq_evolutions_p (chrec_a, chrec_b))
2459 /* Access functions are the same: all the elements are accessed
2460 in the same order. */
2461 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2462 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2463 *last_conflicts = estimated_loop_iterations_tree
2464 (get_chrec_loop (chrec_a), true);
2465 dependence_stats.num_miv_dependent++;
2468 else if (evolution_function_is_constant_p (difference)
2469 /* For the moment, the following is verified:
2470 evolution_function_is_affine_multivariate_p (chrec_a,
2471 loop_nest->num) */
2472 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2474 /* testsuite/.../ssa-chrec-33.c
2475 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2477 The difference is 1, and all the evolution steps are multiples
2478 of 2, consequently there are no overlapping elements. */
2479 *overlaps_a = conflict_fn_no_dependence ();
2480 *overlaps_b = conflict_fn_no_dependence ();
2481 *last_conflicts = integer_zero_node;
2482 dependence_stats.num_miv_independent++;
2485 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2486 && !chrec_contains_symbols (chrec_a)
2487 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2488 && !chrec_contains_symbols (chrec_b))
2490 /* testsuite/.../ssa-chrec-35.c
2491 {0, +, 1}_2 vs. {0, +, 1}_3
2492 the overlapping elements are respectively located at iterations:
2493 {0, +, 1}_x and {0, +, 1}_x,
2494 in other words, we have the equality:
2495 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2497 Other examples:
2498 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2499 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2501 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2502 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2504 analyze_subscript_affine_affine (chrec_a, chrec_b,
2505 overlaps_a, overlaps_b, last_conflicts);
2507 if (CF_NOT_KNOWN_P (*overlaps_a)
2508 || CF_NOT_KNOWN_P (*overlaps_b))
2509 dependence_stats.num_miv_unimplemented++;
2510 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2511 || CF_NO_DEPENDENCE_P (*overlaps_b))
2512 dependence_stats.num_miv_independent++;
2513 else
2514 dependence_stats.num_miv_dependent++;
2517 else
2519 /* When the analysis is too difficult, answer "don't know". */
2520 if (dump_file && (dump_flags & TDF_DETAILS))
2521 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2523 *overlaps_a = conflict_fn_not_known ();
2524 *overlaps_b = conflict_fn_not_known ();
2525 *last_conflicts = chrec_dont_know;
2526 dependence_stats.num_miv_unimplemented++;
2529 if (dump_file && (dump_flags & TDF_DETAILS))
2530 fprintf (dump_file, ")\n");
2533 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2534 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2535 OVERLAP_ITERATIONS_B are initialized with two functions that
2536 describe the iterations that contain conflicting elements.
2538 Remark: For an integer k >= 0, the following equality is true:
2540 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2543 static void
2544 analyze_overlapping_iterations (tree chrec_a,
2545 tree chrec_b,
2546 conflict_function **overlap_iterations_a,
2547 conflict_function **overlap_iterations_b,
2548 tree *last_conflicts, struct loop *loop_nest)
2550 unsigned int lnn = loop_nest->num;
2552 dependence_stats.num_subscript_tests++;
2554 if (dump_file && (dump_flags & TDF_DETAILS))
2556 fprintf (dump_file, "(analyze_overlapping_iterations \n");
2557 fprintf (dump_file, " (chrec_a = ");
2558 print_generic_expr (dump_file, chrec_a, 0);
2559 fprintf (dump_file, ")\n (chrec_b = ");
2560 print_generic_expr (dump_file, chrec_b, 0);
2561 fprintf (dump_file, ")\n");
2564 if (chrec_a == NULL_TREE
2565 || chrec_b == NULL_TREE
2566 || chrec_contains_undetermined (chrec_a)
2567 || chrec_contains_undetermined (chrec_b))
2569 dependence_stats.num_subscript_undetermined++;
2571 *overlap_iterations_a = conflict_fn_not_known ();
2572 *overlap_iterations_b = conflict_fn_not_known ();
2575 /* If they are the same chrec, and are affine, they overlap
2576 on every iteration. */
2577 else if (eq_evolutions_p (chrec_a, chrec_b)
2578 && evolution_function_is_affine_multivariate_p (chrec_a, lnn))
2580 dependence_stats.num_same_subscript_function++;
2581 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2582 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2583 *last_conflicts = chrec_dont_know;
2586 /* If they aren't the same, and aren't affine, we can't do anything
2587 yet. */
2588 else if ((chrec_contains_symbols (chrec_a)
2589 || chrec_contains_symbols (chrec_b))
2590 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2591 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
2593 dependence_stats.num_subscript_undetermined++;
2594 *overlap_iterations_a = conflict_fn_not_known ();
2595 *overlap_iterations_b = conflict_fn_not_known ();
2598 else if (ziv_subscript_p (chrec_a, chrec_b))
2599 analyze_ziv_subscript (chrec_a, chrec_b,
2600 overlap_iterations_a, overlap_iterations_b,
2601 last_conflicts);
2603 else if (siv_subscript_p (chrec_a, chrec_b))
2604 analyze_siv_subscript (chrec_a, chrec_b,
2605 overlap_iterations_a, overlap_iterations_b,
2606 last_conflicts);
2608 else
2609 analyze_miv_subscript (chrec_a, chrec_b,
2610 overlap_iterations_a, overlap_iterations_b,
2611 last_conflicts, loop_nest);
2613 if (dump_file && (dump_flags & TDF_DETAILS))
2615 fprintf (dump_file, " (overlap_iterations_a = ");
2616 dump_conflict_function (dump_file, *overlap_iterations_a);
2617 fprintf (dump_file, ")\n (overlap_iterations_b = ");
2618 dump_conflict_function (dump_file, *overlap_iterations_b);
2619 fprintf (dump_file, ")\n");
2620 fprintf (dump_file, ")\n");
2624 /* Helper function for uniquely inserting distance vectors. */
2626 static void
2627 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
2629 unsigned i;
2630 lambda_vector v;
2632 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, v); i++)
2633 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
2634 return;
2636 VEC_safe_push (lambda_vector, heap, DDR_DIST_VECTS (ddr), dist_v);
2639 /* Helper function for uniquely inserting direction vectors. */
2641 static void
2642 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
2644 unsigned i;
2645 lambda_vector v;
2647 for (i = 0; VEC_iterate (lambda_vector, DDR_DIR_VECTS (ddr), i, v); i++)
2648 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
2649 return;
2651 VEC_safe_push (lambda_vector, heap, DDR_DIR_VECTS (ddr), dir_v);
2654 /* Add a distance of 1 on all the loops outer than INDEX. If we
2655 haven't yet determined a distance for this outer loop, push a new
2656 distance vector composed of the previous distance, and a distance
2657 of 1 for this outer loop. Example:
2659 | loop_1
2660 | loop_2
2661 | A[10]
2662 | endloop_2
2663 | endloop_1
2665 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
2666 save (0, 1), then we have to save (1, 0). */
2668 static void
2669 add_outer_distances (struct data_dependence_relation *ddr,
2670 lambda_vector dist_v, int index)
2672 /* For each outer loop where init_v is not set, the accesses are
2673 in dependence of distance 1 in the loop. */
2674 while (--index >= 0)
2676 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2677 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
2678 save_v[index] = 1;
2679 save_dist_v (ddr, save_v);
2683 /* Return false when fail to represent the data dependence as a
2684 distance vector. INIT_B is set to true when a component has been
2685 added to the distance vector DIST_V. INDEX_CARRY is then set to
2686 the index in DIST_V that carries the dependence. */
2688 static bool
2689 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
2690 struct data_reference *ddr_a,
2691 struct data_reference *ddr_b,
2692 lambda_vector dist_v, bool *init_b,
2693 int *index_carry)
2695 unsigned i;
2696 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2698 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2700 tree access_fn_a, access_fn_b;
2701 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
2703 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2705 non_affine_dependence_relation (ddr);
2706 return false;
2709 access_fn_a = DR_ACCESS_FN (ddr_a, i);
2710 access_fn_b = DR_ACCESS_FN (ddr_b, i);
2712 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
2713 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
2715 int dist, index;
2716 int index_a = index_in_loop_nest (CHREC_VARIABLE (access_fn_a),
2717 DDR_LOOP_NEST (ddr));
2718 int index_b = index_in_loop_nest (CHREC_VARIABLE (access_fn_b),
2719 DDR_LOOP_NEST (ddr));
2721 /* The dependence is carried by the outermost loop. Example:
2722 | loop_1
2723 | A[{4, +, 1}_1]
2724 | loop_2
2725 | A[{5, +, 1}_2]
2726 | endloop_2
2727 | endloop_1
2728 In this case, the dependence is carried by loop_1. */
2729 index = index_a < index_b ? index_a : index_b;
2730 *index_carry = MIN (index, *index_carry);
2732 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2734 non_affine_dependence_relation (ddr);
2735 return false;
2738 dist = int_cst_value (SUB_DISTANCE (subscript));
2740 /* This is the subscript coupling test. If we have already
2741 recorded a distance for this loop (a distance coming from
2742 another subscript), it should be the same. For example,
2743 in the following code, there is no dependence:
2745 | loop i = 0, N, 1
2746 | T[i+1][i] = ...
2747 | ... = T[i][i]
2748 | endloop
2750 if (init_v[index] != 0 && dist_v[index] != dist)
2752 finalize_ddr_dependent (ddr, chrec_known);
2753 return false;
2756 dist_v[index] = dist;
2757 init_v[index] = 1;
2758 *init_b = true;
2760 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
2762 /* This can be for example an affine vs. constant dependence
2763 (T[i] vs. T[3]) that is not an affine dependence and is
2764 not representable as a distance vector. */
2765 non_affine_dependence_relation (ddr);
2766 return false;
2770 return true;
2773 /* Return true when the DDR contains two data references that have the
2774 same access functions. */
2776 static bool
2777 same_access_functions (const struct data_dependence_relation *ddr)
2779 unsigned i;
2781 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2782 if (!eq_evolutions_p (DR_ACCESS_FN (DDR_A (ddr), i),
2783 DR_ACCESS_FN (DDR_B (ddr), i)))
2784 return false;
2786 return true;
2789 /* Return true when the DDR contains only constant access functions. */
2791 static bool
2792 constant_access_functions (const struct data_dependence_relation *ddr)
2794 unsigned i;
2796 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2797 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
2798 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
2799 return false;
2801 return true;
2805 /* Helper function for the case where DDR_A and DDR_B are the same
2806 multivariate access function. */
2808 static void
2809 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
2811 int x_1, x_2;
2812 tree c_1 = CHREC_LEFT (c_2);
2813 tree c_0 = CHREC_LEFT (c_1);
2814 lambda_vector dist_v;
2815 int v1, v2, cd;
2817 /* Polynomials with more than 2 variables are not handled yet. When
2818 the evolution steps are parameters, it is not possible to
2819 represent the dependence using classical distance vectors. */
2820 if (TREE_CODE (c_0) != INTEGER_CST
2821 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
2822 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
2824 DDR_AFFINE_P (ddr) = false;
2825 return;
2828 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
2829 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
2831 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
2832 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2833 v1 = int_cst_value (CHREC_RIGHT (c_1));
2834 v2 = int_cst_value (CHREC_RIGHT (c_2));
2835 cd = gcd (v1, v2);
2836 v1 /= cd;
2837 v2 /= cd;
2839 if (v2 < 0)
2841 v2 = -v2;
2842 v1 = -v1;
2845 dist_v[x_1] = v2;
2846 dist_v[x_2] = -v1;
2847 save_dist_v (ddr, dist_v);
2849 add_outer_distances (ddr, dist_v, x_1);
2852 /* Helper function for the case where DDR_A and DDR_B are the same
2853 access functions. */
2855 static void
2856 add_other_self_distances (struct data_dependence_relation *ddr)
2858 lambda_vector dist_v;
2859 unsigned i;
2860 int index_carry = DDR_NB_LOOPS (ddr);
2862 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2864 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
2866 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
2868 if (!evolution_function_is_univariate_p (access_fun))
2870 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
2872 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
2873 return;
2876 add_multivariate_self_dist (ddr, DR_ACCESS_FN (DDR_A (ddr), 0));
2877 return;
2880 index_carry = MIN (index_carry,
2881 index_in_loop_nest (CHREC_VARIABLE (access_fun),
2882 DDR_LOOP_NEST (ddr)));
2886 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2887 add_outer_distances (ddr, dist_v, index_carry);
2890 static void
2891 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
2893 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2895 dist_v[DDR_INNER_LOOP (ddr)] = 1;
2896 save_dist_v (ddr, dist_v);
2899 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
2900 is the case for example when access functions are the same and
2901 equal to a constant, as in:
2903 | loop_1
2904 | A[3] = ...
2905 | ... = A[3]
2906 | endloop_1
2908 in which case the distance vectors are (0) and (1). */
2910 static void
2911 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
2913 unsigned i, j;
2915 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2917 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
2918 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
2919 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
2921 for (j = 0; j < ca->n; j++)
2922 if (affine_function_zero_p (ca->fns[j]))
2924 insert_innermost_unit_dist_vector (ddr);
2925 return;
2928 for (j = 0; j < cb->n; j++)
2929 if (affine_function_zero_p (cb->fns[j]))
2931 insert_innermost_unit_dist_vector (ddr);
2932 return;
2937 /* Compute the classic per loop distance vector. DDR is the data
2938 dependence relation to build a vector from. Return false when fail
2939 to represent the data dependence as a distance vector. */
2941 static bool
2942 build_classic_dist_vector (struct data_dependence_relation *ddr,
2943 struct loop *loop_nest)
2945 bool init_b = false;
2946 int index_carry = DDR_NB_LOOPS (ddr);
2947 lambda_vector dist_v;
2949 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
2950 return false;
2952 if (same_access_functions (ddr))
2954 /* Save the 0 vector. */
2955 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2956 save_dist_v (ddr, dist_v);
2958 if (constant_access_functions (ddr))
2959 add_distance_for_zero_overlaps (ddr);
2961 if (DDR_NB_LOOPS (ddr) > 1)
2962 add_other_self_distances (ddr);
2964 return true;
2967 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2968 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
2969 dist_v, &init_b, &index_carry))
2970 return false;
2972 /* Save the distance vector if we initialized one. */
2973 if (init_b)
2975 /* Verify a basic constraint: classic distance vectors should
2976 always be lexicographically positive.
2978 Data references are collected in the order of execution of
2979 the program, thus for the following loop
2981 | for (i = 1; i < 100; i++)
2982 | for (j = 1; j < 100; j++)
2984 | t = T[j+1][i-1]; // A
2985 | T[j][i] = t + 2; // B
2988 references are collected following the direction of the wind:
2989 A then B. The data dependence tests are performed also
2990 following this order, such that we're looking at the distance
2991 separating the elements accessed by A from the elements later
2992 accessed by B. But in this example, the distance returned by
2993 test_dep (A, B) is lexicographically negative (-1, 1), that
2994 means that the access A occurs later than B with respect to
2995 the outer loop, ie. we're actually looking upwind. In this
2996 case we solve test_dep (B, A) looking downwind to the
2997 lexicographically positive solution, that returns the
2998 distance vector (1, -1). */
2999 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3001 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3002 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3003 loop_nest))
3004 return false;
3005 compute_subscript_distance (ddr);
3006 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3007 save_v, &init_b, &index_carry))
3008 return false;
3009 save_dist_v (ddr, save_v);
3010 DDR_REVERSED_P (ddr) = true;
3012 /* In this case there is a dependence forward for all the
3013 outer loops:
3015 | for (k = 1; k < 100; k++)
3016 | for (i = 1; i < 100; i++)
3017 | for (j = 1; j < 100; j++)
3019 | t = T[j+1][i-1]; // A
3020 | T[j][i] = t + 2; // B
3023 the vectors are:
3024 (0, 1, -1)
3025 (1, 1, -1)
3026 (1, -1, 1)
3028 if (DDR_NB_LOOPS (ddr) > 1)
3030 add_outer_distances (ddr, save_v, index_carry);
3031 add_outer_distances (ddr, dist_v, index_carry);
3034 else
3036 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3037 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3039 if (DDR_NB_LOOPS (ddr) > 1)
3041 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3043 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3044 DDR_A (ddr), loop_nest))
3045 return false;
3046 compute_subscript_distance (ddr);
3047 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3048 opposite_v, &init_b,
3049 &index_carry))
3050 return false;
3052 save_dist_v (ddr, save_v);
3053 add_outer_distances (ddr, dist_v, index_carry);
3054 add_outer_distances (ddr, opposite_v, index_carry);
3056 else
3057 save_dist_v (ddr, save_v);
3060 else
3062 /* There is a distance of 1 on all the outer loops: Example:
3063 there is a dependence of distance 1 on loop_1 for the array A.
3065 | loop_1
3066 | A[5] = ...
3067 | endloop
3069 add_outer_distances (ddr, dist_v,
3070 lambda_vector_first_nz (dist_v,
3071 DDR_NB_LOOPS (ddr), 0));
3074 if (dump_file && (dump_flags & TDF_DETAILS))
3076 unsigned i;
3078 fprintf (dump_file, "(build_classic_dist_vector\n");
3079 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3081 fprintf (dump_file, " dist_vector = (");
3082 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3083 DDR_NB_LOOPS (ddr));
3084 fprintf (dump_file, " )\n");
3086 fprintf (dump_file, ")\n");
3089 return true;
3092 /* Return the direction for a given distance.
3093 FIXME: Computing dir this way is suboptimal, since dir can catch
3094 cases that dist is unable to represent. */
3096 static inline enum data_dependence_direction
3097 dir_from_dist (int dist)
3099 if (dist > 0)
3100 return dir_positive;
3101 else if (dist < 0)
3102 return dir_negative;
3103 else
3104 return dir_equal;
3107 /* Compute the classic per loop direction vector. DDR is the data
3108 dependence relation to build a vector from. */
3110 static void
3111 build_classic_dir_vector (struct data_dependence_relation *ddr)
3113 unsigned i, j;
3114 lambda_vector dist_v;
3116 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
3118 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3120 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3121 dir_v[j] = dir_from_dist (dist_v[j]);
3123 save_dir_v (ddr, dir_v);
3127 /* Helper function. Returns true when there is a dependence between
3128 data references DRA and DRB. */
3130 static bool
3131 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3132 struct data_reference *dra,
3133 struct data_reference *drb,
3134 struct loop *loop_nest)
3136 unsigned int i;
3137 tree last_conflicts;
3138 struct subscript *subscript;
3140 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3141 i++)
3143 conflict_function *overlaps_a, *overlaps_b;
3145 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3146 DR_ACCESS_FN (drb, i),
3147 &overlaps_a, &overlaps_b,
3148 &last_conflicts, loop_nest);
3150 if (CF_NOT_KNOWN_P (overlaps_a)
3151 || CF_NOT_KNOWN_P (overlaps_b))
3153 finalize_ddr_dependent (ddr, chrec_dont_know);
3154 dependence_stats.num_dependence_undetermined++;
3155 free_conflict_function (overlaps_a);
3156 free_conflict_function (overlaps_b);
3157 return false;
3160 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3161 || CF_NO_DEPENDENCE_P (overlaps_b))
3163 finalize_ddr_dependent (ddr, chrec_known);
3164 dependence_stats.num_dependence_independent++;
3165 free_conflict_function (overlaps_a);
3166 free_conflict_function (overlaps_b);
3167 return false;
3170 else
3172 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3173 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3174 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3178 return true;
3181 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3183 static void
3184 subscript_dependence_tester (struct data_dependence_relation *ddr,
3185 struct loop *loop_nest)
3188 if (dump_file && (dump_flags & TDF_DETAILS))
3189 fprintf (dump_file, "(subscript_dependence_tester \n");
3191 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3192 dependence_stats.num_dependence_dependent++;
3194 compute_subscript_distance (ddr);
3195 if (build_classic_dist_vector (ddr, loop_nest))
3196 build_classic_dir_vector (ddr);
3198 if (dump_file && (dump_flags & TDF_DETAILS))
3199 fprintf (dump_file, ")\n");
3202 /* Returns true when all the access functions of A are affine or
3203 constant with respect to LOOP_NEST. */
3205 static bool
3206 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3207 const struct loop *loop_nest)
3209 unsigned int i;
3210 VEC(tree,heap) *fns = DR_ACCESS_FNS (a);
3211 tree t;
3213 for (i = 0; VEC_iterate (tree, fns, i, t); i++)
3214 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3215 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3216 return false;
3218 return true;
3221 /* Initializes an equation for an OMEGA problem using the information
3222 contained in the ACCESS_FUN. Returns true when the operation
3223 succeeded.
3225 PB is the omega constraint system.
3226 EQ is the number of the equation to be initialized.
3227 OFFSET is used for shifting the variables names in the constraints:
3228 a constrain is composed of 2 * the number of variables surrounding
3229 dependence accesses. OFFSET is set either to 0 for the first n variables,
3230 then it is set to n.
3231 ACCESS_FUN is expected to be an affine chrec. */
3233 static bool
3234 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3235 unsigned int offset, tree access_fun,
3236 struct data_dependence_relation *ddr)
3238 switch (TREE_CODE (access_fun))
3240 case POLYNOMIAL_CHREC:
3242 tree left = CHREC_LEFT (access_fun);
3243 tree right = CHREC_RIGHT (access_fun);
3244 int var = CHREC_VARIABLE (access_fun);
3245 unsigned var_idx;
3247 if (TREE_CODE (right) != INTEGER_CST)
3248 return false;
3250 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3251 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3253 /* Compute the innermost loop index. */
3254 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3256 if (offset == 0)
3257 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3258 += int_cst_value (right);
3260 switch (TREE_CODE (left))
3262 case POLYNOMIAL_CHREC:
3263 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3265 case INTEGER_CST:
3266 pb->eqs[eq].coef[0] += int_cst_value (left);
3267 return true;
3269 default:
3270 return false;
3274 case INTEGER_CST:
3275 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3276 return true;
3278 default:
3279 return false;
3283 /* As explained in the comments preceding init_omega_for_ddr, we have
3284 to set up a system for each loop level, setting outer loops
3285 variation to zero, and current loop variation to positive or zero.
3286 Save each lexico positive distance vector. */
3288 static void
3289 omega_extract_distance_vectors (omega_pb pb,
3290 struct data_dependence_relation *ddr)
3292 int eq, geq;
3293 unsigned i, j;
3294 struct loop *loopi, *loopj;
3295 enum omega_result res;
3297 /* Set a new problem for each loop in the nest. The basis is the
3298 problem that we have initialized until now. On top of this we
3299 add new constraints. */
3300 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3301 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3303 int dist = 0;
3304 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3305 DDR_NB_LOOPS (ddr));
3307 omega_copy_problem (copy, pb);
3309 /* For all the outer loops "loop_j", add "dj = 0". */
3310 for (j = 0;
3311 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3313 eq = omega_add_zero_eq (copy, omega_black);
3314 copy->eqs[eq].coef[j + 1] = 1;
3317 /* For "loop_i", add "0 <= di". */
3318 geq = omega_add_zero_geq (copy, omega_black);
3319 copy->geqs[geq].coef[i + 1] = 1;
3321 /* Reduce the constraint system, and test that the current
3322 problem is feasible. */
3323 res = omega_simplify_problem (copy);
3324 if (res == omega_false
3325 || res == omega_unknown
3326 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3327 goto next_problem;
3329 for (eq = 0; eq < copy->num_subs; eq++)
3330 if (copy->subs[eq].key == (int) i + 1)
3332 dist = copy->subs[eq].coef[0];
3333 goto found_dist;
3336 if (dist == 0)
3338 /* Reinitialize problem... */
3339 omega_copy_problem (copy, pb);
3340 for (j = 0;
3341 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3343 eq = omega_add_zero_eq (copy, omega_black);
3344 copy->eqs[eq].coef[j + 1] = 1;
3347 /* ..., but this time "di = 1". */
3348 eq = omega_add_zero_eq (copy, omega_black);
3349 copy->eqs[eq].coef[i + 1] = 1;
3350 copy->eqs[eq].coef[0] = -1;
3352 res = omega_simplify_problem (copy);
3353 if (res == omega_false
3354 || res == omega_unknown
3355 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3356 goto next_problem;
3358 for (eq = 0; eq < copy->num_subs; eq++)
3359 if (copy->subs[eq].key == (int) i + 1)
3361 dist = copy->subs[eq].coef[0];
3362 goto found_dist;
3366 found_dist:;
3367 /* Save the lexicographically positive distance vector. */
3368 if (dist >= 0)
3370 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3371 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3373 dist_v[i] = dist;
3375 for (eq = 0; eq < copy->num_subs; eq++)
3376 if (copy->subs[eq].key > 0)
3378 dist = copy->subs[eq].coef[0];
3379 dist_v[copy->subs[eq].key - 1] = dist;
3382 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3383 dir_v[j] = dir_from_dist (dist_v[j]);
3385 save_dist_v (ddr, dist_v);
3386 save_dir_v (ddr, dir_v);
3389 next_problem:;
3390 omega_free_problem (copy);
3394 /* This is called for each subscript of a tuple of data references:
3395 insert an equality for representing the conflicts. */
3397 static bool
3398 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3399 struct data_dependence_relation *ddr,
3400 omega_pb pb, bool *maybe_dependent)
3402 int eq;
3403 tree fun_a = chrec_convert (integer_type_node, access_fun_a, NULL_TREE);
3404 tree fun_b = chrec_convert (integer_type_node, access_fun_b, NULL_TREE);
3405 tree difference = chrec_fold_minus (integer_type_node, fun_a, fun_b);
3407 /* When the fun_a - fun_b is not constant, the dependence is not
3408 captured by the classic distance vector representation. */
3409 if (TREE_CODE (difference) != INTEGER_CST)
3410 return false;
3412 /* ZIV test. */
3413 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3415 /* There is no dependence. */
3416 *maybe_dependent = false;
3417 return true;
3420 fun_b = chrec_fold_multiply (integer_type_node, fun_b,
3421 integer_minus_one_node);
3423 eq = omega_add_zero_eq (pb, omega_black);
3424 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3425 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3426 /* There is probably a dependence, but the system of
3427 constraints cannot be built: answer "don't know". */
3428 return false;
3430 /* GCD test. */
3431 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3432 && !int_divides_p (lambda_vector_gcd
3433 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3434 2 * DDR_NB_LOOPS (ddr)),
3435 pb->eqs[eq].coef[0]))
3437 /* There is no dependence. */
3438 *maybe_dependent = false;
3439 return true;
3442 return true;
3445 /* Helper function, same as init_omega_for_ddr but specialized for
3446 data references A and B. */
3448 static bool
3449 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3450 struct data_dependence_relation *ddr,
3451 omega_pb pb, bool *maybe_dependent)
3453 unsigned i;
3454 int ineq;
3455 struct loop *loopi;
3456 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3458 /* Insert an equality per subscript. */
3459 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3461 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3462 ddr, pb, maybe_dependent))
3463 return false;
3464 else if (*maybe_dependent == false)
3466 /* There is no dependence. */
3467 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3468 return true;
3472 /* Insert inequalities: constraints corresponding to the iteration
3473 domain, i.e. the loops surrounding the references "loop_x" and
3474 the distance variables "dx". The layout of the OMEGA
3475 representation is as follows:
3476 - coef[0] is the constant
3477 - coef[1..nb_loops] are the protected variables that will not be
3478 removed by the solver: the "dx"
3479 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3481 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3482 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3484 HOST_WIDE_INT nbi = estimated_loop_iterations_int (loopi, false);
3486 /* 0 <= loop_x */
3487 ineq = omega_add_zero_geq (pb, omega_black);
3488 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3490 /* 0 <= loop_x + dx */
3491 ineq = omega_add_zero_geq (pb, omega_black);
3492 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3493 pb->geqs[ineq].coef[i + 1] = 1;
3495 if (nbi != -1)
3497 /* loop_x <= nb_iters */
3498 ineq = omega_add_zero_geq (pb, omega_black);
3499 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3500 pb->geqs[ineq].coef[0] = nbi;
3502 /* loop_x + dx <= nb_iters */
3503 ineq = omega_add_zero_geq (pb, omega_black);
3504 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3505 pb->geqs[ineq].coef[i + 1] = -1;
3506 pb->geqs[ineq].coef[0] = nbi;
3508 /* A step "dx" bigger than nb_iters is not feasible, so
3509 add "0 <= nb_iters + dx", */
3510 ineq = omega_add_zero_geq (pb, omega_black);
3511 pb->geqs[ineq].coef[i + 1] = 1;
3512 pb->geqs[ineq].coef[0] = nbi;
3513 /* and "dx <= nb_iters". */
3514 ineq = omega_add_zero_geq (pb, omega_black);
3515 pb->geqs[ineq].coef[i + 1] = -1;
3516 pb->geqs[ineq].coef[0] = nbi;
3520 omega_extract_distance_vectors (pb, ddr);
3522 return true;
3525 /* Sets up the Omega dependence problem for the data dependence
3526 relation DDR. Returns false when the constraint system cannot be
3527 built, ie. when the test answers "don't know". Returns true
3528 otherwise, and when independence has been proved (using one of the
3529 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3530 set MAYBE_DEPENDENT to true.
3532 Example: for setting up the dependence system corresponding to the
3533 conflicting accesses
3535 | loop_i
3536 | loop_j
3537 | A[i, i+1] = ...
3538 | ... A[2*j, 2*(i + j)]
3539 | endloop_j
3540 | endloop_i
3542 the following constraints come from the iteration domain:
3544 0 <= i <= Ni
3545 0 <= i + di <= Ni
3546 0 <= j <= Nj
3547 0 <= j + dj <= Nj
3549 where di, dj are the distance variables. The constraints
3550 representing the conflicting elements are:
3552 i = 2 * (j + dj)
3553 i + 1 = 2 * (i + di + j + dj)
3555 For asking that the resulting distance vector (di, dj) be
3556 lexicographically positive, we insert the constraint "di >= 0". If
3557 "di = 0" in the solution, we fix that component to zero, and we
3558 look at the inner loops: we set a new problem where all the outer
3559 loop distances are zero, and fix this inner component to be
3560 positive. When one of the components is positive, we save that
3561 distance, and set a new problem where the distance on this loop is
3562 zero, searching for other distances in the inner loops. Here is
3563 the classic example that illustrates that we have to set for each
3564 inner loop a new problem:
3566 | loop_1
3567 | loop_2
3568 | A[10]
3569 | endloop_2
3570 | endloop_1
3572 we have to save two distances (1, 0) and (0, 1).
3574 Given two array references, refA and refB, we have to set the
3575 dependence problem twice, refA vs. refB and refB vs. refA, and we
3576 cannot do a single test, as refB might occur before refA in the
3577 inner loops, and the contrary when considering outer loops: ex.
3579 | loop_0
3580 | loop_1
3581 | loop_2
3582 | T[{1,+,1}_2][{1,+,1}_1] // refA
3583 | T[{2,+,1}_2][{0,+,1}_1] // refB
3584 | endloop_2
3585 | endloop_1
3586 | endloop_0
3588 refB touches the elements in T before refA, and thus for the same
3589 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3590 but for successive loop_0 iterations, we have (1, -1, 1)
3592 The Omega solver expects the distance variables ("di" in the
3593 previous example) to come first in the constraint system (as
3594 variables to be protected, or "safe" variables), the constraint
3595 system is built using the following layout:
3597 "cst | distance vars | index vars".
3600 static bool
3601 init_omega_for_ddr (struct data_dependence_relation *ddr,
3602 bool *maybe_dependent)
3604 omega_pb pb;
3605 bool res = false;
3607 *maybe_dependent = true;
3609 if (same_access_functions (ddr))
3611 unsigned j;
3612 lambda_vector dir_v;
3614 /* Save the 0 vector. */
3615 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3616 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3617 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3618 dir_v[j] = dir_equal;
3619 save_dir_v (ddr, dir_v);
3621 /* Save the dependences carried by outer loops. */
3622 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3623 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3624 maybe_dependent);
3625 omega_free_problem (pb);
3626 return res;
3629 /* Omega expects the protected variables (those that have to be kept
3630 after elimination) to appear first in the constraint system.
3631 These variables are the distance variables. In the following
3632 initialization we declare NB_LOOPS safe variables, and the total
3633 number of variables for the constraint system is 2*NB_LOOPS. */
3634 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3635 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3636 maybe_dependent);
3637 omega_free_problem (pb);
3639 /* Stop computation if not decidable, or no dependence. */
3640 if (res == false || *maybe_dependent == false)
3641 return res;
3643 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3644 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
3645 maybe_dependent);
3646 omega_free_problem (pb);
3648 return res;
3651 /* Return true when DDR contains the same information as that stored
3652 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
3654 static bool
3655 ddr_consistent_p (FILE *file,
3656 struct data_dependence_relation *ddr,
3657 VEC (lambda_vector, heap) *dist_vects,
3658 VEC (lambda_vector, heap) *dir_vects)
3660 unsigned int i, j;
3662 /* If dump_file is set, output there. */
3663 if (dump_file && (dump_flags & TDF_DETAILS))
3664 file = dump_file;
3666 if (VEC_length (lambda_vector, dist_vects) != DDR_NUM_DIST_VECTS (ddr))
3668 lambda_vector b_dist_v;
3669 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
3670 VEC_length (lambda_vector, dist_vects),
3671 DDR_NUM_DIST_VECTS (ddr));
3673 fprintf (file, "Banerjee dist vectors:\n");
3674 for (i = 0; VEC_iterate (lambda_vector, dist_vects, i, b_dist_v); i++)
3675 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
3677 fprintf (file, "Omega dist vectors:\n");
3678 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3679 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
3681 fprintf (file, "data dependence relation:\n");
3682 dump_data_dependence_relation (file, ddr);
3684 fprintf (file, ")\n");
3685 return false;
3688 if (VEC_length (lambda_vector, dir_vects) != DDR_NUM_DIR_VECTS (ddr))
3690 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
3691 VEC_length (lambda_vector, dir_vects),
3692 DDR_NUM_DIR_VECTS (ddr));
3693 return false;
3696 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3698 lambda_vector a_dist_v;
3699 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
3701 /* Distance vectors are not ordered in the same way in the DDR
3702 and in the DIST_VECTS: search for a matching vector. */
3703 for (j = 0; VEC_iterate (lambda_vector, dist_vects, j, a_dist_v); j++)
3704 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
3705 break;
3707 if (j == VEC_length (lambda_vector, dist_vects))
3709 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
3710 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
3711 fprintf (file, "not found in Omega dist vectors:\n");
3712 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
3713 fprintf (file, "data dependence relation:\n");
3714 dump_data_dependence_relation (file, ddr);
3715 fprintf (file, ")\n");
3719 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
3721 lambda_vector a_dir_v;
3722 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
3724 /* Direction vectors are not ordered in the same way in the DDR
3725 and in the DIR_VECTS: search for a matching vector. */
3726 for (j = 0; VEC_iterate (lambda_vector, dir_vects, j, a_dir_v); j++)
3727 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
3728 break;
3730 if (j == VEC_length (lambda_vector, dist_vects))
3732 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
3733 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
3734 fprintf (file, "not found in Omega dir vectors:\n");
3735 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
3736 fprintf (file, "data dependence relation:\n");
3737 dump_data_dependence_relation (file, ddr);
3738 fprintf (file, ")\n");
3742 return true;
3745 /* This computes the affine dependence relation between A and B with
3746 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
3747 independence between two accesses, while CHREC_DONT_KNOW is used
3748 for representing the unknown relation.
3750 Note that it is possible to stop the computation of the dependence
3751 relation the first time we detect a CHREC_KNOWN element for a given
3752 subscript. */
3754 static void
3755 compute_affine_dependence (struct data_dependence_relation *ddr,
3756 struct loop *loop_nest)
3758 struct data_reference *dra = DDR_A (ddr);
3759 struct data_reference *drb = DDR_B (ddr);
3761 if (dump_file && (dump_flags & TDF_DETAILS))
3763 fprintf (dump_file, "(compute_affine_dependence\n");
3764 fprintf (dump_file, " (stmt_a = \n");
3765 print_generic_expr (dump_file, DR_STMT (dra), 0);
3766 fprintf (dump_file, ")\n (stmt_b = \n");
3767 print_generic_expr (dump_file, DR_STMT (drb), 0);
3768 fprintf (dump_file, ")\n");
3771 /* Analyze only when the dependence relation is not yet known. */
3772 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
3774 dependence_stats.num_dependence_tests++;
3776 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
3777 && access_functions_are_affine_or_constant_p (drb, loop_nest))
3779 if (flag_check_data_deps)
3781 /* Compute the dependences using the first algorithm. */
3782 subscript_dependence_tester (ddr, loop_nest);
3784 if (dump_file && (dump_flags & TDF_DETAILS))
3786 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
3787 dump_data_dependence_relation (dump_file, ddr);
3790 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
3792 bool maybe_dependent;
3793 VEC (lambda_vector, heap) *dir_vects, *dist_vects;
3795 /* Save the result of the first DD analyzer. */
3796 dist_vects = DDR_DIST_VECTS (ddr);
3797 dir_vects = DDR_DIR_VECTS (ddr);
3799 /* Reset the information. */
3800 DDR_DIST_VECTS (ddr) = NULL;
3801 DDR_DIR_VECTS (ddr) = NULL;
3803 /* Compute the same information using Omega. */
3804 if (!init_omega_for_ddr (ddr, &maybe_dependent))
3805 goto csys_dont_know;
3807 if (dump_file && (dump_flags & TDF_DETAILS))
3809 fprintf (dump_file, "Omega Analyzer\n");
3810 dump_data_dependence_relation (dump_file, ddr);
3813 /* Check that we get the same information. */
3814 if (maybe_dependent)
3815 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
3816 dir_vects));
3819 else
3820 subscript_dependence_tester (ddr, loop_nest);
3823 /* As a last case, if the dependence cannot be determined, or if
3824 the dependence is considered too difficult to determine, answer
3825 "don't know". */
3826 else
3828 csys_dont_know:;
3829 dependence_stats.num_dependence_undetermined++;
3831 if (dump_file && (dump_flags & TDF_DETAILS))
3833 fprintf (dump_file, "Data ref a:\n");
3834 dump_data_reference (dump_file, dra);
3835 fprintf (dump_file, "Data ref b:\n");
3836 dump_data_reference (dump_file, drb);
3837 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
3839 finalize_ddr_dependent (ddr, chrec_dont_know);
3843 if (dump_file && (dump_flags & TDF_DETAILS))
3844 fprintf (dump_file, ")\n");
3847 /* This computes the dependence relation for the same data
3848 reference into DDR. */
3850 static void
3851 compute_self_dependence (struct data_dependence_relation *ddr)
3853 unsigned int i;
3854 struct subscript *subscript;
3856 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3857 return;
3859 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3860 i++)
3862 /* The accessed index overlaps for each iteration. */
3863 SUB_CONFLICTS_IN_A (subscript)
3864 = conflict_fn (1, affine_fn_cst (integer_zero_node));
3865 SUB_CONFLICTS_IN_B (subscript)
3866 = conflict_fn (1, affine_fn_cst (integer_zero_node));
3867 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
3870 /* The distance vector is the zero vector. */
3871 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3872 save_dir_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3875 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
3876 the data references in DATAREFS, in the LOOP_NEST. When
3877 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
3878 relations. */
3880 void
3881 compute_all_dependences (VEC (data_reference_p, heap) *datarefs,
3882 VEC (ddr_p, heap) **dependence_relations,
3883 VEC (loop_p, heap) *loop_nest,
3884 bool compute_self_and_rr)
3886 struct data_dependence_relation *ddr;
3887 struct data_reference *a, *b;
3888 unsigned int i, j;
3890 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, a); i++)
3891 for (j = i + 1; VEC_iterate (data_reference_p, datarefs, j, b); j++)
3892 if (!DR_IS_READ (a) || !DR_IS_READ (b) || compute_self_and_rr)
3894 ddr = initialize_data_dependence_relation (a, b, loop_nest);
3895 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
3896 compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
3899 if (compute_self_and_rr)
3900 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, a); i++)
3902 ddr = initialize_data_dependence_relation (a, a, loop_nest);
3903 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
3904 compute_self_dependence (ddr);
3908 /* Stores the locations of memory references in STMT to REFERENCES. Returns
3909 true if STMT clobbers memory, false otherwise. */
3911 bool
3912 get_references_in_stmt (tree stmt, VEC (data_ref_loc, heap) **references)
3914 bool clobbers_memory = false;
3915 data_ref_loc *ref;
3916 tree *op0, *op1, call;
3918 *references = NULL;
3920 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
3921 Calls have side-effects, except those to const or pure
3922 functions. */
3923 call = get_call_expr_in (stmt);
3924 if ((call
3925 && !(call_expr_flags (call) & (ECF_CONST | ECF_PURE)))
3926 || (TREE_CODE (stmt) == ASM_EXPR
3927 && ASM_VOLATILE_P (stmt)))
3928 clobbers_memory = true;
3930 if (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS))
3931 return clobbers_memory;
3933 if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
3935 op0 = &GIMPLE_STMT_OPERAND (stmt, 0);
3936 op1 = &GIMPLE_STMT_OPERAND (stmt, 1);
3938 if (DECL_P (*op1)
3939 || (REFERENCE_CLASS_P (*op1) && get_base_address (*op1)))
3941 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
3942 ref->pos = op1;
3943 ref->is_read = true;
3946 if (DECL_P (*op0)
3947 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0)))
3949 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
3950 ref->pos = op0;
3951 ref->is_read = false;
3955 if (call)
3957 unsigned i, n = call_expr_nargs (call);
3959 for (i = 0; i < n; i++)
3961 op0 = &CALL_EXPR_ARG (call, i);
3963 if (DECL_P (*op0)
3964 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0)))
3966 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
3967 ref->pos = op0;
3968 ref->is_read = true;
3973 return clobbers_memory;
3976 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
3977 reference, returns false, otherwise returns true. NEST is the outermost
3978 loop of the loop nest in that the references should be analyzed. */
3980 static bool
3981 find_data_references_in_stmt (struct loop *nest, tree stmt,
3982 VEC (data_reference_p, heap) **datarefs)
3984 unsigned i;
3985 VEC (data_ref_loc, heap) *references;
3986 data_ref_loc *ref;
3987 bool ret = true;
3988 data_reference_p dr;
3990 if (get_references_in_stmt (stmt, &references))
3992 VEC_free (data_ref_loc, heap, references);
3993 return false;
3996 for (i = 0; VEC_iterate (data_ref_loc, references, i, ref); i++)
3998 dr = create_data_ref (nest, *ref->pos, stmt, ref->is_read);
3999 gcc_assert (dr != NULL);
4001 /* FIXME -- data dependence analysis does not work correctly for objects with
4002 invariant addresses. Let us fail here until the problem is fixed. */
4003 if (dr_address_invariant_p (dr))
4005 free_data_ref (dr);
4006 if (dump_file && (dump_flags & TDF_DETAILS))
4007 fprintf (dump_file, "\tFAILED as dr address is invariant\n");
4008 ret = false;
4009 break;
4012 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4014 VEC_free (data_ref_loc, heap, references);
4015 return ret;
4018 /* Search the data references in LOOP, and record the information into
4019 DATAREFS. Returns chrec_dont_know when failing to analyze a
4020 difficult case, returns NULL_TREE otherwise.
4022 TODO: This function should be made smarter so that it can handle address
4023 arithmetic as if they were array accesses, etc. */
4025 static tree
4026 find_data_references_in_loop (struct loop *loop,
4027 VEC (data_reference_p, heap) **datarefs)
4029 basic_block bb, *bbs;
4030 unsigned int i;
4031 block_stmt_iterator bsi;
4033 bbs = get_loop_body_in_dom_order (loop);
4035 for (i = 0; i < loop->num_nodes; i++)
4037 bb = bbs[i];
4039 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
4041 tree stmt = bsi_stmt (bsi);
4043 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4045 struct data_reference *res;
4046 res = XCNEW (struct data_reference);
4047 VEC_safe_push (data_reference_p, heap, *datarefs, res);
4049 free (bbs);
4050 return chrec_dont_know;
4054 free (bbs);
4056 return NULL_TREE;
4059 /* Recursive helper function. */
4061 static bool
4062 find_loop_nest_1 (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4064 /* Inner loops of the nest should not contain siblings. Example:
4065 when there are two consecutive loops,
4067 | loop_0
4068 | loop_1
4069 | A[{0, +, 1}_1]
4070 | endloop_1
4071 | loop_2
4072 | A[{0, +, 1}_2]
4073 | endloop_2
4074 | endloop_0
4076 the dependence relation cannot be captured by the distance
4077 abstraction. */
4078 if (loop->next)
4079 return false;
4081 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4082 if (loop->inner)
4083 return find_loop_nest_1 (loop->inner, loop_nest);
4084 return true;
4087 /* Return false when the LOOP is not well nested. Otherwise return
4088 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4089 contain the loops from the outermost to the innermost, as they will
4090 appear in the classic distance vector. */
4092 bool
4093 find_loop_nest (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4095 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4096 if (loop->inner)
4097 return find_loop_nest_1 (loop->inner, loop_nest);
4098 return true;
4101 /* Given a loop nest LOOP, the following vectors are returned:
4102 DATAREFS is initialized to all the array elements contained in this loop,
4103 DEPENDENCE_RELATIONS contains the relations between the data references.
4104 Compute read-read and self relations if
4105 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4107 void
4108 compute_data_dependences_for_loop (struct loop *loop,
4109 bool compute_self_and_read_read_dependences,
4110 VEC (data_reference_p, heap) **datarefs,
4111 VEC (ddr_p, heap) **dependence_relations)
4113 VEC (loop_p, heap) *vloops = VEC_alloc (loop_p, heap, 3);
4115 memset (&dependence_stats, 0, sizeof (dependence_stats));
4117 /* If the loop nest is not well formed, or one of the data references
4118 is not computable, give up without spending time to compute other
4119 dependences. */
4120 if (!loop
4121 || !find_loop_nest (loop, &vloops)
4122 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know)
4124 struct data_dependence_relation *ddr;
4126 /* Insert a single relation into dependence_relations:
4127 chrec_dont_know. */
4128 ddr = initialize_data_dependence_relation (NULL, NULL, vloops);
4129 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4131 else
4132 compute_all_dependences (*datarefs, dependence_relations, vloops,
4133 compute_self_and_read_read_dependences);
4135 if (dump_file && (dump_flags & TDF_STATS))
4137 fprintf (dump_file, "Dependence tester statistics:\n");
4139 fprintf (dump_file, "Number of dependence tests: %d\n",
4140 dependence_stats.num_dependence_tests);
4141 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4142 dependence_stats.num_dependence_dependent);
4143 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4144 dependence_stats.num_dependence_independent);
4145 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4146 dependence_stats.num_dependence_undetermined);
4148 fprintf (dump_file, "Number of subscript tests: %d\n",
4149 dependence_stats.num_subscript_tests);
4150 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4151 dependence_stats.num_subscript_undetermined);
4152 fprintf (dump_file, "Number of same subscript function: %d\n",
4153 dependence_stats.num_same_subscript_function);
4155 fprintf (dump_file, "Number of ziv tests: %d\n",
4156 dependence_stats.num_ziv);
4157 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4158 dependence_stats.num_ziv_dependent);
4159 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4160 dependence_stats.num_ziv_independent);
4161 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4162 dependence_stats.num_ziv_unimplemented);
4164 fprintf (dump_file, "Number of siv tests: %d\n",
4165 dependence_stats.num_siv);
4166 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4167 dependence_stats.num_siv_dependent);
4168 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4169 dependence_stats.num_siv_independent);
4170 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4171 dependence_stats.num_siv_unimplemented);
4173 fprintf (dump_file, "Number of miv tests: %d\n",
4174 dependence_stats.num_miv);
4175 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4176 dependence_stats.num_miv_dependent);
4177 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4178 dependence_stats.num_miv_independent);
4179 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4180 dependence_stats.num_miv_unimplemented);
4184 /* Entry point (for testing only). Analyze all the data references
4185 and the dependence relations in LOOP.
4187 The data references are computed first.
4189 A relation on these nodes is represented by a complete graph. Some
4190 of the relations could be of no interest, thus the relations can be
4191 computed on demand.
4193 In the following function we compute all the relations. This is
4194 just a first implementation that is here for:
4195 - for showing how to ask for the dependence relations,
4196 - for the debugging the whole dependence graph,
4197 - for the dejagnu testcases and maintenance.
4199 It is possible to ask only for a part of the graph, avoiding to
4200 compute the whole dependence graph. The computed dependences are
4201 stored in a knowledge base (KB) such that later queries don't
4202 recompute the same information. The implementation of this KB is
4203 transparent to the optimizer, and thus the KB can be changed with a
4204 more efficient implementation, or the KB could be disabled. */
4205 static void
4206 analyze_all_data_dependences (struct loop *loop)
4208 unsigned int i;
4209 int nb_data_refs = 10;
4210 VEC (data_reference_p, heap) *datarefs =
4211 VEC_alloc (data_reference_p, heap, nb_data_refs);
4212 VEC (ddr_p, heap) *dependence_relations =
4213 VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs);
4215 /* Compute DDs on the whole function. */
4216 compute_data_dependences_for_loop (loop, false, &datarefs,
4217 &dependence_relations);
4219 if (dump_file)
4221 dump_data_dependence_relations (dump_file, dependence_relations);
4222 fprintf (dump_file, "\n\n");
4224 if (dump_flags & TDF_DETAILS)
4225 dump_dist_dir_vectors (dump_file, dependence_relations);
4227 if (dump_flags & TDF_STATS)
4229 unsigned nb_top_relations = 0;
4230 unsigned nb_bot_relations = 0;
4231 unsigned nb_basename_differ = 0;
4232 unsigned nb_chrec_relations = 0;
4233 struct data_dependence_relation *ddr;
4235 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
4237 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4238 nb_top_relations++;
4240 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4242 struct data_reference *a = DDR_A (ddr);
4243 struct data_reference *b = DDR_B (ddr);
4245 if (!bitmap_intersect_p (DR_VOPS (a), DR_VOPS (b)))
4246 nb_basename_differ++;
4247 else
4248 nb_bot_relations++;
4251 else
4252 nb_chrec_relations++;
4255 gather_stats_on_scev_database ();
4259 free_dependence_relations (dependence_relations);
4260 free_data_refs (datarefs);
4263 /* Computes all the data dependences and check that the results of
4264 several analyzers are the same. */
4266 void
4267 tree_check_data_deps (void)
4269 loop_iterator li;
4270 struct loop *loop_nest;
4272 FOR_EACH_LOOP (li, loop_nest, 0)
4273 analyze_all_data_dependences (loop_nest);
4276 /* Free the memory used by a data dependence relation DDR. */
4278 void
4279 free_dependence_relation (struct data_dependence_relation *ddr)
4281 if (ddr == NULL)
4282 return;
4284 if (DDR_SUBSCRIPTS (ddr))
4285 free_subscripts (DDR_SUBSCRIPTS (ddr));
4286 if (DDR_DIST_VECTS (ddr))
4287 VEC_free (lambda_vector, heap, DDR_DIST_VECTS (ddr));
4288 if (DDR_DIR_VECTS (ddr))
4289 VEC_free (lambda_vector, heap, DDR_DIR_VECTS (ddr));
4291 free (ddr);
4294 /* Free the memory used by the data dependence relations from
4295 DEPENDENCE_RELATIONS. */
4297 void
4298 free_dependence_relations (VEC (ddr_p, heap) *dependence_relations)
4300 unsigned int i;
4301 struct data_dependence_relation *ddr;
4302 VEC (loop_p, heap) *loop_nest = NULL;
4304 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
4306 if (ddr == NULL)
4307 continue;
4308 if (loop_nest == NULL)
4309 loop_nest = DDR_LOOP_NEST (ddr);
4310 else
4311 gcc_assert (DDR_LOOP_NEST (ddr) == NULL
4312 || DDR_LOOP_NEST (ddr) == loop_nest);
4313 free_dependence_relation (ddr);
4316 if (loop_nest)
4317 VEC_free (loop_p, heap, loop_nest);
4318 VEC_free (ddr_p, heap, dependence_relations);
4321 /* Free the memory used by the data references from DATAREFS. */
4323 void
4324 free_data_refs (VEC (data_reference_p, heap) *datarefs)
4326 unsigned int i;
4327 struct data_reference *dr;
4329 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
4330 free_data_ref (dr);
4331 VEC_free (data_reference_p, heap, datarefs);
4336 /* Returns the index of STMT in RDG. */
4338 static int
4339 find_vertex_for_stmt (const struct graph *rdg, const_tree stmt)
4341 int i;
4343 for (i = 0; i < rdg->n_vertices; i++)
4344 if (RDGV_STMT (&(rdg->vertices[i])) == stmt)
4345 return i;
4347 gcc_unreachable ();
4348 return 0;
4351 /* Creates an edge in RDG for each distance vector from DDR. */
4353 static void
4354 create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4356 int va, vb;
4357 data_reference_p dra;
4358 data_reference_p drb;
4359 struct graph_edge *e;
4361 if (DDR_REVERSED_P (ddr))
4363 dra = DDR_B (ddr);
4364 drb = DDR_A (ddr);
4366 else
4368 dra = DDR_A (ddr);
4369 drb = DDR_B (ddr);
4372 va = find_vertex_for_stmt (rdg, DR_STMT (dra));
4373 vb = find_vertex_for_stmt (rdg, DR_STMT (drb));
4375 e = add_edge (rdg, va, vb);
4376 e->data = XNEW (struct rdg_edge);
4378 /* Determines the type of the data dependence. */
4379 if (DR_IS_READ (dra) && DR_IS_READ (drb))
4380 RDGE_TYPE (e) = input_dd;
4381 else if (!DR_IS_READ (dra) && !DR_IS_READ (drb))
4382 RDGE_TYPE (e) = output_dd;
4383 else if (!DR_IS_READ (dra) && DR_IS_READ (drb))
4384 RDGE_TYPE (e) = flow_dd;
4385 else if (DR_IS_READ (dra) && !DR_IS_READ (drb))
4386 RDGE_TYPE (e) = anti_dd;
4389 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
4390 the index of DEF in RDG. */
4392 static void
4393 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
4395 use_operand_p imm_use_p;
4396 imm_use_iterator iterator;
4398 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
4400 int use = find_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
4401 struct graph_edge *e = add_edge (rdg, idef, use);
4403 e->data = XNEW (struct rdg_edge);
4404 RDGE_TYPE (e) = flow_dd;
4408 /* Creates the edges of the reduced dependence graph RDG. */
4410 static void
4411 create_rdg_edges (struct graph *rdg, VEC (ddr_p, heap) *ddrs)
4413 int i;
4414 struct data_dependence_relation *ddr;
4415 def_operand_p def_p;
4416 ssa_op_iter iter;
4418 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
4419 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4420 create_rdg_edge_for_ddr (rdg, ddr);
4422 for (i = 0; i < rdg->n_vertices; i++)
4423 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDGV_STMT (&(rdg->vertices[i])),
4424 iter, SSA_OP_ALL_DEFS)
4425 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
4428 /* Build the vertices of the reduced dependence graph RDG. */
4430 static void
4431 create_rdg_vertices (struct graph *rdg, VEC (tree, heap) *stmts)
4433 int i;
4434 tree s;
4436 for (i = 0; VEC_iterate (tree, stmts, i, s); i++)
4438 struct vertex *v = &(rdg->vertices[i]);
4440 v->data = XNEW (struct rdg_vertex);
4441 RDGV_STMT (v) = s;
4445 /* Initialize STMTS with all the statements and PHI nodes of LOOP. */
4447 static void
4448 stmts_from_loop (struct loop *loop, VEC (tree, heap) **stmts)
4450 unsigned int i;
4451 basic_block *bbs = get_loop_body_in_dom_order (loop);
4453 for (i = 0; i < loop->num_nodes; i++)
4455 tree phi;
4456 basic_block bb = bbs[i];
4457 block_stmt_iterator bsi;
4459 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
4460 VEC_safe_push (tree, heap, *stmts, phi);
4462 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
4463 VEC_safe_push (tree, heap, *stmts, bsi_stmt (bsi));
4466 free (bbs);
4469 /* Returns true when all the dependences are computable. */
4471 static bool
4472 known_dependences_p (VEC (ddr_p, heap) *dependence_relations)
4474 ddr_p ddr;
4475 unsigned int i;
4477 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
4478 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4479 return false;
4481 return true;
4484 /* Build a Reduced Dependence Graph with one vertex per statement of the
4485 loop nest and one edge per data dependence or scalar dependence. */
4487 struct graph *
4488 build_rdg (struct loop *loop)
4490 int nb_data_refs = 10;
4491 struct graph *rdg = NULL;
4492 VEC (ddr_p, heap) *dependence_relations;
4493 VEC (data_reference_p, heap) *datarefs;
4494 VEC (tree, heap) *stmts = VEC_alloc (tree, heap, 10);
4496 dependence_relations = VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs) ;
4497 datarefs = VEC_alloc (data_reference_p, heap, nb_data_refs);
4498 compute_data_dependences_for_loop (loop,
4499 false,
4500 &datarefs,
4501 &dependence_relations);
4503 if (!known_dependences_p (dependence_relations))
4504 goto end_rdg;
4506 stmts_from_loop (loop, &stmts);
4507 rdg = new_graph (VEC_length (tree, stmts));
4508 create_rdg_vertices (rdg, stmts);
4509 create_rdg_edges (rdg, dependence_relations);
4511 end_rdg:
4512 free_dependence_relations (dependence_relations);
4513 free_data_refs (datarefs);
4514 VEC_free (tree, heap, stmts);
4516 return rdg;