Merge -r 127928:132243 from trunk
[official-gcc.git] / gcc / tree-data-ref.c
blob2f17ed1deb4b7fed9594747dd91c0cbe6401beff
1 /* Data references and dependences detectors.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass walks a given loop structure searching for array
22 references. The information about the array accesses is recorded
23 in DATA_REFERENCE structures.
25 The basic test for determining the dependences is:
26 given two access functions chrec1 and chrec2 to a same array, and
27 x and y two vectors from the iteration domain, the same element of
28 the array is accessed twice at iterations x and y if and only if:
29 | chrec1 (x) == chrec2 (y).
31 The goals of this analysis are:
33 - to determine the independence: the relation between two
34 independent accesses is qualified with the chrec_known (this
35 information allows a loop parallelization),
37 - when two data references access the same data, to qualify the
38 dependence relation with classic dependence representations:
40 - distance vectors
41 - direction vectors
42 - loop carried level dependence
43 - polyhedron dependence
44 or with the chains of recurrences based representation,
46 - to define a knowledge base for storing the data dependence
47 information,
49 - to define an interface to access this data.
52 Definitions:
54 - subscript: given two array accesses a subscript is the tuple
55 composed of the access functions for a given dimension. Example:
56 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57 (f1, g1), (f2, g2), (f3, g3).
59 - Diophantine equation: an equation whose coefficients and
60 solutions are integer constants, for example the equation
61 | 3*x + 2*y = 1
62 has an integer solution x = 1 and y = -1.
64 References:
66 - "Advanced Compilation for High Performance Computing" by Randy
67 Allen and Ken Kennedy.
68 http://citeseer.ist.psu.edu/goff91practical.html
70 - "Loop Transformations for Restructuring Compilers - The Foundations"
71 by Utpal Banerjee.
76 #include "config.h"
77 #include "system.h"
78 #include "coretypes.h"
79 #include "tm.h"
80 #include "ggc.h"
81 #include "tree.h"
83 /* These RTL headers are needed for basic-block.h. */
84 #include "rtl.h"
85 #include "basic-block.h"
86 #include "diagnostic.h"
87 #include "tree-flow.h"
88 #include "tree-dump.h"
89 #include "timevar.h"
90 #include "cfgloop.h"
91 #include "tree-chrec.h"
92 #include "tree-data-ref.h"
93 #include "tree-scalar-evolution.h"
94 #include "tree-pass.h"
95 #include "langhooks.h"
97 static struct datadep_stats
99 int num_dependence_tests;
100 int num_dependence_dependent;
101 int num_dependence_independent;
102 int num_dependence_undetermined;
104 int num_subscript_tests;
105 int num_subscript_undetermined;
106 int num_same_subscript_function;
108 int num_ziv;
109 int num_ziv_independent;
110 int num_ziv_dependent;
111 int num_ziv_unimplemented;
113 int num_siv;
114 int num_siv_independent;
115 int num_siv_dependent;
116 int num_siv_unimplemented;
118 int num_miv;
119 int num_miv_independent;
120 int num_miv_dependent;
121 int num_miv_unimplemented;
122 } dependence_stats;
124 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
125 struct data_reference *,
126 struct data_reference *,
127 struct loop *);
128 /* Returns true iff A divides B. */
130 static inline bool
131 tree_fold_divides_p (const_tree a, const_tree b)
133 gcc_assert (TREE_CODE (a) == INTEGER_CST);
134 gcc_assert (TREE_CODE (b) == INTEGER_CST);
135 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a, 0));
138 /* Returns true iff A divides B. */
140 static inline bool
141 int_divides_p (int a, int b)
143 return ((b % a) == 0);
148 /* Dump into FILE all the data references from DATAREFS. */
150 void
151 dump_data_references (FILE *file, VEC (data_reference_p, heap) *datarefs)
153 unsigned int i;
154 struct data_reference *dr;
156 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
157 dump_data_reference (file, dr);
160 /* Dump into FILE all the dependence relations from DDRS. */
162 void
163 dump_data_dependence_relations (FILE *file,
164 VEC (ddr_p, heap) *ddrs)
166 unsigned int i;
167 struct data_dependence_relation *ddr;
169 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
170 dump_data_dependence_relation (file, ddr);
173 /* Dump function for a DATA_REFERENCE structure. */
175 void
176 dump_data_reference (FILE *outf,
177 struct data_reference *dr)
179 unsigned int i;
181 fprintf (outf, "(Data Ref: \n stmt: ");
182 print_generic_stmt (outf, DR_STMT (dr), 0);
183 fprintf (outf, " ref: ");
184 print_generic_stmt (outf, DR_REF (dr), 0);
185 fprintf (outf, " base_object: ");
186 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
188 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
190 fprintf (outf, " Access function %d: ", i);
191 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
193 fprintf (outf, ")\n");
196 /* Dumps the affine function described by FN to the file OUTF. */
198 static void
199 dump_affine_function (FILE *outf, affine_fn fn)
201 unsigned i;
202 tree coef;
204 print_generic_expr (outf, VEC_index (tree, fn, 0), TDF_SLIM);
205 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
207 fprintf (outf, " + ");
208 print_generic_expr (outf, coef, TDF_SLIM);
209 fprintf (outf, " * x_%u", i);
213 /* Dumps the conflict function CF to the file OUTF. */
215 static void
216 dump_conflict_function (FILE *outf, conflict_function *cf)
218 unsigned i;
220 if (cf->n == NO_DEPENDENCE)
221 fprintf (outf, "no dependence\n");
222 else if (cf->n == NOT_KNOWN)
223 fprintf (outf, "not known\n");
224 else
226 for (i = 0; i < cf->n; i++)
228 fprintf (outf, "[");
229 dump_affine_function (outf, cf->fns[i]);
230 fprintf (outf, "]\n");
235 /* Dump function for a SUBSCRIPT structure. */
237 void
238 dump_subscript (FILE *outf, struct subscript *subscript)
240 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
242 fprintf (outf, "\n (subscript \n");
243 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
244 dump_conflict_function (outf, cf);
245 if (CF_NONTRIVIAL_P (cf))
247 tree last_iteration = SUB_LAST_CONFLICT (subscript);
248 fprintf (outf, " last_conflict: ");
249 print_generic_stmt (outf, last_iteration, 0);
252 cf = SUB_CONFLICTS_IN_B (subscript);
253 fprintf (outf, " iterations_that_access_an_element_twice_in_B: ");
254 dump_conflict_function (outf, cf);
255 if (CF_NONTRIVIAL_P (cf))
257 tree last_iteration = SUB_LAST_CONFLICT (subscript);
258 fprintf (outf, " last_conflict: ");
259 print_generic_stmt (outf, last_iteration, 0);
262 fprintf (outf, " (Subscript distance: ");
263 print_generic_stmt (outf, SUB_DISTANCE (subscript), 0);
264 fprintf (outf, " )\n");
265 fprintf (outf, " )\n");
268 /* Print the classic direction vector DIRV to OUTF. */
270 void
271 print_direction_vector (FILE *outf,
272 lambda_vector dirv,
273 int length)
275 int eq;
277 for (eq = 0; eq < length; eq++)
279 enum data_dependence_direction dir = dirv[eq];
281 switch (dir)
283 case dir_positive:
284 fprintf (outf, " +");
285 break;
286 case dir_negative:
287 fprintf (outf, " -");
288 break;
289 case dir_equal:
290 fprintf (outf, " =");
291 break;
292 case dir_positive_or_equal:
293 fprintf (outf, " +=");
294 break;
295 case dir_positive_or_negative:
296 fprintf (outf, " +-");
297 break;
298 case dir_negative_or_equal:
299 fprintf (outf, " -=");
300 break;
301 case dir_star:
302 fprintf (outf, " *");
303 break;
304 default:
305 fprintf (outf, "indep");
306 break;
309 fprintf (outf, "\n");
312 /* Print a vector of direction vectors. */
314 void
315 print_dir_vectors (FILE *outf, VEC (lambda_vector, heap) *dir_vects,
316 int length)
318 unsigned j;
319 lambda_vector v;
321 for (j = 0; VEC_iterate (lambda_vector, dir_vects, j, v); j++)
322 print_direction_vector (outf, v, length);
325 /* Print a vector of distance vectors. */
327 void
328 print_dist_vectors (FILE *outf, VEC (lambda_vector, heap) *dist_vects,
329 int length)
331 unsigned j;
332 lambda_vector v;
334 for (j = 0; VEC_iterate (lambda_vector, dist_vects, j, v); j++)
335 print_lambda_vector (outf, v, length);
338 /* Debug version. */
340 void
341 debug_data_dependence_relation (struct data_dependence_relation *ddr)
343 dump_data_dependence_relation (stderr, ddr);
346 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
348 void
349 dump_data_dependence_relation (FILE *outf,
350 struct data_dependence_relation *ddr)
352 struct data_reference *dra, *drb;
354 dra = DDR_A (ddr);
355 drb = DDR_B (ddr);
356 fprintf (outf, "(Data Dep: \n");
357 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
358 fprintf (outf, " (don't know)\n");
360 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
361 fprintf (outf, " (no dependence)\n");
363 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
365 unsigned int i;
366 struct loop *loopi;
368 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
370 fprintf (outf, " access_fn_A: ");
371 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
372 fprintf (outf, " access_fn_B: ");
373 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
374 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
377 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
378 fprintf (outf, " loop nest: (");
379 for (i = 0; VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
380 fprintf (outf, "%d ", loopi->num);
381 fprintf (outf, ")\n");
383 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
385 fprintf (outf, " distance_vector: ");
386 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
387 DDR_NB_LOOPS (ddr));
390 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
392 fprintf (outf, " direction_vector: ");
393 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
394 DDR_NB_LOOPS (ddr));
398 fprintf (outf, ")\n");
401 /* Dump function for a DATA_DEPENDENCE_DIRECTION structure. */
403 void
404 dump_data_dependence_direction (FILE *file,
405 enum data_dependence_direction dir)
407 switch (dir)
409 case dir_positive:
410 fprintf (file, "+");
411 break;
413 case dir_negative:
414 fprintf (file, "-");
415 break;
417 case dir_equal:
418 fprintf (file, "=");
419 break;
421 case dir_positive_or_negative:
422 fprintf (file, "+-");
423 break;
425 case dir_positive_or_equal:
426 fprintf (file, "+=");
427 break;
429 case dir_negative_or_equal:
430 fprintf (file, "-=");
431 break;
433 case dir_star:
434 fprintf (file, "*");
435 break;
437 default:
438 break;
442 /* Dumps the distance and direction vectors in FILE. DDRS contains
443 the dependence relations, and VECT_SIZE is the size of the
444 dependence vectors, or in other words the number of loops in the
445 considered nest. */
447 void
448 dump_dist_dir_vectors (FILE *file, VEC (ddr_p, heap) *ddrs)
450 unsigned int i, j;
451 struct data_dependence_relation *ddr;
452 lambda_vector v;
454 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
455 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
457 for (j = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), j, v); j++)
459 fprintf (file, "DISTANCE_V (");
460 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
461 fprintf (file, ")\n");
464 for (j = 0; VEC_iterate (lambda_vector, DDR_DIR_VECTS (ddr), j, v); j++)
466 fprintf (file, "DIRECTION_V (");
467 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
468 fprintf (file, ")\n");
472 fprintf (file, "\n\n");
475 /* Dumps the data dependence relations DDRS in FILE. */
477 void
478 dump_ddrs (FILE *file, VEC (ddr_p, heap) *ddrs)
480 unsigned int i;
481 struct data_dependence_relation *ddr;
483 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
484 dump_data_dependence_relation (file, ddr);
486 fprintf (file, "\n\n");
489 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
490 will be ssizetype. */
492 void
493 split_constant_offset (tree exp, tree *var, tree *off)
495 tree type = TREE_TYPE (exp), otype;
496 tree var0, var1;
497 tree off0, off1;
498 enum tree_code code;
500 *var = exp;
501 STRIP_NOPS (exp);
502 otype = TREE_TYPE (exp);
503 code = TREE_CODE (exp);
505 switch (code)
507 case INTEGER_CST:
508 *var = build_int_cst (type, 0);
509 *off = fold_convert (ssizetype, exp);
510 return;
512 case POINTER_PLUS_EXPR:
513 code = PLUS_EXPR;
514 /* FALLTHROUGH */
515 case PLUS_EXPR:
516 case MINUS_EXPR:
517 split_constant_offset (TREE_OPERAND (exp, 0), &var0, &off0);
518 split_constant_offset (TREE_OPERAND (exp, 1), &var1, &off1);
519 *var = fold_convert (type, fold_build2 (TREE_CODE (exp), otype,
520 var0, var1));
521 *off = size_binop (code, off0, off1);
522 return;
524 case MULT_EXPR:
525 off1 = TREE_OPERAND (exp, 1);
526 if (TREE_CODE (off1) != INTEGER_CST)
527 break;
529 split_constant_offset (TREE_OPERAND (exp, 0), &var0, &off0);
530 *var = fold_convert (type, fold_build2 (MULT_EXPR, otype,
531 var0, off1));
532 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, off1));
533 return;
535 case ADDR_EXPR:
537 tree op, base, poffset;
538 HOST_WIDE_INT pbitsize, pbitpos;
539 enum machine_mode pmode;
540 int punsignedp, pvolatilep;
542 op = TREE_OPERAND (exp, 0);
543 if (!handled_component_p (op))
544 break;
546 base = get_inner_reference (op, &pbitsize, &pbitpos, &poffset,
547 &pmode, &punsignedp, &pvolatilep, false);
549 if (pbitpos % BITS_PER_UNIT != 0)
550 break;
551 base = build_fold_addr_expr (base);
552 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
554 if (poffset)
556 split_constant_offset (poffset, &poffset, &off1);
557 off0 = size_binop (PLUS_EXPR, off0, off1);
558 if (POINTER_TYPE_P (TREE_TYPE (base)))
559 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base),
560 base, fold_convert (sizetype, poffset));
561 else
562 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
563 fold_convert (TREE_TYPE (base), poffset));
566 var0 = fold_convert (type, base);
568 /* If variable length types are involved, punt, otherwise casts
569 might be converted into ARRAY_REFs in gimplify_conversion.
570 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
571 possibly no longer appears in current GIMPLE, might resurface.
572 This perhaps could run
573 if (TREE_CODE (var0) == NOP_EXPR
574 || TREE_CODE (var0) == CONVERT_EXPR)
576 gimplify_conversion (&var0);
577 // Attempt to fill in any within var0 found ARRAY_REF's
578 // element size from corresponding op embedded ARRAY_REF,
579 // if unsuccessful, just punt.
580 } */
581 while (POINTER_TYPE_P (type))
582 type = TREE_TYPE (type);
583 if (int_size_in_bytes (type) < 0)
584 break;
586 *var = var0;
587 *off = off0;
588 return;
591 case SSA_NAME:
593 tree def_stmt = SSA_NAME_DEF_STMT (exp);
594 if (TREE_CODE (def_stmt) == GIMPLE_MODIFY_STMT)
596 tree def_stmt_rhs = GIMPLE_STMT_OPERAND (def_stmt, 1);
598 if (!TREE_SIDE_EFFECTS (def_stmt_rhs)
599 && EXPR_P (def_stmt_rhs)
600 && !REFERENCE_CLASS_P (def_stmt_rhs)
601 && !get_call_expr_in (def_stmt_rhs))
603 split_constant_offset (def_stmt_rhs, &var0, &off0);
604 var0 = fold_convert (type, var0);
605 *var = var0;
606 *off = off0;
607 return;
610 break;
613 default:
614 break;
617 *off = ssize_int (0);
620 /* Returns the address ADDR of an object in a canonical shape (without nop
621 casts, and with type of pointer to the object). */
623 static tree
624 canonicalize_base_object_address (tree addr)
626 tree orig = addr;
628 STRIP_NOPS (addr);
630 /* The base address may be obtained by casting from integer, in that case
631 keep the cast. */
632 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
633 return orig;
635 if (TREE_CODE (addr) != ADDR_EXPR)
636 return addr;
638 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
641 /* Analyzes the behavior of the memory reference DR in the innermost loop that
642 contains it. */
644 void
645 dr_analyze_innermost (struct data_reference *dr)
647 tree stmt = DR_STMT (dr);
648 struct loop *loop = loop_containing_stmt (stmt);
649 tree ref = DR_REF (dr);
650 HOST_WIDE_INT pbitsize, pbitpos;
651 tree base, poffset;
652 enum machine_mode pmode;
653 int punsignedp, pvolatilep;
654 affine_iv base_iv, offset_iv;
655 tree init, dinit, step;
657 if (dump_file && (dump_flags & TDF_DETAILS))
658 fprintf (dump_file, "analyze_innermost: ");
660 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
661 &pmode, &punsignedp, &pvolatilep, false);
662 gcc_assert (base != NULL_TREE);
664 if (pbitpos % BITS_PER_UNIT != 0)
666 if (dump_file && (dump_flags & TDF_DETAILS))
667 fprintf (dump_file, "failed: bit offset alignment.\n");
668 return;
671 base = build_fold_addr_expr (base);
672 if (!simple_iv (loop, stmt, base, &base_iv, false))
674 if (dump_file && (dump_flags & TDF_DETAILS))
675 fprintf (dump_file, "failed: evolution of base is not affine.\n");
676 return;
678 if (!poffset)
680 offset_iv.base = ssize_int (0);
681 offset_iv.step = ssize_int (0);
683 else if (!simple_iv (loop, stmt, poffset, &offset_iv, false))
685 if (dump_file && (dump_flags & TDF_DETAILS))
686 fprintf (dump_file, "failed: evolution of offset is not affine.\n");
687 return;
690 init = ssize_int (pbitpos / BITS_PER_UNIT);
691 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
692 init = size_binop (PLUS_EXPR, init, dinit);
693 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
694 init = size_binop (PLUS_EXPR, init, dinit);
696 step = size_binop (PLUS_EXPR,
697 fold_convert (ssizetype, base_iv.step),
698 fold_convert (ssizetype, offset_iv.step));
700 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
702 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
703 DR_INIT (dr) = init;
704 DR_STEP (dr) = step;
706 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
708 if (dump_file && (dump_flags & TDF_DETAILS))
709 fprintf (dump_file, "success.\n");
712 /* Determines the base object and the list of indices of memory reference
713 DR, analyzed in loop nest NEST. */
715 static void
716 dr_analyze_indices (struct data_reference *dr, struct loop *nest)
718 tree stmt = DR_STMT (dr);
719 struct loop *loop = loop_containing_stmt (stmt);
720 VEC (tree, heap) *access_fns = NULL;
721 tree ref = unshare_expr (DR_REF (dr)), aref = ref, op;
722 tree base, off, access_fn;
724 while (handled_component_p (aref))
726 if (TREE_CODE (aref) == ARRAY_REF)
728 op = TREE_OPERAND (aref, 1);
729 access_fn = analyze_scalar_evolution (loop, op);
730 access_fn = resolve_mixers (nest, access_fn);
731 VEC_safe_push (tree, heap, access_fns, access_fn);
733 TREE_OPERAND (aref, 1) = build_int_cst (TREE_TYPE (op), 0);
736 aref = TREE_OPERAND (aref, 0);
739 if (INDIRECT_REF_P (aref))
741 op = TREE_OPERAND (aref, 0);
742 access_fn = analyze_scalar_evolution (loop, op);
743 access_fn = resolve_mixers (nest, access_fn);
744 base = initial_condition (access_fn);
745 split_constant_offset (base, &base, &off);
746 access_fn = chrec_replace_initial_condition (access_fn,
747 fold_convert (TREE_TYPE (base), off));
749 TREE_OPERAND (aref, 0) = base;
750 VEC_safe_push (tree, heap, access_fns, access_fn);
753 DR_BASE_OBJECT (dr) = ref;
754 DR_ACCESS_FNS (dr) = access_fns;
757 /* Extracts the alias analysis information from the memory reference DR. */
759 static void
760 dr_analyze_alias (struct data_reference *dr)
762 tree stmt = DR_STMT (dr);
763 tree ref = DR_REF (dr);
764 tree base = get_base_address (ref), addr, smt = NULL_TREE;
765 ssa_op_iter it;
766 tree op;
767 bitmap vops;
769 if (DECL_P (base))
770 smt = base;
771 else if (INDIRECT_REF_P (base))
773 addr = TREE_OPERAND (base, 0);
774 if (TREE_CODE (addr) == SSA_NAME)
776 smt = symbol_mem_tag (SSA_NAME_VAR (addr));
777 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
781 DR_SYMBOL_TAG (dr) = smt;
782 if (smt && var_can_have_subvars (smt))
783 DR_SUBVARS (dr) = get_subvars_for_var (smt);
785 vops = BITMAP_ALLOC (NULL);
786 FOR_EACH_SSA_TREE_OPERAND (op, stmt, it, SSA_OP_VIRTUAL_USES)
788 bitmap_set_bit (vops, DECL_UID (SSA_NAME_VAR (op)));
791 DR_VOPS (dr) = vops;
794 /* Returns true if the address of DR is invariant. */
796 static bool
797 dr_address_invariant_p (struct data_reference *dr)
799 unsigned i;
800 tree idx;
802 for (i = 0; VEC_iterate (tree, DR_ACCESS_FNS (dr), i, idx); i++)
803 if (tree_contains_chrecs (idx, NULL))
804 return false;
806 return true;
809 /* Frees data reference DR. */
811 static void
812 free_data_ref (data_reference_p dr)
814 BITMAP_FREE (DR_VOPS (dr));
815 VEC_free (tree, heap, DR_ACCESS_FNS (dr));
816 free (dr);
819 /* Analyzes memory reference MEMREF accessed in STMT. The reference
820 is read if IS_READ is true, write otherwise. Returns the
821 data_reference description of MEMREF. NEST is the outermost loop of the
822 loop nest in that the reference should be analyzed. */
824 struct data_reference *
825 create_data_ref (struct loop *nest, tree memref, tree stmt, bool is_read)
827 struct data_reference *dr;
829 if (dump_file && (dump_flags & TDF_DETAILS))
831 fprintf (dump_file, "Creating dr for ");
832 print_generic_expr (dump_file, memref, TDF_SLIM);
833 fprintf (dump_file, "\n");
836 dr = XCNEW (struct data_reference);
837 DR_STMT (dr) = stmt;
838 DR_REF (dr) = memref;
839 DR_IS_READ (dr) = is_read;
841 dr_analyze_innermost (dr);
842 dr_analyze_indices (dr, nest);
843 dr_analyze_alias (dr);
845 if (dump_file && (dump_flags & TDF_DETAILS))
847 fprintf (dump_file, "\tbase_address: ");
848 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
849 fprintf (dump_file, "\n\toffset from base address: ");
850 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
851 fprintf (dump_file, "\n\tconstant offset from base address: ");
852 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
853 fprintf (dump_file, "\n\tstep: ");
854 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
855 fprintf (dump_file, "\n\taligned to: ");
856 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
857 fprintf (dump_file, "\n\tbase_object: ");
858 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
859 fprintf (dump_file, "\n\tsymbol tag: ");
860 print_generic_expr (dump_file, DR_SYMBOL_TAG (dr), TDF_SLIM);
861 fprintf (dump_file, "\n");
864 return dr;
867 /* Returns true if FNA == FNB. */
869 static bool
870 affine_function_equal_p (affine_fn fna, affine_fn fnb)
872 unsigned i, n = VEC_length (tree, fna);
874 if (n != VEC_length (tree, fnb))
875 return false;
877 for (i = 0; i < n; i++)
878 if (!operand_equal_p (VEC_index (tree, fna, i),
879 VEC_index (tree, fnb, i), 0))
880 return false;
882 return true;
885 /* If all the functions in CF are the same, returns one of them,
886 otherwise returns NULL. */
888 static affine_fn
889 common_affine_function (conflict_function *cf)
891 unsigned i;
892 affine_fn comm;
894 if (!CF_NONTRIVIAL_P (cf))
895 return NULL;
897 comm = cf->fns[0];
899 for (i = 1; i < cf->n; i++)
900 if (!affine_function_equal_p (comm, cf->fns[i]))
901 return NULL;
903 return comm;
906 /* Returns the base of the affine function FN. */
908 static tree
909 affine_function_base (affine_fn fn)
911 return VEC_index (tree, fn, 0);
914 /* Returns true if FN is a constant. */
916 static bool
917 affine_function_constant_p (affine_fn fn)
919 unsigned i;
920 tree coef;
922 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
923 if (!integer_zerop (coef))
924 return false;
926 return true;
929 /* Returns true if FN is the zero constant function. */
931 static bool
932 affine_function_zero_p (affine_fn fn)
934 return (integer_zerop (affine_function_base (fn))
935 && affine_function_constant_p (fn));
938 /* Returns a signed integer type with the largest precision from TA
939 and TB. */
941 static tree
942 signed_type_for_types (tree ta, tree tb)
944 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
945 return signed_type_for (ta);
946 else
947 return signed_type_for (tb);
950 /* Applies operation OP on affine functions FNA and FNB, and returns the
951 result. */
953 static affine_fn
954 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
956 unsigned i, n, m;
957 affine_fn ret;
958 tree coef;
960 if (VEC_length (tree, fnb) > VEC_length (tree, fna))
962 n = VEC_length (tree, fna);
963 m = VEC_length (tree, fnb);
965 else
967 n = VEC_length (tree, fnb);
968 m = VEC_length (tree, fna);
971 ret = VEC_alloc (tree, heap, m);
972 for (i = 0; i < n; i++)
974 tree type = signed_type_for_types (TREE_TYPE (VEC_index (tree, fna, i)),
975 TREE_TYPE (VEC_index (tree, fnb, i)));
977 VEC_quick_push (tree, ret,
978 fold_build2 (op, type,
979 VEC_index (tree, fna, i),
980 VEC_index (tree, fnb, i)));
983 for (; VEC_iterate (tree, fna, i, coef); i++)
984 VEC_quick_push (tree, ret,
985 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
986 coef, integer_zero_node));
987 for (; VEC_iterate (tree, fnb, i, coef); i++)
988 VEC_quick_push (tree, ret,
989 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
990 integer_zero_node, coef));
992 return ret;
995 /* Returns the sum of affine functions FNA and FNB. */
997 static affine_fn
998 affine_fn_plus (affine_fn fna, affine_fn fnb)
1000 return affine_fn_op (PLUS_EXPR, fna, fnb);
1003 /* Returns the difference of affine functions FNA and FNB. */
1005 static affine_fn
1006 affine_fn_minus (affine_fn fna, affine_fn fnb)
1008 return affine_fn_op (MINUS_EXPR, fna, fnb);
1011 /* Frees affine function FN. */
1013 static void
1014 affine_fn_free (affine_fn fn)
1016 VEC_free (tree, heap, fn);
1019 /* Determine for each subscript in the data dependence relation DDR
1020 the distance. */
1022 static void
1023 compute_subscript_distance (struct data_dependence_relation *ddr)
1025 conflict_function *cf_a, *cf_b;
1026 affine_fn fn_a, fn_b, diff;
1028 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1030 unsigned int i;
1032 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1034 struct subscript *subscript;
1036 subscript = DDR_SUBSCRIPT (ddr, i);
1037 cf_a = SUB_CONFLICTS_IN_A (subscript);
1038 cf_b = SUB_CONFLICTS_IN_B (subscript);
1040 fn_a = common_affine_function (cf_a);
1041 fn_b = common_affine_function (cf_b);
1042 if (!fn_a || !fn_b)
1044 SUB_DISTANCE (subscript) = chrec_dont_know;
1045 return;
1047 diff = affine_fn_minus (fn_a, fn_b);
1049 if (affine_function_constant_p (diff))
1050 SUB_DISTANCE (subscript) = affine_function_base (diff);
1051 else
1052 SUB_DISTANCE (subscript) = chrec_dont_know;
1054 affine_fn_free (diff);
1059 /* Returns the conflict function for "unknown". */
1061 static conflict_function *
1062 conflict_fn_not_known (void)
1064 conflict_function *fn = XCNEW (conflict_function);
1065 fn->n = NOT_KNOWN;
1067 return fn;
1070 /* Returns the conflict function for "independent". */
1072 static conflict_function *
1073 conflict_fn_no_dependence (void)
1075 conflict_function *fn = XCNEW (conflict_function);
1076 fn->n = NO_DEPENDENCE;
1078 return fn;
1081 /* Returns true if the address of OBJ is invariant in LOOP. */
1083 static bool
1084 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1086 while (handled_component_p (obj))
1088 if (TREE_CODE (obj) == ARRAY_REF)
1090 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1091 need to check the stride and the lower bound of the reference. */
1092 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1093 loop->num)
1094 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1095 loop->num))
1096 return false;
1098 else if (TREE_CODE (obj) == COMPONENT_REF)
1100 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1101 loop->num))
1102 return false;
1104 obj = TREE_OPERAND (obj, 0);
1107 if (!INDIRECT_REF_P (obj))
1108 return true;
1110 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1111 loop->num);
1114 /* Returns true if A and B are accesses to different objects, or to different
1115 fields of the same object. */
1117 static bool
1118 disjoint_objects_p (tree a, tree b)
1120 tree base_a, base_b;
1121 VEC (tree, heap) *comp_a = NULL, *comp_b = NULL;
1122 bool ret;
1124 base_a = get_base_address (a);
1125 base_b = get_base_address (b);
1127 if (DECL_P (base_a)
1128 && DECL_P (base_b)
1129 && base_a != base_b)
1130 return true;
1132 if (!operand_equal_p (base_a, base_b, 0))
1133 return false;
1135 /* Compare the component references of A and B. We must start from the inner
1136 ones, so record them to the vector first. */
1137 while (handled_component_p (a))
1139 VEC_safe_push (tree, heap, comp_a, a);
1140 a = TREE_OPERAND (a, 0);
1142 while (handled_component_p (b))
1144 VEC_safe_push (tree, heap, comp_b, b);
1145 b = TREE_OPERAND (b, 0);
1148 ret = false;
1149 while (1)
1151 if (VEC_length (tree, comp_a) == 0
1152 || VEC_length (tree, comp_b) == 0)
1153 break;
1155 a = VEC_pop (tree, comp_a);
1156 b = VEC_pop (tree, comp_b);
1158 /* Real and imaginary part of a variable do not alias. */
1159 if ((TREE_CODE (a) == REALPART_EXPR
1160 && TREE_CODE (b) == IMAGPART_EXPR)
1161 || (TREE_CODE (a) == IMAGPART_EXPR
1162 && TREE_CODE (b) == REALPART_EXPR))
1164 ret = true;
1165 break;
1168 if (TREE_CODE (a) != TREE_CODE (b))
1169 break;
1171 /* Nothing to do for ARRAY_REFs, as the indices of array_refs in
1172 DR_BASE_OBJECT are always zero. */
1173 if (TREE_CODE (a) == ARRAY_REF)
1174 continue;
1175 else if (TREE_CODE (a) == COMPONENT_REF)
1177 if (operand_equal_p (TREE_OPERAND (a, 1), TREE_OPERAND (b, 1), 0))
1178 continue;
1180 /* Different fields of unions may overlap. */
1181 base_a = TREE_OPERAND (a, 0);
1182 if (TREE_CODE (TREE_TYPE (base_a)) == UNION_TYPE)
1183 break;
1185 /* Different fields of structures cannot. */
1186 ret = true;
1187 break;
1189 else
1190 break;
1193 VEC_free (tree, heap, comp_a);
1194 VEC_free (tree, heap, comp_b);
1196 return ret;
1199 /* Returns false if we can prove that data references A and B do not alias,
1200 true otherwise. */
1202 static bool
1203 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b)
1205 const_tree addr_a = DR_BASE_ADDRESS (a);
1206 const_tree addr_b = DR_BASE_ADDRESS (b);
1207 const_tree type_a, type_b;
1208 const_tree decl_a = NULL_TREE, decl_b = NULL_TREE;
1210 /* If the sets of virtual operands are disjoint, the memory references do not
1211 alias. */
1212 if (!bitmap_intersect_p (DR_VOPS (a), DR_VOPS (b)))
1213 return false;
1215 /* If the accessed objects are disjoint, the memory references do not
1216 alias. */
1217 if (disjoint_objects_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b)))
1218 return false;
1220 if (!addr_a || !addr_b)
1221 return true;
1223 /* If the references are based on different static objects, they cannot alias
1224 (PTA should be able to disambiguate such accesses, but often it fails to,
1225 since currently we cannot distinguish between pointer and offset in pointer
1226 arithmetics). */
1227 if (TREE_CODE (addr_a) == ADDR_EXPR
1228 && TREE_CODE (addr_b) == ADDR_EXPR)
1229 return TREE_OPERAND (addr_a, 0) == TREE_OPERAND (addr_b, 0);
1231 /* An instruction writing through a restricted pointer is "independent" of any
1232 instruction reading or writing through a different restricted pointer,
1233 in the same block/scope. */
1235 type_a = TREE_TYPE (addr_a);
1236 type_b = TREE_TYPE (addr_b);
1237 gcc_assert (POINTER_TYPE_P (type_a) && POINTER_TYPE_P (type_b));
1239 if (TREE_CODE (addr_a) == SSA_NAME)
1240 decl_a = SSA_NAME_VAR (addr_a);
1241 if (TREE_CODE (addr_b) == SSA_NAME)
1242 decl_b = SSA_NAME_VAR (addr_b);
1244 if (TYPE_RESTRICT (type_a) && TYPE_RESTRICT (type_b)
1245 && (!DR_IS_READ (a) || !DR_IS_READ (b))
1246 && decl_a && DECL_P (decl_a)
1247 && decl_b && DECL_P (decl_b)
1248 && decl_a != decl_b
1249 && TREE_CODE (DECL_CONTEXT (decl_a)) == FUNCTION_DECL
1250 && DECL_CONTEXT (decl_a) == DECL_CONTEXT (decl_b))
1251 return false;
1253 return true;
1256 /* Initialize a data dependence relation between data accesses A and
1257 B. NB_LOOPS is the number of loops surrounding the references: the
1258 size of the classic distance/direction vectors. */
1260 static struct data_dependence_relation *
1261 initialize_data_dependence_relation (struct data_reference *a,
1262 struct data_reference *b,
1263 VEC (loop_p, heap) *loop_nest)
1265 struct data_dependence_relation *res;
1266 unsigned int i;
1268 res = XNEW (struct data_dependence_relation);
1269 DDR_A (res) = a;
1270 DDR_B (res) = b;
1271 DDR_LOOP_NEST (res) = NULL;
1272 DDR_REVERSED_P (res) = false;
1273 DDR_SUBSCRIPTS (res) = NULL;
1274 DDR_DIR_VECTS (res) = NULL;
1275 DDR_DIST_VECTS (res) = NULL;
1277 if (a == NULL || b == NULL)
1279 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1280 return res;
1283 /* If the data references do not alias, then they are independent. */
1284 if (!dr_may_alias_p (a, b))
1286 DDR_ARE_DEPENDENT (res) = chrec_known;
1287 return res;
1290 /* If the references do not access the same object, we do not know
1291 whether they alias or not. */
1292 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1294 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1295 return res;
1298 /* If the base of the object is not invariant in the loop nest, we cannot
1299 analyze it. TODO -- in fact, it would suffice to record that there may
1300 be arbitrary dependences in the loops where the base object varies. */
1301 if (!object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1302 DR_BASE_OBJECT (a)))
1304 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1305 return res;
1308 gcc_assert (DR_NUM_DIMENSIONS (a) == DR_NUM_DIMENSIONS (b));
1310 DDR_AFFINE_P (res) = true;
1311 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1312 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1313 DDR_LOOP_NEST (res) = loop_nest;
1314 DDR_INNER_LOOP (res) = 0;
1316 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1318 struct subscript *subscript;
1320 subscript = XNEW (struct subscript);
1321 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1322 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1323 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1324 SUB_DISTANCE (subscript) = chrec_dont_know;
1325 VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1328 return res;
1331 /* Frees memory used by the conflict function F. */
1333 static void
1334 free_conflict_function (conflict_function *f)
1336 unsigned i;
1338 if (CF_NONTRIVIAL_P (f))
1340 for (i = 0; i < f->n; i++)
1341 affine_fn_free (f->fns[i]);
1343 free (f);
1346 /* Frees memory used by SUBSCRIPTS. */
1348 static void
1349 free_subscripts (VEC (subscript_p, heap) *subscripts)
1351 unsigned i;
1352 subscript_p s;
1354 for (i = 0; VEC_iterate (subscript_p, subscripts, i, s); i++)
1356 free_conflict_function (s->conflicting_iterations_in_a);
1357 free_conflict_function (s->conflicting_iterations_in_b);
1359 VEC_free (subscript_p, heap, subscripts);
1362 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1363 description. */
1365 static inline void
1366 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1367 tree chrec)
1369 if (dump_file && (dump_flags & TDF_DETAILS))
1371 fprintf (dump_file, "(dependence classified: ");
1372 print_generic_expr (dump_file, chrec, 0);
1373 fprintf (dump_file, ")\n");
1376 DDR_ARE_DEPENDENT (ddr) = chrec;
1377 free_subscripts (DDR_SUBSCRIPTS (ddr));
1378 DDR_SUBSCRIPTS (ddr) = NULL;
1381 /* The dependence relation DDR cannot be represented by a distance
1382 vector. */
1384 static inline void
1385 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1387 if (dump_file && (dump_flags & TDF_DETAILS))
1388 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1390 DDR_AFFINE_P (ddr) = false;
1395 /* This section contains the classic Banerjee tests. */
1397 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1398 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1400 static inline bool
1401 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1403 return (evolution_function_is_constant_p (chrec_a)
1404 && evolution_function_is_constant_p (chrec_b));
1407 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1408 variable, i.e., if the SIV (Single Index Variable) test is true. */
1410 static bool
1411 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1413 if ((evolution_function_is_constant_p (chrec_a)
1414 && evolution_function_is_univariate_p (chrec_b))
1415 || (evolution_function_is_constant_p (chrec_b)
1416 && evolution_function_is_univariate_p (chrec_a)))
1417 return true;
1419 if (evolution_function_is_univariate_p (chrec_a)
1420 && evolution_function_is_univariate_p (chrec_b))
1422 switch (TREE_CODE (chrec_a))
1424 case POLYNOMIAL_CHREC:
1425 switch (TREE_CODE (chrec_b))
1427 case POLYNOMIAL_CHREC:
1428 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1429 return false;
1431 default:
1432 return true;
1435 default:
1436 return true;
1440 return false;
1443 /* Creates a conflict function with N dimensions. The affine functions
1444 in each dimension follow. */
1446 static conflict_function *
1447 conflict_fn (unsigned n, ...)
1449 unsigned i;
1450 conflict_function *ret = XCNEW (conflict_function);
1451 va_list ap;
1453 gcc_assert (0 < n && n <= MAX_DIM);
1454 va_start(ap, n);
1456 ret->n = n;
1457 for (i = 0; i < n; i++)
1458 ret->fns[i] = va_arg (ap, affine_fn);
1459 va_end(ap);
1461 return ret;
1464 /* Returns constant affine function with value CST. */
1466 static affine_fn
1467 affine_fn_cst (tree cst)
1469 affine_fn fn = VEC_alloc (tree, heap, 1);
1470 VEC_quick_push (tree, fn, cst);
1471 return fn;
1474 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1476 static affine_fn
1477 affine_fn_univar (tree cst, unsigned dim, tree coef)
1479 affine_fn fn = VEC_alloc (tree, heap, dim + 1);
1480 unsigned i;
1482 gcc_assert (dim > 0);
1483 VEC_quick_push (tree, fn, cst);
1484 for (i = 1; i < dim; i++)
1485 VEC_quick_push (tree, fn, integer_zero_node);
1486 VEC_quick_push (tree, fn, coef);
1487 return fn;
1490 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1491 *OVERLAPS_B are initialized to the functions that describe the
1492 relation between the elements accessed twice by CHREC_A and
1493 CHREC_B. For k >= 0, the following property is verified:
1495 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1497 static void
1498 analyze_ziv_subscript (tree chrec_a,
1499 tree chrec_b,
1500 conflict_function **overlaps_a,
1501 conflict_function **overlaps_b,
1502 tree *last_conflicts)
1504 tree type, difference;
1505 dependence_stats.num_ziv++;
1507 if (dump_file && (dump_flags & TDF_DETAILS))
1508 fprintf (dump_file, "(analyze_ziv_subscript \n");
1510 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1511 chrec_a = chrec_convert (type, chrec_a, NULL_TREE);
1512 chrec_b = chrec_convert (type, chrec_b, NULL_TREE);
1513 difference = chrec_fold_minus (type, chrec_a, chrec_b);
1515 switch (TREE_CODE (difference))
1517 case INTEGER_CST:
1518 if (integer_zerop (difference))
1520 /* The difference is equal to zero: the accessed index
1521 overlaps for each iteration in the loop. */
1522 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1523 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1524 *last_conflicts = chrec_dont_know;
1525 dependence_stats.num_ziv_dependent++;
1527 else
1529 /* The accesses do not overlap. */
1530 *overlaps_a = conflict_fn_no_dependence ();
1531 *overlaps_b = conflict_fn_no_dependence ();
1532 *last_conflicts = integer_zero_node;
1533 dependence_stats.num_ziv_independent++;
1535 break;
1537 default:
1538 /* We're not sure whether the indexes overlap. For the moment,
1539 conservatively answer "don't know". */
1540 if (dump_file && (dump_flags & TDF_DETAILS))
1541 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1543 *overlaps_a = conflict_fn_not_known ();
1544 *overlaps_b = conflict_fn_not_known ();
1545 *last_conflicts = chrec_dont_know;
1546 dependence_stats.num_ziv_unimplemented++;
1547 break;
1550 if (dump_file && (dump_flags & TDF_DETAILS))
1551 fprintf (dump_file, ")\n");
1554 /* Sets NIT to the estimated number of executions of the statements in
1555 LOOP. If CONSERVATIVE is true, we must be sure that NIT is at least as
1556 large as the number of iterations. If we have no reliable estimate,
1557 the function returns false, otherwise returns true. */
1559 bool
1560 estimated_loop_iterations (struct loop *loop, bool conservative,
1561 double_int *nit)
1563 estimate_numbers_of_iterations_loop (loop);
1564 if (conservative)
1566 if (!loop->any_upper_bound)
1567 return false;
1569 *nit = loop->nb_iterations_upper_bound;
1571 else
1573 if (!loop->any_estimate)
1574 return false;
1576 *nit = loop->nb_iterations_estimate;
1579 return true;
1582 /* Similar to estimated_loop_iterations, but returns the estimate only
1583 if it fits to HOST_WIDE_INT. If this is not the case, or the estimate
1584 on the number of iterations of LOOP could not be derived, returns -1. */
1586 HOST_WIDE_INT
1587 estimated_loop_iterations_int (struct loop *loop, bool conservative)
1589 double_int nit;
1590 HOST_WIDE_INT hwi_nit;
1592 if (!estimated_loop_iterations (loop, conservative, &nit))
1593 return -1;
1595 if (!double_int_fits_in_shwi_p (nit))
1596 return -1;
1597 hwi_nit = double_int_to_shwi (nit);
1599 return hwi_nit < 0 ? -1 : hwi_nit;
1602 /* Similar to estimated_loop_iterations, but returns the estimate as a tree,
1603 and only if it fits to the int type. If this is not the case, or the
1604 estimate on the number of iterations of LOOP could not be derived, returns
1605 chrec_dont_know. */
1607 static tree
1608 estimated_loop_iterations_tree (struct loop *loop, bool conservative)
1610 double_int nit;
1611 tree type;
1613 if (!estimated_loop_iterations (loop, conservative, &nit))
1614 return chrec_dont_know;
1616 type = lang_hooks.types.type_for_size (INT_TYPE_SIZE, true);
1617 if (!double_int_fits_to_tree_p (type, nit))
1618 return chrec_dont_know;
1620 return double_int_to_tree (type, nit);
1623 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1624 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1625 *OVERLAPS_B are initialized to the functions that describe the
1626 relation between the elements accessed twice by CHREC_A and
1627 CHREC_B. For k >= 0, the following property is verified:
1629 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1631 static void
1632 analyze_siv_subscript_cst_affine (tree chrec_a,
1633 tree chrec_b,
1634 conflict_function **overlaps_a,
1635 conflict_function **overlaps_b,
1636 tree *last_conflicts)
1638 bool value0, value1, value2;
1639 tree type, difference, tmp;
1641 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1642 chrec_a = chrec_convert (type, chrec_a, NULL_TREE);
1643 chrec_b = chrec_convert (type, chrec_b, NULL_TREE);
1644 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1646 if (!chrec_is_positive (initial_condition (difference), &value0))
1648 if (dump_file && (dump_flags & TDF_DETAILS))
1649 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1651 dependence_stats.num_siv_unimplemented++;
1652 *overlaps_a = conflict_fn_not_known ();
1653 *overlaps_b = conflict_fn_not_known ();
1654 *last_conflicts = chrec_dont_know;
1655 return;
1657 else
1659 if (value0 == false)
1661 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1663 if (dump_file && (dump_flags & TDF_DETAILS))
1664 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1666 *overlaps_a = conflict_fn_not_known ();
1667 *overlaps_b = conflict_fn_not_known ();
1668 *last_conflicts = chrec_dont_know;
1669 dependence_stats.num_siv_unimplemented++;
1670 return;
1672 else
1674 if (value1 == true)
1676 /* Example:
1677 chrec_a = 12
1678 chrec_b = {10, +, 1}
1681 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1683 HOST_WIDE_INT numiter;
1684 struct loop *loop = get_chrec_loop (chrec_b);
1686 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1687 tmp = fold_build2 (EXACT_DIV_EXPR, type,
1688 fold_build1 (ABS_EXPR, type, difference),
1689 CHREC_RIGHT (chrec_b));
1690 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1691 *last_conflicts = integer_one_node;
1694 /* Perform weak-zero siv test to see if overlap is
1695 outside the loop bounds. */
1696 numiter = estimated_loop_iterations_int (loop, false);
1698 if (numiter >= 0
1699 && compare_tree_int (tmp, numiter) > 0)
1701 free_conflict_function (*overlaps_a);
1702 free_conflict_function (*overlaps_b);
1703 *overlaps_a = conflict_fn_no_dependence ();
1704 *overlaps_b = conflict_fn_no_dependence ();
1705 *last_conflicts = integer_zero_node;
1706 dependence_stats.num_siv_independent++;
1707 return;
1709 dependence_stats.num_siv_dependent++;
1710 return;
1713 /* When the step does not divide the difference, there are
1714 no overlaps. */
1715 else
1717 *overlaps_a = conflict_fn_no_dependence ();
1718 *overlaps_b = conflict_fn_no_dependence ();
1719 *last_conflicts = integer_zero_node;
1720 dependence_stats.num_siv_independent++;
1721 return;
1725 else
1727 /* Example:
1728 chrec_a = 12
1729 chrec_b = {10, +, -1}
1731 In this case, chrec_a will not overlap with chrec_b. */
1732 *overlaps_a = conflict_fn_no_dependence ();
1733 *overlaps_b = conflict_fn_no_dependence ();
1734 *last_conflicts = integer_zero_node;
1735 dependence_stats.num_siv_independent++;
1736 return;
1740 else
1742 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1744 if (dump_file && (dump_flags & TDF_DETAILS))
1745 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1747 *overlaps_a = conflict_fn_not_known ();
1748 *overlaps_b = conflict_fn_not_known ();
1749 *last_conflicts = chrec_dont_know;
1750 dependence_stats.num_siv_unimplemented++;
1751 return;
1753 else
1755 if (value2 == false)
1757 /* Example:
1758 chrec_a = 3
1759 chrec_b = {10, +, -1}
1761 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1763 HOST_WIDE_INT numiter;
1764 struct loop *loop = get_chrec_loop (chrec_b);
1766 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1767 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
1768 CHREC_RIGHT (chrec_b));
1769 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1770 *last_conflicts = integer_one_node;
1772 /* Perform weak-zero siv test to see if overlap is
1773 outside the loop bounds. */
1774 numiter = estimated_loop_iterations_int (loop, false);
1776 if (numiter >= 0
1777 && compare_tree_int (tmp, numiter) > 0)
1779 free_conflict_function (*overlaps_a);
1780 free_conflict_function (*overlaps_b);
1781 *overlaps_a = conflict_fn_no_dependence ();
1782 *overlaps_b = conflict_fn_no_dependence ();
1783 *last_conflicts = integer_zero_node;
1784 dependence_stats.num_siv_independent++;
1785 return;
1787 dependence_stats.num_siv_dependent++;
1788 return;
1791 /* When the step does not divide the difference, there
1792 are no overlaps. */
1793 else
1795 *overlaps_a = conflict_fn_no_dependence ();
1796 *overlaps_b = conflict_fn_no_dependence ();
1797 *last_conflicts = integer_zero_node;
1798 dependence_stats.num_siv_independent++;
1799 return;
1802 else
1804 /* Example:
1805 chrec_a = 3
1806 chrec_b = {4, +, 1}
1808 In this case, chrec_a will not overlap with chrec_b. */
1809 *overlaps_a = conflict_fn_no_dependence ();
1810 *overlaps_b = conflict_fn_no_dependence ();
1811 *last_conflicts = integer_zero_node;
1812 dependence_stats.num_siv_independent++;
1813 return;
1820 /* Helper recursive function for initializing the matrix A. Returns
1821 the initial value of CHREC. */
1823 static HOST_WIDE_INT
1824 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
1826 gcc_assert (chrec);
1828 if (TREE_CODE (chrec) != POLYNOMIAL_CHREC)
1829 return int_cst_value (chrec);
1831 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
1832 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
1835 #define FLOOR_DIV(x,y) ((x) / (y))
1837 /* Solves the special case of the Diophantine equation:
1838 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
1840 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
1841 number of iterations that loops X and Y run. The overlaps will be
1842 constructed as evolutions in dimension DIM. */
1844 static void
1845 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
1846 affine_fn *overlaps_a,
1847 affine_fn *overlaps_b,
1848 tree *last_conflicts, int dim)
1850 if (((step_a > 0 && step_b > 0)
1851 || (step_a < 0 && step_b < 0)))
1853 int step_overlaps_a, step_overlaps_b;
1854 int gcd_steps_a_b, last_conflict, tau2;
1856 gcd_steps_a_b = gcd (step_a, step_b);
1857 step_overlaps_a = step_b / gcd_steps_a_b;
1858 step_overlaps_b = step_a / gcd_steps_a_b;
1860 if (niter > 0)
1862 tau2 = FLOOR_DIV (niter, step_overlaps_a);
1863 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
1864 last_conflict = tau2;
1865 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
1867 else
1868 *last_conflicts = chrec_dont_know;
1870 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
1871 build_int_cst (NULL_TREE,
1872 step_overlaps_a));
1873 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
1874 build_int_cst (NULL_TREE,
1875 step_overlaps_b));
1878 else
1880 *overlaps_a = affine_fn_cst (integer_zero_node);
1881 *overlaps_b = affine_fn_cst (integer_zero_node);
1882 *last_conflicts = integer_zero_node;
1886 /* Solves the special case of a Diophantine equation where CHREC_A is
1887 an affine bivariate function, and CHREC_B is an affine univariate
1888 function. For example,
1890 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
1892 has the following overlapping functions:
1894 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
1895 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
1896 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
1898 FORNOW: This is a specialized implementation for a case occurring in
1899 a common benchmark. Implement the general algorithm. */
1901 static void
1902 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
1903 conflict_function **overlaps_a,
1904 conflict_function **overlaps_b,
1905 tree *last_conflicts)
1907 bool xz_p, yz_p, xyz_p;
1908 int step_x, step_y, step_z;
1909 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
1910 affine_fn overlaps_a_xz, overlaps_b_xz;
1911 affine_fn overlaps_a_yz, overlaps_b_yz;
1912 affine_fn overlaps_a_xyz, overlaps_b_xyz;
1913 affine_fn ova1, ova2, ovb;
1914 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
1916 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
1917 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
1918 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
1920 niter_x =
1921 estimated_loop_iterations_int (get_chrec_loop (CHREC_LEFT (chrec_a)),
1922 false);
1923 niter_y = estimated_loop_iterations_int (get_chrec_loop (chrec_a), false);
1924 niter_z = estimated_loop_iterations_int (get_chrec_loop (chrec_b), false);
1926 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
1928 if (dump_file && (dump_flags & TDF_DETAILS))
1929 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
1931 *overlaps_a = conflict_fn_not_known ();
1932 *overlaps_b = conflict_fn_not_known ();
1933 *last_conflicts = chrec_dont_know;
1934 return;
1937 niter = MIN (niter_x, niter_z);
1938 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
1939 &overlaps_a_xz,
1940 &overlaps_b_xz,
1941 &last_conflicts_xz, 1);
1942 niter = MIN (niter_y, niter_z);
1943 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
1944 &overlaps_a_yz,
1945 &overlaps_b_yz,
1946 &last_conflicts_yz, 2);
1947 niter = MIN (niter_x, niter_z);
1948 niter = MIN (niter_y, niter);
1949 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
1950 &overlaps_a_xyz,
1951 &overlaps_b_xyz,
1952 &last_conflicts_xyz, 3);
1954 xz_p = !integer_zerop (last_conflicts_xz);
1955 yz_p = !integer_zerop (last_conflicts_yz);
1956 xyz_p = !integer_zerop (last_conflicts_xyz);
1958 if (xz_p || yz_p || xyz_p)
1960 ova1 = affine_fn_cst (integer_zero_node);
1961 ova2 = affine_fn_cst (integer_zero_node);
1962 ovb = affine_fn_cst (integer_zero_node);
1963 if (xz_p)
1965 affine_fn t0 = ova1;
1966 affine_fn t2 = ovb;
1968 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
1969 ovb = affine_fn_plus (ovb, overlaps_b_xz);
1970 affine_fn_free (t0);
1971 affine_fn_free (t2);
1972 *last_conflicts = last_conflicts_xz;
1974 if (yz_p)
1976 affine_fn t0 = ova2;
1977 affine_fn t2 = ovb;
1979 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
1980 ovb = affine_fn_plus (ovb, overlaps_b_yz);
1981 affine_fn_free (t0);
1982 affine_fn_free (t2);
1983 *last_conflicts = last_conflicts_yz;
1985 if (xyz_p)
1987 affine_fn t0 = ova1;
1988 affine_fn t2 = ova2;
1989 affine_fn t4 = ovb;
1991 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
1992 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
1993 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
1994 affine_fn_free (t0);
1995 affine_fn_free (t2);
1996 affine_fn_free (t4);
1997 *last_conflicts = last_conflicts_xyz;
1999 *overlaps_a = conflict_fn (2, ova1, ova2);
2000 *overlaps_b = conflict_fn (1, ovb);
2002 else
2004 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2005 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2006 *last_conflicts = integer_zero_node;
2009 affine_fn_free (overlaps_a_xz);
2010 affine_fn_free (overlaps_b_xz);
2011 affine_fn_free (overlaps_a_yz);
2012 affine_fn_free (overlaps_b_yz);
2013 affine_fn_free (overlaps_a_xyz);
2014 affine_fn_free (overlaps_b_xyz);
2017 /* Determines the overlapping elements due to accesses CHREC_A and
2018 CHREC_B, that are affine functions. This function cannot handle
2019 symbolic evolution functions, ie. when initial conditions are
2020 parameters, because it uses lambda matrices of integers. */
2022 static void
2023 analyze_subscript_affine_affine (tree chrec_a,
2024 tree chrec_b,
2025 conflict_function **overlaps_a,
2026 conflict_function **overlaps_b,
2027 tree *last_conflicts)
2029 unsigned nb_vars_a, nb_vars_b, dim;
2030 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2031 lambda_matrix A, U, S;
2033 if (eq_evolutions_p (chrec_a, chrec_b))
2035 /* The accessed index overlaps for each iteration in the
2036 loop. */
2037 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2038 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2039 *last_conflicts = chrec_dont_know;
2040 return;
2042 if (dump_file && (dump_flags & TDF_DETAILS))
2043 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2045 /* For determining the initial intersection, we have to solve a
2046 Diophantine equation. This is the most time consuming part.
2048 For answering to the question: "Is there a dependence?" we have
2049 to prove that there exists a solution to the Diophantine
2050 equation, and that the solution is in the iteration domain,
2051 i.e. the solution is positive or zero, and that the solution
2052 happens before the upper bound loop.nb_iterations. Otherwise
2053 there is no dependence. This function outputs a description of
2054 the iterations that hold the intersections. */
2056 nb_vars_a = nb_vars_in_chrec (chrec_a);
2057 nb_vars_b = nb_vars_in_chrec (chrec_b);
2059 dim = nb_vars_a + nb_vars_b;
2060 U = lambda_matrix_new (dim, dim);
2061 A = lambda_matrix_new (dim, 1);
2062 S = lambda_matrix_new (dim, 1);
2064 init_a = initialize_matrix_A (A, chrec_a, 0, 1);
2065 init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
2066 gamma = init_b - init_a;
2068 /* Don't do all the hard work of solving the Diophantine equation
2069 when we already know the solution: for example,
2070 | {3, +, 1}_1
2071 | {3, +, 4}_2
2072 | gamma = 3 - 3 = 0.
2073 Then the first overlap occurs during the first iterations:
2074 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2076 if (gamma == 0)
2078 if (nb_vars_a == 1 && nb_vars_b == 1)
2080 HOST_WIDE_INT step_a, step_b;
2081 HOST_WIDE_INT niter, niter_a, niter_b;
2082 affine_fn ova, ovb;
2084 niter_a = estimated_loop_iterations_int (get_chrec_loop (chrec_a),
2085 false);
2086 niter_b = estimated_loop_iterations_int (get_chrec_loop (chrec_b),
2087 false);
2088 niter = MIN (niter_a, niter_b);
2089 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2090 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2092 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2093 &ova, &ovb,
2094 last_conflicts, 1);
2095 *overlaps_a = conflict_fn (1, ova);
2096 *overlaps_b = conflict_fn (1, ovb);
2099 else if (nb_vars_a == 2 && nb_vars_b == 1)
2100 compute_overlap_steps_for_affine_1_2
2101 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2103 else if (nb_vars_a == 1 && nb_vars_b == 2)
2104 compute_overlap_steps_for_affine_1_2
2105 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2107 else
2109 if (dump_file && (dump_flags & TDF_DETAILS))
2110 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2111 *overlaps_a = conflict_fn_not_known ();
2112 *overlaps_b = conflict_fn_not_known ();
2113 *last_conflicts = chrec_dont_know;
2115 goto end_analyze_subs_aa;
2118 /* U.A = S */
2119 lambda_matrix_right_hermite (A, dim, 1, S, U);
2121 if (S[0][0] < 0)
2123 S[0][0] *= -1;
2124 lambda_matrix_row_negate (U, dim, 0);
2126 gcd_alpha_beta = S[0][0];
2128 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2129 but that is a quite strange case. Instead of ICEing, answer
2130 don't know. */
2131 if (gcd_alpha_beta == 0)
2133 *overlaps_a = conflict_fn_not_known ();
2134 *overlaps_b = conflict_fn_not_known ();
2135 *last_conflicts = chrec_dont_know;
2136 goto end_analyze_subs_aa;
2139 /* The classic "gcd-test". */
2140 if (!int_divides_p (gcd_alpha_beta, gamma))
2142 /* The "gcd-test" has determined that there is no integer
2143 solution, i.e. there is no dependence. */
2144 *overlaps_a = conflict_fn_no_dependence ();
2145 *overlaps_b = conflict_fn_no_dependence ();
2146 *last_conflicts = integer_zero_node;
2149 /* Both access functions are univariate. This includes SIV and MIV cases. */
2150 else if (nb_vars_a == 1 && nb_vars_b == 1)
2152 /* Both functions should have the same evolution sign. */
2153 if (((A[0][0] > 0 && -A[1][0] > 0)
2154 || (A[0][0] < 0 && -A[1][0] < 0)))
2156 /* The solutions are given by:
2158 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2159 | [u21 u22] [y0]
2161 For a given integer t. Using the following variables,
2163 | i0 = u11 * gamma / gcd_alpha_beta
2164 | j0 = u12 * gamma / gcd_alpha_beta
2165 | i1 = u21
2166 | j1 = u22
2168 the solutions are:
2170 | x0 = i0 + i1 * t,
2171 | y0 = j0 + j1 * t. */
2172 HOST_WIDE_INT i0, j0, i1, j1;
2174 i0 = U[0][0] * gamma / gcd_alpha_beta;
2175 j0 = U[0][1] * gamma / gcd_alpha_beta;
2176 i1 = U[1][0];
2177 j1 = U[1][1];
2179 if ((i1 == 0 && i0 < 0)
2180 || (j1 == 0 && j0 < 0))
2182 /* There is no solution.
2183 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2184 falls in here, but for the moment we don't look at the
2185 upper bound of the iteration domain. */
2186 *overlaps_a = conflict_fn_no_dependence ();
2187 *overlaps_b = conflict_fn_no_dependence ();
2188 *last_conflicts = integer_zero_node;
2189 goto end_analyze_subs_aa;
2192 if (i1 > 0 && j1 > 0)
2194 HOST_WIDE_INT niter_a = estimated_loop_iterations_int
2195 (get_chrec_loop (chrec_a), false);
2196 HOST_WIDE_INT niter_b = estimated_loop_iterations_int
2197 (get_chrec_loop (chrec_b), false);
2198 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2200 /* (X0, Y0) is a solution of the Diophantine equation:
2201 "chrec_a (X0) = chrec_b (Y0)". */
2202 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2203 CEIL (-j0, j1));
2204 HOST_WIDE_INT x0 = i1 * tau1 + i0;
2205 HOST_WIDE_INT y0 = j1 * tau1 + j0;
2207 /* (X1, Y1) is the smallest positive solution of the eq
2208 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2209 first conflict occurs. */
2210 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2211 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2212 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2214 if (niter > 0)
2216 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2217 FLOOR_DIV (niter - j0, j1));
2218 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2220 /* If the overlap occurs outside of the bounds of the
2221 loop, there is no dependence. */
2222 if (x1 > niter || y1 > niter)
2224 *overlaps_a = conflict_fn_no_dependence ();
2225 *overlaps_b = conflict_fn_no_dependence ();
2226 *last_conflicts = integer_zero_node;
2227 goto end_analyze_subs_aa;
2229 else
2230 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2232 else
2233 *last_conflicts = chrec_dont_know;
2235 *overlaps_a
2236 = conflict_fn (1,
2237 affine_fn_univar (build_int_cst (NULL_TREE, x1),
2239 build_int_cst (NULL_TREE, i1)));
2240 *overlaps_b
2241 = conflict_fn (1,
2242 affine_fn_univar (build_int_cst (NULL_TREE, y1),
2244 build_int_cst (NULL_TREE, j1)));
2246 else
2248 /* FIXME: For the moment, the upper bound of the
2249 iteration domain for i and j is not checked. */
2250 if (dump_file && (dump_flags & TDF_DETAILS))
2251 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2252 *overlaps_a = conflict_fn_not_known ();
2253 *overlaps_b = conflict_fn_not_known ();
2254 *last_conflicts = chrec_dont_know;
2257 else
2259 if (dump_file && (dump_flags & TDF_DETAILS))
2260 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2261 *overlaps_a = conflict_fn_not_known ();
2262 *overlaps_b = conflict_fn_not_known ();
2263 *last_conflicts = chrec_dont_know;
2266 else
2268 if (dump_file && (dump_flags & TDF_DETAILS))
2269 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2270 *overlaps_a = conflict_fn_not_known ();
2271 *overlaps_b = conflict_fn_not_known ();
2272 *last_conflicts = chrec_dont_know;
2275 end_analyze_subs_aa:
2276 if (dump_file && (dump_flags & TDF_DETAILS))
2278 fprintf (dump_file, " (overlaps_a = ");
2279 dump_conflict_function (dump_file, *overlaps_a);
2280 fprintf (dump_file, ")\n (overlaps_b = ");
2281 dump_conflict_function (dump_file, *overlaps_b);
2282 fprintf (dump_file, ")\n");
2283 fprintf (dump_file, ")\n");
2287 /* Returns true when analyze_subscript_affine_affine can be used for
2288 determining the dependence relation between chrec_a and chrec_b,
2289 that contain symbols. This function modifies chrec_a and chrec_b
2290 such that the analysis result is the same, and such that they don't
2291 contain symbols, and then can safely be passed to the analyzer.
2293 Example: The analysis of the following tuples of evolutions produce
2294 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2295 vs. {0, +, 1}_1
2297 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2298 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2301 static bool
2302 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2304 tree diff, type, left_a, left_b, right_b;
2306 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2307 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2308 /* FIXME: For the moment not handled. Might be refined later. */
2309 return false;
2311 type = chrec_type (*chrec_a);
2312 left_a = CHREC_LEFT (*chrec_a);
2313 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL_TREE);
2314 diff = chrec_fold_minus (type, left_a, left_b);
2316 if (!evolution_function_is_constant_p (diff))
2317 return false;
2319 if (dump_file && (dump_flags & TDF_DETAILS))
2320 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2322 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2323 diff, CHREC_RIGHT (*chrec_a));
2324 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL_TREE);
2325 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2326 build_int_cst (type, 0),
2327 right_b);
2328 return true;
2331 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2332 *OVERLAPS_B are initialized to the functions that describe the
2333 relation between the elements accessed twice by CHREC_A and
2334 CHREC_B. For k >= 0, the following property is verified:
2336 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2338 static void
2339 analyze_siv_subscript (tree chrec_a,
2340 tree chrec_b,
2341 conflict_function **overlaps_a,
2342 conflict_function **overlaps_b,
2343 tree *last_conflicts)
2345 dependence_stats.num_siv++;
2347 if (dump_file && (dump_flags & TDF_DETAILS))
2348 fprintf (dump_file, "(analyze_siv_subscript \n");
2350 if (evolution_function_is_constant_p (chrec_a)
2351 && evolution_function_is_affine_p (chrec_b))
2352 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2353 overlaps_a, overlaps_b, last_conflicts);
2355 else if (evolution_function_is_affine_p (chrec_a)
2356 && evolution_function_is_constant_p (chrec_b))
2357 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2358 overlaps_b, overlaps_a, last_conflicts);
2360 else if (evolution_function_is_affine_p (chrec_a)
2361 && evolution_function_is_affine_p (chrec_b))
2363 if (!chrec_contains_symbols (chrec_a)
2364 && !chrec_contains_symbols (chrec_b))
2366 analyze_subscript_affine_affine (chrec_a, chrec_b,
2367 overlaps_a, overlaps_b,
2368 last_conflicts);
2370 if (CF_NOT_KNOWN_P (*overlaps_a)
2371 || CF_NOT_KNOWN_P (*overlaps_b))
2372 dependence_stats.num_siv_unimplemented++;
2373 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2374 || CF_NO_DEPENDENCE_P (*overlaps_b))
2375 dependence_stats.num_siv_independent++;
2376 else
2377 dependence_stats.num_siv_dependent++;
2379 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2380 &chrec_b))
2382 analyze_subscript_affine_affine (chrec_a, chrec_b,
2383 overlaps_a, overlaps_b,
2384 last_conflicts);
2386 if (CF_NOT_KNOWN_P (*overlaps_a)
2387 || CF_NOT_KNOWN_P (*overlaps_b))
2388 dependence_stats.num_siv_unimplemented++;
2389 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2390 || CF_NO_DEPENDENCE_P (*overlaps_b))
2391 dependence_stats.num_siv_independent++;
2392 else
2393 dependence_stats.num_siv_dependent++;
2395 else
2396 goto siv_subscript_dontknow;
2399 else
2401 siv_subscript_dontknow:;
2402 if (dump_file && (dump_flags & TDF_DETAILS))
2403 fprintf (dump_file, "siv test failed: unimplemented.\n");
2404 *overlaps_a = conflict_fn_not_known ();
2405 *overlaps_b = conflict_fn_not_known ();
2406 *last_conflicts = chrec_dont_know;
2407 dependence_stats.num_siv_unimplemented++;
2410 if (dump_file && (dump_flags & TDF_DETAILS))
2411 fprintf (dump_file, ")\n");
2414 /* Returns false if we can prove that the greatest common divisor of the steps
2415 of CHREC does not divide CST, false otherwise. */
2417 static bool
2418 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2420 HOST_WIDE_INT cd = 0, val;
2421 tree step;
2423 if (!host_integerp (cst, 0))
2424 return true;
2425 val = tree_low_cst (cst, 0);
2427 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2429 step = CHREC_RIGHT (chrec);
2430 if (!host_integerp (step, 0))
2431 return true;
2432 cd = gcd (cd, tree_low_cst (step, 0));
2433 chrec = CHREC_LEFT (chrec);
2436 return val % cd == 0;
2439 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2440 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2441 functions that describe the relation between the elements accessed
2442 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2443 is verified:
2445 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2447 static void
2448 analyze_miv_subscript (tree chrec_a,
2449 tree chrec_b,
2450 conflict_function **overlaps_a,
2451 conflict_function **overlaps_b,
2452 tree *last_conflicts,
2453 struct loop *loop_nest)
2455 /* FIXME: This is a MIV subscript, not yet handled.
2456 Example: (A[{1, +, 1}_1] vs. A[{1, +, 1}_2]) that comes from
2457 (A[i] vs. A[j]).
2459 In the SIV test we had to solve a Diophantine equation with two
2460 variables. In the MIV case we have to solve a Diophantine
2461 equation with 2*n variables (if the subscript uses n IVs).
2463 tree type, difference;
2465 dependence_stats.num_miv++;
2466 if (dump_file && (dump_flags & TDF_DETAILS))
2467 fprintf (dump_file, "(analyze_miv_subscript \n");
2469 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2470 chrec_a = chrec_convert (type, chrec_a, NULL_TREE);
2471 chrec_b = chrec_convert (type, chrec_b, NULL_TREE);
2472 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2474 if (eq_evolutions_p (chrec_a, chrec_b))
2476 /* Access functions are the same: all the elements are accessed
2477 in the same order. */
2478 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2479 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2480 *last_conflicts = estimated_loop_iterations_tree
2481 (get_chrec_loop (chrec_a), true);
2482 dependence_stats.num_miv_dependent++;
2485 else if (evolution_function_is_constant_p (difference)
2486 /* For the moment, the following is verified:
2487 evolution_function_is_affine_multivariate_p (chrec_a,
2488 loop_nest->num) */
2489 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2491 /* testsuite/.../ssa-chrec-33.c
2492 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2494 The difference is 1, and all the evolution steps are multiples
2495 of 2, consequently there are no overlapping elements. */
2496 *overlaps_a = conflict_fn_no_dependence ();
2497 *overlaps_b = conflict_fn_no_dependence ();
2498 *last_conflicts = integer_zero_node;
2499 dependence_stats.num_miv_independent++;
2502 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2503 && !chrec_contains_symbols (chrec_a)
2504 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2505 && !chrec_contains_symbols (chrec_b))
2507 /* testsuite/.../ssa-chrec-35.c
2508 {0, +, 1}_2 vs. {0, +, 1}_3
2509 the overlapping elements are respectively located at iterations:
2510 {0, +, 1}_x and {0, +, 1}_x,
2511 in other words, we have the equality:
2512 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2514 Other examples:
2515 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2516 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2518 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2519 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2521 analyze_subscript_affine_affine (chrec_a, chrec_b,
2522 overlaps_a, overlaps_b, last_conflicts);
2524 if (CF_NOT_KNOWN_P (*overlaps_a)
2525 || CF_NOT_KNOWN_P (*overlaps_b))
2526 dependence_stats.num_miv_unimplemented++;
2527 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2528 || CF_NO_DEPENDENCE_P (*overlaps_b))
2529 dependence_stats.num_miv_independent++;
2530 else
2531 dependence_stats.num_miv_dependent++;
2534 else
2536 /* When the analysis is too difficult, answer "don't know". */
2537 if (dump_file && (dump_flags & TDF_DETAILS))
2538 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2540 *overlaps_a = conflict_fn_not_known ();
2541 *overlaps_b = conflict_fn_not_known ();
2542 *last_conflicts = chrec_dont_know;
2543 dependence_stats.num_miv_unimplemented++;
2546 if (dump_file && (dump_flags & TDF_DETAILS))
2547 fprintf (dump_file, ")\n");
2550 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2551 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2552 OVERLAP_ITERATIONS_B are initialized with two functions that
2553 describe the iterations that contain conflicting elements.
2555 Remark: For an integer k >= 0, the following equality is true:
2557 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2560 static void
2561 analyze_overlapping_iterations (tree chrec_a,
2562 tree chrec_b,
2563 conflict_function **overlap_iterations_a,
2564 conflict_function **overlap_iterations_b,
2565 tree *last_conflicts, struct loop *loop_nest)
2567 unsigned int lnn = loop_nest->num;
2569 dependence_stats.num_subscript_tests++;
2571 if (dump_file && (dump_flags & TDF_DETAILS))
2573 fprintf (dump_file, "(analyze_overlapping_iterations \n");
2574 fprintf (dump_file, " (chrec_a = ");
2575 print_generic_expr (dump_file, chrec_a, 0);
2576 fprintf (dump_file, ")\n (chrec_b = ");
2577 print_generic_expr (dump_file, chrec_b, 0);
2578 fprintf (dump_file, ")\n");
2581 if (chrec_a == NULL_TREE
2582 || chrec_b == NULL_TREE
2583 || chrec_contains_undetermined (chrec_a)
2584 || chrec_contains_undetermined (chrec_b))
2586 dependence_stats.num_subscript_undetermined++;
2588 *overlap_iterations_a = conflict_fn_not_known ();
2589 *overlap_iterations_b = conflict_fn_not_known ();
2592 /* If they are the same chrec, and are affine, they overlap
2593 on every iteration. */
2594 else if (eq_evolutions_p (chrec_a, chrec_b)
2595 && evolution_function_is_affine_multivariate_p (chrec_a, lnn))
2597 dependence_stats.num_same_subscript_function++;
2598 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2599 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2600 *last_conflicts = chrec_dont_know;
2603 /* If they aren't the same, and aren't affine, we can't do anything
2604 yet. */
2605 else if ((chrec_contains_symbols (chrec_a)
2606 || chrec_contains_symbols (chrec_b))
2607 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2608 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
2610 dependence_stats.num_subscript_undetermined++;
2611 *overlap_iterations_a = conflict_fn_not_known ();
2612 *overlap_iterations_b = conflict_fn_not_known ();
2615 else if (ziv_subscript_p (chrec_a, chrec_b))
2616 analyze_ziv_subscript (chrec_a, chrec_b,
2617 overlap_iterations_a, overlap_iterations_b,
2618 last_conflicts);
2620 else if (siv_subscript_p (chrec_a, chrec_b))
2621 analyze_siv_subscript (chrec_a, chrec_b,
2622 overlap_iterations_a, overlap_iterations_b,
2623 last_conflicts);
2625 else
2626 analyze_miv_subscript (chrec_a, chrec_b,
2627 overlap_iterations_a, overlap_iterations_b,
2628 last_conflicts, loop_nest);
2630 if (dump_file && (dump_flags & TDF_DETAILS))
2632 fprintf (dump_file, " (overlap_iterations_a = ");
2633 dump_conflict_function (dump_file, *overlap_iterations_a);
2634 fprintf (dump_file, ")\n (overlap_iterations_b = ");
2635 dump_conflict_function (dump_file, *overlap_iterations_b);
2636 fprintf (dump_file, ")\n");
2637 fprintf (dump_file, ")\n");
2641 /* Helper function for uniquely inserting distance vectors. */
2643 static void
2644 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
2646 unsigned i;
2647 lambda_vector v;
2649 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, v); i++)
2650 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
2651 return;
2653 VEC_safe_push (lambda_vector, heap, DDR_DIST_VECTS (ddr), dist_v);
2656 /* Helper function for uniquely inserting direction vectors. */
2658 static void
2659 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
2661 unsigned i;
2662 lambda_vector v;
2664 for (i = 0; VEC_iterate (lambda_vector, DDR_DIR_VECTS (ddr), i, v); i++)
2665 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
2666 return;
2668 VEC_safe_push (lambda_vector, heap, DDR_DIR_VECTS (ddr), dir_v);
2671 /* Add a distance of 1 on all the loops outer than INDEX. If we
2672 haven't yet determined a distance for this outer loop, push a new
2673 distance vector composed of the previous distance, and a distance
2674 of 1 for this outer loop. Example:
2676 | loop_1
2677 | loop_2
2678 | A[10]
2679 | endloop_2
2680 | endloop_1
2682 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
2683 save (0, 1), then we have to save (1, 0). */
2685 static void
2686 add_outer_distances (struct data_dependence_relation *ddr,
2687 lambda_vector dist_v, int index)
2689 /* For each outer loop where init_v is not set, the accesses are
2690 in dependence of distance 1 in the loop. */
2691 while (--index >= 0)
2693 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2694 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
2695 save_v[index] = 1;
2696 save_dist_v (ddr, save_v);
2700 /* Return false when fail to represent the data dependence as a
2701 distance vector. INIT_B is set to true when a component has been
2702 added to the distance vector DIST_V. INDEX_CARRY is then set to
2703 the index in DIST_V that carries the dependence. */
2705 static bool
2706 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
2707 struct data_reference *ddr_a,
2708 struct data_reference *ddr_b,
2709 lambda_vector dist_v, bool *init_b,
2710 int *index_carry)
2712 unsigned i;
2713 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2715 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2717 tree access_fn_a, access_fn_b;
2718 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
2720 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2722 non_affine_dependence_relation (ddr);
2723 return false;
2726 access_fn_a = DR_ACCESS_FN (ddr_a, i);
2727 access_fn_b = DR_ACCESS_FN (ddr_b, i);
2729 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
2730 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
2732 int dist, index;
2733 int index_a = index_in_loop_nest (CHREC_VARIABLE (access_fn_a),
2734 DDR_LOOP_NEST (ddr));
2735 int index_b = index_in_loop_nest (CHREC_VARIABLE (access_fn_b),
2736 DDR_LOOP_NEST (ddr));
2738 /* The dependence is carried by the outermost loop. Example:
2739 | loop_1
2740 | A[{4, +, 1}_1]
2741 | loop_2
2742 | A[{5, +, 1}_2]
2743 | endloop_2
2744 | endloop_1
2745 In this case, the dependence is carried by loop_1. */
2746 index = index_a < index_b ? index_a : index_b;
2747 *index_carry = MIN (index, *index_carry);
2749 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2751 non_affine_dependence_relation (ddr);
2752 return false;
2755 dist = int_cst_value (SUB_DISTANCE (subscript));
2757 /* This is the subscript coupling test. If we have already
2758 recorded a distance for this loop (a distance coming from
2759 another subscript), it should be the same. For example,
2760 in the following code, there is no dependence:
2762 | loop i = 0, N, 1
2763 | T[i+1][i] = ...
2764 | ... = T[i][i]
2765 | endloop
2767 if (init_v[index] != 0 && dist_v[index] != dist)
2769 finalize_ddr_dependent (ddr, chrec_known);
2770 return false;
2773 dist_v[index] = dist;
2774 init_v[index] = 1;
2775 *init_b = true;
2777 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
2779 /* This can be for example an affine vs. constant dependence
2780 (T[i] vs. T[3]) that is not an affine dependence and is
2781 not representable as a distance vector. */
2782 non_affine_dependence_relation (ddr);
2783 return false;
2787 return true;
2790 /* Return true when the DDR contains two data references that have the
2791 same access functions. */
2793 static bool
2794 same_access_functions (const struct data_dependence_relation *ddr)
2796 unsigned i;
2798 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2799 if (!eq_evolutions_p (DR_ACCESS_FN (DDR_A (ddr), i),
2800 DR_ACCESS_FN (DDR_B (ddr), i)))
2801 return false;
2803 return true;
2806 /* Return true when the DDR contains only constant access functions. */
2808 static bool
2809 constant_access_functions (const struct data_dependence_relation *ddr)
2811 unsigned i;
2813 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2814 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
2815 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
2816 return false;
2818 return true;
2821 /* Helper function for the case where DDR_A and DDR_B are the same
2822 multivariate access function with a constant step. For an example
2823 see pr34635-1.c. */
2825 static void
2826 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
2828 int x_1, x_2;
2829 tree c_1 = CHREC_LEFT (c_2);
2830 tree c_0 = CHREC_LEFT (c_1);
2831 lambda_vector dist_v;
2832 int v1, v2, cd;
2834 /* Polynomials with more than 2 variables are not handled yet. When
2835 the evolution steps are parameters, it is not possible to
2836 represent the dependence using classical distance vectors. */
2837 if (TREE_CODE (c_0) != INTEGER_CST
2838 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
2839 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
2841 DDR_AFFINE_P (ddr) = false;
2842 return;
2845 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
2846 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
2848 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
2849 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2850 v1 = int_cst_value (CHREC_RIGHT (c_1));
2851 v2 = int_cst_value (CHREC_RIGHT (c_2));
2852 cd = gcd (v1, v2);
2853 v1 /= cd;
2854 v2 /= cd;
2856 if (v2 < 0)
2858 v2 = -v2;
2859 v1 = -v1;
2862 dist_v[x_1] = v2;
2863 dist_v[x_2] = -v1;
2864 save_dist_v (ddr, dist_v);
2866 add_outer_distances (ddr, dist_v, x_1);
2869 /* Helper function for the case where DDR_A and DDR_B are the same
2870 access functions. */
2872 static void
2873 add_other_self_distances (struct data_dependence_relation *ddr)
2875 lambda_vector dist_v;
2876 unsigned i;
2877 int index_carry = DDR_NB_LOOPS (ddr);
2879 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2881 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
2883 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
2885 if (!evolution_function_is_univariate_p (access_fun))
2887 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
2889 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
2890 return;
2893 access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
2895 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
2896 add_multivariate_self_dist (ddr, access_fun);
2897 else
2898 /* The evolution step is not constant: it varies in
2899 the outer loop, so this cannot be represented by a
2900 distance vector. For example in pr34635.c the
2901 evolution is {0, +, {0, +, 4}_1}_2. */
2902 DDR_AFFINE_P (ddr) = false;
2904 return;
2907 index_carry = MIN (index_carry,
2908 index_in_loop_nest (CHREC_VARIABLE (access_fun),
2909 DDR_LOOP_NEST (ddr)));
2913 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2914 add_outer_distances (ddr, dist_v, index_carry);
2917 static void
2918 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
2920 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2922 dist_v[DDR_INNER_LOOP (ddr)] = 1;
2923 save_dist_v (ddr, dist_v);
2926 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
2927 is the case for example when access functions are the same and
2928 equal to a constant, as in:
2930 | loop_1
2931 | A[3] = ...
2932 | ... = A[3]
2933 | endloop_1
2935 in which case the distance vectors are (0) and (1). */
2937 static void
2938 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
2940 unsigned i, j;
2942 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2944 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
2945 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
2946 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
2948 for (j = 0; j < ca->n; j++)
2949 if (affine_function_zero_p (ca->fns[j]))
2951 insert_innermost_unit_dist_vector (ddr);
2952 return;
2955 for (j = 0; j < cb->n; j++)
2956 if (affine_function_zero_p (cb->fns[j]))
2958 insert_innermost_unit_dist_vector (ddr);
2959 return;
2964 /* Compute the classic per loop distance vector. DDR is the data
2965 dependence relation to build a vector from. Return false when fail
2966 to represent the data dependence as a distance vector. */
2968 static bool
2969 build_classic_dist_vector (struct data_dependence_relation *ddr,
2970 struct loop *loop_nest)
2972 bool init_b = false;
2973 int index_carry = DDR_NB_LOOPS (ddr);
2974 lambda_vector dist_v;
2976 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
2977 return false;
2979 if (same_access_functions (ddr))
2981 /* Save the 0 vector. */
2982 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2983 save_dist_v (ddr, dist_v);
2985 if (constant_access_functions (ddr))
2986 add_distance_for_zero_overlaps (ddr);
2988 if (DDR_NB_LOOPS (ddr) > 1)
2989 add_other_self_distances (ddr);
2991 return true;
2994 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2995 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
2996 dist_v, &init_b, &index_carry))
2997 return false;
2999 /* Save the distance vector if we initialized one. */
3000 if (init_b)
3002 /* Verify a basic constraint: classic distance vectors should
3003 always be lexicographically positive.
3005 Data references are collected in the order of execution of
3006 the program, thus for the following loop
3008 | for (i = 1; i < 100; i++)
3009 | for (j = 1; j < 100; j++)
3011 | t = T[j+1][i-1]; // A
3012 | T[j][i] = t + 2; // B
3015 references are collected following the direction of the wind:
3016 A then B. The data dependence tests are performed also
3017 following this order, such that we're looking at the distance
3018 separating the elements accessed by A from the elements later
3019 accessed by B. But in this example, the distance returned by
3020 test_dep (A, B) is lexicographically negative (-1, 1), that
3021 means that the access A occurs later than B with respect to
3022 the outer loop, ie. we're actually looking upwind. In this
3023 case we solve test_dep (B, A) looking downwind to the
3024 lexicographically positive solution, that returns the
3025 distance vector (1, -1). */
3026 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3028 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3029 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3030 loop_nest))
3031 return false;
3032 compute_subscript_distance (ddr);
3033 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3034 save_v, &init_b, &index_carry))
3035 return false;
3036 save_dist_v (ddr, save_v);
3037 DDR_REVERSED_P (ddr) = true;
3039 /* In this case there is a dependence forward for all the
3040 outer loops:
3042 | for (k = 1; k < 100; k++)
3043 | for (i = 1; i < 100; i++)
3044 | for (j = 1; j < 100; j++)
3046 | t = T[j+1][i-1]; // A
3047 | T[j][i] = t + 2; // B
3050 the vectors are:
3051 (0, 1, -1)
3052 (1, 1, -1)
3053 (1, -1, 1)
3055 if (DDR_NB_LOOPS (ddr) > 1)
3057 add_outer_distances (ddr, save_v, index_carry);
3058 add_outer_distances (ddr, dist_v, index_carry);
3061 else
3063 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3064 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3066 if (DDR_NB_LOOPS (ddr) > 1)
3068 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3070 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3071 DDR_A (ddr), loop_nest))
3072 return false;
3073 compute_subscript_distance (ddr);
3074 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3075 opposite_v, &init_b,
3076 &index_carry))
3077 return false;
3079 save_dist_v (ddr, save_v);
3080 add_outer_distances (ddr, dist_v, index_carry);
3081 add_outer_distances (ddr, opposite_v, index_carry);
3083 else
3084 save_dist_v (ddr, save_v);
3087 else
3089 /* There is a distance of 1 on all the outer loops: Example:
3090 there is a dependence of distance 1 on loop_1 for the array A.
3092 | loop_1
3093 | A[5] = ...
3094 | endloop
3096 add_outer_distances (ddr, dist_v,
3097 lambda_vector_first_nz (dist_v,
3098 DDR_NB_LOOPS (ddr), 0));
3101 if (dump_file && (dump_flags & TDF_DETAILS))
3103 unsigned i;
3105 fprintf (dump_file, "(build_classic_dist_vector\n");
3106 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3108 fprintf (dump_file, " dist_vector = (");
3109 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3110 DDR_NB_LOOPS (ddr));
3111 fprintf (dump_file, " )\n");
3113 fprintf (dump_file, ")\n");
3116 return true;
3119 /* Return the direction for a given distance.
3120 FIXME: Computing dir this way is suboptimal, since dir can catch
3121 cases that dist is unable to represent. */
3123 static inline enum data_dependence_direction
3124 dir_from_dist (int dist)
3126 if (dist > 0)
3127 return dir_positive;
3128 else if (dist < 0)
3129 return dir_negative;
3130 else
3131 return dir_equal;
3134 /* Compute the classic per loop direction vector. DDR is the data
3135 dependence relation to build a vector from. */
3137 static void
3138 build_classic_dir_vector (struct data_dependence_relation *ddr)
3140 unsigned i, j;
3141 lambda_vector dist_v;
3143 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
3145 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3147 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3148 dir_v[j] = dir_from_dist (dist_v[j]);
3150 save_dir_v (ddr, dir_v);
3154 /* Helper function. Returns true when there is a dependence between
3155 data references DRA and DRB. */
3157 static bool
3158 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3159 struct data_reference *dra,
3160 struct data_reference *drb,
3161 struct loop *loop_nest)
3163 unsigned int i;
3164 tree last_conflicts;
3165 struct subscript *subscript;
3167 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3168 i++)
3170 conflict_function *overlaps_a, *overlaps_b;
3172 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3173 DR_ACCESS_FN (drb, i),
3174 &overlaps_a, &overlaps_b,
3175 &last_conflicts, loop_nest);
3177 if (CF_NOT_KNOWN_P (overlaps_a)
3178 || CF_NOT_KNOWN_P (overlaps_b))
3180 finalize_ddr_dependent (ddr, chrec_dont_know);
3181 dependence_stats.num_dependence_undetermined++;
3182 free_conflict_function (overlaps_a);
3183 free_conflict_function (overlaps_b);
3184 return false;
3187 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3188 || CF_NO_DEPENDENCE_P (overlaps_b))
3190 finalize_ddr_dependent (ddr, chrec_known);
3191 dependence_stats.num_dependence_independent++;
3192 free_conflict_function (overlaps_a);
3193 free_conflict_function (overlaps_b);
3194 return false;
3197 else
3199 if (SUB_CONFLICTS_IN_A (subscript))
3200 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3201 if (SUB_CONFLICTS_IN_B (subscript))
3202 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3204 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3205 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3206 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3210 return true;
3213 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3215 static void
3216 subscript_dependence_tester (struct data_dependence_relation *ddr,
3217 struct loop *loop_nest)
3220 if (dump_file && (dump_flags & TDF_DETAILS))
3221 fprintf (dump_file, "(subscript_dependence_tester \n");
3223 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3224 dependence_stats.num_dependence_dependent++;
3226 compute_subscript_distance (ddr);
3227 if (build_classic_dist_vector (ddr, loop_nest))
3228 build_classic_dir_vector (ddr);
3230 if (dump_file && (dump_flags & TDF_DETAILS))
3231 fprintf (dump_file, ")\n");
3234 /* Returns true when all the access functions of A are affine or
3235 constant with respect to LOOP_NEST. */
3237 static bool
3238 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3239 const struct loop *loop_nest)
3241 unsigned int i;
3242 VEC(tree,heap) *fns = DR_ACCESS_FNS (a);
3243 tree t;
3245 for (i = 0; VEC_iterate (tree, fns, i, t); i++)
3246 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3247 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3248 return false;
3250 return true;
3253 /* Initializes an equation for an OMEGA problem using the information
3254 contained in the ACCESS_FUN. Returns true when the operation
3255 succeeded.
3257 PB is the omega constraint system.
3258 EQ is the number of the equation to be initialized.
3259 OFFSET is used for shifting the variables names in the constraints:
3260 a constrain is composed of 2 * the number of variables surrounding
3261 dependence accesses. OFFSET is set either to 0 for the first n variables,
3262 then it is set to n.
3263 ACCESS_FUN is expected to be an affine chrec. */
3265 static bool
3266 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3267 unsigned int offset, tree access_fun,
3268 struct data_dependence_relation *ddr)
3270 switch (TREE_CODE (access_fun))
3272 case POLYNOMIAL_CHREC:
3274 tree left = CHREC_LEFT (access_fun);
3275 tree right = CHREC_RIGHT (access_fun);
3276 int var = CHREC_VARIABLE (access_fun);
3277 unsigned var_idx;
3279 if (TREE_CODE (right) != INTEGER_CST)
3280 return false;
3282 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3283 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3285 /* Compute the innermost loop index. */
3286 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3288 if (offset == 0)
3289 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3290 += int_cst_value (right);
3292 switch (TREE_CODE (left))
3294 case POLYNOMIAL_CHREC:
3295 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3297 case INTEGER_CST:
3298 pb->eqs[eq].coef[0] += int_cst_value (left);
3299 return true;
3301 default:
3302 return false;
3306 case INTEGER_CST:
3307 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3308 return true;
3310 default:
3311 return false;
3315 /* As explained in the comments preceding init_omega_for_ddr, we have
3316 to set up a system for each loop level, setting outer loops
3317 variation to zero, and current loop variation to positive or zero.
3318 Save each lexico positive distance vector. */
3320 static void
3321 omega_extract_distance_vectors (omega_pb pb,
3322 struct data_dependence_relation *ddr)
3324 int eq, geq;
3325 unsigned i, j;
3326 struct loop *loopi, *loopj;
3327 enum omega_result res;
3329 /* Set a new problem for each loop in the nest. The basis is the
3330 problem that we have initialized until now. On top of this we
3331 add new constraints. */
3332 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3333 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3335 int dist = 0;
3336 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3337 DDR_NB_LOOPS (ddr));
3339 omega_copy_problem (copy, pb);
3341 /* For all the outer loops "loop_j", add "dj = 0". */
3342 for (j = 0;
3343 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3345 eq = omega_add_zero_eq (copy, omega_black);
3346 copy->eqs[eq].coef[j + 1] = 1;
3349 /* For "loop_i", add "0 <= di". */
3350 geq = omega_add_zero_geq (copy, omega_black);
3351 copy->geqs[geq].coef[i + 1] = 1;
3353 /* Reduce the constraint system, and test that the current
3354 problem is feasible. */
3355 res = omega_simplify_problem (copy);
3356 if (res == omega_false
3357 || res == omega_unknown
3358 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3359 goto next_problem;
3361 for (eq = 0; eq < copy->num_subs; eq++)
3362 if (copy->subs[eq].key == (int) i + 1)
3364 dist = copy->subs[eq].coef[0];
3365 goto found_dist;
3368 if (dist == 0)
3370 /* Reinitialize problem... */
3371 omega_copy_problem (copy, pb);
3372 for (j = 0;
3373 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3375 eq = omega_add_zero_eq (copy, omega_black);
3376 copy->eqs[eq].coef[j + 1] = 1;
3379 /* ..., but this time "di = 1". */
3380 eq = omega_add_zero_eq (copy, omega_black);
3381 copy->eqs[eq].coef[i + 1] = 1;
3382 copy->eqs[eq].coef[0] = -1;
3384 res = omega_simplify_problem (copy);
3385 if (res == omega_false
3386 || res == omega_unknown
3387 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3388 goto next_problem;
3390 for (eq = 0; eq < copy->num_subs; eq++)
3391 if (copy->subs[eq].key == (int) i + 1)
3393 dist = copy->subs[eq].coef[0];
3394 goto found_dist;
3398 found_dist:;
3399 /* Save the lexicographically positive distance vector. */
3400 if (dist >= 0)
3402 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3403 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3405 dist_v[i] = dist;
3407 for (eq = 0; eq < copy->num_subs; eq++)
3408 if (copy->subs[eq].key > 0)
3410 dist = copy->subs[eq].coef[0];
3411 dist_v[copy->subs[eq].key - 1] = dist;
3414 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3415 dir_v[j] = dir_from_dist (dist_v[j]);
3417 save_dist_v (ddr, dist_v);
3418 save_dir_v (ddr, dir_v);
3421 next_problem:;
3422 omega_free_problem (copy);
3426 /* This is called for each subscript of a tuple of data references:
3427 insert an equality for representing the conflicts. */
3429 static bool
3430 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3431 struct data_dependence_relation *ddr,
3432 omega_pb pb, bool *maybe_dependent)
3434 int eq;
3435 tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3436 TREE_TYPE (access_fun_b));
3437 tree fun_a = chrec_convert (type, access_fun_a, NULL_TREE);
3438 tree fun_b = chrec_convert (type, access_fun_b, NULL_TREE);
3439 tree difference = chrec_fold_minus (type, fun_a, fun_b);
3441 /* When the fun_a - fun_b is not constant, the dependence is not
3442 captured by the classic distance vector representation. */
3443 if (TREE_CODE (difference) != INTEGER_CST)
3444 return false;
3446 /* ZIV test. */
3447 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3449 /* There is no dependence. */
3450 *maybe_dependent = false;
3451 return true;
3454 fun_b = chrec_fold_multiply (type, fun_b, integer_minus_one_node);
3456 eq = omega_add_zero_eq (pb, omega_black);
3457 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3458 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3459 /* There is probably a dependence, but the system of
3460 constraints cannot be built: answer "don't know". */
3461 return false;
3463 /* GCD test. */
3464 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3465 && !int_divides_p (lambda_vector_gcd
3466 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3467 2 * DDR_NB_LOOPS (ddr)),
3468 pb->eqs[eq].coef[0]))
3470 /* There is no dependence. */
3471 *maybe_dependent = false;
3472 return true;
3475 return true;
3478 /* Helper function, same as init_omega_for_ddr but specialized for
3479 data references A and B. */
3481 static bool
3482 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3483 struct data_dependence_relation *ddr,
3484 omega_pb pb, bool *maybe_dependent)
3486 unsigned i;
3487 int ineq;
3488 struct loop *loopi;
3489 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3491 /* Insert an equality per subscript. */
3492 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3494 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3495 ddr, pb, maybe_dependent))
3496 return false;
3497 else if (*maybe_dependent == false)
3499 /* There is no dependence. */
3500 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3501 return true;
3505 /* Insert inequalities: constraints corresponding to the iteration
3506 domain, i.e. the loops surrounding the references "loop_x" and
3507 the distance variables "dx". The layout of the OMEGA
3508 representation is as follows:
3509 - coef[0] is the constant
3510 - coef[1..nb_loops] are the protected variables that will not be
3511 removed by the solver: the "dx"
3512 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3514 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3515 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3517 HOST_WIDE_INT nbi = estimated_loop_iterations_int (loopi, false);
3519 /* 0 <= loop_x */
3520 ineq = omega_add_zero_geq (pb, omega_black);
3521 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3523 /* 0 <= loop_x + dx */
3524 ineq = omega_add_zero_geq (pb, omega_black);
3525 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3526 pb->geqs[ineq].coef[i + 1] = 1;
3528 if (nbi != -1)
3530 /* loop_x <= nb_iters */
3531 ineq = omega_add_zero_geq (pb, omega_black);
3532 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3533 pb->geqs[ineq].coef[0] = nbi;
3535 /* loop_x + dx <= nb_iters */
3536 ineq = omega_add_zero_geq (pb, omega_black);
3537 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3538 pb->geqs[ineq].coef[i + 1] = -1;
3539 pb->geqs[ineq].coef[0] = nbi;
3541 /* A step "dx" bigger than nb_iters is not feasible, so
3542 add "0 <= nb_iters + dx", */
3543 ineq = omega_add_zero_geq (pb, omega_black);
3544 pb->geqs[ineq].coef[i + 1] = 1;
3545 pb->geqs[ineq].coef[0] = nbi;
3546 /* and "dx <= nb_iters". */
3547 ineq = omega_add_zero_geq (pb, omega_black);
3548 pb->geqs[ineq].coef[i + 1] = -1;
3549 pb->geqs[ineq].coef[0] = nbi;
3553 omega_extract_distance_vectors (pb, ddr);
3555 return true;
3558 /* Sets up the Omega dependence problem for the data dependence
3559 relation DDR. Returns false when the constraint system cannot be
3560 built, ie. when the test answers "don't know". Returns true
3561 otherwise, and when independence has been proved (using one of the
3562 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3563 set MAYBE_DEPENDENT to true.
3565 Example: for setting up the dependence system corresponding to the
3566 conflicting accesses
3568 | loop_i
3569 | loop_j
3570 | A[i, i+1] = ...
3571 | ... A[2*j, 2*(i + j)]
3572 | endloop_j
3573 | endloop_i
3575 the following constraints come from the iteration domain:
3577 0 <= i <= Ni
3578 0 <= i + di <= Ni
3579 0 <= j <= Nj
3580 0 <= j + dj <= Nj
3582 where di, dj are the distance variables. The constraints
3583 representing the conflicting elements are:
3585 i = 2 * (j + dj)
3586 i + 1 = 2 * (i + di + j + dj)
3588 For asking that the resulting distance vector (di, dj) be
3589 lexicographically positive, we insert the constraint "di >= 0". If
3590 "di = 0" in the solution, we fix that component to zero, and we
3591 look at the inner loops: we set a new problem where all the outer
3592 loop distances are zero, and fix this inner component to be
3593 positive. When one of the components is positive, we save that
3594 distance, and set a new problem where the distance on this loop is
3595 zero, searching for other distances in the inner loops. Here is
3596 the classic example that illustrates that we have to set for each
3597 inner loop a new problem:
3599 | loop_1
3600 | loop_2
3601 | A[10]
3602 | endloop_2
3603 | endloop_1
3605 we have to save two distances (1, 0) and (0, 1).
3607 Given two array references, refA and refB, we have to set the
3608 dependence problem twice, refA vs. refB and refB vs. refA, and we
3609 cannot do a single test, as refB might occur before refA in the
3610 inner loops, and the contrary when considering outer loops: ex.
3612 | loop_0
3613 | loop_1
3614 | loop_2
3615 | T[{1,+,1}_2][{1,+,1}_1] // refA
3616 | T[{2,+,1}_2][{0,+,1}_1] // refB
3617 | endloop_2
3618 | endloop_1
3619 | endloop_0
3621 refB touches the elements in T before refA, and thus for the same
3622 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3623 but for successive loop_0 iterations, we have (1, -1, 1)
3625 The Omega solver expects the distance variables ("di" in the
3626 previous example) to come first in the constraint system (as
3627 variables to be protected, or "safe" variables), the constraint
3628 system is built using the following layout:
3630 "cst | distance vars | index vars".
3633 static bool
3634 init_omega_for_ddr (struct data_dependence_relation *ddr,
3635 bool *maybe_dependent)
3637 omega_pb pb;
3638 bool res = false;
3640 *maybe_dependent = true;
3642 if (same_access_functions (ddr))
3644 unsigned j;
3645 lambda_vector dir_v;
3647 /* Save the 0 vector. */
3648 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3649 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3650 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3651 dir_v[j] = dir_equal;
3652 save_dir_v (ddr, dir_v);
3654 /* Save the dependences carried by outer loops. */
3655 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3656 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3657 maybe_dependent);
3658 omega_free_problem (pb);
3659 return res;
3662 /* Omega expects the protected variables (those that have to be kept
3663 after elimination) to appear first in the constraint system.
3664 These variables are the distance variables. In the following
3665 initialization we declare NB_LOOPS safe variables, and the total
3666 number of variables for the constraint system is 2*NB_LOOPS. */
3667 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3668 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3669 maybe_dependent);
3670 omega_free_problem (pb);
3672 /* Stop computation if not decidable, or no dependence. */
3673 if (res == false || *maybe_dependent == false)
3674 return res;
3676 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3677 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
3678 maybe_dependent);
3679 omega_free_problem (pb);
3681 return res;
3684 /* Return true when DDR contains the same information as that stored
3685 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
3687 static bool
3688 ddr_consistent_p (FILE *file,
3689 struct data_dependence_relation *ddr,
3690 VEC (lambda_vector, heap) *dist_vects,
3691 VEC (lambda_vector, heap) *dir_vects)
3693 unsigned int i, j;
3695 /* If dump_file is set, output there. */
3696 if (dump_file && (dump_flags & TDF_DETAILS))
3697 file = dump_file;
3699 if (VEC_length (lambda_vector, dist_vects) != DDR_NUM_DIST_VECTS (ddr))
3701 lambda_vector b_dist_v;
3702 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
3703 VEC_length (lambda_vector, dist_vects),
3704 DDR_NUM_DIST_VECTS (ddr));
3706 fprintf (file, "Banerjee dist vectors:\n");
3707 for (i = 0; VEC_iterate (lambda_vector, dist_vects, i, b_dist_v); i++)
3708 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
3710 fprintf (file, "Omega dist vectors:\n");
3711 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3712 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
3714 fprintf (file, "data dependence relation:\n");
3715 dump_data_dependence_relation (file, ddr);
3717 fprintf (file, ")\n");
3718 return false;
3721 if (VEC_length (lambda_vector, dir_vects) != DDR_NUM_DIR_VECTS (ddr))
3723 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
3724 VEC_length (lambda_vector, dir_vects),
3725 DDR_NUM_DIR_VECTS (ddr));
3726 return false;
3729 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3731 lambda_vector a_dist_v;
3732 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
3734 /* Distance vectors are not ordered in the same way in the DDR
3735 and in the DIST_VECTS: search for a matching vector. */
3736 for (j = 0; VEC_iterate (lambda_vector, dist_vects, j, a_dist_v); j++)
3737 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
3738 break;
3740 if (j == VEC_length (lambda_vector, dist_vects))
3742 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
3743 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
3744 fprintf (file, "not found in Omega dist vectors:\n");
3745 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
3746 fprintf (file, "data dependence relation:\n");
3747 dump_data_dependence_relation (file, ddr);
3748 fprintf (file, ")\n");
3752 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
3754 lambda_vector a_dir_v;
3755 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
3757 /* Direction vectors are not ordered in the same way in the DDR
3758 and in the DIR_VECTS: search for a matching vector. */
3759 for (j = 0; VEC_iterate (lambda_vector, dir_vects, j, a_dir_v); j++)
3760 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
3761 break;
3763 if (j == VEC_length (lambda_vector, dist_vects))
3765 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
3766 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
3767 fprintf (file, "not found in Omega dir vectors:\n");
3768 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
3769 fprintf (file, "data dependence relation:\n");
3770 dump_data_dependence_relation (file, ddr);
3771 fprintf (file, ")\n");
3775 return true;
3778 /* This computes the affine dependence relation between A and B with
3779 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
3780 independence between two accesses, while CHREC_DONT_KNOW is used
3781 for representing the unknown relation.
3783 Note that it is possible to stop the computation of the dependence
3784 relation the first time we detect a CHREC_KNOWN element for a given
3785 subscript. */
3787 static void
3788 compute_affine_dependence (struct data_dependence_relation *ddr,
3789 struct loop *loop_nest)
3791 struct data_reference *dra = DDR_A (ddr);
3792 struct data_reference *drb = DDR_B (ddr);
3794 if (dump_file && (dump_flags & TDF_DETAILS))
3796 fprintf (dump_file, "(compute_affine_dependence\n");
3797 fprintf (dump_file, " (stmt_a = \n");
3798 print_generic_expr (dump_file, DR_STMT (dra), 0);
3799 fprintf (dump_file, ")\n (stmt_b = \n");
3800 print_generic_expr (dump_file, DR_STMT (drb), 0);
3801 fprintf (dump_file, ")\n");
3804 /* Analyze only when the dependence relation is not yet known. */
3805 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
3807 dependence_stats.num_dependence_tests++;
3809 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
3810 && access_functions_are_affine_or_constant_p (drb, loop_nest))
3812 if (flag_check_data_deps)
3814 /* Compute the dependences using the first algorithm. */
3815 subscript_dependence_tester (ddr, loop_nest);
3817 if (dump_file && (dump_flags & TDF_DETAILS))
3819 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
3820 dump_data_dependence_relation (dump_file, ddr);
3823 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
3825 bool maybe_dependent;
3826 VEC (lambda_vector, heap) *dir_vects, *dist_vects;
3828 /* Save the result of the first DD analyzer. */
3829 dist_vects = DDR_DIST_VECTS (ddr);
3830 dir_vects = DDR_DIR_VECTS (ddr);
3832 /* Reset the information. */
3833 DDR_DIST_VECTS (ddr) = NULL;
3834 DDR_DIR_VECTS (ddr) = NULL;
3836 /* Compute the same information using Omega. */
3837 if (!init_omega_for_ddr (ddr, &maybe_dependent))
3838 goto csys_dont_know;
3840 if (dump_file && (dump_flags & TDF_DETAILS))
3842 fprintf (dump_file, "Omega Analyzer\n");
3843 dump_data_dependence_relation (dump_file, ddr);
3846 /* Check that we get the same information. */
3847 if (maybe_dependent)
3848 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
3849 dir_vects));
3852 else
3853 subscript_dependence_tester (ddr, loop_nest);
3856 /* As a last case, if the dependence cannot be determined, or if
3857 the dependence is considered too difficult to determine, answer
3858 "don't know". */
3859 else
3861 csys_dont_know:;
3862 dependence_stats.num_dependence_undetermined++;
3864 if (dump_file && (dump_flags & TDF_DETAILS))
3866 fprintf (dump_file, "Data ref a:\n");
3867 dump_data_reference (dump_file, dra);
3868 fprintf (dump_file, "Data ref b:\n");
3869 dump_data_reference (dump_file, drb);
3870 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
3872 finalize_ddr_dependent (ddr, chrec_dont_know);
3876 if (dump_file && (dump_flags & TDF_DETAILS))
3877 fprintf (dump_file, ")\n");
3880 /* This computes the dependence relation for the same data
3881 reference into DDR. */
3883 static void
3884 compute_self_dependence (struct data_dependence_relation *ddr)
3886 unsigned int i;
3887 struct subscript *subscript;
3889 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3890 return;
3892 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3893 i++)
3895 if (SUB_CONFLICTS_IN_A (subscript))
3896 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3897 if (SUB_CONFLICTS_IN_B (subscript))
3898 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3900 /* The accessed index overlaps for each iteration. */
3901 SUB_CONFLICTS_IN_A (subscript)
3902 = conflict_fn (1, affine_fn_cst (integer_zero_node));
3903 SUB_CONFLICTS_IN_B (subscript)
3904 = conflict_fn (1, affine_fn_cst (integer_zero_node));
3905 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
3908 /* The distance vector is the zero vector. */
3909 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3910 save_dir_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3913 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
3914 the data references in DATAREFS, in the LOOP_NEST. When
3915 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
3916 relations. */
3918 void
3919 compute_all_dependences (VEC (data_reference_p, heap) *datarefs,
3920 VEC (ddr_p, heap) **dependence_relations,
3921 VEC (loop_p, heap) *loop_nest,
3922 bool compute_self_and_rr)
3924 struct data_dependence_relation *ddr;
3925 struct data_reference *a, *b;
3926 unsigned int i, j;
3928 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, a); i++)
3929 for (j = i + 1; VEC_iterate (data_reference_p, datarefs, j, b); j++)
3930 if (!DR_IS_READ (a) || !DR_IS_READ (b) || compute_self_and_rr)
3932 ddr = initialize_data_dependence_relation (a, b, loop_nest);
3933 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
3934 compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
3937 if (compute_self_and_rr)
3938 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, a); i++)
3940 ddr = initialize_data_dependence_relation (a, a, loop_nest);
3941 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
3942 compute_self_dependence (ddr);
3946 /* Stores the locations of memory references in STMT to REFERENCES. Returns
3947 true if STMT clobbers memory, false otherwise. */
3949 bool
3950 get_references_in_stmt (tree stmt, VEC (data_ref_loc, heap) **references)
3952 bool clobbers_memory = false;
3953 data_ref_loc *ref;
3954 tree *op0, *op1, call;
3956 *references = NULL;
3958 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
3959 Calls have side-effects, except those to const or pure
3960 functions. */
3961 call = get_call_expr_in (stmt);
3962 if ((call
3963 && !(call_expr_flags (call) & (ECF_CONST | ECF_PURE)))
3964 || (TREE_CODE (stmt) == ASM_EXPR
3965 && ASM_VOLATILE_P (stmt)))
3966 clobbers_memory = true;
3968 if (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS))
3969 return clobbers_memory;
3971 if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
3973 op0 = &GIMPLE_STMT_OPERAND (stmt, 0);
3974 op1 = &GIMPLE_STMT_OPERAND (stmt, 1);
3976 if (DECL_P (*op1)
3977 || (REFERENCE_CLASS_P (*op1) && get_base_address (*op1)))
3979 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
3980 ref->pos = op1;
3981 ref->is_read = true;
3984 if (DECL_P (*op0)
3985 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0)))
3987 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
3988 ref->pos = op0;
3989 ref->is_read = false;
3993 if (call)
3995 unsigned i, n = call_expr_nargs (call);
3997 for (i = 0; i < n; i++)
3999 op0 = &CALL_EXPR_ARG (call, i);
4001 if (DECL_P (*op0)
4002 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0)))
4004 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4005 ref->pos = op0;
4006 ref->is_read = true;
4011 return clobbers_memory;
4014 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4015 reference, returns false, otherwise returns true. NEST is the outermost
4016 loop of the loop nest in that the references should be analyzed. */
4018 static bool
4019 find_data_references_in_stmt (struct loop *nest, tree stmt,
4020 VEC (data_reference_p, heap) **datarefs)
4022 unsigned i;
4023 VEC (data_ref_loc, heap) *references;
4024 data_ref_loc *ref;
4025 bool ret = true;
4026 data_reference_p dr;
4028 if (get_references_in_stmt (stmt, &references))
4030 VEC_free (data_ref_loc, heap, references);
4031 return false;
4034 for (i = 0; VEC_iterate (data_ref_loc, references, i, ref); i++)
4036 dr = create_data_ref (nest, *ref->pos, stmt, ref->is_read);
4037 gcc_assert (dr != NULL);
4039 /* FIXME -- data dependence analysis does not work correctly for objects with
4040 invariant addresses. Let us fail here until the problem is fixed. */
4041 if (dr_address_invariant_p (dr))
4043 free_data_ref (dr);
4044 if (dump_file && (dump_flags & TDF_DETAILS))
4045 fprintf (dump_file, "\tFAILED as dr address is invariant\n");
4046 ret = false;
4047 break;
4050 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4052 VEC_free (data_ref_loc, heap, references);
4053 return ret;
4056 /* Search the data references in LOOP, and record the information into
4057 DATAREFS. Returns chrec_dont_know when failing to analyze a
4058 difficult case, returns NULL_TREE otherwise.
4060 TODO: This function should be made smarter so that it can handle address
4061 arithmetic as if they were array accesses, etc. */
4063 static tree
4064 find_data_references_in_loop (struct loop *loop,
4065 VEC (data_reference_p, heap) **datarefs)
4067 basic_block bb, *bbs;
4068 unsigned int i;
4069 block_stmt_iterator bsi;
4071 bbs = get_loop_body_in_dom_order (loop);
4073 for (i = 0; i < loop->num_nodes; i++)
4075 bb = bbs[i];
4077 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
4079 tree stmt = bsi_stmt (bsi);
4081 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4083 struct data_reference *res;
4084 res = XCNEW (struct data_reference);
4085 VEC_safe_push (data_reference_p, heap, *datarefs, res);
4087 free (bbs);
4088 return chrec_dont_know;
4092 free (bbs);
4094 return NULL_TREE;
4097 /* Recursive helper function. */
4099 static bool
4100 find_loop_nest_1 (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4102 /* Inner loops of the nest should not contain siblings. Example:
4103 when there are two consecutive loops,
4105 | loop_0
4106 | loop_1
4107 | A[{0, +, 1}_1]
4108 | endloop_1
4109 | loop_2
4110 | A[{0, +, 1}_2]
4111 | endloop_2
4112 | endloop_0
4114 the dependence relation cannot be captured by the distance
4115 abstraction. */
4116 if (loop->next)
4117 return false;
4119 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4120 if (loop->inner)
4121 return find_loop_nest_1 (loop->inner, loop_nest);
4122 return true;
4125 /* Return false when the LOOP is not well nested. Otherwise return
4126 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4127 contain the loops from the outermost to the innermost, as they will
4128 appear in the classic distance vector. */
4130 bool
4131 find_loop_nest (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4133 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4134 if (loop->inner)
4135 return find_loop_nest_1 (loop->inner, loop_nest);
4136 return true;
4139 /* Given a loop nest LOOP, the following vectors are returned:
4140 DATAREFS is initialized to all the array elements contained in this loop,
4141 DEPENDENCE_RELATIONS contains the relations between the data references.
4142 Compute read-read and self relations if
4143 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4145 void
4146 compute_data_dependences_for_loop (struct loop *loop,
4147 bool compute_self_and_read_read_dependences,
4148 VEC (data_reference_p, heap) **datarefs,
4149 VEC (ddr_p, heap) **dependence_relations)
4151 VEC (loop_p, heap) *vloops = VEC_alloc (loop_p, heap, 3);
4153 memset (&dependence_stats, 0, sizeof (dependence_stats));
4155 /* If the loop nest is not well formed, or one of the data references
4156 is not computable, give up without spending time to compute other
4157 dependences. */
4158 if (!loop
4159 || !find_loop_nest (loop, &vloops)
4160 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know)
4162 struct data_dependence_relation *ddr;
4164 /* Insert a single relation into dependence_relations:
4165 chrec_dont_know. */
4166 ddr = initialize_data_dependence_relation (NULL, NULL, vloops);
4167 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4169 else
4170 compute_all_dependences (*datarefs, dependence_relations, vloops,
4171 compute_self_and_read_read_dependences);
4173 if (dump_file && (dump_flags & TDF_STATS))
4175 fprintf (dump_file, "Dependence tester statistics:\n");
4177 fprintf (dump_file, "Number of dependence tests: %d\n",
4178 dependence_stats.num_dependence_tests);
4179 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4180 dependence_stats.num_dependence_dependent);
4181 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4182 dependence_stats.num_dependence_independent);
4183 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4184 dependence_stats.num_dependence_undetermined);
4186 fprintf (dump_file, "Number of subscript tests: %d\n",
4187 dependence_stats.num_subscript_tests);
4188 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4189 dependence_stats.num_subscript_undetermined);
4190 fprintf (dump_file, "Number of same subscript function: %d\n",
4191 dependence_stats.num_same_subscript_function);
4193 fprintf (dump_file, "Number of ziv tests: %d\n",
4194 dependence_stats.num_ziv);
4195 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4196 dependence_stats.num_ziv_dependent);
4197 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4198 dependence_stats.num_ziv_independent);
4199 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4200 dependence_stats.num_ziv_unimplemented);
4202 fprintf (dump_file, "Number of siv tests: %d\n",
4203 dependence_stats.num_siv);
4204 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4205 dependence_stats.num_siv_dependent);
4206 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4207 dependence_stats.num_siv_independent);
4208 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4209 dependence_stats.num_siv_unimplemented);
4211 fprintf (dump_file, "Number of miv tests: %d\n",
4212 dependence_stats.num_miv);
4213 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4214 dependence_stats.num_miv_dependent);
4215 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4216 dependence_stats.num_miv_independent);
4217 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4218 dependence_stats.num_miv_unimplemented);
4222 /* Entry point (for testing only). Analyze all the data references
4223 and the dependence relations in LOOP.
4225 The data references are computed first.
4227 A relation on these nodes is represented by a complete graph. Some
4228 of the relations could be of no interest, thus the relations can be
4229 computed on demand.
4231 In the following function we compute all the relations. This is
4232 just a first implementation that is here for:
4233 - for showing how to ask for the dependence relations,
4234 - for the debugging the whole dependence graph,
4235 - for the dejagnu testcases and maintenance.
4237 It is possible to ask only for a part of the graph, avoiding to
4238 compute the whole dependence graph. The computed dependences are
4239 stored in a knowledge base (KB) such that later queries don't
4240 recompute the same information. The implementation of this KB is
4241 transparent to the optimizer, and thus the KB can be changed with a
4242 more efficient implementation, or the KB could be disabled. */
4243 static void
4244 analyze_all_data_dependences (struct loop *loop)
4246 unsigned int i;
4247 int nb_data_refs = 10;
4248 VEC (data_reference_p, heap) *datarefs =
4249 VEC_alloc (data_reference_p, heap, nb_data_refs);
4250 VEC (ddr_p, heap) *dependence_relations =
4251 VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs);
4253 /* Compute DDs on the whole function. */
4254 compute_data_dependences_for_loop (loop, false, &datarefs,
4255 &dependence_relations);
4257 if (dump_file)
4259 dump_data_dependence_relations (dump_file, dependence_relations);
4260 fprintf (dump_file, "\n\n");
4262 if (dump_flags & TDF_DETAILS)
4263 dump_dist_dir_vectors (dump_file, dependence_relations);
4265 if (dump_flags & TDF_STATS)
4267 unsigned nb_top_relations = 0;
4268 unsigned nb_bot_relations = 0;
4269 unsigned nb_basename_differ = 0;
4270 unsigned nb_chrec_relations = 0;
4271 struct data_dependence_relation *ddr;
4273 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
4275 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4276 nb_top_relations++;
4278 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4280 struct data_reference *a = DDR_A (ddr);
4281 struct data_reference *b = DDR_B (ddr);
4283 if (!bitmap_intersect_p (DR_VOPS (a), DR_VOPS (b)))
4284 nb_basename_differ++;
4285 else
4286 nb_bot_relations++;
4289 else
4290 nb_chrec_relations++;
4293 gather_stats_on_scev_database ();
4297 free_dependence_relations (dependence_relations);
4298 free_data_refs (datarefs);
4301 /* Computes all the data dependences and check that the results of
4302 several analyzers are the same. */
4304 void
4305 tree_check_data_deps (void)
4307 loop_iterator li;
4308 struct loop *loop_nest;
4310 FOR_EACH_LOOP (li, loop_nest, 0)
4311 analyze_all_data_dependences (loop_nest);
4314 /* Free the memory used by a data dependence relation DDR. */
4316 void
4317 free_dependence_relation (struct data_dependence_relation *ddr)
4319 if (ddr == NULL)
4320 return;
4322 if (DDR_SUBSCRIPTS (ddr))
4323 free_subscripts (DDR_SUBSCRIPTS (ddr));
4324 if (DDR_DIST_VECTS (ddr))
4325 VEC_free (lambda_vector, heap, DDR_DIST_VECTS (ddr));
4326 if (DDR_DIR_VECTS (ddr))
4327 VEC_free (lambda_vector, heap, DDR_DIR_VECTS (ddr));
4329 free (ddr);
4332 /* Free the memory used by the data dependence relations from
4333 DEPENDENCE_RELATIONS. */
4335 void
4336 free_dependence_relations (VEC (ddr_p, heap) *dependence_relations)
4338 unsigned int i;
4339 struct data_dependence_relation *ddr;
4340 VEC (loop_p, heap) *loop_nest = NULL;
4342 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
4344 if (ddr == NULL)
4345 continue;
4346 if (loop_nest == NULL)
4347 loop_nest = DDR_LOOP_NEST (ddr);
4348 else
4349 gcc_assert (DDR_LOOP_NEST (ddr) == NULL
4350 || DDR_LOOP_NEST (ddr) == loop_nest);
4351 free_dependence_relation (ddr);
4354 if (loop_nest)
4355 VEC_free (loop_p, heap, loop_nest);
4356 VEC_free (ddr_p, heap, dependence_relations);
4359 /* Free the memory used by the data references from DATAREFS. */
4361 void
4362 free_data_refs (VEC (data_reference_p, heap) *datarefs)
4364 unsigned int i;
4365 struct data_reference *dr;
4367 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
4368 free_data_ref (dr);
4369 VEC_free (data_reference_p, heap, datarefs);
4374 /* Returns the index of STMT in RDG. */
4376 static int
4377 find_vertex_for_stmt (const struct graph *rdg, const_tree stmt)
4379 int i;
4381 for (i = 0; i < rdg->n_vertices; i++)
4382 if (RDGV_STMT (&(rdg->vertices[i])) == stmt)
4383 return i;
4385 gcc_unreachable ();
4386 return 0;
4389 /* Creates an edge in RDG for each distance vector from DDR. */
4391 static void
4392 create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4394 int va, vb;
4395 data_reference_p dra;
4396 data_reference_p drb;
4397 struct graph_edge *e;
4399 if (DDR_REVERSED_P (ddr))
4401 dra = DDR_B (ddr);
4402 drb = DDR_A (ddr);
4404 else
4406 dra = DDR_A (ddr);
4407 drb = DDR_B (ddr);
4410 va = find_vertex_for_stmt (rdg, DR_STMT (dra));
4411 vb = find_vertex_for_stmt (rdg, DR_STMT (drb));
4413 e = add_edge (rdg, va, vb);
4414 e->data = XNEW (struct rdg_edge);
4416 /* Determines the type of the data dependence. */
4417 if (DR_IS_READ (dra) && DR_IS_READ (drb))
4418 RDGE_TYPE (e) = input_dd;
4419 else if (!DR_IS_READ (dra) && !DR_IS_READ (drb))
4420 RDGE_TYPE (e) = output_dd;
4421 else if (!DR_IS_READ (dra) && DR_IS_READ (drb))
4422 RDGE_TYPE (e) = flow_dd;
4423 else if (DR_IS_READ (dra) && !DR_IS_READ (drb))
4424 RDGE_TYPE (e) = anti_dd;
4427 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
4428 the index of DEF in RDG. */
4430 static void
4431 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
4433 use_operand_p imm_use_p;
4434 imm_use_iterator iterator;
4436 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
4438 int use = find_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
4439 struct graph_edge *e = add_edge (rdg, idef, use);
4441 e->data = XNEW (struct rdg_edge);
4442 RDGE_TYPE (e) = flow_dd;
4446 /* Creates the edges of the reduced dependence graph RDG. */
4448 static void
4449 create_rdg_edges (struct graph *rdg, VEC (ddr_p, heap) *ddrs)
4451 int i;
4452 struct data_dependence_relation *ddr;
4453 def_operand_p def_p;
4454 ssa_op_iter iter;
4456 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
4457 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4458 create_rdg_edge_for_ddr (rdg, ddr);
4460 for (i = 0; i < rdg->n_vertices; i++)
4461 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDGV_STMT (&(rdg->vertices[i])),
4462 iter, SSA_OP_ALL_DEFS)
4463 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
4466 /* Build the vertices of the reduced dependence graph RDG. */
4468 static void
4469 create_rdg_vertices (struct graph *rdg, VEC (tree, heap) *stmts)
4471 int i;
4472 tree s;
4474 for (i = 0; VEC_iterate (tree, stmts, i, s); i++)
4476 struct vertex *v = &(rdg->vertices[i]);
4478 v->data = XNEW (struct rdg_vertex);
4479 RDGV_STMT (v) = s;
4483 /* Initialize STMTS with all the statements and PHI nodes of LOOP. */
4485 static void
4486 stmts_from_loop (struct loop *loop, VEC (tree, heap) **stmts)
4488 unsigned int i;
4489 basic_block *bbs = get_loop_body_in_dom_order (loop);
4491 for (i = 0; i < loop->num_nodes; i++)
4493 tree phi;
4494 basic_block bb = bbs[i];
4495 block_stmt_iterator bsi;
4497 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
4498 VEC_safe_push (tree, heap, *stmts, phi);
4500 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
4501 VEC_safe_push (tree, heap, *stmts, bsi_stmt (bsi));
4504 free (bbs);
4507 /* Returns true when all the dependences are computable. */
4509 static bool
4510 known_dependences_p (VEC (ddr_p, heap) *dependence_relations)
4512 ddr_p ddr;
4513 unsigned int i;
4515 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
4516 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4517 return false;
4519 return true;
4522 /* Build a Reduced Dependence Graph with one vertex per statement of the
4523 loop nest and one edge per data dependence or scalar dependence. */
4525 struct graph *
4526 build_rdg (struct loop *loop)
4528 int nb_data_refs = 10;
4529 struct graph *rdg = NULL;
4530 VEC (ddr_p, heap) *dependence_relations;
4531 VEC (data_reference_p, heap) *datarefs;
4532 VEC (tree, heap) *stmts = VEC_alloc (tree, heap, 10);
4534 dependence_relations = VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs) ;
4535 datarefs = VEC_alloc (data_reference_p, heap, nb_data_refs);
4536 compute_data_dependences_for_loop (loop,
4537 false,
4538 &datarefs,
4539 &dependence_relations);
4541 if (!known_dependences_p (dependence_relations))
4542 goto end_rdg;
4544 stmts_from_loop (loop, &stmts);
4545 rdg = new_graph (VEC_length (tree, stmts));
4546 create_rdg_vertices (rdg, stmts);
4547 create_rdg_edges (rdg, dependence_relations);
4549 end_rdg:
4550 free_dependence_relations (dependence_relations);
4551 free_data_refs (datarefs);
4552 VEC_free (tree, heap, stmts);
4554 return rdg;