* tree-eh.c (optimize_double_finally): Don't assume that the
[official-gcc.git] / gcc / tree-data-ref.c
blob720c94d599812c7da1455fa01e048446e896ec62
1 /* Data references and dependences detectors.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass walks a given loop structure searching for array
22 references. The information about the array accesses is recorded
23 in DATA_REFERENCE structures.
25 The basic test for determining the dependences is:
26 given two access functions chrec1 and chrec2 to a same array, and
27 x and y two vectors from the iteration domain, the same element of
28 the array is accessed twice at iterations x and y if and only if:
29 | chrec1 (x) == chrec2 (y).
31 The goals of this analysis are:
33 - to determine the independence: the relation between two
34 independent accesses is qualified with the chrec_known (this
35 information allows a loop parallelization),
37 - when two data references access the same data, to qualify the
38 dependence relation with classic dependence representations:
40 - distance vectors
41 - direction vectors
42 - loop carried level dependence
43 - polyhedron dependence
44 or with the chains of recurrences based representation,
46 - to define a knowledge base for storing the data dependence
47 information,
49 - to define an interface to access this data.
52 Definitions:
54 - subscript: given two array accesses a subscript is the tuple
55 composed of the access functions for a given dimension. Example:
56 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57 (f1, g1), (f2, g2), (f3, g3).
59 - Diophantine equation: an equation whose coefficients and
60 solutions are integer constants, for example the equation
61 | 3*x + 2*y = 1
62 has an integer solution x = 1 and y = -1.
64 References:
66 - "Advanced Compilation for High Performance Computing" by Randy
67 Allen and Ken Kennedy.
68 http://citeseer.ist.psu.edu/goff91practical.html
70 - "Loop Transformations for Restructuring Compilers - The Foundations"
71 by Utpal Banerjee.
76 #include "config.h"
77 #include "system.h"
78 #include "coretypes.h"
79 #include "tm.h"
80 #include "ggc.h"
81 #include "tree.h"
83 /* These RTL headers are needed for basic-block.h. */
84 #include "rtl.h"
85 #include "basic-block.h"
86 #include "diagnostic.h"
87 #include "tree-flow.h"
88 #include "tree-dump.h"
89 #include "timevar.h"
90 #include "cfgloop.h"
91 #include "tree-chrec.h"
92 #include "tree-data-ref.h"
93 #include "tree-scalar-evolution.h"
94 #include "tree-pass.h"
95 #include "langhooks.h"
97 static struct datadep_stats
99 int num_dependence_tests;
100 int num_dependence_dependent;
101 int num_dependence_independent;
102 int num_dependence_undetermined;
104 int num_subscript_tests;
105 int num_subscript_undetermined;
106 int num_same_subscript_function;
108 int num_ziv;
109 int num_ziv_independent;
110 int num_ziv_dependent;
111 int num_ziv_unimplemented;
113 int num_siv;
114 int num_siv_independent;
115 int num_siv_dependent;
116 int num_siv_unimplemented;
118 int num_miv;
119 int num_miv_independent;
120 int num_miv_dependent;
121 int num_miv_unimplemented;
122 } dependence_stats;
124 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
125 struct data_reference *,
126 struct data_reference *,
127 struct loop *);
128 /* Returns true iff A divides B. */
130 static inline bool
131 tree_fold_divides_p (const_tree a, const_tree b)
133 gcc_assert (TREE_CODE (a) == INTEGER_CST);
134 gcc_assert (TREE_CODE (b) == INTEGER_CST);
135 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a, 0));
138 /* Returns true iff A divides B. */
140 static inline bool
141 int_divides_p (int a, int b)
143 return ((b % a) == 0);
148 /* Dump into FILE all the data references from DATAREFS. */
150 void
151 dump_data_references (FILE *file, VEC (data_reference_p, heap) *datarefs)
153 unsigned int i;
154 struct data_reference *dr;
156 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
157 dump_data_reference (file, dr);
160 /* Dump into FILE all the dependence relations from DDRS. */
162 void
163 dump_data_dependence_relations (FILE *file,
164 VEC (ddr_p, heap) *ddrs)
166 unsigned int i;
167 struct data_dependence_relation *ddr;
169 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
170 dump_data_dependence_relation (file, ddr);
173 /* Dump function for a DATA_REFERENCE structure. */
175 void
176 dump_data_reference (FILE *outf,
177 struct data_reference *dr)
179 unsigned int i;
181 fprintf (outf, "(Data Ref: \n stmt: ");
182 print_generic_stmt (outf, DR_STMT (dr), 0);
183 fprintf (outf, " ref: ");
184 print_generic_stmt (outf, DR_REF (dr), 0);
185 fprintf (outf, " base_object: ");
186 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
188 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
190 fprintf (outf, " Access function %d: ", i);
191 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
193 fprintf (outf, ")\n");
196 /* Dumps the affine function described by FN to the file OUTF. */
198 static void
199 dump_affine_function (FILE *outf, affine_fn fn)
201 unsigned i;
202 tree coef;
204 print_generic_expr (outf, VEC_index (tree, fn, 0), TDF_SLIM);
205 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
207 fprintf (outf, " + ");
208 print_generic_expr (outf, coef, TDF_SLIM);
209 fprintf (outf, " * x_%u", i);
213 /* Dumps the conflict function CF to the file OUTF. */
215 static void
216 dump_conflict_function (FILE *outf, conflict_function *cf)
218 unsigned i;
220 if (cf->n == NO_DEPENDENCE)
221 fprintf (outf, "no dependence\n");
222 else if (cf->n == NOT_KNOWN)
223 fprintf (outf, "not known\n");
224 else
226 for (i = 0; i < cf->n; i++)
228 fprintf (outf, "[");
229 dump_affine_function (outf, cf->fns[i]);
230 fprintf (outf, "]\n");
235 /* Dump function for a SUBSCRIPT structure. */
237 void
238 dump_subscript (FILE *outf, struct subscript *subscript)
240 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
242 fprintf (outf, "\n (subscript \n");
243 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
244 dump_conflict_function (outf, cf);
245 if (CF_NONTRIVIAL_P (cf))
247 tree last_iteration = SUB_LAST_CONFLICT (subscript);
248 fprintf (outf, " last_conflict: ");
249 print_generic_stmt (outf, last_iteration, 0);
252 cf = SUB_CONFLICTS_IN_B (subscript);
253 fprintf (outf, " iterations_that_access_an_element_twice_in_B: ");
254 dump_conflict_function (outf, cf);
255 if (CF_NONTRIVIAL_P (cf))
257 tree last_iteration = SUB_LAST_CONFLICT (subscript);
258 fprintf (outf, " last_conflict: ");
259 print_generic_stmt (outf, last_iteration, 0);
262 fprintf (outf, " (Subscript distance: ");
263 print_generic_stmt (outf, SUB_DISTANCE (subscript), 0);
264 fprintf (outf, " )\n");
265 fprintf (outf, " )\n");
268 /* Print the classic direction vector DIRV to OUTF. */
270 void
271 print_direction_vector (FILE *outf,
272 lambda_vector dirv,
273 int length)
275 int eq;
277 for (eq = 0; eq < length; eq++)
279 enum data_dependence_direction dir = dirv[eq];
281 switch (dir)
283 case dir_positive:
284 fprintf (outf, " +");
285 break;
286 case dir_negative:
287 fprintf (outf, " -");
288 break;
289 case dir_equal:
290 fprintf (outf, " =");
291 break;
292 case dir_positive_or_equal:
293 fprintf (outf, " +=");
294 break;
295 case dir_positive_or_negative:
296 fprintf (outf, " +-");
297 break;
298 case dir_negative_or_equal:
299 fprintf (outf, " -=");
300 break;
301 case dir_star:
302 fprintf (outf, " *");
303 break;
304 default:
305 fprintf (outf, "indep");
306 break;
309 fprintf (outf, "\n");
312 /* Print a vector of direction vectors. */
314 void
315 print_dir_vectors (FILE *outf, VEC (lambda_vector, heap) *dir_vects,
316 int length)
318 unsigned j;
319 lambda_vector v;
321 for (j = 0; VEC_iterate (lambda_vector, dir_vects, j, v); j++)
322 print_direction_vector (outf, v, length);
325 /* Print a vector of distance vectors. */
327 void
328 print_dist_vectors (FILE *outf, VEC (lambda_vector, heap) *dist_vects,
329 int length)
331 unsigned j;
332 lambda_vector v;
334 for (j = 0; VEC_iterate (lambda_vector, dist_vects, j, v); j++)
335 print_lambda_vector (outf, v, length);
338 /* Debug version. */
340 void
341 debug_data_dependence_relation (struct data_dependence_relation *ddr)
343 dump_data_dependence_relation (stderr, ddr);
346 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
348 void
349 dump_data_dependence_relation (FILE *outf,
350 struct data_dependence_relation *ddr)
352 struct data_reference *dra, *drb;
354 dra = DDR_A (ddr);
355 drb = DDR_B (ddr);
356 fprintf (outf, "(Data Dep: \n");
357 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
358 fprintf (outf, " (don't know)\n");
360 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
361 fprintf (outf, " (no dependence)\n");
363 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
365 unsigned int i;
366 struct loop *loopi;
368 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
370 fprintf (outf, " access_fn_A: ");
371 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
372 fprintf (outf, " access_fn_B: ");
373 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
374 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
377 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
378 fprintf (outf, " loop nest: (");
379 for (i = 0; VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
380 fprintf (outf, "%d ", loopi->num);
381 fprintf (outf, ")\n");
383 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
385 fprintf (outf, " distance_vector: ");
386 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
387 DDR_NB_LOOPS (ddr));
390 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
392 fprintf (outf, " direction_vector: ");
393 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
394 DDR_NB_LOOPS (ddr));
398 fprintf (outf, ")\n");
401 /* Dump function for a DATA_DEPENDENCE_DIRECTION structure. */
403 void
404 dump_data_dependence_direction (FILE *file,
405 enum data_dependence_direction dir)
407 switch (dir)
409 case dir_positive:
410 fprintf (file, "+");
411 break;
413 case dir_negative:
414 fprintf (file, "-");
415 break;
417 case dir_equal:
418 fprintf (file, "=");
419 break;
421 case dir_positive_or_negative:
422 fprintf (file, "+-");
423 break;
425 case dir_positive_or_equal:
426 fprintf (file, "+=");
427 break;
429 case dir_negative_or_equal:
430 fprintf (file, "-=");
431 break;
433 case dir_star:
434 fprintf (file, "*");
435 break;
437 default:
438 break;
442 /* Dumps the distance and direction vectors in FILE. DDRS contains
443 the dependence relations, and VECT_SIZE is the size of the
444 dependence vectors, or in other words the number of loops in the
445 considered nest. */
447 void
448 dump_dist_dir_vectors (FILE *file, VEC (ddr_p, heap) *ddrs)
450 unsigned int i, j;
451 struct data_dependence_relation *ddr;
452 lambda_vector v;
454 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
455 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
457 for (j = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), j, v); j++)
459 fprintf (file, "DISTANCE_V (");
460 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
461 fprintf (file, ")\n");
464 for (j = 0; VEC_iterate (lambda_vector, DDR_DIR_VECTS (ddr), j, v); j++)
466 fprintf (file, "DIRECTION_V (");
467 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
468 fprintf (file, ")\n");
472 fprintf (file, "\n\n");
475 /* Dumps the data dependence relations DDRS in FILE. */
477 void
478 dump_ddrs (FILE *file, VEC (ddr_p, heap) *ddrs)
480 unsigned int i;
481 struct data_dependence_relation *ddr;
483 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
484 dump_data_dependence_relation (file, ddr);
486 fprintf (file, "\n\n");
489 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
490 will be ssizetype. */
492 void
493 split_constant_offset (tree exp, tree *var, tree *off)
495 tree type = TREE_TYPE (exp), otype;
496 tree var0, var1;
497 tree off0, off1;
498 enum tree_code code;
500 *var = exp;
501 STRIP_NOPS (exp);
502 otype = TREE_TYPE (exp);
503 code = TREE_CODE (exp);
505 switch (code)
507 case INTEGER_CST:
508 *var = build_int_cst (type, 0);
509 *off = fold_convert (ssizetype, exp);
510 return;
512 case POINTER_PLUS_EXPR:
513 code = PLUS_EXPR;
514 /* FALLTHROUGH */
515 case PLUS_EXPR:
516 case MINUS_EXPR:
517 split_constant_offset (TREE_OPERAND (exp, 0), &var0, &off0);
518 split_constant_offset (TREE_OPERAND (exp, 1), &var1, &off1);
519 *var = fold_convert (type, fold_build2 (TREE_CODE (exp), otype,
520 var0, var1));
521 *off = size_binop (code, off0, off1);
522 return;
524 case MULT_EXPR:
525 off1 = TREE_OPERAND (exp, 1);
526 if (TREE_CODE (off1) != INTEGER_CST)
527 break;
529 split_constant_offset (TREE_OPERAND (exp, 0), &var0, &off0);
530 *var = fold_convert (type, fold_build2 (MULT_EXPR, otype,
531 var0, off1));
532 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, off1));
533 return;
535 case ADDR_EXPR:
537 tree op, base, poffset;
538 HOST_WIDE_INT pbitsize, pbitpos;
539 enum machine_mode pmode;
540 int punsignedp, pvolatilep;
542 op = TREE_OPERAND (exp, 0);
543 if (!handled_component_p (op))
544 break;
546 base = get_inner_reference (op, &pbitsize, &pbitpos, &poffset,
547 &pmode, &punsignedp, &pvolatilep, false);
549 if (pbitpos % BITS_PER_UNIT != 0)
550 break;
551 base = build_fold_addr_expr (base);
552 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
554 if (poffset)
556 split_constant_offset (poffset, &poffset, &off1);
557 off0 = size_binop (PLUS_EXPR, off0, off1);
558 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base),
559 base,
560 fold_convert (TREE_TYPE (base), poffset));
563 *var = fold_convert (type, base);
564 *off = off0;
565 return;
568 case SSA_NAME:
570 tree def_stmt = SSA_NAME_DEF_STMT (exp);
571 if (TREE_CODE (def_stmt) == GIMPLE_MODIFY_STMT)
573 tree def_stmt_rhs = GIMPLE_STMT_OPERAND (def_stmt, 1);
575 if (!TREE_SIDE_EFFECTS (def_stmt_rhs)
576 && EXPR_P (def_stmt_rhs)
577 && !REFERENCE_CLASS_P (def_stmt_rhs)
578 && !get_call_expr_in (def_stmt_rhs))
580 split_constant_offset (def_stmt_rhs, &var0, &off0);
581 var0 = fold_convert (type, var0);
582 *var = var0;
583 *off = off0;
584 return;
587 break;
590 default:
591 break;
594 *off = ssize_int (0);
597 /* Returns the address ADDR of an object in a canonical shape (without nop
598 casts, and with type of pointer to the object). */
600 static tree
601 canonicalize_base_object_address (tree addr)
603 tree orig = addr;
605 STRIP_NOPS (addr);
607 /* The base address may be obtained by casting from integer, in that case
608 keep the cast. */
609 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
610 return orig;
612 if (TREE_CODE (addr) != ADDR_EXPR)
613 return addr;
615 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
618 /* Analyzes the behavior of the memory reference DR in the innermost loop that
619 contains it. */
621 void
622 dr_analyze_innermost (struct data_reference *dr)
624 tree stmt = DR_STMT (dr);
625 struct loop *loop = loop_containing_stmt (stmt);
626 tree ref = DR_REF (dr);
627 HOST_WIDE_INT pbitsize, pbitpos;
628 tree base, poffset;
629 enum machine_mode pmode;
630 int punsignedp, pvolatilep;
631 affine_iv base_iv, offset_iv;
632 tree init, dinit, step;
634 if (dump_file && (dump_flags & TDF_DETAILS))
635 fprintf (dump_file, "analyze_innermost: ");
637 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
638 &pmode, &punsignedp, &pvolatilep, false);
639 gcc_assert (base != NULL_TREE);
641 if (pbitpos % BITS_PER_UNIT != 0)
643 if (dump_file && (dump_flags & TDF_DETAILS))
644 fprintf (dump_file, "failed: bit offset alignment.\n");
645 return;
648 base = build_fold_addr_expr (base);
649 if (!simple_iv (loop, stmt, base, &base_iv, false))
651 if (dump_file && (dump_flags & TDF_DETAILS))
652 fprintf (dump_file, "failed: evolution of base is not affine.\n");
653 return;
655 if (!poffset)
657 offset_iv.base = ssize_int (0);
658 offset_iv.step = ssize_int (0);
660 else if (!simple_iv (loop, stmt, poffset, &offset_iv, false))
662 if (dump_file && (dump_flags & TDF_DETAILS))
663 fprintf (dump_file, "failed: evolution of offset is not affine.\n");
664 return;
667 init = ssize_int (pbitpos / BITS_PER_UNIT);
668 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
669 init = size_binop (PLUS_EXPR, init, dinit);
670 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
671 init = size_binop (PLUS_EXPR, init, dinit);
673 step = size_binop (PLUS_EXPR,
674 fold_convert (ssizetype, base_iv.step),
675 fold_convert (ssizetype, offset_iv.step));
677 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
679 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
680 DR_INIT (dr) = init;
681 DR_STEP (dr) = step;
683 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
685 if (dump_file && (dump_flags & TDF_DETAILS))
686 fprintf (dump_file, "success.\n");
689 /* Determines the base object and the list of indices of memory reference
690 DR, analyzed in loop nest NEST. */
692 static void
693 dr_analyze_indices (struct data_reference *dr, struct loop *nest)
695 tree stmt = DR_STMT (dr);
696 struct loop *loop = loop_containing_stmt (stmt);
697 VEC (tree, heap) *access_fns = NULL;
698 tree ref = unshare_expr (DR_REF (dr)), aref = ref, op;
699 tree base, off, access_fn;
701 while (handled_component_p (aref))
703 if (TREE_CODE (aref) == ARRAY_REF)
705 op = TREE_OPERAND (aref, 1);
706 access_fn = analyze_scalar_evolution (loop, op);
707 access_fn = resolve_mixers (nest, access_fn);
708 VEC_safe_push (tree, heap, access_fns, access_fn);
710 TREE_OPERAND (aref, 1) = build_int_cst (TREE_TYPE (op), 0);
713 aref = TREE_OPERAND (aref, 0);
716 if (INDIRECT_REF_P (aref))
718 op = TREE_OPERAND (aref, 0);
719 access_fn = analyze_scalar_evolution (loop, op);
720 access_fn = resolve_mixers (nest, access_fn);
721 base = initial_condition (access_fn);
722 split_constant_offset (base, &base, &off);
723 access_fn = chrec_replace_initial_condition (access_fn,
724 fold_convert (TREE_TYPE (base), off));
726 TREE_OPERAND (aref, 0) = base;
727 VEC_safe_push (tree, heap, access_fns, access_fn);
730 DR_BASE_OBJECT (dr) = ref;
731 DR_ACCESS_FNS (dr) = access_fns;
734 /* Extracts the alias analysis information from the memory reference DR. */
736 static void
737 dr_analyze_alias (struct data_reference *dr)
739 tree stmt = DR_STMT (dr);
740 tree ref = DR_REF (dr);
741 tree base = get_base_address (ref), addr, smt = NULL_TREE;
742 ssa_op_iter it;
743 tree op;
744 bitmap vops;
746 if (DECL_P (base))
747 smt = base;
748 else if (INDIRECT_REF_P (base))
750 addr = TREE_OPERAND (base, 0);
751 if (TREE_CODE (addr) == SSA_NAME)
753 smt = symbol_mem_tag (SSA_NAME_VAR (addr));
754 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
758 DR_SYMBOL_TAG (dr) = smt;
759 if (smt && var_can_have_subvars (smt))
760 DR_SUBVARS (dr) = get_subvars_for_var (smt);
762 vops = BITMAP_ALLOC (NULL);
763 FOR_EACH_SSA_TREE_OPERAND (op, stmt, it, SSA_OP_VIRTUAL_USES)
765 bitmap_set_bit (vops, DECL_UID (SSA_NAME_VAR (op)));
768 DR_VOPS (dr) = vops;
771 /* Returns true if the address of DR is invariant. */
773 static bool
774 dr_address_invariant_p (struct data_reference *dr)
776 unsigned i;
777 tree idx;
779 for (i = 0; VEC_iterate (tree, DR_ACCESS_FNS (dr), i, idx); i++)
780 if (tree_contains_chrecs (idx, NULL))
781 return false;
783 return true;
786 /* Frees data reference DR. */
788 static void
789 free_data_ref (data_reference_p dr)
791 BITMAP_FREE (DR_VOPS (dr));
792 VEC_free (tree, heap, DR_ACCESS_FNS (dr));
793 free (dr);
796 /* Analyzes memory reference MEMREF accessed in STMT. The reference
797 is read if IS_READ is true, write otherwise. Returns the
798 data_reference description of MEMREF. NEST is the outermost loop of the
799 loop nest in that the reference should be analyzed. */
801 struct data_reference *
802 create_data_ref (struct loop *nest, tree memref, tree stmt, bool is_read)
804 struct data_reference *dr;
806 if (dump_file && (dump_flags & TDF_DETAILS))
808 fprintf (dump_file, "Creating dr for ");
809 print_generic_expr (dump_file, memref, TDF_SLIM);
810 fprintf (dump_file, "\n");
813 dr = XCNEW (struct data_reference);
814 DR_STMT (dr) = stmt;
815 DR_REF (dr) = memref;
816 DR_IS_READ (dr) = is_read;
818 dr_analyze_innermost (dr);
819 dr_analyze_indices (dr, nest);
820 dr_analyze_alias (dr);
822 if (dump_file && (dump_flags & TDF_DETAILS))
824 fprintf (dump_file, "\tbase_address: ");
825 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
826 fprintf (dump_file, "\n\toffset from base address: ");
827 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
828 fprintf (dump_file, "\n\tconstant offset from base address: ");
829 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
830 fprintf (dump_file, "\n\tstep: ");
831 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
832 fprintf (dump_file, "\n\taligned to: ");
833 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
834 fprintf (dump_file, "\n\tbase_object: ");
835 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
836 fprintf (dump_file, "\n\tsymbol tag: ");
837 print_generic_expr (dump_file, DR_SYMBOL_TAG (dr), TDF_SLIM);
838 fprintf (dump_file, "\n");
841 return dr;
844 /* Returns true if FNA == FNB. */
846 static bool
847 affine_function_equal_p (affine_fn fna, affine_fn fnb)
849 unsigned i, n = VEC_length (tree, fna);
851 if (n != VEC_length (tree, fnb))
852 return false;
854 for (i = 0; i < n; i++)
855 if (!operand_equal_p (VEC_index (tree, fna, i),
856 VEC_index (tree, fnb, i), 0))
857 return false;
859 return true;
862 /* If all the functions in CF are the same, returns one of them,
863 otherwise returns NULL. */
865 static affine_fn
866 common_affine_function (conflict_function *cf)
868 unsigned i;
869 affine_fn comm;
871 if (!CF_NONTRIVIAL_P (cf))
872 return NULL;
874 comm = cf->fns[0];
876 for (i = 1; i < cf->n; i++)
877 if (!affine_function_equal_p (comm, cf->fns[i]))
878 return NULL;
880 return comm;
883 /* Returns the base of the affine function FN. */
885 static tree
886 affine_function_base (affine_fn fn)
888 return VEC_index (tree, fn, 0);
891 /* Returns true if FN is a constant. */
893 static bool
894 affine_function_constant_p (affine_fn fn)
896 unsigned i;
897 tree coef;
899 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
900 if (!integer_zerop (coef))
901 return false;
903 return true;
906 /* Returns true if FN is the zero constant function. */
908 static bool
909 affine_function_zero_p (affine_fn fn)
911 return (integer_zerop (affine_function_base (fn))
912 && affine_function_constant_p (fn));
915 /* Applies operation OP on affine functions FNA and FNB, and returns the
916 result. */
918 static affine_fn
919 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
921 unsigned i, n, m;
922 affine_fn ret;
923 tree coef;
925 if (VEC_length (tree, fnb) > VEC_length (tree, fna))
927 n = VEC_length (tree, fna);
928 m = VEC_length (tree, fnb);
930 else
932 n = VEC_length (tree, fnb);
933 m = VEC_length (tree, fna);
936 ret = VEC_alloc (tree, heap, m);
937 for (i = 0; i < n; i++)
938 VEC_quick_push (tree, ret,
939 fold_build2 (op, integer_type_node,
940 VEC_index (tree, fna, i),
941 VEC_index (tree, fnb, i)));
943 for (; VEC_iterate (tree, fna, i, coef); i++)
944 VEC_quick_push (tree, ret,
945 fold_build2 (op, integer_type_node,
946 coef, integer_zero_node));
947 for (; VEC_iterate (tree, fnb, i, coef); i++)
948 VEC_quick_push (tree, ret,
949 fold_build2 (op, integer_type_node,
950 integer_zero_node, coef));
952 return ret;
955 /* Returns the sum of affine functions FNA and FNB. */
957 static affine_fn
958 affine_fn_plus (affine_fn fna, affine_fn fnb)
960 return affine_fn_op (PLUS_EXPR, fna, fnb);
963 /* Returns the difference of affine functions FNA and FNB. */
965 static affine_fn
966 affine_fn_minus (affine_fn fna, affine_fn fnb)
968 return affine_fn_op (MINUS_EXPR, fna, fnb);
971 /* Frees affine function FN. */
973 static void
974 affine_fn_free (affine_fn fn)
976 VEC_free (tree, heap, fn);
979 /* Determine for each subscript in the data dependence relation DDR
980 the distance. */
982 static void
983 compute_subscript_distance (struct data_dependence_relation *ddr)
985 conflict_function *cf_a, *cf_b;
986 affine_fn fn_a, fn_b, diff;
988 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
990 unsigned int i;
992 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
994 struct subscript *subscript;
996 subscript = DDR_SUBSCRIPT (ddr, i);
997 cf_a = SUB_CONFLICTS_IN_A (subscript);
998 cf_b = SUB_CONFLICTS_IN_B (subscript);
1000 fn_a = common_affine_function (cf_a);
1001 fn_b = common_affine_function (cf_b);
1002 if (!fn_a || !fn_b)
1004 SUB_DISTANCE (subscript) = chrec_dont_know;
1005 return;
1007 diff = affine_fn_minus (fn_a, fn_b);
1009 if (affine_function_constant_p (diff))
1010 SUB_DISTANCE (subscript) = affine_function_base (diff);
1011 else
1012 SUB_DISTANCE (subscript) = chrec_dont_know;
1014 affine_fn_free (diff);
1019 /* Returns the conflict function for "unknown". */
1021 static conflict_function *
1022 conflict_fn_not_known (void)
1024 conflict_function *fn = XCNEW (conflict_function);
1025 fn->n = NOT_KNOWN;
1027 return fn;
1030 /* Returns the conflict function for "independent". */
1032 static conflict_function *
1033 conflict_fn_no_dependence (void)
1035 conflict_function *fn = XCNEW (conflict_function);
1036 fn->n = NO_DEPENDENCE;
1038 return fn;
1041 /* Returns true if the address of OBJ is invariant in LOOP. */
1043 static bool
1044 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1046 while (handled_component_p (obj))
1048 if (TREE_CODE (obj) == ARRAY_REF)
1050 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1051 need to check the stride and the lower bound of the reference. */
1052 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1053 loop->num)
1054 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1055 loop->num))
1056 return false;
1058 else if (TREE_CODE (obj) == COMPONENT_REF)
1060 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1061 loop->num))
1062 return false;
1064 obj = TREE_OPERAND (obj, 0);
1067 if (!INDIRECT_REF_P (obj))
1068 return true;
1070 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1071 loop->num);
1074 /* Returns true if A and B are accesses to different objects, or to different
1075 fields of the same object. */
1077 static bool
1078 disjoint_objects_p (tree a, tree b)
1080 tree base_a, base_b;
1081 VEC (tree, heap) *comp_a = NULL, *comp_b = NULL;
1082 bool ret;
1084 base_a = get_base_address (a);
1085 base_b = get_base_address (b);
1087 if (DECL_P (base_a)
1088 && DECL_P (base_b)
1089 && base_a != base_b)
1090 return true;
1092 if (!operand_equal_p (base_a, base_b, 0))
1093 return false;
1095 /* Compare the component references of A and B. We must start from the inner
1096 ones, so record them to the vector first. */
1097 while (handled_component_p (a))
1099 VEC_safe_push (tree, heap, comp_a, a);
1100 a = TREE_OPERAND (a, 0);
1102 while (handled_component_p (b))
1104 VEC_safe_push (tree, heap, comp_b, b);
1105 b = TREE_OPERAND (b, 0);
1108 ret = false;
1109 while (1)
1111 if (VEC_length (tree, comp_a) == 0
1112 || VEC_length (tree, comp_b) == 0)
1113 break;
1115 a = VEC_pop (tree, comp_a);
1116 b = VEC_pop (tree, comp_b);
1118 /* Real and imaginary part of a variable do not alias. */
1119 if ((TREE_CODE (a) == REALPART_EXPR
1120 && TREE_CODE (b) == IMAGPART_EXPR)
1121 || (TREE_CODE (a) == IMAGPART_EXPR
1122 && TREE_CODE (b) == REALPART_EXPR))
1124 ret = true;
1125 break;
1128 if (TREE_CODE (a) != TREE_CODE (b))
1129 break;
1131 /* Nothing to do for ARRAY_REFs, as the indices of array_refs in
1132 DR_BASE_OBJECT are always zero. */
1133 if (TREE_CODE (a) == ARRAY_REF)
1134 continue;
1135 else if (TREE_CODE (a) == COMPONENT_REF)
1137 if (operand_equal_p (TREE_OPERAND (a, 1), TREE_OPERAND (b, 1), 0))
1138 continue;
1140 /* Different fields of unions may overlap. */
1141 base_a = TREE_OPERAND (a, 0);
1142 if (TREE_CODE (TREE_TYPE (base_a)) == UNION_TYPE)
1143 break;
1145 /* Different fields of structures cannot. */
1146 ret = true;
1147 break;
1149 else
1150 break;
1153 VEC_free (tree, heap, comp_a);
1154 VEC_free (tree, heap, comp_b);
1156 return ret;
1159 /* Returns false if we can prove that data references A and B do not alias,
1160 true otherwise. */
1162 static bool
1163 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b)
1165 const_tree addr_a = DR_BASE_ADDRESS (a);
1166 const_tree addr_b = DR_BASE_ADDRESS (b);
1167 const_tree type_a, type_b;
1168 const_tree decl_a = NULL_TREE, decl_b = NULL_TREE;
1170 /* If the sets of virtual operands are disjoint, the memory references do not
1171 alias. */
1172 if (!bitmap_intersect_p (DR_VOPS (a), DR_VOPS (b)))
1173 return false;
1175 /* If the accessed objects are disjoint, the memory references do not
1176 alias. */
1177 if (disjoint_objects_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b)))
1178 return false;
1180 if (!addr_a || !addr_b)
1181 return true;
1183 /* If the references are based on different static objects, they cannot alias
1184 (PTA should be able to disambiguate such accesses, but often it fails to,
1185 since currently we cannot distinguish between pointer and offset in pointer
1186 arithmetics). */
1187 if (TREE_CODE (addr_a) == ADDR_EXPR
1188 && TREE_CODE (addr_b) == ADDR_EXPR)
1189 return TREE_OPERAND (addr_a, 0) == TREE_OPERAND (addr_b, 0);
1191 /* An instruction writing through a restricted pointer is "independent" of any
1192 instruction reading or writing through a different restricted pointer,
1193 in the same block/scope. */
1195 type_a = TREE_TYPE (addr_a);
1196 type_b = TREE_TYPE (addr_b);
1197 gcc_assert (POINTER_TYPE_P (type_a) && POINTER_TYPE_P (type_b));
1199 if (TREE_CODE (addr_a) == SSA_NAME)
1200 decl_a = SSA_NAME_VAR (addr_a);
1201 if (TREE_CODE (addr_b) == SSA_NAME)
1202 decl_b = SSA_NAME_VAR (addr_b);
1204 if (TYPE_RESTRICT (type_a) && TYPE_RESTRICT (type_b)
1205 && (!DR_IS_READ (a) || !DR_IS_READ (b))
1206 && decl_a && DECL_P (decl_a)
1207 && decl_b && DECL_P (decl_b)
1208 && decl_a != decl_b
1209 && TREE_CODE (DECL_CONTEXT (decl_a)) == FUNCTION_DECL
1210 && DECL_CONTEXT (decl_a) == DECL_CONTEXT (decl_b))
1211 return false;
1213 return true;
1216 /* Initialize a data dependence relation between data accesses A and
1217 B. NB_LOOPS is the number of loops surrounding the references: the
1218 size of the classic distance/direction vectors. */
1220 static struct data_dependence_relation *
1221 initialize_data_dependence_relation (struct data_reference *a,
1222 struct data_reference *b,
1223 VEC (loop_p, heap) *loop_nest)
1225 struct data_dependence_relation *res;
1226 unsigned int i;
1228 res = XNEW (struct data_dependence_relation);
1229 DDR_A (res) = a;
1230 DDR_B (res) = b;
1231 DDR_LOOP_NEST (res) = NULL;
1232 DDR_REVERSED_P (res) = false;
1233 DDR_SUBSCRIPTS (res) = NULL;
1234 DDR_DIR_VECTS (res) = NULL;
1235 DDR_DIST_VECTS (res) = NULL;
1237 if (a == NULL || b == NULL)
1239 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1240 return res;
1243 /* If the data references do not alias, then they are independent. */
1244 if (!dr_may_alias_p (a, b))
1246 DDR_ARE_DEPENDENT (res) = chrec_known;
1247 return res;
1250 /* If the references do not access the same object, we do not know
1251 whether they alias or not. */
1252 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1254 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1255 return res;
1258 /* If the base of the object is not invariant in the loop nest, we cannot
1259 analyze it. TODO -- in fact, it would suffice to record that there may
1260 be arbitrary dependences in the loops where the base object varies. */
1261 if (!object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1262 DR_BASE_OBJECT (a)))
1264 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1265 return res;
1268 gcc_assert (DR_NUM_DIMENSIONS (a) == DR_NUM_DIMENSIONS (b));
1270 DDR_AFFINE_P (res) = true;
1271 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1272 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1273 DDR_LOOP_NEST (res) = loop_nest;
1274 DDR_INNER_LOOP (res) = 0;
1276 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1278 struct subscript *subscript;
1280 subscript = XNEW (struct subscript);
1281 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1282 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1283 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1284 SUB_DISTANCE (subscript) = chrec_dont_know;
1285 VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1288 return res;
1291 /* Frees memory used by the conflict function F. */
1293 static void
1294 free_conflict_function (conflict_function *f)
1296 unsigned i;
1298 if (CF_NONTRIVIAL_P (f))
1300 for (i = 0; i < f->n; i++)
1301 affine_fn_free (f->fns[i]);
1303 free (f);
1306 /* Frees memory used by SUBSCRIPTS. */
1308 static void
1309 free_subscripts (VEC (subscript_p, heap) *subscripts)
1311 unsigned i;
1312 subscript_p s;
1314 for (i = 0; VEC_iterate (subscript_p, subscripts, i, s); i++)
1316 free_conflict_function (s->conflicting_iterations_in_a);
1317 free_conflict_function (s->conflicting_iterations_in_b);
1319 VEC_free (subscript_p, heap, subscripts);
1322 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1323 description. */
1325 static inline void
1326 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1327 tree chrec)
1329 if (dump_file && (dump_flags & TDF_DETAILS))
1331 fprintf (dump_file, "(dependence classified: ");
1332 print_generic_expr (dump_file, chrec, 0);
1333 fprintf (dump_file, ")\n");
1336 DDR_ARE_DEPENDENT (ddr) = chrec;
1337 free_subscripts (DDR_SUBSCRIPTS (ddr));
1338 DDR_SUBSCRIPTS (ddr) = NULL;
1341 /* The dependence relation DDR cannot be represented by a distance
1342 vector. */
1344 static inline void
1345 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1347 if (dump_file && (dump_flags & TDF_DETAILS))
1348 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1350 DDR_AFFINE_P (ddr) = false;
1355 /* This section contains the classic Banerjee tests. */
1357 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1358 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1360 static inline bool
1361 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1363 return (evolution_function_is_constant_p (chrec_a)
1364 && evolution_function_is_constant_p (chrec_b));
1367 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1368 variable, i.e., if the SIV (Single Index Variable) test is true. */
1370 static bool
1371 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1373 if ((evolution_function_is_constant_p (chrec_a)
1374 && evolution_function_is_univariate_p (chrec_b))
1375 || (evolution_function_is_constant_p (chrec_b)
1376 && evolution_function_is_univariate_p (chrec_a)))
1377 return true;
1379 if (evolution_function_is_univariate_p (chrec_a)
1380 && evolution_function_is_univariate_p (chrec_b))
1382 switch (TREE_CODE (chrec_a))
1384 case POLYNOMIAL_CHREC:
1385 switch (TREE_CODE (chrec_b))
1387 case POLYNOMIAL_CHREC:
1388 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1389 return false;
1391 default:
1392 return true;
1395 default:
1396 return true;
1400 return false;
1403 /* Creates a conflict function with N dimensions. The affine functions
1404 in each dimension follow. */
1406 static conflict_function *
1407 conflict_fn (unsigned n, ...)
1409 unsigned i;
1410 conflict_function *ret = XCNEW (conflict_function);
1411 va_list ap;
1413 gcc_assert (0 < n && n <= MAX_DIM);
1414 va_start(ap, n);
1416 ret->n = n;
1417 for (i = 0; i < n; i++)
1418 ret->fns[i] = va_arg (ap, affine_fn);
1419 va_end(ap);
1421 return ret;
1424 /* Returns constant affine function with value CST. */
1426 static affine_fn
1427 affine_fn_cst (tree cst)
1429 affine_fn fn = VEC_alloc (tree, heap, 1);
1430 VEC_quick_push (tree, fn, cst);
1431 return fn;
1434 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1436 static affine_fn
1437 affine_fn_univar (tree cst, unsigned dim, tree coef)
1439 affine_fn fn = VEC_alloc (tree, heap, dim + 1);
1440 unsigned i;
1442 gcc_assert (dim > 0);
1443 VEC_quick_push (tree, fn, cst);
1444 for (i = 1; i < dim; i++)
1445 VEC_quick_push (tree, fn, integer_zero_node);
1446 VEC_quick_push (tree, fn, coef);
1447 return fn;
1450 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1451 *OVERLAPS_B are initialized to the functions that describe the
1452 relation between the elements accessed twice by CHREC_A and
1453 CHREC_B. For k >= 0, the following property is verified:
1455 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1457 static void
1458 analyze_ziv_subscript (tree chrec_a,
1459 tree chrec_b,
1460 conflict_function **overlaps_a,
1461 conflict_function **overlaps_b,
1462 tree *last_conflicts)
1464 tree difference;
1465 dependence_stats.num_ziv++;
1467 if (dump_file && (dump_flags & TDF_DETAILS))
1468 fprintf (dump_file, "(analyze_ziv_subscript \n");
1470 chrec_a = chrec_convert (integer_type_node, chrec_a, NULL_TREE);
1471 chrec_b = chrec_convert (integer_type_node, chrec_b, NULL_TREE);
1472 difference = chrec_fold_minus (integer_type_node, chrec_a, chrec_b);
1474 switch (TREE_CODE (difference))
1476 case INTEGER_CST:
1477 if (integer_zerop (difference))
1479 /* The difference is equal to zero: the accessed index
1480 overlaps for each iteration in the loop. */
1481 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1482 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1483 *last_conflicts = chrec_dont_know;
1484 dependence_stats.num_ziv_dependent++;
1486 else
1488 /* The accesses do not overlap. */
1489 *overlaps_a = conflict_fn_no_dependence ();
1490 *overlaps_b = conflict_fn_no_dependence ();
1491 *last_conflicts = integer_zero_node;
1492 dependence_stats.num_ziv_independent++;
1494 break;
1496 default:
1497 /* We're not sure whether the indexes overlap. For the moment,
1498 conservatively answer "don't know". */
1499 if (dump_file && (dump_flags & TDF_DETAILS))
1500 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1502 *overlaps_a = conflict_fn_not_known ();
1503 *overlaps_b = conflict_fn_not_known ();
1504 *last_conflicts = chrec_dont_know;
1505 dependence_stats.num_ziv_unimplemented++;
1506 break;
1509 if (dump_file && (dump_flags & TDF_DETAILS))
1510 fprintf (dump_file, ")\n");
1513 /* Sets NIT to the estimated number of executions of the statements in
1514 LOOP. If CONSERVATIVE is true, we must be sure that NIT is at least as
1515 large as the number of iterations. If we have no reliable estimate,
1516 the function returns false, otherwise returns true. */
1518 bool
1519 estimated_loop_iterations (struct loop *loop, bool conservative,
1520 double_int *nit)
1522 estimate_numbers_of_iterations_loop (loop);
1523 if (conservative)
1525 if (!loop->any_upper_bound)
1526 return false;
1528 *nit = loop->nb_iterations_upper_bound;
1530 else
1532 if (!loop->any_estimate)
1533 return false;
1535 *nit = loop->nb_iterations_estimate;
1538 return true;
1541 /* Similar to estimated_loop_iterations, but returns the estimate only
1542 if it fits to HOST_WIDE_INT. If this is not the case, or the estimate
1543 on the number of iterations of LOOP could not be derived, returns -1. */
1545 HOST_WIDE_INT
1546 estimated_loop_iterations_int (struct loop *loop, bool conservative)
1548 double_int nit;
1549 HOST_WIDE_INT hwi_nit;
1551 if (!estimated_loop_iterations (loop, conservative, &nit))
1552 return -1;
1554 if (!double_int_fits_in_shwi_p (nit))
1555 return -1;
1556 hwi_nit = double_int_to_shwi (nit);
1558 return hwi_nit < 0 ? -1 : hwi_nit;
1561 /* Similar to estimated_loop_iterations, but returns the estimate as a tree,
1562 and only if it fits to the int type. If this is not the case, or the
1563 estimate on the number of iterations of LOOP could not be derived, returns
1564 chrec_dont_know. */
1566 static tree
1567 estimated_loop_iterations_tree (struct loop *loop, bool conservative)
1569 double_int nit;
1570 tree type;
1572 if (!estimated_loop_iterations (loop, conservative, &nit))
1573 return chrec_dont_know;
1575 type = lang_hooks.types.type_for_size (INT_TYPE_SIZE, true);
1576 if (!double_int_fits_to_tree_p (type, nit))
1577 return chrec_dont_know;
1579 return double_int_to_tree (type, nit);
1582 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1583 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1584 *OVERLAPS_B are initialized to the functions that describe the
1585 relation between the elements accessed twice by CHREC_A and
1586 CHREC_B. For k >= 0, the following property is verified:
1588 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1590 static void
1591 analyze_siv_subscript_cst_affine (tree chrec_a,
1592 tree chrec_b,
1593 conflict_function **overlaps_a,
1594 conflict_function **overlaps_b,
1595 tree *last_conflicts)
1597 bool value0, value1, value2;
1598 tree difference, tmp;
1600 chrec_a = chrec_convert (integer_type_node, chrec_a, NULL_TREE);
1601 chrec_b = chrec_convert (integer_type_node, chrec_b, NULL_TREE);
1602 difference = chrec_fold_minus
1603 (integer_type_node, initial_condition (chrec_b), chrec_a);
1605 if (!chrec_is_positive (initial_condition (difference), &value0))
1607 if (dump_file && (dump_flags & TDF_DETAILS))
1608 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1610 dependence_stats.num_siv_unimplemented++;
1611 *overlaps_a = conflict_fn_not_known ();
1612 *overlaps_b = conflict_fn_not_known ();
1613 *last_conflicts = chrec_dont_know;
1614 return;
1616 else
1618 if (value0 == false)
1620 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1622 if (dump_file && (dump_flags & TDF_DETAILS))
1623 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1625 *overlaps_a = conflict_fn_not_known ();
1626 *overlaps_b = conflict_fn_not_known ();
1627 *last_conflicts = chrec_dont_know;
1628 dependence_stats.num_siv_unimplemented++;
1629 return;
1631 else
1633 if (value1 == true)
1635 /* Example:
1636 chrec_a = 12
1637 chrec_b = {10, +, 1}
1640 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1642 HOST_WIDE_INT numiter;
1643 struct loop *loop = get_chrec_loop (chrec_b);
1645 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1646 tmp = fold_build2 (EXACT_DIV_EXPR, integer_type_node,
1647 fold_build1 (ABS_EXPR,
1648 integer_type_node,
1649 difference),
1650 CHREC_RIGHT (chrec_b));
1651 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1652 *last_conflicts = integer_one_node;
1655 /* Perform weak-zero siv test to see if overlap is
1656 outside the loop bounds. */
1657 numiter = estimated_loop_iterations_int (loop, false);
1659 if (numiter >= 0
1660 && compare_tree_int (tmp, numiter) > 0)
1662 free_conflict_function (*overlaps_a);
1663 free_conflict_function (*overlaps_b);
1664 *overlaps_a = conflict_fn_no_dependence ();
1665 *overlaps_b = conflict_fn_no_dependence ();
1666 *last_conflicts = integer_zero_node;
1667 dependence_stats.num_siv_independent++;
1668 return;
1670 dependence_stats.num_siv_dependent++;
1671 return;
1674 /* When the step does not divide the difference, there are
1675 no overlaps. */
1676 else
1678 *overlaps_a = conflict_fn_no_dependence ();
1679 *overlaps_b = conflict_fn_no_dependence ();
1680 *last_conflicts = integer_zero_node;
1681 dependence_stats.num_siv_independent++;
1682 return;
1686 else
1688 /* Example:
1689 chrec_a = 12
1690 chrec_b = {10, +, -1}
1692 In this case, chrec_a will not overlap with chrec_b. */
1693 *overlaps_a = conflict_fn_no_dependence ();
1694 *overlaps_b = conflict_fn_no_dependence ();
1695 *last_conflicts = integer_zero_node;
1696 dependence_stats.num_siv_independent++;
1697 return;
1701 else
1703 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1705 if (dump_file && (dump_flags & TDF_DETAILS))
1706 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1708 *overlaps_a = conflict_fn_not_known ();
1709 *overlaps_b = conflict_fn_not_known ();
1710 *last_conflicts = chrec_dont_know;
1711 dependence_stats.num_siv_unimplemented++;
1712 return;
1714 else
1716 if (value2 == false)
1718 /* Example:
1719 chrec_a = 3
1720 chrec_b = {10, +, -1}
1722 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1724 HOST_WIDE_INT numiter;
1725 struct loop *loop = get_chrec_loop (chrec_b);
1727 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1728 tmp = fold_build2 (EXACT_DIV_EXPR,
1729 integer_type_node, difference,
1730 CHREC_RIGHT (chrec_b));
1731 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1732 *last_conflicts = integer_one_node;
1734 /* Perform weak-zero siv test to see if overlap is
1735 outside the loop bounds. */
1736 numiter = estimated_loop_iterations_int (loop, false);
1738 if (numiter >= 0
1739 && compare_tree_int (tmp, numiter) > 0)
1741 free_conflict_function (*overlaps_a);
1742 free_conflict_function (*overlaps_b);
1743 *overlaps_a = conflict_fn_no_dependence ();
1744 *overlaps_b = conflict_fn_no_dependence ();
1745 *last_conflicts = integer_zero_node;
1746 dependence_stats.num_siv_independent++;
1747 return;
1749 dependence_stats.num_siv_dependent++;
1750 return;
1753 /* When the step does not divide the difference, there
1754 are no overlaps. */
1755 else
1757 *overlaps_a = conflict_fn_no_dependence ();
1758 *overlaps_b = conflict_fn_no_dependence ();
1759 *last_conflicts = integer_zero_node;
1760 dependence_stats.num_siv_independent++;
1761 return;
1764 else
1766 /* Example:
1767 chrec_a = 3
1768 chrec_b = {4, +, 1}
1770 In this case, chrec_a will not overlap with chrec_b. */
1771 *overlaps_a = conflict_fn_no_dependence ();
1772 *overlaps_b = conflict_fn_no_dependence ();
1773 *last_conflicts = integer_zero_node;
1774 dependence_stats.num_siv_independent++;
1775 return;
1782 /* Helper recursive function for initializing the matrix A. Returns
1783 the initial value of CHREC. */
1785 static int
1786 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
1788 gcc_assert (chrec);
1790 if (TREE_CODE (chrec) != POLYNOMIAL_CHREC)
1791 return int_cst_value (chrec);
1793 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
1794 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
1797 #define FLOOR_DIV(x,y) ((x) / (y))
1799 /* Solves the special case of the Diophantine equation:
1800 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
1802 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
1803 number of iterations that loops X and Y run. The overlaps will be
1804 constructed as evolutions in dimension DIM. */
1806 static void
1807 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
1808 affine_fn *overlaps_a,
1809 affine_fn *overlaps_b,
1810 tree *last_conflicts, int dim)
1812 if (((step_a > 0 && step_b > 0)
1813 || (step_a < 0 && step_b < 0)))
1815 int step_overlaps_a, step_overlaps_b;
1816 int gcd_steps_a_b, last_conflict, tau2;
1818 gcd_steps_a_b = gcd (step_a, step_b);
1819 step_overlaps_a = step_b / gcd_steps_a_b;
1820 step_overlaps_b = step_a / gcd_steps_a_b;
1822 tau2 = FLOOR_DIV (niter, step_overlaps_a);
1823 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
1824 last_conflict = tau2;
1826 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
1827 build_int_cst (NULL_TREE,
1828 step_overlaps_a));
1829 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
1830 build_int_cst (NULL_TREE,
1831 step_overlaps_b));
1832 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
1835 else
1837 *overlaps_a = affine_fn_cst (integer_zero_node);
1838 *overlaps_b = affine_fn_cst (integer_zero_node);
1839 *last_conflicts = integer_zero_node;
1843 /* Solves the special case of a Diophantine equation where CHREC_A is
1844 an affine bivariate function, and CHREC_B is an affine univariate
1845 function. For example,
1847 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
1849 has the following overlapping functions:
1851 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
1852 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
1853 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
1855 FORNOW: This is a specialized implementation for a case occurring in
1856 a common benchmark. Implement the general algorithm. */
1858 static void
1859 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
1860 conflict_function **overlaps_a,
1861 conflict_function **overlaps_b,
1862 tree *last_conflicts)
1864 bool xz_p, yz_p, xyz_p;
1865 int step_x, step_y, step_z;
1866 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
1867 affine_fn overlaps_a_xz, overlaps_b_xz;
1868 affine_fn overlaps_a_yz, overlaps_b_yz;
1869 affine_fn overlaps_a_xyz, overlaps_b_xyz;
1870 affine_fn ova1, ova2, ovb;
1871 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
1873 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
1874 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
1875 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
1877 niter_x =
1878 estimated_loop_iterations_int (get_chrec_loop (CHREC_LEFT (chrec_a)),
1879 false);
1880 niter_y = estimated_loop_iterations_int (get_chrec_loop (chrec_a), false);
1881 niter_z = estimated_loop_iterations_int (get_chrec_loop (chrec_b), false);
1883 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
1885 if (dump_file && (dump_flags & TDF_DETAILS))
1886 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
1888 *overlaps_a = conflict_fn_not_known ();
1889 *overlaps_b = conflict_fn_not_known ();
1890 *last_conflicts = chrec_dont_know;
1891 return;
1894 niter = MIN (niter_x, niter_z);
1895 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
1896 &overlaps_a_xz,
1897 &overlaps_b_xz,
1898 &last_conflicts_xz, 1);
1899 niter = MIN (niter_y, niter_z);
1900 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
1901 &overlaps_a_yz,
1902 &overlaps_b_yz,
1903 &last_conflicts_yz, 2);
1904 niter = MIN (niter_x, niter_z);
1905 niter = MIN (niter_y, niter);
1906 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
1907 &overlaps_a_xyz,
1908 &overlaps_b_xyz,
1909 &last_conflicts_xyz, 3);
1911 xz_p = !integer_zerop (last_conflicts_xz);
1912 yz_p = !integer_zerop (last_conflicts_yz);
1913 xyz_p = !integer_zerop (last_conflicts_xyz);
1915 if (xz_p || yz_p || xyz_p)
1917 ova1 = affine_fn_cst (integer_zero_node);
1918 ova2 = affine_fn_cst (integer_zero_node);
1919 ovb = affine_fn_cst (integer_zero_node);
1920 if (xz_p)
1922 affine_fn t0 = ova1;
1923 affine_fn t2 = ovb;
1925 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
1926 ovb = affine_fn_plus (ovb, overlaps_b_xz);
1927 affine_fn_free (t0);
1928 affine_fn_free (t2);
1929 *last_conflicts = last_conflicts_xz;
1931 if (yz_p)
1933 affine_fn t0 = ova2;
1934 affine_fn t2 = ovb;
1936 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
1937 ovb = affine_fn_plus (ovb, overlaps_b_yz);
1938 affine_fn_free (t0);
1939 affine_fn_free (t2);
1940 *last_conflicts = last_conflicts_yz;
1942 if (xyz_p)
1944 affine_fn t0 = ova1;
1945 affine_fn t2 = ova2;
1946 affine_fn t4 = ovb;
1948 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
1949 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
1950 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
1951 affine_fn_free (t0);
1952 affine_fn_free (t2);
1953 affine_fn_free (t4);
1954 *last_conflicts = last_conflicts_xyz;
1956 *overlaps_a = conflict_fn (2, ova1, ova2);
1957 *overlaps_b = conflict_fn (1, ovb);
1959 else
1961 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1962 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1963 *last_conflicts = integer_zero_node;
1966 affine_fn_free (overlaps_a_xz);
1967 affine_fn_free (overlaps_b_xz);
1968 affine_fn_free (overlaps_a_yz);
1969 affine_fn_free (overlaps_b_yz);
1970 affine_fn_free (overlaps_a_xyz);
1971 affine_fn_free (overlaps_b_xyz);
1974 /* Determines the overlapping elements due to accesses CHREC_A and
1975 CHREC_B, that are affine functions. This function cannot handle
1976 symbolic evolution functions, ie. when initial conditions are
1977 parameters, because it uses lambda matrices of integers. */
1979 static void
1980 analyze_subscript_affine_affine (tree chrec_a,
1981 tree chrec_b,
1982 conflict_function **overlaps_a,
1983 conflict_function **overlaps_b,
1984 tree *last_conflicts)
1986 unsigned nb_vars_a, nb_vars_b, dim;
1987 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
1988 HOST_WIDE_INT tau1, tau2;
1989 lambda_matrix A, U, S;
1991 if (eq_evolutions_p (chrec_a, chrec_b))
1993 /* The accessed index overlaps for each iteration in the
1994 loop. */
1995 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1996 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1997 *last_conflicts = chrec_dont_know;
1998 return;
2000 if (dump_file && (dump_flags & TDF_DETAILS))
2001 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2003 /* For determining the initial intersection, we have to solve a
2004 Diophantine equation. This is the most time consuming part.
2006 For answering to the question: "Is there a dependence?" we have
2007 to prove that there exists a solution to the Diophantine
2008 equation, and that the solution is in the iteration domain,
2009 i.e. the solution is positive or zero, and that the solution
2010 happens before the upper bound loop.nb_iterations. Otherwise
2011 there is no dependence. This function outputs a description of
2012 the iterations that hold the intersections. */
2014 nb_vars_a = nb_vars_in_chrec (chrec_a);
2015 nb_vars_b = nb_vars_in_chrec (chrec_b);
2017 dim = nb_vars_a + nb_vars_b;
2018 U = lambda_matrix_new (dim, dim);
2019 A = lambda_matrix_new (dim, 1);
2020 S = lambda_matrix_new (dim, 1);
2022 init_a = initialize_matrix_A (A, chrec_a, 0, 1);
2023 init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
2024 gamma = init_b - init_a;
2026 /* Don't do all the hard work of solving the Diophantine equation
2027 when we already know the solution: for example,
2028 | {3, +, 1}_1
2029 | {3, +, 4}_2
2030 | gamma = 3 - 3 = 0.
2031 Then the first overlap occurs during the first iterations:
2032 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2034 if (gamma == 0)
2036 if (nb_vars_a == 1 && nb_vars_b == 1)
2038 HOST_WIDE_INT step_a, step_b;
2039 HOST_WIDE_INT niter, niter_a, niter_b;
2040 affine_fn ova, ovb;
2042 niter_a = estimated_loop_iterations_int (get_chrec_loop (chrec_a),
2043 false);
2044 niter_b = estimated_loop_iterations_int (get_chrec_loop (chrec_b),
2045 false);
2046 if (niter_a < 0 || niter_b < 0)
2048 if (dump_file && (dump_flags & TDF_DETAILS))
2049 fprintf (dump_file, "affine-affine test failed: missing iteration counts.\n");
2050 *overlaps_a = conflict_fn_not_known ();
2051 *overlaps_b = conflict_fn_not_known ();
2052 *last_conflicts = chrec_dont_know;
2053 goto end_analyze_subs_aa;
2056 niter = MIN (niter_a, niter_b);
2058 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2059 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2061 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2062 &ova, &ovb,
2063 last_conflicts, 1);
2064 *overlaps_a = conflict_fn (1, ova);
2065 *overlaps_b = conflict_fn (1, ovb);
2068 else if (nb_vars_a == 2 && nb_vars_b == 1)
2069 compute_overlap_steps_for_affine_1_2
2070 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2072 else if (nb_vars_a == 1 && nb_vars_b == 2)
2073 compute_overlap_steps_for_affine_1_2
2074 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2076 else
2078 if (dump_file && (dump_flags & TDF_DETAILS))
2079 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2080 *overlaps_a = conflict_fn_not_known ();
2081 *overlaps_b = conflict_fn_not_known ();
2082 *last_conflicts = chrec_dont_know;
2084 goto end_analyze_subs_aa;
2087 /* U.A = S */
2088 lambda_matrix_right_hermite (A, dim, 1, S, U);
2090 if (S[0][0] < 0)
2092 S[0][0] *= -1;
2093 lambda_matrix_row_negate (U, dim, 0);
2095 gcd_alpha_beta = S[0][0];
2097 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2098 but that is a quite strange case. Instead of ICEing, answer
2099 don't know. */
2100 if (gcd_alpha_beta == 0)
2102 *overlaps_a = conflict_fn_not_known ();
2103 *overlaps_b = conflict_fn_not_known ();
2104 *last_conflicts = chrec_dont_know;
2105 goto end_analyze_subs_aa;
2108 /* The classic "gcd-test". */
2109 if (!int_divides_p (gcd_alpha_beta, gamma))
2111 /* The "gcd-test" has determined that there is no integer
2112 solution, i.e. there is no dependence. */
2113 *overlaps_a = conflict_fn_no_dependence ();
2114 *overlaps_b = conflict_fn_no_dependence ();
2115 *last_conflicts = integer_zero_node;
2118 /* Both access functions are univariate. This includes SIV and MIV cases. */
2119 else if (nb_vars_a == 1 && nb_vars_b == 1)
2121 /* Both functions should have the same evolution sign. */
2122 if (((A[0][0] > 0 && -A[1][0] > 0)
2123 || (A[0][0] < 0 && -A[1][0] < 0)))
2125 /* The solutions are given by:
2127 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2128 | [u21 u22] [y0]
2130 For a given integer t. Using the following variables,
2132 | i0 = u11 * gamma / gcd_alpha_beta
2133 | j0 = u12 * gamma / gcd_alpha_beta
2134 | i1 = u21
2135 | j1 = u22
2137 the solutions are:
2139 | x0 = i0 + i1 * t,
2140 | y0 = j0 + j1 * t. */
2142 HOST_WIDE_INT i0, j0, i1, j1;
2144 /* X0 and Y0 are the first iterations for which there is a
2145 dependence. X0, Y0 are two solutions of the Diophantine
2146 equation: chrec_a (X0) = chrec_b (Y0). */
2147 HOST_WIDE_INT x0, y0;
2148 HOST_WIDE_INT niter, niter_a, niter_b;
2150 niter_a = estimated_loop_iterations_int (get_chrec_loop (chrec_a),
2151 false);
2152 niter_b = estimated_loop_iterations_int (get_chrec_loop (chrec_b),
2153 false);
2155 if (niter_a < 0 || niter_b < 0)
2157 if (dump_file && (dump_flags & TDF_DETAILS))
2158 fprintf (dump_file, "affine-affine test failed: missing iteration counts.\n");
2159 *overlaps_a = conflict_fn_not_known ();
2160 *overlaps_b = conflict_fn_not_known ();
2161 *last_conflicts = chrec_dont_know;
2162 goto end_analyze_subs_aa;
2165 niter = MIN (niter_a, niter_b);
2167 i0 = U[0][0] * gamma / gcd_alpha_beta;
2168 j0 = U[0][1] * gamma / gcd_alpha_beta;
2169 i1 = U[1][0];
2170 j1 = U[1][1];
2172 if ((i1 == 0 && i0 < 0)
2173 || (j1 == 0 && j0 < 0))
2175 /* There is no solution.
2176 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2177 falls in here, but for the moment we don't look at the
2178 upper bound of the iteration domain. */
2179 *overlaps_a = conflict_fn_no_dependence ();
2180 *overlaps_b = conflict_fn_no_dependence ();
2181 *last_conflicts = integer_zero_node;
2184 else
2186 if (i1 > 0)
2188 tau1 = CEIL (-i0, i1);
2189 tau2 = FLOOR_DIV (niter - i0, i1);
2191 if (j1 > 0)
2193 int last_conflict, min_multiple;
2194 tau1 = MAX (tau1, CEIL (-j0, j1));
2195 tau2 = MIN (tau2, FLOOR_DIV (niter - j0, j1));
2197 x0 = i1 * tau1 + i0;
2198 y0 = j1 * tau1 + j0;
2200 /* At this point (x0, y0) is one of the
2201 solutions to the Diophantine equation. The
2202 next step has to compute the smallest
2203 positive solution: the first conflicts. */
2204 min_multiple = MIN (x0 / i1, y0 / j1);
2205 x0 -= i1 * min_multiple;
2206 y0 -= j1 * min_multiple;
2208 tau1 = (x0 - i0)/i1;
2209 last_conflict = tau2 - tau1;
2211 /* If the overlap occurs outside of the bounds of the
2212 loop, there is no dependence. */
2213 if (x0 > niter || y0 > niter)
2215 *overlaps_a = conflict_fn_no_dependence ();
2216 *overlaps_b = conflict_fn_no_dependence ();
2217 *last_conflicts = integer_zero_node;
2219 else
2221 *overlaps_a
2222 = conflict_fn (1,
2223 affine_fn_univar (build_int_cst (NULL_TREE, x0),
2225 build_int_cst (NULL_TREE, i1)));
2226 *overlaps_b
2227 = conflict_fn (1,
2228 affine_fn_univar (build_int_cst (NULL_TREE, y0),
2230 build_int_cst (NULL_TREE, j1)));
2231 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2234 else
2236 /* FIXME: For the moment, the upper bound of the
2237 iteration domain for j is not checked. */
2238 if (dump_file && (dump_flags & TDF_DETAILS))
2239 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2240 *overlaps_a = conflict_fn_not_known ();
2241 *overlaps_b = conflict_fn_not_known ();
2242 *last_conflicts = chrec_dont_know;
2246 else
2248 /* FIXME: For the moment, the upper bound of the
2249 iteration domain for i is not checked. */
2250 if (dump_file && (dump_flags & TDF_DETAILS))
2251 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2252 *overlaps_a = conflict_fn_not_known ();
2253 *overlaps_b = conflict_fn_not_known ();
2254 *last_conflicts = chrec_dont_know;
2258 else
2260 if (dump_file && (dump_flags & TDF_DETAILS))
2261 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2262 *overlaps_a = conflict_fn_not_known ();
2263 *overlaps_b = conflict_fn_not_known ();
2264 *last_conflicts = chrec_dont_know;
2268 else
2270 if (dump_file && (dump_flags & TDF_DETAILS))
2271 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2272 *overlaps_a = conflict_fn_not_known ();
2273 *overlaps_b = conflict_fn_not_known ();
2274 *last_conflicts = chrec_dont_know;
2277 end_analyze_subs_aa:
2278 if (dump_file && (dump_flags & TDF_DETAILS))
2280 fprintf (dump_file, " (overlaps_a = ");
2281 dump_conflict_function (dump_file, *overlaps_a);
2282 fprintf (dump_file, ")\n (overlaps_b = ");
2283 dump_conflict_function (dump_file, *overlaps_b);
2284 fprintf (dump_file, ")\n");
2285 fprintf (dump_file, ")\n");
2289 /* Returns true when analyze_subscript_affine_affine can be used for
2290 determining the dependence relation between chrec_a and chrec_b,
2291 that contain symbols. This function modifies chrec_a and chrec_b
2292 such that the analysis result is the same, and such that they don't
2293 contain symbols, and then can safely be passed to the analyzer.
2295 Example: The analysis of the following tuples of evolutions produce
2296 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2297 vs. {0, +, 1}_1
2299 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2300 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2303 static bool
2304 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2306 tree diff, type, left_a, left_b, right_b;
2308 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2309 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2310 /* FIXME: For the moment not handled. Might be refined later. */
2311 return false;
2313 type = chrec_type (*chrec_a);
2314 left_a = CHREC_LEFT (*chrec_a);
2315 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL_TREE);
2316 diff = chrec_fold_minus (type, left_a, left_b);
2318 if (!evolution_function_is_constant_p (diff))
2319 return false;
2321 if (dump_file && (dump_flags & TDF_DETAILS))
2322 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2324 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2325 diff, CHREC_RIGHT (*chrec_a));
2326 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL_TREE);
2327 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2328 build_int_cst (type, 0),
2329 right_b);
2330 return true;
2333 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2334 *OVERLAPS_B are initialized to the functions that describe the
2335 relation between the elements accessed twice by CHREC_A and
2336 CHREC_B. For k >= 0, the following property is verified:
2338 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2340 static void
2341 analyze_siv_subscript (tree chrec_a,
2342 tree chrec_b,
2343 conflict_function **overlaps_a,
2344 conflict_function **overlaps_b,
2345 tree *last_conflicts)
2347 dependence_stats.num_siv++;
2349 if (dump_file && (dump_flags & TDF_DETAILS))
2350 fprintf (dump_file, "(analyze_siv_subscript \n");
2352 if (evolution_function_is_constant_p (chrec_a)
2353 && evolution_function_is_affine_p (chrec_b))
2354 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2355 overlaps_a, overlaps_b, last_conflicts);
2357 else if (evolution_function_is_affine_p (chrec_a)
2358 && evolution_function_is_constant_p (chrec_b))
2359 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2360 overlaps_b, overlaps_a, last_conflicts);
2362 else if (evolution_function_is_affine_p (chrec_a)
2363 && evolution_function_is_affine_p (chrec_b))
2365 if (!chrec_contains_symbols (chrec_a)
2366 && !chrec_contains_symbols (chrec_b))
2368 analyze_subscript_affine_affine (chrec_a, chrec_b,
2369 overlaps_a, overlaps_b,
2370 last_conflicts);
2372 if (CF_NOT_KNOWN_P (*overlaps_a)
2373 || CF_NOT_KNOWN_P (*overlaps_b))
2374 dependence_stats.num_siv_unimplemented++;
2375 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2376 || CF_NO_DEPENDENCE_P (*overlaps_b))
2377 dependence_stats.num_siv_independent++;
2378 else
2379 dependence_stats.num_siv_dependent++;
2381 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2382 &chrec_b))
2384 analyze_subscript_affine_affine (chrec_a, chrec_b,
2385 overlaps_a, overlaps_b,
2386 last_conflicts);
2388 if (CF_NOT_KNOWN_P (*overlaps_a)
2389 || CF_NOT_KNOWN_P (*overlaps_b))
2390 dependence_stats.num_siv_unimplemented++;
2391 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2392 || CF_NO_DEPENDENCE_P (*overlaps_b))
2393 dependence_stats.num_siv_independent++;
2394 else
2395 dependence_stats.num_siv_dependent++;
2397 else
2398 goto siv_subscript_dontknow;
2401 else
2403 siv_subscript_dontknow:;
2404 if (dump_file && (dump_flags & TDF_DETAILS))
2405 fprintf (dump_file, "siv test failed: unimplemented.\n");
2406 *overlaps_a = conflict_fn_not_known ();
2407 *overlaps_b = conflict_fn_not_known ();
2408 *last_conflicts = chrec_dont_know;
2409 dependence_stats.num_siv_unimplemented++;
2412 if (dump_file && (dump_flags & TDF_DETAILS))
2413 fprintf (dump_file, ")\n");
2416 /* Returns false if we can prove that the greatest common divisor of the steps
2417 of CHREC does not divide CST, false otherwise. */
2419 static bool
2420 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2422 HOST_WIDE_INT cd = 0, val;
2423 tree step;
2425 if (!host_integerp (cst, 0))
2426 return true;
2427 val = tree_low_cst (cst, 0);
2429 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2431 step = CHREC_RIGHT (chrec);
2432 if (!host_integerp (step, 0))
2433 return true;
2434 cd = gcd (cd, tree_low_cst (step, 0));
2435 chrec = CHREC_LEFT (chrec);
2438 return val % cd == 0;
2441 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2442 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2443 functions that describe the relation between the elements accessed
2444 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2445 is verified:
2447 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2449 static void
2450 analyze_miv_subscript (tree chrec_a,
2451 tree chrec_b,
2452 conflict_function **overlaps_a,
2453 conflict_function **overlaps_b,
2454 tree *last_conflicts,
2455 struct loop *loop_nest)
2457 /* FIXME: This is a MIV subscript, not yet handled.
2458 Example: (A[{1, +, 1}_1] vs. A[{1, +, 1}_2]) that comes from
2459 (A[i] vs. A[j]).
2461 In the SIV test we had to solve a Diophantine equation with two
2462 variables. In the MIV case we have to solve a Diophantine
2463 equation with 2*n variables (if the subscript uses n IVs).
2465 tree difference;
2466 dependence_stats.num_miv++;
2467 if (dump_file && (dump_flags & TDF_DETAILS))
2468 fprintf (dump_file, "(analyze_miv_subscript \n");
2470 chrec_a = chrec_convert (integer_type_node, chrec_a, NULL_TREE);
2471 chrec_b = chrec_convert (integer_type_node, chrec_b, NULL_TREE);
2472 difference = chrec_fold_minus (integer_type_node, chrec_a, chrec_b);
2474 if (eq_evolutions_p (chrec_a, chrec_b))
2476 /* Access functions are the same: all the elements are accessed
2477 in the same order. */
2478 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2479 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2480 *last_conflicts = estimated_loop_iterations_tree
2481 (get_chrec_loop (chrec_a), true);
2482 dependence_stats.num_miv_dependent++;
2485 else if (evolution_function_is_constant_p (difference)
2486 /* For the moment, the following is verified:
2487 evolution_function_is_affine_multivariate_p (chrec_a,
2488 loop_nest->num) */
2489 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2491 /* testsuite/.../ssa-chrec-33.c
2492 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2494 The difference is 1, and all the evolution steps are multiples
2495 of 2, consequently there are no overlapping elements. */
2496 *overlaps_a = conflict_fn_no_dependence ();
2497 *overlaps_b = conflict_fn_no_dependence ();
2498 *last_conflicts = integer_zero_node;
2499 dependence_stats.num_miv_independent++;
2502 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2503 && !chrec_contains_symbols (chrec_a)
2504 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2505 && !chrec_contains_symbols (chrec_b))
2507 /* testsuite/.../ssa-chrec-35.c
2508 {0, +, 1}_2 vs. {0, +, 1}_3
2509 the overlapping elements are respectively located at iterations:
2510 {0, +, 1}_x and {0, +, 1}_x,
2511 in other words, we have the equality:
2512 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2514 Other examples:
2515 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2516 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2518 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2519 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2521 analyze_subscript_affine_affine (chrec_a, chrec_b,
2522 overlaps_a, overlaps_b, last_conflicts);
2524 if (CF_NOT_KNOWN_P (*overlaps_a)
2525 || CF_NOT_KNOWN_P (*overlaps_b))
2526 dependence_stats.num_miv_unimplemented++;
2527 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2528 || CF_NO_DEPENDENCE_P (*overlaps_b))
2529 dependence_stats.num_miv_independent++;
2530 else
2531 dependence_stats.num_miv_dependent++;
2534 else
2536 /* When the analysis is too difficult, answer "don't know". */
2537 if (dump_file && (dump_flags & TDF_DETAILS))
2538 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2540 *overlaps_a = conflict_fn_not_known ();
2541 *overlaps_b = conflict_fn_not_known ();
2542 *last_conflicts = chrec_dont_know;
2543 dependence_stats.num_miv_unimplemented++;
2546 if (dump_file && (dump_flags & TDF_DETAILS))
2547 fprintf (dump_file, ")\n");
2550 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2551 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2552 OVERLAP_ITERATIONS_B are initialized with two functions that
2553 describe the iterations that contain conflicting elements.
2555 Remark: For an integer k >= 0, the following equality is true:
2557 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2560 static void
2561 analyze_overlapping_iterations (tree chrec_a,
2562 tree chrec_b,
2563 conflict_function **overlap_iterations_a,
2564 conflict_function **overlap_iterations_b,
2565 tree *last_conflicts, struct loop *loop_nest)
2567 unsigned int lnn = loop_nest->num;
2569 dependence_stats.num_subscript_tests++;
2571 if (dump_file && (dump_flags & TDF_DETAILS))
2573 fprintf (dump_file, "(analyze_overlapping_iterations \n");
2574 fprintf (dump_file, " (chrec_a = ");
2575 print_generic_expr (dump_file, chrec_a, 0);
2576 fprintf (dump_file, ")\n (chrec_b = ");
2577 print_generic_expr (dump_file, chrec_b, 0);
2578 fprintf (dump_file, ")\n");
2581 if (chrec_a == NULL_TREE
2582 || chrec_b == NULL_TREE
2583 || chrec_contains_undetermined (chrec_a)
2584 || chrec_contains_undetermined (chrec_b))
2586 dependence_stats.num_subscript_undetermined++;
2588 *overlap_iterations_a = conflict_fn_not_known ();
2589 *overlap_iterations_b = conflict_fn_not_known ();
2592 /* If they are the same chrec, and are affine, they overlap
2593 on every iteration. */
2594 else if (eq_evolutions_p (chrec_a, chrec_b)
2595 && evolution_function_is_affine_multivariate_p (chrec_a, lnn))
2597 dependence_stats.num_same_subscript_function++;
2598 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2599 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2600 *last_conflicts = chrec_dont_know;
2603 /* If they aren't the same, and aren't affine, we can't do anything
2604 yet. */
2605 else if ((chrec_contains_symbols (chrec_a)
2606 || chrec_contains_symbols (chrec_b))
2607 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2608 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
2610 dependence_stats.num_subscript_undetermined++;
2611 *overlap_iterations_a = conflict_fn_not_known ();
2612 *overlap_iterations_b = conflict_fn_not_known ();
2615 else if (ziv_subscript_p (chrec_a, chrec_b))
2616 analyze_ziv_subscript (chrec_a, chrec_b,
2617 overlap_iterations_a, overlap_iterations_b,
2618 last_conflicts);
2620 else if (siv_subscript_p (chrec_a, chrec_b))
2621 analyze_siv_subscript (chrec_a, chrec_b,
2622 overlap_iterations_a, overlap_iterations_b,
2623 last_conflicts);
2625 else
2626 analyze_miv_subscript (chrec_a, chrec_b,
2627 overlap_iterations_a, overlap_iterations_b,
2628 last_conflicts, loop_nest);
2630 if (dump_file && (dump_flags & TDF_DETAILS))
2632 fprintf (dump_file, " (overlap_iterations_a = ");
2633 dump_conflict_function (dump_file, *overlap_iterations_a);
2634 fprintf (dump_file, ")\n (overlap_iterations_b = ");
2635 dump_conflict_function (dump_file, *overlap_iterations_b);
2636 fprintf (dump_file, ")\n");
2637 fprintf (dump_file, ")\n");
2641 /* Helper function for uniquely inserting distance vectors. */
2643 static void
2644 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
2646 unsigned i;
2647 lambda_vector v;
2649 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, v); i++)
2650 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
2651 return;
2653 VEC_safe_push (lambda_vector, heap, DDR_DIST_VECTS (ddr), dist_v);
2656 /* Helper function for uniquely inserting direction vectors. */
2658 static void
2659 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
2661 unsigned i;
2662 lambda_vector v;
2664 for (i = 0; VEC_iterate (lambda_vector, DDR_DIR_VECTS (ddr), i, v); i++)
2665 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
2666 return;
2668 VEC_safe_push (lambda_vector, heap, DDR_DIR_VECTS (ddr), dir_v);
2671 /* Add a distance of 1 on all the loops outer than INDEX. If we
2672 haven't yet determined a distance for this outer loop, push a new
2673 distance vector composed of the previous distance, and a distance
2674 of 1 for this outer loop. Example:
2676 | loop_1
2677 | loop_2
2678 | A[10]
2679 | endloop_2
2680 | endloop_1
2682 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
2683 save (0, 1), then we have to save (1, 0). */
2685 static void
2686 add_outer_distances (struct data_dependence_relation *ddr,
2687 lambda_vector dist_v, int index)
2689 /* For each outer loop where init_v is not set, the accesses are
2690 in dependence of distance 1 in the loop. */
2691 while (--index >= 0)
2693 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2694 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
2695 save_v[index] = 1;
2696 save_dist_v (ddr, save_v);
2700 /* Return false when fail to represent the data dependence as a
2701 distance vector. INIT_B is set to true when a component has been
2702 added to the distance vector DIST_V. INDEX_CARRY is then set to
2703 the index in DIST_V that carries the dependence. */
2705 static bool
2706 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
2707 struct data_reference *ddr_a,
2708 struct data_reference *ddr_b,
2709 lambda_vector dist_v, bool *init_b,
2710 int *index_carry)
2712 unsigned i;
2713 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2715 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2717 tree access_fn_a, access_fn_b;
2718 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
2720 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2722 non_affine_dependence_relation (ddr);
2723 return false;
2726 access_fn_a = DR_ACCESS_FN (ddr_a, i);
2727 access_fn_b = DR_ACCESS_FN (ddr_b, i);
2729 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
2730 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
2732 int dist, index;
2733 int index_a = index_in_loop_nest (CHREC_VARIABLE (access_fn_a),
2734 DDR_LOOP_NEST (ddr));
2735 int index_b = index_in_loop_nest (CHREC_VARIABLE (access_fn_b),
2736 DDR_LOOP_NEST (ddr));
2738 /* The dependence is carried by the outermost loop. Example:
2739 | loop_1
2740 | A[{4, +, 1}_1]
2741 | loop_2
2742 | A[{5, +, 1}_2]
2743 | endloop_2
2744 | endloop_1
2745 In this case, the dependence is carried by loop_1. */
2746 index = index_a < index_b ? index_a : index_b;
2747 *index_carry = MIN (index, *index_carry);
2749 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2751 non_affine_dependence_relation (ddr);
2752 return false;
2755 dist = int_cst_value (SUB_DISTANCE (subscript));
2757 /* This is the subscript coupling test. If we have already
2758 recorded a distance for this loop (a distance coming from
2759 another subscript), it should be the same. For example,
2760 in the following code, there is no dependence:
2762 | loop i = 0, N, 1
2763 | T[i+1][i] = ...
2764 | ... = T[i][i]
2765 | endloop
2767 if (init_v[index] != 0 && dist_v[index] != dist)
2769 finalize_ddr_dependent (ddr, chrec_known);
2770 return false;
2773 dist_v[index] = dist;
2774 init_v[index] = 1;
2775 *init_b = true;
2777 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
2779 /* This can be for example an affine vs. constant dependence
2780 (T[i] vs. T[3]) that is not an affine dependence and is
2781 not representable as a distance vector. */
2782 non_affine_dependence_relation (ddr);
2783 return false;
2787 return true;
2790 /* Return true when the DDR contains two data references that have the
2791 same access functions. */
2793 static bool
2794 same_access_functions (const struct data_dependence_relation *ddr)
2796 unsigned i;
2798 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2799 if (!eq_evolutions_p (DR_ACCESS_FN (DDR_A (ddr), i),
2800 DR_ACCESS_FN (DDR_B (ddr), i)))
2801 return false;
2803 return true;
2806 /* Return true when the DDR contains only constant access functions. */
2808 static bool
2809 constant_access_functions (const struct data_dependence_relation *ddr)
2811 unsigned i;
2813 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2814 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
2815 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
2816 return false;
2818 return true;
2822 /* Helper function for the case where DDR_A and DDR_B are the same
2823 multivariate access function. */
2825 static void
2826 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
2828 int x_1, x_2;
2829 tree c_1 = CHREC_LEFT (c_2);
2830 tree c_0 = CHREC_LEFT (c_1);
2831 lambda_vector dist_v;
2832 int v1, v2, cd;
2834 /* Polynomials with more than 2 variables are not handled yet. When
2835 the evolution steps are parameters, it is not possible to
2836 represent the dependence using classical distance vectors. */
2837 if (TREE_CODE (c_0) != INTEGER_CST
2838 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
2839 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
2841 DDR_AFFINE_P (ddr) = false;
2842 return;
2845 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
2846 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
2848 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
2849 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2850 v1 = int_cst_value (CHREC_RIGHT (c_1));
2851 v2 = int_cst_value (CHREC_RIGHT (c_2));
2852 cd = gcd (v1, v2);
2853 v1 /= cd;
2854 v2 /= cd;
2856 if (v2 < 0)
2858 v2 = -v2;
2859 v1 = -v1;
2862 dist_v[x_1] = v2;
2863 dist_v[x_2] = -v1;
2864 save_dist_v (ddr, dist_v);
2866 add_outer_distances (ddr, dist_v, x_1);
2869 /* Helper function for the case where DDR_A and DDR_B are the same
2870 access functions. */
2872 static void
2873 add_other_self_distances (struct data_dependence_relation *ddr)
2875 lambda_vector dist_v;
2876 unsigned i;
2877 int index_carry = DDR_NB_LOOPS (ddr);
2879 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2881 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
2883 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
2885 if (!evolution_function_is_univariate_p (access_fun))
2887 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
2889 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
2890 return;
2893 add_multivariate_self_dist (ddr, DR_ACCESS_FN (DDR_A (ddr), 0));
2894 return;
2897 index_carry = MIN (index_carry,
2898 index_in_loop_nest (CHREC_VARIABLE (access_fun),
2899 DDR_LOOP_NEST (ddr)));
2903 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2904 add_outer_distances (ddr, dist_v, index_carry);
2907 static void
2908 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
2910 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2912 dist_v[DDR_INNER_LOOP (ddr)] = 1;
2913 save_dist_v (ddr, dist_v);
2916 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
2917 is the case for example when access functions are the same and
2918 equal to a constant, as in:
2920 | loop_1
2921 | A[3] = ...
2922 | ... = A[3]
2923 | endloop_1
2925 in which case the distance vectors are (0) and (1). */
2927 static void
2928 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
2930 unsigned i, j;
2932 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2934 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
2935 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
2936 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
2938 for (j = 0; j < ca->n; j++)
2939 if (affine_function_zero_p (ca->fns[j]))
2941 insert_innermost_unit_dist_vector (ddr);
2942 return;
2945 for (j = 0; j < cb->n; j++)
2946 if (affine_function_zero_p (cb->fns[j]))
2948 insert_innermost_unit_dist_vector (ddr);
2949 return;
2954 /* Compute the classic per loop distance vector. DDR is the data
2955 dependence relation to build a vector from. Return false when fail
2956 to represent the data dependence as a distance vector. */
2958 static bool
2959 build_classic_dist_vector (struct data_dependence_relation *ddr,
2960 struct loop *loop_nest)
2962 bool init_b = false;
2963 int index_carry = DDR_NB_LOOPS (ddr);
2964 lambda_vector dist_v;
2966 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
2967 return false;
2969 if (same_access_functions (ddr))
2971 /* Save the 0 vector. */
2972 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2973 save_dist_v (ddr, dist_v);
2975 if (constant_access_functions (ddr))
2976 add_distance_for_zero_overlaps (ddr);
2978 if (DDR_NB_LOOPS (ddr) > 1)
2979 add_other_self_distances (ddr);
2981 return true;
2984 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2985 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
2986 dist_v, &init_b, &index_carry))
2987 return false;
2989 /* Save the distance vector if we initialized one. */
2990 if (init_b)
2992 /* Verify a basic constraint: classic distance vectors should
2993 always be lexicographically positive.
2995 Data references are collected in the order of execution of
2996 the program, thus for the following loop
2998 | for (i = 1; i < 100; i++)
2999 | for (j = 1; j < 100; j++)
3001 | t = T[j+1][i-1]; // A
3002 | T[j][i] = t + 2; // B
3005 references are collected following the direction of the wind:
3006 A then B. The data dependence tests are performed also
3007 following this order, such that we're looking at the distance
3008 separating the elements accessed by A from the elements later
3009 accessed by B. But in this example, the distance returned by
3010 test_dep (A, B) is lexicographically negative (-1, 1), that
3011 means that the access A occurs later than B with respect to
3012 the outer loop, ie. we're actually looking upwind. In this
3013 case we solve test_dep (B, A) looking downwind to the
3014 lexicographically positive solution, that returns the
3015 distance vector (1, -1). */
3016 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3018 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3019 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3020 loop_nest))
3021 return false;
3022 compute_subscript_distance (ddr);
3023 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3024 save_v, &init_b, &index_carry))
3025 return false;
3026 save_dist_v (ddr, save_v);
3027 DDR_REVERSED_P (ddr) = true;
3029 /* In this case there is a dependence forward for all the
3030 outer loops:
3032 | for (k = 1; k < 100; k++)
3033 | for (i = 1; i < 100; i++)
3034 | for (j = 1; j < 100; j++)
3036 | t = T[j+1][i-1]; // A
3037 | T[j][i] = t + 2; // B
3040 the vectors are:
3041 (0, 1, -1)
3042 (1, 1, -1)
3043 (1, -1, 1)
3045 if (DDR_NB_LOOPS (ddr) > 1)
3047 add_outer_distances (ddr, save_v, index_carry);
3048 add_outer_distances (ddr, dist_v, index_carry);
3051 else
3053 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3054 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3056 if (DDR_NB_LOOPS (ddr) > 1)
3058 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3060 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3061 DDR_A (ddr), loop_nest))
3062 return false;
3063 compute_subscript_distance (ddr);
3064 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3065 opposite_v, &init_b,
3066 &index_carry))
3067 return false;
3069 save_dist_v (ddr, save_v);
3070 add_outer_distances (ddr, dist_v, index_carry);
3071 add_outer_distances (ddr, opposite_v, index_carry);
3073 else
3074 save_dist_v (ddr, save_v);
3077 else
3079 /* There is a distance of 1 on all the outer loops: Example:
3080 there is a dependence of distance 1 on loop_1 for the array A.
3082 | loop_1
3083 | A[5] = ...
3084 | endloop
3086 add_outer_distances (ddr, dist_v,
3087 lambda_vector_first_nz (dist_v,
3088 DDR_NB_LOOPS (ddr), 0));
3091 if (dump_file && (dump_flags & TDF_DETAILS))
3093 unsigned i;
3095 fprintf (dump_file, "(build_classic_dist_vector\n");
3096 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3098 fprintf (dump_file, " dist_vector = (");
3099 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3100 DDR_NB_LOOPS (ddr));
3101 fprintf (dump_file, " )\n");
3103 fprintf (dump_file, ")\n");
3106 return true;
3109 /* Return the direction for a given distance.
3110 FIXME: Computing dir this way is suboptimal, since dir can catch
3111 cases that dist is unable to represent. */
3113 static inline enum data_dependence_direction
3114 dir_from_dist (int dist)
3116 if (dist > 0)
3117 return dir_positive;
3118 else if (dist < 0)
3119 return dir_negative;
3120 else
3121 return dir_equal;
3124 /* Compute the classic per loop direction vector. DDR is the data
3125 dependence relation to build a vector from. */
3127 static void
3128 build_classic_dir_vector (struct data_dependence_relation *ddr)
3130 unsigned i, j;
3131 lambda_vector dist_v;
3133 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
3135 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3137 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3138 dir_v[j] = dir_from_dist (dist_v[j]);
3140 save_dir_v (ddr, dir_v);
3144 /* Helper function. Returns true when there is a dependence between
3145 data references DRA and DRB. */
3147 static bool
3148 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3149 struct data_reference *dra,
3150 struct data_reference *drb,
3151 struct loop *loop_nest)
3153 unsigned int i;
3154 tree last_conflicts;
3155 struct subscript *subscript;
3157 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3158 i++)
3160 conflict_function *overlaps_a, *overlaps_b;
3162 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3163 DR_ACCESS_FN (drb, i),
3164 &overlaps_a, &overlaps_b,
3165 &last_conflicts, loop_nest);
3167 if (CF_NOT_KNOWN_P (overlaps_a)
3168 || CF_NOT_KNOWN_P (overlaps_b))
3170 finalize_ddr_dependent (ddr, chrec_dont_know);
3171 dependence_stats.num_dependence_undetermined++;
3172 free_conflict_function (overlaps_a);
3173 free_conflict_function (overlaps_b);
3174 return false;
3177 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3178 || CF_NO_DEPENDENCE_P (overlaps_b))
3180 finalize_ddr_dependent (ddr, chrec_known);
3181 dependence_stats.num_dependence_independent++;
3182 free_conflict_function (overlaps_a);
3183 free_conflict_function (overlaps_b);
3184 return false;
3187 else
3189 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3190 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3191 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3195 return true;
3198 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3200 static void
3201 subscript_dependence_tester (struct data_dependence_relation *ddr,
3202 struct loop *loop_nest)
3205 if (dump_file && (dump_flags & TDF_DETAILS))
3206 fprintf (dump_file, "(subscript_dependence_tester \n");
3208 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3209 dependence_stats.num_dependence_dependent++;
3211 compute_subscript_distance (ddr);
3212 if (build_classic_dist_vector (ddr, loop_nest))
3213 build_classic_dir_vector (ddr);
3215 if (dump_file && (dump_flags & TDF_DETAILS))
3216 fprintf (dump_file, ")\n");
3219 /* Returns true when all the access functions of A are affine or
3220 constant with respect to LOOP_NEST. */
3222 static bool
3223 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3224 const struct loop *loop_nest)
3226 unsigned int i;
3227 VEC(tree,heap) *fns = DR_ACCESS_FNS (a);
3228 tree t;
3230 for (i = 0; VEC_iterate (tree, fns, i, t); i++)
3231 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3232 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3233 return false;
3235 return true;
3238 /* Initializes an equation for an OMEGA problem using the information
3239 contained in the ACCESS_FUN. Returns true when the operation
3240 succeeded.
3242 PB is the omega constraint system.
3243 EQ is the number of the equation to be initialized.
3244 OFFSET is used for shifting the variables names in the constraints:
3245 a constrain is composed of 2 * the number of variables surrounding
3246 dependence accesses. OFFSET is set either to 0 for the first n variables,
3247 then it is set to n.
3248 ACCESS_FUN is expected to be an affine chrec. */
3250 static bool
3251 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3252 unsigned int offset, tree access_fun,
3253 struct data_dependence_relation *ddr)
3255 switch (TREE_CODE (access_fun))
3257 case POLYNOMIAL_CHREC:
3259 tree left = CHREC_LEFT (access_fun);
3260 tree right = CHREC_RIGHT (access_fun);
3261 int var = CHREC_VARIABLE (access_fun);
3262 unsigned var_idx;
3264 if (TREE_CODE (right) != INTEGER_CST)
3265 return false;
3267 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3268 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3270 /* Compute the innermost loop index. */
3271 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3273 if (offset == 0)
3274 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3275 += int_cst_value (right);
3277 switch (TREE_CODE (left))
3279 case POLYNOMIAL_CHREC:
3280 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3282 case INTEGER_CST:
3283 pb->eqs[eq].coef[0] += int_cst_value (left);
3284 return true;
3286 default:
3287 return false;
3291 case INTEGER_CST:
3292 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3293 return true;
3295 default:
3296 return false;
3300 /* As explained in the comments preceding init_omega_for_ddr, we have
3301 to set up a system for each loop level, setting outer loops
3302 variation to zero, and current loop variation to positive or zero.
3303 Save each lexico positive distance vector. */
3305 static void
3306 omega_extract_distance_vectors (omega_pb pb,
3307 struct data_dependence_relation *ddr)
3309 int eq, geq;
3310 unsigned i, j;
3311 struct loop *loopi, *loopj;
3312 enum omega_result res;
3314 /* Set a new problem for each loop in the nest. The basis is the
3315 problem that we have initialized until now. On top of this we
3316 add new constraints. */
3317 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3318 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3320 int dist = 0;
3321 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3322 DDR_NB_LOOPS (ddr));
3324 omega_copy_problem (copy, pb);
3326 /* For all the outer loops "loop_j", add "dj = 0". */
3327 for (j = 0;
3328 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3330 eq = omega_add_zero_eq (copy, omega_black);
3331 copy->eqs[eq].coef[j + 1] = 1;
3334 /* For "loop_i", add "0 <= di". */
3335 geq = omega_add_zero_geq (copy, omega_black);
3336 copy->geqs[geq].coef[i + 1] = 1;
3338 /* Reduce the constraint system, and test that the current
3339 problem is feasible. */
3340 res = omega_simplify_problem (copy);
3341 if (res == omega_false
3342 || res == omega_unknown
3343 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3344 goto next_problem;
3346 for (eq = 0; eq < copy->num_subs; eq++)
3347 if (copy->subs[eq].key == (int) i + 1)
3349 dist = copy->subs[eq].coef[0];
3350 goto found_dist;
3353 if (dist == 0)
3355 /* Reinitialize problem... */
3356 omega_copy_problem (copy, pb);
3357 for (j = 0;
3358 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3360 eq = omega_add_zero_eq (copy, omega_black);
3361 copy->eqs[eq].coef[j + 1] = 1;
3364 /* ..., but this time "di = 1". */
3365 eq = omega_add_zero_eq (copy, omega_black);
3366 copy->eqs[eq].coef[i + 1] = 1;
3367 copy->eqs[eq].coef[0] = -1;
3369 res = omega_simplify_problem (copy);
3370 if (res == omega_false
3371 || res == omega_unknown
3372 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3373 goto next_problem;
3375 for (eq = 0; eq < copy->num_subs; eq++)
3376 if (copy->subs[eq].key == (int) i + 1)
3378 dist = copy->subs[eq].coef[0];
3379 goto found_dist;
3383 found_dist:;
3384 /* Save the lexicographically positive distance vector. */
3385 if (dist >= 0)
3387 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3388 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3390 dist_v[i] = dist;
3392 for (eq = 0; eq < copy->num_subs; eq++)
3393 if (copy->subs[eq].key > 0)
3395 dist = copy->subs[eq].coef[0];
3396 dist_v[copy->subs[eq].key - 1] = dist;
3399 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3400 dir_v[j] = dir_from_dist (dist_v[j]);
3402 save_dist_v (ddr, dist_v);
3403 save_dir_v (ddr, dir_v);
3406 next_problem:;
3407 omega_free_problem (copy);
3411 /* This is called for each subscript of a tuple of data references:
3412 insert an equality for representing the conflicts. */
3414 static bool
3415 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3416 struct data_dependence_relation *ddr,
3417 omega_pb pb, bool *maybe_dependent)
3419 int eq;
3420 tree fun_a = chrec_convert (integer_type_node, access_fun_a, NULL_TREE);
3421 tree fun_b = chrec_convert (integer_type_node, access_fun_b, NULL_TREE);
3422 tree difference = chrec_fold_minus (integer_type_node, fun_a, fun_b);
3424 /* When the fun_a - fun_b is not constant, the dependence is not
3425 captured by the classic distance vector representation. */
3426 if (TREE_CODE (difference) != INTEGER_CST)
3427 return false;
3429 /* ZIV test. */
3430 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3432 /* There is no dependence. */
3433 *maybe_dependent = false;
3434 return true;
3437 fun_b = chrec_fold_multiply (integer_type_node, fun_b,
3438 integer_minus_one_node);
3440 eq = omega_add_zero_eq (pb, omega_black);
3441 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3442 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3443 /* There is probably a dependence, but the system of
3444 constraints cannot be built: answer "don't know". */
3445 return false;
3447 /* GCD test. */
3448 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3449 && !int_divides_p (lambda_vector_gcd
3450 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3451 2 * DDR_NB_LOOPS (ddr)),
3452 pb->eqs[eq].coef[0]))
3454 /* There is no dependence. */
3455 *maybe_dependent = false;
3456 return true;
3459 return true;
3462 /* Helper function, same as init_omega_for_ddr but specialized for
3463 data references A and B. */
3465 static bool
3466 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3467 struct data_dependence_relation *ddr,
3468 omega_pb pb, bool *maybe_dependent)
3470 unsigned i;
3471 int ineq;
3472 struct loop *loopi;
3473 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3475 /* Insert an equality per subscript. */
3476 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3478 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3479 ddr, pb, maybe_dependent))
3480 return false;
3481 else if (*maybe_dependent == false)
3483 /* There is no dependence. */
3484 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3485 return true;
3489 /* Insert inequalities: constraints corresponding to the iteration
3490 domain, i.e. the loops surrounding the references "loop_x" and
3491 the distance variables "dx". The layout of the OMEGA
3492 representation is as follows:
3493 - coef[0] is the constant
3494 - coef[1..nb_loops] are the protected variables that will not be
3495 removed by the solver: the "dx"
3496 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3498 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3499 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3501 HOST_WIDE_INT nbi = estimated_loop_iterations_int (loopi, false);
3503 /* 0 <= loop_x */
3504 ineq = omega_add_zero_geq (pb, omega_black);
3505 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3507 /* 0 <= loop_x + dx */
3508 ineq = omega_add_zero_geq (pb, omega_black);
3509 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3510 pb->geqs[ineq].coef[i + 1] = 1;
3512 if (nbi != -1)
3514 /* loop_x <= nb_iters */
3515 ineq = omega_add_zero_geq (pb, omega_black);
3516 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3517 pb->geqs[ineq].coef[0] = nbi;
3519 /* loop_x + dx <= nb_iters */
3520 ineq = omega_add_zero_geq (pb, omega_black);
3521 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3522 pb->geqs[ineq].coef[i + 1] = -1;
3523 pb->geqs[ineq].coef[0] = nbi;
3525 /* A step "dx" bigger than nb_iters is not feasible, so
3526 add "0 <= nb_iters + dx", */
3527 ineq = omega_add_zero_geq (pb, omega_black);
3528 pb->geqs[ineq].coef[i + 1] = 1;
3529 pb->geqs[ineq].coef[0] = nbi;
3530 /* and "dx <= nb_iters". */
3531 ineq = omega_add_zero_geq (pb, omega_black);
3532 pb->geqs[ineq].coef[i + 1] = -1;
3533 pb->geqs[ineq].coef[0] = nbi;
3537 omega_extract_distance_vectors (pb, ddr);
3539 return true;
3542 /* Sets up the Omega dependence problem for the data dependence
3543 relation DDR. Returns false when the constraint system cannot be
3544 built, ie. when the test answers "don't know". Returns true
3545 otherwise, and when independence has been proved (using one of the
3546 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3547 set MAYBE_DEPENDENT to true.
3549 Example: for setting up the dependence system corresponding to the
3550 conflicting accesses
3552 | loop_i
3553 | loop_j
3554 | A[i, i+1] = ...
3555 | ... A[2*j, 2*(i + j)]
3556 | endloop_j
3557 | endloop_i
3559 the following constraints come from the iteration domain:
3561 0 <= i <= Ni
3562 0 <= i + di <= Ni
3563 0 <= j <= Nj
3564 0 <= j + dj <= Nj
3566 where di, dj are the distance variables. The constraints
3567 representing the conflicting elements are:
3569 i = 2 * (j + dj)
3570 i + 1 = 2 * (i + di + j + dj)
3572 For asking that the resulting distance vector (di, dj) be
3573 lexicographically positive, we insert the constraint "di >= 0". If
3574 "di = 0" in the solution, we fix that component to zero, and we
3575 look at the inner loops: we set a new problem where all the outer
3576 loop distances are zero, and fix this inner component to be
3577 positive. When one of the components is positive, we save that
3578 distance, and set a new problem where the distance on this loop is
3579 zero, searching for other distances in the inner loops. Here is
3580 the classic example that illustrates that we have to set for each
3581 inner loop a new problem:
3583 | loop_1
3584 | loop_2
3585 | A[10]
3586 | endloop_2
3587 | endloop_1
3589 we have to save two distances (1, 0) and (0, 1).
3591 Given two array references, refA and refB, we have to set the
3592 dependence problem twice, refA vs. refB and refB vs. refA, and we
3593 cannot do a single test, as refB might occur before refA in the
3594 inner loops, and the contrary when considering outer loops: ex.
3596 | loop_0
3597 | loop_1
3598 | loop_2
3599 | T[{1,+,1}_2][{1,+,1}_1] // refA
3600 | T[{2,+,1}_2][{0,+,1}_1] // refB
3601 | endloop_2
3602 | endloop_1
3603 | endloop_0
3605 refB touches the elements in T before refA, and thus for the same
3606 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3607 but for successive loop_0 iterations, we have (1, -1, 1)
3609 The Omega solver expects the distance variables ("di" in the
3610 previous example) to come first in the constraint system (as
3611 variables to be protected, or "safe" variables), the constraint
3612 system is built using the following layout:
3614 "cst | distance vars | index vars".
3617 static bool
3618 init_omega_for_ddr (struct data_dependence_relation *ddr,
3619 bool *maybe_dependent)
3621 omega_pb pb;
3622 bool res = false;
3624 *maybe_dependent = true;
3626 if (same_access_functions (ddr))
3628 unsigned j;
3629 lambda_vector dir_v;
3631 /* Save the 0 vector. */
3632 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3633 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3634 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3635 dir_v[j] = dir_equal;
3636 save_dir_v (ddr, dir_v);
3638 /* Save the dependences carried by outer loops. */
3639 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3640 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3641 maybe_dependent);
3642 omega_free_problem (pb);
3643 return res;
3646 /* Omega expects the protected variables (those that have to be kept
3647 after elimination) to appear first in the constraint system.
3648 These variables are the distance variables. In the following
3649 initialization we declare NB_LOOPS safe variables, and the total
3650 number of variables for the constraint system is 2*NB_LOOPS. */
3651 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3652 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3653 maybe_dependent);
3654 omega_free_problem (pb);
3656 /* Stop computation if not decidable, or no dependence. */
3657 if (res == false || *maybe_dependent == false)
3658 return res;
3660 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3661 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
3662 maybe_dependent);
3663 omega_free_problem (pb);
3665 return res;
3668 /* Return true when DDR contains the same information as that stored
3669 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
3671 static bool
3672 ddr_consistent_p (FILE *file,
3673 struct data_dependence_relation *ddr,
3674 VEC (lambda_vector, heap) *dist_vects,
3675 VEC (lambda_vector, heap) *dir_vects)
3677 unsigned int i, j;
3679 /* If dump_file is set, output there. */
3680 if (dump_file && (dump_flags & TDF_DETAILS))
3681 file = dump_file;
3683 if (VEC_length (lambda_vector, dist_vects) != DDR_NUM_DIST_VECTS (ddr))
3685 lambda_vector b_dist_v;
3686 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
3687 VEC_length (lambda_vector, dist_vects),
3688 DDR_NUM_DIST_VECTS (ddr));
3690 fprintf (file, "Banerjee dist vectors:\n");
3691 for (i = 0; VEC_iterate (lambda_vector, dist_vects, i, b_dist_v); i++)
3692 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
3694 fprintf (file, "Omega dist vectors:\n");
3695 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3696 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
3698 fprintf (file, "data dependence relation:\n");
3699 dump_data_dependence_relation (file, ddr);
3701 fprintf (file, ")\n");
3702 return false;
3705 if (VEC_length (lambda_vector, dir_vects) != DDR_NUM_DIR_VECTS (ddr))
3707 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
3708 VEC_length (lambda_vector, dir_vects),
3709 DDR_NUM_DIR_VECTS (ddr));
3710 return false;
3713 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3715 lambda_vector a_dist_v;
3716 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
3718 /* Distance vectors are not ordered in the same way in the DDR
3719 and in the DIST_VECTS: search for a matching vector. */
3720 for (j = 0; VEC_iterate (lambda_vector, dist_vects, j, a_dist_v); j++)
3721 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
3722 break;
3724 if (j == VEC_length (lambda_vector, dist_vects))
3726 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
3727 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
3728 fprintf (file, "not found in Omega dist vectors:\n");
3729 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
3730 fprintf (file, "data dependence relation:\n");
3731 dump_data_dependence_relation (file, ddr);
3732 fprintf (file, ")\n");
3736 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
3738 lambda_vector a_dir_v;
3739 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
3741 /* Direction vectors are not ordered in the same way in the DDR
3742 and in the DIR_VECTS: search for a matching vector. */
3743 for (j = 0; VEC_iterate (lambda_vector, dir_vects, j, a_dir_v); j++)
3744 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
3745 break;
3747 if (j == VEC_length (lambda_vector, dist_vects))
3749 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
3750 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
3751 fprintf (file, "not found in Omega dir vectors:\n");
3752 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
3753 fprintf (file, "data dependence relation:\n");
3754 dump_data_dependence_relation (file, ddr);
3755 fprintf (file, ")\n");
3759 return true;
3762 /* This computes the affine dependence relation between A and B with
3763 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
3764 independence between two accesses, while CHREC_DONT_KNOW is used
3765 for representing the unknown relation.
3767 Note that it is possible to stop the computation of the dependence
3768 relation the first time we detect a CHREC_KNOWN element for a given
3769 subscript. */
3771 static void
3772 compute_affine_dependence (struct data_dependence_relation *ddr,
3773 struct loop *loop_nest)
3775 struct data_reference *dra = DDR_A (ddr);
3776 struct data_reference *drb = DDR_B (ddr);
3778 if (dump_file && (dump_flags & TDF_DETAILS))
3780 fprintf (dump_file, "(compute_affine_dependence\n");
3781 fprintf (dump_file, " (stmt_a = \n");
3782 print_generic_expr (dump_file, DR_STMT (dra), 0);
3783 fprintf (dump_file, ")\n (stmt_b = \n");
3784 print_generic_expr (dump_file, DR_STMT (drb), 0);
3785 fprintf (dump_file, ")\n");
3788 /* Analyze only when the dependence relation is not yet known. */
3789 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
3791 dependence_stats.num_dependence_tests++;
3793 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
3794 && access_functions_are_affine_or_constant_p (drb, loop_nest))
3796 if (flag_check_data_deps)
3798 /* Compute the dependences using the first algorithm. */
3799 subscript_dependence_tester (ddr, loop_nest);
3801 if (dump_file && (dump_flags & TDF_DETAILS))
3803 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
3804 dump_data_dependence_relation (dump_file, ddr);
3807 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
3809 bool maybe_dependent;
3810 VEC (lambda_vector, heap) *dir_vects, *dist_vects;
3812 /* Save the result of the first DD analyzer. */
3813 dist_vects = DDR_DIST_VECTS (ddr);
3814 dir_vects = DDR_DIR_VECTS (ddr);
3816 /* Reset the information. */
3817 DDR_DIST_VECTS (ddr) = NULL;
3818 DDR_DIR_VECTS (ddr) = NULL;
3820 /* Compute the same information using Omega. */
3821 if (!init_omega_for_ddr (ddr, &maybe_dependent))
3822 goto csys_dont_know;
3824 if (dump_file && (dump_flags & TDF_DETAILS))
3826 fprintf (dump_file, "Omega Analyzer\n");
3827 dump_data_dependence_relation (dump_file, ddr);
3830 /* Check that we get the same information. */
3831 if (maybe_dependent)
3832 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
3833 dir_vects));
3836 else
3837 subscript_dependence_tester (ddr, loop_nest);
3840 /* As a last case, if the dependence cannot be determined, or if
3841 the dependence is considered too difficult to determine, answer
3842 "don't know". */
3843 else
3845 csys_dont_know:;
3846 dependence_stats.num_dependence_undetermined++;
3848 if (dump_file && (dump_flags & TDF_DETAILS))
3850 fprintf (dump_file, "Data ref a:\n");
3851 dump_data_reference (dump_file, dra);
3852 fprintf (dump_file, "Data ref b:\n");
3853 dump_data_reference (dump_file, drb);
3854 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
3856 finalize_ddr_dependent (ddr, chrec_dont_know);
3860 if (dump_file && (dump_flags & TDF_DETAILS))
3861 fprintf (dump_file, ")\n");
3864 /* This computes the dependence relation for the same data
3865 reference into DDR. */
3867 static void
3868 compute_self_dependence (struct data_dependence_relation *ddr)
3870 unsigned int i;
3871 struct subscript *subscript;
3873 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3874 return;
3876 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3877 i++)
3879 /* The accessed index overlaps for each iteration. */
3880 SUB_CONFLICTS_IN_A (subscript)
3881 = conflict_fn (1, affine_fn_cst (integer_zero_node));
3882 SUB_CONFLICTS_IN_B (subscript)
3883 = conflict_fn (1, affine_fn_cst (integer_zero_node));
3884 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
3887 /* The distance vector is the zero vector. */
3888 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3889 save_dir_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3892 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
3893 the data references in DATAREFS, in the LOOP_NEST. When
3894 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
3895 relations. */
3897 void
3898 compute_all_dependences (VEC (data_reference_p, heap) *datarefs,
3899 VEC (ddr_p, heap) **dependence_relations,
3900 VEC (loop_p, heap) *loop_nest,
3901 bool compute_self_and_rr)
3903 struct data_dependence_relation *ddr;
3904 struct data_reference *a, *b;
3905 unsigned int i, j;
3907 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, a); i++)
3908 for (j = i + 1; VEC_iterate (data_reference_p, datarefs, j, b); j++)
3909 if (!DR_IS_READ (a) || !DR_IS_READ (b) || compute_self_and_rr)
3911 ddr = initialize_data_dependence_relation (a, b, loop_nest);
3912 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
3913 compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
3916 if (compute_self_and_rr)
3917 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, a); i++)
3919 ddr = initialize_data_dependence_relation (a, a, loop_nest);
3920 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
3921 compute_self_dependence (ddr);
3925 /* Stores the locations of memory references in STMT to REFERENCES. Returns
3926 true if STMT clobbers memory, false otherwise. */
3928 bool
3929 get_references_in_stmt (tree stmt, VEC (data_ref_loc, heap) **references)
3931 bool clobbers_memory = false;
3932 data_ref_loc *ref;
3933 tree *op0, *op1, call;
3935 *references = NULL;
3937 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
3938 Calls have side-effects, except those to const or pure
3939 functions. */
3940 call = get_call_expr_in (stmt);
3941 if ((call
3942 && !(call_expr_flags (call) & (ECF_CONST | ECF_PURE)))
3943 || (TREE_CODE (stmt) == ASM_EXPR
3944 && ASM_VOLATILE_P (stmt)))
3945 clobbers_memory = true;
3947 if (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS))
3948 return clobbers_memory;
3950 if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
3952 op0 = &GIMPLE_STMT_OPERAND (stmt, 0);
3953 op1 = &GIMPLE_STMT_OPERAND (stmt, 1);
3955 if (DECL_P (*op1)
3956 || REFERENCE_CLASS_P (*op1))
3958 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
3959 ref->pos = op1;
3960 ref->is_read = true;
3963 if (DECL_P (*op0)
3964 || REFERENCE_CLASS_P (*op0))
3966 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
3967 ref->pos = op0;
3968 ref->is_read = false;
3972 if (call)
3974 unsigned i, n = call_expr_nargs (call);
3976 for (i = 0; i < n; i++)
3978 op0 = &CALL_EXPR_ARG (call, i);
3980 if (DECL_P (*op0)
3981 || REFERENCE_CLASS_P (*op0))
3983 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
3984 ref->pos = op0;
3985 ref->is_read = true;
3990 return clobbers_memory;
3993 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
3994 reference, returns false, otherwise returns true. NEST is the outermost
3995 loop of the loop nest in that the references should be analyzed. */
3997 static bool
3998 find_data_references_in_stmt (struct loop *nest, tree stmt,
3999 VEC (data_reference_p, heap) **datarefs)
4001 unsigned i;
4002 VEC (data_ref_loc, heap) *references;
4003 data_ref_loc *ref;
4004 bool ret = true;
4005 data_reference_p dr;
4007 if (get_references_in_stmt (stmt, &references))
4009 VEC_free (data_ref_loc, heap, references);
4010 return false;
4013 for (i = 0; VEC_iterate (data_ref_loc, references, i, ref); i++)
4015 dr = create_data_ref (nest, *ref->pos, stmt, ref->is_read);
4016 gcc_assert (dr != NULL);
4018 /* FIXME -- data dependence analysis does not work correctly for objects with
4019 invariant addresses. Let us fail here until the problem is fixed. */
4020 if (dr_address_invariant_p (dr))
4022 free_data_ref (dr);
4023 if (dump_file && (dump_flags & TDF_DETAILS))
4024 fprintf (dump_file, "\tFAILED as dr address is invariant\n");
4025 ret = false;
4026 break;
4029 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4031 VEC_free (data_ref_loc, heap, references);
4032 return ret;
4035 /* Search the data references in LOOP, and record the information into
4036 DATAREFS. Returns chrec_dont_know when failing to analyze a
4037 difficult case, returns NULL_TREE otherwise.
4039 TODO: This function should be made smarter so that it can handle address
4040 arithmetic as if they were array accesses, etc. */
4042 static tree
4043 find_data_references_in_loop (struct loop *loop,
4044 VEC (data_reference_p, heap) **datarefs)
4046 basic_block bb, *bbs;
4047 unsigned int i;
4048 block_stmt_iterator bsi;
4050 bbs = get_loop_body_in_dom_order (loop);
4052 for (i = 0; i < loop->num_nodes; i++)
4054 bb = bbs[i];
4056 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
4058 tree stmt = bsi_stmt (bsi);
4060 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4062 struct data_reference *res;
4063 res = XCNEW (struct data_reference);
4064 VEC_safe_push (data_reference_p, heap, *datarefs, res);
4066 free (bbs);
4067 return chrec_dont_know;
4071 free (bbs);
4073 return NULL_TREE;
4076 /* Recursive helper function. */
4078 static bool
4079 find_loop_nest_1 (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4081 /* Inner loops of the nest should not contain siblings. Example:
4082 when there are two consecutive loops,
4084 | loop_0
4085 | loop_1
4086 | A[{0, +, 1}_1]
4087 | endloop_1
4088 | loop_2
4089 | A[{0, +, 1}_2]
4090 | endloop_2
4091 | endloop_0
4093 the dependence relation cannot be captured by the distance
4094 abstraction. */
4095 if (loop->next)
4096 return false;
4098 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4099 if (loop->inner)
4100 return find_loop_nest_1 (loop->inner, loop_nest);
4101 return true;
4104 /* Return false when the LOOP is not well nested. Otherwise return
4105 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4106 contain the loops from the outermost to the innermost, as they will
4107 appear in the classic distance vector. */
4109 bool
4110 find_loop_nest (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4112 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4113 if (loop->inner)
4114 return find_loop_nest_1 (loop->inner, loop_nest);
4115 return true;
4118 /* Given a loop nest LOOP, the following vectors are returned:
4119 DATAREFS is initialized to all the array elements contained in this loop,
4120 DEPENDENCE_RELATIONS contains the relations between the data references.
4121 Compute read-read and self relations if
4122 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4124 void
4125 compute_data_dependences_for_loop (struct loop *loop,
4126 bool compute_self_and_read_read_dependences,
4127 VEC (data_reference_p, heap) **datarefs,
4128 VEC (ddr_p, heap) **dependence_relations)
4130 VEC (loop_p, heap) *vloops = VEC_alloc (loop_p, heap, 3);
4132 memset (&dependence_stats, 0, sizeof (dependence_stats));
4134 /* If the loop nest is not well formed, or one of the data references
4135 is not computable, give up without spending time to compute other
4136 dependences. */
4137 if (!loop
4138 || !find_loop_nest (loop, &vloops)
4139 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know)
4141 struct data_dependence_relation *ddr;
4143 /* Insert a single relation into dependence_relations:
4144 chrec_dont_know. */
4145 ddr = initialize_data_dependence_relation (NULL, NULL, vloops);
4146 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4148 else
4149 compute_all_dependences (*datarefs, dependence_relations, vloops,
4150 compute_self_and_read_read_dependences);
4152 if (dump_file && (dump_flags & TDF_STATS))
4154 fprintf (dump_file, "Dependence tester statistics:\n");
4156 fprintf (dump_file, "Number of dependence tests: %d\n",
4157 dependence_stats.num_dependence_tests);
4158 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4159 dependence_stats.num_dependence_dependent);
4160 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4161 dependence_stats.num_dependence_independent);
4162 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4163 dependence_stats.num_dependence_undetermined);
4165 fprintf (dump_file, "Number of subscript tests: %d\n",
4166 dependence_stats.num_subscript_tests);
4167 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4168 dependence_stats.num_subscript_undetermined);
4169 fprintf (dump_file, "Number of same subscript function: %d\n",
4170 dependence_stats.num_same_subscript_function);
4172 fprintf (dump_file, "Number of ziv tests: %d\n",
4173 dependence_stats.num_ziv);
4174 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4175 dependence_stats.num_ziv_dependent);
4176 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4177 dependence_stats.num_ziv_independent);
4178 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4179 dependence_stats.num_ziv_unimplemented);
4181 fprintf (dump_file, "Number of siv tests: %d\n",
4182 dependence_stats.num_siv);
4183 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4184 dependence_stats.num_siv_dependent);
4185 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4186 dependence_stats.num_siv_independent);
4187 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4188 dependence_stats.num_siv_unimplemented);
4190 fprintf (dump_file, "Number of miv tests: %d\n",
4191 dependence_stats.num_miv);
4192 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4193 dependence_stats.num_miv_dependent);
4194 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4195 dependence_stats.num_miv_independent);
4196 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4197 dependence_stats.num_miv_unimplemented);
4201 /* Entry point (for testing only). Analyze all the data references
4202 and the dependence relations in LOOP.
4204 The data references are computed first.
4206 A relation on these nodes is represented by a complete graph. Some
4207 of the relations could be of no interest, thus the relations can be
4208 computed on demand.
4210 In the following function we compute all the relations. This is
4211 just a first implementation that is here for:
4212 - for showing how to ask for the dependence relations,
4213 - for the debugging the whole dependence graph,
4214 - for the dejagnu testcases and maintenance.
4216 It is possible to ask only for a part of the graph, avoiding to
4217 compute the whole dependence graph. The computed dependences are
4218 stored in a knowledge base (KB) such that later queries don't
4219 recompute the same information. The implementation of this KB is
4220 transparent to the optimizer, and thus the KB can be changed with a
4221 more efficient implementation, or the KB could be disabled. */
4222 static void
4223 analyze_all_data_dependences (struct loop *loop)
4225 unsigned int i;
4226 int nb_data_refs = 10;
4227 VEC (data_reference_p, heap) *datarefs =
4228 VEC_alloc (data_reference_p, heap, nb_data_refs);
4229 VEC (ddr_p, heap) *dependence_relations =
4230 VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs);
4232 /* Compute DDs on the whole function. */
4233 compute_data_dependences_for_loop (loop, false, &datarefs,
4234 &dependence_relations);
4236 if (dump_file)
4238 dump_data_dependence_relations (dump_file, dependence_relations);
4239 fprintf (dump_file, "\n\n");
4241 if (dump_flags & TDF_DETAILS)
4242 dump_dist_dir_vectors (dump_file, dependence_relations);
4244 if (dump_flags & TDF_STATS)
4246 unsigned nb_top_relations = 0;
4247 unsigned nb_bot_relations = 0;
4248 unsigned nb_basename_differ = 0;
4249 unsigned nb_chrec_relations = 0;
4250 struct data_dependence_relation *ddr;
4252 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
4254 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4255 nb_top_relations++;
4257 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4259 struct data_reference *a = DDR_A (ddr);
4260 struct data_reference *b = DDR_B (ddr);
4262 if (!bitmap_intersect_p (DR_VOPS (a), DR_VOPS (b)))
4263 nb_basename_differ++;
4264 else
4265 nb_bot_relations++;
4268 else
4269 nb_chrec_relations++;
4272 gather_stats_on_scev_database ();
4276 free_dependence_relations (dependence_relations);
4277 free_data_refs (datarefs);
4280 /* Computes all the data dependences and check that the results of
4281 several analyzers are the same. */
4283 void
4284 tree_check_data_deps (void)
4286 loop_iterator li;
4287 struct loop *loop_nest;
4289 FOR_EACH_LOOP (li, loop_nest, 0)
4290 analyze_all_data_dependences (loop_nest);
4293 /* Free the memory used by a data dependence relation DDR. */
4295 void
4296 free_dependence_relation (struct data_dependence_relation *ddr)
4298 if (ddr == NULL)
4299 return;
4301 if (DDR_SUBSCRIPTS (ddr))
4302 free_subscripts (DDR_SUBSCRIPTS (ddr));
4303 if (DDR_DIST_VECTS (ddr))
4304 VEC_free (lambda_vector, heap, DDR_DIST_VECTS (ddr));
4305 if (DDR_DIR_VECTS (ddr))
4306 VEC_free (lambda_vector, heap, DDR_DIR_VECTS (ddr));
4308 free (ddr);
4311 /* Free the memory used by the data dependence relations from
4312 DEPENDENCE_RELATIONS. */
4314 void
4315 free_dependence_relations (VEC (ddr_p, heap) *dependence_relations)
4317 unsigned int i;
4318 struct data_dependence_relation *ddr;
4319 VEC (loop_p, heap) *loop_nest = NULL;
4321 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
4323 if (ddr == NULL)
4324 continue;
4325 if (loop_nest == NULL)
4326 loop_nest = DDR_LOOP_NEST (ddr);
4327 else
4328 gcc_assert (DDR_LOOP_NEST (ddr) == NULL
4329 || DDR_LOOP_NEST (ddr) == loop_nest);
4330 free_dependence_relation (ddr);
4333 if (loop_nest)
4334 VEC_free (loop_p, heap, loop_nest);
4335 VEC_free (ddr_p, heap, dependence_relations);
4338 /* Free the memory used by the data references from DATAREFS. */
4340 void
4341 free_data_refs (VEC (data_reference_p, heap) *datarefs)
4343 unsigned int i;
4344 struct data_reference *dr;
4346 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
4347 free_data_ref (dr);
4348 VEC_free (data_reference_p, heap, datarefs);
4353 /* Returns the index of STMT in RDG. */
4355 static int
4356 find_vertex_for_stmt (const struct graph *rdg, const_tree stmt)
4358 int i;
4360 for (i = 0; i < rdg->n_vertices; i++)
4361 if (RDGV_STMT (&(rdg->vertices[i])) == stmt)
4362 return i;
4364 gcc_unreachable ();
4365 return 0;
4368 /* Creates an edge in RDG for each distance vector from DDR. */
4370 static void
4371 create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4373 int va, vb;
4374 data_reference_p dra;
4375 data_reference_p drb;
4376 struct graph_edge *e;
4378 if (DDR_REVERSED_P (ddr))
4380 dra = DDR_B (ddr);
4381 drb = DDR_A (ddr);
4383 else
4385 dra = DDR_A (ddr);
4386 drb = DDR_B (ddr);
4389 va = find_vertex_for_stmt (rdg, DR_STMT (dra));
4390 vb = find_vertex_for_stmt (rdg, DR_STMT (drb));
4392 e = add_edge (rdg, va, vb);
4393 e->data = XNEW (struct rdg_edge);
4395 /* Determines the type of the data dependence. */
4396 if (DR_IS_READ (dra) && DR_IS_READ (drb))
4397 RDGE_TYPE (e) = input_dd;
4398 else if (!DR_IS_READ (dra) && !DR_IS_READ (drb))
4399 RDGE_TYPE (e) = output_dd;
4400 else if (!DR_IS_READ (dra) && DR_IS_READ (drb))
4401 RDGE_TYPE (e) = flow_dd;
4402 else if (DR_IS_READ (dra) && !DR_IS_READ (drb))
4403 RDGE_TYPE (e) = anti_dd;
4406 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
4407 the index of DEF in RDG. */
4409 static void
4410 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
4412 use_operand_p imm_use_p;
4413 imm_use_iterator iterator;
4415 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
4417 int use = find_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
4418 struct graph_edge *e = add_edge (rdg, idef, use);
4420 e->data = XNEW (struct rdg_edge);
4421 RDGE_TYPE (e) = flow_dd;
4425 /* Creates the edges of the reduced dependence graph RDG. */
4427 static void
4428 create_rdg_edges (struct graph *rdg, VEC (ddr_p, heap) *ddrs)
4430 int i;
4431 struct data_dependence_relation *ddr;
4432 def_operand_p def_p;
4433 ssa_op_iter iter;
4435 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
4436 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4437 create_rdg_edge_for_ddr (rdg, ddr);
4439 for (i = 0; i < rdg->n_vertices; i++)
4440 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDGV_STMT (&(rdg->vertices[i])),
4441 iter, SSA_OP_ALL_DEFS)
4442 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
4445 /* Build the vertices of the reduced dependence graph RDG. */
4447 static void
4448 create_rdg_vertices (struct graph *rdg, VEC (tree, heap) *stmts)
4450 int i;
4451 tree s;
4453 for (i = 0; VEC_iterate (tree, stmts, i, s); i++)
4455 struct vertex *v = &(rdg->vertices[i]);
4457 v->data = XNEW (struct rdg_vertex);
4458 RDGV_STMT (v) = s;
4462 /* Initialize STMTS with all the statements and PHI nodes of LOOP. */
4464 static void
4465 stmts_from_loop (struct loop *loop, VEC (tree, heap) **stmts)
4467 unsigned int i;
4468 basic_block *bbs = get_loop_body_in_dom_order (loop);
4470 for (i = 0; i < loop->num_nodes; i++)
4472 tree phi;
4473 basic_block bb = bbs[i];
4474 block_stmt_iterator bsi;
4476 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
4477 VEC_safe_push (tree, heap, *stmts, phi);
4479 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
4480 VEC_safe_push (tree, heap, *stmts, bsi_stmt (bsi));
4483 free (bbs);
4486 /* Returns true when all the dependences are computable. */
4488 static bool
4489 known_dependences_p (VEC (ddr_p, heap) *dependence_relations)
4491 ddr_p ddr;
4492 unsigned int i;
4494 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
4495 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4496 return false;
4498 return true;
4501 /* Build a Reduced Dependence Graph with one vertex per statement of the
4502 loop nest and one edge per data dependence or scalar dependence. */
4504 struct graph *
4505 build_rdg (struct loop *loop)
4507 int nb_data_refs = 10;
4508 struct graph *rdg = NULL;
4509 VEC (ddr_p, heap) *dependence_relations;
4510 VEC (data_reference_p, heap) *datarefs;
4511 VEC (tree, heap) *stmts = VEC_alloc (tree, heap, 10);
4513 dependence_relations = VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs) ;
4514 datarefs = VEC_alloc (data_reference_p, heap, nb_data_refs);
4515 compute_data_dependences_for_loop (loop,
4516 false,
4517 &datarefs,
4518 &dependence_relations);
4520 if (!known_dependences_p (dependence_relations))
4521 goto end_rdg;
4523 stmts_from_loop (loop, &stmts);
4524 rdg = new_graph (VEC_length (tree, stmts));
4525 create_rdg_vertices (rdg, stmts);
4526 create_rdg_edges (rdg, dependence_relations);
4528 end_rdg:
4529 free_dependence_relations (dependence_relations);
4530 free_data_refs (datarefs);
4531 VEC_free (tree, heap, stmts);
4533 return rdg;