EnumSet*.class: Regenerate
[official-gcc.git] / gcc / tree-data-ref.c
blob6ad2e96bea535b52a2d881c6fb8591f9714603b8
1 /* Data references and dependences detectors.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass walks a given loop structure searching for array
22 references. The information about the array accesses is recorded
23 in DATA_REFERENCE structures.
25 The basic test for determining the dependences is:
26 given two access functions chrec1 and chrec2 to a same array, and
27 x and y two vectors from the iteration domain, the same element of
28 the array is accessed twice at iterations x and y if and only if:
29 | chrec1 (x) == chrec2 (y).
31 The goals of this analysis are:
33 - to determine the independence: the relation between two
34 independent accesses is qualified with the chrec_known (this
35 information allows a loop parallelization),
37 - when two data references access the same data, to qualify the
38 dependence relation with classic dependence representations:
40 - distance vectors
41 - direction vectors
42 - loop carried level dependence
43 - polyhedron dependence
44 or with the chains of recurrences based representation,
46 - to define a knowledge base for storing the data dependence
47 information,
49 - to define an interface to access this data.
52 Definitions:
54 - subscript: given two array accesses a subscript is the tuple
55 composed of the access functions for a given dimension. Example:
56 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57 (f1, g1), (f2, g2), (f3, g3).
59 - Diophantine equation: an equation whose coefficients and
60 solutions are integer constants, for example the equation
61 | 3*x + 2*y = 1
62 has an integer solution x = 1 and y = -1.
64 References:
66 - "Advanced Compilation for High Performance Computing" by Randy
67 Allen and Ken Kennedy.
68 http://citeseer.ist.psu.edu/goff91practical.html
70 - "Loop Transformations for Restructuring Compilers - The Foundations"
71 by Utpal Banerjee.
76 #include "config.h"
77 #include "system.h"
78 #include "coretypes.h"
79 #include "tm.h"
80 #include "ggc.h"
81 #include "tree.h"
83 /* These RTL headers are needed for basic-block.h. */
84 #include "rtl.h"
85 #include "basic-block.h"
86 #include "diagnostic.h"
87 #include "tree-flow.h"
88 #include "tree-dump.h"
89 #include "timevar.h"
90 #include "cfgloop.h"
91 #include "tree-chrec.h"
92 #include "tree-data-ref.h"
93 #include "tree-scalar-evolution.h"
94 #include "tree-pass.h"
95 #include "langhooks.h"
97 static struct datadep_stats
99 int num_dependence_tests;
100 int num_dependence_dependent;
101 int num_dependence_independent;
102 int num_dependence_undetermined;
104 int num_subscript_tests;
105 int num_subscript_undetermined;
106 int num_same_subscript_function;
108 int num_ziv;
109 int num_ziv_independent;
110 int num_ziv_dependent;
111 int num_ziv_unimplemented;
113 int num_siv;
114 int num_siv_independent;
115 int num_siv_dependent;
116 int num_siv_unimplemented;
118 int num_miv;
119 int num_miv_independent;
120 int num_miv_dependent;
121 int num_miv_unimplemented;
122 } dependence_stats;
124 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
125 struct data_reference *,
126 struct data_reference *,
127 struct loop *);
128 /* Returns true iff A divides B. */
130 static inline bool
131 tree_fold_divides_p (const_tree a, const_tree b)
133 gcc_assert (TREE_CODE (a) == INTEGER_CST);
134 gcc_assert (TREE_CODE (b) == INTEGER_CST);
135 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a, 0));
138 /* Returns true iff A divides B. */
140 static inline bool
141 int_divides_p (int a, int b)
143 return ((b % a) == 0);
148 /* Dump into FILE all the data references from DATAREFS. */
150 void
151 dump_data_references (FILE *file, VEC (data_reference_p, heap) *datarefs)
153 unsigned int i;
154 struct data_reference *dr;
156 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
157 dump_data_reference (file, dr);
160 /* Dump into FILE all the dependence relations from DDRS. */
162 void
163 dump_data_dependence_relations (FILE *file,
164 VEC (ddr_p, heap) *ddrs)
166 unsigned int i;
167 struct data_dependence_relation *ddr;
169 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
170 dump_data_dependence_relation (file, ddr);
173 /* Dump function for a DATA_REFERENCE structure. */
175 void
176 dump_data_reference (FILE *outf,
177 struct data_reference *dr)
179 unsigned int i;
181 fprintf (outf, "(Data Ref: \n stmt: ");
182 print_generic_stmt (outf, DR_STMT (dr), 0);
183 fprintf (outf, " ref: ");
184 print_generic_stmt (outf, DR_REF (dr), 0);
185 fprintf (outf, " base_object: ");
186 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
188 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
190 fprintf (outf, " Access function %d: ", i);
191 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
193 fprintf (outf, ")\n");
196 /* Dumps the affine function described by FN to the file OUTF. */
198 static void
199 dump_affine_function (FILE *outf, affine_fn fn)
201 unsigned i;
202 tree coef;
204 print_generic_expr (outf, VEC_index (tree, fn, 0), TDF_SLIM);
205 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
207 fprintf (outf, " + ");
208 print_generic_expr (outf, coef, TDF_SLIM);
209 fprintf (outf, " * x_%u", i);
213 /* Dumps the conflict function CF to the file OUTF. */
215 static void
216 dump_conflict_function (FILE *outf, conflict_function *cf)
218 unsigned i;
220 if (cf->n == NO_DEPENDENCE)
221 fprintf (outf, "no dependence\n");
222 else if (cf->n == NOT_KNOWN)
223 fprintf (outf, "not known\n");
224 else
226 for (i = 0; i < cf->n; i++)
228 fprintf (outf, "[");
229 dump_affine_function (outf, cf->fns[i]);
230 fprintf (outf, "]\n");
235 /* Dump function for a SUBSCRIPT structure. */
237 void
238 dump_subscript (FILE *outf, struct subscript *subscript)
240 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
242 fprintf (outf, "\n (subscript \n");
243 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
244 dump_conflict_function (outf, cf);
245 if (CF_NONTRIVIAL_P (cf))
247 tree last_iteration = SUB_LAST_CONFLICT (subscript);
248 fprintf (outf, " last_conflict: ");
249 print_generic_stmt (outf, last_iteration, 0);
252 cf = SUB_CONFLICTS_IN_B (subscript);
253 fprintf (outf, " iterations_that_access_an_element_twice_in_B: ");
254 dump_conflict_function (outf, cf);
255 if (CF_NONTRIVIAL_P (cf))
257 tree last_iteration = SUB_LAST_CONFLICT (subscript);
258 fprintf (outf, " last_conflict: ");
259 print_generic_stmt (outf, last_iteration, 0);
262 fprintf (outf, " (Subscript distance: ");
263 print_generic_stmt (outf, SUB_DISTANCE (subscript), 0);
264 fprintf (outf, " )\n");
265 fprintf (outf, " )\n");
268 /* Print the classic direction vector DIRV to OUTF. */
270 void
271 print_direction_vector (FILE *outf,
272 lambda_vector dirv,
273 int length)
275 int eq;
277 for (eq = 0; eq < length; eq++)
279 enum data_dependence_direction dir = dirv[eq];
281 switch (dir)
283 case dir_positive:
284 fprintf (outf, " +");
285 break;
286 case dir_negative:
287 fprintf (outf, " -");
288 break;
289 case dir_equal:
290 fprintf (outf, " =");
291 break;
292 case dir_positive_or_equal:
293 fprintf (outf, " +=");
294 break;
295 case dir_positive_or_negative:
296 fprintf (outf, " +-");
297 break;
298 case dir_negative_or_equal:
299 fprintf (outf, " -=");
300 break;
301 case dir_star:
302 fprintf (outf, " *");
303 break;
304 default:
305 fprintf (outf, "indep");
306 break;
309 fprintf (outf, "\n");
312 /* Print a vector of direction vectors. */
314 void
315 print_dir_vectors (FILE *outf, VEC (lambda_vector, heap) *dir_vects,
316 int length)
318 unsigned j;
319 lambda_vector v;
321 for (j = 0; VEC_iterate (lambda_vector, dir_vects, j, v); j++)
322 print_direction_vector (outf, v, length);
325 /* Print a vector of distance vectors. */
327 void
328 print_dist_vectors (FILE *outf, VEC (lambda_vector, heap) *dist_vects,
329 int length)
331 unsigned j;
332 lambda_vector v;
334 for (j = 0; VEC_iterate (lambda_vector, dist_vects, j, v); j++)
335 print_lambda_vector (outf, v, length);
338 /* Debug version. */
340 void
341 debug_data_dependence_relation (struct data_dependence_relation *ddr)
343 dump_data_dependence_relation (stderr, ddr);
346 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
348 void
349 dump_data_dependence_relation (FILE *outf,
350 struct data_dependence_relation *ddr)
352 struct data_reference *dra, *drb;
354 dra = DDR_A (ddr);
355 drb = DDR_B (ddr);
356 fprintf (outf, "(Data Dep: \n");
357 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
358 fprintf (outf, " (don't know)\n");
360 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
361 fprintf (outf, " (no dependence)\n");
363 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
365 unsigned int i;
366 struct loop *loopi;
368 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
370 fprintf (outf, " access_fn_A: ");
371 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
372 fprintf (outf, " access_fn_B: ");
373 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
374 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
377 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
378 fprintf (outf, " loop nest: (");
379 for (i = 0; VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
380 fprintf (outf, "%d ", loopi->num);
381 fprintf (outf, ")\n");
383 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
385 fprintf (outf, " distance_vector: ");
386 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
387 DDR_NB_LOOPS (ddr));
390 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
392 fprintf (outf, " direction_vector: ");
393 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
394 DDR_NB_LOOPS (ddr));
398 fprintf (outf, ")\n");
401 /* Dump function for a DATA_DEPENDENCE_DIRECTION structure. */
403 void
404 dump_data_dependence_direction (FILE *file,
405 enum data_dependence_direction dir)
407 switch (dir)
409 case dir_positive:
410 fprintf (file, "+");
411 break;
413 case dir_negative:
414 fprintf (file, "-");
415 break;
417 case dir_equal:
418 fprintf (file, "=");
419 break;
421 case dir_positive_or_negative:
422 fprintf (file, "+-");
423 break;
425 case dir_positive_or_equal:
426 fprintf (file, "+=");
427 break;
429 case dir_negative_or_equal:
430 fprintf (file, "-=");
431 break;
433 case dir_star:
434 fprintf (file, "*");
435 break;
437 default:
438 break;
442 /* Dumps the distance and direction vectors in FILE. DDRS contains
443 the dependence relations, and VECT_SIZE is the size of the
444 dependence vectors, or in other words the number of loops in the
445 considered nest. */
447 void
448 dump_dist_dir_vectors (FILE *file, VEC (ddr_p, heap) *ddrs)
450 unsigned int i, j;
451 struct data_dependence_relation *ddr;
452 lambda_vector v;
454 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
455 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
457 for (j = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), j, v); j++)
459 fprintf (file, "DISTANCE_V (");
460 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
461 fprintf (file, ")\n");
464 for (j = 0; VEC_iterate (lambda_vector, DDR_DIR_VECTS (ddr), j, v); j++)
466 fprintf (file, "DIRECTION_V (");
467 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
468 fprintf (file, ")\n");
472 fprintf (file, "\n\n");
475 /* Dumps the data dependence relations DDRS in FILE. */
477 void
478 dump_ddrs (FILE *file, VEC (ddr_p, heap) *ddrs)
480 unsigned int i;
481 struct data_dependence_relation *ddr;
483 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
484 dump_data_dependence_relation (file, ddr);
486 fprintf (file, "\n\n");
489 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
490 will be ssizetype. */
492 void
493 split_constant_offset (tree exp, tree *var, tree *off)
495 tree type = TREE_TYPE (exp), otype;
496 tree var0, var1;
497 tree off0, off1;
498 enum tree_code code;
500 *var = exp;
501 STRIP_NOPS (exp);
502 otype = TREE_TYPE (exp);
503 code = TREE_CODE (exp);
505 switch (code)
507 case INTEGER_CST:
508 *var = build_int_cst (type, 0);
509 *off = fold_convert (ssizetype, exp);
510 return;
512 case POINTER_PLUS_EXPR:
513 code = PLUS_EXPR;
514 /* FALLTHROUGH */
515 case PLUS_EXPR:
516 case MINUS_EXPR:
517 split_constant_offset (TREE_OPERAND (exp, 0), &var0, &off0);
518 split_constant_offset (TREE_OPERAND (exp, 1), &var1, &off1);
519 *var = fold_convert (type, fold_build2 (TREE_CODE (exp), otype,
520 var0, var1));
521 *off = size_binop (code, off0, off1);
522 return;
524 case MULT_EXPR:
525 off1 = TREE_OPERAND (exp, 1);
526 if (TREE_CODE (off1) != INTEGER_CST)
527 break;
529 split_constant_offset (TREE_OPERAND (exp, 0), &var0, &off0);
530 *var = fold_convert (type, fold_build2 (MULT_EXPR, otype,
531 var0, off1));
532 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, off1));
533 return;
535 case ADDR_EXPR:
537 tree op, base, poffset;
538 HOST_WIDE_INT pbitsize, pbitpos;
539 enum machine_mode pmode;
540 int punsignedp, pvolatilep;
542 op = TREE_OPERAND (exp, 0);
543 if (!handled_component_p (op))
544 break;
546 base = get_inner_reference (op, &pbitsize, &pbitpos, &poffset,
547 &pmode, &punsignedp, &pvolatilep, false);
549 if (pbitpos % BITS_PER_UNIT != 0)
550 break;
551 base = build_fold_addr_expr (base);
552 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
554 if (poffset)
556 split_constant_offset (poffset, &poffset, &off1);
557 off0 = size_binop (PLUS_EXPR, off0, off1);
558 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base),
559 base,
560 fold_convert (TREE_TYPE (base), poffset));
563 *var = fold_convert (type, base);
564 *off = off0;
565 return;
568 case SSA_NAME:
570 tree def_stmt = SSA_NAME_DEF_STMT (exp);
571 if (TREE_CODE (def_stmt) == GIMPLE_MODIFY_STMT)
573 tree def_stmt_rhs = GIMPLE_STMT_OPERAND (def_stmt, 1);
575 if (!TREE_SIDE_EFFECTS (def_stmt_rhs)
576 && EXPR_P (def_stmt_rhs)
577 && !REFERENCE_CLASS_P (def_stmt_rhs))
579 split_constant_offset (def_stmt_rhs, &var0, &off0);
580 var0 = fold_convert (type, var0);
581 *var = var0;
582 *off = off0;
583 return;
586 break;
589 default:
590 break;
593 *off = ssize_int (0);
596 /* Returns the address ADDR of an object in a canonical shape (without nop
597 casts, and with type of pointer to the object). */
599 static tree
600 canonicalize_base_object_address (tree addr)
602 tree orig = addr;
604 STRIP_NOPS (addr);
606 /* The base address may be obtained by casting from integer, in that case
607 keep the cast. */
608 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
609 return orig;
611 if (TREE_CODE (addr) != ADDR_EXPR)
612 return addr;
614 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
617 /* Analyzes the behavior of the memory reference DR in the innermost loop that
618 contains it. */
620 void
621 dr_analyze_innermost (struct data_reference *dr)
623 tree stmt = DR_STMT (dr);
624 struct loop *loop = loop_containing_stmt (stmt);
625 tree ref = DR_REF (dr);
626 HOST_WIDE_INT pbitsize, pbitpos;
627 tree base, poffset;
628 enum machine_mode pmode;
629 int punsignedp, pvolatilep;
630 affine_iv base_iv, offset_iv;
631 tree init, dinit, step;
633 if (dump_file && (dump_flags & TDF_DETAILS))
634 fprintf (dump_file, "analyze_innermost: ");
636 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
637 &pmode, &punsignedp, &pvolatilep, false);
638 gcc_assert (base != NULL_TREE);
640 if (pbitpos % BITS_PER_UNIT != 0)
642 if (dump_file && (dump_flags & TDF_DETAILS))
643 fprintf (dump_file, "failed: bit offset alignment.\n");
644 return;
647 base = build_fold_addr_expr (base);
648 if (!simple_iv (loop, stmt, base, &base_iv, false))
650 if (dump_file && (dump_flags & TDF_DETAILS))
651 fprintf (dump_file, "failed: evolution of base is not affine.\n");
652 return;
654 if (!poffset)
656 offset_iv.base = ssize_int (0);
657 offset_iv.step = ssize_int (0);
659 else if (!simple_iv (loop, stmt, poffset, &offset_iv, false))
661 if (dump_file && (dump_flags & TDF_DETAILS))
662 fprintf (dump_file, "failed: evolution of offset is not affine.\n");
663 return;
666 init = ssize_int (pbitpos / BITS_PER_UNIT);
667 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
668 init = size_binop (PLUS_EXPR, init, dinit);
669 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
670 init = size_binop (PLUS_EXPR, init, dinit);
672 step = size_binop (PLUS_EXPR,
673 fold_convert (ssizetype, base_iv.step),
674 fold_convert (ssizetype, offset_iv.step));
676 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
678 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
679 DR_INIT (dr) = init;
680 DR_STEP (dr) = step;
682 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
684 if (dump_file && (dump_flags & TDF_DETAILS))
685 fprintf (dump_file, "success.\n");
688 /* Determines the base object and the list of indices of memory reference
689 DR, analyzed in loop nest NEST. */
691 static void
692 dr_analyze_indices (struct data_reference *dr, struct loop *nest)
694 tree stmt = DR_STMT (dr);
695 struct loop *loop = loop_containing_stmt (stmt);
696 VEC (tree, heap) *access_fns = NULL;
697 tree ref = unshare_expr (DR_REF (dr)), aref = ref, op;
698 tree base, off, access_fn;
700 while (handled_component_p (aref))
702 if (TREE_CODE (aref) == ARRAY_REF)
704 op = TREE_OPERAND (aref, 1);
705 access_fn = analyze_scalar_evolution (loop, op);
706 access_fn = resolve_mixers (nest, access_fn);
707 VEC_safe_push (tree, heap, access_fns, access_fn);
709 TREE_OPERAND (aref, 1) = build_int_cst (TREE_TYPE (op), 0);
712 aref = TREE_OPERAND (aref, 0);
715 if (INDIRECT_REF_P (aref))
717 op = TREE_OPERAND (aref, 0);
718 access_fn = analyze_scalar_evolution (loop, op);
719 access_fn = resolve_mixers (nest, access_fn);
720 base = initial_condition (access_fn);
721 split_constant_offset (base, &base, &off);
722 access_fn = chrec_replace_initial_condition (access_fn,
723 fold_convert (TREE_TYPE (base), off));
725 TREE_OPERAND (aref, 0) = base;
726 VEC_safe_push (tree, heap, access_fns, access_fn);
729 DR_BASE_OBJECT (dr) = ref;
730 DR_ACCESS_FNS (dr) = access_fns;
733 /* Extracts the alias analysis information from the memory reference DR. */
735 static void
736 dr_analyze_alias (struct data_reference *dr)
738 tree stmt = DR_STMT (dr);
739 tree ref = DR_REF (dr);
740 tree base = get_base_address (ref), addr, smt = NULL_TREE;
741 ssa_op_iter it;
742 tree op;
743 bitmap vops;
745 if (DECL_P (base))
746 smt = base;
747 else if (INDIRECT_REF_P (base))
749 addr = TREE_OPERAND (base, 0);
750 if (TREE_CODE (addr) == SSA_NAME)
752 smt = symbol_mem_tag (SSA_NAME_VAR (addr));
753 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
757 DR_SYMBOL_TAG (dr) = smt;
758 if (smt && var_can_have_subvars (smt))
759 DR_SUBVARS (dr) = get_subvars_for_var (smt);
761 vops = BITMAP_ALLOC (NULL);
762 FOR_EACH_SSA_TREE_OPERAND (op, stmt, it, SSA_OP_VIRTUAL_USES)
764 bitmap_set_bit (vops, DECL_UID (SSA_NAME_VAR (op)));
767 DR_VOPS (dr) = vops;
770 /* Returns true if the address of DR is invariant. */
772 static bool
773 dr_address_invariant_p (struct data_reference *dr)
775 unsigned i;
776 tree idx;
778 for (i = 0; VEC_iterate (tree, DR_ACCESS_FNS (dr), i, idx); i++)
779 if (tree_contains_chrecs (idx, NULL))
780 return false;
782 return true;
785 /* Frees data reference DR. */
787 static void
788 free_data_ref (data_reference_p dr)
790 BITMAP_FREE (DR_VOPS (dr));
791 VEC_free (tree, heap, DR_ACCESS_FNS (dr));
792 free (dr);
795 /* Analyzes memory reference MEMREF accessed in STMT. The reference
796 is read if IS_READ is true, write otherwise. Returns the
797 data_reference description of MEMREF. NEST is the outermost loop of the
798 loop nest in that the reference should be analyzed. */
800 struct data_reference *
801 create_data_ref (struct loop *nest, tree memref, tree stmt, bool is_read)
803 struct data_reference *dr;
805 if (dump_file && (dump_flags & TDF_DETAILS))
807 fprintf (dump_file, "Creating dr for ");
808 print_generic_expr (dump_file, memref, TDF_SLIM);
809 fprintf (dump_file, "\n");
812 dr = XCNEW (struct data_reference);
813 DR_STMT (dr) = stmt;
814 DR_REF (dr) = memref;
815 DR_IS_READ (dr) = is_read;
817 dr_analyze_innermost (dr);
818 dr_analyze_indices (dr, nest);
819 dr_analyze_alias (dr);
821 if (dump_file && (dump_flags & TDF_DETAILS))
823 fprintf (dump_file, "\tbase_address: ");
824 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
825 fprintf (dump_file, "\n\toffset from base address: ");
826 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
827 fprintf (dump_file, "\n\tconstant offset from base address: ");
828 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
829 fprintf (dump_file, "\n\tstep: ");
830 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
831 fprintf (dump_file, "\n\taligned to: ");
832 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
833 fprintf (dump_file, "\n\tbase_object: ");
834 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
835 fprintf (dump_file, "\n\tsymbol tag: ");
836 print_generic_expr (dump_file, DR_SYMBOL_TAG (dr), TDF_SLIM);
837 fprintf (dump_file, "\n");
840 return dr;
843 /* Returns true if FNA == FNB. */
845 static bool
846 affine_function_equal_p (affine_fn fna, affine_fn fnb)
848 unsigned i, n = VEC_length (tree, fna);
850 if (n != VEC_length (tree, fnb))
851 return false;
853 for (i = 0; i < n; i++)
854 if (!operand_equal_p (VEC_index (tree, fna, i),
855 VEC_index (tree, fnb, i), 0))
856 return false;
858 return true;
861 /* If all the functions in CF are the same, returns one of them,
862 otherwise returns NULL. */
864 static affine_fn
865 common_affine_function (conflict_function *cf)
867 unsigned i;
868 affine_fn comm;
870 if (!CF_NONTRIVIAL_P (cf))
871 return NULL;
873 comm = cf->fns[0];
875 for (i = 1; i < cf->n; i++)
876 if (!affine_function_equal_p (comm, cf->fns[i]))
877 return NULL;
879 return comm;
882 /* Returns the base of the affine function FN. */
884 static tree
885 affine_function_base (affine_fn fn)
887 return VEC_index (tree, fn, 0);
890 /* Returns true if FN is a constant. */
892 static bool
893 affine_function_constant_p (affine_fn fn)
895 unsigned i;
896 tree coef;
898 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
899 if (!integer_zerop (coef))
900 return false;
902 return true;
905 /* Returns true if FN is the zero constant function. */
907 static bool
908 affine_function_zero_p (affine_fn fn)
910 return (integer_zerop (affine_function_base (fn))
911 && affine_function_constant_p (fn));
914 /* Applies operation OP on affine functions FNA and FNB, and returns the
915 result. */
917 static affine_fn
918 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
920 unsigned i, n, m;
921 affine_fn ret;
922 tree coef;
924 if (VEC_length (tree, fnb) > VEC_length (tree, fna))
926 n = VEC_length (tree, fna);
927 m = VEC_length (tree, fnb);
929 else
931 n = VEC_length (tree, fnb);
932 m = VEC_length (tree, fna);
935 ret = VEC_alloc (tree, heap, m);
936 for (i = 0; i < n; i++)
937 VEC_quick_push (tree, ret,
938 fold_build2 (op, integer_type_node,
939 VEC_index (tree, fna, i),
940 VEC_index (tree, fnb, i)));
942 for (; VEC_iterate (tree, fna, i, coef); i++)
943 VEC_quick_push (tree, ret,
944 fold_build2 (op, integer_type_node,
945 coef, integer_zero_node));
946 for (; VEC_iterate (tree, fnb, i, coef); i++)
947 VEC_quick_push (tree, ret,
948 fold_build2 (op, integer_type_node,
949 integer_zero_node, coef));
951 return ret;
954 /* Returns the sum of affine functions FNA and FNB. */
956 static affine_fn
957 affine_fn_plus (affine_fn fna, affine_fn fnb)
959 return affine_fn_op (PLUS_EXPR, fna, fnb);
962 /* Returns the difference of affine functions FNA and FNB. */
964 static affine_fn
965 affine_fn_minus (affine_fn fna, affine_fn fnb)
967 return affine_fn_op (MINUS_EXPR, fna, fnb);
970 /* Frees affine function FN. */
972 static void
973 affine_fn_free (affine_fn fn)
975 VEC_free (tree, heap, fn);
978 /* Determine for each subscript in the data dependence relation DDR
979 the distance. */
981 static void
982 compute_subscript_distance (struct data_dependence_relation *ddr)
984 conflict_function *cf_a, *cf_b;
985 affine_fn fn_a, fn_b, diff;
987 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
989 unsigned int i;
991 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
993 struct subscript *subscript;
995 subscript = DDR_SUBSCRIPT (ddr, i);
996 cf_a = SUB_CONFLICTS_IN_A (subscript);
997 cf_b = SUB_CONFLICTS_IN_B (subscript);
999 fn_a = common_affine_function (cf_a);
1000 fn_b = common_affine_function (cf_b);
1001 if (!fn_a || !fn_b)
1003 SUB_DISTANCE (subscript) = chrec_dont_know;
1004 return;
1006 diff = affine_fn_minus (fn_a, fn_b);
1008 if (affine_function_constant_p (diff))
1009 SUB_DISTANCE (subscript) = affine_function_base (diff);
1010 else
1011 SUB_DISTANCE (subscript) = chrec_dont_know;
1013 affine_fn_free (diff);
1018 /* Returns the conflict function for "unknown". */
1020 static conflict_function *
1021 conflict_fn_not_known (void)
1023 conflict_function *fn = XCNEW (conflict_function);
1024 fn->n = NOT_KNOWN;
1026 return fn;
1029 /* Returns the conflict function for "independent". */
1031 static conflict_function *
1032 conflict_fn_no_dependence (void)
1034 conflict_function *fn = XCNEW (conflict_function);
1035 fn->n = NO_DEPENDENCE;
1037 return fn;
1040 /* Returns true if the address of OBJ is invariant in LOOP. */
1042 static bool
1043 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1045 while (handled_component_p (obj))
1047 if (TREE_CODE (obj) == ARRAY_REF)
1049 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1050 need to check the stride and the lower bound of the reference. */
1051 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1052 loop->num)
1053 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1054 loop->num))
1055 return false;
1057 else if (TREE_CODE (obj) == COMPONENT_REF)
1059 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1060 loop->num))
1061 return false;
1063 obj = TREE_OPERAND (obj, 0);
1066 if (!INDIRECT_REF_P (obj))
1067 return true;
1069 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1070 loop->num);
1073 /* Returns true if A and B are accesses to different objects, or to different
1074 fields of the same object. */
1076 static bool
1077 disjoint_objects_p (tree a, tree b)
1079 tree base_a, base_b;
1080 VEC (tree, heap) *comp_a = NULL, *comp_b = NULL;
1081 bool ret;
1083 base_a = get_base_address (a);
1084 base_b = get_base_address (b);
1086 if (DECL_P (base_a)
1087 && DECL_P (base_b)
1088 && base_a != base_b)
1089 return true;
1091 if (!operand_equal_p (base_a, base_b, 0))
1092 return false;
1094 /* Compare the component references of A and B. We must start from the inner
1095 ones, so record them to the vector first. */
1096 while (handled_component_p (a))
1098 VEC_safe_push (tree, heap, comp_a, a);
1099 a = TREE_OPERAND (a, 0);
1101 while (handled_component_p (b))
1103 VEC_safe_push (tree, heap, comp_b, b);
1104 b = TREE_OPERAND (b, 0);
1107 ret = false;
1108 while (1)
1110 if (VEC_length (tree, comp_a) == 0
1111 || VEC_length (tree, comp_b) == 0)
1112 break;
1114 a = VEC_pop (tree, comp_a);
1115 b = VEC_pop (tree, comp_b);
1117 /* Real and imaginary part of a variable do not alias. */
1118 if ((TREE_CODE (a) == REALPART_EXPR
1119 && TREE_CODE (b) == IMAGPART_EXPR)
1120 || (TREE_CODE (a) == IMAGPART_EXPR
1121 && TREE_CODE (b) == REALPART_EXPR))
1123 ret = true;
1124 break;
1127 if (TREE_CODE (a) != TREE_CODE (b))
1128 break;
1130 /* Nothing to do for ARRAY_REFs, as the indices of array_refs in
1131 DR_BASE_OBJECT are always zero. */
1132 if (TREE_CODE (a) == ARRAY_REF)
1133 continue;
1134 else if (TREE_CODE (a) == COMPONENT_REF)
1136 if (operand_equal_p (TREE_OPERAND (a, 1), TREE_OPERAND (b, 1), 0))
1137 continue;
1139 /* Different fields of unions may overlap. */
1140 base_a = TREE_OPERAND (a, 0);
1141 if (TREE_CODE (TREE_TYPE (base_a)) == UNION_TYPE)
1142 break;
1144 /* Different fields of structures cannot. */
1145 ret = true;
1146 break;
1148 else
1149 break;
1152 VEC_free (tree, heap, comp_a);
1153 VEC_free (tree, heap, comp_b);
1155 return ret;
1158 /* Returns false if we can prove that data references A and B do not alias,
1159 true otherwise. */
1161 static bool
1162 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b)
1164 const_tree addr_a = DR_BASE_ADDRESS (a);
1165 const_tree addr_b = DR_BASE_ADDRESS (b);
1166 const_tree type_a, type_b;
1167 const_tree decl_a = NULL_TREE, decl_b = NULL_TREE;
1169 /* If the sets of virtual operands are disjoint, the memory references do not
1170 alias. */
1171 if (!bitmap_intersect_p (DR_VOPS (a), DR_VOPS (b)))
1172 return false;
1174 /* If the accessed objects are disjoint, the memory references do not
1175 alias. */
1176 if (disjoint_objects_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b)))
1177 return false;
1179 if (!addr_a || !addr_b)
1180 return true;
1182 /* If the references are based on different static objects, they cannot alias
1183 (PTA should be able to disambiguate such accesses, but often it fails to,
1184 since currently we cannot distinguish between pointer and offset in pointer
1185 arithmetics). */
1186 if (TREE_CODE (addr_a) == ADDR_EXPR
1187 && TREE_CODE (addr_b) == ADDR_EXPR)
1188 return TREE_OPERAND (addr_a, 0) == TREE_OPERAND (addr_b, 0);
1190 /* An instruction writing through a restricted pointer is "independent" of any
1191 instruction reading or writing through a different restricted pointer,
1192 in the same block/scope. */
1194 type_a = TREE_TYPE (addr_a);
1195 type_b = TREE_TYPE (addr_b);
1196 gcc_assert (POINTER_TYPE_P (type_a) && POINTER_TYPE_P (type_b));
1198 if (TREE_CODE (addr_a) == SSA_NAME)
1199 decl_a = SSA_NAME_VAR (addr_a);
1200 if (TREE_CODE (addr_b) == SSA_NAME)
1201 decl_b = SSA_NAME_VAR (addr_b);
1203 if (TYPE_RESTRICT (type_a) && TYPE_RESTRICT (type_b)
1204 && (!DR_IS_READ (a) || !DR_IS_READ (b))
1205 && decl_a && DECL_P (decl_a)
1206 && decl_b && DECL_P (decl_b)
1207 && decl_a != decl_b
1208 && TREE_CODE (DECL_CONTEXT (decl_a)) == FUNCTION_DECL
1209 && DECL_CONTEXT (decl_a) == DECL_CONTEXT (decl_b))
1210 return false;
1212 return true;
1215 /* Initialize a data dependence relation between data accesses A and
1216 B. NB_LOOPS is the number of loops surrounding the references: the
1217 size of the classic distance/direction vectors. */
1219 static struct data_dependence_relation *
1220 initialize_data_dependence_relation (struct data_reference *a,
1221 struct data_reference *b,
1222 VEC (loop_p, heap) *loop_nest)
1224 struct data_dependence_relation *res;
1225 unsigned int i;
1227 res = XNEW (struct data_dependence_relation);
1228 DDR_A (res) = a;
1229 DDR_B (res) = b;
1230 DDR_LOOP_NEST (res) = NULL;
1231 DDR_REVERSED_P (res) = false;
1232 DDR_SUBSCRIPTS (res) = NULL;
1233 DDR_DIR_VECTS (res) = NULL;
1234 DDR_DIST_VECTS (res) = NULL;
1236 if (a == NULL || b == NULL)
1238 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1239 return res;
1242 /* If the data references do not alias, then they are independent. */
1243 if (!dr_may_alias_p (a, b))
1245 DDR_ARE_DEPENDENT (res) = chrec_known;
1246 return res;
1249 /* If the references do not access the same object, we do not know
1250 whether they alias or not. */
1251 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1253 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1254 return res;
1257 /* If the base of the object is not invariant in the loop nest, we cannot
1258 analyze it. TODO -- in fact, it would suffice to record that there may
1259 be arbitrary dependences in the loops where the base object varies. */
1260 if (!object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1261 DR_BASE_OBJECT (a)))
1263 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1264 return res;
1267 gcc_assert (DR_NUM_DIMENSIONS (a) == DR_NUM_DIMENSIONS (b));
1269 DDR_AFFINE_P (res) = true;
1270 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1271 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1272 DDR_LOOP_NEST (res) = loop_nest;
1273 DDR_INNER_LOOP (res) = 0;
1275 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1277 struct subscript *subscript;
1279 subscript = XNEW (struct subscript);
1280 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1281 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1282 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1283 SUB_DISTANCE (subscript) = chrec_dont_know;
1284 VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1287 return res;
1290 /* Frees memory used by the conflict function F. */
1292 static void
1293 free_conflict_function (conflict_function *f)
1295 unsigned i;
1297 if (CF_NONTRIVIAL_P (f))
1299 for (i = 0; i < f->n; i++)
1300 affine_fn_free (f->fns[i]);
1302 free (f);
1305 /* Frees memory used by SUBSCRIPTS. */
1307 static void
1308 free_subscripts (VEC (subscript_p, heap) *subscripts)
1310 unsigned i;
1311 subscript_p s;
1313 for (i = 0; VEC_iterate (subscript_p, subscripts, i, s); i++)
1315 free_conflict_function (s->conflicting_iterations_in_a);
1316 free_conflict_function (s->conflicting_iterations_in_b);
1318 VEC_free (subscript_p, heap, subscripts);
1321 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1322 description. */
1324 static inline void
1325 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1326 tree chrec)
1328 if (dump_file && (dump_flags & TDF_DETAILS))
1330 fprintf (dump_file, "(dependence classified: ");
1331 print_generic_expr (dump_file, chrec, 0);
1332 fprintf (dump_file, ")\n");
1335 DDR_ARE_DEPENDENT (ddr) = chrec;
1336 free_subscripts (DDR_SUBSCRIPTS (ddr));
1337 DDR_SUBSCRIPTS (ddr) = NULL;
1340 /* The dependence relation DDR cannot be represented by a distance
1341 vector. */
1343 static inline void
1344 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1346 if (dump_file && (dump_flags & TDF_DETAILS))
1347 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1349 DDR_AFFINE_P (ddr) = false;
1354 /* This section contains the classic Banerjee tests. */
1356 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1357 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1359 static inline bool
1360 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1362 return (evolution_function_is_constant_p (chrec_a)
1363 && evolution_function_is_constant_p (chrec_b));
1366 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1367 variable, i.e., if the SIV (Single Index Variable) test is true. */
1369 static bool
1370 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1372 if ((evolution_function_is_constant_p (chrec_a)
1373 && evolution_function_is_univariate_p (chrec_b))
1374 || (evolution_function_is_constant_p (chrec_b)
1375 && evolution_function_is_univariate_p (chrec_a)))
1376 return true;
1378 if (evolution_function_is_univariate_p (chrec_a)
1379 && evolution_function_is_univariate_p (chrec_b))
1381 switch (TREE_CODE (chrec_a))
1383 case POLYNOMIAL_CHREC:
1384 switch (TREE_CODE (chrec_b))
1386 case POLYNOMIAL_CHREC:
1387 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1388 return false;
1390 default:
1391 return true;
1394 default:
1395 return true;
1399 return false;
1402 /* Creates a conflict function with N dimensions. The affine functions
1403 in each dimension follow. */
1405 static conflict_function *
1406 conflict_fn (unsigned n, ...)
1408 unsigned i;
1409 conflict_function *ret = XCNEW (conflict_function);
1410 va_list ap;
1412 gcc_assert (0 < n && n <= MAX_DIM);
1413 va_start(ap, n);
1415 ret->n = n;
1416 for (i = 0; i < n; i++)
1417 ret->fns[i] = va_arg (ap, affine_fn);
1418 va_end(ap);
1420 return ret;
1423 /* Returns constant affine function with value CST. */
1425 static affine_fn
1426 affine_fn_cst (tree cst)
1428 affine_fn fn = VEC_alloc (tree, heap, 1);
1429 VEC_quick_push (tree, fn, cst);
1430 return fn;
1433 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1435 static affine_fn
1436 affine_fn_univar (tree cst, unsigned dim, tree coef)
1438 affine_fn fn = VEC_alloc (tree, heap, dim + 1);
1439 unsigned i;
1441 gcc_assert (dim > 0);
1442 VEC_quick_push (tree, fn, cst);
1443 for (i = 1; i < dim; i++)
1444 VEC_quick_push (tree, fn, integer_zero_node);
1445 VEC_quick_push (tree, fn, coef);
1446 return fn;
1449 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1450 *OVERLAPS_B are initialized to the functions that describe the
1451 relation between the elements accessed twice by CHREC_A and
1452 CHREC_B. For k >= 0, the following property is verified:
1454 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1456 static void
1457 analyze_ziv_subscript (tree chrec_a,
1458 tree chrec_b,
1459 conflict_function **overlaps_a,
1460 conflict_function **overlaps_b,
1461 tree *last_conflicts)
1463 tree difference;
1464 dependence_stats.num_ziv++;
1466 if (dump_file && (dump_flags & TDF_DETAILS))
1467 fprintf (dump_file, "(analyze_ziv_subscript \n");
1469 chrec_a = chrec_convert (integer_type_node, chrec_a, NULL_TREE);
1470 chrec_b = chrec_convert (integer_type_node, chrec_b, NULL_TREE);
1471 difference = chrec_fold_minus (integer_type_node, chrec_a, chrec_b);
1473 switch (TREE_CODE (difference))
1475 case INTEGER_CST:
1476 if (integer_zerop (difference))
1478 /* The difference is equal to zero: the accessed index
1479 overlaps for each iteration in the loop. */
1480 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1481 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1482 *last_conflicts = chrec_dont_know;
1483 dependence_stats.num_ziv_dependent++;
1485 else
1487 /* The accesses do not overlap. */
1488 *overlaps_a = conflict_fn_no_dependence ();
1489 *overlaps_b = conflict_fn_no_dependence ();
1490 *last_conflicts = integer_zero_node;
1491 dependence_stats.num_ziv_independent++;
1493 break;
1495 default:
1496 /* We're not sure whether the indexes overlap. For the moment,
1497 conservatively answer "don't know". */
1498 if (dump_file && (dump_flags & TDF_DETAILS))
1499 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1501 *overlaps_a = conflict_fn_not_known ();
1502 *overlaps_b = conflict_fn_not_known ();
1503 *last_conflicts = chrec_dont_know;
1504 dependence_stats.num_ziv_unimplemented++;
1505 break;
1508 if (dump_file && (dump_flags & TDF_DETAILS))
1509 fprintf (dump_file, ")\n");
1512 /* Sets NIT to the estimated number of executions of the statements in
1513 LOOP. If CONSERVATIVE is true, we must be sure that NIT is at least as
1514 large as the number of iterations. If we have no reliable estimate,
1515 the function returns false, otherwise returns true. */
1517 bool
1518 estimated_loop_iterations (struct loop *loop, bool conservative,
1519 double_int *nit)
1521 estimate_numbers_of_iterations_loop (loop);
1522 if (conservative)
1524 if (!loop->any_upper_bound)
1525 return false;
1527 *nit = loop->nb_iterations_upper_bound;
1529 else
1531 if (!loop->any_estimate)
1532 return false;
1534 *nit = loop->nb_iterations_estimate;
1537 return true;
1540 /* Similar to estimated_loop_iterations, but returns the estimate only
1541 if it fits to HOST_WIDE_INT. If this is not the case, or the estimate
1542 on the number of iterations of LOOP could not be derived, returns -1. */
1544 HOST_WIDE_INT
1545 estimated_loop_iterations_int (struct loop *loop, bool conservative)
1547 double_int nit;
1548 HOST_WIDE_INT hwi_nit;
1550 if (!estimated_loop_iterations (loop, conservative, &nit))
1551 return -1;
1553 if (!double_int_fits_in_shwi_p (nit))
1554 return -1;
1555 hwi_nit = double_int_to_shwi (nit);
1557 return hwi_nit < 0 ? -1 : hwi_nit;
1560 /* Similar to estimated_loop_iterations, but returns the estimate as a tree,
1561 and only if it fits to the int type. If this is not the case, or the
1562 estimate on the number of iterations of LOOP could not be derived, returns
1563 chrec_dont_know. */
1565 static tree
1566 estimated_loop_iterations_tree (struct loop *loop, bool conservative)
1568 double_int nit;
1569 tree type;
1571 if (!estimated_loop_iterations (loop, conservative, &nit))
1572 return chrec_dont_know;
1574 type = lang_hooks.types.type_for_size (INT_TYPE_SIZE, true);
1575 if (!double_int_fits_to_tree_p (type, nit))
1576 return chrec_dont_know;
1578 return double_int_to_tree (type, nit);
1581 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1582 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1583 *OVERLAPS_B are initialized to the functions that describe the
1584 relation between the elements accessed twice by CHREC_A and
1585 CHREC_B. For k >= 0, the following property is verified:
1587 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1589 static void
1590 analyze_siv_subscript_cst_affine (tree chrec_a,
1591 tree chrec_b,
1592 conflict_function **overlaps_a,
1593 conflict_function **overlaps_b,
1594 tree *last_conflicts)
1596 bool value0, value1, value2;
1597 tree difference, tmp;
1599 chrec_a = chrec_convert (integer_type_node, chrec_a, NULL_TREE);
1600 chrec_b = chrec_convert (integer_type_node, chrec_b, NULL_TREE);
1601 difference = chrec_fold_minus
1602 (integer_type_node, initial_condition (chrec_b), chrec_a);
1604 if (!chrec_is_positive (initial_condition (difference), &value0))
1606 if (dump_file && (dump_flags & TDF_DETAILS))
1607 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1609 dependence_stats.num_siv_unimplemented++;
1610 *overlaps_a = conflict_fn_not_known ();
1611 *overlaps_b = conflict_fn_not_known ();
1612 *last_conflicts = chrec_dont_know;
1613 return;
1615 else
1617 if (value0 == false)
1619 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1621 if (dump_file && (dump_flags & TDF_DETAILS))
1622 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1624 *overlaps_a = conflict_fn_not_known ();
1625 *overlaps_b = conflict_fn_not_known ();
1626 *last_conflicts = chrec_dont_know;
1627 dependence_stats.num_siv_unimplemented++;
1628 return;
1630 else
1632 if (value1 == true)
1634 /* Example:
1635 chrec_a = 12
1636 chrec_b = {10, +, 1}
1639 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1641 HOST_WIDE_INT numiter;
1642 struct loop *loop = get_chrec_loop (chrec_b);
1644 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1645 tmp = fold_build2 (EXACT_DIV_EXPR, integer_type_node,
1646 fold_build1 (ABS_EXPR,
1647 integer_type_node,
1648 difference),
1649 CHREC_RIGHT (chrec_b));
1650 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1651 *last_conflicts = integer_one_node;
1654 /* Perform weak-zero siv test to see if overlap is
1655 outside the loop bounds. */
1656 numiter = estimated_loop_iterations_int (loop, false);
1658 if (numiter >= 0
1659 && compare_tree_int (tmp, numiter) > 0)
1661 free_conflict_function (*overlaps_a);
1662 free_conflict_function (*overlaps_b);
1663 *overlaps_a = conflict_fn_no_dependence ();
1664 *overlaps_b = conflict_fn_no_dependence ();
1665 *last_conflicts = integer_zero_node;
1666 dependence_stats.num_siv_independent++;
1667 return;
1669 dependence_stats.num_siv_dependent++;
1670 return;
1673 /* When the step does not divide the difference, there are
1674 no overlaps. */
1675 else
1677 *overlaps_a = conflict_fn_no_dependence ();
1678 *overlaps_b = conflict_fn_no_dependence ();
1679 *last_conflicts = integer_zero_node;
1680 dependence_stats.num_siv_independent++;
1681 return;
1685 else
1687 /* Example:
1688 chrec_a = 12
1689 chrec_b = {10, +, -1}
1691 In this case, chrec_a will not overlap with chrec_b. */
1692 *overlaps_a = conflict_fn_no_dependence ();
1693 *overlaps_b = conflict_fn_no_dependence ();
1694 *last_conflicts = integer_zero_node;
1695 dependence_stats.num_siv_independent++;
1696 return;
1700 else
1702 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1704 if (dump_file && (dump_flags & TDF_DETAILS))
1705 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1707 *overlaps_a = conflict_fn_not_known ();
1708 *overlaps_b = conflict_fn_not_known ();
1709 *last_conflicts = chrec_dont_know;
1710 dependence_stats.num_siv_unimplemented++;
1711 return;
1713 else
1715 if (value2 == false)
1717 /* Example:
1718 chrec_a = 3
1719 chrec_b = {10, +, -1}
1721 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1723 HOST_WIDE_INT numiter;
1724 struct loop *loop = get_chrec_loop (chrec_b);
1726 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1727 tmp = fold_build2 (EXACT_DIV_EXPR,
1728 integer_type_node, difference,
1729 CHREC_RIGHT (chrec_b));
1730 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1731 *last_conflicts = integer_one_node;
1733 /* Perform weak-zero siv test to see if overlap is
1734 outside the loop bounds. */
1735 numiter = estimated_loop_iterations_int (loop, false);
1737 if (numiter >= 0
1738 && compare_tree_int (tmp, numiter) > 0)
1740 free_conflict_function (*overlaps_a);
1741 free_conflict_function (*overlaps_b);
1742 *overlaps_a = conflict_fn_no_dependence ();
1743 *overlaps_b = conflict_fn_no_dependence ();
1744 *last_conflicts = integer_zero_node;
1745 dependence_stats.num_siv_independent++;
1746 return;
1748 dependence_stats.num_siv_dependent++;
1749 return;
1752 /* When the step does not divide the difference, there
1753 are no overlaps. */
1754 else
1756 *overlaps_a = conflict_fn_no_dependence ();
1757 *overlaps_b = conflict_fn_no_dependence ();
1758 *last_conflicts = integer_zero_node;
1759 dependence_stats.num_siv_independent++;
1760 return;
1763 else
1765 /* Example:
1766 chrec_a = 3
1767 chrec_b = {4, +, 1}
1769 In this case, chrec_a will not overlap with chrec_b. */
1770 *overlaps_a = conflict_fn_no_dependence ();
1771 *overlaps_b = conflict_fn_no_dependence ();
1772 *last_conflicts = integer_zero_node;
1773 dependence_stats.num_siv_independent++;
1774 return;
1781 /* Helper recursive function for initializing the matrix A. Returns
1782 the initial value of CHREC. */
1784 static int
1785 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
1787 gcc_assert (chrec);
1789 if (TREE_CODE (chrec) != POLYNOMIAL_CHREC)
1790 return int_cst_value (chrec);
1792 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
1793 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
1796 #define FLOOR_DIV(x,y) ((x) / (y))
1798 /* Solves the special case of the Diophantine equation:
1799 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
1801 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
1802 number of iterations that loops X and Y run. The overlaps will be
1803 constructed as evolutions in dimension DIM. */
1805 static void
1806 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
1807 affine_fn *overlaps_a,
1808 affine_fn *overlaps_b,
1809 tree *last_conflicts, int dim)
1811 if (((step_a > 0 && step_b > 0)
1812 || (step_a < 0 && step_b < 0)))
1814 int step_overlaps_a, step_overlaps_b;
1815 int gcd_steps_a_b, last_conflict, tau2;
1817 gcd_steps_a_b = gcd (step_a, step_b);
1818 step_overlaps_a = step_b / gcd_steps_a_b;
1819 step_overlaps_b = step_a / gcd_steps_a_b;
1821 tau2 = FLOOR_DIV (niter, step_overlaps_a);
1822 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
1823 last_conflict = tau2;
1825 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
1826 build_int_cst (NULL_TREE,
1827 step_overlaps_a));
1828 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
1829 build_int_cst (NULL_TREE,
1830 step_overlaps_b));
1831 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
1834 else
1836 *overlaps_a = affine_fn_cst (integer_zero_node);
1837 *overlaps_b = affine_fn_cst (integer_zero_node);
1838 *last_conflicts = integer_zero_node;
1842 /* Solves the special case of a Diophantine equation where CHREC_A is
1843 an affine bivariate function, and CHREC_B is an affine univariate
1844 function. For example,
1846 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
1848 has the following overlapping functions:
1850 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
1851 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
1852 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
1854 FORNOW: This is a specialized implementation for a case occurring in
1855 a common benchmark. Implement the general algorithm. */
1857 static void
1858 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
1859 conflict_function **overlaps_a,
1860 conflict_function **overlaps_b,
1861 tree *last_conflicts)
1863 bool xz_p, yz_p, xyz_p;
1864 int step_x, step_y, step_z;
1865 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
1866 affine_fn overlaps_a_xz, overlaps_b_xz;
1867 affine_fn overlaps_a_yz, overlaps_b_yz;
1868 affine_fn overlaps_a_xyz, overlaps_b_xyz;
1869 affine_fn ova1, ova2, ovb;
1870 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
1872 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
1873 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
1874 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
1876 niter_x =
1877 estimated_loop_iterations_int (get_chrec_loop (CHREC_LEFT (chrec_a)),
1878 false);
1879 niter_y = estimated_loop_iterations_int (get_chrec_loop (chrec_a), false);
1880 niter_z = estimated_loop_iterations_int (get_chrec_loop (chrec_b), false);
1882 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
1884 if (dump_file && (dump_flags & TDF_DETAILS))
1885 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
1887 *overlaps_a = conflict_fn_not_known ();
1888 *overlaps_b = conflict_fn_not_known ();
1889 *last_conflicts = chrec_dont_know;
1890 return;
1893 niter = MIN (niter_x, niter_z);
1894 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
1895 &overlaps_a_xz,
1896 &overlaps_b_xz,
1897 &last_conflicts_xz, 1);
1898 niter = MIN (niter_y, niter_z);
1899 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
1900 &overlaps_a_yz,
1901 &overlaps_b_yz,
1902 &last_conflicts_yz, 2);
1903 niter = MIN (niter_x, niter_z);
1904 niter = MIN (niter_y, niter);
1905 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
1906 &overlaps_a_xyz,
1907 &overlaps_b_xyz,
1908 &last_conflicts_xyz, 3);
1910 xz_p = !integer_zerop (last_conflicts_xz);
1911 yz_p = !integer_zerop (last_conflicts_yz);
1912 xyz_p = !integer_zerop (last_conflicts_xyz);
1914 if (xz_p || yz_p || xyz_p)
1916 ova1 = affine_fn_cst (integer_zero_node);
1917 ova2 = affine_fn_cst (integer_zero_node);
1918 ovb = affine_fn_cst (integer_zero_node);
1919 if (xz_p)
1921 affine_fn t0 = ova1;
1922 affine_fn t2 = ovb;
1924 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
1925 ovb = affine_fn_plus (ovb, overlaps_b_xz);
1926 affine_fn_free (t0);
1927 affine_fn_free (t2);
1928 *last_conflicts = last_conflicts_xz;
1930 if (yz_p)
1932 affine_fn t0 = ova2;
1933 affine_fn t2 = ovb;
1935 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
1936 ovb = affine_fn_plus (ovb, overlaps_b_yz);
1937 affine_fn_free (t0);
1938 affine_fn_free (t2);
1939 *last_conflicts = last_conflicts_yz;
1941 if (xyz_p)
1943 affine_fn t0 = ova1;
1944 affine_fn t2 = ova2;
1945 affine_fn t4 = ovb;
1947 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
1948 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
1949 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
1950 affine_fn_free (t0);
1951 affine_fn_free (t2);
1952 affine_fn_free (t4);
1953 *last_conflicts = last_conflicts_xyz;
1955 *overlaps_a = conflict_fn (2, ova1, ova2);
1956 *overlaps_b = conflict_fn (1, ovb);
1958 else
1960 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1961 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1962 *last_conflicts = integer_zero_node;
1965 affine_fn_free (overlaps_a_xz);
1966 affine_fn_free (overlaps_b_xz);
1967 affine_fn_free (overlaps_a_yz);
1968 affine_fn_free (overlaps_b_yz);
1969 affine_fn_free (overlaps_a_xyz);
1970 affine_fn_free (overlaps_b_xyz);
1973 /* Determines the overlapping elements due to accesses CHREC_A and
1974 CHREC_B, that are affine functions. This function cannot handle
1975 symbolic evolution functions, ie. when initial conditions are
1976 parameters, because it uses lambda matrices of integers. */
1978 static void
1979 analyze_subscript_affine_affine (tree chrec_a,
1980 tree chrec_b,
1981 conflict_function **overlaps_a,
1982 conflict_function **overlaps_b,
1983 tree *last_conflicts)
1985 unsigned nb_vars_a, nb_vars_b, dim;
1986 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
1987 HOST_WIDE_INT tau1, tau2;
1988 lambda_matrix A, U, S;
1990 if (eq_evolutions_p (chrec_a, chrec_b))
1992 /* The accessed index overlaps for each iteration in the
1993 loop. */
1994 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1995 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1996 *last_conflicts = chrec_dont_know;
1997 return;
1999 if (dump_file && (dump_flags & TDF_DETAILS))
2000 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2002 /* For determining the initial intersection, we have to solve a
2003 Diophantine equation. This is the most time consuming part.
2005 For answering to the question: "Is there a dependence?" we have
2006 to prove that there exists a solution to the Diophantine
2007 equation, and that the solution is in the iteration domain,
2008 i.e. the solution is positive or zero, and that the solution
2009 happens before the upper bound loop.nb_iterations. Otherwise
2010 there is no dependence. This function outputs a description of
2011 the iterations that hold the intersections. */
2013 nb_vars_a = nb_vars_in_chrec (chrec_a);
2014 nb_vars_b = nb_vars_in_chrec (chrec_b);
2016 dim = nb_vars_a + nb_vars_b;
2017 U = lambda_matrix_new (dim, dim);
2018 A = lambda_matrix_new (dim, 1);
2019 S = lambda_matrix_new (dim, 1);
2021 init_a = initialize_matrix_A (A, chrec_a, 0, 1);
2022 init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
2023 gamma = init_b - init_a;
2025 /* Don't do all the hard work of solving the Diophantine equation
2026 when we already know the solution: for example,
2027 | {3, +, 1}_1
2028 | {3, +, 4}_2
2029 | gamma = 3 - 3 = 0.
2030 Then the first overlap occurs during the first iterations:
2031 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2033 if (gamma == 0)
2035 if (nb_vars_a == 1 && nb_vars_b == 1)
2037 HOST_WIDE_INT step_a, step_b;
2038 HOST_WIDE_INT niter, niter_a, niter_b;
2039 affine_fn ova, ovb;
2041 niter_a = estimated_loop_iterations_int (get_chrec_loop (chrec_a),
2042 false);
2043 niter_b = estimated_loop_iterations_int (get_chrec_loop (chrec_b),
2044 false);
2045 if (niter_a < 0 || niter_b < 0)
2047 if (dump_file && (dump_flags & TDF_DETAILS))
2048 fprintf (dump_file, "affine-affine test failed: missing iteration counts.\n");
2049 *overlaps_a = conflict_fn_not_known ();
2050 *overlaps_b = conflict_fn_not_known ();
2051 *last_conflicts = chrec_dont_know;
2052 goto end_analyze_subs_aa;
2055 niter = MIN (niter_a, niter_b);
2057 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2058 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2060 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2061 &ova, &ovb,
2062 last_conflicts, 1);
2063 *overlaps_a = conflict_fn (1, ova);
2064 *overlaps_b = conflict_fn (1, ovb);
2067 else if (nb_vars_a == 2 && nb_vars_b == 1)
2068 compute_overlap_steps_for_affine_1_2
2069 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2071 else if (nb_vars_a == 1 && nb_vars_b == 2)
2072 compute_overlap_steps_for_affine_1_2
2073 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2075 else
2077 if (dump_file && (dump_flags & TDF_DETAILS))
2078 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2079 *overlaps_a = conflict_fn_not_known ();
2080 *overlaps_b = conflict_fn_not_known ();
2081 *last_conflicts = chrec_dont_know;
2083 goto end_analyze_subs_aa;
2086 /* U.A = S */
2087 lambda_matrix_right_hermite (A, dim, 1, S, U);
2089 if (S[0][0] < 0)
2091 S[0][0] *= -1;
2092 lambda_matrix_row_negate (U, dim, 0);
2094 gcd_alpha_beta = S[0][0];
2096 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2097 but that is a quite strange case. Instead of ICEing, answer
2098 don't know. */
2099 if (gcd_alpha_beta == 0)
2101 *overlaps_a = conflict_fn_not_known ();
2102 *overlaps_b = conflict_fn_not_known ();
2103 *last_conflicts = chrec_dont_know;
2104 goto end_analyze_subs_aa;
2107 /* The classic "gcd-test". */
2108 if (!int_divides_p (gcd_alpha_beta, gamma))
2110 /* The "gcd-test" has determined that there is no integer
2111 solution, i.e. there is no dependence. */
2112 *overlaps_a = conflict_fn_no_dependence ();
2113 *overlaps_b = conflict_fn_no_dependence ();
2114 *last_conflicts = integer_zero_node;
2117 /* Both access functions are univariate. This includes SIV and MIV cases. */
2118 else if (nb_vars_a == 1 && nb_vars_b == 1)
2120 /* Both functions should have the same evolution sign. */
2121 if (((A[0][0] > 0 && -A[1][0] > 0)
2122 || (A[0][0] < 0 && -A[1][0] < 0)))
2124 /* The solutions are given by:
2126 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2127 | [u21 u22] [y0]
2129 For a given integer t. Using the following variables,
2131 | i0 = u11 * gamma / gcd_alpha_beta
2132 | j0 = u12 * gamma / gcd_alpha_beta
2133 | i1 = u21
2134 | j1 = u22
2136 the solutions are:
2138 | x0 = i0 + i1 * t,
2139 | y0 = j0 + j1 * t. */
2141 HOST_WIDE_INT i0, j0, i1, j1;
2143 /* X0 and Y0 are the first iterations for which there is a
2144 dependence. X0, Y0 are two solutions of the Diophantine
2145 equation: chrec_a (X0) = chrec_b (Y0). */
2146 HOST_WIDE_INT x0, y0;
2147 HOST_WIDE_INT niter, niter_a, niter_b;
2149 niter_a = estimated_loop_iterations_int (get_chrec_loop (chrec_a),
2150 false);
2151 niter_b = estimated_loop_iterations_int (get_chrec_loop (chrec_b),
2152 false);
2154 if (niter_a < 0 || niter_b < 0)
2156 if (dump_file && (dump_flags & TDF_DETAILS))
2157 fprintf (dump_file, "affine-affine test failed: missing iteration counts.\n");
2158 *overlaps_a = conflict_fn_not_known ();
2159 *overlaps_b = conflict_fn_not_known ();
2160 *last_conflicts = chrec_dont_know;
2161 goto end_analyze_subs_aa;
2164 niter = MIN (niter_a, niter_b);
2166 i0 = U[0][0] * gamma / gcd_alpha_beta;
2167 j0 = U[0][1] * gamma / gcd_alpha_beta;
2168 i1 = U[1][0];
2169 j1 = U[1][1];
2171 if ((i1 == 0 && i0 < 0)
2172 || (j1 == 0 && j0 < 0))
2174 /* There is no solution.
2175 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2176 falls in here, but for the moment we don't look at the
2177 upper bound of the iteration domain. */
2178 *overlaps_a = conflict_fn_no_dependence ();
2179 *overlaps_b = conflict_fn_no_dependence ();
2180 *last_conflicts = integer_zero_node;
2183 else
2185 if (i1 > 0)
2187 tau1 = CEIL (-i0, i1);
2188 tau2 = FLOOR_DIV (niter - i0, i1);
2190 if (j1 > 0)
2192 int last_conflict, min_multiple;
2193 tau1 = MAX (tau1, CEIL (-j0, j1));
2194 tau2 = MIN (tau2, FLOOR_DIV (niter - j0, j1));
2196 x0 = i1 * tau1 + i0;
2197 y0 = j1 * tau1 + j0;
2199 /* At this point (x0, y0) is one of the
2200 solutions to the Diophantine equation. The
2201 next step has to compute the smallest
2202 positive solution: the first conflicts. */
2203 min_multiple = MIN (x0 / i1, y0 / j1);
2204 x0 -= i1 * min_multiple;
2205 y0 -= j1 * min_multiple;
2207 tau1 = (x0 - i0)/i1;
2208 last_conflict = tau2 - tau1;
2210 /* If the overlap occurs outside of the bounds of the
2211 loop, there is no dependence. */
2212 if (x0 > niter || y0 > niter)
2214 *overlaps_a = conflict_fn_no_dependence ();
2215 *overlaps_b = conflict_fn_no_dependence ();
2216 *last_conflicts = integer_zero_node;
2218 else
2220 *overlaps_a
2221 = conflict_fn (1,
2222 affine_fn_univar (build_int_cst (NULL_TREE, x0),
2224 build_int_cst (NULL_TREE, i1)));
2225 *overlaps_b
2226 = conflict_fn (1,
2227 affine_fn_univar (build_int_cst (NULL_TREE, y0),
2229 build_int_cst (NULL_TREE, j1)));
2230 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2233 else
2235 /* FIXME: For the moment, the upper bound of the
2236 iteration domain for j is not checked. */
2237 if (dump_file && (dump_flags & TDF_DETAILS))
2238 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2239 *overlaps_a = conflict_fn_not_known ();
2240 *overlaps_b = conflict_fn_not_known ();
2241 *last_conflicts = chrec_dont_know;
2245 else
2247 /* FIXME: For the moment, the upper bound of the
2248 iteration domain for i is not checked. */
2249 if (dump_file && (dump_flags & TDF_DETAILS))
2250 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2251 *overlaps_a = conflict_fn_not_known ();
2252 *overlaps_b = conflict_fn_not_known ();
2253 *last_conflicts = chrec_dont_know;
2257 else
2259 if (dump_file && (dump_flags & TDF_DETAILS))
2260 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2261 *overlaps_a = conflict_fn_not_known ();
2262 *overlaps_b = conflict_fn_not_known ();
2263 *last_conflicts = chrec_dont_know;
2267 else
2269 if (dump_file && (dump_flags & TDF_DETAILS))
2270 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2271 *overlaps_a = conflict_fn_not_known ();
2272 *overlaps_b = conflict_fn_not_known ();
2273 *last_conflicts = chrec_dont_know;
2276 end_analyze_subs_aa:
2277 if (dump_file && (dump_flags & TDF_DETAILS))
2279 fprintf (dump_file, " (overlaps_a = ");
2280 dump_conflict_function (dump_file, *overlaps_a);
2281 fprintf (dump_file, ")\n (overlaps_b = ");
2282 dump_conflict_function (dump_file, *overlaps_b);
2283 fprintf (dump_file, ")\n");
2284 fprintf (dump_file, ")\n");
2288 /* Returns true when analyze_subscript_affine_affine can be used for
2289 determining the dependence relation between chrec_a and chrec_b,
2290 that contain symbols. This function modifies chrec_a and chrec_b
2291 such that the analysis result is the same, and such that they don't
2292 contain symbols, and then can safely be passed to the analyzer.
2294 Example: The analysis of the following tuples of evolutions produce
2295 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2296 vs. {0, +, 1}_1
2298 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2299 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2302 static bool
2303 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2305 tree diff, type, left_a, left_b, right_b;
2307 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2308 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2309 /* FIXME: For the moment not handled. Might be refined later. */
2310 return false;
2312 type = chrec_type (*chrec_a);
2313 left_a = CHREC_LEFT (*chrec_a);
2314 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL_TREE);
2315 diff = chrec_fold_minus (type, left_a, left_b);
2317 if (!evolution_function_is_constant_p (diff))
2318 return false;
2320 if (dump_file && (dump_flags & TDF_DETAILS))
2321 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2323 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2324 diff, CHREC_RIGHT (*chrec_a));
2325 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL_TREE);
2326 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2327 build_int_cst (type, 0),
2328 right_b);
2329 return true;
2332 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2333 *OVERLAPS_B are initialized to the functions that describe the
2334 relation between the elements accessed twice by CHREC_A and
2335 CHREC_B. For k >= 0, the following property is verified:
2337 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2339 static void
2340 analyze_siv_subscript (tree chrec_a,
2341 tree chrec_b,
2342 conflict_function **overlaps_a,
2343 conflict_function **overlaps_b,
2344 tree *last_conflicts)
2346 dependence_stats.num_siv++;
2348 if (dump_file && (dump_flags & TDF_DETAILS))
2349 fprintf (dump_file, "(analyze_siv_subscript \n");
2351 if (evolution_function_is_constant_p (chrec_a)
2352 && evolution_function_is_affine_p (chrec_b))
2353 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2354 overlaps_a, overlaps_b, last_conflicts);
2356 else if (evolution_function_is_affine_p (chrec_a)
2357 && evolution_function_is_constant_p (chrec_b))
2358 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2359 overlaps_b, overlaps_a, last_conflicts);
2361 else if (evolution_function_is_affine_p (chrec_a)
2362 && evolution_function_is_affine_p (chrec_b))
2364 if (!chrec_contains_symbols (chrec_a)
2365 && !chrec_contains_symbols (chrec_b))
2367 analyze_subscript_affine_affine (chrec_a, chrec_b,
2368 overlaps_a, overlaps_b,
2369 last_conflicts);
2371 if (CF_NOT_KNOWN_P (*overlaps_a)
2372 || CF_NOT_KNOWN_P (*overlaps_b))
2373 dependence_stats.num_siv_unimplemented++;
2374 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2375 || CF_NO_DEPENDENCE_P (*overlaps_b))
2376 dependence_stats.num_siv_independent++;
2377 else
2378 dependence_stats.num_siv_dependent++;
2380 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2381 &chrec_b))
2383 analyze_subscript_affine_affine (chrec_a, chrec_b,
2384 overlaps_a, overlaps_b,
2385 last_conflicts);
2387 if (CF_NOT_KNOWN_P (*overlaps_a)
2388 || CF_NOT_KNOWN_P (*overlaps_b))
2389 dependence_stats.num_siv_unimplemented++;
2390 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2391 || CF_NO_DEPENDENCE_P (*overlaps_b))
2392 dependence_stats.num_siv_independent++;
2393 else
2394 dependence_stats.num_siv_dependent++;
2396 else
2397 goto siv_subscript_dontknow;
2400 else
2402 siv_subscript_dontknow:;
2403 if (dump_file && (dump_flags & TDF_DETAILS))
2404 fprintf (dump_file, "siv test failed: unimplemented.\n");
2405 *overlaps_a = conflict_fn_not_known ();
2406 *overlaps_b = conflict_fn_not_known ();
2407 *last_conflicts = chrec_dont_know;
2408 dependence_stats.num_siv_unimplemented++;
2411 if (dump_file && (dump_flags & TDF_DETAILS))
2412 fprintf (dump_file, ")\n");
2415 /* Returns false if we can prove that the greatest common divisor of the steps
2416 of CHREC does not divide CST, false otherwise. */
2418 static bool
2419 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2421 HOST_WIDE_INT cd = 0, val;
2422 tree step;
2424 if (!host_integerp (cst, 0))
2425 return true;
2426 val = tree_low_cst (cst, 0);
2428 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2430 step = CHREC_RIGHT (chrec);
2431 if (!host_integerp (step, 0))
2432 return true;
2433 cd = gcd (cd, tree_low_cst (step, 0));
2434 chrec = CHREC_LEFT (chrec);
2437 return val % cd == 0;
2440 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2441 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2442 functions that describe the relation between the elements accessed
2443 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2444 is verified:
2446 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2448 static void
2449 analyze_miv_subscript (tree chrec_a,
2450 tree chrec_b,
2451 conflict_function **overlaps_a,
2452 conflict_function **overlaps_b,
2453 tree *last_conflicts,
2454 struct loop *loop_nest)
2456 /* FIXME: This is a MIV subscript, not yet handled.
2457 Example: (A[{1, +, 1}_1] vs. A[{1, +, 1}_2]) that comes from
2458 (A[i] vs. A[j]).
2460 In the SIV test we had to solve a Diophantine equation with two
2461 variables. In the MIV case we have to solve a Diophantine
2462 equation with 2*n variables (if the subscript uses n IVs).
2464 tree difference;
2465 dependence_stats.num_miv++;
2466 if (dump_file && (dump_flags & TDF_DETAILS))
2467 fprintf (dump_file, "(analyze_miv_subscript \n");
2469 chrec_a = chrec_convert (integer_type_node, chrec_a, NULL_TREE);
2470 chrec_b = chrec_convert (integer_type_node, chrec_b, NULL_TREE);
2471 difference = chrec_fold_minus (integer_type_node, chrec_a, chrec_b);
2473 if (eq_evolutions_p (chrec_a, chrec_b))
2475 /* Access functions are the same: all the elements are accessed
2476 in the same order. */
2477 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2478 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2479 *last_conflicts = estimated_loop_iterations_tree
2480 (get_chrec_loop (chrec_a), true);
2481 dependence_stats.num_miv_dependent++;
2484 else if (evolution_function_is_constant_p (difference)
2485 /* For the moment, the following is verified:
2486 evolution_function_is_affine_multivariate_p (chrec_a,
2487 loop_nest->num) */
2488 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2490 /* testsuite/.../ssa-chrec-33.c
2491 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2493 The difference is 1, and all the evolution steps are multiples
2494 of 2, consequently there are no overlapping elements. */
2495 *overlaps_a = conflict_fn_no_dependence ();
2496 *overlaps_b = conflict_fn_no_dependence ();
2497 *last_conflicts = integer_zero_node;
2498 dependence_stats.num_miv_independent++;
2501 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2502 && !chrec_contains_symbols (chrec_a)
2503 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2504 && !chrec_contains_symbols (chrec_b))
2506 /* testsuite/.../ssa-chrec-35.c
2507 {0, +, 1}_2 vs. {0, +, 1}_3
2508 the overlapping elements are respectively located at iterations:
2509 {0, +, 1}_x and {0, +, 1}_x,
2510 in other words, we have the equality:
2511 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2513 Other examples:
2514 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2515 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2517 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2518 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2520 analyze_subscript_affine_affine (chrec_a, chrec_b,
2521 overlaps_a, overlaps_b, last_conflicts);
2523 if (CF_NOT_KNOWN_P (*overlaps_a)
2524 || CF_NOT_KNOWN_P (*overlaps_b))
2525 dependence_stats.num_miv_unimplemented++;
2526 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2527 || CF_NO_DEPENDENCE_P (*overlaps_b))
2528 dependence_stats.num_miv_independent++;
2529 else
2530 dependence_stats.num_miv_dependent++;
2533 else
2535 /* When the analysis is too difficult, answer "don't know". */
2536 if (dump_file && (dump_flags & TDF_DETAILS))
2537 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2539 *overlaps_a = conflict_fn_not_known ();
2540 *overlaps_b = conflict_fn_not_known ();
2541 *last_conflicts = chrec_dont_know;
2542 dependence_stats.num_miv_unimplemented++;
2545 if (dump_file && (dump_flags & TDF_DETAILS))
2546 fprintf (dump_file, ")\n");
2549 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2550 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2551 OVERLAP_ITERATIONS_B are initialized with two functions that
2552 describe the iterations that contain conflicting elements.
2554 Remark: For an integer k >= 0, the following equality is true:
2556 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2559 static void
2560 analyze_overlapping_iterations (tree chrec_a,
2561 tree chrec_b,
2562 conflict_function **overlap_iterations_a,
2563 conflict_function **overlap_iterations_b,
2564 tree *last_conflicts, struct loop *loop_nest)
2566 unsigned int lnn = loop_nest->num;
2568 dependence_stats.num_subscript_tests++;
2570 if (dump_file && (dump_flags & TDF_DETAILS))
2572 fprintf (dump_file, "(analyze_overlapping_iterations \n");
2573 fprintf (dump_file, " (chrec_a = ");
2574 print_generic_expr (dump_file, chrec_a, 0);
2575 fprintf (dump_file, ")\n (chrec_b = ");
2576 print_generic_expr (dump_file, chrec_b, 0);
2577 fprintf (dump_file, ")\n");
2580 if (chrec_a == NULL_TREE
2581 || chrec_b == NULL_TREE
2582 || chrec_contains_undetermined (chrec_a)
2583 || chrec_contains_undetermined (chrec_b))
2585 dependence_stats.num_subscript_undetermined++;
2587 *overlap_iterations_a = conflict_fn_not_known ();
2588 *overlap_iterations_b = conflict_fn_not_known ();
2591 /* If they are the same chrec, and are affine, they overlap
2592 on every iteration. */
2593 else if (eq_evolutions_p (chrec_a, chrec_b)
2594 && evolution_function_is_affine_multivariate_p (chrec_a, lnn))
2596 dependence_stats.num_same_subscript_function++;
2597 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2598 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2599 *last_conflicts = chrec_dont_know;
2602 /* If they aren't the same, and aren't affine, we can't do anything
2603 yet. */
2604 else if ((chrec_contains_symbols (chrec_a)
2605 || chrec_contains_symbols (chrec_b))
2606 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2607 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
2609 dependence_stats.num_subscript_undetermined++;
2610 *overlap_iterations_a = conflict_fn_not_known ();
2611 *overlap_iterations_b = conflict_fn_not_known ();
2614 else if (ziv_subscript_p (chrec_a, chrec_b))
2615 analyze_ziv_subscript (chrec_a, chrec_b,
2616 overlap_iterations_a, overlap_iterations_b,
2617 last_conflicts);
2619 else if (siv_subscript_p (chrec_a, chrec_b))
2620 analyze_siv_subscript (chrec_a, chrec_b,
2621 overlap_iterations_a, overlap_iterations_b,
2622 last_conflicts);
2624 else
2625 analyze_miv_subscript (chrec_a, chrec_b,
2626 overlap_iterations_a, overlap_iterations_b,
2627 last_conflicts, loop_nest);
2629 if (dump_file && (dump_flags & TDF_DETAILS))
2631 fprintf (dump_file, " (overlap_iterations_a = ");
2632 dump_conflict_function (dump_file, *overlap_iterations_a);
2633 fprintf (dump_file, ")\n (overlap_iterations_b = ");
2634 dump_conflict_function (dump_file, *overlap_iterations_b);
2635 fprintf (dump_file, ")\n");
2636 fprintf (dump_file, ")\n");
2640 /* Helper function for uniquely inserting distance vectors. */
2642 static void
2643 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
2645 unsigned i;
2646 lambda_vector v;
2648 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, v); i++)
2649 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
2650 return;
2652 VEC_safe_push (lambda_vector, heap, DDR_DIST_VECTS (ddr), dist_v);
2655 /* Helper function for uniquely inserting direction vectors. */
2657 static void
2658 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
2660 unsigned i;
2661 lambda_vector v;
2663 for (i = 0; VEC_iterate (lambda_vector, DDR_DIR_VECTS (ddr), i, v); i++)
2664 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
2665 return;
2667 VEC_safe_push (lambda_vector, heap, DDR_DIR_VECTS (ddr), dir_v);
2670 /* Add a distance of 1 on all the loops outer than INDEX. If we
2671 haven't yet determined a distance for this outer loop, push a new
2672 distance vector composed of the previous distance, and a distance
2673 of 1 for this outer loop. Example:
2675 | loop_1
2676 | loop_2
2677 | A[10]
2678 | endloop_2
2679 | endloop_1
2681 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
2682 save (0, 1), then we have to save (1, 0). */
2684 static void
2685 add_outer_distances (struct data_dependence_relation *ddr,
2686 lambda_vector dist_v, int index)
2688 /* For each outer loop where init_v is not set, the accesses are
2689 in dependence of distance 1 in the loop. */
2690 while (--index >= 0)
2692 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2693 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
2694 save_v[index] = 1;
2695 save_dist_v (ddr, save_v);
2699 /* Return false when fail to represent the data dependence as a
2700 distance vector. INIT_B is set to true when a component has been
2701 added to the distance vector DIST_V. INDEX_CARRY is then set to
2702 the index in DIST_V that carries the dependence. */
2704 static bool
2705 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
2706 struct data_reference *ddr_a,
2707 struct data_reference *ddr_b,
2708 lambda_vector dist_v, bool *init_b,
2709 int *index_carry)
2711 unsigned i;
2712 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2714 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2716 tree access_fn_a, access_fn_b;
2717 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
2719 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2721 non_affine_dependence_relation (ddr);
2722 return false;
2725 access_fn_a = DR_ACCESS_FN (ddr_a, i);
2726 access_fn_b = DR_ACCESS_FN (ddr_b, i);
2728 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
2729 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
2731 int dist, index;
2732 int index_a = index_in_loop_nest (CHREC_VARIABLE (access_fn_a),
2733 DDR_LOOP_NEST (ddr));
2734 int index_b = index_in_loop_nest (CHREC_VARIABLE (access_fn_b),
2735 DDR_LOOP_NEST (ddr));
2737 /* The dependence is carried by the outermost loop. Example:
2738 | loop_1
2739 | A[{4, +, 1}_1]
2740 | loop_2
2741 | A[{5, +, 1}_2]
2742 | endloop_2
2743 | endloop_1
2744 In this case, the dependence is carried by loop_1. */
2745 index = index_a < index_b ? index_a : index_b;
2746 *index_carry = MIN (index, *index_carry);
2748 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2750 non_affine_dependence_relation (ddr);
2751 return false;
2754 dist = int_cst_value (SUB_DISTANCE (subscript));
2756 /* This is the subscript coupling test. If we have already
2757 recorded a distance for this loop (a distance coming from
2758 another subscript), it should be the same. For example,
2759 in the following code, there is no dependence:
2761 | loop i = 0, N, 1
2762 | T[i+1][i] = ...
2763 | ... = T[i][i]
2764 | endloop
2766 if (init_v[index] != 0 && dist_v[index] != dist)
2768 finalize_ddr_dependent (ddr, chrec_known);
2769 return false;
2772 dist_v[index] = dist;
2773 init_v[index] = 1;
2774 *init_b = true;
2776 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
2778 /* This can be for example an affine vs. constant dependence
2779 (T[i] vs. T[3]) that is not an affine dependence and is
2780 not representable as a distance vector. */
2781 non_affine_dependence_relation (ddr);
2782 return false;
2786 return true;
2789 /* Return true when the DDR contains two data references that have the
2790 same access functions. */
2792 static bool
2793 same_access_functions (const struct data_dependence_relation *ddr)
2795 unsigned i;
2797 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2798 if (!eq_evolutions_p (DR_ACCESS_FN (DDR_A (ddr), i),
2799 DR_ACCESS_FN (DDR_B (ddr), i)))
2800 return false;
2802 return true;
2805 /* Return true when the DDR contains only constant access functions. */
2807 static bool
2808 constant_access_functions (const struct data_dependence_relation *ddr)
2810 unsigned i;
2812 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2813 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
2814 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
2815 return false;
2817 return true;
2821 /* Helper function for the case where DDR_A and DDR_B are the same
2822 multivariate access function. */
2824 static void
2825 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
2827 int x_1, x_2;
2828 tree c_1 = CHREC_LEFT (c_2);
2829 tree c_0 = CHREC_LEFT (c_1);
2830 lambda_vector dist_v;
2831 int v1, v2, cd;
2833 /* Polynomials with more than 2 variables are not handled yet. When
2834 the evolution steps are parameters, it is not possible to
2835 represent the dependence using classical distance vectors. */
2836 if (TREE_CODE (c_0) != INTEGER_CST
2837 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
2838 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
2840 DDR_AFFINE_P (ddr) = false;
2841 return;
2844 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
2845 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
2847 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
2848 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2849 v1 = int_cst_value (CHREC_RIGHT (c_1));
2850 v2 = int_cst_value (CHREC_RIGHT (c_2));
2851 cd = gcd (v1, v2);
2852 v1 /= cd;
2853 v2 /= cd;
2855 if (v2 < 0)
2857 v2 = -v2;
2858 v1 = -v1;
2861 dist_v[x_1] = v2;
2862 dist_v[x_2] = -v1;
2863 save_dist_v (ddr, dist_v);
2865 add_outer_distances (ddr, dist_v, x_1);
2868 /* Helper function for the case where DDR_A and DDR_B are the same
2869 access functions. */
2871 static void
2872 add_other_self_distances (struct data_dependence_relation *ddr)
2874 lambda_vector dist_v;
2875 unsigned i;
2876 int index_carry = DDR_NB_LOOPS (ddr);
2878 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2880 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
2882 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
2884 if (!evolution_function_is_univariate_p (access_fun))
2886 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
2888 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
2889 return;
2892 add_multivariate_self_dist (ddr, DR_ACCESS_FN (DDR_A (ddr), 0));
2893 return;
2896 index_carry = MIN (index_carry,
2897 index_in_loop_nest (CHREC_VARIABLE (access_fun),
2898 DDR_LOOP_NEST (ddr)));
2902 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2903 add_outer_distances (ddr, dist_v, index_carry);
2906 static void
2907 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
2909 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2911 dist_v[DDR_INNER_LOOP (ddr)] = 1;
2912 save_dist_v (ddr, dist_v);
2915 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
2916 is the case for example when access functions are the same and
2917 equal to a constant, as in:
2919 | loop_1
2920 | A[3] = ...
2921 | ... = A[3]
2922 | endloop_1
2924 in which case the distance vectors are (0) and (1). */
2926 static void
2927 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
2929 unsigned i, j;
2931 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2933 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
2934 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
2935 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
2937 for (j = 0; j < ca->n; j++)
2938 if (affine_function_zero_p (ca->fns[j]))
2940 insert_innermost_unit_dist_vector (ddr);
2941 return;
2944 for (j = 0; j < cb->n; j++)
2945 if (affine_function_zero_p (cb->fns[j]))
2947 insert_innermost_unit_dist_vector (ddr);
2948 return;
2953 /* Compute the classic per loop distance vector. DDR is the data
2954 dependence relation to build a vector from. Return false when fail
2955 to represent the data dependence as a distance vector. */
2957 static bool
2958 build_classic_dist_vector (struct data_dependence_relation *ddr,
2959 struct loop *loop_nest)
2961 bool init_b = false;
2962 int index_carry = DDR_NB_LOOPS (ddr);
2963 lambda_vector dist_v;
2965 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
2966 return false;
2968 if (same_access_functions (ddr))
2970 /* Save the 0 vector. */
2971 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2972 save_dist_v (ddr, dist_v);
2974 if (constant_access_functions (ddr))
2975 add_distance_for_zero_overlaps (ddr);
2977 if (DDR_NB_LOOPS (ddr) > 1)
2978 add_other_self_distances (ddr);
2980 return true;
2983 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2984 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
2985 dist_v, &init_b, &index_carry))
2986 return false;
2988 /* Save the distance vector if we initialized one. */
2989 if (init_b)
2991 /* Verify a basic constraint: classic distance vectors should
2992 always be lexicographically positive.
2994 Data references are collected in the order of execution of
2995 the program, thus for the following loop
2997 | for (i = 1; i < 100; i++)
2998 | for (j = 1; j < 100; j++)
3000 | t = T[j+1][i-1]; // A
3001 | T[j][i] = t + 2; // B
3004 references are collected following the direction of the wind:
3005 A then B. The data dependence tests are performed also
3006 following this order, such that we're looking at the distance
3007 separating the elements accessed by A from the elements later
3008 accessed by B. But in this example, the distance returned by
3009 test_dep (A, B) is lexicographically negative (-1, 1), that
3010 means that the access A occurs later than B with respect to
3011 the outer loop, ie. we're actually looking upwind. In this
3012 case we solve test_dep (B, A) looking downwind to the
3013 lexicographically positive solution, that returns the
3014 distance vector (1, -1). */
3015 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3017 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3018 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3019 loop_nest))
3020 return false;
3021 compute_subscript_distance (ddr);
3022 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3023 save_v, &init_b, &index_carry))
3024 return false;
3025 save_dist_v (ddr, save_v);
3026 DDR_REVERSED_P (ddr) = true;
3028 /* In this case there is a dependence forward for all the
3029 outer loops:
3031 | for (k = 1; k < 100; k++)
3032 | for (i = 1; i < 100; i++)
3033 | for (j = 1; j < 100; j++)
3035 | t = T[j+1][i-1]; // A
3036 | T[j][i] = t + 2; // B
3039 the vectors are:
3040 (0, 1, -1)
3041 (1, 1, -1)
3042 (1, -1, 1)
3044 if (DDR_NB_LOOPS (ddr) > 1)
3046 add_outer_distances (ddr, save_v, index_carry);
3047 add_outer_distances (ddr, dist_v, index_carry);
3050 else
3052 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3053 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3055 if (DDR_NB_LOOPS (ddr) > 1)
3057 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3059 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3060 DDR_A (ddr), loop_nest))
3061 return false;
3062 compute_subscript_distance (ddr);
3063 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3064 opposite_v, &init_b,
3065 &index_carry))
3066 return false;
3068 save_dist_v (ddr, save_v);
3069 add_outer_distances (ddr, dist_v, index_carry);
3070 add_outer_distances (ddr, opposite_v, index_carry);
3072 else
3073 save_dist_v (ddr, save_v);
3076 else
3078 /* There is a distance of 1 on all the outer loops: Example:
3079 there is a dependence of distance 1 on loop_1 for the array A.
3081 | loop_1
3082 | A[5] = ...
3083 | endloop
3085 add_outer_distances (ddr, dist_v,
3086 lambda_vector_first_nz (dist_v,
3087 DDR_NB_LOOPS (ddr), 0));
3090 if (dump_file && (dump_flags & TDF_DETAILS))
3092 unsigned i;
3094 fprintf (dump_file, "(build_classic_dist_vector\n");
3095 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3097 fprintf (dump_file, " dist_vector = (");
3098 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3099 DDR_NB_LOOPS (ddr));
3100 fprintf (dump_file, " )\n");
3102 fprintf (dump_file, ")\n");
3105 return true;
3108 /* Return the direction for a given distance.
3109 FIXME: Computing dir this way is suboptimal, since dir can catch
3110 cases that dist is unable to represent. */
3112 static inline enum data_dependence_direction
3113 dir_from_dist (int dist)
3115 if (dist > 0)
3116 return dir_positive;
3117 else if (dist < 0)
3118 return dir_negative;
3119 else
3120 return dir_equal;
3123 /* Compute the classic per loop direction vector. DDR is the data
3124 dependence relation to build a vector from. */
3126 static void
3127 build_classic_dir_vector (struct data_dependence_relation *ddr)
3129 unsigned i, j;
3130 lambda_vector dist_v;
3132 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
3134 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3136 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3137 dir_v[j] = dir_from_dist (dist_v[j]);
3139 save_dir_v (ddr, dir_v);
3143 /* Helper function. Returns true when there is a dependence between
3144 data references DRA and DRB. */
3146 static bool
3147 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3148 struct data_reference *dra,
3149 struct data_reference *drb,
3150 struct loop *loop_nest)
3152 unsigned int i;
3153 tree last_conflicts;
3154 struct subscript *subscript;
3156 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3157 i++)
3159 conflict_function *overlaps_a, *overlaps_b;
3161 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3162 DR_ACCESS_FN (drb, i),
3163 &overlaps_a, &overlaps_b,
3164 &last_conflicts, loop_nest);
3166 if (CF_NOT_KNOWN_P (overlaps_a)
3167 || CF_NOT_KNOWN_P (overlaps_b))
3169 finalize_ddr_dependent (ddr, chrec_dont_know);
3170 dependence_stats.num_dependence_undetermined++;
3171 free_conflict_function (overlaps_a);
3172 free_conflict_function (overlaps_b);
3173 return false;
3176 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3177 || CF_NO_DEPENDENCE_P (overlaps_b))
3179 finalize_ddr_dependent (ddr, chrec_known);
3180 dependence_stats.num_dependence_independent++;
3181 free_conflict_function (overlaps_a);
3182 free_conflict_function (overlaps_b);
3183 return false;
3186 else
3188 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3189 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3190 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3194 return true;
3197 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3199 static void
3200 subscript_dependence_tester (struct data_dependence_relation *ddr,
3201 struct loop *loop_nest)
3204 if (dump_file && (dump_flags & TDF_DETAILS))
3205 fprintf (dump_file, "(subscript_dependence_tester \n");
3207 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3208 dependence_stats.num_dependence_dependent++;
3210 compute_subscript_distance (ddr);
3211 if (build_classic_dist_vector (ddr, loop_nest))
3212 build_classic_dir_vector (ddr);
3214 if (dump_file && (dump_flags & TDF_DETAILS))
3215 fprintf (dump_file, ")\n");
3218 /* Returns true when all the access functions of A are affine or
3219 constant with respect to LOOP_NEST. */
3221 static bool
3222 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3223 const struct loop *loop_nest)
3225 unsigned int i;
3226 VEC(tree,heap) *fns = DR_ACCESS_FNS (a);
3227 tree t;
3229 for (i = 0; VEC_iterate (tree, fns, i, t); i++)
3230 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3231 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3232 return false;
3234 return true;
3237 /* Initializes an equation for an OMEGA problem using the information
3238 contained in the ACCESS_FUN. Returns true when the operation
3239 succeeded.
3241 PB is the omega constraint system.
3242 EQ is the number of the equation to be initialized.
3243 OFFSET is used for shifting the variables names in the constraints:
3244 a constrain is composed of 2 * the number of variables surrounding
3245 dependence accesses. OFFSET is set either to 0 for the first n variables,
3246 then it is set to n.
3247 ACCESS_FUN is expected to be an affine chrec. */
3249 static bool
3250 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3251 unsigned int offset, tree access_fun,
3252 struct data_dependence_relation *ddr)
3254 switch (TREE_CODE (access_fun))
3256 case POLYNOMIAL_CHREC:
3258 tree left = CHREC_LEFT (access_fun);
3259 tree right = CHREC_RIGHT (access_fun);
3260 int var = CHREC_VARIABLE (access_fun);
3261 unsigned var_idx;
3263 if (TREE_CODE (right) != INTEGER_CST)
3264 return false;
3266 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3267 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3269 /* Compute the innermost loop index. */
3270 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3272 if (offset == 0)
3273 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3274 += int_cst_value (right);
3276 switch (TREE_CODE (left))
3278 case POLYNOMIAL_CHREC:
3279 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3281 case INTEGER_CST:
3282 pb->eqs[eq].coef[0] += int_cst_value (left);
3283 return true;
3285 default:
3286 return false;
3290 case INTEGER_CST:
3291 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3292 return true;
3294 default:
3295 return false;
3299 /* As explained in the comments preceding init_omega_for_ddr, we have
3300 to set up a system for each loop level, setting outer loops
3301 variation to zero, and current loop variation to positive or zero.
3302 Save each lexico positive distance vector. */
3304 static void
3305 omega_extract_distance_vectors (omega_pb pb,
3306 struct data_dependence_relation *ddr)
3308 int eq, geq;
3309 unsigned i, j;
3310 struct loop *loopi, *loopj;
3311 enum omega_result res;
3313 /* Set a new problem for each loop in the nest. The basis is the
3314 problem that we have initialized until now. On top of this we
3315 add new constraints. */
3316 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3317 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3319 int dist = 0;
3320 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3321 DDR_NB_LOOPS (ddr));
3323 omega_copy_problem (copy, pb);
3325 /* For all the outer loops "loop_j", add "dj = 0". */
3326 for (j = 0;
3327 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3329 eq = omega_add_zero_eq (copy, omega_black);
3330 copy->eqs[eq].coef[j + 1] = 1;
3333 /* For "loop_i", add "0 <= di". */
3334 geq = omega_add_zero_geq (copy, omega_black);
3335 copy->geqs[geq].coef[i + 1] = 1;
3337 /* Reduce the constraint system, and test that the current
3338 problem is feasible. */
3339 res = omega_simplify_problem (copy);
3340 if (res == omega_false
3341 || res == omega_unknown
3342 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3343 goto next_problem;
3345 for (eq = 0; eq < copy->num_subs; eq++)
3346 if (copy->subs[eq].key == (int) i + 1)
3348 dist = copy->subs[eq].coef[0];
3349 goto found_dist;
3352 if (dist == 0)
3354 /* Reinitialize problem... */
3355 omega_copy_problem (copy, pb);
3356 for (j = 0;
3357 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3359 eq = omega_add_zero_eq (copy, omega_black);
3360 copy->eqs[eq].coef[j + 1] = 1;
3363 /* ..., but this time "di = 1". */
3364 eq = omega_add_zero_eq (copy, omega_black);
3365 copy->eqs[eq].coef[i + 1] = 1;
3366 copy->eqs[eq].coef[0] = -1;
3368 res = omega_simplify_problem (copy);
3369 if (res == omega_false
3370 || res == omega_unknown
3371 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3372 goto next_problem;
3374 for (eq = 0; eq < copy->num_subs; eq++)
3375 if (copy->subs[eq].key == (int) i + 1)
3377 dist = copy->subs[eq].coef[0];
3378 goto found_dist;
3382 found_dist:;
3383 /* Save the lexicographically positive distance vector. */
3384 if (dist >= 0)
3386 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3387 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3389 dist_v[i] = dist;
3391 for (eq = 0; eq < copy->num_subs; eq++)
3392 if (copy->subs[eq].key > 0)
3394 dist = copy->subs[eq].coef[0];
3395 dist_v[copy->subs[eq].key - 1] = dist;
3398 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3399 dir_v[j] = dir_from_dist (dist_v[j]);
3401 save_dist_v (ddr, dist_v);
3402 save_dir_v (ddr, dir_v);
3405 next_problem:;
3406 omega_free_problem (copy);
3410 /* This is called for each subscript of a tuple of data references:
3411 insert an equality for representing the conflicts. */
3413 static bool
3414 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3415 struct data_dependence_relation *ddr,
3416 omega_pb pb, bool *maybe_dependent)
3418 int eq;
3419 tree fun_a = chrec_convert (integer_type_node, access_fun_a, NULL_TREE);
3420 tree fun_b = chrec_convert (integer_type_node, access_fun_b, NULL_TREE);
3421 tree difference = chrec_fold_minus (integer_type_node, fun_a, fun_b);
3423 /* When the fun_a - fun_b is not constant, the dependence is not
3424 captured by the classic distance vector representation. */
3425 if (TREE_CODE (difference) != INTEGER_CST)
3426 return false;
3428 /* ZIV test. */
3429 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3431 /* There is no dependence. */
3432 *maybe_dependent = false;
3433 return true;
3436 fun_b = chrec_fold_multiply (integer_type_node, fun_b,
3437 integer_minus_one_node);
3439 eq = omega_add_zero_eq (pb, omega_black);
3440 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3441 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3442 /* There is probably a dependence, but the system of
3443 constraints cannot be built: answer "don't know". */
3444 return false;
3446 /* GCD test. */
3447 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3448 && !int_divides_p (lambda_vector_gcd
3449 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3450 2 * DDR_NB_LOOPS (ddr)),
3451 pb->eqs[eq].coef[0]))
3453 /* There is no dependence. */
3454 *maybe_dependent = false;
3455 return true;
3458 return true;
3461 /* Helper function, same as init_omega_for_ddr but specialized for
3462 data references A and B. */
3464 static bool
3465 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3466 struct data_dependence_relation *ddr,
3467 omega_pb pb, bool *maybe_dependent)
3469 unsigned i;
3470 int ineq;
3471 struct loop *loopi;
3472 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3474 /* Insert an equality per subscript. */
3475 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3477 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3478 ddr, pb, maybe_dependent))
3479 return false;
3480 else if (*maybe_dependent == false)
3482 /* There is no dependence. */
3483 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3484 return true;
3488 /* Insert inequalities: constraints corresponding to the iteration
3489 domain, i.e. the loops surrounding the references "loop_x" and
3490 the distance variables "dx". The layout of the OMEGA
3491 representation is as follows:
3492 - coef[0] is the constant
3493 - coef[1..nb_loops] are the protected variables that will not be
3494 removed by the solver: the "dx"
3495 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3497 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3498 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3500 HOST_WIDE_INT nbi = estimated_loop_iterations_int (loopi, false);
3502 /* 0 <= loop_x */
3503 ineq = omega_add_zero_geq (pb, omega_black);
3504 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3506 /* 0 <= loop_x + dx */
3507 ineq = omega_add_zero_geq (pb, omega_black);
3508 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3509 pb->geqs[ineq].coef[i + 1] = 1;
3511 if (nbi != -1)
3513 /* loop_x <= nb_iters */
3514 ineq = omega_add_zero_geq (pb, omega_black);
3515 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3516 pb->geqs[ineq].coef[0] = nbi;
3518 /* loop_x + dx <= nb_iters */
3519 ineq = omega_add_zero_geq (pb, omega_black);
3520 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3521 pb->geqs[ineq].coef[i + 1] = -1;
3522 pb->geqs[ineq].coef[0] = nbi;
3524 /* A step "dx" bigger than nb_iters is not feasible, so
3525 add "0 <= nb_iters + dx", */
3526 ineq = omega_add_zero_geq (pb, omega_black);
3527 pb->geqs[ineq].coef[i + 1] = 1;
3528 pb->geqs[ineq].coef[0] = nbi;
3529 /* and "dx <= nb_iters". */
3530 ineq = omega_add_zero_geq (pb, omega_black);
3531 pb->geqs[ineq].coef[i + 1] = -1;
3532 pb->geqs[ineq].coef[0] = nbi;
3536 omega_extract_distance_vectors (pb, ddr);
3538 return true;
3541 /* Sets up the Omega dependence problem for the data dependence
3542 relation DDR. Returns false when the constraint system cannot be
3543 built, ie. when the test answers "don't know". Returns true
3544 otherwise, and when independence has been proved (using one of the
3545 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3546 set MAYBE_DEPENDENT to true.
3548 Example: for setting up the dependence system corresponding to the
3549 conflicting accesses
3551 | loop_i
3552 | loop_j
3553 | A[i, i+1] = ...
3554 | ... A[2*j, 2*(i + j)]
3555 | endloop_j
3556 | endloop_i
3558 the following constraints come from the iteration domain:
3560 0 <= i <= Ni
3561 0 <= i + di <= Ni
3562 0 <= j <= Nj
3563 0 <= j + dj <= Nj
3565 where di, dj are the distance variables. The constraints
3566 representing the conflicting elements are:
3568 i = 2 * (j + dj)
3569 i + 1 = 2 * (i + di + j + dj)
3571 For asking that the resulting distance vector (di, dj) be
3572 lexicographically positive, we insert the constraint "di >= 0". If
3573 "di = 0" in the solution, we fix that component to zero, and we
3574 look at the inner loops: we set a new problem where all the outer
3575 loop distances are zero, and fix this inner component to be
3576 positive. When one of the components is positive, we save that
3577 distance, and set a new problem where the distance on this loop is
3578 zero, searching for other distances in the inner loops. Here is
3579 the classic example that illustrates that we have to set for each
3580 inner loop a new problem:
3582 | loop_1
3583 | loop_2
3584 | A[10]
3585 | endloop_2
3586 | endloop_1
3588 we have to save two distances (1, 0) and (0, 1).
3590 Given two array references, refA and refB, we have to set the
3591 dependence problem twice, refA vs. refB and refB vs. refA, and we
3592 cannot do a single test, as refB might occur before refA in the
3593 inner loops, and the contrary when considering outer loops: ex.
3595 | loop_0
3596 | loop_1
3597 | loop_2
3598 | T[{1,+,1}_2][{1,+,1}_1] // refA
3599 | T[{2,+,1}_2][{0,+,1}_1] // refB
3600 | endloop_2
3601 | endloop_1
3602 | endloop_0
3604 refB touches the elements in T before refA, and thus for the same
3605 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3606 but for successive loop_0 iterations, we have (1, -1, 1)
3608 The Omega solver expects the distance variables ("di" in the
3609 previous example) to come first in the constraint system (as
3610 variables to be protected, or "safe" variables), the constraint
3611 system is built using the following layout:
3613 "cst | distance vars | index vars".
3616 static bool
3617 init_omega_for_ddr (struct data_dependence_relation *ddr,
3618 bool *maybe_dependent)
3620 omega_pb pb;
3621 bool res = false;
3623 *maybe_dependent = true;
3625 if (same_access_functions (ddr))
3627 unsigned j;
3628 lambda_vector dir_v;
3630 /* Save the 0 vector. */
3631 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3632 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3633 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3634 dir_v[j] = dir_equal;
3635 save_dir_v (ddr, dir_v);
3637 /* Save the dependences carried by outer loops. */
3638 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3639 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3640 maybe_dependent);
3641 omega_free_problem (pb);
3642 return res;
3645 /* Omega expects the protected variables (those that have to be kept
3646 after elimination) to appear first in the constraint system.
3647 These variables are the distance variables. In the following
3648 initialization we declare NB_LOOPS safe variables, and the total
3649 number of variables for the constraint system is 2*NB_LOOPS. */
3650 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3651 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3652 maybe_dependent);
3653 omega_free_problem (pb);
3655 /* Stop computation if not decidable, or no dependence. */
3656 if (res == false || *maybe_dependent == false)
3657 return res;
3659 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3660 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
3661 maybe_dependent);
3662 omega_free_problem (pb);
3664 return res;
3667 /* Return true when DDR contains the same information as that stored
3668 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
3670 static bool
3671 ddr_consistent_p (FILE *file,
3672 struct data_dependence_relation *ddr,
3673 VEC (lambda_vector, heap) *dist_vects,
3674 VEC (lambda_vector, heap) *dir_vects)
3676 unsigned int i, j;
3678 /* If dump_file is set, output there. */
3679 if (dump_file && (dump_flags & TDF_DETAILS))
3680 file = dump_file;
3682 if (VEC_length (lambda_vector, dist_vects) != DDR_NUM_DIST_VECTS (ddr))
3684 lambda_vector b_dist_v;
3685 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
3686 VEC_length (lambda_vector, dist_vects),
3687 DDR_NUM_DIST_VECTS (ddr));
3689 fprintf (file, "Banerjee dist vectors:\n");
3690 for (i = 0; VEC_iterate (lambda_vector, dist_vects, i, b_dist_v); i++)
3691 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
3693 fprintf (file, "Omega dist vectors:\n");
3694 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3695 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
3697 fprintf (file, "data dependence relation:\n");
3698 dump_data_dependence_relation (file, ddr);
3700 fprintf (file, ")\n");
3701 return false;
3704 if (VEC_length (lambda_vector, dir_vects) != DDR_NUM_DIR_VECTS (ddr))
3706 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
3707 VEC_length (lambda_vector, dir_vects),
3708 DDR_NUM_DIR_VECTS (ddr));
3709 return false;
3712 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3714 lambda_vector a_dist_v;
3715 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
3717 /* Distance vectors are not ordered in the same way in the DDR
3718 and in the DIST_VECTS: search for a matching vector. */
3719 for (j = 0; VEC_iterate (lambda_vector, dist_vects, j, a_dist_v); j++)
3720 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
3721 break;
3723 if (j == VEC_length (lambda_vector, dist_vects))
3725 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
3726 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
3727 fprintf (file, "not found in Omega dist vectors:\n");
3728 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
3729 fprintf (file, "data dependence relation:\n");
3730 dump_data_dependence_relation (file, ddr);
3731 fprintf (file, ")\n");
3735 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
3737 lambda_vector a_dir_v;
3738 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
3740 /* Direction vectors are not ordered in the same way in the DDR
3741 and in the DIR_VECTS: search for a matching vector. */
3742 for (j = 0; VEC_iterate (lambda_vector, dir_vects, j, a_dir_v); j++)
3743 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
3744 break;
3746 if (j == VEC_length (lambda_vector, dist_vects))
3748 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
3749 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
3750 fprintf (file, "not found in Omega dir vectors:\n");
3751 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
3752 fprintf (file, "data dependence relation:\n");
3753 dump_data_dependence_relation (file, ddr);
3754 fprintf (file, ")\n");
3758 return true;
3761 /* This computes the affine dependence relation between A and B with
3762 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
3763 independence between two accesses, while CHREC_DONT_KNOW is used
3764 for representing the unknown relation.
3766 Note that it is possible to stop the computation of the dependence
3767 relation the first time we detect a CHREC_KNOWN element for a given
3768 subscript. */
3770 static void
3771 compute_affine_dependence (struct data_dependence_relation *ddr,
3772 struct loop *loop_nest)
3774 struct data_reference *dra = DDR_A (ddr);
3775 struct data_reference *drb = DDR_B (ddr);
3777 if (dump_file && (dump_flags & TDF_DETAILS))
3779 fprintf (dump_file, "(compute_affine_dependence\n");
3780 fprintf (dump_file, " (stmt_a = \n");
3781 print_generic_expr (dump_file, DR_STMT (dra), 0);
3782 fprintf (dump_file, ")\n (stmt_b = \n");
3783 print_generic_expr (dump_file, DR_STMT (drb), 0);
3784 fprintf (dump_file, ")\n");
3787 /* Analyze only when the dependence relation is not yet known. */
3788 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
3790 dependence_stats.num_dependence_tests++;
3792 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
3793 && access_functions_are_affine_or_constant_p (drb, loop_nest))
3795 if (flag_check_data_deps)
3797 /* Compute the dependences using the first algorithm. */
3798 subscript_dependence_tester (ddr, loop_nest);
3800 if (dump_file && (dump_flags & TDF_DETAILS))
3802 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
3803 dump_data_dependence_relation (dump_file, ddr);
3806 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
3808 bool maybe_dependent;
3809 VEC (lambda_vector, heap) *dir_vects, *dist_vects;
3811 /* Save the result of the first DD analyzer. */
3812 dist_vects = DDR_DIST_VECTS (ddr);
3813 dir_vects = DDR_DIR_VECTS (ddr);
3815 /* Reset the information. */
3816 DDR_DIST_VECTS (ddr) = NULL;
3817 DDR_DIR_VECTS (ddr) = NULL;
3819 /* Compute the same information using Omega. */
3820 if (!init_omega_for_ddr (ddr, &maybe_dependent))
3821 goto csys_dont_know;
3823 if (dump_file && (dump_flags & TDF_DETAILS))
3825 fprintf (dump_file, "Omega Analyzer\n");
3826 dump_data_dependence_relation (dump_file, ddr);
3829 /* Check that we get the same information. */
3830 if (maybe_dependent)
3831 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
3832 dir_vects));
3835 else
3836 subscript_dependence_tester (ddr, loop_nest);
3839 /* As a last case, if the dependence cannot be determined, or if
3840 the dependence is considered too difficult to determine, answer
3841 "don't know". */
3842 else
3844 csys_dont_know:;
3845 dependence_stats.num_dependence_undetermined++;
3847 if (dump_file && (dump_flags & TDF_DETAILS))
3849 fprintf (dump_file, "Data ref a:\n");
3850 dump_data_reference (dump_file, dra);
3851 fprintf (dump_file, "Data ref b:\n");
3852 dump_data_reference (dump_file, drb);
3853 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
3855 finalize_ddr_dependent (ddr, chrec_dont_know);
3859 if (dump_file && (dump_flags & TDF_DETAILS))
3860 fprintf (dump_file, ")\n");
3863 /* This computes the dependence relation for the same data
3864 reference into DDR. */
3866 static void
3867 compute_self_dependence (struct data_dependence_relation *ddr)
3869 unsigned int i;
3870 struct subscript *subscript;
3872 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3873 return;
3875 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3876 i++)
3878 /* The accessed index overlaps for each iteration. */
3879 SUB_CONFLICTS_IN_A (subscript)
3880 = conflict_fn (1, affine_fn_cst (integer_zero_node));
3881 SUB_CONFLICTS_IN_B (subscript)
3882 = conflict_fn (1, affine_fn_cst (integer_zero_node));
3883 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
3886 /* The distance vector is the zero vector. */
3887 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3888 save_dir_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3891 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
3892 the data references in DATAREFS, in the LOOP_NEST. When
3893 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
3894 relations. */
3896 void
3897 compute_all_dependences (VEC (data_reference_p, heap) *datarefs,
3898 VEC (ddr_p, heap) **dependence_relations,
3899 VEC (loop_p, heap) *loop_nest,
3900 bool compute_self_and_rr)
3902 struct data_dependence_relation *ddr;
3903 struct data_reference *a, *b;
3904 unsigned int i, j;
3906 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, a); i++)
3907 for (j = i + 1; VEC_iterate (data_reference_p, datarefs, j, b); j++)
3908 if (!DR_IS_READ (a) || !DR_IS_READ (b) || compute_self_and_rr)
3910 ddr = initialize_data_dependence_relation (a, b, loop_nest);
3911 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
3912 compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
3915 if (compute_self_and_rr)
3916 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, a); i++)
3918 ddr = initialize_data_dependence_relation (a, a, loop_nest);
3919 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
3920 compute_self_dependence (ddr);
3924 /* Stores the locations of memory references in STMT to REFERENCES. Returns
3925 true if STMT clobbers memory, false otherwise. */
3927 bool
3928 get_references_in_stmt (tree stmt, VEC (data_ref_loc, heap) **references)
3930 bool clobbers_memory = false;
3931 data_ref_loc *ref;
3932 tree *op0, *op1, call;
3934 *references = NULL;
3936 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
3937 Calls have side-effects, except those to const or pure
3938 functions. */
3939 call = get_call_expr_in (stmt);
3940 if ((call
3941 && !(call_expr_flags (call) & (ECF_CONST | ECF_PURE)))
3942 || (TREE_CODE (stmt) == ASM_EXPR
3943 && ASM_VOLATILE_P (stmt)))
3944 clobbers_memory = true;
3946 if (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS))
3947 return clobbers_memory;
3949 if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
3951 op0 = &GIMPLE_STMT_OPERAND (stmt, 0);
3952 op1 = &GIMPLE_STMT_OPERAND (stmt, 1);
3954 if (DECL_P (*op1)
3955 || REFERENCE_CLASS_P (*op1))
3957 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
3958 ref->pos = op1;
3959 ref->is_read = true;
3962 if (DECL_P (*op0)
3963 || REFERENCE_CLASS_P (*op0))
3965 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
3966 ref->pos = op0;
3967 ref->is_read = false;
3971 if (call)
3973 unsigned i, n = call_expr_nargs (call);
3975 for (i = 0; i < n; i++)
3977 op0 = &CALL_EXPR_ARG (call, i);
3979 if (DECL_P (*op0)
3980 || REFERENCE_CLASS_P (*op0))
3982 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
3983 ref->pos = op0;
3984 ref->is_read = true;
3989 return clobbers_memory;
3992 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
3993 reference, returns false, otherwise returns true. NEST is the outermost
3994 loop of the loop nest in that the references should be analyzed. */
3996 static bool
3997 find_data_references_in_stmt (struct loop *nest, tree stmt,
3998 VEC (data_reference_p, heap) **datarefs)
4000 unsigned i;
4001 VEC (data_ref_loc, heap) *references;
4002 data_ref_loc *ref;
4003 bool ret = true;
4004 data_reference_p dr;
4006 if (get_references_in_stmt (stmt, &references))
4008 VEC_free (data_ref_loc, heap, references);
4009 return false;
4012 for (i = 0; VEC_iterate (data_ref_loc, references, i, ref); i++)
4014 dr = create_data_ref (nest, *ref->pos, stmt, ref->is_read);
4015 gcc_assert (dr != NULL);
4017 /* FIXME -- data dependence analysis does not work correctly for objects with
4018 invariant addresses. Let us fail here until the problem is fixed. */
4019 if (dr_address_invariant_p (dr))
4021 free_data_ref (dr);
4022 if (dump_file && (dump_flags & TDF_DETAILS))
4023 fprintf (dump_file, "\tFAILED as dr address is invariant\n");
4024 ret = false;
4025 break;
4028 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4030 VEC_free (data_ref_loc, heap, references);
4031 return ret;
4034 /* Search the data references in LOOP, and record the information into
4035 DATAREFS. Returns chrec_dont_know when failing to analyze a
4036 difficult case, returns NULL_TREE otherwise.
4038 TODO: This function should be made smarter so that it can handle address
4039 arithmetic as if they were array accesses, etc. */
4041 static tree
4042 find_data_references_in_loop (struct loop *loop,
4043 VEC (data_reference_p, heap) **datarefs)
4045 basic_block bb, *bbs;
4046 unsigned int i;
4047 block_stmt_iterator bsi;
4049 bbs = get_loop_body_in_dom_order (loop);
4051 for (i = 0; i < loop->num_nodes; i++)
4053 bb = bbs[i];
4055 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
4057 tree stmt = bsi_stmt (bsi);
4059 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4061 struct data_reference *res;
4062 res = XCNEW (struct data_reference);
4063 VEC_safe_push (data_reference_p, heap, *datarefs, res);
4065 free (bbs);
4066 return chrec_dont_know;
4070 free (bbs);
4072 return NULL_TREE;
4075 /* Recursive helper function. */
4077 static bool
4078 find_loop_nest_1 (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4080 /* Inner loops of the nest should not contain siblings. Example:
4081 when there are two consecutive loops,
4083 | loop_0
4084 | loop_1
4085 | A[{0, +, 1}_1]
4086 | endloop_1
4087 | loop_2
4088 | A[{0, +, 1}_2]
4089 | endloop_2
4090 | endloop_0
4092 the dependence relation cannot be captured by the distance
4093 abstraction. */
4094 if (loop->next)
4095 return false;
4097 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4098 if (loop->inner)
4099 return find_loop_nest_1 (loop->inner, loop_nest);
4100 return true;
4103 /* Return false when the LOOP is not well nested. Otherwise return
4104 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4105 contain the loops from the outermost to the innermost, as they will
4106 appear in the classic distance vector. */
4108 bool
4109 find_loop_nest (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4111 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4112 if (loop->inner)
4113 return find_loop_nest_1 (loop->inner, loop_nest);
4114 return true;
4117 /* Given a loop nest LOOP, the following vectors are returned:
4118 DATAREFS is initialized to all the array elements contained in this loop,
4119 DEPENDENCE_RELATIONS contains the relations between the data references.
4120 Compute read-read and self relations if
4121 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4123 void
4124 compute_data_dependences_for_loop (struct loop *loop,
4125 bool compute_self_and_read_read_dependences,
4126 VEC (data_reference_p, heap) **datarefs,
4127 VEC (ddr_p, heap) **dependence_relations)
4129 VEC (loop_p, heap) *vloops = VEC_alloc (loop_p, heap, 3);
4131 memset (&dependence_stats, 0, sizeof (dependence_stats));
4133 /* If the loop nest is not well formed, or one of the data references
4134 is not computable, give up without spending time to compute other
4135 dependences. */
4136 if (!loop
4137 || !find_loop_nest (loop, &vloops)
4138 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know)
4140 struct data_dependence_relation *ddr;
4142 /* Insert a single relation into dependence_relations:
4143 chrec_dont_know. */
4144 ddr = initialize_data_dependence_relation (NULL, NULL, vloops);
4145 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4147 else
4148 compute_all_dependences (*datarefs, dependence_relations, vloops,
4149 compute_self_and_read_read_dependences);
4151 if (dump_file && (dump_flags & TDF_STATS))
4153 fprintf (dump_file, "Dependence tester statistics:\n");
4155 fprintf (dump_file, "Number of dependence tests: %d\n",
4156 dependence_stats.num_dependence_tests);
4157 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4158 dependence_stats.num_dependence_dependent);
4159 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4160 dependence_stats.num_dependence_independent);
4161 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4162 dependence_stats.num_dependence_undetermined);
4164 fprintf (dump_file, "Number of subscript tests: %d\n",
4165 dependence_stats.num_subscript_tests);
4166 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4167 dependence_stats.num_subscript_undetermined);
4168 fprintf (dump_file, "Number of same subscript function: %d\n",
4169 dependence_stats.num_same_subscript_function);
4171 fprintf (dump_file, "Number of ziv tests: %d\n",
4172 dependence_stats.num_ziv);
4173 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4174 dependence_stats.num_ziv_dependent);
4175 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4176 dependence_stats.num_ziv_independent);
4177 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4178 dependence_stats.num_ziv_unimplemented);
4180 fprintf (dump_file, "Number of siv tests: %d\n",
4181 dependence_stats.num_siv);
4182 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4183 dependence_stats.num_siv_dependent);
4184 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4185 dependence_stats.num_siv_independent);
4186 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4187 dependence_stats.num_siv_unimplemented);
4189 fprintf (dump_file, "Number of miv tests: %d\n",
4190 dependence_stats.num_miv);
4191 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4192 dependence_stats.num_miv_dependent);
4193 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4194 dependence_stats.num_miv_independent);
4195 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4196 dependence_stats.num_miv_unimplemented);
4200 /* Entry point (for testing only). Analyze all the data references
4201 and the dependence relations in LOOP.
4203 The data references are computed first.
4205 A relation on these nodes is represented by a complete graph. Some
4206 of the relations could be of no interest, thus the relations can be
4207 computed on demand.
4209 In the following function we compute all the relations. This is
4210 just a first implementation that is here for:
4211 - for showing how to ask for the dependence relations,
4212 - for the debugging the whole dependence graph,
4213 - for the dejagnu testcases and maintenance.
4215 It is possible to ask only for a part of the graph, avoiding to
4216 compute the whole dependence graph. The computed dependences are
4217 stored in a knowledge base (KB) such that later queries don't
4218 recompute the same information. The implementation of this KB is
4219 transparent to the optimizer, and thus the KB can be changed with a
4220 more efficient implementation, or the KB could be disabled. */
4221 static void
4222 analyze_all_data_dependences (struct loop *loop)
4224 unsigned int i;
4225 int nb_data_refs = 10;
4226 VEC (data_reference_p, heap) *datarefs =
4227 VEC_alloc (data_reference_p, heap, nb_data_refs);
4228 VEC (ddr_p, heap) *dependence_relations =
4229 VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs);
4231 /* Compute DDs on the whole function. */
4232 compute_data_dependences_for_loop (loop, false, &datarefs,
4233 &dependence_relations);
4235 if (dump_file)
4237 dump_data_dependence_relations (dump_file, dependence_relations);
4238 fprintf (dump_file, "\n\n");
4240 if (dump_flags & TDF_DETAILS)
4241 dump_dist_dir_vectors (dump_file, dependence_relations);
4243 if (dump_flags & TDF_STATS)
4245 unsigned nb_top_relations = 0;
4246 unsigned nb_bot_relations = 0;
4247 unsigned nb_basename_differ = 0;
4248 unsigned nb_chrec_relations = 0;
4249 struct data_dependence_relation *ddr;
4251 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
4253 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4254 nb_top_relations++;
4256 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4258 struct data_reference *a = DDR_A (ddr);
4259 struct data_reference *b = DDR_B (ddr);
4261 if (!bitmap_intersect_p (DR_VOPS (a), DR_VOPS (b)))
4262 nb_basename_differ++;
4263 else
4264 nb_bot_relations++;
4267 else
4268 nb_chrec_relations++;
4271 gather_stats_on_scev_database ();
4275 free_dependence_relations (dependence_relations);
4276 free_data_refs (datarefs);
4279 /* Computes all the data dependences and check that the results of
4280 several analyzers are the same. */
4282 void
4283 tree_check_data_deps (void)
4285 loop_iterator li;
4286 struct loop *loop_nest;
4288 FOR_EACH_LOOP (li, loop_nest, 0)
4289 analyze_all_data_dependences (loop_nest);
4292 /* Free the memory used by a data dependence relation DDR. */
4294 void
4295 free_dependence_relation (struct data_dependence_relation *ddr)
4297 if (ddr == NULL)
4298 return;
4300 if (DDR_SUBSCRIPTS (ddr))
4301 free_subscripts (DDR_SUBSCRIPTS (ddr));
4302 if (DDR_DIST_VECTS (ddr))
4303 VEC_free (lambda_vector, heap, DDR_DIST_VECTS (ddr));
4304 if (DDR_DIR_VECTS (ddr))
4305 VEC_free (lambda_vector, heap, DDR_DIR_VECTS (ddr));
4307 free (ddr);
4310 /* Free the memory used by the data dependence relations from
4311 DEPENDENCE_RELATIONS. */
4313 void
4314 free_dependence_relations (VEC (ddr_p, heap) *dependence_relations)
4316 unsigned int i;
4317 struct data_dependence_relation *ddr;
4318 VEC (loop_p, heap) *loop_nest = NULL;
4320 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
4322 if (ddr == NULL)
4323 continue;
4324 if (loop_nest == NULL)
4325 loop_nest = DDR_LOOP_NEST (ddr);
4326 else
4327 gcc_assert (DDR_LOOP_NEST (ddr) == NULL
4328 || DDR_LOOP_NEST (ddr) == loop_nest);
4329 free_dependence_relation (ddr);
4332 if (loop_nest)
4333 VEC_free (loop_p, heap, loop_nest);
4334 VEC_free (ddr_p, heap, dependence_relations);
4337 /* Free the memory used by the data references from DATAREFS. */
4339 void
4340 free_data_refs (VEC (data_reference_p, heap) *datarefs)
4342 unsigned int i;
4343 struct data_reference *dr;
4345 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
4346 free_data_ref (dr);
4347 VEC_free (data_reference_p, heap, datarefs);
4352 /* Returns the index of STMT in RDG. */
4354 static int
4355 find_vertex_for_stmt (const struct graph *rdg, const_tree stmt)
4357 int i;
4359 for (i = 0; i < rdg->n_vertices; i++)
4360 if (RDGV_STMT (&(rdg->vertices[i])) == stmt)
4361 return i;
4363 gcc_unreachable ();
4364 return 0;
4367 /* Creates an edge in RDG for each distance vector from DDR. */
4369 static void
4370 create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4372 int va, vb;
4373 data_reference_p dra;
4374 data_reference_p drb;
4375 struct graph_edge *e;
4377 if (DDR_REVERSED_P (ddr))
4379 dra = DDR_B (ddr);
4380 drb = DDR_A (ddr);
4382 else
4384 dra = DDR_A (ddr);
4385 drb = DDR_B (ddr);
4388 va = find_vertex_for_stmt (rdg, DR_STMT (dra));
4389 vb = find_vertex_for_stmt (rdg, DR_STMT (drb));
4391 e = add_edge (rdg, va, vb);
4392 e->data = XNEW (struct rdg_edge);
4394 /* Determines the type of the data dependence. */
4395 if (DR_IS_READ (dra) && DR_IS_READ (drb))
4396 RDGE_TYPE (e) = input_dd;
4397 else if (!DR_IS_READ (dra) && !DR_IS_READ (drb))
4398 RDGE_TYPE (e) = output_dd;
4399 else if (!DR_IS_READ (dra) && DR_IS_READ (drb))
4400 RDGE_TYPE (e) = flow_dd;
4401 else if (DR_IS_READ (dra) && !DR_IS_READ (drb))
4402 RDGE_TYPE (e) = anti_dd;
4405 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
4406 the index of DEF in RDG. */
4408 static void
4409 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
4411 use_operand_p imm_use_p;
4412 imm_use_iterator iterator;
4414 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
4416 int use = find_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
4417 struct graph_edge *e = add_edge (rdg, idef, use);
4419 e->data = XNEW (struct rdg_edge);
4420 RDGE_TYPE (e) = flow_dd;
4424 /* Creates the edges of the reduced dependence graph RDG. */
4426 static void
4427 create_rdg_edges (struct graph *rdg, VEC (ddr_p, heap) *ddrs)
4429 int i;
4430 struct data_dependence_relation *ddr;
4431 def_operand_p def_p;
4432 ssa_op_iter iter;
4434 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
4435 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4436 create_rdg_edge_for_ddr (rdg, ddr);
4438 for (i = 0; i < rdg->n_vertices; i++)
4439 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDGV_STMT (&(rdg->vertices[i])),
4440 iter, SSA_OP_ALL_DEFS)
4441 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
4444 /* Build the vertices of the reduced dependence graph RDG. */
4446 static void
4447 create_rdg_vertices (struct graph *rdg, VEC (tree, heap) *stmts)
4449 int i;
4450 tree s;
4452 for (i = 0; VEC_iterate (tree, stmts, i, s); i++)
4454 struct vertex *v = &(rdg->vertices[i]);
4456 v->data = XNEW (struct rdg_vertex);
4457 RDGV_STMT (v) = s;
4461 /* Initialize STMTS with all the statements and PHI nodes of LOOP. */
4463 static void
4464 stmts_from_loop (struct loop *loop, VEC (tree, heap) **stmts)
4466 unsigned int i;
4467 basic_block *bbs = get_loop_body_in_dom_order (loop);
4469 for (i = 0; i < loop->num_nodes; i++)
4471 tree phi;
4472 basic_block bb = bbs[i];
4473 block_stmt_iterator bsi;
4475 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
4476 VEC_safe_push (tree, heap, *stmts, phi);
4478 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
4479 VEC_safe_push (tree, heap, *stmts, bsi_stmt (bsi));
4482 free (bbs);
4485 /* Returns true when all the dependences are computable. */
4487 static bool
4488 known_dependences_p (VEC (ddr_p, heap) *dependence_relations)
4490 ddr_p ddr;
4491 unsigned int i;
4493 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
4494 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4495 return false;
4497 return true;
4500 /* Build a Reduced Dependence Graph with one vertex per statement of the
4501 loop nest and one edge per data dependence or scalar dependence. */
4503 struct graph *
4504 build_rdg (struct loop *loop)
4506 int nb_data_refs = 10;
4507 struct graph *rdg = NULL;
4508 VEC (ddr_p, heap) *dependence_relations;
4509 VEC (data_reference_p, heap) *datarefs;
4510 VEC (tree, heap) *stmts = VEC_alloc (tree, heap, 10);
4512 dependence_relations = VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs) ;
4513 datarefs = VEC_alloc (data_reference_p, heap, nb_data_refs);
4514 compute_data_dependences_for_loop (loop,
4515 false,
4516 &datarefs,
4517 &dependence_relations);
4519 if (!known_dependences_p (dependence_relations))
4520 goto end_rdg;
4522 stmts_from_loop (loop, &stmts);
4523 rdg = new_graph (VEC_length (tree, stmts));
4524 create_rdg_vertices (rdg, stmts);
4525 create_rdg_edges (rdg, dependence_relations);
4527 end_rdg:
4528 free_dependence_relations (dependence_relations);
4529 free_data_refs (datarefs);
4530 VEC_free (tree, heap, stmts);
4532 return rdg;