pr 33870
[official-gcc.git] / gcc / tree-data-ref.c
blob8d9c4c98a55c77e62dd3217ebf9268b8708b4f63
1 /* Data references and dependences detectors.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass walks a given loop structure searching for array
22 references. The information about the array accesses is recorded
23 in DATA_REFERENCE structures.
25 The basic test for determining the dependences is:
26 given two access functions chrec1 and chrec2 to a same array, and
27 x and y two vectors from the iteration domain, the same element of
28 the array is accessed twice at iterations x and y if and only if:
29 | chrec1 (x) == chrec2 (y).
31 The goals of this analysis are:
33 - to determine the independence: the relation between two
34 independent accesses is qualified with the chrec_known (this
35 information allows a loop parallelization),
37 - when two data references access the same data, to qualify the
38 dependence relation with classic dependence representations:
40 - distance vectors
41 - direction vectors
42 - loop carried level dependence
43 - polyhedron dependence
44 or with the chains of recurrences based representation,
46 - to define a knowledge base for storing the data dependence
47 information,
49 - to define an interface to access this data.
52 Definitions:
54 - subscript: given two array accesses a subscript is the tuple
55 composed of the access functions for a given dimension. Example:
56 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57 (f1, g1), (f2, g2), (f3, g3).
59 - Diophantine equation: an equation whose coefficients and
60 solutions are integer constants, for example the equation
61 | 3*x + 2*y = 1
62 has an integer solution x = 1 and y = -1.
64 References:
66 - "Advanced Compilation for High Performance Computing" by Randy
67 Allen and Ken Kennedy.
68 http://citeseer.ist.psu.edu/goff91practical.html
70 - "Loop Transformations for Restructuring Compilers - The Foundations"
71 by Utpal Banerjee.
76 #include "config.h"
77 #include "system.h"
78 #include "coretypes.h"
79 #include "tm.h"
80 #include "ggc.h"
81 #include "tree.h"
83 /* These RTL headers are needed for basic-block.h. */
84 #include "rtl.h"
85 #include "basic-block.h"
86 #include "diagnostic.h"
87 #include "tree-flow.h"
88 #include "tree-dump.h"
89 #include "timevar.h"
90 #include "cfgloop.h"
91 #include "tree-chrec.h"
92 #include "tree-data-ref.h"
93 #include "tree-scalar-evolution.h"
94 #include "tree-pass.h"
95 #include "langhooks.h"
97 static struct datadep_stats
99 int num_dependence_tests;
100 int num_dependence_dependent;
101 int num_dependence_independent;
102 int num_dependence_undetermined;
104 int num_subscript_tests;
105 int num_subscript_undetermined;
106 int num_same_subscript_function;
108 int num_ziv;
109 int num_ziv_independent;
110 int num_ziv_dependent;
111 int num_ziv_unimplemented;
113 int num_siv;
114 int num_siv_independent;
115 int num_siv_dependent;
116 int num_siv_unimplemented;
118 int num_miv;
119 int num_miv_independent;
120 int num_miv_dependent;
121 int num_miv_unimplemented;
122 } dependence_stats;
124 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
125 struct data_reference *,
126 struct data_reference *,
127 struct loop *);
128 /* Returns true iff A divides B. */
130 static inline bool
131 tree_fold_divides_p (const_tree a, const_tree b)
133 gcc_assert (TREE_CODE (a) == INTEGER_CST);
134 gcc_assert (TREE_CODE (b) == INTEGER_CST);
135 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a, 0));
138 /* Returns true iff A divides B. */
140 static inline bool
141 int_divides_p (int a, int b)
143 return ((b % a) == 0);
148 /* Dump into FILE all the data references from DATAREFS. */
150 void
151 dump_data_references (FILE *file, VEC (data_reference_p, heap) *datarefs)
153 unsigned int i;
154 struct data_reference *dr;
156 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
157 dump_data_reference (file, dr);
160 /* Dump into FILE all the dependence relations from DDRS. */
162 void
163 dump_data_dependence_relations (FILE *file,
164 VEC (ddr_p, heap) *ddrs)
166 unsigned int i;
167 struct data_dependence_relation *ddr;
169 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
170 dump_data_dependence_relation (file, ddr);
173 /* Dump function for a DATA_REFERENCE structure. */
175 void
176 dump_data_reference (FILE *outf,
177 struct data_reference *dr)
179 unsigned int i;
181 fprintf (outf, "(Data Ref: \n stmt: ");
182 print_generic_stmt (outf, DR_STMT (dr), 0);
183 fprintf (outf, " ref: ");
184 print_generic_stmt (outf, DR_REF (dr), 0);
185 fprintf (outf, " base_object: ");
186 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
188 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
190 fprintf (outf, " Access function %d: ", i);
191 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
193 fprintf (outf, ")\n");
196 /* Dumps the affine function described by FN to the file OUTF. */
198 static void
199 dump_affine_function (FILE *outf, affine_fn fn)
201 unsigned i;
202 tree coef;
204 print_generic_expr (outf, VEC_index (tree, fn, 0), TDF_SLIM);
205 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
207 fprintf (outf, " + ");
208 print_generic_expr (outf, coef, TDF_SLIM);
209 fprintf (outf, " * x_%u", i);
213 /* Dumps the conflict function CF to the file OUTF. */
215 static void
216 dump_conflict_function (FILE *outf, conflict_function *cf)
218 unsigned i;
220 if (cf->n == NO_DEPENDENCE)
221 fprintf (outf, "no dependence\n");
222 else if (cf->n == NOT_KNOWN)
223 fprintf (outf, "not known\n");
224 else
226 for (i = 0; i < cf->n; i++)
228 fprintf (outf, "[");
229 dump_affine_function (outf, cf->fns[i]);
230 fprintf (outf, "]\n");
235 /* Dump function for a SUBSCRIPT structure. */
237 void
238 dump_subscript (FILE *outf, struct subscript *subscript)
240 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
242 fprintf (outf, "\n (subscript \n");
243 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
244 dump_conflict_function (outf, cf);
245 if (CF_NONTRIVIAL_P (cf))
247 tree last_iteration = SUB_LAST_CONFLICT (subscript);
248 fprintf (outf, " last_conflict: ");
249 print_generic_stmt (outf, last_iteration, 0);
252 cf = SUB_CONFLICTS_IN_B (subscript);
253 fprintf (outf, " iterations_that_access_an_element_twice_in_B: ");
254 dump_conflict_function (outf, cf);
255 if (CF_NONTRIVIAL_P (cf))
257 tree last_iteration = SUB_LAST_CONFLICT (subscript);
258 fprintf (outf, " last_conflict: ");
259 print_generic_stmt (outf, last_iteration, 0);
262 fprintf (outf, " (Subscript distance: ");
263 print_generic_stmt (outf, SUB_DISTANCE (subscript), 0);
264 fprintf (outf, " )\n");
265 fprintf (outf, " )\n");
268 /* Print the classic direction vector DIRV to OUTF. */
270 void
271 print_direction_vector (FILE *outf,
272 lambda_vector dirv,
273 int length)
275 int eq;
277 for (eq = 0; eq < length; eq++)
279 enum data_dependence_direction dir = dirv[eq];
281 switch (dir)
283 case dir_positive:
284 fprintf (outf, " +");
285 break;
286 case dir_negative:
287 fprintf (outf, " -");
288 break;
289 case dir_equal:
290 fprintf (outf, " =");
291 break;
292 case dir_positive_or_equal:
293 fprintf (outf, " +=");
294 break;
295 case dir_positive_or_negative:
296 fprintf (outf, " +-");
297 break;
298 case dir_negative_or_equal:
299 fprintf (outf, " -=");
300 break;
301 case dir_star:
302 fprintf (outf, " *");
303 break;
304 default:
305 fprintf (outf, "indep");
306 break;
309 fprintf (outf, "\n");
312 /* Print a vector of direction vectors. */
314 void
315 print_dir_vectors (FILE *outf, VEC (lambda_vector, heap) *dir_vects,
316 int length)
318 unsigned j;
319 lambda_vector v;
321 for (j = 0; VEC_iterate (lambda_vector, dir_vects, j, v); j++)
322 print_direction_vector (outf, v, length);
325 /* Print a vector of distance vectors. */
327 void
328 print_dist_vectors (FILE *outf, VEC (lambda_vector, heap) *dist_vects,
329 int length)
331 unsigned j;
332 lambda_vector v;
334 for (j = 0; VEC_iterate (lambda_vector, dist_vects, j, v); j++)
335 print_lambda_vector (outf, v, length);
338 /* Debug version. */
340 void
341 debug_data_dependence_relation (struct data_dependence_relation *ddr)
343 dump_data_dependence_relation (stderr, ddr);
346 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
348 void
349 dump_data_dependence_relation (FILE *outf,
350 struct data_dependence_relation *ddr)
352 struct data_reference *dra, *drb;
354 dra = DDR_A (ddr);
355 drb = DDR_B (ddr);
356 fprintf (outf, "(Data Dep: \n");
357 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
358 fprintf (outf, " (don't know)\n");
360 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
361 fprintf (outf, " (no dependence)\n");
363 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
365 unsigned int i;
366 struct loop *loopi;
368 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
370 fprintf (outf, " access_fn_A: ");
371 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
372 fprintf (outf, " access_fn_B: ");
373 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
374 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
377 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
378 fprintf (outf, " loop nest: (");
379 for (i = 0; VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
380 fprintf (outf, "%d ", loopi->num);
381 fprintf (outf, ")\n");
383 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
385 fprintf (outf, " distance_vector: ");
386 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
387 DDR_NB_LOOPS (ddr));
390 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
392 fprintf (outf, " direction_vector: ");
393 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
394 DDR_NB_LOOPS (ddr));
398 fprintf (outf, ")\n");
401 /* Dump function for a DATA_DEPENDENCE_DIRECTION structure. */
403 void
404 dump_data_dependence_direction (FILE *file,
405 enum data_dependence_direction dir)
407 switch (dir)
409 case dir_positive:
410 fprintf (file, "+");
411 break;
413 case dir_negative:
414 fprintf (file, "-");
415 break;
417 case dir_equal:
418 fprintf (file, "=");
419 break;
421 case dir_positive_or_negative:
422 fprintf (file, "+-");
423 break;
425 case dir_positive_or_equal:
426 fprintf (file, "+=");
427 break;
429 case dir_negative_or_equal:
430 fprintf (file, "-=");
431 break;
433 case dir_star:
434 fprintf (file, "*");
435 break;
437 default:
438 break;
442 /* Dumps the distance and direction vectors in FILE. DDRS contains
443 the dependence relations, and VECT_SIZE is the size of the
444 dependence vectors, or in other words the number of loops in the
445 considered nest. */
447 void
448 dump_dist_dir_vectors (FILE *file, VEC (ddr_p, heap) *ddrs)
450 unsigned int i, j;
451 struct data_dependence_relation *ddr;
452 lambda_vector v;
454 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
455 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
457 for (j = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), j, v); j++)
459 fprintf (file, "DISTANCE_V (");
460 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
461 fprintf (file, ")\n");
464 for (j = 0; VEC_iterate (lambda_vector, DDR_DIR_VECTS (ddr), j, v); j++)
466 fprintf (file, "DIRECTION_V (");
467 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
468 fprintf (file, ")\n");
472 fprintf (file, "\n\n");
475 /* Dumps the data dependence relations DDRS in FILE. */
477 void
478 dump_ddrs (FILE *file, VEC (ddr_p, heap) *ddrs)
480 unsigned int i;
481 struct data_dependence_relation *ddr;
483 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
484 dump_data_dependence_relation (file, ddr);
486 fprintf (file, "\n\n");
489 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
490 will be ssizetype. */
492 void
493 split_constant_offset (tree exp, tree *var, tree *off)
495 tree type = TREE_TYPE (exp), otype;
496 tree var0, var1;
497 tree off0, off1;
498 enum tree_code code;
500 *var = exp;
501 STRIP_NOPS (exp);
502 otype = TREE_TYPE (exp);
503 code = TREE_CODE (exp);
505 switch (code)
507 case INTEGER_CST:
508 *var = build_int_cst (type, 0);
509 *off = fold_convert (ssizetype, exp);
510 return;
512 case POINTER_PLUS_EXPR:
513 code = PLUS_EXPR;
514 /* FALLTHROUGH */
515 case PLUS_EXPR:
516 case MINUS_EXPR:
517 split_constant_offset (TREE_OPERAND (exp, 0), &var0, &off0);
518 split_constant_offset (TREE_OPERAND (exp, 1), &var1, &off1);
519 *var = fold_convert (type, fold_build2 (TREE_CODE (exp), otype,
520 var0, var1));
521 *off = size_binop (code, off0, off1);
522 return;
524 case MULT_EXPR:
525 off1 = TREE_OPERAND (exp, 1);
526 if (TREE_CODE (off1) != INTEGER_CST)
527 break;
529 split_constant_offset (TREE_OPERAND (exp, 0), &var0, &off0);
530 *var = fold_convert (type, fold_build2 (MULT_EXPR, otype,
531 var0, off1));
532 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, off1));
533 return;
535 case ADDR_EXPR:
537 tree op, base, poffset;
538 HOST_WIDE_INT pbitsize, pbitpos;
539 enum machine_mode pmode;
540 int punsignedp, pvolatilep;
542 op = TREE_OPERAND (exp, 0);
543 if (!handled_component_p (op))
544 break;
546 base = get_inner_reference (op, &pbitsize, &pbitpos, &poffset,
547 &pmode, &punsignedp, &pvolatilep, false);
549 if (pbitpos % BITS_PER_UNIT != 0)
550 break;
551 base = build_fold_addr_expr (base);
552 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
554 if (poffset)
556 split_constant_offset (poffset, &poffset, &off1);
557 off0 = size_binop (PLUS_EXPR, off0, off1);
558 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base),
559 base,
560 fold_convert (TREE_TYPE (base), poffset));
563 var0 = fold_convert (type, base);
565 /* If variable length types are involved, punt, otherwise casts
566 might be converted into ARRAY_REFs in gimplify_conversion.
567 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
568 possibly no longer appears in current GIMPLE, might resurface.
569 This perhaps could run
570 if (TREE_CODE (var0) == NOP_EXPR
571 || TREE_CODE (var0) == CONVERT_EXPR)
573 gimplify_conversion (&var0);
574 // Attempt to fill in any within var0 found ARRAY_REF's
575 // element size from corresponding op embedded ARRAY_REF,
576 // if unsuccessful, just punt.
577 } */
578 while (POINTER_TYPE_P (type))
579 type = TREE_TYPE (type);
580 if (int_size_in_bytes (type) < 0)
581 break;
583 *var = var0;
584 *off = off0;
585 return;
588 case SSA_NAME:
590 tree def_stmt = SSA_NAME_DEF_STMT (exp);
591 if (TREE_CODE (def_stmt) == GIMPLE_MODIFY_STMT)
593 tree def_stmt_rhs = GIMPLE_STMT_OPERAND (def_stmt, 1);
595 if (!TREE_SIDE_EFFECTS (def_stmt_rhs)
596 && EXPR_P (def_stmt_rhs)
597 && !REFERENCE_CLASS_P (def_stmt_rhs)
598 && !get_call_expr_in (def_stmt_rhs))
600 split_constant_offset (def_stmt_rhs, &var0, &off0);
601 var0 = fold_convert (type, var0);
602 *var = var0;
603 *off = off0;
604 return;
607 break;
610 default:
611 break;
614 *off = ssize_int (0);
617 /* Returns the address ADDR of an object in a canonical shape (without nop
618 casts, and with type of pointer to the object). */
620 static tree
621 canonicalize_base_object_address (tree addr)
623 tree orig = addr;
625 STRIP_NOPS (addr);
627 /* The base address may be obtained by casting from integer, in that case
628 keep the cast. */
629 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
630 return orig;
632 if (TREE_CODE (addr) != ADDR_EXPR)
633 return addr;
635 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
638 /* Analyzes the behavior of the memory reference DR in the innermost loop that
639 contains it. */
641 void
642 dr_analyze_innermost (struct data_reference *dr)
644 tree stmt = DR_STMT (dr);
645 struct loop *loop = loop_containing_stmt (stmt);
646 tree ref = DR_REF (dr);
647 HOST_WIDE_INT pbitsize, pbitpos;
648 tree base, poffset;
649 enum machine_mode pmode;
650 int punsignedp, pvolatilep;
651 affine_iv base_iv, offset_iv;
652 tree init, dinit, step;
654 if (dump_file && (dump_flags & TDF_DETAILS))
655 fprintf (dump_file, "analyze_innermost: ");
657 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
658 &pmode, &punsignedp, &pvolatilep, false);
659 gcc_assert (base != NULL_TREE);
661 if (pbitpos % BITS_PER_UNIT != 0)
663 if (dump_file && (dump_flags & TDF_DETAILS))
664 fprintf (dump_file, "failed: bit offset alignment.\n");
665 return;
668 base = build_fold_addr_expr (base);
669 if (!simple_iv (loop, stmt, base, &base_iv, false))
671 if (dump_file && (dump_flags & TDF_DETAILS))
672 fprintf (dump_file, "failed: evolution of base is not affine.\n");
673 return;
675 if (!poffset)
677 offset_iv.base = ssize_int (0);
678 offset_iv.step = ssize_int (0);
680 else if (!simple_iv (loop, stmt, poffset, &offset_iv, false))
682 if (dump_file && (dump_flags & TDF_DETAILS))
683 fprintf (dump_file, "failed: evolution of offset is not affine.\n");
684 return;
687 init = ssize_int (pbitpos / BITS_PER_UNIT);
688 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
689 init = size_binop (PLUS_EXPR, init, dinit);
690 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
691 init = size_binop (PLUS_EXPR, init, dinit);
693 step = size_binop (PLUS_EXPR,
694 fold_convert (ssizetype, base_iv.step),
695 fold_convert (ssizetype, offset_iv.step));
697 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
699 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
700 DR_INIT (dr) = init;
701 DR_STEP (dr) = step;
703 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
705 if (dump_file && (dump_flags & TDF_DETAILS))
706 fprintf (dump_file, "success.\n");
709 /* Determines the base object and the list of indices of memory reference
710 DR, analyzed in loop nest NEST. */
712 static void
713 dr_analyze_indices (struct data_reference *dr, struct loop *nest)
715 tree stmt = DR_STMT (dr);
716 struct loop *loop = loop_containing_stmt (stmt);
717 VEC (tree, heap) *access_fns = NULL;
718 tree ref = unshare_expr (DR_REF (dr)), aref = ref, op;
719 tree base, off, access_fn;
721 while (handled_component_p (aref))
723 if (TREE_CODE (aref) == ARRAY_REF)
725 op = TREE_OPERAND (aref, 1);
726 access_fn = analyze_scalar_evolution (loop, op);
727 access_fn = resolve_mixers (nest, access_fn);
728 VEC_safe_push (tree, heap, access_fns, access_fn);
730 TREE_OPERAND (aref, 1) = build_int_cst (TREE_TYPE (op), 0);
733 aref = TREE_OPERAND (aref, 0);
736 if (INDIRECT_REF_P (aref))
738 op = TREE_OPERAND (aref, 0);
739 access_fn = analyze_scalar_evolution (loop, op);
740 access_fn = resolve_mixers (nest, access_fn);
741 base = initial_condition (access_fn);
742 split_constant_offset (base, &base, &off);
743 access_fn = chrec_replace_initial_condition (access_fn,
744 fold_convert (TREE_TYPE (base), off));
746 TREE_OPERAND (aref, 0) = base;
747 VEC_safe_push (tree, heap, access_fns, access_fn);
750 DR_BASE_OBJECT (dr) = ref;
751 DR_ACCESS_FNS (dr) = access_fns;
754 /* Extracts the alias analysis information from the memory reference DR. */
756 static void
757 dr_analyze_alias (struct data_reference *dr)
759 tree stmt = DR_STMT (dr);
760 tree ref = DR_REF (dr);
761 tree base = get_base_address (ref), addr, smt = NULL_TREE;
762 ssa_op_iter it;
763 tree op;
764 bitmap vops;
766 if (DECL_P (base))
767 smt = base;
768 else if (INDIRECT_REF_P (base))
770 addr = TREE_OPERAND (base, 0);
771 if (TREE_CODE (addr) == SSA_NAME)
773 smt = symbol_mem_tag (SSA_NAME_VAR (addr));
774 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
778 DR_SYMBOL_TAG (dr) = smt;
779 if (smt && var_can_have_subvars (smt))
780 DR_SUBVARS (dr) = get_subvars_for_var (smt);
782 vops = BITMAP_ALLOC (NULL);
783 FOR_EACH_SSA_TREE_OPERAND (op, stmt, it, SSA_OP_VIRTUAL_USES)
785 bitmap_set_bit (vops, DECL_UID (SSA_NAME_VAR (op)));
788 DR_VOPS (dr) = vops;
791 /* Returns true if the address of DR is invariant. */
793 static bool
794 dr_address_invariant_p (struct data_reference *dr)
796 unsigned i;
797 tree idx;
799 for (i = 0; VEC_iterate (tree, DR_ACCESS_FNS (dr), i, idx); i++)
800 if (tree_contains_chrecs (idx, NULL))
801 return false;
803 return true;
806 /* Frees data reference DR. */
808 static void
809 free_data_ref (data_reference_p dr)
811 BITMAP_FREE (DR_VOPS (dr));
812 VEC_free (tree, heap, DR_ACCESS_FNS (dr));
813 free (dr);
816 /* Analyzes memory reference MEMREF accessed in STMT. The reference
817 is read if IS_READ is true, write otherwise. Returns the
818 data_reference description of MEMREF. NEST is the outermost loop of the
819 loop nest in that the reference should be analyzed. */
821 struct data_reference *
822 create_data_ref (struct loop *nest, tree memref, tree stmt, bool is_read)
824 struct data_reference *dr;
826 if (dump_file && (dump_flags & TDF_DETAILS))
828 fprintf (dump_file, "Creating dr for ");
829 print_generic_expr (dump_file, memref, TDF_SLIM);
830 fprintf (dump_file, "\n");
833 dr = XCNEW (struct data_reference);
834 DR_STMT (dr) = stmt;
835 DR_REF (dr) = memref;
836 DR_IS_READ (dr) = is_read;
838 dr_analyze_innermost (dr);
839 dr_analyze_indices (dr, nest);
840 dr_analyze_alias (dr);
842 if (dump_file && (dump_flags & TDF_DETAILS))
844 fprintf (dump_file, "\tbase_address: ");
845 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
846 fprintf (dump_file, "\n\toffset from base address: ");
847 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
848 fprintf (dump_file, "\n\tconstant offset from base address: ");
849 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
850 fprintf (dump_file, "\n\tstep: ");
851 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
852 fprintf (dump_file, "\n\taligned to: ");
853 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
854 fprintf (dump_file, "\n\tbase_object: ");
855 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
856 fprintf (dump_file, "\n\tsymbol tag: ");
857 print_generic_expr (dump_file, DR_SYMBOL_TAG (dr), TDF_SLIM);
858 fprintf (dump_file, "\n");
861 return dr;
864 /* Returns true if FNA == FNB. */
866 static bool
867 affine_function_equal_p (affine_fn fna, affine_fn fnb)
869 unsigned i, n = VEC_length (tree, fna);
871 if (n != VEC_length (tree, fnb))
872 return false;
874 for (i = 0; i < n; i++)
875 if (!operand_equal_p (VEC_index (tree, fna, i),
876 VEC_index (tree, fnb, i), 0))
877 return false;
879 return true;
882 /* If all the functions in CF are the same, returns one of them,
883 otherwise returns NULL. */
885 static affine_fn
886 common_affine_function (conflict_function *cf)
888 unsigned i;
889 affine_fn comm;
891 if (!CF_NONTRIVIAL_P (cf))
892 return NULL;
894 comm = cf->fns[0];
896 for (i = 1; i < cf->n; i++)
897 if (!affine_function_equal_p (comm, cf->fns[i]))
898 return NULL;
900 return comm;
903 /* Returns the base of the affine function FN. */
905 static tree
906 affine_function_base (affine_fn fn)
908 return VEC_index (tree, fn, 0);
911 /* Returns true if FN is a constant. */
913 static bool
914 affine_function_constant_p (affine_fn fn)
916 unsigned i;
917 tree coef;
919 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
920 if (!integer_zerop (coef))
921 return false;
923 return true;
926 /* Returns true if FN is the zero constant function. */
928 static bool
929 affine_function_zero_p (affine_fn fn)
931 return (integer_zerop (affine_function_base (fn))
932 && affine_function_constant_p (fn));
935 /* Applies operation OP on affine functions FNA and FNB, and returns the
936 result. */
938 static affine_fn
939 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
941 unsigned i, n, m;
942 affine_fn ret;
943 tree coef;
945 if (VEC_length (tree, fnb) > VEC_length (tree, fna))
947 n = VEC_length (tree, fna);
948 m = VEC_length (tree, fnb);
950 else
952 n = VEC_length (tree, fnb);
953 m = VEC_length (tree, fna);
956 ret = VEC_alloc (tree, heap, m);
957 for (i = 0; i < n; i++)
958 VEC_quick_push (tree, ret,
959 fold_build2 (op, integer_type_node,
960 VEC_index (tree, fna, i),
961 VEC_index (tree, fnb, i)));
963 for (; VEC_iterate (tree, fna, i, coef); i++)
964 VEC_quick_push (tree, ret,
965 fold_build2 (op, integer_type_node,
966 coef, integer_zero_node));
967 for (; VEC_iterate (tree, fnb, i, coef); i++)
968 VEC_quick_push (tree, ret,
969 fold_build2 (op, integer_type_node,
970 integer_zero_node, coef));
972 return ret;
975 /* Returns the sum of affine functions FNA and FNB. */
977 static affine_fn
978 affine_fn_plus (affine_fn fna, affine_fn fnb)
980 return affine_fn_op (PLUS_EXPR, fna, fnb);
983 /* Returns the difference of affine functions FNA and FNB. */
985 static affine_fn
986 affine_fn_minus (affine_fn fna, affine_fn fnb)
988 return affine_fn_op (MINUS_EXPR, fna, fnb);
991 /* Frees affine function FN. */
993 static void
994 affine_fn_free (affine_fn fn)
996 VEC_free (tree, heap, fn);
999 /* Determine for each subscript in the data dependence relation DDR
1000 the distance. */
1002 static void
1003 compute_subscript_distance (struct data_dependence_relation *ddr)
1005 conflict_function *cf_a, *cf_b;
1006 affine_fn fn_a, fn_b, diff;
1008 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1010 unsigned int i;
1012 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1014 struct subscript *subscript;
1016 subscript = DDR_SUBSCRIPT (ddr, i);
1017 cf_a = SUB_CONFLICTS_IN_A (subscript);
1018 cf_b = SUB_CONFLICTS_IN_B (subscript);
1020 fn_a = common_affine_function (cf_a);
1021 fn_b = common_affine_function (cf_b);
1022 if (!fn_a || !fn_b)
1024 SUB_DISTANCE (subscript) = chrec_dont_know;
1025 return;
1027 diff = affine_fn_minus (fn_a, fn_b);
1029 if (affine_function_constant_p (diff))
1030 SUB_DISTANCE (subscript) = affine_function_base (diff);
1031 else
1032 SUB_DISTANCE (subscript) = chrec_dont_know;
1034 affine_fn_free (diff);
1039 /* Returns the conflict function for "unknown". */
1041 static conflict_function *
1042 conflict_fn_not_known (void)
1044 conflict_function *fn = XCNEW (conflict_function);
1045 fn->n = NOT_KNOWN;
1047 return fn;
1050 /* Returns the conflict function for "independent". */
1052 static conflict_function *
1053 conflict_fn_no_dependence (void)
1055 conflict_function *fn = XCNEW (conflict_function);
1056 fn->n = NO_DEPENDENCE;
1058 return fn;
1061 /* Returns true if the address of OBJ is invariant in LOOP. */
1063 static bool
1064 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1066 while (handled_component_p (obj))
1068 if (TREE_CODE (obj) == ARRAY_REF)
1070 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1071 need to check the stride and the lower bound of the reference. */
1072 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1073 loop->num)
1074 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1075 loop->num))
1076 return false;
1078 else if (TREE_CODE (obj) == COMPONENT_REF)
1080 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1081 loop->num))
1082 return false;
1084 obj = TREE_OPERAND (obj, 0);
1087 if (!INDIRECT_REF_P (obj))
1088 return true;
1090 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1091 loop->num);
1094 /* Returns true if A and B are accesses to different objects, or to different
1095 fields of the same object. */
1097 static bool
1098 disjoint_objects_p (tree a, tree b)
1100 tree base_a, base_b;
1101 VEC (tree, heap) *comp_a = NULL, *comp_b = NULL;
1102 bool ret;
1104 base_a = get_base_address (a);
1105 base_b = get_base_address (b);
1107 if (DECL_P (base_a)
1108 && DECL_P (base_b)
1109 && base_a != base_b)
1110 return true;
1112 if (!operand_equal_p (base_a, base_b, 0))
1113 return false;
1115 /* Compare the component references of A and B. We must start from the inner
1116 ones, so record them to the vector first. */
1117 while (handled_component_p (a))
1119 VEC_safe_push (tree, heap, comp_a, a);
1120 a = TREE_OPERAND (a, 0);
1122 while (handled_component_p (b))
1124 VEC_safe_push (tree, heap, comp_b, b);
1125 b = TREE_OPERAND (b, 0);
1128 ret = false;
1129 while (1)
1131 if (VEC_length (tree, comp_a) == 0
1132 || VEC_length (tree, comp_b) == 0)
1133 break;
1135 a = VEC_pop (tree, comp_a);
1136 b = VEC_pop (tree, comp_b);
1138 /* Real and imaginary part of a variable do not alias. */
1139 if ((TREE_CODE (a) == REALPART_EXPR
1140 && TREE_CODE (b) == IMAGPART_EXPR)
1141 || (TREE_CODE (a) == IMAGPART_EXPR
1142 && TREE_CODE (b) == REALPART_EXPR))
1144 ret = true;
1145 break;
1148 if (TREE_CODE (a) != TREE_CODE (b))
1149 break;
1151 /* Nothing to do for ARRAY_REFs, as the indices of array_refs in
1152 DR_BASE_OBJECT are always zero. */
1153 if (TREE_CODE (a) == ARRAY_REF)
1154 continue;
1155 else if (TREE_CODE (a) == COMPONENT_REF)
1157 if (operand_equal_p (TREE_OPERAND (a, 1), TREE_OPERAND (b, 1), 0))
1158 continue;
1160 /* Different fields of unions may overlap. */
1161 base_a = TREE_OPERAND (a, 0);
1162 if (TREE_CODE (TREE_TYPE (base_a)) == UNION_TYPE)
1163 break;
1165 /* Different fields of structures cannot. */
1166 ret = true;
1167 break;
1169 else
1170 break;
1173 VEC_free (tree, heap, comp_a);
1174 VEC_free (tree, heap, comp_b);
1176 return ret;
1179 /* Returns false if we can prove that data references A and B do not alias,
1180 true otherwise. */
1182 static bool
1183 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b)
1185 const_tree addr_a = DR_BASE_ADDRESS (a);
1186 const_tree addr_b = DR_BASE_ADDRESS (b);
1187 const_tree type_a, type_b;
1188 const_tree decl_a = NULL_TREE, decl_b = NULL_TREE;
1190 /* If the sets of virtual operands are disjoint, the memory references do not
1191 alias. */
1192 if (!bitmap_intersect_p (DR_VOPS (a), DR_VOPS (b)))
1193 return false;
1195 /* If the accessed objects are disjoint, the memory references do not
1196 alias. */
1197 if (disjoint_objects_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b)))
1198 return false;
1200 if (!addr_a || !addr_b)
1201 return true;
1203 /* If the references are based on different static objects, they cannot alias
1204 (PTA should be able to disambiguate such accesses, but often it fails to,
1205 since currently we cannot distinguish between pointer and offset in pointer
1206 arithmetics). */
1207 if (TREE_CODE (addr_a) == ADDR_EXPR
1208 && TREE_CODE (addr_b) == ADDR_EXPR)
1209 return TREE_OPERAND (addr_a, 0) == TREE_OPERAND (addr_b, 0);
1211 /* An instruction writing through a restricted pointer is "independent" of any
1212 instruction reading or writing through a different restricted pointer,
1213 in the same block/scope. */
1215 type_a = TREE_TYPE (addr_a);
1216 type_b = TREE_TYPE (addr_b);
1217 gcc_assert (POINTER_TYPE_P (type_a) && POINTER_TYPE_P (type_b));
1219 if (TREE_CODE (addr_a) == SSA_NAME)
1220 decl_a = SSA_NAME_VAR (addr_a);
1221 if (TREE_CODE (addr_b) == SSA_NAME)
1222 decl_b = SSA_NAME_VAR (addr_b);
1224 if (TYPE_RESTRICT (type_a) && TYPE_RESTRICT (type_b)
1225 && (!DR_IS_READ (a) || !DR_IS_READ (b))
1226 && decl_a && DECL_P (decl_a)
1227 && decl_b && DECL_P (decl_b)
1228 && decl_a != decl_b
1229 && TREE_CODE (DECL_CONTEXT (decl_a)) == FUNCTION_DECL
1230 && DECL_CONTEXT (decl_a) == DECL_CONTEXT (decl_b))
1231 return false;
1233 return true;
1236 /* Initialize a data dependence relation between data accesses A and
1237 B. NB_LOOPS is the number of loops surrounding the references: the
1238 size of the classic distance/direction vectors. */
1240 static struct data_dependence_relation *
1241 initialize_data_dependence_relation (struct data_reference *a,
1242 struct data_reference *b,
1243 VEC (loop_p, heap) *loop_nest)
1245 struct data_dependence_relation *res;
1246 unsigned int i;
1248 res = XNEW (struct data_dependence_relation);
1249 DDR_A (res) = a;
1250 DDR_B (res) = b;
1251 DDR_LOOP_NEST (res) = NULL;
1252 DDR_REVERSED_P (res) = false;
1253 DDR_SUBSCRIPTS (res) = NULL;
1254 DDR_DIR_VECTS (res) = NULL;
1255 DDR_DIST_VECTS (res) = NULL;
1257 if (a == NULL || b == NULL)
1259 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1260 return res;
1263 /* If the data references do not alias, then they are independent. */
1264 if (!dr_may_alias_p (a, b))
1266 DDR_ARE_DEPENDENT (res) = chrec_known;
1267 return res;
1270 /* If the references do not access the same object, we do not know
1271 whether they alias or not. */
1272 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1274 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1275 return res;
1278 /* If the base of the object is not invariant in the loop nest, we cannot
1279 analyze it. TODO -- in fact, it would suffice to record that there may
1280 be arbitrary dependences in the loops where the base object varies. */
1281 if (!object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1282 DR_BASE_OBJECT (a)))
1284 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1285 return res;
1288 gcc_assert (DR_NUM_DIMENSIONS (a) == DR_NUM_DIMENSIONS (b));
1290 DDR_AFFINE_P (res) = true;
1291 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1292 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1293 DDR_LOOP_NEST (res) = loop_nest;
1294 DDR_INNER_LOOP (res) = 0;
1296 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1298 struct subscript *subscript;
1300 subscript = XNEW (struct subscript);
1301 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1302 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1303 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1304 SUB_DISTANCE (subscript) = chrec_dont_know;
1305 VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1308 return res;
1311 /* Frees memory used by the conflict function F. */
1313 static void
1314 free_conflict_function (conflict_function *f)
1316 unsigned i;
1318 if (CF_NONTRIVIAL_P (f))
1320 for (i = 0; i < f->n; i++)
1321 affine_fn_free (f->fns[i]);
1323 free (f);
1326 /* Frees memory used by SUBSCRIPTS. */
1328 static void
1329 free_subscripts (VEC (subscript_p, heap) *subscripts)
1331 unsigned i;
1332 subscript_p s;
1334 for (i = 0; VEC_iterate (subscript_p, subscripts, i, s); i++)
1336 free_conflict_function (s->conflicting_iterations_in_a);
1337 free_conflict_function (s->conflicting_iterations_in_b);
1339 VEC_free (subscript_p, heap, subscripts);
1342 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1343 description. */
1345 static inline void
1346 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1347 tree chrec)
1349 if (dump_file && (dump_flags & TDF_DETAILS))
1351 fprintf (dump_file, "(dependence classified: ");
1352 print_generic_expr (dump_file, chrec, 0);
1353 fprintf (dump_file, ")\n");
1356 DDR_ARE_DEPENDENT (ddr) = chrec;
1357 free_subscripts (DDR_SUBSCRIPTS (ddr));
1358 DDR_SUBSCRIPTS (ddr) = NULL;
1361 /* The dependence relation DDR cannot be represented by a distance
1362 vector. */
1364 static inline void
1365 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1367 if (dump_file && (dump_flags & TDF_DETAILS))
1368 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1370 DDR_AFFINE_P (ddr) = false;
1375 /* This section contains the classic Banerjee tests. */
1377 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1378 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1380 static inline bool
1381 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1383 return (evolution_function_is_constant_p (chrec_a)
1384 && evolution_function_is_constant_p (chrec_b));
1387 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1388 variable, i.e., if the SIV (Single Index Variable) test is true. */
1390 static bool
1391 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1393 if ((evolution_function_is_constant_p (chrec_a)
1394 && evolution_function_is_univariate_p (chrec_b))
1395 || (evolution_function_is_constant_p (chrec_b)
1396 && evolution_function_is_univariate_p (chrec_a)))
1397 return true;
1399 if (evolution_function_is_univariate_p (chrec_a)
1400 && evolution_function_is_univariate_p (chrec_b))
1402 switch (TREE_CODE (chrec_a))
1404 case POLYNOMIAL_CHREC:
1405 switch (TREE_CODE (chrec_b))
1407 case POLYNOMIAL_CHREC:
1408 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1409 return false;
1411 default:
1412 return true;
1415 default:
1416 return true;
1420 return false;
1423 /* Creates a conflict function with N dimensions. The affine functions
1424 in each dimension follow. */
1426 static conflict_function *
1427 conflict_fn (unsigned n, ...)
1429 unsigned i;
1430 conflict_function *ret = XCNEW (conflict_function);
1431 va_list ap;
1433 gcc_assert (0 < n && n <= MAX_DIM);
1434 va_start(ap, n);
1436 ret->n = n;
1437 for (i = 0; i < n; i++)
1438 ret->fns[i] = va_arg (ap, affine_fn);
1439 va_end(ap);
1441 return ret;
1444 /* Returns constant affine function with value CST. */
1446 static affine_fn
1447 affine_fn_cst (tree cst)
1449 affine_fn fn = VEC_alloc (tree, heap, 1);
1450 VEC_quick_push (tree, fn, cst);
1451 return fn;
1454 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1456 static affine_fn
1457 affine_fn_univar (tree cst, unsigned dim, tree coef)
1459 affine_fn fn = VEC_alloc (tree, heap, dim + 1);
1460 unsigned i;
1462 gcc_assert (dim > 0);
1463 VEC_quick_push (tree, fn, cst);
1464 for (i = 1; i < dim; i++)
1465 VEC_quick_push (tree, fn, integer_zero_node);
1466 VEC_quick_push (tree, fn, coef);
1467 return fn;
1470 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1471 *OVERLAPS_B are initialized to the functions that describe the
1472 relation between the elements accessed twice by CHREC_A and
1473 CHREC_B. For k >= 0, the following property is verified:
1475 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1477 static void
1478 analyze_ziv_subscript (tree chrec_a,
1479 tree chrec_b,
1480 conflict_function **overlaps_a,
1481 conflict_function **overlaps_b,
1482 tree *last_conflicts)
1484 tree difference;
1485 dependence_stats.num_ziv++;
1487 if (dump_file && (dump_flags & TDF_DETAILS))
1488 fprintf (dump_file, "(analyze_ziv_subscript \n");
1490 chrec_a = chrec_convert (integer_type_node, chrec_a, NULL_TREE);
1491 chrec_b = chrec_convert (integer_type_node, chrec_b, NULL_TREE);
1492 difference = chrec_fold_minus (integer_type_node, chrec_a, chrec_b);
1494 switch (TREE_CODE (difference))
1496 case INTEGER_CST:
1497 if (integer_zerop (difference))
1499 /* The difference is equal to zero: the accessed index
1500 overlaps for each iteration in the loop. */
1501 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1502 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1503 *last_conflicts = chrec_dont_know;
1504 dependence_stats.num_ziv_dependent++;
1506 else
1508 /* The accesses do not overlap. */
1509 *overlaps_a = conflict_fn_no_dependence ();
1510 *overlaps_b = conflict_fn_no_dependence ();
1511 *last_conflicts = integer_zero_node;
1512 dependence_stats.num_ziv_independent++;
1514 break;
1516 default:
1517 /* We're not sure whether the indexes overlap. For the moment,
1518 conservatively answer "don't know". */
1519 if (dump_file && (dump_flags & TDF_DETAILS))
1520 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1522 *overlaps_a = conflict_fn_not_known ();
1523 *overlaps_b = conflict_fn_not_known ();
1524 *last_conflicts = chrec_dont_know;
1525 dependence_stats.num_ziv_unimplemented++;
1526 break;
1529 if (dump_file && (dump_flags & TDF_DETAILS))
1530 fprintf (dump_file, ")\n");
1533 /* Sets NIT to the estimated number of executions of the statements in
1534 LOOP. If CONSERVATIVE is true, we must be sure that NIT is at least as
1535 large as the number of iterations. If we have no reliable estimate,
1536 the function returns false, otherwise returns true. */
1538 bool
1539 estimated_loop_iterations (struct loop *loop, bool conservative,
1540 double_int *nit)
1542 estimate_numbers_of_iterations_loop (loop);
1543 if (conservative)
1545 if (!loop->any_upper_bound)
1546 return false;
1548 *nit = loop->nb_iterations_upper_bound;
1550 else
1552 if (!loop->any_estimate)
1553 return false;
1555 *nit = loop->nb_iterations_estimate;
1558 return true;
1561 /* Similar to estimated_loop_iterations, but returns the estimate only
1562 if it fits to HOST_WIDE_INT. If this is not the case, or the estimate
1563 on the number of iterations of LOOP could not be derived, returns -1. */
1565 HOST_WIDE_INT
1566 estimated_loop_iterations_int (struct loop *loop, bool conservative)
1568 double_int nit;
1569 HOST_WIDE_INT hwi_nit;
1571 if (!estimated_loop_iterations (loop, conservative, &nit))
1572 return -1;
1574 if (!double_int_fits_in_shwi_p (nit))
1575 return -1;
1576 hwi_nit = double_int_to_shwi (nit);
1578 return hwi_nit < 0 ? -1 : hwi_nit;
1581 /* Similar to estimated_loop_iterations, but returns the estimate as a tree,
1582 and only if it fits to the int type. If this is not the case, or the
1583 estimate on the number of iterations of LOOP could not be derived, returns
1584 chrec_dont_know. */
1586 static tree
1587 estimated_loop_iterations_tree (struct loop *loop, bool conservative)
1589 double_int nit;
1590 tree type;
1592 if (!estimated_loop_iterations (loop, conservative, &nit))
1593 return chrec_dont_know;
1595 type = lang_hooks.types.type_for_size (INT_TYPE_SIZE, true);
1596 if (!double_int_fits_to_tree_p (type, nit))
1597 return chrec_dont_know;
1599 return double_int_to_tree (type, nit);
1602 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1603 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1604 *OVERLAPS_B are initialized to the functions that describe the
1605 relation between the elements accessed twice by CHREC_A and
1606 CHREC_B. For k >= 0, the following property is verified:
1608 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1610 static void
1611 analyze_siv_subscript_cst_affine (tree chrec_a,
1612 tree chrec_b,
1613 conflict_function **overlaps_a,
1614 conflict_function **overlaps_b,
1615 tree *last_conflicts)
1617 bool value0, value1, value2;
1618 tree difference, tmp;
1620 chrec_a = chrec_convert (integer_type_node, chrec_a, NULL_TREE);
1621 chrec_b = chrec_convert (integer_type_node, chrec_b, NULL_TREE);
1622 difference = chrec_fold_minus
1623 (integer_type_node, initial_condition (chrec_b), chrec_a);
1625 if (!chrec_is_positive (initial_condition (difference), &value0))
1627 if (dump_file && (dump_flags & TDF_DETAILS))
1628 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1630 dependence_stats.num_siv_unimplemented++;
1631 *overlaps_a = conflict_fn_not_known ();
1632 *overlaps_b = conflict_fn_not_known ();
1633 *last_conflicts = chrec_dont_know;
1634 return;
1636 else
1638 if (value0 == false)
1640 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1642 if (dump_file && (dump_flags & TDF_DETAILS))
1643 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1645 *overlaps_a = conflict_fn_not_known ();
1646 *overlaps_b = conflict_fn_not_known ();
1647 *last_conflicts = chrec_dont_know;
1648 dependence_stats.num_siv_unimplemented++;
1649 return;
1651 else
1653 if (value1 == true)
1655 /* Example:
1656 chrec_a = 12
1657 chrec_b = {10, +, 1}
1660 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1662 HOST_WIDE_INT numiter;
1663 struct loop *loop = get_chrec_loop (chrec_b);
1665 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1666 tmp = fold_build2 (EXACT_DIV_EXPR, integer_type_node,
1667 fold_build1 (ABS_EXPR,
1668 integer_type_node,
1669 difference),
1670 CHREC_RIGHT (chrec_b));
1671 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1672 *last_conflicts = integer_one_node;
1675 /* Perform weak-zero siv test to see if overlap is
1676 outside the loop bounds. */
1677 numiter = estimated_loop_iterations_int (loop, false);
1679 if (numiter >= 0
1680 && compare_tree_int (tmp, numiter) > 0)
1682 free_conflict_function (*overlaps_a);
1683 free_conflict_function (*overlaps_b);
1684 *overlaps_a = conflict_fn_no_dependence ();
1685 *overlaps_b = conflict_fn_no_dependence ();
1686 *last_conflicts = integer_zero_node;
1687 dependence_stats.num_siv_independent++;
1688 return;
1690 dependence_stats.num_siv_dependent++;
1691 return;
1694 /* When the step does not divide the difference, there are
1695 no overlaps. */
1696 else
1698 *overlaps_a = conflict_fn_no_dependence ();
1699 *overlaps_b = conflict_fn_no_dependence ();
1700 *last_conflicts = integer_zero_node;
1701 dependence_stats.num_siv_independent++;
1702 return;
1706 else
1708 /* Example:
1709 chrec_a = 12
1710 chrec_b = {10, +, -1}
1712 In this case, chrec_a will not overlap with chrec_b. */
1713 *overlaps_a = conflict_fn_no_dependence ();
1714 *overlaps_b = conflict_fn_no_dependence ();
1715 *last_conflicts = integer_zero_node;
1716 dependence_stats.num_siv_independent++;
1717 return;
1721 else
1723 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1725 if (dump_file && (dump_flags & TDF_DETAILS))
1726 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1728 *overlaps_a = conflict_fn_not_known ();
1729 *overlaps_b = conflict_fn_not_known ();
1730 *last_conflicts = chrec_dont_know;
1731 dependence_stats.num_siv_unimplemented++;
1732 return;
1734 else
1736 if (value2 == false)
1738 /* Example:
1739 chrec_a = 3
1740 chrec_b = {10, +, -1}
1742 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1744 HOST_WIDE_INT numiter;
1745 struct loop *loop = get_chrec_loop (chrec_b);
1747 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1748 tmp = fold_build2 (EXACT_DIV_EXPR,
1749 integer_type_node, difference,
1750 CHREC_RIGHT (chrec_b));
1751 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1752 *last_conflicts = integer_one_node;
1754 /* Perform weak-zero siv test to see if overlap is
1755 outside the loop bounds. */
1756 numiter = estimated_loop_iterations_int (loop, false);
1758 if (numiter >= 0
1759 && compare_tree_int (tmp, numiter) > 0)
1761 free_conflict_function (*overlaps_a);
1762 free_conflict_function (*overlaps_b);
1763 *overlaps_a = conflict_fn_no_dependence ();
1764 *overlaps_b = conflict_fn_no_dependence ();
1765 *last_conflicts = integer_zero_node;
1766 dependence_stats.num_siv_independent++;
1767 return;
1769 dependence_stats.num_siv_dependent++;
1770 return;
1773 /* When the step does not divide the difference, there
1774 are no overlaps. */
1775 else
1777 *overlaps_a = conflict_fn_no_dependence ();
1778 *overlaps_b = conflict_fn_no_dependence ();
1779 *last_conflicts = integer_zero_node;
1780 dependence_stats.num_siv_independent++;
1781 return;
1784 else
1786 /* Example:
1787 chrec_a = 3
1788 chrec_b = {4, +, 1}
1790 In this case, chrec_a will not overlap with chrec_b. */
1791 *overlaps_a = conflict_fn_no_dependence ();
1792 *overlaps_b = conflict_fn_no_dependence ();
1793 *last_conflicts = integer_zero_node;
1794 dependence_stats.num_siv_independent++;
1795 return;
1802 /* Helper recursive function for initializing the matrix A. Returns
1803 the initial value of CHREC. */
1805 static int
1806 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
1808 gcc_assert (chrec);
1810 if (TREE_CODE (chrec) != POLYNOMIAL_CHREC)
1811 return int_cst_value (chrec);
1813 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
1814 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
1817 #define FLOOR_DIV(x,y) ((x) / (y))
1819 /* Solves the special case of the Diophantine equation:
1820 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
1822 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
1823 number of iterations that loops X and Y run. The overlaps will be
1824 constructed as evolutions in dimension DIM. */
1826 static void
1827 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
1828 affine_fn *overlaps_a,
1829 affine_fn *overlaps_b,
1830 tree *last_conflicts, int dim)
1832 if (((step_a > 0 && step_b > 0)
1833 || (step_a < 0 && step_b < 0)))
1835 int step_overlaps_a, step_overlaps_b;
1836 int gcd_steps_a_b, last_conflict, tau2;
1838 gcd_steps_a_b = gcd (step_a, step_b);
1839 step_overlaps_a = step_b / gcd_steps_a_b;
1840 step_overlaps_b = step_a / gcd_steps_a_b;
1842 if (niter > 0)
1844 tau2 = FLOOR_DIV (niter, step_overlaps_a);
1845 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
1846 last_conflict = tau2;
1847 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
1849 else
1850 *last_conflicts = chrec_dont_know;
1852 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
1853 build_int_cst (NULL_TREE,
1854 step_overlaps_a));
1855 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
1856 build_int_cst (NULL_TREE,
1857 step_overlaps_b));
1860 else
1862 *overlaps_a = affine_fn_cst (integer_zero_node);
1863 *overlaps_b = affine_fn_cst (integer_zero_node);
1864 *last_conflicts = integer_zero_node;
1868 /* Solves the special case of a Diophantine equation where CHREC_A is
1869 an affine bivariate function, and CHREC_B is an affine univariate
1870 function. For example,
1872 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
1874 has the following overlapping functions:
1876 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
1877 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
1878 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
1880 FORNOW: This is a specialized implementation for a case occurring in
1881 a common benchmark. Implement the general algorithm. */
1883 static void
1884 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
1885 conflict_function **overlaps_a,
1886 conflict_function **overlaps_b,
1887 tree *last_conflicts)
1889 bool xz_p, yz_p, xyz_p;
1890 int step_x, step_y, step_z;
1891 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
1892 affine_fn overlaps_a_xz, overlaps_b_xz;
1893 affine_fn overlaps_a_yz, overlaps_b_yz;
1894 affine_fn overlaps_a_xyz, overlaps_b_xyz;
1895 affine_fn ova1, ova2, ovb;
1896 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
1898 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
1899 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
1900 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
1902 niter_x =
1903 estimated_loop_iterations_int (get_chrec_loop (CHREC_LEFT (chrec_a)),
1904 false);
1905 niter_y = estimated_loop_iterations_int (get_chrec_loop (chrec_a), false);
1906 niter_z = estimated_loop_iterations_int (get_chrec_loop (chrec_b), false);
1908 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
1910 if (dump_file && (dump_flags & TDF_DETAILS))
1911 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
1913 *overlaps_a = conflict_fn_not_known ();
1914 *overlaps_b = conflict_fn_not_known ();
1915 *last_conflicts = chrec_dont_know;
1916 return;
1919 niter = MIN (niter_x, niter_z);
1920 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
1921 &overlaps_a_xz,
1922 &overlaps_b_xz,
1923 &last_conflicts_xz, 1);
1924 niter = MIN (niter_y, niter_z);
1925 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
1926 &overlaps_a_yz,
1927 &overlaps_b_yz,
1928 &last_conflicts_yz, 2);
1929 niter = MIN (niter_x, niter_z);
1930 niter = MIN (niter_y, niter);
1931 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
1932 &overlaps_a_xyz,
1933 &overlaps_b_xyz,
1934 &last_conflicts_xyz, 3);
1936 xz_p = !integer_zerop (last_conflicts_xz);
1937 yz_p = !integer_zerop (last_conflicts_yz);
1938 xyz_p = !integer_zerop (last_conflicts_xyz);
1940 if (xz_p || yz_p || xyz_p)
1942 ova1 = affine_fn_cst (integer_zero_node);
1943 ova2 = affine_fn_cst (integer_zero_node);
1944 ovb = affine_fn_cst (integer_zero_node);
1945 if (xz_p)
1947 affine_fn t0 = ova1;
1948 affine_fn t2 = ovb;
1950 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
1951 ovb = affine_fn_plus (ovb, overlaps_b_xz);
1952 affine_fn_free (t0);
1953 affine_fn_free (t2);
1954 *last_conflicts = last_conflicts_xz;
1956 if (yz_p)
1958 affine_fn t0 = ova2;
1959 affine_fn t2 = ovb;
1961 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
1962 ovb = affine_fn_plus (ovb, overlaps_b_yz);
1963 affine_fn_free (t0);
1964 affine_fn_free (t2);
1965 *last_conflicts = last_conflicts_yz;
1967 if (xyz_p)
1969 affine_fn t0 = ova1;
1970 affine_fn t2 = ova2;
1971 affine_fn t4 = ovb;
1973 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
1974 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
1975 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
1976 affine_fn_free (t0);
1977 affine_fn_free (t2);
1978 affine_fn_free (t4);
1979 *last_conflicts = last_conflicts_xyz;
1981 *overlaps_a = conflict_fn (2, ova1, ova2);
1982 *overlaps_b = conflict_fn (1, ovb);
1984 else
1986 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1987 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1988 *last_conflicts = integer_zero_node;
1991 affine_fn_free (overlaps_a_xz);
1992 affine_fn_free (overlaps_b_xz);
1993 affine_fn_free (overlaps_a_yz);
1994 affine_fn_free (overlaps_b_yz);
1995 affine_fn_free (overlaps_a_xyz);
1996 affine_fn_free (overlaps_b_xyz);
1999 /* Determines the overlapping elements due to accesses CHREC_A and
2000 CHREC_B, that are affine functions. This function cannot handle
2001 symbolic evolution functions, ie. when initial conditions are
2002 parameters, because it uses lambda matrices of integers. */
2004 static void
2005 analyze_subscript_affine_affine (tree chrec_a,
2006 tree chrec_b,
2007 conflict_function **overlaps_a,
2008 conflict_function **overlaps_b,
2009 tree *last_conflicts)
2011 unsigned nb_vars_a, nb_vars_b, dim;
2012 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2013 lambda_matrix A, U, S;
2015 if (eq_evolutions_p (chrec_a, chrec_b))
2017 /* The accessed index overlaps for each iteration in the
2018 loop. */
2019 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2020 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2021 *last_conflicts = chrec_dont_know;
2022 return;
2024 if (dump_file && (dump_flags & TDF_DETAILS))
2025 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2027 /* For determining the initial intersection, we have to solve a
2028 Diophantine equation. This is the most time consuming part.
2030 For answering to the question: "Is there a dependence?" we have
2031 to prove that there exists a solution to the Diophantine
2032 equation, and that the solution is in the iteration domain,
2033 i.e. the solution is positive or zero, and that the solution
2034 happens before the upper bound loop.nb_iterations. Otherwise
2035 there is no dependence. This function outputs a description of
2036 the iterations that hold the intersections. */
2038 nb_vars_a = nb_vars_in_chrec (chrec_a);
2039 nb_vars_b = nb_vars_in_chrec (chrec_b);
2041 dim = nb_vars_a + nb_vars_b;
2042 U = lambda_matrix_new (dim, dim);
2043 A = lambda_matrix_new (dim, 1);
2044 S = lambda_matrix_new (dim, 1);
2046 init_a = initialize_matrix_A (A, chrec_a, 0, 1);
2047 init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
2048 gamma = init_b - init_a;
2050 /* Don't do all the hard work of solving the Diophantine equation
2051 when we already know the solution: for example,
2052 | {3, +, 1}_1
2053 | {3, +, 4}_2
2054 | gamma = 3 - 3 = 0.
2055 Then the first overlap occurs during the first iterations:
2056 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2058 if (gamma == 0)
2060 if (nb_vars_a == 1 && nb_vars_b == 1)
2062 HOST_WIDE_INT step_a, step_b;
2063 HOST_WIDE_INT niter, niter_a, niter_b;
2064 affine_fn ova, ovb;
2066 niter_a = estimated_loop_iterations_int (get_chrec_loop (chrec_a),
2067 false);
2068 niter_b = estimated_loop_iterations_int (get_chrec_loop (chrec_b),
2069 false);
2070 niter = MIN (niter_a, niter_b);
2071 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2072 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2074 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2075 &ova, &ovb,
2076 last_conflicts, 1);
2077 *overlaps_a = conflict_fn (1, ova);
2078 *overlaps_b = conflict_fn (1, ovb);
2081 else if (nb_vars_a == 2 && nb_vars_b == 1)
2082 compute_overlap_steps_for_affine_1_2
2083 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2085 else if (nb_vars_a == 1 && nb_vars_b == 2)
2086 compute_overlap_steps_for_affine_1_2
2087 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2089 else
2091 if (dump_file && (dump_flags & TDF_DETAILS))
2092 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2093 *overlaps_a = conflict_fn_not_known ();
2094 *overlaps_b = conflict_fn_not_known ();
2095 *last_conflicts = chrec_dont_know;
2097 goto end_analyze_subs_aa;
2100 /* U.A = S */
2101 lambda_matrix_right_hermite (A, dim, 1, S, U);
2103 if (S[0][0] < 0)
2105 S[0][0] *= -1;
2106 lambda_matrix_row_negate (U, dim, 0);
2108 gcd_alpha_beta = S[0][0];
2110 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2111 but that is a quite strange case. Instead of ICEing, answer
2112 don't know. */
2113 if (gcd_alpha_beta == 0)
2115 *overlaps_a = conflict_fn_not_known ();
2116 *overlaps_b = conflict_fn_not_known ();
2117 *last_conflicts = chrec_dont_know;
2118 goto end_analyze_subs_aa;
2121 /* The classic "gcd-test". */
2122 if (!int_divides_p (gcd_alpha_beta, gamma))
2124 /* The "gcd-test" has determined that there is no integer
2125 solution, i.e. there is no dependence. */
2126 *overlaps_a = conflict_fn_no_dependence ();
2127 *overlaps_b = conflict_fn_no_dependence ();
2128 *last_conflicts = integer_zero_node;
2131 /* Both access functions are univariate. This includes SIV and MIV cases. */
2132 else if (nb_vars_a == 1 && nb_vars_b == 1)
2134 /* Both functions should have the same evolution sign. */
2135 if (((A[0][0] > 0 && -A[1][0] > 0)
2136 || (A[0][0] < 0 && -A[1][0] < 0)))
2138 /* The solutions are given by:
2140 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2141 | [u21 u22] [y0]
2143 For a given integer t. Using the following variables,
2145 | i0 = u11 * gamma / gcd_alpha_beta
2146 | j0 = u12 * gamma / gcd_alpha_beta
2147 | i1 = u21
2148 | j1 = u22
2150 the solutions are:
2152 | x0 = i0 + i1 * t,
2153 | y0 = j0 + j1 * t. */
2154 HOST_WIDE_INT i0, j0, i1, j1;
2156 i0 = U[0][0] * gamma / gcd_alpha_beta;
2157 j0 = U[0][1] * gamma / gcd_alpha_beta;
2158 i1 = U[1][0];
2159 j1 = U[1][1];
2161 if ((i1 == 0 && i0 < 0)
2162 || (j1 == 0 && j0 < 0))
2164 /* There is no solution.
2165 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2166 falls in here, but for the moment we don't look at the
2167 upper bound of the iteration domain. */
2168 *overlaps_a = conflict_fn_no_dependence ();
2169 *overlaps_b = conflict_fn_no_dependence ();
2170 *last_conflicts = integer_zero_node;
2171 goto end_analyze_subs_aa;
2174 if (i1 > 0 && j1 > 0)
2176 HOST_WIDE_INT niter_a = estimated_loop_iterations_int
2177 (get_chrec_loop (chrec_a), false);
2178 HOST_WIDE_INT niter_b = estimated_loop_iterations_int
2179 (get_chrec_loop (chrec_b), false);
2180 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2182 /* (X0, Y0) is a solution of the Diophantine equation:
2183 "chrec_a (X0) = chrec_b (Y0)". */
2184 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2185 CEIL (-j0, j1));
2186 HOST_WIDE_INT x0 = i1 * tau1 + i0;
2187 HOST_WIDE_INT y0 = j1 * tau1 + j0;
2189 /* (X1, Y1) is the smallest positive solution of the eq
2190 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2191 first conflict occurs. */
2192 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2193 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2194 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2196 if (niter > 0)
2198 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2199 FLOOR_DIV (niter - j0, j1));
2200 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2202 /* If the overlap occurs outside of the bounds of the
2203 loop, there is no dependence. */
2204 if (x1 > niter || y1 > niter)
2206 *overlaps_a = conflict_fn_no_dependence ();
2207 *overlaps_b = conflict_fn_no_dependence ();
2208 *last_conflicts = integer_zero_node;
2209 goto end_analyze_subs_aa;
2211 else
2212 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2214 else
2215 *last_conflicts = chrec_dont_know;
2217 *overlaps_a
2218 = conflict_fn (1,
2219 affine_fn_univar (build_int_cst (NULL_TREE, x1),
2221 build_int_cst (NULL_TREE, i1)));
2222 *overlaps_b
2223 = conflict_fn (1,
2224 affine_fn_univar (build_int_cst (NULL_TREE, y1),
2226 build_int_cst (NULL_TREE, j1)));
2228 else
2230 /* FIXME: For the moment, the upper bound of the
2231 iteration domain for i and j is not checked. */
2232 if (dump_file && (dump_flags & TDF_DETAILS))
2233 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2234 *overlaps_a = conflict_fn_not_known ();
2235 *overlaps_b = conflict_fn_not_known ();
2236 *last_conflicts = chrec_dont_know;
2239 else
2241 if (dump_file && (dump_flags & TDF_DETAILS))
2242 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2243 *overlaps_a = conflict_fn_not_known ();
2244 *overlaps_b = conflict_fn_not_known ();
2245 *last_conflicts = chrec_dont_know;
2248 else
2250 if (dump_file && (dump_flags & TDF_DETAILS))
2251 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2252 *overlaps_a = conflict_fn_not_known ();
2253 *overlaps_b = conflict_fn_not_known ();
2254 *last_conflicts = chrec_dont_know;
2257 end_analyze_subs_aa:
2258 if (dump_file && (dump_flags & TDF_DETAILS))
2260 fprintf (dump_file, " (overlaps_a = ");
2261 dump_conflict_function (dump_file, *overlaps_a);
2262 fprintf (dump_file, ")\n (overlaps_b = ");
2263 dump_conflict_function (dump_file, *overlaps_b);
2264 fprintf (dump_file, ")\n");
2265 fprintf (dump_file, ")\n");
2269 /* Returns true when analyze_subscript_affine_affine can be used for
2270 determining the dependence relation between chrec_a and chrec_b,
2271 that contain symbols. This function modifies chrec_a and chrec_b
2272 such that the analysis result is the same, and such that they don't
2273 contain symbols, and then can safely be passed to the analyzer.
2275 Example: The analysis of the following tuples of evolutions produce
2276 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2277 vs. {0, +, 1}_1
2279 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2280 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2283 static bool
2284 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2286 tree diff, type, left_a, left_b, right_b;
2288 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2289 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2290 /* FIXME: For the moment not handled. Might be refined later. */
2291 return false;
2293 type = chrec_type (*chrec_a);
2294 left_a = CHREC_LEFT (*chrec_a);
2295 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL_TREE);
2296 diff = chrec_fold_minus (type, left_a, left_b);
2298 if (!evolution_function_is_constant_p (diff))
2299 return false;
2301 if (dump_file && (dump_flags & TDF_DETAILS))
2302 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2304 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2305 diff, CHREC_RIGHT (*chrec_a));
2306 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL_TREE);
2307 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2308 build_int_cst (type, 0),
2309 right_b);
2310 return true;
2313 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2314 *OVERLAPS_B are initialized to the functions that describe the
2315 relation between the elements accessed twice by CHREC_A and
2316 CHREC_B. For k >= 0, the following property is verified:
2318 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2320 static void
2321 analyze_siv_subscript (tree chrec_a,
2322 tree chrec_b,
2323 conflict_function **overlaps_a,
2324 conflict_function **overlaps_b,
2325 tree *last_conflicts)
2327 dependence_stats.num_siv++;
2329 if (dump_file && (dump_flags & TDF_DETAILS))
2330 fprintf (dump_file, "(analyze_siv_subscript \n");
2332 if (evolution_function_is_constant_p (chrec_a)
2333 && evolution_function_is_affine_p (chrec_b))
2334 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2335 overlaps_a, overlaps_b, last_conflicts);
2337 else if (evolution_function_is_affine_p (chrec_a)
2338 && evolution_function_is_constant_p (chrec_b))
2339 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2340 overlaps_b, overlaps_a, last_conflicts);
2342 else if (evolution_function_is_affine_p (chrec_a)
2343 && evolution_function_is_affine_p (chrec_b))
2345 if (!chrec_contains_symbols (chrec_a)
2346 && !chrec_contains_symbols (chrec_b))
2348 analyze_subscript_affine_affine (chrec_a, chrec_b,
2349 overlaps_a, overlaps_b,
2350 last_conflicts);
2352 if (CF_NOT_KNOWN_P (*overlaps_a)
2353 || CF_NOT_KNOWN_P (*overlaps_b))
2354 dependence_stats.num_siv_unimplemented++;
2355 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2356 || CF_NO_DEPENDENCE_P (*overlaps_b))
2357 dependence_stats.num_siv_independent++;
2358 else
2359 dependence_stats.num_siv_dependent++;
2361 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2362 &chrec_b))
2364 analyze_subscript_affine_affine (chrec_a, chrec_b,
2365 overlaps_a, overlaps_b,
2366 last_conflicts);
2368 if (CF_NOT_KNOWN_P (*overlaps_a)
2369 || CF_NOT_KNOWN_P (*overlaps_b))
2370 dependence_stats.num_siv_unimplemented++;
2371 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2372 || CF_NO_DEPENDENCE_P (*overlaps_b))
2373 dependence_stats.num_siv_independent++;
2374 else
2375 dependence_stats.num_siv_dependent++;
2377 else
2378 goto siv_subscript_dontknow;
2381 else
2383 siv_subscript_dontknow:;
2384 if (dump_file && (dump_flags & TDF_DETAILS))
2385 fprintf (dump_file, "siv test failed: unimplemented.\n");
2386 *overlaps_a = conflict_fn_not_known ();
2387 *overlaps_b = conflict_fn_not_known ();
2388 *last_conflicts = chrec_dont_know;
2389 dependence_stats.num_siv_unimplemented++;
2392 if (dump_file && (dump_flags & TDF_DETAILS))
2393 fprintf (dump_file, ")\n");
2396 /* Returns false if we can prove that the greatest common divisor of the steps
2397 of CHREC does not divide CST, false otherwise. */
2399 static bool
2400 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2402 HOST_WIDE_INT cd = 0, val;
2403 tree step;
2405 if (!host_integerp (cst, 0))
2406 return true;
2407 val = tree_low_cst (cst, 0);
2409 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2411 step = CHREC_RIGHT (chrec);
2412 if (!host_integerp (step, 0))
2413 return true;
2414 cd = gcd (cd, tree_low_cst (step, 0));
2415 chrec = CHREC_LEFT (chrec);
2418 return val % cd == 0;
2421 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2422 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2423 functions that describe the relation between the elements accessed
2424 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2425 is verified:
2427 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2429 static void
2430 analyze_miv_subscript (tree chrec_a,
2431 tree chrec_b,
2432 conflict_function **overlaps_a,
2433 conflict_function **overlaps_b,
2434 tree *last_conflicts,
2435 struct loop *loop_nest)
2437 /* FIXME: This is a MIV subscript, not yet handled.
2438 Example: (A[{1, +, 1}_1] vs. A[{1, +, 1}_2]) that comes from
2439 (A[i] vs. A[j]).
2441 In the SIV test we had to solve a Diophantine equation with two
2442 variables. In the MIV case we have to solve a Diophantine
2443 equation with 2*n variables (if the subscript uses n IVs).
2445 tree difference;
2446 dependence_stats.num_miv++;
2447 if (dump_file && (dump_flags & TDF_DETAILS))
2448 fprintf (dump_file, "(analyze_miv_subscript \n");
2450 chrec_a = chrec_convert (integer_type_node, chrec_a, NULL_TREE);
2451 chrec_b = chrec_convert (integer_type_node, chrec_b, NULL_TREE);
2452 difference = chrec_fold_minus (integer_type_node, chrec_a, chrec_b);
2454 if (eq_evolutions_p (chrec_a, chrec_b))
2456 /* Access functions are the same: all the elements are accessed
2457 in the same order. */
2458 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2459 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2460 *last_conflicts = estimated_loop_iterations_tree
2461 (get_chrec_loop (chrec_a), true);
2462 dependence_stats.num_miv_dependent++;
2465 else if (evolution_function_is_constant_p (difference)
2466 /* For the moment, the following is verified:
2467 evolution_function_is_affine_multivariate_p (chrec_a,
2468 loop_nest->num) */
2469 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2471 /* testsuite/.../ssa-chrec-33.c
2472 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2474 The difference is 1, and all the evolution steps are multiples
2475 of 2, consequently there are no overlapping elements. */
2476 *overlaps_a = conflict_fn_no_dependence ();
2477 *overlaps_b = conflict_fn_no_dependence ();
2478 *last_conflicts = integer_zero_node;
2479 dependence_stats.num_miv_independent++;
2482 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2483 && !chrec_contains_symbols (chrec_a)
2484 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2485 && !chrec_contains_symbols (chrec_b))
2487 /* testsuite/.../ssa-chrec-35.c
2488 {0, +, 1}_2 vs. {0, +, 1}_3
2489 the overlapping elements are respectively located at iterations:
2490 {0, +, 1}_x and {0, +, 1}_x,
2491 in other words, we have the equality:
2492 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2494 Other examples:
2495 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2496 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2498 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2499 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2501 analyze_subscript_affine_affine (chrec_a, chrec_b,
2502 overlaps_a, overlaps_b, last_conflicts);
2504 if (CF_NOT_KNOWN_P (*overlaps_a)
2505 || CF_NOT_KNOWN_P (*overlaps_b))
2506 dependence_stats.num_miv_unimplemented++;
2507 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2508 || CF_NO_DEPENDENCE_P (*overlaps_b))
2509 dependence_stats.num_miv_independent++;
2510 else
2511 dependence_stats.num_miv_dependent++;
2514 else
2516 /* When the analysis is too difficult, answer "don't know". */
2517 if (dump_file && (dump_flags & TDF_DETAILS))
2518 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2520 *overlaps_a = conflict_fn_not_known ();
2521 *overlaps_b = conflict_fn_not_known ();
2522 *last_conflicts = chrec_dont_know;
2523 dependence_stats.num_miv_unimplemented++;
2526 if (dump_file && (dump_flags & TDF_DETAILS))
2527 fprintf (dump_file, ")\n");
2530 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2531 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2532 OVERLAP_ITERATIONS_B are initialized with two functions that
2533 describe the iterations that contain conflicting elements.
2535 Remark: For an integer k >= 0, the following equality is true:
2537 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2540 static void
2541 analyze_overlapping_iterations (tree chrec_a,
2542 tree chrec_b,
2543 conflict_function **overlap_iterations_a,
2544 conflict_function **overlap_iterations_b,
2545 tree *last_conflicts, struct loop *loop_nest)
2547 unsigned int lnn = loop_nest->num;
2549 dependence_stats.num_subscript_tests++;
2551 if (dump_file && (dump_flags & TDF_DETAILS))
2553 fprintf (dump_file, "(analyze_overlapping_iterations \n");
2554 fprintf (dump_file, " (chrec_a = ");
2555 print_generic_expr (dump_file, chrec_a, 0);
2556 fprintf (dump_file, ")\n (chrec_b = ");
2557 print_generic_expr (dump_file, chrec_b, 0);
2558 fprintf (dump_file, ")\n");
2561 if (chrec_a == NULL_TREE
2562 || chrec_b == NULL_TREE
2563 || chrec_contains_undetermined (chrec_a)
2564 || chrec_contains_undetermined (chrec_b))
2566 dependence_stats.num_subscript_undetermined++;
2568 *overlap_iterations_a = conflict_fn_not_known ();
2569 *overlap_iterations_b = conflict_fn_not_known ();
2572 /* If they are the same chrec, and are affine, they overlap
2573 on every iteration. */
2574 else if (eq_evolutions_p (chrec_a, chrec_b)
2575 && evolution_function_is_affine_multivariate_p (chrec_a, lnn))
2577 dependence_stats.num_same_subscript_function++;
2578 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2579 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2580 *last_conflicts = chrec_dont_know;
2583 /* If they aren't the same, and aren't affine, we can't do anything
2584 yet. */
2585 else if ((chrec_contains_symbols (chrec_a)
2586 || chrec_contains_symbols (chrec_b))
2587 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2588 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
2590 dependence_stats.num_subscript_undetermined++;
2591 *overlap_iterations_a = conflict_fn_not_known ();
2592 *overlap_iterations_b = conflict_fn_not_known ();
2595 else if (ziv_subscript_p (chrec_a, chrec_b))
2596 analyze_ziv_subscript (chrec_a, chrec_b,
2597 overlap_iterations_a, overlap_iterations_b,
2598 last_conflicts);
2600 else if (siv_subscript_p (chrec_a, chrec_b))
2601 analyze_siv_subscript (chrec_a, chrec_b,
2602 overlap_iterations_a, overlap_iterations_b,
2603 last_conflicts);
2605 else
2606 analyze_miv_subscript (chrec_a, chrec_b,
2607 overlap_iterations_a, overlap_iterations_b,
2608 last_conflicts, loop_nest);
2610 if (dump_file && (dump_flags & TDF_DETAILS))
2612 fprintf (dump_file, " (overlap_iterations_a = ");
2613 dump_conflict_function (dump_file, *overlap_iterations_a);
2614 fprintf (dump_file, ")\n (overlap_iterations_b = ");
2615 dump_conflict_function (dump_file, *overlap_iterations_b);
2616 fprintf (dump_file, ")\n");
2617 fprintf (dump_file, ")\n");
2621 /* Helper function for uniquely inserting distance vectors. */
2623 static void
2624 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
2626 unsigned i;
2627 lambda_vector v;
2629 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, v); i++)
2630 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
2631 return;
2633 VEC_safe_push (lambda_vector, heap, DDR_DIST_VECTS (ddr), dist_v);
2636 /* Helper function for uniquely inserting direction vectors. */
2638 static void
2639 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
2641 unsigned i;
2642 lambda_vector v;
2644 for (i = 0; VEC_iterate (lambda_vector, DDR_DIR_VECTS (ddr), i, v); i++)
2645 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
2646 return;
2648 VEC_safe_push (lambda_vector, heap, DDR_DIR_VECTS (ddr), dir_v);
2651 /* Add a distance of 1 on all the loops outer than INDEX. If we
2652 haven't yet determined a distance for this outer loop, push a new
2653 distance vector composed of the previous distance, and a distance
2654 of 1 for this outer loop. Example:
2656 | loop_1
2657 | loop_2
2658 | A[10]
2659 | endloop_2
2660 | endloop_1
2662 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
2663 save (0, 1), then we have to save (1, 0). */
2665 static void
2666 add_outer_distances (struct data_dependence_relation *ddr,
2667 lambda_vector dist_v, int index)
2669 /* For each outer loop where init_v is not set, the accesses are
2670 in dependence of distance 1 in the loop. */
2671 while (--index >= 0)
2673 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2674 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
2675 save_v[index] = 1;
2676 save_dist_v (ddr, save_v);
2680 /* Return false when fail to represent the data dependence as a
2681 distance vector. INIT_B is set to true when a component has been
2682 added to the distance vector DIST_V. INDEX_CARRY is then set to
2683 the index in DIST_V that carries the dependence. */
2685 static bool
2686 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
2687 struct data_reference *ddr_a,
2688 struct data_reference *ddr_b,
2689 lambda_vector dist_v, bool *init_b,
2690 int *index_carry)
2692 unsigned i;
2693 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2695 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2697 tree access_fn_a, access_fn_b;
2698 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
2700 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2702 non_affine_dependence_relation (ddr);
2703 return false;
2706 access_fn_a = DR_ACCESS_FN (ddr_a, i);
2707 access_fn_b = DR_ACCESS_FN (ddr_b, i);
2709 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
2710 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
2712 int dist, index;
2713 int index_a = index_in_loop_nest (CHREC_VARIABLE (access_fn_a),
2714 DDR_LOOP_NEST (ddr));
2715 int index_b = index_in_loop_nest (CHREC_VARIABLE (access_fn_b),
2716 DDR_LOOP_NEST (ddr));
2718 /* The dependence is carried by the outermost loop. Example:
2719 | loop_1
2720 | A[{4, +, 1}_1]
2721 | loop_2
2722 | A[{5, +, 1}_2]
2723 | endloop_2
2724 | endloop_1
2725 In this case, the dependence is carried by loop_1. */
2726 index = index_a < index_b ? index_a : index_b;
2727 *index_carry = MIN (index, *index_carry);
2729 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2731 non_affine_dependence_relation (ddr);
2732 return false;
2735 dist = int_cst_value (SUB_DISTANCE (subscript));
2737 /* This is the subscript coupling test. If we have already
2738 recorded a distance for this loop (a distance coming from
2739 another subscript), it should be the same. For example,
2740 in the following code, there is no dependence:
2742 | loop i = 0, N, 1
2743 | T[i+1][i] = ...
2744 | ... = T[i][i]
2745 | endloop
2747 if (init_v[index] != 0 && dist_v[index] != dist)
2749 finalize_ddr_dependent (ddr, chrec_known);
2750 return false;
2753 dist_v[index] = dist;
2754 init_v[index] = 1;
2755 *init_b = true;
2757 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
2759 /* This can be for example an affine vs. constant dependence
2760 (T[i] vs. T[3]) that is not an affine dependence and is
2761 not representable as a distance vector. */
2762 non_affine_dependence_relation (ddr);
2763 return false;
2767 return true;
2770 /* Return true when the DDR contains two data references that have the
2771 same access functions. */
2773 static bool
2774 same_access_functions (const struct data_dependence_relation *ddr)
2776 unsigned i;
2778 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2779 if (!eq_evolutions_p (DR_ACCESS_FN (DDR_A (ddr), i),
2780 DR_ACCESS_FN (DDR_B (ddr), i)))
2781 return false;
2783 return true;
2786 /* Return true when the DDR contains only constant access functions. */
2788 static bool
2789 constant_access_functions (const struct data_dependence_relation *ddr)
2791 unsigned i;
2793 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2794 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
2795 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
2796 return false;
2798 return true;
2802 /* Helper function for the case where DDR_A and DDR_B are the same
2803 multivariate access function. */
2805 static void
2806 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
2808 int x_1, x_2;
2809 tree c_1 = CHREC_LEFT (c_2);
2810 tree c_0 = CHREC_LEFT (c_1);
2811 lambda_vector dist_v;
2812 int v1, v2, cd;
2814 /* Polynomials with more than 2 variables are not handled yet. When
2815 the evolution steps are parameters, it is not possible to
2816 represent the dependence using classical distance vectors. */
2817 if (TREE_CODE (c_0) != INTEGER_CST
2818 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
2819 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
2821 DDR_AFFINE_P (ddr) = false;
2822 return;
2825 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
2826 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
2828 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
2829 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2830 v1 = int_cst_value (CHREC_RIGHT (c_1));
2831 v2 = int_cst_value (CHREC_RIGHT (c_2));
2832 cd = gcd (v1, v2);
2833 v1 /= cd;
2834 v2 /= cd;
2836 if (v2 < 0)
2838 v2 = -v2;
2839 v1 = -v1;
2842 dist_v[x_1] = v2;
2843 dist_v[x_2] = -v1;
2844 save_dist_v (ddr, dist_v);
2846 add_outer_distances (ddr, dist_v, x_1);
2849 /* Helper function for the case where DDR_A and DDR_B are the same
2850 access functions. */
2852 static void
2853 add_other_self_distances (struct data_dependence_relation *ddr)
2855 lambda_vector dist_v;
2856 unsigned i;
2857 int index_carry = DDR_NB_LOOPS (ddr);
2859 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2861 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
2863 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
2865 if (!evolution_function_is_univariate_p (access_fun))
2867 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
2869 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
2870 return;
2873 add_multivariate_self_dist (ddr, DR_ACCESS_FN (DDR_A (ddr), 0));
2874 return;
2877 index_carry = MIN (index_carry,
2878 index_in_loop_nest (CHREC_VARIABLE (access_fun),
2879 DDR_LOOP_NEST (ddr)));
2883 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2884 add_outer_distances (ddr, dist_v, index_carry);
2887 static void
2888 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
2890 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2892 dist_v[DDR_INNER_LOOP (ddr)] = 1;
2893 save_dist_v (ddr, dist_v);
2896 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
2897 is the case for example when access functions are the same and
2898 equal to a constant, as in:
2900 | loop_1
2901 | A[3] = ...
2902 | ... = A[3]
2903 | endloop_1
2905 in which case the distance vectors are (0) and (1). */
2907 static void
2908 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
2910 unsigned i, j;
2912 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2914 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
2915 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
2916 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
2918 for (j = 0; j < ca->n; j++)
2919 if (affine_function_zero_p (ca->fns[j]))
2921 insert_innermost_unit_dist_vector (ddr);
2922 return;
2925 for (j = 0; j < cb->n; j++)
2926 if (affine_function_zero_p (cb->fns[j]))
2928 insert_innermost_unit_dist_vector (ddr);
2929 return;
2934 /* Compute the classic per loop distance vector. DDR is the data
2935 dependence relation to build a vector from. Return false when fail
2936 to represent the data dependence as a distance vector. */
2938 static bool
2939 build_classic_dist_vector (struct data_dependence_relation *ddr,
2940 struct loop *loop_nest)
2942 bool init_b = false;
2943 int index_carry = DDR_NB_LOOPS (ddr);
2944 lambda_vector dist_v;
2946 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
2947 return false;
2949 if (same_access_functions (ddr))
2951 /* Save the 0 vector. */
2952 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2953 save_dist_v (ddr, dist_v);
2955 if (constant_access_functions (ddr))
2956 add_distance_for_zero_overlaps (ddr);
2958 if (DDR_NB_LOOPS (ddr) > 1)
2959 add_other_self_distances (ddr);
2961 return true;
2964 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2965 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
2966 dist_v, &init_b, &index_carry))
2967 return false;
2969 /* Save the distance vector if we initialized one. */
2970 if (init_b)
2972 /* Verify a basic constraint: classic distance vectors should
2973 always be lexicographically positive.
2975 Data references are collected in the order of execution of
2976 the program, thus for the following loop
2978 | for (i = 1; i < 100; i++)
2979 | for (j = 1; j < 100; j++)
2981 | t = T[j+1][i-1]; // A
2982 | T[j][i] = t + 2; // B
2985 references are collected following the direction of the wind:
2986 A then B. The data dependence tests are performed also
2987 following this order, such that we're looking at the distance
2988 separating the elements accessed by A from the elements later
2989 accessed by B. But in this example, the distance returned by
2990 test_dep (A, B) is lexicographically negative (-1, 1), that
2991 means that the access A occurs later than B with respect to
2992 the outer loop, ie. we're actually looking upwind. In this
2993 case we solve test_dep (B, A) looking downwind to the
2994 lexicographically positive solution, that returns the
2995 distance vector (1, -1). */
2996 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
2998 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2999 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3000 loop_nest))
3001 return false;
3002 compute_subscript_distance (ddr);
3003 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3004 save_v, &init_b, &index_carry))
3005 return false;
3006 save_dist_v (ddr, save_v);
3007 DDR_REVERSED_P (ddr) = true;
3009 /* In this case there is a dependence forward for all the
3010 outer loops:
3012 | for (k = 1; k < 100; k++)
3013 | for (i = 1; i < 100; i++)
3014 | for (j = 1; j < 100; j++)
3016 | t = T[j+1][i-1]; // A
3017 | T[j][i] = t + 2; // B
3020 the vectors are:
3021 (0, 1, -1)
3022 (1, 1, -1)
3023 (1, -1, 1)
3025 if (DDR_NB_LOOPS (ddr) > 1)
3027 add_outer_distances (ddr, save_v, index_carry);
3028 add_outer_distances (ddr, dist_v, index_carry);
3031 else
3033 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3034 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3036 if (DDR_NB_LOOPS (ddr) > 1)
3038 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3040 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3041 DDR_A (ddr), loop_nest))
3042 return false;
3043 compute_subscript_distance (ddr);
3044 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3045 opposite_v, &init_b,
3046 &index_carry))
3047 return false;
3049 save_dist_v (ddr, save_v);
3050 add_outer_distances (ddr, dist_v, index_carry);
3051 add_outer_distances (ddr, opposite_v, index_carry);
3053 else
3054 save_dist_v (ddr, save_v);
3057 else
3059 /* There is a distance of 1 on all the outer loops: Example:
3060 there is a dependence of distance 1 on loop_1 for the array A.
3062 | loop_1
3063 | A[5] = ...
3064 | endloop
3066 add_outer_distances (ddr, dist_v,
3067 lambda_vector_first_nz (dist_v,
3068 DDR_NB_LOOPS (ddr), 0));
3071 if (dump_file && (dump_flags & TDF_DETAILS))
3073 unsigned i;
3075 fprintf (dump_file, "(build_classic_dist_vector\n");
3076 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3078 fprintf (dump_file, " dist_vector = (");
3079 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3080 DDR_NB_LOOPS (ddr));
3081 fprintf (dump_file, " )\n");
3083 fprintf (dump_file, ")\n");
3086 return true;
3089 /* Return the direction for a given distance.
3090 FIXME: Computing dir this way is suboptimal, since dir can catch
3091 cases that dist is unable to represent. */
3093 static inline enum data_dependence_direction
3094 dir_from_dist (int dist)
3096 if (dist > 0)
3097 return dir_positive;
3098 else if (dist < 0)
3099 return dir_negative;
3100 else
3101 return dir_equal;
3104 /* Compute the classic per loop direction vector. DDR is the data
3105 dependence relation to build a vector from. */
3107 static void
3108 build_classic_dir_vector (struct data_dependence_relation *ddr)
3110 unsigned i, j;
3111 lambda_vector dist_v;
3113 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
3115 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3117 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3118 dir_v[j] = dir_from_dist (dist_v[j]);
3120 save_dir_v (ddr, dir_v);
3124 /* Helper function. Returns true when there is a dependence between
3125 data references DRA and DRB. */
3127 static bool
3128 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3129 struct data_reference *dra,
3130 struct data_reference *drb,
3131 struct loop *loop_nest)
3133 unsigned int i;
3134 tree last_conflicts;
3135 struct subscript *subscript;
3137 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3138 i++)
3140 conflict_function *overlaps_a, *overlaps_b;
3142 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3143 DR_ACCESS_FN (drb, i),
3144 &overlaps_a, &overlaps_b,
3145 &last_conflicts, loop_nest);
3147 if (CF_NOT_KNOWN_P (overlaps_a)
3148 || CF_NOT_KNOWN_P (overlaps_b))
3150 finalize_ddr_dependent (ddr, chrec_dont_know);
3151 dependence_stats.num_dependence_undetermined++;
3152 free_conflict_function (overlaps_a);
3153 free_conflict_function (overlaps_b);
3154 return false;
3157 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3158 || CF_NO_DEPENDENCE_P (overlaps_b))
3160 finalize_ddr_dependent (ddr, chrec_known);
3161 dependence_stats.num_dependence_independent++;
3162 free_conflict_function (overlaps_a);
3163 free_conflict_function (overlaps_b);
3164 return false;
3167 else
3169 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3170 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3171 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3175 return true;
3178 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3180 static void
3181 subscript_dependence_tester (struct data_dependence_relation *ddr,
3182 struct loop *loop_nest)
3185 if (dump_file && (dump_flags & TDF_DETAILS))
3186 fprintf (dump_file, "(subscript_dependence_tester \n");
3188 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3189 dependence_stats.num_dependence_dependent++;
3191 compute_subscript_distance (ddr);
3192 if (build_classic_dist_vector (ddr, loop_nest))
3193 build_classic_dir_vector (ddr);
3195 if (dump_file && (dump_flags & TDF_DETAILS))
3196 fprintf (dump_file, ")\n");
3199 /* Returns true when all the access functions of A are affine or
3200 constant with respect to LOOP_NEST. */
3202 static bool
3203 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3204 const struct loop *loop_nest)
3206 unsigned int i;
3207 VEC(tree,heap) *fns = DR_ACCESS_FNS (a);
3208 tree t;
3210 for (i = 0; VEC_iterate (tree, fns, i, t); i++)
3211 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3212 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3213 return false;
3215 return true;
3218 /* Initializes an equation for an OMEGA problem using the information
3219 contained in the ACCESS_FUN. Returns true when the operation
3220 succeeded.
3222 PB is the omega constraint system.
3223 EQ is the number of the equation to be initialized.
3224 OFFSET is used for shifting the variables names in the constraints:
3225 a constrain is composed of 2 * the number of variables surrounding
3226 dependence accesses. OFFSET is set either to 0 for the first n variables,
3227 then it is set to n.
3228 ACCESS_FUN is expected to be an affine chrec. */
3230 static bool
3231 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3232 unsigned int offset, tree access_fun,
3233 struct data_dependence_relation *ddr)
3235 switch (TREE_CODE (access_fun))
3237 case POLYNOMIAL_CHREC:
3239 tree left = CHREC_LEFT (access_fun);
3240 tree right = CHREC_RIGHT (access_fun);
3241 int var = CHREC_VARIABLE (access_fun);
3242 unsigned var_idx;
3244 if (TREE_CODE (right) != INTEGER_CST)
3245 return false;
3247 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3248 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3250 /* Compute the innermost loop index. */
3251 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3253 if (offset == 0)
3254 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3255 += int_cst_value (right);
3257 switch (TREE_CODE (left))
3259 case POLYNOMIAL_CHREC:
3260 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3262 case INTEGER_CST:
3263 pb->eqs[eq].coef[0] += int_cst_value (left);
3264 return true;
3266 default:
3267 return false;
3271 case INTEGER_CST:
3272 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3273 return true;
3275 default:
3276 return false;
3280 /* As explained in the comments preceding init_omega_for_ddr, we have
3281 to set up a system for each loop level, setting outer loops
3282 variation to zero, and current loop variation to positive or zero.
3283 Save each lexico positive distance vector. */
3285 static void
3286 omega_extract_distance_vectors (omega_pb pb,
3287 struct data_dependence_relation *ddr)
3289 int eq, geq;
3290 unsigned i, j;
3291 struct loop *loopi, *loopj;
3292 enum omega_result res;
3294 /* Set a new problem for each loop in the nest. The basis is the
3295 problem that we have initialized until now. On top of this we
3296 add new constraints. */
3297 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3298 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3300 int dist = 0;
3301 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3302 DDR_NB_LOOPS (ddr));
3304 omega_copy_problem (copy, pb);
3306 /* For all the outer loops "loop_j", add "dj = 0". */
3307 for (j = 0;
3308 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3310 eq = omega_add_zero_eq (copy, omega_black);
3311 copy->eqs[eq].coef[j + 1] = 1;
3314 /* For "loop_i", add "0 <= di". */
3315 geq = omega_add_zero_geq (copy, omega_black);
3316 copy->geqs[geq].coef[i + 1] = 1;
3318 /* Reduce the constraint system, and test that the current
3319 problem is feasible. */
3320 res = omega_simplify_problem (copy);
3321 if (res == omega_false
3322 || res == omega_unknown
3323 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3324 goto next_problem;
3326 for (eq = 0; eq < copy->num_subs; eq++)
3327 if (copy->subs[eq].key == (int) i + 1)
3329 dist = copy->subs[eq].coef[0];
3330 goto found_dist;
3333 if (dist == 0)
3335 /* Reinitialize problem... */
3336 omega_copy_problem (copy, pb);
3337 for (j = 0;
3338 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3340 eq = omega_add_zero_eq (copy, omega_black);
3341 copy->eqs[eq].coef[j + 1] = 1;
3344 /* ..., but this time "di = 1". */
3345 eq = omega_add_zero_eq (copy, omega_black);
3346 copy->eqs[eq].coef[i + 1] = 1;
3347 copy->eqs[eq].coef[0] = -1;
3349 res = omega_simplify_problem (copy);
3350 if (res == omega_false
3351 || res == omega_unknown
3352 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3353 goto next_problem;
3355 for (eq = 0; eq < copy->num_subs; eq++)
3356 if (copy->subs[eq].key == (int) i + 1)
3358 dist = copy->subs[eq].coef[0];
3359 goto found_dist;
3363 found_dist:;
3364 /* Save the lexicographically positive distance vector. */
3365 if (dist >= 0)
3367 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3368 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3370 dist_v[i] = dist;
3372 for (eq = 0; eq < copy->num_subs; eq++)
3373 if (copy->subs[eq].key > 0)
3375 dist = copy->subs[eq].coef[0];
3376 dist_v[copy->subs[eq].key - 1] = dist;
3379 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3380 dir_v[j] = dir_from_dist (dist_v[j]);
3382 save_dist_v (ddr, dist_v);
3383 save_dir_v (ddr, dir_v);
3386 next_problem:;
3387 omega_free_problem (copy);
3391 /* This is called for each subscript of a tuple of data references:
3392 insert an equality for representing the conflicts. */
3394 static bool
3395 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3396 struct data_dependence_relation *ddr,
3397 omega_pb pb, bool *maybe_dependent)
3399 int eq;
3400 tree fun_a = chrec_convert (integer_type_node, access_fun_a, NULL_TREE);
3401 tree fun_b = chrec_convert (integer_type_node, access_fun_b, NULL_TREE);
3402 tree difference = chrec_fold_minus (integer_type_node, fun_a, fun_b);
3404 /* When the fun_a - fun_b is not constant, the dependence is not
3405 captured by the classic distance vector representation. */
3406 if (TREE_CODE (difference) != INTEGER_CST)
3407 return false;
3409 /* ZIV test. */
3410 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3412 /* There is no dependence. */
3413 *maybe_dependent = false;
3414 return true;
3417 fun_b = chrec_fold_multiply (integer_type_node, fun_b,
3418 integer_minus_one_node);
3420 eq = omega_add_zero_eq (pb, omega_black);
3421 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3422 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3423 /* There is probably a dependence, but the system of
3424 constraints cannot be built: answer "don't know". */
3425 return false;
3427 /* GCD test. */
3428 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3429 && !int_divides_p (lambda_vector_gcd
3430 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3431 2 * DDR_NB_LOOPS (ddr)),
3432 pb->eqs[eq].coef[0]))
3434 /* There is no dependence. */
3435 *maybe_dependent = false;
3436 return true;
3439 return true;
3442 /* Helper function, same as init_omega_for_ddr but specialized for
3443 data references A and B. */
3445 static bool
3446 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3447 struct data_dependence_relation *ddr,
3448 omega_pb pb, bool *maybe_dependent)
3450 unsigned i;
3451 int ineq;
3452 struct loop *loopi;
3453 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3455 /* Insert an equality per subscript. */
3456 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3458 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3459 ddr, pb, maybe_dependent))
3460 return false;
3461 else if (*maybe_dependent == false)
3463 /* There is no dependence. */
3464 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3465 return true;
3469 /* Insert inequalities: constraints corresponding to the iteration
3470 domain, i.e. the loops surrounding the references "loop_x" and
3471 the distance variables "dx". The layout of the OMEGA
3472 representation is as follows:
3473 - coef[0] is the constant
3474 - coef[1..nb_loops] are the protected variables that will not be
3475 removed by the solver: the "dx"
3476 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3478 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3479 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3481 HOST_WIDE_INT nbi = estimated_loop_iterations_int (loopi, false);
3483 /* 0 <= loop_x */
3484 ineq = omega_add_zero_geq (pb, omega_black);
3485 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3487 /* 0 <= loop_x + dx */
3488 ineq = omega_add_zero_geq (pb, omega_black);
3489 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3490 pb->geqs[ineq].coef[i + 1] = 1;
3492 if (nbi != -1)
3494 /* loop_x <= nb_iters */
3495 ineq = omega_add_zero_geq (pb, omega_black);
3496 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3497 pb->geqs[ineq].coef[0] = nbi;
3499 /* loop_x + dx <= nb_iters */
3500 ineq = omega_add_zero_geq (pb, omega_black);
3501 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3502 pb->geqs[ineq].coef[i + 1] = -1;
3503 pb->geqs[ineq].coef[0] = nbi;
3505 /* A step "dx" bigger than nb_iters is not feasible, so
3506 add "0 <= nb_iters + dx", */
3507 ineq = omega_add_zero_geq (pb, omega_black);
3508 pb->geqs[ineq].coef[i + 1] = 1;
3509 pb->geqs[ineq].coef[0] = nbi;
3510 /* and "dx <= nb_iters". */
3511 ineq = omega_add_zero_geq (pb, omega_black);
3512 pb->geqs[ineq].coef[i + 1] = -1;
3513 pb->geqs[ineq].coef[0] = nbi;
3517 omega_extract_distance_vectors (pb, ddr);
3519 return true;
3522 /* Sets up the Omega dependence problem for the data dependence
3523 relation DDR. Returns false when the constraint system cannot be
3524 built, ie. when the test answers "don't know". Returns true
3525 otherwise, and when independence has been proved (using one of the
3526 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3527 set MAYBE_DEPENDENT to true.
3529 Example: for setting up the dependence system corresponding to the
3530 conflicting accesses
3532 | loop_i
3533 | loop_j
3534 | A[i, i+1] = ...
3535 | ... A[2*j, 2*(i + j)]
3536 | endloop_j
3537 | endloop_i
3539 the following constraints come from the iteration domain:
3541 0 <= i <= Ni
3542 0 <= i + di <= Ni
3543 0 <= j <= Nj
3544 0 <= j + dj <= Nj
3546 where di, dj are the distance variables. The constraints
3547 representing the conflicting elements are:
3549 i = 2 * (j + dj)
3550 i + 1 = 2 * (i + di + j + dj)
3552 For asking that the resulting distance vector (di, dj) be
3553 lexicographically positive, we insert the constraint "di >= 0". If
3554 "di = 0" in the solution, we fix that component to zero, and we
3555 look at the inner loops: we set a new problem where all the outer
3556 loop distances are zero, and fix this inner component to be
3557 positive. When one of the components is positive, we save that
3558 distance, and set a new problem where the distance on this loop is
3559 zero, searching for other distances in the inner loops. Here is
3560 the classic example that illustrates that we have to set for each
3561 inner loop a new problem:
3563 | loop_1
3564 | loop_2
3565 | A[10]
3566 | endloop_2
3567 | endloop_1
3569 we have to save two distances (1, 0) and (0, 1).
3571 Given two array references, refA and refB, we have to set the
3572 dependence problem twice, refA vs. refB and refB vs. refA, and we
3573 cannot do a single test, as refB might occur before refA in the
3574 inner loops, and the contrary when considering outer loops: ex.
3576 | loop_0
3577 | loop_1
3578 | loop_2
3579 | T[{1,+,1}_2][{1,+,1}_1] // refA
3580 | T[{2,+,1}_2][{0,+,1}_1] // refB
3581 | endloop_2
3582 | endloop_1
3583 | endloop_0
3585 refB touches the elements in T before refA, and thus for the same
3586 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3587 but for successive loop_0 iterations, we have (1, -1, 1)
3589 The Omega solver expects the distance variables ("di" in the
3590 previous example) to come first in the constraint system (as
3591 variables to be protected, or "safe" variables), the constraint
3592 system is built using the following layout:
3594 "cst | distance vars | index vars".
3597 static bool
3598 init_omega_for_ddr (struct data_dependence_relation *ddr,
3599 bool *maybe_dependent)
3601 omega_pb pb;
3602 bool res = false;
3604 *maybe_dependent = true;
3606 if (same_access_functions (ddr))
3608 unsigned j;
3609 lambda_vector dir_v;
3611 /* Save the 0 vector. */
3612 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3613 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3614 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3615 dir_v[j] = dir_equal;
3616 save_dir_v (ddr, dir_v);
3618 /* Save the dependences carried by outer loops. */
3619 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3620 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3621 maybe_dependent);
3622 omega_free_problem (pb);
3623 return res;
3626 /* Omega expects the protected variables (those that have to be kept
3627 after elimination) to appear first in the constraint system.
3628 These variables are the distance variables. In the following
3629 initialization we declare NB_LOOPS safe variables, and the total
3630 number of variables for the constraint system is 2*NB_LOOPS. */
3631 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3632 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3633 maybe_dependent);
3634 omega_free_problem (pb);
3636 /* Stop computation if not decidable, or no dependence. */
3637 if (res == false || *maybe_dependent == false)
3638 return res;
3640 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3641 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
3642 maybe_dependent);
3643 omega_free_problem (pb);
3645 return res;
3648 /* Return true when DDR contains the same information as that stored
3649 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
3651 static bool
3652 ddr_consistent_p (FILE *file,
3653 struct data_dependence_relation *ddr,
3654 VEC (lambda_vector, heap) *dist_vects,
3655 VEC (lambda_vector, heap) *dir_vects)
3657 unsigned int i, j;
3659 /* If dump_file is set, output there. */
3660 if (dump_file && (dump_flags & TDF_DETAILS))
3661 file = dump_file;
3663 if (VEC_length (lambda_vector, dist_vects) != DDR_NUM_DIST_VECTS (ddr))
3665 lambda_vector b_dist_v;
3666 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
3667 VEC_length (lambda_vector, dist_vects),
3668 DDR_NUM_DIST_VECTS (ddr));
3670 fprintf (file, "Banerjee dist vectors:\n");
3671 for (i = 0; VEC_iterate (lambda_vector, dist_vects, i, b_dist_v); i++)
3672 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
3674 fprintf (file, "Omega dist vectors:\n");
3675 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3676 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
3678 fprintf (file, "data dependence relation:\n");
3679 dump_data_dependence_relation (file, ddr);
3681 fprintf (file, ")\n");
3682 return false;
3685 if (VEC_length (lambda_vector, dir_vects) != DDR_NUM_DIR_VECTS (ddr))
3687 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
3688 VEC_length (lambda_vector, dir_vects),
3689 DDR_NUM_DIR_VECTS (ddr));
3690 return false;
3693 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3695 lambda_vector a_dist_v;
3696 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
3698 /* Distance vectors are not ordered in the same way in the DDR
3699 and in the DIST_VECTS: search for a matching vector. */
3700 for (j = 0; VEC_iterate (lambda_vector, dist_vects, j, a_dist_v); j++)
3701 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
3702 break;
3704 if (j == VEC_length (lambda_vector, dist_vects))
3706 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
3707 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
3708 fprintf (file, "not found in Omega dist vectors:\n");
3709 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
3710 fprintf (file, "data dependence relation:\n");
3711 dump_data_dependence_relation (file, ddr);
3712 fprintf (file, ")\n");
3716 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
3718 lambda_vector a_dir_v;
3719 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
3721 /* Direction vectors are not ordered in the same way in the DDR
3722 and in the DIR_VECTS: search for a matching vector. */
3723 for (j = 0; VEC_iterate (lambda_vector, dir_vects, j, a_dir_v); j++)
3724 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
3725 break;
3727 if (j == VEC_length (lambda_vector, dist_vects))
3729 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
3730 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
3731 fprintf (file, "not found in Omega dir vectors:\n");
3732 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
3733 fprintf (file, "data dependence relation:\n");
3734 dump_data_dependence_relation (file, ddr);
3735 fprintf (file, ")\n");
3739 return true;
3742 /* This computes the affine dependence relation between A and B with
3743 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
3744 independence between two accesses, while CHREC_DONT_KNOW is used
3745 for representing the unknown relation.
3747 Note that it is possible to stop the computation of the dependence
3748 relation the first time we detect a CHREC_KNOWN element for a given
3749 subscript. */
3751 static void
3752 compute_affine_dependence (struct data_dependence_relation *ddr,
3753 struct loop *loop_nest)
3755 struct data_reference *dra = DDR_A (ddr);
3756 struct data_reference *drb = DDR_B (ddr);
3758 if (dump_file && (dump_flags & TDF_DETAILS))
3760 fprintf (dump_file, "(compute_affine_dependence\n");
3761 fprintf (dump_file, " (stmt_a = \n");
3762 print_generic_expr (dump_file, DR_STMT (dra), 0);
3763 fprintf (dump_file, ")\n (stmt_b = \n");
3764 print_generic_expr (dump_file, DR_STMT (drb), 0);
3765 fprintf (dump_file, ")\n");
3768 /* Analyze only when the dependence relation is not yet known. */
3769 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
3771 dependence_stats.num_dependence_tests++;
3773 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
3774 && access_functions_are_affine_or_constant_p (drb, loop_nest))
3776 if (flag_check_data_deps)
3778 /* Compute the dependences using the first algorithm. */
3779 subscript_dependence_tester (ddr, loop_nest);
3781 if (dump_file && (dump_flags & TDF_DETAILS))
3783 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
3784 dump_data_dependence_relation (dump_file, ddr);
3787 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
3789 bool maybe_dependent;
3790 VEC (lambda_vector, heap) *dir_vects, *dist_vects;
3792 /* Save the result of the first DD analyzer. */
3793 dist_vects = DDR_DIST_VECTS (ddr);
3794 dir_vects = DDR_DIR_VECTS (ddr);
3796 /* Reset the information. */
3797 DDR_DIST_VECTS (ddr) = NULL;
3798 DDR_DIR_VECTS (ddr) = NULL;
3800 /* Compute the same information using Omega. */
3801 if (!init_omega_for_ddr (ddr, &maybe_dependent))
3802 goto csys_dont_know;
3804 if (dump_file && (dump_flags & TDF_DETAILS))
3806 fprintf (dump_file, "Omega Analyzer\n");
3807 dump_data_dependence_relation (dump_file, ddr);
3810 /* Check that we get the same information. */
3811 if (maybe_dependent)
3812 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
3813 dir_vects));
3816 else
3817 subscript_dependence_tester (ddr, loop_nest);
3820 /* As a last case, if the dependence cannot be determined, or if
3821 the dependence is considered too difficult to determine, answer
3822 "don't know". */
3823 else
3825 csys_dont_know:;
3826 dependence_stats.num_dependence_undetermined++;
3828 if (dump_file && (dump_flags & TDF_DETAILS))
3830 fprintf (dump_file, "Data ref a:\n");
3831 dump_data_reference (dump_file, dra);
3832 fprintf (dump_file, "Data ref b:\n");
3833 dump_data_reference (dump_file, drb);
3834 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
3836 finalize_ddr_dependent (ddr, chrec_dont_know);
3840 if (dump_file && (dump_flags & TDF_DETAILS))
3841 fprintf (dump_file, ")\n");
3844 /* This computes the dependence relation for the same data
3845 reference into DDR. */
3847 static void
3848 compute_self_dependence (struct data_dependence_relation *ddr)
3850 unsigned int i;
3851 struct subscript *subscript;
3853 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3854 return;
3856 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3857 i++)
3859 /* The accessed index overlaps for each iteration. */
3860 SUB_CONFLICTS_IN_A (subscript)
3861 = conflict_fn (1, affine_fn_cst (integer_zero_node));
3862 SUB_CONFLICTS_IN_B (subscript)
3863 = conflict_fn (1, affine_fn_cst (integer_zero_node));
3864 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
3867 /* The distance vector is the zero vector. */
3868 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3869 save_dir_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3872 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
3873 the data references in DATAREFS, in the LOOP_NEST. When
3874 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
3875 relations. */
3877 void
3878 compute_all_dependences (VEC (data_reference_p, heap) *datarefs,
3879 VEC (ddr_p, heap) **dependence_relations,
3880 VEC (loop_p, heap) *loop_nest,
3881 bool compute_self_and_rr)
3883 struct data_dependence_relation *ddr;
3884 struct data_reference *a, *b;
3885 unsigned int i, j;
3887 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, a); i++)
3888 for (j = i + 1; VEC_iterate (data_reference_p, datarefs, j, b); j++)
3889 if (!DR_IS_READ (a) || !DR_IS_READ (b) || compute_self_and_rr)
3891 ddr = initialize_data_dependence_relation (a, b, loop_nest);
3892 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
3893 compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
3896 if (compute_self_and_rr)
3897 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, a); i++)
3899 ddr = initialize_data_dependence_relation (a, a, loop_nest);
3900 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
3901 compute_self_dependence (ddr);
3905 /* Stores the locations of memory references in STMT to REFERENCES. Returns
3906 true if STMT clobbers memory, false otherwise. */
3908 bool
3909 get_references_in_stmt (tree stmt, VEC (data_ref_loc, heap) **references)
3911 bool clobbers_memory = false;
3912 data_ref_loc *ref;
3913 tree *op0, *op1, call;
3915 *references = NULL;
3917 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
3918 Calls have side-effects, except those to const or pure
3919 functions. */
3920 call = get_call_expr_in (stmt);
3921 if ((call
3922 && !(call_expr_flags (call) & (ECF_CONST | ECF_PURE)))
3923 || (TREE_CODE (stmt) == ASM_EXPR
3924 && ASM_VOLATILE_P (stmt)))
3925 clobbers_memory = true;
3927 if (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS))
3928 return clobbers_memory;
3930 if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
3932 op0 = &GIMPLE_STMT_OPERAND (stmt, 0);
3933 op1 = &GIMPLE_STMT_OPERAND (stmt, 1);
3935 if (DECL_P (*op1)
3936 || (REFERENCE_CLASS_P (*op1) && get_base_address (*op1)))
3938 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
3939 ref->pos = op1;
3940 ref->is_read = true;
3943 if (DECL_P (*op0)
3944 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0)))
3946 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
3947 ref->pos = op0;
3948 ref->is_read = false;
3952 if (call)
3954 unsigned i, n = call_expr_nargs (call);
3956 for (i = 0; i < n; i++)
3958 op0 = &CALL_EXPR_ARG (call, i);
3960 if (DECL_P (*op0)
3961 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0)))
3963 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
3964 ref->pos = op0;
3965 ref->is_read = true;
3970 return clobbers_memory;
3973 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
3974 reference, returns false, otherwise returns true. NEST is the outermost
3975 loop of the loop nest in that the references should be analyzed. */
3977 static bool
3978 find_data_references_in_stmt (struct loop *nest, tree stmt,
3979 VEC (data_reference_p, heap) **datarefs)
3981 unsigned i;
3982 VEC (data_ref_loc, heap) *references;
3983 data_ref_loc *ref;
3984 bool ret = true;
3985 data_reference_p dr;
3987 if (get_references_in_stmt (stmt, &references))
3989 VEC_free (data_ref_loc, heap, references);
3990 return false;
3993 for (i = 0; VEC_iterate (data_ref_loc, references, i, ref); i++)
3995 dr = create_data_ref (nest, *ref->pos, stmt, ref->is_read);
3996 gcc_assert (dr != NULL);
3998 /* FIXME -- data dependence analysis does not work correctly for objects with
3999 invariant addresses. Let us fail here until the problem is fixed. */
4000 if (dr_address_invariant_p (dr))
4002 free_data_ref (dr);
4003 if (dump_file && (dump_flags & TDF_DETAILS))
4004 fprintf (dump_file, "\tFAILED as dr address is invariant\n");
4005 ret = false;
4006 break;
4009 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4011 VEC_free (data_ref_loc, heap, references);
4012 return ret;
4015 /* Search the data references in LOOP, and record the information into
4016 DATAREFS. Returns chrec_dont_know when failing to analyze a
4017 difficult case, returns NULL_TREE otherwise.
4019 TODO: This function should be made smarter so that it can handle address
4020 arithmetic as if they were array accesses, etc. */
4022 static tree
4023 find_data_references_in_loop (struct loop *loop,
4024 VEC (data_reference_p, heap) **datarefs)
4026 basic_block bb, *bbs;
4027 unsigned int i;
4028 block_stmt_iterator bsi;
4030 bbs = get_loop_body_in_dom_order (loop);
4032 for (i = 0; i < loop->num_nodes; i++)
4034 bb = bbs[i];
4036 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
4038 tree stmt = bsi_stmt (bsi);
4040 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4042 struct data_reference *res;
4043 res = XCNEW (struct data_reference);
4044 VEC_safe_push (data_reference_p, heap, *datarefs, res);
4046 free (bbs);
4047 return chrec_dont_know;
4051 free (bbs);
4053 return NULL_TREE;
4056 /* Recursive helper function. */
4058 static bool
4059 find_loop_nest_1 (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4061 /* Inner loops of the nest should not contain siblings. Example:
4062 when there are two consecutive loops,
4064 | loop_0
4065 | loop_1
4066 | A[{0, +, 1}_1]
4067 | endloop_1
4068 | loop_2
4069 | A[{0, +, 1}_2]
4070 | endloop_2
4071 | endloop_0
4073 the dependence relation cannot be captured by the distance
4074 abstraction. */
4075 if (loop->next)
4076 return false;
4078 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4079 if (loop->inner)
4080 return find_loop_nest_1 (loop->inner, loop_nest);
4081 return true;
4084 /* Return false when the LOOP is not well nested. Otherwise return
4085 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4086 contain the loops from the outermost to the innermost, as they will
4087 appear in the classic distance vector. */
4089 bool
4090 find_loop_nest (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4092 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4093 if (loop->inner)
4094 return find_loop_nest_1 (loop->inner, loop_nest);
4095 return true;
4098 /* Given a loop nest LOOP, the following vectors are returned:
4099 DATAREFS is initialized to all the array elements contained in this loop,
4100 DEPENDENCE_RELATIONS contains the relations between the data references.
4101 Compute read-read and self relations if
4102 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4104 void
4105 compute_data_dependences_for_loop (struct loop *loop,
4106 bool compute_self_and_read_read_dependences,
4107 VEC (data_reference_p, heap) **datarefs,
4108 VEC (ddr_p, heap) **dependence_relations)
4110 VEC (loop_p, heap) *vloops = VEC_alloc (loop_p, heap, 3);
4112 memset (&dependence_stats, 0, sizeof (dependence_stats));
4114 /* If the loop nest is not well formed, or one of the data references
4115 is not computable, give up without spending time to compute other
4116 dependences. */
4117 if (!loop
4118 || !find_loop_nest (loop, &vloops)
4119 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know)
4121 struct data_dependence_relation *ddr;
4123 /* Insert a single relation into dependence_relations:
4124 chrec_dont_know. */
4125 ddr = initialize_data_dependence_relation (NULL, NULL, vloops);
4126 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4128 else
4129 compute_all_dependences (*datarefs, dependence_relations, vloops,
4130 compute_self_and_read_read_dependences);
4132 if (dump_file && (dump_flags & TDF_STATS))
4134 fprintf (dump_file, "Dependence tester statistics:\n");
4136 fprintf (dump_file, "Number of dependence tests: %d\n",
4137 dependence_stats.num_dependence_tests);
4138 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4139 dependence_stats.num_dependence_dependent);
4140 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4141 dependence_stats.num_dependence_independent);
4142 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4143 dependence_stats.num_dependence_undetermined);
4145 fprintf (dump_file, "Number of subscript tests: %d\n",
4146 dependence_stats.num_subscript_tests);
4147 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4148 dependence_stats.num_subscript_undetermined);
4149 fprintf (dump_file, "Number of same subscript function: %d\n",
4150 dependence_stats.num_same_subscript_function);
4152 fprintf (dump_file, "Number of ziv tests: %d\n",
4153 dependence_stats.num_ziv);
4154 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4155 dependence_stats.num_ziv_dependent);
4156 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4157 dependence_stats.num_ziv_independent);
4158 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4159 dependence_stats.num_ziv_unimplemented);
4161 fprintf (dump_file, "Number of siv tests: %d\n",
4162 dependence_stats.num_siv);
4163 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4164 dependence_stats.num_siv_dependent);
4165 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4166 dependence_stats.num_siv_independent);
4167 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4168 dependence_stats.num_siv_unimplemented);
4170 fprintf (dump_file, "Number of miv tests: %d\n",
4171 dependence_stats.num_miv);
4172 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4173 dependence_stats.num_miv_dependent);
4174 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4175 dependence_stats.num_miv_independent);
4176 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4177 dependence_stats.num_miv_unimplemented);
4181 /* Entry point (for testing only). Analyze all the data references
4182 and the dependence relations in LOOP.
4184 The data references are computed first.
4186 A relation on these nodes is represented by a complete graph. Some
4187 of the relations could be of no interest, thus the relations can be
4188 computed on demand.
4190 In the following function we compute all the relations. This is
4191 just a first implementation that is here for:
4192 - for showing how to ask for the dependence relations,
4193 - for the debugging the whole dependence graph,
4194 - for the dejagnu testcases and maintenance.
4196 It is possible to ask only for a part of the graph, avoiding to
4197 compute the whole dependence graph. The computed dependences are
4198 stored in a knowledge base (KB) such that later queries don't
4199 recompute the same information. The implementation of this KB is
4200 transparent to the optimizer, and thus the KB can be changed with a
4201 more efficient implementation, or the KB could be disabled. */
4202 static void
4203 analyze_all_data_dependences (struct loop *loop)
4205 unsigned int i;
4206 int nb_data_refs = 10;
4207 VEC (data_reference_p, heap) *datarefs =
4208 VEC_alloc (data_reference_p, heap, nb_data_refs);
4209 VEC (ddr_p, heap) *dependence_relations =
4210 VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs);
4212 /* Compute DDs on the whole function. */
4213 compute_data_dependences_for_loop (loop, false, &datarefs,
4214 &dependence_relations);
4216 if (dump_file)
4218 dump_data_dependence_relations (dump_file, dependence_relations);
4219 fprintf (dump_file, "\n\n");
4221 if (dump_flags & TDF_DETAILS)
4222 dump_dist_dir_vectors (dump_file, dependence_relations);
4224 if (dump_flags & TDF_STATS)
4226 unsigned nb_top_relations = 0;
4227 unsigned nb_bot_relations = 0;
4228 unsigned nb_basename_differ = 0;
4229 unsigned nb_chrec_relations = 0;
4230 struct data_dependence_relation *ddr;
4232 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
4234 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4235 nb_top_relations++;
4237 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4239 struct data_reference *a = DDR_A (ddr);
4240 struct data_reference *b = DDR_B (ddr);
4242 if (!bitmap_intersect_p (DR_VOPS (a), DR_VOPS (b)))
4243 nb_basename_differ++;
4244 else
4245 nb_bot_relations++;
4248 else
4249 nb_chrec_relations++;
4252 gather_stats_on_scev_database ();
4256 free_dependence_relations (dependence_relations);
4257 free_data_refs (datarefs);
4260 /* Computes all the data dependences and check that the results of
4261 several analyzers are the same. */
4263 void
4264 tree_check_data_deps (void)
4266 loop_iterator li;
4267 struct loop *loop_nest;
4269 FOR_EACH_LOOP (li, loop_nest, 0)
4270 analyze_all_data_dependences (loop_nest);
4273 /* Free the memory used by a data dependence relation DDR. */
4275 void
4276 free_dependence_relation (struct data_dependence_relation *ddr)
4278 if (ddr == NULL)
4279 return;
4281 if (DDR_SUBSCRIPTS (ddr))
4282 free_subscripts (DDR_SUBSCRIPTS (ddr));
4283 if (DDR_DIST_VECTS (ddr))
4284 VEC_free (lambda_vector, heap, DDR_DIST_VECTS (ddr));
4285 if (DDR_DIR_VECTS (ddr))
4286 VEC_free (lambda_vector, heap, DDR_DIR_VECTS (ddr));
4288 free (ddr);
4291 /* Free the memory used by the data dependence relations from
4292 DEPENDENCE_RELATIONS. */
4294 void
4295 free_dependence_relations (VEC (ddr_p, heap) *dependence_relations)
4297 unsigned int i;
4298 struct data_dependence_relation *ddr;
4299 VEC (loop_p, heap) *loop_nest = NULL;
4301 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
4303 if (ddr == NULL)
4304 continue;
4305 if (loop_nest == NULL)
4306 loop_nest = DDR_LOOP_NEST (ddr);
4307 else
4308 gcc_assert (DDR_LOOP_NEST (ddr) == NULL
4309 || DDR_LOOP_NEST (ddr) == loop_nest);
4310 free_dependence_relation (ddr);
4313 if (loop_nest)
4314 VEC_free (loop_p, heap, loop_nest);
4315 VEC_free (ddr_p, heap, dependence_relations);
4318 /* Free the memory used by the data references from DATAREFS. */
4320 void
4321 free_data_refs (VEC (data_reference_p, heap) *datarefs)
4323 unsigned int i;
4324 struct data_reference *dr;
4326 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
4327 free_data_ref (dr);
4328 VEC_free (data_reference_p, heap, datarefs);
4333 /* Returns the index of STMT in RDG. */
4335 static int
4336 find_vertex_for_stmt (const struct graph *rdg, const_tree stmt)
4338 int i;
4340 for (i = 0; i < rdg->n_vertices; i++)
4341 if (RDGV_STMT (&(rdg->vertices[i])) == stmt)
4342 return i;
4344 gcc_unreachable ();
4345 return 0;
4348 /* Creates an edge in RDG for each distance vector from DDR. */
4350 static void
4351 create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4353 int va, vb;
4354 data_reference_p dra;
4355 data_reference_p drb;
4356 struct graph_edge *e;
4358 if (DDR_REVERSED_P (ddr))
4360 dra = DDR_B (ddr);
4361 drb = DDR_A (ddr);
4363 else
4365 dra = DDR_A (ddr);
4366 drb = DDR_B (ddr);
4369 va = find_vertex_for_stmt (rdg, DR_STMT (dra));
4370 vb = find_vertex_for_stmt (rdg, DR_STMT (drb));
4372 e = add_edge (rdg, va, vb);
4373 e->data = XNEW (struct rdg_edge);
4375 /* Determines the type of the data dependence. */
4376 if (DR_IS_READ (dra) && DR_IS_READ (drb))
4377 RDGE_TYPE (e) = input_dd;
4378 else if (!DR_IS_READ (dra) && !DR_IS_READ (drb))
4379 RDGE_TYPE (e) = output_dd;
4380 else if (!DR_IS_READ (dra) && DR_IS_READ (drb))
4381 RDGE_TYPE (e) = flow_dd;
4382 else if (DR_IS_READ (dra) && !DR_IS_READ (drb))
4383 RDGE_TYPE (e) = anti_dd;
4386 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
4387 the index of DEF in RDG. */
4389 static void
4390 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
4392 use_operand_p imm_use_p;
4393 imm_use_iterator iterator;
4395 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
4397 int use = find_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
4398 struct graph_edge *e = add_edge (rdg, idef, use);
4400 e->data = XNEW (struct rdg_edge);
4401 RDGE_TYPE (e) = flow_dd;
4405 /* Creates the edges of the reduced dependence graph RDG. */
4407 static void
4408 create_rdg_edges (struct graph *rdg, VEC (ddr_p, heap) *ddrs)
4410 int i;
4411 struct data_dependence_relation *ddr;
4412 def_operand_p def_p;
4413 ssa_op_iter iter;
4415 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
4416 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4417 create_rdg_edge_for_ddr (rdg, ddr);
4419 for (i = 0; i < rdg->n_vertices; i++)
4420 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDGV_STMT (&(rdg->vertices[i])),
4421 iter, SSA_OP_ALL_DEFS)
4422 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
4425 /* Build the vertices of the reduced dependence graph RDG. */
4427 static void
4428 create_rdg_vertices (struct graph *rdg, VEC (tree, heap) *stmts)
4430 int i;
4431 tree s;
4433 for (i = 0; VEC_iterate (tree, stmts, i, s); i++)
4435 struct vertex *v = &(rdg->vertices[i]);
4437 v->data = XNEW (struct rdg_vertex);
4438 RDGV_STMT (v) = s;
4442 /* Initialize STMTS with all the statements and PHI nodes of LOOP. */
4444 static void
4445 stmts_from_loop (struct loop *loop, VEC (tree, heap) **stmts)
4447 unsigned int i;
4448 basic_block *bbs = get_loop_body_in_dom_order (loop);
4450 for (i = 0; i < loop->num_nodes; i++)
4452 tree phi;
4453 basic_block bb = bbs[i];
4454 block_stmt_iterator bsi;
4456 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
4457 VEC_safe_push (tree, heap, *stmts, phi);
4459 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
4460 VEC_safe_push (tree, heap, *stmts, bsi_stmt (bsi));
4463 free (bbs);
4466 /* Returns true when all the dependences are computable. */
4468 static bool
4469 known_dependences_p (VEC (ddr_p, heap) *dependence_relations)
4471 ddr_p ddr;
4472 unsigned int i;
4474 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
4475 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4476 return false;
4478 return true;
4481 /* Build a Reduced Dependence Graph with one vertex per statement of the
4482 loop nest and one edge per data dependence or scalar dependence. */
4484 struct graph *
4485 build_rdg (struct loop *loop)
4487 int nb_data_refs = 10;
4488 struct graph *rdg = NULL;
4489 VEC (ddr_p, heap) *dependence_relations;
4490 VEC (data_reference_p, heap) *datarefs;
4491 VEC (tree, heap) *stmts = VEC_alloc (tree, heap, 10);
4493 dependence_relations = VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs) ;
4494 datarefs = VEC_alloc (data_reference_p, heap, nb_data_refs);
4495 compute_data_dependences_for_loop (loop,
4496 false,
4497 &datarefs,
4498 &dependence_relations);
4500 if (!known_dependences_p (dependence_relations))
4501 goto end_rdg;
4503 stmts_from_loop (loop, &stmts);
4504 rdg = new_graph (VEC_length (tree, stmts));
4505 create_rdg_vertices (rdg, stmts);
4506 create_rdg_edges (rdg, dependence_relations);
4508 end_rdg:
4509 free_dependence_relations (dependence_relations);
4510 free_data_refs (datarefs);
4511 VEC_free (tree, heap, stmts);
4513 return rdg;