Enable dumping of alias graphs.
[official-gcc/Ramakrishna.git] / gcc / tree-data-ref.c
blobacc82b4b2d523b2ee6b24f3aea93c6eeac21e746
1 /* Data references and dependences detectors.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009
3 Free Software Foundation, Inc.
4 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* This pass walks a given loop structure searching for array
23 references. The information about the array accesses is recorded
24 in DATA_REFERENCE structures.
26 The basic test for determining the dependences is:
27 given two access functions chrec1 and chrec2 to a same array, and
28 x and y two vectors from the iteration domain, the same element of
29 the array is accessed twice at iterations x and y if and only if:
30 | chrec1 (x) == chrec2 (y).
32 The goals of this analysis are:
34 - to determine the independence: the relation between two
35 independent accesses is qualified with the chrec_known (this
36 information allows a loop parallelization),
38 - when two data references access the same data, to qualify the
39 dependence relation with classic dependence representations:
41 - distance vectors
42 - direction vectors
43 - loop carried level dependence
44 - polyhedron dependence
45 or with the chains of recurrences based representation,
47 - to define a knowledge base for storing the data dependence
48 information,
50 - to define an interface to access this data.
53 Definitions:
55 - subscript: given two array accesses a subscript is the tuple
56 composed of the access functions for a given dimension. Example:
57 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
58 (f1, g1), (f2, g2), (f3, g3).
60 - Diophantine equation: an equation whose coefficients and
61 solutions are integer constants, for example the equation
62 | 3*x + 2*y = 1
63 has an integer solution x = 1 and y = -1.
65 References:
67 - "Advanced Compilation for High Performance Computing" by Randy
68 Allen and Ken Kennedy.
69 http://citeseer.ist.psu.edu/goff91practical.html
71 - "Loop Transformations for Restructuring Compilers - The Foundations"
72 by Utpal Banerjee.
77 #include "config.h"
78 #include "system.h"
79 #include "coretypes.h"
80 #include "tm.h"
81 #include "ggc.h"
82 #include "tree.h"
84 /* These RTL headers are needed for basic-block.h. */
85 #include "rtl.h"
86 #include "basic-block.h"
87 #include "diagnostic.h"
88 #include "tree-flow.h"
89 #include "tree-dump.h"
90 #include "timevar.h"
91 #include "cfgloop.h"
92 #include "tree-scalar-evolution.h"
93 #include "tree-data-ref.h"
94 #include "tree-pass.h"
95 #include "langhooks.h"
97 static struct datadep_stats
99 int num_dependence_tests;
100 int num_dependence_dependent;
101 int num_dependence_independent;
102 int num_dependence_undetermined;
104 int num_subscript_tests;
105 int num_subscript_undetermined;
106 int num_same_subscript_function;
108 int num_ziv;
109 int num_ziv_independent;
110 int num_ziv_dependent;
111 int num_ziv_unimplemented;
113 int num_siv;
114 int num_siv_independent;
115 int num_siv_dependent;
116 int num_siv_unimplemented;
118 int num_miv;
119 int num_miv_independent;
120 int num_miv_dependent;
121 int num_miv_unimplemented;
122 } dependence_stats;
124 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
125 struct data_reference *,
126 struct data_reference *,
127 struct loop *);
128 /* Returns true iff A divides B. */
130 static inline bool
131 tree_fold_divides_p (const_tree a, const_tree b)
133 gcc_assert (TREE_CODE (a) == INTEGER_CST);
134 gcc_assert (TREE_CODE (b) == INTEGER_CST);
135 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a, 0));
138 /* Returns true iff A divides B. */
140 static inline bool
141 int_divides_p (int a, int b)
143 return ((b % a) == 0);
148 /* Dump into FILE all the data references from DATAREFS. */
150 void
151 dump_data_references (FILE *file, VEC (data_reference_p, heap) *datarefs)
153 unsigned int i;
154 struct data_reference *dr;
156 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
157 dump_data_reference (file, dr);
160 /* Dump into STDERR all the data references from DATAREFS. */
162 void
163 debug_data_references (VEC (data_reference_p, heap) *datarefs)
165 dump_data_references (stderr, datarefs);
168 /* Dump to STDERR all the dependence relations from DDRS. */
170 void
171 debug_data_dependence_relations (VEC (ddr_p, heap) *ddrs)
173 dump_data_dependence_relations (stderr, ddrs);
176 /* Dump into FILE all the dependence relations from DDRS. */
178 void
179 dump_data_dependence_relations (FILE *file,
180 VEC (ddr_p, heap) *ddrs)
182 unsigned int i;
183 struct data_dependence_relation *ddr;
185 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
186 dump_data_dependence_relation (file, ddr);
189 /* Print to STDERR the data_reference DR. */
191 void
192 debug_data_reference (struct data_reference *dr)
194 dump_data_reference (stderr, dr);
197 /* Dump function for a DATA_REFERENCE structure. */
199 void
200 dump_data_reference (FILE *outf,
201 struct data_reference *dr)
203 unsigned int i;
205 fprintf (outf, "(Data Ref: \n stmt: ");
206 print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
207 fprintf (outf, " ref: ");
208 print_generic_stmt (outf, DR_REF (dr), 0);
209 fprintf (outf, " base_object: ");
210 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
212 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
214 fprintf (outf, " Access function %d: ", i);
215 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
217 fprintf (outf, ")\n");
220 /* Dumps the affine function described by FN to the file OUTF. */
222 static void
223 dump_affine_function (FILE *outf, affine_fn fn)
225 unsigned i;
226 tree coef;
228 print_generic_expr (outf, VEC_index (tree, fn, 0), TDF_SLIM);
229 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
231 fprintf (outf, " + ");
232 print_generic_expr (outf, coef, TDF_SLIM);
233 fprintf (outf, " * x_%u", i);
237 /* Dumps the conflict function CF to the file OUTF. */
239 static void
240 dump_conflict_function (FILE *outf, conflict_function *cf)
242 unsigned i;
244 if (cf->n == NO_DEPENDENCE)
245 fprintf (outf, "no dependence\n");
246 else if (cf->n == NOT_KNOWN)
247 fprintf (outf, "not known\n");
248 else
250 for (i = 0; i < cf->n; i++)
252 fprintf (outf, "[");
253 dump_affine_function (outf, cf->fns[i]);
254 fprintf (outf, "]\n");
259 /* Dump function for a SUBSCRIPT structure. */
261 void
262 dump_subscript (FILE *outf, struct subscript *subscript)
264 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
266 fprintf (outf, "\n (subscript \n");
267 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
268 dump_conflict_function (outf, cf);
269 if (CF_NONTRIVIAL_P (cf))
271 tree last_iteration = SUB_LAST_CONFLICT (subscript);
272 fprintf (outf, " last_conflict: ");
273 print_generic_stmt (outf, last_iteration, 0);
276 cf = SUB_CONFLICTS_IN_B (subscript);
277 fprintf (outf, " iterations_that_access_an_element_twice_in_B: ");
278 dump_conflict_function (outf, cf);
279 if (CF_NONTRIVIAL_P (cf))
281 tree last_iteration = SUB_LAST_CONFLICT (subscript);
282 fprintf (outf, " last_conflict: ");
283 print_generic_stmt (outf, last_iteration, 0);
286 fprintf (outf, " (Subscript distance: ");
287 print_generic_stmt (outf, SUB_DISTANCE (subscript), 0);
288 fprintf (outf, " )\n");
289 fprintf (outf, " )\n");
292 /* Print the classic direction vector DIRV to OUTF. */
294 void
295 print_direction_vector (FILE *outf,
296 lambda_vector dirv,
297 int length)
299 int eq;
301 for (eq = 0; eq < length; eq++)
303 enum data_dependence_direction dir = ((enum data_dependence_direction)
304 dirv[eq]);
306 switch (dir)
308 case dir_positive:
309 fprintf (outf, " +");
310 break;
311 case dir_negative:
312 fprintf (outf, " -");
313 break;
314 case dir_equal:
315 fprintf (outf, " =");
316 break;
317 case dir_positive_or_equal:
318 fprintf (outf, " +=");
319 break;
320 case dir_positive_or_negative:
321 fprintf (outf, " +-");
322 break;
323 case dir_negative_or_equal:
324 fprintf (outf, " -=");
325 break;
326 case dir_star:
327 fprintf (outf, " *");
328 break;
329 default:
330 fprintf (outf, "indep");
331 break;
334 fprintf (outf, "\n");
337 /* Print a vector of direction vectors. */
339 void
340 print_dir_vectors (FILE *outf, VEC (lambda_vector, heap) *dir_vects,
341 int length)
343 unsigned j;
344 lambda_vector v;
346 for (j = 0; VEC_iterate (lambda_vector, dir_vects, j, v); j++)
347 print_direction_vector (outf, v, length);
350 /* Print a vector of distance vectors. */
352 void
353 print_dist_vectors (FILE *outf, VEC (lambda_vector, heap) *dist_vects,
354 int length)
356 unsigned j;
357 lambda_vector v;
359 for (j = 0; VEC_iterate (lambda_vector, dist_vects, j, v); j++)
360 print_lambda_vector (outf, v, length);
363 /* Debug version. */
365 void
366 debug_data_dependence_relation (struct data_dependence_relation *ddr)
368 dump_data_dependence_relation (stderr, ddr);
371 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
373 void
374 dump_data_dependence_relation (FILE *outf,
375 struct data_dependence_relation *ddr)
377 struct data_reference *dra, *drb;
379 fprintf (outf, "(Data Dep: \n");
381 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
383 fprintf (outf, " (don't know)\n)\n");
384 return;
387 dra = DDR_A (ddr);
388 drb = DDR_B (ddr);
389 dump_data_reference (outf, dra);
390 dump_data_reference (outf, drb);
392 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
393 fprintf (outf, " (no dependence)\n");
395 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
397 unsigned int i;
398 struct loop *loopi;
400 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
402 fprintf (outf, " access_fn_A: ");
403 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
404 fprintf (outf, " access_fn_B: ");
405 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
406 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
409 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
410 fprintf (outf, " loop nest: (");
411 for (i = 0; VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
412 fprintf (outf, "%d ", loopi->num);
413 fprintf (outf, ")\n");
415 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
417 fprintf (outf, " distance_vector: ");
418 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
419 DDR_NB_LOOPS (ddr));
422 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
424 fprintf (outf, " direction_vector: ");
425 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
426 DDR_NB_LOOPS (ddr));
430 fprintf (outf, ")\n");
433 /* Dump function for a DATA_DEPENDENCE_DIRECTION structure. */
435 void
436 dump_data_dependence_direction (FILE *file,
437 enum data_dependence_direction dir)
439 switch (dir)
441 case dir_positive:
442 fprintf (file, "+");
443 break;
445 case dir_negative:
446 fprintf (file, "-");
447 break;
449 case dir_equal:
450 fprintf (file, "=");
451 break;
453 case dir_positive_or_negative:
454 fprintf (file, "+-");
455 break;
457 case dir_positive_or_equal:
458 fprintf (file, "+=");
459 break;
461 case dir_negative_or_equal:
462 fprintf (file, "-=");
463 break;
465 case dir_star:
466 fprintf (file, "*");
467 break;
469 default:
470 break;
474 /* Dumps the distance and direction vectors in FILE. DDRS contains
475 the dependence relations, and VECT_SIZE is the size of the
476 dependence vectors, or in other words the number of loops in the
477 considered nest. */
479 void
480 dump_dist_dir_vectors (FILE *file, VEC (ddr_p, heap) *ddrs)
482 unsigned int i, j;
483 struct data_dependence_relation *ddr;
484 lambda_vector v;
486 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
487 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
489 for (j = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), j, v); j++)
491 fprintf (file, "DISTANCE_V (");
492 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
493 fprintf (file, ")\n");
496 for (j = 0; VEC_iterate (lambda_vector, DDR_DIR_VECTS (ddr), j, v); j++)
498 fprintf (file, "DIRECTION_V (");
499 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
500 fprintf (file, ")\n");
504 fprintf (file, "\n\n");
507 /* Dumps the data dependence relations DDRS in FILE. */
509 void
510 dump_ddrs (FILE *file, VEC (ddr_p, heap) *ddrs)
512 unsigned int i;
513 struct data_dependence_relation *ddr;
515 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
516 dump_data_dependence_relation (file, ddr);
518 fprintf (file, "\n\n");
521 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
522 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
523 constant of type ssizetype, and returns true. If we cannot do this
524 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
525 is returned. */
527 static bool
528 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
529 tree *var, tree *off)
531 tree var0, var1;
532 tree off0, off1;
533 enum tree_code ocode = code;
535 *var = NULL_TREE;
536 *off = NULL_TREE;
538 switch (code)
540 case INTEGER_CST:
541 *var = build_int_cst (type, 0);
542 *off = fold_convert (ssizetype, op0);
543 return true;
545 case POINTER_PLUS_EXPR:
546 ocode = PLUS_EXPR;
547 /* FALLTHROUGH */
548 case PLUS_EXPR:
549 case MINUS_EXPR:
550 split_constant_offset (op0, &var0, &off0);
551 split_constant_offset (op1, &var1, &off1);
552 *var = fold_build2 (code, type, var0, var1);
553 *off = size_binop (ocode, off0, off1);
554 return true;
556 case MULT_EXPR:
557 if (TREE_CODE (op1) != INTEGER_CST)
558 return false;
560 split_constant_offset (op0, &var0, &off0);
561 *var = fold_build2 (MULT_EXPR, type, var0, op1);
562 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
563 return true;
565 case ADDR_EXPR:
567 tree base, poffset;
568 HOST_WIDE_INT pbitsize, pbitpos;
569 enum machine_mode pmode;
570 int punsignedp, pvolatilep;
572 op0 = TREE_OPERAND (op0, 0);
573 if (!handled_component_p (op0))
574 return false;
576 base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
577 &pmode, &punsignedp, &pvolatilep, false);
579 if (pbitpos % BITS_PER_UNIT != 0)
580 return false;
581 base = build_fold_addr_expr (base);
582 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
584 if (poffset)
586 split_constant_offset (poffset, &poffset, &off1);
587 off0 = size_binop (PLUS_EXPR, off0, off1);
588 if (POINTER_TYPE_P (TREE_TYPE (base)))
589 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base),
590 base, fold_convert (sizetype, poffset));
591 else
592 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
593 fold_convert (TREE_TYPE (base), poffset));
596 var0 = fold_convert (type, base);
598 /* If variable length types are involved, punt, otherwise casts
599 might be converted into ARRAY_REFs in gimplify_conversion.
600 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
601 possibly no longer appears in current GIMPLE, might resurface.
602 This perhaps could run
603 if (CONVERT_EXPR_P (var0))
605 gimplify_conversion (&var0);
606 // Attempt to fill in any within var0 found ARRAY_REF's
607 // element size from corresponding op embedded ARRAY_REF,
608 // if unsuccessful, just punt.
609 } */
610 while (POINTER_TYPE_P (type))
611 type = TREE_TYPE (type);
612 if (int_size_in_bytes (type) < 0)
613 return false;
615 *var = var0;
616 *off = off0;
617 return true;
620 case SSA_NAME:
622 gimple def_stmt = SSA_NAME_DEF_STMT (op0);
623 enum tree_code subcode;
625 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
626 return false;
628 var0 = gimple_assign_rhs1 (def_stmt);
629 subcode = gimple_assign_rhs_code (def_stmt);
630 var1 = gimple_assign_rhs2 (def_stmt);
632 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
635 default:
636 return false;
640 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
641 will be ssizetype. */
643 void
644 split_constant_offset (tree exp, tree *var, tree *off)
646 tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
647 enum tree_code code;
649 *var = exp;
650 *off = ssize_int (0);
651 STRIP_NOPS (exp);
653 if (automatically_generated_chrec_p (exp))
654 return;
656 otype = TREE_TYPE (exp);
657 code = TREE_CODE (exp);
658 extract_ops_from_tree (exp, &code, &op0, &op1);
659 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
661 *var = fold_convert (type, e);
662 *off = o;
666 /* Returns the address ADDR of an object in a canonical shape (without nop
667 casts, and with type of pointer to the object). */
669 static tree
670 canonicalize_base_object_address (tree addr)
672 tree orig = addr;
674 STRIP_NOPS (addr);
676 /* The base address may be obtained by casting from integer, in that case
677 keep the cast. */
678 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
679 return orig;
681 if (TREE_CODE (addr) != ADDR_EXPR)
682 return addr;
684 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
687 /* Analyzes the behavior of the memory reference DR in the innermost loop or
688 basic block that contains it. Returns true if analysis succeed or false
689 otherwise. */
691 bool
692 dr_analyze_innermost (struct data_reference *dr)
694 gimple stmt = DR_STMT (dr);
695 struct loop *loop = loop_containing_stmt (stmt);
696 tree ref = DR_REF (dr);
697 HOST_WIDE_INT pbitsize, pbitpos;
698 tree base, poffset;
699 enum machine_mode pmode;
700 int punsignedp, pvolatilep;
701 affine_iv base_iv, offset_iv;
702 tree init, dinit, step;
703 bool in_loop = (loop && loop->num);
705 if (dump_file && (dump_flags & TDF_DETAILS))
706 fprintf (dump_file, "analyze_innermost: ");
708 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
709 &pmode, &punsignedp, &pvolatilep, false);
710 gcc_assert (base != NULL_TREE);
712 if (pbitpos % BITS_PER_UNIT != 0)
714 if (dump_file && (dump_flags & TDF_DETAILS))
715 fprintf (dump_file, "failed: bit offset alignment.\n");
716 return false;
719 base = build_fold_addr_expr (base);
720 if (in_loop)
722 if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
723 false))
725 if (dump_file && (dump_flags & TDF_DETAILS))
726 fprintf (dump_file, "failed: evolution of base is not affine.\n");
727 return false;
730 else
732 base_iv.base = base;
733 base_iv.step = ssize_int (0);
734 base_iv.no_overflow = true;
737 if (!poffset)
739 offset_iv.base = ssize_int (0);
740 offset_iv.step = ssize_int (0);
742 else
744 if (!in_loop)
746 offset_iv.base = poffset;
747 offset_iv.step = ssize_int (0);
749 else if (!simple_iv (loop, loop_containing_stmt (stmt),
750 poffset, &offset_iv, false))
752 if (dump_file && (dump_flags & TDF_DETAILS))
753 fprintf (dump_file, "failed: evolution of offset is not"
754 " affine.\n");
755 return false;
759 init = ssize_int (pbitpos / BITS_PER_UNIT);
760 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
761 init = size_binop (PLUS_EXPR, init, dinit);
762 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
763 init = size_binop (PLUS_EXPR, init, dinit);
765 step = size_binop (PLUS_EXPR,
766 fold_convert (ssizetype, base_iv.step),
767 fold_convert (ssizetype, offset_iv.step));
769 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
771 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
772 DR_INIT (dr) = init;
773 DR_STEP (dr) = step;
775 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
777 if (dump_file && (dump_flags & TDF_DETAILS))
778 fprintf (dump_file, "success.\n");
780 return true;
783 /* Determines the base object and the list of indices of memory reference
784 DR, analyzed in loop nest NEST. */
786 static void
787 dr_analyze_indices (struct data_reference *dr, struct loop *nest)
789 gimple stmt = DR_STMT (dr);
790 struct loop *loop = loop_containing_stmt (stmt);
791 VEC (tree, heap) *access_fns = NULL;
792 tree ref = unshare_expr (DR_REF (dr)), aref = ref, op;
793 tree base, off, access_fn = NULL_TREE;
794 basic_block before_loop = NULL;
796 if (nest)
797 before_loop = block_before_loop (nest);
799 while (handled_component_p (aref))
801 if (TREE_CODE (aref) == ARRAY_REF)
803 op = TREE_OPERAND (aref, 1);
804 if (nest)
806 access_fn = analyze_scalar_evolution (loop, op);
807 access_fn = instantiate_scev (before_loop, loop, access_fn);
808 VEC_safe_push (tree, heap, access_fns, access_fn);
811 TREE_OPERAND (aref, 1) = build_int_cst (TREE_TYPE (op), 0);
814 aref = TREE_OPERAND (aref, 0);
817 if (nest && INDIRECT_REF_P (aref))
819 op = TREE_OPERAND (aref, 0);
820 access_fn = analyze_scalar_evolution (loop, op);
821 access_fn = instantiate_scev (before_loop, loop, access_fn);
822 base = initial_condition (access_fn);
823 split_constant_offset (base, &base, &off);
824 access_fn = chrec_replace_initial_condition (access_fn,
825 fold_convert (TREE_TYPE (base), off));
827 TREE_OPERAND (aref, 0) = base;
828 VEC_safe_push (tree, heap, access_fns, access_fn);
831 DR_BASE_OBJECT (dr) = ref;
832 DR_ACCESS_FNS (dr) = access_fns;
835 /* Extracts the alias analysis information from the memory reference DR. */
837 static void
838 dr_analyze_alias (struct data_reference *dr)
840 tree ref = DR_REF (dr);
841 tree base = get_base_address (ref), addr;
843 if (INDIRECT_REF_P (base))
845 addr = TREE_OPERAND (base, 0);
846 if (TREE_CODE (addr) == SSA_NAME)
847 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
851 /* Returns true if the address of DR is invariant. */
853 static bool
854 dr_address_invariant_p (struct data_reference *dr)
856 unsigned i;
857 tree idx;
859 for (i = 0; VEC_iterate (tree, DR_ACCESS_FNS (dr), i, idx); i++)
860 if (tree_contains_chrecs (idx, NULL))
861 return false;
863 return true;
866 /* Frees data reference DR. */
868 void
869 free_data_ref (data_reference_p dr)
871 VEC_free (tree, heap, DR_ACCESS_FNS (dr));
872 free (dr);
875 /* Analyzes memory reference MEMREF accessed in STMT. The reference
876 is read if IS_READ is true, write otherwise. Returns the
877 data_reference description of MEMREF. NEST is the outermost loop of the
878 loop nest in that the reference should be analyzed. */
880 struct data_reference *
881 create_data_ref (struct loop *nest, tree memref, gimple stmt, bool is_read)
883 struct data_reference *dr;
885 if (dump_file && (dump_flags & TDF_DETAILS))
887 fprintf (dump_file, "Creating dr for ");
888 print_generic_expr (dump_file, memref, TDF_SLIM);
889 fprintf (dump_file, "\n");
892 dr = XCNEW (struct data_reference);
893 DR_STMT (dr) = stmt;
894 DR_REF (dr) = memref;
895 DR_IS_READ (dr) = is_read;
897 dr_analyze_innermost (dr);
898 dr_analyze_indices (dr, nest);
899 dr_analyze_alias (dr);
901 if (dump_file && (dump_flags & TDF_DETAILS))
903 fprintf (dump_file, "\tbase_address: ");
904 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
905 fprintf (dump_file, "\n\toffset from base address: ");
906 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
907 fprintf (dump_file, "\n\tconstant offset from base address: ");
908 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
909 fprintf (dump_file, "\n\tstep: ");
910 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
911 fprintf (dump_file, "\n\taligned to: ");
912 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
913 fprintf (dump_file, "\n\tbase_object: ");
914 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
915 fprintf (dump_file, "\n");
918 return dr;
921 /* Returns true if FNA == FNB. */
923 static bool
924 affine_function_equal_p (affine_fn fna, affine_fn fnb)
926 unsigned i, n = VEC_length (tree, fna);
928 if (n != VEC_length (tree, fnb))
929 return false;
931 for (i = 0; i < n; i++)
932 if (!operand_equal_p (VEC_index (tree, fna, i),
933 VEC_index (tree, fnb, i), 0))
934 return false;
936 return true;
939 /* If all the functions in CF are the same, returns one of them,
940 otherwise returns NULL. */
942 static affine_fn
943 common_affine_function (conflict_function *cf)
945 unsigned i;
946 affine_fn comm;
948 if (!CF_NONTRIVIAL_P (cf))
949 return NULL;
951 comm = cf->fns[0];
953 for (i = 1; i < cf->n; i++)
954 if (!affine_function_equal_p (comm, cf->fns[i]))
955 return NULL;
957 return comm;
960 /* Returns the base of the affine function FN. */
962 static tree
963 affine_function_base (affine_fn fn)
965 return VEC_index (tree, fn, 0);
968 /* Returns true if FN is a constant. */
970 static bool
971 affine_function_constant_p (affine_fn fn)
973 unsigned i;
974 tree coef;
976 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
977 if (!integer_zerop (coef))
978 return false;
980 return true;
983 /* Returns true if FN is the zero constant function. */
985 static bool
986 affine_function_zero_p (affine_fn fn)
988 return (integer_zerop (affine_function_base (fn))
989 && affine_function_constant_p (fn));
992 /* Returns a signed integer type with the largest precision from TA
993 and TB. */
995 static tree
996 signed_type_for_types (tree ta, tree tb)
998 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
999 return signed_type_for (ta);
1000 else
1001 return signed_type_for (tb);
1004 /* Applies operation OP on affine functions FNA and FNB, and returns the
1005 result. */
1007 static affine_fn
1008 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1010 unsigned i, n, m;
1011 affine_fn ret;
1012 tree coef;
1014 if (VEC_length (tree, fnb) > VEC_length (tree, fna))
1016 n = VEC_length (tree, fna);
1017 m = VEC_length (tree, fnb);
1019 else
1021 n = VEC_length (tree, fnb);
1022 m = VEC_length (tree, fna);
1025 ret = VEC_alloc (tree, heap, m);
1026 for (i = 0; i < n; i++)
1028 tree type = signed_type_for_types (TREE_TYPE (VEC_index (tree, fna, i)),
1029 TREE_TYPE (VEC_index (tree, fnb, i)));
1031 VEC_quick_push (tree, ret,
1032 fold_build2 (op, type,
1033 VEC_index (tree, fna, i),
1034 VEC_index (tree, fnb, i)));
1037 for (; VEC_iterate (tree, fna, i, coef); i++)
1038 VEC_quick_push (tree, ret,
1039 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1040 coef, integer_zero_node));
1041 for (; VEC_iterate (tree, fnb, i, coef); i++)
1042 VEC_quick_push (tree, ret,
1043 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1044 integer_zero_node, coef));
1046 return ret;
1049 /* Returns the sum of affine functions FNA and FNB. */
1051 static affine_fn
1052 affine_fn_plus (affine_fn fna, affine_fn fnb)
1054 return affine_fn_op (PLUS_EXPR, fna, fnb);
1057 /* Returns the difference of affine functions FNA and FNB. */
1059 static affine_fn
1060 affine_fn_minus (affine_fn fna, affine_fn fnb)
1062 return affine_fn_op (MINUS_EXPR, fna, fnb);
1065 /* Frees affine function FN. */
1067 static void
1068 affine_fn_free (affine_fn fn)
1070 VEC_free (tree, heap, fn);
1073 /* Determine for each subscript in the data dependence relation DDR
1074 the distance. */
1076 static void
1077 compute_subscript_distance (struct data_dependence_relation *ddr)
1079 conflict_function *cf_a, *cf_b;
1080 affine_fn fn_a, fn_b, diff;
1082 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1084 unsigned int i;
1086 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1088 struct subscript *subscript;
1090 subscript = DDR_SUBSCRIPT (ddr, i);
1091 cf_a = SUB_CONFLICTS_IN_A (subscript);
1092 cf_b = SUB_CONFLICTS_IN_B (subscript);
1094 fn_a = common_affine_function (cf_a);
1095 fn_b = common_affine_function (cf_b);
1096 if (!fn_a || !fn_b)
1098 SUB_DISTANCE (subscript) = chrec_dont_know;
1099 return;
1101 diff = affine_fn_minus (fn_a, fn_b);
1103 if (affine_function_constant_p (diff))
1104 SUB_DISTANCE (subscript) = affine_function_base (diff);
1105 else
1106 SUB_DISTANCE (subscript) = chrec_dont_know;
1108 affine_fn_free (diff);
1113 /* Returns the conflict function for "unknown". */
1115 static conflict_function *
1116 conflict_fn_not_known (void)
1118 conflict_function *fn = XCNEW (conflict_function);
1119 fn->n = NOT_KNOWN;
1121 return fn;
1124 /* Returns the conflict function for "independent". */
1126 static conflict_function *
1127 conflict_fn_no_dependence (void)
1129 conflict_function *fn = XCNEW (conflict_function);
1130 fn->n = NO_DEPENDENCE;
1132 return fn;
1135 /* Returns true if the address of OBJ is invariant in LOOP. */
1137 static bool
1138 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1140 while (handled_component_p (obj))
1142 if (TREE_CODE (obj) == ARRAY_REF)
1144 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1145 need to check the stride and the lower bound of the reference. */
1146 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1147 loop->num)
1148 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1149 loop->num))
1150 return false;
1152 else if (TREE_CODE (obj) == COMPONENT_REF)
1154 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1155 loop->num))
1156 return false;
1158 obj = TREE_OPERAND (obj, 0);
1161 if (!INDIRECT_REF_P (obj))
1162 return true;
1164 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1165 loop->num);
1168 /* Returns true if A and B are accesses to different objects, or to different
1169 fields of the same object. */
1171 static bool
1172 disjoint_objects_p (tree a, tree b)
1174 tree base_a, base_b;
1175 VEC (tree, heap) *comp_a = NULL, *comp_b = NULL;
1176 bool ret;
1178 base_a = get_base_address (a);
1179 base_b = get_base_address (b);
1181 if (DECL_P (base_a)
1182 && DECL_P (base_b)
1183 && base_a != base_b)
1184 return true;
1186 if (!operand_equal_p (base_a, base_b, 0))
1187 return false;
1189 /* Compare the component references of A and B. We must start from the inner
1190 ones, so record them to the vector first. */
1191 while (handled_component_p (a))
1193 VEC_safe_push (tree, heap, comp_a, a);
1194 a = TREE_OPERAND (a, 0);
1196 while (handled_component_p (b))
1198 VEC_safe_push (tree, heap, comp_b, b);
1199 b = TREE_OPERAND (b, 0);
1202 ret = false;
1203 while (1)
1205 if (VEC_length (tree, comp_a) == 0
1206 || VEC_length (tree, comp_b) == 0)
1207 break;
1209 a = VEC_pop (tree, comp_a);
1210 b = VEC_pop (tree, comp_b);
1212 /* Real and imaginary part of a variable do not alias. */
1213 if ((TREE_CODE (a) == REALPART_EXPR
1214 && TREE_CODE (b) == IMAGPART_EXPR)
1215 || (TREE_CODE (a) == IMAGPART_EXPR
1216 && TREE_CODE (b) == REALPART_EXPR))
1218 ret = true;
1219 break;
1222 if (TREE_CODE (a) != TREE_CODE (b))
1223 break;
1225 /* Nothing to do for ARRAY_REFs, as the indices of array_refs in
1226 DR_BASE_OBJECT are always zero. */
1227 if (TREE_CODE (a) == ARRAY_REF)
1228 continue;
1229 else if (TREE_CODE (a) == COMPONENT_REF)
1231 if (operand_equal_p (TREE_OPERAND (a, 1), TREE_OPERAND (b, 1), 0))
1232 continue;
1234 /* Different fields of unions may overlap. */
1235 base_a = TREE_OPERAND (a, 0);
1236 if (TREE_CODE (TREE_TYPE (base_a)) == UNION_TYPE)
1237 break;
1239 /* Different fields of structures cannot. */
1240 ret = true;
1241 break;
1243 else
1244 break;
1247 VEC_free (tree, heap, comp_a);
1248 VEC_free (tree, heap, comp_b);
1250 return ret;
1253 /* Returns false if we can prove that data references A and B do not alias,
1254 true otherwise. */
1256 bool
1257 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b)
1259 const_tree addr_a = DR_BASE_ADDRESS (a);
1260 const_tree addr_b = DR_BASE_ADDRESS (b);
1261 const_tree type_a, type_b;
1262 const_tree decl_a = NULL_TREE, decl_b = NULL_TREE;
1264 /* If the accessed objects are disjoint, the memory references do not
1265 alias. */
1266 if (disjoint_objects_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b)))
1267 return false;
1269 /* Query the alias oracle. */
1270 if (!DR_IS_READ (a) && !DR_IS_READ (b))
1272 if (!refs_output_dependent_p (DR_REF (a), DR_REF (b)))
1273 return false;
1275 else if (DR_IS_READ (a) && !DR_IS_READ (b))
1277 if (!refs_anti_dependent_p (DR_REF (a), DR_REF (b)))
1278 return false;
1280 else if (!refs_may_alias_p (DR_REF (a), DR_REF (b)))
1281 return false;
1283 if (!addr_a || !addr_b)
1284 return true;
1286 /* If the references are based on different static objects, they cannot
1287 alias (PTA should be able to disambiguate such accesses, but often
1288 it fails to). */
1289 if (TREE_CODE (addr_a) == ADDR_EXPR
1290 && TREE_CODE (addr_b) == ADDR_EXPR)
1291 return TREE_OPERAND (addr_a, 0) == TREE_OPERAND (addr_b, 0);
1293 /* An instruction writing through a restricted pointer is "independent" of any
1294 instruction reading or writing through a different restricted pointer,
1295 in the same block/scope. */
1297 type_a = TREE_TYPE (addr_a);
1298 type_b = TREE_TYPE (addr_b);
1299 gcc_assert (POINTER_TYPE_P (type_a) && POINTER_TYPE_P (type_b));
1301 if (TREE_CODE (addr_a) == SSA_NAME)
1302 decl_a = SSA_NAME_VAR (addr_a);
1303 if (TREE_CODE (addr_b) == SSA_NAME)
1304 decl_b = SSA_NAME_VAR (addr_b);
1306 if (TYPE_RESTRICT (type_a) && TYPE_RESTRICT (type_b)
1307 && (!DR_IS_READ (a) || !DR_IS_READ (b))
1308 && decl_a && DECL_P (decl_a)
1309 && decl_b && DECL_P (decl_b)
1310 && decl_a != decl_b
1311 && TREE_CODE (DECL_CONTEXT (decl_a)) == FUNCTION_DECL
1312 && DECL_CONTEXT (decl_a) == DECL_CONTEXT (decl_b))
1313 return false;
1315 return true;
1318 static void compute_self_dependence (struct data_dependence_relation *);
1320 /* Initialize a data dependence relation between data accesses A and
1321 B. NB_LOOPS is the number of loops surrounding the references: the
1322 size of the classic distance/direction vectors. */
1324 static struct data_dependence_relation *
1325 initialize_data_dependence_relation (struct data_reference *a,
1326 struct data_reference *b,
1327 VEC (loop_p, heap) *loop_nest)
1329 struct data_dependence_relation *res;
1330 unsigned int i;
1332 res = XNEW (struct data_dependence_relation);
1333 DDR_A (res) = a;
1334 DDR_B (res) = b;
1335 DDR_LOOP_NEST (res) = NULL;
1336 DDR_REVERSED_P (res) = false;
1337 DDR_SUBSCRIPTS (res) = NULL;
1338 DDR_DIR_VECTS (res) = NULL;
1339 DDR_DIST_VECTS (res) = NULL;
1341 if (a == NULL || b == NULL)
1343 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1344 return res;
1347 /* If the data references do not alias, then they are independent. */
1348 if (!dr_may_alias_p (a, b))
1350 DDR_ARE_DEPENDENT (res) = chrec_known;
1351 return res;
1354 /* When the references are exactly the same, don't spend time doing
1355 the data dependence tests, just initialize the ddr and return. */
1356 if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1358 DDR_AFFINE_P (res) = true;
1359 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1360 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1361 DDR_LOOP_NEST (res) = loop_nest;
1362 DDR_INNER_LOOP (res) = 0;
1363 DDR_SELF_REFERENCE (res) = true;
1364 compute_self_dependence (res);
1365 return res;
1368 /* If the references do not access the same object, we do not know
1369 whether they alias or not. */
1370 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1372 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1373 return res;
1376 /* If the base of the object is not invariant in the loop nest, we cannot
1377 analyze it. TODO -- in fact, it would suffice to record that there may
1378 be arbitrary dependences in the loops where the base object varies. */
1379 if (loop_nest
1380 && !object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1381 DR_BASE_OBJECT (a)))
1383 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1384 return res;
1387 gcc_assert (DR_NUM_DIMENSIONS (a) == DR_NUM_DIMENSIONS (b));
1389 DDR_AFFINE_P (res) = true;
1390 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1391 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1392 DDR_LOOP_NEST (res) = loop_nest;
1393 DDR_INNER_LOOP (res) = 0;
1394 DDR_SELF_REFERENCE (res) = false;
1396 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1398 struct subscript *subscript;
1400 subscript = XNEW (struct subscript);
1401 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1402 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1403 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1404 SUB_DISTANCE (subscript) = chrec_dont_know;
1405 VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1408 return res;
1411 /* Frees memory used by the conflict function F. */
1413 static void
1414 free_conflict_function (conflict_function *f)
1416 unsigned i;
1418 if (CF_NONTRIVIAL_P (f))
1420 for (i = 0; i < f->n; i++)
1421 affine_fn_free (f->fns[i]);
1423 free (f);
1426 /* Frees memory used by SUBSCRIPTS. */
1428 static void
1429 free_subscripts (VEC (subscript_p, heap) *subscripts)
1431 unsigned i;
1432 subscript_p s;
1434 for (i = 0; VEC_iterate (subscript_p, subscripts, i, s); i++)
1436 free_conflict_function (s->conflicting_iterations_in_a);
1437 free_conflict_function (s->conflicting_iterations_in_b);
1438 free (s);
1440 VEC_free (subscript_p, heap, subscripts);
1443 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1444 description. */
1446 static inline void
1447 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1448 tree chrec)
1450 if (dump_file && (dump_flags & TDF_DETAILS))
1452 fprintf (dump_file, "(dependence classified: ");
1453 print_generic_expr (dump_file, chrec, 0);
1454 fprintf (dump_file, ")\n");
1457 DDR_ARE_DEPENDENT (ddr) = chrec;
1458 free_subscripts (DDR_SUBSCRIPTS (ddr));
1459 DDR_SUBSCRIPTS (ddr) = NULL;
1462 /* The dependence relation DDR cannot be represented by a distance
1463 vector. */
1465 static inline void
1466 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1468 if (dump_file && (dump_flags & TDF_DETAILS))
1469 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1471 DDR_AFFINE_P (ddr) = false;
1476 /* This section contains the classic Banerjee tests. */
1478 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1479 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1481 static inline bool
1482 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1484 return (evolution_function_is_constant_p (chrec_a)
1485 && evolution_function_is_constant_p (chrec_b));
1488 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1489 variable, i.e., if the SIV (Single Index Variable) test is true. */
1491 static bool
1492 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1494 if ((evolution_function_is_constant_p (chrec_a)
1495 && evolution_function_is_univariate_p (chrec_b))
1496 || (evolution_function_is_constant_p (chrec_b)
1497 && evolution_function_is_univariate_p (chrec_a)))
1498 return true;
1500 if (evolution_function_is_univariate_p (chrec_a)
1501 && evolution_function_is_univariate_p (chrec_b))
1503 switch (TREE_CODE (chrec_a))
1505 case POLYNOMIAL_CHREC:
1506 switch (TREE_CODE (chrec_b))
1508 case POLYNOMIAL_CHREC:
1509 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1510 return false;
1512 default:
1513 return true;
1516 default:
1517 return true;
1521 return false;
1524 /* Creates a conflict function with N dimensions. The affine functions
1525 in each dimension follow. */
1527 static conflict_function *
1528 conflict_fn (unsigned n, ...)
1530 unsigned i;
1531 conflict_function *ret = XCNEW (conflict_function);
1532 va_list ap;
1534 gcc_assert (0 < n && n <= MAX_DIM);
1535 va_start(ap, n);
1537 ret->n = n;
1538 for (i = 0; i < n; i++)
1539 ret->fns[i] = va_arg (ap, affine_fn);
1540 va_end(ap);
1542 return ret;
1545 /* Returns constant affine function with value CST. */
1547 static affine_fn
1548 affine_fn_cst (tree cst)
1550 affine_fn fn = VEC_alloc (tree, heap, 1);
1551 VEC_quick_push (tree, fn, cst);
1552 return fn;
1555 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1557 static affine_fn
1558 affine_fn_univar (tree cst, unsigned dim, tree coef)
1560 affine_fn fn = VEC_alloc (tree, heap, dim + 1);
1561 unsigned i;
1563 gcc_assert (dim > 0);
1564 VEC_quick_push (tree, fn, cst);
1565 for (i = 1; i < dim; i++)
1566 VEC_quick_push (tree, fn, integer_zero_node);
1567 VEC_quick_push (tree, fn, coef);
1568 return fn;
1571 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1572 *OVERLAPS_B are initialized to the functions that describe the
1573 relation between the elements accessed twice by CHREC_A and
1574 CHREC_B. For k >= 0, the following property is verified:
1576 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1578 static void
1579 analyze_ziv_subscript (tree chrec_a,
1580 tree chrec_b,
1581 conflict_function **overlaps_a,
1582 conflict_function **overlaps_b,
1583 tree *last_conflicts)
1585 tree type, difference;
1586 dependence_stats.num_ziv++;
1588 if (dump_file && (dump_flags & TDF_DETAILS))
1589 fprintf (dump_file, "(analyze_ziv_subscript \n");
1591 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1592 chrec_a = chrec_convert (type, chrec_a, NULL);
1593 chrec_b = chrec_convert (type, chrec_b, NULL);
1594 difference = chrec_fold_minus (type, chrec_a, chrec_b);
1596 switch (TREE_CODE (difference))
1598 case INTEGER_CST:
1599 if (integer_zerop (difference))
1601 /* The difference is equal to zero: the accessed index
1602 overlaps for each iteration in the loop. */
1603 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1604 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1605 *last_conflicts = chrec_dont_know;
1606 dependence_stats.num_ziv_dependent++;
1608 else
1610 /* The accesses do not overlap. */
1611 *overlaps_a = conflict_fn_no_dependence ();
1612 *overlaps_b = conflict_fn_no_dependence ();
1613 *last_conflicts = integer_zero_node;
1614 dependence_stats.num_ziv_independent++;
1616 break;
1618 default:
1619 /* We're not sure whether the indexes overlap. For the moment,
1620 conservatively answer "don't know". */
1621 if (dump_file && (dump_flags & TDF_DETAILS))
1622 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1624 *overlaps_a = conflict_fn_not_known ();
1625 *overlaps_b = conflict_fn_not_known ();
1626 *last_conflicts = chrec_dont_know;
1627 dependence_stats.num_ziv_unimplemented++;
1628 break;
1631 if (dump_file && (dump_flags & TDF_DETAILS))
1632 fprintf (dump_file, ")\n");
1635 /* Sets NIT to the estimated number of executions of the statements in
1636 LOOP. If CONSERVATIVE is true, we must be sure that NIT is at least as
1637 large as the number of iterations. If we have no reliable estimate,
1638 the function returns false, otherwise returns true. */
1640 bool
1641 estimated_loop_iterations (struct loop *loop, bool conservative,
1642 double_int *nit)
1644 estimate_numbers_of_iterations_loop (loop);
1645 if (conservative)
1647 if (!loop->any_upper_bound)
1648 return false;
1650 *nit = loop->nb_iterations_upper_bound;
1652 else
1654 if (!loop->any_estimate)
1655 return false;
1657 *nit = loop->nb_iterations_estimate;
1660 return true;
1663 /* Similar to estimated_loop_iterations, but returns the estimate only
1664 if it fits to HOST_WIDE_INT. If this is not the case, or the estimate
1665 on the number of iterations of LOOP could not be derived, returns -1. */
1667 HOST_WIDE_INT
1668 estimated_loop_iterations_int (struct loop *loop, bool conservative)
1670 double_int nit;
1671 HOST_WIDE_INT hwi_nit;
1673 if (!estimated_loop_iterations (loop, conservative, &nit))
1674 return -1;
1676 if (!double_int_fits_in_shwi_p (nit))
1677 return -1;
1678 hwi_nit = double_int_to_shwi (nit);
1680 return hwi_nit < 0 ? -1 : hwi_nit;
1683 /* Similar to estimated_loop_iterations, but returns the estimate as a tree,
1684 and only if it fits to the int type. If this is not the case, or the
1685 estimate on the number of iterations of LOOP could not be derived, returns
1686 chrec_dont_know. */
1688 static tree
1689 estimated_loop_iterations_tree (struct loop *loop, bool conservative)
1691 double_int nit;
1692 tree type;
1694 if (!estimated_loop_iterations (loop, conservative, &nit))
1695 return chrec_dont_know;
1697 type = lang_hooks.types.type_for_size (INT_TYPE_SIZE, true);
1698 if (!double_int_fits_to_tree_p (type, nit))
1699 return chrec_dont_know;
1701 return double_int_to_tree (type, nit);
1704 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1705 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1706 *OVERLAPS_B are initialized to the functions that describe the
1707 relation between the elements accessed twice by CHREC_A and
1708 CHREC_B. For k >= 0, the following property is verified:
1710 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1712 static void
1713 analyze_siv_subscript_cst_affine (tree chrec_a,
1714 tree chrec_b,
1715 conflict_function **overlaps_a,
1716 conflict_function **overlaps_b,
1717 tree *last_conflicts)
1719 bool value0, value1, value2;
1720 tree type, difference, tmp;
1722 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1723 chrec_a = chrec_convert (type, chrec_a, NULL);
1724 chrec_b = chrec_convert (type, chrec_b, NULL);
1725 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1727 if (!chrec_is_positive (initial_condition (difference), &value0))
1729 if (dump_file && (dump_flags & TDF_DETAILS))
1730 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1732 dependence_stats.num_siv_unimplemented++;
1733 *overlaps_a = conflict_fn_not_known ();
1734 *overlaps_b = conflict_fn_not_known ();
1735 *last_conflicts = chrec_dont_know;
1736 return;
1738 else
1740 if (value0 == false)
1742 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1744 if (dump_file && (dump_flags & TDF_DETAILS))
1745 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1747 *overlaps_a = conflict_fn_not_known ();
1748 *overlaps_b = conflict_fn_not_known ();
1749 *last_conflicts = chrec_dont_know;
1750 dependence_stats.num_siv_unimplemented++;
1751 return;
1753 else
1755 if (value1 == true)
1757 /* Example:
1758 chrec_a = 12
1759 chrec_b = {10, +, 1}
1762 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1764 HOST_WIDE_INT numiter;
1765 struct loop *loop = get_chrec_loop (chrec_b);
1767 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1768 tmp = fold_build2 (EXACT_DIV_EXPR, type,
1769 fold_build1 (ABS_EXPR, type, difference),
1770 CHREC_RIGHT (chrec_b));
1771 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1772 *last_conflicts = integer_one_node;
1775 /* Perform weak-zero siv test to see if overlap is
1776 outside the loop bounds. */
1777 numiter = estimated_loop_iterations_int (loop, false);
1779 if (numiter >= 0
1780 && compare_tree_int (tmp, numiter) > 0)
1782 free_conflict_function (*overlaps_a);
1783 free_conflict_function (*overlaps_b);
1784 *overlaps_a = conflict_fn_no_dependence ();
1785 *overlaps_b = conflict_fn_no_dependence ();
1786 *last_conflicts = integer_zero_node;
1787 dependence_stats.num_siv_independent++;
1788 return;
1790 dependence_stats.num_siv_dependent++;
1791 return;
1794 /* When the step does not divide the difference, there are
1795 no overlaps. */
1796 else
1798 *overlaps_a = conflict_fn_no_dependence ();
1799 *overlaps_b = conflict_fn_no_dependence ();
1800 *last_conflicts = integer_zero_node;
1801 dependence_stats.num_siv_independent++;
1802 return;
1806 else
1808 /* Example:
1809 chrec_a = 12
1810 chrec_b = {10, +, -1}
1812 In this case, chrec_a will not overlap with chrec_b. */
1813 *overlaps_a = conflict_fn_no_dependence ();
1814 *overlaps_b = conflict_fn_no_dependence ();
1815 *last_conflicts = integer_zero_node;
1816 dependence_stats.num_siv_independent++;
1817 return;
1821 else
1823 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1825 if (dump_file && (dump_flags & TDF_DETAILS))
1826 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1828 *overlaps_a = conflict_fn_not_known ();
1829 *overlaps_b = conflict_fn_not_known ();
1830 *last_conflicts = chrec_dont_know;
1831 dependence_stats.num_siv_unimplemented++;
1832 return;
1834 else
1836 if (value2 == false)
1838 /* Example:
1839 chrec_a = 3
1840 chrec_b = {10, +, -1}
1842 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1844 HOST_WIDE_INT numiter;
1845 struct loop *loop = get_chrec_loop (chrec_b);
1847 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1848 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
1849 CHREC_RIGHT (chrec_b));
1850 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1851 *last_conflicts = integer_one_node;
1853 /* Perform weak-zero siv test to see if overlap is
1854 outside the loop bounds. */
1855 numiter = estimated_loop_iterations_int (loop, false);
1857 if (numiter >= 0
1858 && compare_tree_int (tmp, numiter) > 0)
1860 free_conflict_function (*overlaps_a);
1861 free_conflict_function (*overlaps_b);
1862 *overlaps_a = conflict_fn_no_dependence ();
1863 *overlaps_b = conflict_fn_no_dependence ();
1864 *last_conflicts = integer_zero_node;
1865 dependence_stats.num_siv_independent++;
1866 return;
1868 dependence_stats.num_siv_dependent++;
1869 return;
1872 /* When the step does not divide the difference, there
1873 are no overlaps. */
1874 else
1876 *overlaps_a = conflict_fn_no_dependence ();
1877 *overlaps_b = conflict_fn_no_dependence ();
1878 *last_conflicts = integer_zero_node;
1879 dependence_stats.num_siv_independent++;
1880 return;
1883 else
1885 /* Example:
1886 chrec_a = 3
1887 chrec_b = {4, +, 1}
1889 In this case, chrec_a will not overlap with chrec_b. */
1890 *overlaps_a = conflict_fn_no_dependence ();
1891 *overlaps_b = conflict_fn_no_dependence ();
1892 *last_conflicts = integer_zero_node;
1893 dependence_stats.num_siv_independent++;
1894 return;
1901 /* Helper recursive function for initializing the matrix A. Returns
1902 the initial value of CHREC. */
1904 static tree
1905 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
1907 gcc_assert (chrec);
1909 switch (TREE_CODE (chrec))
1911 case POLYNOMIAL_CHREC:
1912 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
1914 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
1915 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
1917 case PLUS_EXPR:
1918 case MULT_EXPR:
1919 case MINUS_EXPR:
1921 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1922 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
1924 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
1927 case NOP_EXPR:
1929 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1930 return chrec_convert (chrec_type (chrec), op, NULL);
1933 case BIT_NOT_EXPR:
1935 /* Handle ~X as -1 - X. */
1936 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1937 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
1938 build_int_cst (TREE_TYPE (chrec), -1), op);
1941 case INTEGER_CST:
1942 return chrec;
1944 default:
1945 gcc_unreachable ();
1946 return NULL_TREE;
1950 #define FLOOR_DIV(x,y) ((x) / (y))
1952 /* Solves the special case of the Diophantine equation:
1953 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
1955 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
1956 number of iterations that loops X and Y run. The overlaps will be
1957 constructed as evolutions in dimension DIM. */
1959 static void
1960 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
1961 affine_fn *overlaps_a,
1962 affine_fn *overlaps_b,
1963 tree *last_conflicts, int dim)
1965 if (((step_a > 0 && step_b > 0)
1966 || (step_a < 0 && step_b < 0)))
1968 int step_overlaps_a, step_overlaps_b;
1969 int gcd_steps_a_b, last_conflict, tau2;
1971 gcd_steps_a_b = gcd (step_a, step_b);
1972 step_overlaps_a = step_b / gcd_steps_a_b;
1973 step_overlaps_b = step_a / gcd_steps_a_b;
1975 if (niter > 0)
1977 tau2 = FLOOR_DIV (niter, step_overlaps_a);
1978 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
1979 last_conflict = tau2;
1980 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
1982 else
1983 *last_conflicts = chrec_dont_know;
1985 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
1986 build_int_cst (NULL_TREE,
1987 step_overlaps_a));
1988 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
1989 build_int_cst (NULL_TREE,
1990 step_overlaps_b));
1993 else
1995 *overlaps_a = affine_fn_cst (integer_zero_node);
1996 *overlaps_b = affine_fn_cst (integer_zero_node);
1997 *last_conflicts = integer_zero_node;
2001 /* Solves the special case of a Diophantine equation where CHREC_A is
2002 an affine bivariate function, and CHREC_B is an affine univariate
2003 function. For example,
2005 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
2007 has the following overlapping functions:
2009 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
2010 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
2011 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
2013 FORNOW: This is a specialized implementation for a case occurring in
2014 a common benchmark. Implement the general algorithm. */
2016 static void
2017 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
2018 conflict_function **overlaps_a,
2019 conflict_function **overlaps_b,
2020 tree *last_conflicts)
2022 bool xz_p, yz_p, xyz_p;
2023 int step_x, step_y, step_z;
2024 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
2025 affine_fn overlaps_a_xz, overlaps_b_xz;
2026 affine_fn overlaps_a_yz, overlaps_b_yz;
2027 affine_fn overlaps_a_xyz, overlaps_b_xyz;
2028 affine_fn ova1, ova2, ovb;
2029 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
2031 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
2032 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
2033 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
2035 niter_x =
2036 estimated_loop_iterations_int (get_chrec_loop (CHREC_LEFT (chrec_a)),
2037 false);
2038 niter_y = estimated_loop_iterations_int (get_chrec_loop (chrec_a), false);
2039 niter_z = estimated_loop_iterations_int (get_chrec_loop (chrec_b), false);
2041 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
2043 if (dump_file && (dump_flags & TDF_DETAILS))
2044 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
2046 *overlaps_a = conflict_fn_not_known ();
2047 *overlaps_b = conflict_fn_not_known ();
2048 *last_conflicts = chrec_dont_know;
2049 return;
2052 niter = MIN (niter_x, niter_z);
2053 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
2054 &overlaps_a_xz,
2055 &overlaps_b_xz,
2056 &last_conflicts_xz, 1);
2057 niter = MIN (niter_y, niter_z);
2058 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
2059 &overlaps_a_yz,
2060 &overlaps_b_yz,
2061 &last_conflicts_yz, 2);
2062 niter = MIN (niter_x, niter_z);
2063 niter = MIN (niter_y, niter);
2064 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
2065 &overlaps_a_xyz,
2066 &overlaps_b_xyz,
2067 &last_conflicts_xyz, 3);
2069 xz_p = !integer_zerop (last_conflicts_xz);
2070 yz_p = !integer_zerop (last_conflicts_yz);
2071 xyz_p = !integer_zerop (last_conflicts_xyz);
2073 if (xz_p || yz_p || xyz_p)
2075 ova1 = affine_fn_cst (integer_zero_node);
2076 ova2 = affine_fn_cst (integer_zero_node);
2077 ovb = affine_fn_cst (integer_zero_node);
2078 if (xz_p)
2080 affine_fn t0 = ova1;
2081 affine_fn t2 = ovb;
2083 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2084 ovb = affine_fn_plus (ovb, overlaps_b_xz);
2085 affine_fn_free (t0);
2086 affine_fn_free (t2);
2087 *last_conflicts = last_conflicts_xz;
2089 if (yz_p)
2091 affine_fn t0 = ova2;
2092 affine_fn t2 = ovb;
2094 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2095 ovb = affine_fn_plus (ovb, overlaps_b_yz);
2096 affine_fn_free (t0);
2097 affine_fn_free (t2);
2098 *last_conflicts = last_conflicts_yz;
2100 if (xyz_p)
2102 affine_fn t0 = ova1;
2103 affine_fn t2 = ova2;
2104 affine_fn t4 = ovb;
2106 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2107 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2108 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2109 affine_fn_free (t0);
2110 affine_fn_free (t2);
2111 affine_fn_free (t4);
2112 *last_conflicts = last_conflicts_xyz;
2114 *overlaps_a = conflict_fn (2, ova1, ova2);
2115 *overlaps_b = conflict_fn (1, ovb);
2117 else
2119 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2120 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2121 *last_conflicts = integer_zero_node;
2124 affine_fn_free (overlaps_a_xz);
2125 affine_fn_free (overlaps_b_xz);
2126 affine_fn_free (overlaps_a_yz);
2127 affine_fn_free (overlaps_b_yz);
2128 affine_fn_free (overlaps_a_xyz);
2129 affine_fn_free (overlaps_b_xyz);
2132 /* Determines the overlapping elements due to accesses CHREC_A and
2133 CHREC_B, that are affine functions. This function cannot handle
2134 symbolic evolution functions, ie. when initial conditions are
2135 parameters, because it uses lambda matrices of integers. */
2137 static void
2138 analyze_subscript_affine_affine (tree chrec_a,
2139 tree chrec_b,
2140 conflict_function **overlaps_a,
2141 conflict_function **overlaps_b,
2142 tree *last_conflicts)
2144 unsigned nb_vars_a, nb_vars_b, dim;
2145 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2146 lambda_matrix A, U, S;
2148 if (eq_evolutions_p (chrec_a, chrec_b))
2150 /* The accessed index overlaps for each iteration in the
2151 loop. */
2152 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2153 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2154 *last_conflicts = chrec_dont_know;
2155 return;
2157 if (dump_file && (dump_flags & TDF_DETAILS))
2158 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2160 /* For determining the initial intersection, we have to solve a
2161 Diophantine equation. This is the most time consuming part.
2163 For answering to the question: "Is there a dependence?" we have
2164 to prove that there exists a solution to the Diophantine
2165 equation, and that the solution is in the iteration domain,
2166 i.e. the solution is positive or zero, and that the solution
2167 happens before the upper bound loop.nb_iterations. Otherwise
2168 there is no dependence. This function outputs a description of
2169 the iterations that hold the intersections. */
2171 nb_vars_a = nb_vars_in_chrec (chrec_a);
2172 nb_vars_b = nb_vars_in_chrec (chrec_b);
2174 dim = nb_vars_a + nb_vars_b;
2175 U = lambda_matrix_new (dim, dim);
2176 A = lambda_matrix_new (dim, 1);
2177 S = lambda_matrix_new (dim, 1);
2179 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2180 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
2181 gamma = init_b - init_a;
2183 /* Don't do all the hard work of solving the Diophantine equation
2184 when we already know the solution: for example,
2185 | {3, +, 1}_1
2186 | {3, +, 4}_2
2187 | gamma = 3 - 3 = 0.
2188 Then the first overlap occurs during the first iterations:
2189 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2191 if (gamma == 0)
2193 if (nb_vars_a == 1 && nb_vars_b == 1)
2195 HOST_WIDE_INT step_a, step_b;
2196 HOST_WIDE_INT niter, niter_a, niter_b;
2197 affine_fn ova, ovb;
2199 niter_a = estimated_loop_iterations_int (get_chrec_loop (chrec_a),
2200 false);
2201 niter_b = estimated_loop_iterations_int (get_chrec_loop (chrec_b),
2202 false);
2203 niter = MIN (niter_a, niter_b);
2204 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2205 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2207 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2208 &ova, &ovb,
2209 last_conflicts, 1);
2210 *overlaps_a = conflict_fn (1, ova);
2211 *overlaps_b = conflict_fn (1, ovb);
2214 else if (nb_vars_a == 2 && nb_vars_b == 1)
2215 compute_overlap_steps_for_affine_1_2
2216 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2218 else if (nb_vars_a == 1 && nb_vars_b == 2)
2219 compute_overlap_steps_for_affine_1_2
2220 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2222 else
2224 if (dump_file && (dump_flags & TDF_DETAILS))
2225 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2226 *overlaps_a = conflict_fn_not_known ();
2227 *overlaps_b = conflict_fn_not_known ();
2228 *last_conflicts = chrec_dont_know;
2230 goto end_analyze_subs_aa;
2233 /* U.A = S */
2234 lambda_matrix_right_hermite (A, dim, 1, S, U);
2236 if (S[0][0] < 0)
2238 S[0][0] *= -1;
2239 lambda_matrix_row_negate (U, dim, 0);
2241 gcd_alpha_beta = S[0][0];
2243 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2244 but that is a quite strange case. Instead of ICEing, answer
2245 don't know. */
2246 if (gcd_alpha_beta == 0)
2248 *overlaps_a = conflict_fn_not_known ();
2249 *overlaps_b = conflict_fn_not_known ();
2250 *last_conflicts = chrec_dont_know;
2251 goto end_analyze_subs_aa;
2254 /* The classic "gcd-test". */
2255 if (!int_divides_p (gcd_alpha_beta, gamma))
2257 /* The "gcd-test" has determined that there is no integer
2258 solution, i.e. there is no dependence. */
2259 *overlaps_a = conflict_fn_no_dependence ();
2260 *overlaps_b = conflict_fn_no_dependence ();
2261 *last_conflicts = integer_zero_node;
2264 /* Both access functions are univariate. This includes SIV and MIV cases. */
2265 else if (nb_vars_a == 1 && nb_vars_b == 1)
2267 /* Both functions should have the same evolution sign. */
2268 if (((A[0][0] > 0 && -A[1][0] > 0)
2269 || (A[0][0] < 0 && -A[1][0] < 0)))
2271 /* The solutions are given by:
2273 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2274 | [u21 u22] [y0]
2276 For a given integer t. Using the following variables,
2278 | i0 = u11 * gamma / gcd_alpha_beta
2279 | j0 = u12 * gamma / gcd_alpha_beta
2280 | i1 = u21
2281 | j1 = u22
2283 the solutions are:
2285 | x0 = i0 + i1 * t,
2286 | y0 = j0 + j1 * t. */
2287 HOST_WIDE_INT i0, j0, i1, j1;
2289 i0 = U[0][0] * gamma / gcd_alpha_beta;
2290 j0 = U[0][1] * gamma / gcd_alpha_beta;
2291 i1 = U[1][0];
2292 j1 = U[1][1];
2294 if ((i1 == 0 && i0 < 0)
2295 || (j1 == 0 && j0 < 0))
2297 /* There is no solution.
2298 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2299 falls in here, but for the moment we don't look at the
2300 upper bound of the iteration domain. */
2301 *overlaps_a = conflict_fn_no_dependence ();
2302 *overlaps_b = conflict_fn_no_dependence ();
2303 *last_conflicts = integer_zero_node;
2304 goto end_analyze_subs_aa;
2307 if (i1 > 0 && j1 > 0)
2309 HOST_WIDE_INT niter_a = estimated_loop_iterations_int
2310 (get_chrec_loop (chrec_a), false);
2311 HOST_WIDE_INT niter_b = estimated_loop_iterations_int
2312 (get_chrec_loop (chrec_b), false);
2313 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2315 /* (X0, Y0) is a solution of the Diophantine equation:
2316 "chrec_a (X0) = chrec_b (Y0)". */
2317 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2318 CEIL (-j0, j1));
2319 HOST_WIDE_INT x0 = i1 * tau1 + i0;
2320 HOST_WIDE_INT y0 = j1 * tau1 + j0;
2322 /* (X1, Y1) is the smallest positive solution of the eq
2323 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2324 first conflict occurs. */
2325 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2326 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2327 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2329 if (niter > 0)
2331 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2332 FLOOR_DIV (niter - j0, j1));
2333 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2335 /* If the overlap occurs outside of the bounds of the
2336 loop, there is no dependence. */
2337 if (x1 >= niter || y1 >= niter)
2339 *overlaps_a = conflict_fn_no_dependence ();
2340 *overlaps_b = conflict_fn_no_dependence ();
2341 *last_conflicts = integer_zero_node;
2342 goto end_analyze_subs_aa;
2344 else
2345 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2347 else
2348 *last_conflicts = chrec_dont_know;
2350 *overlaps_a
2351 = conflict_fn (1,
2352 affine_fn_univar (build_int_cst (NULL_TREE, x1),
2354 build_int_cst (NULL_TREE, i1)));
2355 *overlaps_b
2356 = conflict_fn (1,
2357 affine_fn_univar (build_int_cst (NULL_TREE, y1),
2359 build_int_cst (NULL_TREE, j1)));
2361 else
2363 /* FIXME: For the moment, the upper bound of the
2364 iteration domain for i and j is not checked. */
2365 if (dump_file && (dump_flags & TDF_DETAILS))
2366 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2367 *overlaps_a = conflict_fn_not_known ();
2368 *overlaps_b = conflict_fn_not_known ();
2369 *last_conflicts = chrec_dont_know;
2372 else
2374 if (dump_file && (dump_flags & TDF_DETAILS))
2375 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2376 *overlaps_a = conflict_fn_not_known ();
2377 *overlaps_b = conflict_fn_not_known ();
2378 *last_conflicts = chrec_dont_know;
2381 else
2383 if (dump_file && (dump_flags & TDF_DETAILS))
2384 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2385 *overlaps_a = conflict_fn_not_known ();
2386 *overlaps_b = conflict_fn_not_known ();
2387 *last_conflicts = chrec_dont_know;
2390 end_analyze_subs_aa:
2391 if (dump_file && (dump_flags & TDF_DETAILS))
2393 fprintf (dump_file, " (overlaps_a = ");
2394 dump_conflict_function (dump_file, *overlaps_a);
2395 fprintf (dump_file, ")\n (overlaps_b = ");
2396 dump_conflict_function (dump_file, *overlaps_b);
2397 fprintf (dump_file, ")\n");
2398 fprintf (dump_file, ")\n");
2402 /* Returns true when analyze_subscript_affine_affine can be used for
2403 determining the dependence relation between chrec_a and chrec_b,
2404 that contain symbols. This function modifies chrec_a and chrec_b
2405 such that the analysis result is the same, and such that they don't
2406 contain symbols, and then can safely be passed to the analyzer.
2408 Example: The analysis of the following tuples of evolutions produce
2409 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2410 vs. {0, +, 1}_1
2412 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2413 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2416 static bool
2417 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2419 tree diff, type, left_a, left_b, right_b;
2421 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2422 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2423 /* FIXME: For the moment not handled. Might be refined later. */
2424 return false;
2426 type = chrec_type (*chrec_a);
2427 left_a = CHREC_LEFT (*chrec_a);
2428 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
2429 diff = chrec_fold_minus (type, left_a, left_b);
2431 if (!evolution_function_is_constant_p (diff))
2432 return false;
2434 if (dump_file && (dump_flags & TDF_DETAILS))
2435 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2437 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2438 diff, CHREC_RIGHT (*chrec_a));
2439 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
2440 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2441 build_int_cst (type, 0),
2442 right_b);
2443 return true;
2446 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2447 *OVERLAPS_B are initialized to the functions that describe the
2448 relation between the elements accessed twice by CHREC_A and
2449 CHREC_B. For k >= 0, the following property is verified:
2451 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2453 static void
2454 analyze_siv_subscript (tree chrec_a,
2455 tree chrec_b,
2456 conflict_function **overlaps_a,
2457 conflict_function **overlaps_b,
2458 tree *last_conflicts,
2459 int loop_nest_num)
2461 dependence_stats.num_siv++;
2463 if (dump_file && (dump_flags & TDF_DETAILS))
2464 fprintf (dump_file, "(analyze_siv_subscript \n");
2466 if (evolution_function_is_constant_p (chrec_a)
2467 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2468 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2469 overlaps_a, overlaps_b, last_conflicts);
2471 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2472 && evolution_function_is_constant_p (chrec_b))
2473 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2474 overlaps_b, overlaps_a, last_conflicts);
2476 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2477 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2479 if (!chrec_contains_symbols (chrec_a)
2480 && !chrec_contains_symbols (chrec_b))
2482 analyze_subscript_affine_affine (chrec_a, chrec_b,
2483 overlaps_a, overlaps_b,
2484 last_conflicts);
2486 if (CF_NOT_KNOWN_P (*overlaps_a)
2487 || CF_NOT_KNOWN_P (*overlaps_b))
2488 dependence_stats.num_siv_unimplemented++;
2489 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2490 || CF_NO_DEPENDENCE_P (*overlaps_b))
2491 dependence_stats.num_siv_independent++;
2492 else
2493 dependence_stats.num_siv_dependent++;
2495 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2496 &chrec_b))
2498 analyze_subscript_affine_affine (chrec_a, chrec_b,
2499 overlaps_a, overlaps_b,
2500 last_conflicts);
2502 if (CF_NOT_KNOWN_P (*overlaps_a)
2503 || CF_NOT_KNOWN_P (*overlaps_b))
2504 dependence_stats.num_siv_unimplemented++;
2505 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2506 || CF_NO_DEPENDENCE_P (*overlaps_b))
2507 dependence_stats.num_siv_independent++;
2508 else
2509 dependence_stats.num_siv_dependent++;
2511 else
2512 goto siv_subscript_dontknow;
2515 else
2517 siv_subscript_dontknow:;
2518 if (dump_file && (dump_flags & TDF_DETAILS))
2519 fprintf (dump_file, "siv test failed: unimplemented.\n");
2520 *overlaps_a = conflict_fn_not_known ();
2521 *overlaps_b = conflict_fn_not_known ();
2522 *last_conflicts = chrec_dont_know;
2523 dependence_stats.num_siv_unimplemented++;
2526 if (dump_file && (dump_flags & TDF_DETAILS))
2527 fprintf (dump_file, ")\n");
2530 /* Returns false if we can prove that the greatest common divisor of the steps
2531 of CHREC does not divide CST, false otherwise. */
2533 static bool
2534 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2536 HOST_WIDE_INT cd = 0, val;
2537 tree step;
2539 if (!host_integerp (cst, 0))
2540 return true;
2541 val = tree_low_cst (cst, 0);
2543 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2545 step = CHREC_RIGHT (chrec);
2546 if (!host_integerp (step, 0))
2547 return true;
2548 cd = gcd (cd, tree_low_cst (step, 0));
2549 chrec = CHREC_LEFT (chrec);
2552 return val % cd == 0;
2555 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2556 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2557 functions that describe the relation between the elements accessed
2558 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2559 is verified:
2561 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2563 static void
2564 analyze_miv_subscript (tree chrec_a,
2565 tree chrec_b,
2566 conflict_function **overlaps_a,
2567 conflict_function **overlaps_b,
2568 tree *last_conflicts,
2569 struct loop *loop_nest)
2571 /* FIXME: This is a MIV subscript, not yet handled.
2572 Example: (A[{1, +, 1}_1] vs. A[{1, +, 1}_2]) that comes from
2573 (A[i] vs. A[j]).
2575 In the SIV test we had to solve a Diophantine equation with two
2576 variables. In the MIV case we have to solve a Diophantine
2577 equation with 2*n variables (if the subscript uses n IVs).
2579 tree type, difference;
2581 dependence_stats.num_miv++;
2582 if (dump_file && (dump_flags & TDF_DETAILS))
2583 fprintf (dump_file, "(analyze_miv_subscript \n");
2585 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2586 chrec_a = chrec_convert (type, chrec_a, NULL);
2587 chrec_b = chrec_convert (type, chrec_b, NULL);
2588 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2590 if (eq_evolutions_p (chrec_a, chrec_b))
2592 /* Access functions are the same: all the elements are accessed
2593 in the same order. */
2594 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2595 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2596 *last_conflicts = estimated_loop_iterations_tree
2597 (get_chrec_loop (chrec_a), true);
2598 dependence_stats.num_miv_dependent++;
2601 else if (evolution_function_is_constant_p (difference)
2602 /* For the moment, the following is verified:
2603 evolution_function_is_affine_multivariate_p (chrec_a,
2604 loop_nest->num) */
2605 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2607 /* testsuite/.../ssa-chrec-33.c
2608 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2610 The difference is 1, and all the evolution steps are multiples
2611 of 2, consequently there are no overlapping elements. */
2612 *overlaps_a = conflict_fn_no_dependence ();
2613 *overlaps_b = conflict_fn_no_dependence ();
2614 *last_conflicts = integer_zero_node;
2615 dependence_stats.num_miv_independent++;
2618 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2619 && !chrec_contains_symbols (chrec_a)
2620 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2621 && !chrec_contains_symbols (chrec_b))
2623 /* testsuite/.../ssa-chrec-35.c
2624 {0, +, 1}_2 vs. {0, +, 1}_3
2625 the overlapping elements are respectively located at iterations:
2626 {0, +, 1}_x and {0, +, 1}_x,
2627 in other words, we have the equality:
2628 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2630 Other examples:
2631 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2632 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2634 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2635 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2637 analyze_subscript_affine_affine (chrec_a, chrec_b,
2638 overlaps_a, overlaps_b, last_conflicts);
2640 if (CF_NOT_KNOWN_P (*overlaps_a)
2641 || CF_NOT_KNOWN_P (*overlaps_b))
2642 dependence_stats.num_miv_unimplemented++;
2643 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2644 || CF_NO_DEPENDENCE_P (*overlaps_b))
2645 dependence_stats.num_miv_independent++;
2646 else
2647 dependence_stats.num_miv_dependent++;
2650 else
2652 /* When the analysis is too difficult, answer "don't know". */
2653 if (dump_file && (dump_flags & TDF_DETAILS))
2654 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2656 *overlaps_a = conflict_fn_not_known ();
2657 *overlaps_b = conflict_fn_not_known ();
2658 *last_conflicts = chrec_dont_know;
2659 dependence_stats.num_miv_unimplemented++;
2662 if (dump_file && (dump_flags & TDF_DETAILS))
2663 fprintf (dump_file, ")\n");
2666 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2667 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2668 OVERLAP_ITERATIONS_B are initialized with two functions that
2669 describe the iterations that contain conflicting elements.
2671 Remark: For an integer k >= 0, the following equality is true:
2673 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2676 static void
2677 analyze_overlapping_iterations (tree chrec_a,
2678 tree chrec_b,
2679 conflict_function **overlap_iterations_a,
2680 conflict_function **overlap_iterations_b,
2681 tree *last_conflicts, struct loop *loop_nest)
2683 unsigned int lnn = loop_nest->num;
2685 dependence_stats.num_subscript_tests++;
2687 if (dump_file && (dump_flags & TDF_DETAILS))
2689 fprintf (dump_file, "(analyze_overlapping_iterations \n");
2690 fprintf (dump_file, " (chrec_a = ");
2691 print_generic_expr (dump_file, chrec_a, 0);
2692 fprintf (dump_file, ")\n (chrec_b = ");
2693 print_generic_expr (dump_file, chrec_b, 0);
2694 fprintf (dump_file, ")\n");
2697 if (chrec_a == NULL_TREE
2698 || chrec_b == NULL_TREE
2699 || chrec_contains_undetermined (chrec_a)
2700 || chrec_contains_undetermined (chrec_b))
2702 dependence_stats.num_subscript_undetermined++;
2704 *overlap_iterations_a = conflict_fn_not_known ();
2705 *overlap_iterations_b = conflict_fn_not_known ();
2708 /* If they are the same chrec, and are affine, they overlap
2709 on every iteration. */
2710 else if (eq_evolutions_p (chrec_a, chrec_b)
2711 && evolution_function_is_affine_multivariate_p (chrec_a, lnn))
2713 dependence_stats.num_same_subscript_function++;
2714 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2715 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2716 *last_conflicts = chrec_dont_know;
2719 /* If they aren't the same, and aren't affine, we can't do anything
2720 yet. */
2721 else if ((chrec_contains_symbols (chrec_a)
2722 || chrec_contains_symbols (chrec_b))
2723 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2724 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
2726 dependence_stats.num_subscript_undetermined++;
2727 *overlap_iterations_a = conflict_fn_not_known ();
2728 *overlap_iterations_b = conflict_fn_not_known ();
2731 else if (ziv_subscript_p (chrec_a, chrec_b))
2732 analyze_ziv_subscript (chrec_a, chrec_b,
2733 overlap_iterations_a, overlap_iterations_b,
2734 last_conflicts);
2736 else if (siv_subscript_p (chrec_a, chrec_b))
2737 analyze_siv_subscript (chrec_a, chrec_b,
2738 overlap_iterations_a, overlap_iterations_b,
2739 last_conflicts, lnn);
2741 else
2742 analyze_miv_subscript (chrec_a, chrec_b,
2743 overlap_iterations_a, overlap_iterations_b,
2744 last_conflicts, loop_nest);
2746 if (dump_file && (dump_flags & TDF_DETAILS))
2748 fprintf (dump_file, " (overlap_iterations_a = ");
2749 dump_conflict_function (dump_file, *overlap_iterations_a);
2750 fprintf (dump_file, ")\n (overlap_iterations_b = ");
2751 dump_conflict_function (dump_file, *overlap_iterations_b);
2752 fprintf (dump_file, ")\n");
2753 fprintf (dump_file, ")\n");
2757 /* Helper function for uniquely inserting distance vectors. */
2759 static void
2760 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
2762 unsigned i;
2763 lambda_vector v;
2765 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, v); i++)
2766 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
2767 return;
2769 VEC_safe_push (lambda_vector, heap, DDR_DIST_VECTS (ddr), dist_v);
2772 /* Helper function for uniquely inserting direction vectors. */
2774 static void
2775 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
2777 unsigned i;
2778 lambda_vector v;
2780 for (i = 0; VEC_iterate (lambda_vector, DDR_DIR_VECTS (ddr), i, v); i++)
2781 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
2782 return;
2784 VEC_safe_push (lambda_vector, heap, DDR_DIR_VECTS (ddr), dir_v);
2787 /* Add a distance of 1 on all the loops outer than INDEX. If we
2788 haven't yet determined a distance for this outer loop, push a new
2789 distance vector composed of the previous distance, and a distance
2790 of 1 for this outer loop. Example:
2792 | loop_1
2793 | loop_2
2794 | A[10]
2795 | endloop_2
2796 | endloop_1
2798 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
2799 save (0, 1), then we have to save (1, 0). */
2801 static void
2802 add_outer_distances (struct data_dependence_relation *ddr,
2803 lambda_vector dist_v, int index)
2805 /* For each outer loop where init_v is not set, the accesses are
2806 in dependence of distance 1 in the loop. */
2807 while (--index >= 0)
2809 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2810 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
2811 save_v[index] = 1;
2812 save_dist_v (ddr, save_v);
2816 /* Return false when fail to represent the data dependence as a
2817 distance vector. INIT_B is set to true when a component has been
2818 added to the distance vector DIST_V. INDEX_CARRY is then set to
2819 the index in DIST_V that carries the dependence. */
2821 static bool
2822 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
2823 struct data_reference *ddr_a,
2824 struct data_reference *ddr_b,
2825 lambda_vector dist_v, bool *init_b,
2826 int *index_carry)
2828 unsigned i;
2829 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2831 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2833 tree access_fn_a, access_fn_b;
2834 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
2836 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2838 non_affine_dependence_relation (ddr);
2839 return false;
2842 access_fn_a = DR_ACCESS_FN (ddr_a, i);
2843 access_fn_b = DR_ACCESS_FN (ddr_b, i);
2845 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
2846 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
2848 int dist, index;
2849 int index_a = index_in_loop_nest (CHREC_VARIABLE (access_fn_a),
2850 DDR_LOOP_NEST (ddr));
2851 int index_b = index_in_loop_nest (CHREC_VARIABLE (access_fn_b),
2852 DDR_LOOP_NEST (ddr));
2854 /* The dependence is carried by the outermost loop. Example:
2855 | loop_1
2856 | A[{4, +, 1}_1]
2857 | loop_2
2858 | A[{5, +, 1}_2]
2859 | endloop_2
2860 | endloop_1
2861 In this case, the dependence is carried by loop_1. */
2862 index = index_a < index_b ? index_a : index_b;
2863 *index_carry = MIN (index, *index_carry);
2865 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2867 non_affine_dependence_relation (ddr);
2868 return false;
2871 dist = int_cst_value (SUB_DISTANCE (subscript));
2873 /* This is the subscript coupling test. If we have already
2874 recorded a distance for this loop (a distance coming from
2875 another subscript), it should be the same. For example,
2876 in the following code, there is no dependence:
2878 | loop i = 0, N, 1
2879 | T[i+1][i] = ...
2880 | ... = T[i][i]
2881 | endloop
2883 if (init_v[index] != 0 && dist_v[index] != dist)
2885 finalize_ddr_dependent (ddr, chrec_known);
2886 return false;
2889 dist_v[index] = dist;
2890 init_v[index] = 1;
2891 *init_b = true;
2893 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
2895 /* This can be for example an affine vs. constant dependence
2896 (T[i] vs. T[3]) that is not an affine dependence and is
2897 not representable as a distance vector. */
2898 non_affine_dependence_relation (ddr);
2899 return false;
2903 return true;
2906 /* Return true when the DDR contains only constant access functions. */
2908 static bool
2909 constant_access_functions (const struct data_dependence_relation *ddr)
2911 unsigned i;
2913 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2914 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
2915 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
2916 return false;
2918 return true;
2921 /* Helper function for the case where DDR_A and DDR_B are the same
2922 multivariate access function with a constant step. For an example
2923 see pr34635-1.c. */
2925 static void
2926 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
2928 int x_1, x_2;
2929 tree c_1 = CHREC_LEFT (c_2);
2930 tree c_0 = CHREC_LEFT (c_1);
2931 lambda_vector dist_v;
2932 int v1, v2, cd;
2934 /* Polynomials with more than 2 variables are not handled yet. When
2935 the evolution steps are parameters, it is not possible to
2936 represent the dependence using classical distance vectors. */
2937 if (TREE_CODE (c_0) != INTEGER_CST
2938 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
2939 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
2941 DDR_AFFINE_P (ddr) = false;
2942 return;
2945 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
2946 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
2948 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
2949 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2950 v1 = int_cst_value (CHREC_RIGHT (c_1));
2951 v2 = int_cst_value (CHREC_RIGHT (c_2));
2952 cd = gcd (v1, v2);
2953 v1 /= cd;
2954 v2 /= cd;
2956 if (v2 < 0)
2958 v2 = -v2;
2959 v1 = -v1;
2962 dist_v[x_1] = v2;
2963 dist_v[x_2] = -v1;
2964 save_dist_v (ddr, dist_v);
2966 add_outer_distances (ddr, dist_v, x_1);
2969 /* Helper function for the case where DDR_A and DDR_B are the same
2970 access functions. */
2972 static void
2973 add_other_self_distances (struct data_dependence_relation *ddr)
2975 lambda_vector dist_v;
2976 unsigned i;
2977 int index_carry = DDR_NB_LOOPS (ddr);
2979 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2981 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
2983 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
2985 if (!evolution_function_is_univariate_p (access_fun))
2987 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
2989 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
2990 return;
2993 access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
2995 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
2996 add_multivariate_self_dist (ddr, access_fun);
2997 else
2998 /* The evolution step is not constant: it varies in
2999 the outer loop, so this cannot be represented by a
3000 distance vector. For example in pr34635.c the
3001 evolution is {0, +, {0, +, 4}_1}_2. */
3002 DDR_AFFINE_P (ddr) = false;
3004 return;
3007 index_carry = MIN (index_carry,
3008 index_in_loop_nest (CHREC_VARIABLE (access_fun),
3009 DDR_LOOP_NEST (ddr)));
3013 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3014 add_outer_distances (ddr, dist_v, index_carry);
3017 static void
3018 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
3020 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3022 dist_v[DDR_INNER_LOOP (ddr)] = 1;
3023 save_dist_v (ddr, dist_v);
3026 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
3027 is the case for example when access functions are the same and
3028 equal to a constant, as in:
3030 | loop_1
3031 | A[3] = ...
3032 | ... = A[3]
3033 | endloop_1
3035 in which case the distance vectors are (0) and (1). */
3037 static void
3038 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
3040 unsigned i, j;
3042 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3044 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
3045 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
3046 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
3048 for (j = 0; j < ca->n; j++)
3049 if (affine_function_zero_p (ca->fns[j]))
3051 insert_innermost_unit_dist_vector (ddr);
3052 return;
3055 for (j = 0; j < cb->n; j++)
3056 if (affine_function_zero_p (cb->fns[j]))
3058 insert_innermost_unit_dist_vector (ddr);
3059 return;
3064 /* Compute the classic per loop distance vector. DDR is the data
3065 dependence relation to build a vector from. Return false when fail
3066 to represent the data dependence as a distance vector. */
3068 static bool
3069 build_classic_dist_vector (struct data_dependence_relation *ddr,
3070 struct loop *loop_nest)
3072 bool init_b = false;
3073 int index_carry = DDR_NB_LOOPS (ddr);
3074 lambda_vector dist_v;
3076 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3077 return false;
3079 if (same_access_functions (ddr))
3081 /* Save the 0 vector. */
3082 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3083 save_dist_v (ddr, dist_v);
3085 if (constant_access_functions (ddr))
3086 add_distance_for_zero_overlaps (ddr);
3088 if (DDR_NB_LOOPS (ddr) > 1)
3089 add_other_self_distances (ddr);
3091 return true;
3094 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3095 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3096 dist_v, &init_b, &index_carry))
3097 return false;
3099 /* Save the distance vector if we initialized one. */
3100 if (init_b)
3102 /* Verify a basic constraint: classic distance vectors should
3103 always be lexicographically positive.
3105 Data references are collected in the order of execution of
3106 the program, thus for the following loop
3108 | for (i = 1; i < 100; i++)
3109 | for (j = 1; j < 100; j++)
3111 | t = T[j+1][i-1]; // A
3112 | T[j][i] = t + 2; // B
3115 references are collected following the direction of the wind:
3116 A then B. The data dependence tests are performed also
3117 following this order, such that we're looking at the distance
3118 separating the elements accessed by A from the elements later
3119 accessed by B. But in this example, the distance returned by
3120 test_dep (A, B) is lexicographically negative (-1, 1), that
3121 means that the access A occurs later than B with respect to
3122 the outer loop, ie. we're actually looking upwind. In this
3123 case we solve test_dep (B, A) looking downwind to the
3124 lexicographically positive solution, that returns the
3125 distance vector (1, -1). */
3126 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3128 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3129 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3130 loop_nest))
3131 return false;
3132 compute_subscript_distance (ddr);
3133 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3134 save_v, &init_b, &index_carry))
3135 return false;
3136 save_dist_v (ddr, save_v);
3137 DDR_REVERSED_P (ddr) = true;
3139 /* In this case there is a dependence forward for all the
3140 outer loops:
3142 | for (k = 1; k < 100; k++)
3143 | for (i = 1; i < 100; i++)
3144 | for (j = 1; j < 100; j++)
3146 | t = T[j+1][i-1]; // A
3147 | T[j][i] = t + 2; // B
3150 the vectors are:
3151 (0, 1, -1)
3152 (1, 1, -1)
3153 (1, -1, 1)
3155 if (DDR_NB_LOOPS (ddr) > 1)
3157 add_outer_distances (ddr, save_v, index_carry);
3158 add_outer_distances (ddr, dist_v, index_carry);
3161 else
3163 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3164 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3166 if (DDR_NB_LOOPS (ddr) > 1)
3168 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3170 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3171 DDR_A (ddr), loop_nest))
3172 return false;
3173 compute_subscript_distance (ddr);
3174 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3175 opposite_v, &init_b,
3176 &index_carry))
3177 return false;
3179 save_dist_v (ddr, save_v);
3180 add_outer_distances (ddr, dist_v, index_carry);
3181 add_outer_distances (ddr, opposite_v, index_carry);
3183 else
3184 save_dist_v (ddr, save_v);
3187 else
3189 /* There is a distance of 1 on all the outer loops: Example:
3190 there is a dependence of distance 1 on loop_1 for the array A.
3192 | loop_1
3193 | A[5] = ...
3194 | endloop
3196 add_outer_distances (ddr, dist_v,
3197 lambda_vector_first_nz (dist_v,
3198 DDR_NB_LOOPS (ddr), 0));
3201 if (dump_file && (dump_flags & TDF_DETAILS))
3203 unsigned i;
3205 fprintf (dump_file, "(build_classic_dist_vector\n");
3206 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3208 fprintf (dump_file, " dist_vector = (");
3209 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3210 DDR_NB_LOOPS (ddr));
3211 fprintf (dump_file, " )\n");
3213 fprintf (dump_file, ")\n");
3216 return true;
3219 /* Return the direction for a given distance.
3220 FIXME: Computing dir this way is suboptimal, since dir can catch
3221 cases that dist is unable to represent. */
3223 static inline enum data_dependence_direction
3224 dir_from_dist (int dist)
3226 if (dist > 0)
3227 return dir_positive;
3228 else if (dist < 0)
3229 return dir_negative;
3230 else
3231 return dir_equal;
3234 /* Compute the classic per loop direction vector. DDR is the data
3235 dependence relation to build a vector from. */
3237 static void
3238 build_classic_dir_vector (struct data_dependence_relation *ddr)
3240 unsigned i, j;
3241 lambda_vector dist_v;
3243 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
3245 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3247 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3248 dir_v[j] = dir_from_dist (dist_v[j]);
3250 save_dir_v (ddr, dir_v);
3254 /* Helper function. Returns true when there is a dependence between
3255 data references DRA and DRB. */
3257 static bool
3258 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3259 struct data_reference *dra,
3260 struct data_reference *drb,
3261 struct loop *loop_nest)
3263 unsigned int i;
3264 tree last_conflicts;
3265 struct subscript *subscript;
3267 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3268 i++)
3270 conflict_function *overlaps_a, *overlaps_b;
3272 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3273 DR_ACCESS_FN (drb, i),
3274 &overlaps_a, &overlaps_b,
3275 &last_conflicts, loop_nest);
3277 if (CF_NOT_KNOWN_P (overlaps_a)
3278 || CF_NOT_KNOWN_P (overlaps_b))
3280 finalize_ddr_dependent (ddr, chrec_dont_know);
3281 dependence_stats.num_dependence_undetermined++;
3282 free_conflict_function (overlaps_a);
3283 free_conflict_function (overlaps_b);
3284 return false;
3287 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3288 || CF_NO_DEPENDENCE_P (overlaps_b))
3290 finalize_ddr_dependent (ddr, chrec_known);
3291 dependence_stats.num_dependence_independent++;
3292 free_conflict_function (overlaps_a);
3293 free_conflict_function (overlaps_b);
3294 return false;
3297 else
3299 if (SUB_CONFLICTS_IN_A (subscript))
3300 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3301 if (SUB_CONFLICTS_IN_B (subscript))
3302 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3304 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3305 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3306 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3310 return true;
3313 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3315 static void
3316 subscript_dependence_tester (struct data_dependence_relation *ddr,
3317 struct loop *loop_nest)
3320 if (dump_file && (dump_flags & TDF_DETAILS))
3321 fprintf (dump_file, "(subscript_dependence_tester \n");
3323 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3324 dependence_stats.num_dependence_dependent++;
3326 compute_subscript_distance (ddr);
3327 if (build_classic_dist_vector (ddr, loop_nest))
3328 build_classic_dir_vector (ddr);
3330 if (dump_file && (dump_flags & TDF_DETAILS))
3331 fprintf (dump_file, ")\n");
3334 /* Returns true when all the access functions of A are affine or
3335 constant with respect to LOOP_NEST. */
3337 static bool
3338 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3339 const struct loop *loop_nest)
3341 unsigned int i;
3342 VEC(tree,heap) *fns = DR_ACCESS_FNS (a);
3343 tree t;
3345 for (i = 0; VEC_iterate (tree, fns, i, t); i++)
3346 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3347 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3348 return false;
3350 return true;
3353 /* Initializes an equation for an OMEGA problem using the information
3354 contained in the ACCESS_FUN. Returns true when the operation
3355 succeeded.
3357 PB is the omega constraint system.
3358 EQ is the number of the equation to be initialized.
3359 OFFSET is used for shifting the variables names in the constraints:
3360 a constrain is composed of 2 * the number of variables surrounding
3361 dependence accesses. OFFSET is set either to 0 for the first n variables,
3362 then it is set to n.
3363 ACCESS_FUN is expected to be an affine chrec. */
3365 static bool
3366 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3367 unsigned int offset, tree access_fun,
3368 struct data_dependence_relation *ddr)
3370 switch (TREE_CODE (access_fun))
3372 case POLYNOMIAL_CHREC:
3374 tree left = CHREC_LEFT (access_fun);
3375 tree right = CHREC_RIGHT (access_fun);
3376 int var = CHREC_VARIABLE (access_fun);
3377 unsigned var_idx;
3379 if (TREE_CODE (right) != INTEGER_CST)
3380 return false;
3382 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3383 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3385 /* Compute the innermost loop index. */
3386 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3388 if (offset == 0)
3389 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3390 += int_cst_value (right);
3392 switch (TREE_CODE (left))
3394 case POLYNOMIAL_CHREC:
3395 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3397 case INTEGER_CST:
3398 pb->eqs[eq].coef[0] += int_cst_value (left);
3399 return true;
3401 default:
3402 return false;
3406 case INTEGER_CST:
3407 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3408 return true;
3410 default:
3411 return false;
3415 /* As explained in the comments preceding init_omega_for_ddr, we have
3416 to set up a system for each loop level, setting outer loops
3417 variation to zero, and current loop variation to positive or zero.
3418 Save each lexico positive distance vector. */
3420 static void
3421 omega_extract_distance_vectors (omega_pb pb,
3422 struct data_dependence_relation *ddr)
3424 int eq, geq;
3425 unsigned i, j;
3426 struct loop *loopi, *loopj;
3427 enum omega_result res;
3429 /* Set a new problem for each loop in the nest. The basis is the
3430 problem that we have initialized until now. On top of this we
3431 add new constraints. */
3432 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3433 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3435 int dist = 0;
3436 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3437 DDR_NB_LOOPS (ddr));
3439 omega_copy_problem (copy, pb);
3441 /* For all the outer loops "loop_j", add "dj = 0". */
3442 for (j = 0;
3443 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3445 eq = omega_add_zero_eq (copy, omega_black);
3446 copy->eqs[eq].coef[j + 1] = 1;
3449 /* For "loop_i", add "0 <= di". */
3450 geq = omega_add_zero_geq (copy, omega_black);
3451 copy->geqs[geq].coef[i + 1] = 1;
3453 /* Reduce the constraint system, and test that the current
3454 problem is feasible. */
3455 res = omega_simplify_problem (copy);
3456 if (res == omega_false
3457 || res == omega_unknown
3458 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3459 goto next_problem;
3461 for (eq = 0; eq < copy->num_subs; eq++)
3462 if (copy->subs[eq].key == (int) i + 1)
3464 dist = copy->subs[eq].coef[0];
3465 goto found_dist;
3468 if (dist == 0)
3470 /* Reinitialize problem... */
3471 omega_copy_problem (copy, pb);
3472 for (j = 0;
3473 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3475 eq = omega_add_zero_eq (copy, omega_black);
3476 copy->eqs[eq].coef[j + 1] = 1;
3479 /* ..., but this time "di = 1". */
3480 eq = omega_add_zero_eq (copy, omega_black);
3481 copy->eqs[eq].coef[i + 1] = 1;
3482 copy->eqs[eq].coef[0] = -1;
3484 res = omega_simplify_problem (copy);
3485 if (res == omega_false
3486 || res == omega_unknown
3487 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3488 goto next_problem;
3490 for (eq = 0; eq < copy->num_subs; eq++)
3491 if (copy->subs[eq].key == (int) i + 1)
3493 dist = copy->subs[eq].coef[0];
3494 goto found_dist;
3498 found_dist:;
3499 /* Save the lexicographically positive distance vector. */
3500 if (dist >= 0)
3502 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3503 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3505 dist_v[i] = dist;
3507 for (eq = 0; eq < copy->num_subs; eq++)
3508 if (copy->subs[eq].key > 0)
3510 dist = copy->subs[eq].coef[0];
3511 dist_v[copy->subs[eq].key - 1] = dist;
3514 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3515 dir_v[j] = dir_from_dist (dist_v[j]);
3517 save_dist_v (ddr, dist_v);
3518 save_dir_v (ddr, dir_v);
3521 next_problem:;
3522 omega_free_problem (copy);
3526 /* This is called for each subscript of a tuple of data references:
3527 insert an equality for representing the conflicts. */
3529 static bool
3530 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3531 struct data_dependence_relation *ddr,
3532 omega_pb pb, bool *maybe_dependent)
3534 int eq;
3535 tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3536 TREE_TYPE (access_fun_b));
3537 tree fun_a = chrec_convert (type, access_fun_a, NULL);
3538 tree fun_b = chrec_convert (type, access_fun_b, NULL);
3539 tree difference = chrec_fold_minus (type, fun_a, fun_b);
3541 /* When the fun_a - fun_b is not constant, the dependence is not
3542 captured by the classic distance vector representation. */
3543 if (TREE_CODE (difference) != INTEGER_CST)
3544 return false;
3546 /* ZIV test. */
3547 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3549 /* There is no dependence. */
3550 *maybe_dependent = false;
3551 return true;
3554 fun_b = chrec_fold_multiply (type, fun_b, integer_minus_one_node);
3556 eq = omega_add_zero_eq (pb, omega_black);
3557 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3558 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3559 /* There is probably a dependence, but the system of
3560 constraints cannot be built: answer "don't know". */
3561 return false;
3563 /* GCD test. */
3564 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3565 && !int_divides_p (lambda_vector_gcd
3566 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3567 2 * DDR_NB_LOOPS (ddr)),
3568 pb->eqs[eq].coef[0]))
3570 /* There is no dependence. */
3571 *maybe_dependent = false;
3572 return true;
3575 return true;
3578 /* Helper function, same as init_omega_for_ddr but specialized for
3579 data references A and B. */
3581 static bool
3582 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3583 struct data_dependence_relation *ddr,
3584 omega_pb pb, bool *maybe_dependent)
3586 unsigned i;
3587 int ineq;
3588 struct loop *loopi;
3589 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3591 /* Insert an equality per subscript. */
3592 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3594 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3595 ddr, pb, maybe_dependent))
3596 return false;
3597 else if (*maybe_dependent == false)
3599 /* There is no dependence. */
3600 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3601 return true;
3605 /* Insert inequalities: constraints corresponding to the iteration
3606 domain, i.e. the loops surrounding the references "loop_x" and
3607 the distance variables "dx". The layout of the OMEGA
3608 representation is as follows:
3609 - coef[0] is the constant
3610 - coef[1..nb_loops] are the protected variables that will not be
3611 removed by the solver: the "dx"
3612 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3614 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3615 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3617 HOST_WIDE_INT nbi = estimated_loop_iterations_int (loopi, false);
3619 /* 0 <= loop_x */
3620 ineq = omega_add_zero_geq (pb, omega_black);
3621 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3623 /* 0 <= loop_x + dx */
3624 ineq = omega_add_zero_geq (pb, omega_black);
3625 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3626 pb->geqs[ineq].coef[i + 1] = 1;
3628 if (nbi != -1)
3630 /* loop_x <= nb_iters */
3631 ineq = omega_add_zero_geq (pb, omega_black);
3632 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3633 pb->geqs[ineq].coef[0] = nbi;
3635 /* loop_x + dx <= nb_iters */
3636 ineq = omega_add_zero_geq (pb, omega_black);
3637 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3638 pb->geqs[ineq].coef[i + 1] = -1;
3639 pb->geqs[ineq].coef[0] = nbi;
3641 /* A step "dx" bigger than nb_iters is not feasible, so
3642 add "0 <= nb_iters + dx", */
3643 ineq = omega_add_zero_geq (pb, omega_black);
3644 pb->geqs[ineq].coef[i + 1] = 1;
3645 pb->geqs[ineq].coef[0] = nbi;
3646 /* and "dx <= nb_iters". */
3647 ineq = omega_add_zero_geq (pb, omega_black);
3648 pb->geqs[ineq].coef[i + 1] = -1;
3649 pb->geqs[ineq].coef[0] = nbi;
3653 omega_extract_distance_vectors (pb, ddr);
3655 return true;
3658 /* Sets up the Omega dependence problem for the data dependence
3659 relation DDR. Returns false when the constraint system cannot be
3660 built, ie. when the test answers "don't know". Returns true
3661 otherwise, and when independence has been proved (using one of the
3662 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3663 set MAYBE_DEPENDENT to true.
3665 Example: for setting up the dependence system corresponding to the
3666 conflicting accesses
3668 | loop_i
3669 | loop_j
3670 | A[i, i+1] = ...
3671 | ... A[2*j, 2*(i + j)]
3672 | endloop_j
3673 | endloop_i
3675 the following constraints come from the iteration domain:
3677 0 <= i <= Ni
3678 0 <= i + di <= Ni
3679 0 <= j <= Nj
3680 0 <= j + dj <= Nj
3682 where di, dj are the distance variables. The constraints
3683 representing the conflicting elements are:
3685 i = 2 * (j + dj)
3686 i + 1 = 2 * (i + di + j + dj)
3688 For asking that the resulting distance vector (di, dj) be
3689 lexicographically positive, we insert the constraint "di >= 0". If
3690 "di = 0" in the solution, we fix that component to zero, and we
3691 look at the inner loops: we set a new problem where all the outer
3692 loop distances are zero, and fix this inner component to be
3693 positive. When one of the components is positive, we save that
3694 distance, and set a new problem where the distance on this loop is
3695 zero, searching for other distances in the inner loops. Here is
3696 the classic example that illustrates that we have to set for each
3697 inner loop a new problem:
3699 | loop_1
3700 | loop_2
3701 | A[10]
3702 | endloop_2
3703 | endloop_1
3705 we have to save two distances (1, 0) and (0, 1).
3707 Given two array references, refA and refB, we have to set the
3708 dependence problem twice, refA vs. refB and refB vs. refA, and we
3709 cannot do a single test, as refB might occur before refA in the
3710 inner loops, and the contrary when considering outer loops: ex.
3712 | loop_0
3713 | loop_1
3714 | loop_2
3715 | T[{1,+,1}_2][{1,+,1}_1] // refA
3716 | T[{2,+,1}_2][{0,+,1}_1] // refB
3717 | endloop_2
3718 | endloop_1
3719 | endloop_0
3721 refB touches the elements in T before refA, and thus for the same
3722 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3723 but for successive loop_0 iterations, we have (1, -1, 1)
3725 The Omega solver expects the distance variables ("di" in the
3726 previous example) to come first in the constraint system (as
3727 variables to be protected, or "safe" variables), the constraint
3728 system is built using the following layout:
3730 "cst | distance vars | index vars".
3733 static bool
3734 init_omega_for_ddr (struct data_dependence_relation *ddr,
3735 bool *maybe_dependent)
3737 omega_pb pb;
3738 bool res = false;
3740 *maybe_dependent = true;
3742 if (same_access_functions (ddr))
3744 unsigned j;
3745 lambda_vector dir_v;
3747 /* Save the 0 vector. */
3748 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3749 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3750 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3751 dir_v[j] = dir_equal;
3752 save_dir_v (ddr, dir_v);
3754 /* Save the dependences carried by outer loops. */
3755 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3756 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3757 maybe_dependent);
3758 omega_free_problem (pb);
3759 return res;
3762 /* Omega expects the protected variables (those that have to be kept
3763 after elimination) to appear first in the constraint system.
3764 These variables are the distance variables. In the following
3765 initialization we declare NB_LOOPS safe variables, and the total
3766 number of variables for the constraint system is 2*NB_LOOPS. */
3767 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3768 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3769 maybe_dependent);
3770 omega_free_problem (pb);
3772 /* Stop computation if not decidable, or no dependence. */
3773 if (res == false || *maybe_dependent == false)
3774 return res;
3776 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3777 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
3778 maybe_dependent);
3779 omega_free_problem (pb);
3781 return res;
3784 /* Return true when DDR contains the same information as that stored
3785 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
3787 static bool
3788 ddr_consistent_p (FILE *file,
3789 struct data_dependence_relation *ddr,
3790 VEC (lambda_vector, heap) *dist_vects,
3791 VEC (lambda_vector, heap) *dir_vects)
3793 unsigned int i, j;
3795 /* If dump_file is set, output there. */
3796 if (dump_file && (dump_flags & TDF_DETAILS))
3797 file = dump_file;
3799 if (VEC_length (lambda_vector, dist_vects) != DDR_NUM_DIST_VECTS (ddr))
3801 lambda_vector b_dist_v;
3802 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
3803 VEC_length (lambda_vector, dist_vects),
3804 DDR_NUM_DIST_VECTS (ddr));
3806 fprintf (file, "Banerjee dist vectors:\n");
3807 for (i = 0; VEC_iterate (lambda_vector, dist_vects, i, b_dist_v); i++)
3808 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
3810 fprintf (file, "Omega dist vectors:\n");
3811 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3812 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
3814 fprintf (file, "data dependence relation:\n");
3815 dump_data_dependence_relation (file, ddr);
3817 fprintf (file, ")\n");
3818 return false;
3821 if (VEC_length (lambda_vector, dir_vects) != DDR_NUM_DIR_VECTS (ddr))
3823 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
3824 VEC_length (lambda_vector, dir_vects),
3825 DDR_NUM_DIR_VECTS (ddr));
3826 return false;
3829 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3831 lambda_vector a_dist_v;
3832 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
3834 /* Distance vectors are not ordered in the same way in the DDR
3835 and in the DIST_VECTS: search for a matching vector. */
3836 for (j = 0; VEC_iterate (lambda_vector, dist_vects, j, a_dist_v); j++)
3837 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
3838 break;
3840 if (j == VEC_length (lambda_vector, dist_vects))
3842 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
3843 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
3844 fprintf (file, "not found in Omega dist vectors:\n");
3845 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
3846 fprintf (file, "data dependence relation:\n");
3847 dump_data_dependence_relation (file, ddr);
3848 fprintf (file, ")\n");
3852 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
3854 lambda_vector a_dir_v;
3855 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
3857 /* Direction vectors are not ordered in the same way in the DDR
3858 and in the DIR_VECTS: search for a matching vector. */
3859 for (j = 0; VEC_iterate (lambda_vector, dir_vects, j, a_dir_v); j++)
3860 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
3861 break;
3863 if (j == VEC_length (lambda_vector, dist_vects))
3865 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
3866 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
3867 fprintf (file, "not found in Omega dir vectors:\n");
3868 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
3869 fprintf (file, "data dependence relation:\n");
3870 dump_data_dependence_relation (file, ddr);
3871 fprintf (file, ")\n");
3875 return true;
3878 /* This computes the affine dependence relation between A and B with
3879 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
3880 independence between two accesses, while CHREC_DONT_KNOW is used
3881 for representing the unknown relation.
3883 Note that it is possible to stop the computation of the dependence
3884 relation the first time we detect a CHREC_KNOWN element for a given
3885 subscript. */
3887 static void
3888 compute_affine_dependence (struct data_dependence_relation *ddr,
3889 struct loop *loop_nest)
3891 struct data_reference *dra = DDR_A (ddr);
3892 struct data_reference *drb = DDR_B (ddr);
3894 if (dump_file && (dump_flags & TDF_DETAILS))
3896 fprintf (dump_file, "(compute_affine_dependence\n");
3897 fprintf (dump_file, " (stmt_a = \n");
3898 print_gimple_stmt (dump_file, DR_STMT (dra), 0, 0);
3899 fprintf (dump_file, ")\n (stmt_b = \n");
3900 print_gimple_stmt (dump_file, DR_STMT (drb), 0, 0);
3901 fprintf (dump_file, ")\n");
3904 /* Analyze only when the dependence relation is not yet known. */
3905 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE
3906 && !DDR_SELF_REFERENCE (ddr))
3908 dependence_stats.num_dependence_tests++;
3910 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
3911 && access_functions_are_affine_or_constant_p (drb, loop_nest))
3913 if (flag_check_data_deps)
3915 /* Compute the dependences using the first algorithm. */
3916 subscript_dependence_tester (ddr, loop_nest);
3918 if (dump_file && (dump_flags & TDF_DETAILS))
3920 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
3921 dump_data_dependence_relation (dump_file, ddr);
3924 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
3926 bool maybe_dependent;
3927 VEC (lambda_vector, heap) *dir_vects, *dist_vects;
3929 /* Save the result of the first DD analyzer. */
3930 dist_vects = DDR_DIST_VECTS (ddr);
3931 dir_vects = DDR_DIR_VECTS (ddr);
3933 /* Reset the information. */
3934 DDR_DIST_VECTS (ddr) = NULL;
3935 DDR_DIR_VECTS (ddr) = NULL;
3937 /* Compute the same information using Omega. */
3938 if (!init_omega_for_ddr (ddr, &maybe_dependent))
3939 goto csys_dont_know;
3941 if (dump_file && (dump_flags & TDF_DETAILS))
3943 fprintf (dump_file, "Omega Analyzer\n");
3944 dump_data_dependence_relation (dump_file, ddr);
3947 /* Check that we get the same information. */
3948 if (maybe_dependent)
3949 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
3950 dir_vects));
3953 else
3954 subscript_dependence_tester (ddr, loop_nest);
3957 /* As a last case, if the dependence cannot be determined, or if
3958 the dependence is considered too difficult to determine, answer
3959 "don't know". */
3960 else
3962 csys_dont_know:;
3963 dependence_stats.num_dependence_undetermined++;
3965 if (dump_file && (dump_flags & TDF_DETAILS))
3967 fprintf (dump_file, "Data ref a:\n");
3968 dump_data_reference (dump_file, dra);
3969 fprintf (dump_file, "Data ref b:\n");
3970 dump_data_reference (dump_file, drb);
3971 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
3973 finalize_ddr_dependent (ddr, chrec_dont_know);
3977 if (dump_file && (dump_flags & TDF_DETAILS))
3978 fprintf (dump_file, ")\n");
3981 /* This computes the dependence relation for the same data
3982 reference into DDR. */
3984 static void
3985 compute_self_dependence (struct data_dependence_relation *ddr)
3987 unsigned int i;
3988 struct subscript *subscript;
3990 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3991 return;
3993 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3994 i++)
3996 if (SUB_CONFLICTS_IN_A (subscript))
3997 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3998 if (SUB_CONFLICTS_IN_B (subscript))
3999 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
4001 /* The accessed index overlaps for each iteration. */
4002 SUB_CONFLICTS_IN_A (subscript)
4003 = conflict_fn (1, affine_fn_cst (integer_zero_node));
4004 SUB_CONFLICTS_IN_B (subscript)
4005 = conflict_fn (1, affine_fn_cst (integer_zero_node));
4006 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
4009 /* The distance vector is the zero vector. */
4010 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4011 save_dir_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4014 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4015 the data references in DATAREFS, in the LOOP_NEST. When
4016 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4017 relations. */
4019 void
4020 compute_all_dependences (VEC (data_reference_p, heap) *datarefs,
4021 VEC (ddr_p, heap) **dependence_relations,
4022 VEC (loop_p, heap) *loop_nest,
4023 bool compute_self_and_rr)
4025 struct data_dependence_relation *ddr;
4026 struct data_reference *a, *b;
4027 unsigned int i, j;
4029 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, a); i++)
4030 for (j = i + 1; VEC_iterate (data_reference_p, datarefs, j, b); j++)
4031 if (!DR_IS_READ (a) || !DR_IS_READ (b) || compute_self_and_rr)
4033 ddr = initialize_data_dependence_relation (a, b, loop_nest);
4034 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4035 if (loop_nest)
4036 compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
4039 if (compute_self_and_rr)
4040 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, a); i++)
4042 ddr = initialize_data_dependence_relation (a, a, loop_nest);
4043 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4044 compute_self_dependence (ddr);
4048 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4049 true if STMT clobbers memory, false otherwise. */
4051 bool
4052 get_references_in_stmt (gimple stmt, VEC (data_ref_loc, heap) **references)
4054 bool clobbers_memory = false;
4055 data_ref_loc *ref;
4056 tree *op0, *op1;
4057 enum gimple_code stmt_code = gimple_code (stmt);
4059 *references = NULL;
4061 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4062 Calls have side-effects, except those to const or pure
4063 functions. */
4064 if ((stmt_code == GIMPLE_CALL
4065 && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)))
4066 || (stmt_code == GIMPLE_ASM
4067 && gimple_asm_volatile_p (stmt)))
4068 clobbers_memory = true;
4070 if (!gimple_vuse (stmt))
4071 return clobbers_memory;
4073 if (stmt_code == GIMPLE_ASSIGN)
4075 tree base;
4076 op0 = gimple_assign_lhs_ptr (stmt);
4077 op1 = gimple_assign_rhs1_ptr (stmt);
4079 if (DECL_P (*op1)
4080 || (REFERENCE_CLASS_P (*op1)
4081 && (base = get_base_address (*op1))
4082 && TREE_CODE (base) != SSA_NAME))
4084 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4085 ref->pos = op1;
4086 ref->is_read = true;
4089 if (DECL_P (*op0)
4090 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0)))
4092 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4093 ref->pos = op0;
4094 ref->is_read = false;
4097 else if (stmt_code == GIMPLE_CALL)
4099 unsigned i, n = gimple_call_num_args (stmt);
4101 for (i = 0; i < n; i++)
4103 op0 = gimple_call_arg_ptr (stmt, i);
4105 if (DECL_P (*op0)
4106 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0)))
4108 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4109 ref->pos = op0;
4110 ref->is_read = true;
4115 return clobbers_memory;
4118 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4119 reference, returns false, otherwise returns true. NEST is the outermost
4120 loop of the loop nest in which the references should be analyzed. */
4122 bool
4123 find_data_references_in_stmt (struct loop *nest, gimple stmt,
4124 VEC (data_reference_p, heap) **datarefs)
4126 unsigned i;
4127 VEC (data_ref_loc, heap) *references;
4128 data_ref_loc *ref;
4129 bool ret = true;
4130 data_reference_p dr;
4132 if (get_references_in_stmt (stmt, &references))
4134 VEC_free (data_ref_loc, heap, references);
4135 return false;
4138 for (i = 0; VEC_iterate (data_ref_loc, references, i, ref); i++)
4140 dr = create_data_ref (nest, *ref->pos, stmt, ref->is_read);
4141 gcc_assert (dr != NULL);
4143 /* FIXME -- data dependence analysis does not work correctly for objects
4144 with invariant addresses in loop nests. Let us fail here until the
4145 problem is fixed. */
4146 if (dr_address_invariant_p (dr) && nest)
4148 free_data_ref (dr);
4149 if (dump_file && (dump_flags & TDF_DETAILS))
4150 fprintf (dump_file, "\tFAILED as dr address is invariant\n");
4151 ret = false;
4152 break;
4155 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4157 VEC_free (data_ref_loc, heap, references);
4158 return ret;
4161 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4162 reference, returns false, otherwise returns true. NEST is the outermost
4163 loop of the loop nest in which the references should be analyzed. */
4165 bool
4166 graphite_find_data_references_in_stmt (struct loop *nest, gimple stmt,
4167 VEC (data_reference_p, heap) **datarefs)
4169 unsigned i;
4170 VEC (data_ref_loc, heap) *references;
4171 data_ref_loc *ref;
4172 bool ret = true;
4173 data_reference_p dr;
4175 if (get_references_in_stmt (stmt, &references))
4177 VEC_free (data_ref_loc, heap, references);
4178 return false;
4181 for (i = 0; VEC_iterate (data_ref_loc, references, i, ref); i++)
4183 dr = create_data_ref (nest, *ref->pos, stmt, ref->is_read);
4184 gcc_assert (dr != NULL);
4185 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4188 VEC_free (data_ref_loc, heap, references);
4189 return ret;
4192 /* Search the data references in LOOP, and record the information into
4193 DATAREFS. Returns chrec_dont_know when failing to analyze a
4194 difficult case, returns NULL_TREE otherwise. */
4196 static tree
4197 find_data_references_in_bb (struct loop *loop, basic_block bb,
4198 VEC (data_reference_p, heap) **datarefs)
4200 gimple_stmt_iterator bsi;
4202 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4204 gimple stmt = gsi_stmt (bsi);
4206 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4208 struct data_reference *res;
4209 res = XCNEW (struct data_reference);
4210 VEC_safe_push (data_reference_p, heap, *datarefs, res);
4212 return chrec_dont_know;
4216 return NULL_TREE;
4219 /* Search the data references in LOOP, and record the information into
4220 DATAREFS. Returns chrec_dont_know when failing to analyze a
4221 difficult case, returns NULL_TREE otherwise.
4223 TODO: This function should be made smarter so that it can handle address
4224 arithmetic as if they were array accesses, etc. */
4226 tree
4227 find_data_references_in_loop (struct loop *loop,
4228 VEC (data_reference_p, heap) **datarefs)
4230 basic_block bb, *bbs;
4231 unsigned int i;
4233 bbs = get_loop_body_in_dom_order (loop);
4235 for (i = 0; i < loop->num_nodes; i++)
4237 bb = bbs[i];
4239 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4241 free (bbs);
4242 return chrec_dont_know;
4245 free (bbs);
4247 return NULL_TREE;
4250 /* Recursive helper function. */
4252 static bool
4253 find_loop_nest_1 (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4255 /* Inner loops of the nest should not contain siblings. Example:
4256 when there are two consecutive loops,
4258 | loop_0
4259 | loop_1
4260 | A[{0, +, 1}_1]
4261 | endloop_1
4262 | loop_2
4263 | A[{0, +, 1}_2]
4264 | endloop_2
4265 | endloop_0
4267 the dependence relation cannot be captured by the distance
4268 abstraction. */
4269 if (loop->next)
4270 return false;
4272 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4273 if (loop->inner)
4274 return find_loop_nest_1 (loop->inner, loop_nest);
4275 return true;
4278 /* Return false when the LOOP is not well nested. Otherwise return
4279 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4280 contain the loops from the outermost to the innermost, as they will
4281 appear in the classic distance vector. */
4283 bool
4284 find_loop_nest (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4286 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4287 if (loop->inner)
4288 return find_loop_nest_1 (loop->inner, loop_nest);
4289 return true;
4292 /* Returns true when the data dependences have been computed, false otherwise.
4293 Given a loop nest LOOP, the following vectors are returned:
4294 DATAREFS is initialized to all the array elements contained in this loop,
4295 DEPENDENCE_RELATIONS contains the relations between the data references.
4296 Compute read-read and self relations if
4297 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4299 bool
4300 compute_data_dependences_for_loop (struct loop *loop,
4301 bool compute_self_and_read_read_dependences,
4302 VEC (data_reference_p, heap) **datarefs,
4303 VEC (ddr_p, heap) **dependence_relations)
4305 bool res = true;
4306 VEC (loop_p, heap) *vloops = VEC_alloc (loop_p, heap, 3);
4308 memset (&dependence_stats, 0, sizeof (dependence_stats));
4310 /* If the loop nest is not well formed, or one of the data references
4311 is not computable, give up without spending time to compute other
4312 dependences. */
4313 if (!loop
4314 || !find_loop_nest (loop, &vloops)
4315 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know)
4317 struct data_dependence_relation *ddr;
4319 /* Insert a single relation into dependence_relations:
4320 chrec_dont_know. */
4321 ddr = initialize_data_dependence_relation (NULL, NULL, vloops);
4322 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4323 res = false;
4325 else
4326 compute_all_dependences (*datarefs, dependence_relations, vloops,
4327 compute_self_and_read_read_dependences);
4329 if (dump_file && (dump_flags & TDF_STATS))
4331 fprintf (dump_file, "Dependence tester statistics:\n");
4333 fprintf (dump_file, "Number of dependence tests: %d\n",
4334 dependence_stats.num_dependence_tests);
4335 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4336 dependence_stats.num_dependence_dependent);
4337 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4338 dependence_stats.num_dependence_independent);
4339 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4340 dependence_stats.num_dependence_undetermined);
4342 fprintf (dump_file, "Number of subscript tests: %d\n",
4343 dependence_stats.num_subscript_tests);
4344 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4345 dependence_stats.num_subscript_undetermined);
4346 fprintf (dump_file, "Number of same subscript function: %d\n",
4347 dependence_stats.num_same_subscript_function);
4349 fprintf (dump_file, "Number of ziv tests: %d\n",
4350 dependence_stats.num_ziv);
4351 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4352 dependence_stats.num_ziv_dependent);
4353 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4354 dependence_stats.num_ziv_independent);
4355 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4356 dependence_stats.num_ziv_unimplemented);
4358 fprintf (dump_file, "Number of siv tests: %d\n",
4359 dependence_stats.num_siv);
4360 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4361 dependence_stats.num_siv_dependent);
4362 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4363 dependence_stats.num_siv_independent);
4364 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4365 dependence_stats.num_siv_unimplemented);
4367 fprintf (dump_file, "Number of miv tests: %d\n",
4368 dependence_stats.num_miv);
4369 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4370 dependence_stats.num_miv_dependent);
4371 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4372 dependence_stats.num_miv_independent);
4373 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4374 dependence_stats.num_miv_unimplemented);
4377 return res;
4380 /* Returns true when the data dependences for the basic block BB have been
4381 computed, false otherwise.
4382 DATAREFS is initialized to all the array elements contained in this basic
4383 block, DEPENDENCE_RELATIONS contains the relations between the data
4384 references. Compute read-read and self relations if
4385 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4386 bool
4387 compute_data_dependences_for_bb (basic_block bb,
4388 bool compute_self_and_read_read_dependences,
4389 VEC (data_reference_p, heap) **datarefs,
4390 VEC (ddr_p, heap) **dependence_relations)
4392 if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know)
4393 return false;
4395 compute_all_dependences (*datarefs, dependence_relations, NULL,
4396 compute_self_and_read_read_dependences);
4397 return true;
4400 /* Entry point (for testing only). Analyze all the data references
4401 and the dependence relations in LOOP.
4403 The data references are computed first.
4405 A relation on these nodes is represented by a complete graph. Some
4406 of the relations could be of no interest, thus the relations can be
4407 computed on demand.
4409 In the following function we compute all the relations. This is
4410 just a first implementation that is here for:
4411 - for showing how to ask for the dependence relations,
4412 - for the debugging the whole dependence graph,
4413 - for the dejagnu testcases and maintenance.
4415 It is possible to ask only for a part of the graph, avoiding to
4416 compute the whole dependence graph. The computed dependences are
4417 stored in a knowledge base (KB) such that later queries don't
4418 recompute the same information. The implementation of this KB is
4419 transparent to the optimizer, and thus the KB can be changed with a
4420 more efficient implementation, or the KB could be disabled. */
4421 static void
4422 analyze_all_data_dependences (struct loop *loop)
4424 unsigned int i;
4425 int nb_data_refs = 10;
4426 VEC (data_reference_p, heap) *datarefs =
4427 VEC_alloc (data_reference_p, heap, nb_data_refs);
4428 VEC (ddr_p, heap) *dependence_relations =
4429 VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs);
4431 /* Compute DDs on the whole function. */
4432 compute_data_dependences_for_loop (loop, false, &datarefs,
4433 &dependence_relations);
4435 if (dump_file)
4437 dump_data_dependence_relations (dump_file, dependence_relations);
4438 fprintf (dump_file, "\n\n");
4440 if (dump_flags & TDF_DETAILS)
4441 dump_dist_dir_vectors (dump_file, dependence_relations);
4443 if (dump_flags & TDF_STATS)
4445 unsigned nb_top_relations = 0;
4446 unsigned nb_bot_relations = 0;
4447 unsigned nb_chrec_relations = 0;
4448 struct data_dependence_relation *ddr;
4450 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
4452 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4453 nb_top_relations++;
4455 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4456 nb_bot_relations++;
4458 else
4459 nb_chrec_relations++;
4462 gather_stats_on_scev_database ();
4466 free_dependence_relations (dependence_relations);
4467 free_data_refs (datarefs);
4470 /* Computes all the data dependences and check that the results of
4471 several analyzers are the same. */
4473 void
4474 tree_check_data_deps (void)
4476 loop_iterator li;
4477 struct loop *loop_nest;
4479 FOR_EACH_LOOP (li, loop_nest, 0)
4480 analyze_all_data_dependences (loop_nest);
4483 /* Free the memory used by a data dependence relation DDR. */
4485 void
4486 free_dependence_relation (struct data_dependence_relation *ddr)
4488 if (ddr == NULL)
4489 return;
4491 if (DDR_SUBSCRIPTS (ddr))
4492 free_subscripts (DDR_SUBSCRIPTS (ddr));
4493 if (DDR_DIST_VECTS (ddr))
4494 VEC_free (lambda_vector, heap, DDR_DIST_VECTS (ddr));
4495 if (DDR_DIR_VECTS (ddr))
4496 VEC_free (lambda_vector, heap, DDR_DIR_VECTS (ddr));
4498 free (ddr);
4501 /* Free the memory used by the data dependence relations from
4502 DEPENDENCE_RELATIONS. */
4504 void
4505 free_dependence_relations (VEC (ddr_p, heap) *dependence_relations)
4507 unsigned int i;
4508 struct data_dependence_relation *ddr;
4509 VEC (loop_p, heap) *loop_nest = NULL;
4511 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
4513 if (ddr == NULL)
4514 continue;
4515 if (loop_nest == NULL)
4516 loop_nest = DDR_LOOP_NEST (ddr);
4517 else
4518 gcc_assert (DDR_LOOP_NEST (ddr) == NULL
4519 || DDR_LOOP_NEST (ddr) == loop_nest);
4520 free_dependence_relation (ddr);
4523 if (loop_nest)
4524 VEC_free (loop_p, heap, loop_nest);
4525 VEC_free (ddr_p, heap, dependence_relations);
4528 /* Free the memory used by the data references from DATAREFS. */
4530 void
4531 free_data_refs (VEC (data_reference_p, heap) *datarefs)
4533 unsigned int i;
4534 struct data_reference *dr;
4536 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
4537 free_data_ref (dr);
4538 VEC_free (data_reference_p, heap, datarefs);
4543 /* Dump vertex I in RDG to FILE. */
4545 void
4546 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
4548 struct vertex *v = &(rdg->vertices[i]);
4549 struct graph_edge *e;
4551 fprintf (file, "(vertex %d: (%s%s) (in:", i,
4552 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
4553 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
4555 if (v->pred)
4556 for (e = v->pred; e; e = e->pred_next)
4557 fprintf (file, " %d", e->src);
4559 fprintf (file, ") (out:");
4561 if (v->succ)
4562 for (e = v->succ; e; e = e->succ_next)
4563 fprintf (file, " %d", e->dest);
4565 fprintf (file, ") \n");
4566 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
4567 fprintf (file, ")\n");
4570 /* Call dump_rdg_vertex on stderr. */
4572 void
4573 debug_rdg_vertex (struct graph *rdg, int i)
4575 dump_rdg_vertex (stderr, rdg, i);
4578 /* Dump component C of RDG to FILE. If DUMPED is non-null, set the
4579 dumped vertices to that bitmap. */
4581 void dump_rdg_component (FILE *file, struct graph *rdg, int c, bitmap dumped)
4583 int i;
4585 fprintf (file, "(%d\n", c);
4587 for (i = 0; i < rdg->n_vertices; i++)
4588 if (rdg->vertices[i].component == c)
4590 if (dumped)
4591 bitmap_set_bit (dumped, i);
4593 dump_rdg_vertex (file, rdg, i);
4596 fprintf (file, ")\n");
4599 /* Call dump_rdg_vertex on stderr. */
4601 void
4602 debug_rdg_component (struct graph *rdg, int c)
4604 dump_rdg_component (stderr, rdg, c, NULL);
4607 /* Dump the reduced dependence graph RDG to FILE. */
4609 void
4610 dump_rdg (FILE *file, struct graph *rdg)
4612 int i;
4613 bitmap dumped = BITMAP_ALLOC (NULL);
4615 fprintf (file, "(rdg\n");
4617 for (i = 0; i < rdg->n_vertices; i++)
4618 if (!bitmap_bit_p (dumped, i))
4619 dump_rdg_component (file, rdg, rdg->vertices[i].component, dumped);
4621 fprintf (file, ")\n");
4622 BITMAP_FREE (dumped);
4625 /* Call dump_rdg on stderr. */
4627 void
4628 debug_rdg (struct graph *rdg)
4630 dump_rdg (stderr, rdg);
4633 static void
4634 dot_rdg_1 (FILE *file, struct graph *rdg)
4636 int i;
4638 fprintf (file, "digraph RDG {\n");
4640 for (i = 0; i < rdg->n_vertices; i++)
4642 struct vertex *v = &(rdg->vertices[i]);
4643 struct graph_edge *e;
4645 /* Highlight reads from memory. */
4646 if (RDG_MEM_READS_STMT (rdg, i))
4647 fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
4649 /* Highlight stores to memory. */
4650 if (RDG_MEM_WRITE_STMT (rdg, i))
4651 fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
4653 if (v->succ)
4654 for (e = v->succ; e; e = e->succ_next)
4655 switch (RDGE_TYPE (e))
4657 case input_dd:
4658 fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
4659 break;
4661 case output_dd:
4662 fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
4663 break;
4665 case flow_dd:
4666 /* These are the most common dependences: don't print these. */
4667 fprintf (file, "%d -> %d \n", i, e->dest);
4668 break;
4670 case anti_dd:
4671 fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
4672 break;
4674 default:
4675 gcc_unreachable ();
4679 fprintf (file, "}\n\n");
4682 /* Display SCOP using dotty. */
4684 void
4685 dot_rdg (struct graph *rdg)
4687 FILE *file = fopen ("/tmp/rdg.dot", "w");
4688 gcc_assert (file != NULL);
4690 dot_rdg_1 (file, rdg);
4691 fclose (file);
4693 system ("dotty /tmp/rdg.dot");
4697 /* This structure is used for recording the mapping statement index in
4698 the RDG. */
4700 struct GTY(()) rdg_vertex_info
4702 gimple stmt;
4703 int index;
4706 /* Returns the index of STMT in RDG. */
4709 rdg_vertex_for_stmt (struct graph *rdg, gimple stmt)
4711 struct rdg_vertex_info rvi, *slot;
4713 rvi.stmt = stmt;
4714 slot = (struct rdg_vertex_info *) htab_find (rdg->indices, &rvi);
4716 if (!slot)
4717 return -1;
4719 return slot->index;
4722 /* Creates an edge in RDG for each distance vector from DDR. The
4723 order that we keep track of in the RDG is the order in which
4724 statements have to be executed. */
4726 static void
4727 create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4729 struct graph_edge *e;
4730 int va, vb;
4731 data_reference_p dra = DDR_A (ddr);
4732 data_reference_p drb = DDR_B (ddr);
4733 unsigned level = ddr_dependence_level (ddr);
4735 /* For non scalar dependences, when the dependence is REVERSED,
4736 statement B has to be executed before statement A. */
4737 if (level > 0
4738 && !DDR_REVERSED_P (ddr))
4740 data_reference_p tmp = dra;
4741 dra = drb;
4742 drb = tmp;
4745 va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
4746 vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
4748 if (va < 0 || vb < 0)
4749 return;
4751 e = add_edge (rdg, va, vb);
4752 e->data = XNEW (struct rdg_edge);
4754 RDGE_LEVEL (e) = level;
4755 RDGE_RELATION (e) = ddr;
4757 /* Determines the type of the data dependence. */
4758 if (DR_IS_READ (dra) && DR_IS_READ (drb))
4759 RDGE_TYPE (e) = input_dd;
4760 else if (!DR_IS_READ (dra) && !DR_IS_READ (drb))
4761 RDGE_TYPE (e) = output_dd;
4762 else if (!DR_IS_READ (dra) && DR_IS_READ (drb))
4763 RDGE_TYPE (e) = flow_dd;
4764 else if (DR_IS_READ (dra) && !DR_IS_READ (drb))
4765 RDGE_TYPE (e) = anti_dd;
4768 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
4769 the index of DEF in RDG. */
4771 static void
4772 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
4774 use_operand_p imm_use_p;
4775 imm_use_iterator iterator;
4777 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
4779 struct graph_edge *e;
4780 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
4782 if (use < 0)
4783 continue;
4785 e = add_edge (rdg, idef, use);
4786 e->data = XNEW (struct rdg_edge);
4787 RDGE_TYPE (e) = flow_dd;
4788 RDGE_RELATION (e) = NULL;
4792 /* Creates the edges of the reduced dependence graph RDG. */
4794 static void
4795 create_rdg_edges (struct graph *rdg, VEC (ddr_p, heap) *ddrs)
4797 int i;
4798 struct data_dependence_relation *ddr;
4799 def_operand_p def_p;
4800 ssa_op_iter iter;
4802 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
4803 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4804 create_rdg_edge_for_ddr (rdg, ddr);
4806 for (i = 0; i < rdg->n_vertices; i++)
4807 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
4808 iter, SSA_OP_DEF)
4809 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
4812 /* Build the vertices of the reduced dependence graph RDG. */
4814 void
4815 create_rdg_vertices (struct graph *rdg, VEC (gimple, heap) *stmts)
4817 int i, j;
4818 gimple stmt;
4820 for (i = 0; VEC_iterate (gimple, stmts, i, stmt); i++)
4822 VEC (data_ref_loc, heap) *references;
4823 data_ref_loc *ref;
4824 struct vertex *v = &(rdg->vertices[i]);
4825 struct rdg_vertex_info *rvi = XNEW (struct rdg_vertex_info);
4826 struct rdg_vertex_info **slot;
4828 rvi->stmt = stmt;
4829 rvi->index = i;
4830 slot = (struct rdg_vertex_info **) htab_find_slot (rdg->indices, rvi, INSERT);
4832 if (!*slot)
4833 *slot = rvi;
4834 else
4835 free (rvi);
4837 v->data = XNEW (struct rdg_vertex);
4838 RDG_STMT (rdg, i) = stmt;
4840 RDG_MEM_WRITE_STMT (rdg, i) = false;
4841 RDG_MEM_READS_STMT (rdg, i) = false;
4842 if (gimple_code (stmt) == GIMPLE_PHI)
4843 continue;
4845 get_references_in_stmt (stmt, &references);
4846 for (j = 0; VEC_iterate (data_ref_loc, references, j, ref); j++)
4847 if (!ref->is_read)
4848 RDG_MEM_WRITE_STMT (rdg, i) = true;
4849 else
4850 RDG_MEM_READS_STMT (rdg, i) = true;
4852 VEC_free (data_ref_loc, heap, references);
4856 /* Initialize STMTS with all the statements of LOOP. When
4857 INCLUDE_PHIS is true, include also the PHI nodes. The order in
4858 which we discover statements is important as
4859 generate_loops_for_partition is using the same traversal for
4860 identifying statements. */
4862 static void
4863 stmts_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
4865 unsigned int i;
4866 basic_block *bbs = get_loop_body_in_dom_order (loop);
4868 for (i = 0; i < loop->num_nodes; i++)
4870 basic_block bb = bbs[i];
4871 gimple_stmt_iterator bsi;
4872 gimple stmt;
4874 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4875 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
4877 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4879 stmt = gsi_stmt (bsi);
4880 if (gimple_code (stmt) != GIMPLE_LABEL)
4881 VEC_safe_push (gimple, heap, *stmts, stmt);
4885 free (bbs);
4888 /* Returns true when all the dependences are computable. */
4890 static bool
4891 known_dependences_p (VEC (ddr_p, heap) *dependence_relations)
4893 ddr_p ddr;
4894 unsigned int i;
4896 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
4897 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4898 return false;
4900 return true;
4903 /* Computes a hash function for element ELT. */
4905 static hashval_t
4906 hash_stmt_vertex_info (const void *elt)
4908 const struct rdg_vertex_info *const rvi =
4909 (const struct rdg_vertex_info *) elt;
4910 gimple stmt = rvi->stmt;
4912 return htab_hash_pointer (stmt);
4915 /* Compares database elements E1 and E2. */
4917 static int
4918 eq_stmt_vertex_info (const void *e1, const void *e2)
4920 const struct rdg_vertex_info *elt1 = (const struct rdg_vertex_info *) e1;
4921 const struct rdg_vertex_info *elt2 = (const struct rdg_vertex_info *) e2;
4923 return elt1->stmt == elt2->stmt;
4926 /* Free the element E. */
4928 static void
4929 hash_stmt_vertex_del (void *e)
4931 free (e);
4934 /* Build the Reduced Dependence Graph (RDG) with one vertex per
4935 statement of the loop nest, and one edge per data dependence or
4936 scalar dependence. */
4938 struct graph *
4939 build_empty_rdg (int n_stmts)
4941 int nb_data_refs = 10;
4942 struct graph *rdg = new_graph (n_stmts);
4944 rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
4945 eq_stmt_vertex_info, hash_stmt_vertex_del);
4946 return rdg;
4949 /* Build the Reduced Dependence Graph (RDG) with one vertex per
4950 statement of the loop nest, and one edge per data dependence or
4951 scalar dependence. */
4953 struct graph *
4954 build_rdg (struct loop *loop)
4956 int nb_data_refs = 10;
4957 struct graph *rdg = NULL;
4958 VEC (ddr_p, heap) *dependence_relations;
4959 VEC (data_reference_p, heap) *datarefs;
4960 VEC (gimple, heap) *stmts = VEC_alloc (gimple, heap, nb_data_refs);
4962 dependence_relations = VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs) ;
4963 datarefs = VEC_alloc (data_reference_p, heap, nb_data_refs);
4964 compute_data_dependences_for_loop (loop,
4965 false,
4966 &datarefs,
4967 &dependence_relations);
4969 if (!known_dependences_p (dependence_relations))
4971 free_dependence_relations (dependence_relations);
4972 free_data_refs (datarefs);
4973 VEC_free (gimple, heap, stmts);
4975 return rdg;
4978 stmts_from_loop (loop, &stmts);
4979 rdg = build_empty_rdg (VEC_length (gimple, stmts));
4981 rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
4982 eq_stmt_vertex_info, hash_stmt_vertex_del);
4983 create_rdg_vertices (rdg, stmts);
4984 create_rdg_edges (rdg, dependence_relations);
4986 VEC_free (gimple, heap, stmts);
4987 return rdg;
4990 /* Free the reduced dependence graph RDG. */
4992 void
4993 free_rdg (struct graph *rdg)
4995 int i;
4997 for (i = 0; i < rdg->n_vertices; i++)
4998 free (rdg->vertices[i].data);
5000 htab_delete (rdg->indices);
5001 free_graph (rdg);
5004 /* Initialize STMTS with all the statements of LOOP that contain a
5005 store to memory. */
5007 void
5008 stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5010 unsigned int i;
5011 basic_block *bbs = get_loop_body_in_dom_order (loop);
5013 for (i = 0; i < loop->num_nodes; i++)
5015 basic_block bb = bbs[i];
5016 gimple_stmt_iterator bsi;
5018 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5019 if (gimple_vdef (gsi_stmt (bsi)))
5020 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
5023 free (bbs);
5026 /* For a data reference REF, return the declaration of its base
5027 address or NULL_TREE if the base is not determined. */
5029 static inline tree
5030 ref_base_address (gimple stmt, data_ref_loc *ref)
5032 tree base = NULL_TREE;
5033 tree base_address;
5034 struct data_reference *dr = XCNEW (struct data_reference);
5036 DR_STMT (dr) = stmt;
5037 DR_REF (dr) = *ref->pos;
5038 dr_analyze_innermost (dr);
5039 base_address = DR_BASE_ADDRESS (dr);
5041 if (!base_address)
5042 goto end;
5044 switch (TREE_CODE (base_address))
5046 case ADDR_EXPR:
5047 base = TREE_OPERAND (base_address, 0);
5048 break;
5050 default:
5051 base = base_address;
5052 break;
5055 end:
5056 free_data_ref (dr);
5057 return base;
5060 /* Determines whether the statement from vertex V of the RDG has a
5061 definition used outside the loop that contains this statement. */
5063 bool
5064 rdg_defs_used_in_other_loops_p (struct graph *rdg, int v)
5066 gimple stmt = RDG_STMT (rdg, v);
5067 struct loop *loop = loop_containing_stmt (stmt);
5068 use_operand_p imm_use_p;
5069 imm_use_iterator iterator;
5070 ssa_op_iter it;
5071 def_operand_p def_p;
5073 if (!loop)
5074 return true;
5076 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, it, SSA_OP_DEF)
5078 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, DEF_FROM_PTR (def_p))
5080 if (loop_containing_stmt (USE_STMT (imm_use_p)) != loop)
5081 return true;
5085 return false;
5088 /* Determines whether statements S1 and S2 access to similar memory
5089 locations. Two memory accesses are considered similar when they
5090 have the same base address declaration, i.e. when their
5091 ref_base_address is the same. */
5093 bool
5094 have_similar_memory_accesses (gimple s1, gimple s2)
5096 bool res = false;
5097 unsigned i, j;
5098 VEC (data_ref_loc, heap) *refs1, *refs2;
5099 data_ref_loc *ref1, *ref2;
5101 get_references_in_stmt (s1, &refs1);
5102 get_references_in_stmt (s2, &refs2);
5104 for (i = 0; VEC_iterate (data_ref_loc, refs1, i, ref1); i++)
5106 tree base1 = ref_base_address (s1, ref1);
5108 if (base1)
5109 for (j = 0; VEC_iterate (data_ref_loc, refs2, j, ref2); j++)
5110 if (base1 == ref_base_address (s2, ref2))
5112 res = true;
5113 goto end;
5117 end:
5118 VEC_free (data_ref_loc, heap, refs1);
5119 VEC_free (data_ref_loc, heap, refs2);
5120 return res;
5123 /* Helper function for the hashtab. */
5125 static int
5126 have_similar_memory_accesses_1 (const void *s1, const void *s2)
5128 return have_similar_memory_accesses (CONST_CAST_GIMPLE ((const_gimple) s1),
5129 CONST_CAST_GIMPLE ((const_gimple) s2));
5132 /* Helper function for the hashtab. */
5134 static hashval_t
5135 ref_base_address_1 (const void *s)
5137 gimple stmt = CONST_CAST_GIMPLE ((const_gimple) s);
5138 unsigned i;
5139 VEC (data_ref_loc, heap) *refs;
5140 data_ref_loc *ref;
5141 hashval_t res = 0;
5143 get_references_in_stmt (stmt, &refs);
5145 for (i = 0; VEC_iterate (data_ref_loc, refs, i, ref); i++)
5146 if (!ref->is_read)
5148 res = htab_hash_pointer (ref_base_address (stmt, ref));
5149 break;
5152 VEC_free (data_ref_loc, heap, refs);
5153 return res;
5156 /* Try to remove duplicated write data references from STMTS. */
5158 void
5159 remove_similar_memory_refs (VEC (gimple, heap) **stmts)
5161 unsigned i;
5162 gimple stmt;
5163 htab_t seen = htab_create (VEC_length (gimple, *stmts), ref_base_address_1,
5164 have_similar_memory_accesses_1, NULL);
5166 for (i = 0; VEC_iterate (gimple, *stmts, i, stmt); )
5168 void **slot;
5170 slot = htab_find_slot (seen, stmt, INSERT);
5172 if (*slot)
5173 VEC_ordered_remove (gimple, *stmts, i);
5174 else
5176 *slot = (void *) stmt;
5177 i++;
5181 htab_delete (seen);
5184 /* Returns the index of PARAMETER in the parameters vector of the
5185 ACCESS_MATRIX. If PARAMETER does not exist return -1. */
5187 int
5188 access_matrix_get_index_for_parameter (tree parameter,
5189 struct access_matrix *access_matrix)
5191 int i;
5192 VEC (tree,heap) *lambda_parameters = AM_PARAMETERS (access_matrix);
5193 tree lambda_parameter;
5195 for (i = 0; VEC_iterate (tree, lambda_parameters, i, lambda_parameter); i++)
5196 if (lambda_parameter == parameter)
5197 return i + AM_NB_INDUCTION_VARS (access_matrix);
5199 return -1;