PR middle-end/40080
[official-gcc/constexpr.git] / gcc / tree-data-ref.c
blobde2ad5f921bad56c8d4ee49333ee8dcf75bac1f3
1 /* Data references and dependences detectors.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009
3 Free Software Foundation, Inc.
4 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* This pass walks a given loop structure searching for array
23 references. The information about the array accesses is recorded
24 in DATA_REFERENCE structures.
26 The basic test for determining the dependences is:
27 given two access functions chrec1 and chrec2 to a same array, and
28 x and y two vectors from the iteration domain, the same element of
29 the array is accessed twice at iterations x and y if and only if:
30 | chrec1 (x) == chrec2 (y).
32 The goals of this analysis are:
34 - to determine the independence: the relation between two
35 independent accesses is qualified with the chrec_known (this
36 information allows a loop parallelization),
38 - when two data references access the same data, to qualify the
39 dependence relation with classic dependence representations:
41 - distance vectors
42 - direction vectors
43 - loop carried level dependence
44 - polyhedron dependence
45 or with the chains of recurrences based representation,
47 - to define a knowledge base for storing the data dependence
48 information,
50 - to define an interface to access this data.
53 Definitions:
55 - subscript: given two array accesses a subscript is the tuple
56 composed of the access functions for a given dimension. Example:
57 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
58 (f1, g1), (f2, g2), (f3, g3).
60 - Diophantine equation: an equation whose coefficients and
61 solutions are integer constants, for example the equation
62 | 3*x + 2*y = 1
63 has an integer solution x = 1 and y = -1.
65 References:
67 - "Advanced Compilation for High Performance Computing" by Randy
68 Allen and Ken Kennedy.
69 http://citeseer.ist.psu.edu/goff91practical.html
71 - "Loop Transformations for Restructuring Compilers - The Foundations"
72 by Utpal Banerjee.
77 #include "config.h"
78 #include "system.h"
79 #include "coretypes.h"
80 #include "tm.h"
81 #include "ggc.h"
82 #include "tree.h"
84 /* These RTL headers are needed for basic-block.h. */
85 #include "rtl.h"
86 #include "basic-block.h"
87 #include "diagnostic.h"
88 #include "tree-flow.h"
89 #include "tree-dump.h"
90 #include "timevar.h"
91 #include "cfgloop.h"
92 #include "tree-data-ref.h"
93 #include "tree-scalar-evolution.h"
94 #include "tree-pass.h"
95 #include "langhooks.h"
97 static struct datadep_stats
99 int num_dependence_tests;
100 int num_dependence_dependent;
101 int num_dependence_independent;
102 int num_dependence_undetermined;
104 int num_subscript_tests;
105 int num_subscript_undetermined;
106 int num_same_subscript_function;
108 int num_ziv;
109 int num_ziv_independent;
110 int num_ziv_dependent;
111 int num_ziv_unimplemented;
113 int num_siv;
114 int num_siv_independent;
115 int num_siv_dependent;
116 int num_siv_unimplemented;
118 int num_miv;
119 int num_miv_independent;
120 int num_miv_dependent;
121 int num_miv_unimplemented;
122 } dependence_stats;
124 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
125 struct data_reference *,
126 struct data_reference *,
127 struct loop *);
128 /* Returns true iff A divides B. */
130 static inline bool
131 tree_fold_divides_p (const_tree a, const_tree b)
133 gcc_assert (TREE_CODE (a) == INTEGER_CST);
134 gcc_assert (TREE_CODE (b) == INTEGER_CST);
135 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a, 0));
138 /* Returns true iff A divides B. */
140 static inline bool
141 int_divides_p (int a, int b)
143 return ((b % a) == 0);
148 /* Dump into FILE all the data references from DATAREFS. */
150 void
151 dump_data_references (FILE *file, VEC (data_reference_p, heap) *datarefs)
153 unsigned int i;
154 struct data_reference *dr;
156 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
157 dump_data_reference (file, dr);
160 /* Dump to STDERR all the dependence relations from DDRS. */
162 void
163 debug_data_dependence_relations (VEC (ddr_p, heap) *ddrs)
165 dump_data_dependence_relations (stderr, ddrs);
168 /* Dump into FILE all the dependence relations from DDRS. */
170 void
171 dump_data_dependence_relations (FILE *file,
172 VEC (ddr_p, heap) *ddrs)
174 unsigned int i;
175 struct data_dependence_relation *ddr;
177 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
178 dump_data_dependence_relation (file, ddr);
181 /* Dump function for a DATA_REFERENCE structure. */
183 void
184 dump_data_reference (FILE *outf,
185 struct data_reference *dr)
187 unsigned int i;
189 fprintf (outf, "(Data Ref: \n stmt: ");
190 print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
191 fprintf (outf, " ref: ");
192 print_generic_stmt (outf, DR_REF (dr), 0);
193 fprintf (outf, " base_object: ");
194 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
196 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
198 fprintf (outf, " Access function %d: ", i);
199 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
201 fprintf (outf, ")\n");
204 /* Dumps the affine function described by FN to the file OUTF. */
206 static void
207 dump_affine_function (FILE *outf, affine_fn fn)
209 unsigned i;
210 tree coef;
212 print_generic_expr (outf, VEC_index (tree, fn, 0), TDF_SLIM);
213 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
215 fprintf (outf, " + ");
216 print_generic_expr (outf, coef, TDF_SLIM);
217 fprintf (outf, " * x_%u", i);
221 /* Dumps the conflict function CF to the file OUTF. */
223 static void
224 dump_conflict_function (FILE *outf, conflict_function *cf)
226 unsigned i;
228 if (cf->n == NO_DEPENDENCE)
229 fprintf (outf, "no dependence\n");
230 else if (cf->n == NOT_KNOWN)
231 fprintf (outf, "not known\n");
232 else
234 for (i = 0; i < cf->n; i++)
236 fprintf (outf, "[");
237 dump_affine_function (outf, cf->fns[i]);
238 fprintf (outf, "]\n");
243 /* Dump function for a SUBSCRIPT structure. */
245 void
246 dump_subscript (FILE *outf, struct subscript *subscript)
248 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
250 fprintf (outf, "\n (subscript \n");
251 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
252 dump_conflict_function (outf, cf);
253 if (CF_NONTRIVIAL_P (cf))
255 tree last_iteration = SUB_LAST_CONFLICT (subscript);
256 fprintf (outf, " last_conflict: ");
257 print_generic_stmt (outf, last_iteration, 0);
260 cf = SUB_CONFLICTS_IN_B (subscript);
261 fprintf (outf, " iterations_that_access_an_element_twice_in_B: ");
262 dump_conflict_function (outf, cf);
263 if (CF_NONTRIVIAL_P (cf))
265 tree last_iteration = SUB_LAST_CONFLICT (subscript);
266 fprintf (outf, " last_conflict: ");
267 print_generic_stmt (outf, last_iteration, 0);
270 fprintf (outf, " (Subscript distance: ");
271 print_generic_stmt (outf, SUB_DISTANCE (subscript), 0);
272 fprintf (outf, " )\n");
273 fprintf (outf, " )\n");
276 /* Print the classic direction vector DIRV to OUTF. */
278 void
279 print_direction_vector (FILE *outf,
280 lambda_vector dirv,
281 int length)
283 int eq;
285 for (eq = 0; eq < length; eq++)
287 enum data_dependence_direction dir = ((enum data_dependence_direction)
288 dirv[eq]);
290 switch (dir)
292 case dir_positive:
293 fprintf (outf, " +");
294 break;
295 case dir_negative:
296 fprintf (outf, " -");
297 break;
298 case dir_equal:
299 fprintf (outf, " =");
300 break;
301 case dir_positive_or_equal:
302 fprintf (outf, " +=");
303 break;
304 case dir_positive_or_negative:
305 fprintf (outf, " +-");
306 break;
307 case dir_negative_or_equal:
308 fprintf (outf, " -=");
309 break;
310 case dir_star:
311 fprintf (outf, " *");
312 break;
313 default:
314 fprintf (outf, "indep");
315 break;
318 fprintf (outf, "\n");
321 /* Print a vector of direction vectors. */
323 void
324 print_dir_vectors (FILE *outf, VEC (lambda_vector, heap) *dir_vects,
325 int length)
327 unsigned j;
328 lambda_vector v;
330 for (j = 0; VEC_iterate (lambda_vector, dir_vects, j, v); j++)
331 print_direction_vector (outf, v, length);
334 /* Print a vector of distance vectors. */
336 void
337 print_dist_vectors (FILE *outf, VEC (lambda_vector, heap) *dist_vects,
338 int length)
340 unsigned j;
341 lambda_vector v;
343 for (j = 0; VEC_iterate (lambda_vector, dist_vects, j, v); j++)
344 print_lambda_vector (outf, v, length);
347 /* Debug version. */
349 void
350 debug_data_dependence_relation (struct data_dependence_relation *ddr)
352 dump_data_dependence_relation (stderr, ddr);
355 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
357 void
358 dump_data_dependence_relation (FILE *outf,
359 struct data_dependence_relation *ddr)
361 struct data_reference *dra, *drb;
363 fprintf (outf, "(Data Dep: \n");
365 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
367 fprintf (outf, " (don't know)\n)\n");
368 return;
371 dra = DDR_A (ddr);
372 drb = DDR_B (ddr);
373 dump_data_reference (outf, dra);
374 dump_data_reference (outf, drb);
376 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
377 fprintf (outf, " (no dependence)\n");
379 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
381 unsigned int i;
382 struct loop *loopi;
384 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
386 fprintf (outf, " access_fn_A: ");
387 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
388 fprintf (outf, " access_fn_B: ");
389 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
390 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
393 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
394 fprintf (outf, " loop nest: (");
395 for (i = 0; VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
396 fprintf (outf, "%d ", loopi->num);
397 fprintf (outf, ")\n");
399 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
401 fprintf (outf, " distance_vector: ");
402 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
403 DDR_NB_LOOPS (ddr));
406 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
408 fprintf (outf, " direction_vector: ");
409 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
410 DDR_NB_LOOPS (ddr));
414 fprintf (outf, ")\n");
417 /* Dump function for a DATA_DEPENDENCE_DIRECTION structure. */
419 void
420 dump_data_dependence_direction (FILE *file,
421 enum data_dependence_direction dir)
423 switch (dir)
425 case dir_positive:
426 fprintf (file, "+");
427 break;
429 case dir_negative:
430 fprintf (file, "-");
431 break;
433 case dir_equal:
434 fprintf (file, "=");
435 break;
437 case dir_positive_or_negative:
438 fprintf (file, "+-");
439 break;
441 case dir_positive_or_equal:
442 fprintf (file, "+=");
443 break;
445 case dir_negative_or_equal:
446 fprintf (file, "-=");
447 break;
449 case dir_star:
450 fprintf (file, "*");
451 break;
453 default:
454 break;
458 /* Dumps the distance and direction vectors in FILE. DDRS contains
459 the dependence relations, and VECT_SIZE is the size of the
460 dependence vectors, or in other words the number of loops in the
461 considered nest. */
463 void
464 dump_dist_dir_vectors (FILE *file, VEC (ddr_p, heap) *ddrs)
466 unsigned int i, j;
467 struct data_dependence_relation *ddr;
468 lambda_vector v;
470 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
471 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
473 for (j = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), j, v); j++)
475 fprintf (file, "DISTANCE_V (");
476 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
477 fprintf (file, ")\n");
480 for (j = 0; VEC_iterate (lambda_vector, DDR_DIR_VECTS (ddr), j, v); j++)
482 fprintf (file, "DIRECTION_V (");
483 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
484 fprintf (file, ")\n");
488 fprintf (file, "\n\n");
491 /* Dumps the data dependence relations DDRS in FILE. */
493 void
494 dump_ddrs (FILE *file, VEC (ddr_p, heap) *ddrs)
496 unsigned int i;
497 struct data_dependence_relation *ddr;
499 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
500 dump_data_dependence_relation (file, ddr);
502 fprintf (file, "\n\n");
505 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
506 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
507 constant of type ssizetype, and returns true. If we cannot do this
508 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
509 is returned. */
511 static bool
512 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
513 tree *var, tree *off)
515 tree var0, var1;
516 tree off0, off1;
517 enum tree_code ocode = code;
519 *var = NULL_TREE;
520 *off = NULL_TREE;
522 switch (code)
524 case INTEGER_CST:
525 *var = build_int_cst (type, 0);
526 *off = fold_convert (ssizetype, op0);
527 return true;
529 case POINTER_PLUS_EXPR:
530 ocode = PLUS_EXPR;
531 /* FALLTHROUGH */
532 case PLUS_EXPR:
533 case MINUS_EXPR:
534 split_constant_offset (op0, &var0, &off0);
535 split_constant_offset (op1, &var1, &off1);
536 *var = fold_build2 (code, type, var0, var1);
537 *off = size_binop (ocode, off0, off1);
538 return true;
540 case MULT_EXPR:
541 if (TREE_CODE (op1) != INTEGER_CST)
542 return false;
544 split_constant_offset (op0, &var0, &off0);
545 *var = fold_build2 (MULT_EXPR, type, var0, op1);
546 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
547 return true;
549 case ADDR_EXPR:
551 tree base, poffset;
552 HOST_WIDE_INT pbitsize, pbitpos;
553 enum machine_mode pmode;
554 int punsignedp, pvolatilep;
556 op0 = TREE_OPERAND (op0, 0);
557 if (!handled_component_p (op0))
558 return false;
560 base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
561 &pmode, &punsignedp, &pvolatilep, false);
563 if (pbitpos % BITS_PER_UNIT != 0)
564 return false;
565 base = build_fold_addr_expr (base);
566 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
568 if (poffset)
570 split_constant_offset (poffset, &poffset, &off1);
571 off0 = size_binop (PLUS_EXPR, off0, off1);
572 if (POINTER_TYPE_P (TREE_TYPE (base)))
573 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base),
574 base, fold_convert (sizetype, poffset));
575 else
576 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
577 fold_convert (TREE_TYPE (base), poffset));
580 var0 = fold_convert (type, base);
582 /* If variable length types are involved, punt, otherwise casts
583 might be converted into ARRAY_REFs in gimplify_conversion.
584 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
585 possibly no longer appears in current GIMPLE, might resurface.
586 This perhaps could run
587 if (CONVERT_EXPR_P (var0))
589 gimplify_conversion (&var0);
590 // Attempt to fill in any within var0 found ARRAY_REF's
591 // element size from corresponding op embedded ARRAY_REF,
592 // if unsuccessful, just punt.
593 } */
594 while (POINTER_TYPE_P (type))
595 type = TREE_TYPE (type);
596 if (int_size_in_bytes (type) < 0)
597 return false;
599 *var = var0;
600 *off = off0;
601 return true;
604 case SSA_NAME:
606 gimple def_stmt = SSA_NAME_DEF_STMT (op0);
607 enum tree_code subcode;
609 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
610 return false;
612 var0 = gimple_assign_rhs1 (def_stmt);
613 subcode = gimple_assign_rhs_code (def_stmt);
614 var1 = gimple_assign_rhs2 (def_stmt);
616 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
619 default:
620 return false;
624 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
625 will be ssizetype. */
627 void
628 split_constant_offset (tree exp, tree *var, tree *off)
630 tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
631 enum tree_code code;
633 *var = exp;
634 *off = ssize_int (0);
635 STRIP_NOPS (exp);
637 if (automatically_generated_chrec_p (exp))
638 return;
640 otype = TREE_TYPE (exp);
641 code = TREE_CODE (exp);
642 extract_ops_from_tree (exp, &code, &op0, &op1);
643 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
645 *var = fold_convert (type, e);
646 *off = o;
650 /* Returns the address ADDR of an object in a canonical shape (without nop
651 casts, and with type of pointer to the object). */
653 static tree
654 canonicalize_base_object_address (tree addr)
656 tree orig = addr;
658 STRIP_NOPS (addr);
660 /* The base address may be obtained by casting from integer, in that case
661 keep the cast. */
662 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
663 return orig;
665 if (TREE_CODE (addr) != ADDR_EXPR)
666 return addr;
668 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
671 /* Analyzes the behavior of the memory reference DR in the innermost loop that
672 contains it. Returns true if analysis succeed or false otherwise. */
674 bool
675 dr_analyze_innermost (struct data_reference *dr)
677 gimple stmt = DR_STMT (dr);
678 struct loop *loop = loop_containing_stmt (stmt);
679 tree ref = DR_REF (dr);
680 HOST_WIDE_INT pbitsize, pbitpos;
681 tree base, poffset;
682 enum machine_mode pmode;
683 int punsignedp, pvolatilep;
684 affine_iv base_iv, offset_iv;
685 tree init, dinit, step;
687 if (dump_file && (dump_flags & TDF_DETAILS))
688 fprintf (dump_file, "analyze_innermost: ");
690 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
691 &pmode, &punsignedp, &pvolatilep, false);
692 gcc_assert (base != NULL_TREE);
694 if (pbitpos % BITS_PER_UNIT != 0)
696 if (dump_file && (dump_flags & TDF_DETAILS))
697 fprintf (dump_file, "failed: bit offset alignment.\n");
698 return false;
701 base = build_fold_addr_expr (base);
702 if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv, false))
704 if (dump_file && (dump_flags & TDF_DETAILS))
705 fprintf (dump_file, "failed: evolution of base is not affine.\n");
706 return false;
708 if (!poffset)
710 offset_iv.base = ssize_int (0);
711 offset_iv.step = ssize_int (0);
713 else if (!simple_iv (loop, loop_containing_stmt (stmt),
714 poffset, &offset_iv, false))
716 if (dump_file && (dump_flags & TDF_DETAILS))
717 fprintf (dump_file, "failed: evolution of offset is not affine.\n");
718 return false;
721 init = ssize_int (pbitpos / BITS_PER_UNIT);
722 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
723 init = size_binop (PLUS_EXPR, init, dinit);
724 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
725 init = size_binop (PLUS_EXPR, init, dinit);
727 step = size_binop (PLUS_EXPR,
728 fold_convert (ssizetype, base_iv.step),
729 fold_convert (ssizetype, offset_iv.step));
731 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
733 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
734 DR_INIT (dr) = init;
735 DR_STEP (dr) = step;
737 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
739 if (dump_file && (dump_flags & TDF_DETAILS))
740 fprintf (dump_file, "success.\n");
742 return true;
745 /* Determines the base object and the list of indices of memory reference
746 DR, analyzed in loop nest NEST. */
748 static void
749 dr_analyze_indices (struct data_reference *dr, struct loop *nest)
751 gimple stmt = DR_STMT (dr);
752 struct loop *loop = loop_containing_stmt (stmt);
753 VEC (tree, heap) *access_fns = NULL;
754 tree ref = unshare_expr (DR_REF (dr)), aref = ref, op;
755 tree base, off, access_fn;
756 basic_block before_loop = block_before_loop (nest);
758 while (handled_component_p (aref))
760 if (TREE_CODE (aref) == ARRAY_REF)
762 op = TREE_OPERAND (aref, 1);
763 access_fn = analyze_scalar_evolution (loop, op);
764 access_fn = instantiate_scev (before_loop, loop, access_fn);
765 VEC_safe_push (tree, heap, access_fns, access_fn);
767 TREE_OPERAND (aref, 1) = build_int_cst (TREE_TYPE (op), 0);
770 aref = TREE_OPERAND (aref, 0);
773 if (INDIRECT_REF_P (aref))
775 op = TREE_OPERAND (aref, 0);
776 access_fn = analyze_scalar_evolution (loop, op);
777 access_fn = instantiate_scev (before_loop, loop, access_fn);
778 base = initial_condition (access_fn);
779 split_constant_offset (base, &base, &off);
780 access_fn = chrec_replace_initial_condition (access_fn,
781 fold_convert (TREE_TYPE (base), off));
783 TREE_OPERAND (aref, 0) = base;
784 VEC_safe_push (tree, heap, access_fns, access_fn);
787 DR_BASE_OBJECT (dr) = ref;
788 DR_ACCESS_FNS (dr) = access_fns;
791 /* Extracts the alias analysis information from the memory reference DR. */
793 static void
794 dr_analyze_alias (struct data_reference *dr)
796 tree ref = DR_REF (dr);
797 tree base = get_base_address (ref), addr;
799 if (INDIRECT_REF_P (base))
801 addr = TREE_OPERAND (base, 0);
802 if (TREE_CODE (addr) == SSA_NAME)
803 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
807 /* Returns true if the address of DR is invariant. */
809 static bool
810 dr_address_invariant_p (struct data_reference *dr)
812 unsigned i;
813 tree idx;
815 for (i = 0; VEC_iterate (tree, DR_ACCESS_FNS (dr), i, idx); i++)
816 if (tree_contains_chrecs (idx, NULL))
817 return false;
819 return true;
822 /* Frees data reference DR. */
824 void
825 free_data_ref (data_reference_p dr)
827 VEC_free (tree, heap, DR_ACCESS_FNS (dr));
828 free (dr);
831 /* Analyzes memory reference MEMREF accessed in STMT. The reference
832 is read if IS_READ is true, write otherwise. Returns the
833 data_reference description of MEMREF. NEST is the outermost loop of the
834 loop nest in that the reference should be analyzed. */
836 struct data_reference *
837 create_data_ref (struct loop *nest, tree memref, gimple stmt, bool is_read)
839 struct data_reference *dr;
841 if (dump_file && (dump_flags & TDF_DETAILS))
843 fprintf (dump_file, "Creating dr for ");
844 print_generic_expr (dump_file, memref, TDF_SLIM);
845 fprintf (dump_file, "\n");
848 dr = XCNEW (struct data_reference);
849 DR_STMT (dr) = stmt;
850 DR_REF (dr) = memref;
851 DR_IS_READ (dr) = is_read;
853 dr_analyze_innermost (dr);
854 dr_analyze_indices (dr, nest);
855 dr_analyze_alias (dr);
857 if (dump_file && (dump_flags & TDF_DETAILS))
859 fprintf (dump_file, "\tbase_address: ");
860 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
861 fprintf (dump_file, "\n\toffset from base address: ");
862 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
863 fprintf (dump_file, "\n\tconstant offset from base address: ");
864 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
865 fprintf (dump_file, "\n\tstep: ");
866 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
867 fprintf (dump_file, "\n\taligned to: ");
868 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
869 fprintf (dump_file, "\n\tbase_object: ");
870 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
871 fprintf (dump_file, "\n");
874 return dr;
877 /* Returns true if FNA == FNB. */
879 static bool
880 affine_function_equal_p (affine_fn fna, affine_fn fnb)
882 unsigned i, n = VEC_length (tree, fna);
884 if (n != VEC_length (tree, fnb))
885 return false;
887 for (i = 0; i < n; i++)
888 if (!operand_equal_p (VEC_index (tree, fna, i),
889 VEC_index (tree, fnb, i), 0))
890 return false;
892 return true;
895 /* If all the functions in CF are the same, returns one of them,
896 otherwise returns NULL. */
898 static affine_fn
899 common_affine_function (conflict_function *cf)
901 unsigned i;
902 affine_fn comm;
904 if (!CF_NONTRIVIAL_P (cf))
905 return NULL;
907 comm = cf->fns[0];
909 for (i = 1; i < cf->n; i++)
910 if (!affine_function_equal_p (comm, cf->fns[i]))
911 return NULL;
913 return comm;
916 /* Returns the base of the affine function FN. */
918 static tree
919 affine_function_base (affine_fn fn)
921 return VEC_index (tree, fn, 0);
924 /* Returns true if FN is a constant. */
926 static bool
927 affine_function_constant_p (affine_fn fn)
929 unsigned i;
930 tree coef;
932 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
933 if (!integer_zerop (coef))
934 return false;
936 return true;
939 /* Returns true if FN is the zero constant function. */
941 static bool
942 affine_function_zero_p (affine_fn fn)
944 return (integer_zerop (affine_function_base (fn))
945 && affine_function_constant_p (fn));
948 /* Returns a signed integer type with the largest precision from TA
949 and TB. */
951 static tree
952 signed_type_for_types (tree ta, tree tb)
954 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
955 return signed_type_for (ta);
956 else
957 return signed_type_for (tb);
960 /* Applies operation OP on affine functions FNA and FNB, and returns the
961 result. */
963 static affine_fn
964 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
966 unsigned i, n, m;
967 affine_fn ret;
968 tree coef;
970 if (VEC_length (tree, fnb) > VEC_length (tree, fna))
972 n = VEC_length (tree, fna);
973 m = VEC_length (tree, fnb);
975 else
977 n = VEC_length (tree, fnb);
978 m = VEC_length (tree, fna);
981 ret = VEC_alloc (tree, heap, m);
982 for (i = 0; i < n; i++)
984 tree type = signed_type_for_types (TREE_TYPE (VEC_index (tree, fna, i)),
985 TREE_TYPE (VEC_index (tree, fnb, i)));
987 VEC_quick_push (tree, ret,
988 fold_build2 (op, type,
989 VEC_index (tree, fna, i),
990 VEC_index (tree, fnb, i)));
993 for (; VEC_iterate (tree, fna, i, coef); i++)
994 VEC_quick_push (tree, ret,
995 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
996 coef, integer_zero_node));
997 for (; VEC_iterate (tree, fnb, i, coef); i++)
998 VEC_quick_push (tree, ret,
999 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1000 integer_zero_node, coef));
1002 return ret;
1005 /* Returns the sum of affine functions FNA and FNB. */
1007 static affine_fn
1008 affine_fn_plus (affine_fn fna, affine_fn fnb)
1010 return affine_fn_op (PLUS_EXPR, fna, fnb);
1013 /* Returns the difference of affine functions FNA and FNB. */
1015 static affine_fn
1016 affine_fn_minus (affine_fn fna, affine_fn fnb)
1018 return affine_fn_op (MINUS_EXPR, fna, fnb);
1021 /* Frees affine function FN. */
1023 static void
1024 affine_fn_free (affine_fn fn)
1026 VEC_free (tree, heap, fn);
1029 /* Determine for each subscript in the data dependence relation DDR
1030 the distance. */
1032 static void
1033 compute_subscript_distance (struct data_dependence_relation *ddr)
1035 conflict_function *cf_a, *cf_b;
1036 affine_fn fn_a, fn_b, diff;
1038 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1040 unsigned int i;
1042 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1044 struct subscript *subscript;
1046 subscript = DDR_SUBSCRIPT (ddr, i);
1047 cf_a = SUB_CONFLICTS_IN_A (subscript);
1048 cf_b = SUB_CONFLICTS_IN_B (subscript);
1050 fn_a = common_affine_function (cf_a);
1051 fn_b = common_affine_function (cf_b);
1052 if (!fn_a || !fn_b)
1054 SUB_DISTANCE (subscript) = chrec_dont_know;
1055 return;
1057 diff = affine_fn_minus (fn_a, fn_b);
1059 if (affine_function_constant_p (diff))
1060 SUB_DISTANCE (subscript) = affine_function_base (diff);
1061 else
1062 SUB_DISTANCE (subscript) = chrec_dont_know;
1064 affine_fn_free (diff);
1069 /* Returns the conflict function for "unknown". */
1071 static conflict_function *
1072 conflict_fn_not_known (void)
1074 conflict_function *fn = XCNEW (conflict_function);
1075 fn->n = NOT_KNOWN;
1077 return fn;
1080 /* Returns the conflict function for "independent". */
1082 static conflict_function *
1083 conflict_fn_no_dependence (void)
1085 conflict_function *fn = XCNEW (conflict_function);
1086 fn->n = NO_DEPENDENCE;
1088 return fn;
1091 /* Returns true if the address of OBJ is invariant in LOOP. */
1093 static bool
1094 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1096 while (handled_component_p (obj))
1098 if (TREE_CODE (obj) == ARRAY_REF)
1100 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1101 need to check the stride and the lower bound of the reference. */
1102 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1103 loop->num)
1104 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1105 loop->num))
1106 return false;
1108 else if (TREE_CODE (obj) == COMPONENT_REF)
1110 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1111 loop->num))
1112 return false;
1114 obj = TREE_OPERAND (obj, 0);
1117 if (!INDIRECT_REF_P (obj))
1118 return true;
1120 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1121 loop->num);
1124 /* Returns true if A and B are accesses to different objects, or to different
1125 fields of the same object. */
1127 static bool
1128 disjoint_objects_p (tree a, tree b)
1130 tree base_a, base_b;
1131 VEC (tree, heap) *comp_a = NULL, *comp_b = NULL;
1132 bool ret;
1134 base_a = get_base_address (a);
1135 base_b = get_base_address (b);
1137 if (DECL_P (base_a)
1138 && DECL_P (base_b)
1139 && base_a != base_b)
1140 return true;
1142 if (!operand_equal_p (base_a, base_b, 0))
1143 return false;
1145 /* Compare the component references of A and B. We must start from the inner
1146 ones, so record them to the vector first. */
1147 while (handled_component_p (a))
1149 VEC_safe_push (tree, heap, comp_a, a);
1150 a = TREE_OPERAND (a, 0);
1152 while (handled_component_p (b))
1154 VEC_safe_push (tree, heap, comp_b, b);
1155 b = TREE_OPERAND (b, 0);
1158 ret = false;
1159 while (1)
1161 if (VEC_length (tree, comp_a) == 0
1162 || VEC_length (tree, comp_b) == 0)
1163 break;
1165 a = VEC_pop (tree, comp_a);
1166 b = VEC_pop (tree, comp_b);
1168 /* Real and imaginary part of a variable do not alias. */
1169 if ((TREE_CODE (a) == REALPART_EXPR
1170 && TREE_CODE (b) == IMAGPART_EXPR)
1171 || (TREE_CODE (a) == IMAGPART_EXPR
1172 && TREE_CODE (b) == REALPART_EXPR))
1174 ret = true;
1175 break;
1178 if (TREE_CODE (a) != TREE_CODE (b))
1179 break;
1181 /* Nothing to do for ARRAY_REFs, as the indices of array_refs in
1182 DR_BASE_OBJECT are always zero. */
1183 if (TREE_CODE (a) == ARRAY_REF)
1184 continue;
1185 else if (TREE_CODE (a) == COMPONENT_REF)
1187 if (operand_equal_p (TREE_OPERAND (a, 1), TREE_OPERAND (b, 1), 0))
1188 continue;
1190 /* Different fields of unions may overlap. */
1191 base_a = TREE_OPERAND (a, 0);
1192 if (TREE_CODE (TREE_TYPE (base_a)) == UNION_TYPE)
1193 break;
1195 /* Different fields of structures cannot. */
1196 ret = true;
1197 break;
1199 else
1200 break;
1203 VEC_free (tree, heap, comp_a);
1204 VEC_free (tree, heap, comp_b);
1206 return ret;
1209 /* Returns false if we can prove that data references A and B do not alias,
1210 true otherwise. */
1212 bool
1213 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b)
1215 const_tree addr_a = DR_BASE_ADDRESS (a);
1216 const_tree addr_b = DR_BASE_ADDRESS (b);
1217 const_tree type_a, type_b;
1218 const_tree decl_a = NULL_TREE, decl_b = NULL_TREE;
1220 /* If the accessed objects are disjoint, the memory references do not
1221 alias. */
1222 if (disjoint_objects_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b)))
1223 return false;
1225 /* Query the alias oracle. */
1226 if (!refs_may_alias_p (DR_REF (a), DR_REF (b)))
1227 return false;
1229 if (!addr_a || !addr_b)
1230 return true;
1232 /* If the references are based on different static objects, they cannot
1233 alias (PTA should be able to disambiguate such accesses, but often
1234 it fails to). */
1235 if (TREE_CODE (addr_a) == ADDR_EXPR
1236 && TREE_CODE (addr_b) == ADDR_EXPR)
1237 return TREE_OPERAND (addr_a, 0) == TREE_OPERAND (addr_b, 0);
1239 /* An instruction writing through a restricted pointer is "independent" of any
1240 instruction reading or writing through a different restricted pointer,
1241 in the same block/scope. */
1243 type_a = TREE_TYPE (addr_a);
1244 type_b = TREE_TYPE (addr_b);
1245 gcc_assert (POINTER_TYPE_P (type_a) && POINTER_TYPE_P (type_b));
1247 if (TREE_CODE (addr_a) == SSA_NAME)
1248 decl_a = SSA_NAME_VAR (addr_a);
1249 if (TREE_CODE (addr_b) == SSA_NAME)
1250 decl_b = SSA_NAME_VAR (addr_b);
1252 if (TYPE_RESTRICT (type_a) && TYPE_RESTRICT (type_b)
1253 && (!DR_IS_READ (a) || !DR_IS_READ (b))
1254 && decl_a && DECL_P (decl_a)
1255 && decl_b && DECL_P (decl_b)
1256 && decl_a != decl_b
1257 && TREE_CODE (DECL_CONTEXT (decl_a)) == FUNCTION_DECL
1258 && DECL_CONTEXT (decl_a) == DECL_CONTEXT (decl_b))
1259 return false;
1261 return true;
1264 static void compute_self_dependence (struct data_dependence_relation *);
1266 /* Initialize a data dependence relation between data accesses A and
1267 B. NB_LOOPS is the number of loops surrounding the references: the
1268 size of the classic distance/direction vectors. */
1270 static struct data_dependence_relation *
1271 initialize_data_dependence_relation (struct data_reference *a,
1272 struct data_reference *b,
1273 VEC (loop_p, heap) *loop_nest)
1275 struct data_dependence_relation *res;
1276 unsigned int i;
1278 res = XNEW (struct data_dependence_relation);
1279 DDR_A (res) = a;
1280 DDR_B (res) = b;
1281 DDR_LOOP_NEST (res) = NULL;
1282 DDR_REVERSED_P (res) = false;
1283 DDR_SUBSCRIPTS (res) = NULL;
1284 DDR_DIR_VECTS (res) = NULL;
1285 DDR_DIST_VECTS (res) = NULL;
1287 if (a == NULL || b == NULL)
1289 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1290 return res;
1293 /* If the data references do not alias, then they are independent. */
1294 if (!dr_may_alias_p (a, b))
1296 DDR_ARE_DEPENDENT (res) = chrec_known;
1297 return res;
1300 /* When the references are exactly the same, don't spend time doing
1301 the data dependence tests, just initialize the ddr and return. */
1302 if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1304 DDR_AFFINE_P (res) = true;
1305 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1306 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1307 DDR_LOOP_NEST (res) = loop_nest;
1308 DDR_INNER_LOOP (res) = 0;
1309 DDR_SELF_REFERENCE (res) = true;
1310 compute_self_dependence (res);
1311 return res;
1314 /* If the references do not access the same object, we do not know
1315 whether they alias or not. */
1316 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1318 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1319 return res;
1322 /* If the base of the object is not invariant in the loop nest, we cannot
1323 analyze it. TODO -- in fact, it would suffice to record that there may
1324 be arbitrary dependences in the loops where the base object varies. */
1325 if (!object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1326 DR_BASE_OBJECT (a)))
1328 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1329 return res;
1332 gcc_assert (DR_NUM_DIMENSIONS (a) == DR_NUM_DIMENSIONS (b));
1334 DDR_AFFINE_P (res) = true;
1335 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1336 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1337 DDR_LOOP_NEST (res) = loop_nest;
1338 DDR_INNER_LOOP (res) = 0;
1339 DDR_SELF_REFERENCE (res) = false;
1341 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1343 struct subscript *subscript;
1345 subscript = XNEW (struct subscript);
1346 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1347 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1348 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1349 SUB_DISTANCE (subscript) = chrec_dont_know;
1350 VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1353 return res;
1356 /* Frees memory used by the conflict function F. */
1358 static void
1359 free_conflict_function (conflict_function *f)
1361 unsigned i;
1363 if (CF_NONTRIVIAL_P (f))
1365 for (i = 0; i < f->n; i++)
1366 affine_fn_free (f->fns[i]);
1368 free (f);
1371 /* Frees memory used by SUBSCRIPTS. */
1373 static void
1374 free_subscripts (VEC (subscript_p, heap) *subscripts)
1376 unsigned i;
1377 subscript_p s;
1379 for (i = 0; VEC_iterate (subscript_p, subscripts, i, s); i++)
1381 free_conflict_function (s->conflicting_iterations_in_a);
1382 free_conflict_function (s->conflicting_iterations_in_b);
1383 free (s);
1385 VEC_free (subscript_p, heap, subscripts);
1388 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1389 description. */
1391 static inline void
1392 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1393 tree chrec)
1395 if (dump_file && (dump_flags & TDF_DETAILS))
1397 fprintf (dump_file, "(dependence classified: ");
1398 print_generic_expr (dump_file, chrec, 0);
1399 fprintf (dump_file, ")\n");
1402 DDR_ARE_DEPENDENT (ddr) = chrec;
1403 free_subscripts (DDR_SUBSCRIPTS (ddr));
1404 DDR_SUBSCRIPTS (ddr) = NULL;
1407 /* The dependence relation DDR cannot be represented by a distance
1408 vector. */
1410 static inline void
1411 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1413 if (dump_file && (dump_flags & TDF_DETAILS))
1414 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1416 DDR_AFFINE_P (ddr) = false;
1421 /* This section contains the classic Banerjee tests. */
1423 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1424 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1426 static inline bool
1427 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1429 return (evolution_function_is_constant_p (chrec_a)
1430 && evolution_function_is_constant_p (chrec_b));
1433 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1434 variable, i.e., if the SIV (Single Index Variable) test is true. */
1436 static bool
1437 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1439 if ((evolution_function_is_constant_p (chrec_a)
1440 && evolution_function_is_univariate_p (chrec_b))
1441 || (evolution_function_is_constant_p (chrec_b)
1442 && evolution_function_is_univariate_p (chrec_a)))
1443 return true;
1445 if (evolution_function_is_univariate_p (chrec_a)
1446 && evolution_function_is_univariate_p (chrec_b))
1448 switch (TREE_CODE (chrec_a))
1450 case POLYNOMIAL_CHREC:
1451 switch (TREE_CODE (chrec_b))
1453 case POLYNOMIAL_CHREC:
1454 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1455 return false;
1457 default:
1458 return true;
1461 default:
1462 return true;
1466 return false;
1469 /* Creates a conflict function with N dimensions. The affine functions
1470 in each dimension follow. */
1472 static conflict_function *
1473 conflict_fn (unsigned n, ...)
1475 unsigned i;
1476 conflict_function *ret = XCNEW (conflict_function);
1477 va_list ap;
1479 gcc_assert (0 < n && n <= MAX_DIM);
1480 va_start(ap, n);
1482 ret->n = n;
1483 for (i = 0; i < n; i++)
1484 ret->fns[i] = va_arg (ap, affine_fn);
1485 va_end(ap);
1487 return ret;
1490 /* Returns constant affine function with value CST. */
1492 static affine_fn
1493 affine_fn_cst (tree cst)
1495 affine_fn fn = VEC_alloc (tree, heap, 1);
1496 VEC_quick_push (tree, fn, cst);
1497 return fn;
1500 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1502 static affine_fn
1503 affine_fn_univar (tree cst, unsigned dim, tree coef)
1505 affine_fn fn = VEC_alloc (tree, heap, dim + 1);
1506 unsigned i;
1508 gcc_assert (dim > 0);
1509 VEC_quick_push (tree, fn, cst);
1510 for (i = 1; i < dim; i++)
1511 VEC_quick_push (tree, fn, integer_zero_node);
1512 VEC_quick_push (tree, fn, coef);
1513 return fn;
1516 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1517 *OVERLAPS_B are initialized to the functions that describe the
1518 relation between the elements accessed twice by CHREC_A and
1519 CHREC_B. For k >= 0, the following property is verified:
1521 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1523 static void
1524 analyze_ziv_subscript (tree chrec_a,
1525 tree chrec_b,
1526 conflict_function **overlaps_a,
1527 conflict_function **overlaps_b,
1528 tree *last_conflicts)
1530 tree type, difference;
1531 dependence_stats.num_ziv++;
1533 if (dump_file && (dump_flags & TDF_DETAILS))
1534 fprintf (dump_file, "(analyze_ziv_subscript \n");
1536 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1537 chrec_a = chrec_convert (type, chrec_a, NULL);
1538 chrec_b = chrec_convert (type, chrec_b, NULL);
1539 difference = chrec_fold_minus (type, chrec_a, chrec_b);
1541 switch (TREE_CODE (difference))
1543 case INTEGER_CST:
1544 if (integer_zerop (difference))
1546 /* The difference is equal to zero: the accessed index
1547 overlaps for each iteration in the loop. */
1548 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1549 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1550 *last_conflicts = chrec_dont_know;
1551 dependence_stats.num_ziv_dependent++;
1553 else
1555 /* The accesses do not overlap. */
1556 *overlaps_a = conflict_fn_no_dependence ();
1557 *overlaps_b = conflict_fn_no_dependence ();
1558 *last_conflicts = integer_zero_node;
1559 dependence_stats.num_ziv_independent++;
1561 break;
1563 default:
1564 /* We're not sure whether the indexes overlap. For the moment,
1565 conservatively answer "don't know". */
1566 if (dump_file && (dump_flags & TDF_DETAILS))
1567 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1569 *overlaps_a = conflict_fn_not_known ();
1570 *overlaps_b = conflict_fn_not_known ();
1571 *last_conflicts = chrec_dont_know;
1572 dependence_stats.num_ziv_unimplemented++;
1573 break;
1576 if (dump_file && (dump_flags & TDF_DETAILS))
1577 fprintf (dump_file, ")\n");
1580 /* Sets NIT to the estimated number of executions of the statements in
1581 LOOP. If CONSERVATIVE is true, we must be sure that NIT is at least as
1582 large as the number of iterations. If we have no reliable estimate,
1583 the function returns false, otherwise returns true. */
1585 bool
1586 estimated_loop_iterations (struct loop *loop, bool conservative,
1587 double_int *nit)
1589 estimate_numbers_of_iterations_loop (loop);
1590 if (conservative)
1592 if (!loop->any_upper_bound)
1593 return false;
1595 *nit = loop->nb_iterations_upper_bound;
1597 else
1599 if (!loop->any_estimate)
1600 return false;
1602 *nit = loop->nb_iterations_estimate;
1605 return true;
1608 /* Similar to estimated_loop_iterations, but returns the estimate only
1609 if it fits to HOST_WIDE_INT. If this is not the case, or the estimate
1610 on the number of iterations of LOOP could not be derived, returns -1. */
1612 HOST_WIDE_INT
1613 estimated_loop_iterations_int (struct loop *loop, bool conservative)
1615 double_int nit;
1616 HOST_WIDE_INT hwi_nit;
1618 if (!estimated_loop_iterations (loop, conservative, &nit))
1619 return -1;
1621 if (!double_int_fits_in_shwi_p (nit))
1622 return -1;
1623 hwi_nit = double_int_to_shwi (nit);
1625 return hwi_nit < 0 ? -1 : hwi_nit;
1628 /* Similar to estimated_loop_iterations, but returns the estimate as a tree,
1629 and only if it fits to the int type. If this is not the case, or the
1630 estimate on the number of iterations of LOOP could not be derived, returns
1631 chrec_dont_know. */
1633 static tree
1634 estimated_loop_iterations_tree (struct loop *loop, bool conservative)
1636 double_int nit;
1637 tree type;
1639 if (!estimated_loop_iterations (loop, conservative, &nit))
1640 return chrec_dont_know;
1642 type = lang_hooks.types.type_for_size (INT_TYPE_SIZE, true);
1643 if (!double_int_fits_to_tree_p (type, nit))
1644 return chrec_dont_know;
1646 return double_int_to_tree (type, nit);
1649 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1650 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1651 *OVERLAPS_B are initialized to the functions that describe the
1652 relation between the elements accessed twice by CHREC_A and
1653 CHREC_B. For k >= 0, the following property is verified:
1655 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1657 static void
1658 analyze_siv_subscript_cst_affine (tree chrec_a,
1659 tree chrec_b,
1660 conflict_function **overlaps_a,
1661 conflict_function **overlaps_b,
1662 tree *last_conflicts)
1664 bool value0, value1, value2;
1665 tree type, difference, tmp;
1667 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1668 chrec_a = chrec_convert (type, chrec_a, NULL);
1669 chrec_b = chrec_convert (type, chrec_b, NULL);
1670 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1672 if (!chrec_is_positive (initial_condition (difference), &value0))
1674 if (dump_file && (dump_flags & TDF_DETAILS))
1675 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1677 dependence_stats.num_siv_unimplemented++;
1678 *overlaps_a = conflict_fn_not_known ();
1679 *overlaps_b = conflict_fn_not_known ();
1680 *last_conflicts = chrec_dont_know;
1681 return;
1683 else
1685 if (value0 == false)
1687 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1689 if (dump_file && (dump_flags & TDF_DETAILS))
1690 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1692 *overlaps_a = conflict_fn_not_known ();
1693 *overlaps_b = conflict_fn_not_known ();
1694 *last_conflicts = chrec_dont_know;
1695 dependence_stats.num_siv_unimplemented++;
1696 return;
1698 else
1700 if (value1 == true)
1702 /* Example:
1703 chrec_a = 12
1704 chrec_b = {10, +, 1}
1707 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1709 HOST_WIDE_INT numiter;
1710 struct loop *loop = get_chrec_loop (chrec_b);
1712 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1713 tmp = fold_build2 (EXACT_DIV_EXPR, type,
1714 fold_build1 (ABS_EXPR, type, difference),
1715 CHREC_RIGHT (chrec_b));
1716 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1717 *last_conflicts = integer_one_node;
1720 /* Perform weak-zero siv test to see if overlap is
1721 outside the loop bounds. */
1722 numiter = estimated_loop_iterations_int (loop, false);
1724 if (numiter >= 0
1725 && compare_tree_int (tmp, numiter) > 0)
1727 free_conflict_function (*overlaps_a);
1728 free_conflict_function (*overlaps_b);
1729 *overlaps_a = conflict_fn_no_dependence ();
1730 *overlaps_b = conflict_fn_no_dependence ();
1731 *last_conflicts = integer_zero_node;
1732 dependence_stats.num_siv_independent++;
1733 return;
1735 dependence_stats.num_siv_dependent++;
1736 return;
1739 /* When the step does not divide the difference, there are
1740 no overlaps. */
1741 else
1743 *overlaps_a = conflict_fn_no_dependence ();
1744 *overlaps_b = conflict_fn_no_dependence ();
1745 *last_conflicts = integer_zero_node;
1746 dependence_stats.num_siv_independent++;
1747 return;
1751 else
1753 /* Example:
1754 chrec_a = 12
1755 chrec_b = {10, +, -1}
1757 In this case, chrec_a will not overlap with chrec_b. */
1758 *overlaps_a = conflict_fn_no_dependence ();
1759 *overlaps_b = conflict_fn_no_dependence ();
1760 *last_conflicts = integer_zero_node;
1761 dependence_stats.num_siv_independent++;
1762 return;
1766 else
1768 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1770 if (dump_file && (dump_flags & TDF_DETAILS))
1771 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1773 *overlaps_a = conflict_fn_not_known ();
1774 *overlaps_b = conflict_fn_not_known ();
1775 *last_conflicts = chrec_dont_know;
1776 dependence_stats.num_siv_unimplemented++;
1777 return;
1779 else
1781 if (value2 == false)
1783 /* Example:
1784 chrec_a = 3
1785 chrec_b = {10, +, -1}
1787 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1789 HOST_WIDE_INT numiter;
1790 struct loop *loop = get_chrec_loop (chrec_b);
1792 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1793 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
1794 CHREC_RIGHT (chrec_b));
1795 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1796 *last_conflicts = integer_one_node;
1798 /* Perform weak-zero siv test to see if overlap is
1799 outside the loop bounds. */
1800 numiter = estimated_loop_iterations_int (loop, false);
1802 if (numiter >= 0
1803 && compare_tree_int (tmp, numiter) > 0)
1805 free_conflict_function (*overlaps_a);
1806 free_conflict_function (*overlaps_b);
1807 *overlaps_a = conflict_fn_no_dependence ();
1808 *overlaps_b = conflict_fn_no_dependence ();
1809 *last_conflicts = integer_zero_node;
1810 dependence_stats.num_siv_independent++;
1811 return;
1813 dependence_stats.num_siv_dependent++;
1814 return;
1817 /* When the step does not divide the difference, there
1818 are no overlaps. */
1819 else
1821 *overlaps_a = conflict_fn_no_dependence ();
1822 *overlaps_b = conflict_fn_no_dependence ();
1823 *last_conflicts = integer_zero_node;
1824 dependence_stats.num_siv_independent++;
1825 return;
1828 else
1830 /* Example:
1831 chrec_a = 3
1832 chrec_b = {4, +, 1}
1834 In this case, chrec_a will not overlap with chrec_b. */
1835 *overlaps_a = conflict_fn_no_dependence ();
1836 *overlaps_b = conflict_fn_no_dependence ();
1837 *last_conflicts = integer_zero_node;
1838 dependence_stats.num_siv_independent++;
1839 return;
1846 /* Helper recursive function for initializing the matrix A. Returns
1847 the initial value of CHREC. */
1849 static tree
1850 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
1852 gcc_assert (chrec);
1854 switch (TREE_CODE (chrec))
1856 case POLYNOMIAL_CHREC:
1857 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
1859 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
1860 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
1862 case PLUS_EXPR:
1863 case MULT_EXPR:
1864 case MINUS_EXPR:
1866 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1867 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
1869 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
1872 case NOP_EXPR:
1874 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1875 return chrec_convert (chrec_type (chrec), op, NULL);
1878 case BIT_NOT_EXPR:
1880 /* Handle ~X as -1 - X. */
1881 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1882 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
1883 build_int_cst (TREE_TYPE (chrec), -1), op);
1886 case INTEGER_CST:
1887 return chrec;
1889 default:
1890 gcc_unreachable ();
1891 return NULL_TREE;
1895 #define FLOOR_DIV(x,y) ((x) / (y))
1897 /* Solves the special case of the Diophantine equation:
1898 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
1900 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
1901 number of iterations that loops X and Y run. The overlaps will be
1902 constructed as evolutions in dimension DIM. */
1904 static void
1905 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
1906 affine_fn *overlaps_a,
1907 affine_fn *overlaps_b,
1908 tree *last_conflicts, int dim)
1910 if (((step_a > 0 && step_b > 0)
1911 || (step_a < 0 && step_b < 0)))
1913 int step_overlaps_a, step_overlaps_b;
1914 int gcd_steps_a_b, last_conflict, tau2;
1916 gcd_steps_a_b = gcd (step_a, step_b);
1917 step_overlaps_a = step_b / gcd_steps_a_b;
1918 step_overlaps_b = step_a / gcd_steps_a_b;
1920 if (niter > 0)
1922 tau2 = FLOOR_DIV (niter, step_overlaps_a);
1923 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
1924 last_conflict = tau2;
1925 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
1927 else
1928 *last_conflicts = chrec_dont_know;
1930 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
1931 build_int_cst (NULL_TREE,
1932 step_overlaps_a));
1933 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
1934 build_int_cst (NULL_TREE,
1935 step_overlaps_b));
1938 else
1940 *overlaps_a = affine_fn_cst (integer_zero_node);
1941 *overlaps_b = affine_fn_cst (integer_zero_node);
1942 *last_conflicts = integer_zero_node;
1946 /* Solves the special case of a Diophantine equation where CHREC_A is
1947 an affine bivariate function, and CHREC_B is an affine univariate
1948 function. For example,
1950 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
1952 has the following overlapping functions:
1954 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
1955 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
1956 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
1958 FORNOW: This is a specialized implementation for a case occurring in
1959 a common benchmark. Implement the general algorithm. */
1961 static void
1962 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
1963 conflict_function **overlaps_a,
1964 conflict_function **overlaps_b,
1965 tree *last_conflicts)
1967 bool xz_p, yz_p, xyz_p;
1968 int step_x, step_y, step_z;
1969 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
1970 affine_fn overlaps_a_xz, overlaps_b_xz;
1971 affine_fn overlaps_a_yz, overlaps_b_yz;
1972 affine_fn overlaps_a_xyz, overlaps_b_xyz;
1973 affine_fn ova1, ova2, ovb;
1974 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
1976 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
1977 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
1978 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
1980 niter_x =
1981 estimated_loop_iterations_int (get_chrec_loop (CHREC_LEFT (chrec_a)),
1982 false);
1983 niter_y = estimated_loop_iterations_int (get_chrec_loop (chrec_a), false);
1984 niter_z = estimated_loop_iterations_int (get_chrec_loop (chrec_b), false);
1986 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
1988 if (dump_file && (dump_flags & TDF_DETAILS))
1989 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
1991 *overlaps_a = conflict_fn_not_known ();
1992 *overlaps_b = conflict_fn_not_known ();
1993 *last_conflicts = chrec_dont_know;
1994 return;
1997 niter = MIN (niter_x, niter_z);
1998 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
1999 &overlaps_a_xz,
2000 &overlaps_b_xz,
2001 &last_conflicts_xz, 1);
2002 niter = MIN (niter_y, niter_z);
2003 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
2004 &overlaps_a_yz,
2005 &overlaps_b_yz,
2006 &last_conflicts_yz, 2);
2007 niter = MIN (niter_x, niter_z);
2008 niter = MIN (niter_y, niter);
2009 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
2010 &overlaps_a_xyz,
2011 &overlaps_b_xyz,
2012 &last_conflicts_xyz, 3);
2014 xz_p = !integer_zerop (last_conflicts_xz);
2015 yz_p = !integer_zerop (last_conflicts_yz);
2016 xyz_p = !integer_zerop (last_conflicts_xyz);
2018 if (xz_p || yz_p || xyz_p)
2020 ova1 = affine_fn_cst (integer_zero_node);
2021 ova2 = affine_fn_cst (integer_zero_node);
2022 ovb = affine_fn_cst (integer_zero_node);
2023 if (xz_p)
2025 affine_fn t0 = ova1;
2026 affine_fn t2 = ovb;
2028 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2029 ovb = affine_fn_plus (ovb, overlaps_b_xz);
2030 affine_fn_free (t0);
2031 affine_fn_free (t2);
2032 *last_conflicts = last_conflicts_xz;
2034 if (yz_p)
2036 affine_fn t0 = ova2;
2037 affine_fn t2 = ovb;
2039 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2040 ovb = affine_fn_plus (ovb, overlaps_b_yz);
2041 affine_fn_free (t0);
2042 affine_fn_free (t2);
2043 *last_conflicts = last_conflicts_yz;
2045 if (xyz_p)
2047 affine_fn t0 = ova1;
2048 affine_fn t2 = ova2;
2049 affine_fn t4 = ovb;
2051 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2052 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2053 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2054 affine_fn_free (t0);
2055 affine_fn_free (t2);
2056 affine_fn_free (t4);
2057 *last_conflicts = last_conflicts_xyz;
2059 *overlaps_a = conflict_fn (2, ova1, ova2);
2060 *overlaps_b = conflict_fn (1, ovb);
2062 else
2064 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2065 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2066 *last_conflicts = integer_zero_node;
2069 affine_fn_free (overlaps_a_xz);
2070 affine_fn_free (overlaps_b_xz);
2071 affine_fn_free (overlaps_a_yz);
2072 affine_fn_free (overlaps_b_yz);
2073 affine_fn_free (overlaps_a_xyz);
2074 affine_fn_free (overlaps_b_xyz);
2077 /* Determines the overlapping elements due to accesses CHREC_A and
2078 CHREC_B, that are affine functions. This function cannot handle
2079 symbolic evolution functions, ie. when initial conditions are
2080 parameters, because it uses lambda matrices of integers. */
2082 static void
2083 analyze_subscript_affine_affine (tree chrec_a,
2084 tree chrec_b,
2085 conflict_function **overlaps_a,
2086 conflict_function **overlaps_b,
2087 tree *last_conflicts)
2089 unsigned nb_vars_a, nb_vars_b, dim;
2090 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2091 lambda_matrix A, U, S;
2093 if (eq_evolutions_p (chrec_a, chrec_b))
2095 /* The accessed index overlaps for each iteration in the
2096 loop. */
2097 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2098 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2099 *last_conflicts = chrec_dont_know;
2100 return;
2102 if (dump_file && (dump_flags & TDF_DETAILS))
2103 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2105 /* For determining the initial intersection, we have to solve a
2106 Diophantine equation. This is the most time consuming part.
2108 For answering to the question: "Is there a dependence?" we have
2109 to prove that there exists a solution to the Diophantine
2110 equation, and that the solution is in the iteration domain,
2111 i.e. the solution is positive or zero, and that the solution
2112 happens before the upper bound loop.nb_iterations. Otherwise
2113 there is no dependence. This function outputs a description of
2114 the iterations that hold the intersections. */
2116 nb_vars_a = nb_vars_in_chrec (chrec_a);
2117 nb_vars_b = nb_vars_in_chrec (chrec_b);
2119 dim = nb_vars_a + nb_vars_b;
2120 U = lambda_matrix_new (dim, dim);
2121 A = lambda_matrix_new (dim, 1);
2122 S = lambda_matrix_new (dim, 1);
2124 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2125 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
2126 gamma = init_b - init_a;
2128 /* Don't do all the hard work of solving the Diophantine equation
2129 when we already know the solution: for example,
2130 | {3, +, 1}_1
2131 | {3, +, 4}_2
2132 | gamma = 3 - 3 = 0.
2133 Then the first overlap occurs during the first iterations:
2134 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2136 if (gamma == 0)
2138 if (nb_vars_a == 1 && nb_vars_b == 1)
2140 HOST_WIDE_INT step_a, step_b;
2141 HOST_WIDE_INT niter, niter_a, niter_b;
2142 affine_fn ova, ovb;
2144 niter_a = estimated_loop_iterations_int (get_chrec_loop (chrec_a),
2145 false);
2146 niter_b = estimated_loop_iterations_int (get_chrec_loop (chrec_b),
2147 false);
2148 niter = MIN (niter_a, niter_b);
2149 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2150 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2152 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2153 &ova, &ovb,
2154 last_conflicts, 1);
2155 *overlaps_a = conflict_fn (1, ova);
2156 *overlaps_b = conflict_fn (1, ovb);
2159 else if (nb_vars_a == 2 && nb_vars_b == 1)
2160 compute_overlap_steps_for_affine_1_2
2161 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2163 else if (nb_vars_a == 1 && nb_vars_b == 2)
2164 compute_overlap_steps_for_affine_1_2
2165 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2167 else
2169 if (dump_file && (dump_flags & TDF_DETAILS))
2170 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2171 *overlaps_a = conflict_fn_not_known ();
2172 *overlaps_b = conflict_fn_not_known ();
2173 *last_conflicts = chrec_dont_know;
2175 goto end_analyze_subs_aa;
2178 /* U.A = S */
2179 lambda_matrix_right_hermite (A, dim, 1, S, U);
2181 if (S[0][0] < 0)
2183 S[0][0] *= -1;
2184 lambda_matrix_row_negate (U, dim, 0);
2186 gcd_alpha_beta = S[0][0];
2188 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2189 but that is a quite strange case. Instead of ICEing, answer
2190 don't know. */
2191 if (gcd_alpha_beta == 0)
2193 *overlaps_a = conflict_fn_not_known ();
2194 *overlaps_b = conflict_fn_not_known ();
2195 *last_conflicts = chrec_dont_know;
2196 goto end_analyze_subs_aa;
2199 /* The classic "gcd-test". */
2200 if (!int_divides_p (gcd_alpha_beta, gamma))
2202 /* The "gcd-test" has determined that there is no integer
2203 solution, i.e. there is no dependence. */
2204 *overlaps_a = conflict_fn_no_dependence ();
2205 *overlaps_b = conflict_fn_no_dependence ();
2206 *last_conflicts = integer_zero_node;
2209 /* Both access functions are univariate. This includes SIV and MIV cases. */
2210 else if (nb_vars_a == 1 && nb_vars_b == 1)
2212 /* Both functions should have the same evolution sign. */
2213 if (((A[0][0] > 0 && -A[1][0] > 0)
2214 || (A[0][0] < 0 && -A[1][0] < 0)))
2216 /* The solutions are given by:
2218 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2219 | [u21 u22] [y0]
2221 For a given integer t. Using the following variables,
2223 | i0 = u11 * gamma / gcd_alpha_beta
2224 | j0 = u12 * gamma / gcd_alpha_beta
2225 | i1 = u21
2226 | j1 = u22
2228 the solutions are:
2230 | x0 = i0 + i1 * t,
2231 | y0 = j0 + j1 * t. */
2232 HOST_WIDE_INT i0, j0, i1, j1;
2234 i0 = U[0][0] * gamma / gcd_alpha_beta;
2235 j0 = U[0][1] * gamma / gcd_alpha_beta;
2236 i1 = U[1][0];
2237 j1 = U[1][1];
2239 if ((i1 == 0 && i0 < 0)
2240 || (j1 == 0 && j0 < 0))
2242 /* There is no solution.
2243 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2244 falls in here, but for the moment we don't look at the
2245 upper bound of the iteration domain. */
2246 *overlaps_a = conflict_fn_no_dependence ();
2247 *overlaps_b = conflict_fn_no_dependence ();
2248 *last_conflicts = integer_zero_node;
2249 goto end_analyze_subs_aa;
2252 if (i1 > 0 && j1 > 0)
2254 HOST_WIDE_INT niter_a = estimated_loop_iterations_int
2255 (get_chrec_loop (chrec_a), false);
2256 HOST_WIDE_INT niter_b = estimated_loop_iterations_int
2257 (get_chrec_loop (chrec_b), false);
2258 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2260 /* (X0, Y0) is a solution of the Diophantine equation:
2261 "chrec_a (X0) = chrec_b (Y0)". */
2262 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2263 CEIL (-j0, j1));
2264 HOST_WIDE_INT x0 = i1 * tau1 + i0;
2265 HOST_WIDE_INT y0 = j1 * tau1 + j0;
2267 /* (X1, Y1) is the smallest positive solution of the eq
2268 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2269 first conflict occurs. */
2270 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2271 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2272 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2274 if (niter > 0)
2276 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2277 FLOOR_DIV (niter - j0, j1));
2278 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2280 /* If the overlap occurs outside of the bounds of the
2281 loop, there is no dependence. */
2282 if (x1 >= niter || y1 >= niter)
2284 *overlaps_a = conflict_fn_no_dependence ();
2285 *overlaps_b = conflict_fn_no_dependence ();
2286 *last_conflicts = integer_zero_node;
2287 goto end_analyze_subs_aa;
2289 else
2290 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2292 else
2293 *last_conflicts = chrec_dont_know;
2295 *overlaps_a
2296 = conflict_fn (1,
2297 affine_fn_univar (build_int_cst (NULL_TREE, x1),
2299 build_int_cst (NULL_TREE, i1)));
2300 *overlaps_b
2301 = conflict_fn (1,
2302 affine_fn_univar (build_int_cst (NULL_TREE, y1),
2304 build_int_cst (NULL_TREE, j1)));
2306 else
2308 /* FIXME: For the moment, the upper bound of the
2309 iteration domain for i and j is not checked. */
2310 if (dump_file && (dump_flags & TDF_DETAILS))
2311 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2312 *overlaps_a = conflict_fn_not_known ();
2313 *overlaps_b = conflict_fn_not_known ();
2314 *last_conflicts = chrec_dont_know;
2317 else
2319 if (dump_file && (dump_flags & TDF_DETAILS))
2320 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2321 *overlaps_a = conflict_fn_not_known ();
2322 *overlaps_b = conflict_fn_not_known ();
2323 *last_conflicts = chrec_dont_know;
2326 else
2328 if (dump_file && (dump_flags & TDF_DETAILS))
2329 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2330 *overlaps_a = conflict_fn_not_known ();
2331 *overlaps_b = conflict_fn_not_known ();
2332 *last_conflicts = chrec_dont_know;
2335 end_analyze_subs_aa:
2336 if (dump_file && (dump_flags & TDF_DETAILS))
2338 fprintf (dump_file, " (overlaps_a = ");
2339 dump_conflict_function (dump_file, *overlaps_a);
2340 fprintf (dump_file, ")\n (overlaps_b = ");
2341 dump_conflict_function (dump_file, *overlaps_b);
2342 fprintf (dump_file, ")\n");
2343 fprintf (dump_file, ")\n");
2347 /* Returns true when analyze_subscript_affine_affine can be used for
2348 determining the dependence relation between chrec_a and chrec_b,
2349 that contain symbols. This function modifies chrec_a and chrec_b
2350 such that the analysis result is the same, and such that they don't
2351 contain symbols, and then can safely be passed to the analyzer.
2353 Example: The analysis of the following tuples of evolutions produce
2354 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2355 vs. {0, +, 1}_1
2357 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2358 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2361 static bool
2362 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2364 tree diff, type, left_a, left_b, right_b;
2366 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2367 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2368 /* FIXME: For the moment not handled. Might be refined later. */
2369 return false;
2371 type = chrec_type (*chrec_a);
2372 left_a = CHREC_LEFT (*chrec_a);
2373 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
2374 diff = chrec_fold_minus (type, left_a, left_b);
2376 if (!evolution_function_is_constant_p (diff))
2377 return false;
2379 if (dump_file && (dump_flags & TDF_DETAILS))
2380 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2382 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2383 diff, CHREC_RIGHT (*chrec_a));
2384 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
2385 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2386 build_int_cst (type, 0),
2387 right_b);
2388 return true;
2391 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2392 *OVERLAPS_B are initialized to the functions that describe the
2393 relation between the elements accessed twice by CHREC_A and
2394 CHREC_B. For k >= 0, the following property is verified:
2396 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2398 static void
2399 analyze_siv_subscript (tree chrec_a,
2400 tree chrec_b,
2401 conflict_function **overlaps_a,
2402 conflict_function **overlaps_b,
2403 tree *last_conflicts,
2404 int loop_nest_num)
2406 dependence_stats.num_siv++;
2408 if (dump_file && (dump_flags & TDF_DETAILS))
2409 fprintf (dump_file, "(analyze_siv_subscript \n");
2411 if (evolution_function_is_constant_p (chrec_a)
2412 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2413 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2414 overlaps_a, overlaps_b, last_conflicts);
2416 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2417 && evolution_function_is_constant_p (chrec_b))
2418 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2419 overlaps_b, overlaps_a, last_conflicts);
2421 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2422 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2424 if (!chrec_contains_symbols (chrec_a)
2425 && !chrec_contains_symbols (chrec_b))
2427 analyze_subscript_affine_affine (chrec_a, chrec_b,
2428 overlaps_a, overlaps_b,
2429 last_conflicts);
2431 if (CF_NOT_KNOWN_P (*overlaps_a)
2432 || CF_NOT_KNOWN_P (*overlaps_b))
2433 dependence_stats.num_siv_unimplemented++;
2434 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2435 || CF_NO_DEPENDENCE_P (*overlaps_b))
2436 dependence_stats.num_siv_independent++;
2437 else
2438 dependence_stats.num_siv_dependent++;
2440 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2441 &chrec_b))
2443 analyze_subscript_affine_affine (chrec_a, chrec_b,
2444 overlaps_a, overlaps_b,
2445 last_conflicts);
2447 if (CF_NOT_KNOWN_P (*overlaps_a)
2448 || CF_NOT_KNOWN_P (*overlaps_b))
2449 dependence_stats.num_siv_unimplemented++;
2450 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2451 || CF_NO_DEPENDENCE_P (*overlaps_b))
2452 dependence_stats.num_siv_independent++;
2453 else
2454 dependence_stats.num_siv_dependent++;
2456 else
2457 goto siv_subscript_dontknow;
2460 else
2462 siv_subscript_dontknow:;
2463 if (dump_file && (dump_flags & TDF_DETAILS))
2464 fprintf (dump_file, "siv test failed: unimplemented.\n");
2465 *overlaps_a = conflict_fn_not_known ();
2466 *overlaps_b = conflict_fn_not_known ();
2467 *last_conflicts = chrec_dont_know;
2468 dependence_stats.num_siv_unimplemented++;
2471 if (dump_file && (dump_flags & TDF_DETAILS))
2472 fprintf (dump_file, ")\n");
2475 /* Returns false if we can prove that the greatest common divisor of the steps
2476 of CHREC does not divide CST, false otherwise. */
2478 static bool
2479 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2481 HOST_WIDE_INT cd = 0, val;
2482 tree step;
2484 if (!host_integerp (cst, 0))
2485 return true;
2486 val = tree_low_cst (cst, 0);
2488 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2490 step = CHREC_RIGHT (chrec);
2491 if (!host_integerp (step, 0))
2492 return true;
2493 cd = gcd (cd, tree_low_cst (step, 0));
2494 chrec = CHREC_LEFT (chrec);
2497 return val % cd == 0;
2500 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2501 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2502 functions that describe the relation between the elements accessed
2503 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2504 is verified:
2506 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2508 static void
2509 analyze_miv_subscript (tree chrec_a,
2510 tree chrec_b,
2511 conflict_function **overlaps_a,
2512 conflict_function **overlaps_b,
2513 tree *last_conflicts,
2514 struct loop *loop_nest)
2516 /* FIXME: This is a MIV subscript, not yet handled.
2517 Example: (A[{1, +, 1}_1] vs. A[{1, +, 1}_2]) that comes from
2518 (A[i] vs. A[j]).
2520 In the SIV test we had to solve a Diophantine equation with two
2521 variables. In the MIV case we have to solve a Diophantine
2522 equation with 2*n variables (if the subscript uses n IVs).
2524 tree type, difference;
2526 dependence_stats.num_miv++;
2527 if (dump_file && (dump_flags & TDF_DETAILS))
2528 fprintf (dump_file, "(analyze_miv_subscript \n");
2530 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2531 chrec_a = chrec_convert (type, chrec_a, NULL);
2532 chrec_b = chrec_convert (type, chrec_b, NULL);
2533 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2535 if (eq_evolutions_p (chrec_a, chrec_b))
2537 /* Access functions are the same: all the elements are accessed
2538 in the same order. */
2539 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2540 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2541 *last_conflicts = estimated_loop_iterations_tree
2542 (get_chrec_loop (chrec_a), true);
2543 dependence_stats.num_miv_dependent++;
2546 else if (evolution_function_is_constant_p (difference)
2547 /* For the moment, the following is verified:
2548 evolution_function_is_affine_multivariate_p (chrec_a,
2549 loop_nest->num) */
2550 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2552 /* testsuite/.../ssa-chrec-33.c
2553 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2555 The difference is 1, and all the evolution steps are multiples
2556 of 2, consequently there are no overlapping elements. */
2557 *overlaps_a = conflict_fn_no_dependence ();
2558 *overlaps_b = conflict_fn_no_dependence ();
2559 *last_conflicts = integer_zero_node;
2560 dependence_stats.num_miv_independent++;
2563 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2564 && !chrec_contains_symbols (chrec_a)
2565 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2566 && !chrec_contains_symbols (chrec_b))
2568 /* testsuite/.../ssa-chrec-35.c
2569 {0, +, 1}_2 vs. {0, +, 1}_3
2570 the overlapping elements are respectively located at iterations:
2571 {0, +, 1}_x and {0, +, 1}_x,
2572 in other words, we have the equality:
2573 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2575 Other examples:
2576 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2577 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2579 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2580 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2582 analyze_subscript_affine_affine (chrec_a, chrec_b,
2583 overlaps_a, overlaps_b, last_conflicts);
2585 if (CF_NOT_KNOWN_P (*overlaps_a)
2586 || CF_NOT_KNOWN_P (*overlaps_b))
2587 dependence_stats.num_miv_unimplemented++;
2588 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2589 || CF_NO_DEPENDENCE_P (*overlaps_b))
2590 dependence_stats.num_miv_independent++;
2591 else
2592 dependence_stats.num_miv_dependent++;
2595 else
2597 /* When the analysis is too difficult, answer "don't know". */
2598 if (dump_file && (dump_flags & TDF_DETAILS))
2599 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2601 *overlaps_a = conflict_fn_not_known ();
2602 *overlaps_b = conflict_fn_not_known ();
2603 *last_conflicts = chrec_dont_know;
2604 dependence_stats.num_miv_unimplemented++;
2607 if (dump_file && (dump_flags & TDF_DETAILS))
2608 fprintf (dump_file, ")\n");
2611 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2612 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2613 OVERLAP_ITERATIONS_B are initialized with two functions that
2614 describe the iterations that contain conflicting elements.
2616 Remark: For an integer k >= 0, the following equality is true:
2618 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2621 static void
2622 analyze_overlapping_iterations (tree chrec_a,
2623 tree chrec_b,
2624 conflict_function **overlap_iterations_a,
2625 conflict_function **overlap_iterations_b,
2626 tree *last_conflicts, struct loop *loop_nest)
2628 unsigned int lnn = loop_nest->num;
2630 dependence_stats.num_subscript_tests++;
2632 if (dump_file && (dump_flags & TDF_DETAILS))
2634 fprintf (dump_file, "(analyze_overlapping_iterations \n");
2635 fprintf (dump_file, " (chrec_a = ");
2636 print_generic_expr (dump_file, chrec_a, 0);
2637 fprintf (dump_file, ")\n (chrec_b = ");
2638 print_generic_expr (dump_file, chrec_b, 0);
2639 fprintf (dump_file, ")\n");
2642 if (chrec_a == NULL_TREE
2643 || chrec_b == NULL_TREE
2644 || chrec_contains_undetermined (chrec_a)
2645 || chrec_contains_undetermined (chrec_b))
2647 dependence_stats.num_subscript_undetermined++;
2649 *overlap_iterations_a = conflict_fn_not_known ();
2650 *overlap_iterations_b = conflict_fn_not_known ();
2653 /* If they are the same chrec, and are affine, they overlap
2654 on every iteration. */
2655 else if (eq_evolutions_p (chrec_a, chrec_b)
2656 && evolution_function_is_affine_multivariate_p (chrec_a, lnn))
2658 dependence_stats.num_same_subscript_function++;
2659 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2660 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2661 *last_conflicts = chrec_dont_know;
2664 /* If they aren't the same, and aren't affine, we can't do anything
2665 yet. */
2666 else if ((chrec_contains_symbols (chrec_a)
2667 || chrec_contains_symbols (chrec_b))
2668 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2669 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
2671 dependence_stats.num_subscript_undetermined++;
2672 *overlap_iterations_a = conflict_fn_not_known ();
2673 *overlap_iterations_b = conflict_fn_not_known ();
2676 else if (ziv_subscript_p (chrec_a, chrec_b))
2677 analyze_ziv_subscript (chrec_a, chrec_b,
2678 overlap_iterations_a, overlap_iterations_b,
2679 last_conflicts);
2681 else if (siv_subscript_p (chrec_a, chrec_b))
2682 analyze_siv_subscript (chrec_a, chrec_b,
2683 overlap_iterations_a, overlap_iterations_b,
2684 last_conflicts, lnn);
2686 else
2687 analyze_miv_subscript (chrec_a, chrec_b,
2688 overlap_iterations_a, overlap_iterations_b,
2689 last_conflicts, loop_nest);
2691 if (dump_file && (dump_flags & TDF_DETAILS))
2693 fprintf (dump_file, " (overlap_iterations_a = ");
2694 dump_conflict_function (dump_file, *overlap_iterations_a);
2695 fprintf (dump_file, ")\n (overlap_iterations_b = ");
2696 dump_conflict_function (dump_file, *overlap_iterations_b);
2697 fprintf (dump_file, ")\n");
2698 fprintf (dump_file, ")\n");
2702 /* Helper function for uniquely inserting distance vectors. */
2704 static void
2705 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
2707 unsigned i;
2708 lambda_vector v;
2710 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, v); i++)
2711 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
2712 return;
2714 VEC_safe_push (lambda_vector, heap, DDR_DIST_VECTS (ddr), dist_v);
2717 /* Helper function for uniquely inserting direction vectors. */
2719 static void
2720 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
2722 unsigned i;
2723 lambda_vector v;
2725 for (i = 0; VEC_iterate (lambda_vector, DDR_DIR_VECTS (ddr), i, v); i++)
2726 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
2727 return;
2729 VEC_safe_push (lambda_vector, heap, DDR_DIR_VECTS (ddr), dir_v);
2732 /* Add a distance of 1 on all the loops outer than INDEX. If we
2733 haven't yet determined a distance for this outer loop, push a new
2734 distance vector composed of the previous distance, and a distance
2735 of 1 for this outer loop. Example:
2737 | loop_1
2738 | loop_2
2739 | A[10]
2740 | endloop_2
2741 | endloop_1
2743 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
2744 save (0, 1), then we have to save (1, 0). */
2746 static void
2747 add_outer_distances (struct data_dependence_relation *ddr,
2748 lambda_vector dist_v, int index)
2750 /* For each outer loop where init_v is not set, the accesses are
2751 in dependence of distance 1 in the loop. */
2752 while (--index >= 0)
2754 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2755 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
2756 save_v[index] = 1;
2757 save_dist_v (ddr, save_v);
2761 /* Return false when fail to represent the data dependence as a
2762 distance vector. INIT_B is set to true when a component has been
2763 added to the distance vector DIST_V. INDEX_CARRY is then set to
2764 the index in DIST_V that carries the dependence. */
2766 static bool
2767 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
2768 struct data_reference *ddr_a,
2769 struct data_reference *ddr_b,
2770 lambda_vector dist_v, bool *init_b,
2771 int *index_carry)
2773 unsigned i;
2774 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2776 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2778 tree access_fn_a, access_fn_b;
2779 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
2781 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2783 non_affine_dependence_relation (ddr);
2784 return false;
2787 access_fn_a = DR_ACCESS_FN (ddr_a, i);
2788 access_fn_b = DR_ACCESS_FN (ddr_b, i);
2790 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
2791 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
2793 int dist, index;
2794 int index_a = index_in_loop_nest (CHREC_VARIABLE (access_fn_a),
2795 DDR_LOOP_NEST (ddr));
2796 int index_b = index_in_loop_nest (CHREC_VARIABLE (access_fn_b),
2797 DDR_LOOP_NEST (ddr));
2799 /* The dependence is carried by the outermost loop. Example:
2800 | loop_1
2801 | A[{4, +, 1}_1]
2802 | loop_2
2803 | A[{5, +, 1}_2]
2804 | endloop_2
2805 | endloop_1
2806 In this case, the dependence is carried by loop_1. */
2807 index = index_a < index_b ? index_a : index_b;
2808 *index_carry = MIN (index, *index_carry);
2810 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2812 non_affine_dependence_relation (ddr);
2813 return false;
2816 dist = int_cst_value (SUB_DISTANCE (subscript));
2818 /* This is the subscript coupling test. If we have already
2819 recorded a distance for this loop (a distance coming from
2820 another subscript), it should be the same. For example,
2821 in the following code, there is no dependence:
2823 | loop i = 0, N, 1
2824 | T[i+1][i] = ...
2825 | ... = T[i][i]
2826 | endloop
2828 if (init_v[index] != 0 && dist_v[index] != dist)
2830 finalize_ddr_dependent (ddr, chrec_known);
2831 return false;
2834 dist_v[index] = dist;
2835 init_v[index] = 1;
2836 *init_b = true;
2838 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
2840 /* This can be for example an affine vs. constant dependence
2841 (T[i] vs. T[3]) that is not an affine dependence and is
2842 not representable as a distance vector. */
2843 non_affine_dependence_relation (ddr);
2844 return false;
2848 return true;
2851 /* Return true when the DDR contains only constant access functions. */
2853 static bool
2854 constant_access_functions (const struct data_dependence_relation *ddr)
2856 unsigned i;
2858 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2859 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
2860 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
2861 return false;
2863 return true;
2866 /* Helper function for the case where DDR_A and DDR_B are the same
2867 multivariate access function with a constant step. For an example
2868 see pr34635-1.c. */
2870 static void
2871 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
2873 int x_1, x_2;
2874 tree c_1 = CHREC_LEFT (c_2);
2875 tree c_0 = CHREC_LEFT (c_1);
2876 lambda_vector dist_v;
2877 int v1, v2, cd;
2879 /* Polynomials with more than 2 variables are not handled yet. When
2880 the evolution steps are parameters, it is not possible to
2881 represent the dependence using classical distance vectors. */
2882 if (TREE_CODE (c_0) != INTEGER_CST
2883 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
2884 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
2886 DDR_AFFINE_P (ddr) = false;
2887 return;
2890 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
2891 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
2893 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
2894 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2895 v1 = int_cst_value (CHREC_RIGHT (c_1));
2896 v2 = int_cst_value (CHREC_RIGHT (c_2));
2897 cd = gcd (v1, v2);
2898 v1 /= cd;
2899 v2 /= cd;
2901 if (v2 < 0)
2903 v2 = -v2;
2904 v1 = -v1;
2907 dist_v[x_1] = v2;
2908 dist_v[x_2] = -v1;
2909 save_dist_v (ddr, dist_v);
2911 add_outer_distances (ddr, dist_v, x_1);
2914 /* Helper function for the case where DDR_A and DDR_B are the same
2915 access functions. */
2917 static void
2918 add_other_self_distances (struct data_dependence_relation *ddr)
2920 lambda_vector dist_v;
2921 unsigned i;
2922 int index_carry = DDR_NB_LOOPS (ddr);
2924 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2926 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
2928 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
2930 if (!evolution_function_is_univariate_p (access_fun))
2932 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
2934 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
2935 return;
2938 access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
2940 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
2941 add_multivariate_self_dist (ddr, access_fun);
2942 else
2943 /* The evolution step is not constant: it varies in
2944 the outer loop, so this cannot be represented by a
2945 distance vector. For example in pr34635.c the
2946 evolution is {0, +, {0, +, 4}_1}_2. */
2947 DDR_AFFINE_P (ddr) = false;
2949 return;
2952 index_carry = MIN (index_carry,
2953 index_in_loop_nest (CHREC_VARIABLE (access_fun),
2954 DDR_LOOP_NEST (ddr)));
2958 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2959 add_outer_distances (ddr, dist_v, index_carry);
2962 static void
2963 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
2965 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2967 dist_v[DDR_INNER_LOOP (ddr)] = 1;
2968 save_dist_v (ddr, dist_v);
2971 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
2972 is the case for example when access functions are the same and
2973 equal to a constant, as in:
2975 | loop_1
2976 | A[3] = ...
2977 | ... = A[3]
2978 | endloop_1
2980 in which case the distance vectors are (0) and (1). */
2982 static void
2983 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
2985 unsigned i, j;
2987 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2989 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
2990 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
2991 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
2993 for (j = 0; j < ca->n; j++)
2994 if (affine_function_zero_p (ca->fns[j]))
2996 insert_innermost_unit_dist_vector (ddr);
2997 return;
3000 for (j = 0; j < cb->n; j++)
3001 if (affine_function_zero_p (cb->fns[j]))
3003 insert_innermost_unit_dist_vector (ddr);
3004 return;
3009 /* Compute the classic per loop distance vector. DDR is the data
3010 dependence relation to build a vector from. Return false when fail
3011 to represent the data dependence as a distance vector. */
3013 static bool
3014 build_classic_dist_vector (struct data_dependence_relation *ddr,
3015 struct loop *loop_nest)
3017 bool init_b = false;
3018 int index_carry = DDR_NB_LOOPS (ddr);
3019 lambda_vector dist_v;
3021 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3022 return false;
3024 if (same_access_functions (ddr))
3026 /* Save the 0 vector. */
3027 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3028 save_dist_v (ddr, dist_v);
3030 if (constant_access_functions (ddr))
3031 add_distance_for_zero_overlaps (ddr);
3033 if (DDR_NB_LOOPS (ddr) > 1)
3034 add_other_self_distances (ddr);
3036 return true;
3039 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3040 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3041 dist_v, &init_b, &index_carry))
3042 return false;
3044 /* Save the distance vector if we initialized one. */
3045 if (init_b)
3047 /* Verify a basic constraint: classic distance vectors should
3048 always be lexicographically positive.
3050 Data references are collected in the order of execution of
3051 the program, thus for the following loop
3053 | for (i = 1; i < 100; i++)
3054 | for (j = 1; j < 100; j++)
3056 | t = T[j+1][i-1]; // A
3057 | T[j][i] = t + 2; // B
3060 references are collected following the direction of the wind:
3061 A then B. The data dependence tests are performed also
3062 following this order, such that we're looking at the distance
3063 separating the elements accessed by A from the elements later
3064 accessed by B. But in this example, the distance returned by
3065 test_dep (A, B) is lexicographically negative (-1, 1), that
3066 means that the access A occurs later than B with respect to
3067 the outer loop, ie. we're actually looking upwind. In this
3068 case we solve test_dep (B, A) looking downwind to the
3069 lexicographically positive solution, that returns the
3070 distance vector (1, -1). */
3071 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3073 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3074 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3075 loop_nest))
3076 return false;
3077 compute_subscript_distance (ddr);
3078 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3079 save_v, &init_b, &index_carry))
3080 return false;
3081 save_dist_v (ddr, save_v);
3082 DDR_REVERSED_P (ddr) = true;
3084 /* In this case there is a dependence forward for all the
3085 outer loops:
3087 | for (k = 1; k < 100; k++)
3088 | for (i = 1; i < 100; i++)
3089 | for (j = 1; j < 100; j++)
3091 | t = T[j+1][i-1]; // A
3092 | T[j][i] = t + 2; // B
3095 the vectors are:
3096 (0, 1, -1)
3097 (1, 1, -1)
3098 (1, -1, 1)
3100 if (DDR_NB_LOOPS (ddr) > 1)
3102 add_outer_distances (ddr, save_v, index_carry);
3103 add_outer_distances (ddr, dist_v, index_carry);
3106 else
3108 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3109 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3111 if (DDR_NB_LOOPS (ddr) > 1)
3113 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3115 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3116 DDR_A (ddr), loop_nest))
3117 return false;
3118 compute_subscript_distance (ddr);
3119 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3120 opposite_v, &init_b,
3121 &index_carry))
3122 return false;
3124 save_dist_v (ddr, save_v);
3125 add_outer_distances (ddr, dist_v, index_carry);
3126 add_outer_distances (ddr, opposite_v, index_carry);
3128 else
3129 save_dist_v (ddr, save_v);
3132 else
3134 /* There is a distance of 1 on all the outer loops: Example:
3135 there is a dependence of distance 1 on loop_1 for the array A.
3137 | loop_1
3138 | A[5] = ...
3139 | endloop
3141 add_outer_distances (ddr, dist_v,
3142 lambda_vector_first_nz (dist_v,
3143 DDR_NB_LOOPS (ddr), 0));
3146 if (dump_file && (dump_flags & TDF_DETAILS))
3148 unsigned i;
3150 fprintf (dump_file, "(build_classic_dist_vector\n");
3151 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3153 fprintf (dump_file, " dist_vector = (");
3154 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3155 DDR_NB_LOOPS (ddr));
3156 fprintf (dump_file, " )\n");
3158 fprintf (dump_file, ")\n");
3161 return true;
3164 /* Return the direction for a given distance.
3165 FIXME: Computing dir this way is suboptimal, since dir can catch
3166 cases that dist is unable to represent. */
3168 static inline enum data_dependence_direction
3169 dir_from_dist (int dist)
3171 if (dist > 0)
3172 return dir_positive;
3173 else if (dist < 0)
3174 return dir_negative;
3175 else
3176 return dir_equal;
3179 /* Compute the classic per loop direction vector. DDR is the data
3180 dependence relation to build a vector from. */
3182 static void
3183 build_classic_dir_vector (struct data_dependence_relation *ddr)
3185 unsigned i, j;
3186 lambda_vector dist_v;
3188 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
3190 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3192 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3193 dir_v[j] = dir_from_dist (dist_v[j]);
3195 save_dir_v (ddr, dir_v);
3199 /* Helper function. Returns true when there is a dependence between
3200 data references DRA and DRB. */
3202 static bool
3203 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3204 struct data_reference *dra,
3205 struct data_reference *drb,
3206 struct loop *loop_nest)
3208 unsigned int i;
3209 tree last_conflicts;
3210 struct subscript *subscript;
3212 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3213 i++)
3215 conflict_function *overlaps_a, *overlaps_b;
3217 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3218 DR_ACCESS_FN (drb, i),
3219 &overlaps_a, &overlaps_b,
3220 &last_conflicts, loop_nest);
3222 if (CF_NOT_KNOWN_P (overlaps_a)
3223 || CF_NOT_KNOWN_P (overlaps_b))
3225 finalize_ddr_dependent (ddr, chrec_dont_know);
3226 dependence_stats.num_dependence_undetermined++;
3227 free_conflict_function (overlaps_a);
3228 free_conflict_function (overlaps_b);
3229 return false;
3232 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3233 || CF_NO_DEPENDENCE_P (overlaps_b))
3235 finalize_ddr_dependent (ddr, chrec_known);
3236 dependence_stats.num_dependence_independent++;
3237 free_conflict_function (overlaps_a);
3238 free_conflict_function (overlaps_b);
3239 return false;
3242 else
3244 if (SUB_CONFLICTS_IN_A (subscript))
3245 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3246 if (SUB_CONFLICTS_IN_B (subscript))
3247 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3249 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3250 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3251 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3255 return true;
3258 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3260 static void
3261 subscript_dependence_tester (struct data_dependence_relation *ddr,
3262 struct loop *loop_nest)
3265 if (dump_file && (dump_flags & TDF_DETAILS))
3266 fprintf (dump_file, "(subscript_dependence_tester \n");
3268 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3269 dependence_stats.num_dependence_dependent++;
3271 compute_subscript_distance (ddr);
3272 if (build_classic_dist_vector (ddr, loop_nest))
3273 build_classic_dir_vector (ddr);
3275 if (dump_file && (dump_flags & TDF_DETAILS))
3276 fprintf (dump_file, ")\n");
3279 /* Returns true when all the access functions of A are affine or
3280 constant with respect to LOOP_NEST. */
3282 static bool
3283 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3284 const struct loop *loop_nest)
3286 unsigned int i;
3287 VEC(tree,heap) *fns = DR_ACCESS_FNS (a);
3288 tree t;
3290 for (i = 0; VEC_iterate (tree, fns, i, t); i++)
3291 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3292 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3293 return false;
3295 return true;
3298 /* Return true if we can create an affine data-ref for OP in STMT. */
3300 bool
3301 stmt_simple_memref_p (struct loop *loop, gimple stmt, tree op)
3303 data_reference_p dr;
3304 bool res = true;
3306 dr = create_data_ref (loop, op, stmt, true);
3307 if (!access_functions_are_affine_or_constant_p (dr, loop))
3308 res = false;
3310 free_data_ref (dr);
3311 return res;
3314 /* Initializes an equation for an OMEGA problem using the information
3315 contained in the ACCESS_FUN. Returns true when the operation
3316 succeeded.
3318 PB is the omega constraint system.
3319 EQ is the number of the equation to be initialized.
3320 OFFSET is used for shifting the variables names in the constraints:
3321 a constrain is composed of 2 * the number of variables surrounding
3322 dependence accesses. OFFSET is set either to 0 for the first n variables,
3323 then it is set to n.
3324 ACCESS_FUN is expected to be an affine chrec. */
3326 static bool
3327 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3328 unsigned int offset, tree access_fun,
3329 struct data_dependence_relation *ddr)
3331 switch (TREE_CODE (access_fun))
3333 case POLYNOMIAL_CHREC:
3335 tree left = CHREC_LEFT (access_fun);
3336 tree right = CHREC_RIGHT (access_fun);
3337 int var = CHREC_VARIABLE (access_fun);
3338 unsigned var_idx;
3340 if (TREE_CODE (right) != INTEGER_CST)
3341 return false;
3343 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3344 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3346 /* Compute the innermost loop index. */
3347 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3349 if (offset == 0)
3350 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3351 += int_cst_value (right);
3353 switch (TREE_CODE (left))
3355 case POLYNOMIAL_CHREC:
3356 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3358 case INTEGER_CST:
3359 pb->eqs[eq].coef[0] += int_cst_value (left);
3360 return true;
3362 default:
3363 return false;
3367 case INTEGER_CST:
3368 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3369 return true;
3371 default:
3372 return false;
3376 /* As explained in the comments preceding init_omega_for_ddr, we have
3377 to set up a system for each loop level, setting outer loops
3378 variation to zero, and current loop variation to positive or zero.
3379 Save each lexico positive distance vector. */
3381 static void
3382 omega_extract_distance_vectors (omega_pb pb,
3383 struct data_dependence_relation *ddr)
3385 int eq, geq;
3386 unsigned i, j;
3387 struct loop *loopi, *loopj;
3388 enum omega_result res;
3390 /* Set a new problem for each loop in the nest. The basis is the
3391 problem that we have initialized until now. On top of this we
3392 add new constraints. */
3393 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3394 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3396 int dist = 0;
3397 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3398 DDR_NB_LOOPS (ddr));
3400 omega_copy_problem (copy, pb);
3402 /* For all the outer loops "loop_j", add "dj = 0". */
3403 for (j = 0;
3404 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3406 eq = omega_add_zero_eq (copy, omega_black);
3407 copy->eqs[eq].coef[j + 1] = 1;
3410 /* For "loop_i", add "0 <= di". */
3411 geq = omega_add_zero_geq (copy, omega_black);
3412 copy->geqs[geq].coef[i + 1] = 1;
3414 /* Reduce the constraint system, and test that the current
3415 problem is feasible. */
3416 res = omega_simplify_problem (copy);
3417 if (res == omega_false
3418 || res == omega_unknown
3419 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3420 goto next_problem;
3422 for (eq = 0; eq < copy->num_subs; eq++)
3423 if (copy->subs[eq].key == (int) i + 1)
3425 dist = copy->subs[eq].coef[0];
3426 goto found_dist;
3429 if (dist == 0)
3431 /* Reinitialize problem... */
3432 omega_copy_problem (copy, pb);
3433 for (j = 0;
3434 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3436 eq = omega_add_zero_eq (copy, omega_black);
3437 copy->eqs[eq].coef[j + 1] = 1;
3440 /* ..., but this time "di = 1". */
3441 eq = omega_add_zero_eq (copy, omega_black);
3442 copy->eqs[eq].coef[i + 1] = 1;
3443 copy->eqs[eq].coef[0] = -1;
3445 res = omega_simplify_problem (copy);
3446 if (res == omega_false
3447 || res == omega_unknown
3448 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3449 goto next_problem;
3451 for (eq = 0; eq < copy->num_subs; eq++)
3452 if (copy->subs[eq].key == (int) i + 1)
3454 dist = copy->subs[eq].coef[0];
3455 goto found_dist;
3459 found_dist:;
3460 /* Save the lexicographically positive distance vector. */
3461 if (dist >= 0)
3463 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3464 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3466 dist_v[i] = dist;
3468 for (eq = 0; eq < copy->num_subs; eq++)
3469 if (copy->subs[eq].key > 0)
3471 dist = copy->subs[eq].coef[0];
3472 dist_v[copy->subs[eq].key - 1] = dist;
3475 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3476 dir_v[j] = dir_from_dist (dist_v[j]);
3478 save_dist_v (ddr, dist_v);
3479 save_dir_v (ddr, dir_v);
3482 next_problem:;
3483 omega_free_problem (copy);
3487 /* This is called for each subscript of a tuple of data references:
3488 insert an equality for representing the conflicts. */
3490 static bool
3491 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3492 struct data_dependence_relation *ddr,
3493 omega_pb pb, bool *maybe_dependent)
3495 int eq;
3496 tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3497 TREE_TYPE (access_fun_b));
3498 tree fun_a = chrec_convert (type, access_fun_a, NULL);
3499 tree fun_b = chrec_convert (type, access_fun_b, NULL);
3500 tree difference = chrec_fold_minus (type, fun_a, fun_b);
3502 /* When the fun_a - fun_b is not constant, the dependence is not
3503 captured by the classic distance vector representation. */
3504 if (TREE_CODE (difference) != INTEGER_CST)
3505 return false;
3507 /* ZIV test. */
3508 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3510 /* There is no dependence. */
3511 *maybe_dependent = false;
3512 return true;
3515 fun_b = chrec_fold_multiply (type, fun_b, integer_minus_one_node);
3517 eq = omega_add_zero_eq (pb, omega_black);
3518 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3519 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3520 /* There is probably a dependence, but the system of
3521 constraints cannot be built: answer "don't know". */
3522 return false;
3524 /* GCD test. */
3525 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3526 && !int_divides_p (lambda_vector_gcd
3527 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3528 2 * DDR_NB_LOOPS (ddr)),
3529 pb->eqs[eq].coef[0]))
3531 /* There is no dependence. */
3532 *maybe_dependent = false;
3533 return true;
3536 return true;
3539 /* Helper function, same as init_omega_for_ddr but specialized for
3540 data references A and B. */
3542 static bool
3543 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3544 struct data_dependence_relation *ddr,
3545 omega_pb pb, bool *maybe_dependent)
3547 unsigned i;
3548 int ineq;
3549 struct loop *loopi;
3550 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3552 /* Insert an equality per subscript. */
3553 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3555 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3556 ddr, pb, maybe_dependent))
3557 return false;
3558 else if (*maybe_dependent == false)
3560 /* There is no dependence. */
3561 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3562 return true;
3566 /* Insert inequalities: constraints corresponding to the iteration
3567 domain, i.e. the loops surrounding the references "loop_x" and
3568 the distance variables "dx". The layout of the OMEGA
3569 representation is as follows:
3570 - coef[0] is the constant
3571 - coef[1..nb_loops] are the protected variables that will not be
3572 removed by the solver: the "dx"
3573 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3575 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3576 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3578 HOST_WIDE_INT nbi = estimated_loop_iterations_int (loopi, false);
3580 /* 0 <= loop_x */
3581 ineq = omega_add_zero_geq (pb, omega_black);
3582 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3584 /* 0 <= loop_x + dx */
3585 ineq = omega_add_zero_geq (pb, omega_black);
3586 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3587 pb->geqs[ineq].coef[i + 1] = 1;
3589 if (nbi != -1)
3591 /* loop_x <= nb_iters */
3592 ineq = omega_add_zero_geq (pb, omega_black);
3593 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3594 pb->geqs[ineq].coef[0] = nbi;
3596 /* loop_x + dx <= nb_iters */
3597 ineq = omega_add_zero_geq (pb, omega_black);
3598 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3599 pb->geqs[ineq].coef[i + 1] = -1;
3600 pb->geqs[ineq].coef[0] = nbi;
3602 /* A step "dx" bigger than nb_iters is not feasible, so
3603 add "0 <= nb_iters + dx", */
3604 ineq = omega_add_zero_geq (pb, omega_black);
3605 pb->geqs[ineq].coef[i + 1] = 1;
3606 pb->geqs[ineq].coef[0] = nbi;
3607 /* and "dx <= nb_iters". */
3608 ineq = omega_add_zero_geq (pb, omega_black);
3609 pb->geqs[ineq].coef[i + 1] = -1;
3610 pb->geqs[ineq].coef[0] = nbi;
3614 omega_extract_distance_vectors (pb, ddr);
3616 return true;
3619 /* Sets up the Omega dependence problem for the data dependence
3620 relation DDR. Returns false when the constraint system cannot be
3621 built, ie. when the test answers "don't know". Returns true
3622 otherwise, and when independence has been proved (using one of the
3623 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3624 set MAYBE_DEPENDENT to true.
3626 Example: for setting up the dependence system corresponding to the
3627 conflicting accesses
3629 | loop_i
3630 | loop_j
3631 | A[i, i+1] = ...
3632 | ... A[2*j, 2*(i + j)]
3633 | endloop_j
3634 | endloop_i
3636 the following constraints come from the iteration domain:
3638 0 <= i <= Ni
3639 0 <= i + di <= Ni
3640 0 <= j <= Nj
3641 0 <= j + dj <= Nj
3643 where di, dj are the distance variables. The constraints
3644 representing the conflicting elements are:
3646 i = 2 * (j + dj)
3647 i + 1 = 2 * (i + di + j + dj)
3649 For asking that the resulting distance vector (di, dj) be
3650 lexicographically positive, we insert the constraint "di >= 0". If
3651 "di = 0" in the solution, we fix that component to zero, and we
3652 look at the inner loops: we set a new problem where all the outer
3653 loop distances are zero, and fix this inner component to be
3654 positive. When one of the components is positive, we save that
3655 distance, and set a new problem where the distance on this loop is
3656 zero, searching for other distances in the inner loops. Here is
3657 the classic example that illustrates that we have to set for each
3658 inner loop a new problem:
3660 | loop_1
3661 | loop_2
3662 | A[10]
3663 | endloop_2
3664 | endloop_1
3666 we have to save two distances (1, 0) and (0, 1).
3668 Given two array references, refA and refB, we have to set the
3669 dependence problem twice, refA vs. refB and refB vs. refA, and we
3670 cannot do a single test, as refB might occur before refA in the
3671 inner loops, and the contrary when considering outer loops: ex.
3673 | loop_0
3674 | loop_1
3675 | loop_2
3676 | T[{1,+,1}_2][{1,+,1}_1] // refA
3677 | T[{2,+,1}_2][{0,+,1}_1] // refB
3678 | endloop_2
3679 | endloop_1
3680 | endloop_0
3682 refB touches the elements in T before refA, and thus for the same
3683 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3684 but for successive loop_0 iterations, we have (1, -1, 1)
3686 The Omega solver expects the distance variables ("di" in the
3687 previous example) to come first in the constraint system (as
3688 variables to be protected, or "safe" variables), the constraint
3689 system is built using the following layout:
3691 "cst | distance vars | index vars".
3694 static bool
3695 init_omega_for_ddr (struct data_dependence_relation *ddr,
3696 bool *maybe_dependent)
3698 omega_pb pb;
3699 bool res = false;
3701 *maybe_dependent = true;
3703 if (same_access_functions (ddr))
3705 unsigned j;
3706 lambda_vector dir_v;
3708 /* Save the 0 vector. */
3709 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3710 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3711 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3712 dir_v[j] = dir_equal;
3713 save_dir_v (ddr, dir_v);
3715 /* Save the dependences carried by outer loops. */
3716 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3717 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3718 maybe_dependent);
3719 omega_free_problem (pb);
3720 return res;
3723 /* Omega expects the protected variables (those that have to be kept
3724 after elimination) to appear first in the constraint system.
3725 These variables are the distance variables. In the following
3726 initialization we declare NB_LOOPS safe variables, and the total
3727 number of variables for the constraint system is 2*NB_LOOPS. */
3728 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3729 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3730 maybe_dependent);
3731 omega_free_problem (pb);
3733 /* Stop computation if not decidable, or no dependence. */
3734 if (res == false || *maybe_dependent == false)
3735 return res;
3737 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3738 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
3739 maybe_dependent);
3740 omega_free_problem (pb);
3742 return res;
3745 /* Return true when DDR contains the same information as that stored
3746 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
3748 static bool
3749 ddr_consistent_p (FILE *file,
3750 struct data_dependence_relation *ddr,
3751 VEC (lambda_vector, heap) *dist_vects,
3752 VEC (lambda_vector, heap) *dir_vects)
3754 unsigned int i, j;
3756 /* If dump_file is set, output there. */
3757 if (dump_file && (dump_flags & TDF_DETAILS))
3758 file = dump_file;
3760 if (VEC_length (lambda_vector, dist_vects) != DDR_NUM_DIST_VECTS (ddr))
3762 lambda_vector b_dist_v;
3763 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
3764 VEC_length (lambda_vector, dist_vects),
3765 DDR_NUM_DIST_VECTS (ddr));
3767 fprintf (file, "Banerjee dist vectors:\n");
3768 for (i = 0; VEC_iterate (lambda_vector, dist_vects, i, b_dist_v); i++)
3769 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
3771 fprintf (file, "Omega dist vectors:\n");
3772 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3773 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
3775 fprintf (file, "data dependence relation:\n");
3776 dump_data_dependence_relation (file, ddr);
3778 fprintf (file, ")\n");
3779 return false;
3782 if (VEC_length (lambda_vector, dir_vects) != DDR_NUM_DIR_VECTS (ddr))
3784 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
3785 VEC_length (lambda_vector, dir_vects),
3786 DDR_NUM_DIR_VECTS (ddr));
3787 return false;
3790 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3792 lambda_vector a_dist_v;
3793 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
3795 /* Distance vectors are not ordered in the same way in the DDR
3796 and in the DIST_VECTS: search for a matching vector. */
3797 for (j = 0; VEC_iterate (lambda_vector, dist_vects, j, a_dist_v); j++)
3798 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
3799 break;
3801 if (j == VEC_length (lambda_vector, dist_vects))
3803 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
3804 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
3805 fprintf (file, "not found in Omega dist vectors:\n");
3806 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
3807 fprintf (file, "data dependence relation:\n");
3808 dump_data_dependence_relation (file, ddr);
3809 fprintf (file, ")\n");
3813 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
3815 lambda_vector a_dir_v;
3816 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
3818 /* Direction vectors are not ordered in the same way in the DDR
3819 and in the DIR_VECTS: search for a matching vector. */
3820 for (j = 0; VEC_iterate (lambda_vector, dir_vects, j, a_dir_v); j++)
3821 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
3822 break;
3824 if (j == VEC_length (lambda_vector, dist_vects))
3826 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
3827 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
3828 fprintf (file, "not found in Omega dir vectors:\n");
3829 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
3830 fprintf (file, "data dependence relation:\n");
3831 dump_data_dependence_relation (file, ddr);
3832 fprintf (file, ")\n");
3836 return true;
3839 /* This computes the affine dependence relation between A and B with
3840 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
3841 independence between two accesses, while CHREC_DONT_KNOW is used
3842 for representing the unknown relation.
3844 Note that it is possible to stop the computation of the dependence
3845 relation the first time we detect a CHREC_KNOWN element for a given
3846 subscript. */
3848 static void
3849 compute_affine_dependence (struct data_dependence_relation *ddr,
3850 struct loop *loop_nest)
3852 struct data_reference *dra = DDR_A (ddr);
3853 struct data_reference *drb = DDR_B (ddr);
3855 if (dump_file && (dump_flags & TDF_DETAILS))
3857 fprintf (dump_file, "(compute_affine_dependence\n");
3858 fprintf (dump_file, " (stmt_a = \n");
3859 print_gimple_stmt (dump_file, DR_STMT (dra), 0, 0);
3860 fprintf (dump_file, ")\n (stmt_b = \n");
3861 print_gimple_stmt (dump_file, DR_STMT (drb), 0, 0);
3862 fprintf (dump_file, ")\n");
3865 /* Analyze only when the dependence relation is not yet known. */
3866 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE
3867 && !DDR_SELF_REFERENCE (ddr))
3869 dependence_stats.num_dependence_tests++;
3871 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
3872 && access_functions_are_affine_or_constant_p (drb, loop_nest))
3874 if (flag_check_data_deps)
3876 /* Compute the dependences using the first algorithm. */
3877 subscript_dependence_tester (ddr, loop_nest);
3879 if (dump_file && (dump_flags & TDF_DETAILS))
3881 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
3882 dump_data_dependence_relation (dump_file, ddr);
3885 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
3887 bool maybe_dependent;
3888 VEC (lambda_vector, heap) *dir_vects, *dist_vects;
3890 /* Save the result of the first DD analyzer. */
3891 dist_vects = DDR_DIST_VECTS (ddr);
3892 dir_vects = DDR_DIR_VECTS (ddr);
3894 /* Reset the information. */
3895 DDR_DIST_VECTS (ddr) = NULL;
3896 DDR_DIR_VECTS (ddr) = NULL;
3898 /* Compute the same information using Omega. */
3899 if (!init_omega_for_ddr (ddr, &maybe_dependent))
3900 goto csys_dont_know;
3902 if (dump_file && (dump_flags & TDF_DETAILS))
3904 fprintf (dump_file, "Omega Analyzer\n");
3905 dump_data_dependence_relation (dump_file, ddr);
3908 /* Check that we get the same information. */
3909 if (maybe_dependent)
3910 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
3911 dir_vects));
3914 else
3915 subscript_dependence_tester (ddr, loop_nest);
3918 /* As a last case, if the dependence cannot be determined, or if
3919 the dependence is considered too difficult to determine, answer
3920 "don't know". */
3921 else
3923 csys_dont_know:;
3924 dependence_stats.num_dependence_undetermined++;
3926 if (dump_file && (dump_flags & TDF_DETAILS))
3928 fprintf (dump_file, "Data ref a:\n");
3929 dump_data_reference (dump_file, dra);
3930 fprintf (dump_file, "Data ref b:\n");
3931 dump_data_reference (dump_file, drb);
3932 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
3934 finalize_ddr_dependent (ddr, chrec_dont_know);
3938 if (dump_file && (dump_flags & TDF_DETAILS))
3939 fprintf (dump_file, ")\n");
3942 /* This computes the dependence relation for the same data
3943 reference into DDR. */
3945 static void
3946 compute_self_dependence (struct data_dependence_relation *ddr)
3948 unsigned int i;
3949 struct subscript *subscript;
3951 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3952 return;
3954 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3955 i++)
3957 if (SUB_CONFLICTS_IN_A (subscript))
3958 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3959 if (SUB_CONFLICTS_IN_B (subscript))
3960 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3962 /* The accessed index overlaps for each iteration. */
3963 SUB_CONFLICTS_IN_A (subscript)
3964 = conflict_fn (1, affine_fn_cst (integer_zero_node));
3965 SUB_CONFLICTS_IN_B (subscript)
3966 = conflict_fn (1, affine_fn_cst (integer_zero_node));
3967 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
3970 /* The distance vector is the zero vector. */
3971 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3972 save_dir_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3975 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
3976 the data references in DATAREFS, in the LOOP_NEST. When
3977 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
3978 relations. */
3980 void
3981 compute_all_dependences (VEC (data_reference_p, heap) *datarefs,
3982 VEC (ddr_p, heap) **dependence_relations,
3983 VEC (loop_p, heap) *loop_nest,
3984 bool compute_self_and_rr)
3986 struct data_dependence_relation *ddr;
3987 struct data_reference *a, *b;
3988 unsigned int i, j;
3990 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, a); i++)
3991 for (j = i + 1; VEC_iterate (data_reference_p, datarefs, j, b); j++)
3992 if (!DR_IS_READ (a) || !DR_IS_READ (b) || compute_self_and_rr)
3994 ddr = initialize_data_dependence_relation (a, b, loop_nest);
3995 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
3996 compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
3999 if (compute_self_and_rr)
4000 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, a); i++)
4002 ddr = initialize_data_dependence_relation (a, a, loop_nest);
4003 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4004 compute_self_dependence (ddr);
4008 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4009 true if STMT clobbers memory, false otherwise. */
4011 bool
4012 get_references_in_stmt (gimple stmt, VEC (data_ref_loc, heap) **references)
4014 bool clobbers_memory = false;
4015 data_ref_loc *ref;
4016 tree *op0, *op1;
4017 enum gimple_code stmt_code = gimple_code (stmt);
4019 *references = NULL;
4021 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4022 Calls have side-effects, except those to const or pure
4023 functions. */
4024 if ((stmt_code == GIMPLE_CALL
4025 && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)))
4026 || (stmt_code == GIMPLE_ASM
4027 && gimple_asm_volatile_p (stmt)))
4028 clobbers_memory = true;
4030 if (!gimple_vuse (stmt))
4031 return clobbers_memory;
4033 if (stmt_code == GIMPLE_ASSIGN)
4035 tree base;
4036 op0 = gimple_assign_lhs_ptr (stmt);
4037 op1 = gimple_assign_rhs1_ptr (stmt);
4039 if (DECL_P (*op1)
4040 || (REFERENCE_CLASS_P (*op1)
4041 && (base = get_base_address (*op1))
4042 && TREE_CODE (base) != SSA_NAME))
4044 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4045 ref->pos = op1;
4046 ref->is_read = true;
4049 if (DECL_P (*op0)
4050 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0)))
4052 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4053 ref->pos = op0;
4054 ref->is_read = false;
4057 else if (stmt_code == GIMPLE_CALL)
4059 unsigned i, n = gimple_call_num_args (stmt);
4061 for (i = 0; i < n; i++)
4063 op0 = gimple_call_arg_ptr (stmt, i);
4065 if (DECL_P (*op0)
4066 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0)))
4068 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4069 ref->pos = op0;
4070 ref->is_read = true;
4075 return clobbers_memory;
4078 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4079 reference, returns false, otherwise returns true. NEST is the outermost
4080 loop of the loop nest in which the references should be analyzed. */
4082 bool
4083 find_data_references_in_stmt (struct loop *nest, gimple stmt,
4084 VEC (data_reference_p, heap) **datarefs)
4086 unsigned i;
4087 VEC (data_ref_loc, heap) *references;
4088 data_ref_loc *ref;
4089 bool ret = true;
4090 data_reference_p dr;
4092 if (get_references_in_stmt (stmt, &references))
4094 VEC_free (data_ref_loc, heap, references);
4095 return false;
4098 for (i = 0; VEC_iterate (data_ref_loc, references, i, ref); i++)
4100 dr = create_data_ref (nest, *ref->pos, stmt, ref->is_read);
4101 gcc_assert (dr != NULL);
4103 /* FIXME -- data dependence analysis does not work correctly for objects with
4104 invariant addresses. Let us fail here until the problem is fixed. */
4105 if (dr_address_invariant_p (dr))
4107 free_data_ref (dr);
4108 if (dump_file && (dump_flags & TDF_DETAILS))
4109 fprintf (dump_file, "\tFAILED as dr address is invariant\n");
4110 ret = false;
4111 break;
4114 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4116 VEC_free (data_ref_loc, heap, references);
4117 return ret;
4120 /* Search the data references in LOOP, and record the information into
4121 DATAREFS. Returns chrec_dont_know when failing to analyze a
4122 difficult case, returns NULL_TREE otherwise.
4124 TODO: This function should be made smarter so that it can handle address
4125 arithmetic as if they were array accesses, etc. */
4127 tree
4128 find_data_references_in_loop (struct loop *loop,
4129 VEC (data_reference_p, heap) **datarefs)
4131 basic_block bb, *bbs;
4132 unsigned int i;
4133 gimple_stmt_iterator bsi;
4135 bbs = get_loop_body_in_dom_order (loop);
4137 for (i = 0; i < loop->num_nodes; i++)
4139 bb = bbs[i];
4141 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4143 gimple stmt = gsi_stmt (bsi);
4145 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4147 struct data_reference *res;
4148 res = XCNEW (struct data_reference);
4149 VEC_safe_push (data_reference_p, heap, *datarefs, res);
4151 free (bbs);
4152 return chrec_dont_know;
4156 free (bbs);
4158 return NULL_TREE;
4161 /* Recursive helper function. */
4163 static bool
4164 find_loop_nest_1 (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4166 /* Inner loops of the nest should not contain siblings. Example:
4167 when there are two consecutive loops,
4169 | loop_0
4170 | loop_1
4171 | A[{0, +, 1}_1]
4172 | endloop_1
4173 | loop_2
4174 | A[{0, +, 1}_2]
4175 | endloop_2
4176 | endloop_0
4178 the dependence relation cannot be captured by the distance
4179 abstraction. */
4180 if (loop->next)
4181 return false;
4183 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4184 if (loop->inner)
4185 return find_loop_nest_1 (loop->inner, loop_nest);
4186 return true;
4189 /* Return false when the LOOP is not well nested. Otherwise return
4190 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4191 contain the loops from the outermost to the innermost, as they will
4192 appear in the classic distance vector. */
4194 bool
4195 find_loop_nest (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4197 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4198 if (loop->inner)
4199 return find_loop_nest_1 (loop->inner, loop_nest);
4200 return true;
4203 /* Returns true when the data dependences have been computed, false otherwise.
4204 Given a loop nest LOOP, the following vectors are returned:
4205 DATAREFS is initialized to all the array elements contained in this loop,
4206 DEPENDENCE_RELATIONS contains the relations between the data references.
4207 Compute read-read and self relations if
4208 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4210 bool
4211 compute_data_dependences_for_loop (struct loop *loop,
4212 bool compute_self_and_read_read_dependences,
4213 VEC (data_reference_p, heap) **datarefs,
4214 VEC (ddr_p, heap) **dependence_relations)
4216 bool res = true;
4217 VEC (loop_p, heap) *vloops = VEC_alloc (loop_p, heap, 3);
4219 memset (&dependence_stats, 0, sizeof (dependence_stats));
4221 /* If the loop nest is not well formed, or one of the data references
4222 is not computable, give up without spending time to compute other
4223 dependences. */
4224 if (!loop
4225 || !find_loop_nest (loop, &vloops)
4226 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know)
4228 struct data_dependence_relation *ddr;
4230 /* Insert a single relation into dependence_relations:
4231 chrec_dont_know. */
4232 ddr = initialize_data_dependence_relation (NULL, NULL, vloops);
4233 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4234 res = false;
4236 else
4237 compute_all_dependences (*datarefs, dependence_relations, vloops,
4238 compute_self_and_read_read_dependences);
4240 if (dump_file && (dump_flags & TDF_STATS))
4242 fprintf (dump_file, "Dependence tester statistics:\n");
4244 fprintf (dump_file, "Number of dependence tests: %d\n",
4245 dependence_stats.num_dependence_tests);
4246 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4247 dependence_stats.num_dependence_dependent);
4248 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4249 dependence_stats.num_dependence_independent);
4250 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4251 dependence_stats.num_dependence_undetermined);
4253 fprintf (dump_file, "Number of subscript tests: %d\n",
4254 dependence_stats.num_subscript_tests);
4255 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4256 dependence_stats.num_subscript_undetermined);
4257 fprintf (dump_file, "Number of same subscript function: %d\n",
4258 dependence_stats.num_same_subscript_function);
4260 fprintf (dump_file, "Number of ziv tests: %d\n",
4261 dependence_stats.num_ziv);
4262 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4263 dependence_stats.num_ziv_dependent);
4264 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4265 dependence_stats.num_ziv_independent);
4266 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4267 dependence_stats.num_ziv_unimplemented);
4269 fprintf (dump_file, "Number of siv tests: %d\n",
4270 dependence_stats.num_siv);
4271 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4272 dependence_stats.num_siv_dependent);
4273 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4274 dependence_stats.num_siv_independent);
4275 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4276 dependence_stats.num_siv_unimplemented);
4278 fprintf (dump_file, "Number of miv tests: %d\n",
4279 dependence_stats.num_miv);
4280 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4281 dependence_stats.num_miv_dependent);
4282 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4283 dependence_stats.num_miv_independent);
4284 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4285 dependence_stats.num_miv_unimplemented);
4288 return res;
4291 /* Entry point (for testing only). Analyze all the data references
4292 and the dependence relations in LOOP.
4294 The data references are computed first.
4296 A relation on these nodes is represented by a complete graph. Some
4297 of the relations could be of no interest, thus the relations can be
4298 computed on demand.
4300 In the following function we compute all the relations. This is
4301 just a first implementation that is here for:
4302 - for showing how to ask for the dependence relations,
4303 - for the debugging the whole dependence graph,
4304 - for the dejagnu testcases and maintenance.
4306 It is possible to ask only for a part of the graph, avoiding to
4307 compute the whole dependence graph. The computed dependences are
4308 stored in a knowledge base (KB) such that later queries don't
4309 recompute the same information. The implementation of this KB is
4310 transparent to the optimizer, and thus the KB can be changed with a
4311 more efficient implementation, or the KB could be disabled. */
4312 static void
4313 analyze_all_data_dependences (struct loop *loop)
4315 unsigned int i;
4316 int nb_data_refs = 10;
4317 VEC (data_reference_p, heap) *datarefs =
4318 VEC_alloc (data_reference_p, heap, nb_data_refs);
4319 VEC (ddr_p, heap) *dependence_relations =
4320 VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs);
4322 /* Compute DDs on the whole function. */
4323 compute_data_dependences_for_loop (loop, false, &datarefs,
4324 &dependence_relations);
4326 if (dump_file)
4328 dump_data_dependence_relations (dump_file, dependence_relations);
4329 fprintf (dump_file, "\n\n");
4331 if (dump_flags & TDF_DETAILS)
4332 dump_dist_dir_vectors (dump_file, dependence_relations);
4334 if (dump_flags & TDF_STATS)
4336 unsigned nb_top_relations = 0;
4337 unsigned nb_bot_relations = 0;
4338 unsigned nb_chrec_relations = 0;
4339 struct data_dependence_relation *ddr;
4341 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
4343 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4344 nb_top_relations++;
4346 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4347 nb_bot_relations++;
4349 else
4350 nb_chrec_relations++;
4353 gather_stats_on_scev_database ();
4357 free_dependence_relations (dependence_relations);
4358 free_data_refs (datarefs);
4361 /* Computes all the data dependences and check that the results of
4362 several analyzers are the same. */
4364 void
4365 tree_check_data_deps (void)
4367 loop_iterator li;
4368 struct loop *loop_nest;
4370 FOR_EACH_LOOP (li, loop_nest, 0)
4371 analyze_all_data_dependences (loop_nest);
4374 /* Free the memory used by a data dependence relation DDR. */
4376 void
4377 free_dependence_relation (struct data_dependence_relation *ddr)
4379 if (ddr == NULL)
4380 return;
4382 if (DDR_SUBSCRIPTS (ddr))
4383 free_subscripts (DDR_SUBSCRIPTS (ddr));
4384 if (DDR_DIST_VECTS (ddr))
4385 VEC_free (lambda_vector, heap, DDR_DIST_VECTS (ddr));
4386 if (DDR_DIR_VECTS (ddr))
4387 VEC_free (lambda_vector, heap, DDR_DIR_VECTS (ddr));
4389 free (ddr);
4392 /* Free the memory used by the data dependence relations from
4393 DEPENDENCE_RELATIONS. */
4395 void
4396 free_dependence_relations (VEC (ddr_p, heap) *dependence_relations)
4398 unsigned int i;
4399 struct data_dependence_relation *ddr;
4400 VEC (loop_p, heap) *loop_nest = NULL;
4402 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
4404 if (ddr == NULL)
4405 continue;
4406 if (loop_nest == NULL)
4407 loop_nest = DDR_LOOP_NEST (ddr);
4408 else
4409 gcc_assert (DDR_LOOP_NEST (ddr) == NULL
4410 || DDR_LOOP_NEST (ddr) == loop_nest);
4411 free_dependence_relation (ddr);
4414 if (loop_nest)
4415 VEC_free (loop_p, heap, loop_nest);
4416 VEC_free (ddr_p, heap, dependence_relations);
4419 /* Free the memory used by the data references from DATAREFS. */
4421 void
4422 free_data_refs (VEC (data_reference_p, heap) *datarefs)
4424 unsigned int i;
4425 struct data_reference *dr;
4427 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
4428 free_data_ref (dr);
4429 VEC_free (data_reference_p, heap, datarefs);
4434 /* Dump vertex I in RDG to FILE. */
4436 void
4437 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
4439 struct vertex *v = &(rdg->vertices[i]);
4440 struct graph_edge *e;
4442 fprintf (file, "(vertex %d: (%s%s) (in:", i,
4443 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
4444 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
4446 if (v->pred)
4447 for (e = v->pred; e; e = e->pred_next)
4448 fprintf (file, " %d", e->src);
4450 fprintf (file, ") (out:");
4452 if (v->succ)
4453 for (e = v->succ; e; e = e->succ_next)
4454 fprintf (file, " %d", e->dest);
4456 fprintf (file, ") \n");
4457 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
4458 fprintf (file, ")\n");
4461 /* Call dump_rdg_vertex on stderr. */
4463 void
4464 debug_rdg_vertex (struct graph *rdg, int i)
4466 dump_rdg_vertex (stderr, rdg, i);
4469 /* Dump component C of RDG to FILE. If DUMPED is non-null, set the
4470 dumped vertices to that bitmap. */
4472 void dump_rdg_component (FILE *file, struct graph *rdg, int c, bitmap dumped)
4474 int i;
4476 fprintf (file, "(%d\n", c);
4478 for (i = 0; i < rdg->n_vertices; i++)
4479 if (rdg->vertices[i].component == c)
4481 if (dumped)
4482 bitmap_set_bit (dumped, i);
4484 dump_rdg_vertex (file, rdg, i);
4487 fprintf (file, ")\n");
4490 /* Call dump_rdg_vertex on stderr. */
4492 void
4493 debug_rdg_component (struct graph *rdg, int c)
4495 dump_rdg_component (stderr, rdg, c, NULL);
4498 /* Dump the reduced dependence graph RDG to FILE. */
4500 void
4501 dump_rdg (FILE *file, struct graph *rdg)
4503 int i;
4504 bitmap dumped = BITMAP_ALLOC (NULL);
4506 fprintf (file, "(rdg\n");
4508 for (i = 0; i < rdg->n_vertices; i++)
4509 if (!bitmap_bit_p (dumped, i))
4510 dump_rdg_component (file, rdg, rdg->vertices[i].component, dumped);
4512 fprintf (file, ")\n");
4513 BITMAP_FREE (dumped);
4516 /* Call dump_rdg on stderr. */
4518 void
4519 debug_rdg (struct graph *rdg)
4521 dump_rdg (stderr, rdg);
4524 static void
4525 dot_rdg_1 (FILE *file, struct graph *rdg)
4527 int i;
4529 fprintf (file, "digraph RDG {\n");
4531 for (i = 0; i < rdg->n_vertices; i++)
4533 struct vertex *v = &(rdg->vertices[i]);
4534 struct graph_edge *e;
4536 /* Highlight reads from memory. */
4537 if (RDG_MEM_READS_STMT (rdg, i))
4538 fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
4540 /* Highlight stores to memory. */
4541 if (RDG_MEM_WRITE_STMT (rdg, i))
4542 fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
4544 if (v->succ)
4545 for (e = v->succ; e; e = e->succ_next)
4546 switch (RDGE_TYPE (e))
4548 case input_dd:
4549 fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
4550 break;
4552 case output_dd:
4553 fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
4554 break;
4556 case flow_dd:
4557 /* These are the most common dependences: don't print these. */
4558 fprintf (file, "%d -> %d \n", i, e->dest);
4559 break;
4561 case anti_dd:
4562 fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
4563 break;
4565 default:
4566 gcc_unreachable ();
4570 fprintf (file, "}\n\n");
4573 /* Display SCOP using dotty. */
4575 void
4576 dot_rdg (struct graph *rdg)
4578 FILE *file = fopen ("/tmp/rdg.dot", "w");
4579 gcc_assert (file != NULL);
4581 dot_rdg_1 (file, rdg);
4582 fclose (file);
4584 system ("dotty /tmp/rdg.dot");
4588 /* This structure is used for recording the mapping statement index in
4589 the RDG. */
4591 struct GTY(()) rdg_vertex_info
4593 gimple stmt;
4594 int index;
4597 /* Returns the index of STMT in RDG. */
4600 rdg_vertex_for_stmt (struct graph *rdg, gimple stmt)
4602 struct rdg_vertex_info rvi, *slot;
4604 rvi.stmt = stmt;
4605 slot = (struct rdg_vertex_info *) htab_find (rdg->indices, &rvi);
4607 if (!slot)
4608 return -1;
4610 return slot->index;
4613 /* Creates an edge in RDG for each distance vector from DDR. The
4614 order that we keep track of in the RDG is the order in which
4615 statements have to be executed. */
4617 static void
4618 create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4620 struct graph_edge *e;
4621 int va, vb;
4622 data_reference_p dra = DDR_A (ddr);
4623 data_reference_p drb = DDR_B (ddr);
4624 unsigned level = ddr_dependence_level (ddr);
4626 /* For non scalar dependences, when the dependence is REVERSED,
4627 statement B has to be executed before statement A. */
4628 if (level > 0
4629 && !DDR_REVERSED_P (ddr))
4631 data_reference_p tmp = dra;
4632 dra = drb;
4633 drb = tmp;
4636 va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
4637 vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
4639 if (va < 0 || vb < 0)
4640 return;
4642 e = add_edge (rdg, va, vb);
4643 e->data = XNEW (struct rdg_edge);
4645 RDGE_LEVEL (e) = level;
4646 RDGE_RELATION (e) = ddr;
4648 /* Determines the type of the data dependence. */
4649 if (DR_IS_READ (dra) && DR_IS_READ (drb))
4650 RDGE_TYPE (e) = input_dd;
4651 else if (!DR_IS_READ (dra) && !DR_IS_READ (drb))
4652 RDGE_TYPE (e) = output_dd;
4653 else if (!DR_IS_READ (dra) && DR_IS_READ (drb))
4654 RDGE_TYPE (e) = flow_dd;
4655 else if (DR_IS_READ (dra) && !DR_IS_READ (drb))
4656 RDGE_TYPE (e) = anti_dd;
4659 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
4660 the index of DEF in RDG. */
4662 static void
4663 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
4665 use_operand_p imm_use_p;
4666 imm_use_iterator iterator;
4668 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
4670 struct graph_edge *e;
4671 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
4673 if (use < 0)
4674 continue;
4676 e = add_edge (rdg, idef, use);
4677 e->data = XNEW (struct rdg_edge);
4678 RDGE_TYPE (e) = flow_dd;
4679 RDGE_RELATION (e) = NULL;
4683 /* Creates the edges of the reduced dependence graph RDG. */
4685 static void
4686 create_rdg_edges (struct graph *rdg, VEC (ddr_p, heap) *ddrs)
4688 int i;
4689 struct data_dependence_relation *ddr;
4690 def_operand_p def_p;
4691 ssa_op_iter iter;
4693 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
4694 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4695 create_rdg_edge_for_ddr (rdg, ddr);
4697 for (i = 0; i < rdg->n_vertices; i++)
4698 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
4699 iter, SSA_OP_DEF)
4700 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
4703 /* Build the vertices of the reduced dependence graph RDG. */
4705 void
4706 create_rdg_vertices (struct graph *rdg, VEC (gimple, heap) *stmts)
4708 int i, j;
4709 gimple stmt;
4711 for (i = 0; VEC_iterate (gimple, stmts, i, stmt); i++)
4713 VEC (data_ref_loc, heap) *references;
4714 data_ref_loc *ref;
4715 struct vertex *v = &(rdg->vertices[i]);
4716 struct rdg_vertex_info *rvi = XNEW (struct rdg_vertex_info);
4717 struct rdg_vertex_info **slot;
4719 rvi->stmt = stmt;
4720 rvi->index = i;
4721 slot = (struct rdg_vertex_info **) htab_find_slot (rdg->indices, rvi, INSERT);
4723 if (!*slot)
4724 *slot = rvi;
4725 else
4726 free (rvi);
4728 v->data = XNEW (struct rdg_vertex);
4729 RDG_STMT (rdg, i) = stmt;
4731 RDG_MEM_WRITE_STMT (rdg, i) = false;
4732 RDG_MEM_READS_STMT (rdg, i) = false;
4733 if (gimple_code (stmt) == GIMPLE_PHI)
4734 continue;
4736 get_references_in_stmt (stmt, &references);
4737 for (j = 0; VEC_iterate (data_ref_loc, references, j, ref); j++)
4738 if (!ref->is_read)
4739 RDG_MEM_WRITE_STMT (rdg, i) = true;
4740 else
4741 RDG_MEM_READS_STMT (rdg, i) = true;
4743 VEC_free (data_ref_loc, heap, references);
4747 /* Initialize STMTS with all the statements of LOOP. When
4748 INCLUDE_PHIS is true, include also the PHI nodes. The order in
4749 which we discover statements is important as
4750 generate_loops_for_partition is using the same traversal for
4751 identifying statements. */
4753 static void
4754 stmts_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
4756 unsigned int i;
4757 basic_block *bbs = get_loop_body_in_dom_order (loop);
4759 for (i = 0; i < loop->num_nodes; i++)
4761 basic_block bb = bbs[i];
4762 gimple_stmt_iterator bsi;
4763 gimple stmt;
4765 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4766 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
4768 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4770 stmt = gsi_stmt (bsi);
4771 if (gimple_code (stmt) != GIMPLE_LABEL)
4772 VEC_safe_push (gimple, heap, *stmts, stmt);
4776 free (bbs);
4779 /* Returns true when all the dependences are computable. */
4781 static bool
4782 known_dependences_p (VEC (ddr_p, heap) *dependence_relations)
4784 ddr_p ddr;
4785 unsigned int i;
4787 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
4788 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4789 return false;
4791 return true;
4794 /* Computes a hash function for element ELT. */
4796 static hashval_t
4797 hash_stmt_vertex_info (const void *elt)
4799 const struct rdg_vertex_info *const rvi =
4800 (const struct rdg_vertex_info *) elt;
4801 gimple stmt = rvi->stmt;
4803 return htab_hash_pointer (stmt);
4806 /* Compares database elements E1 and E2. */
4808 static int
4809 eq_stmt_vertex_info (const void *e1, const void *e2)
4811 const struct rdg_vertex_info *elt1 = (const struct rdg_vertex_info *) e1;
4812 const struct rdg_vertex_info *elt2 = (const struct rdg_vertex_info *) e2;
4814 return elt1->stmt == elt2->stmt;
4817 /* Free the element E. */
4819 static void
4820 hash_stmt_vertex_del (void *e)
4822 free (e);
4825 /* Build the Reduced Dependence Graph (RDG) with one vertex per
4826 statement of the loop nest, and one edge per data dependence or
4827 scalar dependence. */
4829 struct graph *
4830 build_empty_rdg (int n_stmts)
4832 int nb_data_refs = 10;
4833 struct graph *rdg = new_graph (n_stmts);
4835 rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
4836 eq_stmt_vertex_info, hash_stmt_vertex_del);
4837 return rdg;
4840 /* Build the Reduced Dependence Graph (RDG) with one vertex per
4841 statement of the loop nest, and one edge per data dependence or
4842 scalar dependence. */
4844 struct graph *
4845 build_rdg (struct loop *loop)
4847 int nb_data_refs = 10;
4848 struct graph *rdg = NULL;
4849 VEC (ddr_p, heap) *dependence_relations;
4850 VEC (data_reference_p, heap) *datarefs;
4851 VEC (gimple, heap) *stmts = VEC_alloc (gimple, heap, nb_data_refs);
4853 dependence_relations = VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs) ;
4854 datarefs = VEC_alloc (data_reference_p, heap, nb_data_refs);
4855 compute_data_dependences_for_loop (loop,
4856 false,
4857 &datarefs,
4858 &dependence_relations);
4860 if (!known_dependences_p (dependence_relations))
4862 free_dependence_relations (dependence_relations);
4863 free_data_refs (datarefs);
4864 VEC_free (gimple, heap, stmts);
4866 return rdg;
4869 stmts_from_loop (loop, &stmts);
4870 rdg = build_empty_rdg (VEC_length (gimple, stmts));
4872 rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
4873 eq_stmt_vertex_info, hash_stmt_vertex_del);
4874 create_rdg_vertices (rdg, stmts);
4875 create_rdg_edges (rdg, dependence_relations);
4877 VEC_free (gimple, heap, stmts);
4878 return rdg;
4881 /* Free the reduced dependence graph RDG. */
4883 void
4884 free_rdg (struct graph *rdg)
4886 int i;
4888 for (i = 0; i < rdg->n_vertices; i++)
4889 free (rdg->vertices[i].data);
4891 htab_delete (rdg->indices);
4892 free_graph (rdg);
4895 /* Initialize STMTS with all the statements of LOOP that contain a
4896 store to memory. */
4898 void
4899 stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
4901 unsigned int i;
4902 basic_block *bbs = get_loop_body_in_dom_order (loop);
4904 for (i = 0; i < loop->num_nodes; i++)
4906 basic_block bb = bbs[i];
4907 gimple_stmt_iterator bsi;
4909 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4910 if (gimple_vdef (gsi_stmt (bsi)))
4911 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
4914 free (bbs);
4917 /* For a data reference REF, return the declaration of its base
4918 address or NULL_TREE if the base is not determined. */
4920 static inline tree
4921 ref_base_address (gimple stmt, data_ref_loc *ref)
4923 tree base = NULL_TREE;
4924 tree base_address;
4925 struct data_reference *dr = XCNEW (struct data_reference);
4927 DR_STMT (dr) = stmt;
4928 DR_REF (dr) = *ref->pos;
4929 dr_analyze_innermost (dr);
4930 base_address = DR_BASE_ADDRESS (dr);
4932 if (!base_address)
4933 goto end;
4935 switch (TREE_CODE (base_address))
4937 case ADDR_EXPR:
4938 base = TREE_OPERAND (base_address, 0);
4939 break;
4941 default:
4942 base = base_address;
4943 break;
4946 end:
4947 free_data_ref (dr);
4948 return base;
4951 /* Determines whether the statement from vertex V of the RDG has a
4952 definition used outside the loop that contains this statement. */
4954 bool
4955 rdg_defs_used_in_other_loops_p (struct graph *rdg, int v)
4957 gimple stmt = RDG_STMT (rdg, v);
4958 struct loop *loop = loop_containing_stmt (stmt);
4959 use_operand_p imm_use_p;
4960 imm_use_iterator iterator;
4961 ssa_op_iter it;
4962 def_operand_p def_p;
4964 if (!loop)
4965 return true;
4967 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, it, SSA_OP_DEF)
4969 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, DEF_FROM_PTR (def_p))
4971 if (loop_containing_stmt (USE_STMT (imm_use_p)) != loop)
4972 return true;
4976 return false;
4979 /* Determines whether statements S1 and S2 access to similar memory
4980 locations. Two memory accesses are considered similar when they
4981 have the same base address declaration, i.e. when their
4982 ref_base_address is the same. */
4984 bool
4985 have_similar_memory_accesses (gimple s1, gimple s2)
4987 bool res = false;
4988 unsigned i, j;
4989 VEC (data_ref_loc, heap) *refs1, *refs2;
4990 data_ref_loc *ref1, *ref2;
4992 get_references_in_stmt (s1, &refs1);
4993 get_references_in_stmt (s2, &refs2);
4995 for (i = 0; VEC_iterate (data_ref_loc, refs1, i, ref1); i++)
4997 tree base1 = ref_base_address (s1, ref1);
4999 if (base1)
5000 for (j = 0; VEC_iterate (data_ref_loc, refs2, j, ref2); j++)
5001 if (base1 == ref_base_address (s2, ref2))
5003 res = true;
5004 goto end;
5008 end:
5009 VEC_free (data_ref_loc, heap, refs1);
5010 VEC_free (data_ref_loc, heap, refs2);
5011 return res;
5014 /* Helper function for the hashtab. */
5016 static int
5017 have_similar_memory_accesses_1 (const void *s1, const void *s2)
5019 return have_similar_memory_accesses (CONST_CAST_GIMPLE ((const_gimple) s1),
5020 CONST_CAST_GIMPLE ((const_gimple) s2));
5023 /* Helper function for the hashtab. */
5025 static hashval_t
5026 ref_base_address_1 (const void *s)
5028 gimple stmt = CONST_CAST_GIMPLE ((const_gimple) s);
5029 unsigned i;
5030 VEC (data_ref_loc, heap) *refs;
5031 data_ref_loc *ref;
5032 hashval_t res = 0;
5034 get_references_in_stmt (stmt, &refs);
5036 for (i = 0; VEC_iterate (data_ref_loc, refs, i, ref); i++)
5037 if (!ref->is_read)
5039 res = htab_hash_pointer (ref_base_address (stmt, ref));
5040 break;
5043 VEC_free (data_ref_loc, heap, refs);
5044 return res;
5047 /* Try to remove duplicated write data references from STMTS. */
5049 void
5050 remove_similar_memory_refs (VEC (gimple, heap) **stmts)
5052 unsigned i;
5053 gimple stmt;
5054 htab_t seen = htab_create (VEC_length (gimple, *stmts), ref_base_address_1,
5055 have_similar_memory_accesses_1, NULL);
5057 for (i = 0; VEC_iterate (gimple, *stmts, i, stmt); )
5059 void **slot;
5061 slot = htab_find_slot (seen, stmt, INSERT);
5063 if (*slot)
5064 VEC_ordered_remove (gimple, *stmts, i);
5065 else
5067 *slot = (void *) stmt;
5068 i++;
5072 htab_delete (seen);
5075 /* Returns the index of PARAMETER in the parameters vector of the
5076 ACCESS_MATRIX. If PARAMETER does not exist return -1. */
5078 int
5079 access_matrix_get_index_for_parameter (tree parameter,
5080 struct access_matrix *access_matrix)
5082 int i;
5083 VEC (tree,heap) *lambda_parameters = AM_PARAMETERS (access_matrix);
5084 tree lambda_parameter;
5086 for (i = 0; VEC_iterate (tree, lambda_parameters, i, lambda_parameter); i++)
5087 if (lambda_parameter == parameter)
5088 return i + AM_NB_INDUCTION_VARS (access_matrix);
5090 return -1;