bitmap.h (bitmap_ior_and_into): New.
[official-gcc.git] / gcc / tree-data-ref.c
blob2181f469ca036730b07e34bdd68b470d3221394d
1 /* Data references and dependences detectors.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009
3 Free Software Foundation, Inc.
4 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* This pass walks a given loop structure searching for array
23 references. The information about the array accesses is recorded
24 in DATA_REFERENCE structures.
26 The basic test for determining the dependences is:
27 given two access functions chrec1 and chrec2 to a same array, and
28 x and y two vectors from the iteration domain, the same element of
29 the array is accessed twice at iterations x and y if and only if:
30 | chrec1 (x) == chrec2 (y).
32 The goals of this analysis are:
34 - to determine the independence: the relation between two
35 independent accesses is qualified with the chrec_known (this
36 information allows a loop parallelization),
38 - when two data references access the same data, to qualify the
39 dependence relation with classic dependence representations:
41 - distance vectors
42 - direction vectors
43 - loop carried level dependence
44 - polyhedron dependence
45 or with the chains of recurrences based representation,
47 - to define a knowledge base for storing the data dependence
48 information,
50 - to define an interface to access this data.
53 Definitions:
55 - subscript: given two array accesses a subscript is the tuple
56 composed of the access functions for a given dimension. Example:
57 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
58 (f1, g1), (f2, g2), (f3, g3).
60 - Diophantine equation: an equation whose coefficients and
61 solutions are integer constants, for example the equation
62 | 3*x + 2*y = 1
63 has an integer solution x = 1 and y = -1.
65 References:
67 - "Advanced Compilation for High Performance Computing" by Randy
68 Allen and Ken Kennedy.
69 http://citeseer.ist.psu.edu/goff91practical.html
71 - "Loop Transformations for Restructuring Compilers - The Foundations"
72 by Utpal Banerjee.
77 #include "config.h"
78 #include "system.h"
79 #include "coretypes.h"
80 #include "tm.h"
81 #include "ggc.h"
82 #include "tree.h"
84 /* These RTL headers are needed for basic-block.h. */
85 #include "rtl.h"
86 #include "basic-block.h"
87 #include "diagnostic.h"
88 #include "tree-flow.h"
89 #include "tree-dump.h"
90 #include "timevar.h"
91 #include "cfgloop.h"
92 #include "tree-data-ref.h"
93 #include "tree-scalar-evolution.h"
94 #include "tree-pass.h"
95 #include "langhooks.h"
97 static struct datadep_stats
99 int num_dependence_tests;
100 int num_dependence_dependent;
101 int num_dependence_independent;
102 int num_dependence_undetermined;
104 int num_subscript_tests;
105 int num_subscript_undetermined;
106 int num_same_subscript_function;
108 int num_ziv;
109 int num_ziv_independent;
110 int num_ziv_dependent;
111 int num_ziv_unimplemented;
113 int num_siv;
114 int num_siv_independent;
115 int num_siv_dependent;
116 int num_siv_unimplemented;
118 int num_miv;
119 int num_miv_independent;
120 int num_miv_dependent;
121 int num_miv_unimplemented;
122 } dependence_stats;
124 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
125 struct data_reference *,
126 struct data_reference *,
127 struct loop *);
128 /* Returns true iff A divides B. */
130 static inline bool
131 tree_fold_divides_p (const_tree a, const_tree b)
133 gcc_assert (TREE_CODE (a) == INTEGER_CST);
134 gcc_assert (TREE_CODE (b) == INTEGER_CST);
135 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a, 0));
138 /* Returns true iff A divides B. */
140 static inline bool
141 int_divides_p (int a, int b)
143 return ((b % a) == 0);
148 /* Dump into FILE all the data references from DATAREFS. */
150 void
151 dump_data_references (FILE *file, VEC (data_reference_p, heap) *datarefs)
153 unsigned int i;
154 struct data_reference *dr;
156 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
157 dump_data_reference (file, dr);
160 /* Dump to STDERR all the dependence relations from DDRS. */
162 void
163 debug_data_dependence_relations (VEC (ddr_p, heap) *ddrs)
165 dump_data_dependence_relations (stderr, ddrs);
168 /* Dump into FILE all the dependence relations from DDRS. */
170 void
171 dump_data_dependence_relations (FILE *file,
172 VEC (ddr_p, heap) *ddrs)
174 unsigned int i;
175 struct data_dependence_relation *ddr;
177 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
178 dump_data_dependence_relation (file, ddr);
181 /* Dump function for a DATA_REFERENCE structure. */
183 void
184 dump_data_reference (FILE *outf,
185 struct data_reference *dr)
187 unsigned int i;
189 fprintf (outf, "(Data Ref: \n stmt: ");
190 print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
191 fprintf (outf, " ref: ");
192 print_generic_stmt (outf, DR_REF (dr), 0);
193 fprintf (outf, " base_object: ");
194 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
196 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
198 fprintf (outf, " Access function %d: ", i);
199 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
201 fprintf (outf, ")\n");
204 /* Dumps the affine function described by FN to the file OUTF. */
206 static void
207 dump_affine_function (FILE *outf, affine_fn fn)
209 unsigned i;
210 tree coef;
212 print_generic_expr (outf, VEC_index (tree, fn, 0), TDF_SLIM);
213 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
215 fprintf (outf, " + ");
216 print_generic_expr (outf, coef, TDF_SLIM);
217 fprintf (outf, " * x_%u", i);
221 /* Dumps the conflict function CF to the file OUTF. */
223 static void
224 dump_conflict_function (FILE *outf, conflict_function *cf)
226 unsigned i;
228 if (cf->n == NO_DEPENDENCE)
229 fprintf (outf, "no dependence\n");
230 else if (cf->n == NOT_KNOWN)
231 fprintf (outf, "not known\n");
232 else
234 for (i = 0; i < cf->n; i++)
236 fprintf (outf, "[");
237 dump_affine_function (outf, cf->fns[i]);
238 fprintf (outf, "]\n");
243 /* Dump function for a SUBSCRIPT structure. */
245 void
246 dump_subscript (FILE *outf, struct subscript *subscript)
248 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
250 fprintf (outf, "\n (subscript \n");
251 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
252 dump_conflict_function (outf, cf);
253 if (CF_NONTRIVIAL_P (cf))
255 tree last_iteration = SUB_LAST_CONFLICT (subscript);
256 fprintf (outf, " last_conflict: ");
257 print_generic_stmt (outf, last_iteration, 0);
260 cf = SUB_CONFLICTS_IN_B (subscript);
261 fprintf (outf, " iterations_that_access_an_element_twice_in_B: ");
262 dump_conflict_function (outf, cf);
263 if (CF_NONTRIVIAL_P (cf))
265 tree last_iteration = SUB_LAST_CONFLICT (subscript);
266 fprintf (outf, " last_conflict: ");
267 print_generic_stmt (outf, last_iteration, 0);
270 fprintf (outf, " (Subscript distance: ");
271 print_generic_stmt (outf, SUB_DISTANCE (subscript), 0);
272 fprintf (outf, " )\n");
273 fprintf (outf, " )\n");
276 /* Print the classic direction vector DIRV to OUTF. */
278 void
279 print_direction_vector (FILE *outf,
280 lambda_vector dirv,
281 int length)
283 int eq;
285 for (eq = 0; eq < length; eq++)
287 enum data_dependence_direction dir = ((enum data_dependence_direction)
288 dirv[eq]);
290 switch (dir)
292 case dir_positive:
293 fprintf (outf, " +");
294 break;
295 case dir_negative:
296 fprintf (outf, " -");
297 break;
298 case dir_equal:
299 fprintf (outf, " =");
300 break;
301 case dir_positive_or_equal:
302 fprintf (outf, " +=");
303 break;
304 case dir_positive_or_negative:
305 fprintf (outf, " +-");
306 break;
307 case dir_negative_or_equal:
308 fprintf (outf, " -=");
309 break;
310 case dir_star:
311 fprintf (outf, " *");
312 break;
313 default:
314 fprintf (outf, "indep");
315 break;
318 fprintf (outf, "\n");
321 /* Print a vector of direction vectors. */
323 void
324 print_dir_vectors (FILE *outf, VEC (lambda_vector, heap) *dir_vects,
325 int length)
327 unsigned j;
328 lambda_vector v;
330 for (j = 0; VEC_iterate (lambda_vector, dir_vects, j, v); j++)
331 print_direction_vector (outf, v, length);
334 /* Print a vector of distance vectors. */
336 void
337 print_dist_vectors (FILE *outf, VEC (lambda_vector, heap) *dist_vects,
338 int length)
340 unsigned j;
341 lambda_vector v;
343 for (j = 0; VEC_iterate (lambda_vector, dist_vects, j, v); j++)
344 print_lambda_vector (outf, v, length);
347 /* Debug version. */
349 void
350 debug_data_dependence_relation (struct data_dependence_relation *ddr)
352 dump_data_dependence_relation (stderr, ddr);
355 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
357 void
358 dump_data_dependence_relation (FILE *outf,
359 struct data_dependence_relation *ddr)
361 struct data_reference *dra, *drb;
363 fprintf (outf, "(Data Dep: \n");
365 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
367 fprintf (outf, " (don't know)\n)\n");
368 return;
371 dra = DDR_A (ddr);
372 drb = DDR_B (ddr);
373 dump_data_reference (outf, dra);
374 dump_data_reference (outf, drb);
376 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
377 fprintf (outf, " (no dependence)\n");
379 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
381 unsigned int i;
382 struct loop *loopi;
384 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
386 fprintf (outf, " access_fn_A: ");
387 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
388 fprintf (outf, " access_fn_B: ");
389 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
390 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
393 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
394 fprintf (outf, " loop nest: (");
395 for (i = 0; VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
396 fprintf (outf, "%d ", loopi->num);
397 fprintf (outf, ")\n");
399 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
401 fprintf (outf, " distance_vector: ");
402 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
403 DDR_NB_LOOPS (ddr));
406 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
408 fprintf (outf, " direction_vector: ");
409 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
410 DDR_NB_LOOPS (ddr));
414 fprintf (outf, ")\n");
417 /* Dump function for a DATA_DEPENDENCE_DIRECTION structure. */
419 void
420 dump_data_dependence_direction (FILE *file,
421 enum data_dependence_direction dir)
423 switch (dir)
425 case dir_positive:
426 fprintf (file, "+");
427 break;
429 case dir_negative:
430 fprintf (file, "-");
431 break;
433 case dir_equal:
434 fprintf (file, "=");
435 break;
437 case dir_positive_or_negative:
438 fprintf (file, "+-");
439 break;
441 case dir_positive_or_equal:
442 fprintf (file, "+=");
443 break;
445 case dir_negative_or_equal:
446 fprintf (file, "-=");
447 break;
449 case dir_star:
450 fprintf (file, "*");
451 break;
453 default:
454 break;
458 /* Dumps the distance and direction vectors in FILE. DDRS contains
459 the dependence relations, and VECT_SIZE is the size of the
460 dependence vectors, or in other words the number of loops in the
461 considered nest. */
463 void
464 dump_dist_dir_vectors (FILE *file, VEC (ddr_p, heap) *ddrs)
466 unsigned int i, j;
467 struct data_dependence_relation *ddr;
468 lambda_vector v;
470 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
471 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
473 for (j = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), j, v); j++)
475 fprintf (file, "DISTANCE_V (");
476 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
477 fprintf (file, ")\n");
480 for (j = 0; VEC_iterate (lambda_vector, DDR_DIR_VECTS (ddr), j, v); j++)
482 fprintf (file, "DIRECTION_V (");
483 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
484 fprintf (file, ")\n");
488 fprintf (file, "\n\n");
491 /* Dumps the data dependence relations DDRS in FILE. */
493 void
494 dump_ddrs (FILE *file, VEC (ddr_p, heap) *ddrs)
496 unsigned int i;
497 struct data_dependence_relation *ddr;
499 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
500 dump_data_dependence_relation (file, ddr);
502 fprintf (file, "\n\n");
505 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
506 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
507 constant of type ssizetype, and returns true. If we cannot do this
508 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
509 is returned. */
511 static bool
512 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
513 tree *var, tree *off)
515 tree var0, var1;
516 tree off0, off1;
517 enum tree_code ocode = code;
519 *var = NULL_TREE;
520 *off = NULL_TREE;
522 switch (code)
524 case INTEGER_CST:
525 *var = build_int_cst (type, 0);
526 *off = fold_convert (ssizetype, op0);
527 return true;
529 case POINTER_PLUS_EXPR:
530 ocode = PLUS_EXPR;
531 /* FALLTHROUGH */
532 case PLUS_EXPR:
533 case MINUS_EXPR:
534 split_constant_offset (op0, &var0, &off0);
535 split_constant_offset (op1, &var1, &off1);
536 *var = fold_build2 (code, type, var0, var1);
537 *off = size_binop (ocode, off0, off1);
538 return true;
540 case MULT_EXPR:
541 if (TREE_CODE (op1) != INTEGER_CST)
542 return false;
544 split_constant_offset (op0, &var0, &off0);
545 *var = fold_build2 (MULT_EXPR, type, var0, op1);
546 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
547 return true;
549 case ADDR_EXPR:
551 tree base, poffset;
552 HOST_WIDE_INT pbitsize, pbitpos;
553 enum machine_mode pmode;
554 int punsignedp, pvolatilep;
556 op0 = TREE_OPERAND (op0, 0);
557 if (!handled_component_p (op0))
558 return false;
560 base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
561 &pmode, &punsignedp, &pvolatilep, false);
563 if (pbitpos % BITS_PER_UNIT != 0)
564 return false;
565 base = build_fold_addr_expr (base);
566 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
568 if (poffset)
570 split_constant_offset (poffset, &poffset, &off1);
571 off0 = size_binop (PLUS_EXPR, off0, off1);
572 if (POINTER_TYPE_P (TREE_TYPE (base)))
573 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base),
574 base, fold_convert (sizetype, poffset));
575 else
576 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
577 fold_convert (TREE_TYPE (base), poffset));
580 var0 = fold_convert (type, base);
582 /* If variable length types are involved, punt, otherwise casts
583 might be converted into ARRAY_REFs in gimplify_conversion.
584 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
585 possibly no longer appears in current GIMPLE, might resurface.
586 This perhaps could run
587 if (CONVERT_EXPR_P (var0))
589 gimplify_conversion (&var0);
590 // Attempt to fill in any within var0 found ARRAY_REF's
591 // element size from corresponding op embedded ARRAY_REF,
592 // if unsuccessful, just punt.
593 } */
594 while (POINTER_TYPE_P (type))
595 type = TREE_TYPE (type);
596 if (int_size_in_bytes (type) < 0)
597 return false;
599 *var = var0;
600 *off = off0;
601 return true;
604 case SSA_NAME:
606 gimple def_stmt = SSA_NAME_DEF_STMT (op0);
607 enum tree_code subcode;
609 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
610 return false;
612 var0 = gimple_assign_rhs1 (def_stmt);
613 subcode = gimple_assign_rhs_code (def_stmt);
614 var1 = gimple_assign_rhs2 (def_stmt);
616 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
619 default:
620 return false;
624 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
625 will be ssizetype. */
627 void
628 split_constant_offset (tree exp, tree *var, tree *off)
630 tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
631 enum tree_code code;
633 *var = exp;
634 *off = ssize_int (0);
635 STRIP_NOPS (exp);
637 if (automatically_generated_chrec_p (exp))
638 return;
640 otype = TREE_TYPE (exp);
641 code = TREE_CODE (exp);
642 extract_ops_from_tree (exp, &code, &op0, &op1);
643 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
645 *var = fold_convert (type, e);
646 *off = o;
650 /* Returns the address ADDR of an object in a canonical shape (without nop
651 casts, and with type of pointer to the object). */
653 static tree
654 canonicalize_base_object_address (tree addr)
656 tree orig = addr;
658 STRIP_NOPS (addr);
660 /* The base address may be obtained by casting from integer, in that case
661 keep the cast. */
662 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
663 return orig;
665 if (TREE_CODE (addr) != ADDR_EXPR)
666 return addr;
668 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
671 /* Analyzes the behavior of the memory reference DR in the innermost loop or
672 basic block that contains it. Returns true if analysis succeed or false
673 otherwise. */
675 bool
676 dr_analyze_innermost (struct data_reference *dr)
678 gimple stmt = DR_STMT (dr);
679 struct loop *loop = loop_containing_stmt (stmt);
680 tree ref = DR_REF (dr);
681 HOST_WIDE_INT pbitsize, pbitpos;
682 tree base, poffset;
683 enum machine_mode pmode;
684 int punsignedp, pvolatilep;
685 affine_iv base_iv, offset_iv;
686 tree init, dinit, step;
687 bool in_loop = (loop && loop->num);
689 if (dump_file && (dump_flags & TDF_DETAILS))
690 fprintf (dump_file, "analyze_innermost: ");
692 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
693 &pmode, &punsignedp, &pvolatilep, false);
694 gcc_assert (base != NULL_TREE);
696 if (pbitpos % BITS_PER_UNIT != 0)
698 if (dump_file && (dump_flags & TDF_DETAILS))
699 fprintf (dump_file, "failed: bit offset alignment.\n");
700 return false;
703 base = build_fold_addr_expr (base);
704 if (in_loop)
706 if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
707 false))
709 if (dump_file && (dump_flags & TDF_DETAILS))
710 fprintf (dump_file, "failed: evolution of base is not affine.\n");
711 return false;
714 else
716 base_iv.base = base;
717 base_iv.step = ssize_int (0);
718 base_iv.no_overflow = true;
721 if (!poffset)
723 offset_iv.base = ssize_int (0);
724 offset_iv.step = ssize_int (0);
726 else
728 if (!in_loop)
730 offset_iv.base = poffset;
731 offset_iv.step = ssize_int (0);
733 else if (!simple_iv (loop, loop_containing_stmt (stmt),
734 poffset, &offset_iv, false))
736 if (dump_file && (dump_flags & TDF_DETAILS))
737 fprintf (dump_file, "failed: evolution of offset is not"
738 " affine.\n");
739 return false;
743 init = ssize_int (pbitpos / BITS_PER_UNIT);
744 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
745 init = size_binop (PLUS_EXPR, init, dinit);
746 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
747 init = size_binop (PLUS_EXPR, init, dinit);
749 step = size_binop (PLUS_EXPR,
750 fold_convert (ssizetype, base_iv.step),
751 fold_convert (ssizetype, offset_iv.step));
753 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
755 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
756 DR_INIT (dr) = init;
757 DR_STEP (dr) = step;
759 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
761 if (dump_file && (dump_flags & TDF_DETAILS))
762 fprintf (dump_file, "success.\n");
764 return true;
767 /* Determines the base object and the list of indices of memory reference
768 DR, analyzed in loop nest NEST. */
770 static void
771 dr_analyze_indices (struct data_reference *dr, struct loop *nest)
773 gimple stmt = DR_STMT (dr);
774 struct loop *loop = loop_containing_stmt (stmt);
775 VEC (tree, heap) *access_fns = NULL;
776 tree ref = unshare_expr (DR_REF (dr)), aref = ref, op;
777 tree base, off, access_fn = NULL_TREE;
778 basic_block before_loop = NULL;
780 if (nest)
781 before_loop = block_before_loop (nest);
783 while (handled_component_p (aref))
785 if (TREE_CODE (aref) == ARRAY_REF)
787 op = TREE_OPERAND (aref, 1);
788 if (nest)
790 access_fn = analyze_scalar_evolution (loop, op);
791 access_fn = instantiate_scev (before_loop, loop, access_fn);
792 VEC_safe_push (tree, heap, access_fns, access_fn);
795 TREE_OPERAND (aref, 1) = build_int_cst (TREE_TYPE (op), 0);
798 aref = TREE_OPERAND (aref, 0);
801 if (nest && INDIRECT_REF_P (aref))
803 op = TREE_OPERAND (aref, 0);
804 access_fn = analyze_scalar_evolution (loop, op);
805 access_fn = instantiate_scev (before_loop, loop, access_fn);
806 base = initial_condition (access_fn);
807 split_constant_offset (base, &base, &off);
808 access_fn = chrec_replace_initial_condition (access_fn,
809 fold_convert (TREE_TYPE (base), off));
811 TREE_OPERAND (aref, 0) = base;
812 VEC_safe_push (tree, heap, access_fns, access_fn);
815 DR_BASE_OBJECT (dr) = ref;
816 DR_ACCESS_FNS (dr) = access_fns;
819 /* Extracts the alias analysis information from the memory reference DR. */
821 static void
822 dr_analyze_alias (struct data_reference *dr)
824 tree ref = DR_REF (dr);
825 tree base = get_base_address (ref), addr;
827 if (INDIRECT_REF_P (base))
829 addr = TREE_OPERAND (base, 0);
830 if (TREE_CODE (addr) == SSA_NAME)
831 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
835 /* Returns true if the address of DR is invariant. */
837 static bool
838 dr_address_invariant_p (struct data_reference *dr)
840 unsigned i;
841 tree idx;
843 for (i = 0; VEC_iterate (tree, DR_ACCESS_FNS (dr), i, idx); i++)
844 if (tree_contains_chrecs (idx, NULL))
845 return false;
847 return true;
850 /* Frees data reference DR. */
852 void
853 free_data_ref (data_reference_p dr)
855 VEC_free (tree, heap, DR_ACCESS_FNS (dr));
856 free (dr);
859 /* Analyzes memory reference MEMREF accessed in STMT. The reference
860 is read if IS_READ is true, write otherwise. Returns the
861 data_reference description of MEMREF. NEST is the outermost loop of the
862 loop nest in that the reference should be analyzed. */
864 struct data_reference *
865 create_data_ref (struct loop *nest, tree memref, gimple stmt, bool is_read)
867 struct data_reference *dr;
869 if (dump_file && (dump_flags & TDF_DETAILS))
871 fprintf (dump_file, "Creating dr for ");
872 print_generic_expr (dump_file, memref, TDF_SLIM);
873 fprintf (dump_file, "\n");
876 dr = XCNEW (struct data_reference);
877 DR_STMT (dr) = stmt;
878 DR_REF (dr) = memref;
879 DR_IS_READ (dr) = is_read;
881 dr_analyze_innermost (dr);
882 dr_analyze_indices (dr, nest);
883 dr_analyze_alias (dr);
885 if (dump_file && (dump_flags & TDF_DETAILS))
887 fprintf (dump_file, "\tbase_address: ");
888 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
889 fprintf (dump_file, "\n\toffset from base address: ");
890 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
891 fprintf (dump_file, "\n\tconstant offset from base address: ");
892 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
893 fprintf (dump_file, "\n\tstep: ");
894 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
895 fprintf (dump_file, "\n\taligned to: ");
896 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
897 fprintf (dump_file, "\n\tbase_object: ");
898 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
899 fprintf (dump_file, "\n");
902 return dr;
905 /* Returns true if FNA == FNB. */
907 static bool
908 affine_function_equal_p (affine_fn fna, affine_fn fnb)
910 unsigned i, n = VEC_length (tree, fna);
912 if (n != VEC_length (tree, fnb))
913 return false;
915 for (i = 0; i < n; i++)
916 if (!operand_equal_p (VEC_index (tree, fna, i),
917 VEC_index (tree, fnb, i), 0))
918 return false;
920 return true;
923 /* If all the functions in CF are the same, returns one of them,
924 otherwise returns NULL. */
926 static affine_fn
927 common_affine_function (conflict_function *cf)
929 unsigned i;
930 affine_fn comm;
932 if (!CF_NONTRIVIAL_P (cf))
933 return NULL;
935 comm = cf->fns[0];
937 for (i = 1; i < cf->n; i++)
938 if (!affine_function_equal_p (comm, cf->fns[i]))
939 return NULL;
941 return comm;
944 /* Returns the base of the affine function FN. */
946 static tree
947 affine_function_base (affine_fn fn)
949 return VEC_index (tree, fn, 0);
952 /* Returns true if FN is a constant. */
954 static bool
955 affine_function_constant_p (affine_fn fn)
957 unsigned i;
958 tree coef;
960 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
961 if (!integer_zerop (coef))
962 return false;
964 return true;
967 /* Returns true if FN is the zero constant function. */
969 static bool
970 affine_function_zero_p (affine_fn fn)
972 return (integer_zerop (affine_function_base (fn))
973 && affine_function_constant_p (fn));
976 /* Returns a signed integer type with the largest precision from TA
977 and TB. */
979 static tree
980 signed_type_for_types (tree ta, tree tb)
982 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
983 return signed_type_for (ta);
984 else
985 return signed_type_for (tb);
988 /* Applies operation OP on affine functions FNA and FNB, and returns the
989 result. */
991 static affine_fn
992 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
994 unsigned i, n, m;
995 affine_fn ret;
996 tree coef;
998 if (VEC_length (tree, fnb) > VEC_length (tree, fna))
1000 n = VEC_length (tree, fna);
1001 m = VEC_length (tree, fnb);
1003 else
1005 n = VEC_length (tree, fnb);
1006 m = VEC_length (tree, fna);
1009 ret = VEC_alloc (tree, heap, m);
1010 for (i = 0; i < n; i++)
1012 tree type = signed_type_for_types (TREE_TYPE (VEC_index (tree, fna, i)),
1013 TREE_TYPE (VEC_index (tree, fnb, i)));
1015 VEC_quick_push (tree, ret,
1016 fold_build2 (op, type,
1017 VEC_index (tree, fna, i),
1018 VEC_index (tree, fnb, i)));
1021 for (; VEC_iterate (tree, fna, i, coef); i++)
1022 VEC_quick_push (tree, ret,
1023 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1024 coef, integer_zero_node));
1025 for (; VEC_iterate (tree, fnb, i, coef); i++)
1026 VEC_quick_push (tree, ret,
1027 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1028 integer_zero_node, coef));
1030 return ret;
1033 /* Returns the sum of affine functions FNA and FNB. */
1035 static affine_fn
1036 affine_fn_plus (affine_fn fna, affine_fn fnb)
1038 return affine_fn_op (PLUS_EXPR, fna, fnb);
1041 /* Returns the difference of affine functions FNA and FNB. */
1043 static affine_fn
1044 affine_fn_minus (affine_fn fna, affine_fn fnb)
1046 return affine_fn_op (MINUS_EXPR, fna, fnb);
1049 /* Frees affine function FN. */
1051 static void
1052 affine_fn_free (affine_fn fn)
1054 VEC_free (tree, heap, fn);
1057 /* Determine for each subscript in the data dependence relation DDR
1058 the distance. */
1060 static void
1061 compute_subscript_distance (struct data_dependence_relation *ddr)
1063 conflict_function *cf_a, *cf_b;
1064 affine_fn fn_a, fn_b, diff;
1066 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1068 unsigned int i;
1070 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1072 struct subscript *subscript;
1074 subscript = DDR_SUBSCRIPT (ddr, i);
1075 cf_a = SUB_CONFLICTS_IN_A (subscript);
1076 cf_b = SUB_CONFLICTS_IN_B (subscript);
1078 fn_a = common_affine_function (cf_a);
1079 fn_b = common_affine_function (cf_b);
1080 if (!fn_a || !fn_b)
1082 SUB_DISTANCE (subscript) = chrec_dont_know;
1083 return;
1085 diff = affine_fn_minus (fn_a, fn_b);
1087 if (affine_function_constant_p (diff))
1088 SUB_DISTANCE (subscript) = affine_function_base (diff);
1089 else
1090 SUB_DISTANCE (subscript) = chrec_dont_know;
1092 affine_fn_free (diff);
1097 /* Returns the conflict function for "unknown". */
1099 static conflict_function *
1100 conflict_fn_not_known (void)
1102 conflict_function *fn = XCNEW (conflict_function);
1103 fn->n = NOT_KNOWN;
1105 return fn;
1108 /* Returns the conflict function for "independent". */
1110 static conflict_function *
1111 conflict_fn_no_dependence (void)
1113 conflict_function *fn = XCNEW (conflict_function);
1114 fn->n = NO_DEPENDENCE;
1116 return fn;
1119 /* Returns true if the address of OBJ is invariant in LOOP. */
1121 static bool
1122 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1124 while (handled_component_p (obj))
1126 if (TREE_CODE (obj) == ARRAY_REF)
1128 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1129 need to check the stride and the lower bound of the reference. */
1130 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1131 loop->num)
1132 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1133 loop->num))
1134 return false;
1136 else if (TREE_CODE (obj) == COMPONENT_REF)
1138 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1139 loop->num))
1140 return false;
1142 obj = TREE_OPERAND (obj, 0);
1145 if (!INDIRECT_REF_P (obj))
1146 return true;
1148 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1149 loop->num);
1152 /* Returns true if A and B are accesses to different objects, or to different
1153 fields of the same object. */
1155 static bool
1156 disjoint_objects_p (tree a, tree b)
1158 tree base_a, base_b;
1159 VEC (tree, heap) *comp_a = NULL, *comp_b = NULL;
1160 bool ret;
1162 base_a = get_base_address (a);
1163 base_b = get_base_address (b);
1165 if (DECL_P (base_a)
1166 && DECL_P (base_b)
1167 && base_a != base_b)
1168 return true;
1170 if (!operand_equal_p (base_a, base_b, 0))
1171 return false;
1173 /* Compare the component references of A and B. We must start from the inner
1174 ones, so record them to the vector first. */
1175 while (handled_component_p (a))
1177 VEC_safe_push (tree, heap, comp_a, a);
1178 a = TREE_OPERAND (a, 0);
1180 while (handled_component_p (b))
1182 VEC_safe_push (tree, heap, comp_b, b);
1183 b = TREE_OPERAND (b, 0);
1186 ret = false;
1187 while (1)
1189 if (VEC_length (tree, comp_a) == 0
1190 || VEC_length (tree, comp_b) == 0)
1191 break;
1193 a = VEC_pop (tree, comp_a);
1194 b = VEC_pop (tree, comp_b);
1196 /* Real and imaginary part of a variable do not alias. */
1197 if ((TREE_CODE (a) == REALPART_EXPR
1198 && TREE_CODE (b) == IMAGPART_EXPR)
1199 || (TREE_CODE (a) == IMAGPART_EXPR
1200 && TREE_CODE (b) == REALPART_EXPR))
1202 ret = true;
1203 break;
1206 if (TREE_CODE (a) != TREE_CODE (b))
1207 break;
1209 /* Nothing to do for ARRAY_REFs, as the indices of array_refs in
1210 DR_BASE_OBJECT are always zero. */
1211 if (TREE_CODE (a) == ARRAY_REF)
1212 continue;
1213 else if (TREE_CODE (a) == COMPONENT_REF)
1215 if (operand_equal_p (TREE_OPERAND (a, 1), TREE_OPERAND (b, 1), 0))
1216 continue;
1218 /* Different fields of unions may overlap. */
1219 base_a = TREE_OPERAND (a, 0);
1220 if (TREE_CODE (TREE_TYPE (base_a)) == UNION_TYPE)
1221 break;
1223 /* Different fields of structures cannot. */
1224 ret = true;
1225 break;
1227 else
1228 break;
1231 VEC_free (tree, heap, comp_a);
1232 VEC_free (tree, heap, comp_b);
1234 return ret;
1237 /* Returns false if we can prove that data references A and B do not alias,
1238 true otherwise. */
1240 bool
1241 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b)
1243 const_tree addr_a = DR_BASE_ADDRESS (a);
1244 const_tree addr_b = DR_BASE_ADDRESS (b);
1245 const_tree type_a, type_b;
1246 const_tree decl_a = NULL_TREE, decl_b = NULL_TREE;
1248 /* If the accessed objects are disjoint, the memory references do not
1249 alias. */
1250 if (disjoint_objects_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b)))
1251 return false;
1253 /* Query the alias oracle. */
1254 if (!DR_IS_READ (a) && !DR_IS_READ (b))
1256 if (!refs_output_dependent_p (DR_REF (a), DR_REF (b)))
1257 return false;
1259 else if (DR_IS_READ (a) && !DR_IS_READ (b))
1261 if (!refs_anti_dependent_p (DR_REF (a), DR_REF (b)))
1262 return false;
1264 else if (!refs_may_alias_p (DR_REF (a), DR_REF (b)))
1265 return false;
1267 if (!addr_a || !addr_b)
1268 return true;
1270 /* If the references are based on different static objects, they cannot
1271 alias (PTA should be able to disambiguate such accesses, but often
1272 it fails to). */
1273 if (TREE_CODE (addr_a) == ADDR_EXPR
1274 && TREE_CODE (addr_b) == ADDR_EXPR)
1275 return TREE_OPERAND (addr_a, 0) == TREE_OPERAND (addr_b, 0);
1277 /* An instruction writing through a restricted pointer is "independent" of any
1278 instruction reading or writing through a different restricted pointer,
1279 in the same block/scope. */
1281 type_a = TREE_TYPE (addr_a);
1282 type_b = TREE_TYPE (addr_b);
1283 gcc_assert (POINTER_TYPE_P (type_a) && POINTER_TYPE_P (type_b));
1285 if (TREE_CODE (addr_a) == SSA_NAME)
1286 decl_a = SSA_NAME_VAR (addr_a);
1287 if (TREE_CODE (addr_b) == SSA_NAME)
1288 decl_b = SSA_NAME_VAR (addr_b);
1290 if (TYPE_RESTRICT (type_a) && TYPE_RESTRICT (type_b)
1291 && (!DR_IS_READ (a) || !DR_IS_READ (b))
1292 && decl_a && DECL_P (decl_a)
1293 && decl_b && DECL_P (decl_b)
1294 && decl_a != decl_b
1295 && TREE_CODE (DECL_CONTEXT (decl_a)) == FUNCTION_DECL
1296 && DECL_CONTEXT (decl_a) == DECL_CONTEXT (decl_b))
1297 return false;
1299 return true;
1302 static void compute_self_dependence (struct data_dependence_relation *);
1304 /* Initialize a data dependence relation between data accesses A and
1305 B. NB_LOOPS is the number of loops surrounding the references: the
1306 size of the classic distance/direction vectors. */
1308 static struct data_dependence_relation *
1309 initialize_data_dependence_relation (struct data_reference *a,
1310 struct data_reference *b,
1311 VEC (loop_p, heap) *loop_nest)
1313 struct data_dependence_relation *res;
1314 unsigned int i;
1316 res = XNEW (struct data_dependence_relation);
1317 DDR_A (res) = a;
1318 DDR_B (res) = b;
1319 DDR_LOOP_NEST (res) = NULL;
1320 DDR_REVERSED_P (res) = false;
1321 DDR_SUBSCRIPTS (res) = NULL;
1322 DDR_DIR_VECTS (res) = NULL;
1323 DDR_DIST_VECTS (res) = NULL;
1325 if (a == NULL || b == NULL)
1327 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1328 return res;
1331 /* If the data references do not alias, then they are independent. */
1332 if (!dr_may_alias_p (a, b))
1334 DDR_ARE_DEPENDENT (res) = chrec_known;
1335 return res;
1338 /* When the references are exactly the same, don't spend time doing
1339 the data dependence tests, just initialize the ddr and return. */
1340 if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1342 DDR_AFFINE_P (res) = true;
1343 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1344 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1345 DDR_LOOP_NEST (res) = loop_nest;
1346 DDR_INNER_LOOP (res) = 0;
1347 DDR_SELF_REFERENCE (res) = true;
1348 compute_self_dependence (res);
1349 return res;
1352 /* If the references do not access the same object, we do not know
1353 whether they alias or not. */
1354 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1356 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1357 return res;
1360 /* If the base of the object is not invariant in the loop nest, we cannot
1361 analyze it. TODO -- in fact, it would suffice to record that there may
1362 be arbitrary dependences in the loops where the base object varies. */
1363 if (loop_nest
1364 && !object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1365 DR_BASE_OBJECT (a)))
1367 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1368 return res;
1371 gcc_assert (DR_NUM_DIMENSIONS (a) == DR_NUM_DIMENSIONS (b));
1373 DDR_AFFINE_P (res) = true;
1374 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1375 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1376 DDR_LOOP_NEST (res) = loop_nest;
1377 DDR_INNER_LOOP (res) = 0;
1378 DDR_SELF_REFERENCE (res) = false;
1380 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1382 struct subscript *subscript;
1384 subscript = XNEW (struct subscript);
1385 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1386 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1387 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1388 SUB_DISTANCE (subscript) = chrec_dont_know;
1389 VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1392 return res;
1395 /* Frees memory used by the conflict function F. */
1397 static void
1398 free_conflict_function (conflict_function *f)
1400 unsigned i;
1402 if (CF_NONTRIVIAL_P (f))
1404 for (i = 0; i < f->n; i++)
1405 affine_fn_free (f->fns[i]);
1407 free (f);
1410 /* Frees memory used by SUBSCRIPTS. */
1412 static void
1413 free_subscripts (VEC (subscript_p, heap) *subscripts)
1415 unsigned i;
1416 subscript_p s;
1418 for (i = 0; VEC_iterate (subscript_p, subscripts, i, s); i++)
1420 free_conflict_function (s->conflicting_iterations_in_a);
1421 free_conflict_function (s->conflicting_iterations_in_b);
1422 free (s);
1424 VEC_free (subscript_p, heap, subscripts);
1427 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1428 description. */
1430 static inline void
1431 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1432 tree chrec)
1434 if (dump_file && (dump_flags & TDF_DETAILS))
1436 fprintf (dump_file, "(dependence classified: ");
1437 print_generic_expr (dump_file, chrec, 0);
1438 fprintf (dump_file, ")\n");
1441 DDR_ARE_DEPENDENT (ddr) = chrec;
1442 free_subscripts (DDR_SUBSCRIPTS (ddr));
1443 DDR_SUBSCRIPTS (ddr) = NULL;
1446 /* The dependence relation DDR cannot be represented by a distance
1447 vector. */
1449 static inline void
1450 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1452 if (dump_file && (dump_flags & TDF_DETAILS))
1453 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1455 DDR_AFFINE_P (ddr) = false;
1460 /* This section contains the classic Banerjee tests. */
1462 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1463 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1465 static inline bool
1466 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1468 return (evolution_function_is_constant_p (chrec_a)
1469 && evolution_function_is_constant_p (chrec_b));
1472 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1473 variable, i.e., if the SIV (Single Index Variable) test is true. */
1475 static bool
1476 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1478 if ((evolution_function_is_constant_p (chrec_a)
1479 && evolution_function_is_univariate_p (chrec_b))
1480 || (evolution_function_is_constant_p (chrec_b)
1481 && evolution_function_is_univariate_p (chrec_a)))
1482 return true;
1484 if (evolution_function_is_univariate_p (chrec_a)
1485 && evolution_function_is_univariate_p (chrec_b))
1487 switch (TREE_CODE (chrec_a))
1489 case POLYNOMIAL_CHREC:
1490 switch (TREE_CODE (chrec_b))
1492 case POLYNOMIAL_CHREC:
1493 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1494 return false;
1496 default:
1497 return true;
1500 default:
1501 return true;
1505 return false;
1508 /* Creates a conflict function with N dimensions. The affine functions
1509 in each dimension follow. */
1511 static conflict_function *
1512 conflict_fn (unsigned n, ...)
1514 unsigned i;
1515 conflict_function *ret = XCNEW (conflict_function);
1516 va_list ap;
1518 gcc_assert (0 < n && n <= MAX_DIM);
1519 va_start(ap, n);
1521 ret->n = n;
1522 for (i = 0; i < n; i++)
1523 ret->fns[i] = va_arg (ap, affine_fn);
1524 va_end(ap);
1526 return ret;
1529 /* Returns constant affine function with value CST. */
1531 static affine_fn
1532 affine_fn_cst (tree cst)
1534 affine_fn fn = VEC_alloc (tree, heap, 1);
1535 VEC_quick_push (tree, fn, cst);
1536 return fn;
1539 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1541 static affine_fn
1542 affine_fn_univar (tree cst, unsigned dim, tree coef)
1544 affine_fn fn = VEC_alloc (tree, heap, dim + 1);
1545 unsigned i;
1547 gcc_assert (dim > 0);
1548 VEC_quick_push (tree, fn, cst);
1549 for (i = 1; i < dim; i++)
1550 VEC_quick_push (tree, fn, integer_zero_node);
1551 VEC_quick_push (tree, fn, coef);
1552 return fn;
1555 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1556 *OVERLAPS_B are initialized to the functions that describe the
1557 relation between the elements accessed twice by CHREC_A and
1558 CHREC_B. For k >= 0, the following property is verified:
1560 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1562 static void
1563 analyze_ziv_subscript (tree chrec_a,
1564 tree chrec_b,
1565 conflict_function **overlaps_a,
1566 conflict_function **overlaps_b,
1567 tree *last_conflicts)
1569 tree type, difference;
1570 dependence_stats.num_ziv++;
1572 if (dump_file && (dump_flags & TDF_DETAILS))
1573 fprintf (dump_file, "(analyze_ziv_subscript \n");
1575 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1576 chrec_a = chrec_convert (type, chrec_a, NULL);
1577 chrec_b = chrec_convert (type, chrec_b, NULL);
1578 difference = chrec_fold_minus (type, chrec_a, chrec_b);
1580 switch (TREE_CODE (difference))
1582 case INTEGER_CST:
1583 if (integer_zerop (difference))
1585 /* The difference is equal to zero: the accessed index
1586 overlaps for each iteration in the loop. */
1587 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1588 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1589 *last_conflicts = chrec_dont_know;
1590 dependence_stats.num_ziv_dependent++;
1592 else
1594 /* The accesses do not overlap. */
1595 *overlaps_a = conflict_fn_no_dependence ();
1596 *overlaps_b = conflict_fn_no_dependence ();
1597 *last_conflicts = integer_zero_node;
1598 dependence_stats.num_ziv_independent++;
1600 break;
1602 default:
1603 /* We're not sure whether the indexes overlap. For the moment,
1604 conservatively answer "don't know". */
1605 if (dump_file && (dump_flags & TDF_DETAILS))
1606 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1608 *overlaps_a = conflict_fn_not_known ();
1609 *overlaps_b = conflict_fn_not_known ();
1610 *last_conflicts = chrec_dont_know;
1611 dependence_stats.num_ziv_unimplemented++;
1612 break;
1615 if (dump_file && (dump_flags & TDF_DETAILS))
1616 fprintf (dump_file, ")\n");
1619 /* Sets NIT to the estimated number of executions of the statements in
1620 LOOP. If CONSERVATIVE is true, we must be sure that NIT is at least as
1621 large as the number of iterations. If we have no reliable estimate,
1622 the function returns false, otherwise returns true. */
1624 bool
1625 estimated_loop_iterations (struct loop *loop, bool conservative,
1626 double_int *nit)
1628 estimate_numbers_of_iterations_loop (loop);
1629 if (conservative)
1631 if (!loop->any_upper_bound)
1632 return false;
1634 *nit = loop->nb_iterations_upper_bound;
1636 else
1638 if (!loop->any_estimate)
1639 return false;
1641 *nit = loop->nb_iterations_estimate;
1644 return true;
1647 /* Similar to estimated_loop_iterations, but returns the estimate only
1648 if it fits to HOST_WIDE_INT. If this is not the case, or the estimate
1649 on the number of iterations of LOOP could not be derived, returns -1. */
1651 HOST_WIDE_INT
1652 estimated_loop_iterations_int (struct loop *loop, bool conservative)
1654 double_int nit;
1655 HOST_WIDE_INT hwi_nit;
1657 if (!estimated_loop_iterations (loop, conservative, &nit))
1658 return -1;
1660 if (!double_int_fits_in_shwi_p (nit))
1661 return -1;
1662 hwi_nit = double_int_to_shwi (nit);
1664 return hwi_nit < 0 ? -1 : hwi_nit;
1667 /* Similar to estimated_loop_iterations, but returns the estimate as a tree,
1668 and only if it fits to the int type. If this is not the case, or the
1669 estimate on the number of iterations of LOOP could not be derived, returns
1670 chrec_dont_know. */
1672 static tree
1673 estimated_loop_iterations_tree (struct loop *loop, bool conservative)
1675 double_int nit;
1676 tree type;
1678 if (!estimated_loop_iterations (loop, conservative, &nit))
1679 return chrec_dont_know;
1681 type = lang_hooks.types.type_for_size (INT_TYPE_SIZE, true);
1682 if (!double_int_fits_to_tree_p (type, nit))
1683 return chrec_dont_know;
1685 return double_int_to_tree (type, nit);
1688 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1689 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1690 *OVERLAPS_B are initialized to the functions that describe the
1691 relation between the elements accessed twice by CHREC_A and
1692 CHREC_B. For k >= 0, the following property is verified:
1694 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1696 static void
1697 analyze_siv_subscript_cst_affine (tree chrec_a,
1698 tree chrec_b,
1699 conflict_function **overlaps_a,
1700 conflict_function **overlaps_b,
1701 tree *last_conflicts)
1703 bool value0, value1, value2;
1704 tree type, difference, tmp;
1706 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1707 chrec_a = chrec_convert (type, chrec_a, NULL);
1708 chrec_b = chrec_convert (type, chrec_b, NULL);
1709 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1711 if (!chrec_is_positive (initial_condition (difference), &value0))
1713 if (dump_file && (dump_flags & TDF_DETAILS))
1714 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1716 dependence_stats.num_siv_unimplemented++;
1717 *overlaps_a = conflict_fn_not_known ();
1718 *overlaps_b = conflict_fn_not_known ();
1719 *last_conflicts = chrec_dont_know;
1720 return;
1722 else
1724 if (value0 == false)
1726 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1728 if (dump_file && (dump_flags & TDF_DETAILS))
1729 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1731 *overlaps_a = conflict_fn_not_known ();
1732 *overlaps_b = conflict_fn_not_known ();
1733 *last_conflicts = chrec_dont_know;
1734 dependence_stats.num_siv_unimplemented++;
1735 return;
1737 else
1739 if (value1 == true)
1741 /* Example:
1742 chrec_a = 12
1743 chrec_b = {10, +, 1}
1746 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1748 HOST_WIDE_INT numiter;
1749 struct loop *loop = get_chrec_loop (chrec_b);
1751 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1752 tmp = fold_build2 (EXACT_DIV_EXPR, type,
1753 fold_build1 (ABS_EXPR, type, difference),
1754 CHREC_RIGHT (chrec_b));
1755 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1756 *last_conflicts = integer_one_node;
1759 /* Perform weak-zero siv test to see if overlap is
1760 outside the loop bounds. */
1761 numiter = estimated_loop_iterations_int (loop, false);
1763 if (numiter >= 0
1764 && compare_tree_int (tmp, numiter) > 0)
1766 free_conflict_function (*overlaps_a);
1767 free_conflict_function (*overlaps_b);
1768 *overlaps_a = conflict_fn_no_dependence ();
1769 *overlaps_b = conflict_fn_no_dependence ();
1770 *last_conflicts = integer_zero_node;
1771 dependence_stats.num_siv_independent++;
1772 return;
1774 dependence_stats.num_siv_dependent++;
1775 return;
1778 /* When the step does not divide the difference, there are
1779 no overlaps. */
1780 else
1782 *overlaps_a = conflict_fn_no_dependence ();
1783 *overlaps_b = conflict_fn_no_dependence ();
1784 *last_conflicts = integer_zero_node;
1785 dependence_stats.num_siv_independent++;
1786 return;
1790 else
1792 /* Example:
1793 chrec_a = 12
1794 chrec_b = {10, +, -1}
1796 In this case, chrec_a will not overlap with chrec_b. */
1797 *overlaps_a = conflict_fn_no_dependence ();
1798 *overlaps_b = conflict_fn_no_dependence ();
1799 *last_conflicts = integer_zero_node;
1800 dependence_stats.num_siv_independent++;
1801 return;
1805 else
1807 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1809 if (dump_file && (dump_flags & TDF_DETAILS))
1810 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1812 *overlaps_a = conflict_fn_not_known ();
1813 *overlaps_b = conflict_fn_not_known ();
1814 *last_conflicts = chrec_dont_know;
1815 dependence_stats.num_siv_unimplemented++;
1816 return;
1818 else
1820 if (value2 == false)
1822 /* Example:
1823 chrec_a = 3
1824 chrec_b = {10, +, -1}
1826 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1828 HOST_WIDE_INT numiter;
1829 struct loop *loop = get_chrec_loop (chrec_b);
1831 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1832 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
1833 CHREC_RIGHT (chrec_b));
1834 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1835 *last_conflicts = integer_one_node;
1837 /* Perform weak-zero siv test to see if overlap is
1838 outside the loop bounds. */
1839 numiter = estimated_loop_iterations_int (loop, false);
1841 if (numiter >= 0
1842 && compare_tree_int (tmp, numiter) > 0)
1844 free_conflict_function (*overlaps_a);
1845 free_conflict_function (*overlaps_b);
1846 *overlaps_a = conflict_fn_no_dependence ();
1847 *overlaps_b = conflict_fn_no_dependence ();
1848 *last_conflicts = integer_zero_node;
1849 dependence_stats.num_siv_independent++;
1850 return;
1852 dependence_stats.num_siv_dependent++;
1853 return;
1856 /* When the step does not divide the difference, there
1857 are no overlaps. */
1858 else
1860 *overlaps_a = conflict_fn_no_dependence ();
1861 *overlaps_b = conflict_fn_no_dependence ();
1862 *last_conflicts = integer_zero_node;
1863 dependence_stats.num_siv_independent++;
1864 return;
1867 else
1869 /* Example:
1870 chrec_a = 3
1871 chrec_b = {4, +, 1}
1873 In this case, chrec_a will not overlap with chrec_b. */
1874 *overlaps_a = conflict_fn_no_dependence ();
1875 *overlaps_b = conflict_fn_no_dependence ();
1876 *last_conflicts = integer_zero_node;
1877 dependence_stats.num_siv_independent++;
1878 return;
1885 /* Helper recursive function for initializing the matrix A. Returns
1886 the initial value of CHREC. */
1888 static tree
1889 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
1891 gcc_assert (chrec);
1893 switch (TREE_CODE (chrec))
1895 case POLYNOMIAL_CHREC:
1896 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
1898 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
1899 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
1901 case PLUS_EXPR:
1902 case MULT_EXPR:
1903 case MINUS_EXPR:
1905 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1906 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
1908 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
1911 case NOP_EXPR:
1913 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1914 return chrec_convert (chrec_type (chrec), op, NULL);
1917 case BIT_NOT_EXPR:
1919 /* Handle ~X as -1 - X. */
1920 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1921 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
1922 build_int_cst (TREE_TYPE (chrec), -1), op);
1925 case INTEGER_CST:
1926 return chrec;
1928 default:
1929 gcc_unreachable ();
1930 return NULL_TREE;
1934 #define FLOOR_DIV(x,y) ((x) / (y))
1936 /* Solves the special case of the Diophantine equation:
1937 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
1939 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
1940 number of iterations that loops X and Y run. The overlaps will be
1941 constructed as evolutions in dimension DIM. */
1943 static void
1944 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
1945 affine_fn *overlaps_a,
1946 affine_fn *overlaps_b,
1947 tree *last_conflicts, int dim)
1949 if (((step_a > 0 && step_b > 0)
1950 || (step_a < 0 && step_b < 0)))
1952 int step_overlaps_a, step_overlaps_b;
1953 int gcd_steps_a_b, last_conflict, tau2;
1955 gcd_steps_a_b = gcd (step_a, step_b);
1956 step_overlaps_a = step_b / gcd_steps_a_b;
1957 step_overlaps_b = step_a / gcd_steps_a_b;
1959 if (niter > 0)
1961 tau2 = FLOOR_DIV (niter, step_overlaps_a);
1962 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
1963 last_conflict = tau2;
1964 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
1966 else
1967 *last_conflicts = chrec_dont_know;
1969 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
1970 build_int_cst (NULL_TREE,
1971 step_overlaps_a));
1972 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
1973 build_int_cst (NULL_TREE,
1974 step_overlaps_b));
1977 else
1979 *overlaps_a = affine_fn_cst (integer_zero_node);
1980 *overlaps_b = affine_fn_cst (integer_zero_node);
1981 *last_conflicts = integer_zero_node;
1985 /* Solves the special case of a Diophantine equation where CHREC_A is
1986 an affine bivariate function, and CHREC_B is an affine univariate
1987 function. For example,
1989 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
1991 has the following overlapping functions:
1993 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
1994 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
1995 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
1997 FORNOW: This is a specialized implementation for a case occurring in
1998 a common benchmark. Implement the general algorithm. */
2000 static void
2001 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
2002 conflict_function **overlaps_a,
2003 conflict_function **overlaps_b,
2004 tree *last_conflicts)
2006 bool xz_p, yz_p, xyz_p;
2007 int step_x, step_y, step_z;
2008 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
2009 affine_fn overlaps_a_xz, overlaps_b_xz;
2010 affine_fn overlaps_a_yz, overlaps_b_yz;
2011 affine_fn overlaps_a_xyz, overlaps_b_xyz;
2012 affine_fn ova1, ova2, ovb;
2013 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
2015 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
2016 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
2017 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
2019 niter_x =
2020 estimated_loop_iterations_int (get_chrec_loop (CHREC_LEFT (chrec_a)),
2021 false);
2022 niter_y = estimated_loop_iterations_int (get_chrec_loop (chrec_a), false);
2023 niter_z = estimated_loop_iterations_int (get_chrec_loop (chrec_b), false);
2025 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
2027 if (dump_file && (dump_flags & TDF_DETAILS))
2028 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
2030 *overlaps_a = conflict_fn_not_known ();
2031 *overlaps_b = conflict_fn_not_known ();
2032 *last_conflicts = chrec_dont_know;
2033 return;
2036 niter = MIN (niter_x, niter_z);
2037 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
2038 &overlaps_a_xz,
2039 &overlaps_b_xz,
2040 &last_conflicts_xz, 1);
2041 niter = MIN (niter_y, niter_z);
2042 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
2043 &overlaps_a_yz,
2044 &overlaps_b_yz,
2045 &last_conflicts_yz, 2);
2046 niter = MIN (niter_x, niter_z);
2047 niter = MIN (niter_y, niter);
2048 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
2049 &overlaps_a_xyz,
2050 &overlaps_b_xyz,
2051 &last_conflicts_xyz, 3);
2053 xz_p = !integer_zerop (last_conflicts_xz);
2054 yz_p = !integer_zerop (last_conflicts_yz);
2055 xyz_p = !integer_zerop (last_conflicts_xyz);
2057 if (xz_p || yz_p || xyz_p)
2059 ova1 = affine_fn_cst (integer_zero_node);
2060 ova2 = affine_fn_cst (integer_zero_node);
2061 ovb = affine_fn_cst (integer_zero_node);
2062 if (xz_p)
2064 affine_fn t0 = ova1;
2065 affine_fn t2 = ovb;
2067 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2068 ovb = affine_fn_plus (ovb, overlaps_b_xz);
2069 affine_fn_free (t0);
2070 affine_fn_free (t2);
2071 *last_conflicts = last_conflicts_xz;
2073 if (yz_p)
2075 affine_fn t0 = ova2;
2076 affine_fn t2 = ovb;
2078 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2079 ovb = affine_fn_plus (ovb, overlaps_b_yz);
2080 affine_fn_free (t0);
2081 affine_fn_free (t2);
2082 *last_conflicts = last_conflicts_yz;
2084 if (xyz_p)
2086 affine_fn t0 = ova1;
2087 affine_fn t2 = ova2;
2088 affine_fn t4 = ovb;
2090 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2091 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2092 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2093 affine_fn_free (t0);
2094 affine_fn_free (t2);
2095 affine_fn_free (t4);
2096 *last_conflicts = last_conflicts_xyz;
2098 *overlaps_a = conflict_fn (2, ova1, ova2);
2099 *overlaps_b = conflict_fn (1, ovb);
2101 else
2103 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2104 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2105 *last_conflicts = integer_zero_node;
2108 affine_fn_free (overlaps_a_xz);
2109 affine_fn_free (overlaps_b_xz);
2110 affine_fn_free (overlaps_a_yz);
2111 affine_fn_free (overlaps_b_yz);
2112 affine_fn_free (overlaps_a_xyz);
2113 affine_fn_free (overlaps_b_xyz);
2116 /* Determines the overlapping elements due to accesses CHREC_A and
2117 CHREC_B, that are affine functions. This function cannot handle
2118 symbolic evolution functions, ie. when initial conditions are
2119 parameters, because it uses lambda matrices of integers. */
2121 static void
2122 analyze_subscript_affine_affine (tree chrec_a,
2123 tree chrec_b,
2124 conflict_function **overlaps_a,
2125 conflict_function **overlaps_b,
2126 tree *last_conflicts)
2128 unsigned nb_vars_a, nb_vars_b, dim;
2129 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2130 lambda_matrix A, U, S;
2132 if (eq_evolutions_p (chrec_a, chrec_b))
2134 /* The accessed index overlaps for each iteration in the
2135 loop. */
2136 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2137 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2138 *last_conflicts = chrec_dont_know;
2139 return;
2141 if (dump_file && (dump_flags & TDF_DETAILS))
2142 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2144 /* For determining the initial intersection, we have to solve a
2145 Diophantine equation. This is the most time consuming part.
2147 For answering to the question: "Is there a dependence?" we have
2148 to prove that there exists a solution to the Diophantine
2149 equation, and that the solution is in the iteration domain,
2150 i.e. the solution is positive or zero, and that the solution
2151 happens before the upper bound loop.nb_iterations. Otherwise
2152 there is no dependence. This function outputs a description of
2153 the iterations that hold the intersections. */
2155 nb_vars_a = nb_vars_in_chrec (chrec_a);
2156 nb_vars_b = nb_vars_in_chrec (chrec_b);
2158 dim = nb_vars_a + nb_vars_b;
2159 U = lambda_matrix_new (dim, dim);
2160 A = lambda_matrix_new (dim, 1);
2161 S = lambda_matrix_new (dim, 1);
2163 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2164 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
2165 gamma = init_b - init_a;
2167 /* Don't do all the hard work of solving the Diophantine equation
2168 when we already know the solution: for example,
2169 | {3, +, 1}_1
2170 | {3, +, 4}_2
2171 | gamma = 3 - 3 = 0.
2172 Then the first overlap occurs during the first iterations:
2173 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2175 if (gamma == 0)
2177 if (nb_vars_a == 1 && nb_vars_b == 1)
2179 HOST_WIDE_INT step_a, step_b;
2180 HOST_WIDE_INT niter, niter_a, niter_b;
2181 affine_fn ova, ovb;
2183 niter_a = estimated_loop_iterations_int (get_chrec_loop (chrec_a),
2184 false);
2185 niter_b = estimated_loop_iterations_int (get_chrec_loop (chrec_b),
2186 false);
2187 niter = MIN (niter_a, niter_b);
2188 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2189 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2191 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2192 &ova, &ovb,
2193 last_conflicts, 1);
2194 *overlaps_a = conflict_fn (1, ova);
2195 *overlaps_b = conflict_fn (1, ovb);
2198 else if (nb_vars_a == 2 && nb_vars_b == 1)
2199 compute_overlap_steps_for_affine_1_2
2200 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2202 else if (nb_vars_a == 1 && nb_vars_b == 2)
2203 compute_overlap_steps_for_affine_1_2
2204 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2206 else
2208 if (dump_file && (dump_flags & TDF_DETAILS))
2209 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2210 *overlaps_a = conflict_fn_not_known ();
2211 *overlaps_b = conflict_fn_not_known ();
2212 *last_conflicts = chrec_dont_know;
2214 goto end_analyze_subs_aa;
2217 /* U.A = S */
2218 lambda_matrix_right_hermite (A, dim, 1, S, U);
2220 if (S[0][0] < 0)
2222 S[0][0] *= -1;
2223 lambda_matrix_row_negate (U, dim, 0);
2225 gcd_alpha_beta = S[0][0];
2227 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2228 but that is a quite strange case. Instead of ICEing, answer
2229 don't know. */
2230 if (gcd_alpha_beta == 0)
2232 *overlaps_a = conflict_fn_not_known ();
2233 *overlaps_b = conflict_fn_not_known ();
2234 *last_conflicts = chrec_dont_know;
2235 goto end_analyze_subs_aa;
2238 /* The classic "gcd-test". */
2239 if (!int_divides_p (gcd_alpha_beta, gamma))
2241 /* The "gcd-test" has determined that there is no integer
2242 solution, i.e. there is no dependence. */
2243 *overlaps_a = conflict_fn_no_dependence ();
2244 *overlaps_b = conflict_fn_no_dependence ();
2245 *last_conflicts = integer_zero_node;
2248 /* Both access functions are univariate. This includes SIV and MIV cases. */
2249 else if (nb_vars_a == 1 && nb_vars_b == 1)
2251 /* Both functions should have the same evolution sign. */
2252 if (((A[0][0] > 0 && -A[1][0] > 0)
2253 || (A[0][0] < 0 && -A[1][0] < 0)))
2255 /* The solutions are given by:
2257 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2258 | [u21 u22] [y0]
2260 For a given integer t. Using the following variables,
2262 | i0 = u11 * gamma / gcd_alpha_beta
2263 | j0 = u12 * gamma / gcd_alpha_beta
2264 | i1 = u21
2265 | j1 = u22
2267 the solutions are:
2269 | x0 = i0 + i1 * t,
2270 | y0 = j0 + j1 * t. */
2271 HOST_WIDE_INT i0, j0, i1, j1;
2273 i0 = U[0][0] * gamma / gcd_alpha_beta;
2274 j0 = U[0][1] * gamma / gcd_alpha_beta;
2275 i1 = U[1][0];
2276 j1 = U[1][1];
2278 if ((i1 == 0 && i0 < 0)
2279 || (j1 == 0 && j0 < 0))
2281 /* There is no solution.
2282 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2283 falls in here, but for the moment we don't look at the
2284 upper bound of the iteration domain. */
2285 *overlaps_a = conflict_fn_no_dependence ();
2286 *overlaps_b = conflict_fn_no_dependence ();
2287 *last_conflicts = integer_zero_node;
2288 goto end_analyze_subs_aa;
2291 if (i1 > 0 && j1 > 0)
2293 HOST_WIDE_INT niter_a = estimated_loop_iterations_int
2294 (get_chrec_loop (chrec_a), false);
2295 HOST_WIDE_INT niter_b = estimated_loop_iterations_int
2296 (get_chrec_loop (chrec_b), false);
2297 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2299 /* (X0, Y0) is a solution of the Diophantine equation:
2300 "chrec_a (X0) = chrec_b (Y0)". */
2301 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2302 CEIL (-j0, j1));
2303 HOST_WIDE_INT x0 = i1 * tau1 + i0;
2304 HOST_WIDE_INT y0 = j1 * tau1 + j0;
2306 /* (X1, Y1) is the smallest positive solution of the eq
2307 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2308 first conflict occurs. */
2309 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2310 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2311 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2313 if (niter > 0)
2315 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2316 FLOOR_DIV (niter - j0, j1));
2317 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2319 /* If the overlap occurs outside of the bounds of the
2320 loop, there is no dependence. */
2321 if (x1 >= niter || y1 >= niter)
2323 *overlaps_a = conflict_fn_no_dependence ();
2324 *overlaps_b = conflict_fn_no_dependence ();
2325 *last_conflicts = integer_zero_node;
2326 goto end_analyze_subs_aa;
2328 else
2329 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2331 else
2332 *last_conflicts = chrec_dont_know;
2334 *overlaps_a
2335 = conflict_fn (1,
2336 affine_fn_univar (build_int_cst (NULL_TREE, x1),
2338 build_int_cst (NULL_TREE, i1)));
2339 *overlaps_b
2340 = conflict_fn (1,
2341 affine_fn_univar (build_int_cst (NULL_TREE, y1),
2343 build_int_cst (NULL_TREE, j1)));
2345 else
2347 /* FIXME: For the moment, the upper bound of the
2348 iteration domain for i and j is not checked. */
2349 if (dump_file && (dump_flags & TDF_DETAILS))
2350 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2351 *overlaps_a = conflict_fn_not_known ();
2352 *overlaps_b = conflict_fn_not_known ();
2353 *last_conflicts = chrec_dont_know;
2356 else
2358 if (dump_file && (dump_flags & TDF_DETAILS))
2359 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2360 *overlaps_a = conflict_fn_not_known ();
2361 *overlaps_b = conflict_fn_not_known ();
2362 *last_conflicts = chrec_dont_know;
2365 else
2367 if (dump_file && (dump_flags & TDF_DETAILS))
2368 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2369 *overlaps_a = conflict_fn_not_known ();
2370 *overlaps_b = conflict_fn_not_known ();
2371 *last_conflicts = chrec_dont_know;
2374 end_analyze_subs_aa:
2375 if (dump_file && (dump_flags & TDF_DETAILS))
2377 fprintf (dump_file, " (overlaps_a = ");
2378 dump_conflict_function (dump_file, *overlaps_a);
2379 fprintf (dump_file, ")\n (overlaps_b = ");
2380 dump_conflict_function (dump_file, *overlaps_b);
2381 fprintf (dump_file, ")\n");
2382 fprintf (dump_file, ")\n");
2386 /* Returns true when analyze_subscript_affine_affine can be used for
2387 determining the dependence relation between chrec_a and chrec_b,
2388 that contain symbols. This function modifies chrec_a and chrec_b
2389 such that the analysis result is the same, and such that they don't
2390 contain symbols, and then can safely be passed to the analyzer.
2392 Example: The analysis of the following tuples of evolutions produce
2393 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2394 vs. {0, +, 1}_1
2396 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2397 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2400 static bool
2401 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2403 tree diff, type, left_a, left_b, right_b;
2405 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2406 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2407 /* FIXME: For the moment not handled. Might be refined later. */
2408 return false;
2410 type = chrec_type (*chrec_a);
2411 left_a = CHREC_LEFT (*chrec_a);
2412 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
2413 diff = chrec_fold_minus (type, left_a, left_b);
2415 if (!evolution_function_is_constant_p (diff))
2416 return false;
2418 if (dump_file && (dump_flags & TDF_DETAILS))
2419 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2421 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2422 diff, CHREC_RIGHT (*chrec_a));
2423 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
2424 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2425 build_int_cst (type, 0),
2426 right_b);
2427 return true;
2430 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2431 *OVERLAPS_B are initialized to the functions that describe the
2432 relation between the elements accessed twice by CHREC_A and
2433 CHREC_B. For k >= 0, the following property is verified:
2435 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2437 static void
2438 analyze_siv_subscript (tree chrec_a,
2439 tree chrec_b,
2440 conflict_function **overlaps_a,
2441 conflict_function **overlaps_b,
2442 tree *last_conflicts,
2443 int loop_nest_num)
2445 dependence_stats.num_siv++;
2447 if (dump_file && (dump_flags & TDF_DETAILS))
2448 fprintf (dump_file, "(analyze_siv_subscript \n");
2450 if (evolution_function_is_constant_p (chrec_a)
2451 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2452 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2453 overlaps_a, overlaps_b, last_conflicts);
2455 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2456 && evolution_function_is_constant_p (chrec_b))
2457 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2458 overlaps_b, overlaps_a, last_conflicts);
2460 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2461 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2463 if (!chrec_contains_symbols (chrec_a)
2464 && !chrec_contains_symbols (chrec_b))
2466 analyze_subscript_affine_affine (chrec_a, chrec_b,
2467 overlaps_a, overlaps_b,
2468 last_conflicts);
2470 if (CF_NOT_KNOWN_P (*overlaps_a)
2471 || CF_NOT_KNOWN_P (*overlaps_b))
2472 dependence_stats.num_siv_unimplemented++;
2473 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2474 || CF_NO_DEPENDENCE_P (*overlaps_b))
2475 dependence_stats.num_siv_independent++;
2476 else
2477 dependence_stats.num_siv_dependent++;
2479 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2480 &chrec_b))
2482 analyze_subscript_affine_affine (chrec_a, chrec_b,
2483 overlaps_a, overlaps_b,
2484 last_conflicts);
2486 if (CF_NOT_KNOWN_P (*overlaps_a)
2487 || CF_NOT_KNOWN_P (*overlaps_b))
2488 dependence_stats.num_siv_unimplemented++;
2489 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2490 || CF_NO_DEPENDENCE_P (*overlaps_b))
2491 dependence_stats.num_siv_independent++;
2492 else
2493 dependence_stats.num_siv_dependent++;
2495 else
2496 goto siv_subscript_dontknow;
2499 else
2501 siv_subscript_dontknow:;
2502 if (dump_file && (dump_flags & TDF_DETAILS))
2503 fprintf (dump_file, "siv test failed: unimplemented.\n");
2504 *overlaps_a = conflict_fn_not_known ();
2505 *overlaps_b = conflict_fn_not_known ();
2506 *last_conflicts = chrec_dont_know;
2507 dependence_stats.num_siv_unimplemented++;
2510 if (dump_file && (dump_flags & TDF_DETAILS))
2511 fprintf (dump_file, ")\n");
2514 /* Returns false if we can prove that the greatest common divisor of the steps
2515 of CHREC does not divide CST, false otherwise. */
2517 static bool
2518 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2520 HOST_WIDE_INT cd = 0, val;
2521 tree step;
2523 if (!host_integerp (cst, 0))
2524 return true;
2525 val = tree_low_cst (cst, 0);
2527 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2529 step = CHREC_RIGHT (chrec);
2530 if (!host_integerp (step, 0))
2531 return true;
2532 cd = gcd (cd, tree_low_cst (step, 0));
2533 chrec = CHREC_LEFT (chrec);
2536 return val % cd == 0;
2539 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2540 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2541 functions that describe the relation between the elements accessed
2542 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2543 is verified:
2545 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2547 static void
2548 analyze_miv_subscript (tree chrec_a,
2549 tree chrec_b,
2550 conflict_function **overlaps_a,
2551 conflict_function **overlaps_b,
2552 tree *last_conflicts,
2553 struct loop *loop_nest)
2555 /* FIXME: This is a MIV subscript, not yet handled.
2556 Example: (A[{1, +, 1}_1] vs. A[{1, +, 1}_2]) that comes from
2557 (A[i] vs. A[j]).
2559 In the SIV test we had to solve a Diophantine equation with two
2560 variables. In the MIV case we have to solve a Diophantine
2561 equation with 2*n variables (if the subscript uses n IVs).
2563 tree type, difference;
2565 dependence_stats.num_miv++;
2566 if (dump_file && (dump_flags & TDF_DETAILS))
2567 fprintf (dump_file, "(analyze_miv_subscript \n");
2569 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2570 chrec_a = chrec_convert (type, chrec_a, NULL);
2571 chrec_b = chrec_convert (type, chrec_b, NULL);
2572 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2574 if (eq_evolutions_p (chrec_a, chrec_b))
2576 /* Access functions are the same: all the elements are accessed
2577 in the same order. */
2578 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2579 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2580 *last_conflicts = estimated_loop_iterations_tree
2581 (get_chrec_loop (chrec_a), true);
2582 dependence_stats.num_miv_dependent++;
2585 else if (evolution_function_is_constant_p (difference)
2586 /* For the moment, the following is verified:
2587 evolution_function_is_affine_multivariate_p (chrec_a,
2588 loop_nest->num) */
2589 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2591 /* testsuite/.../ssa-chrec-33.c
2592 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2594 The difference is 1, and all the evolution steps are multiples
2595 of 2, consequently there are no overlapping elements. */
2596 *overlaps_a = conflict_fn_no_dependence ();
2597 *overlaps_b = conflict_fn_no_dependence ();
2598 *last_conflicts = integer_zero_node;
2599 dependence_stats.num_miv_independent++;
2602 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2603 && !chrec_contains_symbols (chrec_a)
2604 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2605 && !chrec_contains_symbols (chrec_b))
2607 /* testsuite/.../ssa-chrec-35.c
2608 {0, +, 1}_2 vs. {0, +, 1}_3
2609 the overlapping elements are respectively located at iterations:
2610 {0, +, 1}_x and {0, +, 1}_x,
2611 in other words, we have the equality:
2612 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2614 Other examples:
2615 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2616 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2618 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2619 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2621 analyze_subscript_affine_affine (chrec_a, chrec_b,
2622 overlaps_a, overlaps_b, last_conflicts);
2624 if (CF_NOT_KNOWN_P (*overlaps_a)
2625 || CF_NOT_KNOWN_P (*overlaps_b))
2626 dependence_stats.num_miv_unimplemented++;
2627 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2628 || CF_NO_DEPENDENCE_P (*overlaps_b))
2629 dependence_stats.num_miv_independent++;
2630 else
2631 dependence_stats.num_miv_dependent++;
2634 else
2636 /* When the analysis is too difficult, answer "don't know". */
2637 if (dump_file && (dump_flags & TDF_DETAILS))
2638 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2640 *overlaps_a = conflict_fn_not_known ();
2641 *overlaps_b = conflict_fn_not_known ();
2642 *last_conflicts = chrec_dont_know;
2643 dependence_stats.num_miv_unimplemented++;
2646 if (dump_file && (dump_flags & TDF_DETAILS))
2647 fprintf (dump_file, ")\n");
2650 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2651 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2652 OVERLAP_ITERATIONS_B are initialized with two functions that
2653 describe the iterations that contain conflicting elements.
2655 Remark: For an integer k >= 0, the following equality is true:
2657 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2660 static void
2661 analyze_overlapping_iterations (tree chrec_a,
2662 tree chrec_b,
2663 conflict_function **overlap_iterations_a,
2664 conflict_function **overlap_iterations_b,
2665 tree *last_conflicts, struct loop *loop_nest)
2667 unsigned int lnn = loop_nest->num;
2669 dependence_stats.num_subscript_tests++;
2671 if (dump_file && (dump_flags & TDF_DETAILS))
2673 fprintf (dump_file, "(analyze_overlapping_iterations \n");
2674 fprintf (dump_file, " (chrec_a = ");
2675 print_generic_expr (dump_file, chrec_a, 0);
2676 fprintf (dump_file, ")\n (chrec_b = ");
2677 print_generic_expr (dump_file, chrec_b, 0);
2678 fprintf (dump_file, ")\n");
2681 if (chrec_a == NULL_TREE
2682 || chrec_b == NULL_TREE
2683 || chrec_contains_undetermined (chrec_a)
2684 || chrec_contains_undetermined (chrec_b))
2686 dependence_stats.num_subscript_undetermined++;
2688 *overlap_iterations_a = conflict_fn_not_known ();
2689 *overlap_iterations_b = conflict_fn_not_known ();
2692 /* If they are the same chrec, and are affine, they overlap
2693 on every iteration. */
2694 else if (eq_evolutions_p (chrec_a, chrec_b)
2695 && evolution_function_is_affine_multivariate_p (chrec_a, lnn))
2697 dependence_stats.num_same_subscript_function++;
2698 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2699 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2700 *last_conflicts = chrec_dont_know;
2703 /* If they aren't the same, and aren't affine, we can't do anything
2704 yet. */
2705 else if ((chrec_contains_symbols (chrec_a)
2706 || chrec_contains_symbols (chrec_b))
2707 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2708 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
2710 dependence_stats.num_subscript_undetermined++;
2711 *overlap_iterations_a = conflict_fn_not_known ();
2712 *overlap_iterations_b = conflict_fn_not_known ();
2715 else if (ziv_subscript_p (chrec_a, chrec_b))
2716 analyze_ziv_subscript (chrec_a, chrec_b,
2717 overlap_iterations_a, overlap_iterations_b,
2718 last_conflicts);
2720 else if (siv_subscript_p (chrec_a, chrec_b))
2721 analyze_siv_subscript (chrec_a, chrec_b,
2722 overlap_iterations_a, overlap_iterations_b,
2723 last_conflicts, lnn);
2725 else
2726 analyze_miv_subscript (chrec_a, chrec_b,
2727 overlap_iterations_a, overlap_iterations_b,
2728 last_conflicts, loop_nest);
2730 if (dump_file && (dump_flags & TDF_DETAILS))
2732 fprintf (dump_file, " (overlap_iterations_a = ");
2733 dump_conflict_function (dump_file, *overlap_iterations_a);
2734 fprintf (dump_file, ")\n (overlap_iterations_b = ");
2735 dump_conflict_function (dump_file, *overlap_iterations_b);
2736 fprintf (dump_file, ")\n");
2737 fprintf (dump_file, ")\n");
2741 /* Helper function for uniquely inserting distance vectors. */
2743 static void
2744 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
2746 unsigned i;
2747 lambda_vector v;
2749 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, v); i++)
2750 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
2751 return;
2753 VEC_safe_push (lambda_vector, heap, DDR_DIST_VECTS (ddr), dist_v);
2756 /* Helper function for uniquely inserting direction vectors. */
2758 static void
2759 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
2761 unsigned i;
2762 lambda_vector v;
2764 for (i = 0; VEC_iterate (lambda_vector, DDR_DIR_VECTS (ddr), i, v); i++)
2765 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
2766 return;
2768 VEC_safe_push (lambda_vector, heap, DDR_DIR_VECTS (ddr), dir_v);
2771 /* Add a distance of 1 on all the loops outer than INDEX. If we
2772 haven't yet determined a distance for this outer loop, push a new
2773 distance vector composed of the previous distance, and a distance
2774 of 1 for this outer loop. Example:
2776 | loop_1
2777 | loop_2
2778 | A[10]
2779 | endloop_2
2780 | endloop_1
2782 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
2783 save (0, 1), then we have to save (1, 0). */
2785 static void
2786 add_outer_distances (struct data_dependence_relation *ddr,
2787 lambda_vector dist_v, int index)
2789 /* For each outer loop where init_v is not set, the accesses are
2790 in dependence of distance 1 in the loop. */
2791 while (--index >= 0)
2793 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2794 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
2795 save_v[index] = 1;
2796 save_dist_v (ddr, save_v);
2800 /* Return false when fail to represent the data dependence as a
2801 distance vector. INIT_B is set to true when a component has been
2802 added to the distance vector DIST_V. INDEX_CARRY is then set to
2803 the index in DIST_V that carries the dependence. */
2805 static bool
2806 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
2807 struct data_reference *ddr_a,
2808 struct data_reference *ddr_b,
2809 lambda_vector dist_v, bool *init_b,
2810 int *index_carry)
2812 unsigned i;
2813 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2815 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2817 tree access_fn_a, access_fn_b;
2818 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
2820 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2822 non_affine_dependence_relation (ddr);
2823 return false;
2826 access_fn_a = DR_ACCESS_FN (ddr_a, i);
2827 access_fn_b = DR_ACCESS_FN (ddr_b, i);
2829 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
2830 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
2832 int dist, index;
2833 int index_a = index_in_loop_nest (CHREC_VARIABLE (access_fn_a),
2834 DDR_LOOP_NEST (ddr));
2835 int index_b = index_in_loop_nest (CHREC_VARIABLE (access_fn_b),
2836 DDR_LOOP_NEST (ddr));
2838 /* The dependence is carried by the outermost loop. Example:
2839 | loop_1
2840 | A[{4, +, 1}_1]
2841 | loop_2
2842 | A[{5, +, 1}_2]
2843 | endloop_2
2844 | endloop_1
2845 In this case, the dependence is carried by loop_1. */
2846 index = index_a < index_b ? index_a : index_b;
2847 *index_carry = MIN (index, *index_carry);
2849 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2851 non_affine_dependence_relation (ddr);
2852 return false;
2855 dist = int_cst_value (SUB_DISTANCE (subscript));
2857 /* This is the subscript coupling test. If we have already
2858 recorded a distance for this loop (a distance coming from
2859 another subscript), it should be the same. For example,
2860 in the following code, there is no dependence:
2862 | loop i = 0, N, 1
2863 | T[i+1][i] = ...
2864 | ... = T[i][i]
2865 | endloop
2867 if (init_v[index] != 0 && dist_v[index] != dist)
2869 finalize_ddr_dependent (ddr, chrec_known);
2870 return false;
2873 dist_v[index] = dist;
2874 init_v[index] = 1;
2875 *init_b = true;
2877 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
2879 /* This can be for example an affine vs. constant dependence
2880 (T[i] vs. T[3]) that is not an affine dependence and is
2881 not representable as a distance vector. */
2882 non_affine_dependence_relation (ddr);
2883 return false;
2887 return true;
2890 /* Return true when the DDR contains only constant access functions. */
2892 static bool
2893 constant_access_functions (const struct data_dependence_relation *ddr)
2895 unsigned i;
2897 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2898 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
2899 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
2900 return false;
2902 return true;
2905 /* Helper function for the case where DDR_A and DDR_B are the same
2906 multivariate access function with a constant step. For an example
2907 see pr34635-1.c. */
2909 static void
2910 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
2912 int x_1, x_2;
2913 tree c_1 = CHREC_LEFT (c_2);
2914 tree c_0 = CHREC_LEFT (c_1);
2915 lambda_vector dist_v;
2916 int v1, v2, cd;
2918 /* Polynomials with more than 2 variables are not handled yet. When
2919 the evolution steps are parameters, it is not possible to
2920 represent the dependence using classical distance vectors. */
2921 if (TREE_CODE (c_0) != INTEGER_CST
2922 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
2923 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
2925 DDR_AFFINE_P (ddr) = false;
2926 return;
2929 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
2930 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
2932 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
2933 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2934 v1 = int_cst_value (CHREC_RIGHT (c_1));
2935 v2 = int_cst_value (CHREC_RIGHT (c_2));
2936 cd = gcd (v1, v2);
2937 v1 /= cd;
2938 v2 /= cd;
2940 if (v2 < 0)
2942 v2 = -v2;
2943 v1 = -v1;
2946 dist_v[x_1] = v2;
2947 dist_v[x_2] = -v1;
2948 save_dist_v (ddr, dist_v);
2950 add_outer_distances (ddr, dist_v, x_1);
2953 /* Helper function for the case where DDR_A and DDR_B are the same
2954 access functions. */
2956 static void
2957 add_other_self_distances (struct data_dependence_relation *ddr)
2959 lambda_vector dist_v;
2960 unsigned i;
2961 int index_carry = DDR_NB_LOOPS (ddr);
2963 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2965 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
2967 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
2969 if (!evolution_function_is_univariate_p (access_fun))
2971 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
2973 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
2974 return;
2977 access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
2979 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
2980 add_multivariate_self_dist (ddr, access_fun);
2981 else
2982 /* The evolution step is not constant: it varies in
2983 the outer loop, so this cannot be represented by a
2984 distance vector. For example in pr34635.c the
2985 evolution is {0, +, {0, +, 4}_1}_2. */
2986 DDR_AFFINE_P (ddr) = false;
2988 return;
2991 index_carry = MIN (index_carry,
2992 index_in_loop_nest (CHREC_VARIABLE (access_fun),
2993 DDR_LOOP_NEST (ddr)));
2997 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2998 add_outer_distances (ddr, dist_v, index_carry);
3001 static void
3002 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
3004 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3006 dist_v[DDR_INNER_LOOP (ddr)] = 1;
3007 save_dist_v (ddr, dist_v);
3010 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
3011 is the case for example when access functions are the same and
3012 equal to a constant, as in:
3014 | loop_1
3015 | A[3] = ...
3016 | ... = A[3]
3017 | endloop_1
3019 in which case the distance vectors are (0) and (1). */
3021 static void
3022 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
3024 unsigned i, j;
3026 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3028 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
3029 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
3030 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
3032 for (j = 0; j < ca->n; j++)
3033 if (affine_function_zero_p (ca->fns[j]))
3035 insert_innermost_unit_dist_vector (ddr);
3036 return;
3039 for (j = 0; j < cb->n; j++)
3040 if (affine_function_zero_p (cb->fns[j]))
3042 insert_innermost_unit_dist_vector (ddr);
3043 return;
3048 /* Compute the classic per loop distance vector. DDR is the data
3049 dependence relation to build a vector from. Return false when fail
3050 to represent the data dependence as a distance vector. */
3052 static bool
3053 build_classic_dist_vector (struct data_dependence_relation *ddr,
3054 struct loop *loop_nest)
3056 bool init_b = false;
3057 int index_carry = DDR_NB_LOOPS (ddr);
3058 lambda_vector dist_v;
3060 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3061 return false;
3063 if (same_access_functions (ddr))
3065 /* Save the 0 vector. */
3066 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3067 save_dist_v (ddr, dist_v);
3069 if (constant_access_functions (ddr))
3070 add_distance_for_zero_overlaps (ddr);
3072 if (DDR_NB_LOOPS (ddr) > 1)
3073 add_other_self_distances (ddr);
3075 return true;
3078 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3079 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3080 dist_v, &init_b, &index_carry))
3081 return false;
3083 /* Save the distance vector if we initialized one. */
3084 if (init_b)
3086 /* Verify a basic constraint: classic distance vectors should
3087 always be lexicographically positive.
3089 Data references are collected in the order of execution of
3090 the program, thus for the following loop
3092 | for (i = 1; i < 100; i++)
3093 | for (j = 1; j < 100; j++)
3095 | t = T[j+1][i-1]; // A
3096 | T[j][i] = t + 2; // B
3099 references are collected following the direction of the wind:
3100 A then B. The data dependence tests are performed also
3101 following this order, such that we're looking at the distance
3102 separating the elements accessed by A from the elements later
3103 accessed by B. But in this example, the distance returned by
3104 test_dep (A, B) is lexicographically negative (-1, 1), that
3105 means that the access A occurs later than B with respect to
3106 the outer loop, ie. we're actually looking upwind. In this
3107 case we solve test_dep (B, A) looking downwind to the
3108 lexicographically positive solution, that returns the
3109 distance vector (1, -1). */
3110 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3112 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3113 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3114 loop_nest))
3115 return false;
3116 compute_subscript_distance (ddr);
3117 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3118 save_v, &init_b, &index_carry))
3119 return false;
3120 save_dist_v (ddr, save_v);
3121 DDR_REVERSED_P (ddr) = true;
3123 /* In this case there is a dependence forward for all the
3124 outer loops:
3126 | for (k = 1; k < 100; k++)
3127 | for (i = 1; i < 100; i++)
3128 | for (j = 1; j < 100; j++)
3130 | t = T[j+1][i-1]; // A
3131 | T[j][i] = t + 2; // B
3134 the vectors are:
3135 (0, 1, -1)
3136 (1, 1, -1)
3137 (1, -1, 1)
3139 if (DDR_NB_LOOPS (ddr) > 1)
3141 add_outer_distances (ddr, save_v, index_carry);
3142 add_outer_distances (ddr, dist_v, index_carry);
3145 else
3147 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3148 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3150 if (DDR_NB_LOOPS (ddr) > 1)
3152 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3154 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3155 DDR_A (ddr), loop_nest))
3156 return false;
3157 compute_subscript_distance (ddr);
3158 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3159 opposite_v, &init_b,
3160 &index_carry))
3161 return false;
3163 save_dist_v (ddr, save_v);
3164 add_outer_distances (ddr, dist_v, index_carry);
3165 add_outer_distances (ddr, opposite_v, index_carry);
3167 else
3168 save_dist_v (ddr, save_v);
3171 else
3173 /* There is a distance of 1 on all the outer loops: Example:
3174 there is a dependence of distance 1 on loop_1 for the array A.
3176 | loop_1
3177 | A[5] = ...
3178 | endloop
3180 add_outer_distances (ddr, dist_v,
3181 lambda_vector_first_nz (dist_v,
3182 DDR_NB_LOOPS (ddr), 0));
3185 if (dump_file && (dump_flags & TDF_DETAILS))
3187 unsigned i;
3189 fprintf (dump_file, "(build_classic_dist_vector\n");
3190 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3192 fprintf (dump_file, " dist_vector = (");
3193 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3194 DDR_NB_LOOPS (ddr));
3195 fprintf (dump_file, " )\n");
3197 fprintf (dump_file, ")\n");
3200 return true;
3203 /* Return the direction for a given distance.
3204 FIXME: Computing dir this way is suboptimal, since dir can catch
3205 cases that dist is unable to represent. */
3207 static inline enum data_dependence_direction
3208 dir_from_dist (int dist)
3210 if (dist > 0)
3211 return dir_positive;
3212 else if (dist < 0)
3213 return dir_negative;
3214 else
3215 return dir_equal;
3218 /* Compute the classic per loop direction vector. DDR is the data
3219 dependence relation to build a vector from. */
3221 static void
3222 build_classic_dir_vector (struct data_dependence_relation *ddr)
3224 unsigned i, j;
3225 lambda_vector dist_v;
3227 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
3229 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3231 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3232 dir_v[j] = dir_from_dist (dist_v[j]);
3234 save_dir_v (ddr, dir_v);
3238 /* Helper function. Returns true when there is a dependence between
3239 data references DRA and DRB. */
3241 static bool
3242 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3243 struct data_reference *dra,
3244 struct data_reference *drb,
3245 struct loop *loop_nest)
3247 unsigned int i;
3248 tree last_conflicts;
3249 struct subscript *subscript;
3251 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3252 i++)
3254 conflict_function *overlaps_a, *overlaps_b;
3256 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3257 DR_ACCESS_FN (drb, i),
3258 &overlaps_a, &overlaps_b,
3259 &last_conflicts, loop_nest);
3261 if (CF_NOT_KNOWN_P (overlaps_a)
3262 || CF_NOT_KNOWN_P (overlaps_b))
3264 finalize_ddr_dependent (ddr, chrec_dont_know);
3265 dependence_stats.num_dependence_undetermined++;
3266 free_conflict_function (overlaps_a);
3267 free_conflict_function (overlaps_b);
3268 return false;
3271 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3272 || CF_NO_DEPENDENCE_P (overlaps_b))
3274 finalize_ddr_dependent (ddr, chrec_known);
3275 dependence_stats.num_dependence_independent++;
3276 free_conflict_function (overlaps_a);
3277 free_conflict_function (overlaps_b);
3278 return false;
3281 else
3283 if (SUB_CONFLICTS_IN_A (subscript))
3284 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3285 if (SUB_CONFLICTS_IN_B (subscript))
3286 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3288 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3289 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3290 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3294 return true;
3297 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3299 static void
3300 subscript_dependence_tester (struct data_dependence_relation *ddr,
3301 struct loop *loop_nest)
3304 if (dump_file && (dump_flags & TDF_DETAILS))
3305 fprintf (dump_file, "(subscript_dependence_tester \n");
3307 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3308 dependence_stats.num_dependence_dependent++;
3310 compute_subscript_distance (ddr);
3311 if (build_classic_dist_vector (ddr, loop_nest))
3312 build_classic_dir_vector (ddr);
3314 if (dump_file && (dump_flags & TDF_DETAILS))
3315 fprintf (dump_file, ")\n");
3318 /* Returns true when all the access functions of A are affine or
3319 constant with respect to LOOP_NEST. */
3321 static bool
3322 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3323 const struct loop *loop_nest)
3325 unsigned int i;
3326 VEC(tree,heap) *fns = DR_ACCESS_FNS (a);
3327 tree t;
3329 for (i = 0; VEC_iterate (tree, fns, i, t); i++)
3330 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3331 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3332 return false;
3334 return true;
3337 /* Return true if we can create an affine data-ref for OP in STMT. */
3339 bool
3340 stmt_simple_memref_p (struct loop *loop, gimple stmt, tree op)
3342 data_reference_p dr;
3343 bool res = true;
3345 dr = create_data_ref (loop, op, stmt, true);
3346 if (!access_functions_are_affine_or_constant_p (dr, loop))
3347 res = false;
3349 free_data_ref (dr);
3350 return res;
3353 /* Initializes an equation for an OMEGA problem using the information
3354 contained in the ACCESS_FUN. Returns true when the operation
3355 succeeded.
3357 PB is the omega constraint system.
3358 EQ is the number of the equation to be initialized.
3359 OFFSET is used for shifting the variables names in the constraints:
3360 a constrain is composed of 2 * the number of variables surrounding
3361 dependence accesses. OFFSET is set either to 0 for the first n variables,
3362 then it is set to n.
3363 ACCESS_FUN is expected to be an affine chrec. */
3365 static bool
3366 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3367 unsigned int offset, tree access_fun,
3368 struct data_dependence_relation *ddr)
3370 switch (TREE_CODE (access_fun))
3372 case POLYNOMIAL_CHREC:
3374 tree left = CHREC_LEFT (access_fun);
3375 tree right = CHREC_RIGHT (access_fun);
3376 int var = CHREC_VARIABLE (access_fun);
3377 unsigned var_idx;
3379 if (TREE_CODE (right) != INTEGER_CST)
3380 return false;
3382 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3383 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3385 /* Compute the innermost loop index. */
3386 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3388 if (offset == 0)
3389 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3390 += int_cst_value (right);
3392 switch (TREE_CODE (left))
3394 case POLYNOMIAL_CHREC:
3395 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3397 case INTEGER_CST:
3398 pb->eqs[eq].coef[0] += int_cst_value (left);
3399 return true;
3401 default:
3402 return false;
3406 case INTEGER_CST:
3407 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3408 return true;
3410 default:
3411 return false;
3415 /* As explained in the comments preceding init_omega_for_ddr, we have
3416 to set up a system for each loop level, setting outer loops
3417 variation to zero, and current loop variation to positive or zero.
3418 Save each lexico positive distance vector. */
3420 static void
3421 omega_extract_distance_vectors (omega_pb pb,
3422 struct data_dependence_relation *ddr)
3424 int eq, geq;
3425 unsigned i, j;
3426 struct loop *loopi, *loopj;
3427 enum omega_result res;
3429 /* Set a new problem for each loop in the nest. The basis is the
3430 problem that we have initialized until now. On top of this we
3431 add new constraints. */
3432 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3433 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3435 int dist = 0;
3436 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3437 DDR_NB_LOOPS (ddr));
3439 omega_copy_problem (copy, pb);
3441 /* For all the outer loops "loop_j", add "dj = 0". */
3442 for (j = 0;
3443 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3445 eq = omega_add_zero_eq (copy, omega_black);
3446 copy->eqs[eq].coef[j + 1] = 1;
3449 /* For "loop_i", add "0 <= di". */
3450 geq = omega_add_zero_geq (copy, omega_black);
3451 copy->geqs[geq].coef[i + 1] = 1;
3453 /* Reduce the constraint system, and test that the current
3454 problem is feasible. */
3455 res = omega_simplify_problem (copy);
3456 if (res == omega_false
3457 || res == omega_unknown
3458 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3459 goto next_problem;
3461 for (eq = 0; eq < copy->num_subs; eq++)
3462 if (copy->subs[eq].key == (int) i + 1)
3464 dist = copy->subs[eq].coef[0];
3465 goto found_dist;
3468 if (dist == 0)
3470 /* Reinitialize problem... */
3471 omega_copy_problem (copy, pb);
3472 for (j = 0;
3473 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3475 eq = omega_add_zero_eq (copy, omega_black);
3476 copy->eqs[eq].coef[j + 1] = 1;
3479 /* ..., but this time "di = 1". */
3480 eq = omega_add_zero_eq (copy, omega_black);
3481 copy->eqs[eq].coef[i + 1] = 1;
3482 copy->eqs[eq].coef[0] = -1;
3484 res = omega_simplify_problem (copy);
3485 if (res == omega_false
3486 || res == omega_unknown
3487 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3488 goto next_problem;
3490 for (eq = 0; eq < copy->num_subs; eq++)
3491 if (copy->subs[eq].key == (int) i + 1)
3493 dist = copy->subs[eq].coef[0];
3494 goto found_dist;
3498 found_dist:;
3499 /* Save the lexicographically positive distance vector. */
3500 if (dist >= 0)
3502 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3503 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3505 dist_v[i] = dist;
3507 for (eq = 0; eq < copy->num_subs; eq++)
3508 if (copy->subs[eq].key > 0)
3510 dist = copy->subs[eq].coef[0];
3511 dist_v[copy->subs[eq].key - 1] = dist;
3514 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3515 dir_v[j] = dir_from_dist (dist_v[j]);
3517 save_dist_v (ddr, dist_v);
3518 save_dir_v (ddr, dir_v);
3521 next_problem:;
3522 omega_free_problem (copy);
3526 /* This is called for each subscript of a tuple of data references:
3527 insert an equality for representing the conflicts. */
3529 static bool
3530 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3531 struct data_dependence_relation *ddr,
3532 omega_pb pb, bool *maybe_dependent)
3534 int eq;
3535 tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3536 TREE_TYPE (access_fun_b));
3537 tree fun_a = chrec_convert (type, access_fun_a, NULL);
3538 tree fun_b = chrec_convert (type, access_fun_b, NULL);
3539 tree difference = chrec_fold_minus (type, fun_a, fun_b);
3541 /* When the fun_a - fun_b is not constant, the dependence is not
3542 captured by the classic distance vector representation. */
3543 if (TREE_CODE (difference) != INTEGER_CST)
3544 return false;
3546 /* ZIV test. */
3547 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3549 /* There is no dependence. */
3550 *maybe_dependent = false;
3551 return true;
3554 fun_b = chrec_fold_multiply (type, fun_b, integer_minus_one_node);
3556 eq = omega_add_zero_eq (pb, omega_black);
3557 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3558 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3559 /* There is probably a dependence, but the system of
3560 constraints cannot be built: answer "don't know". */
3561 return false;
3563 /* GCD test. */
3564 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3565 && !int_divides_p (lambda_vector_gcd
3566 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3567 2 * DDR_NB_LOOPS (ddr)),
3568 pb->eqs[eq].coef[0]))
3570 /* There is no dependence. */
3571 *maybe_dependent = false;
3572 return true;
3575 return true;
3578 /* Helper function, same as init_omega_for_ddr but specialized for
3579 data references A and B. */
3581 static bool
3582 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3583 struct data_dependence_relation *ddr,
3584 omega_pb pb, bool *maybe_dependent)
3586 unsigned i;
3587 int ineq;
3588 struct loop *loopi;
3589 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3591 /* Insert an equality per subscript. */
3592 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3594 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3595 ddr, pb, maybe_dependent))
3596 return false;
3597 else if (*maybe_dependent == false)
3599 /* There is no dependence. */
3600 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3601 return true;
3605 /* Insert inequalities: constraints corresponding to the iteration
3606 domain, i.e. the loops surrounding the references "loop_x" and
3607 the distance variables "dx". The layout of the OMEGA
3608 representation is as follows:
3609 - coef[0] is the constant
3610 - coef[1..nb_loops] are the protected variables that will not be
3611 removed by the solver: the "dx"
3612 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3614 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3615 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3617 HOST_WIDE_INT nbi = estimated_loop_iterations_int (loopi, false);
3619 /* 0 <= loop_x */
3620 ineq = omega_add_zero_geq (pb, omega_black);
3621 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3623 /* 0 <= loop_x + dx */
3624 ineq = omega_add_zero_geq (pb, omega_black);
3625 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3626 pb->geqs[ineq].coef[i + 1] = 1;
3628 if (nbi != -1)
3630 /* loop_x <= nb_iters */
3631 ineq = omega_add_zero_geq (pb, omega_black);
3632 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3633 pb->geqs[ineq].coef[0] = nbi;
3635 /* loop_x + dx <= nb_iters */
3636 ineq = omega_add_zero_geq (pb, omega_black);
3637 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3638 pb->geqs[ineq].coef[i + 1] = -1;
3639 pb->geqs[ineq].coef[0] = nbi;
3641 /* A step "dx" bigger than nb_iters is not feasible, so
3642 add "0 <= nb_iters + dx", */
3643 ineq = omega_add_zero_geq (pb, omega_black);
3644 pb->geqs[ineq].coef[i + 1] = 1;
3645 pb->geqs[ineq].coef[0] = nbi;
3646 /* and "dx <= nb_iters". */
3647 ineq = omega_add_zero_geq (pb, omega_black);
3648 pb->geqs[ineq].coef[i + 1] = -1;
3649 pb->geqs[ineq].coef[0] = nbi;
3653 omega_extract_distance_vectors (pb, ddr);
3655 return true;
3658 /* Sets up the Omega dependence problem for the data dependence
3659 relation DDR. Returns false when the constraint system cannot be
3660 built, ie. when the test answers "don't know". Returns true
3661 otherwise, and when independence has been proved (using one of the
3662 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3663 set MAYBE_DEPENDENT to true.
3665 Example: for setting up the dependence system corresponding to the
3666 conflicting accesses
3668 | loop_i
3669 | loop_j
3670 | A[i, i+1] = ...
3671 | ... A[2*j, 2*(i + j)]
3672 | endloop_j
3673 | endloop_i
3675 the following constraints come from the iteration domain:
3677 0 <= i <= Ni
3678 0 <= i + di <= Ni
3679 0 <= j <= Nj
3680 0 <= j + dj <= Nj
3682 where di, dj are the distance variables. The constraints
3683 representing the conflicting elements are:
3685 i = 2 * (j + dj)
3686 i + 1 = 2 * (i + di + j + dj)
3688 For asking that the resulting distance vector (di, dj) be
3689 lexicographically positive, we insert the constraint "di >= 0". If
3690 "di = 0" in the solution, we fix that component to zero, and we
3691 look at the inner loops: we set a new problem where all the outer
3692 loop distances are zero, and fix this inner component to be
3693 positive. When one of the components is positive, we save that
3694 distance, and set a new problem where the distance on this loop is
3695 zero, searching for other distances in the inner loops. Here is
3696 the classic example that illustrates that we have to set for each
3697 inner loop a new problem:
3699 | loop_1
3700 | loop_2
3701 | A[10]
3702 | endloop_2
3703 | endloop_1
3705 we have to save two distances (1, 0) and (0, 1).
3707 Given two array references, refA and refB, we have to set the
3708 dependence problem twice, refA vs. refB and refB vs. refA, and we
3709 cannot do a single test, as refB might occur before refA in the
3710 inner loops, and the contrary when considering outer loops: ex.
3712 | loop_0
3713 | loop_1
3714 | loop_2
3715 | T[{1,+,1}_2][{1,+,1}_1] // refA
3716 | T[{2,+,1}_2][{0,+,1}_1] // refB
3717 | endloop_2
3718 | endloop_1
3719 | endloop_0
3721 refB touches the elements in T before refA, and thus for the same
3722 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3723 but for successive loop_0 iterations, we have (1, -1, 1)
3725 The Omega solver expects the distance variables ("di" in the
3726 previous example) to come first in the constraint system (as
3727 variables to be protected, or "safe" variables), the constraint
3728 system is built using the following layout:
3730 "cst | distance vars | index vars".
3733 static bool
3734 init_omega_for_ddr (struct data_dependence_relation *ddr,
3735 bool *maybe_dependent)
3737 omega_pb pb;
3738 bool res = false;
3740 *maybe_dependent = true;
3742 if (same_access_functions (ddr))
3744 unsigned j;
3745 lambda_vector dir_v;
3747 /* Save the 0 vector. */
3748 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3749 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3750 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3751 dir_v[j] = dir_equal;
3752 save_dir_v (ddr, dir_v);
3754 /* Save the dependences carried by outer loops. */
3755 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3756 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3757 maybe_dependent);
3758 omega_free_problem (pb);
3759 return res;
3762 /* Omega expects the protected variables (those that have to be kept
3763 after elimination) to appear first in the constraint system.
3764 These variables are the distance variables. In the following
3765 initialization we declare NB_LOOPS safe variables, and the total
3766 number of variables for the constraint system is 2*NB_LOOPS. */
3767 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3768 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3769 maybe_dependent);
3770 omega_free_problem (pb);
3772 /* Stop computation if not decidable, or no dependence. */
3773 if (res == false || *maybe_dependent == false)
3774 return res;
3776 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3777 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
3778 maybe_dependent);
3779 omega_free_problem (pb);
3781 return res;
3784 /* Return true when DDR contains the same information as that stored
3785 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
3787 static bool
3788 ddr_consistent_p (FILE *file,
3789 struct data_dependence_relation *ddr,
3790 VEC (lambda_vector, heap) *dist_vects,
3791 VEC (lambda_vector, heap) *dir_vects)
3793 unsigned int i, j;
3795 /* If dump_file is set, output there. */
3796 if (dump_file && (dump_flags & TDF_DETAILS))
3797 file = dump_file;
3799 if (VEC_length (lambda_vector, dist_vects) != DDR_NUM_DIST_VECTS (ddr))
3801 lambda_vector b_dist_v;
3802 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
3803 VEC_length (lambda_vector, dist_vects),
3804 DDR_NUM_DIST_VECTS (ddr));
3806 fprintf (file, "Banerjee dist vectors:\n");
3807 for (i = 0; VEC_iterate (lambda_vector, dist_vects, i, b_dist_v); i++)
3808 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
3810 fprintf (file, "Omega dist vectors:\n");
3811 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3812 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
3814 fprintf (file, "data dependence relation:\n");
3815 dump_data_dependence_relation (file, ddr);
3817 fprintf (file, ")\n");
3818 return false;
3821 if (VEC_length (lambda_vector, dir_vects) != DDR_NUM_DIR_VECTS (ddr))
3823 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
3824 VEC_length (lambda_vector, dir_vects),
3825 DDR_NUM_DIR_VECTS (ddr));
3826 return false;
3829 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3831 lambda_vector a_dist_v;
3832 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
3834 /* Distance vectors are not ordered in the same way in the DDR
3835 and in the DIST_VECTS: search for a matching vector. */
3836 for (j = 0; VEC_iterate (lambda_vector, dist_vects, j, a_dist_v); j++)
3837 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
3838 break;
3840 if (j == VEC_length (lambda_vector, dist_vects))
3842 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
3843 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
3844 fprintf (file, "not found in Omega dist vectors:\n");
3845 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
3846 fprintf (file, "data dependence relation:\n");
3847 dump_data_dependence_relation (file, ddr);
3848 fprintf (file, ")\n");
3852 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
3854 lambda_vector a_dir_v;
3855 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
3857 /* Direction vectors are not ordered in the same way in the DDR
3858 and in the DIR_VECTS: search for a matching vector. */
3859 for (j = 0; VEC_iterate (lambda_vector, dir_vects, j, a_dir_v); j++)
3860 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
3861 break;
3863 if (j == VEC_length (lambda_vector, dist_vects))
3865 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
3866 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
3867 fprintf (file, "not found in Omega dir vectors:\n");
3868 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
3869 fprintf (file, "data dependence relation:\n");
3870 dump_data_dependence_relation (file, ddr);
3871 fprintf (file, ")\n");
3875 return true;
3878 /* This computes the affine dependence relation between A and B with
3879 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
3880 independence between two accesses, while CHREC_DONT_KNOW is used
3881 for representing the unknown relation.
3883 Note that it is possible to stop the computation of the dependence
3884 relation the first time we detect a CHREC_KNOWN element for a given
3885 subscript. */
3887 static void
3888 compute_affine_dependence (struct data_dependence_relation *ddr,
3889 struct loop *loop_nest)
3891 struct data_reference *dra = DDR_A (ddr);
3892 struct data_reference *drb = DDR_B (ddr);
3894 if (dump_file && (dump_flags & TDF_DETAILS))
3896 fprintf (dump_file, "(compute_affine_dependence\n");
3897 fprintf (dump_file, " (stmt_a = \n");
3898 print_gimple_stmt (dump_file, DR_STMT (dra), 0, 0);
3899 fprintf (dump_file, ")\n (stmt_b = \n");
3900 print_gimple_stmt (dump_file, DR_STMT (drb), 0, 0);
3901 fprintf (dump_file, ")\n");
3904 /* Analyze only when the dependence relation is not yet known. */
3905 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE
3906 && !DDR_SELF_REFERENCE (ddr))
3908 dependence_stats.num_dependence_tests++;
3910 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
3911 && access_functions_are_affine_or_constant_p (drb, loop_nest))
3913 if (flag_check_data_deps)
3915 /* Compute the dependences using the first algorithm. */
3916 subscript_dependence_tester (ddr, loop_nest);
3918 if (dump_file && (dump_flags & TDF_DETAILS))
3920 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
3921 dump_data_dependence_relation (dump_file, ddr);
3924 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
3926 bool maybe_dependent;
3927 VEC (lambda_vector, heap) *dir_vects, *dist_vects;
3929 /* Save the result of the first DD analyzer. */
3930 dist_vects = DDR_DIST_VECTS (ddr);
3931 dir_vects = DDR_DIR_VECTS (ddr);
3933 /* Reset the information. */
3934 DDR_DIST_VECTS (ddr) = NULL;
3935 DDR_DIR_VECTS (ddr) = NULL;
3937 /* Compute the same information using Omega. */
3938 if (!init_omega_for_ddr (ddr, &maybe_dependent))
3939 goto csys_dont_know;
3941 if (dump_file && (dump_flags & TDF_DETAILS))
3943 fprintf (dump_file, "Omega Analyzer\n");
3944 dump_data_dependence_relation (dump_file, ddr);
3947 /* Check that we get the same information. */
3948 if (maybe_dependent)
3949 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
3950 dir_vects));
3953 else
3954 subscript_dependence_tester (ddr, loop_nest);
3957 /* As a last case, if the dependence cannot be determined, or if
3958 the dependence is considered too difficult to determine, answer
3959 "don't know". */
3960 else
3962 csys_dont_know:;
3963 dependence_stats.num_dependence_undetermined++;
3965 if (dump_file && (dump_flags & TDF_DETAILS))
3967 fprintf (dump_file, "Data ref a:\n");
3968 dump_data_reference (dump_file, dra);
3969 fprintf (dump_file, "Data ref b:\n");
3970 dump_data_reference (dump_file, drb);
3971 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
3973 finalize_ddr_dependent (ddr, chrec_dont_know);
3977 if (dump_file && (dump_flags & TDF_DETAILS))
3978 fprintf (dump_file, ")\n");
3981 /* This computes the dependence relation for the same data
3982 reference into DDR. */
3984 static void
3985 compute_self_dependence (struct data_dependence_relation *ddr)
3987 unsigned int i;
3988 struct subscript *subscript;
3990 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3991 return;
3993 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3994 i++)
3996 if (SUB_CONFLICTS_IN_A (subscript))
3997 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3998 if (SUB_CONFLICTS_IN_B (subscript))
3999 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
4001 /* The accessed index overlaps for each iteration. */
4002 SUB_CONFLICTS_IN_A (subscript)
4003 = conflict_fn (1, affine_fn_cst (integer_zero_node));
4004 SUB_CONFLICTS_IN_B (subscript)
4005 = conflict_fn (1, affine_fn_cst (integer_zero_node));
4006 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
4009 /* The distance vector is the zero vector. */
4010 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4011 save_dir_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4014 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4015 the data references in DATAREFS, in the LOOP_NEST. When
4016 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4017 relations. */
4019 void
4020 compute_all_dependences (VEC (data_reference_p, heap) *datarefs,
4021 VEC (ddr_p, heap) **dependence_relations,
4022 VEC (loop_p, heap) *loop_nest,
4023 bool compute_self_and_rr)
4025 struct data_dependence_relation *ddr;
4026 struct data_reference *a, *b;
4027 unsigned int i, j;
4029 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, a); i++)
4030 for (j = i + 1; VEC_iterate (data_reference_p, datarefs, j, b); j++)
4031 if (!DR_IS_READ (a) || !DR_IS_READ (b) || compute_self_and_rr)
4033 ddr = initialize_data_dependence_relation (a, b, loop_nest);
4034 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4035 if (loop_nest)
4036 compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
4039 if (compute_self_and_rr)
4040 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, a); i++)
4042 ddr = initialize_data_dependence_relation (a, a, loop_nest);
4043 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4044 compute_self_dependence (ddr);
4048 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4049 true if STMT clobbers memory, false otherwise. */
4051 bool
4052 get_references_in_stmt (gimple stmt, VEC (data_ref_loc, heap) **references)
4054 bool clobbers_memory = false;
4055 data_ref_loc *ref;
4056 tree *op0, *op1;
4057 enum gimple_code stmt_code = gimple_code (stmt);
4059 *references = NULL;
4061 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4062 Calls have side-effects, except those to const or pure
4063 functions. */
4064 if ((stmt_code == GIMPLE_CALL
4065 && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)))
4066 || (stmt_code == GIMPLE_ASM
4067 && gimple_asm_volatile_p (stmt)))
4068 clobbers_memory = true;
4070 if (!gimple_vuse (stmt))
4071 return clobbers_memory;
4073 if (stmt_code == GIMPLE_ASSIGN)
4075 tree base;
4076 op0 = gimple_assign_lhs_ptr (stmt);
4077 op1 = gimple_assign_rhs1_ptr (stmt);
4079 if (DECL_P (*op1)
4080 || (REFERENCE_CLASS_P (*op1)
4081 && (base = get_base_address (*op1))
4082 && TREE_CODE (base) != SSA_NAME))
4084 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4085 ref->pos = op1;
4086 ref->is_read = true;
4089 if (DECL_P (*op0)
4090 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0)))
4092 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4093 ref->pos = op0;
4094 ref->is_read = false;
4097 else if (stmt_code == GIMPLE_CALL)
4099 unsigned i, n = gimple_call_num_args (stmt);
4101 for (i = 0; i < n; i++)
4103 op0 = gimple_call_arg_ptr (stmt, i);
4105 if (DECL_P (*op0)
4106 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0)))
4108 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4109 ref->pos = op0;
4110 ref->is_read = true;
4115 return clobbers_memory;
4118 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4119 reference, returns false, otherwise returns true. NEST is the outermost
4120 loop of the loop nest in which the references should be analyzed. */
4122 bool
4123 find_data_references_in_stmt (struct loop *nest, gimple stmt,
4124 VEC (data_reference_p, heap) **datarefs)
4126 unsigned i;
4127 VEC (data_ref_loc, heap) *references;
4128 data_ref_loc *ref;
4129 bool ret = true;
4130 data_reference_p dr;
4132 if (get_references_in_stmt (stmt, &references))
4134 VEC_free (data_ref_loc, heap, references);
4135 return false;
4138 for (i = 0; VEC_iterate (data_ref_loc, references, i, ref); i++)
4140 dr = create_data_ref (nest, *ref->pos, stmt, ref->is_read);
4141 gcc_assert (dr != NULL);
4143 /* FIXME -- data dependence analysis does not work correctly for objects
4144 with invariant addresses in loop nests. Let us fail here until the
4145 problem is fixed. */
4146 if (dr_address_invariant_p (dr) && nest)
4148 free_data_ref (dr);
4149 if (dump_file && (dump_flags & TDF_DETAILS))
4150 fprintf (dump_file, "\tFAILED as dr address is invariant\n");
4151 ret = false;
4152 break;
4155 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4157 VEC_free (data_ref_loc, heap, references);
4158 return ret;
4161 /* Search the data references in LOOP, and record the information into
4162 DATAREFS. Returns chrec_dont_know when failing to analyze a
4163 difficult case, returns NULL_TREE otherwise. */
4165 static tree
4166 find_data_references_in_bb (struct loop *loop, basic_block bb,
4167 VEC (data_reference_p, heap) **datarefs)
4169 gimple_stmt_iterator bsi;
4171 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4173 gimple stmt = gsi_stmt (bsi);
4175 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4177 struct data_reference *res;
4178 res = XCNEW (struct data_reference);
4179 VEC_safe_push (data_reference_p, heap, *datarefs, res);
4181 return chrec_dont_know;
4185 return NULL_TREE;
4188 /* Search the data references in LOOP, and record the information into
4189 DATAREFS. Returns chrec_dont_know when failing to analyze a
4190 difficult case, returns NULL_TREE otherwise.
4192 TODO: This function should be made smarter so that it can handle address
4193 arithmetic as if they were array accesses, etc. */
4195 tree
4196 find_data_references_in_loop (struct loop *loop,
4197 VEC (data_reference_p, heap) **datarefs)
4199 basic_block bb, *bbs;
4200 unsigned int i;
4202 bbs = get_loop_body_in_dom_order (loop);
4204 for (i = 0; i < loop->num_nodes; i++)
4206 bb = bbs[i];
4208 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4210 free (bbs);
4211 return chrec_dont_know;
4214 free (bbs);
4216 return NULL_TREE;
4219 /* Recursive helper function. */
4221 static bool
4222 find_loop_nest_1 (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4224 /* Inner loops of the nest should not contain siblings. Example:
4225 when there are two consecutive loops,
4227 | loop_0
4228 | loop_1
4229 | A[{0, +, 1}_1]
4230 | endloop_1
4231 | loop_2
4232 | A[{0, +, 1}_2]
4233 | endloop_2
4234 | endloop_0
4236 the dependence relation cannot be captured by the distance
4237 abstraction. */
4238 if (loop->next)
4239 return false;
4241 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4242 if (loop->inner)
4243 return find_loop_nest_1 (loop->inner, loop_nest);
4244 return true;
4247 /* Return false when the LOOP is not well nested. Otherwise return
4248 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4249 contain the loops from the outermost to the innermost, as they will
4250 appear in the classic distance vector. */
4252 bool
4253 find_loop_nest (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4255 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4256 if (loop->inner)
4257 return find_loop_nest_1 (loop->inner, loop_nest);
4258 return true;
4261 /* Returns true when the data dependences have been computed, false otherwise.
4262 Given a loop nest LOOP, the following vectors are returned:
4263 DATAREFS is initialized to all the array elements contained in this loop,
4264 DEPENDENCE_RELATIONS contains the relations between the data references.
4265 Compute read-read and self relations if
4266 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4268 bool
4269 compute_data_dependences_for_loop (struct loop *loop,
4270 bool compute_self_and_read_read_dependences,
4271 VEC (data_reference_p, heap) **datarefs,
4272 VEC (ddr_p, heap) **dependence_relations)
4274 bool res = true;
4275 VEC (loop_p, heap) *vloops = VEC_alloc (loop_p, heap, 3);
4277 memset (&dependence_stats, 0, sizeof (dependence_stats));
4279 /* If the loop nest is not well formed, or one of the data references
4280 is not computable, give up without spending time to compute other
4281 dependences. */
4282 if (!loop
4283 || !find_loop_nest (loop, &vloops)
4284 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know)
4286 struct data_dependence_relation *ddr;
4288 /* Insert a single relation into dependence_relations:
4289 chrec_dont_know. */
4290 ddr = initialize_data_dependence_relation (NULL, NULL, vloops);
4291 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4292 res = false;
4294 else
4295 compute_all_dependences (*datarefs, dependence_relations, vloops,
4296 compute_self_and_read_read_dependences);
4298 if (dump_file && (dump_flags & TDF_STATS))
4300 fprintf (dump_file, "Dependence tester statistics:\n");
4302 fprintf (dump_file, "Number of dependence tests: %d\n",
4303 dependence_stats.num_dependence_tests);
4304 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4305 dependence_stats.num_dependence_dependent);
4306 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4307 dependence_stats.num_dependence_independent);
4308 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4309 dependence_stats.num_dependence_undetermined);
4311 fprintf (dump_file, "Number of subscript tests: %d\n",
4312 dependence_stats.num_subscript_tests);
4313 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4314 dependence_stats.num_subscript_undetermined);
4315 fprintf (dump_file, "Number of same subscript function: %d\n",
4316 dependence_stats.num_same_subscript_function);
4318 fprintf (dump_file, "Number of ziv tests: %d\n",
4319 dependence_stats.num_ziv);
4320 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4321 dependence_stats.num_ziv_dependent);
4322 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4323 dependence_stats.num_ziv_independent);
4324 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4325 dependence_stats.num_ziv_unimplemented);
4327 fprintf (dump_file, "Number of siv tests: %d\n",
4328 dependence_stats.num_siv);
4329 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4330 dependence_stats.num_siv_dependent);
4331 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4332 dependence_stats.num_siv_independent);
4333 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4334 dependence_stats.num_siv_unimplemented);
4336 fprintf (dump_file, "Number of miv tests: %d\n",
4337 dependence_stats.num_miv);
4338 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4339 dependence_stats.num_miv_dependent);
4340 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4341 dependence_stats.num_miv_independent);
4342 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4343 dependence_stats.num_miv_unimplemented);
4346 return res;
4349 /* Returns true when the data dependences for the basic block BB have been
4350 computed, false otherwise.
4351 DATAREFS is initialized to all the array elements contained in this basic
4352 block, DEPENDENCE_RELATIONS contains the relations between the data
4353 references. Compute read-read and self relations if
4354 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4355 bool
4356 compute_data_dependences_for_bb (basic_block bb,
4357 bool compute_self_and_read_read_dependences,
4358 VEC (data_reference_p, heap) **datarefs,
4359 VEC (ddr_p, heap) **dependence_relations)
4361 if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know)
4362 return false;
4364 compute_all_dependences (*datarefs, dependence_relations, NULL,
4365 compute_self_and_read_read_dependences);
4366 return true;
4369 /* Entry point (for testing only). Analyze all the data references
4370 and the dependence relations in LOOP.
4372 The data references are computed first.
4374 A relation on these nodes is represented by a complete graph. Some
4375 of the relations could be of no interest, thus the relations can be
4376 computed on demand.
4378 In the following function we compute all the relations. This is
4379 just a first implementation that is here for:
4380 - for showing how to ask for the dependence relations,
4381 - for the debugging the whole dependence graph,
4382 - for the dejagnu testcases and maintenance.
4384 It is possible to ask only for a part of the graph, avoiding to
4385 compute the whole dependence graph. The computed dependences are
4386 stored in a knowledge base (KB) such that later queries don't
4387 recompute the same information. The implementation of this KB is
4388 transparent to the optimizer, and thus the KB can be changed with a
4389 more efficient implementation, or the KB could be disabled. */
4390 static void
4391 analyze_all_data_dependences (struct loop *loop)
4393 unsigned int i;
4394 int nb_data_refs = 10;
4395 VEC (data_reference_p, heap) *datarefs =
4396 VEC_alloc (data_reference_p, heap, nb_data_refs);
4397 VEC (ddr_p, heap) *dependence_relations =
4398 VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs);
4400 /* Compute DDs on the whole function. */
4401 compute_data_dependences_for_loop (loop, false, &datarefs,
4402 &dependence_relations);
4404 if (dump_file)
4406 dump_data_dependence_relations (dump_file, dependence_relations);
4407 fprintf (dump_file, "\n\n");
4409 if (dump_flags & TDF_DETAILS)
4410 dump_dist_dir_vectors (dump_file, dependence_relations);
4412 if (dump_flags & TDF_STATS)
4414 unsigned nb_top_relations = 0;
4415 unsigned nb_bot_relations = 0;
4416 unsigned nb_chrec_relations = 0;
4417 struct data_dependence_relation *ddr;
4419 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
4421 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4422 nb_top_relations++;
4424 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4425 nb_bot_relations++;
4427 else
4428 nb_chrec_relations++;
4431 gather_stats_on_scev_database ();
4435 free_dependence_relations (dependence_relations);
4436 free_data_refs (datarefs);
4439 /* Computes all the data dependences and check that the results of
4440 several analyzers are the same. */
4442 void
4443 tree_check_data_deps (void)
4445 loop_iterator li;
4446 struct loop *loop_nest;
4448 FOR_EACH_LOOP (li, loop_nest, 0)
4449 analyze_all_data_dependences (loop_nest);
4452 /* Free the memory used by a data dependence relation DDR. */
4454 void
4455 free_dependence_relation (struct data_dependence_relation *ddr)
4457 if (ddr == NULL)
4458 return;
4460 if (DDR_SUBSCRIPTS (ddr))
4461 free_subscripts (DDR_SUBSCRIPTS (ddr));
4462 if (DDR_DIST_VECTS (ddr))
4463 VEC_free (lambda_vector, heap, DDR_DIST_VECTS (ddr));
4464 if (DDR_DIR_VECTS (ddr))
4465 VEC_free (lambda_vector, heap, DDR_DIR_VECTS (ddr));
4467 free (ddr);
4470 /* Free the memory used by the data dependence relations from
4471 DEPENDENCE_RELATIONS. */
4473 void
4474 free_dependence_relations (VEC (ddr_p, heap) *dependence_relations)
4476 unsigned int i;
4477 struct data_dependence_relation *ddr;
4478 VEC (loop_p, heap) *loop_nest = NULL;
4480 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
4482 if (ddr == NULL)
4483 continue;
4484 if (loop_nest == NULL)
4485 loop_nest = DDR_LOOP_NEST (ddr);
4486 else
4487 gcc_assert (DDR_LOOP_NEST (ddr) == NULL
4488 || DDR_LOOP_NEST (ddr) == loop_nest);
4489 free_dependence_relation (ddr);
4492 if (loop_nest)
4493 VEC_free (loop_p, heap, loop_nest);
4494 VEC_free (ddr_p, heap, dependence_relations);
4497 /* Free the memory used by the data references from DATAREFS. */
4499 void
4500 free_data_refs (VEC (data_reference_p, heap) *datarefs)
4502 unsigned int i;
4503 struct data_reference *dr;
4505 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
4506 free_data_ref (dr);
4507 VEC_free (data_reference_p, heap, datarefs);
4512 /* Dump vertex I in RDG to FILE. */
4514 void
4515 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
4517 struct vertex *v = &(rdg->vertices[i]);
4518 struct graph_edge *e;
4520 fprintf (file, "(vertex %d: (%s%s) (in:", i,
4521 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
4522 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
4524 if (v->pred)
4525 for (e = v->pred; e; e = e->pred_next)
4526 fprintf (file, " %d", e->src);
4528 fprintf (file, ") (out:");
4530 if (v->succ)
4531 for (e = v->succ; e; e = e->succ_next)
4532 fprintf (file, " %d", e->dest);
4534 fprintf (file, ") \n");
4535 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
4536 fprintf (file, ")\n");
4539 /* Call dump_rdg_vertex on stderr. */
4541 void
4542 debug_rdg_vertex (struct graph *rdg, int i)
4544 dump_rdg_vertex (stderr, rdg, i);
4547 /* Dump component C of RDG to FILE. If DUMPED is non-null, set the
4548 dumped vertices to that bitmap. */
4550 void dump_rdg_component (FILE *file, struct graph *rdg, int c, bitmap dumped)
4552 int i;
4554 fprintf (file, "(%d\n", c);
4556 for (i = 0; i < rdg->n_vertices; i++)
4557 if (rdg->vertices[i].component == c)
4559 if (dumped)
4560 bitmap_set_bit (dumped, i);
4562 dump_rdg_vertex (file, rdg, i);
4565 fprintf (file, ")\n");
4568 /* Call dump_rdg_vertex on stderr. */
4570 void
4571 debug_rdg_component (struct graph *rdg, int c)
4573 dump_rdg_component (stderr, rdg, c, NULL);
4576 /* Dump the reduced dependence graph RDG to FILE. */
4578 void
4579 dump_rdg (FILE *file, struct graph *rdg)
4581 int i;
4582 bitmap dumped = BITMAP_ALLOC (NULL);
4584 fprintf (file, "(rdg\n");
4586 for (i = 0; i < rdg->n_vertices; i++)
4587 if (!bitmap_bit_p (dumped, i))
4588 dump_rdg_component (file, rdg, rdg->vertices[i].component, dumped);
4590 fprintf (file, ")\n");
4591 BITMAP_FREE (dumped);
4594 /* Call dump_rdg on stderr. */
4596 void
4597 debug_rdg (struct graph *rdg)
4599 dump_rdg (stderr, rdg);
4602 static void
4603 dot_rdg_1 (FILE *file, struct graph *rdg)
4605 int i;
4607 fprintf (file, "digraph RDG {\n");
4609 for (i = 0; i < rdg->n_vertices; i++)
4611 struct vertex *v = &(rdg->vertices[i]);
4612 struct graph_edge *e;
4614 /* Highlight reads from memory. */
4615 if (RDG_MEM_READS_STMT (rdg, i))
4616 fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
4618 /* Highlight stores to memory. */
4619 if (RDG_MEM_WRITE_STMT (rdg, i))
4620 fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
4622 if (v->succ)
4623 for (e = v->succ; e; e = e->succ_next)
4624 switch (RDGE_TYPE (e))
4626 case input_dd:
4627 fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
4628 break;
4630 case output_dd:
4631 fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
4632 break;
4634 case flow_dd:
4635 /* These are the most common dependences: don't print these. */
4636 fprintf (file, "%d -> %d \n", i, e->dest);
4637 break;
4639 case anti_dd:
4640 fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
4641 break;
4643 default:
4644 gcc_unreachable ();
4648 fprintf (file, "}\n\n");
4651 /* Display SCOP using dotty. */
4653 void
4654 dot_rdg (struct graph *rdg)
4656 FILE *file = fopen ("/tmp/rdg.dot", "w");
4657 gcc_assert (file != NULL);
4659 dot_rdg_1 (file, rdg);
4660 fclose (file);
4662 system ("dotty /tmp/rdg.dot");
4666 /* This structure is used for recording the mapping statement index in
4667 the RDG. */
4669 struct GTY(()) rdg_vertex_info
4671 gimple stmt;
4672 int index;
4675 /* Returns the index of STMT in RDG. */
4678 rdg_vertex_for_stmt (struct graph *rdg, gimple stmt)
4680 struct rdg_vertex_info rvi, *slot;
4682 rvi.stmt = stmt;
4683 slot = (struct rdg_vertex_info *) htab_find (rdg->indices, &rvi);
4685 if (!slot)
4686 return -1;
4688 return slot->index;
4691 /* Creates an edge in RDG for each distance vector from DDR. The
4692 order that we keep track of in the RDG is the order in which
4693 statements have to be executed. */
4695 static void
4696 create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4698 struct graph_edge *e;
4699 int va, vb;
4700 data_reference_p dra = DDR_A (ddr);
4701 data_reference_p drb = DDR_B (ddr);
4702 unsigned level = ddr_dependence_level (ddr);
4704 /* For non scalar dependences, when the dependence is REVERSED,
4705 statement B has to be executed before statement A. */
4706 if (level > 0
4707 && !DDR_REVERSED_P (ddr))
4709 data_reference_p tmp = dra;
4710 dra = drb;
4711 drb = tmp;
4714 va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
4715 vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
4717 if (va < 0 || vb < 0)
4718 return;
4720 e = add_edge (rdg, va, vb);
4721 e->data = XNEW (struct rdg_edge);
4723 RDGE_LEVEL (e) = level;
4724 RDGE_RELATION (e) = ddr;
4726 /* Determines the type of the data dependence. */
4727 if (DR_IS_READ (dra) && DR_IS_READ (drb))
4728 RDGE_TYPE (e) = input_dd;
4729 else if (!DR_IS_READ (dra) && !DR_IS_READ (drb))
4730 RDGE_TYPE (e) = output_dd;
4731 else if (!DR_IS_READ (dra) && DR_IS_READ (drb))
4732 RDGE_TYPE (e) = flow_dd;
4733 else if (DR_IS_READ (dra) && !DR_IS_READ (drb))
4734 RDGE_TYPE (e) = anti_dd;
4737 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
4738 the index of DEF in RDG. */
4740 static void
4741 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
4743 use_operand_p imm_use_p;
4744 imm_use_iterator iterator;
4746 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
4748 struct graph_edge *e;
4749 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
4751 if (use < 0)
4752 continue;
4754 e = add_edge (rdg, idef, use);
4755 e->data = XNEW (struct rdg_edge);
4756 RDGE_TYPE (e) = flow_dd;
4757 RDGE_RELATION (e) = NULL;
4761 /* Creates the edges of the reduced dependence graph RDG. */
4763 static void
4764 create_rdg_edges (struct graph *rdg, VEC (ddr_p, heap) *ddrs)
4766 int i;
4767 struct data_dependence_relation *ddr;
4768 def_operand_p def_p;
4769 ssa_op_iter iter;
4771 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
4772 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4773 create_rdg_edge_for_ddr (rdg, ddr);
4775 for (i = 0; i < rdg->n_vertices; i++)
4776 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
4777 iter, SSA_OP_DEF)
4778 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
4781 /* Build the vertices of the reduced dependence graph RDG. */
4783 void
4784 create_rdg_vertices (struct graph *rdg, VEC (gimple, heap) *stmts)
4786 int i, j;
4787 gimple stmt;
4789 for (i = 0; VEC_iterate (gimple, stmts, i, stmt); i++)
4791 VEC (data_ref_loc, heap) *references;
4792 data_ref_loc *ref;
4793 struct vertex *v = &(rdg->vertices[i]);
4794 struct rdg_vertex_info *rvi = XNEW (struct rdg_vertex_info);
4795 struct rdg_vertex_info **slot;
4797 rvi->stmt = stmt;
4798 rvi->index = i;
4799 slot = (struct rdg_vertex_info **) htab_find_slot (rdg->indices, rvi, INSERT);
4801 if (!*slot)
4802 *slot = rvi;
4803 else
4804 free (rvi);
4806 v->data = XNEW (struct rdg_vertex);
4807 RDG_STMT (rdg, i) = stmt;
4809 RDG_MEM_WRITE_STMT (rdg, i) = false;
4810 RDG_MEM_READS_STMT (rdg, i) = false;
4811 if (gimple_code (stmt) == GIMPLE_PHI)
4812 continue;
4814 get_references_in_stmt (stmt, &references);
4815 for (j = 0; VEC_iterate (data_ref_loc, references, j, ref); j++)
4816 if (!ref->is_read)
4817 RDG_MEM_WRITE_STMT (rdg, i) = true;
4818 else
4819 RDG_MEM_READS_STMT (rdg, i) = true;
4821 VEC_free (data_ref_loc, heap, references);
4825 /* Initialize STMTS with all the statements of LOOP. When
4826 INCLUDE_PHIS is true, include also the PHI nodes. The order in
4827 which we discover statements is important as
4828 generate_loops_for_partition is using the same traversal for
4829 identifying statements. */
4831 static void
4832 stmts_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
4834 unsigned int i;
4835 basic_block *bbs = get_loop_body_in_dom_order (loop);
4837 for (i = 0; i < loop->num_nodes; i++)
4839 basic_block bb = bbs[i];
4840 gimple_stmt_iterator bsi;
4841 gimple stmt;
4843 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4844 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
4846 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4848 stmt = gsi_stmt (bsi);
4849 if (gimple_code (stmt) != GIMPLE_LABEL)
4850 VEC_safe_push (gimple, heap, *stmts, stmt);
4854 free (bbs);
4857 /* Returns true when all the dependences are computable. */
4859 static bool
4860 known_dependences_p (VEC (ddr_p, heap) *dependence_relations)
4862 ddr_p ddr;
4863 unsigned int i;
4865 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
4866 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4867 return false;
4869 return true;
4872 /* Computes a hash function for element ELT. */
4874 static hashval_t
4875 hash_stmt_vertex_info (const void *elt)
4877 const struct rdg_vertex_info *const rvi =
4878 (const struct rdg_vertex_info *) elt;
4879 gimple stmt = rvi->stmt;
4881 return htab_hash_pointer (stmt);
4884 /* Compares database elements E1 and E2. */
4886 static int
4887 eq_stmt_vertex_info (const void *e1, const void *e2)
4889 const struct rdg_vertex_info *elt1 = (const struct rdg_vertex_info *) e1;
4890 const struct rdg_vertex_info *elt2 = (const struct rdg_vertex_info *) e2;
4892 return elt1->stmt == elt2->stmt;
4895 /* Free the element E. */
4897 static void
4898 hash_stmt_vertex_del (void *e)
4900 free (e);
4903 /* Build the Reduced Dependence Graph (RDG) with one vertex per
4904 statement of the loop nest, and one edge per data dependence or
4905 scalar dependence. */
4907 struct graph *
4908 build_empty_rdg (int n_stmts)
4910 int nb_data_refs = 10;
4911 struct graph *rdg = new_graph (n_stmts);
4913 rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
4914 eq_stmt_vertex_info, hash_stmt_vertex_del);
4915 return rdg;
4918 /* Build the Reduced Dependence Graph (RDG) with one vertex per
4919 statement of the loop nest, and one edge per data dependence or
4920 scalar dependence. */
4922 struct graph *
4923 build_rdg (struct loop *loop)
4925 int nb_data_refs = 10;
4926 struct graph *rdg = NULL;
4927 VEC (ddr_p, heap) *dependence_relations;
4928 VEC (data_reference_p, heap) *datarefs;
4929 VEC (gimple, heap) *stmts = VEC_alloc (gimple, heap, nb_data_refs);
4931 dependence_relations = VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs) ;
4932 datarefs = VEC_alloc (data_reference_p, heap, nb_data_refs);
4933 compute_data_dependences_for_loop (loop,
4934 false,
4935 &datarefs,
4936 &dependence_relations);
4938 if (!known_dependences_p (dependence_relations))
4940 free_dependence_relations (dependence_relations);
4941 free_data_refs (datarefs);
4942 VEC_free (gimple, heap, stmts);
4944 return rdg;
4947 stmts_from_loop (loop, &stmts);
4948 rdg = build_empty_rdg (VEC_length (gimple, stmts));
4950 rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
4951 eq_stmt_vertex_info, hash_stmt_vertex_del);
4952 create_rdg_vertices (rdg, stmts);
4953 create_rdg_edges (rdg, dependence_relations);
4955 VEC_free (gimple, heap, stmts);
4956 return rdg;
4959 /* Free the reduced dependence graph RDG. */
4961 void
4962 free_rdg (struct graph *rdg)
4964 int i;
4966 for (i = 0; i < rdg->n_vertices; i++)
4967 free (rdg->vertices[i].data);
4969 htab_delete (rdg->indices);
4970 free_graph (rdg);
4973 /* Initialize STMTS with all the statements of LOOP that contain a
4974 store to memory. */
4976 void
4977 stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
4979 unsigned int i;
4980 basic_block *bbs = get_loop_body_in_dom_order (loop);
4982 for (i = 0; i < loop->num_nodes; i++)
4984 basic_block bb = bbs[i];
4985 gimple_stmt_iterator bsi;
4987 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4988 if (gimple_vdef (gsi_stmt (bsi)))
4989 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
4992 free (bbs);
4995 /* For a data reference REF, return the declaration of its base
4996 address or NULL_TREE if the base is not determined. */
4998 static inline tree
4999 ref_base_address (gimple stmt, data_ref_loc *ref)
5001 tree base = NULL_TREE;
5002 tree base_address;
5003 struct data_reference *dr = XCNEW (struct data_reference);
5005 DR_STMT (dr) = stmt;
5006 DR_REF (dr) = *ref->pos;
5007 dr_analyze_innermost (dr);
5008 base_address = DR_BASE_ADDRESS (dr);
5010 if (!base_address)
5011 goto end;
5013 switch (TREE_CODE (base_address))
5015 case ADDR_EXPR:
5016 base = TREE_OPERAND (base_address, 0);
5017 break;
5019 default:
5020 base = base_address;
5021 break;
5024 end:
5025 free_data_ref (dr);
5026 return base;
5029 /* Determines whether the statement from vertex V of the RDG has a
5030 definition used outside the loop that contains this statement. */
5032 bool
5033 rdg_defs_used_in_other_loops_p (struct graph *rdg, int v)
5035 gimple stmt = RDG_STMT (rdg, v);
5036 struct loop *loop = loop_containing_stmt (stmt);
5037 use_operand_p imm_use_p;
5038 imm_use_iterator iterator;
5039 ssa_op_iter it;
5040 def_operand_p def_p;
5042 if (!loop)
5043 return true;
5045 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, it, SSA_OP_DEF)
5047 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, DEF_FROM_PTR (def_p))
5049 if (loop_containing_stmt (USE_STMT (imm_use_p)) != loop)
5050 return true;
5054 return false;
5057 /* Determines whether statements S1 and S2 access to similar memory
5058 locations. Two memory accesses are considered similar when they
5059 have the same base address declaration, i.e. when their
5060 ref_base_address is the same. */
5062 bool
5063 have_similar_memory_accesses (gimple s1, gimple s2)
5065 bool res = false;
5066 unsigned i, j;
5067 VEC (data_ref_loc, heap) *refs1, *refs2;
5068 data_ref_loc *ref1, *ref2;
5070 get_references_in_stmt (s1, &refs1);
5071 get_references_in_stmt (s2, &refs2);
5073 for (i = 0; VEC_iterate (data_ref_loc, refs1, i, ref1); i++)
5075 tree base1 = ref_base_address (s1, ref1);
5077 if (base1)
5078 for (j = 0; VEC_iterate (data_ref_loc, refs2, j, ref2); j++)
5079 if (base1 == ref_base_address (s2, ref2))
5081 res = true;
5082 goto end;
5086 end:
5087 VEC_free (data_ref_loc, heap, refs1);
5088 VEC_free (data_ref_loc, heap, refs2);
5089 return res;
5092 /* Helper function for the hashtab. */
5094 static int
5095 have_similar_memory_accesses_1 (const void *s1, const void *s2)
5097 return have_similar_memory_accesses (CONST_CAST_GIMPLE ((const_gimple) s1),
5098 CONST_CAST_GIMPLE ((const_gimple) s2));
5101 /* Helper function for the hashtab. */
5103 static hashval_t
5104 ref_base_address_1 (const void *s)
5106 gimple stmt = CONST_CAST_GIMPLE ((const_gimple) s);
5107 unsigned i;
5108 VEC (data_ref_loc, heap) *refs;
5109 data_ref_loc *ref;
5110 hashval_t res = 0;
5112 get_references_in_stmt (stmt, &refs);
5114 for (i = 0; VEC_iterate (data_ref_loc, refs, i, ref); i++)
5115 if (!ref->is_read)
5117 res = htab_hash_pointer (ref_base_address (stmt, ref));
5118 break;
5121 VEC_free (data_ref_loc, heap, refs);
5122 return res;
5125 /* Try to remove duplicated write data references from STMTS. */
5127 void
5128 remove_similar_memory_refs (VEC (gimple, heap) **stmts)
5130 unsigned i;
5131 gimple stmt;
5132 htab_t seen = htab_create (VEC_length (gimple, *stmts), ref_base_address_1,
5133 have_similar_memory_accesses_1, NULL);
5135 for (i = 0; VEC_iterate (gimple, *stmts, i, stmt); )
5137 void **slot;
5139 slot = htab_find_slot (seen, stmt, INSERT);
5141 if (*slot)
5142 VEC_ordered_remove (gimple, *stmts, i);
5143 else
5145 *slot = (void *) stmt;
5146 i++;
5150 htab_delete (seen);
5153 /* Returns the index of PARAMETER in the parameters vector of the
5154 ACCESS_MATRIX. If PARAMETER does not exist return -1. */
5156 int
5157 access_matrix_get_index_for_parameter (tree parameter,
5158 struct access_matrix *access_matrix)
5160 int i;
5161 VEC (tree,heap) *lambda_parameters = AM_PARAMETERS (access_matrix);
5162 tree lambda_parameter;
5164 for (i = 0; VEC_iterate (tree, lambda_parameters, i, lambda_parameter); i++)
5165 if (lambda_parameter == parameter)
5166 return i + AM_NB_INDUCTION_VARS (access_matrix);
5168 return -1;