1 /* Data references and dependences detectors.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3 Free Software Foundation, Inc.
4 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* This pass walks a given loop structure searching for array
23 references. The information about the array accesses is recorded
24 in DATA_REFERENCE structures.
26 The basic test for determining the dependences is:
27 given two access functions chrec1 and chrec2 to a same array, and
28 x and y two vectors from the iteration domain, the same element of
29 the array is accessed twice at iterations x and y if and only if:
30 | chrec1 (x) == chrec2 (y).
32 The goals of this analysis are:
34 - to determine the independence: the relation between two
35 independent accesses is qualified with the chrec_known (this
36 information allows a loop parallelization),
38 - when two data references access the same data, to qualify the
39 dependence relation with classic dependence representations:
43 - loop carried level dependence
44 - polyhedron dependence
45 or with the chains of recurrences based representation,
47 - to define a knowledge base for storing the data dependence
50 - to define an interface to access this data.
55 - subscript: given two array accesses a subscript is the tuple
56 composed of the access functions for a given dimension. Example:
57 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
58 (f1, g1), (f2, g2), (f3, g3).
60 - Diophantine equation: an equation whose coefficients and
61 solutions are integer constants, for example the equation
63 has an integer solution x = 1 and y = -1.
67 - "Advanced Compilation for High Performance Computing" by Randy
68 Allen and Ken Kennedy.
69 http://citeseer.ist.psu.edu/goff91practical.html
71 - "Loop Transformations for Restructuring Compilers - The Foundations"
79 #include "coretypes.h"
80 #include "gimple-pretty-print.h"
81 #include "tree-flow.h"
83 #include "tree-data-ref.h"
84 #include "tree-scalar-evolution.h"
86 #include "langhooks.h"
87 #include "tree-affine.h"
90 static struct datadep_stats
92 int num_dependence_tests
;
93 int num_dependence_dependent
;
94 int num_dependence_independent
;
95 int num_dependence_undetermined
;
97 int num_subscript_tests
;
98 int num_subscript_undetermined
;
99 int num_same_subscript_function
;
102 int num_ziv_independent
;
103 int num_ziv_dependent
;
104 int num_ziv_unimplemented
;
107 int num_siv_independent
;
108 int num_siv_dependent
;
109 int num_siv_unimplemented
;
112 int num_miv_independent
;
113 int num_miv_dependent
;
114 int num_miv_unimplemented
;
117 static bool subscript_dependence_tester_1 (struct data_dependence_relation
*,
118 struct data_reference
*,
119 struct data_reference
*,
121 /* Returns true iff A divides B. */
124 tree_fold_divides_p (const_tree a
, const_tree b
)
126 gcc_assert (TREE_CODE (a
) == INTEGER_CST
);
127 gcc_assert (TREE_CODE (b
) == INTEGER_CST
);
128 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR
, b
, a
));
131 /* Returns true iff A divides B. */
134 int_divides_p (int a
, int b
)
136 return ((b
% a
) == 0);
141 /* Dump into FILE all the data references from DATAREFS. */
144 dump_data_references (FILE *file
, vec
<data_reference_p
> datarefs
)
147 struct data_reference
*dr
;
149 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
150 dump_data_reference (file
, dr
);
153 /* Dump into STDERR all the data references from DATAREFS. */
156 debug_data_references (vec
<data_reference_p
> datarefs
)
158 dump_data_references (stderr
, datarefs
);
161 /* Print to STDERR the data_reference DR. */
164 debug_data_reference (struct data_reference
*dr
)
166 dump_data_reference (stderr
, dr
);
169 /* Dump function for a DATA_REFERENCE structure. */
172 dump_data_reference (FILE *outf
,
173 struct data_reference
*dr
)
177 fprintf (outf
, "#(Data Ref: \n");
178 fprintf (outf
, "# bb: %d \n", gimple_bb (DR_STMT (dr
))->index
);
179 fprintf (outf
, "# stmt: ");
180 print_gimple_stmt (outf
, DR_STMT (dr
), 0, 0);
181 fprintf (outf
, "# ref: ");
182 print_generic_stmt (outf
, DR_REF (dr
), 0);
183 fprintf (outf
, "# base_object: ");
184 print_generic_stmt (outf
, DR_BASE_OBJECT (dr
), 0);
186 for (i
= 0; i
< DR_NUM_DIMENSIONS (dr
); i
++)
188 fprintf (outf
, "# Access function %d: ", i
);
189 print_generic_stmt (outf
, DR_ACCESS_FN (dr
, i
), 0);
191 fprintf (outf
, "#)\n");
194 /* Dumps the affine function described by FN to the file OUTF. */
197 dump_affine_function (FILE *outf
, affine_fn fn
)
202 print_generic_expr (outf
, fn
[0], TDF_SLIM
);
203 for (i
= 1; fn
.iterate (i
, &coef
); i
++)
205 fprintf (outf
, " + ");
206 print_generic_expr (outf
, coef
, TDF_SLIM
);
207 fprintf (outf
, " * x_%u", i
);
211 /* Dumps the conflict function CF to the file OUTF. */
214 dump_conflict_function (FILE *outf
, conflict_function
*cf
)
218 if (cf
->n
== NO_DEPENDENCE
)
219 fprintf (outf
, "no dependence\n");
220 else if (cf
->n
== NOT_KNOWN
)
221 fprintf (outf
, "not known\n");
224 for (i
= 0; i
< cf
->n
; i
++)
227 dump_affine_function (outf
, cf
->fns
[i
]);
228 fprintf (outf
, "]\n");
233 /* Dump function for a SUBSCRIPT structure. */
236 dump_subscript (FILE *outf
, struct subscript
*subscript
)
238 conflict_function
*cf
= SUB_CONFLICTS_IN_A (subscript
);
240 fprintf (outf
, "\n (subscript \n");
241 fprintf (outf
, " iterations_that_access_an_element_twice_in_A: ");
242 dump_conflict_function (outf
, cf
);
243 if (CF_NONTRIVIAL_P (cf
))
245 tree last_iteration
= SUB_LAST_CONFLICT (subscript
);
246 fprintf (outf
, " last_conflict: ");
247 print_generic_stmt (outf
, last_iteration
, 0);
250 cf
= SUB_CONFLICTS_IN_B (subscript
);
251 fprintf (outf
, " iterations_that_access_an_element_twice_in_B: ");
252 dump_conflict_function (outf
, cf
);
253 if (CF_NONTRIVIAL_P (cf
))
255 tree last_iteration
= SUB_LAST_CONFLICT (subscript
);
256 fprintf (outf
, " last_conflict: ");
257 print_generic_stmt (outf
, last_iteration
, 0);
260 fprintf (outf
, " (Subscript distance: ");
261 print_generic_stmt (outf
, SUB_DISTANCE (subscript
), 0);
262 fprintf (outf
, " )\n");
263 fprintf (outf
, " )\n");
266 /* Print the classic direction vector DIRV to OUTF. */
269 print_direction_vector (FILE *outf
,
275 for (eq
= 0; eq
< length
; eq
++)
277 enum data_dependence_direction dir
= ((enum data_dependence_direction
)
283 fprintf (outf
, " +");
286 fprintf (outf
, " -");
289 fprintf (outf
, " =");
291 case dir_positive_or_equal
:
292 fprintf (outf
, " +=");
294 case dir_positive_or_negative
:
295 fprintf (outf
, " +-");
297 case dir_negative_or_equal
:
298 fprintf (outf
, " -=");
301 fprintf (outf
, " *");
304 fprintf (outf
, "indep");
308 fprintf (outf
, "\n");
311 /* Print a vector of direction vectors. */
314 print_dir_vectors (FILE *outf
, vec
<lambda_vector
> dir_vects
,
320 FOR_EACH_VEC_ELT (dir_vects
, j
, v
)
321 print_direction_vector (outf
, v
, length
);
324 /* Print out a vector VEC of length N to OUTFILE. */
327 print_lambda_vector (FILE * outfile
, lambda_vector vector
, int n
)
331 for (i
= 0; i
< n
; i
++)
332 fprintf (outfile
, "%3d ", vector
[i
]);
333 fprintf (outfile
, "\n");
336 /* Print a vector of distance vectors. */
339 print_dist_vectors (FILE *outf
, vec
<lambda_vector
> dist_vects
,
345 FOR_EACH_VEC_ELT (dist_vects
, j
, v
)
346 print_lambda_vector (outf
, v
, length
);
349 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
352 dump_data_dependence_relation (FILE *outf
,
353 struct data_dependence_relation
*ddr
)
355 struct data_reference
*dra
, *drb
;
357 fprintf (outf
, "(Data Dep: \n");
359 if (!ddr
|| DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
366 dump_data_reference (outf
, dra
);
368 fprintf (outf
, " (nil)\n");
370 dump_data_reference (outf
, drb
);
372 fprintf (outf
, " (nil)\n");
374 fprintf (outf
, " (don't know)\n)\n");
380 dump_data_reference (outf
, dra
);
381 dump_data_reference (outf
, drb
);
383 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
384 fprintf (outf
, " (no dependence)\n");
386 else if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
391 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
393 fprintf (outf
, " access_fn_A: ");
394 print_generic_stmt (outf
, DR_ACCESS_FN (dra
, i
), 0);
395 fprintf (outf
, " access_fn_B: ");
396 print_generic_stmt (outf
, DR_ACCESS_FN (drb
, i
), 0);
397 dump_subscript (outf
, DDR_SUBSCRIPT (ddr
, i
));
400 fprintf (outf
, " inner loop index: %d\n", DDR_INNER_LOOP (ddr
));
401 fprintf (outf
, " loop nest: (");
402 FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr
), i
, loopi
)
403 fprintf (outf
, "%d ", loopi
->num
);
404 fprintf (outf
, ")\n");
406 for (i
= 0; i
< DDR_NUM_DIST_VECTS (ddr
); i
++)
408 fprintf (outf
, " distance_vector: ");
409 print_lambda_vector (outf
, DDR_DIST_VECT (ddr
, i
),
413 for (i
= 0; i
< DDR_NUM_DIR_VECTS (ddr
); i
++)
415 fprintf (outf
, " direction_vector: ");
416 print_direction_vector (outf
, DDR_DIR_VECT (ddr
, i
),
421 fprintf (outf
, ")\n");
427 debug_data_dependence_relation (struct data_dependence_relation
*ddr
)
429 dump_data_dependence_relation (stderr
, ddr
);
432 /* Dump into FILE all the dependence relations from DDRS. */
435 dump_data_dependence_relations (FILE *file
,
439 struct data_dependence_relation
*ddr
;
441 FOR_EACH_VEC_ELT (ddrs
, i
, ddr
)
442 dump_data_dependence_relation (file
, ddr
);
445 /* Dump to STDERR all the dependence relations from DDRS. */
448 debug_data_dependence_relations (vec
<ddr_p
> ddrs
)
450 dump_data_dependence_relations (stderr
, ddrs
);
453 /* Dumps the distance and direction vectors in FILE. DDRS contains
454 the dependence relations, and VECT_SIZE is the size of the
455 dependence vectors, or in other words the number of loops in the
459 dump_dist_dir_vectors (FILE *file
, vec
<ddr_p
> ddrs
)
462 struct data_dependence_relation
*ddr
;
465 FOR_EACH_VEC_ELT (ddrs
, i
, ddr
)
466 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
&& DDR_AFFINE_P (ddr
))
468 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr
), j
, v
)
470 fprintf (file
, "DISTANCE_V (");
471 print_lambda_vector (file
, v
, DDR_NB_LOOPS (ddr
));
472 fprintf (file
, ")\n");
475 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr
), j
, v
)
477 fprintf (file
, "DIRECTION_V (");
478 print_direction_vector (file
, v
, DDR_NB_LOOPS (ddr
));
479 fprintf (file
, ")\n");
483 fprintf (file
, "\n\n");
486 /* Dumps the data dependence relations DDRS in FILE. */
489 dump_ddrs (FILE *file
, vec
<ddr_p
> ddrs
)
492 struct data_dependence_relation
*ddr
;
494 FOR_EACH_VEC_ELT (ddrs
, i
, ddr
)
495 dump_data_dependence_relation (file
, ddr
);
497 fprintf (file
, "\n\n");
501 debug_ddrs (vec
<ddr_p
> ddrs
)
503 dump_ddrs (stderr
, ddrs
);
506 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
507 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
508 constant of type ssizetype, and returns true. If we cannot do this
509 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
513 split_constant_offset_1 (tree type
, tree op0
, enum tree_code code
, tree op1
,
514 tree
*var
, tree
*off
)
518 enum tree_code ocode
= code
;
526 *var
= build_int_cst (type
, 0);
527 *off
= fold_convert (ssizetype
, op0
);
530 case POINTER_PLUS_EXPR
:
535 split_constant_offset (op0
, &var0
, &off0
);
536 split_constant_offset (op1
, &var1
, &off1
);
537 *var
= fold_build2 (code
, type
, var0
, var1
);
538 *off
= size_binop (ocode
, off0
, off1
);
542 if (TREE_CODE (op1
) != INTEGER_CST
)
545 split_constant_offset (op0
, &var0
, &off0
);
546 *var
= fold_build2 (MULT_EXPR
, type
, var0
, op1
);
547 *off
= size_binop (MULT_EXPR
, off0
, fold_convert (ssizetype
, op1
));
553 HOST_WIDE_INT pbitsize
, pbitpos
;
554 enum machine_mode pmode
;
555 int punsignedp
, pvolatilep
;
557 op0
= TREE_OPERAND (op0
, 0);
558 base
= get_inner_reference (op0
, &pbitsize
, &pbitpos
, &poffset
,
559 &pmode
, &punsignedp
, &pvolatilep
, false);
561 if (pbitpos
% BITS_PER_UNIT
!= 0)
563 base
= build_fold_addr_expr (base
);
564 off0
= ssize_int (pbitpos
/ BITS_PER_UNIT
);
568 split_constant_offset (poffset
, &poffset
, &off1
);
569 off0
= size_binop (PLUS_EXPR
, off0
, off1
);
570 if (POINTER_TYPE_P (TREE_TYPE (base
)))
571 base
= fold_build_pointer_plus (base
, poffset
);
573 base
= fold_build2 (PLUS_EXPR
, TREE_TYPE (base
), base
,
574 fold_convert (TREE_TYPE (base
), poffset
));
577 var0
= fold_convert (type
, base
);
579 /* If variable length types are involved, punt, otherwise casts
580 might be converted into ARRAY_REFs in gimplify_conversion.
581 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
582 possibly no longer appears in current GIMPLE, might resurface.
583 This perhaps could run
584 if (CONVERT_EXPR_P (var0))
586 gimplify_conversion (&var0);
587 // Attempt to fill in any within var0 found ARRAY_REF's
588 // element size from corresponding op embedded ARRAY_REF,
589 // if unsuccessful, just punt.
591 while (POINTER_TYPE_P (type
))
592 type
= TREE_TYPE (type
);
593 if (int_size_in_bytes (type
) < 0)
603 gimple def_stmt
= SSA_NAME_DEF_STMT (op0
);
604 enum tree_code subcode
;
606 if (gimple_code (def_stmt
) != GIMPLE_ASSIGN
)
609 var0
= gimple_assign_rhs1 (def_stmt
);
610 subcode
= gimple_assign_rhs_code (def_stmt
);
611 var1
= gimple_assign_rhs2 (def_stmt
);
613 return split_constant_offset_1 (type
, var0
, subcode
, var1
, var
, off
);
617 /* We must not introduce undefined overflow, and we must not change the value.
618 Hence we're okay if the inner type doesn't overflow to start with
619 (pointer or signed), the outer type also is an integer or pointer
620 and the outer precision is at least as large as the inner. */
621 tree itype
= TREE_TYPE (op0
);
622 if ((POINTER_TYPE_P (itype
)
623 || (INTEGRAL_TYPE_P (itype
) && TYPE_OVERFLOW_UNDEFINED (itype
)))
624 && TYPE_PRECISION (type
) >= TYPE_PRECISION (itype
)
625 && (POINTER_TYPE_P (type
) || INTEGRAL_TYPE_P (type
)))
627 split_constant_offset (op0
, &var0
, off
);
628 *var
= fold_convert (type
, var0
);
639 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
640 will be ssizetype. */
643 split_constant_offset (tree exp
, tree
*var
, tree
*off
)
645 tree type
= TREE_TYPE (exp
), otype
, op0
, op1
, e
, o
;
649 *off
= ssize_int (0);
652 if (tree_is_chrec (exp
)
653 || get_gimple_rhs_class (TREE_CODE (exp
)) == GIMPLE_TERNARY_RHS
)
656 otype
= TREE_TYPE (exp
);
657 code
= TREE_CODE (exp
);
658 extract_ops_from_tree (exp
, &code
, &op0
, &op1
);
659 if (split_constant_offset_1 (otype
, op0
, code
, op1
, &e
, &o
))
661 *var
= fold_convert (type
, e
);
666 /* Returns the address ADDR of an object in a canonical shape (without nop
667 casts, and with type of pointer to the object). */
670 canonicalize_base_object_address (tree addr
)
676 /* The base address may be obtained by casting from integer, in that case
678 if (!POINTER_TYPE_P (TREE_TYPE (addr
)))
681 if (TREE_CODE (addr
) != ADDR_EXPR
)
684 return build_fold_addr_expr (TREE_OPERAND (addr
, 0));
687 /* Analyzes the behavior of the memory reference DR in the innermost loop or
688 basic block that contains it. Returns true if analysis succeed or false
692 dr_analyze_innermost (struct data_reference
*dr
, struct loop
*nest
)
694 gimple stmt
= DR_STMT (dr
);
695 struct loop
*loop
= loop_containing_stmt (stmt
);
696 tree ref
= DR_REF (dr
);
697 HOST_WIDE_INT pbitsize
, pbitpos
;
699 enum machine_mode pmode
;
700 int punsignedp
, pvolatilep
;
701 affine_iv base_iv
, offset_iv
;
702 tree init
, dinit
, step
;
703 bool in_loop
= (loop
&& loop
->num
);
705 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
706 fprintf (dump_file
, "analyze_innermost: ");
708 base
= get_inner_reference (ref
, &pbitsize
, &pbitpos
, &poffset
,
709 &pmode
, &punsignedp
, &pvolatilep
, false);
710 gcc_assert (base
!= NULL_TREE
);
712 if (pbitpos
% BITS_PER_UNIT
!= 0)
714 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
715 fprintf (dump_file
, "failed: bit offset alignment.\n");
719 if (TREE_CODE (base
) == MEM_REF
)
721 if (!integer_zerop (TREE_OPERAND (base
, 1)))
723 double_int moff
= mem_ref_offset (base
);
724 tree mofft
= double_int_to_tree (sizetype
, moff
);
728 poffset
= size_binop (PLUS_EXPR
, poffset
, mofft
);
730 base
= TREE_OPERAND (base
, 0);
733 base
= build_fold_addr_expr (base
);
737 if (!simple_iv (loop
, loop_containing_stmt (stmt
), base
, &base_iv
,
738 nest
? true : false))
742 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
743 fprintf (dump_file
, "failed: evolution of base is not"
750 base_iv
.step
= ssize_int (0);
751 base_iv
.no_overflow
= true;
758 base_iv
.step
= ssize_int (0);
759 base_iv
.no_overflow
= true;
764 offset_iv
.base
= ssize_int (0);
765 offset_iv
.step
= ssize_int (0);
771 offset_iv
.base
= poffset
;
772 offset_iv
.step
= ssize_int (0);
774 else if (!simple_iv (loop
, loop_containing_stmt (stmt
),
776 nest
? true : false))
780 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
781 fprintf (dump_file
, "failed: evolution of offset is not"
787 offset_iv
.base
= poffset
;
788 offset_iv
.step
= ssize_int (0);
793 init
= ssize_int (pbitpos
/ BITS_PER_UNIT
);
794 split_constant_offset (base_iv
.base
, &base_iv
.base
, &dinit
);
795 init
= size_binop (PLUS_EXPR
, init
, dinit
);
796 split_constant_offset (offset_iv
.base
, &offset_iv
.base
, &dinit
);
797 init
= size_binop (PLUS_EXPR
, init
, dinit
);
799 step
= size_binop (PLUS_EXPR
,
800 fold_convert (ssizetype
, base_iv
.step
),
801 fold_convert (ssizetype
, offset_iv
.step
));
803 DR_BASE_ADDRESS (dr
) = canonicalize_base_object_address (base_iv
.base
);
805 DR_OFFSET (dr
) = fold_convert (ssizetype
, offset_iv
.base
);
809 DR_ALIGNED_TO (dr
) = size_int (highest_pow2_factor (offset_iv
.base
));
811 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
812 fprintf (dump_file
, "success.\n");
817 /* Determines the base object and the list of indices of memory reference
818 DR, analyzed in LOOP and instantiated in loop nest NEST. */
821 dr_analyze_indices (struct data_reference
*dr
, loop_p nest
, loop_p loop
)
823 vec
<tree
> access_fns
= vec
<tree
>();
825 tree base
, off
, access_fn
;
826 basic_block before_loop
;
828 /* If analyzing a basic-block there are no indices to analyze
829 and thus no access functions. */
832 DR_BASE_OBJECT (dr
) = DR_REF (dr
);
833 DR_ACCESS_FNS (dr
).create (0);
838 before_loop
= block_before_loop (nest
);
840 /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
841 into a two element array with a constant index. The base is
842 then just the immediate underlying object. */
843 if (TREE_CODE (ref
) == REALPART_EXPR
)
845 ref
= TREE_OPERAND (ref
, 0);
846 access_fns
.safe_push (integer_zero_node
);
848 else if (TREE_CODE (ref
) == IMAGPART_EXPR
)
850 ref
= TREE_OPERAND (ref
, 0);
851 access_fns
.safe_push (integer_one_node
);
854 /* Analyze access functions of dimensions we know to be independent. */
855 while (handled_component_p (ref
))
857 if (TREE_CODE (ref
) == ARRAY_REF
)
859 op
= TREE_OPERAND (ref
, 1);
860 access_fn
= analyze_scalar_evolution (loop
, op
);
861 access_fn
= instantiate_scev (before_loop
, loop
, access_fn
);
862 access_fns
.safe_push (access_fn
);
864 else if (TREE_CODE (ref
) == COMPONENT_REF
865 && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref
, 0))) == RECORD_TYPE
)
867 /* For COMPONENT_REFs of records (but not unions!) use the
868 FIELD_DECL offset as constant access function so we can
869 disambiguate a[i].f1 and a[i].f2. */
870 tree off
= component_ref_field_offset (ref
);
871 off
= size_binop (PLUS_EXPR
,
872 size_binop (MULT_EXPR
,
873 fold_convert (bitsizetype
, off
),
874 bitsize_int (BITS_PER_UNIT
)),
875 DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref
, 1)));
876 access_fns
.safe_push (off
);
879 /* If we have an unhandled component we could not translate
880 to an access function stop analyzing. We have determined
881 our base object in this case. */
884 ref
= TREE_OPERAND (ref
, 0);
887 /* If the address operand of a MEM_REF base has an evolution in the
888 analyzed nest, add it as an additional independent access-function. */
889 if (TREE_CODE (ref
) == MEM_REF
)
891 op
= TREE_OPERAND (ref
, 0);
892 access_fn
= analyze_scalar_evolution (loop
, op
);
893 access_fn
= instantiate_scev (before_loop
, loop
, access_fn
);
894 if (TREE_CODE (access_fn
) == POLYNOMIAL_CHREC
)
897 tree memoff
= TREE_OPERAND (ref
, 1);
898 base
= initial_condition (access_fn
);
899 orig_type
= TREE_TYPE (base
);
900 STRIP_USELESS_TYPE_CONVERSION (base
);
901 split_constant_offset (base
, &base
, &off
);
902 /* Fold the MEM_REF offset into the evolutions initial
903 value to make more bases comparable. */
904 if (!integer_zerop (memoff
))
906 off
= size_binop (PLUS_EXPR
, off
,
907 fold_convert (ssizetype
, memoff
));
908 memoff
= build_int_cst (TREE_TYPE (memoff
), 0);
910 access_fn
= chrec_replace_initial_condition
911 (access_fn
, fold_convert (orig_type
, off
));
912 /* ??? This is still not a suitable base object for
913 dr_may_alias_p - the base object needs to be an
914 access that covers the object as whole. With
915 an evolution in the pointer this cannot be
917 As a band-aid, mark the access so we can special-case
918 it in dr_may_alias_p. */
919 ref
= fold_build2_loc (EXPR_LOCATION (ref
),
920 MEM_REF
, TREE_TYPE (ref
),
922 DR_UNCONSTRAINED_BASE (dr
) = true;
923 access_fns
.safe_push (access_fn
);
926 else if (DECL_P (ref
))
928 /* Canonicalize DR_BASE_OBJECT to MEM_REF form. */
929 ref
= build2 (MEM_REF
, TREE_TYPE (ref
),
930 build_fold_addr_expr (ref
),
931 build_int_cst (reference_alias_ptr_type (ref
), 0));
934 DR_BASE_OBJECT (dr
) = ref
;
935 DR_ACCESS_FNS (dr
) = access_fns
;
938 /* Extracts the alias analysis information from the memory reference DR. */
941 dr_analyze_alias (struct data_reference
*dr
)
943 tree ref
= DR_REF (dr
);
944 tree base
= get_base_address (ref
), addr
;
946 if (INDIRECT_REF_P (base
)
947 || TREE_CODE (base
) == MEM_REF
)
949 addr
= TREE_OPERAND (base
, 0);
950 if (TREE_CODE (addr
) == SSA_NAME
)
951 DR_PTR_INFO (dr
) = SSA_NAME_PTR_INFO (addr
);
955 /* Frees data reference DR. */
958 free_data_ref (data_reference_p dr
)
960 DR_ACCESS_FNS (dr
).release ();
964 /* Analyzes memory reference MEMREF accessed in STMT. The reference
965 is read if IS_READ is true, write otherwise. Returns the
966 data_reference description of MEMREF. NEST is the outermost loop
967 in which the reference should be instantiated, LOOP is the loop in
968 which the data reference should be analyzed. */
970 struct data_reference
*
971 create_data_ref (loop_p nest
, loop_p loop
, tree memref
, gimple stmt
,
974 struct data_reference
*dr
;
976 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
978 fprintf (dump_file
, "Creating dr for ");
979 print_generic_expr (dump_file
, memref
, TDF_SLIM
);
980 fprintf (dump_file
, "\n");
983 dr
= XCNEW (struct data_reference
);
985 DR_REF (dr
) = memref
;
986 DR_IS_READ (dr
) = is_read
;
988 dr_analyze_innermost (dr
, nest
);
989 dr_analyze_indices (dr
, nest
, loop
);
990 dr_analyze_alias (dr
);
992 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
995 fprintf (dump_file
, "\tbase_address: ");
996 print_generic_expr (dump_file
, DR_BASE_ADDRESS (dr
), TDF_SLIM
);
997 fprintf (dump_file
, "\n\toffset from base address: ");
998 print_generic_expr (dump_file
, DR_OFFSET (dr
), TDF_SLIM
);
999 fprintf (dump_file
, "\n\tconstant offset from base address: ");
1000 print_generic_expr (dump_file
, DR_INIT (dr
), TDF_SLIM
);
1001 fprintf (dump_file
, "\n\tstep: ");
1002 print_generic_expr (dump_file
, DR_STEP (dr
), TDF_SLIM
);
1003 fprintf (dump_file
, "\n\taligned to: ");
1004 print_generic_expr (dump_file
, DR_ALIGNED_TO (dr
), TDF_SLIM
);
1005 fprintf (dump_file
, "\n\tbase_object: ");
1006 print_generic_expr (dump_file
, DR_BASE_OBJECT (dr
), TDF_SLIM
);
1007 fprintf (dump_file
, "\n");
1008 for (i
= 0; i
< DR_NUM_DIMENSIONS (dr
); i
++)
1010 fprintf (dump_file
, "\tAccess function %d: ", i
);
1011 print_generic_stmt (dump_file
, DR_ACCESS_FN (dr
, i
), TDF_SLIM
);
1018 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1021 dr_equal_offsets_p1 (tree offset1
, tree offset2
)
1025 STRIP_NOPS (offset1
);
1026 STRIP_NOPS (offset2
);
1028 if (offset1
== offset2
)
1031 if (TREE_CODE (offset1
) != TREE_CODE (offset2
)
1032 || (!BINARY_CLASS_P (offset1
) && !UNARY_CLASS_P (offset1
)))
1035 res
= dr_equal_offsets_p1 (TREE_OPERAND (offset1
, 0),
1036 TREE_OPERAND (offset2
, 0));
1038 if (!res
|| !BINARY_CLASS_P (offset1
))
1041 res
= dr_equal_offsets_p1 (TREE_OPERAND (offset1
, 1),
1042 TREE_OPERAND (offset2
, 1));
1047 /* Check if DRA and DRB have equal offsets. */
1049 dr_equal_offsets_p (struct data_reference
*dra
,
1050 struct data_reference
*drb
)
1052 tree offset1
, offset2
;
1054 offset1
= DR_OFFSET (dra
);
1055 offset2
= DR_OFFSET (drb
);
1057 return dr_equal_offsets_p1 (offset1
, offset2
);
1060 /* Returns true if FNA == FNB. */
1063 affine_function_equal_p (affine_fn fna
, affine_fn fnb
)
1065 unsigned i
, n
= fna
.length ();
1067 if (n
!= fnb
.length ())
1070 for (i
= 0; i
< n
; i
++)
1071 if (!operand_equal_p (fna
[i
], fnb
[i
], 0))
1077 /* If all the functions in CF are the same, returns one of them,
1078 otherwise returns NULL. */
1081 common_affine_function (conflict_function
*cf
)
1086 if (!CF_NONTRIVIAL_P (cf
))
1091 for (i
= 1; i
< cf
->n
; i
++)
1092 if (!affine_function_equal_p (comm
, cf
->fns
[i
]))
1098 /* Returns the base of the affine function FN. */
1101 affine_function_base (affine_fn fn
)
1106 /* Returns true if FN is a constant. */
1109 affine_function_constant_p (affine_fn fn
)
1114 for (i
= 1; fn
.iterate (i
, &coef
); i
++)
1115 if (!integer_zerop (coef
))
1121 /* Returns true if FN is the zero constant function. */
1124 affine_function_zero_p (affine_fn fn
)
1126 return (integer_zerop (affine_function_base (fn
))
1127 && affine_function_constant_p (fn
));
1130 /* Returns a signed integer type with the largest precision from TA
1134 signed_type_for_types (tree ta
, tree tb
)
1136 if (TYPE_PRECISION (ta
) > TYPE_PRECISION (tb
))
1137 return signed_type_for (ta
);
1139 return signed_type_for (tb
);
1142 /* Applies operation OP on affine functions FNA and FNB, and returns the
1146 affine_fn_op (enum tree_code op
, affine_fn fna
, affine_fn fnb
)
1152 if (fnb
.length () > fna
.length ())
1164 for (i
= 0; i
< n
; i
++)
1166 tree type
= signed_type_for_types (TREE_TYPE (fna
[i
]),
1167 TREE_TYPE (fnb
[i
]));
1168 ret
.quick_push (fold_build2 (op
, type
, fna
[i
], fnb
[i
]));
1171 for (; fna
.iterate (i
, &coef
); i
++)
1172 ret
.quick_push (fold_build2 (op
, signed_type_for (TREE_TYPE (coef
)),
1173 coef
, integer_zero_node
));
1174 for (; fnb
.iterate (i
, &coef
); i
++)
1175 ret
.quick_push (fold_build2 (op
, signed_type_for (TREE_TYPE (coef
)),
1176 integer_zero_node
, coef
));
1181 /* Returns the sum of affine functions FNA and FNB. */
1184 affine_fn_plus (affine_fn fna
, affine_fn fnb
)
1186 return affine_fn_op (PLUS_EXPR
, fna
, fnb
);
1189 /* Returns the difference of affine functions FNA and FNB. */
1192 affine_fn_minus (affine_fn fna
, affine_fn fnb
)
1194 return affine_fn_op (MINUS_EXPR
, fna
, fnb
);
1197 /* Frees affine function FN. */
1200 affine_fn_free (affine_fn fn
)
1205 /* Determine for each subscript in the data dependence relation DDR
1209 compute_subscript_distance (struct data_dependence_relation
*ddr
)
1211 conflict_function
*cf_a
, *cf_b
;
1212 affine_fn fn_a
, fn_b
, diff
;
1214 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
1218 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
1220 struct subscript
*subscript
;
1222 subscript
= DDR_SUBSCRIPT (ddr
, i
);
1223 cf_a
= SUB_CONFLICTS_IN_A (subscript
);
1224 cf_b
= SUB_CONFLICTS_IN_B (subscript
);
1226 fn_a
= common_affine_function (cf_a
);
1227 fn_b
= common_affine_function (cf_b
);
1228 if (!fn_a
.exists () || !fn_b
.exists ())
1230 SUB_DISTANCE (subscript
) = chrec_dont_know
;
1233 diff
= affine_fn_minus (fn_a
, fn_b
);
1235 if (affine_function_constant_p (diff
))
1236 SUB_DISTANCE (subscript
) = affine_function_base (diff
);
1238 SUB_DISTANCE (subscript
) = chrec_dont_know
;
1240 affine_fn_free (diff
);
1245 /* Returns the conflict function for "unknown". */
1247 static conflict_function
*
1248 conflict_fn_not_known (void)
1250 conflict_function
*fn
= XCNEW (conflict_function
);
1256 /* Returns the conflict function for "independent". */
1258 static conflict_function
*
1259 conflict_fn_no_dependence (void)
1261 conflict_function
*fn
= XCNEW (conflict_function
);
1262 fn
->n
= NO_DEPENDENCE
;
1267 /* Returns true if the address of OBJ is invariant in LOOP. */
1270 object_address_invariant_in_loop_p (const struct loop
*loop
, const_tree obj
)
1272 while (handled_component_p (obj
))
1274 if (TREE_CODE (obj
) == ARRAY_REF
)
1276 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1277 need to check the stride and the lower bound of the reference. */
1278 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, 2),
1280 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, 3),
1284 else if (TREE_CODE (obj
) == COMPONENT_REF
)
1286 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, 2),
1290 obj
= TREE_OPERAND (obj
, 0);
1293 if (!INDIRECT_REF_P (obj
)
1294 && TREE_CODE (obj
) != MEM_REF
)
1297 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, 0),
1301 /* Returns false if we can prove that data references A and B do not alias,
1302 true otherwise. If LOOP_NEST is false no cross-iteration aliases are
1306 dr_may_alias_p (const struct data_reference
*a
, const struct data_reference
*b
,
1309 tree addr_a
= DR_BASE_OBJECT (a
);
1310 tree addr_b
= DR_BASE_OBJECT (b
);
1312 /* If we are not processing a loop nest but scalar code we
1313 do not need to care about possible cross-iteration dependences
1314 and thus can process the full original reference. Do so,
1315 similar to how loop invariant motion applies extra offset-based
1319 aff_tree off1
, off2
;
1320 double_int size1
, size2
;
1321 get_inner_reference_aff (DR_REF (a
), &off1
, &size1
);
1322 get_inner_reference_aff (DR_REF (b
), &off2
, &size2
);
1323 aff_combination_scale (&off1
, double_int_minus_one
);
1324 aff_combination_add (&off2
, &off1
);
1325 if (aff_comb_cannot_overlap_p (&off2
, size1
, size2
))
1329 /* If we had an evolution in a MEM_REF BASE_OBJECT we do not know
1330 the size of the base-object. So we cannot do any offset/overlap
1331 based analysis but have to rely on points-to information only. */
1332 if (TREE_CODE (addr_a
) == MEM_REF
1333 && DR_UNCONSTRAINED_BASE (a
))
1335 if (TREE_CODE (addr_b
) == MEM_REF
1336 && DR_UNCONSTRAINED_BASE (b
))
1337 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a
, 0),
1338 TREE_OPERAND (addr_b
, 0));
1340 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a
, 0),
1341 build_fold_addr_expr (addr_b
));
1343 else if (TREE_CODE (addr_b
) == MEM_REF
1344 && DR_UNCONSTRAINED_BASE (b
))
1345 return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a
),
1346 TREE_OPERAND (addr_b
, 0));
1348 /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
1349 that is being subsetted in the loop nest. */
1350 if (DR_IS_WRITE (a
) && DR_IS_WRITE (b
))
1351 return refs_output_dependent_p (addr_a
, addr_b
);
1352 else if (DR_IS_READ (a
) && DR_IS_WRITE (b
))
1353 return refs_anti_dependent_p (addr_a
, addr_b
);
1354 return refs_may_alias_p (addr_a
, addr_b
);
1357 /* Initialize a data dependence relation between data accesses A and
1358 B. NB_LOOPS is the number of loops surrounding the references: the
1359 size of the classic distance/direction vectors. */
1361 struct data_dependence_relation
*
1362 initialize_data_dependence_relation (struct data_reference
*a
,
1363 struct data_reference
*b
,
1364 vec
<loop_p
> loop_nest
)
1366 struct data_dependence_relation
*res
;
1369 res
= XNEW (struct data_dependence_relation
);
1372 DDR_LOOP_NEST (res
).create (0);
1373 DDR_REVERSED_P (res
) = false;
1374 DDR_SUBSCRIPTS (res
).create (0);
1375 DDR_DIR_VECTS (res
).create (0);
1376 DDR_DIST_VECTS (res
).create (0);
1378 if (a
== NULL
|| b
== NULL
)
1380 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
1384 /* If the data references do not alias, then they are independent. */
1385 if (!dr_may_alias_p (a
, b
, loop_nest
.exists ()))
1387 DDR_ARE_DEPENDENT (res
) = chrec_known
;
1391 /* The case where the references are exactly the same. */
1392 if (operand_equal_p (DR_REF (a
), DR_REF (b
), 0))
1394 if (loop_nest
.exists ()
1395 && !object_address_invariant_in_loop_p (loop_nest
[0],
1396 DR_BASE_OBJECT (a
)))
1398 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
1401 DDR_AFFINE_P (res
) = true;
1402 DDR_ARE_DEPENDENT (res
) = NULL_TREE
;
1403 DDR_SUBSCRIPTS (res
).create (DR_NUM_DIMENSIONS (a
));
1404 DDR_LOOP_NEST (res
) = loop_nest
;
1405 DDR_INNER_LOOP (res
) = 0;
1406 DDR_SELF_REFERENCE (res
) = true;
1407 for (i
= 0; i
< DR_NUM_DIMENSIONS (a
); i
++)
1409 struct subscript
*subscript
;
1411 subscript
= XNEW (struct subscript
);
1412 SUB_CONFLICTS_IN_A (subscript
) = conflict_fn_not_known ();
1413 SUB_CONFLICTS_IN_B (subscript
) = conflict_fn_not_known ();
1414 SUB_LAST_CONFLICT (subscript
) = chrec_dont_know
;
1415 SUB_DISTANCE (subscript
) = chrec_dont_know
;
1416 DDR_SUBSCRIPTS (res
).safe_push (subscript
);
1421 /* If the references do not access the same object, we do not know
1422 whether they alias or not. */
1423 if (!operand_equal_p (DR_BASE_OBJECT (a
), DR_BASE_OBJECT (b
), 0))
1425 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
1429 /* If the base of the object is not invariant in the loop nest, we cannot
1430 analyze it. TODO -- in fact, it would suffice to record that there may
1431 be arbitrary dependences in the loops where the base object varies. */
1432 if (loop_nest
.exists ()
1433 && !object_address_invariant_in_loop_p (loop_nest
[0],
1434 DR_BASE_OBJECT (a
)))
1436 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
1440 /* If the number of dimensions of the access to not agree we can have
1441 a pointer access to a component of the array element type and an
1442 array access while the base-objects are still the same. Punt. */
1443 if (DR_NUM_DIMENSIONS (a
) != DR_NUM_DIMENSIONS (b
))
1445 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
1449 DDR_AFFINE_P (res
) = true;
1450 DDR_ARE_DEPENDENT (res
) = NULL_TREE
;
1451 DDR_SUBSCRIPTS (res
).create (DR_NUM_DIMENSIONS (a
));
1452 DDR_LOOP_NEST (res
) = loop_nest
;
1453 DDR_INNER_LOOP (res
) = 0;
1454 DDR_SELF_REFERENCE (res
) = false;
1456 for (i
= 0; i
< DR_NUM_DIMENSIONS (a
); i
++)
1458 struct subscript
*subscript
;
1460 subscript
= XNEW (struct subscript
);
1461 SUB_CONFLICTS_IN_A (subscript
) = conflict_fn_not_known ();
1462 SUB_CONFLICTS_IN_B (subscript
) = conflict_fn_not_known ();
1463 SUB_LAST_CONFLICT (subscript
) = chrec_dont_know
;
1464 SUB_DISTANCE (subscript
) = chrec_dont_know
;
1465 DDR_SUBSCRIPTS (res
).safe_push (subscript
);
1471 /* Frees memory used by the conflict function F. */
1474 free_conflict_function (conflict_function
*f
)
1478 if (CF_NONTRIVIAL_P (f
))
1480 for (i
= 0; i
< f
->n
; i
++)
1481 affine_fn_free (f
->fns
[i
]);
1486 /* Frees memory used by SUBSCRIPTS. */
1489 free_subscripts (vec
<subscript_p
> subscripts
)
1494 FOR_EACH_VEC_ELT (subscripts
, i
, s
)
1496 free_conflict_function (s
->conflicting_iterations_in_a
);
1497 free_conflict_function (s
->conflicting_iterations_in_b
);
1500 subscripts
.release ();
1503 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1507 finalize_ddr_dependent (struct data_dependence_relation
*ddr
,
1510 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1512 fprintf (dump_file
, "(dependence classified: ");
1513 print_generic_expr (dump_file
, chrec
, 0);
1514 fprintf (dump_file
, ")\n");
1517 DDR_ARE_DEPENDENT (ddr
) = chrec
;
1518 free_subscripts (DDR_SUBSCRIPTS (ddr
));
1519 DDR_SUBSCRIPTS (ddr
).create (0);
1522 /* The dependence relation DDR cannot be represented by a distance
1526 non_affine_dependence_relation (struct data_dependence_relation
*ddr
)
1528 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1529 fprintf (dump_file
, "(Dependence relation cannot be represented by distance vector.) \n");
1531 DDR_AFFINE_P (ddr
) = false;
1536 /* This section contains the classic Banerjee tests. */
1538 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1539 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1542 ziv_subscript_p (const_tree chrec_a
, const_tree chrec_b
)
1544 return (evolution_function_is_constant_p (chrec_a
)
1545 && evolution_function_is_constant_p (chrec_b
));
1548 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1549 variable, i.e., if the SIV (Single Index Variable) test is true. */
1552 siv_subscript_p (const_tree chrec_a
, const_tree chrec_b
)
1554 if ((evolution_function_is_constant_p (chrec_a
)
1555 && evolution_function_is_univariate_p (chrec_b
))
1556 || (evolution_function_is_constant_p (chrec_b
)
1557 && evolution_function_is_univariate_p (chrec_a
)))
1560 if (evolution_function_is_univariate_p (chrec_a
)
1561 && evolution_function_is_univariate_p (chrec_b
))
1563 switch (TREE_CODE (chrec_a
))
1565 case POLYNOMIAL_CHREC
:
1566 switch (TREE_CODE (chrec_b
))
1568 case POLYNOMIAL_CHREC
:
1569 if (CHREC_VARIABLE (chrec_a
) != CHREC_VARIABLE (chrec_b
))
1584 /* Creates a conflict function with N dimensions. The affine functions
1585 in each dimension follow. */
1587 static conflict_function
*
1588 conflict_fn (unsigned n
, ...)
1591 conflict_function
*ret
= XCNEW (conflict_function
);
1594 gcc_assert (0 < n
&& n
<= MAX_DIM
);
1598 for (i
= 0; i
< n
; i
++)
1599 ret
->fns
[i
] = va_arg (ap
, affine_fn
);
1605 /* Returns constant affine function with value CST. */
1608 affine_fn_cst (tree cst
)
1612 fn
.quick_push (cst
);
1616 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1619 affine_fn_univar (tree cst
, unsigned dim
, tree coef
)
1622 fn
.create (dim
+ 1);
1625 gcc_assert (dim
> 0);
1626 fn
.quick_push (cst
);
1627 for (i
= 1; i
< dim
; i
++)
1628 fn
.quick_push (integer_zero_node
);
1629 fn
.quick_push (coef
);
1633 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1634 *OVERLAPS_B are initialized to the functions that describe the
1635 relation between the elements accessed twice by CHREC_A and
1636 CHREC_B. For k >= 0, the following property is verified:
1638 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1641 analyze_ziv_subscript (tree chrec_a
,
1643 conflict_function
**overlaps_a
,
1644 conflict_function
**overlaps_b
,
1645 tree
*last_conflicts
)
1647 tree type
, difference
;
1648 dependence_stats
.num_ziv
++;
1650 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1651 fprintf (dump_file
, "(analyze_ziv_subscript \n");
1653 type
= signed_type_for_types (TREE_TYPE (chrec_a
), TREE_TYPE (chrec_b
));
1654 chrec_a
= chrec_convert (type
, chrec_a
, NULL
);
1655 chrec_b
= chrec_convert (type
, chrec_b
, NULL
);
1656 difference
= chrec_fold_minus (type
, chrec_a
, chrec_b
);
1658 switch (TREE_CODE (difference
))
1661 if (integer_zerop (difference
))
1663 /* The difference is equal to zero: the accessed index
1664 overlaps for each iteration in the loop. */
1665 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
1666 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
1667 *last_conflicts
= chrec_dont_know
;
1668 dependence_stats
.num_ziv_dependent
++;
1672 /* The accesses do not overlap. */
1673 *overlaps_a
= conflict_fn_no_dependence ();
1674 *overlaps_b
= conflict_fn_no_dependence ();
1675 *last_conflicts
= integer_zero_node
;
1676 dependence_stats
.num_ziv_independent
++;
1681 /* We're not sure whether the indexes overlap. For the moment,
1682 conservatively answer "don't know". */
1683 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1684 fprintf (dump_file
, "ziv test failed: difference is non-integer.\n");
1686 *overlaps_a
= conflict_fn_not_known ();
1687 *overlaps_b
= conflict_fn_not_known ();
1688 *last_conflicts
= chrec_dont_know
;
1689 dependence_stats
.num_ziv_unimplemented
++;
1693 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1694 fprintf (dump_file
, ")\n");
1697 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
1698 and only if it fits to the int type. If this is not the case, or the
1699 bound on the number of iterations of LOOP could not be derived, returns
1703 max_stmt_executions_tree (struct loop
*loop
)
1707 if (!max_stmt_executions (loop
, &nit
))
1708 return chrec_dont_know
;
1710 if (!double_int_fits_to_tree_p (unsigned_type_node
, nit
))
1711 return chrec_dont_know
;
1713 return double_int_to_tree (unsigned_type_node
, nit
);
1716 /* Determine whether the CHREC is always positive/negative. If the expression
1717 cannot be statically analyzed, return false, otherwise set the answer into
1721 chrec_is_positive (tree chrec
, bool *value
)
1723 bool value0
, value1
, value2
;
1724 tree end_value
, nb_iter
;
1726 switch (TREE_CODE (chrec
))
1728 case POLYNOMIAL_CHREC
:
1729 if (!chrec_is_positive (CHREC_LEFT (chrec
), &value0
)
1730 || !chrec_is_positive (CHREC_RIGHT (chrec
), &value1
))
1733 /* FIXME -- overflows. */
1734 if (value0
== value1
)
1740 /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
1741 and the proof consists in showing that the sign never
1742 changes during the execution of the loop, from 0 to
1743 loop->nb_iterations. */
1744 if (!evolution_function_is_affine_p (chrec
))
1747 nb_iter
= number_of_latch_executions (get_chrec_loop (chrec
));
1748 if (chrec_contains_undetermined (nb_iter
))
1752 /* TODO -- If the test is after the exit, we may decrease the number of
1753 iterations by one. */
1755 nb_iter
= chrec_fold_minus (type
, nb_iter
, build_int_cst (type
, 1));
1758 end_value
= chrec_apply (CHREC_VARIABLE (chrec
), chrec
, nb_iter
);
1760 if (!chrec_is_positive (end_value
, &value2
))
1764 return value0
== value1
;
1767 switch (tree_int_cst_sgn (chrec
))
1786 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1787 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1788 *OVERLAPS_B are initialized to the functions that describe the
1789 relation between the elements accessed twice by CHREC_A and
1790 CHREC_B. For k >= 0, the following property is verified:
1792 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1795 analyze_siv_subscript_cst_affine (tree chrec_a
,
1797 conflict_function
**overlaps_a
,
1798 conflict_function
**overlaps_b
,
1799 tree
*last_conflicts
)
1801 bool value0
, value1
, value2
;
1802 tree type
, difference
, tmp
;
1804 type
= signed_type_for_types (TREE_TYPE (chrec_a
), TREE_TYPE (chrec_b
));
1805 chrec_a
= chrec_convert (type
, chrec_a
, NULL
);
1806 chrec_b
= chrec_convert (type
, chrec_b
, NULL
);
1807 difference
= chrec_fold_minus (type
, initial_condition (chrec_b
), chrec_a
);
1809 /* Special case overlap in the first iteration. */
1810 if (integer_zerop (difference
))
1812 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
1813 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
1814 *last_conflicts
= integer_one_node
;
1818 if (!chrec_is_positive (initial_condition (difference
), &value0
))
1820 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1821 fprintf (dump_file
, "siv test failed: chrec is not positive.\n");
1823 dependence_stats
.num_siv_unimplemented
++;
1824 *overlaps_a
= conflict_fn_not_known ();
1825 *overlaps_b
= conflict_fn_not_known ();
1826 *last_conflicts
= chrec_dont_know
;
1831 if (value0
== false)
1833 if (!chrec_is_positive (CHREC_RIGHT (chrec_b
), &value1
))
1835 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1836 fprintf (dump_file
, "siv test failed: chrec not positive.\n");
1838 *overlaps_a
= conflict_fn_not_known ();
1839 *overlaps_b
= conflict_fn_not_known ();
1840 *last_conflicts
= chrec_dont_know
;
1841 dependence_stats
.num_siv_unimplemented
++;
1850 chrec_b = {10, +, 1}
1853 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b
), difference
))
1855 HOST_WIDE_INT numiter
;
1856 struct loop
*loop
= get_chrec_loop (chrec_b
);
1858 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
1859 tmp
= fold_build2 (EXACT_DIV_EXPR
, type
,
1860 fold_build1 (ABS_EXPR
, type
, difference
),
1861 CHREC_RIGHT (chrec_b
));
1862 *overlaps_b
= conflict_fn (1, affine_fn_cst (tmp
));
1863 *last_conflicts
= integer_one_node
;
1866 /* Perform weak-zero siv test to see if overlap is
1867 outside the loop bounds. */
1868 numiter
= max_stmt_executions_int (loop
);
1871 && compare_tree_int (tmp
, numiter
) > 0)
1873 free_conflict_function (*overlaps_a
);
1874 free_conflict_function (*overlaps_b
);
1875 *overlaps_a
= conflict_fn_no_dependence ();
1876 *overlaps_b
= conflict_fn_no_dependence ();
1877 *last_conflicts
= integer_zero_node
;
1878 dependence_stats
.num_siv_independent
++;
1881 dependence_stats
.num_siv_dependent
++;
1885 /* When the step does not divide the difference, there are
1889 *overlaps_a
= conflict_fn_no_dependence ();
1890 *overlaps_b
= conflict_fn_no_dependence ();
1891 *last_conflicts
= integer_zero_node
;
1892 dependence_stats
.num_siv_independent
++;
1901 chrec_b = {10, +, -1}
1903 In this case, chrec_a will not overlap with chrec_b. */
1904 *overlaps_a
= conflict_fn_no_dependence ();
1905 *overlaps_b
= conflict_fn_no_dependence ();
1906 *last_conflicts
= integer_zero_node
;
1907 dependence_stats
.num_siv_independent
++;
1914 if (!chrec_is_positive (CHREC_RIGHT (chrec_b
), &value2
))
1916 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1917 fprintf (dump_file
, "siv test failed: chrec not positive.\n");
1919 *overlaps_a
= conflict_fn_not_known ();
1920 *overlaps_b
= conflict_fn_not_known ();
1921 *last_conflicts
= chrec_dont_know
;
1922 dependence_stats
.num_siv_unimplemented
++;
1927 if (value2
== false)
1931 chrec_b = {10, +, -1}
1933 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b
), difference
))
1935 HOST_WIDE_INT numiter
;
1936 struct loop
*loop
= get_chrec_loop (chrec_b
);
1938 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
1939 tmp
= fold_build2 (EXACT_DIV_EXPR
, type
, difference
,
1940 CHREC_RIGHT (chrec_b
));
1941 *overlaps_b
= conflict_fn (1, affine_fn_cst (tmp
));
1942 *last_conflicts
= integer_one_node
;
1944 /* Perform weak-zero siv test to see if overlap is
1945 outside the loop bounds. */
1946 numiter
= max_stmt_executions_int (loop
);
1949 && compare_tree_int (tmp
, numiter
) > 0)
1951 free_conflict_function (*overlaps_a
);
1952 free_conflict_function (*overlaps_b
);
1953 *overlaps_a
= conflict_fn_no_dependence ();
1954 *overlaps_b
= conflict_fn_no_dependence ();
1955 *last_conflicts
= integer_zero_node
;
1956 dependence_stats
.num_siv_independent
++;
1959 dependence_stats
.num_siv_dependent
++;
1963 /* When the step does not divide the difference, there
1967 *overlaps_a
= conflict_fn_no_dependence ();
1968 *overlaps_b
= conflict_fn_no_dependence ();
1969 *last_conflicts
= integer_zero_node
;
1970 dependence_stats
.num_siv_independent
++;
1980 In this case, chrec_a will not overlap with chrec_b. */
1981 *overlaps_a
= conflict_fn_no_dependence ();
1982 *overlaps_b
= conflict_fn_no_dependence ();
1983 *last_conflicts
= integer_zero_node
;
1984 dependence_stats
.num_siv_independent
++;
1992 /* Helper recursive function for initializing the matrix A. Returns
1993 the initial value of CHREC. */
1996 initialize_matrix_A (lambda_matrix A
, tree chrec
, unsigned index
, int mult
)
2000 switch (TREE_CODE (chrec
))
2002 case POLYNOMIAL_CHREC
:
2003 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec
)) == INTEGER_CST
);
2005 A
[index
][0] = mult
* int_cst_value (CHREC_RIGHT (chrec
));
2006 return initialize_matrix_A (A
, CHREC_LEFT (chrec
), index
+ 1, mult
);
2012 tree op0
= initialize_matrix_A (A
, TREE_OPERAND (chrec
, 0), index
, mult
);
2013 tree op1
= initialize_matrix_A (A
, TREE_OPERAND (chrec
, 1), index
, mult
);
2015 return chrec_fold_op (TREE_CODE (chrec
), chrec_type (chrec
), op0
, op1
);
2020 tree op
= initialize_matrix_A (A
, TREE_OPERAND (chrec
, 0), index
, mult
);
2021 return chrec_convert (chrec_type (chrec
), op
, NULL
);
2026 /* Handle ~X as -1 - X. */
2027 tree op
= initialize_matrix_A (A
, TREE_OPERAND (chrec
, 0), index
, mult
);
2028 return chrec_fold_op (MINUS_EXPR
, chrec_type (chrec
),
2029 build_int_cst (TREE_TYPE (chrec
), -1), op
);
2041 #define FLOOR_DIV(x,y) ((x) / (y))
2043 /* Solves the special case of the Diophantine equation:
2044 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
2046 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
2047 number of iterations that loops X and Y run. The overlaps will be
2048 constructed as evolutions in dimension DIM. */
2051 compute_overlap_steps_for_affine_univar (int niter
, int step_a
, int step_b
,
2052 affine_fn
*overlaps_a
,
2053 affine_fn
*overlaps_b
,
2054 tree
*last_conflicts
, int dim
)
2056 if (((step_a
> 0 && step_b
> 0)
2057 || (step_a
< 0 && step_b
< 0)))
2059 int step_overlaps_a
, step_overlaps_b
;
2060 int gcd_steps_a_b
, last_conflict
, tau2
;
2062 gcd_steps_a_b
= gcd (step_a
, step_b
);
2063 step_overlaps_a
= step_b
/ gcd_steps_a_b
;
2064 step_overlaps_b
= step_a
/ gcd_steps_a_b
;
2068 tau2
= FLOOR_DIV (niter
, step_overlaps_a
);
2069 tau2
= MIN (tau2
, FLOOR_DIV (niter
, step_overlaps_b
));
2070 last_conflict
= tau2
;
2071 *last_conflicts
= build_int_cst (NULL_TREE
, last_conflict
);
2074 *last_conflicts
= chrec_dont_know
;
2076 *overlaps_a
= affine_fn_univar (integer_zero_node
, dim
,
2077 build_int_cst (NULL_TREE
,
2079 *overlaps_b
= affine_fn_univar (integer_zero_node
, dim
,
2080 build_int_cst (NULL_TREE
,
2086 *overlaps_a
= affine_fn_cst (integer_zero_node
);
2087 *overlaps_b
= affine_fn_cst (integer_zero_node
);
2088 *last_conflicts
= integer_zero_node
;
2092 /* Solves the special case of a Diophantine equation where CHREC_A is
2093 an affine bivariate function, and CHREC_B is an affine univariate
2094 function. For example,
2096 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
2098 has the following overlapping functions:
2100 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
2101 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
2102 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
2104 FORNOW: This is a specialized implementation for a case occurring in
2105 a common benchmark. Implement the general algorithm. */
2108 compute_overlap_steps_for_affine_1_2 (tree chrec_a
, tree chrec_b
,
2109 conflict_function
**overlaps_a
,
2110 conflict_function
**overlaps_b
,
2111 tree
*last_conflicts
)
2113 bool xz_p
, yz_p
, xyz_p
;
2114 int step_x
, step_y
, step_z
;
2115 HOST_WIDE_INT niter_x
, niter_y
, niter_z
, niter
;
2116 affine_fn overlaps_a_xz
, overlaps_b_xz
;
2117 affine_fn overlaps_a_yz
, overlaps_b_yz
;
2118 affine_fn overlaps_a_xyz
, overlaps_b_xyz
;
2119 affine_fn ova1
, ova2
, ovb
;
2120 tree last_conflicts_xz
, last_conflicts_yz
, last_conflicts_xyz
;
2122 step_x
= int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a
)));
2123 step_y
= int_cst_value (CHREC_RIGHT (chrec_a
));
2124 step_z
= int_cst_value (CHREC_RIGHT (chrec_b
));
2126 niter_x
= max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a
)));
2127 niter_y
= max_stmt_executions_int (get_chrec_loop (chrec_a
));
2128 niter_z
= max_stmt_executions_int (get_chrec_loop (chrec_b
));
2130 if (niter_x
< 0 || niter_y
< 0 || niter_z
< 0)
2132 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2133 fprintf (dump_file
, "overlap steps test failed: no iteration counts.\n");
2135 *overlaps_a
= conflict_fn_not_known ();
2136 *overlaps_b
= conflict_fn_not_known ();
2137 *last_conflicts
= chrec_dont_know
;
2141 niter
= MIN (niter_x
, niter_z
);
2142 compute_overlap_steps_for_affine_univar (niter
, step_x
, step_z
,
2145 &last_conflicts_xz
, 1);
2146 niter
= MIN (niter_y
, niter_z
);
2147 compute_overlap_steps_for_affine_univar (niter
, step_y
, step_z
,
2150 &last_conflicts_yz
, 2);
2151 niter
= MIN (niter_x
, niter_z
);
2152 niter
= MIN (niter_y
, niter
);
2153 compute_overlap_steps_for_affine_univar (niter
, step_x
+ step_y
, step_z
,
2156 &last_conflicts_xyz
, 3);
2158 xz_p
= !integer_zerop (last_conflicts_xz
);
2159 yz_p
= !integer_zerop (last_conflicts_yz
);
2160 xyz_p
= !integer_zerop (last_conflicts_xyz
);
2162 if (xz_p
|| yz_p
|| xyz_p
)
2164 ova1
= affine_fn_cst (integer_zero_node
);
2165 ova2
= affine_fn_cst (integer_zero_node
);
2166 ovb
= affine_fn_cst (integer_zero_node
);
2169 affine_fn t0
= ova1
;
2172 ova1
= affine_fn_plus (ova1
, overlaps_a_xz
);
2173 ovb
= affine_fn_plus (ovb
, overlaps_b_xz
);
2174 affine_fn_free (t0
);
2175 affine_fn_free (t2
);
2176 *last_conflicts
= last_conflicts_xz
;
2180 affine_fn t0
= ova2
;
2183 ova2
= affine_fn_plus (ova2
, overlaps_a_yz
);
2184 ovb
= affine_fn_plus (ovb
, overlaps_b_yz
);
2185 affine_fn_free (t0
);
2186 affine_fn_free (t2
);
2187 *last_conflicts
= last_conflicts_yz
;
2191 affine_fn t0
= ova1
;
2192 affine_fn t2
= ova2
;
2195 ova1
= affine_fn_plus (ova1
, overlaps_a_xyz
);
2196 ova2
= affine_fn_plus (ova2
, overlaps_a_xyz
);
2197 ovb
= affine_fn_plus (ovb
, overlaps_b_xyz
);
2198 affine_fn_free (t0
);
2199 affine_fn_free (t2
);
2200 affine_fn_free (t4
);
2201 *last_conflicts
= last_conflicts_xyz
;
2203 *overlaps_a
= conflict_fn (2, ova1
, ova2
);
2204 *overlaps_b
= conflict_fn (1, ovb
);
2208 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2209 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2210 *last_conflicts
= integer_zero_node
;
2213 affine_fn_free (overlaps_a_xz
);
2214 affine_fn_free (overlaps_b_xz
);
2215 affine_fn_free (overlaps_a_yz
);
2216 affine_fn_free (overlaps_b_yz
);
2217 affine_fn_free (overlaps_a_xyz
);
2218 affine_fn_free (overlaps_b_xyz
);
2221 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
2224 lambda_vector_copy (lambda_vector vec1
, lambda_vector vec2
,
2227 memcpy (vec2
, vec1
, size
* sizeof (*vec1
));
2230 /* Copy the elements of M x N matrix MAT1 to MAT2. */
2233 lambda_matrix_copy (lambda_matrix mat1
, lambda_matrix mat2
,
2238 for (i
= 0; i
< m
; i
++)
2239 lambda_vector_copy (mat1
[i
], mat2
[i
], n
);
2242 /* Store the N x N identity matrix in MAT. */
2245 lambda_matrix_id (lambda_matrix mat
, int size
)
2249 for (i
= 0; i
< size
; i
++)
2250 for (j
= 0; j
< size
; j
++)
2251 mat
[i
][j
] = (i
== j
) ? 1 : 0;
2254 /* Return the first nonzero element of vector VEC1 between START and N.
2255 We must have START <= N. Returns N if VEC1 is the zero vector. */
2258 lambda_vector_first_nz (lambda_vector vec1
, int n
, int start
)
2261 while (j
< n
&& vec1
[j
] == 0)
2266 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2267 R2 = R2 + CONST1 * R1. */
2270 lambda_matrix_row_add (lambda_matrix mat
, int n
, int r1
, int r2
, int const1
)
2277 for (i
= 0; i
< n
; i
++)
2278 mat
[r2
][i
] += const1
* mat
[r1
][i
];
2281 /* Swap rows R1 and R2 in matrix MAT. */
2284 lambda_matrix_row_exchange (lambda_matrix mat
, int r1
, int r2
)
2293 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
2294 and store the result in VEC2. */
2297 lambda_vector_mult_const (lambda_vector vec1
, lambda_vector vec2
,
2298 int size
, int const1
)
2303 lambda_vector_clear (vec2
, size
);
2305 for (i
= 0; i
< size
; i
++)
2306 vec2
[i
] = const1
* vec1
[i
];
2309 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
2312 lambda_vector_negate (lambda_vector vec1
, lambda_vector vec2
,
2315 lambda_vector_mult_const (vec1
, vec2
, size
, -1);
2318 /* Negate row R1 of matrix MAT which has N columns. */
2321 lambda_matrix_row_negate (lambda_matrix mat
, int n
, int r1
)
2323 lambda_vector_negate (mat
[r1
], mat
[r1
], n
);
2326 /* Return true if two vectors are equal. */
2329 lambda_vector_equal (lambda_vector vec1
, lambda_vector vec2
, int size
)
2332 for (i
= 0; i
< size
; i
++)
2333 if (vec1
[i
] != vec2
[i
])
2338 /* Given an M x N integer matrix A, this function determines an M x
2339 M unimodular matrix U, and an M x N echelon matrix S such that
2340 "U.A = S". This decomposition is also known as "right Hermite".
2342 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2343 Restructuring Compilers" Utpal Banerjee. */
2346 lambda_matrix_right_hermite (lambda_matrix A
, int m
, int n
,
2347 lambda_matrix S
, lambda_matrix U
)
2351 lambda_matrix_copy (A
, S
, m
, n
);
2352 lambda_matrix_id (U
, m
);
2354 for (j
= 0; j
< n
; j
++)
2356 if (lambda_vector_first_nz (S
[j
], m
, i0
) < m
)
2359 for (i
= m
- 1; i
>= i0
; i
--)
2361 while (S
[i
][j
] != 0)
2363 int sigma
, factor
, a
, b
;
2367 sigma
= (a
* b
< 0) ? -1: 1;
2370 factor
= sigma
* (a
/ b
);
2372 lambda_matrix_row_add (S
, n
, i
, i
-1, -factor
);
2373 lambda_matrix_row_exchange (S
, i
, i
-1);
2375 lambda_matrix_row_add (U
, m
, i
, i
-1, -factor
);
2376 lambda_matrix_row_exchange (U
, i
, i
-1);
2383 /* Determines the overlapping elements due to accesses CHREC_A and
2384 CHREC_B, that are affine functions. This function cannot handle
2385 symbolic evolution functions, ie. when initial conditions are
2386 parameters, because it uses lambda matrices of integers. */
2389 analyze_subscript_affine_affine (tree chrec_a
,
2391 conflict_function
**overlaps_a
,
2392 conflict_function
**overlaps_b
,
2393 tree
*last_conflicts
)
2395 unsigned nb_vars_a
, nb_vars_b
, dim
;
2396 HOST_WIDE_INT init_a
, init_b
, gamma
, gcd_alpha_beta
;
2397 lambda_matrix A
, U
, S
;
2398 struct obstack scratch_obstack
;
2400 if (eq_evolutions_p (chrec_a
, chrec_b
))
2402 /* The accessed index overlaps for each iteration in the
2404 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2405 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2406 *last_conflicts
= chrec_dont_know
;
2409 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2410 fprintf (dump_file
, "(analyze_subscript_affine_affine \n");
2412 /* For determining the initial intersection, we have to solve a
2413 Diophantine equation. This is the most time consuming part.
2415 For answering to the question: "Is there a dependence?" we have
2416 to prove that there exists a solution to the Diophantine
2417 equation, and that the solution is in the iteration domain,
2418 i.e. the solution is positive or zero, and that the solution
2419 happens before the upper bound loop.nb_iterations. Otherwise
2420 there is no dependence. This function outputs a description of
2421 the iterations that hold the intersections. */
2423 nb_vars_a
= nb_vars_in_chrec (chrec_a
);
2424 nb_vars_b
= nb_vars_in_chrec (chrec_b
);
2426 gcc_obstack_init (&scratch_obstack
);
2428 dim
= nb_vars_a
+ nb_vars_b
;
2429 U
= lambda_matrix_new (dim
, dim
, &scratch_obstack
);
2430 A
= lambda_matrix_new (dim
, 1, &scratch_obstack
);
2431 S
= lambda_matrix_new (dim
, 1, &scratch_obstack
);
2433 init_a
= int_cst_value (initialize_matrix_A (A
, chrec_a
, 0, 1));
2434 init_b
= int_cst_value (initialize_matrix_A (A
, chrec_b
, nb_vars_a
, -1));
2435 gamma
= init_b
- init_a
;
2437 /* Don't do all the hard work of solving the Diophantine equation
2438 when we already know the solution: for example,
2441 | gamma = 3 - 3 = 0.
2442 Then the first overlap occurs during the first iterations:
2443 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2447 if (nb_vars_a
== 1 && nb_vars_b
== 1)
2449 HOST_WIDE_INT step_a
, step_b
;
2450 HOST_WIDE_INT niter
, niter_a
, niter_b
;
2453 niter_a
= max_stmt_executions_int (get_chrec_loop (chrec_a
));
2454 niter_b
= max_stmt_executions_int (get_chrec_loop (chrec_b
));
2455 niter
= MIN (niter_a
, niter_b
);
2456 step_a
= int_cst_value (CHREC_RIGHT (chrec_a
));
2457 step_b
= int_cst_value (CHREC_RIGHT (chrec_b
));
2459 compute_overlap_steps_for_affine_univar (niter
, step_a
, step_b
,
2462 *overlaps_a
= conflict_fn (1, ova
);
2463 *overlaps_b
= conflict_fn (1, ovb
);
2466 else if (nb_vars_a
== 2 && nb_vars_b
== 1)
2467 compute_overlap_steps_for_affine_1_2
2468 (chrec_a
, chrec_b
, overlaps_a
, overlaps_b
, last_conflicts
);
2470 else if (nb_vars_a
== 1 && nb_vars_b
== 2)
2471 compute_overlap_steps_for_affine_1_2
2472 (chrec_b
, chrec_a
, overlaps_b
, overlaps_a
, last_conflicts
);
2476 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2477 fprintf (dump_file
, "affine-affine test failed: too many variables.\n");
2478 *overlaps_a
= conflict_fn_not_known ();
2479 *overlaps_b
= conflict_fn_not_known ();
2480 *last_conflicts
= chrec_dont_know
;
2482 goto end_analyze_subs_aa
;
2486 lambda_matrix_right_hermite (A
, dim
, 1, S
, U
);
2491 lambda_matrix_row_negate (U
, dim
, 0);
2493 gcd_alpha_beta
= S
[0][0];
2495 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2496 but that is a quite strange case. Instead of ICEing, answer
2498 if (gcd_alpha_beta
== 0)
2500 *overlaps_a
= conflict_fn_not_known ();
2501 *overlaps_b
= conflict_fn_not_known ();
2502 *last_conflicts
= chrec_dont_know
;
2503 goto end_analyze_subs_aa
;
2506 /* The classic "gcd-test". */
2507 if (!int_divides_p (gcd_alpha_beta
, gamma
))
2509 /* The "gcd-test" has determined that there is no integer
2510 solution, i.e. there is no dependence. */
2511 *overlaps_a
= conflict_fn_no_dependence ();
2512 *overlaps_b
= conflict_fn_no_dependence ();
2513 *last_conflicts
= integer_zero_node
;
2516 /* Both access functions are univariate. This includes SIV and MIV cases. */
2517 else if (nb_vars_a
== 1 && nb_vars_b
== 1)
2519 /* Both functions should have the same evolution sign. */
2520 if (((A
[0][0] > 0 && -A
[1][0] > 0)
2521 || (A
[0][0] < 0 && -A
[1][0] < 0)))
2523 /* The solutions are given by:
2525 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2528 For a given integer t. Using the following variables,
2530 | i0 = u11 * gamma / gcd_alpha_beta
2531 | j0 = u12 * gamma / gcd_alpha_beta
2538 | y0 = j0 + j1 * t. */
2539 HOST_WIDE_INT i0
, j0
, i1
, j1
;
2541 i0
= U
[0][0] * gamma
/ gcd_alpha_beta
;
2542 j0
= U
[0][1] * gamma
/ gcd_alpha_beta
;
2546 if ((i1
== 0 && i0
< 0)
2547 || (j1
== 0 && j0
< 0))
2549 /* There is no solution.
2550 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2551 falls in here, but for the moment we don't look at the
2552 upper bound of the iteration domain. */
2553 *overlaps_a
= conflict_fn_no_dependence ();
2554 *overlaps_b
= conflict_fn_no_dependence ();
2555 *last_conflicts
= integer_zero_node
;
2556 goto end_analyze_subs_aa
;
2559 if (i1
> 0 && j1
> 0)
2561 HOST_WIDE_INT niter_a
2562 = max_stmt_executions_int (get_chrec_loop (chrec_a
));
2563 HOST_WIDE_INT niter_b
2564 = max_stmt_executions_int (get_chrec_loop (chrec_b
));
2565 HOST_WIDE_INT niter
= MIN (niter_a
, niter_b
);
2567 /* (X0, Y0) is a solution of the Diophantine equation:
2568 "chrec_a (X0) = chrec_b (Y0)". */
2569 HOST_WIDE_INT tau1
= MAX (CEIL (-i0
, i1
),
2571 HOST_WIDE_INT x0
= i1
* tau1
+ i0
;
2572 HOST_WIDE_INT y0
= j1
* tau1
+ j0
;
2574 /* (X1, Y1) is the smallest positive solution of the eq
2575 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2576 first conflict occurs. */
2577 HOST_WIDE_INT min_multiple
= MIN (x0
/ i1
, y0
/ j1
);
2578 HOST_WIDE_INT x1
= x0
- i1
* min_multiple
;
2579 HOST_WIDE_INT y1
= y0
- j1
* min_multiple
;
2583 HOST_WIDE_INT tau2
= MIN (FLOOR_DIV (niter
- i0
, i1
),
2584 FLOOR_DIV (niter
- j0
, j1
));
2585 HOST_WIDE_INT last_conflict
= tau2
- (x1
- i0
)/i1
;
2587 /* If the overlap occurs outside of the bounds of the
2588 loop, there is no dependence. */
2589 if (x1
>= niter
|| y1
>= niter
)
2591 *overlaps_a
= conflict_fn_no_dependence ();
2592 *overlaps_b
= conflict_fn_no_dependence ();
2593 *last_conflicts
= integer_zero_node
;
2594 goto end_analyze_subs_aa
;
2597 *last_conflicts
= build_int_cst (NULL_TREE
, last_conflict
);
2600 *last_conflicts
= chrec_dont_know
;
2604 affine_fn_univar (build_int_cst (NULL_TREE
, x1
),
2606 build_int_cst (NULL_TREE
, i1
)));
2609 affine_fn_univar (build_int_cst (NULL_TREE
, y1
),
2611 build_int_cst (NULL_TREE
, j1
)));
2615 /* FIXME: For the moment, the upper bound of the
2616 iteration domain for i and j is not checked. */
2617 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2618 fprintf (dump_file
, "affine-affine test failed: unimplemented.\n");
2619 *overlaps_a
= conflict_fn_not_known ();
2620 *overlaps_b
= conflict_fn_not_known ();
2621 *last_conflicts
= chrec_dont_know
;
2626 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2627 fprintf (dump_file
, "affine-affine test failed: unimplemented.\n");
2628 *overlaps_a
= conflict_fn_not_known ();
2629 *overlaps_b
= conflict_fn_not_known ();
2630 *last_conflicts
= chrec_dont_know
;
2635 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2636 fprintf (dump_file
, "affine-affine test failed: unimplemented.\n");
2637 *overlaps_a
= conflict_fn_not_known ();
2638 *overlaps_b
= conflict_fn_not_known ();
2639 *last_conflicts
= chrec_dont_know
;
2642 end_analyze_subs_aa
:
2643 obstack_free (&scratch_obstack
, NULL
);
2644 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2646 fprintf (dump_file
, " (overlaps_a = ");
2647 dump_conflict_function (dump_file
, *overlaps_a
);
2648 fprintf (dump_file
, ")\n (overlaps_b = ");
2649 dump_conflict_function (dump_file
, *overlaps_b
);
2650 fprintf (dump_file
, ")\n");
2651 fprintf (dump_file
, ")\n");
2655 /* Returns true when analyze_subscript_affine_affine can be used for
2656 determining the dependence relation between chrec_a and chrec_b,
2657 that contain symbols. This function modifies chrec_a and chrec_b
2658 such that the analysis result is the same, and such that they don't
2659 contain symbols, and then can safely be passed to the analyzer.
2661 Example: The analysis of the following tuples of evolutions produce
2662 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2665 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2666 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2670 can_use_analyze_subscript_affine_affine (tree
*chrec_a
, tree
*chrec_b
)
2672 tree diff
, type
, left_a
, left_b
, right_b
;
2674 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a
))
2675 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b
)))
2676 /* FIXME: For the moment not handled. Might be refined later. */
2679 type
= chrec_type (*chrec_a
);
2680 left_a
= CHREC_LEFT (*chrec_a
);
2681 left_b
= chrec_convert (type
, CHREC_LEFT (*chrec_b
), NULL
);
2682 diff
= chrec_fold_minus (type
, left_a
, left_b
);
2684 if (!evolution_function_is_constant_p (diff
))
2687 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2688 fprintf (dump_file
, "can_use_subscript_aff_aff_for_symbolic \n");
2690 *chrec_a
= build_polynomial_chrec (CHREC_VARIABLE (*chrec_a
),
2691 diff
, CHREC_RIGHT (*chrec_a
));
2692 right_b
= chrec_convert (type
, CHREC_RIGHT (*chrec_b
), NULL
);
2693 *chrec_b
= build_polynomial_chrec (CHREC_VARIABLE (*chrec_b
),
2694 build_int_cst (type
, 0),
2699 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2700 *OVERLAPS_B are initialized to the functions that describe the
2701 relation between the elements accessed twice by CHREC_A and
2702 CHREC_B. For k >= 0, the following property is verified:
2704 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2707 analyze_siv_subscript (tree chrec_a
,
2709 conflict_function
**overlaps_a
,
2710 conflict_function
**overlaps_b
,
2711 tree
*last_conflicts
,
2714 dependence_stats
.num_siv
++;
2716 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2717 fprintf (dump_file
, "(analyze_siv_subscript \n");
2719 if (evolution_function_is_constant_p (chrec_a
)
2720 && evolution_function_is_affine_in_loop (chrec_b
, loop_nest_num
))
2721 analyze_siv_subscript_cst_affine (chrec_a
, chrec_b
,
2722 overlaps_a
, overlaps_b
, last_conflicts
);
2724 else if (evolution_function_is_affine_in_loop (chrec_a
, loop_nest_num
)
2725 && evolution_function_is_constant_p (chrec_b
))
2726 analyze_siv_subscript_cst_affine (chrec_b
, chrec_a
,
2727 overlaps_b
, overlaps_a
, last_conflicts
);
2729 else if (evolution_function_is_affine_in_loop (chrec_a
, loop_nest_num
)
2730 && evolution_function_is_affine_in_loop (chrec_b
, loop_nest_num
))
2732 if (!chrec_contains_symbols (chrec_a
)
2733 && !chrec_contains_symbols (chrec_b
))
2735 analyze_subscript_affine_affine (chrec_a
, chrec_b
,
2736 overlaps_a
, overlaps_b
,
2739 if (CF_NOT_KNOWN_P (*overlaps_a
)
2740 || CF_NOT_KNOWN_P (*overlaps_b
))
2741 dependence_stats
.num_siv_unimplemented
++;
2742 else if (CF_NO_DEPENDENCE_P (*overlaps_a
)
2743 || CF_NO_DEPENDENCE_P (*overlaps_b
))
2744 dependence_stats
.num_siv_independent
++;
2746 dependence_stats
.num_siv_dependent
++;
2748 else if (can_use_analyze_subscript_affine_affine (&chrec_a
,
2751 analyze_subscript_affine_affine (chrec_a
, chrec_b
,
2752 overlaps_a
, overlaps_b
,
2755 if (CF_NOT_KNOWN_P (*overlaps_a
)
2756 || CF_NOT_KNOWN_P (*overlaps_b
))
2757 dependence_stats
.num_siv_unimplemented
++;
2758 else if (CF_NO_DEPENDENCE_P (*overlaps_a
)
2759 || CF_NO_DEPENDENCE_P (*overlaps_b
))
2760 dependence_stats
.num_siv_independent
++;
2762 dependence_stats
.num_siv_dependent
++;
2765 goto siv_subscript_dontknow
;
2770 siv_subscript_dontknow
:;
2771 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2772 fprintf (dump_file
, "siv test failed: unimplemented.\n");
2773 *overlaps_a
= conflict_fn_not_known ();
2774 *overlaps_b
= conflict_fn_not_known ();
2775 *last_conflicts
= chrec_dont_know
;
2776 dependence_stats
.num_siv_unimplemented
++;
2779 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2780 fprintf (dump_file
, ")\n");
2783 /* Returns false if we can prove that the greatest common divisor of the steps
2784 of CHREC does not divide CST, false otherwise. */
2787 gcd_of_steps_may_divide_p (const_tree chrec
, const_tree cst
)
2789 HOST_WIDE_INT cd
= 0, val
;
2792 if (!host_integerp (cst
, 0))
2794 val
= tree_low_cst (cst
, 0);
2796 while (TREE_CODE (chrec
) == POLYNOMIAL_CHREC
)
2798 step
= CHREC_RIGHT (chrec
);
2799 if (!host_integerp (step
, 0))
2801 cd
= gcd (cd
, tree_low_cst (step
, 0));
2802 chrec
= CHREC_LEFT (chrec
);
2805 return val
% cd
== 0;
2808 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2809 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2810 functions that describe the relation between the elements accessed
2811 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2814 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2817 analyze_miv_subscript (tree chrec_a
,
2819 conflict_function
**overlaps_a
,
2820 conflict_function
**overlaps_b
,
2821 tree
*last_conflicts
,
2822 struct loop
*loop_nest
)
2824 tree type
, difference
;
2826 dependence_stats
.num_miv
++;
2827 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2828 fprintf (dump_file
, "(analyze_miv_subscript \n");
2830 type
= signed_type_for_types (TREE_TYPE (chrec_a
), TREE_TYPE (chrec_b
));
2831 chrec_a
= chrec_convert (type
, chrec_a
, NULL
);
2832 chrec_b
= chrec_convert (type
, chrec_b
, NULL
);
2833 difference
= chrec_fold_minus (type
, chrec_a
, chrec_b
);
2835 if (eq_evolutions_p (chrec_a
, chrec_b
))
2837 /* Access functions are the same: all the elements are accessed
2838 in the same order. */
2839 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2840 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2841 *last_conflicts
= max_stmt_executions_tree (get_chrec_loop (chrec_a
));
2842 dependence_stats
.num_miv_dependent
++;
2845 else if (evolution_function_is_constant_p (difference
)
2846 /* For the moment, the following is verified:
2847 evolution_function_is_affine_multivariate_p (chrec_a,
2849 && !gcd_of_steps_may_divide_p (chrec_a
, difference
))
2851 /* testsuite/.../ssa-chrec-33.c
2852 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2854 The difference is 1, and all the evolution steps are multiples
2855 of 2, consequently there are no overlapping elements. */
2856 *overlaps_a
= conflict_fn_no_dependence ();
2857 *overlaps_b
= conflict_fn_no_dependence ();
2858 *last_conflicts
= integer_zero_node
;
2859 dependence_stats
.num_miv_independent
++;
2862 else if (evolution_function_is_affine_multivariate_p (chrec_a
, loop_nest
->num
)
2863 && !chrec_contains_symbols (chrec_a
)
2864 && evolution_function_is_affine_multivariate_p (chrec_b
, loop_nest
->num
)
2865 && !chrec_contains_symbols (chrec_b
))
2867 /* testsuite/.../ssa-chrec-35.c
2868 {0, +, 1}_2 vs. {0, +, 1}_3
2869 the overlapping elements are respectively located at iterations:
2870 {0, +, 1}_x and {0, +, 1}_x,
2871 in other words, we have the equality:
2872 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2875 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2876 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2878 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2879 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2881 analyze_subscript_affine_affine (chrec_a
, chrec_b
,
2882 overlaps_a
, overlaps_b
, last_conflicts
);
2884 if (CF_NOT_KNOWN_P (*overlaps_a
)
2885 || CF_NOT_KNOWN_P (*overlaps_b
))
2886 dependence_stats
.num_miv_unimplemented
++;
2887 else if (CF_NO_DEPENDENCE_P (*overlaps_a
)
2888 || CF_NO_DEPENDENCE_P (*overlaps_b
))
2889 dependence_stats
.num_miv_independent
++;
2891 dependence_stats
.num_miv_dependent
++;
2896 /* When the analysis is too difficult, answer "don't know". */
2897 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2898 fprintf (dump_file
, "analyze_miv_subscript test failed: unimplemented.\n");
2900 *overlaps_a
= conflict_fn_not_known ();
2901 *overlaps_b
= conflict_fn_not_known ();
2902 *last_conflicts
= chrec_dont_know
;
2903 dependence_stats
.num_miv_unimplemented
++;
2906 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2907 fprintf (dump_file
, ")\n");
2910 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2911 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2912 OVERLAP_ITERATIONS_B are initialized with two functions that
2913 describe the iterations that contain conflicting elements.
2915 Remark: For an integer k >= 0, the following equality is true:
2917 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2921 analyze_overlapping_iterations (tree chrec_a
,
2923 conflict_function
**overlap_iterations_a
,
2924 conflict_function
**overlap_iterations_b
,
2925 tree
*last_conflicts
, struct loop
*loop_nest
)
2927 unsigned int lnn
= loop_nest
->num
;
2929 dependence_stats
.num_subscript_tests
++;
2931 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2933 fprintf (dump_file
, "(analyze_overlapping_iterations \n");
2934 fprintf (dump_file
, " (chrec_a = ");
2935 print_generic_expr (dump_file
, chrec_a
, 0);
2936 fprintf (dump_file
, ")\n (chrec_b = ");
2937 print_generic_expr (dump_file
, chrec_b
, 0);
2938 fprintf (dump_file
, ")\n");
2941 if (chrec_a
== NULL_TREE
2942 || chrec_b
== NULL_TREE
2943 || chrec_contains_undetermined (chrec_a
)
2944 || chrec_contains_undetermined (chrec_b
))
2946 dependence_stats
.num_subscript_undetermined
++;
2948 *overlap_iterations_a
= conflict_fn_not_known ();
2949 *overlap_iterations_b
= conflict_fn_not_known ();
2952 /* If they are the same chrec, and are affine, they overlap
2953 on every iteration. */
2954 else if (eq_evolutions_p (chrec_a
, chrec_b
)
2955 && (evolution_function_is_affine_multivariate_p (chrec_a
, lnn
)
2956 || operand_equal_p (chrec_a
, chrec_b
, 0)))
2958 dependence_stats
.num_same_subscript_function
++;
2959 *overlap_iterations_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2960 *overlap_iterations_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2961 *last_conflicts
= chrec_dont_know
;
2964 /* If they aren't the same, and aren't affine, we can't do anything
2966 else if ((chrec_contains_symbols (chrec_a
)
2967 || chrec_contains_symbols (chrec_b
))
2968 && (!evolution_function_is_affine_multivariate_p (chrec_a
, lnn
)
2969 || !evolution_function_is_affine_multivariate_p (chrec_b
, lnn
)))
2971 dependence_stats
.num_subscript_undetermined
++;
2972 *overlap_iterations_a
= conflict_fn_not_known ();
2973 *overlap_iterations_b
= conflict_fn_not_known ();
2976 else if (ziv_subscript_p (chrec_a
, chrec_b
))
2977 analyze_ziv_subscript (chrec_a
, chrec_b
,
2978 overlap_iterations_a
, overlap_iterations_b
,
2981 else if (siv_subscript_p (chrec_a
, chrec_b
))
2982 analyze_siv_subscript (chrec_a
, chrec_b
,
2983 overlap_iterations_a
, overlap_iterations_b
,
2984 last_conflicts
, lnn
);
2987 analyze_miv_subscript (chrec_a
, chrec_b
,
2988 overlap_iterations_a
, overlap_iterations_b
,
2989 last_conflicts
, loop_nest
);
2991 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2993 fprintf (dump_file
, " (overlap_iterations_a = ");
2994 dump_conflict_function (dump_file
, *overlap_iterations_a
);
2995 fprintf (dump_file
, ")\n (overlap_iterations_b = ");
2996 dump_conflict_function (dump_file
, *overlap_iterations_b
);
2997 fprintf (dump_file
, ")\n");
2998 fprintf (dump_file
, ")\n");
3002 /* Helper function for uniquely inserting distance vectors. */
3005 save_dist_v (struct data_dependence_relation
*ddr
, lambda_vector dist_v
)
3010 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr
), i
, v
)
3011 if (lambda_vector_equal (v
, dist_v
, DDR_NB_LOOPS (ddr
)))
3014 DDR_DIST_VECTS (ddr
).safe_push (dist_v
);
3017 /* Helper function for uniquely inserting direction vectors. */
3020 save_dir_v (struct data_dependence_relation
*ddr
, lambda_vector dir_v
)
3025 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr
), i
, v
)
3026 if (lambda_vector_equal (v
, dir_v
, DDR_NB_LOOPS (ddr
)))
3029 DDR_DIR_VECTS (ddr
).safe_push (dir_v
);
3032 /* Add a distance of 1 on all the loops outer than INDEX. If we
3033 haven't yet determined a distance for this outer loop, push a new
3034 distance vector composed of the previous distance, and a distance
3035 of 1 for this outer loop. Example:
3043 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
3044 save (0, 1), then we have to save (1, 0). */
3047 add_outer_distances (struct data_dependence_relation
*ddr
,
3048 lambda_vector dist_v
, int index
)
3050 /* For each outer loop where init_v is not set, the accesses are
3051 in dependence of distance 1 in the loop. */
3052 while (--index
>= 0)
3054 lambda_vector save_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3055 lambda_vector_copy (dist_v
, save_v
, DDR_NB_LOOPS (ddr
));
3057 save_dist_v (ddr
, save_v
);
3061 /* Return false when fail to represent the data dependence as a
3062 distance vector. INIT_B is set to true when a component has been
3063 added to the distance vector DIST_V. INDEX_CARRY is then set to
3064 the index in DIST_V that carries the dependence. */
3067 build_classic_dist_vector_1 (struct data_dependence_relation
*ddr
,
3068 struct data_reference
*ddr_a
,
3069 struct data_reference
*ddr_b
,
3070 lambda_vector dist_v
, bool *init_b
,
3074 lambda_vector init_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3076 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
3078 tree access_fn_a
, access_fn_b
;
3079 struct subscript
*subscript
= DDR_SUBSCRIPT (ddr
, i
);
3081 if (chrec_contains_undetermined (SUB_DISTANCE (subscript
)))
3083 non_affine_dependence_relation (ddr
);
3087 access_fn_a
= DR_ACCESS_FN (ddr_a
, i
);
3088 access_fn_b
= DR_ACCESS_FN (ddr_b
, i
);
3090 if (TREE_CODE (access_fn_a
) == POLYNOMIAL_CHREC
3091 && TREE_CODE (access_fn_b
) == POLYNOMIAL_CHREC
)
3094 int var_a
= CHREC_VARIABLE (access_fn_a
);
3095 int var_b
= CHREC_VARIABLE (access_fn_b
);
3098 || chrec_contains_undetermined (SUB_DISTANCE (subscript
)))
3100 non_affine_dependence_relation (ddr
);
3104 dist
= int_cst_value (SUB_DISTANCE (subscript
));
3105 index
= index_in_loop_nest (var_a
, DDR_LOOP_NEST (ddr
));
3106 *index_carry
= MIN (index
, *index_carry
);
3108 /* This is the subscript coupling test. If we have already
3109 recorded a distance for this loop (a distance coming from
3110 another subscript), it should be the same. For example,
3111 in the following code, there is no dependence:
3118 if (init_v
[index
] != 0 && dist_v
[index
] != dist
)
3120 finalize_ddr_dependent (ddr
, chrec_known
);
3124 dist_v
[index
] = dist
;
3128 else if (!operand_equal_p (access_fn_a
, access_fn_b
, 0))
3130 /* This can be for example an affine vs. constant dependence
3131 (T[i] vs. T[3]) that is not an affine dependence and is
3132 not representable as a distance vector. */
3133 non_affine_dependence_relation (ddr
);
3141 /* Return true when the DDR contains only constant access functions. */
3144 constant_access_functions (const struct data_dependence_relation
*ddr
)
3148 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
3149 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr
), i
))
3150 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr
), i
)))
3156 /* Helper function for the case where DDR_A and DDR_B are the same
3157 multivariate access function with a constant step. For an example
3161 add_multivariate_self_dist (struct data_dependence_relation
*ddr
, tree c_2
)
3164 tree c_1
= CHREC_LEFT (c_2
);
3165 tree c_0
= CHREC_LEFT (c_1
);
3166 lambda_vector dist_v
;
3169 /* Polynomials with more than 2 variables are not handled yet. When
3170 the evolution steps are parameters, it is not possible to
3171 represent the dependence using classical distance vectors. */
3172 if (TREE_CODE (c_0
) != INTEGER_CST
3173 || TREE_CODE (CHREC_RIGHT (c_1
)) != INTEGER_CST
3174 || TREE_CODE (CHREC_RIGHT (c_2
)) != INTEGER_CST
)
3176 DDR_AFFINE_P (ddr
) = false;
3180 x_2
= index_in_loop_nest (CHREC_VARIABLE (c_2
), DDR_LOOP_NEST (ddr
));
3181 x_1
= index_in_loop_nest (CHREC_VARIABLE (c_1
), DDR_LOOP_NEST (ddr
));
3183 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
3184 dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3185 v1
= int_cst_value (CHREC_RIGHT (c_1
));
3186 v2
= int_cst_value (CHREC_RIGHT (c_2
));
3199 save_dist_v (ddr
, dist_v
);
3201 add_outer_distances (ddr
, dist_v
, x_1
);
3204 /* Helper function for the case where DDR_A and DDR_B are the same
3205 access functions. */
3208 add_other_self_distances (struct data_dependence_relation
*ddr
)
3210 lambda_vector dist_v
;
3212 int index_carry
= DDR_NB_LOOPS (ddr
);
3214 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
3216 tree access_fun
= DR_ACCESS_FN (DDR_A (ddr
), i
);
3218 if (TREE_CODE (access_fun
) == POLYNOMIAL_CHREC
)
3220 if (!evolution_function_is_univariate_p (access_fun
))
3222 if (DDR_NUM_SUBSCRIPTS (ddr
) != 1)
3224 DDR_ARE_DEPENDENT (ddr
) = chrec_dont_know
;
3228 access_fun
= DR_ACCESS_FN (DDR_A (ddr
), 0);
3230 if (TREE_CODE (CHREC_LEFT (access_fun
)) == POLYNOMIAL_CHREC
)
3231 add_multivariate_self_dist (ddr
, access_fun
);
3233 /* The evolution step is not constant: it varies in
3234 the outer loop, so this cannot be represented by a
3235 distance vector. For example in pr34635.c the
3236 evolution is {0, +, {0, +, 4}_1}_2. */
3237 DDR_AFFINE_P (ddr
) = false;
3242 index_carry
= MIN (index_carry
,
3243 index_in_loop_nest (CHREC_VARIABLE (access_fun
),
3244 DDR_LOOP_NEST (ddr
)));
3248 dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3249 add_outer_distances (ddr
, dist_v
, index_carry
);
3253 insert_innermost_unit_dist_vector (struct data_dependence_relation
*ddr
)
3255 lambda_vector dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3257 dist_v
[DDR_INNER_LOOP (ddr
)] = 1;
3258 save_dist_v (ddr
, dist_v
);
3261 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
3262 is the case for example when access functions are the same and
3263 equal to a constant, as in:
3270 in which case the distance vectors are (0) and (1). */
3273 add_distance_for_zero_overlaps (struct data_dependence_relation
*ddr
)
3277 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
3279 subscript_p sub
= DDR_SUBSCRIPT (ddr
, i
);
3280 conflict_function
*ca
= SUB_CONFLICTS_IN_A (sub
);
3281 conflict_function
*cb
= SUB_CONFLICTS_IN_B (sub
);
3283 for (j
= 0; j
< ca
->n
; j
++)
3284 if (affine_function_zero_p (ca
->fns
[j
]))
3286 insert_innermost_unit_dist_vector (ddr
);
3290 for (j
= 0; j
< cb
->n
; j
++)
3291 if (affine_function_zero_p (cb
->fns
[j
]))
3293 insert_innermost_unit_dist_vector (ddr
);
3299 /* Compute the classic per loop distance vector. DDR is the data
3300 dependence relation to build a vector from. Return false when fail
3301 to represent the data dependence as a distance vector. */
3304 build_classic_dist_vector (struct data_dependence_relation
*ddr
,
3305 struct loop
*loop_nest
)
3307 bool init_b
= false;
3308 int index_carry
= DDR_NB_LOOPS (ddr
);
3309 lambda_vector dist_v
;
3311 if (DDR_ARE_DEPENDENT (ddr
) != NULL_TREE
)
3314 if (same_access_functions (ddr
))
3316 /* Save the 0 vector. */
3317 dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3318 save_dist_v (ddr
, dist_v
);
3320 if (constant_access_functions (ddr
))
3321 add_distance_for_zero_overlaps (ddr
);
3323 if (DDR_NB_LOOPS (ddr
) > 1)
3324 add_other_self_distances (ddr
);
3329 dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3330 if (!build_classic_dist_vector_1 (ddr
, DDR_A (ddr
), DDR_B (ddr
),
3331 dist_v
, &init_b
, &index_carry
))
3334 /* Save the distance vector if we initialized one. */
3337 /* Verify a basic constraint: classic distance vectors should
3338 always be lexicographically positive.
3340 Data references are collected in the order of execution of
3341 the program, thus for the following loop
3343 | for (i = 1; i < 100; i++)
3344 | for (j = 1; j < 100; j++)
3346 | t = T[j+1][i-1]; // A
3347 | T[j][i] = t + 2; // B
3350 references are collected following the direction of the wind:
3351 A then B. The data dependence tests are performed also
3352 following this order, such that we're looking at the distance
3353 separating the elements accessed by A from the elements later
3354 accessed by B. But in this example, the distance returned by
3355 test_dep (A, B) is lexicographically negative (-1, 1), that
3356 means that the access A occurs later than B with respect to
3357 the outer loop, ie. we're actually looking upwind. In this
3358 case we solve test_dep (B, A) looking downwind to the
3359 lexicographically positive solution, that returns the
3360 distance vector (1, -1). */
3361 if (!lambda_vector_lexico_pos (dist_v
, DDR_NB_LOOPS (ddr
)))
3363 lambda_vector save_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3364 if (!subscript_dependence_tester_1 (ddr
, DDR_B (ddr
), DDR_A (ddr
),
3367 compute_subscript_distance (ddr
);
3368 if (!build_classic_dist_vector_1 (ddr
, DDR_B (ddr
), DDR_A (ddr
),
3369 save_v
, &init_b
, &index_carry
))
3371 save_dist_v (ddr
, save_v
);
3372 DDR_REVERSED_P (ddr
) = true;
3374 /* In this case there is a dependence forward for all the
3377 | for (k = 1; k < 100; k++)
3378 | for (i = 1; i < 100; i++)
3379 | for (j = 1; j < 100; j++)
3381 | t = T[j+1][i-1]; // A
3382 | T[j][i] = t + 2; // B
3390 if (DDR_NB_LOOPS (ddr
) > 1)
3392 add_outer_distances (ddr
, save_v
, index_carry
);
3393 add_outer_distances (ddr
, dist_v
, index_carry
);
3398 lambda_vector save_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3399 lambda_vector_copy (dist_v
, save_v
, DDR_NB_LOOPS (ddr
));
3401 if (DDR_NB_LOOPS (ddr
) > 1)
3403 lambda_vector opposite_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3405 if (!subscript_dependence_tester_1 (ddr
, DDR_B (ddr
),
3406 DDR_A (ddr
), loop_nest
))
3408 compute_subscript_distance (ddr
);
3409 if (!build_classic_dist_vector_1 (ddr
, DDR_B (ddr
), DDR_A (ddr
),
3410 opposite_v
, &init_b
,
3414 save_dist_v (ddr
, save_v
);
3415 add_outer_distances (ddr
, dist_v
, index_carry
);
3416 add_outer_distances (ddr
, opposite_v
, index_carry
);
3419 save_dist_v (ddr
, save_v
);
3424 /* There is a distance of 1 on all the outer loops: Example:
3425 there is a dependence of distance 1 on loop_1 for the array A.
3431 add_outer_distances (ddr
, dist_v
,
3432 lambda_vector_first_nz (dist_v
,
3433 DDR_NB_LOOPS (ddr
), 0));
3436 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3440 fprintf (dump_file
, "(build_classic_dist_vector\n");
3441 for (i
= 0; i
< DDR_NUM_DIST_VECTS (ddr
); i
++)
3443 fprintf (dump_file
, " dist_vector = (");
3444 print_lambda_vector (dump_file
, DDR_DIST_VECT (ddr
, i
),
3445 DDR_NB_LOOPS (ddr
));
3446 fprintf (dump_file
, " )\n");
3448 fprintf (dump_file
, ")\n");
3454 /* Return the direction for a given distance.
3455 FIXME: Computing dir this way is suboptimal, since dir can catch
3456 cases that dist is unable to represent. */
3458 static inline enum data_dependence_direction
3459 dir_from_dist (int dist
)
3462 return dir_positive
;
3464 return dir_negative
;
3469 /* Compute the classic per loop direction vector. DDR is the data
3470 dependence relation to build a vector from. */
3473 build_classic_dir_vector (struct data_dependence_relation
*ddr
)
3476 lambda_vector dist_v
;
3478 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr
), i
, dist_v
)
3480 lambda_vector dir_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3482 for (j
= 0; j
< DDR_NB_LOOPS (ddr
); j
++)
3483 dir_v
[j
] = dir_from_dist (dist_v
[j
]);
3485 save_dir_v (ddr
, dir_v
);
3489 /* Helper function. Returns true when there is a dependence between
3490 data references DRA and DRB. */
3493 subscript_dependence_tester_1 (struct data_dependence_relation
*ddr
,
3494 struct data_reference
*dra
,
3495 struct data_reference
*drb
,
3496 struct loop
*loop_nest
)
3499 tree last_conflicts
;
3500 struct subscript
*subscript
;
3501 tree res
= NULL_TREE
;
3503 for (i
= 0; DDR_SUBSCRIPTS (ddr
).iterate (i
, &subscript
); i
++)
3505 conflict_function
*overlaps_a
, *overlaps_b
;
3507 analyze_overlapping_iterations (DR_ACCESS_FN (dra
, i
),
3508 DR_ACCESS_FN (drb
, i
),
3509 &overlaps_a
, &overlaps_b
,
3510 &last_conflicts
, loop_nest
);
3512 if (SUB_CONFLICTS_IN_A (subscript
))
3513 free_conflict_function (SUB_CONFLICTS_IN_A (subscript
));
3514 if (SUB_CONFLICTS_IN_B (subscript
))
3515 free_conflict_function (SUB_CONFLICTS_IN_B (subscript
));
3517 SUB_CONFLICTS_IN_A (subscript
) = overlaps_a
;
3518 SUB_CONFLICTS_IN_B (subscript
) = overlaps_b
;
3519 SUB_LAST_CONFLICT (subscript
) = last_conflicts
;
3521 /* If there is any undetermined conflict function we have to
3522 give a conservative answer in case we cannot prove that
3523 no dependence exists when analyzing another subscript. */
3524 if (CF_NOT_KNOWN_P (overlaps_a
)
3525 || CF_NOT_KNOWN_P (overlaps_b
))
3527 res
= chrec_dont_know
;
3531 /* When there is a subscript with no dependence we can stop. */
3532 else if (CF_NO_DEPENDENCE_P (overlaps_a
)
3533 || CF_NO_DEPENDENCE_P (overlaps_b
))
3540 if (res
== NULL_TREE
)
3543 if (res
== chrec_known
)
3544 dependence_stats
.num_dependence_independent
++;
3546 dependence_stats
.num_dependence_undetermined
++;
3547 finalize_ddr_dependent (ddr
, res
);
3551 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3554 subscript_dependence_tester (struct data_dependence_relation
*ddr
,
3555 struct loop
*loop_nest
)
3558 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3559 fprintf (dump_file
, "(subscript_dependence_tester \n");
3561 if (subscript_dependence_tester_1 (ddr
, DDR_A (ddr
), DDR_B (ddr
), loop_nest
))
3562 dependence_stats
.num_dependence_dependent
++;
3564 compute_subscript_distance (ddr
);
3565 if (build_classic_dist_vector (ddr
, loop_nest
))
3566 build_classic_dir_vector (ddr
);
3568 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3569 fprintf (dump_file
, ")\n");
3572 /* Returns true when all the access functions of A are affine or
3573 constant with respect to LOOP_NEST. */
3576 access_functions_are_affine_or_constant_p (const struct data_reference
*a
,
3577 const struct loop
*loop_nest
)
3580 vec
<tree
> fns
= DR_ACCESS_FNS (a
);
3583 FOR_EACH_VEC_ELT (fns
, i
, t
)
3584 if (!evolution_function_is_invariant_p (t
, loop_nest
->num
)
3585 && !evolution_function_is_affine_multivariate_p (t
, loop_nest
->num
))
3591 /* Initializes an equation for an OMEGA problem using the information
3592 contained in the ACCESS_FUN. Returns true when the operation
3595 PB is the omega constraint system.
3596 EQ is the number of the equation to be initialized.
3597 OFFSET is used for shifting the variables names in the constraints:
3598 a constrain is composed of 2 * the number of variables surrounding
3599 dependence accesses. OFFSET is set either to 0 for the first n variables,
3600 then it is set to n.
3601 ACCESS_FUN is expected to be an affine chrec. */
3604 init_omega_eq_with_af (omega_pb pb
, unsigned eq
,
3605 unsigned int offset
, tree access_fun
,
3606 struct data_dependence_relation
*ddr
)
3608 switch (TREE_CODE (access_fun
))
3610 case POLYNOMIAL_CHREC
:
3612 tree left
= CHREC_LEFT (access_fun
);
3613 tree right
= CHREC_RIGHT (access_fun
);
3614 int var
= CHREC_VARIABLE (access_fun
);
3617 if (TREE_CODE (right
) != INTEGER_CST
)
3620 var_idx
= index_in_loop_nest (var
, DDR_LOOP_NEST (ddr
));
3621 pb
->eqs
[eq
].coef
[offset
+ var_idx
+ 1] = int_cst_value (right
);
3623 /* Compute the innermost loop index. */
3624 DDR_INNER_LOOP (ddr
) = MAX (DDR_INNER_LOOP (ddr
), var_idx
);
3627 pb
->eqs
[eq
].coef
[var_idx
+ DDR_NB_LOOPS (ddr
) + 1]
3628 += int_cst_value (right
);
3630 switch (TREE_CODE (left
))
3632 case POLYNOMIAL_CHREC
:
3633 return init_omega_eq_with_af (pb
, eq
, offset
, left
, ddr
);
3636 pb
->eqs
[eq
].coef
[0] += int_cst_value (left
);
3645 pb
->eqs
[eq
].coef
[0] += int_cst_value (access_fun
);
3653 /* As explained in the comments preceding init_omega_for_ddr, we have
3654 to set up a system for each loop level, setting outer loops
3655 variation to zero, and current loop variation to positive or zero.
3656 Save each lexico positive distance vector. */
3659 omega_extract_distance_vectors (omega_pb pb
,
3660 struct data_dependence_relation
*ddr
)
3664 struct loop
*loopi
, *loopj
;
3665 enum omega_result res
;
3667 /* Set a new problem for each loop in the nest. The basis is the
3668 problem that we have initialized until now. On top of this we
3669 add new constraints. */
3670 for (i
= 0; i
<= DDR_INNER_LOOP (ddr
)
3671 && DDR_LOOP_NEST (ddr
).iterate (i
, &loopi
); i
++)
3674 omega_pb copy
= omega_alloc_problem (2 * DDR_NB_LOOPS (ddr
),
3675 DDR_NB_LOOPS (ddr
));
3677 omega_copy_problem (copy
, pb
);
3679 /* For all the outer loops "loop_j", add "dj = 0". */
3680 for (j
= 0; j
< i
&& DDR_LOOP_NEST (ddr
).iterate (j
, &loopj
); j
++)
3682 eq
= omega_add_zero_eq (copy
, omega_black
);
3683 copy
->eqs
[eq
].coef
[j
+ 1] = 1;
3686 /* For "loop_i", add "0 <= di". */
3687 geq
= omega_add_zero_geq (copy
, omega_black
);
3688 copy
->geqs
[geq
].coef
[i
+ 1] = 1;
3690 /* Reduce the constraint system, and test that the current
3691 problem is feasible. */
3692 res
= omega_simplify_problem (copy
);
3693 if (res
== omega_false
3694 || res
== omega_unknown
3695 || copy
->num_geqs
> (int) DDR_NB_LOOPS (ddr
))
3698 for (eq
= 0; eq
< copy
->num_subs
; eq
++)
3699 if (copy
->subs
[eq
].key
== (int) i
+ 1)
3701 dist
= copy
->subs
[eq
].coef
[0];
3707 /* Reinitialize problem... */
3708 omega_copy_problem (copy
, pb
);
3709 for (j
= 0; j
< i
&& DDR_LOOP_NEST (ddr
).iterate (j
, &loopj
); j
++)
3711 eq
= omega_add_zero_eq (copy
, omega_black
);
3712 copy
->eqs
[eq
].coef
[j
+ 1] = 1;
3715 /* ..., but this time "di = 1". */
3716 eq
= omega_add_zero_eq (copy
, omega_black
);
3717 copy
->eqs
[eq
].coef
[i
+ 1] = 1;
3718 copy
->eqs
[eq
].coef
[0] = -1;
3720 res
= omega_simplify_problem (copy
);
3721 if (res
== omega_false
3722 || res
== omega_unknown
3723 || copy
->num_geqs
> (int) DDR_NB_LOOPS (ddr
))
3726 for (eq
= 0; eq
< copy
->num_subs
; eq
++)
3727 if (copy
->subs
[eq
].key
== (int) i
+ 1)
3729 dist
= copy
->subs
[eq
].coef
[0];
3735 /* Save the lexicographically positive distance vector. */
3738 lambda_vector dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3739 lambda_vector dir_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3743 for (eq
= 0; eq
< copy
->num_subs
; eq
++)
3744 if (copy
->subs
[eq
].key
> 0)
3746 dist
= copy
->subs
[eq
].coef
[0];
3747 dist_v
[copy
->subs
[eq
].key
- 1] = dist
;
3750 for (j
= 0; j
< DDR_NB_LOOPS (ddr
); j
++)
3751 dir_v
[j
] = dir_from_dist (dist_v
[j
]);
3753 save_dist_v (ddr
, dist_v
);
3754 save_dir_v (ddr
, dir_v
);
3758 omega_free_problem (copy
);
3762 /* This is called for each subscript of a tuple of data references:
3763 insert an equality for representing the conflicts. */
3766 omega_setup_subscript (tree access_fun_a
, tree access_fun_b
,
3767 struct data_dependence_relation
*ddr
,
3768 omega_pb pb
, bool *maybe_dependent
)
3771 tree type
= signed_type_for_types (TREE_TYPE (access_fun_a
),
3772 TREE_TYPE (access_fun_b
));
3773 tree fun_a
= chrec_convert (type
, access_fun_a
, NULL
);
3774 tree fun_b
= chrec_convert (type
, access_fun_b
, NULL
);
3775 tree difference
= chrec_fold_minus (type
, fun_a
, fun_b
);
3778 /* When the fun_a - fun_b is not constant, the dependence is not
3779 captured by the classic distance vector representation. */
3780 if (TREE_CODE (difference
) != INTEGER_CST
)
3784 if (ziv_subscript_p (fun_a
, fun_b
) && !integer_zerop (difference
))
3786 /* There is no dependence. */
3787 *maybe_dependent
= false;
3791 minus_one
= build_int_cst (type
, -1);
3792 fun_b
= chrec_fold_multiply (type
, fun_b
, minus_one
);
3794 eq
= omega_add_zero_eq (pb
, omega_black
);
3795 if (!init_omega_eq_with_af (pb
, eq
, DDR_NB_LOOPS (ddr
), fun_a
, ddr
)
3796 || !init_omega_eq_with_af (pb
, eq
, 0, fun_b
, ddr
))
3797 /* There is probably a dependence, but the system of
3798 constraints cannot be built: answer "don't know". */
3802 if (DDR_NB_LOOPS (ddr
) != 0 && pb
->eqs
[eq
].coef
[0]
3803 && !int_divides_p (lambda_vector_gcd
3804 ((lambda_vector
) &(pb
->eqs
[eq
].coef
[1]),
3805 2 * DDR_NB_LOOPS (ddr
)),
3806 pb
->eqs
[eq
].coef
[0]))
3808 /* There is no dependence. */
3809 *maybe_dependent
= false;
3816 /* Helper function, same as init_omega_for_ddr but specialized for
3817 data references A and B. */
3820 init_omega_for_ddr_1 (struct data_reference
*dra
, struct data_reference
*drb
,
3821 struct data_dependence_relation
*ddr
,
3822 omega_pb pb
, bool *maybe_dependent
)
3827 unsigned nb_loops
= DDR_NB_LOOPS (ddr
);
3829 /* Insert an equality per subscript. */
3830 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
3832 if (!omega_setup_subscript (DR_ACCESS_FN (dra
, i
), DR_ACCESS_FN (drb
, i
),
3833 ddr
, pb
, maybe_dependent
))
3835 else if (*maybe_dependent
== false)
3837 /* There is no dependence. */
3838 DDR_ARE_DEPENDENT (ddr
) = chrec_known
;
3843 /* Insert inequalities: constraints corresponding to the iteration
3844 domain, i.e. the loops surrounding the references "loop_x" and
3845 the distance variables "dx". The layout of the OMEGA
3846 representation is as follows:
3847 - coef[0] is the constant
3848 - coef[1..nb_loops] are the protected variables that will not be
3849 removed by the solver: the "dx"
3850 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3852 for (i
= 0; i
<= DDR_INNER_LOOP (ddr
)
3853 && DDR_LOOP_NEST (ddr
).iterate (i
, &loopi
); i
++)
3855 HOST_WIDE_INT nbi
= max_stmt_executions_int (loopi
);
3858 ineq
= omega_add_zero_geq (pb
, omega_black
);
3859 pb
->geqs
[ineq
].coef
[i
+ nb_loops
+ 1] = 1;
3861 /* 0 <= loop_x + dx */
3862 ineq
= omega_add_zero_geq (pb
, omega_black
);
3863 pb
->geqs
[ineq
].coef
[i
+ nb_loops
+ 1] = 1;
3864 pb
->geqs
[ineq
].coef
[i
+ 1] = 1;
3868 /* loop_x <= nb_iters */
3869 ineq
= omega_add_zero_geq (pb
, omega_black
);
3870 pb
->geqs
[ineq
].coef
[i
+ nb_loops
+ 1] = -1;
3871 pb
->geqs
[ineq
].coef
[0] = nbi
;
3873 /* loop_x + dx <= nb_iters */
3874 ineq
= omega_add_zero_geq (pb
, omega_black
);
3875 pb
->geqs
[ineq
].coef
[i
+ nb_loops
+ 1] = -1;
3876 pb
->geqs
[ineq
].coef
[i
+ 1] = -1;
3877 pb
->geqs
[ineq
].coef
[0] = nbi
;
3879 /* A step "dx" bigger than nb_iters is not feasible, so
3880 add "0 <= nb_iters + dx", */
3881 ineq
= omega_add_zero_geq (pb
, omega_black
);
3882 pb
->geqs
[ineq
].coef
[i
+ 1] = 1;
3883 pb
->geqs
[ineq
].coef
[0] = nbi
;
3884 /* and "dx <= nb_iters". */
3885 ineq
= omega_add_zero_geq (pb
, omega_black
);
3886 pb
->geqs
[ineq
].coef
[i
+ 1] = -1;
3887 pb
->geqs
[ineq
].coef
[0] = nbi
;
3891 omega_extract_distance_vectors (pb
, ddr
);
3896 /* Sets up the Omega dependence problem for the data dependence
3897 relation DDR. Returns false when the constraint system cannot be
3898 built, ie. when the test answers "don't know". Returns true
3899 otherwise, and when independence has been proved (using one of the
3900 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3901 set MAYBE_DEPENDENT to true.
3903 Example: for setting up the dependence system corresponding to the
3904 conflicting accesses
3909 | ... A[2*j, 2*(i + j)]
3913 the following constraints come from the iteration domain:
3920 where di, dj are the distance variables. The constraints
3921 representing the conflicting elements are:
3924 i + 1 = 2 * (i + di + j + dj)
3926 For asking that the resulting distance vector (di, dj) be
3927 lexicographically positive, we insert the constraint "di >= 0". If
3928 "di = 0" in the solution, we fix that component to zero, and we
3929 look at the inner loops: we set a new problem where all the outer
3930 loop distances are zero, and fix this inner component to be
3931 positive. When one of the components is positive, we save that
3932 distance, and set a new problem where the distance on this loop is
3933 zero, searching for other distances in the inner loops. Here is
3934 the classic example that illustrates that we have to set for each
3935 inner loop a new problem:
3943 we have to save two distances (1, 0) and (0, 1).
3945 Given two array references, refA and refB, we have to set the
3946 dependence problem twice, refA vs. refB and refB vs. refA, and we
3947 cannot do a single test, as refB might occur before refA in the
3948 inner loops, and the contrary when considering outer loops: ex.
3953 | T[{1,+,1}_2][{1,+,1}_1] // refA
3954 | T[{2,+,1}_2][{0,+,1}_1] // refB
3959 refB touches the elements in T before refA, and thus for the same
3960 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3961 but for successive loop_0 iterations, we have (1, -1, 1)
3963 The Omega solver expects the distance variables ("di" in the
3964 previous example) to come first in the constraint system (as
3965 variables to be protected, or "safe" variables), the constraint
3966 system is built using the following layout:
3968 "cst | distance vars | index vars".
3972 init_omega_for_ddr (struct data_dependence_relation
*ddr
,
3973 bool *maybe_dependent
)
3978 *maybe_dependent
= true;
3980 if (same_access_functions (ddr
))
3983 lambda_vector dir_v
;
3985 /* Save the 0 vector. */
3986 save_dist_v (ddr
, lambda_vector_new (DDR_NB_LOOPS (ddr
)));
3987 dir_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3988 for (j
= 0; j
< DDR_NB_LOOPS (ddr
); j
++)
3989 dir_v
[j
] = dir_equal
;
3990 save_dir_v (ddr
, dir_v
);
3992 /* Save the dependences carried by outer loops. */
3993 pb
= omega_alloc_problem (2 * DDR_NB_LOOPS (ddr
), DDR_NB_LOOPS (ddr
));
3994 res
= init_omega_for_ddr_1 (DDR_A (ddr
), DDR_B (ddr
), ddr
, pb
,
3996 omega_free_problem (pb
);
4000 /* Omega expects the protected variables (those that have to be kept
4001 after elimination) to appear first in the constraint system.
4002 These variables are the distance variables. In the following
4003 initialization we declare NB_LOOPS safe variables, and the total
4004 number of variables for the constraint system is 2*NB_LOOPS. */
4005 pb
= omega_alloc_problem (2 * DDR_NB_LOOPS (ddr
), DDR_NB_LOOPS (ddr
));
4006 res
= init_omega_for_ddr_1 (DDR_A (ddr
), DDR_B (ddr
), ddr
, pb
,
4008 omega_free_problem (pb
);
4010 /* Stop computation if not decidable, or no dependence. */
4011 if (res
== false || *maybe_dependent
== false)
4014 pb
= omega_alloc_problem (2 * DDR_NB_LOOPS (ddr
), DDR_NB_LOOPS (ddr
));
4015 res
= init_omega_for_ddr_1 (DDR_B (ddr
), DDR_A (ddr
), ddr
, pb
,
4017 omega_free_problem (pb
);
4022 /* Return true when DDR contains the same information as that stored
4023 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
4026 ddr_consistent_p (FILE *file
,
4027 struct data_dependence_relation
*ddr
,
4028 vec
<lambda_vector
> dist_vects
,
4029 vec
<lambda_vector
> dir_vects
)
4033 /* If dump_file is set, output there. */
4034 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4037 if (dist_vects
.length () != DDR_NUM_DIST_VECTS (ddr
))
4039 lambda_vector b_dist_v
;
4040 fprintf (file
, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
4041 dist_vects
.length (),
4042 DDR_NUM_DIST_VECTS (ddr
));
4044 fprintf (file
, "Banerjee dist vectors:\n");
4045 FOR_EACH_VEC_ELT (dist_vects
, i
, b_dist_v
)
4046 print_lambda_vector (file
, b_dist_v
, DDR_NB_LOOPS (ddr
));
4048 fprintf (file
, "Omega dist vectors:\n");
4049 for (i
= 0; i
< DDR_NUM_DIST_VECTS (ddr
); i
++)
4050 print_lambda_vector (file
, DDR_DIST_VECT (ddr
, i
), DDR_NB_LOOPS (ddr
));
4052 fprintf (file
, "data dependence relation:\n");
4053 dump_data_dependence_relation (file
, ddr
);
4055 fprintf (file
, ")\n");
4059 if (dir_vects
.length () != DDR_NUM_DIR_VECTS (ddr
))
4061 fprintf (file
, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
4062 dir_vects
.length (),
4063 DDR_NUM_DIR_VECTS (ddr
));
4067 for (i
= 0; i
< DDR_NUM_DIST_VECTS (ddr
); i
++)
4069 lambda_vector a_dist_v
;
4070 lambda_vector b_dist_v
= DDR_DIST_VECT (ddr
, i
);
4072 /* Distance vectors are not ordered in the same way in the DDR
4073 and in the DIST_VECTS: search for a matching vector. */
4074 FOR_EACH_VEC_ELT (dist_vects
, j
, a_dist_v
)
4075 if (lambda_vector_equal (a_dist_v
, b_dist_v
, DDR_NB_LOOPS (ddr
)))
4078 if (j
== dist_vects
.length ())
4080 fprintf (file
, "\n(Dist vectors from the first dependence analyzer:\n");
4081 print_dist_vectors (file
, dist_vects
, DDR_NB_LOOPS (ddr
));
4082 fprintf (file
, "not found in Omega dist vectors:\n");
4083 print_dist_vectors (file
, DDR_DIST_VECTS (ddr
), DDR_NB_LOOPS (ddr
));
4084 fprintf (file
, "data dependence relation:\n");
4085 dump_data_dependence_relation (file
, ddr
);
4086 fprintf (file
, ")\n");
4090 for (i
= 0; i
< DDR_NUM_DIR_VECTS (ddr
); i
++)
4092 lambda_vector a_dir_v
;
4093 lambda_vector b_dir_v
= DDR_DIR_VECT (ddr
, i
);
4095 /* Direction vectors are not ordered in the same way in the DDR
4096 and in the DIR_VECTS: search for a matching vector. */
4097 FOR_EACH_VEC_ELT (dir_vects
, j
, a_dir_v
)
4098 if (lambda_vector_equal (a_dir_v
, b_dir_v
, DDR_NB_LOOPS (ddr
)))
4101 if (j
== dist_vects
.length ())
4103 fprintf (file
, "\n(Dir vectors from the first dependence analyzer:\n");
4104 print_dir_vectors (file
, dir_vects
, DDR_NB_LOOPS (ddr
));
4105 fprintf (file
, "not found in Omega dir vectors:\n");
4106 print_dir_vectors (file
, DDR_DIR_VECTS (ddr
), DDR_NB_LOOPS (ddr
));
4107 fprintf (file
, "data dependence relation:\n");
4108 dump_data_dependence_relation (file
, ddr
);
4109 fprintf (file
, ")\n");
4116 /* This computes the affine dependence relation between A and B with
4117 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
4118 independence between two accesses, while CHREC_DONT_KNOW is used
4119 for representing the unknown relation.
4121 Note that it is possible to stop the computation of the dependence
4122 relation the first time we detect a CHREC_KNOWN element for a given
4126 compute_affine_dependence (struct data_dependence_relation
*ddr
,
4127 struct loop
*loop_nest
)
4129 struct data_reference
*dra
= DDR_A (ddr
);
4130 struct data_reference
*drb
= DDR_B (ddr
);
4132 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4134 fprintf (dump_file
, "(compute_affine_dependence\n");
4135 fprintf (dump_file
, " stmt_a: ");
4136 print_gimple_stmt (dump_file
, DR_STMT (dra
), 0, TDF_SLIM
);
4137 fprintf (dump_file
, " stmt_b: ");
4138 print_gimple_stmt (dump_file
, DR_STMT (drb
), 0, TDF_SLIM
);
4141 /* Analyze only when the dependence relation is not yet known. */
4142 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
4144 dependence_stats
.num_dependence_tests
++;
4146 if (access_functions_are_affine_or_constant_p (dra
, loop_nest
)
4147 && access_functions_are_affine_or_constant_p (drb
, loop_nest
))
4149 if (flag_check_data_deps
)
4151 /* Compute the dependences using the first algorithm. */
4152 subscript_dependence_tester (ddr
, loop_nest
);
4154 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4156 fprintf (dump_file
, "\n\nBanerjee Analyzer\n");
4157 dump_data_dependence_relation (dump_file
, ddr
);
4160 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
4162 bool maybe_dependent
;
4163 vec
<lambda_vector
> dir_vects
, dist_vects
;
4165 /* Save the result of the first DD analyzer. */
4166 dist_vects
= DDR_DIST_VECTS (ddr
);
4167 dir_vects
= DDR_DIR_VECTS (ddr
);
4169 /* Reset the information. */
4170 DDR_DIST_VECTS (ddr
).create (0);
4171 DDR_DIR_VECTS (ddr
).create (0);
4173 /* Compute the same information using Omega. */
4174 if (!init_omega_for_ddr (ddr
, &maybe_dependent
))
4175 goto csys_dont_know
;
4177 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4179 fprintf (dump_file
, "Omega Analyzer\n");
4180 dump_data_dependence_relation (dump_file
, ddr
);
4183 /* Check that we get the same information. */
4184 if (maybe_dependent
)
4185 gcc_assert (ddr_consistent_p (stderr
, ddr
, dist_vects
,
4190 subscript_dependence_tester (ddr
, loop_nest
);
4193 /* As a last case, if the dependence cannot be determined, or if
4194 the dependence is considered too difficult to determine, answer
4199 dependence_stats
.num_dependence_undetermined
++;
4201 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4203 fprintf (dump_file
, "Data ref a:\n");
4204 dump_data_reference (dump_file
, dra
);
4205 fprintf (dump_file
, "Data ref b:\n");
4206 dump_data_reference (dump_file
, drb
);
4207 fprintf (dump_file
, "affine dependence test not usable: access function not affine or constant.\n");
4209 finalize_ddr_dependent (ddr
, chrec_dont_know
);
4213 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4215 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
4216 fprintf (dump_file
, ") -> no dependence\n");
4217 else if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
4218 fprintf (dump_file
, ") -> dependence analysis failed\n");
4220 fprintf (dump_file
, ")\n");
4224 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4225 the data references in DATAREFS, in the LOOP_NEST. When
4226 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4227 relations. Return true when successful, i.e. data references number
4228 is small enough to be handled. */
4231 compute_all_dependences (vec
<data_reference_p
> datarefs
,
4232 vec
<ddr_p
> *dependence_relations
,
4233 vec
<loop_p
> loop_nest
,
4234 bool compute_self_and_rr
)
4236 struct data_dependence_relation
*ddr
;
4237 struct data_reference
*a
, *b
;
4240 if ((int) datarefs
.length ()
4241 > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS
))
4243 struct data_dependence_relation
*ddr
;
4245 /* Insert a single relation into dependence_relations:
4247 ddr
= initialize_data_dependence_relation (NULL
, NULL
, loop_nest
);
4248 dependence_relations
->safe_push (ddr
);
4252 FOR_EACH_VEC_ELT (datarefs
, i
, a
)
4253 for (j
= i
+ 1; datarefs
.iterate (j
, &b
); j
++)
4254 if (DR_IS_WRITE (a
) || DR_IS_WRITE (b
) || compute_self_and_rr
)
4256 ddr
= initialize_data_dependence_relation (a
, b
, loop_nest
);
4257 dependence_relations
->safe_push (ddr
);
4258 if (loop_nest
.exists ())
4259 compute_affine_dependence (ddr
, loop_nest
[0]);
4262 if (compute_self_and_rr
)
4263 FOR_EACH_VEC_ELT (datarefs
, i
, a
)
4265 ddr
= initialize_data_dependence_relation (a
, a
, loop_nest
);
4266 dependence_relations
->safe_push (ddr
);
4267 if (loop_nest
.exists ())
4268 compute_affine_dependence (ddr
, loop_nest
[0]);
4274 /* Describes a location of a memory reference. */
4276 typedef struct data_ref_loc_d
4278 /* Position of the memory reference. */
4281 /* True if the memory reference is read. */
4286 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4287 true if STMT clobbers memory, false otherwise. */
4290 get_references_in_stmt (gimple stmt
, vec
<data_ref_loc
> *references
)
4292 bool clobbers_memory
= false;
4295 enum gimple_code stmt_code
= gimple_code (stmt
);
4297 references
->create (0);
4299 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4300 As we cannot model data-references to not spelled out
4301 accesses give up if they may occur. */
4302 if ((stmt_code
== GIMPLE_CALL
4303 && !(gimple_call_flags (stmt
) & ECF_CONST
))
4304 || (stmt_code
== GIMPLE_ASM
4305 && (gimple_asm_volatile_p (stmt
) || gimple_vuse (stmt
))))
4306 clobbers_memory
= true;
4308 if (!gimple_vuse (stmt
))
4309 return clobbers_memory
;
4311 if (stmt_code
== GIMPLE_ASSIGN
)
4314 op0
= gimple_assign_lhs_ptr (stmt
);
4315 op1
= gimple_assign_rhs1_ptr (stmt
);
4318 || (REFERENCE_CLASS_P (*op1
)
4319 && (base
= get_base_address (*op1
))
4320 && TREE_CODE (base
) != SSA_NAME
))
4324 references
->safe_push (ref
);
4327 else if (stmt_code
== GIMPLE_CALL
)
4331 op0
= gimple_call_lhs_ptr (stmt
);
4332 n
= gimple_call_num_args (stmt
);
4333 for (i
= 0; i
< n
; i
++)
4335 op1
= gimple_call_arg_ptr (stmt
, i
);
4338 || (REFERENCE_CLASS_P (*op1
) && get_base_address (*op1
)))
4342 references
->safe_push (ref
);
4347 return clobbers_memory
;
4351 || (REFERENCE_CLASS_P (*op0
) && get_base_address (*op0
))))
4354 ref
.is_read
= false;
4355 references
->safe_push (ref
);
4357 return clobbers_memory
;
4360 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4361 reference, returns false, otherwise returns true. NEST is the outermost
4362 loop of the loop nest in which the references should be analyzed. */
4365 find_data_references_in_stmt (struct loop
*nest
, gimple stmt
,
4366 vec
<data_reference_p
> *datarefs
)
4369 vec
<data_ref_loc
> references
;
4372 data_reference_p dr
;
4374 if (get_references_in_stmt (stmt
, &references
))
4376 references
.release ();
4380 FOR_EACH_VEC_ELT (references
, i
, ref
)
4382 dr
= create_data_ref (nest
, loop_containing_stmt (stmt
),
4383 *ref
->pos
, stmt
, ref
->is_read
);
4384 gcc_assert (dr
!= NULL
);
4385 datarefs
->safe_push (dr
);
4387 references
.release ();
4391 /* Stores the data references in STMT to DATAREFS. If there is an
4392 unanalyzable reference, returns false, otherwise returns true.
4393 NEST is the outermost loop of the loop nest in which the references
4394 should be instantiated, LOOP is the loop in which the references
4395 should be analyzed. */
4398 graphite_find_data_references_in_stmt (loop_p nest
, loop_p loop
, gimple stmt
,
4399 vec
<data_reference_p
> *datarefs
)
4402 vec
<data_ref_loc
> references
;
4405 data_reference_p dr
;
4407 if (get_references_in_stmt (stmt
, &references
))
4409 references
.release ();
4413 FOR_EACH_VEC_ELT (references
, i
, ref
)
4415 dr
= create_data_ref (nest
, loop
, *ref
->pos
, stmt
, ref
->is_read
);
4416 gcc_assert (dr
!= NULL
);
4417 datarefs
->safe_push (dr
);
4420 references
.release ();
4424 /* Search the data references in LOOP, and record the information into
4425 DATAREFS. Returns chrec_dont_know when failing to analyze a
4426 difficult case, returns NULL_TREE otherwise. */
4429 find_data_references_in_bb (struct loop
*loop
, basic_block bb
,
4430 vec
<data_reference_p
> *datarefs
)
4432 gimple_stmt_iterator bsi
;
4434 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
4436 gimple stmt
= gsi_stmt (bsi
);
4438 if (!find_data_references_in_stmt (loop
, stmt
, datarefs
))
4440 struct data_reference
*res
;
4441 res
= XCNEW (struct data_reference
);
4442 datarefs
->safe_push (res
);
4444 return chrec_dont_know
;
4451 /* Search the data references in LOOP, and record the information into
4452 DATAREFS. Returns chrec_dont_know when failing to analyze a
4453 difficult case, returns NULL_TREE otherwise.
4455 TODO: This function should be made smarter so that it can handle address
4456 arithmetic as if they were array accesses, etc. */
4459 find_data_references_in_loop (struct loop
*loop
,
4460 vec
<data_reference_p
> *datarefs
)
4462 basic_block bb
, *bbs
;
4465 bbs
= get_loop_body_in_dom_order (loop
);
4467 for (i
= 0; i
< loop
->num_nodes
; i
++)
4471 if (find_data_references_in_bb (loop
, bb
, datarefs
) == chrec_dont_know
)
4474 return chrec_dont_know
;
4482 /* Recursive helper function. */
4485 find_loop_nest_1 (struct loop
*loop
, vec
<loop_p
> *loop_nest
)
4487 /* Inner loops of the nest should not contain siblings. Example:
4488 when there are two consecutive loops,
4499 the dependence relation cannot be captured by the distance
4504 loop_nest
->safe_push (loop
);
4506 return find_loop_nest_1 (loop
->inner
, loop_nest
);
4510 /* Return false when the LOOP is not well nested. Otherwise return
4511 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4512 contain the loops from the outermost to the innermost, as they will
4513 appear in the classic distance vector. */
4516 find_loop_nest (struct loop
*loop
, vec
<loop_p
> *loop_nest
)
4518 loop_nest
->safe_push (loop
);
4520 return find_loop_nest_1 (loop
->inner
, loop_nest
);
4524 /* Returns true when the data dependences have been computed, false otherwise.
4525 Given a loop nest LOOP, the following vectors are returned:
4526 DATAREFS is initialized to all the array elements contained in this loop,
4527 DEPENDENCE_RELATIONS contains the relations between the data references.
4528 Compute read-read and self relations if
4529 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4532 compute_data_dependences_for_loop (struct loop
*loop
,
4533 bool compute_self_and_read_read_dependences
,
4534 vec
<loop_p
> *loop_nest
,
4535 vec
<data_reference_p
> *datarefs
,
4536 vec
<ddr_p
> *dependence_relations
)
4540 memset (&dependence_stats
, 0, sizeof (dependence_stats
));
4542 /* If the loop nest is not well formed, or one of the data references
4543 is not computable, give up without spending time to compute other
4546 || !find_loop_nest (loop
, loop_nest
)
4547 || find_data_references_in_loop (loop
, datarefs
) == chrec_dont_know
4548 || !compute_all_dependences (*datarefs
, dependence_relations
, *loop_nest
,
4549 compute_self_and_read_read_dependences
))
4552 if (dump_file
&& (dump_flags
& TDF_STATS
))
4554 fprintf (dump_file
, "Dependence tester statistics:\n");
4556 fprintf (dump_file
, "Number of dependence tests: %d\n",
4557 dependence_stats
.num_dependence_tests
);
4558 fprintf (dump_file
, "Number of dependence tests classified dependent: %d\n",
4559 dependence_stats
.num_dependence_dependent
);
4560 fprintf (dump_file
, "Number of dependence tests classified independent: %d\n",
4561 dependence_stats
.num_dependence_independent
);
4562 fprintf (dump_file
, "Number of undetermined dependence tests: %d\n",
4563 dependence_stats
.num_dependence_undetermined
);
4565 fprintf (dump_file
, "Number of subscript tests: %d\n",
4566 dependence_stats
.num_subscript_tests
);
4567 fprintf (dump_file
, "Number of undetermined subscript tests: %d\n",
4568 dependence_stats
.num_subscript_undetermined
);
4569 fprintf (dump_file
, "Number of same subscript function: %d\n",
4570 dependence_stats
.num_same_subscript_function
);
4572 fprintf (dump_file
, "Number of ziv tests: %d\n",
4573 dependence_stats
.num_ziv
);
4574 fprintf (dump_file
, "Number of ziv tests returning dependent: %d\n",
4575 dependence_stats
.num_ziv_dependent
);
4576 fprintf (dump_file
, "Number of ziv tests returning independent: %d\n",
4577 dependence_stats
.num_ziv_independent
);
4578 fprintf (dump_file
, "Number of ziv tests unimplemented: %d\n",
4579 dependence_stats
.num_ziv_unimplemented
);
4581 fprintf (dump_file
, "Number of siv tests: %d\n",
4582 dependence_stats
.num_siv
);
4583 fprintf (dump_file
, "Number of siv tests returning dependent: %d\n",
4584 dependence_stats
.num_siv_dependent
);
4585 fprintf (dump_file
, "Number of siv tests returning independent: %d\n",
4586 dependence_stats
.num_siv_independent
);
4587 fprintf (dump_file
, "Number of siv tests unimplemented: %d\n",
4588 dependence_stats
.num_siv_unimplemented
);
4590 fprintf (dump_file
, "Number of miv tests: %d\n",
4591 dependence_stats
.num_miv
);
4592 fprintf (dump_file
, "Number of miv tests returning dependent: %d\n",
4593 dependence_stats
.num_miv_dependent
);
4594 fprintf (dump_file
, "Number of miv tests returning independent: %d\n",
4595 dependence_stats
.num_miv_independent
);
4596 fprintf (dump_file
, "Number of miv tests unimplemented: %d\n",
4597 dependence_stats
.num_miv_unimplemented
);
4603 /* Returns true when the data dependences for the basic block BB have been
4604 computed, false otherwise.
4605 DATAREFS is initialized to all the array elements contained in this basic
4606 block, DEPENDENCE_RELATIONS contains the relations between the data
4607 references. Compute read-read and self relations if
4608 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4610 compute_data_dependences_for_bb (basic_block bb
,
4611 bool compute_self_and_read_read_dependences
,
4612 vec
<data_reference_p
> *datarefs
,
4613 vec
<ddr_p
> *dependence_relations
)
4615 if (find_data_references_in_bb (NULL
, bb
, datarefs
) == chrec_dont_know
)
4618 return compute_all_dependences (*datarefs
, dependence_relations
,
4620 compute_self_and_read_read_dependences
);
4623 /* Entry point (for testing only). Analyze all the data references
4624 and the dependence relations in LOOP.
4626 The data references are computed first.
4628 A relation on these nodes is represented by a complete graph. Some
4629 of the relations could be of no interest, thus the relations can be
4632 In the following function we compute all the relations. This is
4633 just a first implementation that is here for:
4634 - for showing how to ask for the dependence relations,
4635 - for the debugging the whole dependence graph,
4636 - for the dejagnu testcases and maintenance.
4638 It is possible to ask only for a part of the graph, avoiding to
4639 compute the whole dependence graph. The computed dependences are
4640 stored in a knowledge base (KB) such that later queries don't
4641 recompute the same information. The implementation of this KB is
4642 transparent to the optimizer, and thus the KB can be changed with a
4643 more efficient implementation, or the KB could be disabled. */
4645 analyze_all_data_dependences (struct loop
*loop
)
4648 int nb_data_refs
= 10;
4649 vec
<data_reference_p
> datarefs
;
4650 datarefs
.create (nb_data_refs
);
4651 vec
<ddr_p
> dependence_relations
;
4652 dependence_relations
.create (nb_data_refs
* nb_data_refs
);
4653 vec
<loop_p
> loop_nest
;
4654 loop_nest
.create (3);
4656 /* Compute DDs on the whole function. */
4657 compute_data_dependences_for_loop (loop
, false, &loop_nest
, &datarefs
,
4658 &dependence_relations
);
4662 dump_data_dependence_relations (dump_file
, dependence_relations
);
4663 fprintf (dump_file
, "\n\n");
4665 if (dump_flags
& TDF_DETAILS
)
4666 dump_dist_dir_vectors (dump_file
, dependence_relations
);
4668 if (dump_flags
& TDF_STATS
)
4670 unsigned nb_top_relations
= 0;
4671 unsigned nb_bot_relations
= 0;
4672 unsigned nb_chrec_relations
= 0;
4673 struct data_dependence_relation
*ddr
;
4675 FOR_EACH_VEC_ELT (dependence_relations
, i
, ddr
)
4677 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr
)))
4680 else if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
4684 nb_chrec_relations
++;
4687 gather_stats_on_scev_database ();
4691 loop_nest
.release ();
4692 free_dependence_relations (dependence_relations
);
4693 free_data_refs (datarefs
);
4696 /* Computes all the data dependences and check that the results of
4697 several analyzers are the same. */
4700 tree_check_data_deps (void)
4703 struct loop
*loop_nest
;
4705 FOR_EACH_LOOP (li
, loop_nest
, 0)
4706 analyze_all_data_dependences (loop_nest
);
4709 /* Free the memory used by a data dependence relation DDR. */
4712 free_dependence_relation (struct data_dependence_relation
*ddr
)
4717 if (DDR_SUBSCRIPTS (ddr
).exists ())
4718 free_subscripts (DDR_SUBSCRIPTS (ddr
));
4719 DDR_DIST_VECTS (ddr
).release ();
4720 DDR_DIR_VECTS (ddr
).release ();
4725 /* Free the memory used by the data dependence relations from
4726 DEPENDENCE_RELATIONS. */
4729 free_dependence_relations (vec
<ddr_p
> dependence_relations
)
4732 struct data_dependence_relation
*ddr
;
4734 FOR_EACH_VEC_ELT (dependence_relations
, i
, ddr
)
4736 free_dependence_relation (ddr
);
4738 dependence_relations
.release ();
4741 /* Free the memory used by the data references from DATAREFS. */
4744 free_data_refs (vec
<data_reference_p
> datarefs
)
4747 struct data_reference
*dr
;
4749 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
4751 datarefs
.release ();
4756 /* Dump vertex I in RDG to FILE. */
4759 dump_rdg_vertex (FILE *file
, struct graph
*rdg
, int i
)
4761 struct vertex
*v
= &(rdg
->vertices
[i
]);
4762 struct graph_edge
*e
;
4764 fprintf (file
, "(vertex %d: (%s%s) (in:", i
,
4765 RDG_MEM_WRITE_STMT (rdg
, i
) ? "w" : "",
4766 RDG_MEM_READS_STMT (rdg
, i
) ? "r" : "");
4769 for (e
= v
->pred
; e
; e
= e
->pred_next
)
4770 fprintf (file
, " %d", e
->src
);
4772 fprintf (file
, ") (out:");
4775 for (e
= v
->succ
; e
; e
= e
->succ_next
)
4776 fprintf (file
, " %d", e
->dest
);
4778 fprintf (file
, ")\n");
4779 print_gimple_stmt (file
, RDGV_STMT (v
), 0, TDF_VOPS
|TDF_MEMSYMS
);
4780 fprintf (file
, ")\n");
4783 /* Call dump_rdg_vertex on stderr. */
4786 debug_rdg_vertex (struct graph
*rdg
, int i
)
4788 dump_rdg_vertex (stderr
, rdg
, i
);
4791 /* Dump component C of RDG to FILE. If DUMPED is non-null, set the
4792 dumped vertices to that bitmap. */
4795 dump_rdg_component (FILE *file
, struct graph
*rdg
, int c
, bitmap dumped
)
4799 fprintf (file
, "(%d\n", c
);
4801 for (i
= 0; i
< rdg
->n_vertices
; i
++)
4802 if (rdg
->vertices
[i
].component
== c
)
4805 bitmap_set_bit (dumped
, i
);
4807 dump_rdg_vertex (file
, rdg
, i
);
4810 fprintf (file
, ")\n");
4813 /* Call dump_rdg_vertex on stderr. */
4816 debug_rdg_component (struct graph
*rdg
, int c
)
4818 dump_rdg_component (stderr
, rdg
, c
, NULL
);
4821 /* Dump the reduced dependence graph RDG to FILE. */
4824 dump_rdg (FILE *file
, struct graph
*rdg
)
4827 bitmap dumped
= BITMAP_ALLOC (NULL
);
4829 fprintf (file
, "(rdg\n");
4831 for (i
= 0; i
< rdg
->n_vertices
; i
++)
4832 if (!bitmap_bit_p (dumped
, i
))
4833 dump_rdg_component (file
, rdg
, rdg
->vertices
[i
].component
, dumped
);
4835 fprintf (file
, ")\n");
4836 BITMAP_FREE (dumped
);
4839 /* Call dump_rdg on stderr. */
4842 debug_rdg (struct graph
*rdg
)
4844 dump_rdg (stderr
, rdg
);
4848 dot_rdg_1 (FILE *file
, struct graph
*rdg
)
4852 fprintf (file
, "digraph RDG {\n");
4854 for (i
= 0; i
< rdg
->n_vertices
; i
++)
4856 struct vertex
*v
= &(rdg
->vertices
[i
]);
4857 struct graph_edge
*e
;
4859 /* Highlight reads from memory. */
4860 if (RDG_MEM_READS_STMT (rdg
, i
))
4861 fprintf (file
, "%d [style=filled, fillcolor=green]\n", i
);
4863 /* Highlight stores to memory. */
4864 if (RDG_MEM_WRITE_STMT (rdg
, i
))
4865 fprintf (file
, "%d [style=filled, fillcolor=red]\n", i
);
4868 for (e
= v
->succ
; e
; e
= e
->succ_next
)
4869 switch (RDGE_TYPE (e
))
4872 fprintf (file
, "%d -> %d [label=input] \n", i
, e
->dest
);
4876 fprintf (file
, "%d -> %d [label=output] \n", i
, e
->dest
);
4880 /* These are the most common dependences: don't print these. */
4881 fprintf (file
, "%d -> %d \n", i
, e
->dest
);
4885 fprintf (file
, "%d -> %d [label=anti] \n", i
, e
->dest
);
4893 fprintf (file
, "}\n\n");
4896 /* Display the Reduced Dependence Graph using dotty. */
4897 extern void dot_rdg (struct graph
*);
4900 dot_rdg (struct graph
*rdg
)
4902 /* When debugging, enable the following code. This cannot be used
4903 in production compilers because it calls "system". */
4905 FILE *file
= fopen ("/tmp/rdg.dot", "w");
4906 gcc_assert (file
!= NULL
);
4908 dot_rdg_1 (file
, rdg
);
4911 system ("dotty /tmp/rdg.dot &");
4913 dot_rdg_1 (stderr
, rdg
);
4917 /* Returns the index of STMT in RDG. */
4920 rdg_vertex_for_stmt (struct graph
*rdg ATTRIBUTE_UNUSED
, gimple stmt
)
4922 int index
= gimple_uid (stmt
);
4923 gcc_checking_assert (index
== -1 || RDG_STMT (rdg
, index
) == stmt
);
4927 /* Creates an edge in RDG for each distance vector from DDR. The
4928 order that we keep track of in the RDG is the order in which
4929 statements have to be executed. */
4932 create_rdg_edge_for_ddr (struct graph
*rdg
, ddr_p ddr
)
4934 struct graph_edge
*e
;
4936 data_reference_p dra
= DDR_A (ddr
);
4937 data_reference_p drb
= DDR_B (ddr
);
4938 unsigned level
= ddr_dependence_level (ddr
);
4940 /* For non scalar dependences, when the dependence is REVERSED,
4941 statement B has to be executed before statement A. */
4943 && !DDR_REVERSED_P (ddr
))
4945 data_reference_p tmp
= dra
;
4950 va
= rdg_vertex_for_stmt (rdg
, DR_STMT (dra
));
4951 vb
= rdg_vertex_for_stmt (rdg
, DR_STMT (drb
));
4953 if (va
< 0 || vb
< 0)
4956 e
= add_edge (rdg
, va
, vb
);
4957 e
->data
= XNEW (struct rdg_edge
);
4959 RDGE_LEVEL (e
) = level
;
4960 RDGE_RELATION (e
) = ddr
;
4962 /* Determines the type of the data dependence. */
4963 if (DR_IS_READ (dra
) && DR_IS_READ (drb
))
4964 RDGE_TYPE (e
) = input_dd
;
4965 else if (DR_IS_WRITE (dra
) && DR_IS_WRITE (drb
))
4966 RDGE_TYPE (e
) = output_dd
;
4967 else if (DR_IS_WRITE (dra
) && DR_IS_READ (drb
))
4968 RDGE_TYPE (e
) = flow_dd
;
4969 else if (DR_IS_READ (dra
) && DR_IS_WRITE (drb
))
4970 RDGE_TYPE (e
) = anti_dd
;
4973 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
4974 the index of DEF in RDG. */
4977 create_rdg_edges_for_scalar (struct graph
*rdg
, tree def
, int idef
)
4979 use_operand_p imm_use_p
;
4980 imm_use_iterator iterator
;
4982 FOR_EACH_IMM_USE_FAST (imm_use_p
, iterator
, def
)
4984 struct graph_edge
*e
;
4985 int use
= rdg_vertex_for_stmt (rdg
, USE_STMT (imm_use_p
));
4990 e
= add_edge (rdg
, idef
, use
);
4991 e
->data
= XNEW (struct rdg_edge
);
4992 RDGE_TYPE (e
) = flow_dd
;
4993 RDGE_RELATION (e
) = NULL
;
4997 /* Creates the edges of the reduced dependence graph RDG. */
5000 create_rdg_edges (struct graph
*rdg
, vec
<ddr_p
> ddrs
)
5003 struct data_dependence_relation
*ddr
;
5004 def_operand_p def_p
;
5007 FOR_EACH_VEC_ELT (ddrs
, i
, ddr
)
5008 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
5009 create_rdg_edge_for_ddr (rdg
, ddr
);
5011 for (i
= 0; i
< rdg
->n_vertices
; i
++)
5012 FOR_EACH_PHI_OR_STMT_DEF (def_p
, RDG_STMT (rdg
, i
),
5014 create_rdg_edges_for_scalar (rdg
, DEF_FROM_PTR (def_p
), i
);
5017 /* Build the vertices of the reduced dependence graph RDG. */
5020 create_rdg_vertices (struct graph
*rdg
, vec
<gimple
> stmts
, loop_p loop
)
5025 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
5027 vec
<data_ref_loc
> references
;
5029 struct vertex
*v
= &(rdg
->vertices
[i
]);
5031 /* Record statement to vertex mapping. */
5032 gimple_set_uid (stmt
, i
);
5034 v
->data
= XNEW (struct rdg_vertex
);
5035 RDGV_STMT (v
) = stmt
;
5036 RDGV_DATAREFS (v
).create (0);
5037 RDGV_HAS_MEM_WRITE (v
) = false;
5038 RDGV_HAS_MEM_READS (v
) = false;
5039 if (gimple_code (stmt
) == GIMPLE_PHI
)
5042 get_references_in_stmt (stmt
, &references
);
5043 FOR_EACH_VEC_ELT (references
, j
, ref
)
5045 data_reference_p dr
;
5047 RDGV_HAS_MEM_WRITE (v
) = true;
5049 RDGV_HAS_MEM_READS (v
) = true;
5050 dr
= create_data_ref (loop
, loop_containing_stmt (stmt
),
5051 *ref
->pos
, stmt
, ref
->is_read
);
5053 RDGV_DATAREFS (v
).safe_push (dr
);
5055 references
.release ();
5059 /* Initialize STMTS with all the statements of LOOP. When
5060 INCLUDE_PHIS is true, include also the PHI nodes. The order in
5061 which we discover statements is important as
5062 generate_loops_for_partition is using the same traversal for
5063 identifying statements. */
5066 stmts_from_loop (struct loop
*loop
, vec
<gimple
> *stmts
)
5069 basic_block
*bbs
= get_loop_body_in_dom_order (loop
);
5071 for (i
= 0; i
< loop
->num_nodes
; i
++)
5073 basic_block bb
= bbs
[i
];
5074 gimple_stmt_iterator bsi
;
5077 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
5078 stmts
->safe_push (gsi_stmt (bsi
));
5080 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
5082 stmt
= gsi_stmt (bsi
);
5083 if (gimple_code (stmt
) != GIMPLE_LABEL
&& !is_gimple_debug (stmt
))
5084 stmts
->safe_push (stmt
);
5091 /* Returns true when all the dependences are computable. */
5094 known_dependences_p (vec
<ddr_p
> dependence_relations
)
5099 FOR_EACH_VEC_ELT (dependence_relations
, i
, ddr
)
5100 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
5106 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5107 statement of the loop nest, and one edge per data dependence or
5108 scalar dependence. */
5111 build_empty_rdg (int n_stmts
)
5113 struct graph
*rdg
= new_graph (n_stmts
);
5117 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5118 statement of the loop nest, and one edge per data dependence or
5119 scalar dependence. */
5122 build_rdg (struct loop
*loop
,
5123 vec
<loop_p
> *loop_nest
,
5124 vec
<ddr_p
> *dependence_relations
,
5125 vec
<data_reference_p
> *datarefs
)
5127 struct graph
*rdg
= NULL
;
5129 if (compute_data_dependences_for_loop (loop
, false, loop_nest
, datarefs
,
5130 dependence_relations
)
5131 && known_dependences_p (*dependence_relations
))
5135 stmts_from_loop (loop
, &stmts
);
5136 rdg
= build_empty_rdg (stmts
.length ());
5137 create_rdg_vertices (rdg
, stmts
, loop
);
5138 create_rdg_edges (rdg
, *dependence_relations
);
5145 /* Free the reduced dependence graph RDG. */
5148 free_rdg (struct graph
*rdg
)
5152 for (i
= 0; i
< rdg
->n_vertices
; i
++)
5154 struct vertex
*v
= &(rdg
->vertices
[i
]);
5155 struct graph_edge
*e
;
5157 for (e
= v
->succ
; e
; e
= e
->succ_next
)
5160 gimple_set_uid (RDGV_STMT (v
), -1);
5161 free_data_refs (RDGV_DATAREFS (v
));
5168 /* Determines whether the statement from vertex V of the RDG has a
5169 definition used outside the loop that contains this statement. */
5172 rdg_defs_used_in_other_loops_p (struct graph
*rdg
, int v
)
5174 gimple stmt
= RDG_STMT (rdg
, v
);
5175 struct loop
*loop
= loop_containing_stmt (stmt
);
5176 use_operand_p imm_use_p
;
5177 imm_use_iterator iterator
;
5179 def_operand_p def_p
;
5184 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt
, it
, SSA_OP_DEF
)
5186 FOR_EACH_IMM_USE_FAST (imm_use_p
, iterator
, DEF_FROM_PTR (def_p
))
5188 if (loop_containing_stmt (USE_STMT (imm_use_p
)) != loop
)