1 /* Data references and dependences detectors.
2 Copyright (C) 2003-2013 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass walks a given loop structure searching for array
22 references. The information about the array accesses is recorded
23 in DATA_REFERENCE structures.
25 The basic test for determining the dependences is:
26 given two access functions chrec1 and chrec2 to a same array, and
27 x and y two vectors from the iteration domain, the same element of
28 the array is accessed twice at iterations x and y if and only if:
29 | chrec1 (x) == chrec2 (y).
31 The goals of this analysis are:
33 - to determine the independence: the relation between two
34 independent accesses is qualified with the chrec_known (this
35 information allows a loop parallelization),
37 - when two data references access the same data, to qualify the
38 dependence relation with classic dependence representations:
42 - loop carried level dependence
43 - polyhedron dependence
44 or with the chains of recurrences based representation,
46 - to define a knowledge base for storing the data dependence
49 - to define an interface to access this data.
54 - subscript: given two array accesses a subscript is the tuple
55 composed of the access functions for a given dimension. Example:
56 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57 (f1, g1), (f2, g2), (f3, g3).
59 - Diophantine equation: an equation whose coefficients and
60 solutions are integer constants, for example the equation
62 has an integer solution x = 1 and y = -1.
66 - "Advanced Compilation for High Performance Computing" by Randy
67 Allen and Ken Kennedy.
68 http://citeseer.ist.psu.edu/goff91practical.html
70 - "Loop Transformations for Restructuring Compilers - The Foundations"
78 #include "coretypes.h"
81 #include "gimple-pretty-print.h"
83 #include "gimple-iterator.h"
84 #include "tree-ssa-loop-niter.h"
85 #include "tree-ssa-loop.h"
88 #include "tree-data-ref.h"
89 #include "tree-scalar-evolution.h"
91 #include "langhooks.h"
92 #include "tree-affine.h"
95 static struct datadep_stats
97 int num_dependence_tests
;
98 int num_dependence_dependent
;
99 int num_dependence_independent
;
100 int num_dependence_undetermined
;
102 int num_subscript_tests
;
103 int num_subscript_undetermined
;
104 int num_same_subscript_function
;
107 int num_ziv_independent
;
108 int num_ziv_dependent
;
109 int num_ziv_unimplemented
;
112 int num_siv_independent
;
113 int num_siv_dependent
;
114 int num_siv_unimplemented
;
117 int num_miv_independent
;
118 int num_miv_dependent
;
119 int num_miv_unimplemented
;
122 static bool subscript_dependence_tester_1 (struct data_dependence_relation
*,
123 struct data_reference
*,
124 struct data_reference
*,
126 /* Returns true iff A divides B. */
129 tree_fold_divides_p (const_tree a
, const_tree b
)
131 gcc_assert (TREE_CODE (a
) == INTEGER_CST
);
132 gcc_assert (TREE_CODE (b
) == INTEGER_CST
);
133 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR
, b
, a
));
136 /* Returns true iff A divides B. */
139 int_divides_p (int a
, int b
)
141 return ((b
% a
) == 0);
146 /* Dump into FILE all the data references from DATAREFS. */
149 dump_data_references (FILE *file
, vec
<data_reference_p
> datarefs
)
152 struct data_reference
*dr
;
154 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
155 dump_data_reference (file
, dr
);
158 /* Unified dump into FILE all the data references from DATAREFS. */
161 debug (vec
<data_reference_p
> &ref
)
163 dump_data_references (stderr
, ref
);
167 debug (vec
<data_reference_p
> *ptr
)
172 fprintf (stderr
, "<nil>\n");
176 /* Dump into STDERR all the data references from DATAREFS. */
179 debug_data_references (vec
<data_reference_p
> datarefs
)
181 dump_data_references (stderr
, datarefs
);
184 /* Print to STDERR the data_reference DR. */
187 debug_data_reference (struct data_reference
*dr
)
189 dump_data_reference (stderr
, dr
);
192 /* Dump function for a DATA_REFERENCE structure. */
195 dump_data_reference (FILE *outf
,
196 struct data_reference
*dr
)
200 fprintf (outf
, "#(Data Ref: \n");
201 fprintf (outf
, "# bb: %d \n", gimple_bb (DR_STMT (dr
))->index
);
202 fprintf (outf
, "# stmt: ");
203 print_gimple_stmt (outf
, DR_STMT (dr
), 0, 0);
204 fprintf (outf
, "# ref: ");
205 print_generic_stmt (outf
, DR_REF (dr
), 0);
206 fprintf (outf
, "# base_object: ");
207 print_generic_stmt (outf
, DR_BASE_OBJECT (dr
), 0);
209 for (i
= 0; i
< DR_NUM_DIMENSIONS (dr
); i
++)
211 fprintf (outf
, "# Access function %d: ", i
);
212 print_generic_stmt (outf
, DR_ACCESS_FN (dr
, i
), 0);
214 fprintf (outf
, "#)\n");
217 /* Unified dump function for a DATA_REFERENCE structure. */
220 debug (data_reference
&ref
)
222 dump_data_reference (stderr
, &ref
);
226 debug (data_reference
*ptr
)
231 fprintf (stderr
, "<nil>\n");
235 /* Dumps the affine function described by FN to the file OUTF. */
238 dump_affine_function (FILE *outf
, affine_fn fn
)
243 print_generic_expr (outf
, fn
[0], TDF_SLIM
);
244 for (i
= 1; fn
.iterate (i
, &coef
); i
++)
246 fprintf (outf
, " + ");
247 print_generic_expr (outf
, coef
, TDF_SLIM
);
248 fprintf (outf
, " * x_%u", i
);
252 /* Dumps the conflict function CF to the file OUTF. */
255 dump_conflict_function (FILE *outf
, conflict_function
*cf
)
259 if (cf
->n
== NO_DEPENDENCE
)
260 fprintf (outf
, "no dependence");
261 else if (cf
->n
== NOT_KNOWN
)
262 fprintf (outf
, "not known");
265 for (i
= 0; i
< cf
->n
; i
++)
270 dump_affine_function (outf
, cf
->fns
[i
]);
276 /* Dump function for a SUBSCRIPT structure. */
279 dump_subscript (FILE *outf
, struct subscript
*subscript
)
281 conflict_function
*cf
= SUB_CONFLICTS_IN_A (subscript
);
283 fprintf (outf
, "\n (subscript \n");
284 fprintf (outf
, " iterations_that_access_an_element_twice_in_A: ");
285 dump_conflict_function (outf
, cf
);
286 if (CF_NONTRIVIAL_P (cf
))
288 tree last_iteration
= SUB_LAST_CONFLICT (subscript
);
289 fprintf (outf
, "\n last_conflict: ");
290 print_generic_expr (outf
, last_iteration
, 0);
293 cf
= SUB_CONFLICTS_IN_B (subscript
);
294 fprintf (outf
, "\n iterations_that_access_an_element_twice_in_B: ");
295 dump_conflict_function (outf
, cf
);
296 if (CF_NONTRIVIAL_P (cf
))
298 tree last_iteration
= SUB_LAST_CONFLICT (subscript
);
299 fprintf (outf
, "\n last_conflict: ");
300 print_generic_expr (outf
, last_iteration
, 0);
303 fprintf (outf
, "\n (Subscript distance: ");
304 print_generic_expr (outf
, SUB_DISTANCE (subscript
), 0);
305 fprintf (outf
, " ))\n");
308 /* Print the classic direction vector DIRV to OUTF. */
311 print_direction_vector (FILE *outf
,
317 for (eq
= 0; eq
< length
; eq
++)
319 enum data_dependence_direction dir
= ((enum data_dependence_direction
)
325 fprintf (outf
, " +");
328 fprintf (outf
, " -");
331 fprintf (outf
, " =");
333 case dir_positive_or_equal
:
334 fprintf (outf
, " +=");
336 case dir_positive_or_negative
:
337 fprintf (outf
, " +-");
339 case dir_negative_or_equal
:
340 fprintf (outf
, " -=");
343 fprintf (outf
, " *");
346 fprintf (outf
, "indep");
350 fprintf (outf
, "\n");
353 /* Print a vector of direction vectors. */
356 print_dir_vectors (FILE *outf
, vec
<lambda_vector
> dir_vects
,
362 FOR_EACH_VEC_ELT (dir_vects
, j
, v
)
363 print_direction_vector (outf
, v
, length
);
366 /* Print out a vector VEC of length N to OUTFILE. */
369 print_lambda_vector (FILE * outfile
, lambda_vector vector
, int n
)
373 for (i
= 0; i
< n
; i
++)
374 fprintf (outfile
, "%3d ", vector
[i
]);
375 fprintf (outfile
, "\n");
378 /* Print a vector of distance vectors. */
381 print_dist_vectors (FILE *outf
, vec
<lambda_vector
> dist_vects
,
387 FOR_EACH_VEC_ELT (dist_vects
, j
, v
)
388 print_lambda_vector (outf
, v
, length
);
391 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
394 dump_data_dependence_relation (FILE *outf
,
395 struct data_dependence_relation
*ddr
)
397 struct data_reference
*dra
, *drb
;
399 fprintf (outf
, "(Data Dep: \n");
401 if (!ddr
|| DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
408 dump_data_reference (outf
, dra
);
410 fprintf (outf
, " (nil)\n");
412 dump_data_reference (outf
, drb
);
414 fprintf (outf
, " (nil)\n");
416 fprintf (outf
, " (don't know)\n)\n");
422 dump_data_reference (outf
, dra
);
423 dump_data_reference (outf
, drb
);
425 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
426 fprintf (outf
, " (no dependence)\n");
428 else if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
433 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
435 fprintf (outf
, " access_fn_A: ");
436 print_generic_stmt (outf
, DR_ACCESS_FN (dra
, i
), 0);
437 fprintf (outf
, " access_fn_B: ");
438 print_generic_stmt (outf
, DR_ACCESS_FN (drb
, i
), 0);
439 dump_subscript (outf
, DDR_SUBSCRIPT (ddr
, i
));
442 fprintf (outf
, " inner loop index: %d\n", DDR_INNER_LOOP (ddr
));
443 fprintf (outf
, " loop nest: (");
444 FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr
), i
, loopi
)
445 fprintf (outf
, "%d ", loopi
->num
);
446 fprintf (outf
, ")\n");
448 for (i
= 0; i
< DDR_NUM_DIST_VECTS (ddr
); i
++)
450 fprintf (outf
, " distance_vector: ");
451 print_lambda_vector (outf
, DDR_DIST_VECT (ddr
, i
),
455 for (i
= 0; i
< DDR_NUM_DIR_VECTS (ddr
); i
++)
457 fprintf (outf
, " direction_vector: ");
458 print_direction_vector (outf
, DDR_DIR_VECT (ddr
, i
),
463 fprintf (outf
, ")\n");
469 debug_data_dependence_relation (struct data_dependence_relation
*ddr
)
471 dump_data_dependence_relation (stderr
, ddr
);
474 /* Dump into FILE all the dependence relations from DDRS. */
477 dump_data_dependence_relations (FILE *file
,
481 struct data_dependence_relation
*ddr
;
483 FOR_EACH_VEC_ELT (ddrs
, i
, ddr
)
484 dump_data_dependence_relation (file
, ddr
);
488 debug (vec
<ddr_p
> &ref
)
490 dump_data_dependence_relations (stderr
, ref
);
494 debug (vec
<ddr_p
> *ptr
)
499 fprintf (stderr
, "<nil>\n");
503 /* Dump to STDERR all the dependence relations from DDRS. */
506 debug_data_dependence_relations (vec
<ddr_p
> ddrs
)
508 dump_data_dependence_relations (stderr
, ddrs
);
511 /* Dumps the distance and direction vectors in FILE. DDRS contains
512 the dependence relations, and VECT_SIZE is the size of the
513 dependence vectors, or in other words the number of loops in the
517 dump_dist_dir_vectors (FILE *file
, vec
<ddr_p
> ddrs
)
520 struct data_dependence_relation
*ddr
;
523 FOR_EACH_VEC_ELT (ddrs
, i
, ddr
)
524 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
&& DDR_AFFINE_P (ddr
))
526 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr
), j
, v
)
528 fprintf (file
, "DISTANCE_V (");
529 print_lambda_vector (file
, v
, DDR_NB_LOOPS (ddr
));
530 fprintf (file
, ")\n");
533 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr
), j
, v
)
535 fprintf (file
, "DIRECTION_V (");
536 print_direction_vector (file
, v
, DDR_NB_LOOPS (ddr
));
537 fprintf (file
, ")\n");
541 fprintf (file
, "\n\n");
544 /* Dumps the data dependence relations DDRS in FILE. */
547 dump_ddrs (FILE *file
, vec
<ddr_p
> ddrs
)
550 struct data_dependence_relation
*ddr
;
552 FOR_EACH_VEC_ELT (ddrs
, i
, ddr
)
553 dump_data_dependence_relation (file
, ddr
);
555 fprintf (file
, "\n\n");
559 debug_ddrs (vec
<ddr_p
> ddrs
)
561 dump_ddrs (stderr
, ddrs
);
564 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
565 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
566 constant of type ssizetype, and returns true. If we cannot do this
567 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
571 split_constant_offset_1 (tree type
, tree op0
, enum tree_code code
, tree op1
,
572 tree
*var
, tree
*off
)
576 enum tree_code ocode
= code
;
584 *var
= build_int_cst (type
, 0);
585 *off
= fold_convert (ssizetype
, op0
);
588 case POINTER_PLUS_EXPR
:
593 split_constant_offset (op0
, &var0
, &off0
);
594 split_constant_offset (op1
, &var1
, &off1
);
595 *var
= fold_build2 (code
, type
, var0
, var1
);
596 *off
= size_binop (ocode
, off0
, off1
);
600 if (TREE_CODE (op1
) != INTEGER_CST
)
603 split_constant_offset (op0
, &var0
, &off0
);
604 *var
= fold_build2 (MULT_EXPR
, type
, var0
, op1
);
605 *off
= size_binop (MULT_EXPR
, off0
, fold_convert (ssizetype
, op1
));
611 HOST_WIDE_INT pbitsize
, pbitpos
;
612 enum machine_mode pmode
;
613 int punsignedp
, pvolatilep
;
615 op0
= TREE_OPERAND (op0
, 0);
616 base
= get_inner_reference (op0
, &pbitsize
, &pbitpos
, &poffset
,
617 &pmode
, &punsignedp
, &pvolatilep
, false);
619 if (pbitpos
% BITS_PER_UNIT
!= 0)
621 base
= build_fold_addr_expr (base
);
622 off0
= ssize_int (pbitpos
/ BITS_PER_UNIT
);
626 split_constant_offset (poffset
, &poffset
, &off1
);
627 off0
= size_binop (PLUS_EXPR
, off0
, off1
);
628 if (POINTER_TYPE_P (TREE_TYPE (base
)))
629 base
= fold_build_pointer_plus (base
, poffset
);
631 base
= fold_build2 (PLUS_EXPR
, TREE_TYPE (base
), base
,
632 fold_convert (TREE_TYPE (base
), poffset
));
635 var0
= fold_convert (type
, base
);
637 /* If variable length types are involved, punt, otherwise casts
638 might be converted into ARRAY_REFs in gimplify_conversion.
639 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
640 possibly no longer appears in current GIMPLE, might resurface.
641 This perhaps could run
642 if (CONVERT_EXPR_P (var0))
644 gimplify_conversion (&var0);
645 // Attempt to fill in any within var0 found ARRAY_REF's
646 // element size from corresponding op embedded ARRAY_REF,
647 // if unsuccessful, just punt.
649 while (POINTER_TYPE_P (type
))
650 type
= TREE_TYPE (type
);
651 if (int_size_in_bytes (type
) < 0)
661 gimple def_stmt
= SSA_NAME_DEF_STMT (op0
);
662 enum tree_code subcode
;
664 if (gimple_code (def_stmt
) != GIMPLE_ASSIGN
)
667 var0
= gimple_assign_rhs1 (def_stmt
);
668 subcode
= gimple_assign_rhs_code (def_stmt
);
669 var1
= gimple_assign_rhs2 (def_stmt
);
671 return split_constant_offset_1 (type
, var0
, subcode
, var1
, var
, off
);
675 /* We must not introduce undefined overflow, and we must not change the value.
676 Hence we're okay if the inner type doesn't overflow to start with
677 (pointer or signed), the outer type also is an integer or pointer
678 and the outer precision is at least as large as the inner. */
679 tree itype
= TREE_TYPE (op0
);
680 if ((POINTER_TYPE_P (itype
)
681 || (INTEGRAL_TYPE_P (itype
) && TYPE_OVERFLOW_UNDEFINED (itype
)))
682 && TYPE_PRECISION (type
) >= TYPE_PRECISION (itype
)
683 && (POINTER_TYPE_P (type
) || INTEGRAL_TYPE_P (type
)))
685 split_constant_offset (op0
, &var0
, off
);
686 *var
= fold_convert (type
, var0
);
697 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
698 will be ssizetype. */
701 split_constant_offset (tree exp
, tree
*var
, tree
*off
)
703 tree type
= TREE_TYPE (exp
), otype
, op0
, op1
, e
, o
;
707 *off
= ssize_int (0);
710 if (tree_is_chrec (exp
)
711 || get_gimple_rhs_class (TREE_CODE (exp
)) == GIMPLE_TERNARY_RHS
)
714 otype
= TREE_TYPE (exp
);
715 code
= TREE_CODE (exp
);
716 extract_ops_from_tree (exp
, &code
, &op0
, &op1
);
717 if (split_constant_offset_1 (otype
, op0
, code
, op1
, &e
, &o
))
719 *var
= fold_convert (type
, e
);
724 /* Returns the address ADDR of an object in a canonical shape (without nop
725 casts, and with type of pointer to the object). */
728 canonicalize_base_object_address (tree addr
)
734 /* The base address may be obtained by casting from integer, in that case
736 if (!POINTER_TYPE_P (TREE_TYPE (addr
)))
739 if (TREE_CODE (addr
) != ADDR_EXPR
)
742 return build_fold_addr_expr (TREE_OPERAND (addr
, 0));
745 /* Analyzes the behavior of the memory reference DR in the innermost loop or
746 basic block that contains it. Returns true if analysis succeed or false
750 dr_analyze_innermost (struct data_reference
*dr
, struct loop
*nest
)
752 gimple stmt
= DR_STMT (dr
);
753 struct loop
*loop
= loop_containing_stmt (stmt
);
754 tree ref
= DR_REF (dr
);
755 HOST_WIDE_INT pbitsize
, pbitpos
;
757 enum machine_mode pmode
;
758 int punsignedp
, pvolatilep
;
759 affine_iv base_iv
, offset_iv
;
760 tree init
, dinit
, step
;
761 bool in_loop
= (loop
&& loop
->num
);
763 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
764 fprintf (dump_file
, "analyze_innermost: ");
766 base
= get_inner_reference (ref
, &pbitsize
, &pbitpos
, &poffset
,
767 &pmode
, &punsignedp
, &pvolatilep
, false);
768 gcc_assert (base
!= NULL_TREE
);
770 if (pbitpos
% BITS_PER_UNIT
!= 0)
772 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
773 fprintf (dump_file
, "failed: bit offset alignment.\n");
777 if (TREE_CODE (base
) == MEM_REF
)
779 if (!integer_zerop (TREE_OPERAND (base
, 1)))
781 double_int moff
= mem_ref_offset (base
);
782 tree mofft
= double_int_to_tree (sizetype
, moff
);
786 poffset
= size_binop (PLUS_EXPR
, poffset
, mofft
);
788 base
= TREE_OPERAND (base
, 0);
791 base
= build_fold_addr_expr (base
);
795 if (!simple_iv (loop
, loop_containing_stmt (stmt
), base
, &base_iv
,
796 nest
? true : false))
800 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
801 fprintf (dump_file
, "failed: evolution of base is not"
808 base_iv
.step
= ssize_int (0);
809 base_iv
.no_overflow
= true;
816 base_iv
.step
= ssize_int (0);
817 base_iv
.no_overflow
= true;
822 offset_iv
.base
= ssize_int (0);
823 offset_iv
.step
= ssize_int (0);
829 offset_iv
.base
= poffset
;
830 offset_iv
.step
= ssize_int (0);
832 else if (!simple_iv (loop
, loop_containing_stmt (stmt
),
834 nest
? true : false))
838 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
839 fprintf (dump_file
, "failed: evolution of offset is not"
845 offset_iv
.base
= poffset
;
846 offset_iv
.step
= ssize_int (0);
851 init
= ssize_int (pbitpos
/ BITS_PER_UNIT
);
852 split_constant_offset (base_iv
.base
, &base_iv
.base
, &dinit
);
853 init
= size_binop (PLUS_EXPR
, init
, dinit
);
854 split_constant_offset (offset_iv
.base
, &offset_iv
.base
, &dinit
);
855 init
= size_binop (PLUS_EXPR
, init
, dinit
);
857 step
= size_binop (PLUS_EXPR
,
858 fold_convert (ssizetype
, base_iv
.step
),
859 fold_convert (ssizetype
, offset_iv
.step
));
861 DR_BASE_ADDRESS (dr
) = canonicalize_base_object_address (base_iv
.base
);
863 DR_OFFSET (dr
) = fold_convert (ssizetype
, offset_iv
.base
);
867 DR_ALIGNED_TO (dr
) = size_int (highest_pow2_factor (offset_iv
.base
));
869 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
870 fprintf (dump_file
, "success.\n");
875 /* Determines the base object and the list of indices of memory reference
876 DR, analyzed in LOOP and instantiated in loop nest NEST. */
879 dr_analyze_indices (struct data_reference
*dr
, loop_p nest
, loop_p loop
)
881 vec
<tree
> access_fns
= vNULL
;
883 tree base
, off
, access_fn
;
884 basic_block before_loop
;
886 /* If analyzing a basic-block there are no indices to analyze
887 and thus no access functions. */
890 DR_BASE_OBJECT (dr
) = DR_REF (dr
);
891 DR_ACCESS_FNS (dr
).create (0);
896 before_loop
= block_before_loop (nest
);
898 /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
899 into a two element array with a constant index. The base is
900 then just the immediate underlying object. */
901 if (TREE_CODE (ref
) == REALPART_EXPR
)
903 ref
= TREE_OPERAND (ref
, 0);
904 access_fns
.safe_push (integer_zero_node
);
906 else if (TREE_CODE (ref
) == IMAGPART_EXPR
)
908 ref
= TREE_OPERAND (ref
, 0);
909 access_fns
.safe_push (integer_one_node
);
912 /* Analyze access functions of dimensions we know to be independent. */
913 while (handled_component_p (ref
))
915 if (TREE_CODE (ref
) == ARRAY_REF
)
917 op
= TREE_OPERAND (ref
, 1);
918 access_fn
= analyze_scalar_evolution (loop
, op
);
919 access_fn
= instantiate_scev (before_loop
, loop
, access_fn
);
920 access_fns
.safe_push (access_fn
);
922 else if (TREE_CODE (ref
) == COMPONENT_REF
923 && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref
, 0))) == RECORD_TYPE
)
925 /* For COMPONENT_REFs of records (but not unions!) use the
926 FIELD_DECL offset as constant access function so we can
927 disambiguate a[i].f1 and a[i].f2. */
928 tree off
= component_ref_field_offset (ref
);
929 off
= size_binop (PLUS_EXPR
,
930 size_binop (MULT_EXPR
,
931 fold_convert (bitsizetype
, off
),
932 bitsize_int (BITS_PER_UNIT
)),
933 DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref
, 1)));
934 access_fns
.safe_push (off
);
937 /* If we have an unhandled component we could not translate
938 to an access function stop analyzing. We have determined
939 our base object in this case. */
942 ref
= TREE_OPERAND (ref
, 0);
945 /* If the address operand of a MEM_REF base has an evolution in the
946 analyzed nest, add it as an additional independent access-function. */
947 if (TREE_CODE (ref
) == MEM_REF
)
949 op
= TREE_OPERAND (ref
, 0);
950 access_fn
= analyze_scalar_evolution (loop
, op
);
951 access_fn
= instantiate_scev (before_loop
, loop
, access_fn
);
952 if (TREE_CODE (access_fn
) == POLYNOMIAL_CHREC
)
955 tree memoff
= TREE_OPERAND (ref
, 1);
956 base
= initial_condition (access_fn
);
957 orig_type
= TREE_TYPE (base
);
958 STRIP_USELESS_TYPE_CONVERSION (base
);
959 split_constant_offset (base
, &base
, &off
);
960 /* Fold the MEM_REF offset into the evolutions initial
961 value to make more bases comparable. */
962 if (!integer_zerop (memoff
))
964 off
= size_binop (PLUS_EXPR
, off
,
965 fold_convert (ssizetype
, memoff
));
966 memoff
= build_int_cst (TREE_TYPE (memoff
), 0);
968 access_fn
= chrec_replace_initial_condition
969 (access_fn
, fold_convert (orig_type
, off
));
970 /* ??? This is still not a suitable base object for
971 dr_may_alias_p - the base object needs to be an
972 access that covers the object as whole. With
973 an evolution in the pointer this cannot be
975 As a band-aid, mark the access so we can special-case
976 it in dr_may_alias_p. */
977 ref
= fold_build2_loc (EXPR_LOCATION (ref
),
978 MEM_REF
, TREE_TYPE (ref
),
980 DR_UNCONSTRAINED_BASE (dr
) = true;
981 access_fns
.safe_push (access_fn
);
984 else if (DECL_P (ref
))
986 /* Canonicalize DR_BASE_OBJECT to MEM_REF form. */
987 ref
= build2 (MEM_REF
, TREE_TYPE (ref
),
988 build_fold_addr_expr (ref
),
989 build_int_cst (reference_alias_ptr_type (ref
), 0));
992 DR_BASE_OBJECT (dr
) = ref
;
993 DR_ACCESS_FNS (dr
) = access_fns
;
996 /* Extracts the alias analysis information from the memory reference DR. */
999 dr_analyze_alias (struct data_reference
*dr
)
1001 tree ref
= DR_REF (dr
);
1002 tree base
= get_base_address (ref
), addr
;
1004 if (INDIRECT_REF_P (base
)
1005 || TREE_CODE (base
) == MEM_REF
)
1007 addr
= TREE_OPERAND (base
, 0);
1008 if (TREE_CODE (addr
) == SSA_NAME
)
1009 DR_PTR_INFO (dr
) = SSA_NAME_PTR_INFO (addr
);
1013 /* Frees data reference DR. */
1016 free_data_ref (data_reference_p dr
)
1018 DR_ACCESS_FNS (dr
).release ();
1022 /* Analyzes memory reference MEMREF accessed in STMT. The reference
1023 is read if IS_READ is true, write otherwise. Returns the
1024 data_reference description of MEMREF. NEST is the outermost loop
1025 in which the reference should be instantiated, LOOP is the loop in
1026 which the data reference should be analyzed. */
1028 struct data_reference
*
1029 create_data_ref (loop_p nest
, loop_p loop
, tree memref
, gimple stmt
,
1032 struct data_reference
*dr
;
1034 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1036 fprintf (dump_file
, "Creating dr for ");
1037 print_generic_expr (dump_file
, memref
, TDF_SLIM
);
1038 fprintf (dump_file
, "\n");
1041 dr
= XCNEW (struct data_reference
);
1042 DR_STMT (dr
) = stmt
;
1043 DR_REF (dr
) = memref
;
1044 DR_IS_READ (dr
) = is_read
;
1046 dr_analyze_innermost (dr
, nest
);
1047 dr_analyze_indices (dr
, nest
, loop
);
1048 dr_analyze_alias (dr
);
1050 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1053 fprintf (dump_file
, "\tbase_address: ");
1054 print_generic_expr (dump_file
, DR_BASE_ADDRESS (dr
), TDF_SLIM
);
1055 fprintf (dump_file
, "\n\toffset from base address: ");
1056 print_generic_expr (dump_file
, DR_OFFSET (dr
), TDF_SLIM
);
1057 fprintf (dump_file
, "\n\tconstant offset from base address: ");
1058 print_generic_expr (dump_file
, DR_INIT (dr
), TDF_SLIM
);
1059 fprintf (dump_file
, "\n\tstep: ");
1060 print_generic_expr (dump_file
, DR_STEP (dr
), TDF_SLIM
);
1061 fprintf (dump_file
, "\n\taligned to: ");
1062 print_generic_expr (dump_file
, DR_ALIGNED_TO (dr
), TDF_SLIM
);
1063 fprintf (dump_file
, "\n\tbase_object: ");
1064 print_generic_expr (dump_file
, DR_BASE_OBJECT (dr
), TDF_SLIM
);
1065 fprintf (dump_file
, "\n");
1066 for (i
= 0; i
< DR_NUM_DIMENSIONS (dr
); i
++)
1068 fprintf (dump_file
, "\tAccess function %d: ", i
);
1069 print_generic_stmt (dump_file
, DR_ACCESS_FN (dr
, i
), TDF_SLIM
);
1076 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1079 dr_equal_offsets_p1 (tree offset1
, tree offset2
)
1083 STRIP_NOPS (offset1
);
1084 STRIP_NOPS (offset2
);
1086 if (offset1
== offset2
)
1089 if (TREE_CODE (offset1
) != TREE_CODE (offset2
)
1090 || (!BINARY_CLASS_P (offset1
) && !UNARY_CLASS_P (offset1
)))
1093 res
= dr_equal_offsets_p1 (TREE_OPERAND (offset1
, 0),
1094 TREE_OPERAND (offset2
, 0));
1096 if (!res
|| !BINARY_CLASS_P (offset1
))
1099 res
= dr_equal_offsets_p1 (TREE_OPERAND (offset1
, 1),
1100 TREE_OPERAND (offset2
, 1));
1105 /* Check if DRA and DRB have equal offsets. */
1107 dr_equal_offsets_p (struct data_reference
*dra
,
1108 struct data_reference
*drb
)
1110 tree offset1
, offset2
;
1112 offset1
= DR_OFFSET (dra
);
1113 offset2
= DR_OFFSET (drb
);
1115 return dr_equal_offsets_p1 (offset1
, offset2
);
1118 /* Returns true if FNA == FNB. */
1121 affine_function_equal_p (affine_fn fna
, affine_fn fnb
)
1123 unsigned i
, n
= fna
.length ();
1125 if (n
!= fnb
.length ())
1128 for (i
= 0; i
< n
; i
++)
1129 if (!operand_equal_p (fna
[i
], fnb
[i
], 0))
1135 /* If all the functions in CF are the same, returns one of them,
1136 otherwise returns NULL. */
1139 common_affine_function (conflict_function
*cf
)
1144 if (!CF_NONTRIVIAL_P (cf
))
1145 return affine_fn ();
1149 for (i
= 1; i
< cf
->n
; i
++)
1150 if (!affine_function_equal_p (comm
, cf
->fns
[i
]))
1151 return affine_fn ();
1156 /* Returns the base of the affine function FN. */
1159 affine_function_base (affine_fn fn
)
1164 /* Returns true if FN is a constant. */
1167 affine_function_constant_p (affine_fn fn
)
1172 for (i
= 1; fn
.iterate (i
, &coef
); i
++)
1173 if (!integer_zerop (coef
))
1179 /* Returns true if FN is the zero constant function. */
1182 affine_function_zero_p (affine_fn fn
)
1184 return (integer_zerop (affine_function_base (fn
))
1185 && affine_function_constant_p (fn
));
1188 /* Returns a signed integer type with the largest precision from TA
1192 signed_type_for_types (tree ta
, tree tb
)
1194 if (TYPE_PRECISION (ta
) > TYPE_PRECISION (tb
))
1195 return signed_type_for (ta
);
1197 return signed_type_for (tb
);
1200 /* Applies operation OP on affine functions FNA and FNB, and returns the
1204 affine_fn_op (enum tree_code op
, affine_fn fna
, affine_fn fnb
)
1210 if (fnb
.length () > fna
.length ())
1222 for (i
= 0; i
< n
; i
++)
1224 tree type
= signed_type_for_types (TREE_TYPE (fna
[i
]),
1225 TREE_TYPE (fnb
[i
]));
1226 ret
.quick_push (fold_build2 (op
, type
, fna
[i
], fnb
[i
]));
1229 for (; fna
.iterate (i
, &coef
); i
++)
1230 ret
.quick_push (fold_build2 (op
, signed_type_for (TREE_TYPE (coef
)),
1231 coef
, integer_zero_node
));
1232 for (; fnb
.iterate (i
, &coef
); i
++)
1233 ret
.quick_push (fold_build2 (op
, signed_type_for (TREE_TYPE (coef
)),
1234 integer_zero_node
, coef
));
1239 /* Returns the sum of affine functions FNA and FNB. */
1242 affine_fn_plus (affine_fn fna
, affine_fn fnb
)
1244 return affine_fn_op (PLUS_EXPR
, fna
, fnb
);
1247 /* Returns the difference of affine functions FNA and FNB. */
1250 affine_fn_minus (affine_fn fna
, affine_fn fnb
)
1252 return affine_fn_op (MINUS_EXPR
, fna
, fnb
);
1255 /* Frees affine function FN. */
1258 affine_fn_free (affine_fn fn
)
1263 /* Determine for each subscript in the data dependence relation DDR
1267 compute_subscript_distance (struct data_dependence_relation
*ddr
)
1269 conflict_function
*cf_a
, *cf_b
;
1270 affine_fn fn_a
, fn_b
, diff
;
1272 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
1276 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
1278 struct subscript
*subscript
;
1280 subscript
= DDR_SUBSCRIPT (ddr
, i
);
1281 cf_a
= SUB_CONFLICTS_IN_A (subscript
);
1282 cf_b
= SUB_CONFLICTS_IN_B (subscript
);
1284 fn_a
= common_affine_function (cf_a
);
1285 fn_b
= common_affine_function (cf_b
);
1286 if (!fn_a
.exists () || !fn_b
.exists ())
1288 SUB_DISTANCE (subscript
) = chrec_dont_know
;
1291 diff
= affine_fn_minus (fn_a
, fn_b
);
1293 if (affine_function_constant_p (diff
))
1294 SUB_DISTANCE (subscript
) = affine_function_base (diff
);
1296 SUB_DISTANCE (subscript
) = chrec_dont_know
;
1298 affine_fn_free (diff
);
1303 /* Returns the conflict function for "unknown". */
1305 static conflict_function
*
1306 conflict_fn_not_known (void)
1308 conflict_function
*fn
= XCNEW (conflict_function
);
1314 /* Returns the conflict function for "independent". */
1316 static conflict_function
*
1317 conflict_fn_no_dependence (void)
1319 conflict_function
*fn
= XCNEW (conflict_function
);
1320 fn
->n
= NO_DEPENDENCE
;
1325 /* Returns true if the address of OBJ is invariant in LOOP. */
1328 object_address_invariant_in_loop_p (const struct loop
*loop
, const_tree obj
)
1330 while (handled_component_p (obj
))
1332 if (TREE_CODE (obj
) == ARRAY_REF
)
1334 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1335 need to check the stride and the lower bound of the reference. */
1336 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, 2),
1338 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, 3),
1342 else if (TREE_CODE (obj
) == COMPONENT_REF
)
1344 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, 2),
1348 obj
= TREE_OPERAND (obj
, 0);
1351 if (!INDIRECT_REF_P (obj
)
1352 && TREE_CODE (obj
) != MEM_REF
)
1355 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, 0),
1359 /* Returns false if we can prove that data references A and B do not alias,
1360 true otherwise. If LOOP_NEST is false no cross-iteration aliases are
1364 dr_may_alias_p (const struct data_reference
*a
, const struct data_reference
*b
,
1367 tree addr_a
= DR_BASE_OBJECT (a
);
1368 tree addr_b
= DR_BASE_OBJECT (b
);
1370 /* If we are not processing a loop nest but scalar code we
1371 do not need to care about possible cross-iteration dependences
1372 and thus can process the full original reference. Do so,
1373 similar to how loop invariant motion applies extra offset-based
1377 aff_tree off1
, off2
;
1378 double_int size1
, size2
;
1379 get_inner_reference_aff (DR_REF (a
), &off1
, &size1
);
1380 get_inner_reference_aff (DR_REF (b
), &off2
, &size2
);
1381 aff_combination_scale (&off1
, double_int_minus_one
);
1382 aff_combination_add (&off2
, &off1
);
1383 if (aff_comb_cannot_overlap_p (&off2
, size1
, size2
))
1387 /* If we had an evolution in a MEM_REF BASE_OBJECT we do not know
1388 the size of the base-object. So we cannot do any offset/overlap
1389 based analysis but have to rely on points-to information only. */
1390 if (TREE_CODE (addr_a
) == MEM_REF
1391 && DR_UNCONSTRAINED_BASE (a
))
1393 if (TREE_CODE (addr_b
) == MEM_REF
1394 && DR_UNCONSTRAINED_BASE (b
))
1395 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a
, 0),
1396 TREE_OPERAND (addr_b
, 0));
1398 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a
, 0),
1399 build_fold_addr_expr (addr_b
));
1401 else if (TREE_CODE (addr_b
) == MEM_REF
1402 && DR_UNCONSTRAINED_BASE (b
))
1403 return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a
),
1404 TREE_OPERAND (addr_b
, 0));
1406 /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
1407 that is being subsetted in the loop nest. */
1408 if (DR_IS_WRITE (a
) && DR_IS_WRITE (b
))
1409 return refs_output_dependent_p (addr_a
, addr_b
);
1410 else if (DR_IS_READ (a
) && DR_IS_WRITE (b
))
1411 return refs_anti_dependent_p (addr_a
, addr_b
);
1412 return refs_may_alias_p (addr_a
, addr_b
);
1415 /* Initialize a data dependence relation between data accesses A and
1416 B. NB_LOOPS is the number of loops surrounding the references: the
1417 size of the classic distance/direction vectors. */
1419 struct data_dependence_relation
*
1420 initialize_data_dependence_relation (struct data_reference
*a
,
1421 struct data_reference
*b
,
1422 vec
<loop_p
> loop_nest
)
1424 struct data_dependence_relation
*res
;
1427 res
= XNEW (struct data_dependence_relation
);
1430 DDR_LOOP_NEST (res
).create (0);
1431 DDR_REVERSED_P (res
) = false;
1432 DDR_SUBSCRIPTS (res
).create (0);
1433 DDR_DIR_VECTS (res
).create (0);
1434 DDR_DIST_VECTS (res
).create (0);
1436 if (a
== NULL
|| b
== NULL
)
1438 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
1442 /* If the data references do not alias, then they are independent. */
1443 if (!dr_may_alias_p (a
, b
, loop_nest
.exists ()))
1445 DDR_ARE_DEPENDENT (res
) = chrec_known
;
1449 /* The case where the references are exactly the same. */
1450 if (operand_equal_p (DR_REF (a
), DR_REF (b
), 0))
1452 if (loop_nest
.exists ()
1453 && !object_address_invariant_in_loop_p (loop_nest
[0],
1454 DR_BASE_OBJECT (a
)))
1456 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
1459 DDR_AFFINE_P (res
) = true;
1460 DDR_ARE_DEPENDENT (res
) = NULL_TREE
;
1461 DDR_SUBSCRIPTS (res
).create (DR_NUM_DIMENSIONS (a
));
1462 DDR_LOOP_NEST (res
) = loop_nest
;
1463 DDR_INNER_LOOP (res
) = 0;
1464 DDR_SELF_REFERENCE (res
) = true;
1465 for (i
= 0; i
< DR_NUM_DIMENSIONS (a
); i
++)
1467 struct subscript
*subscript
;
1469 subscript
= XNEW (struct subscript
);
1470 SUB_CONFLICTS_IN_A (subscript
) = conflict_fn_not_known ();
1471 SUB_CONFLICTS_IN_B (subscript
) = conflict_fn_not_known ();
1472 SUB_LAST_CONFLICT (subscript
) = chrec_dont_know
;
1473 SUB_DISTANCE (subscript
) = chrec_dont_know
;
1474 DDR_SUBSCRIPTS (res
).safe_push (subscript
);
1479 /* If the references do not access the same object, we do not know
1480 whether they alias or not. */
1481 if (!operand_equal_p (DR_BASE_OBJECT (a
), DR_BASE_OBJECT (b
), 0))
1483 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
1487 /* If the base of the object is not invariant in the loop nest, we cannot
1488 analyze it. TODO -- in fact, it would suffice to record that there may
1489 be arbitrary dependences in the loops where the base object varies. */
1490 if (loop_nest
.exists ()
1491 && !object_address_invariant_in_loop_p (loop_nest
[0],
1492 DR_BASE_OBJECT (a
)))
1494 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
1498 /* If the number of dimensions of the access to not agree we can have
1499 a pointer access to a component of the array element type and an
1500 array access while the base-objects are still the same. Punt. */
1501 if (DR_NUM_DIMENSIONS (a
) != DR_NUM_DIMENSIONS (b
))
1503 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
1507 DDR_AFFINE_P (res
) = true;
1508 DDR_ARE_DEPENDENT (res
) = NULL_TREE
;
1509 DDR_SUBSCRIPTS (res
).create (DR_NUM_DIMENSIONS (a
));
1510 DDR_LOOP_NEST (res
) = loop_nest
;
1511 DDR_INNER_LOOP (res
) = 0;
1512 DDR_SELF_REFERENCE (res
) = false;
1514 for (i
= 0; i
< DR_NUM_DIMENSIONS (a
); i
++)
1516 struct subscript
*subscript
;
1518 subscript
= XNEW (struct subscript
);
1519 SUB_CONFLICTS_IN_A (subscript
) = conflict_fn_not_known ();
1520 SUB_CONFLICTS_IN_B (subscript
) = conflict_fn_not_known ();
1521 SUB_LAST_CONFLICT (subscript
) = chrec_dont_know
;
1522 SUB_DISTANCE (subscript
) = chrec_dont_know
;
1523 DDR_SUBSCRIPTS (res
).safe_push (subscript
);
1529 /* Frees memory used by the conflict function F. */
1532 free_conflict_function (conflict_function
*f
)
1536 if (CF_NONTRIVIAL_P (f
))
1538 for (i
= 0; i
< f
->n
; i
++)
1539 affine_fn_free (f
->fns
[i
]);
1544 /* Frees memory used by SUBSCRIPTS. */
1547 free_subscripts (vec
<subscript_p
> subscripts
)
1552 FOR_EACH_VEC_ELT (subscripts
, i
, s
)
1554 free_conflict_function (s
->conflicting_iterations_in_a
);
1555 free_conflict_function (s
->conflicting_iterations_in_b
);
1558 subscripts
.release ();
1561 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1565 finalize_ddr_dependent (struct data_dependence_relation
*ddr
,
1568 DDR_ARE_DEPENDENT (ddr
) = chrec
;
1569 free_subscripts (DDR_SUBSCRIPTS (ddr
));
1570 DDR_SUBSCRIPTS (ddr
).create (0);
1573 /* The dependence relation DDR cannot be represented by a distance
1577 non_affine_dependence_relation (struct data_dependence_relation
*ddr
)
1579 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1580 fprintf (dump_file
, "(Dependence relation cannot be represented by distance vector.) \n");
1582 DDR_AFFINE_P (ddr
) = false;
1587 /* This section contains the classic Banerjee tests. */
1589 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1590 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1593 ziv_subscript_p (const_tree chrec_a
, const_tree chrec_b
)
1595 return (evolution_function_is_constant_p (chrec_a
)
1596 && evolution_function_is_constant_p (chrec_b
));
1599 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1600 variable, i.e., if the SIV (Single Index Variable) test is true. */
1603 siv_subscript_p (const_tree chrec_a
, const_tree chrec_b
)
1605 if ((evolution_function_is_constant_p (chrec_a
)
1606 && evolution_function_is_univariate_p (chrec_b
))
1607 || (evolution_function_is_constant_p (chrec_b
)
1608 && evolution_function_is_univariate_p (chrec_a
)))
1611 if (evolution_function_is_univariate_p (chrec_a
)
1612 && evolution_function_is_univariate_p (chrec_b
))
1614 switch (TREE_CODE (chrec_a
))
1616 case POLYNOMIAL_CHREC
:
1617 switch (TREE_CODE (chrec_b
))
1619 case POLYNOMIAL_CHREC
:
1620 if (CHREC_VARIABLE (chrec_a
) != CHREC_VARIABLE (chrec_b
))
1635 /* Creates a conflict function with N dimensions. The affine functions
1636 in each dimension follow. */
1638 static conflict_function
*
1639 conflict_fn (unsigned n
, ...)
1642 conflict_function
*ret
= XCNEW (conflict_function
);
1645 gcc_assert (0 < n
&& n
<= MAX_DIM
);
1649 for (i
= 0; i
< n
; i
++)
1650 ret
->fns
[i
] = va_arg (ap
, affine_fn
);
1656 /* Returns constant affine function with value CST. */
1659 affine_fn_cst (tree cst
)
1663 fn
.quick_push (cst
);
1667 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1670 affine_fn_univar (tree cst
, unsigned dim
, tree coef
)
1673 fn
.create (dim
+ 1);
1676 gcc_assert (dim
> 0);
1677 fn
.quick_push (cst
);
1678 for (i
= 1; i
< dim
; i
++)
1679 fn
.quick_push (integer_zero_node
);
1680 fn
.quick_push (coef
);
1684 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1685 *OVERLAPS_B are initialized to the functions that describe the
1686 relation between the elements accessed twice by CHREC_A and
1687 CHREC_B. For k >= 0, the following property is verified:
1689 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1692 analyze_ziv_subscript (tree chrec_a
,
1694 conflict_function
**overlaps_a
,
1695 conflict_function
**overlaps_b
,
1696 tree
*last_conflicts
)
1698 tree type
, difference
;
1699 dependence_stats
.num_ziv
++;
1701 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1702 fprintf (dump_file
, "(analyze_ziv_subscript \n");
1704 type
= signed_type_for_types (TREE_TYPE (chrec_a
), TREE_TYPE (chrec_b
));
1705 chrec_a
= chrec_convert (type
, chrec_a
, NULL
);
1706 chrec_b
= chrec_convert (type
, chrec_b
, NULL
);
1707 difference
= chrec_fold_minus (type
, chrec_a
, chrec_b
);
1709 switch (TREE_CODE (difference
))
1712 if (integer_zerop (difference
))
1714 /* The difference is equal to zero: the accessed index
1715 overlaps for each iteration in the loop. */
1716 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
1717 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
1718 *last_conflicts
= chrec_dont_know
;
1719 dependence_stats
.num_ziv_dependent
++;
1723 /* The accesses do not overlap. */
1724 *overlaps_a
= conflict_fn_no_dependence ();
1725 *overlaps_b
= conflict_fn_no_dependence ();
1726 *last_conflicts
= integer_zero_node
;
1727 dependence_stats
.num_ziv_independent
++;
1732 /* We're not sure whether the indexes overlap. For the moment,
1733 conservatively answer "don't know". */
1734 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1735 fprintf (dump_file
, "ziv test failed: difference is non-integer.\n");
1737 *overlaps_a
= conflict_fn_not_known ();
1738 *overlaps_b
= conflict_fn_not_known ();
1739 *last_conflicts
= chrec_dont_know
;
1740 dependence_stats
.num_ziv_unimplemented
++;
1744 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1745 fprintf (dump_file
, ")\n");
1748 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
1749 and only if it fits to the int type. If this is not the case, or the
1750 bound on the number of iterations of LOOP could not be derived, returns
1754 max_stmt_executions_tree (struct loop
*loop
)
1758 if (!max_stmt_executions (loop
, &nit
))
1759 return chrec_dont_know
;
1761 if (!double_int_fits_to_tree_p (unsigned_type_node
, nit
))
1762 return chrec_dont_know
;
1764 return double_int_to_tree (unsigned_type_node
, nit
);
1767 /* Determine whether the CHREC is always positive/negative. If the expression
1768 cannot be statically analyzed, return false, otherwise set the answer into
1772 chrec_is_positive (tree chrec
, bool *value
)
1774 bool value0
, value1
, value2
;
1775 tree end_value
, nb_iter
;
1777 switch (TREE_CODE (chrec
))
1779 case POLYNOMIAL_CHREC
:
1780 if (!chrec_is_positive (CHREC_LEFT (chrec
), &value0
)
1781 || !chrec_is_positive (CHREC_RIGHT (chrec
), &value1
))
1784 /* FIXME -- overflows. */
1785 if (value0
== value1
)
1791 /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
1792 and the proof consists in showing that the sign never
1793 changes during the execution of the loop, from 0 to
1794 loop->nb_iterations. */
1795 if (!evolution_function_is_affine_p (chrec
))
1798 nb_iter
= number_of_latch_executions (get_chrec_loop (chrec
));
1799 if (chrec_contains_undetermined (nb_iter
))
1803 /* TODO -- If the test is after the exit, we may decrease the number of
1804 iterations by one. */
1806 nb_iter
= chrec_fold_minus (type
, nb_iter
, build_int_cst (type
, 1));
1809 end_value
= chrec_apply (CHREC_VARIABLE (chrec
), chrec
, nb_iter
);
1811 if (!chrec_is_positive (end_value
, &value2
))
1815 return value0
== value1
;
1818 switch (tree_int_cst_sgn (chrec
))
1837 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1838 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1839 *OVERLAPS_B are initialized to the functions that describe the
1840 relation between the elements accessed twice by CHREC_A and
1841 CHREC_B. For k >= 0, the following property is verified:
1843 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1846 analyze_siv_subscript_cst_affine (tree chrec_a
,
1848 conflict_function
**overlaps_a
,
1849 conflict_function
**overlaps_b
,
1850 tree
*last_conflicts
)
1852 bool value0
, value1
, value2
;
1853 tree type
, difference
, tmp
;
1855 type
= signed_type_for_types (TREE_TYPE (chrec_a
), TREE_TYPE (chrec_b
));
1856 chrec_a
= chrec_convert (type
, chrec_a
, NULL
);
1857 chrec_b
= chrec_convert (type
, chrec_b
, NULL
);
1858 difference
= chrec_fold_minus (type
, initial_condition (chrec_b
), chrec_a
);
1860 /* Special case overlap in the first iteration. */
1861 if (integer_zerop (difference
))
1863 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
1864 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
1865 *last_conflicts
= integer_one_node
;
1869 if (!chrec_is_positive (initial_condition (difference
), &value0
))
1871 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1872 fprintf (dump_file
, "siv test failed: chrec is not positive.\n");
1874 dependence_stats
.num_siv_unimplemented
++;
1875 *overlaps_a
= conflict_fn_not_known ();
1876 *overlaps_b
= conflict_fn_not_known ();
1877 *last_conflicts
= chrec_dont_know
;
1882 if (value0
== false)
1884 if (!chrec_is_positive (CHREC_RIGHT (chrec_b
), &value1
))
1886 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1887 fprintf (dump_file
, "siv test failed: chrec not positive.\n");
1889 *overlaps_a
= conflict_fn_not_known ();
1890 *overlaps_b
= conflict_fn_not_known ();
1891 *last_conflicts
= chrec_dont_know
;
1892 dependence_stats
.num_siv_unimplemented
++;
1901 chrec_b = {10, +, 1}
1904 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b
), difference
))
1906 HOST_WIDE_INT numiter
;
1907 struct loop
*loop
= get_chrec_loop (chrec_b
);
1909 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
1910 tmp
= fold_build2 (EXACT_DIV_EXPR
, type
,
1911 fold_build1 (ABS_EXPR
, type
, difference
),
1912 CHREC_RIGHT (chrec_b
));
1913 *overlaps_b
= conflict_fn (1, affine_fn_cst (tmp
));
1914 *last_conflicts
= integer_one_node
;
1917 /* Perform weak-zero siv test to see if overlap is
1918 outside the loop bounds. */
1919 numiter
= max_stmt_executions_int (loop
);
1922 && compare_tree_int (tmp
, numiter
) > 0)
1924 free_conflict_function (*overlaps_a
);
1925 free_conflict_function (*overlaps_b
);
1926 *overlaps_a
= conflict_fn_no_dependence ();
1927 *overlaps_b
= conflict_fn_no_dependence ();
1928 *last_conflicts
= integer_zero_node
;
1929 dependence_stats
.num_siv_independent
++;
1932 dependence_stats
.num_siv_dependent
++;
1936 /* When the step does not divide the difference, there are
1940 *overlaps_a
= conflict_fn_no_dependence ();
1941 *overlaps_b
= conflict_fn_no_dependence ();
1942 *last_conflicts
= integer_zero_node
;
1943 dependence_stats
.num_siv_independent
++;
1952 chrec_b = {10, +, -1}
1954 In this case, chrec_a will not overlap with chrec_b. */
1955 *overlaps_a
= conflict_fn_no_dependence ();
1956 *overlaps_b
= conflict_fn_no_dependence ();
1957 *last_conflicts
= integer_zero_node
;
1958 dependence_stats
.num_siv_independent
++;
1965 if (!chrec_is_positive (CHREC_RIGHT (chrec_b
), &value2
))
1967 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1968 fprintf (dump_file
, "siv test failed: chrec not positive.\n");
1970 *overlaps_a
= conflict_fn_not_known ();
1971 *overlaps_b
= conflict_fn_not_known ();
1972 *last_conflicts
= chrec_dont_know
;
1973 dependence_stats
.num_siv_unimplemented
++;
1978 if (value2
== false)
1982 chrec_b = {10, +, -1}
1984 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b
), difference
))
1986 HOST_WIDE_INT numiter
;
1987 struct loop
*loop
= get_chrec_loop (chrec_b
);
1989 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
1990 tmp
= fold_build2 (EXACT_DIV_EXPR
, type
, difference
,
1991 CHREC_RIGHT (chrec_b
));
1992 *overlaps_b
= conflict_fn (1, affine_fn_cst (tmp
));
1993 *last_conflicts
= integer_one_node
;
1995 /* Perform weak-zero siv test to see if overlap is
1996 outside the loop bounds. */
1997 numiter
= max_stmt_executions_int (loop
);
2000 && compare_tree_int (tmp
, numiter
) > 0)
2002 free_conflict_function (*overlaps_a
);
2003 free_conflict_function (*overlaps_b
);
2004 *overlaps_a
= conflict_fn_no_dependence ();
2005 *overlaps_b
= conflict_fn_no_dependence ();
2006 *last_conflicts
= integer_zero_node
;
2007 dependence_stats
.num_siv_independent
++;
2010 dependence_stats
.num_siv_dependent
++;
2014 /* When the step does not divide the difference, there
2018 *overlaps_a
= conflict_fn_no_dependence ();
2019 *overlaps_b
= conflict_fn_no_dependence ();
2020 *last_conflicts
= integer_zero_node
;
2021 dependence_stats
.num_siv_independent
++;
2031 In this case, chrec_a will not overlap with chrec_b. */
2032 *overlaps_a
= conflict_fn_no_dependence ();
2033 *overlaps_b
= conflict_fn_no_dependence ();
2034 *last_conflicts
= integer_zero_node
;
2035 dependence_stats
.num_siv_independent
++;
2043 /* Helper recursive function for initializing the matrix A. Returns
2044 the initial value of CHREC. */
2047 initialize_matrix_A (lambda_matrix A
, tree chrec
, unsigned index
, int mult
)
2051 switch (TREE_CODE (chrec
))
2053 case POLYNOMIAL_CHREC
:
2054 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec
)) == INTEGER_CST
);
2056 A
[index
][0] = mult
* int_cst_value (CHREC_RIGHT (chrec
));
2057 return initialize_matrix_A (A
, CHREC_LEFT (chrec
), index
+ 1, mult
);
2063 tree op0
= initialize_matrix_A (A
, TREE_OPERAND (chrec
, 0), index
, mult
);
2064 tree op1
= initialize_matrix_A (A
, TREE_OPERAND (chrec
, 1), index
, mult
);
2066 return chrec_fold_op (TREE_CODE (chrec
), chrec_type (chrec
), op0
, op1
);
2071 tree op
= initialize_matrix_A (A
, TREE_OPERAND (chrec
, 0), index
, mult
);
2072 return chrec_convert (chrec_type (chrec
), op
, NULL
);
2077 /* Handle ~X as -1 - X. */
2078 tree op
= initialize_matrix_A (A
, TREE_OPERAND (chrec
, 0), index
, mult
);
2079 return chrec_fold_op (MINUS_EXPR
, chrec_type (chrec
),
2080 build_int_cst (TREE_TYPE (chrec
), -1), op
);
2092 #define FLOOR_DIV(x,y) ((x) / (y))
2094 /* Solves the special case of the Diophantine equation:
2095 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
2097 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
2098 number of iterations that loops X and Y run. The overlaps will be
2099 constructed as evolutions in dimension DIM. */
2102 compute_overlap_steps_for_affine_univar (int niter
, int step_a
, int step_b
,
2103 affine_fn
*overlaps_a
,
2104 affine_fn
*overlaps_b
,
2105 tree
*last_conflicts
, int dim
)
2107 if (((step_a
> 0 && step_b
> 0)
2108 || (step_a
< 0 && step_b
< 0)))
2110 int step_overlaps_a
, step_overlaps_b
;
2111 int gcd_steps_a_b
, last_conflict
, tau2
;
2113 gcd_steps_a_b
= gcd (step_a
, step_b
);
2114 step_overlaps_a
= step_b
/ gcd_steps_a_b
;
2115 step_overlaps_b
= step_a
/ gcd_steps_a_b
;
2119 tau2
= FLOOR_DIV (niter
, step_overlaps_a
);
2120 tau2
= MIN (tau2
, FLOOR_DIV (niter
, step_overlaps_b
));
2121 last_conflict
= tau2
;
2122 *last_conflicts
= build_int_cst (NULL_TREE
, last_conflict
);
2125 *last_conflicts
= chrec_dont_know
;
2127 *overlaps_a
= affine_fn_univar (integer_zero_node
, dim
,
2128 build_int_cst (NULL_TREE
,
2130 *overlaps_b
= affine_fn_univar (integer_zero_node
, dim
,
2131 build_int_cst (NULL_TREE
,
2137 *overlaps_a
= affine_fn_cst (integer_zero_node
);
2138 *overlaps_b
= affine_fn_cst (integer_zero_node
);
2139 *last_conflicts
= integer_zero_node
;
2143 /* Solves the special case of a Diophantine equation where CHREC_A is
2144 an affine bivariate function, and CHREC_B is an affine univariate
2145 function. For example,
2147 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
2149 has the following overlapping functions:
2151 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
2152 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
2153 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
2155 FORNOW: This is a specialized implementation for a case occurring in
2156 a common benchmark. Implement the general algorithm. */
2159 compute_overlap_steps_for_affine_1_2 (tree chrec_a
, tree chrec_b
,
2160 conflict_function
**overlaps_a
,
2161 conflict_function
**overlaps_b
,
2162 tree
*last_conflicts
)
2164 bool xz_p
, yz_p
, xyz_p
;
2165 int step_x
, step_y
, step_z
;
2166 HOST_WIDE_INT niter_x
, niter_y
, niter_z
, niter
;
2167 affine_fn overlaps_a_xz
, overlaps_b_xz
;
2168 affine_fn overlaps_a_yz
, overlaps_b_yz
;
2169 affine_fn overlaps_a_xyz
, overlaps_b_xyz
;
2170 affine_fn ova1
, ova2
, ovb
;
2171 tree last_conflicts_xz
, last_conflicts_yz
, last_conflicts_xyz
;
2173 step_x
= int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a
)));
2174 step_y
= int_cst_value (CHREC_RIGHT (chrec_a
));
2175 step_z
= int_cst_value (CHREC_RIGHT (chrec_b
));
2177 niter_x
= max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a
)));
2178 niter_y
= max_stmt_executions_int (get_chrec_loop (chrec_a
));
2179 niter_z
= max_stmt_executions_int (get_chrec_loop (chrec_b
));
2181 if (niter_x
< 0 || niter_y
< 0 || niter_z
< 0)
2183 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2184 fprintf (dump_file
, "overlap steps test failed: no iteration counts.\n");
2186 *overlaps_a
= conflict_fn_not_known ();
2187 *overlaps_b
= conflict_fn_not_known ();
2188 *last_conflicts
= chrec_dont_know
;
2192 niter
= MIN (niter_x
, niter_z
);
2193 compute_overlap_steps_for_affine_univar (niter
, step_x
, step_z
,
2196 &last_conflicts_xz
, 1);
2197 niter
= MIN (niter_y
, niter_z
);
2198 compute_overlap_steps_for_affine_univar (niter
, step_y
, step_z
,
2201 &last_conflicts_yz
, 2);
2202 niter
= MIN (niter_x
, niter_z
);
2203 niter
= MIN (niter_y
, niter
);
2204 compute_overlap_steps_for_affine_univar (niter
, step_x
+ step_y
, step_z
,
2207 &last_conflicts_xyz
, 3);
2209 xz_p
= !integer_zerop (last_conflicts_xz
);
2210 yz_p
= !integer_zerop (last_conflicts_yz
);
2211 xyz_p
= !integer_zerop (last_conflicts_xyz
);
2213 if (xz_p
|| yz_p
|| xyz_p
)
2215 ova1
= affine_fn_cst (integer_zero_node
);
2216 ova2
= affine_fn_cst (integer_zero_node
);
2217 ovb
= affine_fn_cst (integer_zero_node
);
2220 affine_fn t0
= ova1
;
2223 ova1
= affine_fn_plus (ova1
, overlaps_a_xz
);
2224 ovb
= affine_fn_plus (ovb
, overlaps_b_xz
);
2225 affine_fn_free (t0
);
2226 affine_fn_free (t2
);
2227 *last_conflicts
= last_conflicts_xz
;
2231 affine_fn t0
= ova2
;
2234 ova2
= affine_fn_plus (ova2
, overlaps_a_yz
);
2235 ovb
= affine_fn_plus (ovb
, overlaps_b_yz
);
2236 affine_fn_free (t0
);
2237 affine_fn_free (t2
);
2238 *last_conflicts
= last_conflicts_yz
;
2242 affine_fn t0
= ova1
;
2243 affine_fn t2
= ova2
;
2246 ova1
= affine_fn_plus (ova1
, overlaps_a_xyz
);
2247 ova2
= affine_fn_plus (ova2
, overlaps_a_xyz
);
2248 ovb
= affine_fn_plus (ovb
, overlaps_b_xyz
);
2249 affine_fn_free (t0
);
2250 affine_fn_free (t2
);
2251 affine_fn_free (t4
);
2252 *last_conflicts
= last_conflicts_xyz
;
2254 *overlaps_a
= conflict_fn (2, ova1
, ova2
);
2255 *overlaps_b
= conflict_fn (1, ovb
);
2259 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2260 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2261 *last_conflicts
= integer_zero_node
;
2264 affine_fn_free (overlaps_a_xz
);
2265 affine_fn_free (overlaps_b_xz
);
2266 affine_fn_free (overlaps_a_yz
);
2267 affine_fn_free (overlaps_b_yz
);
2268 affine_fn_free (overlaps_a_xyz
);
2269 affine_fn_free (overlaps_b_xyz
);
2272 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
2275 lambda_vector_copy (lambda_vector vec1
, lambda_vector vec2
,
2278 memcpy (vec2
, vec1
, size
* sizeof (*vec1
));
2281 /* Copy the elements of M x N matrix MAT1 to MAT2. */
2284 lambda_matrix_copy (lambda_matrix mat1
, lambda_matrix mat2
,
2289 for (i
= 0; i
< m
; i
++)
2290 lambda_vector_copy (mat1
[i
], mat2
[i
], n
);
2293 /* Store the N x N identity matrix in MAT. */
2296 lambda_matrix_id (lambda_matrix mat
, int size
)
2300 for (i
= 0; i
< size
; i
++)
2301 for (j
= 0; j
< size
; j
++)
2302 mat
[i
][j
] = (i
== j
) ? 1 : 0;
2305 /* Return the first nonzero element of vector VEC1 between START and N.
2306 We must have START <= N. Returns N if VEC1 is the zero vector. */
2309 lambda_vector_first_nz (lambda_vector vec1
, int n
, int start
)
2312 while (j
< n
&& vec1
[j
] == 0)
2317 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2318 R2 = R2 + CONST1 * R1. */
2321 lambda_matrix_row_add (lambda_matrix mat
, int n
, int r1
, int r2
, int const1
)
2328 for (i
= 0; i
< n
; i
++)
2329 mat
[r2
][i
] += const1
* mat
[r1
][i
];
2332 /* Swap rows R1 and R2 in matrix MAT. */
2335 lambda_matrix_row_exchange (lambda_matrix mat
, int r1
, int r2
)
2344 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
2345 and store the result in VEC2. */
2348 lambda_vector_mult_const (lambda_vector vec1
, lambda_vector vec2
,
2349 int size
, int const1
)
2354 lambda_vector_clear (vec2
, size
);
2356 for (i
= 0; i
< size
; i
++)
2357 vec2
[i
] = const1
* vec1
[i
];
2360 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
2363 lambda_vector_negate (lambda_vector vec1
, lambda_vector vec2
,
2366 lambda_vector_mult_const (vec1
, vec2
, size
, -1);
2369 /* Negate row R1 of matrix MAT which has N columns. */
2372 lambda_matrix_row_negate (lambda_matrix mat
, int n
, int r1
)
2374 lambda_vector_negate (mat
[r1
], mat
[r1
], n
);
2377 /* Return true if two vectors are equal. */
2380 lambda_vector_equal (lambda_vector vec1
, lambda_vector vec2
, int size
)
2383 for (i
= 0; i
< size
; i
++)
2384 if (vec1
[i
] != vec2
[i
])
2389 /* Given an M x N integer matrix A, this function determines an M x
2390 M unimodular matrix U, and an M x N echelon matrix S such that
2391 "U.A = S". This decomposition is also known as "right Hermite".
2393 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2394 Restructuring Compilers" Utpal Banerjee. */
2397 lambda_matrix_right_hermite (lambda_matrix A
, int m
, int n
,
2398 lambda_matrix S
, lambda_matrix U
)
2402 lambda_matrix_copy (A
, S
, m
, n
);
2403 lambda_matrix_id (U
, m
);
2405 for (j
= 0; j
< n
; j
++)
2407 if (lambda_vector_first_nz (S
[j
], m
, i0
) < m
)
2410 for (i
= m
- 1; i
>= i0
; i
--)
2412 while (S
[i
][j
] != 0)
2414 int sigma
, factor
, a
, b
;
2418 sigma
= (a
* b
< 0) ? -1: 1;
2421 factor
= sigma
* (a
/ b
);
2423 lambda_matrix_row_add (S
, n
, i
, i
-1, -factor
);
2424 lambda_matrix_row_exchange (S
, i
, i
-1);
2426 lambda_matrix_row_add (U
, m
, i
, i
-1, -factor
);
2427 lambda_matrix_row_exchange (U
, i
, i
-1);
2434 /* Determines the overlapping elements due to accesses CHREC_A and
2435 CHREC_B, that are affine functions. This function cannot handle
2436 symbolic evolution functions, ie. when initial conditions are
2437 parameters, because it uses lambda matrices of integers. */
2440 analyze_subscript_affine_affine (tree chrec_a
,
2442 conflict_function
**overlaps_a
,
2443 conflict_function
**overlaps_b
,
2444 tree
*last_conflicts
)
2446 unsigned nb_vars_a
, nb_vars_b
, dim
;
2447 HOST_WIDE_INT init_a
, init_b
, gamma
, gcd_alpha_beta
;
2448 lambda_matrix A
, U
, S
;
2449 struct obstack scratch_obstack
;
2451 if (eq_evolutions_p (chrec_a
, chrec_b
))
2453 /* The accessed index overlaps for each iteration in the
2455 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2456 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2457 *last_conflicts
= chrec_dont_know
;
2460 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2461 fprintf (dump_file
, "(analyze_subscript_affine_affine \n");
2463 /* For determining the initial intersection, we have to solve a
2464 Diophantine equation. This is the most time consuming part.
2466 For answering to the question: "Is there a dependence?" we have
2467 to prove that there exists a solution to the Diophantine
2468 equation, and that the solution is in the iteration domain,
2469 i.e. the solution is positive or zero, and that the solution
2470 happens before the upper bound loop.nb_iterations. Otherwise
2471 there is no dependence. This function outputs a description of
2472 the iterations that hold the intersections. */
2474 nb_vars_a
= nb_vars_in_chrec (chrec_a
);
2475 nb_vars_b
= nb_vars_in_chrec (chrec_b
);
2477 gcc_obstack_init (&scratch_obstack
);
2479 dim
= nb_vars_a
+ nb_vars_b
;
2480 U
= lambda_matrix_new (dim
, dim
, &scratch_obstack
);
2481 A
= lambda_matrix_new (dim
, 1, &scratch_obstack
);
2482 S
= lambda_matrix_new (dim
, 1, &scratch_obstack
);
2484 init_a
= int_cst_value (initialize_matrix_A (A
, chrec_a
, 0, 1));
2485 init_b
= int_cst_value (initialize_matrix_A (A
, chrec_b
, nb_vars_a
, -1));
2486 gamma
= init_b
- init_a
;
2488 /* Don't do all the hard work of solving the Diophantine equation
2489 when we already know the solution: for example,
2492 | gamma = 3 - 3 = 0.
2493 Then the first overlap occurs during the first iterations:
2494 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2498 if (nb_vars_a
== 1 && nb_vars_b
== 1)
2500 HOST_WIDE_INT step_a
, step_b
;
2501 HOST_WIDE_INT niter
, niter_a
, niter_b
;
2504 niter_a
= max_stmt_executions_int (get_chrec_loop (chrec_a
));
2505 niter_b
= max_stmt_executions_int (get_chrec_loop (chrec_b
));
2506 niter
= MIN (niter_a
, niter_b
);
2507 step_a
= int_cst_value (CHREC_RIGHT (chrec_a
));
2508 step_b
= int_cst_value (CHREC_RIGHT (chrec_b
));
2510 compute_overlap_steps_for_affine_univar (niter
, step_a
, step_b
,
2513 *overlaps_a
= conflict_fn (1, ova
);
2514 *overlaps_b
= conflict_fn (1, ovb
);
2517 else if (nb_vars_a
== 2 && nb_vars_b
== 1)
2518 compute_overlap_steps_for_affine_1_2
2519 (chrec_a
, chrec_b
, overlaps_a
, overlaps_b
, last_conflicts
);
2521 else if (nb_vars_a
== 1 && nb_vars_b
== 2)
2522 compute_overlap_steps_for_affine_1_2
2523 (chrec_b
, chrec_a
, overlaps_b
, overlaps_a
, last_conflicts
);
2527 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2528 fprintf (dump_file
, "affine-affine test failed: too many variables.\n");
2529 *overlaps_a
= conflict_fn_not_known ();
2530 *overlaps_b
= conflict_fn_not_known ();
2531 *last_conflicts
= chrec_dont_know
;
2533 goto end_analyze_subs_aa
;
2537 lambda_matrix_right_hermite (A
, dim
, 1, S
, U
);
2542 lambda_matrix_row_negate (U
, dim
, 0);
2544 gcd_alpha_beta
= S
[0][0];
2546 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2547 but that is a quite strange case. Instead of ICEing, answer
2549 if (gcd_alpha_beta
== 0)
2551 *overlaps_a
= conflict_fn_not_known ();
2552 *overlaps_b
= conflict_fn_not_known ();
2553 *last_conflicts
= chrec_dont_know
;
2554 goto end_analyze_subs_aa
;
2557 /* The classic "gcd-test". */
2558 if (!int_divides_p (gcd_alpha_beta
, gamma
))
2560 /* The "gcd-test" has determined that there is no integer
2561 solution, i.e. there is no dependence. */
2562 *overlaps_a
= conflict_fn_no_dependence ();
2563 *overlaps_b
= conflict_fn_no_dependence ();
2564 *last_conflicts
= integer_zero_node
;
2567 /* Both access functions are univariate. This includes SIV and MIV cases. */
2568 else if (nb_vars_a
== 1 && nb_vars_b
== 1)
2570 /* Both functions should have the same evolution sign. */
2571 if (((A
[0][0] > 0 && -A
[1][0] > 0)
2572 || (A
[0][0] < 0 && -A
[1][0] < 0)))
2574 /* The solutions are given by:
2576 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2579 For a given integer t. Using the following variables,
2581 | i0 = u11 * gamma / gcd_alpha_beta
2582 | j0 = u12 * gamma / gcd_alpha_beta
2589 | y0 = j0 + j1 * t. */
2590 HOST_WIDE_INT i0
, j0
, i1
, j1
;
2592 i0
= U
[0][0] * gamma
/ gcd_alpha_beta
;
2593 j0
= U
[0][1] * gamma
/ gcd_alpha_beta
;
2597 if ((i1
== 0 && i0
< 0)
2598 || (j1
== 0 && j0
< 0))
2600 /* There is no solution.
2601 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2602 falls in here, but for the moment we don't look at the
2603 upper bound of the iteration domain. */
2604 *overlaps_a
= conflict_fn_no_dependence ();
2605 *overlaps_b
= conflict_fn_no_dependence ();
2606 *last_conflicts
= integer_zero_node
;
2607 goto end_analyze_subs_aa
;
2610 if (i1
> 0 && j1
> 0)
2612 HOST_WIDE_INT niter_a
2613 = max_stmt_executions_int (get_chrec_loop (chrec_a
));
2614 HOST_WIDE_INT niter_b
2615 = max_stmt_executions_int (get_chrec_loop (chrec_b
));
2616 HOST_WIDE_INT niter
= MIN (niter_a
, niter_b
);
2618 /* (X0, Y0) is a solution of the Diophantine equation:
2619 "chrec_a (X0) = chrec_b (Y0)". */
2620 HOST_WIDE_INT tau1
= MAX (CEIL (-i0
, i1
),
2622 HOST_WIDE_INT x0
= i1
* tau1
+ i0
;
2623 HOST_WIDE_INT y0
= j1
* tau1
+ j0
;
2625 /* (X1, Y1) is the smallest positive solution of the eq
2626 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2627 first conflict occurs. */
2628 HOST_WIDE_INT min_multiple
= MIN (x0
/ i1
, y0
/ j1
);
2629 HOST_WIDE_INT x1
= x0
- i1
* min_multiple
;
2630 HOST_WIDE_INT y1
= y0
- j1
* min_multiple
;
2634 HOST_WIDE_INT tau2
= MIN (FLOOR_DIV (niter
- i0
, i1
),
2635 FLOOR_DIV (niter
- j0
, j1
));
2636 HOST_WIDE_INT last_conflict
= tau2
- (x1
- i0
)/i1
;
2638 /* If the overlap occurs outside of the bounds of the
2639 loop, there is no dependence. */
2640 if (x1
>= niter
|| y1
>= niter
)
2642 *overlaps_a
= conflict_fn_no_dependence ();
2643 *overlaps_b
= conflict_fn_no_dependence ();
2644 *last_conflicts
= integer_zero_node
;
2645 goto end_analyze_subs_aa
;
2648 *last_conflicts
= build_int_cst (NULL_TREE
, last_conflict
);
2651 *last_conflicts
= chrec_dont_know
;
2655 affine_fn_univar (build_int_cst (NULL_TREE
, x1
),
2657 build_int_cst (NULL_TREE
, i1
)));
2660 affine_fn_univar (build_int_cst (NULL_TREE
, y1
),
2662 build_int_cst (NULL_TREE
, j1
)));
2666 /* FIXME: For the moment, the upper bound of the
2667 iteration domain for i and j is not checked. */
2668 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2669 fprintf (dump_file
, "affine-affine test failed: unimplemented.\n");
2670 *overlaps_a
= conflict_fn_not_known ();
2671 *overlaps_b
= conflict_fn_not_known ();
2672 *last_conflicts
= chrec_dont_know
;
2677 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2678 fprintf (dump_file
, "affine-affine test failed: unimplemented.\n");
2679 *overlaps_a
= conflict_fn_not_known ();
2680 *overlaps_b
= conflict_fn_not_known ();
2681 *last_conflicts
= chrec_dont_know
;
2686 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2687 fprintf (dump_file
, "affine-affine test failed: unimplemented.\n");
2688 *overlaps_a
= conflict_fn_not_known ();
2689 *overlaps_b
= conflict_fn_not_known ();
2690 *last_conflicts
= chrec_dont_know
;
2693 end_analyze_subs_aa
:
2694 obstack_free (&scratch_obstack
, NULL
);
2695 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2697 fprintf (dump_file
, " (overlaps_a = ");
2698 dump_conflict_function (dump_file
, *overlaps_a
);
2699 fprintf (dump_file
, ")\n (overlaps_b = ");
2700 dump_conflict_function (dump_file
, *overlaps_b
);
2701 fprintf (dump_file
, "))\n");
2705 /* Returns true when analyze_subscript_affine_affine can be used for
2706 determining the dependence relation between chrec_a and chrec_b,
2707 that contain symbols. This function modifies chrec_a and chrec_b
2708 such that the analysis result is the same, and such that they don't
2709 contain symbols, and then can safely be passed to the analyzer.
2711 Example: The analysis of the following tuples of evolutions produce
2712 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2715 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2716 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2720 can_use_analyze_subscript_affine_affine (tree
*chrec_a
, tree
*chrec_b
)
2722 tree diff
, type
, left_a
, left_b
, right_b
;
2724 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a
))
2725 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b
)))
2726 /* FIXME: For the moment not handled. Might be refined later. */
2729 type
= chrec_type (*chrec_a
);
2730 left_a
= CHREC_LEFT (*chrec_a
);
2731 left_b
= chrec_convert (type
, CHREC_LEFT (*chrec_b
), NULL
);
2732 diff
= chrec_fold_minus (type
, left_a
, left_b
);
2734 if (!evolution_function_is_constant_p (diff
))
2737 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2738 fprintf (dump_file
, "can_use_subscript_aff_aff_for_symbolic \n");
2740 *chrec_a
= build_polynomial_chrec (CHREC_VARIABLE (*chrec_a
),
2741 diff
, CHREC_RIGHT (*chrec_a
));
2742 right_b
= chrec_convert (type
, CHREC_RIGHT (*chrec_b
), NULL
);
2743 *chrec_b
= build_polynomial_chrec (CHREC_VARIABLE (*chrec_b
),
2744 build_int_cst (type
, 0),
2749 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2750 *OVERLAPS_B are initialized to the functions that describe the
2751 relation between the elements accessed twice by CHREC_A and
2752 CHREC_B. For k >= 0, the following property is verified:
2754 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2757 analyze_siv_subscript (tree chrec_a
,
2759 conflict_function
**overlaps_a
,
2760 conflict_function
**overlaps_b
,
2761 tree
*last_conflicts
,
2764 dependence_stats
.num_siv
++;
2766 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2767 fprintf (dump_file
, "(analyze_siv_subscript \n");
2769 if (evolution_function_is_constant_p (chrec_a
)
2770 && evolution_function_is_affine_in_loop (chrec_b
, loop_nest_num
))
2771 analyze_siv_subscript_cst_affine (chrec_a
, chrec_b
,
2772 overlaps_a
, overlaps_b
, last_conflicts
);
2774 else if (evolution_function_is_affine_in_loop (chrec_a
, loop_nest_num
)
2775 && evolution_function_is_constant_p (chrec_b
))
2776 analyze_siv_subscript_cst_affine (chrec_b
, chrec_a
,
2777 overlaps_b
, overlaps_a
, last_conflicts
);
2779 else if (evolution_function_is_affine_in_loop (chrec_a
, loop_nest_num
)
2780 && evolution_function_is_affine_in_loop (chrec_b
, loop_nest_num
))
2782 if (!chrec_contains_symbols (chrec_a
)
2783 && !chrec_contains_symbols (chrec_b
))
2785 analyze_subscript_affine_affine (chrec_a
, chrec_b
,
2786 overlaps_a
, overlaps_b
,
2789 if (CF_NOT_KNOWN_P (*overlaps_a
)
2790 || CF_NOT_KNOWN_P (*overlaps_b
))
2791 dependence_stats
.num_siv_unimplemented
++;
2792 else if (CF_NO_DEPENDENCE_P (*overlaps_a
)
2793 || CF_NO_DEPENDENCE_P (*overlaps_b
))
2794 dependence_stats
.num_siv_independent
++;
2796 dependence_stats
.num_siv_dependent
++;
2798 else if (can_use_analyze_subscript_affine_affine (&chrec_a
,
2801 analyze_subscript_affine_affine (chrec_a
, chrec_b
,
2802 overlaps_a
, overlaps_b
,
2805 if (CF_NOT_KNOWN_P (*overlaps_a
)
2806 || CF_NOT_KNOWN_P (*overlaps_b
))
2807 dependence_stats
.num_siv_unimplemented
++;
2808 else if (CF_NO_DEPENDENCE_P (*overlaps_a
)
2809 || CF_NO_DEPENDENCE_P (*overlaps_b
))
2810 dependence_stats
.num_siv_independent
++;
2812 dependence_stats
.num_siv_dependent
++;
2815 goto siv_subscript_dontknow
;
2820 siv_subscript_dontknow
:;
2821 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2822 fprintf (dump_file
, " siv test failed: unimplemented");
2823 *overlaps_a
= conflict_fn_not_known ();
2824 *overlaps_b
= conflict_fn_not_known ();
2825 *last_conflicts
= chrec_dont_know
;
2826 dependence_stats
.num_siv_unimplemented
++;
2829 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2830 fprintf (dump_file
, ")\n");
2833 /* Returns false if we can prove that the greatest common divisor of the steps
2834 of CHREC does not divide CST, false otherwise. */
2837 gcd_of_steps_may_divide_p (const_tree chrec
, const_tree cst
)
2839 HOST_WIDE_INT cd
= 0, val
;
2842 if (!tree_fits_shwi_p (cst
))
2844 val
= tree_to_shwi (cst
);
2846 while (TREE_CODE (chrec
) == POLYNOMIAL_CHREC
)
2848 step
= CHREC_RIGHT (chrec
);
2849 if (!tree_fits_shwi_p (step
))
2851 cd
= gcd (cd
, tree_to_shwi (step
));
2852 chrec
= CHREC_LEFT (chrec
);
2855 return val
% cd
== 0;
2858 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2859 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2860 functions that describe the relation between the elements accessed
2861 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2864 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2867 analyze_miv_subscript (tree chrec_a
,
2869 conflict_function
**overlaps_a
,
2870 conflict_function
**overlaps_b
,
2871 tree
*last_conflicts
,
2872 struct loop
*loop_nest
)
2874 tree type
, difference
;
2876 dependence_stats
.num_miv
++;
2877 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2878 fprintf (dump_file
, "(analyze_miv_subscript \n");
2880 type
= signed_type_for_types (TREE_TYPE (chrec_a
), TREE_TYPE (chrec_b
));
2881 chrec_a
= chrec_convert (type
, chrec_a
, NULL
);
2882 chrec_b
= chrec_convert (type
, chrec_b
, NULL
);
2883 difference
= chrec_fold_minus (type
, chrec_a
, chrec_b
);
2885 if (eq_evolutions_p (chrec_a
, chrec_b
))
2887 /* Access functions are the same: all the elements are accessed
2888 in the same order. */
2889 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2890 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2891 *last_conflicts
= max_stmt_executions_tree (get_chrec_loop (chrec_a
));
2892 dependence_stats
.num_miv_dependent
++;
2895 else if (evolution_function_is_constant_p (difference
)
2896 /* For the moment, the following is verified:
2897 evolution_function_is_affine_multivariate_p (chrec_a,
2899 && !gcd_of_steps_may_divide_p (chrec_a
, difference
))
2901 /* testsuite/.../ssa-chrec-33.c
2902 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2904 The difference is 1, and all the evolution steps are multiples
2905 of 2, consequently there are no overlapping elements. */
2906 *overlaps_a
= conflict_fn_no_dependence ();
2907 *overlaps_b
= conflict_fn_no_dependence ();
2908 *last_conflicts
= integer_zero_node
;
2909 dependence_stats
.num_miv_independent
++;
2912 else if (evolution_function_is_affine_multivariate_p (chrec_a
, loop_nest
->num
)
2913 && !chrec_contains_symbols (chrec_a
)
2914 && evolution_function_is_affine_multivariate_p (chrec_b
, loop_nest
->num
)
2915 && !chrec_contains_symbols (chrec_b
))
2917 /* testsuite/.../ssa-chrec-35.c
2918 {0, +, 1}_2 vs. {0, +, 1}_3
2919 the overlapping elements are respectively located at iterations:
2920 {0, +, 1}_x and {0, +, 1}_x,
2921 in other words, we have the equality:
2922 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2925 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2926 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2928 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2929 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2931 analyze_subscript_affine_affine (chrec_a
, chrec_b
,
2932 overlaps_a
, overlaps_b
, last_conflicts
);
2934 if (CF_NOT_KNOWN_P (*overlaps_a
)
2935 || CF_NOT_KNOWN_P (*overlaps_b
))
2936 dependence_stats
.num_miv_unimplemented
++;
2937 else if (CF_NO_DEPENDENCE_P (*overlaps_a
)
2938 || CF_NO_DEPENDENCE_P (*overlaps_b
))
2939 dependence_stats
.num_miv_independent
++;
2941 dependence_stats
.num_miv_dependent
++;
2946 /* When the analysis is too difficult, answer "don't know". */
2947 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2948 fprintf (dump_file
, "analyze_miv_subscript test failed: unimplemented.\n");
2950 *overlaps_a
= conflict_fn_not_known ();
2951 *overlaps_b
= conflict_fn_not_known ();
2952 *last_conflicts
= chrec_dont_know
;
2953 dependence_stats
.num_miv_unimplemented
++;
2956 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2957 fprintf (dump_file
, ")\n");
2960 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2961 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2962 OVERLAP_ITERATIONS_B are initialized with two functions that
2963 describe the iterations that contain conflicting elements.
2965 Remark: For an integer k >= 0, the following equality is true:
2967 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2971 analyze_overlapping_iterations (tree chrec_a
,
2973 conflict_function
**overlap_iterations_a
,
2974 conflict_function
**overlap_iterations_b
,
2975 tree
*last_conflicts
, struct loop
*loop_nest
)
2977 unsigned int lnn
= loop_nest
->num
;
2979 dependence_stats
.num_subscript_tests
++;
2981 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2983 fprintf (dump_file
, "(analyze_overlapping_iterations \n");
2984 fprintf (dump_file
, " (chrec_a = ");
2985 print_generic_expr (dump_file
, chrec_a
, 0);
2986 fprintf (dump_file
, ")\n (chrec_b = ");
2987 print_generic_expr (dump_file
, chrec_b
, 0);
2988 fprintf (dump_file
, ")\n");
2991 if (chrec_a
== NULL_TREE
2992 || chrec_b
== NULL_TREE
2993 || chrec_contains_undetermined (chrec_a
)
2994 || chrec_contains_undetermined (chrec_b
))
2996 dependence_stats
.num_subscript_undetermined
++;
2998 *overlap_iterations_a
= conflict_fn_not_known ();
2999 *overlap_iterations_b
= conflict_fn_not_known ();
3002 /* If they are the same chrec, and are affine, they overlap
3003 on every iteration. */
3004 else if (eq_evolutions_p (chrec_a
, chrec_b
)
3005 && (evolution_function_is_affine_multivariate_p (chrec_a
, lnn
)
3006 || operand_equal_p (chrec_a
, chrec_b
, 0)))
3008 dependence_stats
.num_same_subscript_function
++;
3009 *overlap_iterations_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
3010 *overlap_iterations_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
3011 *last_conflicts
= chrec_dont_know
;
3014 /* If they aren't the same, and aren't affine, we can't do anything
3016 else if ((chrec_contains_symbols (chrec_a
)
3017 || chrec_contains_symbols (chrec_b
))
3018 && (!evolution_function_is_affine_multivariate_p (chrec_a
, lnn
)
3019 || !evolution_function_is_affine_multivariate_p (chrec_b
, lnn
)))
3021 dependence_stats
.num_subscript_undetermined
++;
3022 *overlap_iterations_a
= conflict_fn_not_known ();
3023 *overlap_iterations_b
= conflict_fn_not_known ();
3026 else if (ziv_subscript_p (chrec_a
, chrec_b
))
3027 analyze_ziv_subscript (chrec_a
, chrec_b
,
3028 overlap_iterations_a
, overlap_iterations_b
,
3031 else if (siv_subscript_p (chrec_a
, chrec_b
))
3032 analyze_siv_subscript (chrec_a
, chrec_b
,
3033 overlap_iterations_a
, overlap_iterations_b
,
3034 last_conflicts
, lnn
);
3037 analyze_miv_subscript (chrec_a
, chrec_b
,
3038 overlap_iterations_a
, overlap_iterations_b
,
3039 last_conflicts
, loop_nest
);
3041 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3043 fprintf (dump_file
, " (overlap_iterations_a = ");
3044 dump_conflict_function (dump_file
, *overlap_iterations_a
);
3045 fprintf (dump_file
, ")\n (overlap_iterations_b = ");
3046 dump_conflict_function (dump_file
, *overlap_iterations_b
);
3047 fprintf (dump_file
, "))\n");
3051 /* Helper function for uniquely inserting distance vectors. */
3054 save_dist_v (struct data_dependence_relation
*ddr
, lambda_vector dist_v
)
3059 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr
), i
, v
)
3060 if (lambda_vector_equal (v
, dist_v
, DDR_NB_LOOPS (ddr
)))
3063 DDR_DIST_VECTS (ddr
).safe_push (dist_v
);
3066 /* Helper function for uniquely inserting direction vectors. */
3069 save_dir_v (struct data_dependence_relation
*ddr
, lambda_vector dir_v
)
3074 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr
), i
, v
)
3075 if (lambda_vector_equal (v
, dir_v
, DDR_NB_LOOPS (ddr
)))
3078 DDR_DIR_VECTS (ddr
).safe_push (dir_v
);
3081 /* Add a distance of 1 on all the loops outer than INDEX. If we
3082 haven't yet determined a distance for this outer loop, push a new
3083 distance vector composed of the previous distance, and a distance
3084 of 1 for this outer loop. Example:
3092 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
3093 save (0, 1), then we have to save (1, 0). */
3096 add_outer_distances (struct data_dependence_relation
*ddr
,
3097 lambda_vector dist_v
, int index
)
3099 /* For each outer loop where init_v is not set, the accesses are
3100 in dependence of distance 1 in the loop. */
3101 while (--index
>= 0)
3103 lambda_vector save_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3104 lambda_vector_copy (dist_v
, save_v
, DDR_NB_LOOPS (ddr
));
3106 save_dist_v (ddr
, save_v
);
3110 /* Return false when fail to represent the data dependence as a
3111 distance vector. INIT_B is set to true when a component has been
3112 added to the distance vector DIST_V. INDEX_CARRY is then set to
3113 the index in DIST_V that carries the dependence. */
3116 build_classic_dist_vector_1 (struct data_dependence_relation
*ddr
,
3117 struct data_reference
*ddr_a
,
3118 struct data_reference
*ddr_b
,
3119 lambda_vector dist_v
, bool *init_b
,
3123 lambda_vector init_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3125 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
3127 tree access_fn_a
, access_fn_b
;
3128 struct subscript
*subscript
= DDR_SUBSCRIPT (ddr
, i
);
3130 if (chrec_contains_undetermined (SUB_DISTANCE (subscript
)))
3132 non_affine_dependence_relation (ddr
);
3136 access_fn_a
= DR_ACCESS_FN (ddr_a
, i
);
3137 access_fn_b
= DR_ACCESS_FN (ddr_b
, i
);
3139 if (TREE_CODE (access_fn_a
) == POLYNOMIAL_CHREC
3140 && TREE_CODE (access_fn_b
) == POLYNOMIAL_CHREC
)
3143 int var_a
= CHREC_VARIABLE (access_fn_a
);
3144 int var_b
= CHREC_VARIABLE (access_fn_b
);
3147 || chrec_contains_undetermined (SUB_DISTANCE (subscript
)))
3149 non_affine_dependence_relation (ddr
);
3153 dist
= int_cst_value (SUB_DISTANCE (subscript
));
3154 index
= index_in_loop_nest (var_a
, DDR_LOOP_NEST (ddr
));
3155 *index_carry
= MIN (index
, *index_carry
);
3157 /* This is the subscript coupling test. If we have already
3158 recorded a distance for this loop (a distance coming from
3159 another subscript), it should be the same. For example,
3160 in the following code, there is no dependence:
3167 if (init_v
[index
] != 0 && dist_v
[index
] != dist
)
3169 finalize_ddr_dependent (ddr
, chrec_known
);
3173 dist_v
[index
] = dist
;
3177 else if (!operand_equal_p (access_fn_a
, access_fn_b
, 0))
3179 /* This can be for example an affine vs. constant dependence
3180 (T[i] vs. T[3]) that is not an affine dependence and is
3181 not representable as a distance vector. */
3182 non_affine_dependence_relation (ddr
);
3190 /* Return true when the DDR contains only constant access functions. */
3193 constant_access_functions (const struct data_dependence_relation
*ddr
)
3197 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
3198 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr
), i
))
3199 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr
), i
)))
3205 /* Helper function for the case where DDR_A and DDR_B are the same
3206 multivariate access function with a constant step. For an example
3210 add_multivariate_self_dist (struct data_dependence_relation
*ddr
, tree c_2
)
3213 tree c_1
= CHREC_LEFT (c_2
);
3214 tree c_0
= CHREC_LEFT (c_1
);
3215 lambda_vector dist_v
;
3218 /* Polynomials with more than 2 variables are not handled yet. When
3219 the evolution steps are parameters, it is not possible to
3220 represent the dependence using classical distance vectors. */
3221 if (TREE_CODE (c_0
) != INTEGER_CST
3222 || TREE_CODE (CHREC_RIGHT (c_1
)) != INTEGER_CST
3223 || TREE_CODE (CHREC_RIGHT (c_2
)) != INTEGER_CST
)
3225 DDR_AFFINE_P (ddr
) = false;
3229 x_2
= index_in_loop_nest (CHREC_VARIABLE (c_2
), DDR_LOOP_NEST (ddr
));
3230 x_1
= index_in_loop_nest (CHREC_VARIABLE (c_1
), DDR_LOOP_NEST (ddr
));
3232 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
3233 dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3234 v1
= int_cst_value (CHREC_RIGHT (c_1
));
3235 v2
= int_cst_value (CHREC_RIGHT (c_2
));
3248 save_dist_v (ddr
, dist_v
);
3250 add_outer_distances (ddr
, dist_v
, x_1
);
3253 /* Helper function for the case where DDR_A and DDR_B are the same
3254 access functions. */
3257 add_other_self_distances (struct data_dependence_relation
*ddr
)
3259 lambda_vector dist_v
;
3261 int index_carry
= DDR_NB_LOOPS (ddr
);
3263 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
3265 tree access_fun
= DR_ACCESS_FN (DDR_A (ddr
), i
);
3267 if (TREE_CODE (access_fun
) == POLYNOMIAL_CHREC
)
3269 if (!evolution_function_is_univariate_p (access_fun
))
3271 if (DDR_NUM_SUBSCRIPTS (ddr
) != 1)
3273 DDR_ARE_DEPENDENT (ddr
) = chrec_dont_know
;
3277 access_fun
= DR_ACCESS_FN (DDR_A (ddr
), 0);
3279 if (TREE_CODE (CHREC_LEFT (access_fun
)) == POLYNOMIAL_CHREC
)
3280 add_multivariate_self_dist (ddr
, access_fun
);
3282 /* The evolution step is not constant: it varies in
3283 the outer loop, so this cannot be represented by a
3284 distance vector. For example in pr34635.c the
3285 evolution is {0, +, {0, +, 4}_1}_2. */
3286 DDR_AFFINE_P (ddr
) = false;
3291 index_carry
= MIN (index_carry
,
3292 index_in_loop_nest (CHREC_VARIABLE (access_fun
),
3293 DDR_LOOP_NEST (ddr
)));
3297 dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3298 add_outer_distances (ddr
, dist_v
, index_carry
);
3302 insert_innermost_unit_dist_vector (struct data_dependence_relation
*ddr
)
3304 lambda_vector dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3306 dist_v
[DDR_INNER_LOOP (ddr
)] = 1;
3307 save_dist_v (ddr
, dist_v
);
3310 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
3311 is the case for example when access functions are the same and
3312 equal to a constant, as in:
3319 in which case the distance vectors are (0) and (1). */
3322 add_distance_for_zero_overlaps (struct data_dependence_relation
*ddr
)
3326 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
3328 subscript_p sub
= DDR_SUBSCRIPT (ddr
, i
);
3329 conflict_function
*ca
= SUB_CONFLICTS_IN_A (sub
);
3330 conflict_function
*cb
= SUB_CONFLICTS_IN_B (sub
);
3332 for (j
= 0; j
< ca
->n
; j
++)
3333 if (affine_function_zero_p (ca
->fns
[j
]))
3335 insert_innermost_unit_dist_vector (ddr
);
3339 for (j
= 0; j
< cb
->n
; j
++)
3340 if (affine_function_zero_p (cb
->fns
[j
]))
3342 insert_innermost_unit_dist_vector (ddr
);
3348 /* Compute the classic per loop distance vector. DDR is the data
3349 dependence relation to build a vector from. Return false when fail
3350 to represent the data dependence as a distance vector. */
3353 build_classic_dist_vector (struct data_dependence_relation
*ddr
,
3354 struct loop
*loop_nest
)
3356 bool init_b
= false;
3357 int index_carry
= DDR_NB_LOOPS (ddr
);
3358 lambda_vector dist_v
;
3360 if (DDR_ARE_DEPENDENT (ddr
) != NULL_TREE
)
3363 if (same_access_functions (ddr
))
3365 /* Save the 0 vector. */
3366 dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3367 save_dist_v (ddr
, dist_v
);
3369 if (constant_access_functions (ddr
))
3370 add_distance_for_zero_overlaps (ddr
);
3372 if (DDR_NB_LOOPS (ddr
) > 1)
3373 add_other_self_distances (ddr
);
3378 dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3379 if (!build_classic_dist_vector_1 (ddr
, DDR_A (ddr
), DDR_B (ddr
),
3380 dist_v
, &init_b
, &index_carry
))
3383 /* Save the distance vector if we initialized one. */
3386 /* Verify a basic constraint: classic distance vectors should
3387 always be lexicographically positive.
3389 Data references are collected in the order of execution of
3390 the program, thus for the following loop
3392 | for (i = 1; i < 100; i++)
3393 | for (j = 1; j < 100; j++)
3395 | t = T[j+1][i-1]; // A
3396 | T[j][i] = t + 2; // B
3399 references are collected following the direction of the wind:
3400 A then B. The data dependence tests are performed also
3401 following this order, such that we're looking at the distance
3402 separating the elements accessed by A from the elements later
3403 accessed by B. But in this example, the distance returned by
3404 test_dep (A, B) is lexicographically negative (-1, 1), that
3405 means that the access A occurs later than B with respect to
3406 the outer loop, ie. we're actually looking upwind. In this
3407 case we solve test_dep (B, A) looking downwind to the
3408 lexicographically positive solution, that returns the
3409 distance vector (1, -1). */
3410 if (!lambda_vector_lexico_pos (dist_v
, DDR_NB_LOOPS (ddr
)))
3412 lambda_vector save_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3413 if (!subscript_dependence_tester_1 (ddr
, DDR_B (ddr
), DDR_A (ddr
),
3416 compute_subscript_distance (ddr
);
3417 if (!build_classic_dist_vector_1 (ddr
, DDR_B (ddr
), DDR_A (ddr
),
3418 save_v
, &init_b
, &index_carry
))
3420 save_dist_v (ddr
, save_v
);
3421 DDR_REVERSED_P (ddr
) = true;
3423 /* In this case there is a dependence forward for all the
3426 | for (k = 1; k < 100; k++)
3427 | for (i = 1; i < 100; i++)
3428 | for (j = 1; j < 100; j++)
3430 | t = T[j+1][i-1]; // A
3431 | T[j][i] = t + 2; // B
3439 if (DDR_NB_LOOPS (ddr
) > 1)
3441 add_outer_distances (ddr
, save_v
, index_carry
);
3442 add_outer_distances (ddr
, dist_v
, index_carry
);
3447 lambda_vector save_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3448 lambda_vector_copy (dist_v
, save_v
, DDR_NB_LOOPS (ddr
));
3450 if (DDR_NB_LOOPS (ddr
) > 1)
3452 lambda_vector opposite_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3454 if (!subscript_dependence_tester_1 (ddr
, DDR_B (ddr
),
3455 DDR_A (ddr
), loop_nest
))
3457 compute_subscript_distance (ddr
);
3458 if (!build_classic_dist_vector_1 (ddr
, DDR_B (ddr
), DDR_A (ddr
),
3459 opposite_v
, &init_b
,
3463 save_dist_v (ddr
, save_v
);
3464 add_outer_distances (ddr
, dist_v
, index_carry
);
3465 add_outer_distances (ddr
, opposite_v
, index_carry
);
3468 save_dist_v (ddr
, save_v
);
3473 /* There is a distance of 1 on all the outer loops: Example:
3474 there is a dependence of distance 1 on loop_1 for the array A.
3480 add_outer_distances (ddr
, dist_v
,
3481 lambda_vector_first_nz (dist_v
,
3482 DDR_NB_LOOPS (ddr
), 0));
3485 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3489 fprintf (dump_file
, "(build_classic_dist_vector\n");
3490 for (i
= 0; i
< DDR_NUM_DIST_VECTS (ddr
); i
++)
3492 fprintf (dump_file
, " dist_vector = (");
3493 print_lambda_vector (dump_file
, DDR_DIST_VECT (ddr
, i
),
3494 DDR_NB_LOOPS (ddr
));
3495 fprintf (dump_file
, " )\n");
3497 fprintf (dump_file
, ")\n");
3503 /* Return the direction for a given distance.
3504 FIXME: Computing dir this way is suboptimal, since dir can catch
3505 cases that dist is unable to represent. */
3507 static inline enum data_dependence_direction
3508 dir_from_dist (int dist
)
3511 return dir_positive
;
3513 return dir_negative
;
3518 /* Compute the classic per loop direction vector. DDR is the data
3519 dependence relation to build a vector from. */
3522 build_classic_dir_vector (struct data_dependence_relation
*ddr
)
3525 lambda_vector dist_v
;
3527 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr
), i
, dist_v
)
3529 lambda_vector dir_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3531 for (j
= 0; j
< DDR_NB_LOOPS (ddr
); j
++)
3532 dir_v
[j
] = dir_from_dist (dist_v
[j
]);
3534 save_dir_v (ddr
, dir_v
);
3538 /* Helper function. Returns true when there is a dependence between
3539 data references DRA and DRB. */
3542 subscript_dependence_tester_1 (struct data_dependence_relation
*ddr
,
3543 struct data_reference
*dra
,
3544 struct data_reference
*drb
,
3545 struct loop
*loop_nest
)
3548 tree last_conflicts
;
3549 struct subscript
*subscript
;
3550 tree res
= NULL_TREE
;
3552 for (i
= 0; DDR_SUBSCRIPTS (ddr
).iterate (i
, &subscript
); i
++)
3554 conflict_function
*overlaps_a
, *overlaps_b
;
3556 analyze_overlapping_iterations (DR_ACCESS_FN (dra
, i
),
3557 DR_ACCESS_FN (drb
, i
),
3558 &overlaps_a
, &overlaps_b
,
3559 &last_conflicts
, loop_nest
);
3561 if (SUB_CONFLICTS_IN_A (subscript
))
3562 free_conflict_function (SUB_CONFLICTS_IN_A (subscript
));
3563 if (SUB_CONFLICTS_IN_B (subscript
))
3564 free_conflict_function (SUB_CONFLICTS_IN_B (subscript
));
3566 SUB_CONFLICTS_IN_A (subscript
) = overlaps_a
;
3567 SUB_CONFLICTS_IN_B (subscript
) = overlaps_b
;
3568 SUB_LAST_CONFLICT (subscript
) = last_conflicts
;
3570 /* If there is any undetermined conflict function we have to
3571 give a conservative answer in case we cannot prove that
3572 no dependence exists when analyzing another subscript. */
3573 if (CF_NOT_KNOWN_P (overlaps_a
)
3574 || CF_NOT_KNOWN_P (overlaps_b
))
3576 res
= chrec_dont_know
;
3580 /* When there is a subscript with no dependence we can stop. */
3581 else if (CF_NO_DEPENDENCE_P (overlaps_a
)
3582 || CF_NO_DEPENDENCE_P (overlaps_b
))
3589 if (res
== NULL_TREE
)
3592 if (res
== chrec_known
)
3593 dependence_stats
.num_dependence_independent
++;
3595 dependence_stats
.num_dependence_undetermined
++;
3596 finalize_ddr_dependent (ddr
, res
);
3600 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3603 subscript_dependence_tester (struct data_dependence_relation
*ddr
,
3604 struct loop
*loop_nest
)
3606 if (subscript_dependence_tester_1 (ddr
, DDR_A (ddr
), DDR_B (ddr
), loop_nest
))
3607 dependence_stats
.num_dependence_dependent
++;
3609 compute_subscript_distance (ddr
);
3610 if (build_classic_dist_vector (ddr
, loop_nest
))
3611 build_classic_dir_vector (ddr
);
3614 /* Returns true when all the access functions of A are affine or
3615 constant with respect to LOOP_NEST. */
3618 access_functions_are_affine_or_constant_p (const struct data_reference
*a
,
3619 const struct loop
*loop_nest
)
3622 vec
<tree
> fns
= DR_ACCESS_FNS (a
);
3625 FOR_EACH_VEC_ELT (fns
, i
, t
)
3626 if (!evolution_function_is_invariant_p (t
, loop_nest
->num
)
3627 && !evolution_function_is_affine_multivariate_p (t
, loop_nest
->num
))
3633 /* Initializes an equation for an OMEGA problem using the information
3634 contained in the ACCESS_FUN. Returns true when the operation
3637 PB is the omega constraint system.
3638 EQ is the number of the equation to be initialized.
3639 OFFSET is used for shifting the variables names in the constraints:
3640 a constrain is composed of 2 * the number of variables surrounding
3641 dependence accesses. OFFSET is set either to 0 for the first n variables,
3642 then it is set to n.
3643 ACCESS_FUN is expected to be an affine chrec. */
3646 init_omega_eq_with_af (omega_pb pb
, unsigned eq
,
3647 unsigned int offset
, tree access_fun
,
3648 struct data_dependence_relation
*ddr
)
3650 switch (TREE_CODE (access_fun
))
3652 case POLYNOMIAL_CHREC
:
3654 tree left
= CHREC_LEFT (access_fun
);
3655 tree right
= CHREC_RIGHT (access_fun
);
3656 int var
= CHREC_VARIABLE (access_fun
);
3659 if (TREE_CODE (right
) != INTEGER_CST
)
3662 var_idx
= index_in_loop_nest (var
, DDR_LOOP_NEST (ddr
));
3663 pb
->eqs
[eq
].coef
[offset
+ var_idx
+ 1] = int_cst_value (right
);
3665 /* Compute the innermost loop index. */
3666 DDR_INNER_LOOP (ddr
) = MAX (DDR_INNER_LOOP (ddr
), var_idx
);
3669 pb
->eqs
[eq
].coef
[var_idx
+ DDR_NB_LOOPS (ddr
) + 1]
3670 += int_cst_value (right
);
3672 switch (TREE_CODE (left
))
3674 case POLYNOMIAL_CHREC
:
3675 return init_omega_eq_with_af (pb
, eq
, offset
, left
, ddr
);
3678 pb
->eqs
[eq
].coef
[0] += int_cst_value (left
);
3687 pb
->eqs
[eq
].coef
[0] += int_cst_value (access_fun
);
3695 /* As explained in the comments preceding init_omega_for_ddr, we have
3696 to set up a system for each loop level, setting outer loops
3697 variation to zero, and current loop variation to positive or zero.
3698 Save each lexico positive distance vector. */
3701 omega_extract_distance_vectors (omega_pb pb
,
3702 struct data_dependence_relation
*ddr
)
3706 struct loop
*loopi
, *loopj
;
3707 enum omega_result res
;
3709 /* Set a new problem for each loop in the nest. The basis is the
3710 problem that we have initialized until now. On top of this we
3711 add new constraints. */
3712 for (i
= 0; i
<= DDR_INNER_LOOP (ddr
)
3713 && DDR_LOOP_NEST (ddr
).iterate (i
, &loopi
); i
++)
3716 omega_pb copy
= omega_alloc_problem (2 * DDR_NB_LOOPS (ddr
),
3717 DDR_NB_LOOPS (ddr
));
3719 omega_copy_problem (copy
, pb
);
3721 /* For all the outer loops "loop_j", add "dj = 0". */
3722 for (j
= 0; j
< i
&& DDR_LOOP_NEST (ddr
).iterate (j
, &loopj
); j
++)
3724 eq
= omega_add_zero_eq (copy
, omega_black
);
3725 copy
->eqs
[eq
].coef
[j
+ 1] = 1;
3728 /* For "loop_i", add "0 <= di". */
3729 geq
= omega_add_zero_geq (copy
, omega_black
);
3730 copy
->geqs
[geq
].coef
[i
+ 1] = 1;
3732 /* Reduce the constraint system, and test that the current
3733 problem is feasible. */
3734 res
= omega_simplify_problem (copy
);
3735 if (res
== omega_false
3736 || res
== omega_unknown
3737 || copy
->num_geqs
> (int) DDR_NB_LOOPS (ddr
))
3740 for (eq
= 0; eq
< copy
->num_subs
; eq
++)
3741 if (copy
->subs
[eq
].key
== (int) i
+ 1)
3743 dist
= copy
->subs
[eq
].coef
[0];
3749 /* Reinitialize problem... */
3750 omega_copy_problem (copy
, pb
);
3751 for (j
= 0; j
< i
&& DDR_LOOP_NEST (ddr
).iterate (j
, &loopj
); j
++)
3753 eq
= omega_add_zero_eq (copy
, omega_black
);
3754 copy
->eqs
[eq
].coef
[j
+ 1] = 1;
3757 /* ..., but this time "di = 1". */
3758 eq
= omega_add_zero_eq (copy
, omega_black
);
3759 copy
->eqs
[eq
].coef
[i
+ 1] = 1;
3760 copy
->eqs
[eq
].coef
[0] = -1;
3762 res
= omega_simplify_problem (copy
);
3763 if (res
== omega_false
3764 || res
== omega_unknown
3765 || copy
->num_geqs
> (int) DDR_NB_LOOPS (ddr
))
3768 for (eq
= 0; eq
< copy
->num_subs
; eq
++)
3769 if (copy
->subs
[eq
].key
== (int) i
+ 1)
3771 dist
= copy
->subs
[eq
].coef
[0];
3777 /* Save the lexicographically positive distance vector. */
3780 lambda_vector dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3781 lambda_vector dir_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3785 for (eq
= 0; eq
< copy
->num_subs
; eq
++)
3786 if (copy
->subs
[eq
].key
> 0)
3788 dist
= copy
->subs
[eq
].coef
[0];
3789 dist_v
[copy
->subs
[eq
].key
- 1] = dist
;
3792 for (j
= 0; j
< DDR_NB_LOOPS (ddr
); j
++)
3793 dir_v
[j
] = dir_from_dist (dist_v
[j
]);
3795 save_dist_v (ddr
, dist_v
);
3796 save_dir_v (ddr
, dir_v
);
3800 omega_free_problem (copy
);
3804 /* This is called for each subscript of a tuple of data references:
3805 insert an equality for representing the conflicts. */
3808 omega_setup_subscript (tree access_fun_a
, tree access_fun_b
,
3809 struct data_dependence_relation
*ddr
,
3810 omega_pb pb
, bool *maybe_dependent
)
3813 tree type
= signed_type_for_types (TREE_TYPE (access_fun_a
),
3814 TREE_TYPE (access_fun_b
));
3815 tree fun_a
= chrec_convert (type
, access_fun_a
, NULL
);
3816 tree fun_b
= chrec_convert (type
, access_fun_b
, NULL
);
3817 tree difference
= chrec_fold_minus (type
, fun_a
, fun_b
);
3820 /* When the fun_a - fun_b is not constant, the dependence is not
3821 captured by the classic distance vector representation. */
3822 if (TREE_CODE (difference
) != INTEGER_CST
)
3826 if (ziv_subscript_p (fun_a
, fun_b
) && !integer_zerop (difference
))
3828 /* There is no dependence. */
3829 *maybe_dependent
= false;
3833 minus_one
= build_int_cst (type
, -1);
3834 fun_b
= chrec_fold_multiply (type
, fun_b
, minus_one
);
3836 eq
= omega_add_zero_eq (pb
, omega_black
);
3837 if (!init_omega_eq_with_af (pb
, eq
, DDR_NB_LOOPS (ddr
), fun_a
, ddr
)
3838 || !init_omega_eq_with_af (pb
, eq
, 0, fun_b
, ddr
))
3839 /* There is probably a dependence, but the system of
3840 constraints cannot be built: answer "don't know". */
3844 if (DDR_NB_LOOPS (ddr
) != 0 && pb
->eqs
[eq
].coef
[0]
3845 && !int_divides_p (lambda_vector_gcd
3846 ((lambda_vector
) &(pb
->eqs
[eq
].coef
[1]),
3847 2 * DDR_NB_LOOPS (ddr
)),
3848 pb
->eqs
[eq
].coef
[0]))
3850 /* There is no dependence. */
3851 *maybe_dependent
= false;
3858 /* Helper function, same as init_omega_for_ddr but specialized for
3859 data references A and B. */
3862 init_omega_for_ddr_1 (struct data_reference
*dra
, struct data_reference
*drb
,
3863 struct data_dependence_relation
*ddr
,
3864 omega_pb pb
, bool *maybe_dependent
)
3869 unsigned nb_loops
= DDR_NB_LOOPS (ddr
);
3871 /* Insert an equality per subscript. */
3872 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
3874 if (!omega_setup_subscript (DR_ACCESS_FN (dra
, i
), DR_ACCESS_FN (drb
, i
),
3875 ddr
, pb
, maybe_dependent
))
3877 else if (*maybe_dependent
== false)
3879 /* There is no dependence. */
3880 DDR_ARE_DEPENDENT (ddr
) = chrec_known
;
3885 /* Insert inequalities: constraints corresponding to the iteration
3886 domain, i.e. the loops surrounding the references "loop_x" and
3887 the distance variables "dx". The layout of the OMEGA
3888 representation is as follows:
3889 - coef[0] is the constant
3890 - coef[1..nb_loops] are the protected variables that will not be
3891 removed by the solver: the "dx"
3892 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3894 for (i
= 0; i
<= DDR_INNER_LOOP (ddr
)
3895 && DDR_LOOP_NEST (ddr
).iterate (i
, &loopi
); i
++)
3897 HOST_WIDE_INT nbi
= max_stmt_executions_int (loopi
);
3900 ineq
= omega_add_zero_geq (pb
, omega_black
);
3901 pb
->geqs
[ineq
].coef
[i
+ nb_loops
+ 1] = 1;
3903 /* 0 <= loop_x + dx */
3904 ineq
= omega_add_zero_geq (pb
, omega_black
);
3905 pb
->geqs
[ineq
].coef
[i
+ nb_loops
+ 1] = 1;
3906 pb
->geqs
[ineq
].coef
[i
+ 1] = 1;
3910 /* loop_x <= nb_iters */
3911 ineq
= omega_add_zero_geq (pb
, omega_black
);
3912 pb
->geqs
[ineq
].coef
[i
+ nb_loops
+ 1] = -1;
3913 pb
->geqs
[ineq
].coef
[0] = nbi
;
3915 /* loop_x + dx <= nb_iters */
3916 ineq
= omega_add_zero_geq (pb
, omega_black
);
3917 pb
->geqs
[ineq
].coef
[i
+ nb_loops
+ 1] = -1;
3918 pb
->geqs
[ineq
].coef
[i
+ 1] = -1;
3919 pb
->geqs
[ineq
].coef
[0] = nbi
;
3921 /* A step "dx" bigger than nb_iters is not feasible, so
3922 add "0 <= nb_iters + dx", */
3923 ineq
= omega_add_zero_geq (pb
, omega_black
);
3924 pb
->geqs
[ineq
].coef
[i
+ 1] = 1;
3925 pb
->geqs
[ineq
].coef
[0] = nbi
;
3926 /* and "dx <= nb_iters". */
3927 ineq
= omega_add_zero_geq (pb
, omega_black
);
3928 pb
->geqs
[ineq
].coef
[i
+ 1] = -1;
3929 pb
->geqs
[ineq
].coef
[0] = nbi
;
3933 omega_extract_distance_vectors (pb
, ddr
);
3938 /* Sets up the Omega dependence problem for the data dependence
3939 relation DDR. Returns false when the constraint system cannot be
3940 built, ie. when the test answers "don't know". Returns true
3941 otherwise, and when independence has been proved (using one of the
3942 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3943 set MAYBE_DEPENDENT to true.
3945 Example: for setting up the dependence system corresponding to the
3946 conflicting accesses
3951 | ... A[2*j, 2*(i + j)]
3955 the following constraints come from the iteration domain:
3962 where di, dj are the distance variables. The constraints
3963 representing the conflicting elements are:
3966 i + 1 = 2 * (i + di + j + dj)
3968 For asking that the resulting distance vector (di, dj) be
3969 lexicographically positive, we insert the constraint "di >= 0". If
3970 "di = 0" in the solution, we fix that component to zero, and we
3971 look at the inner loops: we set a new problem where all the outer
3972 loop distances are zero, and fix this inner component to be
3973 positive. When one of the components is positive, we save that
3974 distance, and set a new problem where the distance on this loop is
3975 zero, searching for other distances in the inner loops. Here is
3976 the classic example that illustrates that we have to set for each
3977 inner loop a new problem:
3985 we have to save two distances (1, 0) and (0, 1).
3987 Given two array references, refA and refB, we have to set the
3988 dependence problem twice, refA vs. refB and refB vs. refA, and we
3989 cannot do a single test, as refB might occur before refA in the
3990 inner loops, and the contrary when considering outer loops: ex.
3995 | T[{1,+,1}_2][{1,+,1}_1] // refA
3996 | T[{2,+,1}_2][{0,+,1}_1] // refB
4001 refB touches the elements in T before refA, and thus for the same
4002 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
4003 but for successive loop_0 iterations, we have (1, -1, 1)
4005 The Omega solver expects the distance variables ("di" in the
4006 previous example) to come first in the constraint system (as
4007 variables to be protected, or "safe" variables), the constraint
4008 system is built using the following layout:
4010 "cst | distance vars | index vars".
4014 init_omega_for_ddr (struct data_dependence_relation
*ddr
,
4015 bool *maybe_dependent
)
4020 *maybe_dependent
= true;
4022 if (same_access_functions (ddr
))
4025 lambda_vector dir_v
;
4027 /* Save the 0 vector. */
4028 save_dist_v (ddr
, lambda_vector_new (DDR_NB_LOOPS (ddr
)));
4029 dir_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
4030 for (j
= 0; j
< DDR_NB_LOOPS (ddr
); j
++)
4031 dir_v
[j
] = dir_equal
;
4032 save_dir_v (ddr
, dir_v
);
4034 /* Save the dependences carried by outer loops. */
4035 pb
= omega_alloc_problem (2 * DDR_NB_LOOPS (ddr
), DDR_NB_LOOPS (ddr
));
4036 res
= init_omega_for_ddr_1 (DDR_A (ddr
), DDR_B (ddr
), ddr
, pb
,
4038 omega_free_problem (pb
);
4042 /* Omega expects the protected variables (those that have to be kept
4043 after elimination) to appear first in the constraint system.
4044 These variables are the distance variables. In the following
4045 initialization we declare NB_LOOPS safe variables, and the total
4046 number of variables for the constraint system is 2*NB_LOOPS. */
4047 pb
= omega_alloc_problem (2 * DDR_NB_LOOPS (ddr
), DDR_NB_LOOPS (ddr
));
4048 res
= init_omega_for_ddr_1 (DDR_A (ddr
), DDR_B (ddr
), ddr
, pb
,
4050 omega_free_problem (pb
);
4052 /* Stop computation if not decidable, or no dependence. */
4053 if (res
== false || *maybe_dependent
== false)
4056 pb
= omega_alloc_problem (2 * DDR_NB_LOOPS (ddr
), DDR_NB_LOOPS (ddr
));
4057 res
= init_omega_for_ddr_1 (DDR_B (ddr
), DDR_A (ddr
), ddr
, pb
,
4059 omega_free_problem (pb
);
4064 /* Return true when DDR contains the same information as that stored
4065 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
4068 ddr_consistent_p (FILE *file
,
4069 struct data_dependence_relation
*ddr
,
4070 vec
<lambda_vector
> dist_vects
,
4071 vec
<lambda_vector
> dir_vects
)
4075 /* If dump_file is set, output there. */
4076 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4079 if (dist_vects
.length () != DDR_NUM_DIST_VECTS (ddr
))
4081 lambda_vector b_dist_v
;
4082 fprintf (file
, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
4083 dist_vects
.length (),
4084 DDR_NUM_DIST_VECTS (ddr
));
4086 fprintf (file
, "Banerjee dist vectors:\n");
4087 FOR_EACH_VEC_ELT (dist_vects
, i
, b_dist_v
)
4088 print_lambda_vector (file
, b_dist_v
, DDR_NB_LOOPS (ddr
));
4090 fprintf (file
, "Omega dist vectors:\n");
4091 for (i
= 0; i
< DDR_NUM_DIST_VECTS (ddr
); i
++)
4092 print_lambda_vector (file
, DDR_DIST_VECT (ddr
, i
), DDR_NB_LOOPS (ddr
));
4094 fprintf (file
, "data dependence relation:\n");
4095 dump_data_dependence_relation (file
, ddr
);
4097 fprintf (file
, ")\n");
4101 if (dir_vects
.length () != DDR_NUM_DIR_VECTS (ddr
))
4103 fprintf (file
, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
4104 dir_vects
.length (),
4105 DDR_NUM_DIR_VECTS (ddr
));
4109 for (i
= 0; i
< DDR_NUM_DIST_VECTS (ddr
); i
++)
4111 lambda_vector a_dist_v
;
4112 lambda_vector b_dist_v
= DDR_DIST_VECT (ddr
, i
);
4114 /* Distance vectors are not ordered in the same way in the DDR
4115 and in the DIST_VECTS: search for a matching vector. */
4116 FOR_EACH_VEC_ELT (dist_vects
, j
, a_dist_v
)
4117 if (lambda_vector_equal (a_dist_v
, b_dist_v
, DDR_NB_LOOPS (ddr
)))
4120 if (j
== dist_vects
.length ())
4122 fprintf (file
, "\n(Dist vectors from the first dependence analyzer:\n");
4123 print_dist_vectors (file
, dist_vects
, DDR_NB_LOOPS (ddr
));
4124 fprintf (file
, "not found in Omega dist vectors:\n");
4125 print_dist_vectors (file
, DDR_DIST_VECTS (ddr
), DDR_NB_LOOPS (ddr
));
4126 fprintf (file
, "data dependence relation:\n");
4127 dump_data_dependence_relation (file
, ddr
);
4128 fprintf (file
, ")\n");
4132 for (i
= 0; i
< DDR_NUM_DIR_VECTS (ddr
); i
++)
4134 lambda_vector a_dir_v
;
4135 lambda_vector b_dir_v
= DDR_DIR_VECT (ddr
, i
);
4137 /* Direction vectors are not ordered in the same way in the DDR
4138 and in the DIR_VECTS: search for a matching vector. */
4139 FOR_EACH_VEC_ELT (dir_vects
, j
, a_dir_v
)
4140 if (lambda_vector_equal (a_dir_v
, b_dir_v
, DDR_NB_LOOPS (ddr
)))
4143 if (j
== dist_vects
.length ())
4145 fprintf (file
, "\n(Dir vectors from the first dependence analyzer:\n");
4146 print_dir_vectors (file
, dir_vects
, DDR_NB_LOOPS (ddr
));
4147 fprintf (file
, "not found in Omega dir vectors:\n");
4148 print_dir_vectors (file
, DDR_DIR_VECTS (ddr
), DDR_NB_LOOPS (ddr
));
4149 fprintf (file
, "data dependence relation:\n");
4150 dump_data_dependence_relation (file
, ddr
);
4151 fprintf (file
, ")\n");
4158 /* This computes the affine dependence relation between A and B with
4159 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
4160 independence between two accesses, while CHREC_DONT_KNOW is used
4161 for representing the unknown relation.
4163 Note that it is possible to stop the computation of the dependence
4164 relation the first time we detect a CHREC_KNOWN element for a given
4168 compute_affine_dependence (struct data_dependence_relation
*ddr
,
4169 struct loop
*loop_nest
)
4171 struct data_reference
*dra
= DDR_A (ddr
);
4172 struct data_reference
*drb
= DDR_B (ddr
);
4174 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4176 fprintf (dump_file
, "(compute_affine_dependence\n");
4177 fprintf (dump_file
, " stmt_a: ");
4178 print_gimple_stmt (dump_file
, DR_STMT (dra
), 0, TDF_SLIM
);
4179 fprintf (dump_file
, " stmt_b: ");
4180 print_gimple_stmt (dump_file
, DR_STMT (drb
), 0, TDF_SLIM
);
4183 /* Analyze only when the dependence relation is not yet known. */
4184 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
4186 dependence_stats
.num_dependence_tests
++;
4188 if (access_functions_are_affine_or_constant_p (dra
, loop_nest
)
4189 && access_functions_are_affine_or_constant_p (drb
, loop_nest
))
4191 subscript_dependence_tester (ddr
, loop_nest
);
4193 if (flag_check_data_deps
)
4195 /* Dump the dependences from the first algorithm. */
4196 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4198 fprintf (dump_file
, "\n\nBanerjee Analyzer\n");
4199 dump_data_dependence_relation (dump_file
, ddr
);
4202 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
4204 bool maybe_dependent
;
4205 vec
<lambda_vector
> dir_vects
, dist_vects
;
4207 /* Save the result of the first DD analyzer. */
4208 dist_vects
= DDR_DIST_VECTS (ddr
);
4209 dir_vects
= DDR_DIR_VECTS (ddr
);
4211 /* Reset the information. */
4212 DDR_DIST_VECTS (ddr
).create (0);
4213 DDR_DIR_VECTS (ddr
).create (0);
4215 /* Compute the same information using Omega. */
4216 if (!init_omega_for_ddr (ddr
, &maybe_dependent
))
4217 goto csys_dont_know
;
4219 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4221 fprintf (dump_file
, "Omega Analyzer\n");
4222 dump_data_dependence_relation (dump_file
, ddr
);
4225 /* Check that we get the same information. */
4226 if (maybe_dependent
)
4227 gcc_assert (ddr_consistent_p (stderr
, ddr
, dist_vects
,
4233 /* As a last case, if the dependence cannot be determined, or if
4234 the dependence is considered too difficult to determine, answer
4239 dependence_stats
.num_dependence_undetermined
++;
4241 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4243 fprintf (dump_file
, "Data ref a:\n");
4244 dump_data_reference (dump_file
, dra
);
4245 fprintf (dump_file
, "Data ref b:\n");
4246 dump_data_reference (dump_file
, drb
);
4247 fprintf (dump_file
, "affine dependence test not usable: access function not affine or constant.\n");
4249 finalize_ddr_dependent (ddr
, chrec_dont_know
);
4253 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4255 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
4256 fprintf (dump_file
, ") -> no dependence\n");
4257 else if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
4258 fprintf (dump_file
, ") -> dependence analysis failed\n");
4260 fprintf (dump_file
, ")\n");
4264 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4265 the data references in DATAREFS, in the LOOP_NEST. When
4266 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4267 relations. Return true when successful, i.e. data references number
4268 is small enough to be handled. */
4271 compute_all_dependences (vec
<data_reference_p
> datarefs
,
4272 vec
<ddr_p
> *dependence_relations
,
4273 vec
<loop_p
> loop_nest
,
4274 bool compute_self_and_rr
)
4276 struct data_dependence_relation
*ddr
;
4277 struct data_reference
*a
, *b
;
4280 if ((int) datarefs
.length ()
4281 > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS
))
4283 struct data_dependence_relation
*ddr
;
4285 /* Insert a single relation into dependence_relations:
4287 ddr
= initialize_data_dependence_relation (NULL
, NULL
, loop_nest
);
4288 dependence_relations
->safe_push (ddr
);
4292 FOR_EACH_VEC_ELT (datarefs
, i
, a
)
4293 for (j
= i
+ 1; datarefs
.iterate (j
, &b
); j
++)
4294 if (DR_IS_WRITE (a
) || DR_IS_WRITE (b
) || compute_self_and_rr
)
4296 ddr
= initialize_data_dependence_relation (a
, b
, loop_nest
);
4297 dependence_relations
->safe_push (ddr
);
4298 if (loop_nest
.exists ())
4299 compute_affine_dependence (ddr
, loop_nest
[0]);
4302 if (compute_self_and_rr
)
4303 FOR_EACH_VEC_ELT (datarefs
, i
, a
)
4305 ddr
= initialize_data_dependence_relation (a
, a
, loop_nest
);
4306 dependence_relations
->safe_push (ddr
);
4307 if (loop_nest
.exists ())
4308 compute_affine_dependence (ddr
, loop_nest
[0]);
4314 /* Describes a location of a memory reference. */
4316 typedef struct data_ref_loc_d
4318 /* Position of the memory reference. */
4321 /* True if the memory reference is read. */
4326 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4327 true if STMT clobbers memory, false otherwise. */
4330 get_references_in_stmt (gimple stmt
, vec
<data_ref_loc
, va_heap
> *references
)
4332 bool clobbers_memory
= false;
4335 enum gimple_code stmt_code
= gimple_code (stmt
);
4337 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4338 As we cannot model data-references to not spelled out
4339 accesses give up if they may occur. */
4340 if (stmt_code
== GIMPLE_CALL
4341 && !(gimple_call_flags (stmt
) & ECF_CONST
))
4343 /* Allow IFN_GOMP_SIMD_LANE in their own loops. */
4344 if (gimple_call_internal_p (stmt
)
4345 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
4347 struct loop
*loop
= gimple_bb (stmt
)->loop_father
;
4348 tree uid
= gimple_call_arg (stmt
, 0);
4349 gcc_assert (TREE_CODE (uid
) == SSA_NAME
);
4351 || loop
->simduid
!= SSA_NAME_VAR (uid
))
4352 clobbers_memory
= true;
4355 clobbers_memory
= true;
4357 else if (stmt_code
== GIMPLE_ASM
4358 && (gimple_asm_volatile_p (stmt
) || gimple_vuse (stmt
)))
4359 clobbers_memory
= true;
4361 if (!gimple_vuse (stmt
))
4362 return clobbers_memory
;
4364 if (stmt_code
== GIMPLE_ASSIGN
)
4367 op0
= gimple_assign_lhs_ptr (stmt
);
4368 op1
= gimple_assign_rhs1_ptr (stmt
);
4371 || (REFERENCE_CLASS_P (*op1
)
4372 && (base
= get_base_address (*op1
))
4373 && TREE_CODE (base
) != SSA_NAME
))
4377 references
->safe_push (ref
);
4380 else if (stmt_code
== GIMPLE_CALL
)
4384 op0
= gimple_call_lhs_ptr (stmt
);
4385 n
= gimple_call_num_args (stmt
);
4386 for (i
= 0; i
< n
; i
++)
4388 op1
= gimple_call_arg_ptr (stmt
, i
);
4391 || (REFERENCE_CLASS_P (*op1
) && get_base_address (*op1
)))
4395 references
->safe_push (ref
);
4400 return clobbers_memory
;
4404 || (REFERENCE_CLASS_P (*op0
) && get_base_address (*op0
))))
4407 ref
.is_read
= false;
4408 references
->safe_push (ref
);
4410 return clobbers_memory
;
4413 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4414 reference, returns false, otherwise returns true. NEST is the outermost
4415 loop of the loop nest in which the references should be analyzed. */
4418 find_data_references_in_stmt (struct loop
*nest
, gimple stmt
,
4419 vec
<data_reference_p
> *datarefs
)
4422 stack_vec
<data_ref_loc
, 2> references
;
4425 data_reference_p dr
;
4427 if (get_references_in_stmt (stmt
, &references
))
4430 FOR_EACH_VEC_ELT (references
, i
, ref
)
4432 dr
= create_data_ref (nest
, loop_containing_stmt (stmt
),
4433 *ref
->pos
, stmt
, ref
->is_read
);
4434 gcc_assert (dr
!= NULL
);
4435 datarefs
->safe_push (dr
);
4437 references
.release ();
4441 /* Stores the data references in STMT to DATAREFS. If there is an
4442 unanalyzable reference, returns false, otherwise returns true.
4443 NEST is the outermost loop of the loop nest in which the references
4444 should be instantiated, LOOP is the loop in which the references
4445 should be analyzed. */
4448 graphite_find_data_references_in_stmt (loop_p nest
, loop_p loop
, gimple stmt
,
4449 vec
<data_reference_p
> *datarefs
)
4452 stack_vec
<data_ref_loc
, 2> references
;
4455 data_reference_p dr
;
4457 if (get_references_in_stmt (stmt
, &references
))
4460 FOR_EACH_VEC_ELT (references
, i
, ref
)
4462 dr
= create_data_ref (nest
, loop
, *ref
->pos
, stmt
, ref
->is_read
);
4463 gcc_assert (dr
!= NULL
);
4464 datarefs
->safe_push (dr
);
4467 references
.release ();
4471 /* Search the data references in LOOP, and record the information into
4472 DATAREFS. Returns chrec_dont_know when failing to analyze a
4473 difficult case, returns NULL_TREE otherwise. */
4476 find_data_references_in_bb (struct loop
*loop
, basic_block bb
,
4477 vec
<data_reference_p
> *datarefs
)
4479 gimple_stmt_iterator bsi
;
4481 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
4483 gimple stmt
= gsi_stmt (bsi
);
4485 if (!find_data_references_in_stmt (loop
, stmt
, datarefs
))
4487 struct data_reference
*res
;
4488 res
= XCNEW (struct data_reference
);
4489 datarefs
->safe_push (res
);
4491 return chrec_dont_know
;
4498 /* Search the data references in LOOP, and record the information into
4499 DATAREFS. Returns chrec_dont_know when failing to analyze a
4500 difficult case, returns NULL_TREE otherwise.
4502 TODO: This function should be made smarter so that it can handle address
4503 arithmetic as if they were array accesses, etc. */
4506 find_data_references_in_loop (struct loop
*loop
,
4507 vec
<data_reference_p
> *datarefs
)
4509 basic_block bb
, *bbs
;
4512 bbs
= get_loop_body_in_dom_order (loop
);
4514 for (i
= 0; i
< loop
->num_nodes
; i
++)
4518 if (find_data_references_in_bb (loop
, bb
, datarefs
) == chrec_dont_know
)
4521 return chrec_dont_know
;
4529 /* Recursive helper function. */
4532 find_loop_nest_1 (struct loop
*loop
, vec
<loop_p
> *loop_nest
)
4534 /* Inner loops of the nest should not contain siblings. Example:
4535 when there are two consecutive loops,
4546 the dependence relation cannot be captured by the distance
4551 loop_nest
->safe_push (loop
);
4553 return find_loop_nest_1 (loop
->inner
, loop_nest
);
4557 /* Return false when the LOOP is not well nested. Otherwise return
4558 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4559 contain the loops from the outermost to the innermost, as they will
4560 appear in the classic distance vector. */
4563 find_loop_nest (struct loop
*loop
, vec
<loop_p
> *loop_nest
)
4565 loop_nest
->safe_push (loop
);
4567 return find_loop_nest_1 (loop
->inner
, loop_nest
);
4571 /* Returns true when the data dependences have been computed, false otherwise.
4572 Given a loop nest LOOP, the following vectors are returned:
4573 DATAREFS is initialized to all the array elements contained in this loop,
4574 DEPENDENCE_RELATIONS contains the relations between the data references.
4575 Compute read-read and self relations if
4576 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4579 compute_data_dependences_for_loop (struct loop
*loop
,
4580 bool compute_self_and_read_read_dependences
,
4581 vec
<loop_p
> *loop_nest
,
4582 vec
<data_reference_p
> *datarefs
,
4583 vec
<ddr_p
> *dependence_relations
)
4587 memset (&dependence_stats
, 0, sizeof (dependence_stats
));
4589 /* If the loop nest is not well formed, or one of the data references
4590 is not computable, give up without spending time to compute other
4593 || !find_loop_nest (loop
, loop_nest
)
4594 || find_data_references_in_loop (loop
, datarefs
) == chrec_dont_know
4595 || !compute_all_dependences (*datarefs
, dependence_relations
, *loop_nest
,
4596 compute_self_and_read_read_dependences
))
4599 if (dump_file
&& (dump_flags
& TDF_STATS
))
4601 fprintf (dump_file
, "Dependence tester statistics:\n");
4603 fprintf (dump_file
, "Number of dependence tests: %d\n",
4604 dependence_stats
.num_dependence_tests
);
4605 fprintf (dump_file
, "Number of dependence tests classified dependent: %d\n",
4606 dependence_stats
.num_dependence_dependent
);
4607 fprintf (dump_file
, "Number of dependence tests classified independent: %d\n",
4608 dependence_stats
.num_dependence_independent
);
4609 fprintf (dump_file
, "Number of undetermined dependence tests: %d\n",
4610 dependence_stats
.num_dependence_undetermined
);
4612 fprintf (dump_file
, "Number of subscript tests: %d\n",
4613 dependence_stats
.num_subscript_tests
);
4614 fprintf (dump_file
, "Number of undetermined subscript tests: %d\n",
4615 dependence_stats
.num_subscript_undetermined
);
4616 fprintf (dump_file
, "Number of same subscript function: %d\n",
4617 dependence_stats
.num_same_subscript_function
);
4619 fprintf (dump_file
, "Number of ziv tests: %d\n",
4620 dependence_stats
.num_ziv
);
4621 fprintf (dump_file
, "Number of ziv tests returning dependent: %d\n",
4622 dependence_stats
.num_ziv_dependent
);
4623 fprintf (dump_file
, "Number of ziv tests returning independent: %d\n",
4624 dependence_stats
.num_ziv_independent
);
4625 fprintf (dump_file
, "Number of ziv tests unimplemented: %d\n",
4626 dependence_stats
.num_ziv_unimplemented
);
4628 fprintf (dump_file
, "Number of siv tests: %d\n",
4629 dependence_stats
.num_siv
);
4630 fprintf (dump_file
, "Number of siv tests returning dependent: %d\n",
4631 dependence_stats
.num_siv_dependent
);
4632 fprintf (dump_file
, "Number of siv tests returning independent: %d\n",
4633 dependence_stats
.num_siv_independent
);
4634 fprintf (dump_file
, "Number of siv tests unimplemented: %d\n",
4635 dependence_stats
.num_siv_unimplemented
);
4637 fprintf (dump_file
, "Number of miv tests: %d\n",
4638 dependence_stats
.num_miv
);
4639 fprintf (dump_file
, "Number of miv tests returning dependent: %d\n",
4640 dependence_stats
.num_miv_dependent
);
4641 fprintf (dump_file
, "Number of miv tests returning independent: %d\n",
4642 dependence_stats
.num_miv_independent
);
4643 fprintf (dump_file
, "Number of miv tests unimplemented: %d\n",
4644 dependence_stats
.num_miv_unimplemented
);
4650 /* Returns true when the data dependences for the basic block BB have been
4651 computed, false otherwise.
4652 DATAREFS is initialized to all the array elements contained in this basic
4653 block, DEPENDENCE_RELATIONS contains the relations between the data
4654 references. Compute read-read and self relations if
4655 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4657 compute_data_dependences_for_bb (basic_block bb
,
4658 bool compute_self_and_read_read_dependences
,
4659 vec
<data_reference_p
> *datarefs
,
4660 vec
<ddr_p
> *dependence_relations
)
4662 if (find_data_references_in_bb (NULL
, bb
, datarefs
) == chrec_dont_know
)
4665 return compute_all_dependences (*datarefs
, dependence_relations
, vNULL
,
4666 compute_self_and_read_read_dependences
);
4669 /* Entry point (for testing only). Analyze all the data references
4670 and the dependence relations in LOOP.
4672 The data references are computed first.
4674 A relation on these nodes is represented by a complete graph. Some
4675 of the relations could be of no interest, thus the relations can be
4678 In the following function we compute all the relations. This is
4679 just a first implementation that is here for:
4680 - for showing how to ask for the dependence relations,
4681 - for the debugging the whole dependence graph,
4682 - for the dejagnu testcases and maintenance.
4684 It is possible to ask only for a part of the graph, avoiding to
4685 compute the whole dependence graph. The computed dependences are
4686 stored in a knowledge base (KB) such that later queries don't
4687 recompute the same information. The implementation of this KB is
4688 transparent to the optimizer, and thus the KB can be changed with a
4689 more efficient implementation, or the KB could be disabled. */
4691 analyze_all_data_dependences (struct loop
*loop
)
4694 int nb_data_refs
= 10;
4695 vec
<data_reference_p
> datarefs
;
4696 datarefs
.create (nb_data_refs
);
4697 vec
<ddr_p
> dependence_relations
;
4698 dependence_relations
.create (nb_data_refs
* nb_data_refs
);
4699 vec
<loop_p
> loop_nest
;
4700 loop_nest
.create (3);
4702 /* Compute DDs on the whole function. */
4703 compute_data_dependences_for_loop (loop
, false, &loop_nest
, &datarefs
,
4704 &dependence_relations
);
4708 dump_data_dependence_relations (dump_file
, dependence_relations
);
4709 fprintf (dump_file
, "\n\n");
4711 if (dump_flags
& TDF_DETAILS
)
4712 dump_dist_dir_vectors (dump_file
, dependence_relations
);
4714 if (dump_flags
& TDF_STATS
)
4716 unsigned nb_top_relations
= 0;
4717 unsigned nb_bot_relations
= 0;
4718 unsigned nb_chrec_relations
= 0;
4719 struct data_dependence_relation
*ddr
;
4721 FOR_EACH_VEC_ELT (dependence_relations
, i
, ddr
)
4723 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr
)))
4726 else if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
4730 nb_chrec_relations
++;
4733 gather_stats_on_scev_database ();
4737 loop_nest
.release ();
4738 free_dependence_relations (dependence_relations
);
4739 free_data_refs (datarefs
);
4742 /* Computes all the data dependences and check that the results of
4743 several analyzers are the same. */
4746 tree_check_data_deps (void)
4748 struct loop
*loop_nest
;
4750 FOR_EACH_LOOP (loop_nest
, 0)
4751 analyze_all_data_dependences (loop_nest
);
4754 /* Free the memory used by a data dependence relation DDR. */
4757 free_dependence_relation (struct data_dependence_relation
*ddr
)
4762 if (DDR_SUBSCRIPTS (ddr
).exists ())
4763 free_subscripts (DDR_SUBSCRIPTS (ddr
));
4764 DDR_DIST_VECTS (ddr
).release ();
4765 DDR_DIR_VECTS (ddr
).release ();
4770 /* Free the memory used by the data dependence relations from
4771 DEPENDENCE_RELATIONS. */
4774 free_dependence_relations (vec
<ddr_p
> dependence_relations
)
4777 struct data_dependence_relation
*ddr
;
4779 FOR_EACH_VEC_ELT (dependence_relations
, i
, ddr
)
4781 free_dependence_relation (ddr
);
4783 dependence_relations
.release ();
4786 /* Free the memory used by the data references from DATAREFS. */
4789 free_data_refs (vec
<data_reference_p
> datarefs
)
4792 struct data_reference
*dr
;
4794 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
4796 datarefs
.release ();