1 /* Data references and dependences detectors.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* This pass walks a given loop structure searching for array
23 references. The information about the array accesses is recorded
24 in DATA_REFERENCE structures.
26 The basic test for determining the dependences is:
27 given two access functions chrec1 and chrec2 to a same array, and
28 x and y two vectors from the iteration domain, the same element of
29 the array is accessed twice at iterations x and y if and only if:
30 | chrec1 (x) == chrec2 (y).
32 The goals of this analysis are:
34 - to determine the independence: the relation between two
35 independent accesses is qualified with the chrec_known (this
36 information allows a loop parallelization),
38 - when two data references access the same data, to qualify the
39 dependence relation with classic dependence representations:
43 - loop carried level dependence
44 - polyhedron dependence
45 or with the chains of recurrences based representation,
47 - to define a knowledge base for storing the data dependence
50 - to define an interface to access this data.
55 - subscript: given two array accesses a subscript is the tuple
56 composed of the access functions for a given dimension. Example:
57 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
58 (f1, g1), (f2, g2), (f3, g3).
60 - Diophantine equation: an equation whose coefficients and
61 solutions are integer constants, for example the equation
63 has an integer solution x = 1 and y = -1.
67 - "Advanced Compilation for High Performance Computing" by Randy
68 Allen and Ken Kennedy.
69 http://citeseer.ist.psu.edu/goff91practical.html
71 - "Loop Transformations for Restructuring Compilers - The Foundations"
79 #include "coretypes.h"
80 #include "gimple-pretty-print.h"
81 #include "tree-flow.h"
83 #include "tree-data-ref.h"
84 #include "tree-scalar-evolution.h"
85 #include "tree-pass.h"
86 #include "langhooks.h"
88 static struct datadep_stats
90 int num_dependence_tests
;
91 int num_dependence_dependent
;
92 int num_dependence_independent
;
93 int num_dependence_undetermined
;
95 int num_subscript_tests
;
96 int num_subscript_undetermined
;
97 int num_same_subscript_function
;
100 int num_ziv_independent
;
101 int num_ziv_dependent
;
102 int num_ziv_unimplemented
;
105 int num_siv_independent
;
106 int num_siv_dependent
;
107 int num_siv_unimplemented
;
110 int num_miv_independent
;
111 int num_miv_dependent
;
112 int num_miv_unimplemented
;
115 static bool subscript_dependence_tester_1 (struct data_dependence_relation
*,
116 struct data_reference
*,
117 struct data_reference
*,
119 /* Returns true iff A divides B. */
122 tree_fold_divides_p (const_tree a
, const_tree b
)
124 gcc_assert (TREE_CODE (a
) == INTEGER_CST
);
125 gcc_assert (TREE_CODE (b
) == INTEGER_CST
);
126 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR
, b
, a
));
129 /* Returns true iff A divides B. */
132 int_divides_p (int a
, int b
)
134 return ((b
% a
) == 0);
139 /* Dump into FILE all the data references from DATAREFS. */
142 dump_data_references (FILE *file
, VEC (data_reference_p
, heap
) *datarefs
)
145 struct data_reference
*dr
;
147 FOR_EACH_VEC_ELT (data_reference_p
, datarefs
, i
, dr
)
148 dump_data_reference (file
, dr
);
151 /* Dump into STDERR all the data references from DATAREFS. */
154 debug_data_references (VEC (data_reference_p
, heap
) *datarefs
)
156 dump_data_references (stderr
, datarefs
);
159 /* Dump to STDERR all the dependence relations from DDRS. */
162 debug_data_dependence_relations (VEC (ddr_p
, heap
) *ddrs
)
164 dump_data_dependence_relations (stderr
, ddrs
);
167 /* Dump into FILE all the dependence relations from DDRS. */
170 dump_data_dependence_relations (FILE *file
,
171 VEC (ddr_p
, heap
) *ddrs
)
174 struct data_dependence_relation
*ddr
;
176 FOR_EACH_VEC_ELT (ddr_p
, ddrs
, i
, ddr
)
177 dump_data_dependence_relation (file
, ddr
);
180 /* Print to STDERR the data_reference DR. */
183 debug_data_reference (struct data_reference
*dr
)
185 dump_data_reference (stderr
, dr
);
188 /* Dump function for a DATA_REFERENCE structure. */
191 dump_data_reference (FILE *outf
,
192 struct data_reference
*dr
)
196 fprintf (outf
, "#(Data Ref: \n");
197 fprintf (outf
, "# bb: %d \n", gimple_bb (DR_STMT (dr
))->index
);
198 fprintf (outf
, "# stmt: ");
199 print_gimple_stmt (outf
, DR_STMT (dr
), 0, 0);
200 fprintf (outf
, "# ref: ");
201 print_generic_stmt (outf
, DR_REF (dr
), 0);
202 fprintf (outf
, "# base_object: ");
203 print_generic_stmt (outf
, DR_BASE_OBJECT (dr
), 0);
205 for (i
= 0; i
< DR_NUM_DIMENSIONS (dr
); i
++)
207 fprintf (outf
, "# Access function %d: ", i
);
208 print_generic_stmt (outf
, DR_ACCESS_FN (dr
, i
), 0);
210 fprintf (outf
, "#)\n");
213 /* Dumps the affine function described by FN to the file OUTF. */
216 dump_affine_function (FILE *outf
, affine_fn fn
)
221 print_generic_expr (outf
, VEC_index (tree
, fn
, 0), TDF_SLIM
);
222 for (i
= 1; VEC_iterate (tree
, fn
, i
, coef
); i
++)
224 fprintf (outf
, " + ");
225 print_generic_expr (outf
, coef
, TDF_SLIM
);
226 fprintf (outf
, " * x_%u", i
);
230 /* Dumps the conflict function CF to the file OUTF. */
233 dump_conflict_function (FILE *outf
, conflict_function
*cf
)
237 if (cf
->n
== NO_DEPENDENCE
)
238 fprintf (outf
, "no dependence\n");
239 else if (cf
->n
== NOT_KNOWN
)
240 fprintf (outf
, "not known\n");
243 for (i
= 0; i
< cf
->n
; i
++)
246 dump_affine_function (outf
, cf
->fns
[i
]);
247 fprintf (outf
, "]\n");
252 /* Dump function for a SUBSCRIPT structure. */
255 dump_subscript (FILE *outf
, struct subscript
*subscript
)
257 conflict_function
*cf
= SUB_CONFLICTS_IN_A (subscript
);
259 fprintf (outf
, "\n (subscript \n");
260 fprintf (outf
, " iterations_that_access_an_element_twice_in_A: ");
261 dump_conflict_function (outf
, cf
);
262 if (CF_NONTRIVIAL_P (cf
))
264 tree last_iteration
= SUB_LAST_CONFLICT (subscript
);
265 fprintf (outf
, " last_conflict: ");
266 print_generic_stmt (outf
, last_iteration
, 0);
269 cf
= SUB_CONFLICTS_IN_B (subscript
);
270 fprintf (outf
, " iterations_that_access_an_element_twice_in_B: ");
271 dump_conflict_function (outf
, cf
);
272 if (CF_NONTRIVIAL_P (cf
))
274 tree last_iteration
= SUB_LAST_CONFLICT (subscript
);
275 fprintf (outf
, " last_conflict: ");
276 print_generic_stmt (outf
, last_iteration
, 0);
279 fprintf (outf
, " (Subscript distance: ");
280 print_generic_stmt (outf
, SUB_DISTANCE (subscript
), 0);
281 fprintf (outf
, " )\n");
282 fprintf (outf
, " )\n");
285 /* Print the classic direction vector DIRV to OUTF. */
288 print_direction_vector (FILE *outf
,
294 for (eq
= 0; eq
< length
; eq
++)
296 enum data_dependence_direction dir
= ((enum data_dependence_direction
)
302 fprintf (outf
, " +");
305 fprintf (outf
, " -");
308 fprintf (outf
, " =");
310 case dir_positive_or_equal
:
311 fprintf (outf
, " +=");
313 case dir_positive_or_negative
:
314 fprintf (outf
, " +-");
316 case dir_negative_or_equal
:
317 fprintf (outf
, " -=");
320 fprintf (outf
, " *");
323 fprintf (outf
, "indep");
327 fprintf (outf
, "\n");
330 /* Print a vector of direction vectors. */
333 print_dir_vectors (FILE *outf
, VEC (lambda_vector
, heap
) *dir_vects
,
339 FOR_EACH_VEC_ELT (lambda_vector
, dir_vects
, j
, v
)
340 print_direction_vector (outf
, v
, length
);
343 /* Print out a vector VEC of length N to OUTFILE. */
346 print_lambda_vector (FILE * outfile
, lambda_vector vector
, int n
)
350 for (i
= 0; i
< n
; i
++)
351 fprintf (outfile
, "%3d ", vector
[i
]);
352 fprintf (outfile
, "\n");
355 /* Print a vector of distance vectors. */
358 print_dist_vectors (FILE *outf
, VEC (lambda_vector
, heap
) *dist_vects
,
364 FOR_EACH_VEC_ELT (lambda_vector
, dist_vects
, j
, v
)
365 print_lambda_vector (outf
, v
, length
);
371 debug_data_dependence_relation (struct data_dependence_relation
*ddr
)
373 dump_data_dependence_relation (stderr
, ddr
);
376 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
379 dump_data_dependence_relation (FILE *outf
,
380 struct data_dependence_relation
*ddr
)
382 struct data_reference
*dra
, *drb
;
384 fprintf (outf
, "(Data Dep: \n");
386 if (!ddr
|| DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
393 dump_data_reference (outf
, dra
);
395 fprintf (outf
, " (nil)\n");
397 dump_data_reference (outf
, drb
);
399 fprintf (outf
, " (nil)\n");
401 fprintf (outf
, " (don't know)\n)\n");
407 dump_data_reference (outf
, dra
);
408 dump_data_reference (outf
, drb
);
410 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
411 fprintf (outf
, " (no dependence)\n");
413 else if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
418 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
420 fprintf (outf
, " access_fn_A: ");
421 print_generic_stmt (outf
, DR_ACCESS_FN (dra
, i
), 0);
422 fprintf (outf
, " access_fn_B: ");
423 print_generic_stmt (outf
, DR_ACCESS_FN (drb
, i
), 0);
424 dump_subscript (outf
, DDR_SUBSCRIPT (ddr
, i
));
427 fprintf (outf
, " inner loop index: %d\n", DDR_INNER_LOOP (ddr
));
428 fprintf (outf
, " loop nest: (");
429 FOR_EACH_VEC_ELT (loop_p
, DDR_LOOP_NEST (ddr
), i
, loopi
)
430 fprintf (outf
, "%d ", loopi
->num
);
431 fprintf (outf
, ")\n");
433 for (i
= 0; i
< DDR_NUM_DIST_VECTS (ddr
); i
++)
435 fprintf (outf
, " distance_vector: ");
436 print_lambda_vector (outf
, DDR_DIST_VECT (ddr
, i
),
440 for (i
= 0; i
< DDR_NUM_DIR_VECTS (ddr
); i
++)
442 fprintf (outf
, " direction_vector: ");
443 print_direction_vector (outf
, DDR_DIR_VECT (ddr
, i
),
448 fprintf (outf
, ")\n");
451 /* Dump function for a DATA_DEPENDENCE_DIRECTION structure. */
454 dump_data_dependence_direction (FILE *file
,
455 enum data_dependence_direction dir
)
471 case dir_positive_or_negative
:
472 fprintf (file
, "+-");
475 case dir_positive_or_equal
:
476 fprintf (file
, "+=");
479 case dir_negative_or_equal
:
480 fprintf (file
, "-=");
492 /* Dumps the distance and direction vectors in FILE. DDRS contains
493 the dependence relations, and VECT_SIZE is the size of the
494 dependence vectors, or in other words the number of loops in the
498 dump_dist_dir_vectors (FILE *file
, VEC (ddr_p
, heap
) *ddrs
)
501 struct data_dependence_relation
*ddr
;
504 FOR_EACH_VEC_ELT (ddr_p
, ddrs
, i
, ddr
)
505 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
&& DDR_AFFINE_P (ddr
))
507 FOR_EACH_VEC_ELT (lambda_vector
, DDR_DIST_VECTS (ddr
), j
, v
)
509 fprintf (file
, "DISTANCE_V (");
510 print_lambda_vector (file
, v
, DDR_NB_LOOPS (ddr
));
511 fprintf (file
, ")\n");
514 FOR_EACH_VEC_ELT (lambda_vector
, DDR_DIR_VECTS (ddr
), j
, v
)
516 fprintf (file
, "DIRECTION_V (");
517 print_direction_vector (file
, v
, DDR_NB_LOOPS (ddr
));
518 fprintf (file
, ")\n");
522 fprintf (file
, "\n\n");
525 /* Dumps the data dependence relations DDRS in FILE. */
528 dump_ddrs (FILE *file
, VEC (ddr_p
, heap
) *ddrs
)
531 struct data_dependence_relation
*ddr
;
533 FOR_EACH_VEC_ELT (ddr_p
, ddrs
, i
, ddr
)
534 dump_data_dependence_relation (file
, ddr
);
536 fprintf (file
, "\n\n");
539 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
540 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
541 constant of type ssizetype, and returns true. If we cannot do this
542 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
546 split_constant_offset_1 (tree type
, tree op0
, enum tree_code code
, tree op1
,
547 tree
*var
, tree
*off
)
551 enum tree_code ocode
= code
;
559 *var
= build_int_cst (type
, 0);
560 *off
= fold_convert (ssizetype
, op0
);
563 case POINTER_PLUS_EXPR
:
568 split_constant_offset (op0
, &var0
, &off0
);
569 split_constant_offset (op1
, &var1
, &off1
);
570 *var
= fold_build2 (code
, type
, var0
, var1
);
571 *off
= size_binop (ocode
, off0
, off1
);
575 if (TREE_CODE (op1
) != INTEGER_CST
)
578 split_constant_offset (op0
, &var0
, &off0
);
579 *var
= fold_build2 (MULT_EXPR
, type
, var0
, op1
);
580 *off
= size_binop (MULT_EXPR
, off0
, fold_convert (ssizetype
, op1
));
586 HOST_WIDE_INT pbitsize
, pbitpos
;
587 enum machine_mode pmode
;
588 int punsignedp
, pvolatilep
;
590 op0
= TREE_OPERAND (op0
, 0);
591 if (!handled_component_p (op0
))
594 base
= get_inner_reference (op0
, &pbitsize
, &pbitpos
, &poffset
,
595 &pmode
, &punsignedp
, &pvolatilep
, false);
597 if (pbitpos
% BITS_PER_UNIT
!= 0)
599 base
= build_fold_addr_expr (base
);
600 off0
= ssize_int (pbitpos
/ BITS_PER_UNIT
);
604 split_constant_offset (poffset
, &poffset
, &off1
);
605 off0
= size_binop (PLUS_EXPR
, off0
, off1
);
606 if (POINTER_TYPE_P (TREE_TYPE (base
)))
607 base
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (base
),
608 base
, fold_convert (sizetype
, poffset
));
610 base
= fold_build2 (PLUS_EXPR
, TREE_TYPE (base
), base
,
611 fold_convert (TREE_TYPE (base
), poffset
));
614 var0
= fold_convert (type
, base
);
616 /* If variable length types are involved, punt, otherwise casts
617 might be converted into ARRAY_REFs in gimplify_conversion.
618 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
619 possibly no longer appears in current GIMPLE, might resurface.
620 This perhaps could run
621 if (CONVERT_EXPR_P (var0))
623 gimplify_conversion (&var0);
624 // Attempt to fill in any within var0 found ARRAY_REF's
625 // element size from corresponding op embedded ARRAY_REF,
626 // if unsuccessful, just punt.
628 while (POINTER_TYPE_P (type
))
629 type
= TREE_TYPE (type
);
630 if (int_size_in_bytes (type
) < 0)
640 gimple def_stmt
= SSA_NAME_DEF_STMT (op0
);
641 enum tree_code subcode
;
643 if (gimple_code (def_stmt
) != GIMPLE_ASSIGN
)
646 var0
= gimple_assign_rhs1 (def_stmt
);
647 subcode
= gimple_assign_rhs_code (def_stmt
);
648 var1
= gimple_assign_rhs2 (def_stmt
);
650 return split_constant_offset_1 (type
, var0
, subcode
, var1
, var
, off
);
654 /* We must not introduce undefined overflow, and we must not change the value.
655 Hence we're okay if the inner type doesn't overflow to start with
656 (pointer or signed), the outer type also is an integer or pointer
657 and the outer precision is at least as large as the inner. */
658 tree itype
= TREE_TYPE (op0
);
659 if ((POINTER_TYPE_P (itype
)
660 || (INTEGRAL_TYPE_P (itype
) && TYPE_OVERFLOW_UNDEFINED (itype
)))
661 && TYPE_PRECISION (type
) >= TYPE_PRECISION (itype
)
662 && (POINTER_TYPE_P (type
) || INTEGRAL_TYPE_P (type
)))
664 split_constant_offset (op0
, &var0
, off
);
665 *var
= fold_convert (type
, var0
);
676 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
677 will be ssizetype. */
680 split_constant_offset (tree exp
, tree
*var
, tree
*off
)
682 tree type
= TREE_TYPE (exp
), otype
, op0
, op1
, e
, o
;
686 *off
= ssize_int (0);
689 if (tree_is_chrec (exp
))
692 otype
= TREE_TYPE (exp
);
693 code
= TREE_CODE (exp
);
694 extract_ops_from_tree (exp
, &code
, &op0
, &op1
);
695 if (split_constant_offset_1 (otype
, op0
, code
, op1
, &e
, &o
))
697 *var
= fold_convert (type
, e
);
702 /* Returns the address ADDR of an object in a canonical shape (without nop
703 casts, and with type of pointer to the object). */
706 canonicalize_base_object_address (tree addr
)
712 /* The base address may be obtained by casting from integer, in that case
714 if (!POINTER_TYPE_P (TREE_TYPE (addr
)))
717 if (TREE_CODE (addr
) != ADDR_EXPR
)
720 return build_fold_addr_expr (TREE_OPERAND (addr
, 0));
723 /* Analyzes the behavior of the memory reference DR in the innermost loop or
724 basic block that contains it. Returns true if analysis succeed or false
728 dr_analyze_innermost (struct data_reference
*dr
)
730 gimple stmt
= DR_STMT (dr
);
731 struct loop
*loop
= loop_containing_stmt (stmt
);
732 tree ref
= DR_REF (dr
);
733 HOST_WIDE_INT pbitsize
, pbitpos
;
735 enum machine_mode pmode
;
736 int punsignedp
, pvolatilep
;
737 affine_iv base_iv
, offset_iv
;
738 tree init
, dinit
, step
;
739 bool in_loop
= (loop
&& loop
->num
);
741 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
742 fprintf (dump_file
, "analyze_innermost: ");
744 base
= get_inner_reference (ref
, &pbitsize
, &pbitpos
, &poffset
,
745 &pmode
, &punsignedp
, &pvolatilep
, false);
746 gcc_assert (base
!= NULL_TREE
);
748 if (pbitpos
% BITS_PER_UNIT
!= 0)
750 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
751 fprintf (dump_file
, "failed: bit offset alignment.\n");
755 if (TREE_CODE (base
) == MEM_REF
)
757 if (!integer_zerop (TREE_OPERAND (base
, 1)))
761 double_int moff
= mem_ref_offset (base
);
762 poffset
= double_int_to_tree (sizetype
, moff
);
765 poffset
= size_binop (PLUS_EXPR
, poffset
, TREE_OPERAND (base
, 1));
767 base
= TREE_OPERAND (base
, 0);
770 base
= build_fold_addr_expr (base
);
773 if (!simple_iv (loop
, loop_containing_stmt (stmt
), base
, &base_iv
,
776 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
777 fprintf (dump_file
, "failed: evolution of base is not affine.\n");
784 base_iv
.step
= ssize_int (0);
785 base_iv
.no_overflow
= true;
790 offset_iv
.base
= ssize_int (0);
791 offset_iv
.step
= ssize_int (0);
797 offset_iv
.base
= poffset
;
798 offset_iv
.step
= ssize_int (0);
800 else if (!simple_iv (loop
, loop_containing_stmt (stmt
),
801 poffset
, &offset_iv
, false))
803 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
804 fprintf (dump_file
, "failed: evolution of offset is not"
810 init
= ssize_int (pbitpos
/ BITS_PER_UNIT
);
811 split_constant_offset (base_iv
.base
, &base_iv
.base
, &dinit
);
812 init
= size_binop (PLUS_EXPR
, init
, dinit
);
813 split_constant_offset (offset_iv
.base
, &offset_iv
.base
, &dinit
);
814 init
= size_binop (PLUS_EXPR
, init
, dinit
);
816 step
= size_binop (PLUS_EXPR
,
817 fold_convert (ssizetype
, base_iv
.step
),
818 fold_convert (ssizetype
, offset_iv
.step
));
820 DR_BASE_ADDRESS (dr
) = canonicalize_base_object_address (base_iv
.base
);
822 DR_OFFSET (dr
) = fold_convert (ssizetype
, offset_iv
.base
);
826 DR_ALIGNED_TO (dr
) = size_int (highest_pow2_factor (offset_iv
.base
));
828 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
829 fprintf (dump_file
, "success.\n");
834 /* Determines the base object and the list of indices of memory reference
835 DR, analyzed in LOOP and instantiated in loop nest NEST. */
838 dr_analyze_indices (struct data_reference
*dr
, loop_p nest
, loop_p loop
)
840 VEC (tree
, heap
) *access_fns
= NULL
;
841 tree ref
= unshare_expr (DR_REF (dr
)), aref
= ref
, op
;
842 tree base
, off
, access_fn
= NULL_TREE
;
843 basic_block before_loop
= NULL
;
846 before_loop
= block_before_loop (nest
);
848 while (handled_component_p (aref
))
850 if (TREE_CODE (aref
) == ARRAY_REF
)
852 op
= TREE_OPERAND (aref
, 1);
855 access_fn
= analyze_scalar_evolution (loop
, op
);
856 access_fn
= instantiate_scev (before_loop
, loop
, access_fn
);
857 VEC_safe_push (tree
, heap
, access_fns
, access_fn
);
860 TREE_OPERAND (aref
, 1) = build_int_cst (TREE_TYPE (op
), 0);
863 aref
= TREE_OPERAND (aref
, 0);
867 && (INDIRECT_REF_P (aref
)
868 || TREE_CODE (aref
) == MEM_REF
))
870 op
= TREE_OPERAND (aref
, 0);
871 access_fn
= analyze_scalar_evolution (loop
, op
);
872 access_fn
= instantiate_scev (before_loop
, loop
, access_fn
);
873 base
= initial_condition (access_fn
);
874 split_constant_offset (base
, &base
, &off
);
875 if (TREE_CODE (aref
) == MEM_REF
)
876 off
= size_binop (PLUS_EXPR
, off
,
877 fold_convert (ssizetype
, TREE_OPERAND (aref
, 1)));
878 access_fn
= chrec_replace_initial_condition (access_fn
,
879 fold_convert (TREE_TYPE (base
), off
));
881 TREE_OPERAND (aref
, 0) = base
;
882 VEC_safe_push (tree
, heap
, access_fns
, access_fn
);
885 if (TREE_CODE (aref
) == MEM_REF
)
886 TREE_OPERAND (aref
, 1)
887 = build_int_cst (TREE_TYPE (TREE_OPERAND (aref
, 1)), 0);
889 if (TREE_CODE (ref
) == MEM_REF
890 && TREE_CODE (TREE_OPERAND (ref
, 0)) == ADDR_EXPR
891 && integer_zerop (TREE_OPERAND (ref
, 1)))
892 ref
= TREE_OPERAND (TREE_OPERAND (ref
, 0), 0);
894 /* For canonicalization purposes we'd like to strip all outermost
895 zero-offset component-refs.
896 ??? For now simply handle zero-index array-refs. */
897 while (TREE_CODE (ref
) == ARRAY_REF
898 && integer_zerop (TREE_OPERAND (ref
, 1)))
899 ref
= TREE_OPERAND (ref
, 0);
901 DR_BASE_OBJECT (dr
) = ref
;
902 DR_ACCESS_FNS (dr
) = access_fns
;
905 /* Extracts the alias analysis information from the memory reference DR. */
908 dr_analyze_alias (struct data_reference
*dr
)
910 tree ref
= DR_REF (dr
);
911 tree base
= get_base_address (ref
), addr
;
913 if (INDIRECT_REF_P (base
)
914 || TREE_CODE (base
) == MEM_REF
)
916 addr
= TREE_OPERAND (base
, 0);
917 if (TREE_CODE (addr
) == SSA_NAME
)
918 DR_PTR_INFO (dr
) = SSA_NAME_PTR_INFO (addr
);
922 /* Frees data reference DR. */
925 free_data_ref (data_reference_p dr
)
927 VEC_free (tree
, heap
, DR_ACCESS_FNS (dr
));
931 /* Analyzes memory reference MEMREF accessed in STMT. The reference
932 is read if IS_READ is true, write otherwise. Returns the
933 data_reference description of MEMREF. NEST is the outermost loop
934 in which the reference should be instantiated, LOOP is the loop in
935 which the data reference should be analyzed. */
937 struct data_reference
*
938 create_data_ref (loop_p nest
, loop_p loop
, tree memref
, gimple stmt
,
941 struct data_reference
*dr
;
943 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
945 fprintf (dump_file
, "Creating dr for ");
946 print_generic_expr (dump_file
, memref
, TDF_SLIM
);
947 fprintf (dump_file
, "\n");
950 dr
= XCNEW (struct data_reference
);
952 DR_REF (dr
) = memref
;
953 DR_IS_READ (dr
) = is_read
;
955 dr_analyze_innermost (dr
);
956 dr_analyze_indices (dr
, nest
, loop
);
957 dr_analyze_alias (dr
);
959 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
961 fprintf (dump_file
, "\tbase_address: ");
962 print_generic_expr (dump_file
, DR_BASE_ADDRESS (dr
), TDF_SLIM
);
963 fprintf (dump_file
, "\n\toffset from base address: ");
964 print_generic_expr (dump_file
, DR_OFFSET (dr
), TDF_SLIM
);
965 fprintf (dump_file
, "\n\tconstant offset from base address: ");
966 print_generic_expr (dump_file
, DR_INIT (dr
), TDF_SLIM
);
967 fprintf (dump_file
, "\n\tstep: ");
968 print_generic_expr (dump_file
, DR_STEP (dr
), TDF_SLIM
);
969 fprintf (dump_file
, "\n\taligned to: ");
970 print_generic_expr (dump_file
, DR_ALIGNED_TO (dr
), TDF_SLIM
);
971 fprintf (dump_file
, "\n\tbase_object: ");
972 print_generic_expr (dump_file
, DR_BASE_OBJECT (dr
), TDF_SLIM
);
973 fprintf (dump_file
, "\n");
979 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
982 dr_equal_offsets_p1 (tree offset1
, tree offset2
)
986 STRIP_NOPS (offset1
);
987 STRIP_NOPS (offset2
);
989 if (offset1
== offset2
)
992 if (TREE_CODE (offset1
) != TREE_CODE (offset2
)
993 || (!BINARY_CLASS_P (offset1
) && !UNARY_CLASS_P (offset1
)))
996 res
= dr_equal_offsets_p1 (TREE_OPERAND (offset1
, 0),
997 TREE_OPERAND (offset2
, 0));
999 if (!res
|| !BINARY_CLASS_P (offset1
))
1002 res
= dr_equal_offsets_p1 (TREE_OPERAND (offset1
, 1),
1003 TREE_OPERAND (offset2
, 1));
1008 /* Check if DRA and DRB have equal offsets. */
1010 dr_equal_offsets_p (struct data_reference
*dra
,
1011 struct data_reference
*drb
)
1013 tree offset1
, offset2
;
1015 offset1
= DR_OFFSET (dra
);
1016 offset2
= DR_OFFSET (drb
);
1018 return dr_equal_offsets_p1 (offset1
, offset2
);
1021 /* Returns true if FNA == FNB. */
1024 affine_function_equal_p (affine_fn fna
, affine_fn fnb
)
1026 unsigned i
, n
= VEC_length (tree
, fna
);
1028 if (n
!= VEC_length (tree
, fnb
))
1031 for (i
= 0; i
< n
; i
++)
1032 if (!operand_equal_p (VEC_index (tree
, fna
, i
),
1033 VEC_index (tree
, fnb
, i
), 0))
1039 /* If all the functions in CF are the same, returns one of them,
1040 otherwise returns NULL. */
1043 common_affine_function (conflict_function
*cf
)
1048 if (!CF_NONTRIVIAL_P (cf
))
1053 for (i
= 1; i
< cf
->n
; i
++)
1054 if (!affine_function_equal_p (comm
, cf
->fns
[i
]))
1060 /* Returns the base of the affine function FN. */
1063 affine_function_base (affine_fn fn
)
1065 return VEC_index (tree
, fn
, 0);
1068 /* Returns true if FN is a constant. */
1071 affine_function_constant_p (affine_fn fn
)
1076 for (i
= 1; VEC_iterate (tree
, fn
, i
, coef
); i
++)
1077 if (!integer_zerop (coef
))
1083 /* Returns true if FN is the zero constant function. */
1086 affine_function_zero_p (affine_fn fn
)
1088 return (integer_zerop (affine_function_base (fn
))
1089 && affine_function_constant_p (fn
));
1092 /* Returns a signed integer type with the largest precision from TA
1096 signed_type_for_types (tree ta
, tree tb
)
1098 if (TYPE_PRECISION (ta
) > TYPE_PRECISION (tb
))
1099 return signed_type_for (ta
);
1101 return signed_type_for (tb
);
1104 /* Applies operation OP on affine functions FNA and FNB, and returns the
1108 affine_fn_op (enum tree_code op
, affine_fn fna
, affine_fn fnb
)
1114 if (VEC_length (tree
, fnb
) > VEC_length (tree
, fna
))
1116 n
= VEC_length (tree
, fna
);
1117 m
= VEC_length (tree
, fnb
);
1121 n
= VEC_length (tree
, fnb
);
1122 m
= VEC_length (tree
, fna
);
1125 ret
= VEC_alloc (tree
, heap
, m
);
1126 for (i
= 0; i
< n
; i
++)
1128 tree type
= signed_type_for_types (TREE_TYPE (VEC_index (tree
, fna
, i
)),
1129 TREE_TYPE (VEC_index (tree
, fnb
, i
)));
1131 VEC_quick_push (tree
, ret
,
1132 fold_build2 (op
, type
,
1133 VEC_index (tree
, fna
, i
),
1134 VEC_index (tree
, fnb
, i
)));
1137 for (; VEC_iterate (tree
, fna
, i
, coef
); i
++)
1138 VEC_quick_push (tree
, ret
,
1139 fold_build2 (op
, signed_type_for (TREE_TYPE (coef
)),
1140 coef
, integer_zero_node
));
1141 for (; VEC_iterate (tree
, fnb
, i
, coef
); i
++)
1142 VEC_quick_push (tree
, ret
,
1143 fold_build2 (op
, signed_type_for (TREE_TYPE (coef
)),
1144 integer_zero_node
, coef
));
1149 /* Returns the sum of affine functions FNA and FNB. */
1152 affine_fn_plus (affine_fn fna
, affine_fn fnb
)
1154 return affine_fn_op (PLUS_EXPR
, fna
, fnb
);
1157 /* Returns the difference of affine functions FNA and FNB. */
1160 affine_fn_minus (affine_fn fna
, affine_fn fnb
)
1162 return affine_fn_op (MINUS_EXPR
, fna
, fnb
);
1165 /* Frees affine function FN. */
1168 affine_fn_free (affine_fn fn
)
1170 VEC_free (tree
, heap
, fn
);
1173 /* Determine for each subscript in the data dependence relation DDR
1177 compute_subscript_distance (struct data_dependence_relation
*ddr
)
1179 conflict_function
*cf_a
, *cf_b
;
1180 affine_fn fn_a
, fn_b
, diff
;
1182 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
1186 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
1188 struct subscript
*subscript
;
1190 subscript
= DDR_SUBSCRIPT (ddr
, i
);
1191 cf_a
= SUB_CONFLICTS_IN_A (subscript
);
1192 cf_b
= SUB_CONFLICTS_IN_B (subscript
);
1194 fn_a
= common_affine_function (cf_a
);
1195 fn_b
= common_affine_function (cf_b
);
1198 SUB_DISTANCE (subscript
) = chrec_dont_know
;
1201 diff
= affine_fn_minus (fn_a
, fn_b
);
1203 if (affine_function_constant_p (diff
))
1204 SUB_DISTANCE (subscript
) = affine_function_base (diff
);
1206 SUB_DISTANCE (subscript
) = chrec_dont_know
;
1208 affine_fn_free (diff
);
1213 /* Returns the conflict function for "unknown". */
1215 static conflict_function
*
1216 conflict_fn_not_known (void)
1218 conflict_function
*fn
= XCNEW (conflict_function
);
1224 /* Returns the conflict function for "independent". */
1226 static conflict_function
*
1227 conflict_fn_no_dependence (void)
1229 conflict_function
*fn
= XCNEW (conflict_function
);
1230 fn
->n
= NO_DEPENDENCE
;
1235 /* Returns true if the address of OBJ is invariant in LOOP. */
1238 object_address_invariant_in_loop_p (const struct loop
*loop
, const_tree obj
)
1240 while (handled_component_p (obj
))
1242 if (TREE_CODE (obj
) == ARRAY_REF
)
1244 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1245 need to check the stride and the lower bound of the reference. */
1246 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, 2),
1248 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, 3),
1252 else if (TREE_CODE (obj
) == COMPONENT_REF
)
1254 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, 2),
1258 obj
= TREE_OPERAND (obj
, 0);
1261 if (!INDIRECT_REF_P (obj
)
1262 && TREE_CODE (obj
) != MEM_REF
)
1265 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, 0),
1269 /* Returns false if we can prove that data references A and B do not alias,
1273 dr_may_alias_p (const struct data_reference
*a
, const struct data_reference
*b
)
1275 tree addr_a
= DR_BASE_OBJECT (a
);
1276 tree addr_b
= DR_BASE_OBJECT (b
);
1278 if (DR_IS_WRITE (a
) && DR_IS_WRITE (b
))
1279 return refs_output_dependent_p (addr_a
, addr_b
);
1280 else if (DR_IS_READ (a
) && DR_IS_WRITE (b
))
1281 return refs_anti_dependent_p (addr_a
, addr_b
);
1282 return refs_may_alias_p (addr_a
, addr_b
);
1285 static void compute_self_dependence (struct data_dependence_relation
*);
1287 /* Initialize a data dependence relation between data accesses A and
1288 B. NB_LOOPS is the number of loops surrounding the references: the
1289 size of the classic distance/direction vectors. */
1291 static struct data_dependence_relation
*
1292 initialize_data_dependence_relation (struct data_reference
*a
,
1293 struct data_reference
*b
,
1294 VEC (loop_p
, heap
) *loop_nest
)
1296 struct data_dependence_relation
*res
;
1299 res
= XNEW (struct data_dependence_relation
);
1302 DDR_LOOP_NEST (res
) = NULL
;
1303 DDR_REVERSED_P (res
) = false;
1304 DDR_SUBSCRIPTS (res
) = NULL
;
1305 DDR_DIR_VECTS (res
) = NULL
;
1306 DDR_DIST_VECTS (res
) = NULL
;
1308 if (a
== NULL
|| b
== NULL
)
1310 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
1314 /* If the data references do not alias, then they are independent. */
1315 if (!dr_may_alias_p (a
, b
))
1317 DDR_ARE_DEPENDENT (res
) = chrec_known
;
1321 /* When the references are exactly the same, don't spend time doing
1322 the data dependence tests, just initialize the ddr and return. */
1323 if (operand_equal_p (DR_REF (a
), DR_REF (b
), 0))
1325 DDR_AFFINE_P (res
) = true;
1326 DDR_ARE_DEPENDENT (res
) = NULL_TREE
;
1327 DDR_SUBSCRIPTS (res
) = VEC_alloc (subscript_p
, heap
, DR_NUM_DIMENSIONS (a
));
1328 DDR_LOOP_NEST (res
) = loop_nest
;
1329 DDR_INNER_LOOP (res
) = 0;
1330 DDR_SELF_REFERENCE (res
) = true;
1331 compute_self_dependence (res
);
1335 /* If the references do not access the same object, we do not know
1336 whether they alias or not. */
1337 if (!operand_equal_p (DR_BASE_OBJECT (a
), DR_BASE_OBJECT (b
), 0))
1339 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
1343 /* If the base of the object is not invariant in the loop nest, we cannot
1344 analyze it. TODO -- in fact, it would suffice to record that there may
1345 be arbitrary dependences in the loops where the base object varies. */
1347 && !object_address_invariant_in_loop_p (VEC_index (loop_p
, loop_nest
, 0),
1348 DR_BASE_OBJECT (a
)))
1350 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
1354 /* If the number of dimensions of the access to not agree we can have
1355 a pointer access to a component of the array element type and an
1356 array access while the base-objects are still the same. Punt. */
1357 if (DR_NUM_DIMENSIONS (a
) != DR_NUM_DIMENSIONS (b
))
1359 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
1363 DDR_AFFINE_P (res
) = true;
1364 DDR_ARE_DEPENDENT (res
) = NULL_TREE
;
1365 DDR_SUBSCRIPTS (res
) = VEC_alloc (subscript_p
, heap
, DR_NUM_DIMENSIONS (a
));
1366 DDR_LOOP_NEST (res
) = loop_nest
;
1367 DDR_INNER_LOOP (res
) = 0;
1368 DDR_SELF_REFERENCE (res
) = false;
1370 for (i
= 0; i
< DR_NUM_DIMENSIONS (a
); i
++)
1372 struct subscript
*subscript
;
1374 subscript
= XNEW (struct subscript
);
1375 SUB_CONFLICTS_IN_A (subscript
) = conflict_fn_not_known ();
1376 SUB_CONFLICTS_IN_B (subscript
) = conflict_fn_not_known ();
1377 SUB_LAST_CONFLICT (subscript
) = chrec_dont_know
;
1378 SUB_DISTANCE (subscript
) = chrec_dont_know
;
1379 VEC_safe_push (subscript_p
, heap
, DDR_SUBSCRIPTS (res
), subscript
);
1385 /* Frees memory used by the conflict function F. */
1388 free_conflict_function (conflict_function
*f
)
1392 if (CF_NONTRIVIAL_P (f
))
1394 for (i
= 0; i
< f
->n
; i
++)
1395 affine_fn_free (f
->fns
[i
]);
1400 /* Frees memory used by SUBSCRIPTS. */
1403 free_subscripts (VEC (subscript_p
, heap
) *subscripts
)
1408 FOR_EACH_VEC_ELT (subscript_p
, subscripts
, i
, s
)
1410 free_conflict_function (s
->conflicting_iterations_in_a
);
1411 free_conflict_function (s
->conflicting_iterations_in_b
);
1414 VEC_free (subscript_p
, heap
, subscripts
);
1417 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1421 finalize_ddr_dependent (struct data_dependence_relation
*ddr
,
1424 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1426 fprintf (dump_file
, "(dependence classified: ");
1427 print_generic_expr (dump_file
, chrec
, 0);
1428 fprintf (dump_file
, ")\n");
1431 DDR_ARE_DEPENDENT (ddr
) = chrec
;
1432 free_subscripts (DDR_SUBSCRIPTS (ddr
));
1433 DDR_SUBSCRIPTS (ddr
) = NULL
;
1436 /* The dependence relation DDR cannot be represented by a distance
1440 non_affine_dependence_relation (struct data_dependence_relation
*ddr
)
1442 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1443 fprintf (dump_file
, "(Dependence relation cannot be represented by distance vector.) \n");
1445 DDR_AFFINE_P (ddr
) = false;
1450 /* This section contains the classic Banerjee tests. */
1452 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1453 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1456 ziv_subscript_p (const_tree chrec_a
, const_tree chrec_b
)
1458 return (evolution_function_is_constant_p (chrec_a
)
1459 && evolution_function_is_constant_p (chrec_b
));
1462 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1463 variable, i.e., if the SIV (Single Index Variable) test is true. */
1466 siv_subscript_p (const_tree chrec_a
, const_tree chrec_b
)
1468 if ((evolution_function_is_constant_p (chrec_a
)
1469 && evolution_function_is_univariate_p (chrec_b
))
1470 || (evolution_function_is_constant_p (chrec_b
)
1471 && evolution_function_is_univariate_p (chrec_a
)))
1474 if (evolution_function_is_univariate_p (chrec_a
)
1475 && evolution_function_is_univariate_p (chrec_b
))
1477 switch (TREE_CODE (chrec_a
))
1479 case POLYNOMIAL_CHREC
:
1480 switch (TREE_CODE (chrec_b
))
1482 case POLYNOMIAL_CHREC
:
1483 if (CHREC_VARIABLE (chrec_a
) != CHREC_VARIABLE (chrec_b
))
1498 /* Creates a conflict function with N dimensions. The affine functions
1499 in each dimension follow. */
1501 static conflict_function
*
1502 conflict_fn (unsigned n
, ...)
1505 conflict_function
*ret
= XCNEW (conflict_function
);
1508 gcc_assert (0 < n
&& n
<= MAX_DIM
);
1512 for (i
= 0; i
< n
; i
++)
1513 ret
->fns
[i
] = va_arg (ap
, affine_fn
);
1519 /* Returns constant affine function with value CST. */
1522 affine_fn_cst (tree cst
)
1524 affine_fn fn
= VEC_alloc (tree
, heap
, 1);
1525 VEC_quick_push (tree
, fn
, cst
);
1529 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1532 affine_fn_univar (tree cst
, unsigned dim
, tree coef
)
1534 affine_fn fn
= VEC_alloc (tree
, heap
, dim
+ 1);
1537 gcc_assert (dim
> 0);
1538 VEC_quick_push (tree
, fn
, cst
);
1539 for (i
= 1; i
< dim
; i
++)
1540 VEC_quick_push (tree
, fn
, integer_zero_node
);
1541 VEC_quick_push (tree
, fn
, coef
);
1545 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1546 *OVERLAPS_B are initialized to the functions that describe the
1547 relation between the elements accessed twice by CHREC_A and
1548 CHREC_B. For k >= 0, the following property is verified:
1550 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1553 analyze_ziv_subscript (tree chrec_a
,
1555 conflict_function
**overlaps_a
,
1556 conflict_function
**overlaps_b
,
1557 tree
*last_conflicts
)
1559 tree type
, difference
;
1560 dependence_stats
.num_ziv
++;
1562 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1563 fprintf (dump_file
, "(analyze_ziv_subscript \n");
1565 type
= signed_type_for_types (TREE_TYPE (chrec_a
), TREE_TYPE (chrec_b
));
1566 chrec_a
= chrec_convert (type
, chrec_a
, NULL
);
1567 chrec_b
= chrec_convert (type
, chrec_b
, NULL
);
1568 difference
= chrec_fold_minus (type
, chrec_a
, chrec_b
);
1570 switch (TREE_CODE (difference
))
1573 if (integer_zerop (difference
))
1575 /* The difference is equal to zero: the accessed index
1576 overlaps for each iteration in the loop. */
1577 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
1578 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
1579 *last_conflicts
= chrec_dont_know
;
1580 dependence_stats
.num_ziv_dependent
++;
1584 /* The accesses do not overlap. */
1585 *overlaps_a
= conflict_fn_no_dependence ();
1586 *overlaps_b
= conflict_fn_no_dependence ();
1587 *last_conflicts
= integer_zero_node
;
1588 dependence_stats
.num_ziv_independent
++;
1593 /* We're not sure whether the indexes overlap. For the moment,
1594 conservatively answer "don't know". */
1595 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1596 fprintf (dump_file
, "ziv test failed: difference is non-integer.\n");
1598 *overlaps_a
= conflict_fn_not_known ();
1599 *overlaps_b
= conflict_fn_not_known ();
1600 *last_conflicts
= chrec_dont_know
;
1601 dependence_stats
.num_ziv_unimplemented
++;
1605 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1606 fprintf (dump_file
, ")\n");
1609 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
1610 and only if it fits to the int type. If this is not the case, or the
1611 bound on the number of iterations of LOOP could not be derived, returns
1615 max_stmt_executions_tree (struct loop
*loop
)
1620 if (!max_stmt_executions (loop
, true, &nit
))
1621 return chrec_dont_know
;
1623 type
= lang_hooks
.types
.type_for_size (INT_TYPE_SIZE
, true);
1624 if (!double_int_fits_to_tree_p (type
, nit
))
1625 return chrec_dont_know
;
1627 return double_int_to_tree (type
, nit
);
1630 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1631 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1632 *OVERLAPS_B are initialized to the functions that describe the
1633 relation between the elements accessed twice by CHREC_A and
1634 CHREC_B. For k >= 0, the following property is verified:
1636 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1639 analyze_siv_subscript_cst_affine (tree chrec_a
,
1641 conflict_function
**overlaps_a
,
1642 conflict_function
**overlaps_b
,
1643 tree
*last_conflicts
)
1645 bool value0
, value1
, value2
;
1646 tree type
, difference
, tmp
;
1648 type
= signed_type_for_types (TREE_TYPE (chrec_a
), TREE_TYPE (chrec_b
));
1649 chrec_a
= chrec_convert (type
, chrec_a
, NULL
);
1650 chrec_b
= chrec_convert (type
, chrec_b
, NULL
);
1651 difference
= chrec_fold_minus (type
, initial_condition (chrec_b
), chrec_a
);
1653 if (!chrec_is_positive (initial_condition (difference
), &value0
))
1655 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1656 fprintf (dump_file
, "siv test failed: chrec is not positive.\n");
1658 dependence_stats
.num_siv_unimplemented
++;
1659 *overlaps_a
= conflict_fn_not_known ();
1660 *overlaps_b
= conflict_fn_not_known ();
1661 *last_conflicts
= chrec_dont_know
;
1666 if (value0
== false)
1668 if (!chrec_is_positive (CHREC_RIGHT (chrec_b
), &value1
))
1670 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1671 fprintf (dump_file
, "siv test failed: chrec not positive.\n");
1673 *overlaps_a
= conflict_fn_not_known ();
1674 *overlaps_b
= conflict_fn_not_known ();
1675 *last_conflicts
= chrec_dont_know
;
1676 dependence_stats
.num_siv_unimplemented
++;
1685 chrec_b = {10, +, 1}
1688 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b
), difference
))
1690 HOST_WIDE_INT numiter
;
1691 struct loop
*loop
= get_chrec_loop (chrec_b
);
1693 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
1694 tmp
= fold_build2 (EXACT_DIV_EXPR
, type
,
1695 fold_build1 (ABS_EXPR
, type
, difference
),
1696 CHREC_RIGHT (chrec_b
));
1697 *overlaps_b
= conflict_fn (1, affine_fn_cst (tmp
));
1698 *last_conflicts
= integer_one_node
;
1701 /* Perform weak-zero siv test to see if overlap is
1702 outside the loop bounds. */
1703 numiter
= max_stmt_executions_int (loop
, true);
1706 && compare_tree_int (tmp
, numiter
) > 0)
1708 free_conflict_function (*overlaps_a
);
1709 free_conflict_function (*overlaps_b
);
1710 *overlaps_a
= conflict_fn_no_dependence ();
1711 *overlaps_b
= conflict_fn_no_dependence ();
1712 *last_conflicts
= integer_zero_node
;
1713 dependence_stats
.num_siv_independent
++;
1716 dependence_stats
.num_siv_dependent
++;
1720 /* When the step does not divide the difference, there are
1724 *overlaps_a
= conflict_fn_no_dependence ();
1725 *overlaps_b
= conflict_fn_no_dependence ();
1726 *last_conflicts
= integer_zero_node
;
1727 dependence_stats
.num_siv_independent
++;
1736 chrec_b = {10, +, -1}
1738 In this case, chrec_a will not overlap with chrec_b. */
1739 *overlaps_a
= conflict_fn_no_dependence ();
1740 *overlaps_b
= conflict_fn_no_dependence ();
1741 *last_conflicts
= integer_zero_node
;
1742 dependence_stats
.num_siv_independent
++;
1749 if (!chrec_is_positive (CHREC_RIGHT (chrec_b
), &value2
))
1751 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1752 fprintf (dump_file
, "siv test failed: chrec not positive.\n");
1754 *overlaps_a
= conflict_fn_not_known ();
1755 *overlaps_b
= conflict_fn_not_known ();
1756 *last_conflicts
= chrec_dont_know
;
1757 dependence_stats
.num_siv_unimplemented
++;
1762 if (value2
== false)
1766 chrec_b = {10, +, -1}
1768 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b
), difference
))
1770 HOST_WIDE_INT numiter
;
1771 struct loop
*loop
= get_chrec_loop (chrec_b
);
1773 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
1774 tmp
= fold_build2 (EXACT_DIV_EXPR
, type
, difference
,
1775 CHREC_RIGHT (chrec_b
));
1776 *overlaps_b
= conflict_fn (1, affine_fn_cst (tmp
));
1777 *last_conflicts
= integer_one_node
;
1779 /* Perform weak-zero siv test to see if overlap is
1780 outside the loop bounds. */
1781 numiter
= max_stmt_executions_int (loop
, true);
1784 && compare_tree_int (tmp
, numiter
) > 0)
1786 free_conflict_function (*overlaps_a
);
1787 free_conflict_function (*overlaps_b
);
1788 *overlaps_a
= conflict_fn_no_dependence ();
1789 *overlaps_b
= conflict_fn_no_dependence ();
1790 *last_conflicts
= integer_zero_node
;
1791 dependence_stats
.num_siv_independent
++;
1794 dependence_stats
.num_siv_dependent
++;
1798 /* When the step does not divide the difference, there
1802 *overlaps_a
= conflict_fn_no_dependence ();
1803 *overlaps_b
= conflict_fn_no_dependence ();
1804 *last_conflicts
= integer_zero_node
;
1805 dependence_stats
.num_siv_independent
++;
1815 In this case, chrec_a will not overlap with chrec_b. */
1816 *overlaps_a
= conflict_fn_no_dependence ();
1817 *overlaps_b
= conflict_fn_no_dependence ();
1818 *last_conflicts
= integer_zero_node
;
1819 dependence_stats
.num_siv_independent
++;
1827 /* Helper recursive function for initializing the matrix A. Returns
1828 the initial value of CHREC. */
1831 initialize_matrix_A (lambda_matrix A
, tree chrec
, unsigned index
, int mult
)
1835 switch (TREE_CODE (chrec
))
1837 case POLYNOMIAL_CHREC
:
1838 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec
)) == INTEGER_CST
);
1840 A
[index
][0] = mult
* int_cst_value (CHREC_RIGHT (chrec
));
1841 return initialize_matrix_A (A
, CHREC_LEFT (chrec
), index
+ 1, mult
);
1847 tree op0
= initialize_matrix_A (A
, TREE_OPERAND (chrec
, 0), index
, mult
);
1848 tree op1
= initialize_matrix_A (A
, TREE_OPERAND (chrec
, 1), index
, mult
);
1850 return chrec_fold_op (TREE_CODE (chrec
), chrec_type (chrec
), op0
, op1
);
1855 tree op
= initialize_matrix_A (A
, TREE_OPERAND (chrec
, 0), index
, mult
);
1856 return chrec_convert (chrec_type (chrec
), op
, NULL
);
1861 /* Handle ~X as -1 - X. */
1862 tree op
= initialize_matrix_A (A
, TREE_OPERAND (chrec
, 0), index
, mult
);
1863 return chrec_fold_op (MINUS_EXPR
, chrec_type (chrec
),
1864 build_int_cst (TREE_TYPE (chrec
), -1), op
);
1876 #define FLOOR_DIV(x,y) ((x) / (y))
1878 /* Solves the special case of the Diophantine equation:
1879 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
1881 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
1882 number of iterations that loops X and Y run. The overlaps will be
1883 constructed as evolutions in dimension DIM. */
1886 compute_overlap_steps_for_affine_univar (int niter
, int step_a
, int step_b
,
1887 affine_fn
*overlaps_a
,
1888 affine_fn
*overlaps_b
,
1889 tree
*last_conflicts
, int dim
)
1891 if (((step_a
> 0 && step_b
> 0)
1892 || (step_a
< 0 && step_b
< 0)))
1894 int step_overlaps_a
, step_overlaps_b
;
1895 int gcd_steps_a_b
, last_conflict
, tau2
;
1897 gcd_steps_a_b
= gcd (step_a
, step_b
);
1898 step_overlaps_a
= step_b
/ gcd_steps_a_b
;
1899 step_overlaps_b
= step_a
/ gcd_steps_a_b
;
1903 tau2
= FLOOR_DIV (niter
, step_overlaps_a
);
1904 tau2
= MIN (tau2
, FLOOR_DIV (niter
, step_overlaps_b
));
1905 last_conflict
= tau2
;
1906 *last_conflicts
= build_int_cst (NULL_TREE
, last_conflict
);
1909 *last_conflicts
= chrec_dont_know
;
1911 *overlaps_a
= affine_fn_univar (integer_zero_node
, dim
,
1912 build_int_cst (NULL_TREE
,
1914 *overlaps_b
= affine_fn_univar (integer_zero_node
, dim
,
1915 build_int_cst (NULL_TREE
,
1921 *overlaps_a
= affine_fn_cst (integer_zero_node
);
1922 *overlaps_b
= affine_fn_cst (integer_zero_node
);
1923 *last_conflicts
= integer_zero_node
;
1927 /* Solves the special case of a Diophantine equation where CHREC_A is
1928 an affine bivariate function, and CHREC_B is an affine univariate
1929 function. For example,
1931 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
1933 has the following overlapping functions:
1935 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
1936 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
1937 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
1939 FORNOW: This is a specialized implementation for a case occurring in
1940 a common benchmark. Implement the general algorithm. */
1943 compute_overlap_steps_for_affine_1_2 (tree chrec_a
, tree chrec_b
,
1944 conflict_function
**overlaps_a
,
1945 conflict_function
**overlaps_b
,
1946 tree
*last_conflicts
)
1948 bool xz_p
, yz_p
, xyz_p
;
1949 int step_x
, step_y
, step_z
;
1950 HOST_WIDE_INT niter_x
, niter_y
, niter_z
, niter
;
1951 affine_fn overlaps_a_xz
, overlaps_b_xz
;
1952 affine_fn overlaps_a_yz
, overlaps_b_yz
;
1953 affine_fn overlaps_a_xyz
, overlaps_b_xyz
;
1954 affine_fn ova1
, ova2
, ovb
;
1955 tree last_conflicts_xz
, last_conflicts_yz
, last_conflicts_xyz
;
1957 step_x
= int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a
)));
1958 step_y
= int_cst_value (CHREC_RIGHT (chrec_a
));
1959 step_z
= int_cst_value (CHREC_RIGHT (chrec_b
));
1962 max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a
)), true);
1963 niter_y
= max_stmt_executions_int (get_chrec_loop (chrec_a
), true);
1964 niter_z
= max_stmt_executions_int (get_chrec_loop (chrec_b
), true);
1966 if (niter_x
< 0 || niter_y
< 0 || niter_z
< 0)
1968 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1969 fprintf (dump_file
, "overlap steps test failed: no iteration counts.\n");
1971 *overlaps_a
= conflict_fn_not_known ();
1972 *overlaps_b
= conflict_fn_not_known ();
1973 *last_conflicts
= chrec_dont_know
;
1977 niter
= MIN (niter_x
, niter_z
);
1978 compute_overlap_steps_for_affine_univar (niter
, step_x
, step_z
,
1981 &last_conflicts_xz
, 1);
1982 niter
= MIN (niter_y
, niter_z
);
1983 compute_overlap_steps_for_affine_univar (niter
, step_y
, step_z
,
1986 &last_conflicts_yz
, 2);
1987 niter
= MIN (niter_x
, niter_z
);
1988 niter
= MIN (niter_y
, niter
);
1989 compute_overlap_steps_for_affine_univar (niter
, step_x
+ step_y
, step_z
,
1992 &last_conflicts_xyz
, 3);
1994 xz_p
= !integer_zerop (last_conflicts_xz
);
1995 yz_p
= !integer_zerop (last_conflicts_yz
);
1996 xyz_p
= !integer_zerop (last_conflicts_xyz
);
1998 if (xz_p
|| yz_p
|| xyz_p
)
2000 ova1
= affine_fn_cst (integer_zero_node
);
2001 ova2
= affine_fn_cst (integer_zero_node
);
2002 ovb
= affine_fn_cst (integer_zero_node
);
2005 affine_fn t0
= ova1
;
2008 ova1
= affine_fn_plus (ova1
, overlaps_a_xz
);
2009 ovb
= affine_fn_plus (ovb
, overlaps_b_xz
);
2010 affine_fn_free (t0
);
2011 affine_fn_free (t2
);
2012 *last_conflicts
= last_conflicts_xz
;
2016 affine_fn t0
= ova2
;
2019 ova2
= affine_fn_plus (ova2
, overlaps_a_yz
);
2020 ovb
= affine_fn_plus (ovb
, overlaps_b_yz
);
2021 affine_fn_free (t0
);
2022 affine_fn_free (t2
);
2023 *last_conflicts
= last_conflicts_yz
;
2027 affine_fn t0
= ova1
;
2028 affine_fn t2
= ova2
;
2031 ova1
= affine_fn_plus (ova1
, overlaps_a_xyz
);
2032 ova2
= affine_fn_plus (ova2
, overlaps_a_xyz
);
2033 ovb
= affine_fn_plus (ovb
, overlaps_b_xyz
);
2034 affine_fn_free (t0
);
2035 affine_fn_free (t2
);
2036 affine_fn_free (t4
);
2037 *last_conflicts
= last_conflicts_xyz
;
2039 *overlaps_a
= conflict_fn (2, ova1
, ova2
);
2040 *overlaps_b
= conflict_fn (1, ovb
);
2044 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2045 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2046 *last_conflicts
= integer_zero_node
;
2049 affine_fn_free (overlaps_a_xz
);
2050 affine_fn_free (overlaps_b_xz
);
2051 affine_fn_free (overlaps_a_yz
);
2052 affine_fn_free (overlaps_b_yz
);
2053 affine_fn_free (overlaps_a_xyz
);
2054 affine_fn_free (overlaps_b_xyz
);
2057 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
2060 lambda_vector_copy (lambda_vector vec1
, lambda_vector vec2
,
2063 memcpy (vec2
, vec1
, size
* sizeof (*vec1
));
2066 /* Copy the elements of M x N matrix MAT1 to MAT2. */
2069 lambda_matrix_copy (lambda_matrix mat1
, lambda_matrix mat2
,
2074 for (i
= 0; i
< m
; i
++)
2075 lambda_vector_copy (mat1
[i
], mat2
[i
], n
);
2078 /* Store the N x N identity matrix in MAT. */
2081 lambda_matrix_id (lambda_matrix mat
, int size
)
2085 for (i
= 0; i
< size
; i
++)
2086 for (j
= 0; j
< size
; j
++)
2087 mat
[i
][j
] = (i
== j
) ? 1 : 0;
2090 /* Return the first nonzero element of vector VEC1 between START and N.
2091 We must have START <= N. Returns N if VEC1 is the zero vector. */
2094 lambda_vector_first_nz (lambda_vector vec1
, int n
, int start
)
2097 while (j
< n
&& vec1
[j
] == 0)
2102 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2103 R2 = R2 + CONST1 * R1. */
2106 lambda_matrix_row_add (lambda_matrix mat
, int n
, int r1
, int r2
, int const1
)
2113 for (i
= 0; i
< n
; i
++)
2114 mat
[r2
][i
] += const1
* mat
[r1
][i
];
2117 /* Swap rows R1 and R2 in matrix MAT. */
2120 lambda_matrix_row_exchange (lambda_matrix mat
, int r1
, int r2
)
2129 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
2130 and store the result in VEC2. */
2133 lambda_vector_mult_const (lambda_vector vec1
, lambda_vector vec2
,
2134 int size
, int const1
)
2139 lambda_vector_clear (vec2
, size
);
2141 for (i
= 0; i
< size
; i
++)
2142 vec2
[i
] = const1
* vec1
[i
];
2145 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
2148 lambda_vector_negate (lambda_vector vec1
, lambda_vector vec2
,
2151 lambda_vector_mult_const (vec1
, vec2
, size
, -1);
2154 /* Negate row R1 of matrix MAT which has N columns. */
2157 lambda_matrix_row_negate (lambda_matrix mat
, int n
, int r1
)
2159 lambda_vector_negate (mat
[r1
], mat
[r1
], n
);
2162 /* Return true if two vectors are equal. */
2165 lambda_vector_equal (lambda_vector vec1
, lambda_vector vec2
, int size
)
2168 for (i
= 0; i
< size
; i
++)
2169 if (vec1
[i
] != vec2
[i
])
2174 /* Given an M x N integer matrix A, this function determines an M x
2175 M unimodular matrix U, and an M x N echelon matrix S such that
2176 "U.A = S". This decomposition is also known as "right Hermite".
2178 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2179 Restructuring Compilers" Utpal Banerjee. */
2182 lambda_matrix_right_hermite (lambda_matrix A
, int m
, int n
,
2183 lambda_matrix S
, lambda_matrix U
)
2187 lambda_matrix_copy (A
, S
, m
, n
);
2188 lambda_matrix_id (U
, m
);
2190 for (j
= 0; j
< n
; j
++)
2192 if (lambda_vector_first_nz (S
[j
], m
, i0
) < m
)
2195 for (i
= m
- 1; i
>= i0
; i
--)
2197 while (S
[i
][j
] != 0)
2199 int sigma
, factor
, a
, b
;
2203 sigma
= (a
* b
< 0) ? -1: 1;
2206 factor
= sigma
* (a
/ b
);
2208 lambda_matrix_row_add (S
, n
, i
, i
-1, -factor
);
2209 lambda_matrix_row_exchange (S
, i
, i
-1);
2211 lambda_matrix_row_add (U
, m
, i
, i
-1, -factor
);
2212 lambda_matrix_row_exchange (U
, i
, i
-1);
2219 /* Determines the overlapping elements due to accesses CHREC_A and
2220 CHREC_B, that are affine functions. This function cannot handle
2221 symbolic evolution functions, ie. when initial conditions are
2222 parameters, because it uses lambda matrices of integers. */
2225 analyze_subscript_affine_affine (tree chrec_a
,
2227 conflict_function
**overlaps_a
,
2228 conflict_function
**overlaps_b
,
2229 tree
*last_conflicts
)
2231 unsigned nb_vars_a
, nb_vars_b
, dim
;
2232 HOST_WIDE_INT init_a
, init_b
, gamma
, gcd_alpha_beta
;
2233 lambda_matrix A
, U
, S
;
2234 struct obstack scratch_obstack
;
2236 if (eq_evolutions_p (chrec_a
, chrec_b
))
2238 /* The accessed index overlaps for each iteration in the
2240 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2241 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2242 *last_conflicts
= chrec_dont_know
;
2245 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2246 fprintf (dump_file
, "(analyze_subscript_affine_affine \n");
2248 /* For determining the initial intersection, we have to solve a
2249 Diophantine equation. This is the most time consuming part.
2251 For answering to the question: "Is there a dependence?" we have
2252 to prove that there exists a solution to the Diophantine
2253 equation, and that the solution is in the iteration domain,
2254 i.e. the solution is positive or zero, and that the solution
2255 happens before the upper bound loop.nb_iterations. Otherwise
2256 there is no dependence. This function outputs a description of
2257 the iterations that hold the intersections. */
2259 nb_vars_a
= nb_vars_in_chrec (chrec_a
);
2260 nb_vars_b
= nb_vars_in_chrec (chrec_b
);
2262 gcc_obstack_init (&scratch_obstack
);
2264 dim
= nb_vars_a
+ nb_vars_b
;
2265 U
= lambda_matrix_new (dim
, dim
, &scratch_obstack
);
2266 A
= lambda_matrix_new (dim
, 1, &scratch_obstack
);
2267 S
= lambda_matrix_new (dim
, 1, &scratch_obstack
);
2269 init_a
= int_cst_value (initialize_matrix_A (A
, chrec_a
, 0, 1));
2270 init_b
= int_cst_value (initialize_matrix_A (A
, chrec_b
, nb_vars_a
, -1));
2271 gamma
= init_b
- init_a
;
2273 /* Don't do all the hard work of solving the Diophantine equation
2274 when we already know the solution: for example,
2277 | gamma = 3 - 3 = 0.
2278 Then the first overlap occurs during the first iterations:
2279 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2283 if (nb_vars_a
== 1 && nb_vars_b
== 1)
2285 HOST_WIDE_INT step_a
, step_b
;
2286 HOST_WIDE_INT niter
, niter_a
, niter_b
;
2289 niter_a
= max_stmt_executions_int (get_chrec_loop (chrec_a
), true);
2290 niter_b
= max_stmt_executions_int (get_chrec_loop (chrec_b
), true);
2291 niter
= MIN (niter_a
, niter_b
);
2292 step_a
= int_cst_value (CHREC_RIGHT (chrec_a
));
2293 step_b
= int_cst_value (CHREC_RIGHT (chrec_b
));
2295 compute_overlap_steps_for_affine_univar (niter
, step_a
, step_b
,
2298 *overlaps_a
= conflict_fn (1, ova
);
2299 *overlaps_b
= conflict_fn (1, ovb
);
2302 else if (nb_vars_a
== 2 && nb_vars_b
== 1)
2303 compute_overlap_steps_for_affine_1_2
2304 (chrec_a
, chrec_b
, overlaps_a
, overlaps_b
, last_conflicts
);
2306 else if (nb_vars_a
== 1 && nb_vars_b
== 2)
2307 compute_overlap_steps_for_affine_1_2
2308 (chrec_b
, chrec_a
, overlaps_b
, overlaps_a
, last_conflicts
);
2312 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2313 fprintf (dump_file
, "affine-affine test failed: too many variables.\n");
2314 *overlaps_a
= conflict_fn_not_known ();
2315 *overlaps_b
= conflict_fn_not_known ();
2316 *last_conflicts
= chrec_dont_know
;
2318 goto end_analyze_subs_aa
;
2322 lambda_matrix_right_hermite (A
, dim
, 1, S
, U
);
2327 lambda_matrix_row_negate (U
, dim
, 0);
2329 gcd_alpha_beta
= S
[0][0];
2331 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2332 but that is a quite strange case. Instead of ICEing, answer
2334 if (gcd_alpha_beta
== 0)
2336 *overlaps_a
= conflict_fn_not_known ();
2337 *overlaps_b
= conflict_fn_not_known ();
2338 *last_conflicts
= chrec_dont_know
;
2339 goto end_analyze_subs_aa
;
2342 /* The classic "gcd-test". */
2343 if (!int_divides_p (gcd_alpha_beta
, gamma
))
2345 /* The "gcd-test" has determined that there is no integer
2346 solution, i.e. there is no dependence. */
2347 *overlaps_a
= conflict_fn_no_dependence ();
2348 *overlaps_b
= conflict_fn_no_dependence ();
2349 *last_conflicts
= integer_zero_node
;
2352 /* Both access functions are univariate. This includes SIV and MIV cases. */
2353 else if (nb_vars_a
== 1 && nb_vars_b
== 1)
2355 /* Both functions should have the same evolution sign. */
2356 if (((A
[0][0] > 0 && -A
[1][0] > 0)
2357 || (A
[0][0] < 0 && -A
[1][0] < 0)))
2359 /* The solutions are given by:
2361 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2364 For a given integer t. Using the following variables,
2366 | i0 = u11 * gamma / gcd_alpha_beta
2367 | j0 = u12 * gamma / gcd_alpha_beta
2374 | y0 = j0 + j1 * t. */
2375 HOST_WIDE_INT i0
, j0
, i1
, j1
;
2377 i0
= U
[0][0] * gamma
/ gcd_alpha_beta
;
2378 j0
= U
[0][1] * gamma
/ gcd_alpha_beta
;
2382 if ((i1
== 0 && i0
< 0)
2383 || (j1
== 0 && j0
< 0))
2385 /* There is no solution.
2386 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2387 falls in here, but for the moment we don't look at the
2388 upper bound of the iteration domain. */
2389 *overlaps_a
= conflict_fn_no_dependence ();
2390 *overlaps_b
= conflict_fn_no_dependence ();
2391 *last_conflicts
= integer_zero_node
;
2392 goto end_analyze_subs_aa
;
2395 if (i1
> 0 && j1
> 0)
2397 HOST_WIDE_INT niter_a
= max_stmt_executions_int
2398 (get_chrec_loop (chrec_a
), true);
2399 HOST_WIDE_INT niter_b
= max_stmt_executions_int
2400 (get_chrec_loop (chrec_b
), true);
2401 HOST_WIDE_INT niter
= MIN (niter_a
, niter_b
);
2403 /* (X0, Y0) is a solution of the Diophantine equation:
2404 "chrec_a (X0) = chrec_b (Y0)". */
2405 HOST_WIDE_INT tau1
= MAX (CEIL (-i0
, i1
),
2407 HOST_WIDE_INT x0
= i1
* tau1
+ i0
;
2408 HOST_WIDE_INT y0
= j1
* tau1
+ j0
;
2410 /* (X1, Y1) is the smallest positive solution of the eq
2411 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2412 first conflict occurs. */
2413 HOST_WIDE_INT min_multiple
= MIN (x0
/ i1
, y0
/ j1
);
2414 HOST_WIDE_INT x1
= x0
- i1
* min_multiple
;
2415 HOST_WIDE_INT y1
= y0
- j1
* min_multiple
;
2419 HOST_WIDE_INT tau2
= MIN (FLOOR_DIV (niter
- i0
, i1
),
2420 FLOOR_DIV (niter
- j0
, j1
));
2421 HOST_WIDE_INT last_conflict
= tau2
- (x1
- i0
)/i1
;
2423 /* If the overlap occurs outside of the bounds of the
2424 loop, there is no dependence. */
2425 if (x1
>= niter
|| y1
>= niter
)
2427 *overlaps_a
= conflict_fn_no_dependence ();
2428 *overlaps_b
= conflict_fn_no_dependence ();
2429 *last_conflicts
= integer_zero_node
;
2430 goto end_analyze_subs_aa
;
2433 *last_conflicts
= build_int_cst (NULL_TREE
, last_conflict
);
2436 *last_conflicts
= chrec_dont_know
;
2440 affine_fn_univar (build_int_cst (NULL_TREE
, x1
),
2442 build_int_cst (NULL_TREE
, i1
)));
2445 affine_fn_univar (build_int_cst (NULL_TREE
, y1
),
2447 build_int_cst (NULL_TREE
, j1
)));
2451 /* FIXME: For the moment, the upper bound of the
2452 iteration domain for i and j is not checked. */
2453 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2454 fprintf (dump_file
, "affine-affine test failed: unimplemented.\n");
2455 *overlaps_a
= conflict_fn_not_known ();
2456 *overlaps_b
= conflict_fn_not_known ();
2457 *last_conflicts
= chrec_dont_know
;
2462 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2463 fprintf (dump_file
, "affine-affine test failed: unimplemented.\n");
2464 *overlaps_a
= conflict_fn_not_known ();
2465 *overlaps_b
= conflict_fn_not_known ();
2466 *last_conflicts
= chrec_dont_know
;
2471 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2472 fprintf (dump_file
, "affine-affine test failed: unimplemented.\n");
2473 *overlaps_a
= conflict_fn_not_known ();
2474 *overlaps_b
= conflict_fn_not_known ();
2475 *last_conflicts
= chrec_dont_know
;
2478 end_analyze_subs_aa
:
2479 obstack_free (&scratch_obstack
, NULL
);
2480 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2482 fprintf (dump_file
, " (overlaps_a = ");
2483 dump_conflict_function (dump_file
, *overlaps_a
);
2484 fprintf (dump_file
, ")\n (overlaps_b = ");
2485 dump_conflict_function (dump_file
, *overlaps_b
);
2486 fprintf (dump_file
, ")\n");
2487 fprintf (dump_file
, ")\n");
2491 /* Returns true when analyze_subscript_affine_affine can be used for
2492 determining the dependence relation between chrec_a and chrec_b,
2493 that contain symbols. This function modifies chrec_a and chrec_b
2494 such that the analysis result is the same, and such that they don't
2495 contain symbols, and then can safely be passed to the analyzer.
2497 Example: The analysis of the following tuples of evolutions produce
2498 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2501 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2502 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2506 can_use_analyze_subscript_affine_affine (tree
*chrec_a
, tree
*chrec_b
)
2508 tree diff
, type
, left_a
, left_b
, right_b
;
2510 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a
))
2511 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b
)))
2512 /* FIXME: For the moment not handled. Might be refined later. */
2515 type
= chrec_type (*chrec_a
);
2516 left_a
= CHREC_LEFT (*chrec_a
);
2517 left_b
= chrec_convert (type
, CHREC_LEFT (*chrec_b
), NULL
);
2518 diff
= chrec_fold_minus (type
, left_a
, left_b
);
2520 if (!evolution_function_is_constant_p (diff
))
2523 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2524 fprintf (dump_file
, "can_use_subscript_aff_aff_for_symbolic \n");
2526 *chrec_a
= build_polynomial_chrec (CHREC_VARIABLE (*chrec_a
),
2527 diff
, CHREC_RIGHT (*chrec_a
));
2528 right_b
= chrec_convert (type
, CHREC_RIGHT (*chrec_b
), NULL
);
2529 *chrec_b
= build_polynomial_chrec (CHREC_VARIABLE (*chrec_b
),
2530 build_int_cst (type
, 0),
2535 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2536 *OVERLAPS_B are initialized to the functions that describe the
2537 relation between the elements accessed twice by CHREC_A and
2538 CHREC_B. For k >= 0, the following property is verified:
2540 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2543 analyze_siv_subscript (tree chrec_a
,
2545 conflict_function
**overlaps_a
,
2546 conflict_function
**overlaps_b
,
2547 tree
*last_conflicts
,
2550 dependence_stats
.num_siv
++;
2552 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2553 fprintf (dump_file
, "(analyze_siv_subscript \n");
2555 if (evolution_function_is_constant_p (chrec_a
)
2556 && evolution_function_is_affine_in_loop (chrec_b
, loop_nest_num
))
2557 analyze_siv_subscript_cst_affine (chrec_a
, chrec_b
,
2558 overlaps_a
, overlaps_b
, last_conflicts
);
2560 else if (evolution_function_is_affine_in_loop (chrec_a
, loop_nest_num
)
2561 && evolution_function_is_constant_p (chrec_b
))
2562 analyze_siv_subscript_cst_affine (chrec_b
, chrec_a
,
2563 overlaps_b
, overlaps_a
, last_conflicts
);
2565 else if (evolution_function_is_affine_in_loop (chrec_a
, loop_nest_num
)
2566 && evolution_function_is_affine_in_loop (chrec_b
, loop_nest_num
))
2568 if (!chrec_contains_symbols (chrec_a
)
2569 && !chrec_contains_symbols (chrec_b
))
2571 analyze_subscript_affine_affine (chrec_a
, chrec_b
,
2572 overlaps_a
, overlaps_b
,
2575 if (CF_NOT_KNOWN_P (*overlaps_a
)
2576 || CF_NOT_KNOWN_P (*overlaps_b
))
2577 dependence_stats
.num_siv_unimplemented
++;
2578 else if (CF_NO_DEPENDENCE_P (*overlaps_a
)
2579 || CF_NO_DEPENDENCE_P (*overlaps_b
))
2580 dependence_stats
.num_siv_independent
++;
2582 dependence_stats
.num_siv_dependent
++;
2584 else if (can_use_analyze_subscript_affine_affine (&chrec_a
,
2587 analyze_subscript_affine_affine (chrec_a
, chrec_b
,
2588 overlaps_a
, overlaps_b
,
2591 if (CF_NOT_KNOWN_P (*overlaps_a
)
2592 || CF_NOT_KNOWN_P (*overlaps_b
))
2593 dependence_stats
.num_siv_unimplemented
++;
2594 else if (CF_NO_DEPENDENCE_P (*overlaps_a
)
2595 || CF_NO_DEPENDENCE_P (*overlaps_b
))
2596 dependence_stats
.num_siv_independent
++;
2598 dependence_stats
.num_siv_dependent
++;
2601 goto siv_subscript_dontknow
;
2606 siv_subscript_dontknow
:;
2607 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2608 fprintf (dump_file
, "siv test failed: unimplemented.\n");
2609 *overlaps_a
= conflict_fn_not_known ();
2610 *overlaps_b
= conflict_fn_not_known ();
2611 *last_conflicts
= chrec_dont_know
;
2612 dependence_stats
.num_siv_unimplemented
++;
2615 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2616 fprintf (dump_file
, ")\n");
2619 /* Returns false if we can prove that the greatest common divisor of the steps
2620 of CHREC does not divide CST, false otherwise. */
2623 gcd_of_steps_may_divide_p (const_tree chrec
, const_tree cst
)
2625 HOST_WIDE_INT cd
= 0, val
;
2628 if (!host_integerp (cst
, 0))
2630 val
= tree_low_cst (cst
, 0);
2632 while (TREE_CODE (chrec
) == POLYNOMIAL_CHREC
)
2634 step
= CHREC_RIGHT (chrec
);
2635 if (!host_integerp (step
, 0))
2637 cd
= gcd (cd
, tree_low_cst (step
, 0));
2638 chrec
= CHREC_LEFT (chrec
);
2641 return val
% cd
== 0;
2644 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2645 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2646 functions that describe the relation between the elements accessed
2647 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2650 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2653 analyze_miv_subscript (tree chrec_a
,
2655 conflict_function
**overlaps_a
,
2656 conflict_function
**overlaps_b
,
2657 tree
*last_conflicts
,
2658 struct loop
*loop_nest
)
2660 tree type
, difference
;
2662 dependence_stats
.num_miv
++;
2663 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2664 fprintf (dump_file
, "(analyze_miv_subscript \n");
2666 type
= signed_type_for_types (TREE_TYPE (chrec_a
), TREE_TYPE (chrec_b
));
2667 chrec_a
= chrec_convert (type
, chrec_a
, NULL
);
2668 chrec_b
= chrec_convert (type
, chrec_b
, NULL
);
2669 difference
= chrec_fold_minus (type
, chrec_a
, chrec_b
);
2671 if (eq_evolutions_p (chrec_a
, chrec_b
))
2673 /* Access functions are the same: all the elements are accessed
2674 in the same order. */
2675 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2676 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2677 *last_conflicts
= max_stmt_executions_tree (get_chrec_loop (chrec_a
));
2678 dependence_stats
.num_miv_dependent
++;
2681 else if (evolution_function_is_constant_p (difference
)
2682 /* For the moment, the following is verified:
2683 evolution_function_is_affine_multivariate_p (chrec_a,
2685 && !gcd_of_steps_may_divide_p (chrec_a
, difference
))
2687 /* testsuite/.../ssa-chrec-33.c
2688 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2690 The difference is 1, and all the evolution steps are multiples
2691 of 2, consequently there are no overlapping elements. */
2692 *overlaps_a
= conflict_fn_no_dependence ();
2693 *overlaps_b
= conflict_fn_no_dependence ();
2694 *last_conflicts
= integer_zero_node
;
2695 dependence_stats
.num_miv_independent
++;
2698 else if (evolution_function_is_affine_multivariate_p (chrec_a
, loop_nest
->num
)
2699 && !chrec_contains_symbols (chrec_a
)
2700 && evolution_function_is_affine_multivariate_p (chrec_b
, loop_nest
->num
)
2701 && !chrec_contains_symbols (chrec_b
))
2703 /* testsuite/.../ssa-chrec-35.c
2704 {0, +, 1}_2 vs. {0, +, 1}_3
2705 the overlapping elements are respectively located at iterations:
2706 {0, +, 1}_x and {0, +, 1}_x,
2707 in other words, we have the equality:
2708 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2711 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2712 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2714 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2715 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2717 analyze_subscript_affine_affine (chrec_a
, chrec_b
,
2718 overlaps_a
, overlaps_b
, last_conflicts
);
2720 if (CF_NOT_KNOWN_P (*overlaps_a
)
2721 || CF_NOT_KNOWN_P (*overlaps_b
))
2722 dependence_stats
.num_miv_unimplemented
++;
2723 else if (CF_NO_DEPENDENCE_P (*overlaps_a
)
2724 || CF_NO_DEPENDENCE_P (*overlaps_b
))
2725 dependence_stats
.num_miv_independent
++;
2727 dependence_stats
.num_miv_dependent
++;
2732 /* When the analysis is too difficult, answer "don't know". */
2733 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2734 fprintf (dump_file
, "analyze_miv_subscript test failed: unimplemented.\n");
2736 *overlaps_a
= conflict_fn_not_known ();
2737 *overlaps_b
= conflict_fn_not_known ();
2738 *last_conflicts
= chrec_dont_know
;
2739 dependence_stats
.num_miv_unimplemented
++;
2742 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2743 fprintf (dump_file
, ")\n");
2746 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2747 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2748 OVERLAP_ITERATIONS_B are initialized with two functions that
2749 describe the iterations that contain conflicting elements.
2751 Remark: For an integer k >= 0, the following equality is true:
2753 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2757 analyze_overlapping_iterations (tree chrec_a
,
2759 conflict_function
**overlap_iterations_a
,
2760 conflict_function
**overlap_iterations_b
,
2761 tree
*last_conflicts
, struct loop
*loop_nest
)
2763 unsigned int lnn
= loop_nest
->num
;
2765 dependence_stats
.num_subscript_tests
++;
2767 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2769 fprintf (dump_file
, "(analyze_overlapping_iterations \n");
2770 fprintf (dump_file
, " (chrec_a = ");
2771 print_generic_expr (dump_file
, chrec_a
, 0);
2772 fprintf (dump_file
, ")\n (chrec_b = ");
2773 print_generic_expr (dump_file
, chrec_b
, 0);
2774 fprintf (dump_file
, ")\n");
2777 if (chrec_a
== NULL_TREE
2778 || chrec_b
== NULL_TREE
2779 || chrec_contains_undetermined (chrec_a
)
2780 || chrec_contains_undetermined (chrec_b
))
2782 dependence_stats
.num_subscript_undetermined
++;
2784 *overlap_iterations_a
= conflict_fn_not_known ();
2785 *overlap_iterations_b
= conflict_fn_not_known ();
2788 /* If they are the same chrec, and are affine, they overlap
2789 on every iteration. */
2790 else if (eq_evolutions_p (chrec_a
, chrec_b
)
2791 && (evolution_function_is_affine_multivariate_p (chrec_a
, lnn
)
2792 || operand_equal_p (chrec_a
, chrec_b
, 0)))
2794 dependence_stats
.num_same_subscript_function
++;
2795 *overlap_iterations_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2796 *overlap_iterations_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2797 *last_conflicts
= chrec_dont_know
;
2800 /* If they aren't the same, and aren't affine, we can't do anything
2802 else if ((chrec_contains_symbols (chrec_a
)
2803 || chrec_contains_symbols (chrec_b
))
2804 && (!evolution_function_is_affine_multivariate_p (chrec_a
, lnn
)
2805 || !evolution_function_is_affine_multivariate_p (chrec_b
, lnn
)))
2807 dependence_stats
.num_subscript_undetermined
++;
2808 *overlap_iterations_a
= conflict_fn_not_known ();
2809 *overlap_iterations_b
= conflict_fn_not_known ();
2812 else if (ziv_subscript_p (chrec_a
, chrec_b
))
2813 analyze_ziv_subscript (chrec_a
, chrec_b
,
2814 overlap_iterations_a
, overlap_iterations_b
,
2817 else if (siv_subscript_p (chrec_a
, chrec_b
))
2818 analyze_siv_subscript (chrec_a
, chrec_b
,
2819 overlap_iterations_a
, overlap_iterations_b
,
2820 last_conflicts
, lnn
);
2823 analyze_miv_subscript (chrec_a
, chrec_b
,
2824 overlap_iterations_a
, overlap_iterations_b
,
2825 last_conflicts
, loop_nest
);
2827 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2829 fprintf (dump_file
, " (overlap_iterations_a = ");
2830 dump_conflict_function (dump_file
, *overlap_iterations_a
);
2831 fprintf (dump_file
, ")\n (overlap_iterations_b = ");
2832 dump_conflict_function (dump_file
, *overlap_iterations_b
);
2833 fprintf (dump_file
, ")\n");
2834 fprintf (dump_file
, ")\n");
2838 /* Helper function for uniquely inserting distance vectors. */
2841 save_dist_v (struct data_dependence_relation
*ddr
, lambda_vector dist_v
)
2846 FOR_EACH_VEC_ELT (lambda_vector
, DDR_DIST_VECTS (ddr
), i
, v
)
2847 if (lambda_vector_equal (v
, dist_v
, DDR_NB_LOOPS (ddr
)))
2850 VEC_safe_push (lambda_vector
, heap
, DDR_DIST_VECTS (ddr
), dist_v
);
2853 /* Helper function for uniquely inserting direction vectors. */
2856 save_dir_v (struct data_dependence_relation
*ddr
, lambda_vector dir_v
)
2861 FOR_EACH_VEC_ELT (lambda_vector
, DDR_DIR_VECTS (ddr
), i
, v
)
2862 if (lambda_vector_equal (v
, dir_v
, DDR_NB_LOOPS (ddr
)))
2865 VEC_safe_push (lambda_vector
, heap
, DDR_DIR_VECTS (ddr
), dir_v
);
2868 /* Add a distance of 1 on all the loops outer than INDEX. If we
2869 haven't yet determined a distance for this outer loop, push a new
2870 distance vector composed of the previous distance, and a distance
2871 of 1 for this outer loop. Example:
2879 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
2880 save (0, 1), then we have to save (1, 0). */
2883 add_outer_distances (struct data_dependence_relation
*ddr
,
2884 lambda_vector dist_v
, int index
)
2886 /* For each outer loop where init_v is not set, the accesses are
2887 in dependence of distance 1 in the loop. */
2888 while (--index
>= 0)
2890 lambda_vector save_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
2891 lambda_vector_copy (dist_v
, save_v
, DDR_NB_LOOPS (ddr
));
2893 save_dist_v (ddr
, save_v
);
2897 /* Return false when fail to represent the data dependence as a
2898 distance vector. INIT_B is set to true when a component has been
2899 added to the distance vector DIST_V. INDEX_CARRY is then set to
2900 the index in DIST_V that carries the dependence. */
2903 build_classic_dist_vector_1 (struct data_dependence_relation
*ddr
,
2904 struct data_reference
*ddr_a
,
2905 struct data_reference
*ddr_b
,
2906 lambda_vector dist_v
, bool *init_b
,
2910 lambda_vector init_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
2912 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
2914 tree access_fn_a
, access_fn_b
;
2915 struct subscript
*subscript
= DDR_SUBSCRIPT (ddr
, i
);
2917 if (chrec_contains_undetermined (SUB_DISTANCE (subscript
)))
2919 non_affine_dependence_relation (ddr
);
2923 access_fn_a
= DR_ACCESS_FN (ddr_a
, i
);
2924 access_fn_b
= DR_ACCESS_FN (ddr_b
, i
);
2926 if (TREE_CODE (access_fn_a
) == POLYNOMIAL_CHREC
2927 && TREE_CODE (access_fn_b
) == POLYNOMIAL_CHREC
)
2930 int var_a
= CHREC_VARIABLE (access_fn_a
);
2931 int var_b
= CHREC_VARIABLE (access_fn_b
);
2934 || chrec_contains_undetermined (SUB_DISTANCE (subscript
)))
2936 non_affine_dependence_relation (ddr
);
2940 dist
= int_cst_value (SUB_DISTANCE (subscript
));
2941 index
= index_in_loop_nest (var_a
, DDR_LOOP_NEST (ddr
));
2942 *index_carry
= MIN (index
, *index_carry
);
2944 /* This is the subscript coupling test. If we have already
2945 recorded a distance for this loop (a distance coming from
2946 another subscript), it should be the same. For example,
2947 in the following code, there is no dependence:
2954 if (init_v
[index
] != 0 && dist_v
[index
] != dist
)
2956 finalize_ddr_dependent (ddr
, chrec_known
);
2960 dist_v
[index
] = dist
;
2964 else if (!operand_equal_p (access_fn_a
, access_fn_b
, 0))
2966 /* This can be for example an affine vs. constant dependence
2967 (T[i] vs. T[3]) that is not an affine dependence and is
2968 not representable as a distance vector. */
2969 non_affine_dependence_relation (ddr
);
2977 /* Return true when the DDR contains only constant access functions. */
2980 constant_access_functions (const struct data_dependence_relation
*ddr
)
2984 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
2985 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr
), i
))
2986 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr
), i
)))
2992 /* Helper function for the case where DDR_A and DDR_B are the same
2993 multivariate access function with a constant step. For an example
2997 add_multivariate_self_dist (struct data_dependence_relation
*ddr
, tree c_2
)
3000 tree c_1
= CHREC_LEFT (c_2
);
3001 tree c_0
= CHREC_LEFT (c_1
);
3002 lambda_vector dist_v
;
3005 /* Polynomials with more than 2 variables are not handled yet. When
3006 the evolution steps are parameters, it is not possible to
3007 represent the dependence using classical distance vectors. */
3008 if (TREE_CODE (c_0
) != INTEGER_CST
3009 || TREE_CODE (CHREC_RIGHT (c_1
)) != INTEGER_CST
3010 || TREE_CODE (CHREC_RIGHT (c_2
)) != INTEGER_CST
)
3012 DDR_AFFINE_P (ddr
) = false;
3016 x_2
= index_in_loop_nest (CHREC_VARIABLE (c_2
), DDR_LOOP_NEST (ddr
));
3017 x_1
= index_in_loop_nest (CHREC_VARIABLE (c_1
), DDR_LOOP_NEST (ddr
));
3019 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
3020 dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3021 v1
= int_cst_value (CHREC_RIGHT (c_1
));
3022 v2
= int_cst_value (CHREC_RIGHT (c_2
));
3035 save_dist_v (ddr
, dist_v
);
3037 add_outer_distances (ddr
, dist_v
, x_1
);
3040 /* Helper function for the case where DDR_A and DDR_B are the same
3041 access functions. */
3044 add_other_self_distances (struct data_dependence_relation
*ddr
)
3046 lambda_vector dist_v
;
3048 int index_carry
= DDR_NB_LOOPS (ddr
);
3050 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
3052 tree access_fun
= DR_ACCESS_FN (DDR_A (ddr
), i
);
3054 if (TREE_CODE (access_fun
) == POLYNOMIAL_CHREC
)
3056 if (!evolution_function_is_univariate_p (access_fun
))
3058 if (DDR_NUM_SUBSCRIPTS (ddr
) != 1)
3060 DDR_ARE_DEPENDENT (ddr
) = chrec_dont_know
;
3064 access_fun
= DR_ACCESS_FN (DDR_A (ddr
), 0);
3066 if (TREE_CODE (CHREC_LEFT (access_fun
)) == POLYNOMIAL_CHREC
)
3067 add_multivariate_self_dist (ddr
, access_fun
);
3069 /* The evolution step is not constant: it varies in
3070 the outer loop, so this cannot be represented by a
3071 distance vector. For example in pr34635.c the
3072 evolution is {0, +, {0, +, 4}_1}_2. */
3073 DDR_AFFINE_P (ddr
) = false;
3078 index_carry
= MIN (index_carry
,
3079 index_in_loop_nest (CHREC_VARIABLE (access_fun
),
3080 DDR_LOOP_NEST (ddr
)));
3084 dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3085 add_outer_distances (ddr
, dist_v
, index_carry
);
3089 insert_innermost_unit_dist_vector (struct data_dependence_relation
*ddr
)
3091 lambda_vector dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3093 dist_v
[DDR_INNER_LOOP (ddr
)] = 1;
3094 save_dist_v (ddr
, dist_v
);
3097 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
3098 is the case for example when access functions are the same and
3099 equal to a constant, as in:
3106 in which case the distance vectors are (0) and (1). */
3109 add_distance_for_zero_overlaps (struct data_dependence_relation
*ddr
)
3113 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
3115 subscript_p sub
= DDR_SUBSCRIPT (ddr
, i
);
3116 conflict_function
*ca
= SUB_CONFLICTS_IN_A (sub
);
3117 conflict_function
*cb
= SUB_CONFLICTS_IN_B (sub
);
3119 for (j
= 0; j
< ca
->n
; j
++)
3120 if (affine_function_zero_p (ca
->fns
[j
]))
3122 insert_innermost_unit_dist_vector (ddr
);
3126 for (j
= 0; j
< cb
->n
; j
++)
3127 if (affine_function_zero_p (cb
->fns
[j
]))
3129 insert_innermost_unit_dist_vector (ddr
);
3135 /* Compute the classic per loop distance vector. DDR is the data
3136 dependence relation to build a vector from. Return false when fail
3137 to represent the data dependence as a distance vector. */
3140 build_classic_dist_vector (struct data_dependence_relation
*ddr
,
3141 struct loop
*loop_nest
)
3143 bool init_b
= false;
3144 int index_carry
= DDR_NB_LOOPS (ddr
);
3145 lambda_vector dist_v
;
3147 if (DDR_ARE_DEPENDENT (ddr
) != NULL_TREE
)
3150 if (same_access_functions (ddr
))
3152 /* Save the 0 vector. */
3153 dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3154 save_dist_v (ddr
, dist_v
);
3156 if (constant_access_functions (ddr
))
3157 add_distance_for_zero_overlaps (ddr
);
3159 if (DDR_NB_LOOPS (ddr
) > 1)
3160 add_other_self_distances (ddr
);
3165 dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3166 if (!build_classic_dist_vector_1 (ddr
, DDR_A (ddr
), DDR_B (ddr
),
3167 dist_v
, &init_b
, &index_carry
))
3170 /* Save the distance vector if we initialized one. */
3173 /* Verify a basic constraint: classic distance vectors should
3174 always be lexicographically positive.
3176 Data references are collected in the order of execution of
3177 the program, thus for the following loop
3179 | for (i = 1; i < 100; i++)
3180 | for (j = 1; j < 100; j++)
3182 | t = T[j+1][i-1]; // A
3183 | T[j][i] = t + 2; // B
3186 references are collected following the direction of the wind:
3187 A then B. The data dependence tests are performed also
3188 following this order, such that we're looking at the distance
3189 separating the elements accessed by A from the elements later
3190 accessed by B. But in this example, the distance returned by
3191 test_dep (A, B) is lexicographically negative (-1, 1), that
3192 means that the access A occurs later than B with respect to
3193 the outer loop, ie. we're actually looking upwind. In this
3194 case we solve test_dep (B, A) looking downwind to the
3195 lexicographically positive solution, that returns the
3196 distance vector (1, -1). */
3197 if (!lambda_vector_lexico_pos (dist_v
, DDR_NB_LOOPS (ddr
)))
3199 lambda_vector save_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3200 if (!subscript_dependence_tester_1 (ddr
, DDR_B (ddr
), DDR_A (ddr
),
3203 compute_subscript_distance (ddr
);
3204 if (!build_classic_dist_vector_1 (ddr
, DDR_B (ddr
), DDR_A (ddr
),
3205 save_v
, &init_b
, &index_carry
))
3207 save_dist_v (ddr
, save_v
);
3208 DDR_REVERSED_P (ddr
) = true;
3210 /* In this case there is a dependence forward for all the
3213 | for (k = 1; k < 100; k++)
3214 | for (i = 1; i < 100; i++)
3215 | for (j = 1; j < 100; j++)
3217 | t = T[j+1][i-1]; // A
3218 | T[j][i] = t + 2; // B
3226 if (DDR_NB_LOOPS (ddr
) > 1)
3228 add_outer_distances (ddr
, save_v
, index_carry
);
3229 add_outer_distances (ddr
, dist_v
, index_carry
);
3234 lambda_vector save_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3235 lambda_vector_copy (dist_v
, save_v
, DDR_NB_LOOPS (ddr
));
3237 if (DDR_NB_LOOPS (ddr
) > 1)
3239 lambda_vector opposite_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3241 if (!subscript_dependence_tester_1 (ddr
, DDR_B (ddr
),
3242 DDR_A (ddr
), loop_nest
))
3244 compute_subscript_distance (ddr
);
3245 if (!build_classic_dist_vector_1 (ddr
, DDR_B (ddr
), DDR_A (ddr
),
3246 opposite_v
, &init_b
,
3250 save_dist_v (ddr
, save_v
);
3251 add_outer_distances (ddr
, dist_v
, index_carry
);
3252 add_outer_distances (ddr
, opposite_v
, index_carry
);
3255 save_dist_v (ddr
, save_v
);
3260 /* There is a distance of 1 on all the outer loops: Example:
3261 there is a dependence of distance 1 on loop_1 for the array A.
3267 add_outer_distances (ddr
, dist_v
,
3268 lambda_vector_first_nz (dist_v
,
3269 DDR_NB_LOOPS (ddr
), 0));
3272 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3276 fprintf (dump_file
, "(build_classic_dist_vector\n");
3277 for (i
= 0; i
< DDR_NUM_DIST_VECTS (ddr
); i
++)
3279 fprintf (dump_file
, " dist_vector = (");
3280 print_lambda_vector (dump_file
, DDR_DIST_VECT (ddr
, i
),
3281 DDR_NB_LOOPS (ddr
));
3282 fprintf (dump_file
, " )\n");
3284 fprintf (dump_file
, ")\n");
3290 /* Return the direction for a given distance.
3291 FIXME: Computing dir this way is suboptimal, since dir can catch
3292 cases that dist is unable to represent. */
3294 static inline enum data_dependence_direction
3295 dir_from_dist (int dist
)
3298 return dir_positive
;
3300 return dir_negative
;
3305 /* Compute the classic per loop direction vector. DDR is the data
3306 dependence relation to build a vector from. */
3309 build_classic_dir_vector (struct data_dependence_relation
*ddr
)
3312 lambda_vector dist_v
;
3314 FOR_EACH_VEC_ELT (lambda_vector
, DDR_DIST_VECTS (ddr
), i
, dist_v
)
3316 lambda_vector dir_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3318 for (j
= 0; j
< DDR_NB_LOOPS (ddr
); j
++)
3319 dir_v
[j
] = dir_from_dist (dist_v
[j
]);
3321 save_dir_v (ddr
, dir_v
);
3325 /* Helper function. Returns true when there is a dependence between
3326 data references DRA and DRB. */
3329 subscript_dependence_tester_1 (struct data_dependence_relation
*ddr
,
3330 struct data_reference
*dra
,
3331 struct data_reference
*drb
,
3332 struct loop
*loop_nest
)
3335 tree last_conflicts
;
3336 struct subscript
*subscript
;
3338 for (i
= 0; VEC_iterate (subscript_p
, DDR_SUBSCRIPTS (ddr
), i
, subscript
);
3341 conflict_function
*overlaps_a
, *overlaps_b
;
3343 analyze_overlapping_iterations (DR_ACCESS_FN (dra
, i
),
3344 DR_ACCESS_FN (drb
, i
),
3345 &overlaps_a
, &overlaps_b
,
3346 &last_conflicts
, loop_nest
);
3348 if (CF_NOT_KNOWN_P (overlaps_a
)
3349 || CF_NOT_KNOWN_P (overlaps_b
))
3351 finalize_ddr_dependent (ddr
, chrec_dont_know
);
3352 dependence_stats
.num_dependence_undetermined
++;
3353 free_conflict_function (overlaps_a
);
3354 free_conflict_function (overlaps_b
);
3358 else if (CF_NO_DEPENDENCE_P (overlaps_a
)
3359 || CF_NO_DEPENDENCE_P (overlaps_b
))
3361 finalize_ddr_dependent (ddr
, chrec_known
);
3362 dependence_stats
.num_dependence_independent
++;
3363 free_conflict_function (overlaps_a
);
3364 free_conflict_function (overlaps_b
);
3370 if (SUB_CONFLICTS_IN_A (subscript
))
3371 free_conflict_function (SUB_CONFLICTS_IN_A (subscript
));
3372 if (SUB_CONFLICTS_IN_B (subscript
))
3373 free_conflict_function (SUB_CONFLICTS_IN_B (subscript
));
3375 SUB_CONFLICTS_IN_A (subscript
) = overlaps_a
;
3376 SUB_CONFLICTS_IN_B (subscript
) = overlaps_b
;
3377 SUB_LAST_CONFLICT (subscript
) = last_conflicts
;
3384 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3387 subscript_dependence_tester (struct data_dependence_relation
*ddr
,
3388 struct loop
*loop_nest
)
3391 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3392 fprintf (dump_file
, "(subscript_dependence_tester \n");
3394 if (subscript_dependence_tester_1 (ddr
, DDR_A (ddr
), DDR_B (ddr
), loop_nest
))
3395 dependence_stats
.num_dependence_dependent
++;
3397 compute_subscript_distance (ddr
);
3398 if (build_classic_dist_vector (ddr
, loop_nest
))
3399 build_classic_dir_vector (ddr
);
3401 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3402 fprintf (dump_file
, ")\n");
3405 /* Returns true when all the access functions of A are affine or
3406 constant with respect to LOOP_NEST. */
3409 access_functions_are_affine_or_constant_p (const struct data_reference
*a
,
3410 const struct loop
*loop_nest
)
3413 VEC(tree
,heap
) *fns
= DR_ACCESS_FNS (a
);
3416 FOR_EACH_VEC_ELT (tree
, fns
, i
, t
)
3417 if (!evolution_function_is_invariant_p (t
, loop_nest
->num
)
3418 && !evolution_function_is_affine_multivariate_p (t
, loop_nest
->num
))
3424 /* Initializes an equation for an OMEGA problem using the information
3425 contained in the ACCESS_FUN. Returns true when the operation
3428 PB is the omega constraint system.
3429 EQ is the number of the equation to be initialized.
3430 OFFSET is used for shifting the variables names in the constraints:
3431 a constrain is composed of 2 * the number of variables surrounding
3432 dependence accesses. OFFSET is set either to 0 for the first n variables,
3433 then it is set to n.
3434 ACCESS_FUN is expected to be an affine chrec. */
3437 init_omega_eq_with_af (omega_pb pb
, unsigned eq
,
3438 unsigned int offset
, tree access_fun
,
3439 struct data_dependence_relation
*ddr
)
3441 switch (TREE_CODE (access_fun
))
3443 case POLYNOMIAL_CHREC
:
3445 tree left
= CHREC_LEFT (access_fun
);
3446 tree right
= CHREC_RIGHT (access_fun
);
3447 int var
= CHREC_VARIABLE (access_fun
);
3450 if (TREE_CODE (right
) != INTEGER_CST
)
3453 var_idx
= index_in_loop_nest (var
, DDR_LOOP_NEST (ddr
));
3454 pb
->eqs
[eq
].coef
[offset
+ var_idx
+ 1] = int_cst_value (right
);
3456 /* Compute the innermost loop index. */
3457 DDR_INNER_LOOP (ddr
) = MAX (DDR_INNER_LOOP (ddr
), var_idx
);
3460 pb
->eqs
[eq
].coef
[var_idx
+ DDR_NB_LOOPS (ddr
) + 1]
3461 += int_cst_value (right
);
3463 switch (TREE_CODE (left
))
3465 case POLYNOMIAL_CHREC
:
3466 return init_omega_eq_with_af (pb
, eq
, offset
, left
, ddr
);
3469 pb
->eqs
[eq
].coef
[0] += int_cst_value (left
);
3478 pb
->eqs
[eq
].coef
[0] += int_cst_value (access_fun
);
3486 /* As explained in the comments preceding init_omega_for_ddr, we have
3487 to set up a system for each loop level, setting outer loops
3488 variation to zero, and current loop variation to positive or zero.
3489 Save each lexico positive distance vector. */
3492 omega_extract_distance_vectors (omega_pb pb
,
3493 struct data_dependence_relation
*ddr
)
3497 struct loop
*loopi
, *loopj
;
3498 enum omega_result res
;
3500 /* Set a new problem for each loop in the nest. The basis is the
3501 problem that we have initialized until now. On top of this we
3502 add new constraints. */
3503 for (i
= 0; i
<= DDR_INNER_LOOP (ddr
)
3504 && VEC_iterate (loop_p
, DDR_LOOP_NEST (ddr
), i
, loopi
); i
++)
3507 omega_pb copy
= omega_alloc_problem (2 * DDR_NB_LOOPS (ddr
),
3508 DDR_NB_LOOPS (ddr
));
3510 omega_copy_problem (copy
, pb
);
3512 /* For all the outer loops "loop_j", add "dj = 0". */
3514 j
< i
&& VEC_iterate (loop_p
, DDR_LOOP_NEST (ddr
), j
, loopj
); j
++)
3516 eq
= omega_add_zero_eq (copy
, omega_black
);
3517 copy
->eqs
[eq
].coef
[j
+ 1] = 1;
3520 /* For "loop_i", add "0 <= di". */
3521 geq
= omega_add_zero_geq (copy
, omega_black
);
3522 copy
->geqs
[geq
].coef
[i
+ 1] = 1;
3524 /* Reduce the constraint system, and test that the current
3525 problem is feasible. */
3526 res
= omega_simplify_problem (copy
);
3527 if (res
== omega_false
3528 || res
== omega_unknown
3529 || copy
->num_geqs
> (int) DDR_NB_LOOPS (ddr
))
3532 for (eq
= 0; eq
< copy
->num_subs
; eq
++)
3533 if (copy
->subs
[eq
].key
== (int) i
+ 1)
3535 dist
= copy
->subs
[eq
].coef
[0];
3541 /* Reinitialize problem... */
3542 omega_copy_problem (copy
, pb
);
3544 j
< i
&& VEC_iterate (loop_p
, DDR_LOOP_NEST (ddr
), j
, loopj
); j
++)
3546 eq
= omega_add_zero_eq (copy
, omega_black
);
3547 copy
->eqs
[eq
].coef
[j
+ 1] = 1;
3550 /* ..., but this time "di = 1". */
3551 eq
= omega_add_zero_eq (copy
, omega_black
);
3552 copy
->eqs
[eq
].coef
[i
+ 1] = 1;
3553 copy
->eqs
[eq
].coef
[0] = -1;
3555 res
= omega_simplify_problem (copy
);
3556 if (res
== omega_false
3557 || res
== omega_unknown
3558 || copy
->num_geqs
> (int) DDR_NB_LOOPS (ddr
))
3561 for (eq
= 0; eq
< copy
->num_subs
; eq
++)
3562 if (copy
->subs
[eq
].key
== (int) i
+ 1)
3564 dist
= copy
->subs
[eq
].coef
[0];
3570 /* Save the lexicographically positive distance vector. */
3573 lambda_vector dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3574 lambda_vector dir_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3578 for (eq
= 0; eq
< copy
->num_subs
; eq
++)
3579 if (copy
->subs
[eq
].key
> 0)
3581 dist
= copy
->subs
[eq
].coef
[0];
3582 dist_v
[copy
->subs
[eq
].key
- 1] = dist
;
3585 for (j
= 0; j
< DDR_NB_LOOPS (ddr
); j
++)
3586 dir_v
[j
] = dir_from_dist (dist_v
[j
]);
3588 save_dist_v (ddr
, dist_v
);
3589 save_dir_v (ddr
, dir_v
);
3593 omega_free_problem (copy
);
3597 /* This is called for each subscript of a tuple of data references:
3598 insert an equality for representing the conflicts. */
3601 omega_setup_subscript (tree access_fun_a
, tree access_fun_b
,
3602 struct data_dependence_relation
*ddr
,
3603 omega_pb pb
, bool *maybe_dependent
)
3606 tree type
= signed_type_for_types (TREE_TYPE (access_fun_a
),
3607 TREE_TYPE (access_fun_b
));
3608 tree fun_a
= chrec_convert (type
, access_fun_a
, NULL
);
3609 tree fun_b
= chrec_convert (type
, access_fun_b
, NULL
);
3610 tree difference
= chrec_fold_minus (type
, fun_a
, fun_b
);
3613 /* When the fun_a - fun_b is not constant, the dependence is not
3614 captured by the classic distance vector representation. */
3615 if (TREE_CODE (difference
) != INTEGER_CST
)
3619 if (ziv_subscript_p (fun_a
, fun_b
) && !integer_zerop (difference
))
3621 /* There is no dependence. */
3622 *maybe_dependent
= false;
3626 minus_one
= build_int_cst (type
, -1);
3627 fun_b
= chrec_fold_multiply (type
, fun_b
, minus_one
);
3629 eq
= omega_add_zero_eq (pb
, omega_black
);
3630 if (!init_omega_eq_with_af (pb
, eq
, DDR_NB_LOOPS (ddr
), fun_a
, ddr
)
3631 || !init_omega_eq_with_af (pb
, eq
, 0, fun_b
, ddr
))
3632 /* There is probably a dependence, but the system of
3633 constraints cannot be built: answer "don't know". */
3637 if (DDR_NB_LOOPS (ddr
) != 0 && pb
->eqs
[eq
].coef
[0]
3638 && !int_divides_p (lambda_vector_gcd
3639 ((lambda_vector
) &(pb
->eqs
[eq
].coef
[1]),
3640 2 * DDR_NB_LOOPS (ddr
)),
3641 pb
->eqs
[eq
].coef
[0]))
3643 /* There is no dependence. */
3644 *maybe_dependent
= false;
3651 /* Helper function, same as init_omega_for_ddr but specialized for
3652 data references A and B. */
3655 init_omega_for_ddr_1 (struct data_reference
*dra
, struct data_reference
*drb
,
3656 struct data_dependence_relation
*ddr
,
3657 omega_pb pb
, bool *maybe_dependent
)
3662 unsigned nb_loops
= DDR_NB_LOOPS (ddr
);
3664 /* Insert an equality per subscript. */
3665 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
3667 if (!omega_setup_subscript (DR_ACCESS_FN (dra
, i
), DR_ACCESS_FN (drb
, i
),
3668 ddr
, pb
, maybe_dependent
))
3670 else if (*maybe_dependent
== false)
3672 /* There is no dependence. */
3673 DDR_ARE_DEPENDENT (ddr
) = chrec_known
;
3678 /* Insert inequalities: constraints corresponding to the iteration
3679 domain, i.e. the loops surrounding the references "loop_x" and
3680 the distance variables "dx". The layout of the OMEGA
3681 representation is as follows:
3682 - coef[0] is the constant
3683 - coef[1..nb_loops] are the protected variables that will not be
3684 removed by the solver: the "dx"
3685 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3687 for (i
= 0; i
<= DDR_INNER_LOOP (ddr
)
3688 && VEC_iterate (loop_p
, DDR_LOOP_NEST (ddr
), i
, loopi
); i
++)
3690 HOST_WIDE_INT nbi
= max_stmt_executions_int (loopi
, true);
3693 ineq
= omega_add_zero_geq (pb
, omega_black
);
3694 pb
->geqs
[ineq
].coef
[i
+ nb_loops
+ 1] = 1;
3696 /* 0 <= loop_x + dx */
3697 ineq
= omega_add_zero_geq (pb
, omega_black
);
3698 pb
->geqs
[ineq
].coef
[i
+ nb_loops
+ 1] = 1;
3699 pb
->geqs
[ineq
].coef
[i
+ 1] = 1;
3703 /* loop_x <= nb_iters */
3704 ineq
= omega_add_zero_geq (pb
, omega_black
);
3705 pb
->geqs
[ineq
].coef
[i
+ nb_loops
+ 1] = -1;
3706 pb
->geqs
[ineq
].coef
[0] = nbi
;
3708 /* loop_x + dx <= nb_iters */
3709 ineq
= omega_add_zero_geq (pb
, omega_black
);
3710 pb
->geqs
[ineq
].coef
[i
+ nb_loops
+ 1] = -1;
3711 pb
->geqs
[ineq
].coef
[i
+ 1] = -1;
3712 pb
->geqs
[ineq
].coef
[0] = nbi
;
3714 /* A step "dx" bigger than nb_iters is not feasible, so
3715 add "0 <= nb_iters + dx", */
3716 ineq
= omega_add_zero_geq (pb
, omega_black
);
3717 pb
->geqs
[ineq
].coef
[i
+ 1] = 1;
3718 pb
->geqs
[ineq
].coef
[0] = nbi
;
3719 /* and "dx <= nb_iters". */
3720 ineq
= omega_add_zero_geq (pb
, omega_black
);
3721 pb
->geqs
[ineq
].coef
[i
+ 1] = -1;
3722 pb
->geqs
[ineq
].coef
[0] = nbi
;
3726 omega_extract_distance_vectors (pb
, ddr
);
3731 /* Sets up the Omega dependence problem for the data dependence
3732 relation DDR. Returns false when the constraint system cannot be
3733 built, ie. when the test answers "don't know". Returns true
3734 otherwise, and when independence has been proved (using one of the
3735 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3736 set MAYBE_DEPENDENT to true.
3738 Example: for setting up the dependence system corresponding to the
3739 conflicting accesses
3744 | ... A[2*j, 2*(i + j)]
3748 the following constraints come from the iteration domain:
3755 where di, dj are the distance variables. The constraints
3756 representing the conflicting elements are:
3759 i + 1 = 2 * (i + di + j + dj)
3761 For asking that the resulting distance vector (di, dj) be
3762 lexicographically positive, we insert the constraint "di >= 0". If
3763 "di = 0" in the solution, we fix that component to zero, and we
3764 look at the inner loops: we set a new problem where all the outer
3765 loop distances are zero, and fix this inner component to be
3766 positive. When one of the components is positive, we save that
3767 distance, and set a new problem where the distance on this loop is
3768 zero, searching for other distances in the inner loops. Here is
3769 the classic example that illustrates that we have to set for each
3770 inner loop a new problem:
3778 we have to save two distances (1, 0) and (0, 1).
3780 Given two array references, refA and refB, we have to set the
3781 dependence problem twice, refA vs. refB and refB vs. refA, and we
3782 cannot do a single test, as refB might occur before refA in the
3783 inner loops, and the contrary when considering outer loops: ex.
3788 | T[{1,+,1}_2][{1,+,1}_1] // refA
3789 | T[{2,+,1}_2][{0,+,1}_1] // refB
3794 refB touches the elements in T before refA, and thus for the same
3795 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3796 but for successive loop_0 iterations, we have (1, -1, 1)
3798 The Omega solver expects the distance variables ("di" in the
3799 previous example) to come first in the constraint system (as
3800 variables to be protected, or "safe" variables), the constraint
3801 system is built using the following layout:
3803 "cst | distance vars | index vars".
3807 init_omega_for_ddr (struct data_dependence_relation
*ddr
,
3808 bool *maybe_dependent
)
3813 *maybe_dependent
= true;
3815 if (same_access_functions (ddr
))
3818 lambda_vector dir_v
;
3820 /* Save the 0 vector. */
3821 save_dist_v (ddr
, lambda_vector_new (DDR_NB_LOOPS (ddr
)));
3822 dir_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3823 for (j
= 0; j
< DDR_NB_LOOPS (ddr
); j
++)
3824 dir_v
[j
] = dir_equal
;
3825 save_dir_v (ddr
, dir_v
);
3827 /* Save the dependences carried by outer loops. */
3828 pb
= omega_alloc_problem (2 * DDR_NB_LOOPS (ddr
), DDR_NB_LOOPS (ddr
));
3829 res
= init_omega_for_ddr_1 (DDR_A (ddr
), DDR_B (ddr
), ddr
, pb
,
3831 omega_free_problem (pb
);
3835 /* Omega expects the protected variables (those that have to be kept
3836 after elimination) to appear first in the constraint system.
3837 These variables are the distance variables. In the following
3838 initialization we declare NB_LOOPS safe variables, and the total
3839 number of variables for the constraint system is 2*NB_LOOPS. */
3840 pb
= omega_alloc_problem (2 * DDR_NB_LOOPS (ddr
), DDR_NB_LOOPS (ddr
));
3841 res
= init_omega_for_ddr_1 (DDR_A (ddr
), DDR_B (ddr
), ddr
, pb
,
3843 omega_free_problem (pb
);
3845 /* Stop computation if not decidable, or no dependence. */
3846 if (res
== false || *maybe_dependent
== false)
3849 pb
= omega_alloc_problem (2 * DDR_NB_LOOPS (ddr
), DDR_NB_LOOPS (ddr
));
3850 res
= init_omega_for_ddr_1 (DDR_B (ddr
), DDR_A (ddr
), ddr
, pb
,
3852 omega_free_problem (pb
);
3857 /* Return true when DDR contains the same information as that stored
3858 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
3861 ddr_consistent_p (FILE *file
,
3862 struct data_dependence_relation
*ddr
,
3863 VEC (lambda_vector
, heap
) *dist_vects
,
3864 VEC (lambda_vector
, heap
) *dir_vects
)
3868 /* If dump_file is set, output there. */
3869 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3872 if (VEC_length (lambda_vector
, dist_vects
) != DDR_NUM_DIST_VECTS (ddr
))
3874 lambda_vector b_dist_v
;
3875 fprintf (file
, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
3876 VEC_length (lambda_vector
, dist_vects
),
3877 DDR_NUM_DIST_VECTS (ddr
));
3879 fprintf (file
, "Banerjee dist vectors:\n");
3880 FOR_EACH_VEC_ELT (lambda_vector
, dist_vects
, i
, b_dist_v
)
3881 print_lambda_vector (file
, b_dist_v
, DDR_NB_LOOPS (ddr
));
3883 fprintf (file
, "Omega dist vectors:\n");
3884 for (i
= 0; i
< DDR_NUM_DIST_VECTS (ddr
); i
++)
3885 print_lambda_vector (file
, DDR_DIST_VECT (ddr
, i
), DDR_NB_LOOPS (ddr
));
3887 fprintf (file
, "data dependence relation:\n");
3888 dump_data_dependence_relation (file
, ddr
);
3890 fprintf (file
, ")\n");
3894 if (VEC_length (lambda_vector
, dir_vects
) != DDR_NUM_DIR_VECTS (ddr
))
3896 fprintf (file
, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
3897 VEC_length (lambda_vector
, dir_vects
),
3898 DDR_NUM_DIR_VECTS (ddr
));
3902 for (i
= 0; i
< DDR_NUM_DIST_VECTS (ddr
); i
++)
3904 lambda_vector a_dist_v
;
3905 lambda_vector b_dist_v
= DDR_DIST_VECT (ddr
, i
);
3907 /* Distance vectors are not ordered in the same way in the DDR
3908 and in the DIST_VECTS: search for a matching vector. */
3909 FOR_EACH_VEC_ELT (lambda_vector
, dist_vects
, j
, a_dist_v
)
3910 if (lambda_vector_equal (a_dist_v
, b_dist_v
, DDR_NB_LOOPS (ddr
)))
3913 if (j
== VEC_length (lambda_vector
, dist_vects
))
3915 fprintf (file
, "\n(Dist vectors from the first dependence analyzer:\n");
3916 print_dist_vectors (file
, dist_vects
, DDR_NB_LOOPS (ddr
));
3917 fprintf (file
, "not found in Omega dist vectors:\n");
3918 print_dist_vectors (file
, DDR_DIST_VECTS (ddr
), DDR_NB_LOOPS (ddr
));
3919 fprintf (file
, "data dependence relation:\n");
3920 dump_data_dependence_relation (file
, ddr
);
3921 fprintf (file
, ")\n");
3925 for (i
= 0; i
< DDR_NUM_DIR_VECTS (ddr
); i
++)
3927 lambda_vector a_dir_v
;
3928 lambda_vector b_dir_v
= DDR_DIR_VECT (ddr
, i
);
3930 /* Direction vectors are not ordered in the same way in the DDR
3931 and in the DIR_VECTS: search for a matching vector. */
3932 FOR_EACH_VEC_ELT (lambda_vector
, dir_vects
, j
, a_dir_v
)
3933 if (lambda_vector_equal (a_dir_v
, b_dir_v
, DDR_NB_LOOPS (ddr
)))
3936 if (j
== VEC_length (lambda_vector
, dist_vects
))
3938 fprintf (file
, "\n(Dir vectors from the first dependence analyzer:\n");
3939 print_dir_vectors (file
, dir_vects
, DDR_NB_LOOPS (ddr
));
3940 fprintf (file
, "not found in Omega dir vectors:\n");
3941 print_dir_vectors (file
, DDR_DIR_VECTS (ddr
), DDR_NB_LOOPS (ddr
));
3942 fprintf (file
, "data dependence relation:\n");
3943 dump_data_dependence_relation (file
, ddr
);
3944 fprintf (file
, ")\n");
3951 /* This computes the affine dependence relation between A and B with
3952 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
3953 independence between two accesses, while CHREC_DONT_KNOW is used
3954 for representing the unknown relation.
3956 Note that it is possible to stop the computation of the dependence
3957 relation the first time we detect a CHREC_KNOWN element for a given
3961 compute_affine_dependence (struct data_dependence_relation
*ddr
,
3962 struct loop
*loop_nest
)
3964 struct data_reference
*dra
= DDR_A (ddr
);
3965 struct data_reference
*drb
= DDR_B (ddr
);
3967 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3969 fprintf (dump_file
, "(compute_affine_dependence\n");
3970 fprintf (dump_file
, " (stmt_a = \n");
3971 print_gimple_stmt (dump_file
, DR_STMT (dra
), 0, 0);
3972 fprintf (dump_file
, ")\n (stmt_b = \n");
3973 print_gimple_stmt (dump_file
, DR_STMT (drb
), 0, 0);
3974 fprintf (dump_file
, ")\n");
3977 /* Analyze only when the dependence relation is not yet known. */
3978 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
3979 && !DDR_SELF_REFERENCE (ddr
))
3981 dependence_stats
.num_dependence_tests
++;
3983 if (access_functions_are_affine_or_constant_p (dra
, loop_nest
)
3984 && access_functions_are_affine_or_constant_p (drb
, loop_nest
))
3986 if (flag_check_data_deps
)
3988 /* Compute the dependences using the first algorithm. */
3989 subscript_dependence_tester (ddr
, loop_nest
);
3991 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3993 fprintf (dump_file
, "\n\nBanerjee Analyzer\n");
3994 dump_data_dependence_relation (dump_file
, ddr
);
3997 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
3999 bool maybe_dependent
;
4000 VEC (lambda_vector
, heap
) *dir_vects
, *dist_vects
;
4002 /* Save the result of the first DD analyzer. */
4003 dist_vects
= DDR_DIST_VECTS (ddr
);
4004 dir_vects
= DDR_DIR_VECTS (ddr
);
4006 /* Reset the information. */
4007 DDR_DIST_VECTS (ddr
) = NULL
;
4008 DDR_DIR_VECTS (ddr
) = NULL
;
4010 /* Compute the same information using Omega. */
4011 if (!init_omega_for_ddr (ddr
, &maybe_dependent
))
4012 goto csys_dont_know
;
4014 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4016 fprintf (dump_file
, "Omega Analyzer\n");
4017 dump_data_dependence_relation (dump_file
, ddr
);
4020 /* Check that we get the same information. */
4021 if (maybe_dependent
)
4022 gcc_assert (ddr_consistent_p (stderr
, ddr
, dist_vects
,
4027 subscript_dependence_tester (ddr
, loop_nest
);
4030 /* As a last case, if the dependence cannot be determined, or if
4031 the dependence is considered too difficult to determine, answer
4036 dependence_stats
.num_dependence_undetermined
++;
4038 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4040 fprintf (dump_file
, "Data ref a:\n");
4041 dump_data_reference (dump_file
, dra
);
4042 fprintf (dump_file
, "Data ref b:\n");
4043 dump_data_reference (dump_file
, drb
);
4044 fprintf (dump_file
, "affine dependence test not usable: access function not affine or constant.\n");
4046 finalize_ddr_dependent (ddr
, chrec_dont_know
);
4050 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4051 fprintf (dump_file
, ")\n");
4054 /* This computes the dependence relation for the same data
4055 reference into DDR. */
4058 compute_self_dependence (struct data_dependence_relation
*ddr
)
4061 struct subscript
*subscript
;
4063 if (DDR_ARE_DEPENDENT (ddr
) != NULL_TREE
)
4066 for (i
= 0; VEC_iterate (subscript_p
, DDR_SUBSCRIPTS (ddr
), i
, subscript
);
4069 if (SUB_CONFLICTS_IN_A (subscript
))
4070 free_conflict_function (SUB_CONFLICTS_IN_A (subscript
));
4071 if (SUB_CONFLICTS_IN_B (subscript
))
4072 free_conflict_function (SUB_CONFLICTS_IN_B (subscript
));
4074 /* The accessed index overlaps for each iteration. */
4075 SUB_CONFLICTS_IN_A (subscript
)
4076 = conflict_fn (1, affine_fn_cst (integer_zero_node
));
4077 SUB_CONFLICTS_IN_B (subscript
)
4078 = conflict_fn (1, affine_fn_cst (integer_zero_node
));
4079 SUB_LAST_CONFLICT (subscript
) = chrec_dont_know
;
4082 /* The distance vector is the zero vector. */
4083 save_dist_v (ddr
, lambda_vector_new (DDR_NB_LOOPS (ddr
)));
4084 save_dir_v (ddr
, lambda_vector_new (DDR_NB_LOOPS (ddr
)));
4087 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4088 the data references in DATAREFS, in the LOOP_NEST. When
4089 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4093 compute_all_dependences (VEC (data_reference_p
, heap
) *datarefs
,
4094 VEC (ddr_p
, heap
) **dependence_relations
,
4095 VEC (loop_p
, heap
) *loop_nest
,
4096 bool compute_self_and_rr
)
4098 struct data_dependence_relation
*ddr
;
4099 struct data_reference
*a
, *b
;
4102 FOR_EACH_VEC_ELT (data_reference_p
, datarefs
, i
, a
)
4103 for (j
= i
+ 1; VEC_iterate (data_reference_p
, datarefs
, j
, b
); j
++)
4104 if (DR_IS_WRITE (a
) || DR_IS_WRITE (b
) || compute_self_and_rr
)
4106 ddr
= initialize_data_dependence_relation (a
, b
, loop_nest
);
4107 VEC_safe_push (ddr_p
, heap
, *dependence_relations
, ddr
);
4109 compute_affine_dependence (ddr
, VEC_index (loop_p
, loop_nest
, 0));
4112 if (compute_self_and_rr
)
4113 FOR_EACH_VEC_ELT (data_reference_p
, datarefs
, i
, a
)
4115 ddr
= initialize_data_dependence_relation (a
, a
, loop_nest
);
4116 VEC_safe_push (ddr_p
, heap
, *dependence_relations
, ddr
);
4117 compute_self_dependence (ddr
);
4121 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4122 true if STMT clobbers memory, false otherwise. */
4125 get_references_in_stmt (gimple stmt
, VEC (data_ref_loc
, heap
) **references
)
4127 bool clobbers_memory
= false;
4130 enum gimple_code stmt_code
= gimple_code (stmt
);
4134 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4135 Calls have side-effects, except those to const or pure
4137 if ((stmt_code
== GIMPLE_CALL
4138 && !(gimple_call_flags (stmt
) & (ECF_CONST
| ECF_PURE
)))
4139 || (stmt_code
== GIMPLE_ASM
4140 && gimple_asm_volatile_p (stmt
)))
4141 clobbers_memory
= true;
4143 if (!gimple_vuse (stmt
))
4144 return clobbers_memory
;
4146 if (stmt_code
== GIMPLE_ASSIGN
)
4149 op0
= gimple_assign_lhs_ptr (stmt
);
4150 op1
= gimple_assign_rhs1_ptr (stmt
);
4153 || (REFERENCE_CLASS_P (*op1
)
4154 && (base
= get_base_address (*op1
))
4155 && TREE_CODE (base
) != SSA_NAME
))
4157 ref
= VEC_safe_push (data_ref_loc
, heap
, *references
, NULL
);
4159 ref
->is_read
= true;
4163 || (REFERENCE_CLASS_P (*op0
) && get_base_address (*op0
)))
4165 ref
= VEC_safe_push (data_ref_loc
, heap
, *references
, NULL
);
4167 ref
->is_read
= false;
4170 else if (stmt_code
== GIMPLE_CALL
)
4172 unsigned i
, n
= gimple_call_num_args (stmt
);
4174 for (i
= 0; i
< n
; i
++)
4176 op0
= gimple_call_arg_ptr (stmt
, i
);
4179 || (REFERENCE_CLASS_P (*op0
) && get_base_address (*op0
)))
4181 ref
= VEC_safe_push (data_ref_loc
, heap
, *references
, NULL
);
4183 ref
->is_read
= true;
4188 return clobbers_memory
;
4191 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4192 reference, returns false, otherwise returns true. NEST is the outermost
4193 loop of the loop nest in which the references should be analyzed. */
4196 find_data_references_in_stmt (struct loop
*nest
, gimple stmt
,
4197 VEC (data_reference_p
, heap
) **datarefs
)
4200 VEC (data_ref_loc
, heap
) *references
;
4203 data_reference_p dr
;
4205 if (get_references_in_stmt (stmt
, &references
))
4207 VEC_free (data_ref_loc
, heap
, references
);
4211 FOR_EACH_VEC_ELT (data_ref_loc
, references
, i
, ref
)
4213 dr
= create_data_ref (nest
, loop_containing_stmt (stmt
),
4214 *ref
->pos
, stmt
, ref
->is_read
);
4215 gcc_assert (dr
!= NULL
);
4216 VEC_safe_push (data_reference_p
, heap
, *datarefs
, dr
);
4218 VEC_free (data_ref_loc
, heap
, references
);
4222 /* Stores the data references in STMT to DATAREFS. If there is an
4223 unanalyzable reference, returns false, otherwise returns true.
4224 NEST is the outermost loop of the loop nest in which the references
4225 should be instantiated, LOOP is the loop in which the references
4226 should be analyzed. */
4229 graphite_find_data_references_in_stmt (loop_p nest
, loop_p loop
, gimple stmt
,
4230 VEC (data_reference_p
, heap
) **datarefs
)
4233 VEC (data_ref_loc
, heap
) *references
;
4236 data_reference_p dr
;
4238 if (get_references_in_stmt (stmt
, &references
))
4240 VEC_free (data_ref_loc
, heap
, references
);
4244 FOR_EACH_VEC_ELT (data_ref_loc
, references
, i
, ref
)
4246 dr
= create_data_ref (nest
, loop
, *ref
->pos
, stmt
, ref
->is_read
);
4247 gcc_assert (dr
!= NULL
);
4248 VEC_safe_push (data_reference_p
, heap
, *datarefs
, dr
);
4251 VEC_free (data_ref_loc
, heap
, references
);
4255 /* Search the data references in LOOP, and record the information into
4256 DATAREFS. Returns chrec_dont_know when failing to analyze a
4257 difficult case, returns NULL_TREE otherwise. */
4260 find_data_references_in_bb (struct loop
*loop
, basic_block bb
,
4261 VEC (data_reference_p
, heap
) **datarefs
)
4263 gimple_stmt_iterator bsi
;
4265 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
4267 gimple stmt
= gsi_stmt (bsi
);
4269 if (!find_data_references_in_stmt (loop
, stmt
, datarefs
))
4271 struct data_reference
*res
;
4272 res
= XCNEW (struct data_reference
);
4273 VEC_safe_push (data_reference_p
, heap
, *datarefs
, res
);
4275 return chrec_dont_know
;
4282 /* Search the data references in LOOP, and record the information into
4283 DATAREFS. Returns chrec_dont_know when failing to analyze a
4284 difficult case, returns NULL_TREE otherwise.
4286 TODO: This function should be made smarter so that it can handle address
4287 arithmetic as if they were array accesses, etc. */
4290 find_data_references_in_loop (struct loop
*loop
,
4291 VEC (data_reference_p
, heap
) **datarefs
)
4293 basic_block bb
, *bbs
;
4296 bbs
= get_loop_body_in_dom_order (loop
);
4298 for (i
= 0; i
< loop
->num_nodes
; i
++)
4302 if (find_data_references_in_bb (loop
, bb
, datarefs
) == chrec_dont_know
)
4305 return chrec_dont_know
;
4313 /* Recursive helper function. */
4316 find_loop_nest_1 (struct loop
*loop
, VEC (loop_p
, heap
) **loop_nest
)
4318 /* Inner loops of the nest should not contain siblings. Example:
4319 when there are two consecutive loops,
4330 the dependence relation cannot be captured by the distance
4335 VEC_safe_push (loop_p
, heap
, *loop_nest
, loop
);
4337 return find_loop_nest_1 (loop
->inner
, loop_nest
);
4341 /* Return false when the LOOP is not well nested. Otherwise return
4342 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4343 contain the loops from the outermost to the innermost, as they will
4344 appear in the classic distance vector. */
4347 find_loop_nest (struct loop
*loop
, VEC (loop_p
, heap
) **loop_nest
)
4349 VEC_safe_push (loop_p
, heap
, *loop_nest
, loop
);
4351 return find_loop_nest_1 (loop
->inner
, loop_nest
);
4355 /* Returns true when the data dependences have been computed, false otherwise.
4356 Given a loop nest LOOP, the following vectors are returned:
4357 DATAREFS is initialized to all the array elements contained in this loop,
4358 DEPENDENCE_RELATIONS contains the relations between the data references.
4359 Compute read-read and self relations if
4360 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4363 compute_data_dependences_for_loop (struct loop
*loop
,
4364 bool compute_self_and_read_read_dependences
,
4365 VEC (loop_p
, heap
) **loop_nest
,
4366 VEC (data_reference_p
, heap
) **datarefs
,
4367 VEC (ddr_p
, heap
) **dependence_relations
)
4371 memset (&dependence_stats
, 0, sizeof (dependence_stats
));
4373 /* If the loop nest is not well formed, or one of the data references
4374 is not computable, give up without spending time to compute other
4377 || !find_loop_nest (loop
, loop_nest
)
4378 || find_data_references_in_loop (loop
, datarefs
) == chrec_dont_know
)
4380 struct data_dependence_relation
*ddr
;
4382 /* Insert a single relation into dependence_relations:
4384 ddr
= initialize_data_dependence_relation (NULL
, NULL
, *loop_nest
);
4385 VEC_safe_push (ddr_p
, heap
, *dependence_relations
, ddr
);
4389 compute_all_dependences (*datarefs
, dependence_relations
, *loop_nest
,
4390 compute_self_and_read_read_dependences
);
4392 if (dump_file
&& (dump_flags
& TDF_STATS
))
4394 fprintf (dump_file
, "Dependence tester statistics:\n");
4396 fprintf (dump_file
, "Number of dependence tests: %d\n",
4397 dependence_stats
.num_dependence_tests
);
4398 fprintf (dump_file
, "Number of dependence tests classified dependent: %d\n",
4399 dependence_stats
.num_dependence_dependent
);
4400 fprintf (dump_file
, "Number of dependence tests classified independent: %d\n",
4401 dependence_stats
.num_dependence_independent
);
4402 fprintf (dump_file
, "Number of undetermined dependence tests: %d\n",
4403 dependence_stats
.num_dependence_undetermined
);
4405 fprintf (dump_file
, "Number of subscript tests: %d\n",
4406 dependence_stats
.num_subscript_tests
);
4407 fprintf (dump_file
, "Number of undetermined subscript tests: %d\n",
4408 dependence_stats
.num_subscript_undetermined
);
4409 fprintf (dump_file
, "Number of same subscript function: %d\n",
4410 dependence_stats
.num_same_subscript_function
);
4412 fprintf (dump_file
, "Number of ziv tests: %d\n",
4413 dependence_stats
.num_ziv
);
4414 fprintf (dump_file
, "Number of ziv tests returning dependent: %d\n",
4415 dependence_stats
.num_ziv_dependent
);
4416 fprintf (dump_file
, "Number of ziv tests returning independent: %d\n",
4417 dependence_stats
.num_ziv_independent
);
4418 fprintf (dump_file
, "Number of ziv tests unimplemented: %d\n",
4419 dependence_stats
.num_ziv_unimplemented
);
4421 fprintf (dump_file
, "Number of siv tests: %d\n",
4422 dependence_stats
.num_siv
);
4423 fprintf (dump_file
, "Number of siv tests returning dependent: %d\n",
4424 dependence_stats
.num_siv_dependent
);
4425 fprintf (dump_file
, "Number of siv tests returning independent: %d\n",
4426 dependence_stats
.num_siv_independent
);
4427 fprintf (dump_file
, "Number of siv tests unimplemented: %d\n",
4428 dependence_stats
.num_siv_unimplemented
);
4430 fprintf (dump_file
, "Number of miv tests: %d\n",
4431 dependence_stats
.num_miv
);
4432 fprintf (dump_file
, "Number of miv tests returning dependent: %d\n",
4433 dependence_stats
.num_miv_dependent
);
4434 fprintf (dump_file
, "Number of miv tests returning independent: %d\n",
4435 dependence_stats
.num_miv_independent
);
4436 fprintf (dump_file
, "Number of miv tests unimplemented: %d\n",
4437 dependence_stats
.num_miv_unimplemented
);
4443 /* Returns true when the data dependences for the basic block BB have been
4444 computed, false otherwise.
4445 DATAREFS is initialized to all the array elements contained in this basic
4446 block, DEPENDENCE_RELATIONS contains the relations between the data
4447 references. Compute read-read and self relations if
4448 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4450 compute_data_dependences_for_bb (basic_block bb
,
4451 bool compute_self_and_read_read_dependences
,
4452 VEC (data_reference_p
, heap
) **datarefs
,
4453 VEC (ddr_p
, heap
) **dependence_relations
)
4455 if (find_data_references_in_bb (NULL
, bb
, datarefs
) == chrec_dont_know
)
4458 compute_all_dependences (*datarefs
, dependence_relations
, NULL
,
4459 compute_self_and_read_read_dependences
);
4463 /* Entry point (for testing only). Analyze all the data references
4464 and the dependence relations in LOOP.
4466 The data references are computed first.
4468 A relation on these nodes is represented by a complete graph. Some
4469 of the relations could be of no interest, thus the relations can be
4472 In the following function we compute all the relations. This is
4473 just a first implementation that is here for:
4474 - for showing how to ask for the dependence relations,
4475 - for the debugging the whole dependence graph,
4476 - for the dejagnu testcases and maintenance.
4478 It is possible to ask only for a part of the graph, avoiding to
4479 compute the whole dependence graph. The computed dependences are
4480 stored in a knowledge base (KB) such that later queries don't
4481 recompute the same information. The implementation of this KB is
4482 transparent to the optimizer, and thus the KB can be changed with a
4483 more efficient implementation, or the KB could be disabled. */
4485 analyze_all_data_dependences (struct loop
*loop
)
4488 int nb_data_refs
= 10;
4489 VEC (data_reference_p
, heap
) *datarefs
=
4490 VEC_alloc (data_reference_p
, heap
, nb_data_refs
);
4491 VEC (ddr_p
, heap
) *dependence_relations
=
4492 VEC_alloc (ddr_p
, heap
, nb_data_refs
* nb_data_refs
);
4493 VEC (loop_p
, heap
) *loop_nest
= VEC_alloc (loop_p
, heap
, 3);
4495 /* Compute DDs on the whole function. */
4496 compute_data_dependences_for_loop (loop
, false, &loop_nest
, &datarefs
,
4497 &dependence_relations
);
4501 dump_data_dependence_relations (dump_file
, dependence_relations
);
4502 fprintf (dump_file
, "\n\n");
4504 if (dump_flags
& TDF_DETAILS
)
4505 dump_dist_dir_vectors (dump_file
, dependence_relations
);
4507 if (dump_flags
& TDF_STATS
)
4509 unsigned nb_top_relations
= 0;
4510 unsigned nb_bot_relations
= 0;
4511 unsigned nb_chrec_relations
= 0;
4512 struct data_dependence_relation
*ddr
;
4514 FOR_EACH_VEC_ELT (ddr_p
, dependence_relations
, i
, ddr
)
4516 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr
)))
4519 else if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
4523 nb_chrec_relations
++;
4526 gather_stats_on_scev_database ();
4530 VEC_free (loop_p
, heap
, loop_nest
);
4531 free_dependence_relations (dependence_relations
);
4532 free_data_refs (datarefs
);
4535 /* Computes all the data dependences and check that the results of
4536 several analyzers are the same. */
4539 tree_check_data_deps (void)
4542 struct loop
*loop_nest
;
4544 FOR_EACH_LOOP (li
, loop_nest
, 0)
4545 analyze_all_data_dependences (loop_nest
);
4548 /* Free the memory used by a data dependence relation DDR. */
4551 free_dependence_relation (struct data_dependence_relation
*ddr
)
4556 if (DDR_SUBSCRIPTS (ddr
))
4557 free_subscripts (DDR_SUBSCRIPTS (ddr
));
4558 if (DDR_DIST_VECTS (ddr
))
4559 VEC_free (lambda_vector
, heap
, DDR_DIST_VECTS (ddr
));
4560 if (DDR_DIR_VECTS (ddr
))
4561 VEC_free (lambda_vector
, heap
, DDR_DIR_VECTS (ddr
));
4566 /* Free the memory used by the data dependence relations from
4567 DEPENDENCE_RELATIONS. */
4570 free_dependence_relations (VEC (ddr_p
, heap
) *dependence_relations
)
4573 struct data_dependence_relation
*ddr
;
4575 FOR_EACH_VEC_ELT (ddr_p
, dependence_relations
, i
, ddr
)
4577 free_dependence_relation (ddr
);
4579 VEC_free (ddr_p
, heap
, dependence_relations
);
4582 /* Free the memory used by the data references from DATAREFS. */
4585 free_data_refs (VEC (data_reference_p
, heap
) *datarefs
)
4588 struct data_reference
*dr
;
4590 FOR_EACH_VEC_ELT (data_reference_p
, datarefs
, i
, dr
)
4592 VEC_free (data_reference_p
, heap
, datarefs
);
4597 /* Dump vertex I in RDG to FILE. */
4600 dump_rdg_vertex (FILE *file
, struct graph
*rdg
, int i
)
4602 struct vertex
*v
= &(rdg
->vertices
[i
]);
4603 struct graph_edge
*e
;
4605 fprintf (file
, "(vertex %d: (%s%s) (in:", i
,
4606 RDG_MEM_WRITE_STMT (rdg
, i
) ? "w" : "",
4607 RDG_MEM_READS_STMT (rdg
, i
) ? "r" : "");
4610 for (e
= v
->pred
; e
; e
= e
->pred_next
)
4611 fprintf (file
, " %d", e
->src
);
4613 fprintf (file
, ") (out:");
4616 for (e
= v
->succ
; e
; e
= e
->succ_next
)
4617 fprintf (file
, " %d", e
->dest
);
4619 fprintf (file
, ")\n");
4620 print_gimple_stmt (file
, RDGV_STMT (v
), 0, TDF_VOPS
|TDF_MEMSYMS
);
4621 fprintf (file
, ")\n");
4624 /* Call dump_rdg_vertex on stderr. */
4627 debug_rdg_vertex (struct graph
*rdg
, int i
)
4629 dump_rdg_vertex (stderr
, rdg
, i
);
4632 /* Dump component C of RDG to FILE. If DUMPED is non-null, set the
4633 dumped vertices to that bitmap. */
4635 void dump_rdg_component (FILE *file
, struct graph
*rdg
, int c
, bitmap dumped
)
4639 fprintf (file
, "(%d\n", c
);
4641 for (i
= 0; i
< rdg
->n_vertices
; i
++)
4642 if (rdg
->vertices
[i
].component
== c
)
4645 bitmap_set_bit (dumped
, i
);
4647 dump_rdg_vertex (file
, rdg
, i
);
4650 fprintf (file
, ")\n");
4653 /* Call dump_rdg_vertex on stderr. */
4656 debug_rdg_component (struct graph
*rdg
, int c
)
4658 dump_rdg_component (stderr
, rdg
, c
, NULL
);
4661 /* Dump the reduced dependence graph RDG to FILE. */
4664 dump_rdg (FILE *file
, struct graph
*rdg
)
4667 bitmap dumped
= BITMAP_ALLOC (NULL
);
4669 fprintf (file
, "(rdg\n");
4671 for (i
= 0; i
< rdg
->n_vertices
; i
++)
4672 if (!bitmap_bit_p (dumped
, i
))
4673 dump_rdg_component (file
, rdg
, rdg
->vertices
[i
].component
, dumped
);
4675 fprintf (file
, ")\n");
4676 BITMAP_FREE (dumped
);
4679 /* Call dump_rdg on stderr. */
4682 debug_rdg (struct graph
*rdg
)
4684 dump_rdg (stderr
, rdg
);
4688 dot_rdg_1 (FILE *file
, struct graph
*rdg
)
4692 fprintf (file
, "digraph RDG {\n");
4694 for (i
= 0; i
< rdg
->n_vertices
; i
++)
4696 struct vertex
*v
= &(rdg
->vertices
[i
]);
4697 struct graph_edge
*e
;
4699 /* Highlight reads from memory. */
4700 if (RDG_MEM_READS_STMT (rdg
, i
))
4701 fprintf (file
, "%d [style=filled, fillcolor=green]\n", i
);
4703 /* Highlight stores to memory. */
4704 if (RDG_MEM_WRITE_STMT (rdg
, i
))
4705 fprintf (file
, "%d [style=filled, fillcolor=red]\n", i
);
4708 for (e
= v
->succ
; e
; e
= e
->succ_next
)
4709 switch (RDGE_TYPE (e
))
4712 fprintf (file
, "%d -> %d [label=input] \n", i
, e
->dest
);
4716 fprintf (file
, "%d -> %d [label=output] \n", i
, e
->dest
);
4720 /* These are the most common dependences: don't print these. */
4721 fprintf (file
, "%d -> %d \n", i
, e
->dest
);
4725 fprintf (file
, "%d -> %d [label=anti] \n", i
, e
->dest
);
4733 fprintf (file
, "}\n\n");
4736 /* Display the Reduced Dependence Graph using dotty. */
4737 extern void dot_rdg (struct graph
*);
4740 dot_rdg (struct graph
*rdg
)
4742 /* When debugging, enable the following code. This cannot be used
4743 in production compilers because it calls "system". */
4745 FILE *file
= fopen ("/tmp/rdg.dot", "w");
4746 gcc_assert (file
!= NULL
);
4748 dot_rdg_1 (file
, rdg
);
4751 system ("dotty /tmp/rdg.dot &");
4753 dot_rdg_1 (stderr
, rdg
);
4757 /* This structure is used for recording the mapping statement index in
4760 struct GTY(()) rdg_vertex_info
4766 /* Returns the index of STMT in RDG. */
4769 rdg_vertex_for_stmt (struct graph
*rdg
, gimple stmt
)
4771 struct rdg_vertex_info rvi
, *slot
;
4774 slot
= (struct rdg_vertex_info
*) htab_find (rdg
->indices
, &rvi
);
4782 /* Creates an edge in RDG for each distance vector from DDR. The
4783 order that we keep track of in the RDG is the order in which
4784 statements have to be executed. */
4787 create_rdg_edge_for_ddr (struct graph
*rdg
, ddr_p ddr
)
4789 struct graph_edge
*e
;
4791 data_reference_p dra
= DDR_A (ddr
);
4792 data_reference_p drb
= DDR_B (ddr
);
4793 unsigned level
= ddr_dependence_level (ddr
);
4795 /* For non scalar dependences, when the dependence is REVERSED,
4796 statement B has to be executed before statement A. */
4798 && !DDR_REVERSED_P (ddr
))
4800 data_reference_p tmp
= dra
;
4805 va
= rdg_vertex_for_stmt (rdg
, DR_STMT (dra
));
4806 vb
= rdg_vertex_for_stmt (rdg
, DR_STMT (drb
));
4808 if (va
< 0 || vb
< 0)
4811 e
= add_edge (rdg
, va
, vb
);
4812 e
->data
= XNEW (struct rdg_edge
);
4814 RDGE_LEVEL (e
) = level
;
4815 RDGE_RELATION (e
) = ddr
;
4817 /* Determines the type of the data dependence. */
4818 if (DR_IS_READ (dra
) && DR_IS_READ (drb
))
4819 RDGE_TYPE (e
) = input_dd
;
4820 else if (DR_IS_WRITE (dra
) && DR_IS_WRITE (drb
))
4821 RDGE_TYPE (e
) = output_dd
;
4822 else if (DR_IS_WRITE (dra
) && DR_IS_READ (drb
))
4823 RDGE_TYPE (e
) = flow_dd
;
4824 else if (DR_IS_READ (dra
) && DR_IS_WRITE (drb
))
4825 RDGE_TYPE (e
) = anti_dd
;
4828 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
4829 the index of DEF in RDG. */
4832 create_rdg_edges_for_scalar (struct graph
*rdg
, tree def
, int idef
)
4834 use_operand_p imm_use_p
;
4835 imm_use_iterator iterator
;
4837 FOR_EACH_IMM_USE_FAST (imm_use_p
, iterator
, def
)
4839 struct graph_edge
*e
;
4840 int use
= rdg_vertex_for_stmt (rdg
, USE_STMT (imm_use_p
));
4845 e
= add_edge (rdg
, idef
, use
);
4846 e
->data
= XNEW (struct rdg_edge
);
4847 RDGE_TYPE (e
) = flow_dd
;
4848 RDGE_RELATION (e
) = NULL
;
4852 /* Creates the edges of the reduced dependence graph RDG. */
4855 create_rdg_edges (struct graph
*rdg
, VEC (ddr_p
, heap
) *ddrs
)
4858 struct data_dependence_relation
*ddr
;
4859 def_operand_p def_p
;
4862 FOR_EACH_VEC_ELT (ddr_p
, ddrs
, i
, ddr
)
4863 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
4864 create_rdg_edge_for_ddr (rdg
, ddr
);
4866 for (i
= 0; i
< rdg
->n_vertices
; i
++)
4867 FOR_EACH_PHI_OR_STMT_DEF (def_p
, RDG_STMT (rdg
, i
),
4869 create_rdg_edges_for_scalar (rdg
, DEF_FROM_PTR (def_p
), i
);
4872 /* Build the vertices of the reduced dependence graph RDG. */
4875 create_rdg_vertices (struct graph
*rdg
, VEC (gimple
, heap
) *stmts
)
4880 FOR_EACH_VEC_ELT (gimple
, stmts
, i
, stmt
)
4882 VEC (data_ref_loc
, heap
) *references
;
4884 struct vertex
*v
= &(rdg
->vertices
[i
]);
4885 struct rdg_vertex_info
*rvi
= XNEW (struct rdg_vertex_info
);
4886 struct rdg_vertex_info
**slot
;
4890 slot
= (struct rdg_vertex_info
**) htab_find_slot (rdg
->indices
, rvi
, INSERT
);
4897 v
->data
= XNEW (struct rdg_vertex
);
4898 RDG_STMT (rdg
, i
) = stmt
;
4900 RDG_MEM_WRITE_STMT (rdg
, i
) = false;
4901 RDG_MEM_READS_STMT (rdg
, i
) = false;
4902 if (gimple_code (stmt
) == GIMPLE_PHI
)
4905 get_references_in_stmt (stmt
, &references
);
4906 FOR_EACH_VEC_ELT (data_ref_loc
, references
, j
, ref
)
4908 RDG_MEM_WRITE_STMT (rdg
, i
) = true;
4910 RDG_MEM_READS_STMT (rdg
, i
) = true;
4912 VEC_free (data_ref_loc
, heap
, references
);
4916 /* Initialize STMTS with all the statements of LOOP. When
4917 INCLUDE_PHIS is true, include also the PHI nodes. The order in
4918 which we discover statements is important as
4919 generate_loops_for_partition is using the same traversal for
4920 identifying statements. */
4923 stmts_from_loop (struct loop
*loop
, VEC (gimple
, heap
) **stmts
)
4926 basic_block
*bbs
= get_loop_body_in_dom_order (loop
);
4928 for (i
= 0; i
< loop
->num_nodes
; i
++)
4930 basic_block bb
= bbs
[i
];
4931 gimple_stmt_iterator bsi
;
4934 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
4935 VEC_safe_push (gimple
, heap
, *stmts
, gsi_stmt (bsi
));
4937 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
4939 stmt
= gsi_stmt (bsi
);
4940 if (gimple_code (stmt
) != GIMPLE_LABEL
&& !is_gimple_debug (stmt
))
4941 VEC_safe_push (gimple
, heap
, *stmts
, stmt
);
4948 /* Returns true when all the dependences are computable. */
4951 known_dependences_p (VEC (ddr_p
, heap
) *dependence_relations
)
4956 FOR_EACH_VEC_ELT (ddr_p
, dependence_relations
, i
, ddr
)
4957 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
4963 /* Computes a hash function for element ELT. */
4966 hash_stmt_vertex_info (const void *elt
)
4968 const struct rdg_vertex_info
*const rvi
=
4969 (const struct rdg_vertex_info
*) elt
;
4970 gimple stmt
= rvi
->stmt
;
4972 return htab_hash_pointer (stmt
);
4975 /* Compares database elements E1 and E2. */
4978 eq_stmt_vertex_info (const void *e1
, const void *e2
)
4980 const struct rdg_vertex_info
*elt1
= (const struct rdg_vertex_info
*) e1
;
4981 const struct rdg_vertex_info
*elt2
= (const struct rdg_vertex_info
*) e2
;
4983 return elt1
->stmt
== elt2
->stmt
;
4986 /* Free the element E. */
4989 hash_stmt_vertex_del (void *e
)
4994 /* Build the Reduced Dependence Graph (RDG) with one vertex per
4995 statement of the loop nest, and one edge per data dependence or
4996 scalar dependence. */
4999 build_empty_rdg (int n_stmts
)
5001 int nb_data_refs
= 10;
5002 struct graph
*rdg
= new_graph (n_stmts
);
5004 rdg
->indices
= htab_create (nb_data_refs
, hash_stmt_vertex_info
,
5005 eq_stmt_vertex_info
, hash_stmt_vertex_del
);
5009 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5010 statement of the loop nest, and one edge per data dependence or
5011 scalar dependence. */
5014 build_rdg (struct loop
*loop
,
5015 VEC (loop_p
, heap
) **loop_nest
,
5016 VEC (ddr_p
, heap
) **dependence_relations
,
5017 VEC (data_reference_p
, heap
) **datarefs
)
5019 struct graph
*rdg
= NULL
;
5020 VEC (gimple
, heap
) *stmts
= VEC_alloc (gimple
, heap
, 10);
5022 compute_data_dependences_for_loop (loop
, false, loop_nest
, datarefs
,
5023 dependence_relations
);
5025 if (known_dependences_p (*dependence_relations
))
5027 stmts_from_loop (loop
, &stmts
);
5028 rdg
= build_empty_rdg (VEC_length (gimple
, stmts
));
5029 create_rdg_vertices (rdg
, stmts
);
5030 create_rdg_edges (rdg
, *dependence_relations
);
5033 VEC_free (gimple
, heap
, stmts
);
5037 /* Free the reduced dependence graph RDG. */
5040 free_rdg (struct graph
*rdg
)
5044 for (i
= 0; i
< rdg
->n_vertices
; i
++)
5046 struct vertex
*v
= &(rdg
->vertices
[i
]);
5047 struct graph_edge
*e
;
5049 for (e
= v
->succ
; e
; e
= e
->succ_next
)
5055 htab_delete (rdg
->indices
);
5059 /* Initialize STMTS with all the statements of LOOP that contain a
5063 stores_from_loop (struct loop
*loop
, VEC (gimple
, heap
) **stmts
)
5066 basic_block
*bbs
= get_loop_body_in_dom_order (loop
);
5068 for (i
= 0; i
< loop
->num_nodes
; i
++)
5070 basic_block bb
= bbs
[i
];
5071 gimple_stmt_iterator bsi
;
5073 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
5074 if (gimple_vdef (gsi_stmt (bsi
)))
5075 VEC_safe_push (gimple
, heap
, *stmts
, gsi_stmt (bsi
));
5081 /* Returns true when the statement at STMT is of the form "A[i] = 0"
5082 that contains a data reference on its LHS with a stride of the same
5083 size as its unit type. */
5086 stmt_with_adjacent_zero_store_dr_p (gimple stmt
)
5090 struct data_reference
*dr
;
5093 || !gimple_vdef (stmt
)
5094 || !is_gimple_assign (stmt
)
5095 || !gimple_assign_single_p (stmt
)
5096 || !(op1
= gimple_assign_rhs1 (stmt
))
5097 || !(integer_zerop (op1
) || real_zerop (op1
)))
5100 dr
= XCNEW (struct data_reference
);
5101 op0
= gimple_assign_lhs (stmt
);
5103 DR_STMT (dr
) = stmt
;
5106 res
= dr_analyze_innermost (dr
)
5107 && stride_of_unit_type_p (DR_STEP (dr
), TREE_TYPE (op0
));
5113 /* Initialize STMTS with all the statements of LOOP that contain a
5114 store to memory of the form "A[i] = 0". */
5117 stores_zero_from_loop (struct loop
*loop
, VEC (gimple
, heap
) **stmts
)
5121 gimple_stmt_iterator si
;
5123 basic_block
*bbs
= get_loop_body_in_dom_order (loop
);
5125 for (i
= 0; i
< loop
->num_nodes
; i
++)
5126 for (bb
= bbs
[i
], si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
5127 if ((stmt
= gsi_stmt (si
))
5128 && stmt_with_adjacent_zero_store_dr_p (stmt
))
5129 VEC_safe_push (gimple
, heap
, *stmts
, gsi_stmt (si
));
5134 /* For a data reference REF, return the declaration of its base
5135 address or NULL_TREE if the base is not determined. */
5138 ref_base_address (gimple stmt
, data_ref_loc
*ref
)
5140 tree base
= NULL_TREE
;
5142 struct data_reference
*dr
= XCNEW (struct data_reference
);
5144 DR_STMT (dr
) = stmt
;
5145 DR_REF (dr
) = *ref
->pos
;
5146 dr_analyze_innermost (dr
);
5147 base_address
= DR_BASE_ADDRESS (dr
);
5152 switch (TREE_CODE (base_address
))
5155 base
= TREE_OPERAND (base_address
, 0);
5159 base
= base_address
;
5168 /* Determines whether the statement from vertex V of the RDG has a
5169 definition used outside the loop that contains this statement. */
5172 rdg_defs_used_in_other_loops_p (struct graph
*rdg
, int v
)
5174 gimple stmt
= RDG_STMT (rdg
, v
);
5175 struct loop
*loop
= loop_containing_stmt (stmt
);
5176 use_operand_p imm_use_p
;
5177 imm_use_iterator iterator
;
5179 def_operand_p def_p
;
5184 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt
, it
, SSA_OP_DEF
)
5186 FOR_EACH_IMM_USE_FAST (imm_use_p
, iterator
, DEF_FROM_PTR (def_p
))
5188 if (loop_containing_stmt (USE_STMT (imm_use_p
)) != loop
)
5196 /* Determines whether statements S1 and S2 access to similar memory
5197 locations. Two memory accesses are considered similar when they
5198 have the same base address declaration, i.e. when their
5199 ref_base_address is the same. */
5202 have_similar_memory_accesses (gimple s1
, gimple s2
)
5206 VEC (data_ref_loc
, heap
) *refs1
, *refs2
;
5207 data_ref_loc
*ref1
, *ref2
;
5209 get_references_in_stmt (s1
, &refs1
);
5210 get_references_in_stmt (s2
, &refs2
);
5212 FOR_EACH_VEC_ELT (data_ref_loc
, refs1
, i
, ref1
)
5214 tree base1
= ref_base_address (s1
, ref1
);
5217 FOR_EACH_VEC_ELT (data_ref_loc
, refs2
, j
, ref2
)
5218 if (base1
== ref_base_address (s2
, ref2
))
5226 VEC_free (data_ref_loc
, heap
, refs1
);
5227 VEC_free (data_ref_loc
, heap
, refs2
);
5231 /* Helper function for the hashtab. */
5234 have_similar_memory_accesses_1 (const void *s1
, const void *s2
)
5236 return have_similar_memory_accesses (CONST_CAST_GIMPLE ((const_gimple
) s1
),
5237 CONST_CAST_GIMPLE ((const_gimple
) s2
));
5240 /* Helper function for the hashtab. */
5243 ref_base_address_1 (const void *s
)
5245 gimple stmt
= CONST_CAST_GIMPLE ((const_gimple
) s
);
5247 VEC (data_ref_loc
, heap
) *refs
;
5251 get_references_in_stmt (stmt
, &refs
);
5253 FOR_EACH_VEC_ELT (data_ref_loc
, refs
, i
, ref
)
5256 res
= htab_hash_pointer (ref_base_address (stmt
, ref
));
5260 VEC_free (data_ref_loc
, heap
, refs
);
5264 /* Try to remove duplicated write data references from STMTS. */
5267 remove_similar_memory_refs (VEC (gimple
, heap
) **stmts
)
5271 htab_t seen
= htab_create (VEC_length (gimple
, *stmts
), ref_base_address_1
,
5272 have_similar_memory_accesses_1
, NULL
);
5274 for (i
= 0; VEC_iterate (gimple
, *stmts
, i
, stmt
); )
5278 slot
= htab_find_slot (seen
, stmt
, INSERT
);
5281 VEC_ordered_remove (gimple
, *stmts
, i
);
5284 *slot
= (void *) stmt
;
5292 /* Returns the index of PARAMETER in the parameters vector of the
5293 ACCESS_MATRIX. If PARAMETER does not exist return -1. */
5296 access_matrix_get_index_for_parameter (tree parameter
,
5297 struct access_matrix
*access_matrix
)
5300 VEC (tree
,heap
) *lambda_parameters
= AM_PARAMETERS (access_matrix
);
5301 tree lambda_parameter
;
5303 FOR_EACH_VEC_ELT (tree
, lambda_parameters
, i
, lambda_parameter
)
5304 if (lambda_parameter
== parameter
)
5305 return i
+ AM_NB_INDUCTION_VARS (access_matrix
);