1 /* Data references and dependences detectors.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* This pass walks a given loop structure searching for array
23 references. The information about the array accesses is recorded
24 in DATA_REFERENCE structures.
26 The basic test for determining the dependences is:
27 given two access functions chrec1 and chrec2 to a same array, and
28 x and y two vectors from the iteration domain, the same element of
29 the array is accessed twice at iterations x and y if and only if:
30 | chrec1 (x) == chrec2 (y).
32 The goals of this analysis are:
34 - to determine the independence: the relation between two
35 independent accesses is qualified with the chrec_known (this
36 information allows a loop parallelization),
38 - when two data references access the same data, to qualify the
39 dependence relation with classic dependence representations:
43 - loop carried level dependence
44 - polyhedron dependence
45 or with the chains of recurrences based representation,
47 - to define a knowledge base for storing the data dependence
50 - to define an interface to access this data.
55 - subscript: given two array accesses a subscript is the tuple
56 composed of the access functions for a given dimension. Example:
57 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
58 (f1, g1), (f2, g2), (f3, g3).
60 - Diophantine equation: an equation whose coefficients and
61 solutions are integer constants, for example the equation
63 has an integer solution x = 1 and y = -1.
67 - "Advanced Compilation for High Performance Computing" by Randy
68 Allen and Ken Kennedy.
69 http://citeseer.ist.psu.edu/goff91practical.html
71 - "Loop Transformations for Restructuring Compilers - The Foundations"
79 #include "coretypes.h"
80 #include "gimple-pretty-print.h"
81 #include "tree-flow.h"
83 #include "tree-data-ref.h"
84 #include "tree-scalar-evolution.h"
85 #include "tree-pass.h"
86 #include "langhooks.h"
88 static struct datadep_stats
90 int num_dependence_tests
;
91 int num_dependence_dependent
;
92 int num_dependence_independent
;
93 int num_dependence_undetermined
;
95 int num_subscript_tests
;
96 int num_subscript_undetermined
;
97 int num_same_subscript_function
;
100 int num_ziv_independent
;
101 int num_ziv_dependent
;
102 int num_ziv_unimplemented
;
105 int num_siv_independent
;
106 int num_siv_dependent
;
107 int num_siv_unimplemented
;
110 int num_miv_independent
;
111 int num_miv_dependent
;
112 int num_miv_unimplemented
;
115 static bool subscript_dependence_tester_1 (struct data_dependence_relation
*,
116 struct data_reference
*,
117 struct data_reference
*,
119 /* Returns true iff A divides B. */
122 tree_fold_divides_p (const_tree a
, const_tree b
)
124 gcc_assert (TREE_CODE (a
) == INTEGER_CST
);
125 gcc_assert (TREE_CODE (b
) == INTEGER_CST
);
126 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR
, b
, a
));
129 /* Returns true iff A divides B. */
132 int_divides_p (int a
, int b
)
134 return ((b
% a
) == 0);
139 /* Dump into FILE all the data references from DATAREFS. */
142 dump_data_references (FILE *file
, VEC (data_reference_p
, heap
) *datarefs
)
145 struct data_reference
*dr
;
147 FOR_EACH_VEC_ELT (data_reference_p
, datarefs
, i
, dr
)
148 dump_data_reference (file
, dr
);
151 /* Dump into STDERR all the data references from DATAREFS. */
154 debug_data_references (VEC (data_reference_p
, heap
) *datarefs
)
156 dump_data_references (stderr
, datarefs
);
159 /* Dump to STDERR all the dependence relations from DDRS. */
162 debug_data_dependence_relations (VEC (ddr_p
, heap
) *ddrs
)
164 dump_data_dependence_relations (stderr
, ddrs
);
167 /* Dump into FILE all the dependence relations from DDRS. */
170 dump_data_dependence_relations (FILE *file
,
171 VEC (ddr_p
, heap
) *ddrs
)
174 struct data_dependence_relation
*ddr
;
176 FOR_EACH_VEC_ELT (ddr_p
, ddrs
, i
, ddr
)
177 dump_data_dependence_relation (file
, ddr
);
180 /* Print to STDERR the data_reference DR. */
183 debug_data_reference (struct data_reference
*dr
)
185 dump_data_reference (stderr
, dr
);
188 /* Dump function for a DATA_REFERENCE structure. */
191 dump_data_reference (FILE *outf
,
192 struct data_reference
*dr
)
196 fprintf (outf
, "#(Data Ref: \n");
197 fprintf (outf
, "# bb: %d \n", gimple_bb (DR_STMT (dr
))->index
);
198 fprintf (outf
, "# stmt: ");
199 print_gimple_stmt (outf
, DR_STMT (dr
), 0, 0);
200 fprintf (outf
, "# ref: ");
201 print_generic_stmt (outf
, DR_REF (dr
), 0);
202 fprintf (outf
, "# base_object: ");
203 print_generic_stmt (outf
, DR_BASE_OBJECT (dr
), 0);
205 for (i
= 0; i
< DR_NUM_DIMENSIONS (dr
); i
++)
207 fprintf (outf
, "# Access function %d: ", i
);
208 print_generic_stmt (outf
, DR_ACCESS_FN (dr
, i
), 0);
210 fprintf (outf
, "#)\n");
213 /* Dumps the affine function described by FN to the file OUTF. */
216 dump_affine_function (FILE *outf
, affine_fn fn
)
221 print_generic_expr (outf
, VEC_index (tree
, fn
, 0), TDF_SLIM
);
222 for (i
= 1; VEC_iterate (tree
, fn
, i
, coef
); i
++)
224 fprintf (outf
, " + ");
225 print_generic_expr (outf
, coef
, TDF_SLIM
);
226 fprintf (outf
, " * x_%u", i
);
230 /* Dumps the conflict function CF to the file OUTF. */
233 dump_conflict_function (FILE *outf
, conflict_function
*cf
)
237 if (cf
->n
== NO_DEPENDENCE
)
238 fprintf (outf
, "no dependence\n");
239 else if (cf
->n
== NOT_KNOWN
)
240 fprintf (outf
, "not known\n");
243 for (i
= 0; i
< cf
->n
; i
++)
246 dump_affine_function (outf
, cf
->fns
[i
]);
247 fprintf (outf
, "]\n");
252 /* Dump function for a SUBSCRIPT structure. */
255 dump_subscript (FILE *outf
, struct subscript
*subscript
)
257 conflict_function
*cf
= SUB_CONFLICTS_IN_A (subscript
);
259 fprintf (outf
, "\n (subscript \n");
260 fprintf (outf
, " iterations_that_access_an_element_twice_in_A: ");
261 dump_conflict_function (outf
, cf
);
262 if (CF_NONTRIVIAL_P (cf
))
264 tree last_iteration
= SUB_LAST_CONFLICT (subscript
);
265 fprintf (outf
, " last_conflict: ");
266 print_generic_stmt (outf
, last_iteration
, 0);
269 cf
= SUB_CONFLICTS_IN_B (subscript
);
270 fprintf (outf
, " iterations_that_access_an_element_twice_in_B: ");
271 dump_conflict_function (outf
, cf
);
272 if (CF_NONTRIVIAL_P (cf
))
274 tree last_iteration
= SUB_LAST_CONFLICT (subscript
);
275 fprintf (outf
, " last_conflict: ");
276 print_generic_stmt (outf
, last_iteration
, 0);
279 fprintf (outf
, " (Subscript distance: ");
280 print_generic_stmt (outf
, SUB_DISTANCE (subscript
), 0);
281 fprintf (outf
, " )\n");
282 fprintf (outf
, " )\n");
285 /* Print the classic direction vector DIRV to OUTF. */
288 print_direction_vector (FILE *outf
,
294 for (eq
= 0; eq
< length
; eq
++)
296 enum data_dependence_direction dir
= ((enum data_dependence_direction
)
302 fprintf (outf
, " +");
305 fprintf (outf
, " -");
308 fprintf (outf
, " =");
310 case dir_positive_or_equal
:
311 fprintf (outf
, " +=");
313 case dir_positive_or_negative
:
314 fprintf (outf
, " +-");
316 case dir_negative_or_equal
:
317 fprintf (outf
, " -=");
320 fprintf (outf
, " *");
323 fprintf (outf
, "indep");
327 fprintf (outf
, "\n");
330 /* Print a vector of direction vectors. */
333 print_dir_vectors (FILE *outf
, VEC (lambda_vector
, heap
) *dir_vects
,
339 FOR_EACH_VEC_ELT (lambda_vector
, dir_vects
, j
, v
)
340 print_direction_vector (outf
, v
, length
);
343 /* Print out a vector VEC of length N to OUTFILE. */
346 print_lambda_vector (FILE * outfile
, lambda_vector vector
, int n
)
350 for (i
= 0; i
< n
; i
++)
351 fprintf (outfile
, "%3d ", vector
[i
]);
352 fprintf (outfile
, "\n");
355 /* Print a vector of distance vectors. */
358 print_dist_vectors (FILE *outf
, VEC (lambda_vector
, heap
) *dist_vects
,
364 FOR_EACH_VEC_ELT (lambda_vector
, dist_vects
, j
, v
)
365 print_lambda_vector (outf
, v
, length
);
371 debug_data_dependence_relation (struct data_dependence_relation
*ddr
)
373 dump_data_dependence_relation (stderr
, ddr
);
376 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
379 dump_data_dependence_relation (FILE *outf
,
380 struct data_dependence_relation
*ddr
)
382 struct data_reference
*dra
, *drb
;
384 fprintf (outf
, "(Data Dep: \n");
386 if (!ddr
|| DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
393 dump_data_reference (outf
, dra
);
395 fprintf (outf
, " (nil)\n");
397 dump_data_reference (outf
, drb
);
399 fprintf (outf
, " (nil)\n");
401 fprintf (outf
, " (don't know)\n)\n");
407 dump_data_reference (outf
, dra
);
408 dump_data_reference (outf
, drb
);
410 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
411 fprintf (outf
, " (no dependence)\n");
413 else if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
418 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
420 fprintf (outf
, " access_fn_A: ");
421 print_generic_stmt (outf
, DR_ACCESS_FN (dra
, i
), 0);
422 fprintf (outf
, " access_fn_B: ");
423 print_generic_stmt (outf
, DR_ACCESS_FN (drb
, i
), 0);
424 dump_subscript (outf
, DDR_SUBSCRIPT (ddr
, i
));
427 fprintf (outf
, " inner loop index: %d\n", DDR_INNER_LOOP (ddr
));
428 fprintf (outf
, " loop nest: (");
429 FOR_EACH_VEC_ELT (loop_p
, DDR_LOOP_NEST (ddr
), i
, loopi
)
430 fprintf (outf
, "%d ", loopi
->num
);
431 fprintf (outf
, ")\n");
433 for (i
= 0; i
< DDR_NUM_DIST_VECTS (ddr
); i
++)
435 fprintf (outf
, " distance_vector: ");
436 print_lambda_vector (outf
, DDR_DIST_VECT (ddr
, i
),
440 for (i
= 0; i
< DDR_NUM_DIR_VECTS (ddr
); i
++)
442 fprintf (outf
, " direction_vector: ");
443 print_direction_vector (outf
, DDR_DIR_VECT (ddr
, i
),
448 fprintf (outf
, ")\n");
451 /* Dump function for a DATA_DEPENDENCE_DIRECTION structure. */
454 dump_data_dependence_direction (FILE *file
,
455 enum data_dependence_direction dir
)
471 case dir_positive_or_negative
:
472 fprintf (file
, "+-");
475 case dir_positive_or_equal
:
476 fprintf (file
, "+=");
479 case dir_negative_or_equal
:
480 fprintf (file
, "-=");
492 /* Dumps the distance and direction vectors in FILE. DDRS contains
493 the dependence relations, and VECT_SIZE is the size of the
494 dependence vectors, or in other words the number of loops in the
498 dump_dist_dir_vectors (FILE *file
, VEC (ddr_p
, heap
) *ddrs
)
501 struct data_dependence_relation
*ddr
;
504 FOR_EACH_VEC_ELT (ddr_p
, ddrs
, i
, ddr
)
505 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
&& DDR_AFFINE_P (ddr
))
507 FOR_EACH_VEC_ELT (lambda_vector
, DDR_DIST_VECTS (ddr
), j
, v
)
509 fprintf (file
, "DISTANCE_V (");
510 print_lambda_vector (file
, v
, DDR_NB_LOOPS (ddr
));
511 fprintf (file
, ")\n");
514 FOR_EACH_VEC_ELT (lambda_vector
, DDR_DIR_VECTS (ddr
), j
, v
)
516 fprintf (file
, "DIRECTION_V (");
517 print_direction_vector (file
, v
, DDR_NB_LOOPS (ddr
));
518 fprintf (file
, ")\n");
522 fprintf (file
, "\n\n");
525 /* Dumps the data dependence relations DDRS in FILE. */
528 dump_ddrs (FILE *file
, VEC (ddr_p
, heap
) *ddrs
)
531 struct data_dependence_relation
*ddr
;
533 FOR_EACH_VEC_ELT (ddr_p
, ddrs
, i
, ddr
)
534 dump_data_dependence_relation (file
, ddr
);
536 fprintf (file
, "\n\n");
539 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
540 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
541 constant of type ssizetype, and returns true. If we cannot do this
542 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
546 split_constant_offset_1 (tree type
, tree op0
, enum tree_code code
, tree op1
,
547 tree
*var
, tree
*off
)
551 enum tree_code ocode
= code
;
559 *var
= build_int_cst (type
, 0);
560 *off
= fold_convert (ssizetype
, op0
);
563 case POINTER_PLUS_EXPR
:
568 split_constant_offset (op0
, &var0
, &off0
);
569 split_constant_offset (op1
, &var1
, &off1
);
570 *var
= fold_build2 (code
, type
, var0
, var1
);
571 *off
= size_binop (ocode
, off0
, off1
);
575 if (TREE_CODE (op1
) != INTEGER_CST
)
578 split_constant_offset (op0
, &var0
, &off0
);
579 *var
= fold_build2 (MULT_EXPR
, type
, var0
, op1
);
580 *off
= size_binop (MULT_EXPR
, off0
, fold_convert (ssizetype
, op1
));
586 HOST_WIDE_INT pbitsize
, pbitpos
;
587 enum machine_mode pmode
;
588 int punsignedp
, pvolatilep
;
590 op0
= TREE_OPERAND (op0
, 0);
591 if (!handled_component_p (op0
))
594 base
= get_inner_reference (op0
, &pbitsize
, &pbitpos
, &poffset
,
595 &pmode
, &punsignedp
, &pvolatilep
, false);
597 if (pbitpos
% BITS_PER_UNIT
!= 0)
599 base
= build_fold_addr_expr (base
);
600 off0
= ssize_int (pbitpos
/ BITS_PER_UNIT
);
604 split_constant_offset (poffset
, &poffset
, &off1
);
605 off0
= size_binop (PLUS_EXPR
, off0
, off1
);
606 if (POINTER_TYPE_P (TREE_TYPE (base
)))
607 base
= fold_build_pointer_plus (base
, poffset
);
609 base
= fold_build2 (PLUS_EXPR
, TREE_TYPE (base
), base
,
610 fold_convert (TREE_TYPE (base
), poffset
));
613 var0
= fold_convert (type
, base
);
615 /* If variable length types are involved, punt, otherwise casts
616 might be converted into ARRAY_REFs in gimplify_conversion.
617 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
618 possibly no longer appears in current GIMPLE, might resurface.
619 This perhaps could run
620 if (CONVERT_EXPR_P (var0))
622 gimplify_conversion (&var0);
623 // Attempt to fill in any within var0 found ARRAY_REF's
624 // element size from corresponding op embedded ARRAY_REF,
625 // if unsuccessful, just punt.
627 while (POINTER_TYPE_P (type
))
628 type
= TREE_TYPE (type
);
629 if (int_size_in_bytes (type
) < 0)
639 gimple def_stmt
= SSA_NAME_DEF_STMT (op0
);
640 enum tree_code subcode
;
642 if (gimple_code (def_stmt
) != GIMPLE_ASSIGN
)
645 var0
= gimple_assign_rhs1 (def_stmt
);
646 subcode
= gimple_assign_rhs_code (def_stmt
);
647 var1
= gimple_assign_rhs2 (def_stmt
);
649 return split_constant_offset_1 (type
, var0
, subcode
, var1
, var
, off
);
653 /* We must not introduce undefined overflow, and we must not change the value.
654 Hence we're okay if the inner type doesn't overflow to start with
655 (pointer or signed), the outer type also is an integer or pointer
656 and the outer precision is at least as large as the inner. */
657 tree itype
= TREE_TYPE (op0
);
658 if ((POINTER_TYPE_P (itype
)
659 || (INTEGRAL_TYPE_P (itype
) && TYPE_OVERFLOW_UNDEFINED (itype
)))
660 && TYPE_PRECISION (type
) >= TYPE_PRECISION (itype
)
661 && (POINTER_TYPE_P (type
) || INTEGRAL_TYPE_P (type
)))
663 split_constant_offset (op0
, &var0
, off
);
664 *var
= fold_convert (type
, var0
);
675 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
676 will be ssizetype. */
679 split_constant_offset (tree exp
, tree
*var
, tree
*off
)
681 tree type
= TREE_TYPE (exp
), otype
, op0
, op1
, e
, o
;
685 *off
= ssize_int (0);
688 if (tree_is_chrec (exp
))
691 otype
= TREE_TYPE (exp
);
692 code
= TREE_CODE (exp
);
693 extract_ops_from_tree (exp
, &code
, &op0
, &op1
);
694 if (split_constant_offset_1 (otype
, op0
, code
, op1
, &e
, &o
))
696 *var
= fold_convert (type
, e
);
701 /* Returns the address ADDR of an object in a canonical shape (without nop
702 casts, and with type of pointer to the object). */
705 canonicalize_base_object_address (tree addr
)
711 /* The base address may be obtained by casting from integer, in that case
713 if (!POINTER_TYPE_P (TREE_TYPE (addr
)))
716 if (TREE_CODE (addr
) != ADDR_EXPR
)
719 return build_fold_addr_expr (TREE_OPERAND (addr
, 0));
722 /* Analyzes the behavior of the memory reference DR in the innermost loop or
723 basic block that contains it. Returns true if analysis succeed or false
727 dr_analyze_innermost (struct data_reference
*dr
)
729 gimple stmt
= DR_STMT (dr
);
730 struct loop
*loop
= loop_containing_stmt (stmt
);
731 tree ref
= DR_REF (dr
);
732 HOST_WIDE_INT pbitsize
, pbitpos
;
734 enum machine_mode pmode
;
735 int punsignedp
, pvolatilep
;
736 affine_iv base_iv
, offset_iv
;
737 tree init
, dinit
, step
;
738 bool in_loop
= (loop
&& loop
->num
);
740 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
741 fprintf (dump_file
, "analyze_innermost: ");
743 base
= get_inner_reference (ref
, &pbitsize
, &pbitpos
, &poffset
,
744 &pmode
, &punsignedp
, &pvolatilep
, false);
745 gcc_assert (base
!= NULL_TREE
);
747 if (pbitpos
% BITS_PER_UNIT
!= 0)
749 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
750 fprintf (dump_file
, "failed: bit offset alignment.\n");
754 if (TREE_CODE (base
) == MEM_REF
)
756 if (!integer_zerop (TREE_OPERAND (base
, 1)))
760 double_int moff
= mem_ref_offset (base
);
761 poffset
= double_int_to_tree (sizetype
, moff
);
764 poffset
= size_binop (PLUS_EXPR
, poffset
, TREE_OPERAND (base
, 1));
766 base
= TREE_OPERAND (base
, 0);
769 base
= build_fold_addr_expr (base
);
772 if (!simple_iv (loop
, loop_containing_stmt (stmt
), base
, &base_iv
,
775 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
776 fprintf (dump_file
, "failed: evolution of base is not affine.\n");
783 base_iv
.step
= ssize_int (0);
784 base_iv
.no_overflow
= true;
789 offset_iv
.base
= ssize_int (0);
790 offset_iv
.step
= ssize_int (0);
796 offset_iv
.base
= poffset
;
797 offset_iv
.step
= ssize_int (0);
799 else if (!simple_iv (loop
, loop_containing_stmt (stmt
),
800 poffset
, &offset_iv
, false))
802 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
803 fprintf (dump_file
, "failed: evolution of offset is not"
809 init
= ssize_int (pbitpos
/ BITS_PER_UNIT
);
810 split_constant_offset (base_iv
.base
, &base_iv
.base
, &dinit
);
811 init
= size_binop (PLUS_EXPR
, init
, dinit
);
812 split_constant_offset (offset_iv
.base
, &offset_iv
.base
, &dinit
);
813 init
= size_binop (PLUS_EXPR
, init
, dinit
);
815 step
= size_binop (PLUS_EXPR
,
816 fold_convert (ssizetype
, base_iv
.step
),
817 fold_convert (ssizetype
, offset_iv
.step
));
819 DR_BASE_ADDRESS (dr
) = canonicalize_base_object_address (base_iv
.base
);
821 DR_OFFSET (dr
) = fold_convert (ssizetype
, offset_iv
.base
);
825 DR_ALIGNED_TO (dr
) = size_int (highest_pow2_factor (offset_iv
.base
));
827 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
828 fprintf (dump_file
, "success.\n");
833 /* Determines the base object and the list of indices of memory reference
834 DR, analyzed in LOOP and instantiated in loop nest NEST. */
837 dr_analyze_indices (struct data_reference
*dr
, loop_p nest
, loop_p loop
)
839 VEC (tree
, heap
) *access_fns
= NULL
;
840 tree ref
= unshare_expr (DR_REF (dr
)), aref
= ref
, op
;
841 tree base
, off
, access_fn
= NULL_TREE
;
842 basic_block before_loop
= NULL
;
845 before_loop
= block_before_loop (nest
);
847 while (handled_component_p (aref
))
849 if (TREE_CODE (aref
) == ARRAY_REF
)
851 op
= TREE_OPERAND (aref
, 1);
854 access_fn
= analyze_scalar_evolution (loop
, op
);
855 access_fn
= instantiate_scev (before_loop
, loop
, access_fn
);
856 VEC_safe_push (tree
, heap
, access_fns
, access_fn
);
859 TREE_OPERAND (aref
, 1) = build_int_cst (TREE_TYPE (op
), 0);
862 aref
= TREE_OPERAND (aref
, 0);
866 && TREE_CODE (aref
) == MEM_REF
)
868 op
= TREE_OPERAND (aref
, 0);
869 access_fn
= analyze_scalar_evolution (loop
, op
);
870 access_fn
= instantiate_scev (before_loop
, loop
, access_fn
);
871 base
= initial_condition (access_fn
);
872 split_constant_offset (base
, &base
, &off
);
873 if (!integer_zerop (TREE_OPERAND (aref
, 1)))
875 off
= size_binop (PLUS_EXPR
, off
,
876 fold_convert (ssizetype
, TREE_OPERAND (aref
, 1)));
877 TREE_OPERAND (aref
, 1)
878 = build_int_cst (TREE_TYPE (TREE_OPERAND (aref
, 1)), 0);
880 access_fn
= chrec_replace_initial_condition (access_fn
,
881 fold_convert (TREE_TYPE (base
), off
));
883 TREE_OPERAND (aref
, 0) = base
;
884 VEC_safe_push (tree
, heap
, access_fns
, access_fn
);
887 if (TREE_CODE (ref
) == MEM_REF
888 && TREE_CODE (TREE_OPERAND (ref
, 0)) == ADDR_EXPR
889 && integer_zerop (TREE_OPERAND (ref
, 1)))
890 ref
= TREE_OPERAND (TREE_OPERAND (ref
, 0), 0);
892 /* For canonicalization purposes we'd like to strip all outermost
893 zero-offset component-refs.
894 ??? For now simply handle zero-index array-refs. */
895 while (TREE_CODE (ref
) == ARRAY_REF
896 && integer_zerop (TREE_OPERAND (ref
, 1)))
897 ref
= TREE_OPERAND (ref
, 0);
899 DR_BASE_OBJECT (dr
) = ref
;
900 DR_ACCESS_FNS (dr
) = access_fns
;
903 /* Extracts the alias analysis information from the memory reference DR. */
906 dr_analyze_alias (struct data_reference
*dr
)
908 tree ref
= DR_REF (dr
);
909 tree base
= get_base_address (ref
), addr
;
911 if (INDIRECT_REF_P (base
)
912 || TREE_CODE (base
) == MEM_REF
)
914 addr
= TREE_OPERAND (base
, 0);
915 if (TREE_CODE (addr
) == SSA_NAME
)
916 DR_PTR_INFO (dr
) = SSA_NAME_PTR_INFO (addr
);
920 /* Frees data reference DR. */
923 free_data_ref (data_reference_p dr
)
925 VEC_free (tree
, heap
, DR_ACCESS_FNS (dr
));
929 /* Analyzes memory reference MEMREF accessed in STMT. The reference
930 is read if IS_READ is true, write otherwise. Returns the
931 data_reference description of MEMREF. NEST is the outermost loop
932 in which the reference should be instantiated, LOOP is the loop in
933 which the data reference should be analyzed. */
935 struct data_reference
*
936 create_data_ref (loop_p nest
, loop_p loop
, tree memref
, gimple stmt
,
939 struct data_reference
*dr
;
941 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
943 fprintf (dump_file
, "Creating dr for ");
944 print_generic_expr (dump_file
, memref
, TDF_SLIM
);
945 fprintf (dump_file
, "\n");
948 dr
= XCNEW (struct data_reference
);
950 DR_REF (dr
) = memref
;
951 DR_IS_READ (dr
) = is_read
;
953 dr_analyze_innermost (dr
);
954 dr_analyze_indices (dr
, nest
, loop
);
955 dr_analyze_alias (dr
);
957 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
959 fprintf (dump_file
, "\tbase_address: ");
960 print_generic_expr (dump_file
, DR_BASE_ADDRESS (dr
), TDF_SLIM
);
961 fprintf (dump_file
, "\n\toffset from base address: ");
962 print_generic_expr (dump_file
, DR_OFFSET (dr
), TDF_SLIM
);
963 fprintf (dump_file
, "\n\tconstant offset from base address: ");
964 print_generic_expr (dump_file
, DR_INIT (dr
), TDF_SLIM
);
965 fprintf (dump_file
, "\n\tstep: ");
966 print_generic_expr (dump_file
, DR_STEP (dr
), TDF_SLIM
);
967 fprintf (dump_file
, "\n\taligned to: ");
968 print_generic_expr (dump_file
, DR_ALIGNED_TO (dr
), TDF_SLIM
);
969 fprintf (dump_file
, "\n\tbase_object: ");
970 print_generic_expr (dump_file
, DR_BASE_OBJECT (dr
), TDF_SLIM
);
971 fprintf (dump_file
, "\n");
977 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
980 dr_equal_offsets_p1 (tree offset1
, tree offset2
)
984 STRIP_NOPS (offset1
);
985 STRIP_NOPS (offset2
);
987 if (offset1
== offset2
)
990 if (TREE_CODE (offset1
) != TREE_CODE (offset2
)
991 || (!BINARY_CLASS_P (offset1
) && !UNARY_CLASS_P (offset1
)))
994 res
= dr_equal_offsets_p1 (TREE_OPERAND (offset1
, 0),
995 TREE_OPERAND (offset2
, 0));
997 if (!res
|| !BINARY_CLASS_P (offset1
))
1000 res
= dr_equal_offsets_p1 (TREE_OPERAND (offset1
, 1),
1001 TREE_OPERAND (offset2
, 1));
1006 /* Check if DRA and DRB have equal offsets. */
1008 dr_equal_offsets_p (struct data_reference
*dra
,
1009 struct data_reference
*drb
)
1011 tree offset1
, offset2
;
1013 offset1
= DR_OFFSET (dra
);
1014 offset2
= DR_OFFSET (drb
);
1016 return dr_equal_offsets_p1 (offset1
, offset2
);
1019 /* Returns true if FNA == FNB. */
1022 affine_function_equal_p (affine_fn fna
, affine_fn fnb
)
1024 unsigned i
, n
= VEC_length (tree
, fna
);
1026 if (n
!= VEC_length (tree
, fnb
))
1029 for (i
= 0; i
< n
; i
++)
1030 if (!operand_equal_p (VEC_index (tree
, fna
, i
),
1031 VEC_index (tree
, fnb
, i
), 0))
1037 /* If all the functions in CF are the same, returns one of them,
1038 otherwise returns NULL. */
1041 common_affine_function (conflict_function
*cf
)
1046 if (!CF_NONTRIVIAL_P (cf
))
1051 for (i
= 1; i
< cf
->n
; i
++)
1052 if (!affine_function_equal_p (comm
, cf
->fns
[i
]))
1058 /* Returns the base of the affine function FN. */
1061 affine_function_base (affine_fn fn
)
1063 return VEC_index (tree
, fn
, 0);
1066 /* Returns true if FN is a constant. */
1069 affine_function_constant_p (affine_fn fn
)
1074 for (i
= 1; VEC_iterate (tree
, fn
, i
, coef
); i
++)
1075 if (!integer_zerop (coef
))
1081 /* Returns true if FN is the zero constant function. */
1084 affine_function_zero_p (affine_fn fn
)
1086 return (integer_zerop (affine_function_base (fn
))
1087 && affine_function_constant_p (fn
));
1090 /* Returns a signed integer type with the largest precision from TA
1094 signed_type_for_types (tree ta
, tree tb
)
1096 if (TYPE_PRECISION (ta
) > TYPE_PRECISION (tb
))
1097 return signed_type_for (ta
);
1099 return signed_type_for (tb
);
1102 /* Applies operation OP on affine functions FNA and FNB, and returns the
1106 affine_fn_op (enum tree_code op
, affine_fn fna
, affine_fn fnb
)
1112 if (VEC_length (tree
, fnb
) > VEC_length (tree
, fna
))
1114 n
= VEC_length (tree
, fna
);
1115 m
= VEC_length (tree
, fnb
);
1119 n
= VEC_length (tree
, fnb
);
1120 m
= VEC_length (tree
, fna
);
1123 ret
= VEC_alloc (tree
, heap
, m
);
1124 for (i
= 0; i
< n
; i
++)
1126 tree type
= signed_type_for_types (TREE_TYPE (VEC_index (tree
, fna
, i
)),
1127 TREE_TYPE (VEC_index (tree
, fnb
, i
)));
1129 VEC_quick_push (tree
, ret
,
1130 fold_build2 (op
, type
,
1131 VEC_index (tree
, fna
, i
),
1132 VEC_index (tree
, fnb
, i
)));
1135 for (; VEC_iterate (tree
, fna
, i
, coef
); i
++)
1136 VEC_quick_push (tree
, ret
,
1137 fold_build2 (op
, signed_type_for (TREE_TYPE (coef
)),
1138 coef
, integer_zero_node
));
1139 for (; VEC_iterate (tree
, fnb
, i
, coef
); i
++)
1140 VEC_quick_push (tree
, ret
,
1141 fold_build2 (op
, signed_type_for (TREE_TYPE (coef
)),
1142 integer_zero_node
, coef
));
1147 /* Returns the sum of affine functions FNA and FNB. */
1150 affine_fn_plus (affine_fn fna
, affine_fn fnb
)
1152 return affine_fn_op (PLUS_EXPR
, fna
, fnb
);
1155 /* Returns the difference of affine functions FNA and FNB. */
1158 affine_fn_minus (affine_fn fna
, affine_fn fnb
)
1160 return affine_fn_op (MINUS_EXPR
, fna
, fnb
);
1163 /* Frees affine function FN. */
1166 affine_fn_free (affine_fn fn
)
1168 VEC_free (tree
, heap
, fn
);
1171 /* Determine for each subscript in the data dependence relation DDR
1175 compute_subscript_distance (struct data_dependence_relation
*ddr
)
1177 conflict_function
*cf_a
, *cf_b
;
1178 affine_fn fn_a
, fn_b
, diff
;
1180 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
1184 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
1186 struct subscript
*subscript
;
1188 subscript
= DDR_SUBSCRIPT (ddr
, i
);
1189 cf_a
= SUB_CONFLICTS_IN_A (subscript
);
1190 cf_b
= SUB_CONFLICTS_IN_B (subscript
);
1192 fn_a
= common_affine_function (cf_a
);
1193 fn_b
= common_affine_function (cf_b
);
1196 SUB_DISTANCE (subscript
) = chrec_dont_know
;
1199 diff
= affine_fn_minus (fn_a
, fn_b
);
1201 if (affine_function_constant_p (diff
))
1202 SUB_DISTANCE (subscript
) = affine_function_base (diff
);
1204 SUB_DISTANCE (subscript
) = chrec_dont_know
;
1206 affine_fn_free (diff
);
1211 /* Returns the conflict function for "unknown". */
1213 static conflict_function
*
1214 conflict_fn_not_known (void)
1216 conflict_function
*fn
= XCNEW (conflict_function
);
1222 /* Returns the conflict function for "independent". */
1224 static conflict_function
*
1225 conflict_fn_no_dependence (void)
1227 conflict_function
*fn
= XCNEW (conflict_function
);
1228 fn
->n
= NO_DEPENDENCE
;
1233 /* Returns true if the address of OBJ is invariant in LOOP. */
1236 object_address_invariant_in_loop_p (const struct loop
*loop
, const_tree obj
)
1238 while (handled_component_p (obj
))
1240 if (TREE_CODE (obj
) == ARRAY_REF
)
1242 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1243 need to check the stride and the lower bound of the reference. */
1244 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, 2),
1246 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, 3),
1250 else if (TREE_CODE (obj
) == COMPONENT_REF
)
1252 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, 2),
1256 obj
= TREE_OPERAND (obj
, 0);
1259 if (!INDIRECT_REF_P (obj
)
1260 && TREE_CODE (obj
) != MEM_REF
)
1263 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, 0),
1267 /* Returns false if we can prove that data references A and B do not alias,
1271 dr_may_alias_p (const struct data_reference
*a
, const struct data_reference
*b
)
1273 tree addr_a
= DR_BASE_OBJECT (a
);
1274 tree addr_b
= DR_BASE_OBJECT (b
);
1276 if (DR_IS_WRITE (a
) && DR_IS_WRITE (b
))
1277 return refs_output_dependent_p (addr_a
, addr_b
);
1278 else if (DR_IS_READ (a
) && DR_IS_WRITE (b
))
1279 return refs_anti_dependent_p (addr_a
, addr_b
);
1280 return refs_may_alias_p (addr_a
, addr_b
);
1283 static void compute_self_dependence (struct data_dependence_relation
*);
1285 /* Initialize a data dependence relation between data accesses A and
1286 B. NB_LOOPS is the number of loops surrounding the references: the
1287 size of the classic distance/direction vectors. */
1289 static struct data_dependence_relation
*
1290 initialize_data_dependence_relation (struct data_reference
*a
,
1291 struct data_reference
*b
,
1292 VEC (loop_p
, heap
) *loop_nest
)
1294 struct data_dependence_relation
*res
;
1297 res
= XNEW (struct data_dependence_relation
);
1300 DDR_LOOP_NEST (res
) = NULL
;
1301 DDR_REVERSED_P (res
) = false;
1302 DDR_SUBSCRIPTS (res
) = NULL
;
1303 DDR_DIR_VECTS (res
) = NULL
;
1304 DDR_DIST_VECTS (res
) = NULL
;
1306 if (a
== NULL
|| b
== NULL
)
1308 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
1312 /* If the data references do not alias, then they are independent. */
1313 if (!dr_may_alias_p (a
, b
))
1315 DDR_ARE_DEPENDENT (res
) = chrec_known
;
1319 /* When the references are exactly the same, don't spend time doing
1320 the data dependence tests, just initialize the ddr and return. */
1321 if (operand_equal_p (DR_REF (a
), DR_REF (b
), 0))
1323 DDR_AFFINE_P (res
) = true;
1324 DDR_ARE_DEPENDENT (res
) = NULL_TREE
;
1325 DDR_SUBSCRIPTS (res
) = VEC_alloc (subscript_p
, heap
, DR_NUM_DIMENSIONS (a
));
1326 DDR_LOOP_NEST (res
) = loop_nest
;
1327 DDR_INNER_LOOP (res
) = 0;
1328 DDR_SELF_REFERENCE (res
) = true;
1329 compute_self_dependence (res
);
1333 /* If the references do not access the same object, we do not know
1334 whether they alias or not. */
1335 if (!operand_equal_p (DR_BASE_OBJECT (a
), DR_BASE_OBJECT (b
), 0))
1337 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
1341 /* If the base of the object is not invariant in the loop nest, we cannot
1342 analyze it. TODO -- in fact, it would suffice to record that there may
1343 be arbitrary dependences in the loops where the base object varies. */
1345 && !object_address_invariant_in_loop_p (VEC_index (loop_p
, loop_nest
, 0),
1346 DR_BASE_OBJECT (a
)))
1348 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
1352 /* If the number of dimensions of the access to not agree we can have
1353 a pointer access to a component of the array element type and an
1354 array access while the base-objects are still the same. Punt. */
1355 if (DR_NUM_DIMENSIONS (a
) != DR_NUM_DIMENSIONS (b
))
1357 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
1361 DDR_AFFINE_P (res
) = true;
1362 DDR_ARE_DEPENDENT (res
) = NULL_TREE
;
1363 DDR_SUBSCRIPTS (res
) = VEC_alloc (subscript_p
, heap
, DR_NUM_DIMENSIONS (a
));
1364 DDR_LOOP_NEST (res
) = loop_nest
;
1365 DDR_INNER_LOOP (res
) = 0;
1366 DDR_SELF_REFERENCE (res
) = false;
1368 for (i
= 0; i
< DR_NUM_DIMENSIONS (a
); i
++)
1370 struct subscript
*subscript
;
1372 subscript
= XNEW (struct subscript
);
1373 SUB_CONFLICTS_IN_A (subscript
) = conflict_fn_not_known ();
1374 SUB_CONFLICTS_IN_B (subscript
) = conflict_fn_not_known ();
1375 SUB_LAST_CONFLICT (subscript
) = chrec_dont_know
;
1376 SUB_DISTANCE (subscript
) = chrec_dont_know
;
1377 VEC_safe_push (subscript_p
, heap
, DDR_SUBSCRIPTS (res
), subscript
);
1383 /* Frees memory used by the conflict function F. */
1386 free_conflict_function (conflict_function
*f
)
1390 if (CF_NONTRIVIAL_P (f
))
1392 for (i
= 0; i
< f
->n
; i
++)
1393 affine_fn_free (f
->fns
[i
]);
1398 /* Frees memory used by SUBSCRIPTS. */
1401 free_subscripts (VEC (subscript_p
, heap
) *subscripts
)
1406 FOR_EACH_VEC_ELT (subscript_p
, subscripts
, i
, s
)
1408 free_conflict_function (s
->conflicting_iterations_in_a
);
1409 free_conflict_function (s
->conflicting_iterations_in_b
);
1412 VEC_free (subscript_p
, heap
, subscripts
);
1415 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1419 finalize_ddr_dependent (struct data_dependence_relation
*ddr
,
1422 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1424 fprintf (dump_file
, "(dependence classified: ");
1425 print_generic_expr (dump_file
, chrec
, 0);
1426 fprintf (dump_file
, ")\n");
1429 DDR_ARE_DEPENDENT (ddr
) = chrec
;
1430 free_subscripts (DDR_SUBSCRIPTS (ddr
));
1431 DDR_SUBSCRIPTS (ddr
) = NULL
;
1434 /* The dependence relation DDR cannot be represented by a distance
1438 non_affine_dependence_relation (struct data_dependence_relation
*ddr
)
1440 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1441 fprintf (dump_file
, "(Dependence relation cannot be represented by distance vector.) \n");
1443 DDR_AFFINE_P (ddr
) = false;
1448 /* This section contains the classic Banerjee tests. */
1450 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1451 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1454 ziv_subscript_p (const_tree chrec_a
, const_tree chrec_b
)
1456 return (evolution_function_is_constant_p (chrec_a
)
1457 && evolution_function_is_constant_p (chrec_b
));
1460 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1461 variable, i.e., if the SIV (Single Index Variable) test is true. */
1464 siv_subscript_p (const_tree chrec_a
, const_tree chrec_b
)
1466 if ((evolution_function_is_constant_p (chrec_a
)
1467 && evolution_function_is_univariate_p (chrec_b
))
1468 || (evolution_function_is_constant_p (chrec_b
)
1469 && evolution_function_is_univariate_p (chrec_a
)))
1472 if (evolution_function_is_univariate_p (chrec_a
)
1473 && evolution_function_is_univariate_p (chrec_b
))
1475 switch (TREE_CODE (chrec_a
))
1477 case POLYNOMIAL_CHREC
:
1478 switch (TREE_CODE (chrec_b
))
1480 case POLYNOMIAL_CHREC
:
1481 if (CHREC_VARIABLE (chrec_a
) != CHREC_VARIABLE (chrec_b
))
1496 /* Creates a conflict function with N dimensions. The affine functions
1497 in each dimension follow. */
1499 static conflict_function
*
1500 conflict_fn (unsigned n
, ...)
1503 conflict_function
*ret
= XCNEW (conflict_function
);
1506 gcc_assert (0 < n
&& n
<= MAX_DIM
);
1510 for (i
= 0; i
< n
; i
++)
1511 ret
->fns
[i
] = va_arg (ap
, affine_fn
);
1517 /* Returns constant affine function with value CST. */
1520 affine_fn_cst (tree cst
)
1522 affine_fn fn
= VEC_alloc (tree
, heap
, 1);
1523 VEC_quick_push (tree
, fn
, cst
);
1527 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1530 affine_fn_univar (tree cst
, unsigned dim
, tree coef
)
1532 affine_fn fn
= VEC_alloc (tree
, heap
, dim
+ 1);
1535 gcc_assert (dim
> 0);
1536 VEC_quick_push (tree
, fn
, cst
);
1537 for (i
= 1; i
< dim
; i
++)
1538 VEC_quick_push (tree
, fn
, integer_zero_node
);
1539 VEC_quick_push (tree
, fn
, coef
);
1543 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1544 *OVERLAPS_B are initialized to the functions that describe the
1545 relation between the elements accessed twice by CHREC_A and
1546 CHREC_B. For k >= 0, the following property is verified:
1548 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1551 analyze_ziv_subscript (tree chrec_a
,
1553 conflict_function
**overlaps_a
,
1554 conflict_function
**overlaps_b
,
1555 tree
*last_conflicts
)
1557 tree type
, difference
;
1558 dependence_stats
.num_ziv
++;
1560 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1561 fprintf (dump_file
, "(analyze_ziv_subscript \n");
1563 type
= signed_type_for_types (TREE_TYPE (chrec_a
), TREE_TYPE (chrec_b
));
1564 chrec_a
= chrec_convert (type
, chrec_a
, NULL
);
1565 chrec_b
= chrec_convert (type
, chrec_b
, NULL
);
1566 difference
= chrec_fold_minus (type
, chrec_a
, chrec_b
);
1568 switch (TREE_CODE (difference
))
1571 if (integer_zerop (difference
))
1573 /* The difference is equal to zero: the accessed index
1574 overlaps for each iteration in the loop. */
1575 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
1576 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
1577 *last_conflicts
= chrec_dont_know
;
1578 dependence_stats
.num_ziv_dependent
++;
1582 /* The accesses do not overlap. */
1583 *overlaps_a
= conflict_fn_no_dependence ();
1584 *overlaps_b
= conflict_fn_no_dependence ();
1585 *last_conflicts
= integer_zero_node
;
1586 dependence_stats
.num_ziv_independent
++;
1591 /* We're not sure whether the indexes overlap. For the moment,
1592 conservatively answer "don't know". */
1593 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1594 fprintf (dump_file
, "ziv test failed: difference is non-integer.\n");
1596 *overlaps_a
= conflict_fn_not_known ();
1597 *overlaps_b
= conflict_fn_not_known ();
1598 *last_conflicts
= chrec_dont_know
;
1599 dependence_stats
.num_ziv_unimplemented
++;
1603 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1604 fprintf (dump_file
, ")\n");
1607 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
1608 and only if it fits to the int type. If this is not the case, or the
1609 bound on the number of iterations of LOOP could not be derived, returns
1613 max_stmt_executions_tree (struct loop
*loop
)
1617 if (!max_stmt_executions (loop
, true, &nit
))
1618 return chrec_dont_know
;
1620 if (!double_int_fits_to_tree_p (unsigned_type_node
, nit
))
1621 return chrec_dont_know
;
1623 return double_int_to_tree (unsigned_type_node
, nit
);
1626 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1627 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1628 *OVERLAPS_B are initialized to the functions that describe the
1629 relation between the elements accessed twice by CHREC_A and
1630 CHREC_B. For k >= 0, the following property is verified:
1632 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1635 analyze_siv_subscript_cst_affine (tree chrec_a
,
1637 conflict_function
**overlaps_a
,
1638 conflict_function
**overlaps_b
,
1639 tree
*last_conflicts
)
1641 bool value0
, value1
, value2
;
1642 tree type
, difference
, tmp
;
1644 type
= signed_type_for_types (TREE_TYPE (chrec_a
), TREE_TYPE (chrec_b
));
1645 chrec_a
= chrec_convert (type
, chrec_a
, NULL
);
1646 chrec_b
= chrec_convert (type
, chrec_b
, NULL
);
1647 difference
= chrec_fold_minus (type
, initial_condition (chrec_b
), chrec_a
);
1649 if (!chrec_is_positive (initial_condition (difference
), &value0
))
1651 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1652 fprintf (dump_file
, "siv test failed: chrec is not positive.\n");
1654 dependence_stats
.num_siv_unimplemented
++;
1655 *overlaps_a
= conflict_fn_not_known ();
1656 *overlaps_b
= conflict_fn_not_known ();
1657 *last_conflicts
= chrec_dont_know
;
1662 if (value0
== false)
1664 if (!chrec_is_positive (CHREC_RIGHT (chrec_b
), &value1
))
1666 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1667 fprintf (dump_file
, "siv test failed: chrec not positive.\n");
1669 *overlaps_a
= conflict_fn_not_known ();
1670 *overlaps_b
= conflict_fn_not_known ();
1671 *last_conflicts
= chrec_dont_know
;
1672 dependence_stats
.num_siv_unimplemented
++;
1681 chrec_b = {10, +, 1}
1684 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b
), difference
))
1686 HOST_WIDE_INT numiter
;
1687 struct loop
*loop
= get_chrec_loop (chrec_b
);
1689 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
1690 tmp
= fold_build2 (EXACT_DIV_EXPR
, type
,
1691 fold_build1 (ABS_EXPR
, type
, difference
),
1692 CHREC_RIGHT (chrec_b
));
1693 *overlaps_b
= conflict_fn (1, affine_fn_cst (tmp
));
1694 *last_conflicts
= integer_one_node
;
1697 /* Perform weak-zero siv test to see if overlap is
1698 outside the loop bounds. */
1699 numiter
= max_stmt_executions_int (loop
, true);
1702 && compare_tree_int (tmp
, numiter
) > 0)
1704 free_conflict_function (*overlaps_a
);
1705 free_conflict_function (*overlaps_b
);
1706 *overlaps_a
= conflict_fn_no_dependence ();
1707 *overlaps_b
= conflict_fn_no_dependence ();
1708 *last_conflicts
= integer_zero_node
;
1709 dependence_stats
.num_siv_independent
++;
1712 dependence_stats
.num_siv_dependent
++;
1716 /* When the step does not divide the difference, there are
1720 *overlaps_a
= conflict_fn_no_dependence ();
1721 *overlaps_b
= conflict_fn_no_dependence ();
1722 *last_conflicts
= integer_zero_node
;
1723 dependence_stats
.num_siv_independent
++;
1732 chrec_b = {10, +, -1}
1734 In this case, chrec_a will not overlap with chrec_b. */
1735 *overlaps_a
= conflict_fn_no_dependence ();
1736 *overlaps_b
= conflict_fn_no_dependence ();
1737 *last_conflicts
= integer_zero_node
;
1738 dependence_stats
.num_siv_independent
++;
1745 if (!chrec_is_positive (CHREC_RIGHT (chrec_b
), &value2
))
1747 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1748 fprintf (dump_file
, "siv test failed: chrec not positive.\n");
1750 *overlaps_a
= conflict_fn_not_known ();
1751 *overlaps_b
= conflict_fn_not_known ();
1752 *last_conflicts
= chrec_dont_know
;
1753 dependence_stats
.num_siv_unimplemented
++;
1758 if (value2
== false)
1762 chrec_b = {10, +, -1}
1764 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b
), difference
))
1766 HOST_WIDE_INT numiter
;
1767 struct loop
*loop
= get_chrec_loop (chrec_b
);
1769 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
1770 tmp
= fold_build2 (EXACT_DIV_EXPR
, type
, difference
,
1771 CHREC_RIGHT (chrec_b
));
1772 *overlaps_b
= conflict_fn (1, affine_fn_cst (tmp
));
1773 *last_conflicts
= integer_one_node
;
1775 /* Perform weak-zero siv test to see if overlap is
1776 outside the loop bounds. */
1777 numiter
= max_stmt_executions_int (loop
, true);
1780 && compare_tree_int (tmp
, numiter
) > 0)
1782 free_conflict_function (*overlaps_a
);
1783 free_conflict_function (*overlaps_b
);
1784 *overlaps_a
= conflict_fn_no_dependence ();
1785 *overlaps_b
= conflict_fn_no_dependence ();
1786 *last_conflicts
= integer_zero_node
;
1787 dependence_stats
.num_siv_independent
++;
1790 dependence_stats
.num_siv_dependent
++;
1794 /* When the step does not divide the difference, there
1798 *overlaps_a
= conflict_fn_no_dependence ();
1799 *overlaps_b
= conflict_fn_no_dependence ();
1800 *last_conflicts
= integer_zero_node
;
1801 dependence_stats
.num_siv_independent
++;
1811 In this case, chrec_a will not overlap with chrec_b. */
1812 *overlaps_a
= conflict_fn_no_dependence ();
1813 *overlaps_b
= conflict_fn_no_dependence ();
1814 *last_conflicts
= integer_zero_node
;
1815 dependence_stats
.num_siv_independent
++;
1823 /* Helper recursive function for initializing the matrix A. Returns
1824 the initial value of CHREC. */
1827 initialize_matrix_A (lambda_matrix A
, tree chrec
, unsigned index
, int mult
)
1831 switch (TREE_CODE (chrec
))
1833 case POLYNOMIAL_CHREC
:
1834 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec
)) == INTEGER_CST
);
1836 A
[index
][0] = mult
* int_cst_value (CHREC_RIGHT (chrec
));
1837 return initialize_matrix_A (A
, CHREC_LEFT (chrec
), index
+ 1, mult
);
1843 tree op0
= initialize_matrix_A (A
, TREE_OPERAND (chrec
, 0), index
, mult
);
1844 tree op1
= initialize_matrix_A (A
, TREE_OPERAND (chrec
, 1), index
, mult
);
1846 return chrec_fold_op (TREE_CODE (chrec
), chrec_type (chrec
), op0
, op1
);
1851 tree op
= initialize_matrix_A (A
, TREE_OPERAND (chrec
, 0), index
, mult
);
1852 return chrec_convert (chrec_type (chrec
), op
, NULL
);
1857 /* Handle ~X as -1 - X. */
1858 tree op
= initialize_matrix_A (A
, TREE_OPERAND (chrec
, 0), index
, mult
);
1859 return chrec_fold_op (MINUS_EXPR
, chrec_type (chrec
),
1860 build_int_cst (TREE_TYPE (chrec
), -1), op
);
1872 #define FLOOR_DIV(x,y) ((x) / (y))
1874 /* Solves the special case of the Diophantine equation:
1875 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
1877 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
1878 number of iterations that loops X and Y run. The overlaps will be
1879 constructed as evolutions in dimension DIM. */
1882 compute_overlap_steps_for_affine_univar (int niter
, int step_a
, int step_b
,
1883 affine_fn
*overlaps_a
,
1884 affine_fn
*overlaps_b
,
1885 tree
*last_conflicts
, int dim
)
1887 if (((step_a
> 0 && step_b
> 0)
1888 || (step_a
< 0 && step_b
< 0)))
1890 int step_overlaps_a
, step_overlaps_b
;
1891 int gcd_steps_a_b
, last_conflict
, tau2
;
1893 gcd_steps_a_b
= gcd (step_a
, step_b
);
1894 step_overlaps_a
= step_b
/ gcd_steps_a_b
;
1895 step_overlaps_b
= step_a
/ gcd_steps_a_b
;
1899 tau2
= FLOOR_DIV (niter
, step_overlaps_a
);
1900 tau2
= MIN (tau2
, FLOOR_DIV (niter
, step_overlaps_b
));
1901 last_conflict
= tau2
;
1902 *last_conflicts
= build_int_cst (NULL_TREE
, last_conflict
);
1905 *last_conflicts
= chrec_dont_know
;
1907 *overlaps_a
= affine_fn_univar (integer_zero_node
, dim
,
1908 build_int_cst (NULL_TREE
,
1910 *overlaps_b
= affine_fn_univar (integer_zero_node
, dim
,
1911 build_int_cst (NULL_TREE
,
1917 *overlaps_a
= affine_fn_cst (integer_zero_node
);
1918 *overlaps_b
= affine_fn_cst (integer_zero_node
);
1919 *last_conflicts
= integer_zero_node
;
1923 /* Solves the special case of a Diophantine equation where CHREC_A is
1924 an affine bivariate function, and CHREC_B is an affine univariate
1925 function. For example,
1927 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
1929 has the following overlapping functions:
1931 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
1932 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
1933 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
1935 FORNOW: This is a specialized implementation for a case occurring in
1936 a common benchmark. Implement the general algorithm. */
1939 compute_overlap_steps_for_affine_1_2 (tree chrec_a
, tree chrec_b
,
1940 conflict_function
**overlaps_a
,
1941 conflict_function
**overlaps_b
,
1942 tree
*last_conflicts
)
1944 bool xz_p
, yz_p
, xyz_p
;
1945 int step_x
, step_y
, step_z
;
1946 HOST_WIDE_INT niter_x
, niter_y
, niter_z
, niter
;
1947 affine_fn overlaps_a_xz
, overlaps_b_xz
;
1948 affine_fn overlaps_a_yz
, overlaps_b_yz
;
1949 affine_fn overlaps_a_xyz
, overlaps_b_xyz
;
1950 affine_fn ova1
, ova2
, ovb
;
1951 tree last_conflicts_xz
, last_conflicts_yz
, last_conflicts_xyz
;
1953 step_x
= int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a
)));
1954 step_y
= int_cst_value (CHREC_RIGHT (chrec_a
));
1955 step_z
= int_cst_value (CHREC_RIGHT (chrec_b
));
1958 max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a
)), true);
1959 niter_y
= max_stmt_executions_int (get_chrec_loop (chrec_a
), true);
1960 niter_z
= max_stmt_executions_int (get_chrec_loop (chrec_b
), true);
1962 if (niter_x
< 0 || niter_y
< 0 || niter_z
< 0)
1964 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1965 fprintf (dump_file
, "overlap steps test failed: no iteration counts.\n");
1967 *overlaps_a
= conflict_fn_not_known ();
1968 *overlaps_b
= conflict_fn_not_known ();
1969 *last_conflicts
= chrec_dont_know
;
1973 niter
= MIN (niter_x
, niter_z
);
1974 compute_overlap_steps_for_affine_univar (niter
, step_x
, step_z
,
1977 &last_conflicts_xz
, 1);
1978 niter
= MIN (niter_y
, niter_z
);
1979 compute_overlap_steps_for_affine_univar (niter
, step_y
, step_z
,
1982 &last_conflicts_yz
, 2);
1983 niter
= MIN (niter_x
, niter_z
);
1984 niter
= MIN (niter_y
, niter
);
1985 compute_overlap_steps_for_affine_univar (niter
, step_x
+ step_y
, step_z
,
1988 &last_conflicts_xyz
, 3);
1990 xz_p
= !integer_zerop (last_conflicts_xz
);
1991 yz_p
= !integer_zerop (last_conflicts_yz
);
1992 xyz_p
= !integer_zerop (last_conflicts_xyz
);
1994 if (xz_p
|| yz_p
|| xyz_p
)
1996 ova1
= affine_fn_cst (integer_zero_node
);
1997 ova2
= affine_fn_cst (integer_zero_node
);
1998 ovb
= affine_fn_cst (integer_zero_node
);
2001 affine_fn t0
= ova1
;
2004 ova1
= affine_fn_plus (ova1
, overlaps_a_xz
);
2005 ovb
= affine_fn_plus (ovb
, overlaps_b_xz
);
2006 affine_fn_free (t0
);
2007 affine_fn_free (t2
);
2008 *last_conflicts
= last_conflicts_xz
;
2012 affine_fn t0
= ova2
;
2015 ova2
= affine_fn_plus (ova2
, overlaps_a_yz
);
2016 ovb
= affine_fn_plus (ovb
, overlaps_b_yz
);
2017 affine_fn_free (t0
);
2018 affine_fn_free (t2
);
2019 *last_conflicts
= last_conflicts_yz
;
2023 affine_fn t0
= ova1
;
2024 affine_fn t2
= ova2
;
2027 ova1
= affine_fn_plus (ova1
, overlaps_a_xyz
);
2028 ova2
= affine_fn_plus (ova2
, overlaps_a_xyz
);
2029 ovb
= affine_fn_plus (ovb
, overlaps_b_xyz
);
2030 affine_fn_free (t0
);
2031 affine_fn_free (t2
);
2032 affine_fn_free (t4
);
2033 *last_conflicts
= last_conflicts_xyz
;
2035 *overlaps_a
= conflict_fn (2, ova1
, ova2
);
2036 *overlaps_b
= conflict_fn (1, ovb
);
2040 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2041 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2042 *last_conflicts
= integer_zero_node
;
2045 affine_fn_free (overlaps_a_xz
);
2046 affine_fn_free (overlaps_b_xz
);
2047 affine_fn_free (overlaps_a_yz
);
2048 affine_fn_free (overlaps_b_yz
);
2049 affine_fn_free (overlaps_a_xyz
);
2050 affine_fn_free (overlaps_b_xyz
);
2053 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
2056 lambda_vector_copy (lambda_vector vec1
, lambda_vector vec2
,
2059 memcpy (vec2
, vec1
, size
* sizeof (*vec1
));
2062 /* Copy the elements of M x N matrix MAT1 to MAT2. */
2065 lambda_matrix_copy (lambda_matrix mat1
, lambda_matrix mat2
,
2070 for (i
= 0; i
< m
; i
++)
2071 lambda_vector_copy (mat1
[i
], mat2
[i
], n
);
2074 /* Store the N x N identity matrix in MAT. */
2077 lambda_matrix_id (lambda_matrix mat
, int size
)
2081 for (i
= 0; i
< size
; i
++)
2082 for (j
= 0; j
< size
; j
++)
2083 mat
[i
][j
] = (i
== j
) ? 1 : 0;
2086 /* Return the first nonzero element of vector VEC1 between START and N.
2087 We must have START <= N. Returns N if VEC1 is the zero vector. */
2090 lambda_vector_first_nz (lambda_vector vec1
, int n
, int start
)
2093 while (j
< n
&& vec1
[j
] == 0)
2098 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2099 R2 = R2 + CONST1 * R1. */
2102 lambda_matrix_row_add (lambda_matrix mat
, int n
, int r1
, int r2
, int const1
)
2109 for (i
= 0; i
< n
; i
++)
2110 mat
[r2
][i
] += const1
* mat
[r1
][i
];
2113 /* Swap rows R1 and R2 in matrix MAT. */
2116 lambda_matrix_row_exchange (lambda_matrix mat
, int r1
, int r2
)
2125 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
2126 and store the result in VEC2. */
2129 lambda_vector_mult_const (lambda_vector vec1
, lambda_vector vec2
,
2130 int size
, int const1
)
2135 lambda_vector_clear (vec2
, size
);
2137 for (i
= 0; i
< size
; i
++)
2138 vec2
[i
] = const1
* vec1
[i
];
2141 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
2144 lambda_vector_negate (lambda_vector vec1
, lambda_vector vec2
,
2147 lambda_vector_mult_const (vec1
, vec2
, size
, -1);
2150 /* Negate row R1 of matrix MAT which has N columns. */
2153 lambda_matrix_row_negate (lambda_matrix mat
, int n
, int r1
)
2155 lambda_vector_negate (mat
[r1
], mat
[r1
], n
);
2158 /* Return true if two vectors are equal. */
2161 lambda_vector_equal (lambda_vector vec1
, lambda_vector vec2
, int size
)
2164 for (i
= 0; i
< size
; i
++)
2165 if (vec1
[i
] != vec2
[i
])
2170 /* Given an M x N integer matrix A, this function determines an M x
2171 M unimodular matrix U, and an M x N echelon matrix S such that
2172 "U.A = S". This decomposition is also known as "right Hermite".
2174 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2175 Restructuring Compilers" Utpal Banerjee. */
2178 lambda_matrix_right_hermite (lambda_matrix A
, int m
, int n
,
2179 lambda_matrix S
, lambda_matrix U
)
2183 lambda_matrix_copy (A
, S
, m
, n
);
2184 lambda_matrix_id (U
, m
);
2186 for (j
= 0; j
< n
; j
++)
2188 if (lambda_vector_first_nz (S
[j
], m
, i0
) < m
)
2191 for (i
= m
- 1; i
>= i0
; i
--)
2193 while (S
[i
][j
] != 0)
2195 int sigma
, factor
, a
, b
;
2199 sigma
= (a
* b
< 0) ? -1: 1;
2202 factor
= sigma
* (a
/ b
);
2204 lambda_matrix_row_add (S
, n
, i
, i
-1, -factor
);
2205 lambda_matrix_row_exchange (S
, i
, i
-1);
2207 lambda_matrix_row_add (U
, m
, i
, i
-1, -factor
);
2208 lambda_matrix_row_exchange (U
, i
, i
-1);
2215 /* Determines the overlapping elements due to accesses CHREC_A and
2216 CHREC_B, that are affine functions. This function cannot handle
2217 symbolic evolution functions, ie. when initial conditions are
2218 parameters, because it uses lambda matrices of integers. */
2221 analyze_subscript_affine_affine (tree chrec_a
,
2223 conflict_function
**overlaps_a
,
2224 conflict_function
**overlaps_b
,
2225 tree
*last_conflicts
)
2227 unsigned nb_vars_a
, nb_vars_b
, dim
;
2228 HOST_WIDE_INT init_a
, init_b
, gamma
, gcd_alpha_beta
;
2229 lambda_matrix A
, U
, S
;
2230 struct obstack scratch_obstack
;
2232 if (eq_evolutions_p (chrec_a
, chrec_b
))
2234 /* The accessed index overlaps for each iteration in the
2236 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2237 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2238 *last_conflicts
= chrec_dont_know
;
2241 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2242 fprintf (dump_file
, "(analyze_subscript_affine_affine \n");
2244 /* For determining the initial intersection, we have to solve a
2245 Diophantine equation. This is the most time consuming part.
2247 For answering to the question: "Is there a dependence?" we have
2248 to prove that there exists a solution to the Diophantine
2249 equation, and that the solution is in the iteration domain,
2250 i.e. the solution is positive or zero, and that the solution
2251 happens before the upper bound loop.nb_iterations. Otherwise
2252 there is no dependence. This function outputs a description of
2253 the iterations that hold the intersections. */
2255 nb_vars_a
= nb_vars_in_chrec (chrec_a
);
2256 nb_vars_b
= nb_vars_in_chrec (chrec_b
);
2258 gcc_obstack_init (&scratch_obstack
);
2260 dim
= nb_vars_a
+ nb_vars_b
;
2261 U
= lambda_matrix_new (dim
, dim
, &scratch_obstack
);
2262 A
= lambda_matrix_new (dim
, 1, &scratch_obstack
);
2263 S
= lambda_matrix_new (dim
, 1, &scratch_obstack
);
2265 init_a
= int_cst_value (initialize_matrix_A (A
, chrec_a
, 0, 1));
2266 init_b
= int_cst_value (initialize_matrix_A (A
, chrec_b
, nb_vars_a
, -1));
2267 gamma
= init_b
- init_a
;
2269 /* Don't do all the hard work of solving the Diophantine equation
2270 when we already know the solution: for example,
2273 | gamma = 3 - 3 = 0.
2274 Then the first overlap occurs during the first iterations:
2275 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2279 if (nb_vars_a
== 1 && nb_vars_b
== 1)
2281 HOST_WIDE_INT step_a
, step_b
;
2282 HOST_WIDE_INT niter
, niter_a
, niter_b
;
2285 niter_a
= max_stmt_executions_int (get_chrec_loop (chrec_a
), true);
2286 niter_b
= max_stmt_executions_int (get_chrec_loop (chrec_b
), true);
2287 niter
= MIN (niter_a
, niter_b
);
2288 step_a
= int_cst_value (CHREC_RIGHT (chrec_a
));
2289 step_b
= int_cst_value (CHREC_RIGHT (chrec_b
));
2291 compute_overlap_steps_for_affine_univar (niter
, step_a
, step_b
,
2294 *overlaps_a
= conflict_fn (1, ova
);
2295 *overlaps_b
= conflict_fn (1, ovb
);
2298 else if (nb_vars_a
== 2 && nb_vars_b
== 1)
2299 compute_overlap_steps_for_affine_1_2
2300 (chrec_a
, chrec_b
, overlaps_a
, overlaps_b
, last_conflicts
);
2302 else if (nb_vars_a
== 1 && nb_vars_b
== 2)
2303 compute_overlap_steps_for_affine_1_2
2304 (chrec_b
, chrec_a
, overlaps_b
, overlaps_a
, last_conflicts
);
2308 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2309 fprintf (dump_file
, "affine-affine test failed: too many variables.\n");
2310 *overlaps_a
= conflict_fn_not_known ();
2311 *overlaps_b
= conflict_fn_not_known ();
2312 *last_conflicts
= chrec_dont_know
;
2314 goto end_analyze_subs_aa
;
2318 lambda_matrix_right_hermite (A
, dim
, 1, S
, U
);
2323 lambda_matrix_row_negate (U
, dim
, 0);
2325 gcd_alpha_beta
= S
[0][0];
2327 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2328 but that is a quite strange case. Instead of ICEing, answer
2330 if (gcd_alpha_beta
== 0)
2332 *overlaps_a
= conflict_fn_not_known ();
2333 *overlaps_b
= conflict_fn_not_known ();
2334 *last_conflicts
= chrec_dont_know
;
2335 goto end_analyze_subs_aa
;
2338 /* The classic "gcd-test". */
2339 if (!int_divides_p (gcd_alpha_beta
, gamma
))
2341 /* The "gcd-test" has determined that there is no integer
2342 solution, i.e. there is no dependence. */
2343 *overlaps_a
= conflict_fn_no_dependence ();
2344 *overlaps_b
= conflict_fn_no_dependence ();
2345 *last_conflicts
= integer_zero_node
;
2348 /* Both access functions are univariate. This includes SIV and MIV cases. */
2349 else if (nb_vars_a
== 1 && nb_vars_b
== 1)
2351 /* Both functions should have the same evolution sign. */
2352 if (((A
[0][0] > 0 && -A
[1][0] > 0)
2353 || (A
[0][0] < 0 && -A
[1][0] < 0)))
2355 /* The solutions are given by:
2357 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2360 For a given integer t. Using the following variables,
2362 | i0 = u11 * gamma / gcd_alpha_beta
2363 | j0 = u12 * gamma / gcd_alpha_beta
2370 | y0 = j0 + j1 * t. */
2371 HOST_WIDE_INT i0
, j0
, i1
, j1
;
2373 i0
= U
[0][0] * gamma
/ gcd_alpha_beta
;
2374 j0
= U
[0][1] * gamma
/ gcd_alpha_beta
;
2378 if ((i1
== 0 && i0
< 0)
2379 || (j1
== 0 && j0
< 0))
2381 /* There is no solution.
2382 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2383 falls in here, but for the moment we don't look at the
2384 upper bound of the iteration domain. */
2385 *overlaps_a
= conflict_fn_no_dependence ();
2386 *overlaps_b
= conflict_fn_no_dependence ();
2387 *last_conflicts
= integer_zero_node
;
2388 goto end_analyze_subs_aa
;
2391 if (i1
> 0 && j1
> 0)
2393 HOST_WIDE_INT niter_a
= max_stmt_executions_int
2394 (get_chrec_loop (chrec_a
), true);
2395 HOST_WIDE_INT niter_b
= max_stmt_executions_int
2396 (get_chrec_loop (chrec_b
), true);
2397 HOST_WIDE_INT niter
= MIN (niter_a
, niter_b
);
2399 /* (X0, Y0) is a solution of the Diophantine equation:
2400 "chrec_a (X0) = chrec_b (Y0)". */
2401 HOST_WIDE_INT tau1
= MAX (CEIL (-i0
, i1
),
2403 HOST_WIDE_INT x0
= i1
* tau1
+ i0
;
2404 HOST_WIDE_INT y0
= j1
* tau1
+ j0
;
2406 /* (X1, Y1) is the smallest positive solution of the eq
2407 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2408 first conflict occurs. */
2409 HOST_WIDE_INT min_multiple
= MIN (x0
/ i1
, y0
/ j1
);
2410 HOST_WIDE_INT x1
= x0
- i1
* min_multiple
;
2411 HOST_WIDE_INT y1
= y0
- j1
* min_multiple
;
2415 HOST_WIDE_INT tau2
= MIN (FLOOR_DIV (niter
- i0
, i1
),
2416 FLOOR_DIV (niter
- j0
, j1
));
2417 HOST_WIDE_INT last_conflict
= tau2
- (x1
- i0
)/i1
;
2419 /* If the overlap occurs outside of the bounds of the
2420 loop, there is no dependence. */
2421 if (x1
>= niter
|| y1
>= niter
)
2423 *overlaps_a
= conflict_fn_no_dependence ();
2424 *overlaps_b
= conflict_fn_no_dependence ();
2425 *last_conflicts
= integer_zero_node
;
2426 goto end_analyze_subs_aa
;
2429 *last_conflicts
= build_int_cst (NULL_TREE
, last_conflict
);
2432 *last_conflicts
= chrec_dont_know
;
2436 affine_fn_univar (build_int_cst (NULL_TREE
, x1
),
2438 build_int_cst (NULL_TREE
, i1
)));
2441 affine_fn_univar (build_int_cst (NULL_TREE
, y1
),
2443 build_int_cst (NULL_TREE
, j1
)));
2447 /* FIXME: For the moment, the upper bound of the
2448 iteration domain for i and j is not checked. */
2449 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2450 fprintf (dump_file
, "affine-affine test failed: unimplemented.\n");
2451 *overlaps_a
= conflict_fn_not_known ();
2452 *overlaps_b
= conflict_fn_not_known ();
2453 *last_conflicts
= chrec_dont_know
;
2458 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2459 fprintf (dump_file
, "affine-affine test failed: unimplemented.\n");
2460 *overlaps_a
= conflict_fn_not_known ();
2461 *overlaps_b
= conflict_fn_not_known ();
2462 *last_conflicts
= chrec_dont_know
;
2467 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2468 fprintf (dump_file
, "affine-affine test failed: unimplemented.\n");
2469 *overlaps_a
= conflict_fn_not_known ();
2470 *overlaps_b
= conflict_fn_not_known ();
2471 *last_conflicts
= chrec_dont_know
;
2474 end_analyze_subs_aa
:
2475 obstack_free (&scratch_obstack
, NULL
);
2476 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2478 fprintf (dump_file
, " (overlaps_a = ");
2479 dump_conflict_function (dump_file
, *overlaps_a
);
2480 fprintf (dump_file
, ")\n (overlaps_b = ");
2481 dump_conflict_function (dump_file
, *overlaps_b
);
2482 fprintf (dump_file
, ")\n");
2483 fprintf (dump_file
, ")\n");
2487 /* Returns true when analyze_subscript_affine_affine can be used for
2488 determining the dependence relation between chrec_a and chrec_b,
2489 that contain symbols. This function modifies chrec_a and chrec_b
2490 such that the analysis result is the same, and such that they don't
2491 contain symbols, and then can safely be passed to the analyzer.
2493 Example: The analysis of the following tuples of evolutions produce
2494 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2497 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2498 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2502 can_use_analyze_subscript_affine_affine (tree
*chrec_a
, tree
*chrec_b
)
2504 tree diff
, type
, left_a
, left_b
, right_b
;
2506 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a
))
2507 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b
)))
2508 /* FIXME: For the moment not handled. Might be refined later. */
2511 type
= chrec_type (*chrec_a
);
2512 left_a
= CHREC_LEFT (*chrec_a
);
2513 left_b
= chrec_convert (type
, CHREC_LEFT (*chrec_b
), NULL
);
2514 diff
= chrec_fold_minus (type
, left_a
, left_b
);
2516 if (!evolution_function_is_constant_p (diff
))
2519 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2520 fprintf (dump_file
, "can_use_subscript_aff_aff_for_symbolic \n");
2522 *chrec_a
= build_polynomial_chrec (CHREC_VARIABLE (*chrec_a
),
2523 diff
, CHREC_RIGHT (*chrec_a
));
2524 right_b
= chrec_convert (type
, CHREC_RIGHT (*chrec_b
), NULL
);
2525 *chrec_b
= build_polynomial_chrec (CHREC_VARIABLE (*chrec_b
),
2526 build_int_cst (type
, 0),
2531 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2532 *OVERLAPS_B are initialized to the functions that describe the
2533 relation between the elements accessed twice by CHREC_A and
2534 CHREC_B. For k >= 0, the following property is verified:
2536 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2539 analyze_siv_subscript (tree chrec_a
,
2541 conflict_function
**overlaps_a
,
2542 conflict_function
**overlaps_b
,
2543 tree
*last_conflicts
,
2546 dependence_stats
.num_siv
++;
2548 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2549 fprintf (dump_file
, "(analyze_siv_subscript \n");
2551 if (evolution_function_is_constant_p (chrec_a
)
2552 && evolution_function_is_affine_in_loop (chrec_b
, loop_nest_num
))
2553 analyze_siv_subscript_cst_affine (chrec_a
, chrec_b
,
2554 overlaps_a
, overlaps_b
, last_conflicts
);
2556 else if (evolution_function_is_affine_in_loop (chrec_a
, loop_nest_num
)
2557 && evolution_function_is_constant_p (chrec_b
))
2558 analyze_siv_subscript_cst_affine (chrec_b
, chrec_a
,
2559 overlaps_b
, overlaps_a
, last_conflicts
);
2561 else if (evolution_function_is_affine_in_loop (chrec_a
, loop_nest_num
)
2562 && evolution_function_is_affine_in_loop (chrec_b
, loop_nest_num
))
2564 if (!chrec_contains_symbols (chrec_a
)
2565 && !chrec_contains_symbols (chrec_b
))
2567 analyze_subscript_affine_affine (chrec_a
, chrec_b
,
2568 overlaps_a
, overlaps_b
,
2571 if (CF_NOT_KNOWN_P (*overlaps_a
)
2572 || CF_NOT_KNOWN_P (*overlaps_b
))
2573 dependence_stats
.num_siv_unimplemented
++;
2574 else if (CF_NO_DEPENDENCE_P (*overlaps_a
)
2575 || CF_NO_DEPENDENCE_P (*overlaps_b
))
2576 dependence_stats
.num_siv_independent
++;
2578 dependence_stats
.num_siv_dependent
++;
2580 else if (can_use_analyze_subscript_affine_affine (&chrec_a
,
2583 analyze_subscript_affine_affine (chrec_a
, chrec_b
,
2584 overlaps_a
, overlaps_b
,
2587 if (CF_NOT_KNOWN_P (*overlaps_a
)
2588 || CF_NOT_KNOWN_P (*overlaps_b
))
2589 dependence_stats
.num_siv_unimplemented
++;
2590 else if (CF_NO_DEPENDENCE_P (*overlaps_a
)
2591 || CF_NO_DEPENDENCE_P (*overlaps_b
))
2592 dependence_stats
.num_siv_independent
++;
2594 dependence_stats
.num_siv_dependent
++;
2597 goto siv_subscript_dontknow
;
2602 siv_subscript_dontknow
:;
2603 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2604 fprintf (dump_file
, "siv test failed: unimplemented.\n");
2605 *overlaps_a
= conflict_fn_not_known ();
2606 *overlaps_b
= conflict_fn_not_known ();
2607 *last_conflicts
= chrec_dont_know
;
2608 dependence_stats
.num_siv_unimplemented
++;
2611 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2612 fprintf (dump_file
, ")\n");
2615 /* Returns false if we can prove that the greatest common divisor of the steps
2616 of CHREC does not divide CST, false otherwise. */
2619 gcd_of_steps_may_divide_p (const_tree chrec
, const_tree cst
)
2621 HOST_WIDE_INT cd
= 0, val
;
2624 if (!host_integerp (cst
, 0))
2626 val
= tree_low_cst (cst
, 0);
2628 while (TREE_CODE (chrec
) == POLYNOMIAL_CHREC
)
2630 step
= CHREC_RIGHT (chrec
);
2631 if (!host_integerp (step
, 0))
2633 cd
= gcd (cd
, tree_low_cst (step
, 0));
2634 chrec
= CHREC_LEFT (chrec
);
2637 return val
% cd
== 0;
2640 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2641 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2642 functions that describe the relation between the elements accessed
2643 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2646 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2649 analyze_miv_subscript (tree chrec_a
,
2651 conflict_function
**overlaps_a
,
2652 conflict_function
**overlaps_b
,
2653 tree
*last_conflicts
,
2654 struct loop
*loop_nest
)
2656 tree type
, difference
;
2658 dependence_stats
.num_miv
++;
2659 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2660 fprintf (dump_file
, "(analyze_miv_subscript \n");
2662 type
= signed_type_for_types (TREE_TYPE (chrec_a
), TREE_TYPE (chrec_b
));
2663 chrec_a
= chrec_convert (type
, chrec_a
, NULL
);
2664 chrec_b
= chrec_convert (type
, chrec_b
, NULL
);
2665 difference
= chrec_fold_minus (type
, chrec_a
, chrec_b
);
2667 if (eq_evolutions_p (chrec_a
, chrec_b
))
2669 /* Access functions are the same: all the elements are accessed
2670 in the same order. */
2671 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2672 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2673 *last_conflicts
= max_stmt_executions_tree (get_chrec_loop (chrec_a
));
2674 dependence_stats
.num_miv_dependent
++;
2677 else if (evolution_function_is_constant_p (difference
)
2678 /* For the moment, the following is verified:
2679 evolution_function_is_affine_multivariate_p (chrec_a,
2681 && !gcd_of_steps_may_divide_p (chrec_a
, difference
))
2683 /* testsuite/.../ssa-chrec-33.c
2684 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2686 The difference is 1, and all the evolution steps are multiples
2687 of 2, consequently there are no overlapping elements. */
2688 *overlaps_a
= conflict_fn_no_dependence ();
2689 *overlaps_b
= conflict_fn_no_dependence ();
2690 *last_conflicts
= integer_zero_node
;
2691 dependence_stats
.num_miv_independent
++;
2694 else if (evolution_function_is_affine_multivariate_p (chrec_a
, loop_nest
->num
)
2695 && !chrec_contains_symbols (chrec_a
)
2696 && evolution_function_is_affine_multivariate_p (chrec_b
, loop_nest
->num
)
2697 && !chrec_contains_symbols (chrec_b
))
2699 /* testsuite/.../ssa-chrec-35.c
2700 {0, +, 1}_2 vs. {0, +, 1}_3
2701 the overlapping elements are respectively located at iterations:
2702 {0, +, 1}_x and {0, +, 1}_x,
2703 in other words, we have the equality:
2704 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2707 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2708 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2710 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2711 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2713 analyze_subscript_affine_affine (chrec_a
, chrec_b
,
2714 overlaps_a
, overlaps_b
, last_conflicts
);
2716 if (CF_NOT_KNOWN_P (*overlaps_a
)
2717 || CF_NOT_KNOWN_P (*overlaps_b
))
2718 dependence_stats
.num_miv_unimplemented
++;
2719 else if (CF_NO_DEPENDENCE_P (*overlaps_a
)
2720 || CF_NO_DEPENDENCE_P (*overlaps_b
))
2721 dependence_stats
.num_miv_independent
++;
2723 dependence_stats
.num_miv_dependent
++;
2728 /* When the analysis is too difficult, answer "don't know". */
2729 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2730 fprintf (dump_file
, "analyze_miv_subscript test failed: unimplemented.\n");
2732 *overlaps_a
= conflict_fn_not_known ();
2733 *overlaps_b
= conflict_fn_not_known ();
2734 *last_conflicts
= chrec_dont_know
;
2735 dependence_stats
.num_miv_unimplemented
++;
2738 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2739 fprintf (dump_file
, ")\n");
2742 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2743 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2744 OVERLAP_ITERATIONS_B are initialized with two functions that
2745 describe the iterations that contain conflicting elements.
2747 Remark: For an integer k >= 0, the following equality is true:
2749 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2753 analyze_overlapping_iterations (tree chrec_a
,
2755 conflict_function
**overlap_iterations_a
,
2756 conflict_function
**overlap_iterations_b
,
2757 tree
*last_conflicts
, struct loop
*loop_nest
)
2759 unsigned int lnn
= loop_nest
->num
;
2761 dependence_stats
.num_subscript_tests
++;
2763 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2765 fprintf (dump_file
, "(analyze_overlapping_iterations \n");
2766 fprintf (dump_file
, " (chrec_a = ");
2767 print_generic_expr (dump_file
, chrec_a
, 0);
2768 fprintf (dump_file
, ")\n (chrec_b = ");
2769 print_generic_expr (dump_file
, chrec_b
, 0);
2770 fprintf (dump_file
, ")\n");
2773 if (chrec_a
== NULL_TREE
2774 || chrec_b
== NULL_TREE
2775 || chrec_contains_undetermined (chrec_a
)
2776 || chrec_contains_undetermined (chrec_b
))
2778 dependence_stats
.num_subscript_undetermined
++;
2780 *overlap_iterations_a
= conflict_fn_not_known ();
2781 *overlap_iterations_b
= conflict_fn_not_known ();
2784 /* If they are the same chrec, and are affine, they overlap
2785 on every iteration. */
2786 else if (eq_evolutions_p (chrec_a
, chrec_b
)
2787 && (evolution_function_is_affine_multivariate_p (chrec_a
, lnn
)
2788 || operand_equal_p (chrec_a
, chrec_b
, 0)))
2790 dependence_stats
.num_same_subscript_function
++;
2791 *overlap_iterations_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2792 *overlap_iterations_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2793 *last_conflicts
= chrec_dont_know
;
2796 /* If they aren't the same, and aren't affine, we can't do anything
2798 else if ((chrec_contains_symbols (chrec_a
)
2799 || chrec_contains_symbols (chrec_b
))
2800 && (!evolution_function_is_affine_multivariate_p (chrec_a
, lnn
)
2801 || !evolution_function_is_affine_multivariate_p (chrec_b
, lnn
)))
2803 dependence_stats
.num_subscript_undetermined
++;
2804 *overlap_iterations_a
= conflict_fn_not_known ();
2805 *overlap_iterations_b
= conflict_fn_not_known ();
2808 else if (ziv_subscript_p (chrec_a
, chrec_b
))
2809 analyze_ziv_subscript (chrec_a
, chrec_b
,
2810 overlap_iterations_a
, overlap_iterations_b
,
2813 else if (siv_subscript_p (chrec_a
, chrec_b
))
2814 analyze_siv_subscript (chrec_a
, chrec_b
,
2815 overlap_iterations_a
, overlap_iterations_b
,
2816 last_conflicts
, lnn
);
2819 analyze_miv_subscript (chrec_a
, chrec_b
,
2820 overlap_iterations_a
, overlap_iterations_b
,
2821 last_conflicts
, loop_nest
);
2823 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2825 fprintf (dump_file
, " (overlap_iterations_a = ");
2826 dump_conflict_function (dump_file
, *overlap_iterations_a
);
2827 fprintf (dump_file
, ")\n (overlap_iterations_b = ");
2828 dump_conflict_function (dump_file
, *overlap_iterations_b
);
2829 fprintf (dump_file
, ")\n");
2830 fprintf (dump_file
, ")\n");
2834 /* Helper function for uniquely inserting distance vectors. */
2837 save_dist_v (struct data_dependence_relation
*ddr
, lambda_vector dist_v
)
2842 FOR_EACH_VEC_ELT (lambda_vector
, DDR_DIST_VECTS (ddr
), i
, v
)
2843 if (lambda_vector_equal (v
, dist_v
, DDR_NB_LOOPS (ddr
)))
2846 VEC_safe_push (lambda_vector
, heap
, DDR_DIST_VECTS (ddr
), dist_v
);
2849 /* Helper function for uniquely inserting direction vectors. */
2852 save_dir_v (struct data_dependence_relation
*ddr
, lambda_vector dir_v
)
2857 FOR_EACH_VEC_ELT (lambda_vector
, DDR_DIR_VECTS (ddr
), i
, v
)
2858 if (lambda_vector_equal (v
, dir_v
, DDR_NB_LOOPS (ddr
)))
2861 VEC_safe_push (lambda_vector
, heap
, DDR_DIR_VECTS (ddr
), dir_v
);
2864 /* Add a distance of 1 on all the loops outer than INDEX. If we
2865 haven't yet determined a distance for this outer loop, push a new
2866 distance vector composed of the previous distance, and a distance
2867 of 1 for this outer loop. Example:
2875 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
2876 save (0, 1), then we have to save (1, 0). */
2879 add_outer_distances (struct data_dependence_relation
*ddr
,
2880 lambda_vector dist_v
, int index
)
2882 /* For each outer loop where init_v is not set, the accesses are
2883 in dependence of distance 1 in the loop. */
2884 while (--index
>= 0)
2886 lambda_vector save_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
2887 lambda_vector_copy (dist_v
, save_v
, DDR_NB_LOOPS (ddr
));
2889 save_dist_v (ddr
, save_v
);
2893 /* Return false when fail to represent the data dependence as a
2894 distance vector. INIT_B is set to true when a component has been
2895 added to the distance vector DIST_V. INDEX_CARRY is then set to
2896 the index in DIST_V that carries the dependence. */
2899 build_classic_dist_vector_1 (struct data_dependence_relation
*ddr
,
2900 struct data_reference
*ddr_a
,
2901 struct data_reference
*ddr_b
,
2902 lambda_vector dist_v
, bool *init_b
,
2906 lambda_vector init_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
2908 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
2910 tree access_fn_a
, access_fn_b
;
2911 struct subscript
*subscript
= DDR_SUBSCRIPT (ddr
, i
);
2913 if (chrec_contains_undetermined (SUB_DISTANCE (subscript
)))
2915 non_affine_dependence_relation (ddr
);
2919 access_fn_a
= DR_ACCESS_FN (ddr_a
, i
);
2920 access_fn_b
= DR_ACCESS_FN (ddr_b
, i
);
2922 if (TREE_CODE (access_fn_a
) == POLYNOMIAL_CHREC
2923 && TREE_CODE (access_fn_b
) == POLYNOMIAL_CHREC
)
2926 int var_a
= CHREC_VARIABLE (access_fn_a
);
2927 int var_b
= CHREC_VARIABLE (access_fn_b
);
2930 || chrec_contains_undetermined (SUB_DISTANCE (subscript
)))
2932 non_affine_dependence_relation (ddr
);
2936 dist
= int_cst_value (SUB_DISTANCE (subscript
));
2937 index
= index_in_loop_nest (var_a
, DDR_LOOP_NEST (ddr
));
2938 *index_carry
= MIN (index
, *index_carry
);
2940 /* This is the subscript coupling test. If we have already
2941 recorded a distance for this loop (a distance coming from
2942 another subscript), it should be the same. For example,
2943 in the following code, there is no dependence:
2950 if (init_v
[index
] != 0 && dist_v
[index
] != dist
)
2952 finalize_ddr_dependent (ddr
, chrec_known
);
2956 dist_v
[index
] = dist
;
2960 else if (!operand_equal_p (access_fn_a
, access_fn_b
, 0))
2962 /* This can be for example an affine vs. constant dependence
2963 (T[i] vs. T[3]) that is not an affine dependence and is
2964 not representable as a distance vector. */
2965 non_affine_dependence_relation (ddr
);
2973 /* Return true when the DDR contains only constant access functions. */
2976 constant_access_functions (const struct data_dependence_relation
*ddr
)
2980 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
2981 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr
), i
))
2982 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr
), i
)))
2988 /* Helper function for the case where DDR_A and DDR_B are the same
2989 multivariate access function with a constant step. For an example
2993 add_multivariate_self_dist (struct data_dependence_relation
*ddr
, tree c_2
)
2996 tree c_1
= CHREC_LEFT (c_2
);
2997 tree c_0
= CHREC_LEFT (c_1
);
2998 lambda_vector dist_v
;
3001 /* Polynomials with more than 2 variables are not handled yet. When
3002 the evolution steps are parameters, it is not possible to
3003 represent the dependence using classical distance vectors. */
3004 if (TREE_CODE (c_0
) != INTEGER_CST
3005 || TREE_CODE (CHREC_RIGHT (c_1
)) != INTEGER_CST
3006 || TREE_CODE (CHREC_RIGHT (c_2
)) != INTEGER_CST
)
3008 DDR_AFFINE_P (ddr
) = false;
3012 x_2
= index_in_loop_nest (CHREC_VARIABLE (c_2
), DDR_LOOP_NEST (ddr
));
3013 x_1
= index_in_loop_nest (CHREC_VARIABLE (c_1
), DDR_LOOP_NEST (ddr
));
3015 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
3016 dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3017 v1
= int_cst_value (CHREC_RIGHT (c_1
));
3018 v2
= int_cst_value (CHREC_RIGHT (c_2
));
3031 save_dist_v (ddr
, dist_v
);
3033 add_outer_distances (ddr
, dist_v
, x_1
);
3036 /* Helper function for the case where DDR_A and DDR_B are the same
3037 access functions. */
3040 add_other_self_distances (struct data_dependence_relation
*ddr
)
3042 lambda_vector dist_v
;
3044 int index_carry
= DDR_NB_LOOPS (ddr
);
3046 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
3048 tree access_fun
= DR_ACCESS_FN (DDR_A (ddr
), i
);
3050 if (TREE_CODE (access_fun
) == POLYNOMIAL_CHREC
)
3052 if (!evolution_function_is_univariate_p (access_fun
))
3054 if (DDR_NUM_SUBSCRIPTS (ddr
) != 1)
3056 DDR_ARE_DEPENDENT (ddr
) = chrec_dont_know
;
3060 access_fun
= DR_ACCESS_FN (DDR_A (ddr
), 0);
3062 if (TREE_CODE (CHREC_LEFT (access_fun
)) == POLYNOMIAL_CHREC
)
3063 add_multivariate_self_dist (ddr
, access_fun
);
3065 /* The evolution step is not constant: it varies in
3066 the outer loop, so this cannot be represented by a
3067 distance vector. For example in pr34635.c the
3068 evolution is {0, +, {0, +, 4}_1}_2. */
3069 DDR_AFFINE_P (ddr
) = false;
3074 index_carry
= MIN (index_carry
,
3075 index_in_loop_nest (CHREC_VARIABLE (access_fun
),
3076 DDR_LOOP_NEST (ddr
)));
3080 dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3081 add_outer_distances (ddr
, dist_v
, index_carry
);
3085 insert_innermost_unit_dist_vector (struct data_dependence_relation
*ddr
)
3087 lambda_vector dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3089 dist_v
[DDR_INNER_LOOP (ddr
)] = 1;
3090 save_dist_v (ddr
, dist_v
);
3093 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
3094 is the case for example when access functions are the same and
3095 equal to a constant, as in:
3102 in which case the distance vectors are (0) and (1). */
3105 add_distance_for_zero_overlaps (struct data_dependence_relation
*ddr
)
3109 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
3111 subscript_p sub
= DDR_SUBSCRIPT (ddr
, i
);
3112 conflict_function
*ca
= SUB_CONFLICTS_IN_A (sub
);
3113 conflict_function
*cb
= SUB_CONFLICTS_IN_B (sub
);
3115 for (j
= 0; j
< ca
->n
; j
++)
3116 if (affine_function_zero_p (ca
->fns
[j
]))
3118 insert_innermost_unit_dist_vector (ddr
);
3122 for (j
= 0; j
< cb
->n
; j
++)
3123 if (affine_function_zero_p (cb
->fns
[j
]))
3125 insert_innermost_unit_dist_vector (ddr
);
3131 /* Compute the classic per loop distance vector. DDR is the data
3132 dependence relation to build a vector from. Return false when fail
3133 to represent the data dependence as a distance vector. */
3136 build_classic_dist_vector (struct data_dependence_relation
*ddr
,
3137 struct loop
*loop_nest
)
3139 bool init_b
= false;
3140 int index_carry
= DDR_NB_LOOPS (ddr
);
3141 lambda_vector dist_v
;
3143 if (DDR_ARE_DEPENDENT (ddr
) != NULL_TREE
)
3146 if (same_access_functions (ddr
))
3148 /* Save the 0 vector. */
3149 dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3150 save_dist_v (ddr
, dist_v
);
3152 if (constant_access_functions (ddr
))
3153 add_distance_for_zero_overlaps (ddr
);
3155 if (DDR_NB_LOOPS (ddr
) > 1)
3156 add_other_self_distances (ddr
);
3161 dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3162 if (!build_classic_dist_vector_1 (ddr
, DDR_A (ddr
), DDR_B (ddr
),
3163 dist_v
, &init_b
, &index_carry
))
3166 /* Save the distance vector if we initialized one. */
3169 /* Verify a basic constraint: classic distance vectors should
3170 always be lexicographically positive.
3172 Data references are collected in the order of execution of
3173 the program, thus for the following loop
3175 | for (i = 1; i < 100; i++)
3176 | for (j = 1; j < 100; j++)
3178 | t = T[j+1][i-1]; // A
3179 | T[j][i] = t + 2; // B
3182 references are collected following the direction of the wind:
3183 A then B. The data dependence tests are performed also
3184 following this order, such that we're looking at the distance
3185 separating the elements accessed by A from the elements later
3186 accessed by B. But in this example, the distance returned by
3187 test_dep (A, B) is lexicographically negative (-1, 1), that
3188 means that the access A occurs later than B with respect to
3189 the outer loop, ie. we're actually looking upwind. In this
3190 case we solve test_dep (B, A) looking downwind to the
3191 lexicographically positive solution, that returns the
3192 distance vector (1, -1). */
3193 if (!lambda_vector_lexico_pos (dist_v
, DDR_NB_LOOPS (ddr
)))
3195 lambda_vector save_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3196 if (!subscript_dependence_tester_1 (ddr
, DDR_B (ddr
), DDR_A (ddr
),
3199 compute_subscript_distance (ddr
);
3200 if (!build_classic_dist_vector_1 (ddr
, DDR_B (ddr
), DDR_A (ddr
),
3201 save_v
, &init_b
, &index_carry
))
3203 save_dist_v (ddr
, save_v
);
3204 DDR_REVERSED_P (ddr
) = true;
3206 /* In this case there is a dependence forward for all the
3209 | for (k = 1; k < 100; k++)
3210 | for (i = 1; i < 100; i++)
3211 | for (j = 1; j < 100; j++)
3213 | t = T[j+1][i-1]; // A
3214 | T[j][i] = t + 2; // B
3222 if (DDR_NB_LOOPS (ddr
) > 1)
3224 add_outer_distances (ddr
, save_v
, index_carry
);
3225 add_outer_distances (ddr
, dist_v
, index_carry
);
3230 lambda_vector save_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3231 lambda_vector_copy (dist_v
, save_v
, DDR_NB_LOOPS (ddr
));
3233 if (DDR_NB_LOOPS (ddr
) > 1)
3235 lambda_vector opposite_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3237 if (!subscript_dependence_tester_1 (ddr
, DDR_B (ddr
),
3238 DDR_A (ddr
), loop_nest
))
3240 compute_subscript_distance (ddr
);
3241 if (!build_classic_dist_vector_1 (ddr
, DDR_B (ddr
), DDR_A (ddr
),
3242 opposite_v
, &init_b
,
3246 save_dist_v (ddr
, save_v
);
3247 add_outer_distances (ddr
, dist_v
, index_carry
);
3248 add_outer_distances (ddr
, opposite_v
, index_carry
);
3251 save_dist_v (ddr
, save_v
);
3256 /* There is a distance of 1 on all the outer loops: Example:
3257 there is a dependence of distance 1 on loop_1 for the array A.
3263 add_outer_distances (ddr
, dist_v
,
3264 lambda_vector_first_nz (dist_v
,
3265 DDR_NB_LOOPS (ddr
), 0));
3268 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3272 fprintf (dump_file
, "(build_classic_dist_vector\n");
3273 for (i
= 0; i
< DDR_NUM_DIST_VECTS (ddr
); i
++)
3275 fprintf (dump_file
, " dist_vector = (");
3276 print_lambda_vector (dump_file
, DDR_DIST_VECT (ddr
, i
),
3277 DDR_NB_LOOPS (ddr
));
3278 fprintf (dump_file
, " )\n");
3280 fprintf (dump_file
, ")\n");
3286 /* Return the direction for a given distance.
3287 FIXME: Computing dir this way is suboptimal, since dir can catch
3288 cases that dist is unable to represent. */
3290 static inline enum data_dependence_direction
3291 dir_from_dist (int dist
)
3294 return dir_positive
;
3296 return dir_negative
;
3301 /* Compute the classic per loop direction vector. DDR is the data
3302 dependence relation to build a vector from. */
3305 build_classic_dir_vector (struct data_dependence_relation
*ddr
)
3308 lambda_vector dist_v
;
3310 FOR_EACH_VEC_ELT (lambda_vector
, DDR_DIST_VECTS (ddr
), i
, dist_v
)
3312 lambda_vector dir_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3314 for (j
= 0; j
< DDR_NB_LOOPS (ddr
); j
++)
3315 dir_v
[j
] = dir_from_dist (dist_v
[j
]);
3317 save_dir_v (ddr
, dir_v
);
3321 /* Helper function. Returns true when there is a dependence between
3322 data references DRA and DRB. */
3325 subscript_dependence_tester_1 (struct data_dependence_relation
*ddr
,
3326 struct data_reference
*dra
,
3327 struct data_reference
*drb
,
3328 struct loop
*loop_nest
)
3331 tree last_conflicts
;
3332 struct subscript
*subscript
;
3334 for (i
= 0; VEC_iterate (subscript_p
, DDR_SUBSCRIPTS (ddr
), i
, subscript
);
3337 conflict_function
*overlaps_a
, *overlaps_b
;
3339 analyze_overlapping_iterations (DR_ACCESS_FN (dra
, i
),
3340 DR_ACCESS_FN (drb
, i
),
3341 &overlaps_a
, &overlaps_b
,
3342 &last_conflicts
, loop_nest
);
3344 if (CF_NOT_KNOWN_P (overlaps_a
)
3345 || CF_NOT_KNOWN_P (overlaps_b
))
3347 finalize_ddr_dependent (ddr
, chrec_dont_know
);
3348 dependence_stats
.num_dependence_undetermined
++;
3349 free_conflict_function (overlaps_a
);
3350 free_conflict_function (overlaps_b
);
3354 else if (CF_NO_DEPENDENCE_P (overlaps_a
)
3355 || CF_NO_DEPENDENCE_P (overlaps_b
))
3357 finalize_ddr_dependent (ddr
, chrec_known
);
3358 dependence_stats
.num_dependence_independent
++;
3359 free_conflict_function (overlaps_a
);
3360 free_conflict_function (overlaps_b
);
3366 if (SUB_CONFLICTS_IN_A (subscript
))
3367 free_conflict_function (SUB_CONFLICTS_IN_A (subscript
));
3368 if (SUB_CONFLICTS_IN_B (subscript
))
3369 free_conflict_function (SUB_CONFLICTS_IN_B (subscript
));
3371 SUB_CONFLICTS_IN_A (subscript
) = overlaps_a
;
3372 SUB_CONFLICTS_IN_B (subscript
) = overlaps_b
;
3373 SUB_LAST_CONFLICT (subscript
) = last_conflicts
;
3380 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3383 subscript_dependence_tester (struct data_dependence_relation
*ddr
,
3384 struct loop
*loop_nest
)
3387 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3388 fprintf (dump_file
, "(subscript_dependence_tester \n");
3390 if (subscript_dependence_tester_1 (ddr
, DDR_A (ddr
), DDR_B (ddr
), loop_nest
))
3391 dependence_stats
.num_dependence_dependent
++;
3393 compute_subscript_distance (ddr
);
3394 if (build_classic_dist_vector (ddr
, loop_nest
))
3395 build_classic_dir_vector (ddr
);
3397 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3398 fprintf (dump_file
, ")\n");
3401 /* Returns true when all the access functions of A are affine or
3402 constant with respect to LOOP_NEST. */
3405 access_functions_are_affine_or_constant_p (const struct data_reference
*a
,
3406 const struct loop
*loop_nest
)
3409 VEC(tree
,heap
) *fns
= DR_ACCESS_FNS (a
);
3412 FOR_EACH_VEC_ELT (tree
, fns
, i
, t
)
3413 if (!evolution_function_is_invariant_p (t
, loop_nest
->num
)
3414 && !evolution_function_is_affine_multivariate_p (t
, loop_nest
->num
))
3420 /* Initializes an equation for an OMEGA problem using the information
3421 contained in the ACCESS_FUN. Returns true when the operation
3424 PB is the omega constraint system.
3425 EQ is the number of the equation to be initialized.
3426 OFFSET is used for shifting the variables names in the constraints:
3427 a constrain is composed of 2 * the number of variables surrounding
3428 dependence accesses. OFFSET is set either to 0 for the first n variables,
3429 then it is set to n.
3430 ACCESS_FUN is expected to be an affine chrec. */
3433 init_omega_eq_with_af (omega_pb pb
, unsigned eq
,
3434 unsigned int offset
, tree access_fun
,
3435 struct data_dependence_relation
*ddr
)
3437 switch (TREE_CODE (access_fun
))
3439 case POLYNOMIAL_CHREC
:
3441 tree left
= CHREC_LEFT (access_fun
);
3442 tree right
= CHREC_RIGHT (access_fun
);
3443 int var
= CHREC_VARIABLE (access_fun
);
3446 if (TREE_CODE (right
) != INTEGER_CST
)
3449 var_idx
= index_in_loop_nest (var
, DDR_LOOP_NEST (ddr
));
3450 pb
->eqs
[eq
].coef
[offset
+ var_idx
+ 1] = int_cst_value (right
);
3452 /* Compute the innermost loop index. */
3453 DDR_INNER_LOOP (ddr
) = MAX (DDR_INNER_LOOP (ddr
), var_idx
);
3456 pb
->eqs
[eq
].coef
[var_idx
+ DDR_NB_LOOPS (ddr
) + 1]
3457 += int_cst_value (right
);
3459 switch (TREE_CODE (left
))
3461 case POLYNOMIAL_CHREC
:
3462 return init_omega_eq_with_af (pb
, eq
, offset
, left
, ddr
);
3465 pb
->eqs
[eq
].coef
[0] += int_cst_value (left
);
3474 pb
->eqs
[eq
].coef
[0] += int_cst_value (access_fun
);
3482 /* As explained in the comments preceding init_omega_for_ddr, we have
3483 to set up a system for each loop level, setting outer loops
3484 variation to zero, and current loop variation to positive or zero.
3485 Save each lexico positive distance vector. */
3488 omega_extract_distance_vectors (omega_pb pb
,
3489 struct data_dependence_relation
*ddr
)
3493 struct loop
*loopi
, *loopj
;
3494 enum omega_result res
;
3496 /* Set a new problem for each loop in the nest. The basis is the
3497 problem that we have initialized until now. On top of this we
3498 add new constraints. */
3499 for (i
= 0; i
<= DDR_INNER_LOOP (ddr
)
3500 && VEC_iterate (loop_p
, DDR_LOOP_NEST (ddr
), i
, loopi
); i
++)
3503 omega_pb copy
= omega_alloc_problem (2 * DDR_NB_LOOPS (ddr
),
3504 DDR_NB_LOOPS (ddr
));
3506 omega_copy_problem (copy
, pb
);
3508 /* For all the outer loops "loop_j", add "dj = 0". */
3510 j
< i
&& VEC_iterate (loop_p
, DDR_LOOP_NEST (ddr
), j
, loopj
); j
++)
3512 eq
= omega_add_zero_eq (copy
, omega_black
);
3513 copy
->eqs
[eq
].coef
[j
+ 1] = 1;
3516 /* For "loop_i", add "0 <= di". */
3517 geq
= omega_add_zero_geq (copy
, omega_black
);
3518 copy
->geqs
[geq
].coef
[i
+ 1] = 1;
3520 /* Reduce the constraint system, and test that the current
3521 problem is feasible. */
3522 res
= omega_simplify_problem (copy
);
3523 if (res
== omega_false
3524 || res
== omega_unknown
3525 || copy
->num_geqs
> (int) DDR_NB_LOOPS (ddr
))
3528 for (eq
= 0; eq
< copy
->num_subs
; eq
++)
3529 if (copy
->subs
[eq
].key
== (int) i
+ 1)
3531 dist
= copy
->subs
[eq
].coef
[0];
3537 /* Reinitialize problem... */
3538 omega_copy_problem (copy
, pb
);
3540 j
< i
&& VEC_iterate (loop_p
, DDR_LOOP_NEST (ddr
), j
, loopj
); j
++)
3542 eq
= omega_add_zero_eq (copy
, omega_black
);
3543 copy
->eqs
[eq
].coef
[j
+ 1] = 1;
3546 /* ..., but this time "di = 1". */
3547 eq
= omega_add_zero_eq (copy
, omega_black
);
3548 copy
->eqs
[eq
].coef
[i
+ 1] = 1;
3549 copy
->eqs
[eq
].coef
[0] = -1;
3551 res
= omega_simplify_problem (copy
);
3552 if (res
== omega_false
3553 || res
== omega_unknown
3554 || copy
->num_geqs
> (int) DDR_NB_LOOPS (ddr
))
3557 for (eq
= 0; eq
< copy
->num_subs
; eq
++)
3558 if (copy
->subs
[eq
].key
== (int) i
+ 1)
3560 dist
= copy
->subs
[eq
].coef
[0];
3566 /* Save the lexicographically positive distance vector. */
3569 lambda_vector dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3570 lambda_vector dir_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3574 for (eq
= 0; eq
< copy
->num_subs
; eq
++)
3575 if (copy
->subs
[eq
].key
> 0)
3577 dist
= copy
->subs
[eq
].coef
[0];
3578 dist_v
[copy
->subs
[eq
].key
- 1] = dist
;
3581 for (j
= 0; j
< DDR_NB_LOOPS (ddr
); j
++)
3582 dir_v
[j
] = dir_from_dist (dist_v
[j
]);
3584 save_dist_v (ddr
, dist_v
);
3585 save_dir_v (ddr
, dir_v
);
3589 omega_free_problem (copy
);
3593 /* This is called for each subscript of a tuple of data references:
3594 insert an equality for representing the conflicts. */
3597 omega_setup_subscript (tree access_fun_a
, tree access_fun_b
,
3598 struct data_dependence_relation
*ddr
,
3599 omega_pb pb
, bool *maybe_dependent
)
3602 tree type
= signed_type_for_types (TREE_TYPE (access_fun_a
),
3603 TREE_TYPE (access_fun_b
));
3604 tree fun_a
= chrec_convert (type
, access_fun_a
, NULL
);
3605 tree fun_b
= chrec_convert (type
, access_fun_b
, NULL
);
3606 tree difference
= chrec_fold_minus (type
, fun_a
, fun_b
);
3609 /* When the fun_a - fun_b is not constant, the dependence is not
3610 captured by the classic distance vector representation. */
3611 if (TREE_CODE (difference
) != INTEGER_CST
)
3615 if (ziv_subscript_p (fun_a
, fun_b
) && !integer_zerop (difference
))
3617 /* There is no dependence. */
3618 *maybe_dependent
= false;
3622 minus_one
= build_int_cst (type
, -1);
3623 fun_b
= chrec_fold_multiply (type
, fun_b
, minus_one
);
3625 eq
= omega_add_zero_eq (pb
, omega_black
);
3626 if (!init_omega_eq_with_af (pb
, eq
, DDR_NB_LOOPS (ddr
), fun_a
, ddr
)
3627 || !init_omega_eq_with_af (pb
, eq
, 0, fun_b
, ddr
))
3628 /* There is probably a dependence, but the system of
3629 constraints cannot be built: answer "don't know". */
3633 if (DDR_NB_LOOPS (ddr
) != 0 && pb
->eqs
[eq
].coef
[0]
3634 && !int_divides_p (lambda_vector_gcd
3635 ((lambda_vector
) &(pb
->eqs
[eq
].coef
[1]),
3636 2 * DDR_NB_LOOPS (ddr
)),
3637 pb
->eqs
[eq
].coef
[0]))
3639 /* There is no dependence. */
3640 *maybe_dependent
= false;
3647 /* Helper function, same as init_omega_for_ddr but specialized for
3648 data references A and B. */
3651 init_omega_for_ddr_1 (struct data_reference
*dra
, struct data_reference
*drb
,
3652 struct data_dependence_relation
*ddr
,
3653 omega_pb pb
, bool *maybe_dependent
)
3658 unsigned nb_loops
= DDR_NB_LOOPS (ddr
);
3660 /* Insert an equality per subscript. */
3661 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
3663 if (!omega_setup_subscript (DR_ACCESS_FN (dra
, i
), DR_ACCESS_FN (drb
, i
),
3664 ddr
, pb
, maybe_dependent
))
3666 else if (*maybe_dependent
== false)
3668 /* There is no dependence. */
3669 DDR_ARE_DEPENDENT (ddr
) = chrec_known
;
3674 /* Insert inequalities: constraints corresponding to the iteration
3675 domain, i.e. the loops surrounding the references "loop_x" and
3676 the distance variables "dx". The layout of the OMEGA
3677 representation is as follows:
3678 - coef[0] is the constant
3679 - coef[1..nb_loops] are the protected variables that will not be
3680 removed by the solver: the "dx"
3681 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3683 for (i
= 0; i
<= DDR_INNER_LOOP (ddr
)
3684 && VEC_iterate (loop_p
, DDR_LOOP_NEST (ddr
), i
, loopi
); i
++)
3686 HOST_WIDE_INT nbi
= max_stmt_executions_int (loopi
, true);
3689 ineq
= omega_add_zero_geq (pb
, omega_black
);
3690 pb
->geqs
[ineq
].coef
[i
+ nb_loops
+ 1] = 1;
3692 /* 0 <= loop_x + dx */
3693 ineq
= omega_add_zero_geq (pb
, omega_black
);
3694 pb
->geqs
[ineq
].coef
[i
+ nb_loops
+ 1] = 1;
3695 pb
->geqs
[ineq
].coef
[i
+ 1] = 1;
3699 /* loop_x <= nb_iters */
3700 ineq
= omega_add_zero_geq (pb
, omega_black
);
3701 pb
->geqs
[ineq
].coef
[i
+ nb_loops
+ 1] = -1;
3702 pb
->geqs
[ineq
].coef
[0] = nbi
;
3704 /* loop_x + dx <= nb_iters */
3705 ineq
= omega_add_zero_geq (pb
, omega_black
);
3706 pb
->geqs
[ineq
].coef
[i
+ nb_loops
+ 1] = -1;
3707 pb
->geqs
[ineq
].coef
[i
+ 1] = -1;
3708 pb
->geqs
[ineq
].coef
[0] = nbi
;
3710 /* A step "dx" bigger than nb_iters is not feasible, so
3711 add "0 <= nb_iters + dx", */
3712 ineq
= omega_add_zero_geq (pb
, omega_black
);
3713 pb
->geqs
[ineq
].coef
[i
+ 1] = 1;
3714 pb
->geqs
[ineq
].coef
[0] = nbi
;
3715 /* and "dx <= nb_iters". */
3716 ineq
= omega_add_zero_geq (pb
, omega_black
);
3717 pb
->geqs
[ineq
].coef
[i
+ 1] = -1;
3718 pb
->geqs
[ineq
].coef
[0] = nbi
;
3722 omega_extract_distance_vectors (pb
, ddr
);
3727 /* Sets up the Omega dependence problem for the data dependence
3728 relation DDR. Returns false when the constraint system cannot be
3729 built, ie. when the test answers "don't know". Returns true
3730 otherwise, and when independence has been proved (using one of the
3731 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3732 set MAYBE_DEPENDENT to true.
3734 Example: for setting up the dependence system corresponding to the
3735 conflicting accesses
3740 | ... A[2*j, 2*(i + j)]
3744 the following constraints come from the iteration domain:
3751 where di, dj are the distance variables. The constraints
3752 representing the conflicting elements are:
3755 i + 1 = 2 * (i + di + j + dj)
3757 For asking that the resulting distance vector (di, dj) be
3758 lexicographically positive, we insert the constraint "di >= 0". If
3759 "di = 0" in the solution, we fix that component to zero, and we
3760 look at the inner loops: we set a new problem where all the outer
3761 loop distances are zero, and fix this inner component to be
3762 positive. When one of the components is positive, we save that
3763 distance, and set a new problem where the distance on this loop is
3764 zero, searching for other distances in the inner loops. Here is
3765 the classic example that illustrates that we have to set for each
3766 inner loop a new problem:
3774 we have to save two distances (1, 0) and (0, 1).
3776 Given two array references, refA and refB, we have to set the
3777 dependence problem twice, refA vs. refB and refB vs. refA, and we
3778 cannot do a single test, as refB might occur before refA in the
3779 inner loops, and the contrary when considering outer loops: ex.
3784 | T[{1,+,1}_2][{1,+,1}_1] // refA
3785 | T[{2,+,1}_2][{0,+,1}_1] // refB
3790 refB touches the elements in T before refA, and thus for the same
3791 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3792 but for successive loop_0 iterations, we have (1, -1, 1)
3794 The Omega solver expects the distance variables ("di" in the
3795 previous example) to come first in the constraint system (as
3796 variables to be protected, or "safe" variables), the constraint
3797 system is built using the following layout:
3799 "cst | distance vars | index vars".
3803 init_omega_for_ddr (struct data_dependence_relation
*ddr
,
3804 bool *maybe_dependent
)
3809 *maybe_dependent
= true;
3811 if (same_access_functions (ddr
))
3814 lambda_vector dir_v
;
3816 /* Save the 0 vector. */
3817 save_dist_v (ddr
, lambda_vector_new (DDR_NB_LOOPS (ddr
)));
3818 dir_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3819 for (j
= 0; j
< DDR_NB_LOOPS (ddr
); j
++)
3820 dir_v
[j
] = dir_equal
;
3821 save_dir_v (ddr
, dir_v
);
3823 /* Save the dependences carried by outer loops. */
3824 pb
= omega_alloc_problem (2 * DDR_NB_LOOPS (ddr
), DDR_NB_LOOPS (ddr
));
3825 res
= init_omega_for_ddr_1 (DDR_A (ddr
), DDR_B (ddr
), ddr
, pb
,
3827 omega_free_problem (pb
);
3831 /* Omega expects the protected variables (those that have to be kept
3832 after elimination) to appear first in the constraint system.
3833 These variables are the distance variables. In the following
3834 initialization we declare NB_LOOPS safe variables, and the total
3835 number of variables for the constraint system is 2*NB_LOOPS. */
3836 pb
= omega_alloc_problem (2 * DDR_NB_LOOPS (ddr
), DDR_NB_LOOPS (ddr
));
3837 res
= init_omega_for_ddr_1 (DDR_A (ddr
), DDR_B (ddr
), ddr
, pb
,
3839 omega_free_problem (pb
);
3841 /* Stop computation if not decidable, or no dependence. */
3842 if (res
== false || *maybe_dependent
== false)
3845 pb
= omega_alloc_problem (2 * DDR_NB_LOOPS (ddr
), DDR_NB_LOOPS (ddr
));
3846 res
= init_omega_for_ddr_1 (DDR_B (ddr
), DDR_A (ddr
), ddr
, pb
,
3848 omega_free_problem (pb
);
3853 /* Return true when DDR contains the same information as that stored
3854 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
3857 ddr_consistent_p (FILE *file
,
3858 struct data_dependence_relation
*ddr
,
3859 VEC (lambda_vector
, heap
) *dist_vects
,
3860 VEC (lambda_vector
, heap
) *dir_vects
)
3864 /* If dump_file is set, output there. */
3865 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3868 if (VEC_length (lambda_vector
, dist_vects
) != DDR_NUM_DIST_VECTS (ddr
))
3870 lambda_vector b_dist_v
;
3871 fprintf (file
, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
3872 VEC_length (lambda_vector
, dist_vects
),
3873 DDR_NUM_DIST_VECTS (ddr
));
3875 fprintf (file
, "Banerjee dist vectors:\n");
3876 FOR_EACH_VEC_ELT (lambda_vector
, dist_vects
, i
, b_dist_v
)
3877 print_lambda_vector (file
, b_dist_v
, DDR_NB_LOOPS (ddr
));
3879 fprintf (file
, "Omega dist vectors:\n");
3880 for (i
= 0; i
< DDR_NUM_DIST_VECTS (ddr
); i
++)
3881 print_lambda_vector (file
, DDR_DIST_VECT (ddr
, i
), DDR_NB_LOOPS (ddr
));
3883 fprintf (file
, "data dependence relation:\n");
3884 dump_data_dependence_relation (file
, ddr
);
3886 fprintf (file
, ")\n");
3890 if (VEC_length (lambda_vector
, dir_vects
) != DDR_NUM_DIR_VECTS (ddr
))
3892 fprintf (file
, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
3893 VEC_length (lambda_vector
, dir_vects
),
3894 DDR_NUM_DIR_VECTS (ddr
));
3898 for (i
= 0; i
< DDR_NUM_DIST_VECTS (ddr
); i
++)
3900 lambda_vector a_dist_v
;
3901 lambda_vector b_dist_v
= DDR_DIST_VECT (ddr
, i
);
3903 /* Distance vectors are not ordered in the same way in the DDR
3904 and in the DIST_VECTS: search for a matching vector. */
3905 FOR_EACH_VEC_ELT (lambda_vector
, dist_vects
, j
, a_dist_v
)
3906 if (lambda_vector_equal (a_dist_v
, b_dist_v
, DDR_NB_LOOPS (ddr
)))
3909 if (j
== VEC_length (lambda_vector
, dist_vects
))
3911 fprintf (file
, "\n(Dist vectors from the first dependence analyzer:\n");
3912 print_dist_vectors (file
, dist_vects
, DDR_NB_LOOPS (ddr
));
3913 fprintf (file
, "not found in Omega dist vectors:\n");
3914 print_dist_vectors (file
, DDR_DIST_VECTS (ddr
), DDR_NB_LOOPS (ddr
));
3915 fprintf (file
, "data dependence relation:\n");
3916 dump_data_dependence_relation (file
, ddr
);
3917 fprintf (file
, ")\n");
3921 for (i
= 0; i
< DDR_NUM_DIR_VECTS (ddr
); i
++)
3923 lambda_vector a_dir_v
;
3924 lambda_vector b_dir_v
= DDR_DIR_VECT (ddr
, i
);
3926 /* Direction vectors are not ordered in the same way in the DDR
3927 and in the DIR_VECTS: search for a matching vector. */
3928 FOR_EACH_VEC_ELT (lambda_vector
, dir_vects
, j
, a_dir_v
)
3929 if (lambda_vector_equal (a_dir_v
, b_dir_v
, DDR_NB_LOOPS (ddr
)))
3932 if (j
== VEC_length (lambda_vector
, dist_vects
))
3934 fprintf (file
, "\n(Dir vectors from the first dependence analyzer:\n");
3935 print_dir_vectors (file
, dir_vects
, DDR_NB_LOOPS (ddr
));
3936 fprintf (file
, "not found in Omega dir vectors:\n");
3937 print_dir_vectors (file
, DDR_DIR_VECTS (ddr
), DDR_NB_LOOPS (ddr
));
3938 fprintf (file
, "data dependence relation:\n");
3939 dump_data_dependence_relation (file
, ddr
);
3940 fprintf (file
, ")\n");
3947 /* This computes the affine dependence relation between A and B with
3948 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
3949 independence between two accesses, while CHREC_DONT_KNOW is used
3950 for representing the unknown relation.
3952 Note that it is possible to stop the computation of the dependence
3953 relation the first time we detect a CHREC_KNOWN element for a given
3957 compute_affine_dependence (struct data_dependence_relation
*ddr
,
3958 struct loop
*loop_nest
)
3960 struct data_reference
*dra
= DDR_A (ddr
);
3961 struct data_reference
*drb
= DDR_B (ddr
);
3963 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3965 fprintf (dump_file
, "(compute_affine_dependence\n");
3966 fprintf (dump_file
, " (stmt_a = \n");
3967 print_gimple_stmt (dump_file
, DR_STMT (dra
), 0, 0);
3968 fprintf (dump_file
, ")\n (stmt_b = \n");
3969 print_gimple_stmt (dump_file
, DR_STMT (drb
), 0, 0);
3970 fprintf (dump_file
, ")\n");
3973 /* Analyze only when the dependence relation is not yet known. */
3974 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
3975 && !DDR_SELF_REFERENCE (ddr
))
3977 dependence_stats
.num_dependence_tests
++;
3979 if (access_functions_are_affine_or_constant_p (dra
, loop_nest
)
3980 && access_functions_are_affine_or_constant_p (drb
, loop_nest
))
3982 if (flag_check_data_deps
)
3984 /* Compute the dependences using the first algorithm. */
3985 subscript_dependence_tester (ddr
, loop_nest
);
3987 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3989 fprintf (dump_file
, "\n\nBanerjee Analyzer\n");
3990 dump_data_dependence_relation (dump_file
, ddr
);
3993 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
3995 bool maybe_dependent
;
3996 VEC (lambda_vector
, heap
) *dir_vects
, *dist_vects
;
3998 /* Save the result of the first DD analyzer. */
3999 dist_vects
= DDR_DIST_VECTS (ddr
);
4000 dir_vects
= DDR_DIR_VECTS (ddr
);
4002 /* Reset the information. */
4003 DDR_DIST_VECTS (ddr
) = NULL
;
4004 DDR_DIR_VECTS (ddr
) = NULL
;
4006 /* Compute the same information using Omega. */
4007 if (!init_omega_for_ddr (ddr
, &maybe_dependent
))
4008 goto csys_dont_know
;
4010 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4012 fprintf (dump_file
, "Omega Analyzer\n");
4013 dump_data_dependence_relation (dump_file
, ddr
);
4016 /* Check that we get the same information. */
4017 if (maybe_dependent
)
4018 gcc_assert (ddr_consistent_p (stderr
, ddr
, dist_vects
,
4023 subscript_dependence_tester (ddr
, loop_nest
);
4026 /* As a last case, if the dependence cannot be determined, or if
4027 the dependence is considered too difficult to determine, answer
4032 dependence_stats
.num_dependence_undetermined
++;
4034 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4036 fprintf (dump_file
, "Data ref a:\n");
4037 dump_data_reference (dump_file
, dra
);
4038 fprintf (dump_file
, "Data ref b:\n");
4039 dump_data_reference (dump_file
, drb
);
4040 fprintf (dump_file
, "affine dependence test not usable: access function not affine or constant.\n");
4042 finalize_ddr_dependent (ddr
, chrec_dont_know
);
4046 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4047 fprintf (dump_file
, ")\n");
4050 /* This computes the dependence relation for the same data
4051 reference into DDR. */
4054 compute_self_dependence (struct data_dependence_relation
*ddr
)
4057 struct subscript
*subscript
;
4059 if (DDR_ARE_DEPENDENT (ddr
) != NULL_TREE
)
4062 for (i
= 0; VEC_iterate (subscript_p
, DDR_SUBSCRIPTS (ddr
), i
, subscript
);
4065 if (SUB_CONFLICTS_IN_A (subscript
))
4066 free_conflict_function (SUB_CONFLICTS_IN_A (subscript
));
4067 if (SUB_CONFLICTS_IN_B (subscript
))
4068 free_conflict_function (SUB_CONFLICTS_IN_B (subscript
));
4070 /* The accessed index overlaps for each iteration. */
4071 SUB_CONFLICTS_IN_A (subscript
)
4072 = conflict_fn (1, affine_fn_cst (integer_zero_node
));
4073 SUB_CONFLICTS_IN_B (subscript
)
4074 = conflict_fn (1, affine_fn_cst (integer_zero_node
));
4075 SUB_LAST_CONFLICT (subscript
) = chrec_dont_know
;
4078 /* The distance vector is the zero vector. */
4079 save_dist_v (ddr
, lambda_vector_new (DDR_NB_LOOPS (ddr
)));
4080 save_dir_v (ddr
, lambda_vector_new (DDR_NB_LOOPS (ddr
)));
4083 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4084 the data references in DATAREFS, in the LOOP_NEST. When
4085 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4089 compute_all_dependences (VEC (data_reference_p
, heap
) *datarefs
,
4090 VEC (ddr_p
, heap
) **dependence_relations
,
4091 VEC (loop_p
, heap
) *loop_nest
,
4092 bool compute_self_and_rr
)
4094 struct data_dependence_relation
*ddr
;
4095 struct data_reference
*a
, *b
;
4098 FOR_EACH_VEC_ELT (data_reference_p
, datarefs
, i
, a
)
4099 for (j
= i
+ 1; VEC_iterate (data_reference_p
, datarefs
, j
, b
); j
++)
4100 if (DR_IS_WRITE (a
) || DR_IS_WRITE (b
) || compute_self_and_rr
)
4102 ddr
= initialize_data_dependence_relation (a
, b
, loop_nest
);
4103 VEC_safe_push (ddr_p
, heap
, *dependence_relations
, ddr
);
4105 compute_affine_dependence (ddr
, VEC_index (loop_p
, loop_nest
, 0));
4108 if (compute_self_and_rr
)
4109 FOR_EACH_VEC_ELT (data_reference_p
, datarefs
, i
, a
)
4111 ddr
= initialize_data_dependence_relation (a
, a
, loop_nest
);
4112 VEC_safe_push (ddr_p
, heap
, *dependence_relations
, ddr
);
4113 compute_self_dependence (ddr
);
4117 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4118 true if STMT clobbers memory, false otherwise. */
4121 get_references_in_stmt (gimple stmt
, VEC (data_ref_loc
, heap
) **references
)
4123 bool clobbers_memory
= false;
4126 enum gimple_code stmt_code
= gimple_code (stmt
);
4130 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4131 Calls have side-effects, except those to const or pure
4133 if ((stmt_code
== GIMPLE_CALL
4134 && !(gimple_call_flags (stmt
) & (ECF_CONST
| ECF_PURE
)))
4135 || (stmt_code
== GIMPLE_ASM
4136 && gimple_asm_volatile_p (stmt
)))
4137 clobbers_memory
= true;
4139 if (!gimple_vuse (stmt
))
4140 return clobbers_memory
;
4142 if (stmt_code
== GIMPLE_ASSIGN
)
4145 op0
= gimple_assign_lhs_ptr (stmt
);
4146 op1
= gimple_assign_rhs1_ptr (stmt
);
4149 || (REFERENCE_CLASS_P (*op1
)
4150 && (base
= get_base_address (*op1
))
4151 && TREE_CODE (base
) != SSA_NAME
))
4153 ref
= VEC_safe_push (data_ref_loc
, heap
, *references
, NULL
);
4155 ref
->is_read
= true;
4158 else if (stmt_code
== GIMPLE_CALL
)
4162 op0
= gimple_call_lhs_ptr (stmt
);
4163 n
= gimple_call_num_args (stmt
);
4164 for (i
= 0; i
< n
; i
++)
4166 op1
= gimple_call_arg_ptr (stmt
, i
);
4169 || (REFERENCE_CLASS_P (*op1
) && get_base_address (*op1
)))
4171 ref
= VEC_safe_push (data_ref_loc
, heap
, *references
, NULL
);
4173 ref
->is_read
= true;
4178 return clobbers_memory
;
4182 || (REFERENCE_CLASS_P (*op0
) && get_base_address (*op0
))))
4184 ref
= VEC_safe_push (data_ref_loc
, heap
, *references
, NULL
);
4186 ref
->is_read
= false;
4188 return clobbers_memory
;
4191 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4192 reference, returns false, otherwise returns true. NEST is the outermost
4193 loop of the loop nest in which the references should be analyzed. */
4196 find_data_references_in_stmt (struct loop
*nest
, gimple stmt
,
4197 VEC (data_reference_p
, heap
) **datarefs
)
4200 VEC (data_ref_loc
, heap
) *references
;
4203 data_reference_p dr
;
4205 if (get_references_in_stmt (stmt
, &references
))
4207 VEC_free (data_ref_loc
, heap
, references
);
4211 FOR_EACH_VEC_ELT (data_ref_loc
, references
, i
, ref
)
4213 dr
= create_data_ref (nest
, loop_containing_stmt (stmt
),
4214 *ref
->pos
, stmt
, ref
->is_read
);
4215 gcc_assert (dr
!= NULL
);
4216 VEC_safe_push (data_reference_p
, heap
, *datarefs
, dr
);
4218 VEC_free (data_ref_loc
, heap
, references
);
4222 /* Stores the data references in STMT to DATAREFS. If there is an
4223 unanalyzable reference, returns false, otherwise returns true.
4224 NEST is the outermost loop of the loop nest in which the references
4225 should be instantiated, LOOP is the loop in which the references
4226 should be analyzed. */
4229 graphite_find_data_references_in_stmt (loop_p nest
, loop_p loop
, gimple stmt
,
4230 VEC (data_reference_p
, heap
) **datarefs
)
4233 VEC (data_ref_loc
, heap
) *references
;
4236 data_reference_p dr
;
4238 if (get_references_in_stmt (stmt
, &references
))
4240 VEC_free (data_ref_loc
, heap
, references
);
4244 FOR_EACH_VEC_ELT (data_ref_loc
, references
, i
, ref
)
4246 dr
= create_data_ref (nest
, loop
, *ref
->pos
, stmt
, ref
->is_read
);
4247 gcc_assert (dr
!= NULL
);
4248 VEC_safe_push (data_reference_p
, heap
, *datarefs
, dr
);
4251 VEC_free (data_ref_loc
, heap
, references
);
4255 /* Search the data references in LOOP, and record the information into
4256 DATAREFS. Returns chrec_dont_know when failing to analyze a
4257 difficult case, returns NULL_TREE otherwise. */
4260 find_data_references_in_bb (struct loop
*loop
, basic_block bb
,
4261 VEC (data_reference_p
, heap
) **datarefs
)
4263 gimple_stmt_iterator bsi
;
4265 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
4267 gimple stmt
= gsi_stmt (bsi
);
4269 if (!find_data_references_in_stmt (loop
, stmt
, datarefs
))
4271 struct data_reference
*res
;
4272 res
= XCNEW (struct data_reference
);
4273 VEC_safe_push (data_reference_p
, heap
, *datarefs
, res
);
4275 return chrec_dont_know
;
4282 /* Search the data references in LOOP, and record the information into
4283 DATAREFS. Returns chrec_dont_know when failing to analyze a
4284 difficult case, returns NULL_TREE otherwise.
4286 TODO: This function should be made smarter so that it can handle address
4287 arithmetic as if they were array accesses, etc. */
4290 find_data_references_in_loop (struct loop
*loop
,
4291 VEC (data_reference_p
, heap
) **datarefs
)
4293 basic_block bb
, *bbs
;
4296 bbs
= get_loop_body_in_dom_order (loop
);
4298 for (i
= 0; i
< loop
->num_nodes
; i
++)
4302 if (find_data_references_in_bb (loop
, bb
, datarefs
) == chrec_dont_know
)
4305 return chrec_dont_know
;
4313 /* Recursive helper function. */
4316 find_loop_nest_1 (struct loop
*loop
, VEC (loop_p
, heap
) **loop_nest
)
4318 /* Inner loops of the nest should not contain siblings. Example:
4319 when there are two consecutive loops,
4330 the dependence relation cannot be captured by the distance
4335 VEC_safe_push (loop_p
, heap
, *loop_nest
, loop
);
4337 return find_loop_nest_1 (loop
->inner
, loop_nest
);
4341 /* Return false when the LOOP is not well nested. Otherwise return
4342 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4343 contain the loops from the outermost to the innermost, as they will
4344 appear in the classic distance vector. */
4347 find_loop_nest (struct loop
*loop
, VEC (loop_p
, heap
) **loop_nest
)
4349 VEC_safe_push (loop_p
, heap
, *loop_nest
, loop
);
4351 return find_loop_nest_1 (loop
->inner
, loop_nest
);
4355 /* Returns true when the data dependences have been computed, false otherwise.
4356 Given a loop nest LOOP, the following vectors are returned:
4357 DATAREFS is initialized to all the array elements contained in this loop,
4358 DEPENDENCE_RELATIONS contains the relations between the data references.
4359 Compute read-read and self relations if
4360 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4363 compute_data_dependences_for_loop (struct loop
*loop
,
4364 bool compute_self_and_read_read_dependences
,
4365 VEC (loop_p
, heap
) **loop_nest
,
4366 VEC (data_reference_p
, heap
) **datarefs
,
4367 VEC (ddr_p
, heap
) **dependence_relations
)
4371 memset (&dependence_stats
, 0, sizeof (dependence_stats
));
4373 /* If the loop nest is not well formed, or one of the data references
4374 is not computable, give up without spending time to compute other
4377 || !find_loop_nest (loop
, loop_nest
)
4378 || find_data_references_in_loop (loop
, datarefs
) == chrec_dont_know
)
4380 struct data_dependence_relation
*ddr
;
4382 /* Insert a single relation into dependence_relations:
4384 ddr
= initialize_data_dependence_relation (NULL
, NULL
, *loop_nest
);
4385 VEC_safe_push (ddr_p
, heap
, *dependence_relations
, ddr
);
4389 compute_all_dependences (*datarefs
, dependence_relations
, *loop_nest
,
4390 compute_self_and_read_read_dependences
);
4392 if (dump_file
&& (dump_flags
& TDF_STATS
))
4394 fprintf (dump_file
, "Dependence tester statistics:\n");
4396 fprintf (dump_file
, "Number of dependence tests: %d\n",
4397 dependence_stats
.num_dependence_tests
);
4398 fprintf (dump_file
, "Number of dependence tests classified dependent: %d\n",
4399 dependence_stats
.num_dependence_dependent
);
4400 fprintf (dump_file
, "Number of dependence tests classified independent: %d\n",
4401 dependence_stats
.num_dependence_independent
);
4402 fprintf (dump_file
, "Number of undetermined dependence tests: %d\n",
4403 dependence_stats
.num_dependence_undetermined
);
4405 fprintf (dump_file
, "Number of subscript tests: %d\n",
4406 dependence_stats
.num_subscript_tests
);
4407 fprintf (dump_file
, "Number of undetermined subscript tests: %d\n",
4408 dependence_stats
.num_subscript_undetermined
);
4409 fprintf (dump_file
, "Number of same subscript function: %d\n",
4410 dependence_stats
.num_same_subscript_function
);
4412 fprintf (dump_file
, "Number of ziv tests: %d\n",
4413 dependence_stats
.num_ziv
);
4414 fprintf (dump_file
, "Number of ziv tests returning dependent: %d\n",
4415 dependence_stats
.num_ziv_dependent
);
4416 fprintf (dump_file
, "Number of ziv tests returning independent: %d\n",
4417 dependence_stats
.num_ziv_independent
);
4418 fprintf (dump_file
, "Number of ziv tests unimplemented: %d\n",
4419 dependence_stats
.num_ziv_unimplemented
);
4421 fprintf (dump_file
, "Number of siv tests: %d\n",
4422 dependence_stats
.num_siv
);
4423 fprintf (dump_file
, "Number of siv tests returning dependent: %d\n",
4424 dependence_stats
.num_siv_dependent
);
4425 fprintf (dump_file
, "Number of siv tests returning independent: %d\n",
4426 dependence_stats
.num_siv_independent
);
4427 fprintf (dump_file
, "Number of siv tests unimplemented: %d\n",
4428 dependence_stats
.num_siv_unimplemented
);
4430 fprintf (dump_file
, "Number of miv tests: %d\n",
4431 dependence_stats
.num_miv
);
4432 fprintf (dump_file
, "Number of miv tests returning dependent: %d\n",
4433 dependence_stats
.num_miv_dependent
);
4434 fprintf (dump_file
, "Number of miv tests returning independent: %d\n",
4435 dependence_stats
.num_miv_independent
);
4436 fprintf (dump_file
, "Number of miv tests unimplemented: %d\n",
4437 dependence_stats
.num_miv_unimplemented
);
4443 /* Returns true when the data dependences for the basic block BB have been
4444 computed, false otherwise.
4445 DATAREFS is initialized to all the array elements contained in this basic
4446 block, DEPENDENCE_RELATIONS contains the relations between the data
4447 references. Compute read-read and self relations if
4448 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4450 compute_data_dependences_for_bb (basic_block bb
,
4451 bool compute_self_and_read_read_dependences
,
4452 VEC (data_reference_p
, heap
) **datarefs
,
4453 VEC (ddr_p
, heap
) **dependence_relations
)
4455 if (find_data_references_in_bb (NULL
, bb
, datarefs
) == chrec_dont_know
)
4458 compute_all_dependences (*datarefs
, dependence_relations
, NULL
,
4459 compute_self_and_read_read_dependences
);
4463 /* Entry point (for testing only). Analyze all the data references
4464 and the dependence relations in LOOP.
4466 The data references are computed first.
4468 A relation on these nodes is represented by a complete graph. Some
4469 of the relations could be of no interest, thus the relations can be
4472 In the following function we compute all the relations. This is
4473 just a first implementation that is here for:
4474 - for showing how to ask for the dependence relations,
4475 - for the debugging the whole dependence graph,
4476 - for the dejagnu testcases and maintenance.
4478 It is possible to ask only for a part of the graph, avoiding to
4479 compute the whole dependence graph. The computed dependences are
4480 stored in a knowledge base (KB) such that later queries don't
4481 recompute the same information. The implementation of this KB is
4482 transparent to the optimizer, and thus the KB can be changed with a
4483 more efficient implementation, or the KB could be disabled. */
4485 analyze_all_data_dependences (struct loop
*loop
)
4488 int nb_data_refs
= 10;
4489 VEC (data_reference_p
, heap
) *datarefs
=
4490 VEC_alloc (data_reference_p
, heap
, nb_data_refs
);
4491 VEC (ddr_p
, heap
) *dependence_relations
=
4492 VEC_alloc (ddr_p
, heap
, nb_data_refs
* nb_data_refs
);
4493 VEC (loop_p
, heap
) *loop_nest
= VEC_alloc (loop_p
, heap
, 3);
4495 /* Compute DDs on the whole function. */
4496 compute_data_dependences_for_loop (loop
, false, &loop_nest
, &datarefs
,
4497 &dependence_relations
);
4501 dump_data_dependence_relations (dump_file
, dependence_relations
);
4502 fprintf (dump_file
, "\n\n");
4504 if (dump_flags
& TDF_DETAILS
)
4505 dump_dist_dir_vectors (dump_file
, dependence_relations
);
4507 if (dump_flags
& TDF_STATS
)
4509 unsigned nb_top_relations
= 0;
4510 unsigned nb_bot_relations
= 0;
4511 unsigned nb_chrec_relations
= 0;
4512 struct data_dependence_relation
*ddr
;
4514 FOR_EACH_VEC_ELT (ddr_p
, dependence_relations
, i
, ddr
)
4516 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr
)))
4519 else if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
4523 nb_chrec_relations
++;
4526 gather_stats_on_scev_database ();
4530 VEC_free (loop_p
, heap
, loop_nest
);
4531 free_dependence_relations (dependence_relations
);
4532 free_data_refs (datarefs
);
4535 /* Computes all the data dependences and check that the results of
4536 several analyzers are the same. */
4539 tree_check_data_deps (void)
4542 struct loop
*loop_nest
;
4544 FOR_EACH_LOOP (li
, loop_nest
, 0)
4545 analyze_all_data_dependences (loop_nest
);
4548 /* Free the memory used by a data dependence relation DDR. */
4551 free_dependence_relation (struct data_dependence_relation
*ddr
)
4556 if (DDR_SUBSCRIPTS (ddr
))
4557 free_subscripts (DDR_SUBSCRIPTS (ddr
));
4558 if (DDR_DIST_VECTS (ddr
))
4559 VEC_free (lambda_vector
, heap
, DDR_DIST_VECTS (ddr
));
4560 if (DDR_DIR_VECTS (ddr
))
4561 VEC_free (lambda_vector
, heap
, DDR_DIR_VECTS (ddr
));
4566 /* Free the memory used by the data dependence relations from
4567 DEPENDENCE_RELATIONS. */
4570 free_dependence_relations (VEC (ddr_p
, heap
) *dependence_relations
)
4573 struct data_dependence_relation
*ddr
;
4575 FOR_EACH_VEC_ELT (ddr_p
, dependence_relations
, i
, ddr
)
4577 free_dependence_relation (ddr
);
4579 VEC_free (ddr_p
, heap
, dependence_relations
);
4582 /* Free the memory used by the data references from DATAREFS. */
4585 free_data_refs (VEC (data_reference_p
, heap
) *datarefs
)
4588 struct data_reference
*dr
;
4590 FOR_EACH_VEC_ELT (data_reference_p
, datarefs
, i
, dr
)
4592 VEC_free (data_reference_p
, heap
, datarefs
);
4597 /* Dump vertex I in RDG to FILE. */
4600 dump_rdg_vertex (FILE *file
, struct graph
*rdg
, int i
)
4602 struct vertex
*v
= &(rdg
->vertices
[i
]);
4603 struct graph_edge
*e
;
4605 fprintf (file
, "(vertex %d: (%s%s) (in:", i
,
4606 RDG_MEM_WRITE_STMT (rdg
, i
) ? "w" : "",
4607 RDG_MEM_READS_STMT (rdg
, i
) ? "r" : "");
4610 for (e
= v
->pred
; e
; e
= e
->pred_next
)
4611 fprintf (file
, " %d", e
->src
);
4613 fprintf (file
, ") (out:");
4616 for (e
= v
->succ
; e
; e
= e
->succ_next
)
4617 fprintf (file
, " %d", e
->dest
);
4619 fprintf (file
, ")\n");
4620 print_gimple_stmt (file
, RDGV_STMT (v
), 0, TDF_VOPS
|TDF_MEMSYMS
);
4621 fprintf (file
, ")\n");
4624 /* Call dump_rdg_vertex on stderr. */
4627 debug_rdg_vertex (struct graph
*rdg
, int i
)
4629 dump_rdg_vertex (stderr
, rdg
, i
);
4632 /* Dump component C of RDG to FILE. If DUMPED is non-null, set the
4633 dumped vertices to that bitmap. */
4635 void dump_rdg_component (FILE *file
, struct graph
*rdg
, int c
, bitmap dumped
)
4639 fprintf (file
, "(%d\n", c
);
4641 for (i
= 0; i
< rdg
->n_vertices
; i
++)
4642 if (rdg
->vertices
[i
].component
== c
)
4645 bitmap_set_bit (dumped
, i
);
4647 dump_rdg_vertex (file
, rdg
, i
);
4650 fprintf (file
, ")\n");
4653 /* Call dump_rdg_vertex on stderr. */
4656 debug_rdg_component (struct graph
*rdg
, int c
)
4658 dump_rdg_component (stderr
, rdg
, c
, NULL
);
4661 /* Dump the reduced dependence graph RDG to FILE. */
4664 dump_rdg (FILE *file
, struct graph
*rdg
)
4667 bitmap dumped
= BITMAP_ALLOC (NULL
);
4669 fprintf (file
, "(rdg\n");
4671 for (i
= 0; i
< rdg
->n_vertices
; i
++)
4672 if (!bitmap_bit_p (dumped
, i
))
4673 dump_rdg_component (file
, rdg
, rdg
->vertices
[i
].component
, dumped
);
4675 fprintf (file
, ")\n");
4676 BITMAP_FREE (dumped
);
4679 /* Call dump_rdg on stderr. */
4682 debug_rdg (struct graph
*rdg
)
4684 dump_rdg (stderr
, rdg
);
4688 dot_rdg_1 (FILE *file
, struct graph
*rdg
)
4692 fprintf (file
, "digraph RDG {\n");
4694 for (i
= 0; i
< rdg
->n_vertices
; i
++)
4696 struct vertex
*v
= &(rdg
->vertices
[i
]);
4697 struct graph_edge
*e
;
4699 /* Highlight reads from memory. */
4700 if (RDG_MEM_READS_STMT (rdg
, i
))
4701 fprintf (file
, "%d [style=filled, fillcolor=green]\n", i
);
4703 /* Highlight stores to memory. */
4704 if (RDG_MEM_WRITE_STMT (rdg
, i
))
4705 fprintf (file
, "%d [style=filled, fillcolor=red]\n", i
);
4708 for (e
= v
->succ
; e
; e
= e
->succ_next
)
4709 switch (RDGE_TYPE (e
))
4712 fprintf (file
, "%d -> %d [label=input] \n", i
, e
->dest
);
4716 fprintf (file
, "%d -> %d [label=output] \n", i
, e
->dest
);
4720 /* These are the most common dependences: don't print these. */
4721 fprintf (file
, "%d -> %d \n", i
, e
->dest
);
4725 fprintf (file
, "%d -> %d [label=anti] \n", i
, e
->dest
);
4733 fprintf (file
, "}\n\n");
4736 /* Display the Reduced Dependence Graph using dotty. */
4737 extern void dot_rdg (struct graph
*);
4740 dot_rdg (struct graph
*rdg
)
4742 /* When debugging, enable the following code. This cannot be used
4743 in production compilers because it calls "system". */
4745 FILE *file
= fopen ("/tmp/rdg.dot", "w");
4746 gcc_assert (file
!= NULL
);
4748 dot_rdg_1 (file
, rdg
);
4751 system ("dotty /tmp/rdg.dot &");
4753 dot_rdg_1 (stderr
, rdg
);
4757 /* This structure is used for recording the mapping statement index in
4760 struct GTY(()) rdg_vertex_info
4766 /* Returns the index of STMT in RDG. */
4769 rdg_vertex_for_stmt (struct graph
*rdg
, gimple stmt
)
4771 struct rdg_vertex_info rvi
, *slot
;
4774 slot
= (struct rdg_vertex_info
*) htab_find (rdg
->indices
, &rvi
);
4782 /* Creates an edge in RDG for each distance vector from DDR. The
4783 order that we keep track of in the RDG is the order in which
4784 statements have to be executed. */
4787 create_rdg_edge_for_ddr (struct graph
*rdg
, ddr_p ddr
)
4789 struct graph_edge
*e
;
4791 data_reference_p dra
= DDR_A (ddr
);
4792 data_reference_p drb
= DDR_B (ddr
);
4793 unsigned level
= ddr_dependence_level (ddr
);
4795 /* For non scalar dependences, when the dependence is REVERSED,
4796 statement B has to be executed before statement A. */
4798 && !DDR_REVERSED_P (ddr
))
4800 data_reference_p tmp
= dra
;
4805 va
= rdg_vertex_for_stmt (rdg
, DR_STMT (dra
));
4806 vb
= rdg_vertex_for_stmt (rdg
, DR_STMT (drb
));
4808 if (va
< 0 || vb
< 0)
4811 e
= add_edge (rdg
, va
, vb
);
4812 e
->data
= XNEW (struct rdg_edge
);
4814 RDGE_LEVEL (e
) = level
;
4815 RDGE_RELATION (e
) = ddr
;
4817 /* Determines the type of the data dependence. */
4818 if (DR_IS_READ (dra
) && DR_IS_READ (drb
))
4819 RDGE_TYPE (e
) = input_dd
;
4820 else if (DR_IS_WRITE (dra
) && DR_IS_WRITE (drb
))
4821 RDGE_TYPE (e
) = output_dd
;
4822 else if (DR_IS_WRITE (dra
) && DR_IS_READ (drb
))
4823 RDGE_TYPE (e
) = flow_dd
;
4824 else if (DR_IS_READ (dra
) && DR_IS_WRITE (drb
))
4825 RDGE_TYPE (e
) = anti_dd
;
4828 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
4829 the index of DEF in RDG. */
4832 create_rdg_edges_for_scalar (struct graph
*rdg
, tree def
, int idef
)
4834 use_operand_p imm_use_p
;
4835 imm_use_iterator iterator
;
4837 FOR_EACH_IMM_USE_FAST (imm_use_p
, iterator
, def
)
4839 struct graph_edge
*e
;
4840 int use
= rdg_vertex_for_stmt (rdg
, USE_STMT (imm_use_p
));
4845 e
= add_edge (rdg
, idef
, use
);
4846 e
->data
= XNEW (struct rdg_edge
);
4847 RDGE_TYPE (e
) = flow_dd
;
4848 RDGE_RELATION (e
) = NULL
;
4852 /* Creates the edges of the reduced dependence graph RDG. */
4855 create_rdg_edges (struct graph
*rdg
, VEC (ddr_p
, heap
) *ddrs
)
4858 struct data_dependence_relation
*ddr
;
4859 def_operand_p def_p
;
4862 FOR_EACH_VEC_ELT (ddr_p
, ddrs
, i
, ddr
)
4863 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
4864 create_rdg_edge_for_ddr (rdg
, ddr
);
4866 for (i
= 0; i
< rdg
->n_vertices
; i
++)
4867 FOR_EACH_PHI_OR_STMT_DEF (def_p
, RDG_STMT (rdg
, i
),
4869 create_rdg_edges_for_scalar (rdg
, DEF_FROM_PTR (def_p
), i
);
4872 /* Build the vertices of the reduced dependence graph RDG. */
4875 create_rdg_vertices (struct graph
*rdg
, VEC (gimple
, heap
) *stmts
)
4880 FOR_EACH_VEC_ELT (gimple
, stmts
, i
, stmt
)
4882 VEC (data_ref_loc
, heap
) *references
;
4884 struct vertex
*v
= &(rdg
->vertices
[i
]);
4885 struct rdg_vertex_info
*rvi
= XNEW (struct rdg_vertex_info
);
4886 struct rdg_vertex_info
**slot
;
4890 slot
= (struct rdg_vertex_info
**) htab_find_slot (rdg
->indices
, rvi
, INSERT
);
4897 v
->data
= XNEW (struct rdg_vertex
);
4898 RDG_STMT (rdg
, i
) = stmt
;
4900 RDG_MEM_WRITE_STMT (rdg
, i
) = false;
4901 RDG_MEM_READS_STMT (rdg
, i
) = false;
4902 if (gimple_code (stmt
) == GIMPLE_PHI
)
4905 get_references_in_stmt (stmt
, &references
);
4906 FOR_EACH_VEC_ELT (data_ref_loc
, references
, j
, ref
)
4908 RDG_MEM_WRITE_STMT (rdg
, i
) = true;
4910 RDG_MEM_READS_STMT (rdg
, i
) = true;
4912 VEC_free (data_ref_loc
, heap
, references
);
4916 /* Initialize STMTS with all the statements of LOOP. When
4917 INCLUDE_PHIS is true, include also the PHI nodes. The order in
4918 which we discover statements is important as
4919 generate_loops_for_partition is using the same traversal for
4920 identifying statements. */
4923 stmts_from_loop (struct loop
*loop
, VEC (gimple
, heap
) **stmts
)
4926 basic_block
*bbs
= get_loop_body_in_dom_order (loop
);
4928 for (i
= 0; i
< loop
->num_nodes
; i
++)
4930 basic_block bb
= bbs
[i
];
4931 gimple_stmt_iterator bsi
;
4934 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
4935 VEC_safe_push (gimple
, heap
, *stmts
, gsi_stmt (bsi
));
4937 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
4939 stmt
= gsi_stmt (bsi
);
4940 if (gimple_code (stmt
) != GIMPLE_LABEL
&& !is_gimple_debug (stmt
))
4941 VEC_safe_push (gimple
, heap
, *stmts
, stmt
);
4948 /* Returns true when all the dependences are computable. */
4951 known_dependences_p (VEC (ddr_p
, heap
) *dependence_relations
)
4956 FOR_EACH_VEC_ELT (ddr_p
, dependence_relations
, i
, ddr
)
4957 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
4963 /* Computes a hash function for element ELT. */
4966 hash_stmt_vertex_info (const void *elt
)
4968 const struct rdg_vertex_info
*const rvi
=
4969 (const struct rdg_vertex_info
*) elt
;
4970 gimple stmt
= rvi
->stmt
;
4972 return htab_hash_pointer (stmt
);
4975 /* Compares database elements E1 and E2. */
4978 eq_stmt_vertex_info (const void *e1
, const void *e2
)
4980 const struct rdg_vertex_info
*elt1
= (const struct rdg_vertex_info
*) e1
;
4981 const struct rdg_vertex_info
*elt2
= (const struct rdg_vertex_info
*) e2
;
4983 return elt1
->stmt
== elt2
->stmt
;
4986 /* Free the element E. */
4989 hash_stmt_vertex_del (void *e
)
4994 /* Build the Reduced Dependence Graph (RDG) with one vertex per
4995 statement of the loop nest, and one edge per data dependence or
4996 scalar dependence. */
4999 build_empty_rdg (int n_stmts
)
5001 int nb_data_refs
= 10;
5002 struct graph
*rdg
= new_graph (n_stmts
);
5004 rdg
->indices
= htab_create (nb_data_refs
, hash_stmt_vertex_info
,
5005 eq_stmt_vertex_info
, hash_stmt_vertex_del
);
5009 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5010 statement of the loop nest, and one edge per data dependence or
5011 scalar dependence. */
5014 build_rdg (struct loop
*loop
,
5015 VEC (loop_p
, heap
) **loop_nest
,
5016 VEC (ddr_p
, heap
) **dependence_relations
,
5017 VEC (data_reference_p
, heap
) **datarefs
)
5019 struct graph
*rdg
= NULL
;
5020 VEC (gimple
, heap
) *stmts
= VEC_alloc (gimple
, heap
, 10);
5022 compute_data_dependences_for_loop (loop
, false, loop_nest
, datarefs
,
5023 dependence_relations
);
5025 if (known_dependences_p (*dependence_relations
))
5027 stmts_from_loop (loop
, &stmts
);
5028 rdg
= build_empty_rdg (VEC_length (gimple
, stmts
));
5029 create_rdg_vertices (rdg
, stmts
);
5030 create_rdg_edges (rdg
, *dependence_relations
);
5033 VEC_free (gimple
, heap
, stmts
);
5037 /* Free the reduced dependence graph RDG. */
5040 free_rdg (struct graph
*rdg
)
5044 for (i
= 0; i
< rdg
->n_vertices
; i
++)
5046 struct vertex
*v
= &(rdg
->vertices
[i
]);
5047 struct graph_edge
*e
;
5049 for (e
= v
->succ
; e
; e
= e
->succ_next
)
5055 htab_delete (rdg
->indices
);
5059 /* Initialize STMTS with all the statements of LOOP that contain a
5063 stores_from_loop (struct loop
*loop
, VEC (gimple
, heap
) **stmts
)
5066 basic_block
*bbs
= get_loop_body_in_dom_order (loop
);
5068 for (i
= 0; i
< loop
->num_nodes
; i
++)
5070 basic_block bb
= bbs
[i
];
5071 gimple_stmt_iterator bsi
;
5073 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
5074 if (gimple_vdef (gsi_stmt (bsi
)))
5075 VEC_safe_push (gimple
, heap
, *stmts
, gsi_stmt (bsi
));
5081 /* Returns true when the statement at STMT is of the form "A[i] = 0"
5082 that contains a data reference on its LHS with a stride of the same
5083 size as its unit type. */
5086 stmt_with_adjacent_zero_store_dr_p (gimple stmt
)
5090 struct data_reference
*dr
;
5093 || !gimple_vdef (stmt
)
5094 || !is_gimple_assign (stmt
)
5095 || !gimple_assign_single_p (stmt
)
5096 || !(op1
= gimple_assign_rhs1 (stmt
))
5097 || !(integer_zerop (op1
) || real_zerop (op1
)))
5100 dr
= XCNEW (struct data_reference
);
5101 op0
= gimple_assign_lhs (stmt
);
5103 DR_STMT (dr
) = stmt
;
5106 res
= dr_analyze_innermost (dr
)
5107 && stride_of_unit_type_p (DR_STEP (dr
), TREE_TYPE (op0
));
5113 /* Initialize STMTS with all the statements of LOOP that contain a
5114 store to memory of the form "A[i] = 0". */
5117 stores_zero_from_loop (struct loop
*loop
, VEC (gimple
, heap
) **stmts
)
5121 gimple_stmt_iterator si
;
5123 basic_block
*bbs
= get_loop_body_in_dom_order (loop
);
5125 for (i
= 0; i
< loop
->num_nodes
; i
++)
5126 for (bb
= bbs
[i
], si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
5127 if ((stmt
= gsi_stmt (si
))
5128 && stmt_with_adjacent_zero_store_dr_p (stmt
))
5129 VEC_safe_push (gimple
, heap
, *stmts
, gsi_stmt (si
));
5134 /* For a data reference REF, return the declaration of its base
5135 address or NULL_TREE if the base is not determined. */
5138 ref_base_address (gimple stmt
, data_ref_loc
*ref
)
5140 tree base
= NULL_TREE
;
5142 struct data_reference
*dr
= XCNEW (struct data_reference
);
5144 DR_STMT (dr
) = stmt
;
5145 DR_REF (dr
) = *ref
->pos
;
5146 dr_analyze_innermost (dr
);
5147 base_address
= DR_BASE_ADDRESS (dr
);
5152 switch (TREE_CODE (base_address
))
5155 base
= TREE_OPERAND (base_address
, 0);
5159 base
= base_address
;
5168 /* Determines whether the statement from vertex V of the RDG has a
5169 definition used outside the loop that contains this statement. */
5172 rdg_defs_used_in_other_loops_p (struct graph
*rdg
, int v
)
5174 gimple stmt
= RDG_STMT (rdg
, v
);
5175 struct loop
*loop
= loop_containing_stmt (stmt
);
5176 use_operand_p imm_use_p
;
5177 imm_use_iterator iterator
;
5179 def_operand_p def_p
;
5184 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt
, it
, SSA_OP_DEF
)
5186 FOR_EACH_IMM_USE_FAST (imm_use_p
, iterator
, DEF_FROM_PTR (def_p
))
5188 if (loop_containing_stmt (USE_STMT (imm_use_p
)) != loop
)
5196 /* Determines whether statements S1 and S2 access to similar memory
5197 locations. Two memory accesses are considered similar when they
5198 have the same base address declaration, i.e. when their
5199 ref_base_address is the same. */
5202 have_similar_memory_accesses (gimple s1
, gimple s2
)
5206 VEC (data_ref_loc
, heap
) *refs1
, *refs2
;
5207 data_ref_loc
*ref1
, *ref2
;
5209 get_references_in_stmt (s1
, &refs1
);
5210 get_references_in_stmt (s2
, &refs2
);
5212 FOR_EACH_VEC_ELT (data_ref_loc
, refs1
, i
, ref1
)
5214 tree base1
= ref_base_address (s1
, ref1
);
5217 FOR_EACH_VEC_ELT (data_ref_loc
, refs2
, j
, ref2
)
5218 if (base1
== ref_base_address (s2
, ref2
))
5226 VEC_free (data_ref_loc
, heap
, refs1
);
5227 VEC_free (data_ref_loc
, heap
, refs2
);
5231 /* Helper function for the hashtab. */
5234 have_similar_memory_accesses_1 (const void *s1
, const void *s2
)
5236 return have_similar_memory_accesses (CONST_CAST_GIMPLE ((const_gimple
) s1
),
5237 CONST_CAST_GIMPLE ((const_gimple
) s2
));
5240 /* Helper function for the hashtab. */
5243 ref_base_address_1 (const void *s
)
5245 gimple stmt
= CONST_CAST_GIMPLE ((const_gimple
) s
);
5247 VEC (data_ref_loc
, heap
) *refs
;
5251 get_references_in_stmt (stmt
, &refs
);
5253 FOR_EACH_VEC_ELT (data_ref_loc
, refs
, i
, ref
)
5256 res
= htab_hash_pointer (ref_base_address (stmt
, ref
));
5260 VEC_free (data_ref_loc
, heap
, refs
);
5264 /* Try to remove duplicated write data references from STMTS. */
5267 remove_similar_memory_refs (VEC (gimple
, heap
) **stmts
)
5271 htab_t seen
= htab_create (VEC_length (gimple
, *stmts
), ref_base_address_1
,
5272 have_similar_memory_accesses_1
, NULL
);
5274 for (i
= 0; VEC_iterate (gimple
, *stmts
, i
, stmt
); )
5278 slot
= htab_find_slot (seen
, stmt
, INSERT
);
5281 VEC_ordered_remove (gimple
, *stmts
, i
);
5284 *slot
= (void *) stmt
;
5292 /* Returns the index of PARAMETER in the parameters vector of the
5293 ACCESS_MATRIX. If PARAMETER does not exist return -1. */
5296 access_matrix_get_index_for_parameter (tree parameter
,
5297 struct access_matrix
*access_matrix
)
5300 VEC (tree
,heap
) *lambda_parameters
= AM_PARAMETERS (access_matrix
);
5301 tree lambda_parameter
;
5303 FOR_EACH_VEC_ELT (tree
, lambda_parameters
, i
, lambda_parameter
)
5304 if (lambda_parameter
== parameter
)
5305 return i
+ AM_NB_INDUCTION_VARS (access_matrix
);