1 /* Data references and dependences detectors.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* This pass walks a given loop structure searching for array
23 references. The information about the array accesses is recorded
24 in DATA_REFERENCE structures.
26 The basic test for determining the dependences is:
27 given two access functions chrec1 and chrec2 to a same array, and
28 x and y two vectors from the iteration domain, the same element of
29 the array is accessed twice at iterations x and y if and only if:
30 | chrec1 (x) == chrec2 (y).
32 The goals of this analysis are:
34 - to determine the independence: the relation between two
35 independent accesses is qualified with the chrec_known (this
36 information allows a loop parallelization),
38 - when two data references access the same data, to qualify the
39 dependence relation with classic dependence representations:
43 - loop carried level dependence
44 - polyhedron dependence
45 or with the chains of recurrences based representation,
47 - to define a knowledge base for storing the data dependence
50 - to define an interface to access this data.
55 - subscript: given two array accesses a subscript is the tuple
56 composed of the access functions for a given dimension. Example:
57 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
58 (f1, g1), (f2, g2), (f3, g3).
60 - Diophantine equation: an equation whose coefficients and
61 solutions are integer constants, for example the equation
63 has an integer solution x = 1 and y = -1.
67 - "Advanced Compilation for High Performance Computing" by Randy
68 Allen and Ken Kennedy.
69 http://citeseer.ist.psu.edu/goff91practical.html
71 - "Loop Transformations for Restructuring Compilers - The Foundations"
79 #include "coretypes.h"
80 #include "gimple-pretty-print.h"
81 #include "tree-flow.h"
83 #include "tree-data-ref.h"
84 #include "tree-scalar-evolution.h"
85 #include "tree-pass.h"
86 #include "langhooks.h"
88 static struct datadep_stats
90 int num_dependence_tests
;
91 int num_dependence_dependent
;
92 int num_dependence_independent
;
93 int num_dependence_undetermined
;
95 int num_subscript_tests
;
96 int num_subscript_undetermined
;
97 int num_same_subscript_function
;
100 int num_ziv_independent
;
101 int num_ziv_dependent
;
102 int num_ziv_unimplemented
;
105 int num_siv_independent
;
106 int num_siv_dependent
;
107 int num_siv_unimplemented
;
110 int num_miv_independent
;
111 int num_miv_dependent
;
112 int num_miv_unimplemented
;
115 static bool subscript_dependence_tester_1 (struct data_dependence_relation
*,
116 struct data_reference
*,
117 struct data_reference
*,
119 /* Returns true iff A divides B. */
122 tree_fold_divides_p (const_tree a
, const_tree b
)
124 gcc_assert (TREE_CODE (a
) == INTEGER_CST
);
125 gcc_assert (TREE_CODE (b
) == INTEGER_CST
);
126 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR
, b
, a
, 0));
129 /* Returns true iff A divides B. */
132 int_divides_p (int a
, int b
)
134 return ((b
% a
) == 0);
139 /* Dump into FILE all the data references from DATAREFS. */
142 dump_data_references (FILE *file
, VEC (data_reference_p
, heap
) *datarefs
)
145 struct data_reference
*dr
;
147 FOR_EACH_VEC_ELT (data_reference_p
, datarefs
, i
, dr
)
148 dump_data_reference (file
, dr
);
151 /* Dump into STDERR all the data references from DATAREFS. */
154 debug_data_references (VEC (data_reference_p
, heap
) *datarefs
)
156 dump_data_references (stderr
, datarefs
);
159 /* Dump to STDERR all the dependence relations from DDRS. */
162 debug_data_dependence_relations (VEC (ddr_p
, heap
) *ddrs
)
164 dump_data_dependence_relations (stderr
, ddrs
);
167 /* Dump into FILE all the dependence relations from DDRS. */
170 dump_data_dependence_relations (FILE *file
,
171 VEC (ddr_p
, heap
) *ddrs
)
174 struct data_dependence_relation
*ddr
;
176 FOR_EACH_VEC_ELT (ddr_p
, ddrs
, i
, ddr
)
177 dump_data_dependence_relation (file
, ddr
);
180 /* Print to STDERR the data_reference DR. */
183 debug_data_reference (struct data_reference
*dr
)
185 dump_data_reference (stderr
, dr
);
188 /* Dump function for a DATA_REFERENCE structure. */
191 dump_data_reference (FILE *outf
,
192 struct data_reference
*dr
)
196 fprintf (outf
, "#(Data Ref: \n");
197 fprintf (outf
, "# bb: %d \n", gimple_bb (DR_STMT (dr
))->index
);
198 fprintf (outf
, "# stmt: ");
199 print_gimple_stmt (outf
, DR_STMT (dr
), 0, 0);
200 fprintf (outf
, "# ref: ");
201 print_generic_stmt (outf
, DR_REF (dr
), 0);
202 fprintf (outf
, "# base_object: ");
203 print_generic_stmt (outf
, DR_BASE_OBJECT (dr
), 0);
205 for (i
= 0; i
< DR_NUM_DIMENSIONS (dr
); i
++)
207 fprintf (outf
, "# Access function %d: ", i
);
208 print_generic_stmt (outf
, DR_ACCESS_FN (dr
, i
), 0);
210 fprintf (outf
, "#)\n");
213 /* Dumps the affine function described by FN to the file OUTF. */
216 dump_affine_function (FILE *outf
, affine_fn fn
)
221 print_generic_expr (outf
, VEC_index (tree
, fn
, 0), TDF_SLIM
);
222 for (i
= 1; VEC_iterate (tree
, fn
, i
, coef
); i
++)
224 fprintf (outf
, " + ");
225 print_generic_expr (outf
, coef
, TDF_SLIM
);
226 fprintf (outf
, " * x_%u", i
);
230 /* Dumps the conflict function CF to the file OUTF. */
233 dump_conflict_function (FILE *outf
, conflict_function
*cf
)
237 if (cf
->n
== NO_DEPENDENCE
)
238 fprintf (outf
, "no dependence\n");
239 else if (cf
->n
== NOT_KNOWN
)
240 fprintf (outf
, "not known\n");
243 for (i
= 0; i
< cf
->n
; i
++)
246 dump_affine_function (outf
, cf
->fns
[i
]);
247 fprintf (outf
, "]\n");
252 /* Dump function for a SUBSCRIPT structure. */
255 dump_subscript (FILE *outf
, struct subscript
*subscript
)
257 conflict_function
*cf
= SUB_CONFLICTS_IN_A (subscript
);
259 fprintf (outf
, "\n (subscript \n");
260 fprintf (outf
, " iterations_that_access_an_element_twice_in_A: ");
261 dump_conflict_function (outf
, cf
);
262 if (CF_NONTRIVIAL_P (cf
))
264 tree last_iteration
= SUB_LAST_CONFLICT (subscript
);
265 fprintf (outf
, " last_conflict: ");
266 print_generic_stmt (outf
, last_iteration
, 0);
269 cf
= SUB_CONFLICTS_IN_B (subscript
);
270 fprintf (outf
, " iterations_that_access_an_element_twice_in_B: ");
271 dump_conflict_function (outf
, cf
);
272 if (CF_NONTRIVIAL_P (cf
))
274 tree last_iteration
= SUB_LAST_CONFLICT (subscript
);
275 fprintf (outf
, " last_conflict: ");
276 print_generic_stmt (outf
, last_iteration
, 0);
279 fprintf (outf
, " (Subscript distance: ");
280 print_generic_stmt (outf
, SUB_DISTANCE (subscript
), 0);
281 fprintf (outf
, " )\n");
282 fprintf (outf
, " )\n");
285 /* Print the classic direction vector DIRV to OUTF. */
288 print_direction_vector (FILE *outf
,
294 for (eq
= 0; eq
< length
; eq
++)
296 enum data_dependence_direction dir
= ((enum data_dependence_direction
)
302 fprintf (outf
, " +");
305 fprintf (outf
, " -");
308 fprintf (outf
, " =");
310 case dir_positive_or_equal
:
311 fprintf (outf
, " +=");
313 case dir_positive_or_negative
:
314 fprintf (outf
, " +-");
316 case dir_negative_or_equal
:
317 fprintf (outf
, " -=");
320 fprintf (outf
, " *");
323 fprintf (outf
, "indep");
327 fprintf (outf
, "\n");
330 /* Print a vector of direction vectors. */
333 print_dir_vectors (FILE *outf
, VEC (lambda_vector
, heap
) *dir_vects
,
339 FOR_EACH_VEC_ELT (lambda_vector
, dir_vects
, j
, v
)
340 print_direction_vector (outf
, v
, length
);
343 /* Print out a vector VEC of length N to OUTFILE. */
346 print_lambda_vector (FILE * outfile
, lambda_vector vector
, int n
)
350 for (i
= 0; i
< n
; i
++)
351 fprintf (outfile
, "%3d ", vector
[i
]);
352 fprintf (outfile
, "\n");
355 /* Print a vector of distance vectors. */
358 print_dist_vectors (FILE *outf
, VEC (lambda_vector
, heap
) *dist_vects
,
364 FOR_EACH_VEC_ELT (lambda_vector
, dist_vects
, j
, v
)
365 print_lambda_vector (outf
, v
, length
);
371 debug_data_dependence_relation (struct data_dependence_relation
*ddr
)
373 dump_data_dependence_relation (stderr
, ddr
);
376 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
379 dump_data_dependence_relation (FILE *outf
,
380 struct data_dependence_relation
*ddr
)
382 struct data_reference
*dra
, *drb
;
384 fprintf (outf
, "(Data Dep: \n");
386 if (!ddr
|| DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
393 dump_data_reference (outf
, dra
);
395 fprintf (outf
, " (nil)\n");
397 dump_data_reference (outf
, drb
);
399 fprintf (outf
, " (nil)\n");
401 fprintf (outf
, " (don't know)\n)\n");
407 dump_data_reference (outf
, dra
);
408 dump_data_reference (outf
, drb
);
410 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
411 fprintf (outf
, " (no dependence)\n");
413 else if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
418 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
420 fprintf (outf
, " access_fn_A: ");
421 print_generic_stmt (outf
, DR_ACCESS_FN (dra
, i
), 0);
422 fprintf (outf
, " access_fn_B: ");
423 print_generic_stmt (outf
, DR_ACCESS_FN (drb
, i
), 0);
424 dump_subscript (outf
, DDR_SUBSCRIPT (ddr
, i
));
427 fprintf (outf
, " inner loop index: %d\n", DDR_INNER_LOOP (ddr
));
428 fprintf (outf
, " loop nest: (");
429 FOR_EACH_VEC_ELT (loop_p
, DDR_LOOP_NEST (ddr
), i
, loopi
)
430 fprintf (outf
, "%d ", loopi
->num
);
431 fprintf (outf
, ")\n");
433 for (i
= 0; i
< DDR_NUM_DIST_VECTS (ddr
); i
++)
435 fprintf (outf
, " distance_vector: ");
436 print_lambda_vector (outf
, DDR_DIST_VECT (ddr
, i
),
440 for (i
= 0; i
< DDR_NUM_DIR_VECTS (ddr
); i
++)
442 fprintf (outf
, " direction_vector: ");
443 print_direction_vector (outf
, DDR_DIR_VECT (ddr
, i
),
448 fprintf (outf
, ")\n");
451 /* Dump function for a DATA_DEPENDENCE_DIRECTION structure. */
454 dump_data_dependence_direction (FILE *file
,
455 enum data_dependence_direction dir
)
471 case dir_positive_or_negative
:
472 fprintf (file
, "+-");
475 case dir_positive_or_equal
:
476 fprintf (file
, "+=");
479 case dir_negative_or_equal
:
480 fprintf (file
, "-=");
492 /* Dumps the distance and direction vectors in FILE. DDRS contains
493 the dependence relations, and VECT_SIZE is the size of the
494 dependence vectors, or in other words the number of loops in the
498 dump_dist_dir_vectors (FILE *file
, VEC (ddr_p
, heap
) *ddrs
)
501 struct data_dependence_relation
*ddr
;
504 FOR_EACH_VEC_ELT (ddr_p
, ddrs
, i
, ddr
)
505 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
&& DDR_AFFINE_P (ddr
))
507 FOR_EACH_VEC_ELT (lambda_vector
, DDR_DIST_VECTS (ddr
), j
, v
)
509 fprintf (file
, "DISTANCE_V (");
510 print_lambda_vector (file
, v
, DDR_NB_LOOPS (ddr
));
511 fprintf (file
, ")\n");
514 FOR_EACH_VEC_ELT (lambda_vector
, DDR_DIR_VECTS (ddr
), j
, v
)
516 fprintf (file
, "DIRECTION_V (");
517 print_direction_vector (file
, v
, DDR_NB_LOOPS (ddr
));
518 fprintf (file
, ")\n");
522 fprintf (file
, "\n\n");
525 /* Dumps the data dependence relations DDRS in FILE. */
528 dump_ddrs (FILE *file
, VEC (ddr_p
, heap
) *ddrs
)
531 struct data_dependence_relation
*ddr
;
533 FOR_EACH_VEC_ELT (ddr_p
, ddrs
, i
, ddr
)
534 dump_data_dependence_relation (file
, ddr
);
536 fprintf (file
, "\n\n");
539 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
540 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
541 constant of type ssizetype, and returns true. If we cannot do this
542 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
546 split_constant_offset_1 (tree type
, tree op0
, enum tree_code code
, tree op1
,
547 tree
*var
, tree
*off
)
551 enum tree_code ocode
= code
;
559 *var
= build_int_cst (type
, 0);
560 *off
= fold_convert (ssizetype
, op0
);
563 case POINTER_PLUS_EXPR
:
568 split_constant_offset (op0
, &var0
, &off0
);
569 split_constant_offset (op1
, &var1
, &off1
);
570 *var
= fold_build2 (code
, type
, var0
, var1
);
571 *off
= size_binop (ocode
, off0
, off1
);
575 if (TREE_CODE (op1
) != INTEGER_CST
)
578 split_constant_offset (op0
, &var0
, &off0
);
579 *var
= fold_build2 (MULT_EXPR
, type
, var0
, op1
);
580 *off
= size_binop (MULT_EXPR
, off0
, fold_convert (ssizetype
, op1
));
586 HOST_WIDE_INT pbitsize
, pbitpos
;
587 enum machine_mode pmode
;
588 int punsignedp
, pvolatilep
;
590 op0
= TREE_OPERAND (op0
, 0);
591 if (!handled_component_p (op0
))
594 base
= get_inner_reference (op0
, &pbitsize
, &pbitpos
, &poffset
,
595 &pmode
, &punsignedp
, &pvolatilep
, false);
597 if (pbitpos
% BITS_PER_UNIT
!= 0)
599 base
= build_fold_addr_expr (base
);
600 off0
= ssize_int (pbitpos
/ BITS_PER_UNIT
);
604 split_constant_offset (poffset
, &poffset
, &off1
);
605 off0
= size_binop (PLUS_EXPR
, off0
, off1
);
606 if (POINTER_TYPE_P (TREE_TYPE (base
)))
607 base
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (base
),
608 base
, fold_convert (sizetype
, poffset
));
610 base
= fold_build2 (PLUS_EXPR
, TREE_TYPE (base
), base
,
611 fold_convert (TREE_TYPE (base
), poffset
));
614 var0
= fold_convert (type
, base
);
616 /* If variable length types are involved, punt, otherwise casts
617 might be converted into ARRAY_REFs in gimplify_conversion.
618 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
619 possibly no longer appears in current GIMPLE, might resurface.
620 This perhaps could run
621 if (CONVERT_EXPR_P (var0))
623 gimplify_conversion (&var0);
624 // Attempt to fill in any within var0 found ARRAY_REF's
625 // element size from corresponding op embedded ARRAY_REF,
626 // if unsuccessful, just punt.
628 while (POINTER_TYPE_P (type
))
629 type
= TREE_TYPE (type
);
630 if (int_size_in_bytes (type
) < 0)
640 gimple def_stmt
= SSA_NAME_DEF_STMT (op0
);
641 enum tree_code subcode
;
643 if (gimple_code (def_stmt
) != GIMPLE_ASSIGN
)
646 var0
= gimple_assign_rhs1 (def_stmt
);
647 subcode
= gimple_assign_rhs_code (def_stmt
);
648 var1
= gimple_assign_rhs2 (def_stmt
);
650 return split_constant_offset_1 (type
, var0
, subcode
, var1
, var
, off
);
654 /* We must not introduce undefined overflow, and we must not change the value.
655 Hence we're okay if the inner type doesn't overflow to start with
656 (pointer or signed), the outer type also is an integer or pointer
657 and the outer precision is at least as large as the inner. */
658 tree itype
= TREE_TYPE (op0
);
659 if ((POINTER_TYPE_P (itype
)
660 || (INTEGRAL_TYPE_P (itype
) && TYPE_OVERFLOW_UNDEFINED (itype
)))
661 && TYPE_PRECISION (type
) >= TYPE_PRECISION (itype
)
662 && (POINTER_TYPE_P (type
) || INTEGRAL_TYPE_P (type
)))
664 split_constant_offset (op0
, &var0
, off
);
665 *var
= fold_convert (type
, var0
);
676 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
677 will be ssizetype. */
680 split_constant_offset (tree exp
, tree
*var
, tree
*off
)
682 tree type
= TREE_TYPE (exp
), otype
, op0
, op1
, e
, o
;
686 *off
= ssize_int (0);
689 if (tree_is_chrec (exp
))
692 otype
= TREE_TYPE (exp
);
693 code
= TREE_CODE (exp
);
694 extract_ops_from_tree (exp
, &code
, &op0
, &op1
);
695 if (split_constant_offset_1 (otype
, op0
, code
, op1
, &e
, &o
))
697 *var
= fold_convert (type
, e
);
702 /* Returns the address ADDR of an object in a canonical shape (without nop
703 casts, and with type of pointer to the object). */
706 canonicalize_base_object_address (tree addr
)
712 /* The base address may be obtained by casting from integer, in that case
714 if (!POINTER_TYPE_P (TREE_TYPE (addr
)))
717 if (TREE_CODE (addr
) != ADDR_EXPR
)
720 return build_fold_addr_expr (TREE_OPERAND (addr
, 0));
723 /* Analyzes the behavior of the memory reference DR in the innermost loop or
724 basic block that contains it. Returns true if analysis succeed or false
728 dr_analyze_innermost (struct data_reference
*dr
)
730 gimple stmt
= DR_STMT (dr
);
731 struct loop
*loop
= loop_containing_stmt (stmt
);
732 tree ref
= DR_REF (dr
);
733 HOST_WIDE_INT pbitsize
, pbitpos
;
735 enum machine_mode pmode
;
736 int punsignedp
, pvolatilep
;
737 affine_iv base_iv
, offset_iv
;
738 tree init
, dinit
, step
;
739 bool in_loop
= (loop
&& loop
->num
);
741 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
742 fprintf (dump_file
, "analyze_innermost: ");
744 base
= get_inner_reference (ref
, &pbitsize
, &pbitpos
, &poffset
,
745 &pmode
, &punsignedp
, &pvolatilep
, false);
746 gcc_assert (base
!= NULL_TREE
);
748 if (pbitpos
% BITS_PER_UNIT
!= 0)
750 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
751 fprintf (dump_file
, "failed: bit offset alignment.\n");
755 if (TREE_CODE (base
) == MEM_REF
)
757 if (!integer_zerop (TREE_OPERAND (base
, 1)))
761 double_int moff
= mem_ref_offset (base
);
762 poffset
= double_int_to_tree (sizetype
, moff
);
765 poffset
= size_binop (PLUS_EXPR
, poffset
, TREE_OPERAND (base
, 1));
767 base
= TREE_OPERAND (base
, 0);
770 base
= build_fold_addr_expr (base
);
773 if (!simple_iv (loop
, loop_containing_stmt (stmt
), base
, &base_iv
,
776 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
777 fprintf (dump_file
, "failed: evolution of base is not affine.\n");
784 base_iv
.step
= ssize_int (0);
785 base_iv
.no_overflow
= true;
790 offset_iv
.base
= ssize_int (0);
791 offset_iv
.step
= ssize_int (0);
797 offset_iv
.base
= poffset
;
798 offset_iv
.step
= ssize_int (0);
800 else if (!simple_iv (loop
, loop_containing_stmt (stmt
),
801 poffset
, &offset_iv
, false))
803 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
804 fprintf (dump_file
, "failed: evolution of offset is not"
810 init
= ssize_int (pbitpos
/ BITS_PER_UNIT
);
811 split_constant_offset (base_iv
.base
, &base_iv
.base
, &dinit
);
812 init
= size_binop (PLUS_EXPR
, init
, dinit
);
813 split_constant_offset (offset_iv
.base
, &offset_iv
.base
, &dinit
);
814 init
= size_binop (PLUS_EXPR
, init
, dinit
);
816 step
= size_binop (PLUS_EXPR
,
817 fold_convert (ssizetype
, base_iv
.step
),
818 fold_convert (ssizetype
, offset_iv
.step
));
820 DR_BASE_ADDRESS (dr
) = canonicalize_base_object_address (base_iv
.base
);
822 DR_OFFSET (dr
) = fold_convert (ssizetype
, offset_iv
.base
);
826 DR_ALIGNED_TO (dr
) = size_int (highest_pow2_factor (offset_iv
.base
));
828 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
829 fprintf (dump_file
, "success.\n");
834 /* Determines the base object and the list of indices of memory reference
835 DR, analyzed in LOOP and instantiated in loop nest NEST. */
838 dr_analyze_indices (struct data_reference
*dr
, loop_p nest
, loop_p loop
)
840 VEC (tree
, heap
) *access_fns
= NULL
;
841 tree ref
= unshare_expr (DR_REF (dr
)), aref
= ref
, op
;
842 tree base
, off
, access_fn
= NULL_TREE
;
843 basic_block before_loop
= NULL
;
846 before_loop
= block_before_loop (nest
);
848 while (handled_component_p (aref
))
850 if (TREE_CODE (aref
) == ARRAY_REF
)
852 op
= TREE_OPERAND (aref
, 1);
855 access_fn
= analyze_scalar_evolution (loop
, op
);
856 access_fn
= instantiate_scev (before_loop
, loop
, access_fn
);
857 VEC_safe_push (tree
, heap
, access_fns
, access_fn
);
860 TREE_OPERAND (aref
, 1) = build_int_cst (TREE_TYPE (op
), 0);
863 aref
= TREE_OPERAND (aref
, 0);
867 && (INDIRECT_REF_P (aref
)
868 || TREE_CODE (aref
) == MEM_REF
))
870 op
= TREE_OPERAND (aref
, 0);
871 access_fn
= analyze_scalar_evolution (loop
, op
);
872 access_fn
= instantiate_scev (before_loop
, loop
, access_fn
);
873 base
= initial_condition (access_fn
);
874 split_constant_offset (base
, &base
, &off
);
875 if (TREE_CODE (aref
) == MEM_REF
)
876 off
= size_binop (PLUS_EXPR
, off
,
877 fold_convert (ssizetype
, TREE_OPERAND (aref
, 1)));
878 access_fn
= chrec_replace_initial_condition (access_fn
,
879 fold_convert (TREE_TYPE (base
), off
));
881 TREE_OPERAND (aref
, 0) = base
;
882 VEC_safe_push (tree
, heap
, access_fns
, access_fn
);
885 if (TREE_CODE (aref
) == MEM_REF
)
886 TREE_OPERAND (aref
, 1)
887 = build_int_cst (TREE_TYPE (TREE_OPERAND (aref
, 1)), 0);
889 if (TREE_CODE (ref
) == MEM_REF
890 && TREE_CODE (TREE_OPERAND (ref
, 0)) == ADDR_EXPR
891 && integer_zerop (TREE_OPERAND (ref
, 1)))
892 ref
= TREE_OPERAND (TREE_OPERAND (ref
, 0), 0);
894 /* For canonicalization purposes we'd like to strip all outermost
895 zero-offset component-refs.
896 ??? For now simply handle zero-index array-refs. */
897 while (TREE_CODE (ref
) == ARRAY_REF
898 && integer_zerop (TREE_OPERAND (ref
, 1)))
899 ref
= TREE_OPERAND (ref
, 0);
901 DR_BASE_OBJECT (dr
) = ref
;
902 DR_ACCESS_FNS (dr
) = access_fns
;
905 /* Extracts the alias analysis information from the memory reference DR. */
908 dr_analyze_alias (struct data_reference
*dr
)
910 tree ref
= DR_REF (dr
);
911 tree base
= get_base_address (ref
), addr
;
913 if (INDIRECT_REF_P (base
)
914 || TREE_CODE (base
) == MEM_REF
)
916 addr
= TREE_OPERAND (base
, 0);
917 if (TREE_CODE (addr
) == SSA_NAME
)
918 DR_PTR_INFO (dr
) = SSA_NAME_PTR_INFO (addr
);
922 /* Returns true if the address of DR is invariant. */
925 dr_address_invariant_p (struct data_reference
*dr
)
930 FOR_EACH_VEC_ELT (tree
, DR_ACCESS_FNS (dr
), i
, idx
)
931 if (tree_contains_chrecs (idx
, NULL
))
937 /* Frees data reference DR. */
940 free_data_ref (data_reference_p dr
)
942 VEC_free (tree
, heap
, DR_ACCESS_FNS (dr
));
946 /* Analyzes memory reference MEMREF accessed in STMT. The reference
947 is read if IS_READ is true, write otherwise. Returns the
948 data_reference description of MEMREF. NEST is the outermost loop
949 in which the reference should be instantiated, LOOP is the loop in
950 which the data reference should be analyzed. */
952 struct data_reference
*
953 create_data_ref (loop_p nest
, loop_p loop
, tree memref
, gimple stmt
,
956 struct data_reference
*dr
;
958 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
960 fprintf (dump_file
, "Creating dr for ");
961 print_generic_expr (dump_file
, memref
, TDF_SLIM
);
962 fprintf (dump_file
, "\n");
965 dr
= XCNEW (struct data_reference
);
967 DR_REF (dr
) = memref
;
968 DR_IS_READ (dr
) = is_read
;
970 dr_analyze_innermost (dr
);
971 dr_analyze_indices (dr
, nest
, loop
);
972 dr_analyze_alias (dr
);
974 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
976 fprintf (dump_file
, "\tbase_address: ");
977 print_generic_expr (dump_file
, DR_BASE_ADDRESS (dr
), TDF_SLIM
);
978 fprintf (dump_file
, "\n\toffset from base address: ");
979 print_generic_expr (dump_file
, DR_OFFSET (dr
), TDF_SLIM
);
980 fprintf (dump_file
, "\n\tconstant offset from base address: ");
981 print_generic_expr (dump_file
, DR_INIT (dr
), TDF_SLIM
);
982 fprintf (dump_file
, "\n\tstep: ");
983 print_generic_expr (dump_file
, DR_STEP (dr
), TDF_SLIM
);
984 fprintf (dump_file
, "\n\taligned to: ");
985 print_generic_expr (dump_file
, DR_ALIGNED_TO (dr
), TDF_SLIM
);
986 fprintf (dump_file
, "\n\tbase_object: ");
987 print_generic_expr (dump_file
, DR_BASE_OBJECT (dr
), TDF_SLIM
);
988 fprintf (dump_file
, "\n");
994 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
997 dr_equal_offsets_p1 (tree offset1
, tree offset2
)
1001 STRIP_NOPS (offset1
);
1002 STRIP_NOPS (offset2
);
1004 if (offset1
== offset2
)
1007 if (TREE_CODE (offset1
) != TREE_CODE (offset2
)
1008 || (!BINARY_CLASS_P (offset1
) && !UNARY_CLASS_P (offset1
)))
1011 res
= dr_equal_offsets_p1 (TREE_OPERAND (offset1
, 0),
1012 TREE_OPERAND (offset2
, 0));
1014 if (!res
|| !BINARY_CLASS_P (offset1
))
1017 res
= dr_equal_offsets_p1 (TREE_OPERAND (offset1
, 1),
1018 TREE_OPERAND (offset2
, 1));
1023 /* Check if DRA and DRB have equal offsets. */
1025 dr_equal_offsets_p (struct data_reference
*dra
,
1026 struct data_reference
*drb
)
1028 tree offset1
, offset2
;
1030 offset1
= DR_OFFSET (dra
);
1031 offset2
= DR_OFFSET (drb
);
1033 return dr_equal_offsets_p1 (offset1
, offset2
);
1036 /* Returns true if FNA == FNB. */
1039 affine_function_equal_p (affine_fn fna
, affine_fn fnb
)
1041 unsigned i
, n
= VEC_length (tree
, fna
);
1043 if (n
!= VEC_length (tree
, fnb
))
1046 for (i
= 0; i
< n
; i
++)
1047 if (!operand_equal_p (VEC_index (tree
, fna
, i
),
1048 VEC_index (tree
, fnb
, i
), 0))
1054 /* If all the functions in CF are the same, returns one of them,
1055 otherwise returns NULL. */
1058 common_affine_function (conflict_function
*cf
)
1063 if (!CF_NONTRIVIAL_P (cf
))
1068 for (i
= 1; i
< cf
->n
; i
++)
1069 if (!affine_function_equal_p (comm
, cf
->fns
[i
]))
1075 /* Returns the base of the affine function FN. */
1078 affine_function_base (affine_fn fn
)
1080 return VEC_index (tree
, fn
, 0);
1083 /* Returns true if FN is a constant. */
1086 affine_function_constant_p (affine_fn fn
)
1091 for (i
= 1; VEC_iterate (tree
, fn
, i
, coef
); i
++)
1092 if (!integer_zerop (coef
))
1098 /* Returns true if FN is the zero constant function. */
1101 affine_function_zero_p (affine_fn fn
)
1103 return (integer_zerop (affine_function_base (fn
))
1104 && affine_function_constant_p (fn
));
1107 /* Returns a signed integer type with the largest precision from TA
1111 signed_type_for_types (tree ta
, tree tb
)
1113 if (TYPE_PRECISION (ta
) > TYPE_PRECISION (tb
))
1114 return signed_type_for (ta
);
1116 return signed_type_for (tb
);
1119 /* Applies operation OP on affine functions FNA and FNB, and returns the
1123 affine_fn_op (enum tree_code op
, affine_fn fna
, affine_fn fnb
)
1129 if (VEC_length (tree
, fnb
) > VEC_length (tree
, fna
))
1131 n
= VEC_length (tree
, fna
);
1132 m
= VEC_length (tree
, fnb
);
1136 n
= VEC_length (tree
, fnb
);
1137 m
= VEC_length (tree
, fna
);
1140 ret
= VEC_alloc (tree
, heap
, m
);
1141 for (i
= 0; i
< n
; i
++)
1143 tree type
= signed_type_for_types (TREE_TYPE (VEC_index (tree
, fna
, i
)),
1144 TREE_TYPE (VEC_index (tree
, fnb
, i
)));
1146 VEC_quick_push (tree
, ret
,
1147 fold_build2 (op
, type
,
1148 VEC_index (tree
, fna
, i
),
1149 VEC_index (tree
, fnb
, i
)));
1152 for (; VEC_iterate (tree
, fna
, i
, coef
); i
++)
1153 VEC_quick_push (tree
, ret
,
1154 fold_build2 (op
, signed_type_for (TREE_TYPE (coef
)),
1155 coef
, integer_zero_node
));
1156 for (; VEC_iterate (tree
, fnb
, i
, coef
); i
++)
1157 VEC_quick_push (tree
, ret
,
1158 fold_build2 (op
, signed_type_for (TREE_TYPE (coef
)),
1159 integer_zero_node
, coef
));
1164 /* Returns the sum of affine functions FNA and FNB. */
1167 affine_fn_plus (affine_fn fna
, affine_fn fnb
)
1169 return affine_fn_op (PLUS_EXPR
, fna
, fnb
);
1172 /* Returns the difference of affine functions FNA and FNB. */
1175 affine_fn_minus (affine_fn fna
, affine_fn fnb
)
1177 return affine_fn_op (MINUS_EXPR
, fna
, fnb
);
1180 /* Frees affine function FN. */
1183 affine_fn_free (affine_fn fn
)
1185 VEC_free (tree
, heap
, fn
);
1188 /* Determine for each subscript in the data dependence relation DDR
1192 compute_subscript_distance (struct data_dependence_relation
*ddr
)
1194 conflict_function
*cf_a
, *cf_b
;
1195 affine_fn fn_a
, fn_b
, diff
;
1197 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
1201 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
1203 struct subscript
*subscript
;
1205 subscript
= DDR_SUBSCRIPT (ddr
, i
);
1206 cf_a
= SUB_CONFLICTS_IN_A (subscript
);
1207 cf_b
= SUB_CONFLICTS_IN_B (subscript
);
1209 fn_a
= common_affine_function (cf_a
);
1210 fn_b
= common_affine_function (cf_b
);
1213 SUB_DISTANCE (subscript
) = chrec_dont_know
;
1216 diff
= affine_fn_minus (fn_a
, fn_b
);
1218 if (affine_function_constant_p (diff
))
1219 SUB_DISTANCE (subscript
) = affine_function_base (diff
);
1221 SUB_DISTANCE (subscript
) = chrec_dont_know
;
1223 affine_fn_free (diff
);
1228 /* Returns the conflict function for "unknown". */
1230 static conflict_function
*
1231 conflict_fn_not_known (void)
1233 conflict_function
*fn
= XCNEW (conflict_function
);
1239 /* Returns the conflict function for "independent". */
1241 static conflict_function
*
1242 conflict_fn_no_dependence (void)
1244 conflict_function
*fn
= XCNEW (conflict_function
);
1245 fn
->n
= NO_DEPENDENCE
;
1250 /* Returns true if the address of OBJ is invariant in LOOP. */
1253 object_address_invariant_in_loop_p (const struct loop
*loop
, const_tree obj
)
1255 while (handled_component_p (obj
))
1257 if (TREE_CODE (obj
) == ARRAY_REF
)
1259 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1260 need to check the stride and the lower bound of the reference. */
1261 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, 2),
1263 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, 3),
1267 else if (TREE_CODE (obj
) == COMPONENT_REF
)
1269 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, 2),
1273 obj
= TREE_OPERAND (obj
, 0);
1276 if (!INDIRECT_REF_P (obj
)
1277 && TREE_CODE (obj
) != MEM_REF
)
1280 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, 0),
1284 /* Returns false if we can prove that data references A and B do not alias,
1288 dr_may_alias_p (const struct data_reference
*a
, const struct data_reference
*b
)
1290 tree addr_a
= DR_BASE_OBJECT (a
);
1291 tree addr_b
= DR_BASE_OBJECT (b
);
1293 if (DR_IS_WRITE (a
) && DR_IS_WRITE (b
))
1294 return refs_output_dependent_p (addr_a
, addr_b
);
1295 else if (DR_IS_READ (a
) && DR_IS_WRITE (b
))
1296 return refs_anti_dependent_p (addr_a
, addr_b
);
1297 return refs_may_alias_p (addr_a
, addr_b
);
1300 static void compute_self_dependence (struct data_dependence_relation
*);
1302 /* Initialize a data dependence relation between data accesses A and
1303 B. NB_LOOPS is the number of loops surrounding the references: the
1304 size of the classic distance/direction vectors. */
1306 static struct data_dependence_relation
*
1307 initialize_data_dependence_relation (struct data_reference
*a
,
1308 struct data_reference
*b
,
1309 VEC (loop_p
, heap
) *loop_nest
)
1311 struct data_dependence_relation
*res
;
1314 res
= XNEW (struct data_dependence_relation
);
1317 DDR_LOOP_NEST (res
) = NULL
;
1318 DDR_REVERSED_P (res
) = false;
1319 DDR_SUBSCRIPTS (res
) = NULL
;
1320 DDR_DIR_VECTS (res
) = NULL
;
1321 DDR_DIST_VECTS (res
) = NULL
;
1323 if (a
== NULL
|| b
== NULL
)
1325 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
1329 /* If the data references do not alias, then they are independent. */
1330 if (!dr_may_alias_p (a
, b
))
1332 DDR_ARE_DEPENDENT (res
) = chrec_known
;
1336 /* When the references are exactly the same, don't spend time doing
1337 the data dependence tests, just initialize the ddr and return. */
1338 if (operand_equal_p (DR_REF (a
), DR_REF (b
), 0))
1340 DDR_AFFINE_P (res
) = true;
1341 DDR_ARE_DEPENDENT (res
) = NULL_TREE
;
1342 DDR_SUBSCRIPTS (res
) = VEC_alloc (subscript_p
, heap
, DR_NUM_DIMENSIONS (a
));
1343 DDR_LOOP_NEST (res
) = loop_nest
;
1344 DDR_INNER_LOOP (res
) = 0;
1345 DDR_SELF_REFERENCE (res
) = true;
1346 compute_self_dependence (res
);
1350 /* If the references do not access the same object, we do not know
1351 whether they alias or not. */
1352 if (!operand_equal_p (DR_BASE_OBJECT (a
), DR_BASE_OBJECT (b
), 0))
1354 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
1358 /* If the base of the object is not invariant in the loop nest, we cannot
1359 analyze it. TODO -- in fact, it would suffice to record that there may
1360 be arbitrary dependences in the loops where the base object varies. */
1362 && !object_address_invariant_in_loop_p (VEC_index (loop_p
, loop_nest
, 0),
1363 DR_BASE_OBJECT (a
)))
1365 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
1369 /* If the number of dimensions of the access to not agree we can have
1370 a pointer access to a component of the array element type and an
1371 array access while the base-objects are still the same. Punt. */
1372 if (DR_NUM_DIMENSIONS (a
) != DR_NUM_DIMENSIONS (b
))
1374 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
1378 DDR_AFFINE_P (res
) = true;
1379 DDR_ARE_DEPENDENT (res
) = NULL_TREE
;
1380 DDR_SUBSCRIPTS (res
) = VEC_alloc (subscript_p
, heap
, DR_NUM_DIMENSIONS (a
));
1381 DDR_LOOP_NEST (res
) = loop_nest
;
1382 DDR_INNER_LOOP (res
) = 0;
1383 DDR_SELF_REFERENCE (res
) = false;
1385 for (i
= 0; i
< DR_NUM_DIMENSIONS (a
); i
++)
1387 struct subscript
*subscript
;
1389 subscript
= XNEW (struct subscript
);
1390 SUB_CONFLICTS_IN_A (subscript
) = conflict_fn_not_known ();
1391 SUB_CONFLICTS_IN_B (subscript
) = conflict_fn_not_known ();
1392 SUB_LAST_CONFLICT (subscript
) = chrec_dont_know
;
1393 SUB_DISTANCE (subscript
) = chrec_dont_know
;
1394 VEC_safe_push (subscript_p
, heap
, DDR_SUBSCRIPTS (res
), subscript
);
1400 /* Frees memory used by the conflict function F. */
1403 free_conflict_function (conflict_function
*f
)
1407 if (CF_NONTRIVIAL_P (f
))
1409 for (i
= 0; i
< f
->n
; i
++)
1410 affine_fn_free (f
->fns
[i
]);
1415 /* Frees memory used by SUBSCRIPTS. */
1418 free_subscripts (VEC (subscript_p
, heap
) *subscripts
)
1423 FOR_EACH_VEC_ELT (subscript_p
, subscripts
, i
, s
)
1425 free_conflict_function (s
->conflicting_iterations_in_a
);
1426 free_conflict_function (s
->conflicting_iterations_in_b
);
1429 VEC_free (subscript_p
, heap
, subscripts
);
1432 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1436 finalize_ddr_dependent (struct data_dependence_relation
*ddr
,
1439 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1441 fprintf (dump_file
, "(dependence classified: ");
1442 print_generic_expr (dump_file
, chrec
, 0);
1443 fprintf (dump_file
, ")\n");
1446 DDR_ARE_DEPENDENT (ddr
) = chrec
;
1447 free_subscripts (DDR_SUBSCRIPTS (ddr
));
1448 DDR_SUBSCRIPTS (ddr
) = NULL
;
1451 /* The dependence relation DDR cannot be represented by a distance
1455 non_affine_dependence_relation (struct data_dependence_relation
*ddr
)
1457 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1458 fprintf (dump_file
, "(Dependence relation cannot be represented by distance vector.) \n");
1460 DDR_AFFINE_P (ddr
) = false;
1465 /* This section contains the classic Banerjee tests. */
1467 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1468 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1471 ziv_subscript_p (const_tree chrec_a
, const_tree chrec_b
)
1473 return (evolution_function_is_constant_p (chrec_a
)
1474 && evolution_function_is_constant_p (chrec_b
));
1477 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1478 variable, i.e., if the SIV (Single Index Variable) test is true. */
1481 siv_subscript_p (const_tree chrec_a
, const_tree chrec_b
)
1483 if ((evolution_function_is_constant_p (chrec_a
)
1484 && evolution_function_is_univariate_p (chrec_b
))
1485 || (evolution_function_is_constant_p (chrec_b
)
1486 && evolution_function_is_univariate_p (chrec_a
)))
1489 if (evolution_function_is_univariate_p (chrec_a
)
1490 && evolution_function_is_univariate_p (chrec_b
))
1492 switch (TREE_CODE (chrec_a
))
1494 case POLYNOMIAL_CHREC
:
1495 switch (TREE_CODE (chrec_b
))
1497 case POLYNOMIAL_CHREC
:
1498 if (CHREC_VARIABLE (chrec_a
) != CHREC_VARIABLE (chrec_b
))
1513 /* Creates a conflict function with N dimensions. The affine functions
1514 in each dimension follow. */
1516 static conflict_function
*
1517 conflict_fn (unsigned n
, ...)
1520 conflict_function
*ret
= XCNEW (conflict_function
);
1523 gcc_assert (0 < n
&& n
<= MAX_DIM
);
1527 for (i
= 0; i
< n
; i
++)
1528 ret
->fns
[i
] = va_arg (ap
, affine_fn
);
1534 /* Returns constant affine function with value CST. */
1537 affine_fn_cst (tree cst
)
1539 affine_fn fn
= VEC_alloc (tree
, heap
, 1);
1540 VEC_quick_push (tree
, fn
, cst
);
1544 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1547 affine_fn_univar (tree cst
, unsigned dim
, tree coef
)
1549 affine_fn fn
= VEC_alloc (tree
, heap
, dim
+ 1);
1552 gcc_assert (dim
> 0);
1553 VEC_quick_push (tree
, fn
, cst
);
1554 for (i
= 1; i
< dim
; i
++)
1555 VEC_quick_push (tree
, fn
, integer_zero_node
);
1556 VEC_quick_push (tree
, fn
, coef
);
1560 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1561 *OVERLAPS_B are initialized to the functions that describe the
1562 relation between the elements accessed twice by CHREC_A and
1563 CHREC_B. For k >= 0, the following property is verified:
1565 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1568 analyze_ziv_subscript (tree chrec_a
,
1570 conflict_function
**overlaps_a
,
1571 conflict_function
**overlaps_b
,
1572 tree
*last_conflicts
)
1574 tree type
, difference
;
1575 dependence_stats
.num_ziv
++;
1577 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1578 fprintf (dump_file
, "(analyze_ziv_subscript \n");
1580 type
= signed_type_for_types (TREE_TYPE (chrec_a
), TREE_TYPE (chrec_b
));
1581 chrec_a
= chrec_convert (type
, chrec_a
, NULL
);
1582 chrec_b
= chrec_convert (type
, chrec_b
, NULL
);
1583 difference
= chrec_fold_minus (type
, chrec_a
, chrec_b
);
1585 switch (TREE_CODE (difference
))
1588 if (integer_zerop (difference
))
1590 /* The difference is equal to zero: the accessed index
1591 overlaps for each iteration in the loop. */
1592 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
1593 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
1594 *last_conflicts
= chrec_dont_know
;
1595 dependence_stats
.num_ziv_dependent
++;
1599 /* The accesses do not overlap. */
1600 *overlaps_a
= conflict_fn_no_dependence ();
1601 *overlaps_b
= conflict_fn_no_dependence ();
1602 *last_conflicts
= integer_zero_node
;
1603 dependence_stats
.num_ziv_independent
++;
1608 /* We're not sure whether the indexes overlap. For the moment,
1609 conservatively answer "don't know". */
1610 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1611 fprintf (dump_file
, "ziv test failed: difference is non-integer.\n");
1613 *overlaps_a
= conflict_fn_not_known ();
1614 *overlaps_b
= conflict_fn_not_known ();
1615 *last_conflicts
= chrec_dont_know
;
1616 dependence_stats
.num_ziv_unimplemented
++;
1620 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1621 fprintf (dump_file
, ")\n");
1624 /* Sets NIT to the estimated number of executions of the statements in
1625 LOOP. If CONSERVATIVE is true, we must be sure that NIT is at least as
1626 large as the number of iterations. If we have no reliable estimate,
1627 the function returns false, otherwise returns true. */
1630 estimated_loop_iterations (struct loop
*loop
, bool conservative
,
1633 estimate_numbers_of_iterations_loop (loop
, true);
1636 if (!loop
->any_upper_bound
)
1639 *nit
= loop
->nb_iterations_upper_bound
;
1643 if (!loop
->any_estimate
)
1646 *nit
= loop
->nb_iterations_estimate
;
1652 /* Similar to estimated_loop_iterations, but returns the estimate only
1653 if it fits to HOST_WIDE_INT. If this is not the case, or the estimate
1654 on the number of iterations of LOOP could not be derived, returns -1. */
1657 estimated_loop_iterations_int (struct loop
*loop
, bool conservative
)
1660 HOST_WIDE_INT hwi_nit
;
1662 if (!estimated_loop_iterations (loop
, conservative
, &nit
))
1665 if (!double_int_fits_in_shwi_p (nit
))
1667 hwi_nit
= double_int_to_shwi (nit
);
1669 return hwi_nit
< 0 ? -1 : hwi_nit
;
1672 /* Similar to estimated_loop_iterations, but returns the estimate as a tree,
1673 and only if it fits to the int type. If this is not the case, or the
1674 estimate on the number of iterations of LOOP could not be derived, returns
1678 estimated_loop_iterations_tree (struct loop
*loop
, bool conservative
)
1683 if (!estimated_loop_iterations (loop
, conservative
, &nit
))
1684 return chrec_dont_know
;
1686 type
= lang_hooks
.types
.type_for_size (INT_TYPE_SIZE
, true);
1687 if (!double_int_fits_to_tree_p (type
, nit
))
1688 return chrec_dont_know
;
1690 return double_int_to_tree (type
, nit
);
1693 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1694 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1695 *OVERLAPS_B are initialized to the functions that describe the
1696 relation between the elements accessed twice by CHREC_A and
1697 CHREC_B. For k >= 0, the following property is verified:
1699 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1702 analyze_siv_subscript_cst_affine (tree chrec_a
,
1704 conflict_function
**overlaps_a
,
1705 conflict_function
**overlaps_b
,
1706 tree
*last_conflicts
)
1708 bool value0
, value1
, value2
;
1709 tree type
, difference
, tmp
;
1711 type
= signed_type_for_types (TREE_TYPE (chrec_a
), TREE_TYPE (chrec_b
));
1712 chrec_a
= chrec_convert (type
, chrec_a
, NULL
);
1713 chrec_b
= chrec_convert (type
, chrec_b
, NULL
);
1714 difference
= chrec_fold_minus (type
, initial_condition (chrec_b
), chrec_a
);
1716 if (!chrec_is_positive (initial_condition (difference
), &value0
))
1718 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1719 fprintf (dump_file
, "siv test failed: chrec is not positive.\n");
1721 dependence_stats
.num_siv_unimplemented
++;
1722 *overlaps_a
= conflict_fn_not_known ();
1723 *overlaps_b
= conflict_fn_not_known ();
1724 *last_conflicts
= chrec_dont_know
;
1729 if (value0
== false)
1731 if (!chrec_is_positive (CHREC_RIGHT (chrec_b
), &value1
))
1733 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1734 fprintf (dump_file
, "siv test failed: chrec not positive.\n");
1736 *overlaps_a
= conflict_fn_not_known ();
1737 *overlaps_b
= conflict_fn_not_known ();
1738 *last_conflicts
= chrec_dont_know
;
1739 dependence_stats
.num_siv_unimplemented
++;
1748 chrec_b = {10, +, 1}
1751 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b
), difference
))
1753 HOST_WIDE_INT numiter
;
1754 struct loop
*loop
= get_chrec_loop (chrec_b
);
1756 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
1757 tmp
= fold_build2 (EXACT_DIV_EXPR
, type
,
1758 fold_build1 (ABS_EXPR
, type
, difference
),
1759 CHREC_RIGHT (chrec_b
));
1760 *overlaps_b
= conflict_fn (1, affine_fn_cst (tmp
));
1761 *last_conflicts
= integer_one_node
;
1764 /* Perform weak-zero siv test to see if overlap is
1765 outside the loop bounds. */
1766 numiter
= estimated_loop_iterations_int (loop
, false);
1769 && compare_tree_int (tmp
, numiter
) > 0)
1771 free_conflict_function (*overlaps_a
);
1772 free_conflict_function (*overlaps_b
);
1773 *overlaps_a
= conflict_fn_no_dependence ();
1774 *overlaps_b
= conflict_fn_no_dependence ();
1775 *last_conflicts
= integer_zero_node
;
1776 dependence_stats
.num_siv_independent
++;
1779 dependence_stats
.num_siv_dependent
++;
1783 /* When the step does not divide the difference, there are
1787 *overlaps_a
= conflict_fn_no_dependence ();
1788 *overlaps_b
= conflict_fn_no_dependence ();
1789 *last_conflicts
= integer_zero_node
;
1790 dependence_stats
.num_siv_independent
++;
1799 chrec_b = {10, +, -1}
1801 In this case, chrec_a will not overlap with chrec_b. */
1802 *overlaps_a
= conflict_fn_no_dependence ();
1803 *overlaps_b
= conflict_fn_no_dependence ();
1804 *last_conflicts
= integer_zero_node
;
1805 dependence_stats
.num_siv_independent
++;
1812 if (!chrec_is_positive (CHREC_RIGHT (chrec_b
), &value2
))
1814 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1815 fprintf (dump_file
, "siv test failed: chrec not positive.\n");
1817 *overlaps_a
= conflict_fn_not_known ();
1818 *overlaps_b
= conflict_fn_not_known ();
1819 *last_conflicts
= chrec_dont_know
;
1820 dependence_stats
.num_siv_unimplemented
++;
1825 if (value2
== false)
1829 chrec_b = {10, +, -1}
1831 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b
), difference
))
1833 HOST_WIDE_INT numiter
;
1834 struct loop
*loop
= get_chrec_loop (chrec_b
);
1836 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
1837 tmp
= fold_build2 (EXACT_DIV_EXPR
, type
, difference
,
1838 CHREC_RIGHT (chrec_b
));
1839 *overlaps_b
= conflict_fn (1, affine_fn_cst (tmp
));
1840 *last_conflicts
= integer_one_node
;
1842 /* Perform weak-zero siv test to see if overlap is
1843 outside the loop bounds. */
1844 numiter
= estimated_loop_iterations_int (loop
, false);
1847 && compare_tree_int (tmp
, numiter
) > 0)
1849 free_conflict_function (*overlaps_a
);
1850 free_conflict_function (*overlaps_b
);
1851 *overlaps_a
= conflict_fn_no_dependence ();
1852 *overlaps_b
= conflict_fn_no_dependence ();
1853 *last_conflicts
= integer_zero_node
;
1854 dependence_stats
.num_siv_independent
++;
1857 dependence_stats
.num_siv_dependent
++;
1861 /* When the step does not divide the difference, there
1865 *overlaps_a
= conflict_fn_no_dependence ();
1866 *overlaps_b
= conflict_fn_no_dependence ();
1867 *last_conflicts
= integer_zero_node
;
1868 dependence_stats
.num_siv_independent
++;
1878 In this case, chrec_a will not overlap with chrec_b. */
1879 *overlaps_a
= conflict_fn_no_dependence ();
1880 *overlaps_b
= conflict_fn_no_dependence ();
1881 *last_conflicts
= integer_zero_node
;
1882 dependence_stats
.num_siv_independent
++;
1890 /* Helper recursive function for initializing the matrix A. Returns
1891 the initial value of CHREC. */
1894 initialize_matrix_A (lambda_matrix A
, tree chrec
, unsigned index
, int mult
)
1898 switch (TREE_CODE (chrec
))
1900 case POLYNOMIAL_CHREC
:
1901 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec
)) == INTEGER_CST
);
1903 A
[index
][0] = mult
* int_cst_value (CHREC_RIGHT (chrec
));
1904 return initialize_matrix_A (A
, CHREC_LEFT (chrec
), index
+ 1, mult
);
1910 tree op0
= initialize_matrix_A (A
, TREE_OPERAND (chrec
, 0), index
, mult
);
1911 tree op1
= initialize_matrix_A (A
, TREE_OPERAND (chrec
, 1), index
, mult
);
1913 return chrec_fold_op (TREE_CODE (chrec
), chrec_type (chrec
), op0
, op1
);
1918 tree op
= initialize_matrix_A (A
, TREE_OPERAND (chrec
, 0), index
, mult
);
1919 return chrec_convert (chrec_type (chrec
), op
, NULL
);
1924 /* Handle ~X as -1 - X. */
1925 tree op
= initialize_matrix_A (A
, TREE_OPERAND (chrec
, 0), index
, mult
);
1926 return chrec_fold_op (MINUS_EXPR
, chrec_type (chrec
),
1927 build_int_cst (TREE_TYPE (chrec
), -1), op
);
1939 #define FLOOR_DIV(x,y) ((x) / (y))
1941 /* Solves the special case of the Diophantine equation:
1942 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
1944 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
1945 number of iterations that loops X and Y run. The overlaps will be
1946 constructed as evolutions in dimension DIM. */
1949 compute_overlap_steps_for_affine_univar (int niter
, int step_a
, int step_b
,
1950 affine_fn
*overlaps_a
,
1951 affine_fn
*overlaps_b
,
1952 tree
*last_conflicts
, int dim
)
1954 if (((step_a
> 0 && step_b
> 0)
1955 || (step_a
< 0 && step_b
< 0)))
1957 int step_overlaps_a
, step_overlaps_b
;
1958 int gcd_steps_a_b
, last_conflict
, tau2
;
1960 gcd_steps_a_b
= gcd (step_a
, step_b
);
1961 step_overlaps_a
= step_b
/ gcd_steps_a_b
;
1962 step_overlaps_b
= step_a
/ gcd_steps_a_b
;
1966 tau2
= FLOOR_DIV (niter
, step_overlaps_a
);
1967 tau2
= MIN (tau2
, FLOOR_DIV (niter
, step_overlaps_b
));
1968 last_conflict
= tau2
;
1969 *last_conflicts
= build_int_cst (NULL_TREE
, last_conflict
);
1972 *last_conflicts
= chrec_dont_know
;
1974 *overlaps_a
= affine_fn_univar (integer_zero_node
, dim
,
1975 build_int_cst (NULL_TREE
,
1977 *overlaps_b
= affine_fn_univar (integer_zero_node
, dim
,
1978 build_int_cst (NULL_TREE
,
1984 *overlaps_a
= affine_fn_cst (integer_zero_node
);
1985 *overlaps_b
= affine_fn_cst (integer_zero_node
);
1986 *last_conflicts
= integer_zero_node
;
1990 /* Solves the special case of a Diophantine equation where CHREC_A is
1991 an affine bivariate function, and CHREC_B is an affine univariate
1992 function. For example,
1994 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
1996 has the following overlapping functions:
1998 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
1999 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
2000 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
2002 FORNOW: This is a specialized implementation for a case occurring in
2003 a common benchmark. Implement the general algorithm. */
2006 compute_overlap_steps_for_affine_1_2 (tree chrec_a
, tree chrec_b
,
2007 conflict_function
**overlaps_a
,
2008 conflict_function
**overlaps_b
,
2009 tree
*last_conflicts
)
2011 bool xz_p
, yz_p
, xyz_p
;
2012 int step_x
, step_y
, step_z
;
2013 HOST_WIDE_INT niter_x
, niter_y
, niter_z
, niter
;
2014 affine_fn overlaps_a_xz
, overlaps_b_xz
;
2015 affine_fn overlaps_a_yz
, overlaps_b_yz
;
2016 affine_fn overlaps_a_xyz
, overlaps_b_xyz
;
2017 affine_fn ova1
, ova2
, ovb
;
2018 tree last_conflicts_xz
, last_conflicts_yz
, last_conflicts_xyz
;
2020 step_x
= int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a
)));
2021 step_y
= int_cst_value (CHREC_RIGHT (chrec_a
));
2022 step_z
= int_cst_value (CHREC_RIGHT (chrec_b
));
2025 estimated_loop_iterations_int (get_chrec_loop (CHREC_LEFT (chrec_a
)),
2027 niter_y
= estimated_loop_iterations_int (get_chrec_loop (chrec_a
), false);
2028 niter_z
= estimated_loop_iterations_int (get_chrec_loop (chrec_b
), false);
2030 if (niter_x
< 0 || niter_y
< 0 || niter_z
< 0)
2032 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2033 fprintf (dump_file
, "overlap steps test failed: no iteration counts.\n");
2035 *overlaps_a
= conflict_fn_not_known ();
2036 *overlaps_b
= conflict_fn_not_known ();
2037 *last_conflicts
= chrec_dont_know
;
2041 niter
= MIN (niter_x
, niter_z
);
2042 compute_overlap_steps_for_affine_univar (niter
, step_x
, step_z
,
2045 &last_conflicts_xz
, 1);
2046 niter
= MIN (niter_y
, niter_z
);
2047 compute_overlap_steps_for_affine_univar (niter
, step_y
, step_z
,
2050 &last_conflicts_yz
, 2);
2051 niter
= MIN (niter_x
, niter_z
);
2052 niter
= MIN (niter_y
, niter
);
2053 compute_overlap_steps_for_affine_univar (niter
, step_x
+ step_y
, step_z
,
2056 &last_conflicts_xyz
, 3);
2058 xz_p
= !integer_zerop (last_conflicts_xz
);
2059 yz_p
= !integer_zerop (last_conflicts_yz
);
2060 xyz_p
= !integer_zerop (last_conflicts_xyz
);
2062 if (xz_p
|| yz_p
|| xyz_p
)
2064 ova1
= affine_fn_cst (integer_zero_node
);
2065 ova2
= affine_fn_cst (integer_zero_node
);
2066 ovb
= affine_fn_cst (integer_zero_node
);
2069 affine_fn t0
= ova1
;
2072 ova1
= affine_fn_plus (ova1
, overlaps_a_xz
);
2073 ovb
= affine_fn_plus (ovb
, overlaps_b_xz
);
2074 affine_fn_free (t0
);
2075 affine_fn_free (t2
);
2076 *last_conflicts
= last_conflicts_xz
;
2080 affine_fn t0
= ova2
;
2083 ova2
= affine_fn_plus (ova2
, overlaps_a_yz
);
2084 ovb
= affine_fn_plus (ovb
, overlaps_b_yz
);
2085 affine_fn_free (t0
);
2086 affine_fn_free (t2
);
2087 *last_conflicts
= last_conflicts_yz
;
2091 affine_fn t0
= ova1
;
2092 affine_fn t2
= ova2
;
2095 ova1
= affine_fn_plus (ova1
, overlaps_a_xyz
);
2096 ova2
= affine_fn_plus (ova2
, overlaps_a_xyz
);
2097 ovb
= affine_fn_plus (ovb
, overlaps_b_xyz
);
2098 affine_fn_free (t0
);
2099 affine_fn_free (t2
);
2100 affine_fn_free (t4
);
2101 *last_conflicts
= last_conflicts_xyz
;
2103 *overlaps_a
= conflict_fn (2, ova1
, ova2
);
2104 *overlaps_b
= conflict_fn (1, ovb
);
2108 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2109 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2110 *last_conflicts
= integer_zero_node
;
2113 affine_fn_free (overlaps_a_xz
);
2114 affine_fn_free (overlaps_b_xz
);
2115 affine_fn_free (overlaps_a_yz
);
2116 affine_fn_free (overlaps_b_yz
);
2117 affine_fn_free (overlaps_a_xyz
);
2118 affine_fn_free (overlaps_b_xyz
);
2121 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
2124 lambda_vector_copy (lambda_vector vec1
, lambda_vector vec2
,
2127 memcpy (vec2
, vec1
, size
* sizeof (*vec1
));
2130 /* Copy the elements of M x N matrix MAT1 to MAT2. */
2133 lambda_matrix_copy (lambda_matrix mat1
, lambda_matrix mat2
,
2138 for (i
= 0; i
< m
; i
++)
2139 lambda_vector_copy (mat1
[i
], mat2
[i
], n
);
2142 /* Store the N x N identity matrix in MAT. */
2145 lambda_matrix_id (lambda_matrix mat
, int size
)
2149 for (i
= 0; i
< size
; i
++)
2150 for (j
= 0; j
< size
; j
++)
2151 mat
[i
][j
] = (i
== j
) ? 1 : 0;
2154 /* Return the first nonzero element of vector VEC1 between START and N.
2155 We must have START <= N. Returns N if VEC1 is the zero vector. */
2158 lambda_vector_first_nz (lambda_vector vec1
, int n
, int start
)
2161 while (j
< n
&& vec1
[j
] == 0)
2166 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2167 R2 = R2 + CONST1 * R1. */
2170 lambda_matrix_row_add (lambda_matrix mat
, int n
, int r1
, int r2
, int const1
)
2177 for (i
= 0; i
< n
; i
++)
2178 mat
[r2
][i
] += const1
* mat
[r1
][i
];
2181 /* Swap rows R1 and R2 in matrix MAT. */
2184 lambda_matrix_row_exchange (lambda_matrix mat
, int r1
, int r2
)
2193 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
2194 and store the result in VEC2. */
2197 lambda_vector_mult_const (lambda_vector vec1
, lambda_vector vec2
,
2198 int size
, int const1
)
2203 lambda_vector_clear (vec2
, size
);
2205 for (i
= 0; i
< size
; i
++)
2206 vec2
[i
] = const1
* vec1
[i
];
2209 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
2212 lambda_vector_negate (lambda_vector vec1
, lambda_vector vec2
,
2215 lambda_vector_mult_const (vec1
, vec2
, size
, -1);
2218 /* Negate row R1 of matrix MAT which has N columns. */
2221 lambda_matrix_row_negate (lambda_matrix mat
, int n
, int r1
)
2223 lambda_vector_negate (mat
[r1
], mat
[r1
], n
);
2226 /* Return true if two vectors are equal. */
2229 lambda_vector_equal (lambda_vector vec1
, lambda_vector vec2
, int size
)
2232 for (i
= 0; i
< size
; i
++)
2233 if (vec1
[i
] != vec2
[i
])
2238 /* Given an M x N integer matrix A, this function determines an M x
2239 M unimodular matrix U, and an M x N echelon matrix S such that
2240 "U.A = S". This decomposition is also known as "right Hermite".
2242 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2243 Restructuring Compilers" Utpal Banerjee. */
2246 lambda_matrix_right_hermite (lambda_matrix A
, int m
, int n
,
2247 lambda_matrix S
, lambda_matrix U
)
2251 lambda_matrix_copy (A
, S
, m
, n
);
2252 lambda_matrix_id (U
, m
);
2254 for (j
= 0; j
< n
; j
++)
2256 if (lambda_vector_first_nz (S
[j
], m
, i0
) < m
)
2259 for (i
= m
- 1; i
>= i0
; i
--)
2261 while (S
[i
][j
] != 0)
2263 int sigma
, factor
, a
, b
;
2267 sigma
= (a
* b
< 0) ? -1: 1;
2270 factor
= sigma
* (a
/ b
);
2272 lambda_matrix_row_add (S
, n
, i
, i
-1, -factor
);
2273 lambda_matrix_row_exchange (S
, i
, i
-1);
2275 lambda_matrix_row_add (U
, m
, i
, i
-1, -factor
);
2276 lambda_matrix_row_exchange (U
, i
, i
-1);
2283 /* Determines the overlapping elements due to accesses CHREC_A and
2284 CHREC_B, that are affine functions. This function cannot handle
2285 symbolic evolution functions, ie. when initial conditions are
2286 parameters, because it uses lambda matrices of integers. */
2289 analyze_subscript_affine_affine (tree chrec_a
,
2291 conflict_function
**overlaps_a
,
2292 conflict_function
**overlaps_b
,
2293 tree
*last_conflicts
)
2295 unsigned nb_vars_a
, nb_vars_b
, dim
;
2296 HOST_WIDE_INT init_a
, init_b
, gamma
, gcd_alpha_beta
;
2297 lambda_matrix A
, U
, S
;
2298 struct obstack scratch_obstack
;
2300 if (eq_evolutions_p (chrec_a
, chrec_b
))
2302 /* The accessed index overlaps for each iteration in the
2304 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2305 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2306 *last_conflicts
= chrec_dont_know
;
2309 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2310 fprintf (dump_file
, "(analyze_subscript_affine_affine \n");
2312 /* For determining the initial intersection, we have to solve a
2313 Diophantine equation. This is the most time consuming part.
2315 For answering to the question: "Is there a dependence?" we have
2316 to prove that there exists a solution to the Diophantine
2317 equation, and that the solution is in the iteration domain,
2318 i.e. the solution is positive or zero, and that the solution
2319 happens before the upper bound loop.nb_iterations. Otherwise
2320 there is no dependence. This function outputs a description of
2321 the iterations that hold the intersections. */
2323 nb_vars_a
= nb_vars_in_chrec (chrec_a
);
2324 nb_vars_b
= nb_vars_in_chrec (chrec_b
);
2326 gcc_obstack_init (&scratch_obstack
);
2328 dim
= nb_vars_a
+ nb_vars_b
;
2329 U
= lambda_matrix_new (dim
, dim
, &scratch_obstack
);
2330 A
= lambda_matrix_new (dim
, 1, &scratch_obstack
);
2331 S
= lambda_matrix_new (dim
, 1, &scratch_obstack
);
2333 init_a
= int_cst_value (initialize_matrix_A (A
, chrec_a
, 0, 1));
2334 init_b
= int_cst_value (initialize_matrix_A (A
, chrec_b
, nb_vars_a
, -1));
2335 gamma
= init_b
- init_a
;
2337 /* Don't do all the hard work of solving the Diophantine equation
2338 when we already know the solution: for example,
2341 | gamma = 3 - 3 = 0.
2342 Then the first overlap occurs during the first iterations:
2343 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2347 if (nb_vars_a
== 1 && nb_vars_b
== 1)
2349 HOST_WIDE_INT step_a
, step_b
;
2350 HOST_WIDE_INT niter
, niter_a
, niter_b
;
2353 niter_a
= estimated_loop_iterations_int (get_chrec_loop (chrec_a
),
2355 niter_b
= estimated_loop_iterations_int (get_chrec_loop (chrec_b
),
2357 niter
= MIN (niter_a
, niter_b
);
2358 step_a
= int_cst_value (CHREC_RIGHT (chrec_a
));
2359 step_b
= int_cst_value (CHREC_RIGHT (chrec_b
));
2361 compute_overlap_steps_for_affine_univar (niter
, step_a
, step_b
,
2364 *overlaps_a
= conflict_fn (1, ova
);
2365 *overlaps_b
= conflict_fn (1, ovb
);
2368 else if (nb_vars_a
== 2 && nb_vars_b
== 1)
2369 compute_overlap_steps_for_affine_1_2
2370 (chrec_a
, chrec_b
, overlaps_a
, overlaps_b
, last_conflicts
);
2372 else if (nb_vars_a
== 1 && nb_vars_b
== 2)
2373 compute_overlap_steps_for_affine_1_2
2374 (chrec_b
, chrec_a
, overlaps_b
, overlaps_a
, last_conflicts
);
2378 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2379 fprintf (dump_file
, "affine-affine test failed: too many variables.\n");
2380 *overlaps_a
= conflict_fn_not_known ();
2381 *overlaps_b
= conflict_fn_not_known ();
2382 *last_conflicts
= chrec_dont_know
;
2384 goto end_analyze_subs_aa
;
2388 lambda_matrix_right_hermite (A
, dim
, 1, S
, U
);
2393 lambda_matrix_row_negate (U
, dim
, 0);
2395 gcd_alpha_beta
= S
[0][0];
2397 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2398 but that is a quite strange case. Instead of ICEing, answer
2400 if (gcd_alpha_beta
== 0)
2402 *overlaps_a
= conflict_fn_not_known ();
2403 *overlaps_b
= conflict_fn_not_known ();
2404 *last_conflicts
= chrec_dont_know
;
2405 goto end_analyze_subs_aa
;
2408 /* The classic "gcd-test". */
2409 if (!int_divides_p (gcd_alpha_beta
, gamma
))
2411 /* The "gcd-test" has determined that there is no integer
2412 solution, i.e. there is no dependence. */
2413 *overlaps_a
= conflict_fn_no_dependence ();
2414 *overlaps_b
= conflict_fn_no_dependence ();
2415 *last_conflicts
= integer_zero_node
;
2418 /* Both access functions are univariate. This includes SIV and MIV cases. */
2419 else if (nb_vars_a
== 1 && nb_vars_b
== 1)
2421 /* Both functions should have the same evolution sign. */
2422 if (((A
[0][0] > 0 && -A
[1][0] > 0)
2423 || (A
[0][0] < 0 && -A
[1][0] < 0)))
2425 /* The solutions are given by:
2427 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2430 For a given integer t. Using the following variables,
2432 | i0 = u11 * gamma / gcd_alpha_beta
2433 | j0 = u12 * gamma / gcd_alpha_beta
2440 | y0 = j0 + j1 * t. */
2441 HOST_WIDE_INT i0
, j0
, i1
, j1
;
2443 i0
= U
[0][0] * gamma
/ gcd_alpha_beta
;
2444 j0
= U
[0][1] * gamma
/ gcd_alpha_beta
;
2448 if ((i1
== 0 && i0
< 0)
2449 || (j1
== 0 && j0
< 0))
2451 /* There is no solution.
2452 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2453 falls in here, but for the moment we don't look at the
2454 upper bound of the iteration domain. */
2455 *overlaps_a
= conflict_fn_no_dependence ();
2456 *overlaps_b
= conflict_fn_no_dependence ();
2457 *last_conflicts
= integer_zero_node
;
2458 goto end_analyze_subs_aa
;
2461 if (i1
> 0 && j1
> 0)
2463 HOST_WIDE_INT niter_a
= estimated_loop_iterations_int
2464 (get_chrec_loop (chrec_a
), false);
2465 HOST_WIDE_INT niter_b
= estimated_loop_iterations_int
2466 (get_chrec_loop (chrec_b
), false);
2467 HOST_WIDE_INT niter
= MIN (niter_a
, niter_b
);
2469 /* (X0, Y0) is a solution of the Diophantine equation:
2470 "chrec_a (X0) = chrec_b (Y0)". */
2471 HOST_WIDE_INT tau1
= MAX (CEIL (-i0
, i1
),
2473 HOST_WIDE_INT x0
= i1
* tau1
+ i0
;
2474 HOST_WIDE_INT y0
= j1
* tau1
+ j0
;
2476 /* (X1, Y1) is the smallest positive solution of the eq
2477 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2478 first conflict occurs. */
2479 HOST_WIDE_INT min_multiple
= MIN (x0
/ i1
, y0
/ j1
);
2480 HOST_WIDE_INT x1
= x0
- i1
* min_multiple
;
2481 HOST_WIDE_INT y1
= y0
- j1
* min_multiple
;
2485 HOST_WIDE_INT tau2
= MIN (FLOOR_DIV (niter
- i0
, i1
),
2486 FLOOR_DIV (niter
- j0
, j1
));
2487 HOST_WIDE_INT last_conflict
= tau2
- (x1
- i0
)/i1
;
2489 /* If the overlap occurs outside of the bounds of the
2490 loop, there is no dependence. */
2491 if (x1
>= niter
|| y1
>= niter
)
2493 *overlaps_a
= conflict_fn_no_dependence ();
2494 *overlaps_b
= conflict_fn_no_dependence ();
2495 *last_conflicts
= integer_zero_node
;
2496 goto end_analyze_subs_aa
;
2499 *last_conflicts
= build_int_cst (NULL_TREE
, last_conflict
);
2502 *last_conflicts
= chrec_dont_know
;
2506 affine_fn_univar (build_int_cst (NULL_TREE
, x1
),
2508 build_int_cst (NULL_TREE
, i1
)));
2511 affine_fn_univar (build_int_cst (NULL_TREE
, y1
),
2513 build_int_cst (NULL_TREE
, j1
)));
2517 /* FIXME: For the moment, the upper bound of the
2518 iteration domain for i and j is not checked. */
2519 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2520 fprintf (dump_file
, "affine-affine test failed: unimplemented.\n");
2521 *overlaps_a
= conflict_fn_not_known ();
2522 *overlaps_b
= conflict_fn_not_known ();
2523 *last_conflicts
= chrec_dont_know
;
2528 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2529 fprintf (dump_file
, "affine-affine test failed: unimplemented.\n");
2530 *overlaps_a
= conflict_fn_not_known ();
2531 *overlaps_b
= conflict_fn_not_known ();
2532 *last_conflicts
= chrec_dont_know
;
2537 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2538 fprintf (dump_file
, "affine-affine test failed: unimplemented.\n");
2539 *overlaps_a
= conflict_fn_not_known ();
2540 *overlaps_b
= conflict_fn_not_known ();
2541 *last_conflicts
= chrec_dont_know
;
2544 end_analyze_subs_aa
:
2545 obstack_free (&scratch_obstack
, NULL
);
2546 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2548 fprintf (dump_file
, " (overlaps_a = ");
2549 dump_conflict_function (dump_file
, *overlaps_a
);
2550 fprintf (dump_file
, ")\n (overlaps_b = ");
2551 dump_conflict_function (dump_file
, *overlaps_b
);
2552 fprintf (dump_file
, ")\n");
2553 fprintf (dump_file
, ")\n");
2557 /* Returns true when analyze_subscript_affine_affine can be used for
2558 determining the dependence relation between chrec_a and chrec_b,
2559 that contain symbols. This function modifies chrec_a and chrec_b
2560 such that the analysis result is the same, and such that they don't
2561 contain symbols, and then can safely be passed to the analyzer.
2563 Example: The analysis of the following tuples of evolutions produce
2564 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2567 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2568 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2572 can_use_analyze_subscript_affine_affine (tree
*chrec_a
, tree
*chrec_b
)
2574 tree diff
, type
, left_a
, left_b
, right_b
;
2576 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a
))
2577 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b
)))
2578 /* FIXME: For the moment not handled. Might be refined later. */
2581 type
= chrec_type (*chrec_a
);
2582 left_a
= CHREC_LEFT (*chrec_a
);
2583 left_b
= chrec_convert (type
, CHREC_LEFT (*chrec_b
), NULL
);
2584 diff
= chrec_fold_minus (type
, left_a
, left_b
);
2586 if (!evolution_function_is_constant_p (diff
))
2589 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2590 fprintf (dump_file
, "can_use_subscript_aff_aff_for_symbolic \n");
2592 *chrec_a
= build_polynomial_chrec (CHREC_VARIABLE (*chrec_a
),
2593 diff
, CHREC_RIGHT (*chrec_a
));
2594 right_b
= chrec_convert (type
, CHREC_RIGHT (*chrec_b
), NULL
);
2595 *chrec_b
= build_polynomial_chrec (CHREC_VARIABLE (*chrec_b
),
2596 build_int_cst (type
, 0),
2601 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2602 *OVERLAPS_B are initialized to the functions that describe the
2603 relation between the elements accessed twice by CHREC_A and
2604 CHREC_B. For k >= 0, the following property is verified:
2606 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2609 analyze_siv_subscript (tree chrec_a
,
2611 conflict_function
**overlaps_a
,
2612 conflict_function
**overlaps_b
,
2613 tree
*last_conflicts
,
2616 dependence_stats
.num_siv
++;
2618 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2619 fprintf (dump_file
, "(analyze_siv_subscript \n");
2621 if (evolution_function_is_constant_p (chrec_a
)
2622 && evolution_function_is_affine_in_loop (chrec_b
, loop_nest_num
))
2623 analyze_siv_subscript_cst_affine (chrec_a
, chrec_b
,
2624 overlaps_a
, overlaps_b
, last_conflicts
);
2626 else if (evolution_function_is_affine_in_loop (chrec_a
, loop_nest_num
)
2627 && evolution_function_is_constant_p (chrec_b
))
2628 analyze_siv_subscript_cst_affine (chrec_b
, chrec_a
,
2629 overlaps_b
, overlaps_a
, last_conflicts
);
2631 else if (evolution_function_is_affine_in_loop (chrec_a
, loop_nest_num
)
2632 && evolution_function_is_affine_in_loop (chrec_b
, loop_nest_num
))
2634 if (!chrec_contains_symbols (chrec_a
)
2635 && !chrec_contains_symbols (chrec_b
))
2637 analyze_subscript_affine_affine (chrec_a
, chrec_b
,
2638 overlaps_a
, overlaps_b
,
2641 if (CF_NOT_KNOWN_P (*overlaps_a
)
2642 || CF_NOT_KNOWN_P (*overlaps_b
))
2643 dependence_stats
.num_siv_unimplemented
++;
2644 else if (CF_NO_DEPENDENCE_P (*overlaps_a
)
2645 || CF_NO_DEPENDENCE_P (*overlaps_b
))
2646 dependence_stats
.num_siv_independent
++;
2648 dependence_stats
.num_siv_dependent
++;
2650 else if (can_use_analyze_subscript_affine_affine (&chrec_a
,
2653 analyze_subscript_affine_affine (chrec_a
, chrec_b
,
2654 overlaps_a
, overlaps_b
,
2657 if (CF_NOT_KNOWN_P (*overlaps_a
)
2658 || CF_NOT_KNOWN_P (*overlaps_b
))
2659 dependence_stats
.num_siv_unimplemented
++;
2660 else if (CF_NO_DEPENDENCE_P (*overlaps_a
)
2661 || CF_NO_DEPENDENCE_P (*overlaps_b
))
2662 dependence_stats
.num_siv_independent
++;
2664 dependence_stats
.num_siv_dependent
++;
2667 goto siv_subscript_dontknow
;
2672 siv_subscript_dontknow
:;
2673 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2674 fprintf (dump_file
, "siv test failed: unimplemented.\n");
2675 *overlaps_a
= conflict_fn_not_known ();
2676 *overlaps_b
= conflict_fn_not_known ();
2677 *last_conflicts
= chrec_dont_know
;
2678 dependence_stats
.num_siv_unimplemented
++;
2681 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2682 fprintf (dump_file
, ")\n");
2685 /* Returns false if we can prove that the greatest common divisor of the steps
2686 of CHREC does not divide CST, false otherwise. */
2689 gcd_of_steps_may_divide_p (const_tree chrec
, const_tree cst
)
2691 HOST_WIDE_INT cd
= 0, val
;
2694 if (!host_integerp (cst
, 0))
2696 val
= tree_low_cst (cst
, 0);
2698 while (TREE_CODE (chrec
) == POLYNOMIAL_CHREC
)
2700 step
= CHREC_RIGHT (chrec
);
2701 if (!host_integerp (step
, 0))
2703 cd
= gcd (cd
, tree_low_cst (step
, 0));
2704 chrec
= CHREC_LEFT (chrec
);
2707 return val
% cd
== 0;
2710 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2711 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2712 functions that describe the relation between the elements accessed
2713 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2716 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2719 analyze_miv_subscript (tree chrec_a
,
2721 conflict_function
**overlaps_a
,
2722 conflict_function
**overlaps_b
,
2723 tree
*last_conflicts
,
2724 struct loop
*loop_nest
)
2726 tree type
, difference
;
2728 dependence_stats
.num_miv
++;
2729 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2730 fprintf (dump_file
, "(analyze_miv_subscript \n");
2732 type
= signed_type_for_types (TREE_TYPE (chrec_a
), TREE_TYPE (chrec_b
));
2733 chrec_a
= chrec_convert (type
, chrec_a
, NULL
);
2734 chrec_b
= chrec_convert (type
, chrec_b
, NULL
);
2735 difference
= chrec_fold_minus (type
, chrec_a
, chrec_b
);
2737 if (eq_evolutions_p (chrec_a
, chrec_b
))
2739 /* Access functions are the same: all the elements are accessed
2740 in the same order. */
2741 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2742 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2743 *last_conflicts
= estimated_loop_iterations_tree
2744 (get_chrec_loop (chrec_a
), true);
2745 dependence_stats
.num_miv_dependent
++;
2748 else if (evolution_function_is_constant_p (difference
)
2749 /* For the moment, the following is verified:
2750 evolution_function_is_affine_multivariate_p (chrec_a,
2752 && !gcd_of_steps_may_divide_p (chrec_a
, difference
))
2754 /* testsuite/.../ssa-chrec-33.c
2755 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2757 The difference is 1, and all the evolution steps are multiples
2758 of 2, consequently there are no overlapping elements. */
2759 *overlaps_a
= conflict_fn_no_dependence ();
2760 *overlaps_b
= conflict_fn_no_dependence ();
2761 *last_conflicts
= integer_zero_node
;
2762 dependence_stats
.num_miv_independent
++;
2765 else if (evolution_function_is_affine_multivariate_p (chrec_a
, loop_nest
->num
)
2766 && !chrec_contains_symbols (chrec_a
)
2767 && evolution_function_is_affine_multivariate_p (chrec_b
, loop_nest
->num
)
2768 && !chrec_contains_symbols (chrec_b
))
2770 /* testsuite/.../ssa-chrec-35.c
2771 {0, +, 1}_2 vs. {0, +, 1}_3
2772 the overlapping elements are respectively located at iterations:
2773 {0, +, 1}_x and {0, +, 1}_x,
2774 in other words, we have the equality:
2775 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2778 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2779 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2781 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2782 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2784 analyze_subscript_affine_affine (chrec_a
, chrec_b
,
2785 overlaps_a
, overlaps_b
, last_conflicts
);
2787 if (CF_NOT_KNOWN_P (*overlaps_a
)
2788 || CF_NOT_KNOWN_P (*overlaps_b
))
2789 dependence_stats
.num_miv_unimplemented
++;
2790 else if (CF_NO_DEPENDENCE_P (*overlaps_a
)
2791 || CF_NO_DEPENDENCE_P (*overlaps_b
))
2792 dependence_stats
.num_miv_independent
++;
2794 dependence_stats
.num_miv_dependent
++;
2799 /* When the analysis is too difficult, answer "don't know". */
2800 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2801 fprintf (dump_file
, "analyze_miv_subscript test failed: unimplemented.\n");
2803 *overlaps_a
= conflict_fn_not_known ();
2804 *overlaps_b
= conflict_fn_not_known ();
2805 *last_conflicts
= chrec_dont_know
;
2806 dependence_stats
.num_miv_unimplemented
++;
2809 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2810 fprintf (dump_file
, ")\n");
2813 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2814 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2815 OVERLAP_ITERATIONS_B are initialized with two functions that
2816 describe the iterations that contain conflicting elements.
2818 Remark: For an integer k >= 0, the following equality is true:
2820 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2824 analyze_overlapping_iterations (tree chrec_a
,
2826 conflict_function
**overlap_iterations_a
,
2827 conflict_function
**overlap_iterations_b
,
2828 tree
*last_conflicts
, struct loop
*loop_nest
)
2830 unsigned int lnn
= loop_nest
->num
;
2832 dependence_stats
.num_subscript_tests
++;
2834 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2836 fprintf (dump_file
, "(analyze_overlapping_iterations \n");
2837 fprintf (dump_file
, " (chrec_a = ");
2838 print_generic_expr (dump_file
, chrec_a
, 0);
2839 fprintf (dump_file
, ")\n (chrec_b = ");
2840 print_generic_expr (dump_file
, chrec_b
, 0);
2841 fprintf (dump_file
, ")\n");
2844 if (chrec_a
== NULL_TREE
2845 || chrec_b
== NULL_TREE
2846 || chrec_contains_undetermined (chrec_a
)
2847 || chrec_contains_undetermined (chrec_b
))
2849 dependence_stats
.num_subscript_undetermined
++;
2851 *overlap_iterations_a
= conflict_fn_not_known ();
2852 *overlap_iterations_b
= conflict_fn_not_known ();
2855 /* If they are the same chrec, and are affine, they overlap
2856 on every iteration. */
2857 else if (eq_evolutions_p (chrec_a
, chrec_b
)
2858 && (evolution_function_is_affine_multivariate_p (chrec_a
, lnn
)
2859 || operand_equal_p (chrec_a
, chrec_b
, 0)))
2861 dependence_stats
.num_same_subscript_function
++;
2862 *overlap_iterations_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2863 *overlap_iterations_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2864 *last_conflicts
= chrec_dont_know
;
2867 /* If they aren't the same, and aren't affine, we can't do anything
2869 else if ((chrec_contains_symbols (chrec_a
)
2870 || chrec_contains_symbols (chrec_b
))
2871 && (!evolution_function_is_affine_multivariate_p (chrec_a
, lnn
)
2872 || !evolution_function_is_affine_multivariate_p (chrec_b
, lnn
)))
2874 dependence_stats
.num_subscript_undetermined
++;
2875 *overlap_iterations_a
= conflict_fn_not_known ();
2876 *overlap_iterations_b
= conflict_fn_not_known ();
2879 else if (ziv_subscript_p (chrec_a
, chrec_b
))
2880 analyze_ziv_subscript (chrec_a
, chrec_b
,
2881 overlap_iterations_a
, overlap_iterations_b
,
2884 else if (siv_subscript_p (chrec_a
, chrec_b
))
2885 analyze_siv_subscript (chrec_a
, chrec_b
,
2886 overlap_iterations_a
, overlap_iterations_b
,
2887 last_conflicts
, lnn
);
2890 analyze_miv_subscript (chrec_a
, chrec_b
,
2891 overlap_iterations_a
, overlap_iterations_b
,
2892 last_conflicts
, loop_nest
);
2894 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2896 fprintf (dump_file
, " (overlap_iterations_a = ");
2897 dump_conflict_function (dump_file
, *overlap_iterations_a
);
2898 fprintf (dump_file
, ")\n (overlap_iterations_b = ");
2899 dump_conflict_function (dump_file
, *overlap_iterations_b
);
2900 fprintf (dump_file
, ")\n");
2901 fprintf (dump_file
, ")\n");
2905 /* Helper function for uniquely inserting distance vectors. */
2908 save_dist_v (struct data_dependence_relation
*ddr
, lambda_vector dist_v
)
2913 FOR_EACH_VEC_ELT (lambda_vector
, DDR_DIST_VECTS (ddr
), i
, v
)
2914 if (lambda_vector_equal (v
, dist_v
, DDR_NB_LOOPS (ddr
)))
2917 VEC_safe_push (lambda_vector
, heap
, DDR_DIST_VECTS (ddr
), dist_v
);
2920 /* Helper function for uniquely inserting direction vectors. */
2923 save_dir_v (struct data_dependence_relation
*ddr
, lambda_vector dir_v
)
2928 FOR_EACH_VEC_ELT (lambda_vector
, DDR_DIR_VECTS (ddr
), i
, v
)
2929 if (lambda_vector_equal (v
, dir_v
, DDR_NB_LOOPS (ddr
)))
2932 VEC_safe_push (lambda_vector
, heap
, DDR_DIR_VECTS (ddr
), dir_v
);
2935 /* Add a distance of 1 on all the loops outer than INDEX. If we
2936 haven't yet determined a distance for this outer loop, push a new
2937 distance vector composed of the previous distance, and a distance
2938 of 1 for this outer loop. Example:
2946 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
2947 save (0, 1), then we have to save (1, 0). */
2950 add_outer_distances (struct data_dependence_relation
*ddr
,
2951 lambda_vector dist_v
, int index
)
2953 /* For each outer loop where init_v is not set, the accesses are
2954 in dependence of distance 1 in the loop. */
2955 while (--index
>= 0)
2957 lambda_vector save_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
2958 lambda_vector_copy (dist_v
, save_v
, DDR_NB_LOOPS (ddr
));
2960 save_dist_v (ddr
, save_v
);
2964 /* Return false when fail to represent the data dependence as a
2965 distance vector. INIT_B is set to true when a component has been
2966 added to the distance vector DIST_V. INDEX_CARRY is then set to
2967 the index in DIST_V that carries the dependence. */
2970 build_classic_dist_vector_1 (struct data_dependence_relation
*ddr
,
2971 struct data_reference
*ddr_a
,
2972 struct data_reference
*ddr_b
,
2973 lambda_vector dist_v
, bool *init_b
,
2977 lambda_vector init_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
2979 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
2981 tree access_fn_a
, access_fn_b
;
2982 struct subscript
*subscript
= DDR_SUBSCRIPT (ddr
, i
);
2984 if (chrec_contains_undetermined (SUB_DISTANCE (subscript
)))
2986 non_affine_dependence_relation (ddr
);
2990 access_fn_a
= DR_ACCESS_FN (ddr_a
, i
);
2991 access_fn_b
= DR_ACCESS_FN (ddr_b
, i
);
2993 if (TREE_CODE (access_fn_a
) == POLYNOMIAL_CHREC
2994 && TREE_CODE (access_fn_b
) == POLYNOMIAL_CHREC
)
2997 int var_a
= CHREC_VARIABLE (access_fn_a
);
2998 int var_b
= CHREC_VARIABLE (access_fn_b
);
3001 || chrec_contains_undetermined (SUB_DISTANCE (subscript
)))
3003 non_affine_dependence_relation (ddr
);
3007 dist
= int_cst_value (SUB_DISTANCE (subscript
));
3008 index
= index_in_loop_nest (var_a
, DDR_LOOP_NEST (ddr
));
3009 *index_carry
= MIN (index
, *index_carry
);
3011 /* This is the subscript coupling test. If we have already
3012 recorded a distance for this loop (a distance coming from
3013 another subscript), it should be the same. For example,
3014 in the following code, there is no dependence:
3021 if (init_v
[index
] != 0 && dist_v
[index
] != dist
)
3023 finalize_ddr_dependent (ddr
, chrec_known
);
3027 dist_v
[index
] = dist
;
3031 else if (!operand_equal_p (access_fn_a
, access_fn_b
, 0))
3033 /* This can be for example an affine vs. constant dependence
3034 (T[i] vs. T[3]) that is not an affine dependence and is
3035 not representable as a distance vector. */
3036 non_affine_dependence_relation (ddr
);
3044 /* Return true when the DDR contains only constant access functions. */
3047 constant_access_functions (const struct data_dependence_relation
*ddr
)
3051 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
3052 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr
), i
))
3053 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr
), i
)))
3059 /* Helper function for the case where DDR_A and DDR_B are the same
3060 multivariate access function with a constant step. For an example
3064 add_multivariate_self_dist (struct data_dependence_relation
*ddr
, tree c_2
)
3067 tree c_1
= CHREC_LEFT (c_2
);
3068 tree c_0
= CHREC_LEFT (c_1
);
3069 lambda_vector dist_v
;
3072 /* Polynomials with more than 2 variables are not handled yet. When
3073 the evolution steps are parameters, it is not possible to
3074 represent the dependence using classical distance vectors. */
3075 if (TREE_CODE (c_0
) != INTEGER_CST
3076 || TREE_CODE (CHREC_RIGHT (c_1
)) != INTEGER_CST
3077 || TREE_CODE (CHREC_RIGHT (c_2
)) != INTEGER_CST
)
3079 DDR_AFFINE_P (ddr
) = false;
3083 x_2
= index_in_loop_nest (CHREC_VARIABLE (c_2
), DDR_LOOP_NEST (ddr
));
3084 x_1
= index_in_loop_nest (CHREC_VARIABLE (c_1
), DDR_LOOP_NEST (ddr
));
3086 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
3087 dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3088 v1
= int_cst_value (CHREC_RIGHT (c_1
));
3089 v2
= int_cst_value (CHREC_RIGHT (c_2
));
3102 save_dist_v (ddr
, dist_v
);
3104 add_outer_distances (ddr
, dist_v
, x_1
);
3107 /* Helper function for the case where DDR_A and DDR_B are the same
3108 access functions. */
3111 add_other_self_distances (struct data_dependence_relation
*ddr
)
3113 lambda_vector dist_v
;
3115 int index_carry
= DDR_NB_LOOPS (ddr
);
3117 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
3119 tree access_fun
= DR_ACCESS_FN (DDR_A (ddr
), i
);
3121 if (TREE_CODE (access_fun
) == POLYNOMIAL_CHREC
)
3123 if (!evolution_function_is_univariate_p (access_fun
))
3125 if (DDR_NUM_SUBSCRIPTS (ddr
) != 1)
3127 DDR_ARE_DEPENDENT (ddr
) = chrec_dont_know
;
3131 access_fun
= DR_ACCESS_FN (DDR_A (ddr
), 0);
3133 if (TREE_CODE (CHREC_LEFT (access_fun
)) == POLYNOMIAL_CHREC
)
3134 add_multivariate_self_dist (ddr
, access_fun
);
3136 /* The evolution step is not constant: it varies in
3137 the outer loop, so this cannot be represented by a
3138 distance vector. For example in pr34635.c the
3139 evolution is {0, +, {0, +, 4}_1}_2. */
3140 DDR_AFFINE_P (ddr
) = false;
3145 index_carry
= MIN (index_carry
,
3146 index_in_loop_nest (CHREC_VARIABLE (access_fun
),
3147 DDR_LOOP_NEST (ddr
)));
3151 dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3152 add_outer_distances (ddr
, dist_v
, index_carry
);
3156 insert_innermost_unit_dist_vector (struct data_dependence_relation
*ddr
)
3158 lambda_vector dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3160 dist_v
[DDR_INNER_LOOP (ddr
)] = 1;
3161 save_dist_v (ddr
, dist_v
);
3164 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
3165 is the case for example when access functions are the same and
3166 equal to a constant, as in:
3173 in which case the distance vectors are (0) and (1). */
3176 add_distance_for_zero_overlaps (struct data_dependence_relation
*ddr
)
3180 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
3182 subscript_p sub
= DDR_SUBSCRIPT (ddr
, i
);
3183 conflict_function
*ca
= SUB_CONFLICTS_IN_A (sub
);
3184 conflict_function
*cb
= SUB_CONFLICTS_IN_B (sub
);
3186 for (j
= 0; j
< ca
->n
; j
++)
3187 if (affine_function_zero_p (ca
->fns
[j
]))
3189 insert_innermost_unit_dist_vector (ddr
);
3193 for (j
= 0; j
< cb
->n
; j
++)
3194 if (affine_function_zero_p (cb
->fns
[j
]))
3196 insert_innermost_unit_dist_vector (ddr
);
3202 /* Compute the classic per loop distance vector. DDR is the data
3203 dependence relation to build a vector from. Return false when fail
3204 to represent the data dependence as a distance vector. */
3207 build_classic_dist_vector (struct data_dependence_relation
*ddr
,
3208 struct loop
*loop_nest
)
3210 bool init_b
= false;
3211 int index_carry
= DDR_NB_LOOPS (ddr
);
3212 lambda_vector dist_v
;
3214 if (DDR_ARE_DEPENDENT (ddr
) != NULL_TREE
)
3217 if (same_access_functions (ddr
))
3219 /* Save the 0 vector. */
3220 dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3221 save_dist_v (ddr
, dist_v
);
3223 if (constant_access_functions (ddr
))
3224 add_distance_for_zero_overlaps (ddr
);
3226 if (DDR_NB_LOOPS (ddr
) > 1)
3227 add_other_self_distances (ddr
);
3232 dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3233 if (!build_classic_dist_vector_1 (ddr
, DDR_A (ddr
), DDR_B (ddr
),
3234 dist_v
, &init_b
, &index_carry
))
3237 /* Save the distance vector if we initialized one. */
3240 /* Verify a basic constraint: classic distance vectors should
3241 always be lexicographically positive.
3243 Data references are collected in the order of execution of
3244 the program, thus for the following loop
3246 | for (i = 1; i < 100; i++)
3247 | for (j = 1; j < 100; j++)
3249 | t = T[j+1][i-1]; // A
3250 | T[j][i] = t + 2; // B
3253 references are collected following the direction of the wind:
3254 A then B. The data dependence tests are performed also
3255 following this order, such that we're looking at the distance
3256 separating the elements accessed by A from the elements later
3257 accessed by B. But in this example, the distance returned by
3258 test_dep (A, B) is lexicographically negative (-1, 1), that
3259 means that the access A occurs later than B with respect to
3260 the outer loop, ie. we're actually looking upwind. In this
3261 case we solve test_dep (B, A) looking downwind to the
3262 lexicographically positive solution, that returns the
3263 distance vector (1, -1). */
3264 if (!lambda_vector_lexico_pos (dist_v
, DDR_NB_LOOPS (ddr
)))
3266 lambda_vector save_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3267 if (!subscript_dependence_tester_1 (ddr
, DDR_B (ddr
), DDR_A (ddr
),
3270 compute_subscript_distance (ddr
);
3271 if (!build_classic_dist_vector_1 (ddr
, DDR_B (ddr
), DDR_A (ddr
),
3272 save_v
, &init_b
, &index_carry
))
3274 save_dist_v (ddr
, save_v
);
3275 DDR_REVERSED_P (ddr
) = true;
3277 /* In this case there is a dependence forward for all the
3280 | for (k = 1; k < 100; k++)
3281 | for (i = 1; i < 100; i++)
3282 | for (j = 1; j < 100; j++)
3284 | t = T[j+1][i-1]; // A
3285 | T[j][i] = t + 2; // B
3293 if (DDR_NB_LOOPS (ddr
) > 1)
3295 add_outer_distances (ddr
, save_v
, index_carry
);
3296 add_outer_distances (ddr
, dist_v
, index_carry
);
3301 lambda_vector save_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3302 lambda_vector_copy (dist_v
, save_v
, DDR_NB_LOOPS (ddr
));
3304 if (DDR_NB_LOOPS (ddr
) > 1)
3306 lambda_vector opposite_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3308 if (!subscript_dependence_tester_1 (ddr
, DDR_B (ddr
),
3309 DDR_A (ddr
), loop_nest
))
3311 compute_subscript_distance (ddr
);
3312 if (!build_classic_dist_vector_1 (ddr
, DDR_B (ddr
), DDR_A (ddr
),
3313 opposite_v
, &init_b
,
3317 save_dist_v (ddr
, save_v
);
3318 add_outer_distances (ddr
, dist_v
, index_carry
);
3319 add_outer_distances (ddr
, opposite_v
, index_carry
);
3322 save_dist_v (ddr
, save_v
);
3327 /* There is a distance of 1 on all the outer loops: Example:
3328 there is a dependence of distance 1 on loop_1 for the array A.
3334 add_outer_distances (ddr
, dist_v
,
3335 lambda_vector_first_nz (dist_v
,
3336 DDR_NB_LOOPS (ddr
), 0));
3339 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3343 fprintf (dump_file
, "(build_classic_dist_vector\n");
3344 for (i
= 0; i
< DDR_NUM_DIST_VECTS (ddr
); i
++)
3346 fprintf (dump_file
, " dist_vector = (");
3347 print_lambda_vector (dump_file
, DDR_DIST_VECT (ddr
, i
),
3348 DDR_NB_LOOPS (ddr
));
3349 fprintf (dump_file
, " )\n");
3351 fprintf (dump_file
, ")\n");
3357 /* Return the direction for a given distance.
3358 FIXME: Computing dir this way is suboptimal, since dir can catch
3359 cases that dist is unable to represent. */
3361 static inline enum data_dependence_direction
3362 dir_from_dist (int dist
)
3365 return dir_positive
;
3367 return dir_negative
;
3372 /* Compute the classic per loop direction vector. DDR is the data
3373 dependence relation to build a vector from. */
3376 build_classic_dir_vector (struct data_dependence_relation
*ddr
)
3379 lambda_vector dist_v
;
3381 FOR_EACH_VEC_ELT (lambda_vector
, DDR_DIST_VECTS (ddr
), i
, dist_v
)
3383 lambda_vector dir_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3385 for (j
= 0; j
< DDR_NB_LOOPS (ddr
); j
++)
3386 dir_v
[j
] = dir_from_dist (dist_v
[j
]);
3388 save_dir_v (ddr
, dir_v
);
3392 /* Helper function. Returns true when there is a dependence between
3393 data references DRA and DRB. */
3396 subscript_dependence_tester_1 (struct data_dependence_relation
*ddr
,
3397 struct data_reference
*dra
,
3398 struct data_reference
*drb
,
3399 struct loop
*loop_nest
)
3402 tree last_conflicts
;
3403 struct subscript
*subscript
;
3405 for (i
= 0; VEC_iterate (subscript_p
, DDR_SUBSCRIPTS (ddr
), i
, subscript
);
3408 conflict_function
*overlaps_a
, *overlaps_b
;
3410 analyze_overlapping_iterations (DR_ACCESS_FN (dra
, i
),
3411 DR_ACCESS_FN (drb
, i
),
3412 &overlaps_a
, &overlaps_b
,
3413 &last_conflicts
, loop_nest
);
3415 if (CF_NOT_KNOWN_P (overlaps_a
)
3416 || CF_NOT_KNOWN_P (overlaps_b
))
3418 finalize_ddr_dependent (ddr
, chrec_dont_know
);
3419 dependence_stats
.num_dependence_undetermined
++;
3420 free_conflict_function (overlaps_a
);
3421 free_conflict_function (overlaps_b
);
3425 else if (CF_NO_DEPENDENCE_P (overlaps_a
)
3426 || CF_NO_DEPENDENCE_P (overlaps_b
))
3428 finalize_ddr_dependent (ddr
, chrec_known
);
3429 dependence_stats
.num_dependence_independent
++;
3430 free_conflict_function (overlaps_a
);
3431 free_conflict_function (overlaps_b
);
3437 if (SUB_CONFLICTS_IN_A (subscript
))
3438 free_conflict_function (SUB_CONFLICTS_IN_A (subscript
));
3439 if (SUB_CONFLICTS_IN_B (subscript
))
3440 free_conflict_function (SUB_CONFLICTS_IN_B (subscript
));
3442 SUB_CONFLICTS_IN_A (subscript
) = overlaps_a
;
3443 SUB_CONFLICTS_IN_B (subscript
) = overlaps_b
;
3444 SUB_LAST_CONFLICT (subscript
) = last_conflicts
;
3451 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3454 subscript_dependence_tester (struct data_dependence_relation
*ddr
,
3455 struct loop
*loop_nest
)
3458 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3459 fprintf (dump_file
, "(subscript_dependence_tester \n");
3461 if (subscript_dependence_tester_1 (ddr
, DDR_A (ddr
), DDR_B (ddr
), loop_nest
))
3462 dependence_stats
.num_dependence_dependent
++;
3464 compute_subscript_distance (ddr
);
3465 if (build_classic_dist_vector (ddr
, loop_nest
))
3466 build_classic_dir_vector (ddr
);
3468 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3469 fprintf (dump_file
, ")\n");
3472 /* Returns true when all the access functions of A are affine or
3473 constant with respect to LOOP_NEST. */
3476 access_functions_are_affine_or_constant_p (const struct data_reference
*a
,
3477 const struct loop
*loop_nest
)
3480 VEC(tree
,heap
) *fns
= DR_ACCESS_FNS (a
);
3483 FOR_EACH_VEC_ELT (tree
, fns
, i
, t
)
3484 if (!evolution_function_is_invariant_p (t
, loop_nest
->num
)
3485 && !evolution_function_is_affine_multivariate_p (t
, loop_nest
->num
))
3491 /* Initializes an equation for an OMEGA problem using the information
3492 contained in the ACCESS_FUN. Returns true when the operation
3495 PB is the omega constraint system.
3496 EQ is the number of the equation to be initialized.
3497 OFFSET is used for shifting the variables names in the constraints:
3498 a constrain is composed of 2 * the number of variables surrounding
3499 dependence accesses. OFFSET is set either to 0 for the first n variables,
3500 then it is set to n.
3501 ACCESS_FUN is expected to be an affine chrec. */
3504 init_omega_eq_with_af (omega_pb pb
, unsigned eq
,
3505 unsigned int offset
, tree access_fun
,
3506 struct data_dependence_relation
*ddr
)
3508 switch (TREE_CODE (access_fun
))
3510 case POLYNOMIAL_CHREC
:
3512 tree left
= CHREC_LEFT (access_fun
);
3513 tree right
= CHREC_RIGHT (access_fun
);
3514 int var
= CHREC_VARIABLE (access_fun
);
3517 if (TREE_CODE (right
) != INTEGER_CST
)
3520 var_idx
= index_in_loop_nest (var
, DDR_LOOP_NEST (ddr
));
3521 pb
->eqs
[eq
].coef
[offset
+ var_idx
+ 1] = int_cst_value (right
);
3523 /* Compute the innermost loop index. */
3524 DDR_INNER_LOOP (ddr
) = MAX (DDR_INNER_LOOP (ddr
), var_idx
);
3527 pb
->eqs
[eq
].coef
[var_idx
+ DDR_NB_LOOPS (ddr
) + 1]
3528 += int_cst_value (right
);
3530 switch (TREE_CODE (left
))
3532 case POLYNOMIAL_CHREC
:
3533 return init_omega_eq_with_af (pb
, eq
, offset
, left
, ddr
);
3536 pb
->eqs
[eq
].coef
[0] += int_cst_value (left
);
3545 pb
->eqs
[eq
].coef
[0] += int_cst_value (access_fun
);
3553 /* As explained in the comments preceding init_omega_for_ddr, we have
3554 to set up a system for each loop level, setting outer loops
3555 variation to zero, and current loop variation to positive or zero.
3556 Save each lexico positive distance vector. */
3559 omega_extract_distance_vectors (omega_pb pb
,
3560 struct data_dependence_relation
*ddr
)
3564 struct loop
*loopi
, *loopj
;
3565 enum omega_result res
;
3567 /* Set a new problem for each loop in the nest. The basis is the
3568 problem that we have initialized until now. On top of this we
3569 add new constraints. */
3570 for (i
= 0; i
<= DDR_INNER_LOOP (ddr
)
3571 && VEC_iterate (loop_p
, DDR_LOOP_NEST (ddr
), i
, loopi
); i
++)
3574 omega_pb copy
= omega_alloc_problem (2 * DDR_NB_LOOPS (ddr
),
3575 DDR_NB_LOOPS (ddr
));
3577 omega_copy_problem (copy
, pb
);
3579 /* For all the outer loops "loop_j", add "dj = 0". */
3581 j
< i
&& VEC_iterate (loop_p
, DDR_LOOP_NEST (ddr
), j
, loopj
); j
++)
3583 eq
= omega_add_zero_eq (copy
, omega_black
);
3584 copy
->eqs
[eq
].coef
[j
+ 1] = 1;
3587 /* For "loop_i", add "0 <= di". */
3588 geq
= omega_add_zero_geq (copy
, omega_black
);
3589 copy
->geqs
[geq
].coef
[i
+ 1] = 1;
3591 /* Reduce the constraint system, and test that the current
3592 problem is feasible. */
3593 res
= omega_simplify_problem (copy
);
3594 if (res
== omega_false
3595 || res
== omega_unknown
3596 || copy
->num_geqs
> (int) DDR_NB_LOOPS (ddr
))
3599 for (eq
= 0; eq
< copy
->num_subs
; eq
++)
3600 if (copy
->subs
[eq
].key
== (int) i
+ 1)
3602 dist
= copy
->subs
[eq
].coef
[0];
3608 /* Reinitialize problem... */
3609 omega_copy_problem (copy
, pb
);
3611 j
< i
&& VEC_iterate (loop_p
, DDR_LOOP_NEST (ddr
), j
, loopj
); j
++)
3613 eq
= omega_add_zero_eq (copy
, omega_black
);
3614 copy
->eqs
[eq
].coef
[j
+ 1] = 1;
3617 /* ..., but this time "di = 1". */
3618 eq
= omega_add_zero_eq (copy
, omega_black
);
3619 copy
->eqs
[eq
].coef
[i
+ 1] = 1;
3620 copy
->eqs
[eq
].coef
[0] = -1;
3622 res
= omega_simplify_problem (copy
);
3623 if (res
== omega_false
3624 || res
== omega_unknown
3625 || copy
->num_geqs
> (int) DDR_NB_LOOPS (ddr
))
3628 for (eq
= 0; eq
< copy
->num_subs
; eq
++)
3629 if (copy
->subs
[eq
].key
== (int) i
+ 1)
3631 dist
= copy
->subs
[eq
].coef
[0];
3637 /* Save the lexicographically positive distance vector. */
3640 lambda_vector dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3641 lambda_vector dir_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3645 for (eq
= 0; eq
< copy
->num_subs
; eq
++)
3646 if (copy
->subs
[eq
].key
> 0)
3648 dist
= copy
->subs
[eq
].coef
[0];
3649 dist_v
[copy
->subs
[eq
].key
- 1] = dist
;
3652 for (j
= 0; j
< DDR_NB_LOOPS (ddr
); j
++)
3653 dir_v
[j
] = dir_from_dist (dist_v
[j
]);
3655 save_dist_v (ddr
, dist_v
);
3656 save_dir_v (ddr
, dir_v
);
3660 omega_free_problem (copy
);
3664 /* This is called for each subscript of a tuple of data references:
3665 insert an equality for representing the conflicts. */
3668 omega_setup_subscript (tree access_fun_a
, tree access_fun_b
,
3669 struct data_dependence_relation
*ddr
,
3670 omega_pb pb
, bool *maybe_dependent
)
3673 tree type
= signed_type_for_types (TREE_TYPE (access_fun_a
),
3674 TREE_TYPE (access_fun_b
));
3675 tree fun_a
= chrec_convert (type
, access_fun_a
, NULL
);
3676 tree fun_b
= chrec_convert (type
, access_fun_b
, NULL
);
3677 tree difference
= chrec_fold_minus (type
, fun_a
, fun_b
);
3680 /* When the fun_a - fun_b is not constant, the dependence is not
3681 captured by the classic distance vector representation. */
3682 if (TREE_CODE (difference
) != INTEGER_CST
)
3686 if (ziv_subscript_p (fun_a
, fun_b
) && !integer_zerop (difference
))
3688 /* There is no dependence. */
3689 *maybe_dependent
= false;
3693 minus_one
= build_int_cst (type
, -1);
3694 fun_b
= chrec_fold_multiply (type
, fun_b
, minus_one
);
3696 eq
= omega_add_zero_eq (pb
, omega_black
);
3697 if (!init_omega_eq_with_af (pb
, eq
, DDR_NB_LOOPS (ddr
), fun_a
, ddr
)
3698 || !init_omega_eq_with_af (pb
, eq
, 0, fun_b
, ddr
))
3699 /* There is probably a dependence, but the system of
3700 constraints cannot be built: answer "don't know". */
3704 if (DDR_NB_LOOPS (ddr
) != 0 && pb
->eqs
[eq
].coef
[0]
3705 && !int_divides_p (lambda_vector_gcd
3706 ((lambda_vector
) &(pb
->eqs
[eq
].coef
[1]),
3707 2 * DDR_NB_LOOPS (ddr
)),
3708 pb
->eqs
[eq
].coef
[0]))
3710 /* There is no dependence. */
3711 *maybe_dependent
= false;
3718 /* Helper function, same as init_omega_for_ddr but specialized for
3719 data references A and B. */
3722 init_omega_for_ddr_1 (struct data_reference
*dra
, struct data_reference
*drb
,
3723 struct data_dependence_relation
*ddr
,
3724 omega_pb pb
, bool *maybe_dependent
)
3729 unsigned nb_loops
= DDR_NB_LOOPS (ddr
);
3731 /* Insert an equality per subscript. */
3732 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
3734 if (!omega_setup_subscript (DR_ACCESS_FN (dra
, i
), DR_ACCESS_FN (drb
, i
),
3735 ddr
, pb
, maybe_dependent
))
3737 else if (*maybe_dependent
== false)
3739 /* There is no dependence. */
3740 DDR_ARE_DEPENDENT (ddr
) = chrec_known
;
3745 /* Insert inequalities: constraints corresponding to the iteration
3746 domain, i.e. the loops surrounding the references "loop_x" and
3747 the distance variables "dx". The layout of the OMEGA
3748 representation is as follows:
3749 - coef[0] is the constant
3750 - coef[1..nb_loops] are the protected variables that will not be
3751 removed by the solver: the "dx"
3752 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3754 for (i
= 0; i
<= DDR_INNER_LOOP (ddr
)
3755 && VEC_iterate (loop_p
, DDR_LOOP_NEST (ddr
), i
, loopi
); i
++)
3757 HOST_WIDE_INT nbi
= estimated_loop_iterations_int (loopi
, false);
3760 ineq
= omega_add_zero_geq (pb
, omega_black
);
3761 pb
->geqs
[ineq
].coef
[i
+ nb_loops
+ 1] = 1;
3763 /* 0 <= loop_x + dx */
3764 ineq
= omega_add_zero_geq (pb
, omega_black
);
3765 pb
->geqs
[ineq
].coef
[i
+ nb_loops
+ 1] = 1;
3766 pb
->geqs
[ineq
].coef
[i
+ 1] = 1;
3770 /* loop_x <= nb_iters */
3771 ineq
= omega_add_zero_geq (pb
, omega_black
);
3772 pb
->geqs
[ineq
].coef
[i
+ nb_loops
+ 1] = -1;
3773 pb
->geqs
[ineq
].coef
[0] = nbi
;
3775 /* loop_x + dx <= nb_iters */
3776 ineq
= omega_add_zero_geq (pb
, omega_black
);
3777 pb
->geqs
[ineq
].coef
[i
+ nb_loops
+ 1] = -1;
3778 pb
->geqs
[ineq
].coef
[i
+ 1] = -1;
3779 pb
->geqs
[ineq
].coef
[0] = nbi
;
3781 /* A step "dx" bigger than nb_iters is not feasible, so
3782 add "0 <= nb_iters + dx", */
3783 ineq
= omega_add_zero_geq (pb
, omega_black
);
3784 pb
->geqs
[ineq
].coef
[i
+ 1] = 1;
3785 pb
->geqs
[ineq
].coef
[0] = nbi
;
3786 /* and "dx <= nb_iters". */
3787 ineq
= omega_add_zero_geq (pb
, omega_black
);
3788 pb
->geqs
[ineq
].coef
[i
+ 1] = -1;
3789 pb
->geqs
[ineq
].coef
[0] = nbi
;
3793 omega_extract_distance_vectors (pb
, ddr
);
3798 /* Sets up the Omega dependence problem for the data dependence
3799 relation DDR. Returns false when the constraint system cannot be
3800 built, ie. when the test answers "don't know". Returns true
3801 otherwise, and when independence has been proved (using one of the
3802 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3803 set MAYBE_DEPENDENT to true.
3805 Example: for setting up the dependence system corresponding to the
3806 conflicting accesses
3811 | ... A[2*j, 2*(i + j)]
3815 the following constraints come from the iteration domain:
3822 where di, dj are the distance variables. The constraints
3823 representing the conflicting elements are:
3826 i + 1 = 2 * (i + di + j + dj)
3828 For asking that the resulting distance vector (di, dj) be
3829 lexicographically positive, we insert the constraint "di >= 0". If
3830 "di = 0" in the solution, we fix that component to zero, and we
3831 look at the inner loops: we set a new problem where all the outer
3832 loop distances are zero, and fix this inner component to be
3833 positive. When one of the components is positive, we save that
3834 distance, and set a new problem where the distance on this loop is
3835 zero, searching for other distances in the inner loops. Here is
3836 the classic example that illustrates that we have to set for each
3837 inner loop a new problem:
3845 we have to save two distances (1, 0) and (0, 1).
3847 Given two array references, refA and refB, we have to set the
3848 dependence problem twice, refA vs. refB and refB vs. refA, and we
3849 cannot do a single test, as refB might occur before refA in the
3850 inner loops, and the contrary when considering outer loops: ex.
3855 | T[{1,+,1}_2][{1,+,1}_1] // refA
3856 | T[{2,+,1}_2][{0,+,1}_1] // refB
3861 refB touches the elements in T before refA, and thus for the same
3862 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3863 but for successive loop_0 iterations, we have (1, -1, 1)
3865 The Omega solver expects the distance variables ("di" in the
3866 previous example) to come first in the constraint system (as
3867 variables to be protected, or "safe" variables), the constraint
3868 system is built using the following layout:
3870 "cst | distance vars | index vars".
3874 init_omega_for_ddr (struct data_dependence_relation
*ddr
,
3875 bool *maybe_dependent
)
3880 *maybe_dependent
= true;
3882 if (same_access_functions (ddr
))
3885 lambda_vector dir_v
;
3887 /* Save the 0 vector. */
3888 save_dist_v (ddr
, lambda_vector_new (DDR_NB_LOOPS (ddr
)));
3889 dir_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3890 for (j
= 0; j
< DDR_NB_LOOPS (ddr
); j
++)
3891 dir_v
[j
] = dir_equal
;
3892 save_dir_v (ddr
, dir_v
);
3894 /* Save the dependences carried by outer loops. */
3895 pb
= omega_alloc_problem (2 * DDR_NB_LOOPS (ddr
), DDR_NB_LOOPS (ddr
));
3896 res
= init_omega_for_ddr_1 (DDR_A (ddr
), DDR_B (ddr
), ddr
, pb
,
3898 omega_free_problem (pb
);
3902 /* Omega expects the protected variables (those that have to be kept
3903 after elimination) to appear first in the constraint system.
3904 These variables are the distance variables. In the following
3905 initialization we declare NB_LOOPS safe variables, and the total
3906 number of variables for the constraint system is 2*NB_LOOPS. */
3907 pb
= omega_alloc_problem (2 * DDR_NB_LOOPS (ddr
), DDR_NB_LOOPS (ddr
));
3908 res
= init_omega_for_ddr_1 (DDR_A (ddr
), DDR_B (ddr
), ddr
, pb
,
3910 omega_free_problem (pb
);
3912 /* Stop computation if not decidable, or no dependence. */
3913 if (res
== false || *maybe_dependent
== false)
3916 pb
= omega_alloc_problem (2 * DDR_NB_LOOPS (ddr
), DDR_NB_LOOPS (ddr
));
3917 res
= init_omega_for_ddr_1 (DDR_B (ddr
), DDR_A (ddr
), ddr
, pb
,
3919 omega_free_problem (pb
);
3924 /* Return true when DDR contains the same information as that stored
3925 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
3928 ddr_consistent_p (FILE *file
,
3929 struct data_dependence_relation
*ddr
,
3930 VEC (lambda_vector
, heap
) *dist_vects
,
3931 VEC (lambda_vector
, heap
) *dir_vects
)
3935 /* If dump_file is set, output there. */
3936 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3939 if (VEC_length (lambda_vector
, dist_vects
) != DDR_NUM_DIST_VECTS (ddr
))
3941 lambda_vector b_dist_v
;
3942 fprintf (file
, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
3943 VEC_length (lambda_vector
, dist_vects
),
3944 DDR_NUM_DIST_VECTS (ddr
));
3946 fprintf (file
, "Banerjee dist vectors:\n");
3947 FOR_EACH_VEC_ELT (lambda_vector
, dist_vects
, i
, b_dist_v
)
3948 print_lambda_vector (file
, b_dist_v
, DDR_NB_LOOPS (ddr
));
3950 fprintf (file
, "Omega dist vectors:\n");
3951 for (i
= 0; i
< DDR_NUM_DIST_VECTS (ddr
); i
++)
3952 print_lambda_vector (file
, DDR_DIST_VECT (ddr
, i
), DDR_NB_LOOPS (ddr
));
3954 fprintf (file
, "data dependence relation:\n");
3955 dump_data_dependence_relation (file
, ddr
);
3957 fprintf (file
, ")\n");
3961 if (VEC_length (lambda_vector
, dir_vects
) != DDR_NUM_DIR_VECTS (ddr
))
3963 fprintf (file
, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
3964 VEC_length (lambda_vector
, dir_vects
),
3965 DDR_NUM_DIR_VECTS (ddr
));
3969 for (i
= 0; i
< DDR_NUM_DIST_VECTS (ddr
); i
++)
3971 lambda_vector a_dist_v
;
3972 lambda_vector b_dist_v
= DDR_DIST_VECT (ddr
, i
);
3974 /* Distance vectors are not ordered in the same way in the DDR
3975 and in the DIST_VECTS: search for a matching vector. */
3976 FOR_EACH_VEC_ELT (lambda_vector
, dist_vects
, j
, a_dist_v
)
3977 if (lambda_vector_equal (a_dist_v
, b_dist_v
, DDR_NB_LOOPS (ddr
)))
3980 if (j
== VEC_length (lambda_vector
, dist_vects
))
3982 fprintf (file
, "\n(Dist vectors from the first dependence analyzer:\n");
3983 print_dist_vectors (file
, dist_vects
, DDR_NB_LOOPS (ddr
));
3984 fprintf (file
, "not found in Omega dist vectors:\n");
3985 print_dist_vectors (file
, DDR_DIST_VECTS (ddr
), DDR_NB_LOOPS (ddr
));
3986 fprintf (file
, "data dependence relation:\n");
3987 dump_data_dependence_relation (file
, ddr
);
3988 fprintf (file
, ")\n");
3992 for (i
= 0; i
< DDR_NUM_DIR_VECTS (ddr
); i
++)
3994 lambda_vector a_dir_v
;
3995 lambda_vector b_dir_v
= DDR_DIR_VECT (ddr
, i
);
3997 /* Direction vectors are not ordered in the same way in the DDR
3998 and in the DIR_VECTS: search for a matching vector. */
3999 FOR_EACH_VEC_ELT (lambda_vector
, dir_vects
, j
, a_dir_v
)
4000 if (lambda_vector_equal (a_dir_v
, b_dir_v
, DDR_NB_LOOPS (ddr
)))
4003 if (j
== VEC_length (lambda_vector
, dist_vects
))
4005 fprintf (file
, "\n(Dir vectors from the first dependence analyzer:\n");
4006 print_dir_vectors (file
, dir_vects
, DDR_NB_LOOPS (ddr
));
4007 fprintf (file
, "not found in Omega dir vectors:\n");
4008 print_dir_vectors (file
, DDR_DIR_VECTS (ddr
), DDR_NB_LOOPS (ddr
));
4009 fprintf (file
, "data dependence relation:\n");
4010 dump_data_dependence_relation (file
, ddr
);
4011 fprintf (file
, ")\n");
4018 /* This computes the affine dependence relation between A and B with
4019 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
4020 independence between two accesses, while CHREC_DONT_KNOW is used
4021 for representing the unknown relation.
4023 Note that it is possible to stop the computation of the dependence
4024 relation the first time we detect a CHREC_KNOWN element for a given
4028 compute_affine_dependence (struct data_dependence_relation
*ddr
,
4029 struct loop
*loop_nest
)
4031 struct data_reference
*dra
= DDR_A (ddr
);
4032 struct data_reference
*drb
= DDR_B (ddr
);
4034 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4036 fprintf (dump_file
, "(compute_affine_dependence\n");
4037 fprintf (dump_file
, " (stmt_a = \n");
4038 print_gimple_stmt (dump_file
, DR_STMT (dra
), 0, 0);
4039 fprintf (dump_file
, ")\n (stmt_b = \n");
4040 print_gimple_stmt (dump_file
, DR_STMT (drb
), 0, 0);
4041 fprintf (dump_file
, ")\n");
4044 /* Analyze only when the dependence relation is not yet known. */
4045 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
4046 && !DDR_SELF_REFERENCE (ddr
))
4048 dependence_stats
.num_dependence_tests
++;
4050 if (access_functions_are_affine_or_constant_p (dra
, loop_nest
)
4051 && access_functions_are_affine_or_constant_p (drb
, loop_nest
))
4053 if (flag_check_data_deps
)
4055 /* Compute the dependences using the first algorithm. */
4056 subscript_dependence_tester (ddr
, loop_nest
);
4058 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4060 fprintf (dump_file
, "\n\nBanerjee Analyzer\n");
4061 dump_data_dependence_relation (dump_file
, ddr
);
4064 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
4066 bool maybe_dependent
;
4067 VEC (lambda_vector
, heap
) *dir_vects
, *dist_vects
;
4069 /* Save the result of the first DD analyzer. */
4070 dist_vects
= DDR_DIST_VECTS (ddr
);
4071 dir_vects
= DDR_DIR_VECTS (ddr
);
4073 /* Reset the information. */
4074 DDR_DIST_VECTS (ddr
) = NULL
;
4075 DDR_DIR_VECTS (ddr
) = NULL
;
4077 /* Compute the same information using Omega. */
4078 if (!init_omega_for_ddr (ddr
, &maybe_dependent
))
4079 goto csys_dont_know
;
4081 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4083 fprintf (dump_file
, "Omega Analyzer\n");
4084 dump_data_dependence_relation (dump_file
, ddr
);
4087 /* Check that we get the same information. */
4088 if (maybe_dependent
)
4089 gcc_assert (ddr_consistent_p (stderr
, ddr
, dist_vects
,
4094 subscript_dependence_tester (ddr
, loop_nest
);
4097 /* As a last case, if the dependence cannot be determined, or if
4098 the dependence is considered too difficult to determine, answer
4103 dependence_stats
.num_dependence_undetermined
++;
4105 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4107 fprintf (dump_file
, "Data ref a:\n");
4108 dump_data_reference (dump_file
, dra
);
4109 fprintf (dump_file
, "Data ref b:\n");
4110 dump_data_reference (dump_file
, drb
);
4111 fprintf (dump_file
, "affine dependence test not usable: access function not affine or constant.\n");
4113 finalize_ddr_dependent (ddr
, chrec_dont_know
);
4117 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4118 fprintf (dump_file
, ")\n");
4121 /* This computes the dependence relation for the same data
4122 reference into DDR. */
4125 compute_self_dependence (struct data_dependence_relation
*ddr
)
4128 struct subscript
*subscript
;
4130 if (DDR_ARE_DEPENDENT (ddr
) != NULL_TREE
)
4133 for (i
= 0; VEC_iterate (subscript_p
, DDR_SUBSCRIPTS (ddr
), i
, subscript
);
4136 if (SUB_CONFLICTS_IN_A (subscript
))
4137 free_conflict_function (SUB_CONFLICTS_IN_A (subscript
));
4138 if (SUB_CONFLICTS_IN_B (subscript
))
4139 free_conflict_function (SUB_CONFLICTS_IN_B (subscript
));
4141 /* The accessed index overlaps for each iteration. */
4142 SUB_CONFLICTS_IN_A (subscript
)
4143 = conflict_fn (1, affine_fn_cst (integer_zero_node
));
4144 SUB_CONFLICTS_IN_B (subscript
)
4145 = conflict_fn (1, affine_fn_cst (integer_zero_node
));
4146 SUB_LAST_CONFLICT (subscript
) = chrec_dont_know
;
4149 /* The distance vector is the zero vector. */
4150 save_dist_v (ddr
, lambda_vector_new (DDR_NB_LOOPS (ddr
)));
4151 save_dir_v (ddr
, lambda_vector_new (DDR_NB_LOOPS (ddr
)));
4154 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4155 the data references in DATAREFS, in the LOOP_NEST. When
4156 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4160 compute_all_dependences (VEC (data_reference_p
, heap
) *datarefs
,
4161 VEC (ddr_p
, heap
) **dependence_relations
,
4162 VEC (loop_p
, heap
) *loop_nest
,
4163 bool compute_self_and_rr
)
4165 struct data_dependence_relation
*ddr
;
4166 struct data_reference
*a
, *b
;
4169 FOR_EACH_VEC_ELT (data_reference_p
, datarefs
, i
, a
)
4170 for (j
= i
+ 1; VEC_iterate (data_reference_p
, datarefs
, j
, b
); j
++)
4171 if (DR_IS_WRITE (a
) || DR_IS_WRITE (b
) || compute_self_and_rr
)
4173 ddr
= initialize_data_dependence_relation (a
, b
, loop_nest
);
4174 VEC_safe_push (ddr_p
, heap
, *dependence_relations
, ddr
);
4176 compute_affine_dependence (ddr
, VEC_index (loop_p
, loop_nest
, 0));
4179 if (compute_self_and_rr
)
4180 FOR_EACH_VEC_ELT (data_reference_p
, datarefs
, i
, a
)
4182 ddr
= initialize_data_dependence_relation (a
, a
, loop_nest
);
4183 VEC_safe_push (ddr_p
, heap
, *dependence_relations
, ddr
);
4184 compute_self_dependence (ddr
);
4188 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4189 true if STMT clobbers memory, false otherwise. */
4192 get_references_in_stmt (gimple stmt
, VEC (data_ref_loc
, heap
) **references
)
4194 bool clobbers_memory
= false;
4197 enum gimple_code stmt_code
= gimple_code (stmt
);
4201 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4202 Calls have side-effects, except those to const or pure
4204 if ((stmt_code
== GIMPLE_CALL
4205 && !(gimple_call_flags (stmt
) & (ECF_CONST
| ECF_PURE
)))
4206 || (stmt_code
== GIMPLE_ASM
4207 && gimple_asm_volatile_p (stmt
)))
4208 clobbers_memory
= true;
4210 if (!gimple_vuse (stmt
))
4211 return clobbers_memory
;
4213 if (stmt_code
== GIMPLE_ASSIGN
)
4216 op0
= gimple_assign_lhs_ptr (stmt
);
4217 op1
= gimple_assign_rhs1_ptr (stmt
);
4220 || (REFERENCE_CLASS_P (*op1
)
4221 && (base
= get_base_address (*op1
))
4222 && TREE_CODE (base
) != SSA_NAME
))
4224 ref
= VEC_safe_push (data_ref_loc
, heap
, *references
, NULL
);
4226 ref
->is_read
= true;
4230 || (REFERENCE_CLASS_P (*op0
) && get_base_address (*op0
)))
4232 ref
= VEC_safe_push (data_ref_loc
, heap
, *references
, NULL
);
4234 ref
->is_read
= false;
4237 else if (stmt_code
== GIMPLE_CALL
)
4239 unsigned i
, n
= gimple_call_num_args (stmt
);
4241 for (i
= 0; i
< n
; i
++)
4243 op0
= gimple_call_arg_ptr (stmt
, i
);
4246 || (REFERENCE_CLASS_P (*op0
) && get_base_address (*op0
)))
4248 ref
= VEC_safe_push (data_ref_loc
, heap
, *references
, NULL
);
4250 ref
->is_read
= true;
4255 return clobbers_memory
;
4258 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4259 reference, returns false, otherwise returns true. NEST is the outermost
4260 loop of the loop nest in which the references should be analyzed. */
4263 find_data_references_in_stmt (struct loop
*nest
, gimple stmt
,
4264 VEC (data_reference_p
, heap
) **datarefs
)
4267 VEC (data_ref_loc
, heap
) *references
;
4270 data_reference_p dr
;
4272 if (get_references_in_stmt (stmt
, &references
))
4274 VEC_free (data_ref_loc
, heap
, references
);
4278 FOR_EACH_VEC_ELT (data_ref_loc
, references
, i
, ref
)
4280 dr
= create_data_ref (nest
, loop_containing_stmt (stmt
),
4281 *ref
->pos
, stmt
, ref
->is_read
);
4282 gcc_assert (dr
!= NULL
);
4284 /* FIXME -- data dependence analysis does not work correctly for objects
4285 with invariant addresses in loop nests. Let us fail here until the
4286 problem is fixed. */
4287 if (dr_address_invariant_p (dr
) && nest
)
4290 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4291 fprintf (dump_file
, "\tFAILED as dr address is invariant\n");
4296 VEC_safe_push (data_reference_p
, heap
, *datarefs
, dr
);
4298 VEC_free (data_ref_loc
, heap
, references
);
4302 /* Stores the data references in STMT to DATAREFS. If there is an
4303 unanalyzable reference, returns false, otherwise returns true.
4304 NEST is the outermost loop of the loop nest in which the references
4305 should be instantiated, LOOP is the loop in which the references
4306 should be analyzed. */
4309 graphite_find_data_references_in_stmt (loop_p nest
, loop_p loop
, gimple stmt
,
4310 VEC (data_reference_p
, heap
) **datarefs
)
4313 VEC (data_ref_loc
, heap
) *references
;
4316 data_reference_p dr
;
4318 if (get_references_in_stmt (stmt
, &references
))
4320 VEC_free (data_ref_loc
, heap
, references
);
4324 FOR_EACH_VEC_ELT (data_ref_loc
, references
, i
, ref
)
4326 dr
= create_data_ref (nest
, loop
, *ref
->pos
, stmt
, ref
->is_read
);
4327 gcc_assert (dr
!= NULL
);
4328 VEC_safe_push (data_reference_p
, heap
, *datarefs
, dr
);
4331 VEC_free (data_ref_loc
, heap
, references
);
4335 /* Search the data references in LOOP, and record the information into
4336 DATAREFS. Returns chrec_dont_know when failing to analyze a
4337 difficult case, returns NULL_TREE otherwise. */
4340 find_data_references_in_bb (struct loop
*loop
, basic_block bb
,
4341 VEC (data_reference_p
, heap
) **datarefs
)
4343 gimple_stmt_iterator bsi
;
4345 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
4347 gimple stmt
= gsi_stmt (bsi
);
4349 if (!find_data_references_in_stmt (loop
, stmt
, datarefs
))
4351 struct data_reference
*res
;
4352 res
= XCNEW (struct data_reference
);
4353 VEC_safe_push (data_reference_p
, heap
, *datarefs
, res
);
4355 return chrec_dont_know
;
4362 /* Search the data references in LOOP, and record the information into
4363 DATAREFS. Returns chrec_dont_know when failing to analyze a
4364 difficult case, returns NULL_TREE otherwise.
4366 TODO: This function should be made smarter so that it can handle address
4367 arithmetic as if they were array accesses, etc. */
4370 find_data_references_in_loop (struct loop
*loop
,
4371 VEC (data_reference_p
, heap
) **datarefs
)
4373 basic_block bb
, *bbs
;
4376 bbs
= get_loop_body_in_dom_order (loop
);
4378 for (i
= 0; i
< loop
->num_nodes
; i
++)
4382 if (find_data_references_in_bb (loop
, bb
, datarefs
) == chrec_dont_know
)
4385 return chrec_dont_know
;
4393 /* Recursive helper function. */
4396 find_loop_nest_1 (struct loop
*loop
, VEC (loop_p
, heap
) **loop_nest
)
4398 /* Inner loops of the nest should not contain siblings. Example:
4399 when there are two consecutive loops,
4410 the dependence relation cannot be captured by the distance
4415 VEC_safe_push (loop_p
, heap
, *loop_nest
, loop
);
4417 return find_loop_nest_1 (loop
->inner
, loop_nest
);
4421 /* Return false when the LOOP is not well nested. Otherwise return
4422 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4423 contain the loops from the outermost to the innermost, as they will
4424 appear in the classic distance vector. */
4427 find_loop_nest (struct loop
*loop
, VEC (loop_p
, heap
) **loop_nest
)
4429 VEC_safe_push (loop_p
, heap
, *loop_nest
, loop
);
4431 return find_loop_nest_1 (loop
->inner
, loop_nest
);
4435 /* Returns true when the data dependences have been computed, false otherwise.
4436 Given a loop nest LOOP, the following vectors are returned:
4437 DATAREFS is initialized to all the array elements contained in this loop,
4438 DEPENDENCE_RELATIONS contains the relations between the data references.
4439 Compute read-read and self relations if
4440 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4443 compute_data_dependences_for_loop (struct loop
*loop
,
4444 bool compute_self_and_read_read_dependences
,
4445 VEC (loop_p
, heap
) **loop_nest
,
4446 VEC (data_reference_p
, heap
) **datarefs
,
4447 VEC (ddr_p
, heap
) **dependence_relations
)
4451 memset (&dependence_stats
, 0, sizeof (dependence_stats
));
4453 /* If the loop nest is not well formed, or one of the data references
4454 is not computable, give up without spending time to compute other
4457 || !find_loop_nest (loop
, loop_nest
)
4458 || find_data_references_in_loop (loop
, datarefs
) == chrec_dont_know
)
4460 struct data_dependence_relation
*ddr
;
4462 /* Insert a single relation into dependence_relations:
4464 ddr
= initialize_data_dependence_relation (NULL
, NULL
, *loop_nest
);
4465 VEC_safe_push (ddr_p
, heap
, *dependence_relations
, ddr
);
4469 compute_all_dependences (*datarefs
, dependence_relations
, *loop_nest
,
4470 compute_self_and_read_read_dependences
);
4472 if (dump_file
&& (dump_flags
& TDF_STATS
))
4474 fprintf (dump_file
, "Dependence tester statistics:\n");
4476 fprintf (dump_file
, "Number of dependence tests: %d\n",
4477 dependence_stats
.num_dependence_tests
);
4478 fprintf (dump_file
, "Number of dependence tests classified dependent: %d\n",
4479 dependence_stats
.num_dependence_dependent
);
4480 fprintf (dump_file
, "Number of dependence tests classified independent: %d\n",
4481 dependence_stats
.num_dependence_independent
);
4482 fprintf (dump_file
, "Number of undetermined dependence tests: %d\n",
4483 dependence_stats
.num_dependence_undetermined
);
4485 fprintf (dump_file
, "Number of subscript tests: %d\n",
4486 dependence_stats
.num_subscript_tests
);
4487 fprintf (dump_file
, "Number of undetermined subscript tests: %d\n",
4488 dependence_stats
.num_subscript_undetermined
);
4489 fprintf (dump_file
, "Number of same subscript function: %d\n",
4490 dependence_stats
.num_same_subscript_function
);
4492 fprintf (dump_file
, "Number of ziv tests: %d\n",
4493 dependence_stats
.num_ziv
);
4494 fprintf (dump_file
, "Number of ziv tests returning dependent: %d\n",
4495 dependence_stats
.num_ziv_dependent
);
4496 fprintf (dump_file
, "Number of ziv tests returning independent: %d\n",
4497 dependence_stats
.num_ziv_independent
);
4498 fprintf (dump_file
, "Number of ziv tests unimplemented: %d\n",
4499 dependence_stats
.num_ziv_unimplemented
);
4501 fprintf (dump_file
, "Number of siv tests: %d\n",
4502 dependence_stats
.num_siv
);
4503 fprintf (dump_file
, "Number of siv tests returning dependent: %d\n",
4504 dependence_stats
.num_siv_dependent
);
4505 fprintf (dump_file
, "Number of siv tests returning independent: %d\n",
4506 dependence_stats
.num_siv_independent
);
4507 fprintf (dump_file
, "Number of siv tests unimplemented: %d\n",
4508 dependence_stats
.num_siv_unimplemented
);
4510 fprintf (dump_file
, "Number of miv tests: %d\n",
4511 dependence_stats
.num_miv
);
4512 fprintf (dump_file
, "Number of miv tests returning dependent: %d\n",
4513 dependence_stats
.num_miv_dependent
);
4514 fprintf (dump_file
, "Number of miv tests returning independent: %d\n",
4515 dependence_stats
.num_miv_independent
);
4516 fprintf (dump_file
, "Number of miv tests unimplemented: %d\n",
4517 dependence_stats
.num_miv_unimplemented
);
4523 /* Returns true when the data dependences for the basic block BB have been
4524 computed, false otherwise.
4525 DATAREFS is initialized to all the array elements contained in this basic
4526 block, DEPENDENCE_RELATIONS contains the relations between the data
4527 references. Compute read-read and self relations if
4528 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4530 compute_data_dependences_for_bb (basic_block bb
,
4531 bool compute_self_and_read_read_dependences
,
4532 VEC (data_reference_p
, heap
) **datarefs
,
4533 VEC (ddr_p
, heap
) **dependence_relations
)
4535 if (find_data_references_in_bb (NULL
, bb
, datarefs
) == chrec_dont_know
)
4538 compute_all_dependences (*datarefs
, dependence_relations
, NULL
,
4539 compute_self_and_read_read_dependences
);
4543 /* Entry point (for testing only). Analyze all the data references
4544 and the dependence relations in LOOP.
4546 The data references are computed first.
4548 A relation on these nodes is represented by a complete graph. Some
4549 of the relations could be of no interest, thus the relations can be
4552 In the following function we compute all the relations. This is
4553 just a first implementation that is here for:
4554 - for showing how to ask for the dependence relations,
4555 - for the debugging the whole dependence graph,
4556 - for the dejagnu testcases and maintenance.
4558 It is possible to ask only for a part of the graph, avoiding to
4559 compute the whole dependence graph. The computed dependences are
4560 stored in a knowledge base (KB) such that later queries don't
4561 recompute the same information. The implementation of this KB is
4562 transparent to the optimizer, and thus the KB can be changed with a
4563 more efficient implementation, or the KB could be disabled. */
4565 analyze_all_data_dependences (struct loop
*loop
)
4568 int nb_data_refs
= 10;
4569 VEC (data_reference_p
, heap
) *datarefs
=
4570 VEC_alloc (data_reference_p
, heap
, nb_data_refs
);
4571 VEC (ddr_p
, heap
) *dependence_relations
=
4572 VEC_alloc (ddr_p
, heap
, nb_data_refs
* nb_data_refs
);
4573 VEC (loop_p
, heap
) *loop_nest
= VEC_alloc (loop_p
, heap
, 3);
4575 /* Compute DDs on the whole function. */
4576 compute_data_dependences_for_loop (loop
, false, &loop_nest
, &datarefs
,
4577 &dependence_relations
);
4581 dump_data_dependence_relations (dump_file
, dependence_relations
);
4582 fprintf (dump_file
, "\n\n");
4584 if (dump_flags
& TDF_DETAILS
)
4585 dump_dist_dir_vectors (dump_file
, dependence_relations
);
4587 if (dump_flags
& TDF_STATS
)
4589 unsigned nb_top_relations
= 0;
4590 unsigned nb_bot_relations
= 0;
4591 unsigned nb_chrec_relations
= 0;
4592 struct data_dependence_relation
*ddr
;
4594 FOR_EACH_VEC_ELT (ddr_p
, dependence_relations
, i
, ddr
)
4596 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr
)))
4599 else if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
4603 nb_chrec_relations
++;
4606 gather_stats_on_scev_database ();
4610 VEC_free (loop_p
, heap
, loop_nest
);
4611 free_dependence_relations (dependence_relations
);
4612 free_data_refs (datarefs
);
4615 /* Computes all the data dependences and check that the results of
4616 several analyzers are the same. */
4619 tree_check_data_deps (void)
4622 struct loop
*loop_nest
;
4624 FOR_EACH_LOOP (li
, loop_nest
, 0)
4625 analyze_all_data_dependences (loop_nest
);
4628 /* Free the memory used by a data dependence relation DDR. */
4631 free_dependence_relation (struct data_dependence_relation
*ddr
)
4636 if (DDR_SUBSCRIPTS (ddr
))
4637 free_subscripts (DDR_SUBSCRIPTS (ddr
));
4638 if (DDR_DIST_VECTS (ddr
))
4639 VEC_free (lambda_vector
, heap
, DDR_DIST_VECTS (ddr
));
4640 if (DDR_DIR_VECTS (ddr
))
4641 VEC_free (lambda_vector
, heap
, DDR_DIR_VECTS (ddr
));
4646 /* Free the memory used by the data dependence relations from
4647 DEPENDENCE_RELATIONS. */
4650 free_dependence_relations (VEC (ddr_p
, heap
) *dependence_relations
)
4653 struct data_dependence_relation
*ddr
;
4655 FOR_EACH_VEC_ELT (ddr_p
, dependence_relations
, i
, ddr
)
4657 free_dependence_relation (ddr
);
4659 VEC_free (ddr_p
, heap
, dependence_relations
);
4662 /* Free the memory used by the data references from DATAREFS. */
4665 free_data_refs (VEC (data_reference_p
, heap
) *datarefs
)
4668 struct data_reference
*dr
;
4670 FOR_EACH_VEC_ELT (data_reference_p
, datarefs
, i
, dr
)
4672 VEC_free (data_reference_p
, heap
, datarefs
);
4677 /* Dump vertex I in RDG to FILE. */
4680 dump_rdg_vertex (FILE *file
, struct graph
*rdg
, int i
)
4682 struct vertex
*v
= &(rdg
->vertices
[i
]);
4683 struct graph_edge
*e
;
4685 fprintf (file
, "(vertex %d: (%s%s) (in:", i
,
4686 RDG_MEM_WRITE_STMT (rdg
, i
) ? "w" : "",
4687 RDG_MEM_READS_STMT (rdg
, i
) ? "r" : "");
4690 for (e
= v
->pred
; e
; e
= e
->pred_next
)
4691 fprintf (file
, " %d", e
->src
);
4693 fprintf (file
, ") (out:");
4696 for (e
= v
->succ
; e
; e
= e
->succ_next
)
4697 fprintf (file
, " %d", e
->dest
);
4699 fprintf (file
, ")\n");
4700 print_gimple_stmt (file
, RDGV_STMT (v
), 0, TDF_VOPS
|TDF_MEMSYMS
);
4701 fprintf (file
, ")\n");
4704 /* Call dump_rdg_vertex on stderr. */
4707 debug_rdg_vertex (struct graph
*rdg
, int i
)
4709 dump_rdg_vertex (stderr
, rdg
, i
);
4712 /* Dump component C of RDG to FILE. If DUMPED is non-null, set the
4713 dumped vertices to that bitmap. */
4715 void dump_rdg_component (FILE *file
, struct graph
*rdg
, int c
, bitmap dumped
)
4719 fprintf (file
, "(%d\n", c
);
4721 for (i
= 0; i
< rdg
->n_vertices
; i
++)
4722 if (rdg
->vertices
[i
].component
== c
)
4725 bitmap_set_bit (dumped
, i
);
4727 dump_rdg_vertex (file
, rdg
, i
);
4730 fprintf (file
, ")\n");
4733 /* Call dump_rdg_vertex on stderr. */
4736 debug_rdg_component (struct graph
*rdg
, int c
)
4738 dump_rdg_component (stderr
, rdg
, c
, NULL
);
4741 /* Dump the reduced dependence graph RDG to FILE. */
4744 dump_rdg (FILE *file
, struct graph
*rdg
)
4747 bitmap dumped
= BITMAP_ALLOC (NULL
);
4749 fprintf (file
, "(rdg\n");
4751 for (i
= 0; i
< rdg
->n_vertices
; i
++)
4752 if (!bitmap_bit_p (dumped
, i
))
4753 dump_rdg_component (file
, rdg
, rdg
->vertices
[i
].component
, dumped
);
4755 fprintf (file
, ")\n");
4756 BITMAP_FREE (dumped
);
4759 /* Call dump_rdg on stderr. */
4762 debug_rdg (struct graph
*rdg
)
4764 dump_rdg (stderr
, rdg
);
4768 dot_rdg_1 (FILE *file
, struct graph
*rdg
)
4772 fprintf (file
, "digraph RDG {\n");
4774 for (i
= 0; i
< rdg
->n_vertices
; i
++)
4776 struct vertex
*v
= &(rdg
->vertices
[i
]);
4777 struct graph_edge
*e
;
4779 /* Highlight reads from memory. */
4780 if (RDG_MEM_READS_STMT (rdg
, i
))
4781 fprintf (file
, "%d [style=filled, fillcolor=green]\n", i
);
4783 /* Highlight stores to memory. */
4784 if (RDG_MEM_WRITE_STMT (rdg
, i
))
4785 fprintf (file
, "%d [style=filled, fillcolor=red]\n", i
);
4788 for (e
= v
->succ
; e
; e
= e
->succ_next
)
4789 switch (RDGE_TYPE (e
))
4792 fprintf (file
, "%d -> %d [label=input] \n", i
, e
->dest
);
4796 fprintf (file
, "%d -> %d [label=output] \n", i
, e
->dest
);
4800 /* These are the most common dependences: don't print these. */
4801 fprintf (file
, "%d -> %d \n", i
, e
->dest
);
4805 fprintf (file
, "%d -> %d [label=anti] \n", i
, e
->dest
);
4813 fprintf (file
, "}\n\n");
4816 /* Display the Reduced Dependence Graph using dotty. */
4817 extern void dot_rdg (struct graph
*);
4820 dot_rdg (struct graph
*rdg
)
4822 /* When debugging, enable the following code. This cannot be used
4823 in production compilers because it calls "system". */
4825 FILE *file
= fopen ("/tmp/rdg.dot", "w");
4826 gcc_assert (file
!= NULL
);
4828 dot_rdg_1 (file
, rdg
);
4831 system ("dotty /tmp/rdg.dot &");
4833 dot_rdg_1 (stderr
, rdg
);
4837 /* This structure is used for recording the mapping statement index in
4840 struct GTY(()) rdg_vertex_info
4846 /* Returns the index of STMT in RDG. */
4849 rdg_vertex_for_stmt (struct graph
*rdg
, gimple stmt
)
4851 struct rdg_vertex_info rvi
, *slot
;
4854 slot
= (struct rdg_vertex_info
*) htab_find (rdg
->indices
, &rvi
);
4862 /* Creates an edge in RDG for each distance vector from DDR. The
4863 order that we keep track of in the RDG is the order in which
4864 statements have to be executed. */
4867 create_rdg_edge_for_ddr (struct graph
*rdg
, ddr_p ddr
)
4869 struct graph_edge
*e
;
4871 data_reference_p dra
= DDR_A (ddr
);
4872 data_reference_p drb
= DDR_B (ddr
);
4873 unsigned level
= ddr_dependence_level (ddr
);
4875 /* For non scalar dependences, when the dependence is REVERSED,
4876 statement B has to be executed before statement A. */
4878 && !DDR_REVERSED_P (ddr
))
4880 data_reference_p tmp
= dra
;
4885 va
= rdg_vertex_for_stmt (rdg
, DR_STMT (dra
));
4886 vb
= rdg_vertex_for_stmt (rdg
, DR_STMT (drb
));
4888 if (va
< 0 || vb
< 0)
4891 e
= add_edge (rdg
, va
, vb
);
4892 e
->data
= XNEW (struct rdg_edge
);
4894 RDGE_LEVEL (e
) = level
;
4895 RDGE_RELATION (e
) = ddr
;
4897 /* Determines the type of the data dependence. */
4898 if (DR_IS_READ (dra
) && DR_IS_READ (drb
))
4899 RDGE_TYPE (e
) = input_dd
;
4900 else if (DR_IS_WRITE (dra
) && DR_IS_WRITE (drb
))
4901 RDGE_TYPE (e
) = output_dd
;
4902 else if (DR_IS_WRITE (dra
) && DR_IS_READ (drb
))
4903 RDGE_TYPE (e
) = flow_dd
;
4904 else if (DR_IS_READ (dra
) && DR_IS_WRITE (drb
))
4905 RDGE_TYPE (e
) = anti_dd
;
4908 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
4909 the index of DEF in RDG. */
4912 create_rdg_edges_for_scalar (struct graph
*rdg
, tree def
, int idef
)
4914 use_operand_p imm_use_p
;
4915 imm_use_iterator iterator
;
4917 FOR_EACH_IMM_USE_FAST (imm_use_p
, iterator
, def
)
4919 struct graph_edge
*e
;
4920 int use
= rdg_vertex_for_stmt (rdg
, USE_STMT (imm_use_p
));
4925 e
= add_edge (rdg
, idef
, use
);
4926 e
->data
= XNEW (struct rdg_edge
);
4927 RDGE_TYPE (e
) = flow_dd
;
4928 RDGE_RELATION (e
) = NULL
;
4932 /* Creates the edges of the reduced dependence graph RDG. */
4935 create_rdg_edges (struct graph
*rdg
, VEC (ddr_p
, heap
) *ddrs
)
4938 struct data_dependence_relation
*ddr
;
4939 def_operand_p def_p
;
4942 FOR_EACH_VEC_ELT (ddr_p
, ddrs
, i
, ddr
)
4943 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
4944 create_rdg_edge_for_ddr (rdg
, ddr
);
4946 for (i
= 0; i
< rdg
->n_vertices
; i
++)
4947 FOR_EACH_PHI_OR_STMT_DEF (def_p
, RDG_STMT (rdg
, i
),
4949 create_rdg_edges_for_scalar (rdg
, DEF_FROM_PTR (def_p
), i
);
4952 /* Build the vertices of the reduced dependence graph RDG. */
4955 create_rdg_vertices (struct graph
*rdg
, VEC (gimple
, heap
) *stmts
)
4960 FOR_EACH_VEC_ELT (gimple
, stmts
, i
, stmt
)
4962 VEC (data_ref_loc
, heap
) *references
;
4964 struct vertex
*v
= &(rdg
->vertices
[i
]);
4965 struct rdg_vertex_info
*rvi
= XNEW (struct rdg_vertex_info
);
4966 struct rdg_vertex_info
**slot
;
4970 slot
= (struct rdg_vertex_info
**) htab_find_slot (rdg
->indices
, rvi
, INSERT
);
4977 v
->data
= XNEW (struct rdg_vertex
);
4978 RDG_STMT (rdg
, i
) = stmt
;
4980 RDG_MEM_WRITE_STMT (rdg
, i
) = false;
4981 RDG_MEM_READS_STMT (rdg
, i
) = false;
4982 if (gimple_code (stmt
) == GIMPLE_PHI
)
4985 get_references_in_stmt (stmt
, &references
);
4986 FOR_EACH_VEC_ELT (data_ref_loc
, references
, j
, ref
)
4988 RDG_MEM_WRITE_STMT (rdg
, i
) = true;
4990 RDG_MEM_READS_STMT (rdg
, i
) = true;
4992 VEC_free (data_ref_loc
, heap
, references
);
4996 /* Initialize STMTS with all the statements of LOOP. When
4997 INCLUDE_PHIS is true, include also the PHI nodes. The order in
4998 which we discover statements is important as
4999 generate_loops_for_partition is using the same traversal for
5000 identifying statements. */
5003 stmts_from_loop (struct loop
*loop
, VEC (gimple
, heap
) **stmts
)
5006 basic_block
*bbs
= get_loop_body_in_dom_order (loop
);
5008 for (i
= 0; i
< loop
->num_nodes
; i
++)
5010 basic_block bb
= bbs
[i
];
5011 gimple_stmt_iterator bsi
;
5014 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
5015 VEC_safe_push (gimple
, heap
, *stmts
, gsi_stmt (bsi
));
5017 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
5019 stmt
= gsi_stmt (bsi
);
5020 if (gimple_code (stmt
) != GIMPLE_LABEL
)
5021 VEC_safe_push (gimple
, heap
, *stmts
, stmt
);
5028 /* Returns true when all the dependences are computable. */
5031 known_dependences_p (VEC (ddr_p
, heap
) *dependence_relations
)
5036 FOR_EACH_VEC_ELT (ddr_p
, dependence_relations
, i
, ddr
)
5037 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
5043 /* Computes a hash function for element ELT. */
5046 hash_stmt_vertex_info (const void *elt
)
5048 const struct rdg_vertex_info
*const rvi
=
5049 (const struct rdg_vertex_info
*) elt
;
5050 gimple stmt
= rvi
->stmt
;
5052 return htab_hash_pointer (stmt
);
5055 /* Compares database elements E1 and E2. */
5058 eq_stmt_vertex_info (const void *e1
, const void *e2
)
5060 const struct rdg_vertex_info
*elt1
= (const struct rdg_vertex_info
*) e1
;
5061 const struct rdg_vertex_info
*elt2
= (const struct rdg_vertex_info
*) e2
;
5063 return elt1
->stmt
== elt2
->stmt
;
5066 /* Free the element E. */
5069 hash_stmt_vertex_del (void *e
)
5074 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5075 statement of the loop nest, and one edge per data dependence or
5076 scalar dependence. */
5079 build_empty_rdg (int n_stmts
)
5081 int nb_data_refs
= 10;
5082 struct graph
*rdg
= new_graph (n_stmts
);
5084 rdg
->indices
= htab_create (nb_data_refs
, hash_stmt_vertex_info
,
5085 eq_stmt_vertex_info
, hash_stmt_vertex_del
);
5089 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5090 statement of the loop nest, and one edge per data dependence or
5091 scalar dependence. */
5094 build_rdg (struct loop
*loop
,
5095 VEC (loop_p
, heap
) **loop_nest
,
5096 VEC (ddr_p
, heap
) **dependence_relations
,
5097 VEC (data_reference_p
, heap
) **datarefs
)
5099 struct graph
*rdg
= NULL
;
5100 VEC (gimple
, heap
) *stmts
= VEC_alloc (gimple
, heap
, 10);
5102 compute_data_dependences_for_loop (loop
, false, loop_nest
, datarefs
,
5103 dependence_relations
);
5105 if (known_dependences_p (*dependence_relations
))
5107 stmts_from_loop (loop
, &stmts
);
5108 rdg
= build_empty_rdg (VEC_length (gimple
, stmts
));
5109 create_rdg_vertices (rdg
, stmts
);
5110 create_rdg_edges (rdg
, *dependence_relations
);
5113 VEC_free (gimple
, heap
, stmts
);
5117 /* Free the reduced dependence graph RDG. */
5120 free_rdg (struct graph
*rdg
)
5124 for (i
= 0; i
< rdg
->n_vertices
; i
++)
5126 struct vertex
*v
= &(rdg
->vertices
[i
]);
5127 struct graph_edge
*e
;
5129 for (e
= v
->succ
; e
; e
= e
->succ_next
)
5137 htab_delete (rdg
->indices
);
5141 /* Initialize STMTS with all the statements of LOOP that contain a
5145 stores_from_loop (struct loop
*loop
, VEC (gimple
, heap
) **stmts
)
5148 basic_block
*bbs
= get_loop_body_in_dom_order (loop
);
5150 for (i
= 0; i
< loop
->num_nodes
; i
++)
5152 basic_block bb
= bbs
[i
];
5153 gimple_stmt_iterator bsi
;
5155 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
5156 if (gimple_vdef (gsi_stmt (bsi
)))
5157 VEC_safe_push (gimple
, heap
, *stmts
, gsi_stmt (bsi
));
5163 /* Returns true when the statement at STMT is of the form "A[i] = 0"
5164 that contains a data reference on its LHS with a stride of the same
5165 size as its unit type. */
5168 stmt_with_adjacent_zero_store_dr_p (gimple stmt
)
5172 struct data_reference
*dr
;
5175 || !gimple_vdef (stmt
)
5176 || !is_gimple_assign (stmt
)
5177 || !gimple_assign_single_p (stmt
)
5178 || !(op1
= gimple_assign_rhs1 (stmt
))
5179 || !(integer_zerop (op1
) || real_zerop (op1
)))
5182 dr
= XCNEW (struct data_reference
);
5183 op0
= gimple_assign_lhs (stmt
);
5185 DR_STMT (dr
) = stmt
;
5188 res
= dr_analyze_innermost (dr
)
5189 && stride_of_unit_type_p (DR_STEP (dr
), TREE_TYPE (op0
));
5195 /* Initialize STMTS with all the statements of LOOP that contain a
5196 store to memory of the form "A[i] = 0". */
5199 stores_zero_from_loop (struct loop
*loop
, VEC (gimple
, heap
) **stmts
)
5203 gimple_stmt_iterator si
;
5205 basic_block
*bbs
= get_loop_body_in_dom_order (loop
);
5207 for (i
= 0; i
< loop
->num_nodes
; i
++)
5208 for (bb
= bbs
[i
], si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
5209 if ((stmt
= gsi_stmt (si
))
5210 && stmt_with_adjacent_zero_store_dr_p (stmt
))
5211 VEC_safe_push (gimple
, heap
, *stmts
, gsi_stmt (si
));
5216 /* For a data reference REF, return the declaration of its base
5217 address or NULL_TREE if the base is not determined. */
5220 ref_base_address (gimple stmt
, data_ref_loc
*ref
)
5222 tree base
= NULL_TREE
;
5224 struct data_reference
*dr
= XCNEW (struct data_reference
);
5226 DR_STMT (dr
) = stmt
;
5227 DR_REF (dr
) = *ref
->pos
;
5228 dr_analyze_innermost (dr
);
5229 base_address
= DR_BASE_ADDRESS (dr
);
5234 switch (TREE_CODE (base_address
))
5237 base
= TREE_OPERAND (base_address
, 0);
5241 base
= base_address
;
5250 /* Determines whether the statement from vertex V of the RDG has a
5251 definition used outside the loop that contains this statement. */
5254 rdg_defs_used_in_other_loops_p (struct graph
*rdg
, int v
)
5256 gimple stmt
= RDG_STMT (rdg
, v
);
5257 struct loop
*loop
= loop_containing_stmt (stmt
);
5258 use_operand_p imm_use_p
;
5259 imm_use_iterator iterator
;
5261 def_operand_p def_p
;
5266 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt
, it
, SSA_OP_DEF
)
5268 FOR_EACH_IMM_USE_FAST (imm_use_p
, iterator
, DEF_FROM_PTR (def_p
))
5270 if (loop_containing_stmt (USE_STMT (imm_use_p
)) != loop
)
5278 /* Determines whether statements S1 and S2 access to similar memory
5279 locations. Two memory accesses are considered similar when they
5280 have the same base address declaration, i.e. when their
5281 ref_base_address is the same. */
5284 have_similar_memory_accesses (gimple s1
, gimple s2
)
5288 VEC (data_ref_loc
, heap
) *refs1
, *refs2
;
5289 data_ref_loc
*ref1
, *ref2
;
5291 get_references_in_stmt (s1
, &refs1
);
5292 get_references_in_stmt (s2
, &refs2
);
5294 FOR_EACH_VEC_ELT (data_ref_loc
, refs1
, i
, ref1
)
5296 tree base1
= ref_base_address (s1
, ref1
);
5299 FOR_EACH_VEC_ELT (data_ref_loc
, refs2
, j
, ref2
)
5300 if (base1
== ref_base_address (s2
, ref2
))
5308 VEC_free (data_ref_loc
, heap
, refs1
);
5309 VEC_free (data_ref_loc
, heap
, refs2
);
5313 /* Helper function for the hashtab. */
5316 have_similar_memory_accesses_1 (const void *s1
, const void *s2
)
5318 return have_similar_memory_accesses (CONST_CAST_GIMPLE ((const_gimple
) s1
),
5319 CONST_CAST_GIMPLE ((const_gimple
) s2
));
5322 /* Helper function for the hashtab. */
5325 ref_base_address_1 (const void *s
)
5327 gimple stmt
= CONST_CAST_GIMPLE ((const_gimple
) s
);
5329 VEC (data_ref_loc
, heap
) *refs
;
5333 get_references_in_stmt (stmt
, &refs
);
5335 FOR_EACH_VEC_ELT (data_ref_loc
, refs
, i
, ref
)
5338 res
= htab_hash_pointer (ref_base_address (stmt
, ref
));
5342 VEC_free (data_ref_loc
, heap
, refs
);
5346 /* Try to remove duplicated write data references from STMTS. */
5349 remove_similar_memory_refs (VEC (gimple
, heap
) **stmts
)
5353 htab_t seen
= htab_create (VEC_length (gimple
, *stmts
), ref_base_address_1
,
5354 have_similar_memory_accesses_1
, NULL
);
5356 for (i
= 0; VEC_iterate (gimple
, *stmts
, i
, stmt
); )
5360 slot
= htab_find_slot (seen
, stmt
, INSERT
);
5363 VEC_ordered_remove (gimple
, *stmts
, i
);
5366 *slot
= (void *) stmt
;
5374 /* Returns the index of PARAMETER in the parameters vector of the
5375 ACCESS_MATRIX. If PARAMETER does not exist return -1. */
5378 access_matrix_get_index_for_parameter (tree parameter
,
5379 struct access_matrix
*access_matrix
)
5382 VEC (tree
,heap
) *lambda_parameters
= AM_PARAMETERS (access_matrix
);
5383 tree lambda_parameter
;
5385 FOR_EACH_VEC_ELT (tree
, lambda_parameters
, i
, lambda_parameter
)
5386 if (lambda_parameter
== parameter
)
5387 return i
+ AM_NB_INDUCTION_VARS (access_matrix
);