1 /* Data references and dependences detectors.
2 Copyright (C) 2003-2017 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass walks a given loop structure searching for array
22 references. The information about the array accesses is recorded
23 in DATA_REFERENCE structures.
25 The basic test for determining the dependences is:
26 given two access functions chrec1 and chrec2 to a same array, and
27 x and y two vectors from the iteration domain, the same element of
28 the array is accessed twice at iterations x and y if and only if:
29 | chrec1 (x) == chrec2 (y).
31 The goals of this analysis are:
33 - to determine the independence: the relation between two
34 independent accesses is qualified with the chrec_known (this
35 information allows a loop parallelization),
37 - when two data references access the same data, to qualify the
38 dependence relation with classic dependence representations:
42 - loop carried level dependence
43 - polyhedron dependence
44 or with the chains of recurrences based representation,
46 - to define a knowledge base for storing the data dependence
49 - to define an interface to access this data.
54 - subscript: given two array accesses a subscript is the tuple
55 composed of the access functions for a given dimension. Example:
56 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57 (f1, g1), (f2, g2), (f3, g3).
59 - Diophantine equation: an equation whose coefficients and
60 solutions are integer constants, for example the equation
62 has an integer solution x = 1 and y = -1.
66 - "Advanced Compilation for High Performance Computing" by Randy
67 Allen and Ken Kennedy.
68 http://citeseer.ist.psu.edu/goff91practical.html
70 - "Loop Transformations for Restructuring Compilers - The Foundations"
78 #include "coretypes.h"
83 #include "gimple-pretty-print.h"
85 #include "fold-const.h"
87 #include "gimple-iterator.h"
88 #include "tree-ssa-loop-niter.h"
89 #include "tree-ssa-loop.h"
92 #include "tree-data-ref.h"
93 #include "tree-scalar-evolution.h"
95 #include "tree-affine.h"
99 static struct datadep_stats
101 int num_dependence_tests
;
102 int num_dependence_dependent
;
103 int num_dependence_independent
;
104 int num_dependence_undetermined
;
106 int num_subscript_tests
;
107 int num_subscript_undetermined
;
108 int num_same_subscript_function
;
111 int num_ziv_independent
;
112 int num_ziv_dependent
;
113 int num_ziv_unimplemented
;
116 int num_siv_independent
;
117 int num_siv_dependent
;
118 int num_siv_unimplemented
;
121 int num_miv_independent
;
122 int num_miv_dependent
;
123 int num_miv_unimplemented
;
126 static bool subscript_dependence_tester_1 (struct data_dependence_relation
*,
127 struct data_reference
*,
128 struct data_reference
*,
130 /* Returns true iff A divides B. */
133 tree_fold_divides_p (const_tree a
, const_tree b
)
135 gcc_assert (TREE_CODE (a
) == INTEGER_CST
);
136 gcc_assert (TREE_CODE (b
) == INTEGER_CST
);
137 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR
, b
, a
));
140 /* Returns true iff A divides B. */
143 int_divides_p (int a
, int b
)
145 return ((b
% a
) == 0);
150 /* Dump into FILE all the data references from DATAREFS. */
153 dump_data_references (FILE *file
, vec
<data_reference_p
> datarefs
)
156 struct data_reference
*dr
;
158 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
159 dump_data_reference (file
, dr
);
162 /* Unified dump into FILE all the data references from DATAREFS. */
165 debug (vec
<data_reference_p
> &ref
)
167 dump_data_references (stderr
, ref
);
171 debug (vec
<data_reference_p
> *ptr
)
176 fprintf (stderr
, "<nil>\n");
180 /* Dump into STDERR all the data references from DATAREFS. */
183 debug_data_references (vec
<data_reference_p
> datarefs
)
185 dump_data_references (stderr
, datarefs
);
188 /* Print to STDERR the data_reference DR. */
191 debug_data_reference (struct data_reference
*dr
)
193 dump_data_reference (stderr
, dr
);
196 /* Dump function for a DATA_REFERENCE structure. */
199 dump_data_reference (FILE *outf
,
200 struct data_reference
*dr
)
204 fprintf (outf
, "#(Data Ref: \n");
205 fprintf (outf
, "# bb: %d \n", gimple_bb (DR_STMT (dr
))->index
);
206 fprintf (outf
, "# stmt: ");
207 print_gimple_stmt (outf
, DR_STMT (dr
), 0);
208 fprintf (outf
, "# ref: ");
209 print_generic_stmt (outf
, DR_REF (dr
));
210 fprintf (outf
, "# base_object: ");
211 print_generic_stmt (outf
, DR_BASE_OBJECT (dr
));
213 for (i
= 0; i
< DR_NUM_DIMENSIONS (dr
); i
++)
215 fprintf (outf
, "# Access function %d: ", i
);
216 print_generic_stmt (outf
, DR_ACCESS_FN (dr
, i
));
218 fprintf (outf
, "#)\n");
221 /* Unified dump function for a DATA_REFERENCE structure. */
224 debug (data_reference
&ref
)
226 dump_data_reference (stderr
, &ref
);
230 debug (data_reference
*ptr
)
235 fprintf (stderr
, "<nil>\n");
239 /* Dumps the affine function described by FN to the file OUTF. */
242 dump_affine_function (FILE *outf
, affine_fn fn
)
247 print_generic_expr (outf
, fn
[0], TDF_SLIM
);
248 for (i
= 1; fn
.iterate (i
, &coef
); i
++)
250 fprintf (outf
, " + ");
251 print_generic_expr (outf
, coef
, TDF_SLIM
);
252 fprintf (outf
, " * x_%u", i
);
256 /* Dumps the conflict function CF to the file OUTF. */
259 dump_conflict_function (FILE *outf
, conflict_function
*cf
)
263 if (cf
->n
== NO_DEPENDENCE
)
264 fprintf (outf
, "no dependence");
265 else if (cf
->n
== NOT_KNOWN
)
266 fprintf (outf
, "not known");
269 for (i
= 0; i
< cf
->n
; i
++)
274 dump_affine_function (outf
, cf
->fns
[i
]);
280 /* Dump function for a SUBSCRIPT structure. */
283 dump_subscript (FILE *outf
, struct subscript
*subscript
)
285 conflict_function
*cf
= SUB_CONFLICTS_IN_A (subscript
);
287 fprintf (outf
, "\n (subscript \n");
288 fprintf (outf
, " iterations_that_access_an_element_twice_in_A: ");
289 dump_conflict_function (outf
, cf
);
290 if (CF_NONTRIVIAL_P (cf
))
292 tree last_iteration
= SUB_LAST_CONFLICT (subscript
);
293 fprintf (outf
, "\n last_conflict: ");
294 print_generic_expr (outf
, last_iteration
);
297 cf
= SUB_CONFLICTS_IN_B (subscript
);
298 fprintf (outf
, "\n iterations_that_access_an_element_twice_in_B: ");
299 dump_conflict_function (outf
, cf
);
300 if (CF_NONTRIVIAL_P (cf
))
302 tree last_iteration
= SUB_LAST_CONFLICT (subscript
);
303 fprintf (outf
, "\n last_conflict: ");
304 print_generic_expr (outf
, last_iteration
);
307 fprintf (outf
, "\n (Subscript distance: ");
308 print_generic_expr (outf
, SUB_DISTANCE (subscript
));
309 fprintf (outf
, " ))\n");
312 /* Print the classic direction vector DIRV to OUTF. */
315 print_direction_vector (FILE *outf
,
321 for (eq
= 0; eq
< length
; eq
++)
323 enum data_dependence_direction dir
= ((enum data_dependence_direction
)
329 fprintf (outf
, " +");
332 fprintf (outf
, " -");
335 fprintf (outf
, " =");
337 case dir_positive_or_equal
:
338 fprintf (outf
, " +=");
340 case dir_positive_or_negative
:
341 fprintf (outf
, " +-");
343 case dir_negative_or_equal
:
344 fprintf (outf
, " -=");
347 fprintf (outf
, " *");
350 fprintf (outf
, "indep");
354 fprintf (outf
, "\n");
357 /* Print a vector of direction vectors. */
360 print_dir_vectors (FILE *outf
, vec
<lambda_vector
> dir_vects
,
366 FOR_EACH_VEC_ELT (dir_vects
, j
, v
)
367 print_direction_vector (outf
, v
, length
);
370 /* Print out a vector VEC of length N to OUTFILE. */
373 print_lambda_vector (FILE * outfile
, lambda_vector vector
, int n
)
377 for (i
= 0; i
< n
; i
++)
378 fprintf (outfile
, "%3d ", vector
[i
]);
379 fprintf (outfile
, "\n");
382 /* Print a vector of distance vectors. */
385 print_dist_vectors (FILE *outf
, vec
<lambda_vector
> dist_vects
,
391 FOR_EACH_VEC_ELT (dist_vects
, j
, v
)
392 print_lambda_vector (outf
, v
, length
);
395 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
398 dump_data_dependence_relation (FILE *outf
,
399 struct data_dependence_relation
*ddr
)
401 struct data_reference
*dra
, *drb
;
403 fprintf (outf
, "(Data Dep: \n");
405 if (!ddr
|| DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
412 dump_data_reference (outf
, dra
);
414 fprintf (outf
, " (nil)\n");
416 dump_data_reference (outf
, drb
);
418 fprintf (outf
, " (nil)\n");
420 fprintf (outf
, " (don't know)\n)\n");
426 dump_data_reference (outf
, dra
);
427 dump_data_reference (outf
, drb
);
429 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
430 fprintf (outf
, " (no dependence)\n");
432 else if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
437 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
439 fprintf (outf
, " access_fn_A: ");
440 print_generic_stmt (outf
, DR_ACCESS_FN (dra
, i
));
441 fprintf (outf
, " access_fn_B: ");
442 print_generic_stmt (outf
, DR_ACCESS_FN (drb
, i
));
443 dump_subscript (outf
, DDR_SUBSCRIPT (ddr
, i
));
446 fprintf (outf
, " inner loop index: %d\n", DDR_INNER_LOOP (ddr
));
447 fprintf (outf
, " loop nest: (");
448 FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr
), i
, loopi
)
449 fprintf (outf
, "%d ", loopi
->num
);
450 fprintf (outf
, ")\n");
452 for (i
= 0; i
< DDR_NUM_DIST_VECTS (ddr
); i
++)
454 fprintf (outf
, " distance_vector: ");
455 print_lambda_vector (outf
, DDR_DIST_VECT (ddr
, i
),
459 for (i
= 0; i
< DDR_NUM_DIR_VECTS (ddr
); i
++)
461 fprintf (outf
, " direction_vector: ");
462 print_direction_vector (outf
, DDR_DIR_VECT (ddr
, i
),
467 fprintf (outf
, ")\n");
473 debug_data_dependence_relation (struct data_dependence_relation
*ddr
)
475 dump_data_dependence_relation (stderr
, ddr
);
478 /* Dump into FILE all the dependence relations from DDRS. */
481 dump_data_dependence_relations (FILE *file
,
485 struct data_dependence_relation
*ddr
;
487 FOR_EACH_VEC_ELT (ddrs
, i
, ddr
)
488 dump_data_dependence_relation (file
, ddr
);
492 debug (vec
<ddr_p
> &ref
)
494 dump_data_dependence_relations (stderr
, ref
);
498 debug (vec
<ddr_p
> *ptr
)
503 fprintf (stderr
, "<nil>\n");
507 /* Dump to STDERR all the dependence relations from DDRS. */
510 debug_data_dependence_relations (vec
<ddr_p
> ddrs
)
512 dump_data_dependence_relations (stderr
, ddrs
);
515 /* Dumps the distance and direction vectors in FILE. DDRS contains
516 the dependence relations, and VECT_SIZE is the size of the
517 dependence vectors, or in other words the number of loops in the
521 dump_dist_dir_vectors (FILE *file
, vec
<ddr_p
> ddrs
)
524 struct data_dependence_relation
*ddr
;
527 FOR_EACH_VEC_ELT (ddrs
, i
, ddr
)
528 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
&& DDR_AFFINE_P (ddr
))
530 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr
), j
, v
)
532 fprintf (file
, "DISTANCE_V (");
533 print_lambda_vector (file
, v
, DDR_NB_LOOPS (ddr
));
534 fprintf (file
, ")\n");
537 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr
), j
, v
)
539 fprintf (file
, "DIRECTION_V (");
540 print_direction_vector (file
, v
, DDR_NB_LOOPS (ddr
));
541 fprintf (file
, ")\n");
545 fprintf (file
, "\n\n");
548 /* Dumps the data dependence relations DDRS in FILE. */
551 dump_ddrs (FILE *file
, vec
<ddr_p
> ddrs
)
554 struct data_dependence_relation
*ddr
;
556 FOR_EACH_VEC_ELT (ddrs
, i
, ddr
)
557 dump_data_dependence_relation (file
, ddr
);
559 fprintf (file
, "\n\n");
563 debug_ddrs (vec
<ddr_p
> ddrs
)
565 dump_ddrs (stderr
, ddrs
);
568 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
569 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
570 constant of type ssizetype, and returns true. If we cannot do this
571 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
575 split_constant_offset_1 (tree type
, tree op0
, enum tree_code code
, tree op1
,
576 tree
*var
, tree
*off
)
580 enum tree_code ocode
= code
;
588 *var
= build_int_cst (type
, 0);
589 *off
= fold_convert (ssizetype
, op0
);
592 case POINTER_PLUS_EXPR
:
597 split_constant_offset (op0
, &var0
, &off0
);
598 split_constant_offset (op1
, &var1
, &off1
);
599 *var
= fold_build2 (code
, type
, var0
, var1
);
600 *off
= size_binop (ocode
, off0
, off1
);
604 if (TREE_CODE (op1
) != INTEGER_CST
)
607 split_constant_offset (op0
, &var0
, &off0
);
608 *var
= fold_build2 (MULT_EXPR
, type
, var0
, op1
);
609 *off
= size_binop (MULT_EXPR
, off0
, fold_convert (ssizetype
, op1
));
615 HOST_WIDE_INT pbitsize
, pbitpos
;
617 int punsignedp
, preversep
, pvolatilep
;
619 op0
= TREE_OPERAND (op0
, 0);
621 = get_inner_reference (op0
, &pbitsize
, &pbitpos
, &poffset
, &pmode
,
622 &punsignedp
, &preversep
, &pvolatilep
);
624 if (pbitpos
% BITS_PER_UNIT
!= 0)
626 base
= build_fold_addr_expr (base
);
627 off0
= ssize_int (pbitpos
/ BITS_PER_UNIT
);
631 split_constant_offset (poffset
, &poffset
, &off1
);
632 off0
= size_binop (PLUS_EXPR
, off0
, off1
);
633 if (POINTER_TYPE_P (TREE_TYPE (base
)))
634 base
= fold_build_pointer_plus (base
, poffset
);
636 base
= fold_build2 (PLUS_EXPR
, TREE_TYPE (base
), base
,
637 fold_convert (TREE_TYPE (base
), poffset
));
640 var0
= fold_convert (type
, base
);
642 /* If variable length types are involved, punt, otherwise casts
643 might be converted into ARRAY_REFs in gimplify_conversion.
644 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
645 possibly no longer appears in current GIMPLE, might resurface.
646 This perhaps could run
647 if (CONVERT_EXPR_P (var0))
649 gimplify_conversion (&var0);
650 // Attempt to fill in any within var0 found ARRAY_REF's
651 // element size from corresponding op embedded ARRAY_REF,
652 // if unsuccessful, just punt.
654 while (POINTER_TYPE_P (type
))
655 type
= TREE_TYPE (type
);
656 if (int_size_in_bytes (type
) < 0)
666 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0
))
669 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op0
);
670 enum tree_code subcode
;
672 if (gimple_code (def_stmt
) != GIMPLE_ASSIGN
)
675 var0
= gimple_assign_rhs1 (def_stmt
);
676 subcode
= gimple_assign_rhs_code (def_stmt
);
677 var1
= gimple_assign_rhs2 (def_stmt
);
679 return split_constant_offset_1 (type
, var0
, subcode
, var1
, var
, off
);
683 /* We must not introduce undefined overflow, and we must not change the value.
684 Hence we're okay if the inner type doesn't overflow to start with
685 (pointer or signed), the outer type also is an integer or pointer
686 and the outer precision is at least as large as the inner. */
687 tree itype
= TREE_TYPE (op0
);
688 if ((POINTER_TYPE_P (itype
)
689 || (INTEGRAL_TYPE_P (itype
) && TYPE_OVERFLOW_UNDEFINED (itype
)))
690 && TYPE_PRECISION (type
) >= TYPE_PRECISION (itype
)
691 && (POINTER_TYPE_P (type
) || INTEGRAL_TYPE_P (type
)))
693 split_constant_offset (op0
, &var0
, off
);
694 *var
= fold_convert (type
, var0
);
705 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
706 will be ssizetype. */
709 split_constant_offset (tree exp
, tree
*var
, tree
*off
)
711 tree type
= TREE_TYPE (exp
), otype
, op0
, op1
, e
, o
;
715 *off
= ssize_int (0);
718 if (tree_is_chrec (exp
)
719 || get_gimple_rhs_class (TREE_CODE (exp
)) == GIMPLE_TERNARY_RHS
)
722 otype
= TREE_TYPE (exp
);
723 code
= TREE_CODE (exp
);
724 extract_ops_from_tree (exp
, &code
, &op0
, &op1
);
725 if (split_constant_offset_1 (otype
, op0
, code
, op1
, &e
, &o
))
727 *var
= fold_convert (type
, e
);
732 /* Returns the address ADDR of an object in a canonical shape (without nop
733 casts, and with type of pointer to the object). */
736 canonicalize_base_object_address (tree addr
)
742 /* The base address may be obtained by casting from integer, in that case
744 if (!POINTER_TYPE_P (TREE_TYPE (addr
)))
747 if (TREE_CODE (addr
) != ADDR_EXPR
)
750 return build_fold_addr_expr (TREE_OPERAND (addr
, 0));
753 /* Analyze the behavior of memory reference REF. There are two modes:
755 - BB analysis. In this case we simply split the address into base,
756 init and offset components, without reference to any containing loop.
757 The resulting base and offset are general expressions and they can
758 vary arbitrarily from one iteration of the containing loop to the next.
759 The step is always zero.
761 - loop analysis. In this case we analyze the reference both wrt LOOP
762 and on the basis that the reference occurs (is "used") in LOOP;
763 see the comment above analyze_scalar_evolution_in_loop for more
764 information about this distinction. The base, init, offset and
765 step fields are all invariant in LOOP.
767 Perform BB analysis if LOOP is null, or if LOOP is the function's
768 dummy outermost loop. In other cases perform loop analysis.
770 Return true if the analysis succeeded and store the results in DRB if so.
771 BB analysis can only fail for bitfield or reversed-storage accesses. */
774 dr_analyze_innermost (innermost_loop_behavior
*drb
, tree ref
,
777 HOST_WIDE_INT pbitsize
, pbitpos
;
780 int punsignedp
, preversep
, pvolatilep
;
781 affine_iv base_iv
, offset_iv
;
782 tree init
, dinit
, step
;
783 bool in_loop
= (loop
&& loop
->num
);
785 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
786 fprintf (dump_file
, "analyze_innermost: ");
788 base
= get_inner_reference (ref
, &pbitsize
, &pbitpos
, &poffset
, &pmode
,
789 &punsignedp
, &preversep
, &pvolatilep
);
790 gcc_assert (base
!= NULL_TREE
);
792 if (pbitpos
% BITS_PER_UNIT
!= 0)
794 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
795 fprintf (dump_file
, "failed: bit offset alignment.\n");
801 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
802 fprintf (dump_file
, "failed: reverse storage order.\n");
806 /* Calculate the alignment and misalignment for the inner reference. */
807 unsigned int HOST_WIDE_INT base_misalignment
;
808 unsigned int base_alignment
;
809 get_object_alignment_1 (base
, &base_alignment
, &base_misalignment
);
811 /* There are no bitfield references remaining in BASE, so the values
812 we got back must be whole bytes. */
813 gcc_assert (base_alignment
% BITS_PER_UNIT
== 0
814 && base_misalignment
% BITS_PER_UNIT
== 0);
815 base_alignment
/= BITS_PER_UNIT
;
816 base_misalignment
/= BITS_PER_UNIT
;
818 if (TREE_CODE (base
) == MEM_REF
)
820 if (!integer_zerop (TREE_OPERAND (base
, 1)))
822 /* Subtract MOFF from the base and add it to POFFSET instead.
823 Adjust the misalignment to reflect the amount we subtracted. */
824 offset_int moff
= mem_ref_offset (base
);
825 base_misalignment
-= moff
.to_short_addr ();
826 tree mofft
= wide_int_to_tree (sizetype
, moff
);
830 poffset
= size_binop (PLUS_EXPR
, poffset
, mofft
);
832 base
= TREE_OPERAND (base
, 0);
835 base
= build_fold_addr_expr (base
);
839 if (!simple_iv (loop
, loop
, base
, &base_iv
, true))
841 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
842 fprintf (dump_file
, "failed: evolution of base is not affine.\n");
849 base_iv
.step
= ssize_int (0);
850 base_iv
.no_overflow
= true;
855 offset_iv
.base
= ssize_int (0);
856 offset_iv
.step
= ssize_int (0);
862 offset_iv
.base
= poffset
;
863 offset_iv
.step
= ssize_int (0);
865 else if (!simple_iv (loop
, loop
, poffset
, &offset_iv
, true))
867 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
868 fprintf (dump_file
, "failed: evolution of offset is not affine.\n");
873 init
= ssize_int (pbitpos
/ BITS_PER_UNIT
);
875 /* Subtract any constant component from the base and add it to INIT instead.
876 Adjust the misalignment to reflect the amount we subtracted. */
877 split_constant_offset (base_iv
.base
, &base_iv
.base
, &dinit
);
878 init
= size_binop (PLUS_EXPR
, init
, dinit
);
879 base_misalignment
-= TREE_INT_CST_LOW (dinit
);
881 split_constant_offset (offset_iv
.base
, &offset_iv
.base
, &dinit
);
882 init
= size_binop (PLUS_EXPR
, init
, dinit
);
884 step
= size_binop (PLUS_EXPR
,
885 fold_convert (ssizetype
, base_iv
.step
),
886 fold_convert (ssizetype
, offset_iv
.step
));
888 base
= canonicalize_base_object_address (base_iv
.base
);
890 /* See if get_pointer_alignment can guarantee a higher alignment than
891 the one we calculated above. */
892 unsigned int HOST_WIDE_INT alt_misalignment
;
893 unsigned int alt_alignment
;
894 get_pointer_alignment_1 (base
, &alt_alignment
, &alt_misalignment
);
896 /* As above, these values must be whole bytes. */
897 gcc_assert (alt_alignment
% BITS_PER_UNIT
== 0
898 && alt_misalignment
% BITS_PER_UNIT
== 0);
899 alt_alignment
/= BITS_PER_UNIT
;
900 alt_misalignment
/= BITS_PER_UNIT
;
902 if (base_alignment
< alt_alignment
)
904 base_alignment
= alt_alignment
;
905 base_misalignment
= alt_misalignment
;
908 drb
->base_address
= base
;
909 drb
->offset
= fold_convert (ssizetype
, offset_iv
.base
);
912 drb
->base_alignment
= base_alignment
;
913 drb
->base_misalignment
= base_misalignment
& (base_alignment
- 1);
914 drb
->offset_alignment
= highest_pow2_factor (offset_iv
.base
);
915 drb
->step_alignment
= highest_pow2_factor (step
);
917 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
918 fprintf (dump_file
, "success.\n");
923 /* Determines the base object and the list of indices of memory reference
924 DR, analyzed in LOOP and instantiated in loop nest NEST. */
927 dr_analyze_indices (struct data_reference
*dr
, loop_p nest
, loop_p loop
)
929 vec
<tree
> access_fns
= vNULL
;
931 tree base
, off
, access_fn
;
932 basic_block before_loop
;
934 /* If analyzing a basic-block there are no indices to analyze
935 and thus no access functions. */
938 DR_BASE_OBJECT (dr
) = DR_REF (dr
);
939 DR_ACCESS_FNS (dr
).create (0);
944 before_loop
= block_before_loop (nest
);
946 /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
947 into a two element array with a constant index. The base is
948 then just the immediate underlying object. */
949 if (TREE_CODE (ref
) == REALPART_EXPR
)
951 ref
= TREE_OPERAND (ref
, 0);
952 access_fns
.safe_push (integer_zero_node
);
954 else if (TREE_CODE (ref
) == IMAGPART_EXPR
)
956 ref
= TREE_OPERAND (ref
, 0);
957 access_fns
.safe_push (integer_one_node
);
960 /* Analyze access functions of dimensions we know to be independent. */
961 while (handled_component_p (ref
))
963 if (TREE_CODE (ref
) == ARRAY_REF
)
965 op
= TREE_OPERAND (ref
, 1);
966 access_fn
= analyze_scalar_evolution (loop
, op
);
967 access_fn
= instantiate_scev (before_loop
, loop
, access_fn
);
968 access_fns
.safe_push (access_fn
);
970 else if (TREE_CODE (ref
) == COMPONENT_REF
971 && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref
, 0))) == RECORD_TYPE
)
973 /* For COMPONENT_REFs of records (but not unions!) use the
974 FIELD_DECL offset as constant access function so we can
975 disambiguate a[i].f1 and a[i].f2. */
976 tree off
= component_ref_field_offset (ref
);
977 off
= size_binop (PLUS_EXPR
,
978 size_binop (MULT_EXPR
,
979 fold_convert (bitsizetype
, off
),
980 bitsize_int (BITS_PER_UNIT
)),
981 DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref
, 1)));
982 access_fns
.safe_push (off
);
985 /* If we have an unhandled component we could not translate
986 to an access function stop analyzing. We have determined
987 our base object in this case. */
990 ref
= TREE_OPERAND (ref
, 0);
993 /* If the address operand of a MEM_REF base has an evolution in the
994 analyzed nest, add it as an additional independent access-function. */
995 if (TREE_CODE (ref
) == MEM_REF
)
997 op
= TREE_OPERAND (ref
, 0);
998 access_fn
= analyze_scalar_evolution (loop
, op
);
999 access_fn
= instantiate_scev (before_loop
, loop
, access_fn
);
1000 if (TREE_CODE (access_fn
) == POLYNOMIAL_CHREC
)
1003 tree memoff
= TREE_OPERAND (ref
, 1);
1004 base
= initial_condition (access_fn
);
1005 orig_type
= TREE_TYPE (base
);
1006 STRIP_USELESS_TYPE_CONVERSION (base
);
1007 split_constant_offset (base
, &base
, &off
);
1008 STRIP_USELESS_TYPE_CONVERSION (base
);
1009 /* Fold the MEM_REF offset into the evolutions initial
1010 value to make more bases comparable. */
1011 if (!integer_zerop (memoff
))
1013 off
= size_binop (PLUS_EXPR
, off
,
1014 fold_convert (ssizetype
, memoff
));
1015 memoff
= build_int_cst (TREE_TYPE (memoff
), 0);
1017 /* Adjust the offset so it is a multiple of the access type
1018 size and thus we separate bases that can possibly be used
1019 to produce partial overlaps (which the access_fn machinery
1022 if (TYPE_SIZE_UNIT (TREE_TYPE (ref
))
1023 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref
))) == INTEGER_CST
1024 && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref
))))
1025 rem
= wi::mod_trunc (off
, TYPE_SIZE_UNIT (TREE_TYPE (ref
)), SIGNED
);
1027 /* If we can't compute the remainder simply force the initial
1028 condition to zero. */
1030 off
= wide_int_to_tree (ssizetype
, wi::sub (off
, rem
));
1031 memoff
= wide_int_to_tree (TREE_TYPE (memoff
), rem
);
1032 /* And finally replace the initial condition. */
1033 access_fn
= chrec_replace_initial_condition
1034 (access_fn
, fold_convert (orig_type
, off
));
1035 /* ??? This is still not a suitable base object for
1036 dr_may_alias_p - the base object needs to be an
1037 access that covers the object as whole. With
1038 an evolution in the pointer this cannot be
1040 As a band-aid, mark the access so we can special-case
1041 it in dr_may_alias_p. */
1043 ref
= fold_build2_loc (EXPR_LOCATION (ref
),
1044 MEM_REF
, TREE_TYPE (ref
),
1046 MR_DEPENDENCE_CLIQUE (ref
) = MR_DEPENDENCE_CLIQUE (old
);
1047 MR_DEPENDENCE_BASE (ref
) = MR_DEPENDENCE_BASE (old
);
1048 DR_UNCONSTRAINED_BASE (dr
) = true;
1049 access_fns
.safe_push (access_fn
);
1052 else if (DECL_P (ref
))
1054 /* Canonicalize DR_BASE_OBJECT to MEM_REF form. */
1055 ref
= build2 (MEM_REF
, TREE_TYPE (ref
),
1056 build_fold_addr_expr (ref
),
1057 build_int_cst (reference_alias_ptr_type (ref
), 0));
1060 DR_BASE_OBJECT (dr
) = ref
;
1061 DR_ACCESS_FNS (dr
) = access_fns
;
1064 /* Extracts the alias analysis information from the memory reference DR. */
1067 dr_analyze_alias (struct data_reference
*dr
)
1069 tree ref
= DR_REF (dr
);
1070 tree base
= get_base_address (ref
), addr
;
1072 if (INDIRECT_REF_P (base
)
1073 || TREE_CODE (base
) == MEM_REF
)
1075 addr
= TREE_OPERAND (base
, 0);
1076 if (TREE_CODE (addr
) == SSA_NAME
)
1077 DR_PTR_INFO (dr
) = SSA_NAME_PTR_INFO (addr
);
1081 /* Frees data reference DR. */
1084 free_data_ref (data_reference_p dr
)
1086 DR_ACCESS_FNS (dr
).release ();
1090 /* Analyzes memory reference MEMREF accessed in STMT. The reference
1091 is read if IS_READ is true, write otherwise. Returns the
1092 data_reference description of MEMREF. NEST is the outermost loop
1093 in which the reference should be instantiated, LOOP is the loop in
1094 which the data reference should be analyzed. */
1096 struct data_reference
*
1097 create_data_ref (loop_p nest
, loop_p loop
, tree memref
, gimple
*stmt
,
1100 struct data_reference
*dr
;
1102 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1104 fprintf (dump_file
, "Creating dr for ");
1105 print_generic_expr (dump_file
, memref
, TDF_SLIM
);
1106 fprintf (dump_file
, "\n");
1109 dr
= XCNEW (struct data_reference
);
1110 DR_STMT (dr
) = stmt
;
1111 DR_REF (dr
) = memref
;
1112 DR_IS_READ (dr
) = is_read
;
1114 dr_analyze_innermost (&DR_INNERMOST (dr
), memref
,
1115 nest
!= NULL
? loop
: NULL
);
1116 dr_analyze_indices (dr
, nest
, loop
);
1117 dr_analyze_alias (dr
);
1119 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1122 fprintf (dump_file
, "\tbase_address: ");
1123 print_generic_expr (dump_file
, DR_BASE_ADDRESS (dr
), TDF_SLIM
);
1124 fprintf (dump_file
, "\n\toffset from base address: ");
1125 print_generic_expr (dump_file
, DR_OFFSET (dr
), TDF_SLIM
);
1126 fprintf (dump_file
, "\n\tconstant offset from base address: ");
1127 print_generic_expr (dump_file
, DR_INIT (dr
), TDF_SLIM
);
1128 fprintf (dump_file
, "\n\tstep: ");
1129 print_generic_expr (dump_file
, DR_STEP (dr
), TDF_SLIM
);
1130 fprintf (dump_file
, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr
));
1131 fprintf (dump_file
, "\n\tbase misalignment: %d",
1132 DR_BASE_MISALIGNMENT (dr
));
1133 fprintf (dump_file
, "\n\toffset alignment: %d",
1134 DR_OFFSET_ALIGNMENT (dr
));
1135 fprintf (dump_file
, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr
));
1136 fprintf (dump_file
, "\n\tbase_object: ");
1137 print_generic_expr (dump_file
, DR_BASE_OBJECT (dr
), TDF_SLIM
);
1138 fprintf (dump_file
, "\n");
1139 for (i
= 0; i
< DR_NUM_DIMENSIONS (dr
); i
++)
1141 fprintf (dump_file
, "\tAccess function %d: ", i
);
1142 print_generic_stmt (dump_file
, DR_ACCESS_FN (dr
, i
), TDF_SLIM
);
1149 /* A helper function computes order between two tree epxressions T1 and T2.
1150 This is used in comparator functions sorting objects based on the order
1151 of tree expressions. The function returns -1, 0, or 1. */
1154 data_ref_compare_tree (tree t1
, tree t2
)
1157 enum tree_code code
;
1170 if (TREE_CODE (t1
) != TREE_CODE (t2
))
1171 return TREE_CODE (t1
) < TREE_CODE (t2
) ? -1 : 1;
1173 code
= TREE_CODE (t1
);
1176 /* For const values, we can just use hash values for comparisons. */
1184 hashval_t h1
= iterative_hash_expr (t1
, 0);
1185 hashval_t h2
= iterative_hash_expr (t2
, 0);
1187 return h1
< h2
? -1 : 1;
1192 cmp
= data_ref_compare_tree (SSA_NAME_VAR (t1
), SSA_NAME_VAR (t2
));
1196 if (SSA_NAME_VERSION (t1
) != SSA_NAME_VERSION (t2
))
1197 return SSA_NAME_VERSION (t1
) < SSA_NAME_VERSION (t2
) ? -1 : 1;
1201 tclass
= TREE_CODE_CLASS (code
);
1203 /* For var-decl, we could compare their UIDs. */
1204 if (tclass
== tcc_declaration
)
1206 if (DECL_UID (t1
) != DECL_UID (t2
))
1207 return DECL_UID (t1
) < DECL_UID (t2
) ? -1 : 1;
1211 /* For expressions with operands, compare their operands recursively. */
1212 for (i
= TREE_OPERAND_LENGTH (t1
) - 1; i
>= 0; --i
)
1214 cmp
= data_ref_compare_tree (TREE_OPERAND (t1
, i
),
1215 TREE_OPERAND (t2
, i
));
1224 /* Return TRUE it's possible to resolve data dependence DDR by runtime alias
1228 runtime_alias_check_p (ddr_p ddr
, struct loop
*loop
, bool speed_p
)
1230 if (dump_enabled_p ())
1232 dump_printf (MSG_NOTE
, "consider run-time aliasing test between ");
1233 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, DR_REF (DDR_A (ddr
)));
1234 dump_printf (MSG_NOTE
, " and ");
1235 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, DR_REF (DDR_B (ddr
)));
1236 dump_printf (MSG_NOTE
, "\n");
1241 if (dump_enabled_p ())
1242 dump_printf (MSG_MISSED_OPTIMIZATION
,
1243 "runtime alias check not supported when optimizing "
1248 /* FORNOW: We don't support versioning with outer-loop in either
1249 vectorization or loop distribution. */
1250 if (loop
!= NULL
&& loop
->inner
!= NULL
)
1252 if (dump_enabled_p ())
1253 dump_printf (MSG_MISSED_OPTIMIZATION
,
1254 "runtime alias check not supported for outer loop.\n");
1258 /* FORNOW: We don't support creating runtime alias tests for non-constant
1260 if (TREE_CODE (DR_STEP (DDR_A (ddr
))) != INTEGER_CST
1261 || TREE_CODE (DR_STEP (DDR_B (ddr
))) != INTEGER_CST
)
1263 if (dump_enabled_p ())
1264 dump_printf (MSG_MISSED_OPTIMIZATION
,
1265 "runtime alias check not supported for non-constant "
1273 /* Operator == between two dr_with_seg_len objects.
1275 This equality operator is used to make sure two data refs
1276 are the same one so that we will consider to combine the
1277 aliasing checks of those two pairs of data dependent data
1281 operator == (const dr_with_seg_len
& d1
,
1282 const dr_with_seg_len
& d2
)
1284 return operand_equal_p (DR_BASE_ADDRESS (d1
.dr
),
1285 DR_BASE_ADDRESS (d2
.dr
), 0)
1286 && data_ref_compare_tree (DR_OFFSET (d1
.dr
), DR_OFFSET (d2
.dr
)) == 0
1287 && data_ref_compare_tree (DR_INIT (d1
.dr
), DR_INIT (d2
.dr
)) == 0
1288 && data_ref_compare_tree (d1
.seg_len
, d2
.seg_len
) == 0;
1291 /* Comparison function for sorting objects of dr_with_seg_len_pair_t
1292 so that we can combine aliasing checks in one scan. */
1295 comp_dr_with_seg_len_pair (const void *pa_
, const void *pb_
)
1297 const dr_with_seg_len_pair_t
* pa
= (const dr_with_seg_len_pair_t
*) pa_
;
1298 const dr_with_seg_len_pair_t
* pb
= (const dr_with_seg_len_pair_t
*) pb_
;
1299 const dr_with_seg_len
&a1
= pa
->first
, &a2
= pa
->second
;
1300 const dr_with_seg_len
&b1
= pb
->first
, &b2
= pb
->second
;
1302 /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks
1303 if a and c have the same basic address snd step, and b and d have the same
1304 address and step. Therefore, if any a&c or b&d don't have the same address
1305 and step, we don't care the order of those two pairs after sorting. */
1308 if ((comp_res
= data_ref_compare_tree (DR_BASE_ADDRESS (a1
.dr
),
1309 DR_BASE_ADDRESS (b1
.dr
))) != 0)
1311 if ((comp_res
= data_ref_compare_tree (DR_BASE_ADDRESS (a2
.dr
),
1312 DR_BASE_ADDRESS (b2
.dr
))) != 0)
1314 if ((comp_res
= data_ref_compare_tree (DR_STEP (a1
.dr
),
1315 DR_STEP (b1
.dr
))) != 0)
1317 if ((comp_res
= data_ref_compare_tree (DR_STEP (a2
.dr
),
1318 DR_STEP (b2
.dr
))) != 0)
1320 if ((comp_res
= data_ref_compare_tree (DR_OFFSET (a1
.dr
),
1321 DR_OFFSET (b1
.dr
))) != 0)
1323 if ((comp_res
= data_ref_compare_tree (DR_INIT (a1
.dr
),
1324 DR_INIT (b1
.dr
))) != 0)
1326 if ((comp_res
= data_ref_compare_tree (DR_OFFSET (a2
.dr
),
1327 DR_OFFSET (b2
.dr
))) != 0)
1329 if ((comp_res
= data_ref_compare_tree (DR_INIT (a2
.dr
),
1330 DR_INIT (b2
.dr
))) != 0)
1336 /* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
1337 FACTOR is number of iterations that each data reference is accessed.
1339 Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
1340 we create an expression:
1342 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1343 || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
1345 for aliasing checks. However, in some cases we can decrease the number
1346 of checks by combining two checks into one. For example, suppose we have
1347 another pair of data refs store_ptr_0 & load_ptr_1, and if the following
1348 condition is satisfied:
1350 load_ptr_0 < load_ptr_1 &&
1351 load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
1353 (this condition means, in each iteration of vectorized loop, the accessed
1354 memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
1357 we then can use only the following expression to finish the alising checks
1358 between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:
1360 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1361 || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))
1363 Note that we only consider that load_ptr_0 and load_ptr_1 have the same
1367 prune_runtime_alias_test_list (vec
<dr_with_seg_len_pair_t
> *alias_pairs
,
1368 unsigned HOST_WIDE_INT factor
)
1370 /* Sort the collected data ref pairs so that we can scan them once to
1371 combine all possible aliasing checks. */
1372 alias_pairs
->qsort (comp_dr_with_seg_len_pair
);
1374 /* Scan the sorted dr pairs and check if we can combine alias checks
1375 of two neighboring dr pairs. */
1376 for (size_t i
= 1; i
< alias_pairs
->length (); ++i
)
1378 /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2). */
1379 dr_with_seg_len
*dr_a1
= &(*alias_pairs
)[i
-1].first
,
1380 *dr_b1
= &(*alias_pairs
)[i
-1].second
,
1381 *dr_a2
= &(*alias_pairs
)[i
].first
,
1382 *dr_b2
= &(*alias_pairs
)[i
].second
;
1384 /* Remove duplicate data ref pairs. */
1385 if (*dr_a1
== *dr_a2
&& *dr_b1
== *dr_b2
)
1387 if (dump_enabled_p ())
1389 dump_printf (MSG_NOTE
, "found equal ranges ");
1390 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, DR_REF (dr_a1
->dr
));
1391 dump_printf (MSG_NOTE
, ", ");
1392 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, DR_REF (dr_b1
->dr
));
1393 dump_printf (MSG_NOTE
, " and ");
1394 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, DR_REF (dr_a2
->dr
));
1395 dump_printf (MSG_NOTE
, ", ");
1396 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, DR_REF (dr_b2
->dr
));
1397 dump_printf (MSG_NOTE
, "\n");
1399 alias_pairs
->ordered_remove (i
--);
1403 if (*dr_a1
== *dr_a2
|| *dr_b1
== *dr_b2
)
1405 /* We consider the case that DR_B1 and DR_B2 are same memrefs,
1406 and DR_A1 and DR_A2 are two consecutive memrefs. */
1407 if (*dr_a1
== *dr_a2
)
1409 std::swap (dr_a1
, dr_b1
);
1410 std::swap (dr_a2
, dr_b2
);
1413 if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1
->dr
),
1414 DR_BASE_ADDRESS (dr_a2
->dr
), 0)
1415 || !operand_equal_p (DR_OFFSET (dr_a1
->dr
),
1416 DR_OFFSET (dr_a2
->dr
), 0)
1417 || !tree_fits_shwi_p (DR_INIT (dr_a1
->dr
))
1418 || !tree_fits_shwi_p (DR_INIT (dr_a2
->dr
)))
1421 /* Only merge const step data references. */
1422 if (TREE_CODE (DR_STEP (dr_a1
->dr
)) != INTEGER_CST
1423 || TREE_CODE (DR_STEP (dr_a2
->dr
)) != INTEGER_CST
)
1426 /* DR_A1 and DR_A2 must goes in the same direction. */
1427 if (tree_int_cst_compare (DR_STEP (dr_a1
->dr
), size_zero_node
)
1428 != tree_int_cst_compare (DR_STEP (dr_a2
->dr
), size_zero_node
))
1432 = (tree_int_cst_compare (DR_STEP (dr_a1
->dr
), size_zero_node
) < 0);
1434 /* We need to compute merged segment length at compilation time for
1435 dr_a1 and dr_a2, which is impossible if either one has non-const
1437 if ((!tree_fits_uhwi_p (dr_a1
->seg_len
)
1438 || !tree_fits_uhwi_p (dr_a2
->seg_len
))
1439 && tree_int_cst_compare (DR_STEP (dr_a1
->dr
),
1440 DR_STEP (dr_a2
->dr
)) != 0)
1443 /* Make sure dr_a1 starts left of dr_a2. */
1444 if (tree_int_cst_lt (DR_INIT (dr_a2
->dr
), DR_INIT (dr_a1
->dr
)))
1445 std::swap (*dr_a1
, *dr_a2
);
1447 bool do_remove
= false;
1448 wide_int diff
= wi::sub (DR_INIT (dr_a2
->dr
), DR_INIT (dr_a1
->dr
));
1449 wide_int min_seg_len_b
;
1452 if (TREE_CODE (dr_b1
->seg_len
) == INTEGER_CST
)
1453 min_seg_len_b
= wi::abs (dr_b1
->seg_len
);
1455 min_seg_len_b
= wi::mul (factor
, wi::abs (DR_STEP (dr_b1
->dr
)));
1457 /* Now we try to merge alias check dr_a1 & dr_b and dr_a2 & dr_b.
1460 check if the following condition is satisfied:
1462 DIFF - SEGMENT_LENGTH_A < SEGMENT_LENGTH_B
1464 where DIFF = DR_A2_INIT - DR_A1_INIT. However,
1465 SEGMENT_LENGTH_A or SEGMENT_LENGTH_B may not be constant so we
1466 have to make a best estimation. We can get the minimum value
1467 of SEGMENT_LENGTH_B as a constant, represented by MIN_SEG_LEN_B,
1468 then either of the following two conditions can guarantee the
1471 1: DIFF <= MIN_SEG_LEN_B
1472 2: DIFF - SEGMENT_LENGTH_A < MIN_SEG_LEN_B
1473 Because DIFF - SEGMENT_LENGTH_A is done in sizetype, we need
1474 to take care of wrapping behavior in it.
1477 If the left segment does not extend beyond the start of the
1478 right segment the new segment length is that of the right
1479 plus the segment distance. The condition is like:
1481 DIFF >= SEGMENT_LENGTH_A ;SEGMENT_LENGTH_A is a constant.
1483 Note 1: Case A.2 and B combined together effectively merges every
1484 dr_a1 & dr_b and dr_a2 & dr_b when SEGMENT_LENGTH_A is const.
1486 Note 2: Above description is based on positive DR_STEP, we need to
1487 take care of negative DR_STEP for wrapping behavior. See PR80815
1488 for more information. */
1491 /* Adjust diff according to access size of both references. */
1492 tree size_a1
= TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a1
->dr
)));
1493 tree size_a2
= TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a2
->dr
)));
1494 diff
= wi::add (diff
, wi::sub (size_a2
, size_a1
));
1496 if (wi::leu_p (diff
, min_seg_len_b
)
1497 /* Case A.2 and B combined. */
1498 || (tree_fits_uhwi_p (dr_a2
->seg_len
)))
1500 if (tree_fits_uhwi_p (dr_a1
->seg_len
)
1501 && tree_fits_uhwi_p (dr_a2
->seg_len
))
1503 = wide_int_to_tree (sizetype
,
1504 wi::umin (wi::sub (dr_a1
->seg_len
,
1509 = size_binop (MINUS_EXPR
, dr_a2
->seg_len
,
1510 wide_int_to_tree (sizetype
, diff
));
1512 dr_a2
->seg_len
= new_seg_len
;
1519 if (wi::leu_p (diff
, min_seg_len_b
)
1520 /* Case A.2 and B combined. */
1521 || (tree_fits_uhwi_p (dr_a1
->seg_len
)))
1523 if (tree_fits_uhwi_p (dr_a1
->seg_len
)
1524 && tree_fits_uhwi_p (dr_a2
->seg_len
))
1526 = wide_int_to_tree (sizetype
,
1527 wi::umax (wi::add (dr_a2
->seg_len
,
1532 = size_binop (PLUS_EXPR
, dr_a2
->seg_len
,
1533 wide_int_to_tree (sizetype
, diff
));
1535 dr_a1
->seg_len
= new_seg_len
;
1542 if (dump_enabled_p ())
1544 dump_printf (MSG_NOTE
, "merging ranges for ");
1545 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, DR_REF (dr_a1
->dr
));
1546 dump_printf (MSG_NOTE
, ", ");
1547 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, DR_REF (dr_b1
->dr
));
1548 dump_printf (MSG_NOTE
, " and ");
1549 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, DR_REF (dr_a2
->dr
));
1550 dump_printf (MSG_NOTE
, ", ");
1551 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, DR_REF (dr_b2
->dr
));
1552 dump_printf (MSG_NOTE
, "\n");
1554 alias_pairs
->ordered_remove (neg_step
? i
- 1 : i
);
1561 /* Given LOOP's two data references and segment lengths described by DR_A
1562 and DR_B, create expression checking if the two addresses ranges intersect
1563 with each other based on index of the two addresses. This can only be
1564 done if DR_A and DR_B referring to the same (array) object and the index
1565 is the only difference. For example:
1568 data-ref arr[i] arr[j]
1570 index {i_0, +, 1}_loop {j_0, +, 1}_loop
1572 The addresses and their index are like:
1574 |<- ADDR_A ->| |<- ADDR_B ->|
1575 ------------------------------------------------------->
1577 ------------------------------------------------------->
1578 i_0 ... i_0+4 j_0 ... j_0+4
1580 We can create expression based on index rather than address:
1582 (i_0 + 4 < j_0 || j_0 + 4 < i_0)
1584 Note evolution step of index needs to be considered in comparison. */
1587 create_intersect_range_checks_index (struct loop
*loop
, tree
*cond_expr
,
1588 const dr_with_seg_len
& dr_a
,
1589 const dr_with_seg_len
& dr_b
)
1591 if (integer_zerop (DR_STEP (dr_a
.dr
))
1592 || integer_zerop (DR_STEP (dr_b
.dr
))
1593 || DR_NUM_DIMENSIONS (dr_a
.dr
) != DR_NUM_DIMENSIONS (dr_b
.dr
))
1596 if (!tree_fits_uhwi_p (dr_a
.seg_len
) || !tree_fits_uhwi_p (dr_b
.seg_len
))
1599 if (!tree_fits_shwi_p (DR_STEP (dr_a
.dr
)))
1602 if (!operand_equal_p (DR_BASE_OBJECT (dr_a
.dr
), DR_BASE_OBJECT (dr_b
.dr
), 0))
1605 if (!operand_equal_p (DR_STEP (dr_a
.dr
), DR_STEP (dr_b
.dr
), 0))
1608 gcc_assert (TREE_CODE (DR_STEP (dr_a
.dr
)) == INTEGER_CST
);
1610 bool neg_step
= tree_int_cst_compare (DR_STEP (dr_a
.dr
), size_zero_node
) < 0;
1611 unsigned HOST_WIDE_INT abs_step
1612 = absu_hwi (tree_to_shwi (DR_STEP (dr_a
.dr
)));
1614 unsigned HOST_WIDE_INT seg_len1
= tree_to_uhwi (dr_a
.seg_len
);
1615 unsigned HOST_WIDE_INT seg_len2
= tree_to_uhwi (dr_b
.seg_len
);
1616 /* Infer the number of iterations with which the memory segment is accessed
1617 by DR. In other words, alias is checked if memory segment accessed by
1618 DR_A in some iterations intersect with memory segment accessed by DR_B
1619 in the same amount iterations.
1620 Note segnment length is a linear function of number of iterations with
1621 DR_STEP as the coefficient. */
1622 unsigned HOST_WIDE_INT niter_len1
= (seg_len1
+ abs_step
- 1) / abs_step
;
1623 unsigned HOST_WIDE_INT niter_len2
= (seg_len2
+ abs_step
- 1) / abs_step
;
1626 for (i
= 0; i
< DR_NUM_DIMENSIONS (dr_a
.dr
); i
++)
1628 tree access1
= DR_ACCESS_FN (dr_a
.dr
, i
);
1629 tree access2
= DR_ACCESS_FN (dr_b
.dr
, i
);
1630 /* Two indices must be the same if they are not scev, or not scev wrto
1631 current loop being vecorized. */
1632 if (TREE_CODE (access1
) != POLYNOMIAL_CHREC
1633 || TREE_CODE (access2
) != POLYNOMIAL_CHREC
1634 || CHREC_VARIABLE (access1
) != (unsigned)loop
->num
1635 || CHREC_VARIABLE (access2
) != (unsigned)loop
->num
)
1637 if (operand_equal_p (access1
, access2
, 0))
1642 /* The two indices must have the same step. */
1643 if (!operand_equal_p (CHREC_RIGHT (access1
), CHREC_RIGHT (access2
), 0))
1646 tree idx_step
= CHREC_RIGHT (access1
);
1647 /* Index must have const step, otherwise DR_STEP won't be constant. */
1648 gcc_assert (TREE_CODE (idx_step
) == INTEGER_CST
);
1649 /* Index must evaluate in the same direction as DR. */
1650 gcc_assert (!neg_step
|| tree_int_cst_sign_bit (idx_step
) == 1);
1652 tree min1
= CHREC_LEFT (access1
);
1653 tree min2
= CHREC_LEFT (access2
);
1654 if (!types_compatible_p (TREE_TYPE (min1
), TREE_TYPE (min2
)))
1657 /* Ideally, alias can be checked against loop's control IV, but we
1658 need to prove linear mapping between control IV and reference
1659 index. Although that should be true, we check against (array)
1660 index of data reference. Like segment length, index length is
1661 linear function of the number of iterations with index_step as
1662 the coefficient, i.e, niter_len * idx_step. */
1663 tree idx_len1
= fold_build2 (MULT_EXPR
, TREE_TYPE (min1
), idx_step
,
1664 build_int_cst (TREE_TYPE (min1
),
1666 tree idx_len2
= fold_build2 (MULT_EXPR
, TREE_TYPE (min2
), idx_step
,
1667 build_int_cst (TREE_TYPE (min2
),
1669 tree max1
= fold_build2 (PLUS_EXPR
, TREE_TYPE (min1
), min1
, idx_len1
);
1670 tree max2
= fold_build2 (PLUS_EXPR
, TREE_TYPE (min2
), min2
, idx_len2
);
1671 /* Adjust ranges for negative step. */
1674 min1
= fold_build2 (MINUS_EXPR
, TREE_TYPE (min1
), max1
, idx_step
);
1675 max1
= fold_build2 (MINUS_EXPR
, TREE_TYPE (min1
),
1676 CHREC_LEFT (access1
), idx_step
);
1677 min2
= fold_build2 (MINUS_EXPR
, TREE_TYPE (min2
), max2
, idx_step
);
1678 max2
= fold_build2 (MINUS_EXPR
, TREE_TYPE (min2
),
1679 CHREC_LEFT (access2
), idx_step
);
1682 = fold_build2 (TRUTH_OR_EXPR
, boolean_type_node
,
1683 fold_build2 (LE_EXPR
, boolean_type_node
, max1
, min2
),
1684 fold_build2 (LE_EXPR
, boolean_type_node
, max2
, min1
));
1686 *cond_expr
= fold_build2 (TRUTH_AND_EXPR
, boolean_type_node
,
1687 *cond_expr
, part_cond_expr
);
1689 *cond_expr
= part_cond_expr
;
1694 /* Given two data references and segment lengths described by DR_A and DR_B,
1695 create expression checking if the two addresses ranges intersect with
1698 ((DR_A_addr_0 + DR_A_segment_length_0) <= DR_B_addr_0)
1699 || (DR_B_addr_0 + DER_B_segment_length_0) <= DR_A_addr_0)) */
1702 create_intersect_range_checks (struct loop
*loop
, tree
*cond_expr
,
1703 const dr_with_seg_len
& dr_a
,
1704 const dr_with_seg_len
& dr_b
)
1706 *cond_expr
= NULL_TREE
;
1707 if (create_intersect_range_checks_index (loop
, cond_expr
, dr_a
, dr_b
))
1710 tree segment_length_a
= dr_a
.seg_len
;
1711 tree segment_length_b
= dr_b
.seg_len
;
1712 tree addr_base_a
= DR_BASE_ADDRESS (dr_a
.dr
);
1713 tree addr_base_b
= DR_BASE_ADDRESS (dr_b
.dr
);
1714 tree offset_a
= DR_OFFSET (dr_a
.dr
), offset_b
= DR_OFFSET (dr_b
.dr
);
1716 offset_a
= fold_build2 (PLUS_EXPR
, TREE_TYPE (offset_a
),
1717 offset_a
, DR_INIT (dr_a
.dr
));
1718 offset_b
= fold_build2 (PLUS_EXPR
, TREE_TYPE (offset_b
),
1719 offset_b
, DR_INIT (dr_b
.dr
));
1720 addr_base_a
= fold_build_pointer_plus (addr_base_a
, offset_a
);
1721 addr_base_b
= fold_build_pointer_plus (addr_base_b
, offset_b
);
1723 tree seg_a_min
= addr_base_a
;
1724 tree seg_a_max
= fold_build_pointer_plus (addr_base_a
, segment_length_a
);
1725 /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
1726 bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
1728 if (tree_int_cst_compare (DR_STEP (dr_a
.dr
), size_zero_node
) < 0)
1730 tree unit_size
= TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a
.dr
)));
1731 seg_a_min
= fold_build_pointer_plus (seg_a_max
, unit_size
);
1732 seg_a_max
= fold_build_pointer_plus (addr_base_a
, unit_size
);
1735 tree seg_b_min
= addr_base_b
;
1736 tree seg_b_max
= fold_build_pointer_plus (addr_base_b
, segment_length_b
);
1737 if (tree_int_cst_compare (DR_STEP (dr_b
.dr
), size_zero_node
) < 0)
1739 tree unit_size
= TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b
.dr
)));
1740 seg_b_min
= fold_build_pointer_plus (seg_b_max
, unit_size
);
1741 seg_b_max
= fold_build_pointer_plus (addr_base_b
, unit_size
);
1744 = fold_build2 (TRUTH_OR_EXPR
, boolean_type_node
,
1745 fold_build2 (LE_EXPR
, boolean_type_node
, seg_a_max
, seg_b_min
),
1746 fold_build2 (LE_EXPR
, boolean_type_node
, seg_b_max
, seg_a_min
));
1749 /* Create a conditional expression that represents the run-time checks for
1750 overlapping of address ranges represented by a list of data references
1751 pairs passed in ALIAS_PAIRS. Data references are in LOOP. The returned
1752 COND_EXPR is the conditional expression to be used in the if statement
1753 that controls which version of the loop gets executed at runtime. */
1756 create_runtime_alias_checks (struct loop
*loop
,
1757 vec
<dr_with_seg_len_pair_t
> *alias_pairs
,
1760 tree part_cond_expr
;
1762 for (size_t i
= 0, s
= alias_pairs
->length (); i
< s
; ++i
)
1764 const dr_with_seg_len
& dr_a
= (*alias_pairs
)[i
].first
;
1765 const dr_with_seg_len
& dr_b
= (*alias_pairs
)[i
].second
;
1767 if (dump_enabled_p ())
1769 dump_printf (MSG_NOTE
, "create runtime check for data references ");
1770 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, DR_REF (dr_a
.dr
));
1771 dump_printf (MSG_NOTE
, " and ");
1772 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, DR_REF (dr_b
.dr
));
1773 dump_printf (MSG_NOTE
, "\n");
1776 /* Create condition expression for each pair data references. */
1777 create_intersect_range_checks (loop
, &part_cond_expr
, dr_a
, dr_b
);
1779 *cond_expr
= fold_build2 (TRUTH_AND_EXPR
, boolean_type_node
,
1780 *cond_expr
, part_cond_expr
);
1782 *cond_expr
= part_cond_expr
;
1786 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1789 dr_equal_offsets_p1 (tree offset1
, tree offset2
)
1793 STRIP_NOPS (offset1
);
1794 STRIP_NOPS (offset2
);
1796 if (offset1
== offset2
)
1799 if (TREE_CODE (offset1
) != TREE_CODE (offset2
)
1800 || (!BINARY_CLASS_P (offset1
) && !UNARY_CLASS_P (offset1
)))
1803 res
= dr_equal_offsets_p1 (TREE_OPERAND (offset1
, 0),
1804 TREE_OPERAND (offset2
, 0));
1806 if (!res
|| !BINARY_CLASS_P (offset1
))
1809 res
= dr_equal_offsets_p1 (TREE_OPERAND (offset1
, 1),
1810 TREE_OPERAND (offset2
, 1));
1815 /* Check if DRA and DRB have equal offsets. */
1817 dr_equal_offsets_p (struct data_reference
*dra
,
1818 struct data_reference
*drb
)
1820 tree offset1
, offset2
;
1822 offset1
= DR_OFFSET (dra
);
1823 offset2
= DR_OFFSET (drb
);
1825 return dr_equal_offsets_p1 (offset1
, offset2
);
1828 /* Returns true if FNA == FNB. */
1831 affine_function_equal_p (affine_fn fna
, affine_fn fnb
)
1833 unsigned i
, n
= fna
.length ();
1835 if (n
!= fnb
.length ())
1838 for (i
= 0; i
< n
; i
++)
1839 if (!operand_equal_p (fna
[i
], fnb
[i
], 0))
1845 /* If all the functions in CF are the same, returns one of them,
1846 otherwise returns NULL. */
1849 common_affine_function (conflict_function
*cf
)
1854 if (!CF_NONTRIVIAL_P (cf
))
1855 return affine_fn ();
1859 for (i
= 1; i
< cf
->n
; i
++)
1860 if (!affine_function_equal_p (comm
, cf
->fns
[i
]))
1861 return affine_fn ();
1866 /* Returns the base of the affine function FN. */
1869 affine_function_base (affine_fn fn
)
1874 /* Returns true if FN is a constant. */
1877 affine_function_constant_p (affine_fn fn
)
1882 for (i
= 1; fn
.iterate (i
, &coef
); i
++)
1883 if (!integer_zerop (coef
))
1889 /* Returns true if FN is the zero constant function. */
1892 affine_function_zero_p (affine_fn fn
)
1894 return (integer_zerop (affine_function_base (fn
))
1895 && affine_function_constant_p (fn
));
1898 /* Returns a signed integer type with the largest precision from TA
1902 signed_type_for_types (tree ta
, tree tb
)
1904 if (TYPE_PRECISION (ta
) > TYPE_PRECISION (tb
))
1905 return signed_type_for (ta
);
1907 return signed_type_for (tb
);
1910 /* Applies operation OP on affine functions FNA and FNB, and returns the
1914 affine_fn_op (enum tree_code op
, affine_fn fna
, affine_fn fnb
)
1920 if (fnb
.length () > fna
.length ())
1932 for (i
= 0; i
< n
; i
++)
1934 tree type
= signed_type_for_types (TREE_TYPE (fna
[i
]),
1935 TREE_TYPE (fnb
[i
]));
1936 ret
.quick_push (fold_build2 (op
, type
, fna
[i
], fnb
[i
]));
1939 for (; fna
.iterate (i
, &coef
); i
++)
1940 ret
.quick_push (fold_build2 (op
, signed_type_for (TREE_TYPE (coef
)),
1941 coef
, integer_zero_node
));
1942 for (; fnb
.iterate (i
, &coef
); i
++)
1943 ret
.quick_push (fold_build2 (op
, signed_type_for (TREE_TYPE (coef
)),
1944 integer_zero_node
, coef
));
1949 /* Returns the sum of affine functions FNA and FNB. */
1952 affine_fn_plus (affine_fn fna
, affine_fn fnb
)
1954 return affine_fn_op (PLUS_EXPR
, fna
, fnb
);
1957 /* Returns the difference of affine functions FNA and FNB. */
1960 affine_fn_minus (affine_fn fna
, affine_fn fnb
)
1962 return affine_fn_op (MINUS_EXPR
, fna
, fnb
);
1965 /* Frees affine function FN. */
1968 affine_fn_free (affine_fn fn
)
1973 /* Determine for each subscript in the data dependence relation DDR
1977 compute_subscript_distance (struct data_dependence_relation
*ddr
)
1979 conflict_function
*cf_a
, *cf_b
;
1980 affine_fn fn_a
, fn_b
, diff
;
1982 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
1986 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
1988 struct subscript
*subscript
;
1990 subscript
= DDR_SUBSCRIPT (ddr
, i
);
1991 cf_a
= SUB_CONFLICTS_IN_A (subscript
);
1992 cf_b
= SUB_CONFLICTS_IN_B (subscript
);
1994 fn_a
= common_affine_function (cf_a
);
1995 fn_b
= common_affine_function (cf_b
);
1996 if (!fn_a
.exists () || !fn_b
.exists ())
1998 SUB_DISTANCE (subscript
) = chrec_dont_know
;
2001 diff
= affine_fn_minus (fn_a
, fn_b
);
2003 if (affine_function_constant_p (diff
))
2004 SUB_DISTANCE (subscript
) = affine_function_base (diff
);
2006 SUB_DISTANCE (subscript
) = chrec_dont_know
;
2008 affine_fn_free (diff
);
2013 /* Returns the conflict function for "unknown". */
2015 static conflict_function
*
2016 conflict_fn_not_known (void)
2018 conflict_function
*fn
= XCNEW (conflict_function
);
2024 /* Returns the conflict function for "independent". */
2026 static conflict_function
*
2027 conflict_fn_no_dependence (void)
2029 conflict_function
*fn
= XCNEW (conflict_function
);
2030 fn
->n
= NO_DEPENDENCE
;
2035 /* Returns true if the address of OBJ is invariant in LOOP. */
2038 object_address_invariant_in_loop_p (const struct loop
*loop
, const_tree obj
)
2040 while (handled_component_p (obj
))
2042 if (TREE_CODE (obj
) == ARRAY_REF
)
2044 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
2045 need to check the stride and the lower bound of the reference. */
2046 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, 2),
2048 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, 3),
2052 else if (TREE_CODE (obj
) == COMPONENT_REF
)
2054 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, 2),
2058 obj
= TREE_OPERAND (obj
, 0);
2061 if (!INDIRECT_REF_P (obj
)
2062 && TREE_CODE (obj
) != MEM_REF
)
2065 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, 0),
2069 /* Returns false if we can prove that data references A and B do not alias,
2070 true otherwise. If LOOP_NEST is false no cross-iteration aliases are
2074 dr_may_alias_p (const struct data_reference
*a
, const struct data_reference
*b
,
2077 tree addr_a
= DR_BASE_OBJECT (a
);
2078 tree addr_b
= DR_BASE_OBJECT (b
);
2080 /* If we are not processing a loop nest but scalar code we
2081 do not need to care about possible cross-iteration dependences
2082 and thus can process the full original reference. Do so,
2083 similar to how loop invariant motion applies extra offset-based
2087 aff_tree off1
, off2
;
2088 widest_int size1
, size2
;
2089 get_inner_reference_aff (DR_REF (a
), &off1
, &size1
);
2090 get_inner_reference_aff (DR_REF (b
), &off2
, &size2
);
2091 aff_combination_scale (&off1
, -1);
2092 aff_combination_add (&off2
, &off1
);
2093 if (aff_comb_cannot_overlap_p (&off2
, size1
, size2
))
2097 if ((TREE_CODE (addr_a
) == MEM_REF
|| TREE_CODE (addr_a
) == TARGET_MEM_REF
)
2098 && (TREE_CODE (addr_b
) == MEM_REF
|| TREE_CODE (addr_b
) == TARGET_MEM_REF
)
2099 && MR_DEPENDENCE_CLIQUE (addr_a
) == MR_DEPENDENCE_CLIQUE (addr_b
)
2100 && MR_DEPENDENCE_BASE (addr_a
) != MR_DEPENDENCE_BASE (addr_b
))
2103 /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
2104 do not know the size of the base-object. So we cannot do any
2105 offset/overlap based analysis but have to rely on points-to
2106 information only. */
2107 if (TREE_CODE (addr_a
) == MEM_REF
2108 && (DR_UNCONSTRAINED_BASE (a
)
2109 || TREE_CODE (TREE_OPERAND (addr_a
, 0)) == SSA_NAME
))
2111 /* For true dependences we can apply TBAA. */
2112 if (flag_strict_aliasing
2113 && DR_IS_WRITE (a
) && DR_IS_READ (b
)
2114 && !alias_sets_conflict_p (get_alias_set (DR_REF (a
)),
2115 get_alias_set (DR_REF (b
))))
2117 if (TREE_CODE (addr_b
) == MEM_REF
)
2118 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a
, 0),
2119 TREE_OPERAND (addr_b
, 0));
2121 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a
, 0),
2122 build_fold_addr_expr (addr_b
));
2124 else if (TREE_CODE (addr_b
) == MEM_REF
2125 && (DR_UNCONSTRAINED_BASE (b
)
2126 || TREE_CODE (TREE_OPERAND (addr_b
, 0)) == SSA_NAME
))
2128 /* For true dependences we can apply TBAA. */
2129 if (flag_strict_aliasing
2130 && DR_IS_WRITE (a
) && DR_IS_READ (b
)
2131 && !alias_sets_conflict_p (get_alias_set (DR_REF (a
)),
2132 get_alias_set (DR_REF (b
))))
2134 if (TREE_CODE (addr_a
) == MEM_REF
)
2135 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a
, 0),
2136 TREE_OPERAND (addr_b
, 0));
2138 return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a
),
2139 TREE_OPERAND (addr_b
, 0));
2142 /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
2143 that is being subsetted in the loop nest. */
2144 if (DR_IS_WRITE (a
) && DR_IS_WRITE (b
))
2145 return refs_output_dependent_p (addr_a
, addr_b
);
2146 else if (DR_IS_READ (a
) && DR_IS_WRITE (b
))
2147 return refs_anti_dependent_p (addr_a
, addr_b
);
2148 return refs_may_alias_p (addr_a
, addr_b
);
2151 /* Initialize a data dependence relation between data accesses A and
2152 B. NB_LOOPS is the number of loops surrounding the references: the
2153 size of the classic distance/direction vectors. */
2155 struct data_dependence_relation
*
2156 initialize_data_dependence_relation (struct data_reference
*a
,
2157 struct data_reference
*b
,
2158 vec
<loop_p
> loop_nest
)
2160 struct data_dependence_relation
*res
;
2163 res
= XNEW (struct data_dependence_relation
);
2166 DDR_LOOP_NEST (res
).create (0);
2167 DDR_REVERSED_P (res
) = false;
2168 DDR_SUBSCRIPTS (res
).create (0);
2169 DDR_DIR_VECTS (res
).create (0);
2170 DDR_DIST_VECTS (res
).create (0);
2172 if (a
== NULL
|| b
== NULL
)
2174 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
2178 /* If the data references do not alias, then they are independent. */
2179 if (!dr_may_alias_p (a
, b
, loop_nest
.exists ()))
2181 DDR_ARE_DEPENDENT (res
) = chrec_known
;
2185 /* The case where the references are exactly the same. */
2186 if (operand_equal_p (DR_REF (a
), DR_REF (b
), 0))
2188 if ((loop_nest
.exists ()
2189 && !object_address_invariant_in_loop_p (loop_nest
[0],
2190 DR_BASE_OBJECT (a
)))
2191 || DR_NUM_DIMENSIONS (a
) == 0)
2193 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
2196 DDR_AFFINE_P (res
) = true;
2197 DDR_ARE_DEPENDENT (res
) = NULL_TREE
;
2198 DDR_SUBSCRIPTS (res
).create (DR_NUM_DIMENSIONS (a
));
2199 DDR_LOOP_NEST (res
) = loop_nest
;
2200 DDR_INNER_LOOP (res
) = 0;
2201 DDR_SELF_REFERENCE (res
) = true;
2202 for (i
= 0; i
< DR_NUM_DIMENSIONS (a
); i
++)
2204 struct subscript
*subscript
;
2206 subscript
= XNEW (struct subscript
);
2207 SUB_CONFLICTS_IN_A (subscript
) = conflict_fn_not_known ();
2208 SUB_CONFLICTS_IN_B (subscript
) = conflict_fn_not_known ();
2209 SUB_LAST_CONFLICT (subscript
) = chrec_dont_know
;
2210 SUB_DISTANCE (subscript
) = chrec_dont_know
;
2211 DDR_SUBSCRIPTS (res
).safe_push (subscript
);
2216 /* If the references do not access the same object, we do not know
2217 whether they alias or not. We do not care about TBAA or alignment
2218 info so we can use OEP_ADDRESS_OF to avoid false negatives.
2219 But the accesses have to use compatible types as otherwise the
2220 built indices would not match. */
2221 if (!operand_equal_p (DR_BASE_OBJECT (a
), DR_BASE_OBJECT (b
), OEP_ADDRESS_OF
)
2222 || !types_compatible_p (TREE_TYPE (DR_BASE_OBJECT (a
)),
2223 TREE_TYPE (DR_BASE_OBJECT (b
))))
2225 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
2229 /* If the base of the object is not invariant in the loop nest, we cannot
2230 analyze it. TODO -- in fact, it would suffice to record that there may
2231 be arbitrary dependences in the loops where the base object varies. */
2232 if ((loop_nest
.exists ()
2233 && !object_address_invariant_in_loop_p (loop_nest
[0], DR_BASE_OBJECT (a
)))
2234 || DR_NUM_DIMENSIONS (a
) == 0)
2236 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
  /* If the numbers of dimensions of the accesses do not agree we can have
     a pointer access to a component of the array element type and an
     array access while the base-objects are still the same.  Punt.  */
2243 if (DR_NUM_DIMENSIONS (a
) != DR_NUM_DIMENSIONS (b
))
2245 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
2249 DDR_AFFINE_P (res
) = true;
2250 DDR_ARE_DEPENDENT (res
) = NULL_TREE
;
2251 DDR_SUBSCRIPTS (res
).create (DR_NUM_DIMENSIONS (a
));
2252 DDR_LOOP_NEST (res
) = loop_nest
;
2253 DDR_INNER_LOOP (res
) = 0;
2254 DDR_SELF_REFERENCE (res
) = false;
2256 for (i
= 0; i
< DR_NUM_DIMENSIONS (a
); i
++)
2258 struct subscript
*subscript
;
2260 subscript
= XNEW (struct subscript
);
2261 SUB_CONFLICTS_IN_A (subscript
) = conflict_fn_not_known ();
2262 SUB_CONFLICTS_IN_B (subscript
) = conflict_fn_not_known ();
2263 SUB_LAST_CONFLICT (subscript
) = chrec_dont_know
;
2264 SUB_DISTANCE (subscript
) = chrec_dont_know
;
2265 DDR_SUBSCRIPTS (res
).safe_push (subscript
);
/* Frees memory used by the conflict function F.  */

static void
free_conflict_function (conflict_function *f)
{
  unsigned i;

  if (CF_NONTRIVIAL_P (f))
    {
      for (i = 0; i < f->n; i++)
        affine_fn_free (f->fns[i]);
    }
  free (f);
}

/* Frees memory used by SUBSCRIPTS.  */

static void
free_subscripts (vec<subscript_p> subscripts)
{
  unsigned i;
  subscript_p s;

  FOR_EACH_VEC_ELT (subscripts, i, s)
    {
      free_conflict_function (s->conflicting_iterations_in_a);
      free_conflict_function (s->conflicting_iterations_in_b);
      free (s);
    }
  subscripts.release ();
}

/* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
   description.  */

static inline void
finalize_ddr_dependent (struct data_dependence_relation *ddr,
                        tree chrec)
{
  DDR_ARE_DEPENDENT (ddr) = chrec;
  free_subscripts (DDR_SUBSCRIPTS (ddr));
  DDR_SUBSCRIPTS (ddr).create (0);
}

/* The dependence relation DDR cannot be represented by a distance
   vector.  */

static inline void
non_affine_dependence_relation (struct data_dependence_relation *ddr)
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");

  DDR_AFFINE_P (ddr) = false;
}
/* This section contains the classic Banerjee tests.  */

/* Returns true iff CHREC_A and CHREC_B are not dependent on any index
   variables, i.e., if the ZIV (Zero Index Variable) test is true.  */

static inline bool
ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
{
  return (evolution_function_is_constant_p (chrec_a)
          && evolution_function_is_constant_p (chrec_b));
}
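/* Illustrative classification (added for exposition, not from the
   original sources): for the accesses A[3] and A[4] both access
   functions are constants, so the subscript is ZIV; for A[i] and
   A[i + 1] exactly one index variable appears, so the subscript is SIV;
   for A[i + j] and A[j] two index variables appear, so the subscript is
   MIV and is handled by analyze_miv_subscript below.  */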
/* Returns true iff CHREC_A and CHREC_B are dependent on an index
   variable, i.e., if the SIV (Single Index Variable) test is true.  */

static bool
siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
{
  if ((evolution_function_is_constant_p (chrec_a)
       && evolution_function_is_univariate_p (chrec_b))
      || (evolution_function_is_constant_p (chrec_b)
          && evolution_function_is_univariate_p (chrec_a)))
    return true;

  if (evolution_function_is_univariate_p (chrec_a)
      && evolution_function_is_univariate_p (chrec_b))
    {
      switch (TREE_CODE (chrec_a))
        {
        case POLYNOMIAL_CHREC:
          switch (TREE_CODE (chrec_b))
            {
            case POLYNOMIAL_CHREC:
              if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
                return false;
              /* FALLTHRU */

            default:
              return true;
            }

        default:
          return true;
        }
    }

  return false;
}
/* Creates a conflict function with N dimensions.  The affine functions
   in each dimension follow.  */

static conflict_function *
conflict_fn (unsigned n, ...)
{
  unsigned i;
  conflict_function *ret = XCNEW (conflict_function);
  va_list ap;

  gcc_assert (0 < n && n <= MAX_DIM);
  va_start (ap, n);

  ret->n = n;
  for (i = 0; i < n; i++)
    ret->fns[i] = va_arg (ap, affine_fn);
  va_end (ap);

  return ret;
}

/* Returns constant affine function with value CST.  */

static affine_fn
affine_fn_cst (tree cst)
{
  affine_fn fn;
  fn.create (1);
  fn.quick_push (cst);
  return fn;
}

/* Returns affine function with single variable, CST + COEF * x_DIM.  */

static affine_fn
affine_fn_univar (tree cst, unsigned dim, tree coef)
{
  affine_fn fn;
  fn.create (dim + 1);
  unsigned i;

  gcc_assert (dim > 0);
  fn.quick_push (cst);
  for (i = 1; i < dim; i++)
    fn.quick_push (integer_zero_node);
  fn.quick_push (coef);
  return fn;
}
2427 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
2428 *OVERLAPS_B are initialized to the functions that describe the
2429 relation between the elements accessed twice by CHREC_A and
2430 CHREC_B. For k >= 0, the following property is verified:
2432 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
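/* Illustrative example (added for exposition, not from the original
   sources): for the accesses A[3] and A[5] the difference of the two
   constant access functions is 3 - 5 = -2, a nonzero integer, so the
   ZIV test reports independence; for A[3] and A[3] the difference is 0,
   the same element is accessed at every iteration, and the last
   conflict is set to chrec_dont_know.  */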
2435 analyze_ziv_subscript (tree chrec_a
,
2437 conflict_function
**overlaps_a
,
2438 conflict_function
**overlaps_b
,
2439 tree
*last_conflicts
)
2441 tree type
, difference
;
2442 dependence_stats
.num_ziv
++;
2444 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2445 fprintf (dump_file
, "(analyze_ziv_subscript \n");
2447 type
= signed_type_for_types (TREE_TYPE (chrec_a
), TREE_TYPE (chrec_b
));
2448 chrec_a
= chrec_convert (type
, chrec_a
, NULL
);
2449 chrec_b
= chrec_convert (type
, chrec_b
, NULL
);
2450 difference
= chrec_fold_minus (type
, chrec_a
, chrec_b
);
2452 switch (TREE_CODE (difference
))
2455 if (integer_zerop (difference
))
2457 /* The difference is equal to zero: the accessed index
2458 overlaps for each iteration in the loop. */
2459 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2460 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2461 *last_conflicts
= chrec_dont_know
;
2462 dependence_stats
.num_ziv_dependent
++;
2466 /* The accesses do not overlap. */
2467 *overlaps_a
= conflict_fn_no_dependence ();
2468 *overlaps_b
= conflict_fn_no_dependence ();
2469 *last_conflicts
= integer_zero_node
;
2470 dependence_stats
.num_ziv_independent
++;
2475 /* We're not sure whether the indexes overlap. For the moment,
2476 conservatively answer "don't know". */
2477 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2478 fprintf (dump_file
, "ziv test failed: difference is non-integer.\n");
2480 *overlaps_a
= conflict_fn_not_known ();
2481 *overlaps_b
= conflict_fn_not_known ();
2482 *last_conflicts
= chrec_dont_know
;
2483 dependence_stats
.num_ziv_unimplemented
++;
2487 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2488 fprintf (dump_file
, ")\n");
/* Similar to max_stmt_executions_int, but returns the bound as a tree,
   and only if it fits to the int type.  If this is not the case, or the
   bound on the number of iterations of LOOP could not be derived, returns
   chrec_dont_know.  */

static tree
max_stmt_executions_tree (struct loop *loop)
{
  widest_int nit;

  if (!max_stmt_executions (loop, &nit))
    return chrec_dont_know;

  if (!wi::fits_to_tree_p (nit, unsigned_type_node))
    return chrec_dont_know;

  return wide_int_to_tree (unsigned_type_node, nit);
}
/* Determine whether the CHREC is always positive/negative.  If the expression
   cannot be statically analyzed, return false, otherwise set the answer into
   VALUE.  */

static bool
chrec_is_positive (tree chrec, bool *value)
{
2517 bool value0
, value1
, value2
;
2518 tree end_value
, nb_iter
;
2520 switch (TREE_CODE (chrec
))
2522 case POLYNOMIAL_CHREC
:
2523 if (!chrec_is_positive (CHREC_LEFT (chrec
), &value0
)
2524 || !chrec_is_positive (CHREC_RIGHT (chrec
), &value1
))
2527 /* FIXME -- overflows. */
2528 if (value0
== value1
)
2534 /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
2535 and the proof consists in showing that the sign never
2536 changes during the execution of the loop, from 0 to
2537 loop->nb_iterations. */
2538 if (!evolution_function_is_affine_p (chrec
))
2541 nb_iter
= number_of_latch_executions (get_chrec_loop (chrec
));
2542 if (chrec_contains_undetermined (nb_iter
))
2546 /* TODO -- If the test is after the exit, we may decrease the number of
2547 iterations by one. */
2549 nb_iter
= chrec_fold_minus (type
, nb_iter
, build_int_cst (type
, 1));
2552 end_value
= chrec_apply (CHREC_VARIABLE (chrec
), chrec
, nb_iter
);
2554 if (!chrec_is_positive (end_value
, &value2
))
2558 return value0
== value1
;
2561 switch (tree_int_cst_sgn (chrec
))
2580 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
2581 constant, and CHREC_B is an affine function. *OVERLAPS_A and
2582 *OVERLAPS_B are initialized to the functions that describe the
2583 relation between the elements accessed twice by CHREC_A and
2584 CHREC_B. For k >= 0, the following property is verified:
2586 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
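/* Illustrative example (added for exposition, not from the original
   sources): for chrec_a = 12 and chrec_b = {10, +, 1}_x the difference
   of the initial conditions is 10 - 12 = -2 and the step 1 of chrec_b
   divides it, so, provided loop x runs at least 3 iterations, the two
   accesses conflict exactly once, at iteration |-2| / 1 = 2 of loop x,
   where {10, +, 1}_x evaluates to 12.  */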
2589 analyze_siv_subscript_cst_affine (tree chrec_a
,
2591 conflict_function
**overlaps_a
,
2592 conflict_function
**overlaps_b
,
2593 tree
*last_conflicts
)
2595 bool value0
, value1
, value2
;
2596 tree type
, difference
, tmp
;
2598 type
= signed_type_for_types (TREE_TYPE (chrec_a
), TREE_TYPE (chrec_b
));
2599 chrec_a
= chrec_convert (type
, chrec_a
, NULL
);
2600 chrec_b
= chrec_convert (type
, chrec_b
, NULL
);
2601 difference
= chrec_fold_minus (type
, initial_condition (chrec_b
), chrec_a
);
2603 /* Special case overlap in the first iteration. */
2604 if (integer_zerop (difference
))
2606 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2607 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2608 *last_conflicts
= integer_one_node
;
2612 if (!chrec_is_positive (initial_condition (difference
), &value0
))
2614 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2615 fprintf (dump_file
, "siv test failed: chrec is not positive.\n");
2617 dependence_stats
.num_siv_unimplemented
++;
2618 *overlaps_a
= conflict_fn_not_known ();
2619 *overlaps_b
= conflict_fn_not_known ();
2620 *last_conflicts
= chrec_dont_know
;
2625 if (value0
== false)
2627 if (!chrec_is_positive (CHREC_RIGHT (chrec_b
), &value1
))
2629 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2630 fprintf (dump_file
, "siv test failed: chrec not positive.\n");
2632 *overlaps_a
= conflict_fn_not_known ();
2633 *overlaps_b
= conflict_fn_not_known ();
2634 *last_conflicts
= chrec_dont_know
;
2635 dependence_stats
.num_siv_unimplemented
++;
2644 chrec_b = {10, +, 1}
2647 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b
), difference
))
2649 HOST_WIDE_INT numiter
;
2650 struct loop
*loop
= get_chrec_loop (chrec_b
);
2652 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2653 tmp
= fold_build2 (EXACT_DIV_EXPR
, type
,
2654 fold_build1 (ABS_EXPR
, type
, difference
),
2655 CHREC_RIGHT (chrec_b
));
2656 *overlaps_b
= conflict_fn (1, affine_fn_cst (tmp
));
2657 *last_conflicts
= integer_one_node
;
2660 /* Perform weak-zero siv test to see if overlap is
2661 outside the loop bounds. */
2662 numiter
= max_stmt_executions_int (loop
);
2665 && compare_tree_int (tmp
, numiter
) > 0)
2667 free_conflict_function (*overlaps_a
);
2668 free_conflict_function (*overlaps_b
);
2669 *overlaps_a
= conflict_fn_no_dependence ();
2670 *overlaps_b
= conflict_fn_no_dependence ();
2671 *last_conflicts
= integer_zero_node
;
2672 dependence_stats
.num_siv_independent
++;
2675 dependence_stats
.num_siv_dependent
++;
2679 /* When the step does not divide the difference, there are
2683 *overlaps_a
= conflict_fn_no_dependence ();
2684 *overlaps_b
= conflict_fn_no_dependence ();
2685 *last_conflicts
= integer_zero_node
;
2686 dependence_stats
.num_siv_independent
++;
2695 chrec_b = {10, +, -1}
2697 In this case, chrec_a will not overlap with chrec_b. */
2698 *overlaps_a
= conflict_fn_no_dependence ();
2699 *overlaps_b
= conflict_fn_no_dependence ();
2700 *last_conflicts
= integer_zero_node
;
2701 dependence_stats
.num_siv_independent
++;
2708 if (!chrec_is_positive (CHREC_RIGHT (chrec_b
), &value2
))
2710 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2711 fprintf (dump_file
, "siv test failed: chrec not positive.\n");
2713 *overlaps_a
= conflict_fn_not_known ();
2714 *overlaps_b
= conflict_fn_not_known ();
2715 *last_conflicts
= chrec_dont_know
;
2716 dependence_stats
.num_siv_unimplemented
++;
2721 if (value2
== false)
2725 chrec_b = {10, +, -1}
2727 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b
), difference
))
2729 HOST_WIDE_INT numiter
;
2730 struct loop
*loop
= get_chrec_loop (chrec_b
);
2732 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2733 tmp
= fold_build2 (EXACT_DIV_EXPR
, type
, difference
,
2734 CHREC_RIGHT (chrec_b
));
2735 *overlaps_b
= conflict_fn (1, affine_fn_cst (tmp
));
2736 *last_conflicts
= integer_one_node
;
2738 /* Perform weak-zero siv test to see if overlap is
2739 outside the loop bounds. */
2740 numiter
= max_stmt_executions_int (loop
);
2743 && compare_tree_int (tmp
, numiter
) > 0)
2745 free_conflict_function (*overlaps_a
);
2746 free_conflict_function (*overlaps_b
);
2747 *overlaps_a
= conflict_fn_no_dependence ();
2748 *overlaps_b
= conflict_fn_no_dependence ();
2749 *last_conflicts
= integer_zero_node
;
2750 dependence_stats
.num_siv_independent
++;
2753 dependence_stats
.num_siv_dependent
++;
2757 /* When the step does not divide the difference, there
2761 *overlaps_a
= conflict_fn_no_dependence ();
2762 *overlaps_b
= conflict_fn_no_dependence ();
2763 *last_conflicts
= integer_zero_node
;
2764 dependence_stats
.num_siv_independent
++;
2774 In this case, chrec_a will not overlap with chrec_b. */
2775 *overlaps_a
= conflict_fn_no_dependence ();
2776 *overlaps_b
= conflict_fn_no_dependence ();
2777 *last_conflicts
= integer_zero_node
;
2778 dependence_stats
.num_siv_independent
++;
2786 /* Helper recursive function for initializing the matrix A. Returns
2787 the initial value of CHREC. */
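/* Illustrative example (added for exposition, not from the original
   sources): for CHREC = {{3, +, 5}_1, +, 7}_2 with INDEX = 0 and
   MULT = 1 the recursion stores A[0][0] = 7 and A[1][0] = 5 and returns
   the initial value 3; analyze_subscript_affine_affine calls it with
   MULT = -1 for the second access function, so that A collects the
   steps of the first chrec and the negated steps of the second.  */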
2790 initialize_matrix_A (lambda_matrix A
, tree chrec
, unsigned index
, int mult
)
2794 switch (TREE_CODE (chrec
))
2796 case POLYNOMIAL_CHREC
:
2797 A
[index
][0] = mult
* int_cst_value (CHREC_RIGHT (chrec
));
2798 return initialize_matrix_A (A
, CHREC_LEFT (chrec
), index
+ 1, mult
);
2804 tree op0
= initialize_matrix_A (A
, TREE_OPERAND (chrec
, 0), index
, mult
);
2805 tree op1
= initialize_matrix_A (A
, TREE_OPERAND (chrec
, 1), index
, mult
);
2807 return chrec_fold_op (TREE_CODE (chrec
), chrec_type (chrec
), op0
, op1
);
2812 tree op
= initialize_matrix_A (A
, TREE_OPERAND (chrec
, 0), index
, mult
);
2813 return chrec_convert (chrec_type (chrec
), op
, NULL
);
2818 /* Handle ~X as -1 - X. */
2819 tree op
= initialize_matrix_A (A
, TREE_OPERAND (chrec
, 0), index
, mult
);
2820 return chrec_fold_op (MINUS_EXPR
, chrec_type (chrec
),
2821 build_int_cst (TREE_TYPE (chrec
), -1), op
);
2833 #define FLOOR_DIV(x,y) ((x) / (y))
2835 /* Solves the special case of the Diophantine equation:
2836 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
2838 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
2839 number of iterations that loops X and Y run. The overlaps will be
2840 constructed as evolutions in dimension DIM. */
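/* Illustrative example (added for exposition, not from the original
   sources): for STEP_A = 4, STEP_B = 6 and NITER = 100 the gcd of the
   steps is 2, so OVERLAPS_A evolves with step 6 / 2 = 3 and OVERLAPS_B
   with step 4 / 2 = 2, since {0, +, 4}_x (3 k) = 12 k =
   {0, +, 6}_y (2 k); the last conflict is MIN (100 / 3, 100 / 2) = 33.  */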
2843 compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter
,
2844 HOST_WIDE_INT step_a
,
2845 HOST_WIDE_INT step_b
,
2846 affine_fn
*overlaps_a
,
2847 affine_fn
*overlaps_b
,
2848 tree
*last_conflicts
, int dim
)
2850 if (((step_a
> 0 && step_b
> 0)
2851 || (step_a
< 0 && step_b
< 0)))
2853 HOST_WIDE_INT step_overlaps_a
, step_overlaps_b
;
2854 HOST_WIDE_INT gcd_steps_a_b
, last_conflict
, tau2
;
2856 gcd_steps_a_b
= gcd (step_a
, step_b
);
2857 step_overlaps_a
= step_b
/ gcd_steps_a_b
;
2858 step_overlaps_b
= step_a
/ gcd_steps_a_b
;
2862 tau2
= FLOOR_DIV (niter
, step_overlaps_a
);
2863 tau2
= MIN (tau2
, FLOOR_DIV (niter
, step_overlaps_b
));
2864 last_conflict
= tau2
;
2865 *last_conflicts
= build_int_cst (NULL_TREE
, last_conflict
);
2868 *last_conflicts
= chrec_dont_know
;
2870 *overlaps_a
= affine_fn_univar (integer_zero_node
, dim
,
2871 build_int_cst (NULL_TREE
,
2873 *overlaps_b
= affine_fn_univar (integer_zero_node
, dim
,
2874 build_int_cst (NULL_TREE
,
2880 *overlaps_a
= affine_fn_cst (integer_zero_node
);
2881 *overlaps_b
= affine_fn_cst (integer_zero_node
);
2882 *last_conflicts
= integer_zero_node
;
2886 /* Solves the special case of a Diophantine equation where CHREC_A is
2887 an affine bivariate function, and CHREC_B is an affine univariate
2888 function. For example,
2890 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
2892 has the following overlapping functions:
2894 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
2895 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
2896 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
2898 FORNOW: This is a specialized implementation for a case occurring in
2899 a common benchmark. Implement the general algorithm. */
2902 compute_overlap_steps_for_affine_1_2 (tree chrec_a
, tree chrec_b
,
2903 conflict_function
**overlaps_a
,
2904 conflict_function
**overlaps_b
,
2905 tree
*last_conflicts
)
2907 bool xz_p
, yz_p
, xyz_p
;
2908 HOST_WIDE_INT step_x
, step_y
, step_z
;
2909 HOST_WIDE_INT niter_x
, niter_y
, niter_z
, niter
;
2910 affine_fn overlaps_a_xz
, overlaps_b_xz
;
2911 affine_fn overlaps_a_yz
, overlaps_b_yz
;
2912 affine_fn overlaps_a_xyz
, overlaps_b_xyz
;
2913 affine_fn ova1
, ova2
, ovb
;
2914 tree last_conflicts_xz
, last_conflicts_yz
, last_conflicts_xyz
;
2916 step_x
= int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a
)));
2917 step_y
= int_cst_value (CHREC_RIGHT (chrec_a
));
2918 step_z
= int_cst_value (CHREC_RIGHT (chrec_b
));
2920 niter_x
= max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a
)));
2921 niter_y
= max_stmt_executions_int (get_chrec_loop (chrec_a
));
2922 niter_z
= max_stmt_executions_int (get_chrec_loop (chrec_b
));
2924 if (niter_x
< 0 || niter_y
< 0 || niter_z
< 0)
2926 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2927 fprintf (dump_file
, "overlap steps test failed: no iteration counts.\n");
2929 *overlaps_a
= conflict_fn_not_known ();
2930 *overlaps_b
= conflict_fn_not_known ();
2931 *last_conflicts
= chrec_dont_know
;
2935 niter
= MIN (niter_x
, niter_z
);
2936 compute_overlap_steps_for_affine_univar (niter
, step_x
, step_z
,
2939 &last_conflicts_xz
, 1);
2940 niter
= MIN (niter_y
, niter_z
);
2941 compute_overlap_steps_for_affine_univar (niter
, step_y
, step_z
,
2944 &last_conflicts_yz
, 2);
2945 niter
= MIN (niter_x
, niter_z
);
2946 niter
= MIN (niter_y
, niter
);
2947 compute_overlap_steps_for_affine_univar (niter
, step_x
+ step_y
, step_z
,
2950 &last_conflicts_xyz
, 3);
2952 xz_p
= !integer_zerop (last_conflicts_xz
);
2953 yz_p
= !integer_zerop (last_conflicts_yz
);
2954 xyz_p
= !integer_zerop (last_conflicts_xyz
);
2956 if (xz_p
|| yz_p
|| xyz_p
)
2958 ova1
= affine_fn_cst (integer_zero_node
);
2959 ova2
= affine_fn_cst (integer_zero_node
);
2960 ovb
= affine_fn_cst (integer_zero_node
);
2963 affine_fn t0
= ova1
;
2966 ova1
= affine_fn_plus (ova1
, overlaps_a_xz
);
2967 ovb
= affine_fn_plus (ovb
, overlaps_b_xz
);
2968 affine_fn_free (t0
);
2969 affine_fn_free (t2
);
2970 *last_conflicts
= last_conflicts_xz
;
2974 affine_fn t0
= ova2
;
2977 ova2
= affine_fn_plus (ova2
, overlaps_a_yz
);
2978 ovb
= affine_fn_plus (ovb
, overlaps_b_yz
);
2979 affine_fn_free (t0
);
2980 affine_fn_free (t2
);
2981 *last_conflicts
= last_conflicts_yz
;
2985 affine_fn t0
= ova1
;
2986 affine_fn t2
= ova2
;
2989 ova1
= affine_fn_plus (ova1
, overlaps_a_xyz
);
2990 ova2
= affine_fn_plus (ova2
, overlaps_a_xyz
);
2991 ovb
= affine_fn_plus (ovb
, overlaps_b_xyz
);
2992 affine_fn_free (t0
);
2993 affine_fn_free (t2
);
2994 affine_fn_free (t4
);
2995 *last_conflicts
= last_conflicts_xyz
;
2997 *overlaps_a
= conflict_fn (2, ova1
, ova2
);
2998 *overlaps_b
= conflict_fn (1, ovb
);
3002 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
3003 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
3004 *last_conflicts
= integer_zero_node
;
3007 affine_fn_free (overlaps_a_xz
);
3008 affine_fn_free (overlaps_b_xz
);
3009 affine_fn_free (overlaps_a_yz
);
3010 affine_fn_free (overlaps_b_yz
);
3011 affine_fn_free (overlaps_a_xyz
);
3012 affine_fn_free (overlaps_b_xyz
);
/* Copy the elements of vector VEC1 with length SIZE to VEC2.  */

static void
lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
                    int size)
{
  memcpy (vec2, vec1, size * sizeof (*vec1));
}

/* Copy the elements of M x N matrix MAT1 to MAT2.  */

static void
lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
                    int m, int n)
{
  int i;

  for (i = 0; i < m; i++)
    lambda_vector_copy (mat1[i], mat2[i], n);
}

/* Store the N x N identity matrix in MAT.  */

static void
lambda_matrix_id (lambda_matrix mat, int size)
{
  int i, j;

  for (i = 0; i < size; i++)
    for (j = 0; j < size; j++)
      mat[i][j] = (i == j) ? 1 : 0;
}

/* Return the first nonzero element of vector VEC1 between START and N.
   We must have START <= N.  Returns N if VEC1 is the zero vector.  */

static int
lambda_vector_first_nz (lambda_vector vec1, int n, int start)
{
  int j = start;

  while (j < n && vec1[j] == 0)
    j++;
  return j;
}

/* Add a multiple of row R1 of matrix MAT with N columns to row R2:
   R2 = R2 + CONST1 * R1.  */

static void
lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
{
  int i;

  if (const1 == 0)
    return;

  for (i = 0; i < n; i++)
    mat[r2][i] += const1 * mat[r1][i];
}

/* Multiply vector VEC1 of length SIZE by a constant CONST1,
   and store the result in VEC2.  */

static void
lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
                          int size, int const1)
{
  int i;

  if (const1 == 0)
    lambda_vector_clear (vec2, size);
  else
    for (i = 0; i < size; i++)
      vec2[i] = const1 * vec1[i];
}

/* Negate vector VEC1 with length SIZE and store it in VEC2.  */

static void
lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
                      int size)
{
  lambda_vector_mult_const (vec1, vec2, size, -1);
}

/* Negate row R1 of matrix MAT which has N columns.  */

static void
lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
{
  lambda_vector_negate (mat[r1], mat[r1], n);
}

/* Return true if two vectors are equal.  */

static bool
lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
{
  int i;

  for (i = 0; i < size; i++)
    if (vec1[i] != vec2[i])
      return false;
  return true;
}
/* Given an M x N integer matrix A, this function determines an M x
   M unimodular matrix U, and an M x N echelon matrix S such that
   "U.A = S".  This decomposition is also known as "right Hermite".

   Ref: Algorithm 2.1 page 33 in "Loop Transformations for
   Restructuring Compilers" Utpal Banerjee.  */

static void
lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
                             lambda_matrix S, lambda_matrix U)
{
  int i, j, i0 = 0;

  lambda_matrix_copy (A, S, m, n);
  lambda_matrix_id (U, m);

  for (j = 0; j < n; j++)
    {
      if (lambda_vector_first_nz (S[j], m, i0) < m)
        {
          ++i0;

          for (i = m - 1; i >= i0; i--)
            {
              while (S[i][j] != 0)
                {
                  int sigma, factor, a, b;

                  a = S[i - 1][j];
                  b = S[i][j];
                  sigma = (a * b < 0) ? -1 : 1;
                  a = abs (a);
                  b = abs (b);
                  factor = sigma * (a / b);

                  lambda_matrix_row_add (S, n, i, i - 1, -factor);
                  std::swap (S[i], S[i - 1]);

                  lambda_matrix_row_add (U, m, i, i - 1, -factor);
                  std::swap (U[i], U[i - 1]);
                }
            }
        }
    }
}
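/* Illustrative example (added for exposition, not from the original
   sources): with M = 2, N = 1 and A = ((4), (-6)) the row operations
   above yield an echelon matrix S whose only nonzero entry is
   S[0][0] = +-gcd (4, 6) = +-2, together with a unimodular U such that
   U.A = S; analyze_subscript_affine_affine reads the gcd of the two
   steps out of S[0][0] and particular solutions out of the rows of U.  */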
3165 /* Determines the overlapping elements due to accesses CHREC_A and
3166 CHREC_B, that are affine functions. This function cannot handle
3167 symbolic evolution functions, ie. when initial conditions are
3168 parameters, because it uses lambda matrices of integers. */
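/* Illustrative example (added for exposition, not from the original
   sources): for chrec_a = {0, +, 2}_1 and chrec_b = {1, +, 2}_1, i.e.
   the even versus the odd elements of the same array, gamma = 1 - 0 = 1
   while the gcd of the steps is 2; since 2 does not divide 1, the
   "gcd-test" below proves that the Diophantine equation has no integer
   solution, hence no dependence.  */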
3171 analyze_subscript_affine_affine (tree chrec_a
,
3173 conflict_function
**overlaps_a
,
3174 conflict_function
**overlaps_b
,
3175 tree
*last_conflicts
)
3177 unsigned nb_vars_a
, nb_vars_b
, dim
;
3178 HOST_WIDE_INT init_a
, init_b
, gamma
, gcd_alpha_beta
;
3179 lambda_matrix A
, U
, S
;
3180 struct obstack scratch_obstack
;
3182 if (eq_evolutions_p (chrec_a
, chrec_b
))
3184 /* The accessed index overlaps for each iteration in the
3186 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
3187 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
3188 *last_conflicts
= chrec_dont_know
;
3191 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3192 fprintf (dump_file
, "(analyze_subscript_affine_affine \n");
3194 /* For determining the initial intersection, we have to solve a
3195 Diophantine equation. This is the most time consuming part.
3197 For answering to the question: "Is there a dependence?" we have
3198 to prove that there exists a solution to the Diophantine
3199 equation, and that the solution is in the iteration domain,
3200 i.e. the solution is positive or zero, and that the solution
3201 happens before the upper bound loop.nb_iterations. Otherwise
3202 there is no dependence. This function outputs a description of
3203 the iterations that hold the intersections. */
3205 nb_vars_a
= nb_vars_in_chrec (chrec_a
);
3206 nb_vars_b
= nb_vars_in_chrec (chrec_b
);
3208 gcc_obstack_init (&scratch_obstack
);
3210 dim
= nb_vars_a
+ nb_vars_b
;
3211 U
= lambda_matrix_new (dim
, dim
, &scratch_obstack
);
3212 A
= lambda_matrix_new (dim
, 1, &scratch_obstack
);
3213 S
= lambda_matrix_new (dim
, 1, &scratch_obstack
);
3215 init_a
= int_cst_value (initialize_matrix_A (A
, chrec_a
, 0, 1));
3216 init_b
= int_cst_value (initialize_matrix_A (A
, chrec_b
, nb_vars_a
, -1));
3217 gamma
= init_b
- init_a
;
3219 /* Don't do all the hard work of solving the Diophantine equation
3220 when we already know the solution: for example,
3223 | gamma = 3 - 3 = 0.
3224 Then the first overlap occurs during the first iterations:
3225 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
3229 if (nb_vars_a
== 1 && nb_vars_b
== 1)
3231 HOST_WIDE_INT step_a
, step_b
;
3232 HOST_WIDE_INT niter
, niter_a
, niter_b
;
3235 niter_a
= max_stmt_executions_int (get_chrec_loop (chrec_a
));
3236 niter_b
= max_stmt_executions_int (get_chrec_loop (chrec_b
));
3237 niter
= MIN (niter_a
, niter_b
);
3238 step_a
= int_cst_value (CHREC_RIGHT (chrec_a
));
3239 step_b
= int_cst_value (CHREC_RIGHT (chrec_b
));
3241 compute_overlap_steps_for_affine_univar (niter
, step_a
, step_b
,
3244 *overlaps_a
= conflict_fn (1, ova
);
3245 *overlaps_b
= conflict_fn (1, ovb
);
3248 else if (nb_vars_a
== 2 && nb_vars_b
== 1)
3249 compute_overlap_steps_for_affine_1_2
3250 (chrec_a
, chrec_b
, overlaps_a
, overlaps_b
, last_conflicts
);
3252 else if (nb_vars_a
== 1 && nb_vars_b
== 2)
3253 compute_overlap_steps_for_affine_1_2
3254 (chrec_b
, chrec_a
, overlaps_b
, overlaps_a
, last_conflicts
);
3258 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3259 fprintf (dump_file
, "affine-affine test failed: too many variables.\n");
3260 *overlaps_a
= conflict_fn_not_known ();
3261 *overlaps_b
= conflict_fn_not_known ();
3262 *last_conflicts
= chrec_dont_know
;
3264 goto end_analyze_subs_aa
;
3268 lambda_matrix_right_hermite (A
, dim
, 1, S
, U
);
3273 lambda_matrix_row_negate (U
, dim
, 0);
3275 gcd_alpha_beta
= S
[0][0];
3277 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
3278 but that is a quite strange case. Instead of ICEing, answer
3280 if (gcd_alpha_beta
== 0)
3282 *overlaps_a
= conflict_fn_not_known ();
3283 *overlaps_b
= conflict_fn_not_known ();
3284 *last_conflicts
= chrec_dont_know
;
3285 goto end_analyze_subs_aa
;
3288 /* The classic "gcd-test". */
3289 if (!int_divides_p (gcd_alpha_beta
, gamma
))
3291 /* The "gcd-test" has determined that there is no integer
3292 solution, i.e. there is no dependence. */
3293 *overlaps_a
= conflict_fn_no_dependence ();
3294 *overlaps_b
= conflict_fn_no_dependence ();
3295 *last_conflicts
= integer_zero_node
;
3298 /* Both access functions are univariate. This includes SIV and MIV cases. */
3299 else if (nb_vars_a
== 1 && nb_vars_b
== 1)
3301 /* Both functions should have the same evolution sign. */
3302 if (((A
[0][0] > 0 && -A
[1][0] > 0)
3303 || (A
[0][0] < 0 && -A
[1][0] < 0)))
3305 /* The solutions are given by:
3307 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
3310 For a given integer t. Using the following variables,
3312 | i0 = u11 * gamma / gcd_alpha_beta
3313 | j0 = u12 * gamma / gcd_alpha_beta
3320 | y0 = j0 + j1 * t. */
3321 HOST_WIDE_INT i0
, j0
, i1
, j1
;
3323 i0
= U
[0][0] * gamma
/ gcd_alpha_beta
;
3324 j0
= U
[0][1] * gamma
/ gcd_alpha_beta
;
3328 if ((i1
== 0 && i0
< 0)
3329 || (j1
== 0 && j0
< 0))
3331 /* There is no solution.
3332 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
3333 falls in here, but for the moment we don't look at the
3334 upper bound of the iteration domain. */
3335 *overlaps_a
= conflict_fn_no_dependence ();
3336 *overlaps_b
= conflict_fn_no_dependence ();
3337 *last_conflicts
= integer_zero_node
;
3338 goto end_analyze_subs_aa
;
3341 if (i1
> 0 && j1
> 0)
3343 HOST_WIDE_INT niter_a
3344 = max_stmt_executions_int (get_chrec_loop (chrec_a
));
3345 HOST_WIDE_INT niter_b
3346 = max_stmt_executions_int (get_chrec_loop (chrec_b
));
3347 HOST_WIDE_INT niter
= MIN (niter_a
, niter_b
);
3349 /* (X0, Y0) is a solution of the Diophantine equation:
3350 "chrec_a (X0) = chrec_b (Y0)". */
3351 HOST_WIDE_INT tau1
= MAX (CEIL (-i0
, i1
),
3353 HOST_WIDE_INT x0
= i1
* tau1
+ i0
;
3354 HOST_WIDE_INT y0
= j1
* tau1
+ j0
;
3356 /* (X1, Y1) is the smallest positive solution of the eq
3357 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
3358 first conflict occurs. */
3359 HOST_WIDE_INT min_multiple
= MIN (x0
/ i1
, y0
/ j1
);
3360 HOST_WIDE_INT x1
= x0
- i1
* min_multiple
;
3361 HOST_WIDE_INT y1
= y0
- j1
* min_multiple
;
3365 HOST_WIDE_INT tau2
= MIN (FLOOR_DIV (niter_a
- i0
, i1
),
3366 FLOOR_DIV (niter_b
- j0
, j1
));
3367 HOST_WIDE_INT last_conflict
= tau2
- (x1
- i0
)/i1
;
3369 /* If the overlap occurs outside of the bounds of the
3370 loop, there is no dependence. */
3371 if (x1
>= niter_a
|| y1
>= niter_b
)
3373 *overlaps_a
= conflict_fn_no_dependence ();
3374 *overlaps_b
= conflict_fn_no_dependence ();
3375 *last_conflicts
= integer_zero_node
;
3376 goto end_analyze_subs_aa
;
3379 *last_conflicts
= build_int_cst (NULL_TREE
, last_conflict
);
3382 *last_conflicts
= chrec_dont_know
;
3386 affine_fn_univar (build_int_cst (NULL_TREE
, x1
),
3388 build_int_cst (NULL_TREE
, i1
)));
3391 affine_fn_univar (build_int_cst (NULL_TREE
, y1
),
3393 build_int_cst (NULL_TREE
, j1
)));
3397 /* FIXME: For the moment, the upper bound of the
3398 iteration domain for i and j is not checked. */
3399 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3400 fprintf (dump_file
, "affine-affine test failed: unimplemented.\n");
3401 *overlaps_a
= conflict_fn_not_known ();
3402 *overlaps_b
= conflict_fn_not_known ();
3403 *last_conflicts
= chrec_dont_know
;
3408 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3409 fprintf (dump_file
, "affine-affine test failed: unimplemented.\n");
3410 *overlaps_a
= conflict_fn_not_known ();
3411 *overlaps_b
= conflict_fn_not_known ();
3412 *last_conflicts
= chrec_dont_know
;
3417 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3418 fprintf (dump_file
, "affine-affine test failed: unimplemented.\n");
3419 *overlaps_a
= conflict_fn_not_known ();
3420 *overlaps_b
= conflict_fn_not_known ();
3421 *last_conflicts
= chrec_dont_know
;
3424 end_analyze_subs_aa
:
3425 obstack_free (&scratch_obstack
, NULL
);
3426 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3428 fprintf (dump_file
, " (overlaps_a = ");
3429 dump_conflict_function (dump_file
, *overlaps_a
);
3430 fprintf (dump_file
, ")\n (overlaps_b = ");
3431 dump_conflict_function (dump_file
, *overlaps_b
);
3432 fprintf (dump_file
, "))\n");
3436 /* Returns true when analyze_subscript_affine_affine can be used for
3437 determining the dependence relation between chrec_a and chrec_b,
3438 that contain symbols. This function modifies chrec_a and chrec_b
3439 such that the analysis result is the same, and such that they don't
3440 contain symbols, and then can safely be passed to the analyzer.
3442 Example: The analysis of the following tuples of evolutions produce
3443 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
3446 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
3447 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
3451 can_use_analyze_subscript_affine_affine (tree
*chrec_a
, tree
*chrec_b
)
3453 tree diff
, type
, left_a
, left_b
, right_b
;
3455 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a
))
3456 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b
)))
3457 /* FIXME: For the moment not handled. Might be refined later. */
3460 type
= chrec_type (*chrec_a
);
3461 left_a
= CHREC_LEFT (*chrec_a
);
3462 left_b
= chrec_convert (type
, CHREC_LEFT (*chrec_b
), NULL
);
3463 diff
= chrec_fold_minus (type
, left_a
, left_b
);
3465 if (!evolution_function_is_constant_p (diff
))
3468 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3469 fprintf (dump_file
, "can_use_subscript_aff_aff_for_symbolic \n");
3471 *chrec_a
= build_polynomial_chrec (CHREC_VARIABLE (*chrec_a
),
3472 diff
, CHREC_RIGHT (*chrec_a
));
3473 right_b
= chrec_convert (type
, CHREC_RIGHT (*chrec_b
), NULL
);
3474 *chrec_b
= build_polynomial_chrec (CHREC_VARIABLE (*chrec_b
),
3475 build_int_cst (type
, 0),
3480 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
3481 *OVERLAPS_B are initialized to the functions that describe the
3482 relation between the elements accessed twice by CHREC_A and
3483 CHREC_B. For k >= 0, the following property is verified:
3485 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
3488 analyze_siv_subscript (tree chrec_a
,
3490 conflict_function
**overlaps_a
,
3491 conflict_function
**overlaps_b
,
3492 tree
*last_conflicts
,
3495 dependence_stats
.num_siv
++;
3497 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3498 fprintf (dump_file
, "(analyze_siv_subscript \n");
3500 if (evolution_function_is_constant_p (chrec_a
)
3501 && evolution_function_is_affine_in_loop (chrec_b
, loop_nest_num
))
3502 analyze_siv_subscript_cst_affine (chrec_a
, chrec_b
,
3503 overlaps_a
, overlaps_b
, last_conflicts
);
3505 else if (evolution_function_is_affine_in_loop (chrec_a
, loop_nest_num
)
3506 && evolution_function_is_constant_p (chrec_b
))
3507 analyze_siv_subscript_cst_affine (chrec_b
, chrec_a
,
3508 overlaps_b
, overlaps_a
, last_conflicts
);
3510 else if (evolution_function_is_affine_in_loop (chrec_a
, loop_nest_num
)
3511 && evolution_function_is_affine_in_loop (chrec_b
, loop_nest_num
))
3513 if (!chrec_contains_symbols (chrec_a
)
3514 && !chrec_contains_symbols (chrec_b
))
3516 analyze_subscript_affine_affine (chrec_a
, chrec_b
,
3517 overlaps_a
, overlaps_b
,
3520 if (CF_NOT_KNOWN_P (*overlaps_a
)
3521 || CF_NOT_KNOWN_P (*overlaps_b
))
3522 dependence_stats
.num_siv_unimplemented
++;
3523 else if (CF_NO_DEPENDENCE_P (*overlaps_a
)
3524 || CF_NO_DEPENDENCE_P (*overlaps_b
))
3525 dependence_stats
.num_siv_independent
++;
3527 dependence_stats
.num_siv_dependent
++;
3529 else if (can_use_analyze_subscript_affine_affine (&chrec_a
,
3532 analyze_subscript_affine_affine (chrec_a
, chrec_b
,
3533 overlaps_a
, overlaps_b
,
3536 if (CF_NOT_KNOWN_P (*overlaps_a
)
3537 || CF_NOT_KNOWN_P (*overlaps_b
))
3538 dependence_stats
.num_siv_unimplemented
++;
3539 else if (CF_NO_DEPENDENCE_P (*overlaps_a
)
3540 || CF_NO_DEPENDENCE_P (*overlaps_b
))
3541 dependence_stats
.num_siv_independent
++;
3543 dependence_stats
.num_siv_dependent
++;
3546 goto siv_subscript_dontknow
;
3551 siv_subscript_dontknow
:;
3552 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3553 fprintf (dump_file
, " siv test failed: unimplemented");
3554 *overlaps_a
= conflict_fn_not_known ();
3555 *overlaps_b
= conflict_fn_not_known ();
3556 *last_conflicts
= chrec_dont_know
;
3557 dependence_stats
.num_siv_unimplemented
++;
3560 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3561 fprintf (dump_file
, ")\n");
/* Returns false if we can prove that the greatest common divisor of the steps
   of CHREC does not divide CST, true otherwise.  */

static bool
gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
{
  HOST_WIDE_INT cd = 0, val;
  tree step;

  if (!tree_fits_shwi_p (cst))
    return true;
  val = tree_to_shwi (cst);

  while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
    {
      step = CHREC_RIGHT (chrec);
      if (!tree_fits_shwi_p (step))
        return true;
      cd = gcd (cd, tree_to_shwi (step));
      chrec = CHREC_LEFT (chrec);
    }

  return val % cd == 0;
}
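/* Illustrative example (added for exposition, not from the original
   sources): for CHREC = {{21, +, 2}_1, +, -2}_2 and CST = 1 the gcd of
   the steps is gcd (2, -2) = 2, which does not divide 1, so the
   function returns false; this is exactly the ssa-chrec-33.c situation
   quoted in analyze_miv_subscript below, where it proves independence.  */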
3589 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
3590 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
3591 functions that describe the relation between the elements accessed
3592 twice by CHREC_A and CHREC_B. For k >= 0, the following property
3595 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
3598 analyze_miv_subscript (tree chrec_a
,
3600 conflict_function
**overlaps_a
,
3601 conflict_function
**overlaps_b
,
3602 tree
*last_conflicts
,
3603 struct loop
*loop_nest
)
3605 tree type
, difference
;
3607 dependence_stats
.num_miv
++;
3608 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3609 fprintf (dump_file
, "(analyze_miv_subscript \n");
3611 type
= signed_type_for_types (TREE_TYPE (chrec_a
), TREE_TYPE (chrec_b
));
3612 chrec_a
= chrec_convert (type
, chrec_a
, NULL
);
3613 chrec_b
= chrec_convert (type
, chrec_b
, NULL
);
3614 difference
= chrec_fold_minus (type
, chrec_a
, chrec_b
);
3616 if (eq_evolutions_p (chrec_a
, chrec_b
))
3618 /* Access functions are the same: all the elements are accessed
3619 in the same order. */
3620 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
3621 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
3622 *last_conflicts
= max_stmt_executions_tree (get_chrec_loop (chrec_a
));
3623 dependence_stats
.num_miv_dependent
++;
3626 else if (evolution_function_is_constant_p (difference
)
3627 /* For the moment, the following is verified:
3628 evolution_function_is_affine_multivariate_p (chrec_a,
3630 && !gcd_of_steps_may_divide_p (chrec_a
, difference
))
3632 /* testsuite/.../ssa-chrec-33.c
3633 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
3635 The difference is 1, and all the evolution steps are multiples
3636 of 2, consequently there are no overlapping elements. */
3637 *overlaps_a
= conflict_fn_no_dependence ();
3638 *overlaps_b
= conflict_fn_no_dependence ();
3639 *last_conflicts
= integer_zero_node
;
3640 dependence_stats
.num_miv_independent
++;
3643 else if (evolution_function_is_affine_multivariate_p (chrec_a
, loop_nest
->num
)
3644 && !chrec_contains_symbols (chrec_a
)
3645 && evolution_function_is_affine_multivariate_p (chrec_b
, loop_nest
->num
)
3646 && !chrec_contains_symbols (chrec_b
))
3648 /* testsuite/.../ssa-chrec-35.c
3649 {0, +, 1}_2 vs. {0, +, 1}_3
3650 the overlapping elements are respectively located at iterations:
3651 {0, +, 1}_x and {0, +, 1}_x,
3652 in other words, we have the equality:
3653 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
3656 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
3657 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
3659 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
3660 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
3662 analyze_subscript_affine_affine (chrec_a
, chrec_b
,
3663 overlaps_a
, overlaps_b
, last_conflicts
);
3665 if (CF_NOT_KNOWN_P (*overlaps_a
)
3666 || CF_NOT_KNOWN_P (*overlaps_b
))
3667 dependence_stats
.num_miv_unimplemented
++;
3668 else if (CF_NO_DEPENDENCE_P (*overlaps_a
)
3669 || CF_NO_DEPENDENCE_P (*overlaps_b
))
3670 dependence_stats
.num_miv_independent
++;
3672 dependence_stats
.num_miv_dependent
++;
3677 /* When the analysis is too difficult, answer "don't know". */
3678 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3679 fprintf (dump_file
, "analyze_miv_subscript test failed: unimplemented.\n");
3681 *overlaps_a
= conflict_fn_not_known ();
3682 *overlaps_b
= conflict_fn_not_known ();
3683 *last_conflicts
= chrec_dont_know
;
3684 dependence_stats
.num_miv_unimplemented
++;
3687 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3688 fprintf (dump_file
, ")\n");
/* Determines the iterations for which CHREC_A is equal to CHREC_B
   with respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
   OVERLAP_ITERATIONS_B are initialized with two functions that
   describe the iterations that contain conflicting elements.

   Remark: For an integer k >= 0, the following equality is true:

   CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).  */
3702 analyze_overlapping_iterations (tree chrec_a
,
3704 conflict_function
**overlap_iterations_a
,
3705 conflict_function
**overlap_iterations_b
,
3706 tree
*last_conflicts
, struct loop
*loop_nest
)
3708 unsigned int lnn
= loop_nest
->num
;
3710 dependence_stats
.num_subscript_tests
++;
3712 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3714 fprintf (dump_file
, "(analyze_overlapping_iterations \n");
3715 fprintf (dump_file
, " (chrec_a = ");
3716 print_generic_expr (dump_file
, chrec_a
);
3717 fprintf (dump_file
, ")\n (chrec_b = ");
3718 print_generic_expr (dump_file
, chrec_b
);
3719 fprintf (dump_file
, ")\n");
3722 if (chrec_a
== NULL_TREE
3723 || chrec_b
== NULL_TREE
3724 || chrec_contains_undetermined (chrec_a
)
3725 || chrec_contains_undetermined (chrec_b
))
3727 dependence_stats
.num_subscript_undetermined
++;
3729 *overlap_iterations_a
= conflict_fn_not_known ();
3730 *overlap_iterations_b
= conflict_fn_not_known ();
3733 /* If they are the same chrec, and are affine, they overlap
3734 on every iteration. */
3735 else if (eq_evolutions_p (chrec_a
, chrec_b
)
3736 && (evolution_function_is_affine_multivariate_p (chrec_a
, lnn
)
3737 || operand_equal_p (chrec_a
, chrec_b
, 0)))
3739 dependence_stats
.num_same_subscript_function
++;
3740 *overlap_iterations_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
3741 *overlap_iterations_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
3742 *last_conflicts
= chrec_dont_know
;
3745 /* If they aren't the same, and aren't affine, we can't do anything
3747 else if ((chrec_contains_symbols (chrec_a
)
3748 || chrec_contains_symbols (chrec_b
))
3749 && (!evolution_function_is_affine_multivariate_p (chrec_a
, lnn
)
3750 || !evolution_function_is_affine_multivariate_p (chrec_b
, lnn
)))
3752 dependence_stats
.num_subscript_undetermined
++;
3753 *overlap_iterations_a
= conflict_fn_not_known ();
3754 *overlap_iterations_b
= conflict_fn_not_known ();
3757 else if (ziv_subscript_p (chrec_a
, chrec_b
))
3758 analyze_ziv_subscript (chrec_a
, chrec_b
,
3759 overlap_iterations_a
, overlap_iterations_b
,
3762 else if (siv_subscript_p (chrec_a
, chrec_b
))
3763 analyze_siv_subscript (chrec_a
, chrec_b
,
3764 overlap_iterations_a
, overlap_iterations_b
,
3765 last_conflicts
, lnn
);
3768 analyze_miv_subscript (chrec_a
, chrec_b
,
3769 overlap_iterations_a
, overlap_iterations_b
,
3770 last_conflicts
, loop_nest
);
3772 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3774 fprintf (dump_file
, " (overlap_iterations_a = ");
3775 dump_conflict_function (dump_file
, *overlap_iterations_a
);
3776 fprintf (dump_file
, ")\n (overlap_iterations_b = ");
3777 dump_conflict_function (dump_file
, *overlap_iterations_b
);
3778 fprintf (dump_file
, "))\n");
/* Helper function for uniquely inserting distance vectors.  */

static void
save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
{
  unsigned i;
  lambda_vector v;

  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, v)
    if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
      return;

  DDR_DIST_VECTS (ddr).safe_push (dist_v);
}

/* Helper function for uniquely inserting direction vectors.  */

static void
save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
{
  unsigned i;
  lambda_vector v;

  FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), i, v)
    if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
      return;

  DDR_DIR_VECTS (ddr).safe_push (dir_v);
}

/* Add a distance of 1 on all the loops outer than INDEX.  If we
   haven't yet determined a distance for this outer loop, push a new
   distance vector composed of the previous distance, and a distance
   of 1 for this outer loop.  Example:

   Saved vectors are of the form (dist_in_1, dist_in_2).  First, we
   save (0, 1), then we have to save (1, 0).  */

static void
add_outer_distances (struct data_dependence_relation *ddr,
                     lambda_vector dist_v, int index)
{
  /* For each outer loop where init_v is not set, the accesses are
     in dependence of distance 1 in the loop.  */
  while (--index >= 0)
    {
      lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
      lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
      save_v[index] = 1;
      save_dist_v (ddr, save_v);
    }
}
3841 /* Return false when fail to represent the data dependence as a
3842 distance vector. INIT_B is set to true when a component has been
3843 added to the distance vector DIST_V. INDEX_CARRY is then set to
3844 the index in DIST_V that carries the dependence. */
3847 build_classic_dist_vector_1 (struct data_dependence_relation
*ddr
,
3848 struct data_reference
*ddr_a
,
3849 struct data_reference
*ddr_b
,
3850 lambda_vector dist_v
, bool *init_b
,
3854 lambda_vector init_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3856 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
3858 tree access_fn_a
, access_fn_b
;
3859 struct subscript
*subscript
= DDR_SUBSCRIPT (ddr
, i
);
3861 if (chrec_contains_undetermined (SUB_DISTANCE (subscript
)))
3863 non_affine_dependence_relation (ddr
);
3867 access_fn_a
= DR_ACCESS_FN (ddr_a
, i
);
3868 access_fn_b
= DR_ACCESS_FN (ddr_b
, i
);
3870 if (TREE_CODE (access_fn_a
) == POLYNOMIAL_CHREC
3871 && TREE_CODE (access_fn_b
) == POLYNOMIAL_CHREC
)
3875 int var_a
= CHREC_VARIABLE (access_fn_a
);
3876 int var_b
= CHREC_VARIABLE (access_fn_b
);
3879 || chrec_contains_undetermined (SUB_DISTANCE (subscript
)))
3881 non_affine_dependence_relation (ddr
);
3885 dist
= int_cst_value (SUB_DISTANCE (subscript
));
3886 index
= index_in_loop_nest (var_a
, DDR_LOOP_NEST (ddr
));
3887 *index_carry
= MIN (index
, *index_carry
);
3889 /* This is the subscript coupling test. If we have already
3890 recorded a distance for this loop (a distance coming from
3891 another subscript), it should be the same. For example,
3892 in the following code, there is no dependence:
3899 if (init_v
[index
] != 0 && dist_v
[index
] != dist
)
3901 finalize_ddr_dependent (ddr
, chrec_known
);
3905 dist_v
[index
] = dist
;
3909 else if (!operand_equal_p (access_fn_a
, access_fn_b
, 0))
3911 /* This can be for example an affine vs. constant dependence
3912 (T[i] vs. T[3]) that is not an affine dependence and is
3913 not representable as a distance vector. */
3914 non_affine_dependence_relation (ddr
);
/* Return true when the DDR contains only constant access functions.  */

static bool
constant_access_functions (const struct data_dependence_relation *ddr)
{
  unsigned i;

  for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
    if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
        || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
      return false;

  return true;
}
3937 /* Helper function for the case where DDR_A and DDR_B are the same
3938 multivariate access function with a constant step. For an example
3942 add_multivariate_self_dist (struct data_dependence_relation
*ddr
, tree c_2
)
3945 tree c_1
= CHREC_LEFT (c_2
);
3946 tree c_0
= CHREC_LEFT (c_1
);
3947 lambda_vector dist_v
;
3948 HOST_WIDE_INT v1
, v2
, cd
;
3950 /* Polynomials with more than 2 variables are not handled yet. When
3951 the evolution steps are parameters, it is not possible to
3952 represent the dependence using classical distance vectors. */
3953 if (TREE_CODE (c_0
) != INTEGER_CST
3954 || TREE_CODE (CHREC_RIGHT (c_1
)) != INTEGER_CST
3955 || TREE_CODE (CHREC_RIGHT (c_2
)) != INTEGER_CST
)
3957 DDR_AFFINE_P (ddr
) = false;
3961 x_2
= index_in_loop_nest (CHREC_VARIABLE (c_2
), DDR_LOOP_NEST (ddr
));
3962 x_1
= index_in_loop_nest (CHREC_VARIABLE (c_1
), DDR_LOOP_NEST (ddr
));
3964 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
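/* Illustrative derivation (added for exposition, not from the original
   sources): with c_2 = {{0, +, 2}_1, +, 3}_2 the steps are v1 = 2 in
   loop_1 and v2 = 3 in loop_2, and the access function takes the same
   value again when 2 * delta_1 + 3 * delta_2 = 0; dividing by
   cd = gcd (2, 3) = 1 gives the smallest solution delta_1 = 3,
   delta_2 = -2, i.e. the distance vector (3, -2) quoted above.  */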
3965 dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3966 v1
= int_cst_value (CHREC_RIGHT (c_1
));
3967 v2
= int_cst_value (CHREC_RIGHT (c_2
));
3980 save_dist_v (ddr
, dist_v
);
3982 add_outer_distances (ddr
, dist_v
, x_1
);
3985 /* Helper function for the case where DDR_A and DDR_B are the same
3986 access functions. */
3989 add_other_self_distances (struct data_dependence_relation
*ddr
)
3991 lambda_vector dist_v
;
3993 int index_carry
= DDR_NB_LOOPS (ddr
);
3995 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
3997 tree access_fun
= DR_ACCESS_FN (DDR_A (ddr
), i
);
3999 if (TREE_CODE (access_fun
) == POLYNOMIAL_CHREC
)
4001 if (!evolution_function_is_univariate_p (access_fun
))
4003 if (DDR_NUM_SUBSCRIPTS (ddr
) != 1)
4005 DDR_ARE_DEPENDENT (ddr
) = chrec_dont_know
;
4009 access_fun
= DR_ACCESS_FN (DDR_A (ddr
), 0);
4011 if (TREE_CODE (CHREC_LEFT (access_fun
)) == POLYNOMIAL_CHREC
)
4012 add_multivariate_self_dist (ddr
, access_fun
);
4014 /* The evolution step is not constant: it varies in
4015 the outer loop, so this cannot be represented by a
4016 distance vector. For example in pr34635.c the
4017 evolution is {0, +, {0, +, 4}_1}_2. */
4018 DDR_AFFINE_P (ddr
) = false;
4023 index_carry
= MIN (index_carry
,
4024 index_in_loop_nest (CHREC_VARIABLE (access_fun
),
4025 DDR_LOOP_NEST (ddr
)));
4029 dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
4030 add_outer_distances (ddr
, dist_v
, index_carry
);
static void
insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
{
  lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));

  dist_v[DDR_INNER_LOOP (ddr)] = 1;
  save_dist_v (ddr, dist_v);
}
4042 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
4043 is the case for example when access functions are the same and
4044 equal to a constant, as in:
4051 in which case the distance vectors are (0) and (1). */
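/* Illustrative note (added for exposition, not from the original
   sources): for two accesses to A[3] in the same loop the conflict
   functions are the constant 0, so besides the self distance (0) the
   access of one iteration also conflicts with the access of the next
   iteration; the unit distance vector (1) inserted by
   insert_innermost_unit_dist_vector records that fact.  */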
4054 add_distance_for_zero_overlaps (struct data_dependence_relation
*ddr
)
4058 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
4060 subscript_p sub
= DDR_SUBSCRIPT (ddr
, i
);
4061 conflict_function
*ca
= SUB_CONFLICTS_IN_A (sub
);
4062 conflict_function
*cb
= SUB_CONFLICTS_IN_B (sub
);
4064 for (j
= 0; j
< ca
->n
; j
++)
4065 if (affine_function_zero_p (ca
->fns
[j
]))
4067 insert_innermost_unit_dist_vector (ddr
);
4071 for (j
= 0; j
< cb
->n
; j
++)
4072 if (affine_function_zero_p (cb
->fns
[j
]))
4074 insert_innermost_unit_dist_vector (ddr
);
4080 /* Compute the classic per loop distance vector. DDR is the data
4081 dependence relation to build a vector from. Return false when fail
4082 to represent the data dependence as a distance vector. */
4085 build_classic_dist_vector (struct data_dependence_relation
*ddr
,
4086 struct loop
*loop_nest
)
4088 bool init_b
= false;
4089 int index_carry
= DDR_NB_LOOPS (ddr
);
4090 lambda_vector dist_v
;
4092 if (DDR_ARE_DEPENDENT (ddr
) != NULL_TREE
)
4095 if (same_access_functions (ddr
))
4097 /* Save the 0 vector. */
4098 dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
4099 save_dist_v (ddr
, dist_v
);
4101 if (constant_access_functions (ddr
))
4102 add_distance_for_zero_overlaps (ddr
);
4104 if (DDR_NB_LOOPS (ddr
) > 1)
4105 add_other_self_distances (ddr
);
4110 dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
4111 if (!build_classic_dist_vector_1 (ddr
, DDR_A (ddr
), DDR_B (ddr
),
4112 dist_v
, &init_b
, &index_carry
))
4115 /* Save the distance vector if we initialized one. */
4118 /* Verify a basic constraint: classic distance vectors should
4119 always be lexicographically positive.
4121 Data references are collected in the order of execution of
4122 the program, thus for the following loop
4124 | for (i = 1; i < 100; i++)
4125 | for (j = 1; j < 100; j++)
4127 | t = T[j+1][i-1]; // A
4128 | T[j][i] = t + 2; // B
4131 references are collected following the direction of the wind:
4132 A then B. The data dependence tests are performed also
4133 following this order, such that we're looking at the distance
4134 separating the elements accessed by A from the elements later
4135 accessed by B. But in this example, the distance returned by
4136 test_dep (A, B) is lexicographically negative (-1, 1), that
4137 means that the access A occurs later than B with respect to
4138 the outer loop, ie. we're actually looking upwind. In this
4139 case we solve test_dep (B, A) looking downwind to the
4140 lexicographically positive solution, that returns the
4141 distance vector (1, -1). */
4142 if (!lambda_vector_lexico_pos (dist_v
, DDR_NB_LOOPS (ddr
)))
4144 lambda_vector save_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
4145 if (!subscript_dependence_tester_1 (ddr
, DDR_B (ddr
), DDR_A (ddr
),
4148 compute_subscript_distance (ddr
);
4149 if (!build_classic_dist_vector_1 (ddr
, DDR_B (ddr
), DDR_A (ddr
),
4150 save_v
, &init_b
, &index_carry
))
4152 save_dist_v (ddr
, save_v
);
4153 DDR_REVERSED_P (ddr
) = true;
4155 /* In this case there is a dependence forward for all the
4158 | for (k = 1; k < 100; k++)
4159 | for (i = 1; i < 100; i++)
4160 | for (j = 1; j < 100; j++)
4162 | t = T[j+1][i-1]; // A
4163 | T[j][i] = t + 2; // B
4171 if (DDR_NB_LOOPS (ddr
) > 1)
4173 add_outer_distances (ddr
, save_v
, index_carry
);
4174 add_outer_distances (ddr
, dist_v
, index_carry
);
4179 lambda_vector save_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
4180 lambda_vector_copy (dist_v
, save_v
, DDR_NB_LOOPS (ddr
));
4182 if (DDR_NB_LOOPS (ddr
) > 1)
4184 lambda_vector opposite_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
4186 if (!subscript_dependence_tester_1 (ddr
, DDR_B (ddr
),
4187 DDR_A (ddr
), loop_nest
))
4189 compute_subscript_distance (ddr
);
4190 if (!build_classic_dist_vector_1 (ddr
, DDR_B (ddr
), DDR_A (ddr
),
4191 opposite_v
, &init_b
,
4195 save_dist_v (ddr
, save_v
);
4196 add_outer_distances (ddr
, dist_v
, index_carry
);
4197 add_outer_distances (ddr
, opposite_v
, index_carry
);
4200 save_dist_v (ddr
, save_v
);
      /* There is a distance of 1 on all the outer loops: Example:
         there is a dependence of distance 1 on loop_1 for the array A.

         | loop_1
         |   A[5] = ...
         | endloop
      */
      add_outer_distances (ddr, dist_v,
                           lambda_vector_first_nz (dist_v,
                                                   DDR_NB_LOOPS (ddr), 0));
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned i;

      fprintf (dump_file, "(build_classic_dist_vector\n");
      for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
        {
          fprintf (dump_file, "  dist_vector = (");
          print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
                               DDR_NB_LOOPS (ddr));
          fprintf (dump_file, "  )\n");
        }
      fprintf (dump_file, ")\n");
    }

  return true;
}
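
/* Worked example (illustrative sketch): in the two-deep loop over T
   shown in the comment above, the element written by B at iteration
   (i, j) is read again by A at iteration (i + 1, j - 1).  Hence
   test_dep (A, B) yields (-1, 1), which is lexicographically negative,
   so the tester reruns test_dep (B, A), records the distance vector
   (1, -1), and sets DDR_REVERSED_P.  */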
/* Return the direction for a given distance.
   FIXME: Computing dir this way is suboptimal, since dir can catch
   cases that dist is unable to represent.  */

static inline enum data_dependence_direction
dir_from_dist (int dist)
{
  if (dist > 0)
    return dir_positive;
  else if (dist < 0)
    return dir_negative;
  else
    return dir_equal;
}
/* Compute the classic per loop direction vector.  DDR is the data
   dependence relation to build a vector from.  */

static void
build_classic_dir_vector (struct data_dependence_relation *ddr)
{
  unsigned i, j;
  lambda_vector dist_v;

  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
    {
      lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));

      for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
        dir_v[j] = dir_from_dist (dist_v[j]);

      save_dir_v (ddr, dir_v);
    }
}
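
/* Worked example (illustrative sketch): dir_from_dist maps each
   distance component to a direction, so the distance vector (1, -1)
   from the example above becomes the direction vector
   (dir_positive, dir_negative); a zero distance component would map
   to dir_equal.  */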
/* Helper function.  Returns true when there is a dependence between
   data references DRA and DRB.  */

static bool
subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
                               struct data_reference *dra,
                               struct data_reference *drb,
                               struct loop *loop_nest)
{
  unsigned int i;
  tree last_conflicts;
  struct subscript *subscript;
  tree res = NULL_TREE;

  for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
    {
      conflict_function *overlaps_a, *overlaps_b;

      analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
                                      DR_ACCESS_FN (drb, i),
                                      &overlaps_a, &overlaps_b,
                                      &last_conflicts, loop_nest);

      if (SUB_CONFLICTS_IN_A (subscript))
        free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
      if (SUB_CONFLICTS_IN_B (subscript))
        free_conflict_function (SUB_CONFLICTS_IN_B (subscript));

      SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
      SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
      SUB_LAST_CONFLICT (subscript) = last_conflicts;

      /* If there is any undetermined conflict function we have to
         give a conservative answer in case we cannot prove that
         no dependence exists when analyzing another subscript.  */
      if (CF_NOT_KNOWN_P (overlaps_a)
          || CF_NOT_KNOWN_P (overlaps_b))
        {
          res = chrec_dont_know;
          continue;
        }

      /* When there is a subscript with no dependence we can stop.  */
      else if (CF_NO_DEPENDENCE_P (overlaps_a)
               || CF_NO_DEPENDENCE_P (overlaps_b))
        {
          res = chrec_known;
          break;
        }
    }

  if (res == NULL_TREE)
    return true;

  if (res == chrec_known)
    dependence_stats.num_dependence_independent++;
  else
    dependence_stats.num_dependence_undetermined++;

  finalize_ddr_dependent (ddr, res);
  return false;
}
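
/* Worked example (illustrative sketch): for two accesses A[i+1][j]
   (DRA) and A[i][j] (DRB) there are two subscripts, one per dimension:

   | subscript 0: the pair of access functions (i + 1, i)
   | subscript 1: the pair of access functions (j, j)

   The loop above analyzes each pair in turn; a single subscript proved
   conflict-free is enough to conclude chrec_known (independence),
   while any unknown conflict function forces the conservative
   chrec_dont_know answer.  */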
/* Computes the conflicting iterations in LOOP_NEST, and initializes DDR.  */

static void
subscript_dependence_tester (struct data_dependence_relation *ddr,
                             struct loop *loop_nest)
{
  if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
    dependence_stats.num_dependence_dependent++;

  compute_subscript_distance (ddr);
  if (build_classic_dist_vector (ddr, loop_nest))
    build_classic_dir_vector (ddr);
}
/* Returns true when all the access functions of A are affine or
   constant with respect to LOOP_NEST.  */

static bool
access_functions_are_affine_or_constant_p (const struct data_reference *a,
                                           const struct loop *loop_nest)
{
  unsigned int i;
  vec<tree> fns = DR_ACCESS_FNS (a);
  tree t;

  FOR_EACH_VEC_ELT (fns, i, t)
    if (!evolution_function_is_invariant_p (t, loop_nest->num)
        && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
      return false;

  return true;
}
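
/* Worked example (illustrative sketch): inside a loop over i, accesses
   such as A[i + 1] and A[3] have access functions that are affine or
   invariant in the loop and are accepted by the test above, whereas
   A[i * i] or A[b[i]] are rejected, which later forces the
   conservative chrec_dont_know answer in compute_affine_dependence.  */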
/* This computes the affine dependence relation between A and B with
   respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
   independence between two accesses, while CHREC_DONT_KNOW is used
   for representing the unknown relation.

   Note that it is possible to stop the computation of the dependence
   relation the first time we detect a CHREC_KNOWN element for a given
   subscript.  */

void
compute_affine_dependence (struct data_dependence_relation *ddr,
                           struct loop *loop_nest)
{
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "(compute_affine_dependence\n");
      fprintf (dump_file, "  stmt_a: ");
      print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
      fprintf (dump_file, "  stmt_b: ");
      print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
    }
  /* Analyze only when the dependence relation is not yet known.  */
  if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      dependence_stats.num_dependence_tests++;

      if (access_functions_are_affine_or_constant_p (dra, loop_nest)
          && access_functions_are_affine_or_constant_p (drb, loop_nest))
        subscript_dependence_tester (ddr, loop_nest);

      /* As a last case, if the dependence cannot be determined, or if
         the dependence is considered too difficult to determine, answer
         "don't know".  */
      else
        {
          dependence_stats.num_dependence_undetermined++;

          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Data ref a:\n");
              dump_data_reference (dump_file, dra);
              fprintf (dump_file, "Data ref b:\n");
              dump_data_reference (dump_file, drb);
              fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
            }
          finalize_ddr_dependent (ddr, chrec_dont_know);
        }
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
        fprintf (dump_file, ") -> no dependence\n");
      else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
        fprintf (dump_file, ") -> dependence analysis failed\n");
      else
        fprintf (dump_file, ")\n");
    }
}
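
/* Worked example (illustrative sketch): after compute_affine_dependence
   the relation is in one of three states: DDR_ARE_DEPENDENT is
   chrec_known when the accesses were proved independent,
   chrec_dont_know when the analysis gave up, and NULL_TREE when a
   dependence was found, in which case the distance and direction
   vectors computed above describe it.  */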
/* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
   the data references in DATAREFS, in the LOOP_NEST.  When
   COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
   relations.  Return true when successful, i.e. when the number of
   data references is small enough to be handled.  */

bool
compute_all_dependences (vec<data_reference_p> datarefs,
                         vec<ddr_p> *dependence_relations,
                         vec<loop_p> loop_nest,
                         bool compute_self_and_rr)
{
  struct data_dependence_relation *ddr;
  struct data_reference *a, *b;
  unsigned int i, j;

  if ((int) datarefs.length ()
      > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
    {
      struct data_dependence_relation *ddr;

      /* Insert a single relation into dependence_relations:
         chrec_dont_know.  */
      ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
      dependence_relations->safe_push (ddr);
      return false;
    }
  FOR_EACH_VEC_ELT (datarefs, i, a)
    for (j = i + 1; datarefs.iterate (j, &b); j++)
      if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
        {
          ddr = initialize_data_dependence_relation (a, b, loop_nest);
          dependence_relations->safe_push (ddr);
          if (loop_nest.exists ())
            compute_affine_dependence (ddr, loop_nest[0]);
        }
  if (compute_self_and_rr)
    FOR_EACH_VEC_ELT (datarefs, i, a)
      {
        ddr = initialize_data_dependence_relation (a, a, loop_nest);
        dependence_relations->safe_push (ddr);
        if (loop_nest.exists ())
          compute_affine_dependence (ddr, loop_nest[0]);
      }

  return true;
}
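
/* Worked example (illustrative sketch): with three data references
   { write A[i], read A[i-1], read B[i] } and COMPUTE_SELF_AND_RR
   false, the pairwise walk above creates only the relations involving
   the write: (A[i], A[i-1]) and (A[i], B[i]); the read-read pair
   (A[i-1], B[i]) is skipped.  In the worst case N references give
   N * (N - 1) / 2 relations, which is why the number of data
   references is capped by PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS before
   any relation is built.  */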
/* Describes a location of a memory reference.  */

struct data_ref_loc
{
  /* The memory reference.  */
  tree ref;

  /* True if the memory reference is read.  */
  bool is_read;
};
/* Stores the locations of memory references in STMT to REFERENCES.  Returns
   true if STMT clobbers memory, false otherwise.  */

bool
get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
{
  bool clobbers_memory = false;
  data_ref_loc ref;
  tree op0, op1;
  enum gimple_code stmt_code = gimple_code (stmt);

  /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
     As we cannot model data-references to not spelled out
     accesses give up if they may occur.  */
  if (stmt_code == GIMPLE_CALL
      && !(gimple_call_flags (stmt) & ECF_CONST))
    {
      /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
      if (gimple_call_internal_p (stmt))
        switch (gimple_call_internal_fn (stmt))
          {
          case IFN_GOMP_SIMD_LANE:
            {
              struct loop *loop = gimple_bb (stmt)->loop_father;
              tree uid = gimple_call_arg (stmt, 0);
              gcc_assert (TREE_CODE (uid) == SSA_NAME);
              if (loop == NULL
                  || loop->simduid != SSA_NAME_VAR (uid))
                clobbers_memory = true;
              break;
            }
          case IFN_MASK_LOAD:
          case IFN_MASK_STORE:
            break;
          default:
            clobbers_memory = true;
            break;
          }
      else
        clobbers_memory = true;
    }
  else if (stmt_code == GIMPLE_ASM
           && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
               || gimple_vuse (stmt)))
    clobbers_memory = true;

  if (!gimple_vuse (stmt))
    return clobbers_memory;

  if (stmt_code == GIMPLE_ASSIGN)
    {
      tree base;
      op0 = gimple_assign_lhs (stmt);
      op1 = gimple_assign_rhs1 (stmt);

      if (DECL_P (op1)
          || (REFERENCE_CLASS_P (op1)
              && (base = get_base_address (op1))
              && TREE_CODE (base) != SSA_NAME
              && !is_gimple_min_invariant (base)))
        {
          ref.ref = op1;
          ref.is_read = true;
          references->safe_push (ref);
        }
    }
  else if (stmt_code == GIMPLE_CALL)
    {
      unsigned i, n;
      tree ptr, type;
      unsigned int align;

      ref.is_read = false;
      if (gimple_call_internal_p (stmt))
        switch (gimple_call_internal_fn (stmt))
          {
          case IFN_MASK_LOAD:
            if (gimple_call_lhs (stmt) == NULL_TREE)
              break;
            ref.is_read = true;
            /* FALLTHRU */
          case IFN_MASK_STORE:
            ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
            align = tree_to_shwi (gimple_call_arg (stmt, 1));
            if (ref.is_read)
              type = TREE_TYPE (gimple_call_lhs (stmt));
            else
              type = TREE_TYPE (gimple_call_arg (stmt, 3));
            if (TYPE_ALIGN (type) != align)
              type = build_aligned_type (type, align);
            ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
                                   ptr);
            references->safe_push (ref);
            return false;
          default:
            break;
          }

      op0 = gimple_call_lhs (stmt);
      n = gimple_call_num_args (stmt);
      for (i = 0; i < n; i++)
        {
          op1 = gimple_call_arg (stmt, i);

          if (DECL_P (op1)
              || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
            {
              ref.ref = op1;
              ref.is_read = true;
              references->safe_push (ref);
            }
        }
    }
  else
    return clobbers_memory;

  if (op0
      && (DECL_P (op0)
          || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
    {
      ref.ref = op0;
      ref.is_read = false;
      references->safe_push (ref);
    }
  return clobbers_memory;
}
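
/* Worked example (illustrative sketch): for the GIMPLE statements

   | _1 = b[i_2];
   | a[i_2] = _1;

   the first statement pushes { ref = b[i_2], is_read = true } and the
   second pushes { ref = a[i_2], is_read = false }; neither statement
   clobbers memory, so both calls return false.  */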
/* Returns true if the loop-nest has any data reference.  */

bool
loop_nest_has_data_refs (loop_p loop)
{
  basic_block *bbs = get_loop_body (loop);
  auto_vec<data_ref_loc, 3> references;

  for (unsigned i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];
      gimple_stmt_iterator bsi;

      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        {
          gimple *stmt = gsi_stmt (bsi);
          get_references_in_stmt (stmt, &references);
          if (references.length ())
            {
              free (bbs);
              return true;
            }
        }
    }
  free (bbs);

  /* Recurse into the loops nested inside LOOP.  */
  if (loop->inner)
    {
      loop = loop->inner;
      while (loop)
        {
          if (loop_nest_has_data_refs (loop))
            return true;
          loop = loop->next;
        }
    }
  return false;
}
/* Stores the data references in STMT to DATAREFS.  If there is an unanalyzable
   reference, returns false, otherwise returns true.  NEST is the outermost
   loop of the loop nest in which the references should be analyzed.  */

bool
find_data_references_in_stmt (struct loop *nest, gimple *stmt,
                              vec<data_reference_p> *datarefs)
{
  unsigned i;
  auto_vec<data_ref_loc, 2> references;
  data_ref_loc *ref;
  data_reference_p dr;

  if (get_references_in_stmt (stmt, &references))
    return false;

  FOR_EACH_VEC_ELT (references, i, ref)
    {
      dr = create_data_ref (nest, loop_containing_stmt (stmt),
                            ref->ref, stmt, ref->is_read);
      gcc_assert (dr != NULL);
      datarefs->safe_push (dr);
    }

  return true;
}
/* Stores the data references in STMT to DATAREFS.  If there is an
   unanalyzable reference, returns false, otherwise returns true.
   NEST is the outermost loop of the loop nest in which the references
   should be instantiated, LOOP is the loop in which the references
   should be analyzed.  */

bool
graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple *stmt,
                                       vec<data_reference_p> *datarefs)
{
  unsigned i;
  auto_vec<data_ref_loc, 2> references;
  data_ref_loc *ref;
  data_reference_p dr;

  if (get_references_in_stmt (stmt, &references))
    return false;

  FOR_EACH_VEC_ELT (references, i, ref)
    {
      dr = create_data_ref (nest, loop, ref->ref, stmt, ref->is_read);
      gcc_assert (dr != NULL);
      datarefs->safe_push (dr);
    }

  return true;
}
/* Search the data references in the basic block BB of LOOP, and record
   the information into DATAREFS.  Returns chrec_dont_know when failing
   to analyze a difficult case, returns NULL_TREE otherwise.  */

tree
find_data_references_in_bb (struct loop *loop, basic_block bb,
                            vec<data_reference_p> *datarefs)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    {
      gimple *stmt = gsi_stmt (bsi);

      if (!find_data_references_in_stmt (loop, stmt, datarefs))
        {
          struct data_reference *res;
          res = XCNEW (struct data_reference);
          datarefs->safe_push (res);

          return chrec_dont_know;
        }
    }

  return NULL_TREE;
}
/* Search the data references in LOOP, and record the information into
   DATAREFS.  Returns chrec_dont_know when failing to analyze a
   difficult case, returns NULL_TREE otherwise.

   TODO: This function should be made smarter so that it can handle
   address arithmetic as if it were an array access, etc.  */

tree
find_data_references_in_loop (struct loop *loop,
                              vec<data_reference_p> *datarefs)
{
  basic_block bb, *bbs;
  unsigned int i;

  bbs = get_loop_body_in_dom_order (loop);

  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = bbs[i];

      if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
        {
          free (bbs);
          return chrec_dont_know;
        }
    }
  free (bbs);

  return NULL_TREE;
}
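
/* Worked example (illustrative sketch): for a loop body such as

   | for (i = 0; i < n; i++)
   |   {
   |     A[i] = 0;
   |     foo ();   // may clobber memory
   |   }

   the call makes get_references_in_stmt report a clobber, so
   find_data_references_in_stmt returns false and the walk above gives
   up with chrec_dont_know instead of returning a partial set of data
   references.  */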
/* Return the alignment in bytes that DRB is guaranteed to have at all
   times.  */

unsigned int
dr_alignment (innermost_loop_behavior *drb)
{
  /* Get the alignment of BASE_ADDRESS + INIT.  */
  unsigned int alignment = drb->base_alignment;
  unsigned int misalignment = (drb->base_misalignment
                               + TREE_INT_CST_LOW (drb->init));
  if (misalignment != 0)
    alignment = MIN (alignment, misalignment & -misalignment);

  /* Cap it to the alignment of OFFSET.  */
  if (!integer_zerop (drb->offset))
    alignment = MIN (alignment, drb->offset_alignment);

  /* Cap it to the alignment of STEP.  */
  if (!integer_zerop (drb->step))
    alignment = MIN (alignment, drb->step_alignment);

  return alignment;
}
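
/* Worked example (illustrative sketch of the arithmetic above): with
   base_alignment = 16, base_misalignment = 0 and init = 4, the
   combined misalignment is 4; its lowest set bit (4 & -4 == 4) caps
   the guaranteed alignment at MIN (16, 4) = 4 bytes.  A non-zero
   OFFSET or STEP further caps the result by its own alignment.  */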
/* Recursive helper function.  */

static bool
find_loop_nest_1 (struct loop *loop, vec<loop_p> *loop_nest)
{
  /* Inner loops of the nest should not contain siblings.  Example:
     when there are two consecutive loops,

     | loop_0
     |   loop_1
     |     ...
     |   endloop_1
     |   loop_2
     |     ...
     |   endloop_2
     | endloop_0

     the dependence relation cannot be captured by the distance
     abstraction.  */
  if (loop->next)
    return false;

  loop_nest->safe_push (loop);
  if (loop->inner)
    return find_loop_nest_1 (loop->inner, loop_nest);
  return true;
}
/* Return false when the LOOP is not well nested.  Otherwise return
   true and insert in LOOP_NEST the loops of the nest.  LOOP_NEST will
   contain the loops from the outermost to the innermost, as they will
   appear in the classic distance vector.  */

bool
find_loop_nest (struct loop *loop, vec<loop_p> *loop_nest)
{
  loop_nest->safe_push (loop);
  if (loop->inner)
    return find_loop_nest_1 (loop->inner, loop_nest);
  return true;
}
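
/* Worked example (illustrative sketch): for a perfect nest

   | loop_1
   |   loop_2
   |     A[i_1][i_2] = ...
   |   endloop_2
   | endloop_1

   find_loop_nest pushes loop_1 and then loop_2, matching the order of
   the components of a classic distance vector; if loop_2 had a sibling
   loop at the same depth, find_loop_nest_1 would return false.  */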
/* Returns true when the data dependences have been computed, false otherwise.
   Given a loop nest LOOP, the following vectors are returned:
   DATAREFS is initialized to all the array elements contained in this loop,
   DEPENDENCE_RELATIONS contains the relations between the data references.
   Compute read-read and self relations if
   COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */

bool
compute_data_dependences_for_loop (struct loop *loop,
                                   bool compute_self_and_read_read_dependences,
                                   vec<loop_p> *loop_nest,
                                   vec<data_reference_p> *datarefs,
                                   vec<ddr_p> *dependence_relations)
{
  bool res = true;

  memset (&dependence_stats, 0, sizeof (dependence_stats));

  /* If the loop nest is not well formed, or one of the data references
     is not computable, give up without spending time to compute other
     dependences.  */
  if (!loop
      || !find_loop_nest (loop, loop_nest)
      || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
      || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
                                   compute_self_and_read_read_dependences))
    res = false;
  if (dump_file && (dump_flags & TDF_STATS))
    {
      fprintf (dump_file, "Dependence tester statistics:\n");

      fprintf (dump_file, "Number of dependence tests: %d\n",
               dependence_stats.num_dependence_tests);
      fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
               dependence_stats.num_dependence_dependent);
      fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
               dependence_stats.num_dependence_independent);
      fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
               dependence_stats.num_dependence_undetermined);

      fprintf (dump_file, "Number of subscript tests: %d\n",
               dependence_stats.num_subscript_tests);
      fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
               dependence_stats.num_subscript_undetermined);
      fprintf (dump_file, "Number of same subscript function: %d\n",
               dependence_stats.num_same_subscript_function);

      fprintf (dump_file, "Number of ziv tests: %d\n",
               dependence_stats.num_ziv);
      fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
               dependence_stats.num_ziv_dependent);
      fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
               dependence_stats.num_ziv_independent);
      fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
               dependence_stats.num_ziv_unimplemented);

      fprintf (dump_file, "Number of siv tests: %d\n",
               dependence_stats.num_siv);
      fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
               dependence_stats.num_siv_dependent);
      fprintf (dump_file, "Number of siv tests returning independent: %d\n",
               dependence_stats.num_siv_independent);
      fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
               dependence_stats.num_siv_unimplemented);

      fprintf (dump_file, "Number of miv tests: %d\n",
               dependence_stats.num_miv);
      fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
               dependence_stats.num_miv_dependent);
      fprintf (dump_file, "Number of miv tests returning independent: %d\n",
               dependence_stats.num_miv_independent);
      fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
               dependence_stats.num_miv_unimplemented);
    }

  return res;
}
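
/* Usage sketch (illustrative, not part of the original sources): a
   client pass would typically drive the analysis and release the
   results as follows:

   | auto_vec<loop_p, 3> loop_nest;
   | vec<data_reference_p> datarefs = vNULL;
   | vec<ddr_p> ddrs = vNULL;
   |
   | if (compute_data_dependences_for_loop (loop, false, &loop_nest,
   |                                        &datarefs, &ddrs))
   |   ...  // inspect DDR_DIST_VECTS / DDR_DIR_VECTS of each relation
   |
   | free_dependence_relations (ddrs);
   | free_data_refs (datarefs);
*/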
/* Free the memory used by a data dependence relation DDR.  */

void
free_dependence_relation (struct data_dependence_relation *ddr)
{
  if (ddr == NULL)
    return;

  if (DDR_SUBSCRIPTS (ddr).exists ())
    free_subscripts (DDR_SUBSCRIPTS (ddr));
  DDR_DIST_VECTS (ddr).release ();
  DDR_DIR_VECTS (ddr).release ();

  free (ddr);
}
/* Free the memory used by the data dependence relations from
   DEPENDENCE_RELATIONS.  */

void
free_dependence_relations (vec<ddr_p> dependence_relations)
{
  unsigned int i;
  struct data_dependence_relation *ddr;

  FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
    if (ddr)
      free_dependence_relation (ddr);

  dependence_relations.release ();
}
/* Free the memory used by the data references from DATAREFS.  */

void
free_data_refs (vec<data_reference_p> datarefs)
{
  unsigned int i;
  struct data_reference *dr;

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    free_data_ref (dr);

  datarefs.release ();
}