/* Data references and dependences detectors.
   Copyright (C) 2003-2023 Free Software Foundation, Inc.
   Contributed by Sebastian Pop <pop@cri.ensmp.fr>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass walks a given loop structure searching for array
   references.  The information about the array accesses is recorded
   in DATA_REFERENCE structures.

   The basic test for determining the dependences is:
   given two access functions chrec1 and chrec2 to a same array, and
   x and y two vectors from the iteration domain, the same element of
   the array is accessed twice at iterations x and y if and only if:
   |             chrec1 (x) == chrec2 (y).
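
   For example, for the accesses A[i] and A[i + 1] in the same loop
   of depth 1, the access functions are the scalar evolutions
   |             chrec1 = {0, +, 1}_1 and chrec2 = {1, +, 1}_1,
   and chrec1 (x) == chrec2 (y) holds exactly when x == y + 1: the
   element accessed at iteration y is accessed again one iteration
   later, i.e. the dependence distance is 1.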

   The goals of this analysis are:

   - to determine the independence: the relation between two
     independent accesses is qualified with the chrec_known (this
     information allows a loop parallelization),

   - when two data references access the same data, to qualify the
     dependence relation with classic dependence representations:

       - distance vectors
       - direction vectors
       - loop carried level dependence
       - polyhedron dependence
     or with the chains of recurrences based representation,

   - to define a knowledge base for storing the data dependence
     information,

   - to define an interface to access this data.

   Definitions:

   - subscript: given two array accesses a subscript is the tuple
     composed of the access functions for a given dimension.  Example:
     Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
     (f1, g1), (f2, g2), (f3, g3).

   - Diophantine equation: an equation whose coefficients and
     solutions are integer constants, for example the equation
     |   3*x + 2*y = 1
     has an integer solution x = 1 and y = -1.

   References:

   - "Advanced Compilation for High Performance Computing" by Randy
     Allen and Ken Kennedy.
     http://citeseer.ist.psu.edu/goff91practical.html

   - "Loop Transformations for Restructuring Compilers - The Foundations"
     by Utpal Banerjee.

*/

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "gimple-iterator.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-affine.h"
#include "internal-fn.h"
#include "vr-values.h"
#include "range-op.h"
#include "tree-ssa-loop-ivopts.h"

static struct datadep_stats
{
  int num_dependence_tests;
  int num_dependence_dependent;
  int num_dependence_independent;
  int num_dependence_undetermined;

  int num_subscript_tests;
  int num_subscript_undetermined;
  int num_same_subscript_function;

  int num_ziv;
  int num_ziv_independent;
  int num_ziv_dependent;
  int num_ziv_unimplemented;

  int num_siv;
  int num_siv_independent;
  int num_siv_dependent;
  int num_siv_unimplemented;

  int num_miv;
  int num_miv_independent;
  int num_miv_dependent;
  int num_miv_unimplemented;
} dependence_stats;

static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
                                           unsigned int, unsigned int,
                                           class loop *);

/* Returns true iff A divides B.  */

static inline bool
tree_fold_divides_p (const_tree a, const_tree b)
{
  gcc_assert (TREE_CODE (a) == INTEGER_CST);
  gcc_assert (TREE_CODE (b) == INTEGER_CST);
  return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
}

/* Returns true iff A divides B.  */

static inline bool
int_divides_p (lambda_int a, lambda_int b)
{
  return ((b % a) == 0);
}

/* Return true if reference REF contains a union access.  */

static bool
ref_contains_union_access_p (tree ref)
{
  while (handled_component_p (ref))
    {
      ref = TREE_OPERAND (ref, 0);
      if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
          || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
        return true;
    }
  return false;
}

/* Dump into FILE all the data references from DATAREFS.  */

static void
dump_data_references (FILE *file, vec<data_reference_p> datarefs)
{
  for (data_reference *dr : datarefs)
    dump_data_reference (file, dr);
}

/* Unified dump into FILE all the data references from DATAREFS.  */

DEBUG_FUNCTION void
debug (vec<data_reference_p> &ref)
{
  dump_data_references (stderr, ref);
}

DEBUG_FUNCTION void
debug (vec<data_reference_p> *ptr)
{
  if (ptr)
    debug (*ptr);
  else
    fprintf (stderr, "<nil>\n");
}

/* Dump into STDERR all the data references from DATAREFS.  */

DEBUG_FUNCTION void
debug_data_references (vec<data_reference_p> datarefs)
{
  dump_data_references (stderr, datarefs);
}

/* Print to STDERR the data_reference DR.  */

DEBUG_FUNCTION void
debug_data_reference (struct data_reference *dr)
{
  dump_data_reference (stderr, dr);
}

/* Dump function for a DATA_REFERENCE structure.  */

void
dump_data_reference (FILE *outf,
                     struct data_reference *dr)
{
  unsigned int i;

  fprintf (outf, "#(Data Ref: \n");
  fprintf (outf, "#  bb: %d \n", gimple_bb (DR_STMT (dr))->index);
  fprintf (outf, "#  stmt: ");
  print_gimple_stmt (outf, DR_STMT (dr), 0);
  fprintf (outf, "#  ref: ");
  print_generic_stmt (outf, DR_REF (dr));
  fprintf (outf, "#  base_object: ");
  print_generic_stmt (outf, DR_BASE_OBJECT (dr));

  for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
    {
      fprintf (outf, "#  Access function %d: ", i);
      print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
    }
  fprintf (outf, "#)\n");
}

/* Unified dump function for a DATA_REFERENCE structure.  */

DEBUG_FUNCTION void
debug (data_reference &ref)
{
  dump_data_reference (stderr, &ref);
}

DEBUG_FUNCTION void
debug (data_reference *ptr)
{
  if (ptr)
    debug (*ptr);
  else
    fprintf (stderr, "<nil>\n");
}

/* Dumps the affine function described by FN to the file OUTF.  */

DEBUG_FUNCTION void
dump_affine_function (FILE *outf, affine_fn fn)
{
  unsigned i;
  tree coef;

  print_generic_expr (outf, fn[0], TDF_SLIM);
  for (i = 1; fn.iterate (i, &coef); i++)
    {
      fprintf (outf, " + ");
      print_generic_expr (outf, coef, TDF_SLIM);
      fprintf (outf, " * x_%u", i);
    }
}

/* Dumps the conflict function CF to the file OUTF.  */

DEBUG_FUNCTION void
dump_conflict_function (FILE *outf, conflict_function *cf)
{
  unsigned i;

  if (cf->n == NO_DEPENDENCE)
    fprintf (outf, "no dependence");
  else if (cf->n == NOT_KNOWN)
    fprintf (outf, "not known");
  else
    {
      for (i = 0; i < cf->n; i++)
        {
          if (i != 0)
            fprintf (outf, " ");
          fprintf (outf, "[");
          dump_affine_function (outf, cf->fns[i]);
          fprintf (outf, "]");
        }
    }
}

/* Dump function for a SUBSCRIPT structure.  */

DEBUG_FUNCTION void
dump_subscript (FILE *outf, struct subscript *subscript)
{
  conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);

  fprintf (outf, "\n (subscript \n");
  fprintf (outf, "  iterations_that_access_an_element_twice_in_A: ");
  dump_conflict_function (outf, cf);
  if (CF_NONTRIVIAL_P (cf))
    {
      tree last_iteration = SUB_LAST_CONFLICT (subscript);
      fprintf (outf, "\n  last_conflict: ");
      print_generic_expr (outf, last_iteration);
    }

  cf = SUB_CONFLICTS_IN_B (subscript);
  fprintf (outf, "\n  iterations_that_access_an_element_twice_in_B: ");
  dump_conflict_function (outf, cf);
  if (CF_NONTRIVIAL_P (cf))
    {
      tree last_iteration = SUB_LAST_CONFLICT (subscript);
      fprintf (outf, "\n  last_conflict: ");
      print_generic_expr (outf, last_iteration);
    }

  fprintf (outf, "\n  (Subscript distance: ");
  print_generic_expr (outf, SUB_DISTANCE (subscript));
  fprintf (outf, " ))\n");
}

/* Print the classic direction vector DIRV to OUTF.  */

DEBUG_FUNCTION void
print_direction_vector (FILE *outf,
                        lambda_vector dirv,
                        int length)
{
  int eq;

  for (eq = 0; eq < length; eq++)
    {
      enum data_dependence_direction dir = ((enum data_dependence_direction)
                                            dirv[eq]);

      switch (dir)
        {
        case dir_positive:
          fprintf (outf, "    +");
          break;
        case dir_negative:
          fprintf (outf, "    -");
          break;
        case dir_equal:
          fprintf (outf, "    =");
          break;
        case dir_positive_or_equal:
          fprintf (outf, "   +=");
          break;
        case dir_positive_or_negative:
          fprintf (outf, "   +-");
          break;
        case dir_negative_or_equal:
          fprintf (outf, "   -=");
          break;
        case dir_star:
          fprintf (outf, "    *");
          break;
        default:
          fprintf (outf, "indep");
          break;
        }
    }
  fprintf (outf, "\n");
}

/* Print a vector of direction vectors.  */

DEBUG_FUNCTION void
print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
                   int length)
{
  for (lambda_vector v : dir_vects)
    print_direction_vector (outf, v, length);
}

/* Print out a vector VEC of length N to OUTFILE.  */

DEBUG_FUNCTION void
print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
{
  int i;

  for (i = 0; i < n; i++)
    fprintf (outfile, HOST_WIDE_INT_PRINT_DEC " ", vector[i]);
  fprintf (outfile, "\n");
}

/* Print a vector of distance vectors.  */

DEBUG_FUNCTION void
print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
                    int length)
{
  for (lambda_vector v : dist_vects)
    print_lambda_vector (outf, v, length);
}

/* Dump function for a DATA_DEPENDENCE_RELATION structure.  */

DEBUG_FUNCTION void
dump_data_dependence_relation (FILE *outf, const data_dependence_relation *ddr)
{
  struct data_reference *dra, *drb;

  fprintf (outf, "(Data Dep: \n");

  if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (ddr)
        {
          dra = DDR_A (ddr);
          drb = DDR_B (ddr);
          if (dra)
            dump_data_reference (outf, dra);
          else
            fprintf (outf, "    (nil)\n");
          if (drb)
            dump_data_reference (outf, drb);
          else
            fprintf (outf, "    (nil)\n");
        }
      fprintf (outf, "    (don't know)\n)\n");
      return;
    }

  dra = DDR_A (ddr);
  drb = DDR_B (ddr);
  dump_data_reference (outf, dra);
  dump_data_reference (outf, drb);

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    fprintf (outf, "    (no dependence)\n");

  else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      unsigned int i;
      struct subscript *sub;
      class loop *loopi;

      FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
        {
          fprintf (outf, "  access_fn_A: ");
          print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
          fprintf (outf, "  access_fn_B: ");
          print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
          dump_subscript (outf, sub);
        }

      fprintf (outf, "  loop nest: (");
      FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
        fprintf (outf, "%d ", loopi->num);
      fprintf (outf, ")\n");

      for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
        {
          fprintf (outf, "  distance_vector: ");
          print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
                               DDR_NB_LOOPS (ddr));
        }

      for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
        {
          fprintf (outf, "  direction_vector: ");
          print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
                                  DDR_NB_LOOPS (ddr));
        }
    }

  fprintf (outf, ")\n");
}

/* Debug version.  */

DEBUG_FUNCTION void
debug_data_dependence_relation (const struct data_dependence_relation *ddr)
{
  dump_data_dependence_relation (stderr, ddr);
}

/* Dump into FILE all the dependence relations from DDRS.  */

void
dump_data_dependence_relations (FILE *file, const vec<ddr_p> &ddrs)
{
  for (auto ddr : ddrs)
    dump_data_dependence_relation (file, ddr);
}

DEBUG_FUNCTION void
debug (vec<ddr_p> &ref)
{
  dump_data_dependence_relations (stderr, ref);
}

DEBUG_FUNCTION void
debug (vec<ddr_p> *ptr)
{
  if (ptr)
    debug (*ptr);
  else
    fprintf (stderr, "<nil>\n");
}

/* Dump to STDERR all the dependence relations from DDRS.  */

DEBUG_FUNCTION void
debug_data_dependence_relations (vec<ddr_p> ddrs)
{
  dump_data_dependence_relations (stderr, ddrs);
}

/* Dumps the distance and direction vectors in FILE.  DDRS contains
   the dependence relations, and VECT_SIZE is the size of the
   dependence vectors, or in other words the number of loops in the
   considered nest.  */

DEBUG_FUNCTION void
dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
{
  for (data_dependence_relation *ddr : ddrs)
    if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
      {
        for (lambda_vector v : DDR_DIST_VECTS (ddr))
          {
            fprintf (file, "DISTANCE_V (");
            print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
            fprintf (file, ")\n");
          }

        for (lambda_vector v : DDR_DIR_VECTS (ddr))
          {
            fprintf (file, "DIRECTION_V (");
            print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
            fprintf (file, ")\n");
          }
      }

  fprintf (file, "\n\n");
}

/* Dumps the data dependence relations DDRS in FILE.  */

DEBUG_FUNCTION void
dump_ddrs (FILE *file, vec<ddr_p> ddrs)
{
  for (data_dependence_relation *ddr : ddrs)
    dump_data_dependence_relation (file, ddr);

  fprintf (file, "\n\n");
}

DEBUG_FUNCTION void
debug_ddrs (vec<ddr_p> ddrs)
{
  dump_ddrs (stderr, ddrs);
}

/* If RESULT_RANGE is nonnull, set *RESULT_RANGE to the range of
   OP0 CODE OP1, where:

   - OP0 CODE OP1 has integral type TYPE
   - the range of OP0 is given by OP0_RANGE and
   - the range of OP1 is given by OP1_RANGE.

   Independently of RESULT_RANGE, try to compute:

     DELTA = ((sizetype) OP0 CODE (sizetype) OP1)
             - (sizetype) (OP0 CODE OP1)

   as a constant and subtract DELTA from the ssizetype constant in *OFF.
   Return true on success, or false if DELTA is not known at compile time.

   Truncation and sign changes are known to distribute over CODE, i.e.

     (itype) (A CODE B) == (itype) A CODE (itype) B

   for any integral type ITYPE whose precision is no greater than the
   precision of A and B.  */

static bool
compute_distributive_range (tree type, value_range &op0_range,
                            tree_code code, value_range &op1_range,
                            tree *off, value_range *result_range)
{
  gcc_assert (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type));
  if (result_range)
    {
      range_op_handler op (code);
      if (!op.fold_range (*result_range, type, op0_range, op1_range))
        result_range->set_varying (type);
    }

  /* The distributive property guarantees that if TYPE is no narrower
     than SIZETYPE,

       (sizetype) (OP0 CODE OP1) == (sizetype) OP0 CODE (sizetype) OP1

     and so we can treat DELTA as zero.  */
  if (TYPE_PRECISION (type) >= TYPE_PRECISION (sizetype))
    return true;

  /* If overflow is undefined, we can assume that:

       X == (ssizetype) OP0 CODE (ssizetype) OP1

     is within the range of TYPE, i.e.:

       X == (ssizetype) (TYPE) X

     Distributing the (TYPE) truncation over X gives:

       X == (ssizetype) (OP0 CODE OP1)

     Casting both sides to sizetype and distributing the sizetype cast
     over X gives:

       (sizetype) OP0 CODE (sizetype) OP1 == (sizetype) (OP0 CODE OP1)

     and so we can treat DELTA as zero.  */
  if (TYPE_OVERFLOW_UNDEFINED (type))
    return true;

  /* Compute the range of:

       (ssizetype) OP0 CODE (ssizetype) OP1

     The distributive property guarantees that this has the same bitpattern as:

       (sizetype) OP0 CODE (sizetype) OP1

     but its range is more conducive to analysis.  */
  range_cast (op0_range, ssizetype);
  range_cast (op1_range, ssizetype);
  value_range wide_range;
  range_op_handler op (code);
  bool saved_flag_wrapv = flag_wrapv;
  flag_wrapv = 1;
  if (!op.fold_range (wide_range, ssizetype, op0_range, op1_range))
    wide_range.set_varying (ssizetype);
  flag_wrapv = saved_flag_wrapv;
  if (wide_range.num_pairs () != 1
      || wide_range.varying_p () || wide_range.undefined_p ())
    return false;

  wide_int lb = wide_range.lower_bound ();
  wide_int ub = wide_range.upper_bound ();

  /* Calculate the number of times that each end of the range overflows or
     underflows TYPE.  We can only calculate DELTA if the numbers match.  */
  unsigned int precision = TYPE_PRECISION (type);
  if (!TYPE_UNSIGNED (type))
    {
      wide_int type_min = wi::mask (precision - 1, true, lb.get_precision ());
      lb -= type_min;
      ub -= type_min;
    }
  wide_int upper_bits = wi::mask (precision, true, lb.get_precision ());
  lb &= upper_bits;
  ub &= upper_bits;
  if (lb != ub)
    return false;

  /* OP0 CODE OP1 overflows exactly arshift (LB, PRECISION) times, with
     negative values indicating underflow.  The low PRECISION bits of LB
     are clear, so DELTA is therefore LB (== UB).  */
  *off = wide_int_to_tree (ssizetype, wi::to_wide (*off) - lb);
  return true;
}
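
/* A small numeric instance of the DELTA computation above: if TYPE is
   a wrapping 16-bit type and the ssizetype range of OP0 CODE OP1 is
   the single value 0x18000, the truncated result wraps exactly once,
   LB == UB == 0x10000 after masking, and DELTA == 0x10000 is
   subtracted from *OFF.  */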

/* Return true if (sizetype) OP == (sizetype) (TO_TYPE) OP,
   given that OP has type FROM_TYPE and range RANGE.  Both TO_TYPE and
   FROM_TYPE are integral types.  */

static bool
nop_conversion_for_offset_p (tree to_type, tree from_type, value_range &range)
{
  gcc_assert (INTEGRAL_TYPE_P (to_type)
              && INTEGRAL_TYPE_P (from_type)
              && !TYPE_OVERFLOW_TRAPS (to_type)
              && !TYPE_OVERFLOW_TRAPS (from_type));

  /* Converting to something no narrower than sizetype and then to sizetype
     is equivalent to converting directly to sizetype.  */
  if (TYPE_PRECISION (to_type) >= TYPE_PRECISION (sizetype))
    return true;

  /* Check whether TO_TYPE can represent all values that FROM_TYPE can.  */
  if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type)
      && (TYPE_UNSIGNED (from_type) || !TYPE_UNSIGNED (to_type)))
    return true;

  /* For narrowing conversions, we could in principle test whether
     the bits in FROM_TYPE but not in TO_TYPE have a fixed value
     and apply a constant adjustment.

     For other conversions (which involve a sign change) we could
     check that the signs are always equal, and apply a constant
     adjustment if the signs are negative.

     However, both cases should be rare.  */
  return range_fits_type_p (&range, TYPE_PRECISION (to_type),
                            TYPE_SIGN (to_type));
}

static void
split_constant_offset (tree type, tree *var, tree *off,
                       value_range *result_range,
                       hash_map<tree, std::pair<tree, tree> > &cache,
                       unsigned *limit);

/* Helper function for split_constant_offset.  If TYPE is a pointer type,
   try to express OP0 CODE OP1 as:

     POINTER_PLUS <*VAR, (sizetype) *OFF>

   where:

   - *VAR has type TYPE
   - *OFF is a constant of type ssizetype.

   If TYPE is an integral type, try to express (sizetype) (OP0 CODE OP1) as:

     *VAR + (sizetype) *OFF

   where:

   - *VAR has type sizetype
   - *OFF is a constant of type ssizetype.

   In both cases, OP0 CODE OP1 has type TYPE.

   Return true on success.  A false return value indicates that we can't
   do better than set *OFF to zero.

   When returning true, set RESULT_RANGE to the range of OP0 CODE OP1,
   if RESULT_RANGE is nonnull and if we can do better than assume VR_VARYING.

   CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
   visited.  LIMIT counts down the number of SSA names that we are
   allowed to process before giving up.  */

static bool
split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
                         tree *var, tree *off, value_range *result_range,
                         hash_map<tree, std::pair<tree, tree> > &cache,
                         unsigned *limit)
{
  tree var0, var1;
  tree off0, off1;
  value_range op0_range, op1_range;

  if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type))
    return false;

  switch (code)
    {
    case INTEGER_CST:
      *var = size_int (0);
      *off = fold_convert (ssizetype, op0);
      if (result_range)
        {
          wide_int w = wi::to_wide (op0);
          result_range->set (TREE_TYPE (op0), w, w);
        }
      return true;

    case POINTER_PLUS_EXPR:
      split_constant_offset (op0, &var0, &off0, nullptr, cache, limit);
      split_constant_offset (op1, &var1, &off1, nullptr, cache, limit);
      *var = fold_build2 (POINTER_PLUS_EXPR, type, var0, var1);
      *off = size_binop (PLUS_EXPR, off0, off1);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
      split_constant_offset (op1, &var1, &off1, &op1_range, cache, limit);
      *off = size_binop (code, off0, off1);
      if (!compute_distributive_range (type, op0_range, code, op1_range,
                                       off, result_range))
        return false;
      *var = fold_build2 (code, sizetype, var0, var1);
      return true;

    case MULT_EXPR:
      if (TREE_CODE (op1) != INTEGER_CST)
        return false;

      split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
      op1_range.set (TREE_TYPE (op1), wi::to_wide (op1), wi::to_wide (op1));
      *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
      if (!compute_distributive_range (type, op0_range, code, op1_range,
                                       off, result_range))
        return false;
      *var = fold_build2 (MULT_EXPR, sizetype, var0,
                          fold_convert (sizetype, op1));
      return true;

    case ADDR_EXPR:
      {
        tree base, poffset;
        poly_int64 pbitsize, pbitpos, pbytepos;
        machine_mode pmode;
        int punsignedp, preversep, pvolatilep;

        op0 = TREE_OPERAND (op0, 0);
        base
          = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
                                 &punsignedp, &preversep, &pvolatilep);

        if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
          return false;
        base = build_fold_addr_expr (base);
        off0 = ssize_int (pbytepos);

        if (poffset)
          {
            split_constant_offset (poffset, &poffset, &off1, nullptr,
                                   cache, limit);
            off0 = size_binop (PLUS_EXPR, off0, off1);
            base = fold_build_pointer_plus (base, poffset);
          }

        var0 = fold_convert (type, base);

        /* If variable length types are involved, punt, otherwise casts
           might be converted into ARRAY_REFs in gimplify_conversion.
           To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
           possibly no longer appears in current GIMPLE, might resurface.
           This perhaps could run
           if (CONVERT_EXPR_P (var0))
             {
               gimplify_conversion (&var0);
               // Attempt to fill in any within var0 found ARRAY_REF's
               // element size from corresponding op embedded ARRAY_REF,
               // if unsuccessful, just punt.
             }  */
        while (POINTER_TYPE_P (type))
          type = TREE_TYPE (type);
        if (int_size_in_bytes (type) < 0)
          return false;

        *var = var0;
        *off = off0;
        return true;
      }

    case SSA_NAME:
      {
        if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
          return false;

        gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
        enum tree_code subcode;

        if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
          return false;

        subcode = gimple_assign_rhs_code (def_stmt);

        /* We are using a cache to avoid un-CSEing large amounts of code.  */
        bool use_cache = false;
        if (!has_single_use (op0)
            && (subcode == POINTER_PLUS_EXPR
                || subcode == PLUS_EXPR
                || subcode == MINUS_EXPR
                || subcode == MULT_EXPR
                || subcode == ADDR_EXPR
                || CONVERT_EXPR_CODE_P (subcode)))
          {
            use_cache = true;
            bool existed;
            std::pair<tree, tree> &e = cache.get_or_insert (op0, &existed);
            if (existed)
              {
                if (integer_zerop (e.second))
                  return false;
                *var = e.first;
                *off = e.second;
                /* The caller sets the range in this case.  */
                return true;
              }
            e = std::make_pair (op0, ssize_int (0));
          }

        if (*limit == 0)
          return false;
        --*limit;

        var0 = gimple_assign_rhs1 (def_stmt);
        var1 = gimple_assign_rhs2 (def_stmt);

        bool res = split_constant_offset_1 (type, var0, subcode, var1,
                                            var, off, nullptr, cache, limit);
        if (res && use_cache)
          *cache.get (op0) = std::make_pair (*var, *off);
        /* The caller sets the range in this case.  */
        return res;
      }

    CASE_CONVERT:
      {
        /* We can only handle the following conversions:

           - Conversions from one pointer type to another pointer type.

           - Conversions from one non-trapping integral type to another
             non-trapping integral type.  In this case, the recursive
             call makes sure that:

               (sizetype) OP0

             can be expressed as a sizetype operation involving VAR and OFF,
             and all we need to do is check whether:

               (sizetype) OP0 == (sizetype) (TYPE) OP0

           - Conversions from a non-trapping sizetype-size integral type to
             a like-sized pointer type.  In this case, the recursive call
             makes sure that:

               (sizetype) OP0 == *VAR + (sizetype) *OFF

             and we can convert that to:

               POINTER_PLUS <(TYPE) *VAR, (sizetype) *OFF>

           - Conversions from a sizetype-sized pointer type to a like-sized
             non-trapping integral type.  In this case, the recursive call
             makes sure that:

               OP0 == POINTER_PLUS <*VAR, (sizetype) *OFF>

             where the POINTER_PLUS and *VAR have the same precision as
             TYPE (and the same precision as sizetype).  Then:

               (sizetype) (TYPE) OP0 == (sizetype) *VAR + (sizetype) *OFF.  */
        tree itype = TREE_TYPE (op0);
        if ((POINTER_TYPE_P (itype)
             || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
            && (POINTER_TYPE_P (type)
                || (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type)))
            && (POINTER_TYPE_P (type) == POINTER_TYPE_P (itype)
                || (TYPE_PRECISION (type) == TYPE_PRECISION (sizetype)
                    && TYPE_PRECISION (itype) == TYPE_PRECISION (sizetype))))
          {
            if (POINTER_TYPE_P (type))
              {
                split_constant_offset (op0, var, off, nullptr, cache, limit);
                *var = fold_convert (type, *var);
              }
            else if (POINTER_TYPE_P (itype))
              {
                split_constant_offset (op0, var, off, nullptr, cache, limit);
                *var = fold_convert (sizetype, *var);
              }
            else
              {
                split_constant_offset (op0, var, off, &op0_range,
                                       cache, limit);
                if (!nop_conversion_for_offset_p (type, itype, op0_range))
                  return false;
                if (result_range)
                  {
                    *result_range = op0_range;
                    range_cast (*result_range, type);
                  }
              }
            return true;
          }
        return false;
      }

    default:
      return false;
    }
}
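
/* As an example of the splitting performed above: for the GIMPLE
   statement
   |   b_5 = a_4 + 16;
   a query with TYPE being b_5's integral type, OP0 == a_4,
   CODE == PLUS_EXPR and OP1 == 16 sets *VAR to (sizetype) a_4 and
   *OFF to the ssizetype constant 16, provided the range machinery can
   rule out a distribution-breaking overflow.  */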

/* If EXP has pointer type, try to express it as:

     POINTER_PLUS <*VAR, (sizetype) *OFF>

   where:

   - *VAR has the same type as EXP
   - *OFF is a constant of type ssizetype.

   If EXP has an integral type, try to express (sizetype) EXP as:

     *VAR + (sizetype) *OFF

   where:

   - *VAR has type sizetype
   - *OFF is a constant of type ssizetype.

   If EXP_RANGE is nonnull, set it to the range of EXP.

   CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
   visited.  LIMIT counts down the number of SSA names that we are
   allowed to process before giving up.  */

static void
split_constant_offset (tree exp, tree *var, tree *off, value_range *exp_range,
                       hash_map<tree, std::pair<tree, tree> > &cache,
                       unsigned *limit)
{
  tree type = TREE_TYPE (exp), op0, op1;
  enum tree_code code;

  code = TREE_CODE (exp);
  if (exp_range)
    {
      *exp_range = type;
      if (code == SSA_NAME)
        {
          value_range vr;
          get_range_query (cfun)->range_of_expr (vr, exp);
          if (vr.undefined_p ())
            vr.set_varying (TREE_TYPE (exp));
          tree vr_min, vr_max;
          value_range_kind vr_kind = get_legacy_range (vr, vr_min, vr_max);
          wide_int var_min = wi::to_wide (vr_min);
          wide_int var_max = wi::to_wide (vr_max);
          wide_int var_nonzero = get_nonzero_bits (exp);
          vr_kind = intersect_range_with_nonzero_bits (vr_kind,
                                                       &var_min, &var_max,
                                                       var_nonzero,
                                                       TYPE_SIGN (type));
          /* This check for VR_VARYING is here because the old code
             using get_range_info would return VR_RANGE for the entire
             domain, instead of VR_VARYING.  The new code normalizes
             full-domain ranges to VR_VARYING.  */
          if (vr_kind == VR_RANGE || vr_kind == VR_VARYING)
            *exp_range = value_range (type, var_min, var_max);
        }
    }

  if (!tree_is_chrec (exp)
      && get_gimple_rhs_class (TREE_CODE (exp)) != GIMPLE_TERNARY_RHS)
    {
      extract_ops_from_tree (exp, &code, &op0, &op1);
      if (split_constant_offset_1 (type, op0, code, op1, var, off,
                                   exp_range, cache, limit))
        return;
    }

  *var = exp;
  if (INTEGRAL_TYPE_P (type))
    *var = fold_convert (sizetype, *var);
  *off = ssize_int (0);

  value_range r;
  if (exp_range && code != SSA_NAME
      && get_range_query (cfun)->range_of_expr (r, exp)
      && !r.undefined_p ())
    *exp_range = r;
}

/* Expresses EXP as VAR + OFF, where OFF is a constant.  VAR has the same
   type as EXP while OFF has type ssizetype.  */

void
split_constant_offset (tree exp, tree *var, tree *off)
{
  unsigned limit = param_ssa_name_def_chain_limit;
  static hash_map<tree, std::pair<tree, tree> > *cache;
  if (!cache)
    cache = new hash_map<tree, std::pair<tree, tree> > (37);
  split_constant_offset (exp, var, off, nullptr, *cache, &limit);
  *var = fold_convert (TREE_TYPE (exp), *var);
  cache->empty ();
}
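
/* Typical usage of the public overload, as in dr_analyze_innermost
   below:
   |   tree var, off;
   |   split_constant_offset (base_iv.base, &var, &off);
   after which the analyzed expression equals VAR + OFF with OFF an
   ssizetype constant.  The call cannot fail; when nothing better is
   known it returns the expression itself and a zero offset.  */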

/* Returns the address ADDR of an object in a canonical shape (without nop
   casts, and with type of pointer to the object).  */

static tree
canonicalize_base_object_address (tree addr)
{
  STRIP_NOPS (addr);

  /* The base address may be obtained by casting from integer, in that case
     keep the cast.  */
  if (!POINTER_TYPE_P (TREE_TYPE (addr)))
    return addr;

  if (TREE_CODE (addr) != ADDR_EXPR)
    return addr;

  return build_fold_addr_expr (TREE_OPERAND (addr, 0));
}

/* Analyze the behavior of memory reference REF within STMT.
   There are two modes:

   - BB analysis.  In this case we simply split the address into base,
     init and offset components, without reference to any containing loop.
     The resulting base and offset are general expressions and they can
     vary arbitrarily from one iteration of the containing loop to the next.
     The step is always zero.

   - loop analysis.  In this case we analyze the reference both wrt LOOP
     and on the basis that the reference occurs (is "used") in LOOP;
     see the comment above analyze_scalar_evolution_in_loop for more
     information about this distinction.  The base, init, offset and
     step fields are all invariant in LOOP.

   Perform BB analysis if LOOP is null, or if LOOP is the function's
   dummy outermost loop.  In other cases perform loop analysis.

   Return true if the analysis succeeded and store the results in DRB if so.
   BB analysis can only fail for bitfield or reversed-storage accesses.  */

opt_result
dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
                      class loop *loop, const gimple *stmt)
{
  poly_int64 pbitsize, pbitpos;
  tree base, poffset;
  machine_mode pmode;
  int punsignedp, preversep, pvolatilep;
  affine_iv base_iv, offset_iv;
  tree init, dinit, step;
  bool in_loop = (loop && loop->num);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "analyze_innermost: ");

  base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
                              &punsignedp, &preversep, &pvolatilep);
  gcc_assert (base != NULL_TREE);

  poly_int64 pbytepos;
  if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
    return opt_result::failure_at (stmt,
                                   "failed: bit offset alignment.\n");

  if (preversep)
    return opt_result::failure_at (stmt,
                                   "failed: reverse storage order.\n");

  /* Calculate the alignment and misalignment for the inner reference.  */
  unsigned HOST_WIDE_INT bit_base_misalignment;
  unsigned int bit_base_alignment;
  get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);

  /* There are no bitfield references remaining in BASE, so the values
     we got back must be whole bytes.  */
  gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
              && bit_base_misalignment % BITS_PER_UNIT == 0);
  unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
  poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;

  if (TREE_CODE (base) == MEM_REF)
    {
      if (!integer_zerop (TREE_OPERAND (base, 1)))
        {
          /* Subtract MOFF from the base and add it to POFFSET instead.
             Adjust the misalignment to reflect the amount we subtracted.  */
          poly_offset_int moff = mem_ref_offset (base);
          base_misalignment -= moff.force_shwi ();
          tree mofft = wide_int_to_tree (sizetype, moff);
          if (!poffset)
            poffset = mofft;
          else
            poffset = size_binop (PLUS_EXPR, poffset, mofft);
        }
      base = TREE_OPERAND (base, 0);
    }
  else
    base = build_fold_addr_expr (base);

  if (in_loop)
    {
      if (!simple_iv (loop, loop, base, &base_iv, true))
        return opt_result::failure_at
          (stmt, "failed: evolution of base is not affine.\n");
    }
  else
    {
      base_iv.base = base;
      base_iv.step = ssize_int (0);
      base_iv.no_overflow = true;
    }

  if (!poffset)
    {
      offset_iv.base = ssize_int (0);
      offset_iv.step = ssize_int (0);
    }
  else
    {
      if (!in_loop)
        {
          offset_iv.base = poffset;
          offset_iv.step = ssize_int (0);
        }
      else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
        return opt_result::failure_at
          (stmt, "failed: evolution of offset is not affine.\n");
    }

  init = ssize_int (pbytepos);

  /* Subtract any constant component from the base and add it to INIT instead.
     Adjust the misalignment to reflect the amount we subtracted.  */
  split_constant_offset (base_iv.base, &base_iv.base, &dinit);
  init = size_binop (PLUS_EXPR, init, dinit);
  base_misalignment -= TREE_INT_CST_LOW (dinit);

  split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
  init = size_binop (PLUS_EXPR, init, dinit);

  step = size_binop (PLUS_EXPR,
                     fold_convert (ssizetype, base_iv.step),
                     fold_convert (ssizetype, offset_iv.step));

  base = canonicalize_base_object_address (base_iv.base);

  /* See if get_pointer_alignment can guarantee a higher alignment than
     the one we calculated above.  */
  unsigned HOST_WIDE_INT alt_misalignment;
  unsigned int alt_alignment;
  get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);

  /* As above, these values must be whole bytes.  */
  gcc_assert (alt_alignment % BITS_PER_UNIT == 0
              && alt_misalignment % BITS_PER_UNIT == 0);
  alt_alignment /= BITS_PER_UNIT;
  alt_misalignment /= BITS_PER_UNIT;

  if (base_alignment < alt_alignment)
    {
      base_alignment = alt_alignment;
      base_misalignment = alt_misalignment;
    }

  drb->base_address = base;
  drb->offset = fold_convert (ssizetype, offset_iv.base);
  drb->init = init;
  drb->step = step;
  if (known_misalignment (base_misalignment, base_alignment,
                          &drb->base_misalignment))
    drb->base_alignment = base_alignment;
  else
    {
      drb->base_alignment = known_alignment (base_misalignment);
      drb->base_misalignment = 0;
    }
  drb->offset_alignment = highest_pow2_factor (offset_iv.base);
  drb->step_alignment = highest_pow2_factor (step);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "success.\n");

  return opt_result::success ();
}
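
/* As an illustration of the decomposition: for an access a[i + 4] to
   an array of 4-byte elements, analyzed in the loop over i, the
   recorded behavior is roughly
   |   base_address: &a    offset: 0    init: 16    step: 4
   i.e. the byte address in iteration i is &a + 16 + 4 * i.  The exact
   trees depend on the input IL.  */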

/* Return true if OP is a valid component reference for a DR access
   function.  This accepts a subset of what handled_component_p accepts.  */

static bool
access_fn_component_p (tree op)
{
  switch (TREE_CODE (op))
    {
    case REALPART_EXPR:
    case IMAGPART_EXPR:
    case ARRAY_REF:
      return true;

    case COMPONENT_REF:
      return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;

    default:
      return false;
    }
}

/* Returns whether BASE can have a access_fn_component_p with BASE
   as base.  */

static bool
base_supports_access_fn_components_p (tree base)
{
  switch (TREE_CODE (TREE_TYPE (base)))
    {
    case COMPLEX_TYPE:
    case ARRAY_TYPE:
    case RECORD_TYPE:
      return true;
    default:
      return false;
    }
}

/* Determines the base object and the list of indices of memory reference
   DR, analyzed in LOOP and instantiated before NEST.  */

static void
dr_analyze_indices (struct indices *dri, tree ref, edge nest, loop_p loop)
{
  /* If analyzing a basic-block there are no indices to analyze
     and thus no access functions.  */
  if (!nest)
    {
      dri->base_object = ref;
      dri->access_fns.create (0);
      return;
    }

  vec<tree> access_fns = vNULL;

  /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
     into a two element array with a constant index.  The base is
     then just the immediate underlying object.  */
  if (TREE_CODE (ref) == REALPART_EXPR)
    {
      ref = TREE_OPERAND (ref, 0);
      access_fns.safe_push (integer_zero_node);
    }
  else if (TREE_CODE (ref) == IMAGPART_EXPR)
    {
      ref = TREE_OPERAND (ref, 0);
      access_fns.safe_push (integer_one_node);
    }

  /* Analyze access functions of dimensions we know to be independent.
     The list of component references handled here should be kept in
     sync with access_fn_component_p.  */
  while (handled_component_p (ref))
    {
      if (TREE_CODE (ref) == ARRAY_REF)
        {
          tree op = TREE_OPERAND (ref, 1);
          tree access_fn = analyze_scalar_evolution (loop, op);
          access_fn = instantiate_scev (nest, loop, access_fn);
          access_fns.safe_push (access_fn);
        }
      else if (TREE_CODE (ref) == COMPONENT_REF
               && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
        {
          /* For COMPONENT_REFs of records (but not unions!) use the
             FIELD_DECL offset as constant access function so we can
             disambiguate a[i].f1 and a[i].f2.  */
          tree off = component_ref_field_offset (ref);
          off = size_binop (PLUS_EXPR,
                            size_binop (MULT_EXPR,
                                        fold_convert (bitsizetype, off),
                                        bitsize_int (BITS_PER_UNIT)),
                            DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
          access_fns.safe_push (off);
        }
      else
        /* If we have an unhandled component we could not translate
           to an access function stop analyzing.  We have determined
           our base object in this case.  */
        break;

      ref = TREE_OPERAND (ref, 0);
    }

  /* If the address operand of a MEM_REF base has an evolution in the
     analyzed nest, add it as an additional independent access-function.  */
  if (TREE_CODE (ref) == MEM_REF)
    {
      tree op = TREE_OPERAND (ref, 0);
      tree access_fn = analyze_scalar_evolution (loop, op);
      access_fn = instantiate_scev (nest, loop, access_fn);
      STRIP_NOPS (access_fn);
      if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
        {
          tree memoff = TREE_OPERAND (ref, 1);
          tree base = initial_condition (access_fn);
          tree orig_type = TREE_TYPE (base);
          STRIP_USELESS_TYPE_CONVERSION (base);
          tree off;
          split_constant_offset (base, &base, &off);
          STRIP_USELESS_TYPE_CONVERSION (base);
          /* Fold the MEM_REF offset into the evolutions initial
             value to make more bases comparable.  */
          if (!integer_zerop (memoff))
            {
              off = size_binop (PLUS_EXPR, off,
                                fold_convert (ssizetype, memoff));
              memoff = build_int_cst (TREE_TYPE (memoff), 0);
            }
          /* Adjust the offset so it is a multiple of the access type
             size and thus we separate bases that can possibly be used
             to produce partial overlaps (which the access_fn machinery
             cannot handle).  */
          wide_int rem;
          if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
              && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
              && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
            rem = wi::mod_trunc
              (wi::to_wide (off),
               wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
               SIGNED);
          else
            /* If we can't compute the remainder simply force the initial
               condition to zero.  */
            rem = wi::to_wide (off);
          off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
          memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
          /* And finally replace the initial condition.  */
          access_fn = chrec_replace_initial_condition
              (access_fn, fold_convert (orig_type, off));
          /* ???  This is still not a suitable base object for
             dr_may_alias_p - the base object needs to be an
             access that covers the object as whole.  With
             an evolution in the pointer this cannot be
             guaranteed.
             As a band-aid, mark the access so we can special-case
             it in dr_may_alias_p.  */
          tree old = ref;
          ref = fold_build2_loc (EXPR_LOCATION (ref),
                                 MEM_REF, TREE_TYPE (ref),
                                 base, memoff);
          MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
          MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
          dri->unconstrained_base = true;
          access_fns.safe_push (access_fn);
        }
    }
  else if (DECL_P (ref))
    {
      /* Canonicalize DR_BASE_OBJECT to MEM_REF form.  */
      ref = build2 (MEM_REF, TREE_TYPE (ref),
                    build_fold_addr_expr (ref),
                    build_int_cst (reference_alias_ptr_type (ref), 0));
    }

  dri->base_object = ref;
  dri->access_fns = access_fns;
}
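
/* For example, for a[i][j].f1 in a nest with i in loop 1 and j in
   loop 2, the components are stripped from the outside in, giving
   access functions (the constant bit offset of f1, {j_0, +, 1}_2,
   {i_0, +, 1}_1) and a base object built from a.  */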

/* Extracts the alias analysis information from the memory reference DR.  */

static void
dr_analyze_alias (struct data_reference *dr)
{
  tree ref = DR_REF (dr);
  tree base = get_base_address (ref), addr;

  if (INDIRECT_REF_P (base)
      || TREE_CODE (base) == MEM_REF)
    {
      addr = TREE_OPERAND (base, 0);
      if (TREE_CODE (addr) == SSA_NAME)
        DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
    }
}

/* Frees data reference DR.  */

void
free_data_ref (data_reference_p dr)
{
  DR_ACCESS_FNS (dr).release ();
  if (dr->alt_indices.base_object)
    dr->alt_indices.access_fns.release ();
  free (dr);
}

/* Analyze memory reference MEMREF, which is accessed in STMT.
   The reference is a read if IS_READ is true, otherwise it is a write.
   IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
   within STMT, i.e. that it might not occur even if STMT is executed
   and runs to completion.

   Return the data_reference description of MEMREF.  NEST is the outermost
   loop in which the reference should be instantiated, LOOP is the loop
   in which the data reference should be analyzed.  */

struct data_reference *
create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
                 bool is_read, bool is_conditional_in_stmt)
{
  struct data_reference *dr;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Creating dr for ");
      print_generic_expr (dump_file, memref, TDF_SLIM);
      fprintf (dump_file, "\n");
    }

  dr = XCNEW (struct data_reference);
  DR_STMT (dr) = stmt;
  DR_REF (dr) = memref;
  DR_IS_READ (dr) = is_read;
  DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;

  dr_analyze_innermost (&DR_INNERMOST (dr), memref,
                        nest != NULL ? loop : NULL, stmt);
  dr_analyze_indices (&dr->indices, DR_REF (dr), nest, loop);
  dr_analyze_alias (dr);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned i;
      fprintf (dump_file, "\tbase_address: ");
      print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
      fprintf (dump_file, "\n\toffset from base address: ");
      print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tconstant offset from base address: ");
      print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tstep: ");
      print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tbase misalignment: %d",
               DR_BASE_MISALIGNMENT (dr));
      fprintf (dump_file, "\n\toffset alignment: %d",
               DR_OFFSET_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tbase_object: ");
      print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
      fprintf (dump_file, "\n");
      for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
        {
          fprintf (dump_file, "\tAccess function %d: ", i);
          print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
        }
    }

  return dr;
}

/* A helper function computes order between two tree expressions T1 and T2.
   This is used in comparator functions sorting objects based on the order
   of tree expressions.  The function returns -1, 0, or 1.  */

int
data_ref_compare_tree (tree t1, tree t2)
{
  int i, cmp;
  enum tree_code code;
  char tclass;

  if (t1 == t2)
    return 0;
  if (t1 == NULL)
    return -1;
  if (t2 == NULL)
    return 1;

  STRIP_USELESS_TYPE_CONVERSION (t1);
  STRIP_USELESS_TYPE_CONVERSION (t2);
  if (t1 == t2)
    return 0;

  if (TREE_CODE (t1) != TREE_CODE (t2)
      && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
    return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;

  code = TREE_CODE (t1);
  switch (code)
    {
    case INTEGER_CST:
      return tree_int_cst_compare (t1, t2);

    case STRING_CST:
      if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
        return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
      return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
                     TREE_STRING_LENGTH (t1));

    case SSA_NAME:
      if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
        return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
      break;

    default:
      if (POLY_INT_CST_P (t1))
        return compare_sizes_for_sort (wi::to_poly_widest (t1),
                                       wi::to_poly_widest (t2));

      tclass = TREE_CODE_CLASS (code);

      /* For decls, compare their UIDs.  */
      if (tclass == tcc_declaration)
        {
          if (DECL_UID (t1) != DECL_UID (t2))
            return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
          break;
        }
      /* For expressions, compare their operands recursively.  */
      else if (IS_EXPR_CODE_CLASS (tclass))
        {
          for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
            {
              cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
                                           TREE_OPERAND (t2, i));
              if (cmp != 0)
                return cmp;
            }
        }
      else
        gcc_unreachable ();
      break;
    }

  return 0;
}

/* Return TRUE if it's possible to resolve data dependence DDR by a runtime
   alias check.  */

opt_result
runtime_alias_check_p (ddr_p ddr, class loop *loop, bool speed_p)
{
  if (dump_enabled_p ())
    dump_printf (MSG_NOTE,
                 "consider run-time aliasing test between %T and %T\n",
                 DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));

  if (!speed_p)
    return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
                                   "runtime alias check not supported when"
                                   " optimizing for size.\n");

  /* FORNOW: We don't support versioning with outer-loop in either
     vectorization or loop distribution.  */
  if (loop != NULL && loop->inner != NULL)
    return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
                                   "runtime alias check not supported for"
                                   " outer loop.\n");

  return opt_result::success ();
}

/* Operator == between two dr_with_seg_len objects.

   This equality operator is used to make sure two data refs
   are the same one so that we will consider to combine the
   aliasing checks of those two pairs of data dependent data
   refs.  */

static bool
operator == (const dr_with_seg_len& d1,
             const dr_with_seg_len& d2)
{
  return (operand_equal_p (DR_BASE_ADDRESS (d1.dr),
                           DR_BASE_ADDRESS (d2.dr), 0)
          && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
          && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
          && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0
          && known_eq (d1.access_size, d2.access_size)
          && d1.align == d2.align);
}

/* Comparison function for sorting objects of dr_with_seg_len_pair_t
   so that we can combine aliasing checks in one scan.  */

static int
comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
{
  const dr_with_seg_len_pair_t * pa = (const dr_with_seg_len_pair_t *) pa_;
  const dr_with_seg_len_pair_t * pb = (const dr_with_seg_len_pair_t *) pb_;
  const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
  const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;

  /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks
     if a and c have the same base address and step, and b and d have the same
     address and step.  Therefore, if any a&c or b&d don't have the same address
     and step, we don't care the order of those two pairs after sorting.  */
  int comp_res;

  if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
                                         DR_BASE_ADDRESS (b1.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
                                         DR_BASE_ADDRESS (b2.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
                                         DR_STEP (b1.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
                                         DR_STEP (b2.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
                                         DR_OFFSET (b1.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
                                         DR_INIT (b1.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
                                         DR_OFFSET (b2.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
                                         DR_INIT (b2.dr))) != 0)
    return comp_res;

  return 0;
}

/* Dump information about ALIAS_PAIR, indenting each line by INDENT.  */

static void
dump_alias_pair (dr_with_seg_len_pair_t *alias_pair, const char *indent)
{
  dump_printf (MSG_NOTE, "%sreference: %T vs. %T\n", indent,
               DR_REF (alias_pair->first.dr),
               DR_REF (alias_pair->second.dr));

  dump_printf (MSG_NOTE, "%ssegment length: %T", indent,
               alias_pair->first.seg_len);
  if (!operand_equal_p (alias_pair->first.seg_len,
                        alias_pair->second.seg_len, 0))
    dump_printf (MSG_NOTE, " vs. %T", alias_pair->second.seg_len);

  dump_printf (MSG_NOTE, "\n%saccess size: ", indent);
  dump_dec (MSG_NOTE, alias_pair->first.access_size);
  if (maybe_ne (alias_pair->first.access_size, alias_pair->second.access_size))
    {
      dump_printf (MSG_NOTE, " vs. ");
      dump_dec (MSG_NOTE, alias_pair->second.access_size);
    }

  dump_printf (MSG_NOTE, "\n%salignment: %d", indent,
               alias_pair->first.align);
  if (alias_pair->first.align != alias_pair->second.align)
    dump_printf (MSG_NOTE, " vs. %d", alias_pair->second.align);

  dump_printf (MSG_NOTE, "\n%sflags: ", indent);
  if (alias_pair->flags & DR_ALIAS_RAW)
    dump_printf (MSG_NOTE, " RAW");
  if (alias_pair->flags & DR_ALIAS_WAR)
    dump_printf (MSG_NOTE, " WAR");
  if (alias_pair->flags & DR_ALIAS_WAW)
    dump_printf (MSG_NOTE, " WAW");
  if (alias_pair->flags & DR_ALIAS_ARBITRARY)
    dump_printf (MSG_NOTE, " ARBITRARY");
  if (alias_pair->flags & DR_ALIAS_SWAPPED)
    dump_printf (MSG_NOTE, " SWAPPED");
  if (alias_pair->flags & DR_ALIAS_UNSWAPPED)
    dump_printf (MSG_NOTE, " UNSWAPPED");
  if (alias_pair->flags & DR_ALIAS_MIXED_STEPS)
    dump_printf (MSG_NOTE, " MIXED_STEPS");
  if (alias_pair->flags == 0)
    dump_printf (MSG_NOTE, " <none>");
  dump_printf (MSG_NOTE, "\n");
}

/* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
   FACTOR is number of iterations that each data reference is accessed.

   Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
   we create an expression:

   ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
   || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))

   for aliasing checks.  However, in some cases we can decrease the number
   of checks by combining two checks into one.  For example, suppose we have
   another pair of data refs store_ptr_0 & load_ptr_1, and if the following
   condition is satisfied:

   load_ptr_0 < load_ptr_1  &&
   load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0

   (this condition means, in each iteration of vectorized loop, the accessed
   memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
   load_ptr_1.)

   we then can use only the following expression to finish the aliasing checks
   between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:

   ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
   || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))

   Note that we only consider that load_ptr_0 and load_ptr_1 have the same
   base address.  */

void
prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
                               poly_uint64)
{
  if (alias_pairs->is_empty ())
    return;

  /* Canonicalize each pair so that the base components are ordered wrt
     data_ref_compare_tree.  This allows the loop below to merge more
     cases.  */
  unsigned int i;
  dr_with_seg_len_pair_t *alias_pair;
  FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
    {
      data_reference_p dr_a = alias_pair->first.dr;
      data_reference_p dr_b = alias_pair->second.dr;
      int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
                                            DR_BASE_ADDRESS (dr_b));
      if (comp_res == 0)
        comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
      if (comp_res == 0)
        comp_res = data_ref_compare_tree (DR_INIT (dr_a), DR_INIT (dr_b));
      if (comp_res > 0)
        {
          std::swap (alias_pair->first, alias_pair->second);
          alias_pair->flags |= DR_ALIAS_SWAPPED;
        }
      else
        alias_pair->flags |= DR_ALIAS_UNSWAPPED;
    }

  /* Sort the collected data ref pairs so that we can scan them once to
     combine all possible aliasing checks.  */
  alias_pairs->qsort (comp_dr_with_seg_len_pair);

  /* Scan the sorted dr pairs and check if we can combine alias checks
     of two neighboring dr pairs.  */
  unsigned int last = 0;
  for (i = 1; i < alias_pairs->length (); ++i)
    {
      /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2).  */
      dr_with_seg_len_pair_t *alias_pair1 = &(*alias_pairs)[last];
      dr_with_seg_len_pair_t *alias_pair2 = &(*alias_pairs)[i];

      dr_with_seg_len *dr_a1 = &alias_pair1->first;
      dr_with_seg_len *dr_b1 = &alias_pair1->second;
      dr_with_seg_len *dr_a2 = &alias_pair2->first;
      dr_with_seg_len *dr_b2 = &alias_pair2->second;

      /* Remove duplicate data ref pairs.  */
      if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
        {
          if (dump_enabled_p ())
            dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
                         DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
                         DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
          alias_pair1->flags |= alias_pair2->flags;
          continue;
        }

      /* Assume that we won't be able to merge the pairs, then correct
         if we do.  */
      last += 1;
      if (last != i)
        (*alias_pairs)[last] = (*alias_pairs)[i];

      if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
        {
          /* We consider the case that DR_B1 and DR_B2 are same memrefs,
             and DR_A1 and DR_A2 are two consecutive memrefs.  */
          if (*dr_a1 == *dr_a2)
            {
              std::swap (dr_a1, dr_b1);
              std::swap (dr_a2, dr_b2);
            }

          poly_int64 init_a1, init_a2;
          /* Only consider cases in which the distance between the initial
             DR_A1 and the initial DR_A2 is known at compile time.  */
          if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
                                DR_BASE_ADDRESS (dr_a2->dr), 0)
              || !operand_equal_p (DR_OFFSET (dr_a1->dr),
                                   DR_OFFSET (dr_a2->dr), 0)
              || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
              || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
            continue;

          /* Don't combine if we can't tell which one comes first.  */
          if (!ordered_p (init_a1, init_a2))
            continue;

          /* Work out what the segment length would be if we did combine
             DR_A1 and DR_A2:

             - If DR_A1 and DR_A2 have equal lengths, that length is
               also the combined length.

             - If DR_A1 and DR_A2 both have negative "lengths", the combined
               length is the lower bound on those lengths.

             - If DR_A1 and DR_A2 both have positive lengths, the combined
               length is the upper bound on those lengths.

             Other cases are unlikely to give a useful combination.

             The lengths both have sizetype, so the sign is taken from
             the step instead.  */
          poly_uint64 new_seg_len = 0;
          bool new_seg_len_p = !operand_equal_p (dr_a1->seg_len,
                                                 dr_a2->seg_len, 0);
          if (new_seg_len_p)
            {
              poly_uint64 seg_len_a1, seg_len_a2;
              if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
                  || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
                continue;

              tree indicator_a = dr_direction_indicator (dr_a1->dr);
              if (TREE_CODE (indicator_a) != INTEGER_CST)
                continue;

              tree indicator_b = dr_direction_indicator (dr_a2->dr);
              if (TREE_CODE (indicator_b) != INTEGER_CST)
                continue;

              int sign_a = tree_int_cst_sgn (indicator_a);
              int sign_b = tree_int_cst_sgn (indicator_b);

              if (sign_a <= 0 && sign_b <= 0)
                new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
              else if (sign_a >= 0 && sign_b >= 0)
                new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
              else
                continue;
            }

          /* At this point we're committed to merging the refs.  */

          /* Make sure dr_a1 starts left of dr_a2.  */
          if (maybe_gt (init_a1, init_a2))
            {
              std::swap (*dr_a1, *dr_a2);
              std::swap (init_a1, init_a2);
            }

          /* The DR_Bs are equal, so only the DR_As can introduce
             mixed steps.  */
          if (!operand_equal_p (DR_STEP (dr_a1->dr), DR_STEP (dr_a2->dr), 0))
            alias_pair1->flags |= DR_ALIAS_MIXED_STEPS;

          if (new_seg_len_p)
            {
              dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
                                              new_seg_len);
              dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
            }

          /* This is always positive due to the swap above.  */
          poly_uint64 diff = init_a2 - init_a1;

          /* The new check will start at DR_A1.  Make sure that its access
             size encompasses the initial DR_A2.  */
          if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
            {
              dr_a1->access_size = upper_bound (dr_a1->access_size,
                                                diff + dr_a2->access_size);
              unsigned int new_align = known_alignment (dr_a1->access_size);
              dr_a1->align = MIN (dr_a1->align, new_align);
            }
          if (dump_enabled_p ())
            dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
                         DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
                         DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
          alias_pair1->flags |= alias_pair2->flags;
          last -= 1;
        }
    }
  alias_pairs->truncate (last + 1);

  /* Try to restore the original dr_with_seg_len order within each
     dr_with_seg_len_pair_t.  If we ended up combining swapped and
     unswapped pairs into the same check, we have to invalidate any
     RAW, WAR and WAW information for it.  */
  if (dump_enabled_p ())
    dump_printf (MSG_NOTE, "merged alias checks:\n");
  FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
    {
      unsigned int swap_mask = (DR_ALIAS_SWAPPED | DR_ALIAS_UNSWAPPED);
      unsigned int swapped = (alias_pair->flags & swap_mask);
      if (swapped == DR_ALIAS_SWAPPED)
        std::swap (alias_pair->first, alias_pair->second);
      else if (swapped != DR_ALIAS_UNSWAPPED)
        alias_pair->flags |= DR_ALIAS_ARBITRARY;
      alias_pair->flags &= ~swap_mask;
      if (dump_enabled_p ())
        dump_alias_pair (alias_pair, "  ");
    }
}
/* A subroutine of create_intersect_range_checks, with a subset of the
   same arguments.  Try to use IFN_CHECK_RAW_PTRS and IFN_CHECK_WAR_PTRS
   to optimize cases in which the references form a simple RAW, WAR or
   WAW dependence.  */

static bool
create_ifn_alias_checks (tree *cond_expr,
			 const dr_with_seg_len_pair_t &alias_pair)
{
  const dr_with_seg_len &dr_a = alias_pair.first;
  const dr_with_seg_len &dr_b = alias_pair.second;

  /* Check for cases in which:

     (a) we have a known RAW, WAR or WAW dependence;
     (b) the accesses are well-ordered in both the original and new code
	 (see the comment above the DR_ALIAS_* flags for details); and
     (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
  if (alias_pair.flags & ~(DR_ALIAS_RAW | DR_ALIAS_WAR | DR_ALIAS_WAW))
    return false;

  /* Make sure that both DRs access the same pattern of bytes,
     with a constant length and step.  */
  poly_uint64 seg_len;
  if (!operand_equal_p (dr_a.seg_len, dr_b.seg_len, 0)
      || !poly_int_tree_p (dr_a.seg_len, &seg_len)
      || maybe_ne (dr_a.access_size, dr_b.access_size)
      || !operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0)
      || !tree_fits_uhwi_p (DR_STEP (dr_a.dr)))
    return false;

  unsigned HOST_WIDE_INT bytes = tree_to_uhwi (DR_STEP (dr_a.dr));
  tree addr_a = DR_BASE_ADDRESS (dr_a.dr);
  tree addr_b = DR_BASE_ADDRESS (dr_b.dr);

  /* See whether the target supports what we want to do.  WAW checks are
     equivalent to WAR checks here.  */
  internal_fn ifn = (alias_pair.flags & DR_ALIAS_RAW
		     ? IFN_CHECK_RAW_PTRS
		     : IFN_CHECK_WAR_PTRS);
  unsigned int align = MIN (dr_a.align, dr_b.align);
  poly_uint64 full_length = seg_len + bytes;
  if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
					   full_length, align))
    {
      full_length = seg_len + dr_a.access_size;
      if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
					       full_length, align))
	return false;
    }

  /* Commit to using this form of test.  */
  addr_a = fold_build_pointer_plus (addr_a, DR_OFFSET (dr_a.dr));
  addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));

  addr_b = fold_build_pointer_plus (addr_b, DR_OFFSET (dr_b.dr));
  addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));

  *cond_expr = build_call_expr_internal_loc (UNKNOWN_LOCATION,
					     ifn, boolean_type_node,
					     4, addr_a, addr_b,
					     size_int (full_length),
					     size_int (align));

  if (dump_enabled_p ())
    {
      if (ifn == IFN_CHECK_RAW_PTRS)
	dump_printf (MSG_NOTE, "using an IFN_CHECK_RAW_PTRS test\n");
      else
	dump_printf (MSG_NOTE, "using an IFN_CHECK_WAR_PTRS test\n");
    }
  return true;
}
/* Try to generate a runtime condition that is true if ALIAS_PAIR is
   free of aliases, using a condition based on index values instead
   of a condition based on addresses.  Return true on success,
   storing the condition in *COND_EXPR.

   This can only be done if the two data references in ALIAS_PAIR access
   the same array object and the index is the only difference.  For example,
   if the two data references are DR_A and DR_B:

		       DR_A                      DR_B
      data-ref         arr[i]                    arr[j]
      base_object      arr                       arr
      index            {i_0, +, 1}_loop          {j_0, +, 1}_loop

   The addresses and their index are like:

	|<- ADDR_A    ->|          |<- ADDR_B    ->|
     ------------------------------------------------------->
	|   |   |   |   |          |   |   |   |   |
     ------------------------------------------------------->
	i_0 ...         i_0+4      j_0 ...         j_0+4

   We can create expression based on index rather than address:

     (unsigned) (i_0 - j_0 + 3) <= 6

   i.e. the indices are less than 4 apart.

   Note evolution step of index needs to be considered in comparison.  */

static bool
create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
				     const dr_with_seg_len_pair_t &alias_pair)
{
  const dr_with_seg_len &dr_a = alias_pair.first;
  const dr_with_seg_len &dr_b = alias_pair.second;
  if ((alias_pair.flags & DR_ALIAS_MIXED_STEPS)
      || integer_zerop (DR_STEP (dr_a.dr))
      || integer_zerop (DR_STEP (dr_b.dr))
      || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
    return false;

  poly_uint64 seg_len1, seg_len2;
  if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
      || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
    return false;

  if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
    return false;

  if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
    return false;

  if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
    return false;

  gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);

  bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
  unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
  if (neg_step)
    {
      abs_step = -abs_step;
      seg_len1 = (-wi::to_poly_wide (dr_a.seg_len)).force_uhwi ();
      seg_len2 = (-wi::to_poly_wide (dr_b.seg_len)).force_uhwi ();
    }

  /* Infer the number of iterations with which the memory segment is accessed
     by DR.  In other words, alias is checked if the memory segment accessed
     by DR_A in some iterations intersects with the memory segment accessed
     by DR_B in the same amount of iterations.
     Note segment length is a linear function of the number of iterations
     with DR_STEP as the coefficient.  */
  poly_uint64 niter_len1, niter_len2;
  if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
      || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
    return false;

  /* Divide each access size by the byte step, rounding up.  */
  poly_uint64 niter_access1, niter_access2;
  if (!can_div_trunc_p (dr_a.access_size + abs_step - 1,
			abs_step, &niter_access1)
      || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
			   abs_step, &niter_access2))
    return false;

  bool waw_or_war_p = (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) == 0;

  int found = -1;
  for (unsigned int i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
    {
      tree access1 = DR_ACCESS_FN (dr_a.dr, i);
      tree access2 = DR_ACCESS_FN (dr_b.dr, i);
      /* Two indices must be the same if they are not scev, or not scev wrto
	 the current loop being vectorized.  */
      if (TREE_CODE (access1) != POLYNOMIAL_CHREC
	  || TREE_CODE (access2) != POLYNOMIAL_CHREC
	  || CHREC_VARIABLE (access1) != (unsigned) loop->num
	  || CHREC_VARIABLE (access2) != (unsigned) loop->num)
	{
	  if (operand_equal_p (access1, access2, 0))
	    continue;

	  return false;
	}
      if (found >= 0)
	return false;
      found = i;
    }

  /* Ought not to happen in practice, since if all accesses are equal then the
     alias should be decidable at compile time.  */
  if (found < 0)
    return false;

  /* The two indices must have the same step.  */
  tree access1 = DR_ACCESS_FN (dr_a.dr, found);
  tree access2 = DR_ACCESS_FN (dr_b.dr, found);
  if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
    return false;

  tree idx_step = CHREC_RIGHT (access1);
  /* Index must have const step, otherwise DR_STEP won't be constant.  */
  gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
  /* Index must evaluate in the same direction as DR.  */
  gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);

  tree min1 = CHREC_LEFT (access1);
  tree min2 = CHREC_LEFT (access2);
  if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
    return false;

  /* Ideally, alias can be checked against the loop's control IV, but we
     need to prove a linear mapping between the control IV and the reference
     index.  Although that should be true, we check against the (array)
     index of the data reference.  Like segment length, index length is
     a linear function of the number of iterations with index_step as
     the coefficient, i.e., niter_len * idx_step.  */
  offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step),
					      SIGNED);
  if (neg_step)
    abs_idx_step = -abs_idx_step;
  poly_offset_int idx_len1 = abs_idx_step * niter_len1;
  poly_offset_int idx_len2 = abs_idx_step * niter_len2;
  poly_offset_int idx_access1 = abs_idx_step * niter_access1;
  poly_offset_int idx_access2 = abs_idx_step * niter_access2;

  gcc_assert (known_ge (idx_len1, 0)
	      && known_ge (idx_len2, 0)
	      && known_ge (idx_access1, 0)
	      && known_ge (idx_access2, 0));

  /* Each access has the following pattern, with lengths measured
     in units of INDEX:

	  <-- idx_len -->
	  <--- A: -ve step --->
	  +-----+-------+-----+-------+-----+
	  | n-1 | ..... |  0  | ..... | n-1 |
	  +-----+-------+-----+-------+-----+
			<--- B: +ve step --->
			<-- idx_len -->
	  |
	  min

     where "n" is the number of scalar iterations covered by the segment
     and where each access spans idx_access units.

     A is the range of bytes accessed when the step is negative,
     B is the range when the step is positive.

     When checking for general overlap, we need to test whether
     the range:

       [min1 + low_offset1, min1 + high_offset1 + idx_access1 - 1]

     overlaps:

       [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1]

     where:

       low_offsetN = +ve step ? 0 : -idx_lenN;
       high_offsetN = +ve step ? idx_lenN : 0;

     This is equivalent to testing whether:

       min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1
       && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1

     Converting this into a single test, there is an overlap if:

       0 <= min2 - min1 + bias <= limit

     where  bias = high_offset2 + idx_access2 - 1 - low_offset1
	    limit = (high_offset1 - low_offset1 + idx_access1 - 1)
		  + (high_offset2 - low_offset2 + idx_access2 - 1)
      i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1

     Combining the tests requires limit to be computable in an unsigned
     form of the index type; if it isn't, we fall back to the usual
     pointer-based checks.

     We can do better if DR_B is a write and if DR_A and DR_B are
     well-ordered in both the original and the new code (see the
     comment above the DR_ALIAS_* flags for details).  In this case
     we know that for each i in [0, n-1], the write performed by
     access i of DR_B occurs after access numbers j<=i of DR_A in
     both the original and the new code.  Any write or anti
     dependencies wrt those DR_A accesses are therefore maintained.

     We just need to make sure that each individual write in DR_B does not
     overlap any higher-indexed access in DR_A; such DR_A accesses happen
     after the DR_B access in the original code but happen before it in
     the new code.

     We know the steps for both accesses are equal, so by induction, we
     just need to test whether the first write of DR_B overlaps a later
     access of DR_A.  In other words, we need to move min1 along by
     one iteration:

       min1' = min1 + idx_step

     and use the ranges:

       [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1]

     and:

       [min2, min2 + idx_access2 - 1]

     where:

       low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|)
       high_offset1' = +ve step ? idx_len1 - |idx_step| : 0.  */
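  /* For illustration (arbitrarily chosen values): with a positive step,
     idx_len1 = idx_len2 = 8 and idx_access1 = idx_access2 = 1, the
     general test has bias = 8 and limit = 16, so the ranges
     [min1, min1 + 8] and [min2, min2 + 8] overlap iff

     | 0 <= min2 - min1 + 8 <= 16

     e.g. min1 = 100 and min2 = 92 give exactly 0, the case in which
     the last access of DR_B touches the first index of DR_A.  */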
  if (waw_or_war_p)
    idx_len1 -= abs_idx_step;

  poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1;
  if (!waw_or_war_p)
    limit += idx_len2;

  tree utype = unsigned_type_for (TREE_TYPE (min1));
  if (!wi::fits_to_tree_p (limit, utype))
    return false;

  poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
  poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2;
  poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
  /* Equivalent to adding IDX_STEP to MIN1.  */
  if (waw_or_war_p)
    bias -= wi::to_offset (idx_step);

  tree subject = fold_build2 (MINUS_EXPR, utype,
			      fold_convert (utype, min2),
			      fold_convert (utype, min1));
  subject = fold_build2 (PLUS_EXPR, utype, subject,
			 wide_int_to_tree (utype, bias));
  tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject,
				     wide_int_to_tree (utype, limit));
  if (*cond_expr)
    *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
			      *cond_expr, part_cond_expr);
  else
    *cond_expr = part_cond_expr;
  if (dump_enabled_p ())
    {
      if (waw_or_war_p)
	dump_printf (MSG_NOTE, "using an index-based WAR/WAW test\n");
      else
	dump_printf (MSG_NOTE, "using an index-based overlap test\n");
    }
  return true;
}
/* A subroutine of create_intersect_range_checks, with a subset of the
   same arguments.  Try to optimize cases in which the second access
   is a write and in which some overlap is valid.  */

static bool
create_waw_or_war_checks (tree *cond_expr,
			  const dr_with_seg_len_pair_t &alias_pair)
{
  const dr_with_seg_len &dr_a = alias_pair.first;
  const dr_with_seg_len &dr_b = alias_pair.second;

  /* Check for cases in which:

     (a) DR_B is always a write;
     (b) the accesses are well-ordered in both the original and new code
	 (see the comment above the DR_ALIAS_* flags for details); and
     (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
  if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
    return false;

  /* Check for equal (but possibly variable) steps.  */
  tree step = DR_STEP (dr_a.dr);
  if (!operand_equal_p (step, DR_STEP (dr_b.dr), 0))
    return false;

  /* Make sure that we can operate on sizetype without loss of precision.  */
  tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
  if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
    return false;

  /* All addresses involved are known to have a common alignment ALIGN.
     We can therefore subtract ALIGN from an exclusive endpoint to get
     an inclusive endpoint.  In the best (and common) case, ALIGN is the
     same as the access sizes of both DRs, and so subtracting ALIGN
     cancels out the addition of an access size.  */
  unsigned int align = MIN (dr_a.align, dr_b.align);
  poly_uint64 last_chunk_a = dr_a.access_size - align;
  poly_uint64 last_chunk_b = dr_b.access_size - align;

  /* Get a boolean expression that is true when the step is negative.  */
  tree indicator = dr_direction_indicator (dr_a.dr);
  tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
			       fold_convert (ssizetype, indicator),
			       ssize_int (0));

  /* Get lengths in sizetype.  */
  tree seg_len_a
    = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len));
  step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step));

  /* Each access has the following pattern:

	  <- |seg_len| ->
	  <--- A: -ve step --->
	  +-----+-------+-----+-------+-----+
	  | n-1 | ..... |  0  | ..... | n-1 |
	  +-----+-------+-----+-------+-----+
			<--- B: +ve step --->
			<- |seg_len| ->
	  |
	  base address

     where "n" is the number of scalar iterations covered by the segment.

     A is the range of bytes accessed when the step is negative,
     B is the range when the step is positive.

     We know that DR_B is a write.  We also know (from checking that
     DR_A and DR_B are well-ordered) that for each i in [0, n-1],
     the write performed by access i of DR_B occurs after access numbers
     j<=i of DR_A in both the original and the new code.  Any write or
     anti dependencies wrt those DR_A accesses are therefore maintained.

     We just need to make sure that each individual write in DR_B does not
     overlap any higher-indexed access in DR_A; such DR_A accesses happen
     after the DR_B access in the original code but happen before it in
     the new code.

     We know the steps for both accesses are equal, so by induction, we
     just need to test whether the first write of DR_B overlaps a later
     access of DR_A.  In other words, we need to move addr_a along by
     one iteration:

       addr_a' = addr_a + step

     and check whether:

       [addr_b, addr_b + last_chunk_b]

     overlaps:

       [addr_a' + low_offset_a, addr_a' + high_offset_a + last_chunk_a]

     where [low_offset_a, high_offset_a] spans accesses [1, n-1].  I.e.:

       low_offset_a = +ve step ? 0 : seg_len_a - step
       high_offset_a = +ve step ? seg_len_a - step : 0

     This is equivalent to testing whether:

       addr_a' + low_offset_a <= addr_b + last_chunk_b
       && addr_b <= addr_a' + high_offset_a + last_chunk_a

     Converting this into a single test, there is an overlap if:

       0 <= addr_b + last_chunk_b - addr_a' - low_offset_a <= limit

     where limit = high_offset_a - low_offset_a + last_chunk_a + last_chunk_b

     If DR_A is performed, limit + |step| - last_chunk_b is known to be
     less than the size of the object underlying DR_A.  We also know
     that last_chunk_b <= |step|; this is checked elsewhere if it isn't
     guaranteed at compile time.  There can therefore be no overflow if
     "limit" is calculated in an unsigned type with pointer precision.  */
  tree addr_a = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr),
					 DR_OFFSET (dr_a.dr));
  addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));

  tree addr_b = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr),
					 DR_OFFSET (dr_b.dr));
  addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));

  /* Advance ADDR_A by one iteration and adjust the length to compensate.  */
  addr_a = fold_build_pointer_plus (addr_a, step);
  tree seg_len_a_minus_step = fold_build2 (MINUS_EXPR, sizetype,
					   seg_len_a, step);
  if (!CONSTANT_CLASS_P (seg_len_a_minus_step))
    seg_len_a_minus_step = build1 (SAVE_EXPR, sizetype, seg_len_a_minus_step);

  tree low_offset_a = fold_build3 (COND_EXPR, sizetype, neg_step,
				   seg_len_a_minus_step, size_zero_node);
  if (!CONSTANT_CLASS_P (low_offset_a))
    low_offset_a = build1 (SAVE_EXPR, sizetype, low_offset_a);

  /* We could use COND_EXPR <neg_step, size_zero_node, seg_len_a_minus_step>,
     but it's usually more efficient to reuse the LOW_OFFSET_A result.  */
  tree high_offset_a = fold_build2 (MINUS_EXPR, sizetype, seg_len_a_minus_step,
				    low_offset_a);

  /* The amount added to addr_b - addr_a'.  */
  tree bias = fold_build2 (MINUS_EXPR, sizetype,
			   size_int (last_chunk_b), low_offset_a);

  tree limit = fold_build2 (MINUS_EXPR, sizetype, high_offset_a, low_offset_a);
  limit = fold_build2 (PLUS_EXPR, sizetype, limit,
		       size_int (last_chunk_a + last_chunk_b));

  tree subject = fold_build2 (POINTER_DIFF_EXPR, ssizetype, addr_b, addr_a);
  subject = fold_build2 (PLUS_EXPR, sizetype,
			 fold_convert (sizetype, subject), bias);

  *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit);
  if (dump_enabled_p ())
    dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n");
  return true;
}
/* If ALIGN is nonzero, set up *SEG_MIN_OUT and *SEG_MAX_OUT so that for
   every address ADDR accessed by D:

     *SEG_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEG_MAX_OUT

   In this case, every element accessed by D is aligned to at least
   ALIGN bytes.

   If ALIGN is zero then instead set *SEG_MAX_OUT so that:

     *SEG_MIN_OUT <= ADDR < *SEG_MAX_OUT.  */

static void
get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
		     tree *seg_max_out, HOST_WIDE_INT align)
{
  /* Each access has the following pattern:

	  <- |seg_len| ->
	  <--- A: -ve step --->
	  +-----+-------+-----+-------+-----+
	  | n-1 | ..... |  0  | ..... | n-1 |
	  +-----+-------+-----+-------+-----+
			<--- B: +ve step --->
			<- |seg_len| ->
	  |
	  base address

     where "n" is the number of scalar iterations covered by the segment.
     (This should be VF for a particular pair if we know that both steps
     are the same, otherwise it will be the full number of scalar loop
     iterations.)

     A is the range of bytes accessed when the step is negative,
     B is the range when the step is positive.

     If the access size is "access_size" bytes, the lowest addressed byte is:

	 base + (step < 0 ? seg_len : 0)   [LB]

     and the highest addressed byte is always below:

	 base + (step < 0 ? 0 : seg_len) + access_size   [UB]

     Thus:

	 LB <= ADDR < UB

     If ALIGN is nonzero, all three values are aligned to at least ALIGN
     bytes, so:

	 LB <= ADDR <= UB - ALIGN

     where "- ALIGN" folds naturally with the "+ access_size" and often
     cancels it out.

     We don't try to simplify LB and UB beyond this (e.g. by using
     MIN and MAX based on whether seg_len rather than the stride is
     negative) because it is possible for the absolute size of the
     segment to overflow the range of a ssize_t.

     Keeping the pointer_plus outside of the cond_expr should allow
     the cond_exprs to be shared with other alias checks.  */
  tree indicator = dr_direction_indicator (d.dr);
  tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
			       fold_convert (ssizetype, indicator),
			       ssize_int (0));
  tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
					    DR_OFFSET (d.dr));
  addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
  tree seg_len
    = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));

  tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
				seg_len, size_zero_node);
  tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
				size_zero_node, seg_len);
  max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
			   size_int (d.access_size - align));

  *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
  *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
}
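/* For illustration (arbitrarily chosen values): for a positive step,
   seg_len = 12, access_size = 4 and align = 4, the code above yields

   | *seg_min_out = addr_base
   | *seg_max_out = addr_base + 12 + 4 - 4 = addr_base + 12

   and every align-rounded address accessed by D satisfies
   *seg_min_out <= ADDR <= *seg_max_out.  */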
/* Generate a runtime condition that is true if ALIAS_PAIR is free of aliases,
   storing the condition in *COND_EXPR.  The fallback is to generate a
   test that the two accesses do not overlap:

     end_a <= start_b || end_b <= start_a.  */

static void
create_intersect_range_checks (class loop *loop, tree *cond_expr,
			       const dr_with_seg_len_pair_t &alias_pair)
{
  const dr_with_seg_len &dr_a = alias_pair.first;
  const dr_with_seg_len &dr_b = alias_pair.second;
  *cond_expr = NULL_TREE;
  if (create_intersect_range_checks_index (loop, cond_expr, alias_pair))
    return;

  if (create_ifn_alias_checks (cond_expr, alias_pair))
    return;

  if (create_waw_or_war_checks (cond_expr, alias_pair))
    return;

  unsigned HOST_WIDE_INT min_align;
  tree_code cmp_code;
  /* We don't have to check DR_ALIAS_MIXED_STEPS here, since both versions
     of the test below are equivalent.  This is just an optimization
     heuristic.  */
  if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
      && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
    {
      /* In this case adding access_size to seg_len is likely to give
	 a simple X * step, where X is either the number of scalar
	 iterations or the vectorization factor.  We're better off
	 keeping that, rather than subtracting an alignment from it.

	 In this case the maximum values are exclusive and so there is
	 no alias if the maximum of one segment equals the minimum
	 of the other.  */
      min_align = 0;
      cmp_code = LE_EXPR;
    }
  else
    {
      /* Calculate the minimum alignment shared by all four pointers,
	 then arrange for this alignment to be subtracted from the
	 exclusive maximum values to get inclusive maximum values.
	 This "- min_align" is cumulative with a "+ access_size"
	 in the calculation of the maximum values.  In the best
	 (and common) case, the two cancel each other out, leaving
	 us with an inclusive bound based only on seg_len.  In the
	 worst case we're simply adding a smaller number than before.

	 Because the maximum values are inclusive, there is an alias
	 if the maximum value of one segment is equal to the minimum
	 value of the other.  */
      min_align = MIN (dr_a.align, dr_b.align);
      cmp_code = LT_EXPR;
    }

  tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
  get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
  get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);

  *cond_expr
    = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
		   fold_build2 (cmp_code, boolean_type_node,
				seg_a_max, seg_b_min),
		   fold_build2 (cmp_code, boolean_type_node,
				seg_b_max, seg_a_min));
  if (dump_enabled_p ())
    dump_printf (MSG_NOTE, "using an address-based overlap test\n");
}
/* Create a conditional expression that represents the run-time checks for
   overlapping of address ranges represented by a list of data references
   pairs passed in ALIAS_PAIRS.  Data references are in LOOP.  The returned
   COND_EXPR is the conditional expression to be used in the if statement
   that controls which version of the loop gets executed at runtime.  */

void
create_runtime_alias_checks (class loop *loop,
			     const vec<dr_with_seg_len_pair_t> *alias_pairs,
			     tree *cond_expr)
{
  tree part_cond_expr;

  fold_defer_overflow_warnings ();
  for (const dr_with_seg_len_pair_t &alias_pair : alias_pairs)
    {
      gcc_assert (alias_pair.flags);
      if (dump_enabled_p ())
	dump_printf (MSG_NOTE,
		     "create runtime check for data references %T and %T\n",
		     DR_REF (alias_pair.first.dr),
		     DR_REF (alias_pair.second.dr));

      /* Create a condition expression for each pair of data references.  */
      create_intersect_range_checks (loop, &part_cond_expr, alias_pair);
      if (*cond_expr)
	*cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
				  *cond_expr, part_cond_expr);
      else
	*cond_expr = part_cond_expr;
    }
  fold_undefer_and_ignore_overflow_warnings ();
}
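/* For illustration, the caller typically uses the result for loop
   versioning, along the lines of (a sketch):

   | if (cond_expr)
   |   ... optimized (e.g. vectorized) copy of the loop ...
   | else
   |   ... original loop ...

   with each alias pair contributing one conjunct of COND_EXPR.  */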
/* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
   expressions.  */

static bool
dr_equal_offsets_p1 (tree offset1, tree offset2)
{
  bool res;

  STRIP_NOPS (offset1);
  STRIP_NOPS (offset2);

  if (offset1 == offset2)
    return true;

  if (TREE_CODE (offset1) != TREE_CODE (offset2)
      || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
    return false;

  res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
			     TREE_OPERAND (offset2, 0));

  if (!res || !BINARY_CLASS_P (offset1))
    return res;

  res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
			     TREE_OPERAND (offset2, 1));

  return res;
}

/* Check if DRA and DRB have equal offsets.  */

bool
dr_equal_offsets_p (struct data_reference *dra,
		    struct data_reference *drb)
{
  tree offset1, offset2;

  offset1 = DR_OFFSET (dra);
  offset2 = DR_OFFSET (drb);

  return dr_equal_offsets_p1 (offset1, offset2);
}
/* Returns true if FNA == FNB.  */

static bool
affine_function_equal_p (affine_fn fna, affine_fn fnb)
{
  unsigned i, n = fna.length ();

  if (n != fnb.length ())
    return false;

  for (i = 0; i < n; i++)
    if (!operand_equal_p (fna[i], fnb[i], 0))
      return false;

  return true;
}

/* If all the functions in CF are the same, returns one of them,
   otherwise returns NULL.  */

static affine_fn
common_affine_function (conflict_function *cf)
{
  unsigned i;
  affine_fn comm;

  if (!CF_NONTRIVIAL_P (cf))
    return affine_fn ();

  comm = cf->fns[0];

  for (i = 1; i < cf->n; i++)
    if (!affine_function_equal_p (comm, cf->fns[i]))
      return affine_fn ();

  return comm;
}

/* Returns the base of the affine function FN.  */

static tree
affine_function_base (affine_fn fn)
{
  return fn[0];
}

/* Returns true if FN is a constant.  */

static bool
affine_function_constant_p (affine_fn fn)
{
  unsigned i;
  tree coef;

  for (i = 1; fn.iterate (i, &coef); i++)
    if (!integer_zerop (coef))
      return false;

  return true;
}

/* Returns true if FN is the zero constant function.  */

static bool
affine_function_zero_p (affine_fn fn)
{
  return (integer_zerop (affine_function_base (fn))
	  && affine_function_constant_p (fn));
}

/* Returns a signed integer type with the largest precision from TA
   and TB.  */

static tree
signed_type_for_types (tree ta, tree tb)
{
  if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
    return signed_type_for (ta);
  else
    return signed_type_for (tb);
}

/* Applies operation OP on affine functions FNA and FNB, and returns the
   result.  */

static affine_fn
affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
{
  unsigned i, n, m;
  affine_fn ret;
  tree coef;

  if (fnb.length () > fna.length ())
    {
      n = fna.length ();
      m = fnb.length ();
    }
  else
    {
      n = fnb.length ();
      m = fna.length ();
    }

  ret.create (m);
  for (i = 0; i < n; i++)
    {
      tree type = signed_type_for_types (TREE_TYPE (fna[i]),
					 TREE_TYPE (fnb[i]));
      ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
    }

  for (; fna.iterate (i, &coef); i++)
    ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
				 coef, integer_zero_node));
  for (; fnb.iterate (i, &coef); i++)
    ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
				 integer_zero_node, coef));

  return ret;
}

/* Returns the sum of affine functions FNA and FNB.  */

static affine_fn
affine_fn_plus (affine_fn fna, affine_fn fnb)
{
  return affine_fn_op (PLUS_EXPR, fna, fnb);
}

/* Returns the difference of affine functions FNA and FNB.  */

static affine_fn
affine_fn_minus (affine_fn fna, affine_fn fnb)
{
  return affine_fn_op (MINUS_EXPR, fna, fnb);
}

/* Frees affine function FN.  */

static void
affine_fn_free (affine_fn fn)
{
  fn.release ();
}
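/* For illustration, an affine_fn {c0, c1, ..., cn} represents
   c0 + c1 * x_1 + ... + cn * x_n, so for example

   | {1, 2} + {3, 1, 4} = {4, 3, 4}
   | i.e. (1 + 2*x_1) + (3 + x_1 + 4*x_2) = 4 + 3*x_1 + 4*x_2

   with affine_fn_op padding the shorter function with zero
   coefficients.  */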
/* Determine for each subscript in the data dependence relation DDR
   the distance.  */

static void
compute_subscript_distance (struct data_dependence_relation *ddr)
{
  conflict_function *cf_a, *cf_b;
  affine_fn fn_a, fn_b, diff;

  if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      unsigned int i;

      for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
	{
	  struct subscript *subscript;

	  subscript = DDR_SUBSCRIPT (ddr, i);
	  cf_a = SUB_CONFLICTS_IN_A (subscript);
	  cf_b = SUB_CONFLICTS_IN_B (subscript);

	  fn_a = common_affine_function (cf_a);
	  fn_b = common_affine_function (cf_b);
	  if (!fn_a.exists () || !fn_b.exists ())
	    {
	      SUB_DISTANCE (subscript) = chrec_dont_know;
	      return;
	    }
	  diff = affine_fn_minus (fn_a, fn_b);

	  if (affine_function_constant_p (diff))
	    SUB_DISTANCE (subscript) = affine_function_base (diff);
	  else
	    SUB_DISTANCE (subscript) = chrec_dont_know;

	  affine_fn_free (diff);
	}
    }
}
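/* For illustration: if the conflicts of a subscript pair occur at
   iterations fn_a (k) = k of A and fn_b (k) = k + 3 of B, then

   | diff = fn_a - fn_b = -3

   is constant, so SUB_DISTANCE is set to -3; a non-constant difference
   leaves SUB_DISTANCE as chrec_dont_know.  */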
/* Returns the conflict function for "unknown".  */

static conflict_function *
conflict_fn_not_known (void)
{
  conflict_function *fn = XCNEW (conflict_function);
  fn->n = NOT_KNOWN;

  return fn;
}

/* Returns the conflict function for "independent".  */

static conflict_function *
conflict_fn_no_dependence (void)
{
  conflict_function *fn = XCNEW (conflict_function);
  fn->n = NO_DEPENDENCE;

  return fn;
}
/* Returns true if the address of OBJ is invariant in LOOP.  */

static bool
object_address_invariant_in_loop_p (const class loop *loop, const_tree obj)
{
  while (handled_component_p (obj))
    {
      if (TREE_CODE (obj) == ARRAY_REF)
	{
	  for (int i = 1; i < 4; ++i)
	    if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i),
							loop->num))
	      return false;
	}
      else if (TREE_CODE (obj) == COMPONENT_REF)
	{
	  if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
						      loop->num))
	    return false;
	}
      obj = TREE_OPERAND (obj, 0);
    }

  if (!INDIRECT_REF_P (obj)
      && TREE_CODE (obj) != MEM_REF)
    return true;

  return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
						  loop->num);
}
/* Returns false if we can prove that data references A and B do not alias,
   true otherwise.  If LOOP_NEST is false no cross-iteration aliases are
   considered, otherwise they are.  */

static bool
dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
		class loop *loop_nest)
{
  tree addr_a = DR_BASE_OBJECT (a);
  tree addr_b = DR_BASE_OBJECT (b);

  /* If we are not processing a loop nest but scalar code we
     do not need to care about possible cross-iteration dependences
     and thus can process the full original reference.  Do so,
     similar to how loop invariant motion applies extra offset-based
     disambiguation.  */
  if (!loop_nest)
    {
      tree tree_size_a = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (a)));
      tree tree_size_b = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (b)));

      if (DR_BASE_ADDRESS (a)
	  && DR_BASE_ADDRESS (b)
	  && operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b))
	  && operand_equal_p (DR_OFFSET (a), DR_OFFSET (b))
	  && poly_int_tree_p (tree_size_a)
	  && poly_int_tree_p (tree_size_b)
	  && !ranges_maybe_overlap_p (wi::to_poly_widest (DR_INIT (a)),
				      wi::to_poly_widest (tree_size_a),
				      wi::to_poly_widest (DR_INIT (b)),
				      wi::to_poly_widest (tree_size_b)))
	{
	  gcc_assert (integer_zerop (DR_STEP (a))
		      && integer_zerop (DR_STEP (b)));
	  return false;
	}

      aff_tree off1, off2;
      poly_widest_int size1, size2;
      get_inner_reference_aff (DR_REF (a), &off1, &size1);
      get_inner_reference_aff (DR_REF (b), &off2, &size2);
      aff_combination_scale (&off1, -1);
      aff_combination_add (&off2, &off1);
      if (aff_comb_cannot_overlap_p (&off2, size1, size2))
	return false;
    }

  if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
      && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
      /* For cross-iteration dependences the cliques must be valid for the
	 whole loop, not just individual iterations.  */
      && (!loop_nest
	  || MR_DEPENDENCE_CLIQUE (addr_a) == 1
	  || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique)
      && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
      && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
    return false;

  /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
     do not know the size of the base-object.  So we cannot do any
     offset/overlap based analysis but have to rely on points-to
     information only.  */
  if (TREE_CODE (addr_a) == MEM_REF
      && (DR_UNCONSTRAINED_BASE (a)
	  || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
    {
      /* For true dependences we can apply TBAA.  */
      if (flag_strict_aliasing
	  && DR_IS_WRITE (a) && DR_IS_READ (b)
	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
				     get_alias_set (DR_REF (b))))
	return false;
      if (TREE_CODE (addr_b) == MEM_REF)
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       TREE_OPERAND (addr_b, 0));
      else
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       build_fold_addr_expr (addr_b));
    }
  else if (TREE_CODE (addr_b) == MEM_REF
	   && (DR_UNCONSTRAINED_BASE (b)
	       || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
    {
      /* For true dependences we can apply TBAA.  */
      if (flag_strict_aliasing
	  && DR_IS_WRITE (a) && DR_IS_READ (b)
	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
				     get_alias_set (DR_REF (b))))
	return false;
      if (TREE_CODE (addr_a) == MEM_REF)
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       TREE_OPERAND (addr_b, 0));
      else
	return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
				       TREE_OPERAND (addr_b, 0));
    }

  /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
     that is being subsetted in the loop nest.  */
  if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
    return refs_output_dependent_p (addr_a, addr_b);
  else if (DR_IS_READ (a) && DR_IS_WRITE (b))
    return refs_anti_dependent_p (addr_a, addr_b);
  return refs_may_alias_p (addr_a, addr_b);
}
/* REF_A and REF_B both satisfy access_fn_component_p.  Return true
   if it is meaningful to compare their associated access functions
   when checking for dependencies.  */

static bool
access_fn_components_comparable_p (tree ref_a, tree ref_b)
{
  /* Allow pairs of component refs from the following sets:

       { REALPART_EXPR, IMAGPART_EXPR }
       { COMPONENT_REF }
       { ARRAY_REF }.  */
  tree_code code_a = TREE_CODE (ref_a);
  tree_code code_b = TREE_CODE (ref_b);
  if (code_a == IMAGPART_EXPR)
    code_a = REALPART_EXPR;
  if (code_b == IMAGPART_EXPR)
    code_b = REALPART_EXPR;
  if (code_a != code_b)
    return false;

  if (TREE_CODE (ref_a) == COMPONENT_REF)
    /* ??? We cannot simply use the type of operand #0 of the refs here as
       the Fortran compiler smuggles type punning into COMPONENT_REFs.
       Use the DECL_CONTEXT of the FIELD_DECLs instead.  */
    return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
	    == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));

  return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
			     TREE_TYPE (TREE_OPERAND (ref_b, 0)));
}
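/* For illustration: __real a[i] and __imag a[j] compare as comparable
   (IMAGPART_EXPR is canonicalized to REALPART_EXPR above), whereas two
   COMPONENT_REFs are comparable only if their FIELD_DECLs share the
   same DECL_CONTEXT, i.e. belong to the same RECORD_TYPE.  */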
/* Initialize a data dependence relation RES in LOOP_NEST.  USE_ALT_INDICES
   is true when the main indices of A and B were not comparable so we try
   again with alternate indices computed on an indirect reference.  */

struct data_dependence_relation *
initialize_data_dependence_relation (struct data_dependence_relation *res,
				     vec<loop_p> loop_nest,
				     bool use_alt_indices)
{
  struct data_reference *a = DDR_A (res);
  struct data_reference *b = DDR_B (res);
  unsigned int i;

  struct indices *indices_a = &a->indices;
  struct indices *indices_b = &b->indices;
  if (use_alt_indices)
    {
      if (TREE_CODE (DR_REF (a)) != MEM_REF)
	indices_a = &a->alt_indices;
      if (TREE_CODE (DR_REF (b)) != MEM_REF)
	indices_b = &b->alt_indices;
    }
  unsigned int num_dimensions_a = indices_a->access_fns.length ();
  unsigned int num_dimensions_b = indices_b->access_fns.length ();
  if (num_dimensions_a == 0 || num_dimensions_b == 0)
    {
      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
      return res;
    }

  /* For unconstrained bases, the root (highest-indexed) subscript
     describes a variation in the base of the original DR_REF rather
     than a component access.  We have no type that accurately describes
     the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
     applying this subscript) so limit the search to the last real
     component access.

     E.g. for:

       f (int a[][8], int b[][8])
       {
	 for (int i = 0; i < 8; ++i)
	   a[i * 2][0] = b[i][0];
       }

     the a and b accesses have a single ARRAY_REF component reference [0]
     but have two subscripts.  */
  if (indices_a->unconstrained_base)
    num_dimensions_a -= 1;
  if (indices_b->unconstrained_base)
    num_dimensions_b -= 1;

  /* These structures describe sequences of component references in
     DR_REF (A) and DR_REF (B).  Each component reference is tied to a
     specific access function.  */
  struct {
    /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
       DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
       indices.  In C notation, these are the indices of the rightmost
       component references; e.g. for a sequence .b.c.d, the start
       index is for .d.  */
    unsigned int start_a;
    unsigned int start_b;

    /* The sequence contains LENGTH consecutive access functions from
       each DR.  */
    unsigned int length;

    /* The enclosing objects for the A and B sequences respectively,
       i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
       and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied.  */
    tree object_a;
    tree object_b;
  } full_seq = {}, struct_seq = {};

  /* Before each iteration of the loop:

     - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
     - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B).  */
  unsigned int index_a = 0;
  unsigned int index_b = 0;
  tree ref_a = DR_REF (a);
  tree ref_b = DR_REF (b);

  /* Now walk the component references from the final DR_REFs back up to
     the enclosing base objects.  Each component reference corresponds
     to one access function in the DR, with access function 0 being for
     the final DR_REF and the highest-indexed access function being the
     one that is applied to the base of the DR.

     Look for a sequence of component references whose access functions
     are comparable (see access_fn_components_comparable_p).  If more
     than one such sequence exists, pick the one nearest the base
     (which is the leftmost sequence in C notation).  Store this sequence
     in FULL_SEQ.

     For example, if we have:

	struct foo { struct bar s; ... } (*a)[10], (*b)[10];

	A: a[0][i].s.c.d
	B: __real b[0][i].s.e[i].f

     (where d is the same type as the real component of f) then the access
     functions would be:

			 0   1   2   3
	A:              .d  .c  .s [i]

			 0   1   2   3   4   5
	B:  __real  .f [i] .e  .s [i]

     The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
     and [i] is an ARRAY_REF.  However, the A1/B3 column contains two
     COMPONENT_REF accesses for struct bar, so is comparable.  Likewise
     the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
     so is comparable.  The A3/B5 column contains two ARRAY_REFs that
     index foo[10] arrays, so is again comparable.  The sequence is
     therefore:

	A: [1, 3]  (i.e. [i].s.c)
	B: [3, 5]  (i.e. [i].s.e)

     Also look for sequences of component references whose access
     functions are comparable and whose enclosing objects have the same
     RECORD_TYPE.  Store this sequence in STRUCT_SEQ.  In the above
     example, STRUCT_SEQ would be:

	A: [1, 2]  (i.e. s.c)
	B: [3, 4]  (i.e. s.e)  */
  while (index_a < num_dimensions_a && index_b < num_dimensions_b)
    {
      /* The alternate indices form always has a single dimension
	 with unconstrained base.  */
      gcc_assert (!use_alt_indices);

      /* REF_A and REF_B must be one of the component access types
	 allowed by dr_analyze_indices.  */
      gcc_checking_assert (access_fn_component_p (ref_a));
      gcc_checking_assert (access_fn_component_p (ref_b));

      /* Get the immediately-enclosing objects for REF_A and REF_B,
	 i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
	 and DR_ACCESS_FN (B, INDEX_B).  */
      tree object_a = TREE_OPERAND (ref_a, 0);
      tree object_b = TREE_OPERAND (ref_b, 0);

      tree type_a = TREE_TYPE (object_a);
      tree type_b = TREE_TYPE (object_b);
      if (access_fn_components_comparable_p (ref_a, ref_b))
	{
	  /* This pair of component accesses is comparable for dependence
	     analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
	     DR_ACCESS_FN (B, INDEX_B) in the sequence.  */
	  if (full_seq.start_a + full_seq.length != index_a
	      || full_seq.start_b + full_seq.length != index_b)
	    {
	      /* The accesses don't extend the current sequence,
		 so start a new one here.  */
	      full_seq.start_a = index_a;
	      full_seq.start_b = index_b;
	      full_seq.length = 0;
	    }

	  /* Add this pair of references to the sequence.  */
	  full_seq.length += 1;
	  full_seq.object_a = object_a;
	  full_seq.object_b = object_b;

	  /* If the enclosing objects are structures (and thus have the
	     same RECORD_TYPE), record the new sequence in STRUCT_SEQ.  */
	  if (TREE_CODE (type_a) == RECORD_TYPE)
	    struct_seq = full_seq;

	  /* Move to the next containing reference for both A and B.  */
	  ref_a = object_a;
	  ref_b = object_b;
	  index_a += 1;
	  index_b += 1;
	  continue;
	}

      /* Try to approach equal type sizes.  */
      if (!COMPLETE_TYPE_P (type_a)
	  || !COMPLETE_TYPE_P (type_b)
	  || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
	  || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
	break;

      unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
      unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
      if (size_a <= size_b)
	{
	  index_a += 1;
	  ref_a = object_a;
	}
      if (size_b <= size_a)
	{
	  index_b += 1;
	  ref_b = object_b;
	}
    }

  /* See whether FULL_SEQ ends at the base and whether the two bases
     are equal.  We do not care about TBAA or alignment info so we can
     use OEP_ADDRESS_OF to avoid false negatives.  */
  tree base_a = indices_a->base_object;
  tree base_b = indices_b->base_object;
  bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
		      && full_seq.start_b + full_seq.length == num_dimensions_b
		      && (indices_a->unconstrained_base
			  == indices_b->unconstrained_base)
		      && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
		      && (types_compatible_p (TREE_TYPE (base_a),
					      TREE_TYPE (base_b))
			  || (!base_supports_access_fn_components_p (base_a)
			      && !base_supports_access_fn_components_p (base_b)
			      && operand_equal_p
				   (TYPE_SIZE (TREE_TYPE (base_a)),
				    TYPE_SIZE (TREE_TYPE (base_b)), 0)))
		      && (!loop_nest.exists ()
			  || (object_address_invariant_in_loop_p
			      (loop_nest[0], base_a))));

  /* If the bases are the same, we can include the base variation too.
     E.g. the b accesses in:

       for (int i = 0; i < n; ++i)
	 b[i + 4][0] = b[i][0];

     have a definite dependence distance of 4, while for:

       for (int i = 0; i < n; ++i)
	 a[i + 4][0] = b[i][0];

     the dependence distance depends on the gap between a and b.

     If the bases are different then we can only rely on the sequence
     rooted at a structure access, since arrays are allowed to overlap
     arbitrarily and change shape arbitrarily.  E.g. we treat this as
     valid code:

       int a[256];
       ...
       ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];

     where two lvalues with the same int[4][3] type overlap, and where
     both lvalues are distinct from the object's declared type.  */
  if (same_base_p)
    {
      if (indices_a->unconstrained_base)
	full_seq.length += 1;
    }
  else
    full_seq = struct_seq;
  /* Punt if we didn't find a suitable sequence.  */
  if (full_seq.length == 0)
    {
      if (use_alt_indices
	  || (TREE_CODE (DR_REF (a)) == MEM_REF
	      && TREE_CODE (DR_REF (b)) == MEM_REF)
	  || may_be_nonaddressable_p (DR_REF (a))
	  || may_be_nonaddressable_p (DR_REF (b)))
	{
	  /* Fully exhausted possibilities.  */
	  DDR_ARE_DEPENDENT (res) = chrec_dont_know;
	  return res;
	}

      /* Try evaluating both DRs as dereferences of pointers.  */
      if (!a->alt_indices.base_object
	  && TREE_CODE (DR_REF (a)) != MEM_REF)
	{
	  tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (a)),
				 build1 (ADDR_EXPR, ptr_type_node, DR_REF (a)),
				 build_int_cst
				   (reference_alias_ptr_type (DR_REF (a)), 0));
	  dr_analyze_indices (&a->alt_indices, alt_ref,
			      loop_preheader_edge (loop_nest[0]),
			      loop_containing_stmt (DR_STMT (a)));
	}
      if (!b->alt_indices.base_object
	  && TREE_CODE (DR_REF (b)) != MEM_REF)
	{
	  tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (b)),
				 build1 (ADDR_EXPR, ptr_type_node, DR_REF (b)),
				 build_int_cst
				   (reference_alias_ptr_type (DR_REF (b)), 0));
	  dr_analyze_indices (&b->alt_indices, alt_ref,
			      loop_preheader_edge (loop_nest[0]),
			      loop_containing_stmt (DR_STMT (b)));
	}
      return initialize_data_dependence_relation (res, loop_nest, true);
    }

  if (!same_base_p)
    {
      /* Partial overlap is possible for different bases when strict aliasing
	 is not in effect.  It's also possible if either base involves a union
	 access; e.g. for:

	   struct s1 { int a[2]; };
	   struct s2 { struct s1 b; int c; };
	   struct s3 { int d; struct s1 e; };
	   union u { struct s2 f; struct s3 g; } *p, *q;

	 the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
	 "p->g.e" (base "p->g") and might partially overlap the s1 at
	 "q->g.e" (base "q->g").  */
      if (!flag_strict_aliasing
	  || ref_contains_union_access_p (full_seq.object_a)
	  || ref_contains_union_access_p (full_seq.object_b))
	{
	  DDR_ARE_DEPENDENT (res) = chrec_dont_know;
	  return res;
	}

      DDR_COULD_BE_INDEPENDENT_P (res) = true;
      if (!loop_nest.exists ()
	  || (object_address_invariant_in_loop_p (loop_nest[0],
						  full_seq.object_a)
	      && object_address_invariant_in_loop_p (loop_nest[0],
						     full_seq.object_b)))
	{
	  DDR_OBJECT_A (res) = full_seq.object_a;
	  DDR_OBJECT_B (res) = full_seq.object_b;
	}
    }

  DDR_AFFINE_P (res) = true;
  DDR_ARE_DEPENDENT (res) = NULL_TREE;
  DDR_SUBSCRIPTS (res).create (full_seq.length);
  DDR_LOOP_NEST (res) = loop_nest;
  DDR_SELF_REFERENCE (res) = false;

  for (i = 0; i < full_seq.length; ++i)
    {
      struct subscript *subscript;

      subscript = XNEW (struct subscript);
      SUB_ACCESS_FN (subscript, 0)
	= indices_a->access_fns[full_seq.start_a + i];
      SUB_ACCESS_FN (subscript, 1)
	= indices_b->access_fns[full_seq.start_b + i];
      SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
      SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
      SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
      SUB_DISTANCE (subscript) = chrec_dont_know;
      DDR_SUBSCRIPTS (res).safe_push (subscript);
    }

  return res;
}
/* Initialize a data dependence relation between data accesses A and
   B.  NB_LOOPS is the number of loops surrounding the references: the
   size of the classic distance/direction vectors.  */

struct data_dependence_relation *
initialize_data_dependence_relation (struct data_reference *a,
				     struct data_reference *b,
				     vec<loop_p> loop_nest)
{
  data_dependence_relation *res = XCNEW (struct data_dependence_relation);
  DDR_A (res) = a;
  DDR_B (res) = b;
  DDR_LOOP_NEST (res).create (0);
  DDR_SUBSCRIPTS (res).create (0);
  DDR_DIR_VECTS (res).create (0);
  DDR_DIST_VECTS (res).create (0);

  if (a == NULL || b == NULL)
    {
      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
      return res;
    }

  /* If the data references do not alias, then they are independent.  */
  if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL))
    {
      DDR_ARE_DEPENDENT (res) = chrec_known;
      return res;
    }

  return initialize_data_dependence_relation (res, loop_nest, false);
}
/* Frees memory used by the conflict function F.  */

static void
free_conflict_function (conflict_function *f)
{
  unsigned i;

  if (CF_NONTRIVIAL_P (f))
    {
      for (i = 0; i < f->n; i++)
	affine_fn_free (f->fns[i]);
    }
  free (f);
}

/* Frees memory used by SUBSCRIPTS.  */

static void
free_subscripts (vec<subscript_p> subscripts)
{
  for (subscript_p s : subscripts)
    {
      free_conflict_function (s->conflicting_iterations_in_a);
      free_conflict_function (s->conflicting_iterations_in_b);
      free (s);
    }
  subscripts.release ();
}

/* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
   description.  */

static void
finalize_ddr_dependent (struct data_dependence_relation *ddr,
			tree chrec)
{
  DDR_ARE_DEPENDENT (ddr) = chrec;
  free_subscripts (DDR_SUBSCRIPTS (ddr));
  DDR_SUBSCRIPTS (ddr).create (0);
}
/* The dependence relation DDR cannot be represented by a distance
   vector.  */

static void
non_affine_dependence_relation (struct data_dependence_relation *ddr)
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file,
	     "(Dependence relation cannot be represented by distance vector.) \n");

  DDR_AFFINE_P (ddr) = false;
}
/* This section contains the classic Banerjee tests.  */

/* Returns true iff CHREC_A and CHREC_B are not dependent on any index
   variables, i.e., if the ZIV (Zero Index Variable) test is true.  */

static inline bool
ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
{
  return (evolution_function_is_constant_p (chrec_a)
	  && evolution_function_is_constant_p (chrec_b));
}

/* Returns true iff CHREC_A and CHREC_B are dependent on an index
   variable, i.e., if the SIV (Single Index Variable) test is true.  */

static bool
siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
{
  if ((evolution_function_is_constant_p (chrec_a)
       && evolution_function_is_univariate_p (chrec_b))
      || (evolution_function_is_constant_p (chrec_b)
	  && evolution_function_is_univariate_p (chrec_a)))
    return true;

  if (evolution_function_is_univariate_p (chrec_a)
      && evolution_function_is_univariate_p (chrec_b))
    {
      switch (TREE_CODE (chrec_a))
	{
	case POLYNOMIAL_CHREC:
	  switch (TREE_CODE (chrec_b))
	    {
	    case POLYNOMIAL_CHREC:
	      if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
		return false;
	      /* FALLTHRU */

	    default:
	      return true;
	    }

	default:
	  return true;
	}
    }

  return false;
}
/* Creates a conflict function with N dimensions.  The affine functions
   in each dimension follow.  */

static conflict_function *
conflict_fn (unsigned n, ...)
{
  unsigned i;
  conflict_function *ret = XCNEW (conflict_function);
  va_list ap;

  gcc_assert (n > 0 && n <= MAX_DIM);
  va_start (ap, n);

  ret->n = n;
  for (i = 0; i < n; i++)
    ret->fns[i] = va_arg (ap, affine_fn);
  va_end (ap);

  return ret;
}

/* Returns constant affine function with value CST.  */

static affine_fn
affine_fn_cst (tree cst)
{
  affine_fn fn;
  fn.create (1);
  fn.quick_push (cst);
  return fn;
}

/* Returns affine function with single variable, CST + COEF * x_DIM.  */

static affine_fn
affine_fn_univar (tree cst, unsigned dim, tree coef)
{
  affine_fn fn;
  fn.create (dim + 1);
  unsigned i;

  gcc_assert (dim > 0);
  fn.quick_push (cst);
  for (i = 1; i < dim; i++)
    fn.quick_push (integer_zero_node);
  fn.quick_push (coef);

  return fn;
}
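/* For illustration:

   | affine_fn_cst (c)          -> {c}        i.e. the constant c
   | affine_fn_univar (c, 2, s) -> {c, 0, s}  i.e. c + s * x_2  */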
/* Analyze a ZIV (Zero Index Variable) subscript.  *OVERLAPS_A and
   *OVERLAPS_B are initialized to the functions that describe the
   relation between the elements accessed twice by CHREC_A and
   CHREC_B.  For k >= 0, the following property is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_ziv_subscript (tree chrec_a,
		       tree chrec_b,
		       conflict_function **overlaps_a,
		       conflict_function **overlaps_b,
		       tree *last_conflicts)
{
  tree type, difference;
  dependence_stats.num_ziv++;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_ziv_subscript \n");

  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
  chrec_a = chrec_convert (type, chrec_a, NULL);
  chrec_b = chrec_convert (type, chrec_b, NULL);
  difference = chrec_fold_minus (type, chrec_a, chrec_b);

  switch (TREE_CODE (difference))
    {
    case INTEGER_CST:
      if (integer_zerop (difference))
	{
	  /* The difference is equal to zero: the accessed index
	     overlaps for each iteration in the loop.  */
	  *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
	  *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
	  *last_conflicts = chrec_dont_know;
	  dependence_stats.num_ziv_dependent++;
	}
      else
	{
	  /* The accesses do not overlap.  */
	  *overlaps_a = conflict_fn_no_dependence ();
	  *overlaps_b = conflict_fn_no_dependence ();
	  *last_conflicts = integer_zero_node;
	  dependence_stats.num_ziv_independent++;
	}
      break;

    default:
      /* We're not sure whether the indexes overlap.  For the moment,
	 conservatively answer "don't know".  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "ziv test failed: difference is non-integer.\n");

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      dependence_stats.num_ziv_unimplemented++;
      break;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, ")\n");
}
/* Similar to max_stmt_executions_int, but returns the bound as a tree,
   and only if it fits to the int type.  If this is not the case, or the
   bound on the number of iterations of LOOP could not be derived, returns
   chrec_dont_know.  */

static tree
max_stmt_executions_tree (class loop *loop)
{
  widest_int nit;

  if (!max_stmt_executions (loop, &nit))
    return chrec_dont_know;

  if (!wi::fits_to_tree_p (nit, unsigned_type_node))
    return chrec_dont_know;

  return wide_int_to_tree (unsigned_type_node, nit);
}
/* Determine whether the CHREC is always positive/negative.  If the expression
   cannot be statically analyzed, return false, otherwise set the answer into
   VALUE.  */

static bool
chrec_is_positive (tree chrec, bool *value)
{
  bool value0, value1, value2;
  tree end_value, nb_iter;

  switch (TREE_CODE (chrec))
    {
    case POLYNOMIAL_CHREC:
      if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
	  || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
	return false;

      /* FIXME -- overflows.  */
      if (value0 == value1)
	{
	  *value = value0;
	  return true;
	}

      /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
	 and the proof consists in showing that the sign never
	 changes during the execution of the loop, from 0 to
	 loop->nb_iterations.  */
      if (!evolution_function_is_affine_p (chrec))
	return false;

      nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
      if (chrec_contains_undetermined (nb_iter))
	return false;

#if 0
      /* TODO -- If the test is after the exit, we may decrease the number of
	 iterations by one.  */
      if (after_exit)
	nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
#endif

      end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);

      if (!chrec_is_positive (end_value, &value2))
	return false;

      *value = value0;
      return value0 == value2;

    case INTEGER_CST:
      switch (tree_int_cst_sgn (chrec))
	{
	case -1:
	  *value = false;
	  break;
	case 1:
	  *value = true;
	  break;
	default:
	  return false;
	}
      return true;

    default:
      return false;
    }
}
/* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
   constant, and CHREC_B is an affine function.  *OVERLAPS_A and
   *OVERLAPS_B are initialized to the functions that describe the
   relation between the elements accessed twice by CHREC_A and
   CHREC_B.  For k >= 0, the following property is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_siv_subscript_cst_affine (tree chrec_a,
				  tree chrec_b,
				  conflict_function **overlaps_a,
				  conflict_function **overlaps_b,
				  tree *last_conflicts)
{
  bool value0, value1, value2;
  tree type, difference, tmp;

  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
  chrec_a = chrec_convert (type, chrec_a, NULL);
  chrec_b = chrec_convert (type, chrec_b, NULL);
  difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);

  /* Special case overlap in the first iteration.  */
  if (integer_zerop (difference))
    {
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = integer_one_node;
      return;
    }

  if (!chrec_is_positive (initial_condition (difference), &value0))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "siv test failed: chrec is not positive.\n");

      dependence_stats.num_siv_unimplemented++;
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      return;
    }
  else
    {
      if (value0 == false)
	{
	  if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
	      || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "siv test failed: chrec not positive.\n");

	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	      dependence_stats.num_siv_unimplemented++;
	      return;
	    }
	  else
	    {
	      if (value1 == true)
		{
		  /* Example:
		     chrec_a = 12
		     chrec_b = {10, +, 1}
		  */

		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
		    {
		      HOST_WIDE_INT numiter;
		      class loop *loop = get_chrec_loop (chrec_b);

		      *overlaps_a
			= conflict_fn (1, affine_fn_cst (integer_zero_node));
		      tmp = fold_build2 (EXACT_DIV_EXPR, type,
					 fold_build1 (ABS_EXPR, type,
						      difference),
					 CHREC_RIGHT (chrec_b));
		      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
		      *last_conflicts = integer_one_node;

		      /* Perform weak-zero siv test to see if overlap is
			 outside the loop bounds.  */
		      numiter = max_stmt_executions_int (loop);

		      if (numiter >= 0
			  && compare_tree_int (tmp, numiter) > 0)
			{
			  free_conflict_function (*overlaps_a);
			  free_conflict_function (*overlaps_b);
			  *overlaps_a = conflict_fn_no_dependence ();
			  *overlaps_b = conflict_fn_no_dependence ();
			  *last_conflicts = integer_zero_node;
			  dependence_stats.num_siv_independent++;
			  return;
			}
		      dependence_stats.num_siv_dependent++;
		      return;
		    }

		  /* When the step does not divide the difference, there are
		     no overlaps.  */
		  else
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      dependence_stats.num_siv_independent++;
		      return;
		    }
		}
	      else
		{
		  /* Example:
		     chrec_a = 12
		     chrec_b = {10, +, -1}

		     In this case, chrec_a will not overlap with chrec_b.  */
		  *overlaps_a = conflict_fn_no_dependence ();
		  *overlaps_b = conflict_fn_no_dependence ();
		  *last_conflicts = integer_zero_node;
		  dependence_stats.num_siv_independent++;
		  return;
		}
	    }
	}
      else
	{
	  if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
	      || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "siv test failed: chrec not positive.\n");

	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	      dependence_stats.num_siv_unimplemented++;
	      return;
	    }
	  else
	    {
	      if (value2 == false)
		{
		  /* Example:
		     chrec_a = 3
		     chrec_b = {10, +, -1}
		  */
		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
		    {
		      HOST_WIDE_INT numiter;
		      class loop *loop = get_chrec_loop (chrec_b);

		      *overlaps_a
			= conflict_fn (1, affine_fn_cst (integer_zero_node));
		      tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
					 CHREC_RIGHT (chrec_b));
		      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
		      *last_conflicts = integer_one_node;

		      /* Perform weak-zero siv test to see if overlap is
			 outside the loop bounds.  */
		      numiter = max_stmt_executions_int (loop);

		      if (numiter >= 0
			  && compare_tree_int (tmp, numiter) > 0)
			{
			  free_conflict_function (*overlaps_a);
			  free_conflict_function (*overlaps_b);
			  *overlaps_a = conflict_fn_no_dependence ();
			  *overlaps_b = conflict_fn_no_dependence ();
			  *last_conflicts = integer_zero_node;
			  dependence_stats.num_siv_independent++;
			  return;
			}
		      dependence_stats.num_siv_dependent++;
		      return;
		    }

		  /* When the step does not divide the difference, there
		     are no overlaps.  */
		  else
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      dependence_stats.num_siv_independent++;
		      return;
		    }
		}
	      else
		{
		  /* Example:
		     chrec_a = 3
		     chrec_b = {4, +, 1}

		     In this case, chrec_a will not overlap with chrec_b.  */
		  *overlaps_a = conflict_fn_no_dependence ();
		  *overlaps_b = conflict_fn_no_dependence ();
		  *last_conflicts = integer_zero_node;
		  dependence_stats.num_siv_independent++;
		  return;
		}
	    }
	}
    }
}
/* Helper recursive function for initializing the matrix A.  Returns
   the initial value of CHREC.  */

static tree
initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
{
  gcc_assert (chrec);

  switch (TREE_CODE (chrec))
    {
    case POLYNOMIAL_CHREC:
      HOST_WIDE_INT chrec_right;
      if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
	return chrec_dont_know;
      chrec_right = int_cst_value (CHREC_RIGHT (chrec));
      /* We want to be able to negate without overflow.  */
      if (chrec_right == HOST_WIDE_INT_MIN)
	return chrec_dont_know;
      A[index][0] = mult * chrec_right;
      return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);

    case PLUS_EXPR:
    case MULT_EXPR:
    case MINUS_EXPR:
      {
	tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
	tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);

	return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
      }

    CASE_CONVERT:
      {
	tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
	return chrec_convert (chrec_type (chrec), op, NULL);
      }

    case BIT_NOT_EXPR:
      {
	/* Handle ~X as -1 - X.  */
	tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
	return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
			      build_int_cst (TREE_TYPE (chrec), -1), op);
      }

    case INTEGER_CST:
      return chrec;

    default:
      gcc_unreachable ();
      return NULL_TREE;
    }
}
#define FLOOR_DIV(x, y) ((x) / (y))

/* Solves the special case of the Diophantine equation:
   | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)

   Computes the descriptions OVERLAPS_A and OVERLAPS_B.  NITER is the
   number of iterations that loops X and Y run.  The overlaps will be
   constructed as evolutions in dimension DIM.  */
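/* As a worked instance of the equation above (the numbers are chosen
   here for illustration, not taken from a testcase): for
   | {0, +, 6}_x (OVERLAPS_A) = {0, +, 4}_y (OVERLAPS_B)
   we have gcd (6, 4) = 2, so the conflicting iterations advance with
   step STEP_B / gcd = 2 in x and STEP_A / gcd = 3 in y:
   | OVERLAPS_A (t) = {0, +, 2} (t),  OVERLAPS_B (t) = {0, +, 3} (t)
   and indeed 6 * 2t == 4 * 3t for every t.  */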
static void
compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
					 HOST_WIDE_INT step_a,
					 HOST_WIDE_INT step_b,
					 affine_fn *overlaps_a,
					 affine_fn *overlaps_b,
					 tree *last_conflicts, int dim)
{
  if (((step_a > 0 && step_b > 0)
       || (step_a < 0 && step_b < 0)))
    {
      HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
      HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;

      gcd_steps_a_b = gcd (step_a, step_b);
      step_overlaps_a = step_b / gcd_steps_a_b;
      step_overlaps_b = step_a / gcd_steps_a_b;

      if (niter > 0)
	{
	  tau2 = FLOOR_DIV (niter, step_overlaps_a);
	  tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
	  last_conflict = tau2;
	  *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
	}
      else
	*last_conflicts = chrec_dont_know;

      *overlaps_a = affine_fn_univar (integer_zero_node, dim,
				      build_int_cst (NULL_TREE,
						     step_overlaps_a));
      *overlaps_b = affine_fn_univar (integer_zero_node, dim,
				      build_int_cst (NULL_TREE,
						     step_overlaps_b));
    }
  else
    {
      *overlaps_a = affine_fn_cst (integer_zero_node);
      *overlaps_b = affine_fn_cst (integer_zero_node);
      *last_conflicts = integer_zero_node;
    }
}
/* Solves the special case of a Diophantine equation where CHREC_A is
   an affine bivariate function, and CHREC_B is an affine univariate
   function.  For example,

   | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z

   has the following overlapping functions:

   | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
   | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
   | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v

   FORNOW: This is a specialized implementation for a case occurring in
   a common benchmark.  Implement the general algorithm.  */
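/* Checking the example above at one point (plain substitution, added
   for illustration): the left-hand side at (x, y) = (1336, 0) gives
   0 + 1 * 1336 + 1335 * 0 = 1336, and the right-hand side at z = 1
   gives 0 + 1336 * 1 = 1336, so the two accesses do meet there.  */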
static void
compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
				      conflict_function **overlaps_a,
				      conflict_function **overlaps_b,
				      tree *last_conflicts)
{
  bool xz_p, yz_p, xyz_p;
  HOST_WIDE_INT step_x, step_y, step_z;
  HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
  affine_fn overlaps_a_xz, overlaps_b_xz;
  affine_fn overlaps_a_yz, overlaps_b_yz;
  affine_fn overlaps_a_xyz, overlaps_b_xyz;
  affine_fn ova1, ova2, ovb;
  tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;

  step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
  step_y = int_cst_value (CHREC_RIGHT (chrec_a));
  step_z = int_cst_value (CHREC_RIGHT (chrec_b));

  niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
  niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
  niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));

  if (niter_x < 0 || niter_y < 0 || niter_z < 0)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      return;
    }

  niter = MIN (niter_x, niter_z);
  compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
					   &overlaps_a_xz,
					   &overlaps_b_xz,
					   &last_conflicts_xz, 1);
  niter = MIN (niter_y, niter_z);
  compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
					   &overlaps_a_yz,
					   &overlaps_b_yz,
					   &last_conflicts_yz, 2);
  niter = MIN (niter_x, niter_z);
  niter = MIN (niter_y, niter);
  compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
					   &overlaps_a_xyz,
					   &overlaps_b_xyz,
					   &last_conflicts_xyz, 3);

  xz_p = !integer_zerop (last_conflicts_xz);
  yz_p = !integer_zerop (last_conflicts_yz);
  xyz_p = !integer_zerop (last_conflicts_xyz);

  if (xz_p || yz_p || xyz_p)
    {
      ova1 = affine_fn_cst (integer_zero_node);
      ova2 = affine_fn_cst (integer_zero_node);
      ovb = affine_fn_cst (integer_zero_node);
      if (xz_p)
	{
	  affine_fn t0 = ova1;
	  affine_fn t2 = ovb;

	  ova1 = affine_fn_plus (ova1, overlaps_a_xz);
	  ovb = affine_fn_plus (ovb, overlaps_b_xz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  *last_conflicts = last_conflicts_xz;
	}
      if (yz_p)
	{
	  affine_fn t0 = ova2;
	  affine_fn t2 = ovb;

	  ova2 = affine_fn_plus (ova2, overlaps_a_yz);
	  ovb = affine_fn_plus (ovb, overlaps_b_yz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  *last_conflicts = last_conflicts_yz;
	}
      if (xyz_p)
	{
	  affine_fn t0 = ova1;
	  affine_fn t2 = ova2;
	  affine_fn t4 = ovb;

	  ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
	  ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
	  ovb = affine_fn_plus (ovb, overlaps_b_xyz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  affine_fn_free (t4);
	  *last_conflicts = last_conflicts_xyz;
	}
      *overlaps_a = conflict_fn (2, ova1, ova2);
      *overlaps_b = conflict_fn (1, ovb);
    }
  else
    {
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = integer_zero_node;
    }

  affine_fn_free (overlaps_a_xz);
  affine_fn_free (overlaps_b_xz);
  affine_fn_free (overlaps_a_yz);
  affine_fn_free (overlaps_b_yz);
  affine_fn_free (overlaps_a_xyz);
  affine_fn_free (overlaps_b_xyz);
}
/* Copy the elements of vector VEC1 with length SIZE to VEC2.  */

static void
lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
		    int size)
{
  memcpy (vec2, vec1, size * sizeof (*vec1));
}

/* Copy the elements of M x N matrix MAT1 to MAT2.  */

static void
lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
		    int m, int n)
{
  int i;

  for (i = 0; i < m; i++)
    lambda_vector_copy (mat1[i], mat2[i], n);
}

/* Store the N x N identity matrix in MAT.  */

static void
lambda_matrix_id (lambda_matrix mat, int size)
{
  int i, j;

  for (i = 0; i < size; i++)
    for (j = 0; j < size; j++)
      mat[i][j] = (i == j) ? 1 : 0;
}

/* Return the index of the first nonzero element of vector VEC1 between
   START and N.  We must have START <= N.
   Returns N if VEC1 is the zero vector.  */

static int
lambda_vector_first_nz (lambda_vector vec1, int n, int start)
{
  int j = start;
  while (j < n && vec1[j] == 0)
    j++;
  return j;
}

/* Add a multiple of row R1 of matrix MAT with N columns to row R2:
   R2 = R2 + CONST1 * R1.  Returns true on success, false when the
   arithmetic would overflow.  */

static bool
lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2,
		       lambda_int const1)
{
  int i;

  if (const1 == 0)
    return true;

  for (i = 0; i < n; i++)
    {
      bool ovf;
      lambda_int tem = mul_hwi (mat[r1][i], const1, &ovf);
      if (ovf)
	return false;
      lambda_int tem2 = add_hwi (mat[r2][i], tem, &ovf);
      if (ovf || tem2 == HOST_WIDE_INT_MIN)
	return false;
      mat[r2][i] = tem2;
    }

  return true;
}

/* Multiply vector VEC1 of length SIZE by a constant CONST1,
   and store the result in VEC2.  */

static void
lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
			  int size, lambda_int const1)
{
  int i;

  if (const1 == 0)
    lambda_vector_clear (vec2, size);
  else
    for (i = 0; i < size; i++)
      vec2[i] = const1 * vec1[i];
}

/* Negate vector VEC1 with length SIZE and store it in VEC2.  */

static void
lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
		      int size)
{
  lambda_vector_mult_const (vec1, vec2, size, -1);
}

/* Negate row R1 of matrix MAT which has N columns.  */

static void
lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
{
  lambda_vector_negate (mat[r1], mat[r1], n);
}

/* Return true if two vectors are equal.  */

static bool
lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
{
  int i;

  for (i = 0; i < size; i++)
    if (vec1[i] != vec2[i])
      return false;
  return true;
}
/* Given an M x N integer matrix A, this function determines an M x
   M unimodular matrix U, and an M x N echelon matrix S such that
   "U.A = S".  This decomposition is also known as "right Hermite".

   Ref: Algorithm 2.1 page 33 in "Loop Transformations for
   Restructuring Compilers" Utpal Banerjee.  */
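/* A small illustrative instance (constructed here, not from the book):
   for m = 2, n = 1 and A = [[4], [6]], one valid decomposition is

   | U = [[-1,  1],      S = [[2],
   |      [ 3, -2]]           [0]]

   since U.A = [[-4 + 6], [12 - 12]] = [[2], [0]] and det (U) = -1, so
   U is unimodular; S[0][0] then holds gcd (4, 6) = 2, which is exactly
   the value the gcd-test in analyze_subscript_affine_affine uses.  */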
static bool
lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
			     lambda_matrix S, lambda_matrix U)
{
  int i, j, i0 = 0;

  lambda_matrix_copy (A, S, m, n);
  lambda_matrix_id (U, m);

  for (j = 0; j < n; j++)
    {
      if (lambda_vector_first_nz (S[j], m, i0) < m)
	{
	  ++i0;
	  for (i = m - 1; i >= i0; i--)
	    {
	      while (S[i][j] != 0)
		{
		  lambda_int factor, a, b;

		  a = S[i-1][j];
		  b = S[i][j];
		  gcc_assert (a != HOST_WIDE_INT_MIN);
		  factor = a / b;

		  if (!lambda_matrix_row_add (S, n, i, i-1, -factor))
		    return false;
		  std::swap (S[i], S[i-1]);

		  if (!lambda_matrix_row_add (U, m, i, i-1, -factor))
		    return false;
		  std::swap (U[i], U[i-1]);
		}
	    }
	}
    }

  return true;
}
/* Determines the overlapping elements due to accesses CHREC_A and
   CHREC_B, which are affine functions.  This function cannot handle
   symbolic evolution functions, i.e. when the initial conditions are
   parameters, because it uses lambda matrices of integers.  */

static void
analyze_subscript_affine_affine (tree chrec_a,
				 tree chrec_b,
				 conflict_function **overlaps_a,
				 conflict_function **overlaps_b,
				 tree *last_conflicts)
{
  unsigned nb_vars_a, nb_vars_b, dim;
  lambda_int gamma, gcd_alpha_beta;
  lambda_matrix A, U, S;
  struct obstack scratch_obstack;

  if (eq_evolutions_p (chrec_a, chrec_b))
    {
      /* The accessed index overlaps for each iteration in the
	 loop.  */
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = chrec_dont_know;
      return;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_subscript_affine_affine \n");

  /* For determining the initial intersection, we have to solve a
     Diophantine equation.  This is the most time consuming part.

     For answering the question: "Is there a dependence?" we have
     to prove that there exists a solution to the Diophantine
     equation, and that the solution is in the iteration domain,
     i.e. the solution is positive or zero, and that the solution
     happens before the upper bound loop.nb_iterations.  Otherwise
     there is no dependence.  This function outputs a description of
     the iterations that hold the intersections.  */
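/* For instance (an illustrative case, not from a testcase): for
   | {0, +, 9}_1 (x) = {3, +, 6}_1 (y)
   the equation to solve is 9x - 6y = 3.  Here gcd (9, 6) = 3 divides
   gamma = 3, so integer solutions exist: x = 1 + 2t, y = 1 + 3t.  The
   first conflict in the iteration domain is at (x, y) = (1, 1), where
   both sides evaluate to 9.  */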
  nb_vars_a = nb_vars_in_chrec (chrec_a);
  nb_vars_b = nb_vars_in_chrec (chrec_b);

  gcc_obstack_init (&scratch_obstack);

  dim = nb_vars_a + nb_vars_b;
  U = lambda_matrix_new (dim, dim, &scratch_obstack);
  A = lambda_matrix_new (dim, 1, &scratch_obstack);
  S = lambda_matrix_new (dim, 1, &scratch_obstack);

  tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
  tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
  if (init_a == chrec_dont_know
      || init_b == chrec_dont_know)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "affine-affine test failed: "
		 "representation issue.\n");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }
  gamma = int_cst_value (init_b) - int_cst_value (init_a);

  /* Don't do all the hard work of solving the Diophantine equation
     when we already know the solution: for example,
     | {3, +, 1}_1
     | {3, +, 4}_2
     | gamma = 3 - 3 = 0.
     Then the first overlap occurs during the first iterations:
     | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
  */
  if (gamma == 0)
    {
      if (nb_vars_a == 1 && nb_vars_b == 1)
	{
	  HOST_WIDE_INT step_a, step_b;
	  HOST_WIDE_INT niter, niter_a, niter_b;
	  affine_fn ova, ovb;

	  niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
	  niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
	  niter = MIN (niter_a, niter_b);
	  step_a = int_cst_value (CHREC_RIGHT (chrec_a));
	  step_b = int_cst_value (CHREC_RIGHT (chrec_b));

	  compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
						   &ova, &ovb,
						   last_conflicts, 1);
	  *overlaps_a = conflict_fn (1, ova);
	  *overlaps_b = conflict_fn (1, ovb);
	}

      else if (nb_vars_a == 2 && nb_vars_b == 1)
	compute_overlap_steps_for_affine_1_2
	  (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);

      else if (nb_vars_a == 1 && nb_vars_b == 2)
	compute_overlap_steps_for_affine_1_2
	  (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);

      else
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "affine-affine test failed: too many variables.\n");
	  *overlaps_a = conflict_fn_not_known ();
	  *overlaps_b = conflict_fn_not_known ();
	  *last_conflicts = chrec_dont_know;
	}
      goto end_analyze_subs_aa;
    }

  /* U.A = S */
  if (!lambda_matrix_right_hermite (A, dim, 1, S, U))
    {
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }

  if (S[0][0] < 0)
    {
      S[0][0] *= -1;
      lambda_matrix_row_negate (U, dim, 0);
    }
  gcd_alpha_beta = S[0][0];

  /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
     but that is a quite strange case.  Instead of ICEing, answer
     don't know.  */
  if (gcd_alpha_beta == 0)
    {
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }

  /* The classic "gcd-test".  */
  if (!int_divides_p (gcd_alpha_beta, gamma))
    {
      /* The "gcd-test" has determined that there is no integer
	 solution, i.e. there is no dependence.  */
      *overlaps_a = conflict_fn_no_dependence ();
      *overlaps_b = conflict_fn_no_dependence ();
      *last_conflicts = integer_zero_node;
    }

  /* Both access functions are univariate.  This includes SIV and MIV cases.  */
  else if (nb_vars_a == 1 && nb_vars_b == 1)
    {
      /* Both functions should have the same evolution sign.  */
      if (((A[0][0] > 0 && -A[1][0] > 0)
	   || (A[0][0] < 0 && -A[1][0] < 0)))
	{
	  /* The solutions are given by:
	     |
	     | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
	     |                           [u21 u22]    [y0]

	     For a given integer t.  Using the following variables,

	     | i0 = u11 * gamma / gcd_alpha_beta
	     | j0 = u12 * gamma / gcd_alpha_beta
	     | i1 = u21
	     | j1 = u22

	     the solutions are:

	     | x0 = i0 + i1 * t,
	     | y0 = j0 + j1 * t.  */
	  HOST_WIDE_INT i0, j0, i1, j1;

	  i0 = U[0][0] * gamma / gcd_alpha_beta;
	  j0 = U[0][1] * gamma / gcd_alpha_beta;
	  i1 = U[1][0];
	  j1 = U[1][1];

	  if ((i1 == 0 && i0 < 0)
	      || (j1 == 0 && j0 < 0))
	    {
	      /* There is no solution.
		 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
		 falls in here, but for the moment we don't look at the
		 upper bound of the iteration domain.  */
	      *overlaps_a = conflict_fn_no_dependence ();
	      *overlaps_b = conflict_fn_no_dependence ();
	      *last_conflicts = integer_zero_node;
	      goto end_analyze_subs_aa;
	    }

	  if (i1 > 0 && j1 > 0)
	    {
	      HOST_WIDE_INT niter_a
		= max_stmt_executions_int (get_chrec_loop (chrec_a));
	      HOST_WIDE_INT niter_b
		= max_stmt_executions_int (get_chrec_loop (chrec_b));
	      HOST_WIDE_INT niter = MIN (niter_a, niter_b);

	      /* (X0, Y0) is a solution of the Diophantine equation:
		 "chrec_a (X0) = chrec_b (Y0)".  */
	      HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
					CEIL (-j0, j1));
	      HOST_WIDE_INT x0 = i1 * tau1 + i0;
	      HOST_WIDE_INT y0 = j1 * tau1 + j0;

	      /* (X1, Y1) is the smallest positive solution of the eq
		 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
		 first conflict occurs.  */
	      HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
	      HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
	      HOST_WIDE_INT y1 = y0 - j1 * min_multiple;

	      if (niter > 0)
		{
		  /* If the overlap occurs outside of the bounds of the
		     loop, there is no dependence.  */
		  if (x1 >= niter_a || y1 >= niter_b)
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      goto end_analyze_subs_aa;
		    }

		  /* max stmt executions can get quite large, avoid
		     overflows by using wide ints here.  */
		  widest_int tau2
		    = wi::smin (wi::sdiv_floor (wi::sub (niter_a, i0), i1),
				wi::sdiv_floor (wi::sub (niter_b, j0), j1));
		  widest_int last_conflict = wi::sub (tau2, (x1 - i0)/i1);
		  if (wi::min_precision (last_conflict, SIGNED)
		      <= TYPE_PRECISION (integer_type_node))
		    *last_conflicts
		      = build_int_cst (integer_type_node,
				       last_conflict.to_shwi ());
		  else
		    *last_conflicts = chrec_dont_know;
		}
	      else
		*last_conflicts = chrec_dont_know;

	      *overlaps_a
		= conflict_fn (1,
			       affine_fn_univar (build_int_cst (NULL_TREE, x1),
						 1,
						 build_int_cst (NULL_TREE, i1)));
	      *overlaps_b
		= conflict_fn (1,
			       affine_fn_univar (build_int_cst (NULL_TREE, y1),
						 1,
						 build_int_cst (NULL_TREE, j1)));
	    }
	  else
	    {
	      /* FIXME: For the moment, the upper bound of the
		 iteration domain for i and j is not checked.  */
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	    }
	}
      else
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
	  *overlaps_a = conflict_fn_not_known ();
	  *overlaps_b = conflict_fn_not_known ();
	  *last_conflicts = chrec_dont_know;
	}
    }
  else
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
    }

end_analyze_subs_aa:
  obstack_free (&scratch_obstack, NULL);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  (overlaps_a = ");
      dump_conflict_function (dump_file, *overlaps_a);
      fprintf (dump_file, ")\n  (overlaps_b = ");
      dump_conflict_function (dump_file, *overlaps_b);
      fprintf (dump_file, "))\n");
    }
}
/* Returns true when analyze_subscript_affine_affine can be used for
   determining the dependence relation between chrec_a and chrec_b,
   that contain symbols.  This function modifies chrec_a and chrec_b
   such that the analysis result is the same, and such that they don't
   contain symbols, so that they can then safely be passed to the
   analyzer.

   Example: The analysis of the following tuples of evolutions produce
   the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
   vs. {0, +, 1}_1:

   {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
   {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
*/
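/* Spot-checking the rewriting above (plain substitution, added for
   illustration): at k = 0 the first overlap pairs iteration 2 of the
   first chrec with iteration 0 of the second, and indeed
   (x+1) + 2 == (x+3) + 0; the rewritten pair gives -2 + 2 == 0 + 0 at
   the same iterations, so the analysis result is unchanged.  */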
static bool
can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
{
  tree diff, type, left_a, left_b, right_b;

  if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
      || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
    /* FIXME: For the moment not handled.  Might be refined later.  */
    return false;

  type = chrec_type (*chrec_a);
  left_a = CHREC_LEFT (*chrec_a);
  left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
  diff = chrec_fold_minus (type, left_a, left_b);

  if (!evolution_function_is_constant_p (diff))
    return false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");

  *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
				     diff, CHREC_RIGHT (*chrec_a));
  right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
  *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
				     build_int_cst (type, 0),
				     right_b);
  return true;
}
/* Analyze a SIV (Single Index Variable) subscript.  *OVERLAPS_A and
   *OVERLAPS_B are initialized to the functions that describe the
   relation between the elements accessed twice by CHREC_A and
   CHREC_B.  For k >= 0, the following property is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
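/* For example (illustrative values): with chrec_a = 12 and
   chrec_b = {10, +, 2}_1, the single conflict is at iteration 1 of
   loop_1, so *OVERLAPS_A (0) = 0, *OVERLAPS_B (0) = 1, and indeed
   CHREC_A (0) = 12 = CHREC_B (1).  */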
static void
analyze_siv_subscript (tree chrec_a,
		       tree chrec_b,
		       conflict_function **overlaps_a,
		       conflict_function **overlaps_b,
		       tree *last_conflicts,
		       int loop_nest_num)
{
  dependence_stats.num_siv++;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_siv_subscript \n");

  if (evolution_function_is_constant_p (chrec_a)
      && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
    analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
				      overlaps_a, overlaps_b, last_conflicts);

  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
	   && evolution_function_is_constant_p (chrec_b))
    analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
				      overlaps_b, overlaps_a, last_conflicts);

  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
	   && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
    {
      if (!chrec_contains_symbols (chrec_a)
	  && !chrec_contains_symbols (chrec_b))
	{
	  analyze_subscript_affine_affine (chrec_a, chrec_b,
					   overlaps_a, overlaps_b,
					   last_conflicts);

	  if (CF_NOT_KNOWN_P (*overlaps_a)
	      || CF_NOT_KNOWN_P (*overlaps_b))
	    dependence_stats.num_siv_unimplemented++;
	  else if (CF_NO_DEPENDENCE_P (*overlaps_a)
		   || CF_NO_DEPENDENCE_P (*overlaps_b))
	    dependence_stats.num_siv_independent++;
	  else
	    dependence_stats.num_siv_dependent++;
	}
      else if (can_use_analyze_subscript_affine_affine (&chrec_a,
							&chrec_b))
	{
	  analyze_subscript_affine_affine (chrec_a, chrec_b,
					   overlaps_a, overlaps_b,
					   last_conflicts);

	  if (CF_NOT_KNOWN_P (*overlaps_a)
	      || CF_NOT_KNOWN_P (*overlaps_b))
	    dependence_stats.num_siv_unimplemented++;
	  else if (CF_NO_DEPENDENCE_P (*overlaps_a)
		   || CF_NO_DEPENDENCE_P (*overlaps_b))
	    dependence_stats.num_siv_independent++;
	  else
	    dependence_stats.num_siv_dependent++;
	}
      else
	goto siv_subscript_dontknow;
    }

  else
    {
    siv_subscript_dontknow:;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, " siv test failed: unimplemented");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      dependence_stats.num_siv_unimplemented++;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, ")\n");
}
/* Returns false if we can prove that the greatest common divisor of the
   steps of CHREC does not divide CST, true otherwise.  */
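/* For instance (illustrative values): for CHREC = {{0, +, 4}_1, +, 6}_2
   the gcd of the steps is gcd (4, 6) = 2; a difference CST = 3 gives
   3 % 2 != 0, so the function returns false and the accesses cannot
   overlap, whereas CST = 8 would keep the answer conservatively true.  */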
static bool
gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
{
  HOST_WIDE_INT cd = 0, val;
  tree step;

  if (!tree_fits_shwi_p (cst))
    return true;
  val = tree_to_shwi (cst);

  while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
    {
      step = CHREC_RIGHT (chrec);
      if (!tree_fits_shwi_p (step))
	return true;
      cd = gcd (cd, tree_to_shwi (step));
      chrec = CHREC_LEFT (chrec);
    }

  return val % cd == 0;
}
/* Analyze a MIV (Multiple Index Variable) subscript with respect to
   LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
   functions that describe the relation between the elements accessed
   twice by CHREC_A and CHREC_B.  For k >= 0, the following property
   is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
static void
analyze_miv_subscript (tree chrec_a,
		       tree chrec_b,
		       conflict_function **overlaps_a,
		       conflict_function **overlaps_b,
		       tree *last_conflicts,
		       class loop *loop_nest)
{
  tree type, difference;

  dependence_stats.num_miv++;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_miv_subscript \n");

  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
  chrec_a = chrec_convert (type, chrec_a, NULL);
  chrec_b = chrec_convert (type, chrec_b, NULL);
  difference = chrec_fold_minus (type, chrec_a, chrec_b);

  if (eq_evolutions_p (chrec_a, chrec_b))
    {
      /* Access functions are the same: all the elements are accessed
	 in the same order.  */
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
      dependence_stats.num_miv_dependent++;
    }

  else if (evolution_function_is_constant_p (difference)
	   && evolution_function_is_affine_multivariate_p (chrec_a,
							   loop_nest->num)
	   && !gcd_of_steps_may_divide_p (chrec_a, difference))
    {
      /* testsuite/.../ssa-chrec-33.c
	 {{21, +, 2}_1, +, -2}_2  vs.  {{20, +, 2}_1, +, -2}_2

	 The difference is 1, and all the evolution steps are multiples
	 of 2, consequently there are no overlapping elements.  */
      *overlaps_a = conflict_fn_no_dependence ();
      *overlaps_b = conflict_fn_no_dependence ();
      *last_conflicts = integer_zero_node;
      dependence_stats.num_miv_independent++;
    }

  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest->num)
	   && !chrec_contains_symbols (chrec_a, loop_nest)
	   && evolution_function_is_affine_in_loop (chrec_b, loop_nest->num)
	   && !chrec_contains_symbols (chrec_b, loop_nest))
    {
      /* testsuite/.../ssa-chrec-35.c
	 {0, +, 1}_2  vs.  {0, +, 1}_3
	 the overlapping elements are respectively located at iterations:
	 {0, +, 1}_x and {0, +, 1}_x,
	 in other words, we have the equality:
	 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)

	 Other examples:
	 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
	 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)

	 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
	 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
      */
      analyze_subscript_affine_affine (chrec_a, chrec_b,
				       overlaps_a, overlaps_b, last_conflicts);

      if (CF_NOT_KNOWN_P (*overlaps_a)
	  || CF_NOT_KNOWN_P (*overlaps_b))
	dependence_stats.num_miv_unimplemented++;
      else if (CF_NO_DEPENDENCE_P (*overlaps_a)
	       || CF_NO_DEPENDENCE_P (*overlaps_b))
	dependence_stats.num_miv_independent++;
      else
	dependence_stats.num_miv_dependent++;
    }

  else
    {
      /* When the analysis is too difficult, answer "don't know".  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      dependence_stats.num_miv_unimplemented++;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, ")\n");
}
/* Determines the iterations for which CHREC_A is equal to CHREC_B
   with respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
   OVERLAP_ITERATIONS_B are initialized with two functions that
   describe the iterations that contain conflicting elements.

   Remark: For an integer k >= 0, the following equality is true:

   CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
*/
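/* For instance (values picked for illustration): for
   chrec_a = {0, +, 2}_1 and chrec_b = {2, +, 4}_1, solving
   2x = 2 + 4y gives x = 1 + 2y, hence
   OVERLAP_ITERATIONS_A = {1, +, 2} and OVERLAP_ITERATIONS_B = {0, +, 1}:
   at k = 1, CHREC_A (3) = 6 = CHREC_B (1).  */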
static void
analyze_overlapping_iterations (tree chrec_a,
				tree chrec_b,
				conflict_function **overlap_iterations_a,
				conflict_function **overlap_iterations_b,
				tree *last_conflicts, class loop *loop_nest)
{
  unsigned int lnn = loop_nest->num;

  dependence_stats.num_subscript_tests++;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "(analyze_overlapping_iterations \n");
      fprintf (dump_file, "  (chrec_a = ");
      print_generic_expr (dump_file, chrec_a);
      fprintf (dump_file, ")\n  (chrec_b = ");
      print_generic_expr (dump_file, chrec_b);
      fprintf (dump_file, ")\n");
    }

  if (chrec_a == NULL_TREE
      || chrec_b == NULL_TREE
      || chrec_contains_undetermined (chrec_a)
      || chrec_contains_undetermined (chrec_b))
    {
      dependence_stats.num_subscript_undetermined++;

      *overlap_iterations_a = conflict_fn_not_known ();
      *overlap_iterations_b = conflict_fn_not_known ();
    }

  /* If they are the same chrec, and are affine, they overlap
     on every iteration.  */
  else if (eq_evolutions_p (chrec_a, chrec_b)
	   && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
	       || operand_equal_p (chrec_a, chrec_b, 0)))
    {
      dependence_stats.num_same_subscript_function++;
      *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = chrec_dont_know;
    }

  /* If they aren't the same, and aren't affine, we can't do anything
     yet.  */
  else if ((chrec_contains_symbols (chrec_a)
	    || chrec_contains_symbols (chrec_b))
	   && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
	       || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
    {
      dependence_stats.num_subscript_undetermined++;
      *overlap_iterations_a = conflict_fn_not_known ();
      *overlap_iterations_b = conflict_fn_not_known ();
    }

  else if (ziv_subscript_p (chrec_a, chrec_b))
    analyze_ziv_subscript (chrec_a, chrec_b,
			   overlap_iterations_a, overlap_iterations_b,
			   last_conflicts);

  else if (siv_subscript_p (chrec_a, chrec_b))
    analyze_siv_subscript (chrec_a, chrec_b,
			   overlap_iterations_a, overlap_iterations_b,
			   last_conflicts, lnn);

  else
    analyze_miv_subscript (chrec_a, chrec_b,
			   overlap_iterations_a, overlap_iterations_b,
			   last_conflicts, loop_nest);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  (overlap_iterations_a = ");
      dump_conflict_function (dump_file, *overlap_iterations_a);
      fprintf (dump_file, ")\n  (overlap_iterations_b = ");
      dump_conflict_function (dump_file, *overlap_iterations_b);
      fprintf (dump_file, "))\n");
    }
}
/* Helper function for uniquely inserting distance vectors.  */

static void
save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
{
  for (lambda_vector v : DDR_DIST_VECTS (ddr))
    if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
      return;

  DDR_DIST_VECTS (ddr).safe_push (dist_v);
}

/* Helper function for uniquely inserting direction vectors.  */

static void
save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
{
  for (lambda_vector v : DDR_DIR_VECTS (ddr))
    if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
      return;

  DDR_DIR_VECTS (ddr).safe_push (dir_v);
}
/* Add a distance of 1 on all the loops outer than INDEX.  If we
   haven't yet determined a distance for this outer loop, push a new
   distance vector composed of the previous distance, and a distance
   of 1 for this outer loop.  Example:

   | loop_1
   |   loop_2
   |     A[10]
   |   endloop_2
   | endloop_1

   Saved vectors are of the form (dist_in_1, dist_in_2).  First, we
   save (0, 1), then we have to save (1, 0).  */
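/* Concretely (an illustrative trace): with two loops in the nest,
   calling this with the zero vector and INDEX = 1 pushes the single
   vector (1, 0): the copy of DIST_V gets a 1 in position 0, the only
   loop outer than INDEX.  */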
static void
add_outer_distances (struct data_dependence_relation *ddr,
		     lambda_vector dist_v, int index)
{
  /* For each outer loop where init_v is not set, the accesses are
     in dependence of distance 1 in the loop.  */
  while (--index >= 0)
    {
      lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
      lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
      save_v[index] = 1;
      save_dist_v (ddr, save_v);
    }
}
/* Return false when we fail to represent the data dependence as a
   distance vector.  A_INDEX is the index of the first reference
   (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
   second reference.  INIT_B is set to true when a component has been
   added to the distance vector DIST_V.  INDEX_CARRY is then set to
   the index in DIST_V that carries the dependence.  */
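/* For example (illustrative): if one subscript pair is
   A[{0, +, 1}_1] vs. A[{2, +, 1}_1], SUB_DISTANCE is the constant 2,
   so the DIST_V slot of loop_1 becomes 2 and *INIT_B is set; a second
   subscript demanding a different distance for the same loop would
   prove independence (the subscript coupling test below).  */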
static bool
build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
			     unsigned int a_index, unsigned int b_index,
			     lambda_vector dist_v, bool *init_b,
			     int *index_carry)
{
  unsigned i;
  lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  class loop *loop = DDR_LOOP_NEST (ddr)[0];

  for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
    {
      tree access_fn_a, access_fn_b;
      struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);

      if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
	{
	  non_affine_dependence_relation (ddr);
	  return false;
	}

      access_fn_a = SUB_ACCESS_FN (subscript, a_index);
      access_fn_b = SUB_ACCESS_FN (subscript, b_index);

      if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
	  && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
	{
	  HOST_WIDE_INT dist;
	  int index;
	  int var_a = CHREC_VARIABLE (access_fn_a);
	  int var_b = CHREC_VARIABLE (access_fn_b);

	  if (var_a != var_b
	      || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
	    {
	      non_affine_dependence_relation (ddr);
	      return false;
	    }

	  /* When data references are collected in a loop while data
	     dependences are analyzed in a loop nest nested in that loop,
	     we may have more access functions than loops.  Skip access
	     functions of loops not in the loop nest.

	     See PR89725 for more information.  */
	  if (flow_loop_nested_p (get_loop (cfun, var_a), loop))
	    continue;

	  dist = int_cst_value (SUB_DISTANCE (subscript));
	  index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
	  *index_carry = MIN (index, *index_carry);

	  /* This is the subscript coupling test.  If we have already
	     recorded a distance for this loop (a distance coming from
	     another subscript), it should be the same.  For example,
	     in the following code, there is no dependence:

	     | loop i = 0, N, 1
	     |   T[i+1][i] = ...
	     |   ... = T[i][i]
	     | endloop
	  */
	  if (init_v[index] != 0 && dist_v[index] != dist)
	    {
	      finalize_ddr_dependent (ddr, chrec_known);
	      return false;
	    }

	  dist_v[index] = dist;
	  init_v[index] = 1;
	  *init_b = true;
	}
      else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
	{
	  /* This can be for example an affine vs. constant dependence
	     (T[i] vs. T[3]) that is not an affine dependence and is
	     not representable as a distance vector.  */
	  non_affine_dependence_relation (ddr);
	  return false;
	}
    }

  return true;
}
/* Return true when the DDR contains only access functions that are
   invariant with respect to loop number LNUM.  */

static bool
invariant_access_functions (const struct data_dependence_relation *ddr,
			    int lnum)
{
  for (subscript *sub : DDR_SUBSCRIPTS (ddr))
    if (!evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 0), lnum)
	|| !evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 1), lnum))
      return false;

  return true;
}
/* Helper function for the case where DDR_A and DDR_B are the same
   multivariate access function with a constant step.  */

static void
add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
{
  int x_1, x_2;
  tree c_1 = CHREC_LEFT (c_2);
  tree c_0 = CHREC_LEFT (c_1);
  lambda_vector dist_v;
  HOST_WIDE_INT v1, v2, cd;

  /* Polynomials with more than 2 variables are not handled yet.  When
     the evolution steps are parameters, it is not possible to
     represent the dependence using classical distance vectors.  */
  if (TREE_CODE (c_0) != INTEGER_CST
      || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
      || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
    {
      DDR_AFFINE_P (ddr) = false;
      return;
    }

  x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
  x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));

  /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2).  */
  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  v1 = int_cst_value (CHREC_RIGHT (c_1));
  v2 = int_cst_value (CHREC_RIGHT (c_2));
  cd = gcd (v1, v2);
  v1 /= cd;
  v2 /= cd;

  if (v2 < 0)
    {
      v2 = -v2;
      v1 = -v1;
    }

  dist_v[x_1] = v2;
  dist_v[x_2] = -v1;
  save_dist_v (ddr, dist_v);

  add_outer_distances (ddr, dist_v, x_1);
}
/* Helper function for the case where DDR_A and DDR_B are the same
   access functions.  */

static void
add_other_self_distances (struct data_dependence_relation *ddr)
{
  lambda_vector dist_v;
  unsigned i;
  int index_carry = DDR_NB_LOOPS (ddr);
  subscript *sub;
  class loop *loop = DDR_LOOP_NEST (ddr)[0];

  FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
    {
      tree access_fun = SUB_ACCESS_FN (sub, 0);

      if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
	{
	  if (!evolution_function_is_univariate_p (access_fun, loop->num))
	    {
	      if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
		{
		  DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
		  return;
		}

	      access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);

	      if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
		add_multivariate_self_dist (ddr, access_fun);
	      else
		/* The evolution step is not constant: it varies in
		   the outer loop, so this cannot be represented by a
		   distance vector.  For example in pr34635.c the
		   evolution is {0, +, {0, +, 4}_1}_2.  */
		DDR_AFFINE_P (ddr) = false;

	      return;
	    }

	  /* When data references are collected in a loop while data
	     dependences are analyzed in a loop nest nested in that loop,
	     we may have more access functions than loops.  Skip access
	     functions of loops not in the loop nest.

	     See PR89725 for more information.  */
	  if (flow_loop_nested_p (get_loop (cfun, CHREC_VARIABLE (access_fun)),
				  loop))
	    continue;

	  index_carry = MIN (index_carry,
			     index_in_loop_nest (CHREC_VARIABLE (access_fun),
						 DDR_LOOP_NEST (ddr)));
	}
    }

  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  add_outer_distances (ddr, dist_v, index_carry);
}
static void
insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
{
  lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));

  dist_v[DDR_INNER_LOOP (ddr)] = 1;
  save_dist_v (ddr, dist_v);
}
/* Adds a unit distance vector to DDR when there is a 0 overlap.  This
   is the case for example when access functions are the same and
   equal to a constant, as in:

   | loop_1
   |   A[3] = ...
   |   ... = A[3]
   | endloop_1

   in which case the distance vectors are (0) and (1).  */

static void
add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
{
  unsigned i, j;

  for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
    {
      subscript_p sub = DDR_SUBSCRIPT (ddr, i);
      conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
      conflict_function *cb = SUB_CONFLICTS_IN_B (sub);

      for (j = 0; j < ca->n; j++)
	if (affine_function_zero_p (ca->fns[j]))
	  {
	    insert_innermost_unit_dist_vector (ddr);
	    return;
	  }

      for (j = 0; j < cb->n; j++)
	if (affine_function_zero_p (cb->fns[j]))
	  {
	    insert_innermost_unit_dist_vector (ddr);
	    return;
	  }
    }
}
/* Return true when the DDR contains two data references that have the
   same access functions.  */

static bool
same_access_functions (const struct data_dependence_relation *ddr)
{
  for (subscript *sub : DDR_SUBSCRIPTS (ddr))
    if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
			  SUB_ACCESS_FN (sub, 1)))
      return false;

  return true;
}
/* Compute the classic per loop distance vector.  DDR is the data
   dependence relation to build a vector from.  Return false when we
   fail to represent the data dependence as a distance vector.  */

static bool
build_classic_dist_vector (struct data_dependence_relation *ddr,
			   class loop *loop_nest)
{
  bool init_b = false;
  int index_carry = DDR_NB_LOOPS (ddr);
  lambda_vector dist_v;

  if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
    return false;

  if (same_access_functions (ddr))
    {
      /* Save the 0 vector.  */
      dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
      save_dist_v (ddr, dist_v);

      if (invariant_access_functions (ddr, loop_nest->num))
	add_distance_for_zero_overlaps (ddr);

      if (DDR_NB_LOOPS (ddr) > 1)
	add_other_self_distances (ddr);

      return true;
    }

  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
    return false;

  /* Save the distance vector if we initialized one.  */
  if (init_b)
    {
      /* Verify a basic constraint: classic distance vectors should
	 always be lexicographically positive.

	 Data references are collected in the order of execution of
	 the program, thus for the following loop

	 | for (i = 1; i < 100; i++)
	 |   for (j = 1; j < 100; j++)
	 |     {
	 |       t = T[j+1][i-1];  // A
	 |       T[j][i] = t + 2;  // B
	 |     }

	 references are collected following the direction of the wind:
	 A then B.  The data dependence tests are performed also
	 following this order, such that we're looking at the distance
	 separating the elements accessed by A from the elements later
	 accessed by B.  But in this example, the distance returned by
	 test_dep (A, B) is lexicographically negative (-1, 1), that
	 means that the access A occurs later than B with respect to
	 the outer loop, ie. we're actually looking upwind.  In this
	 case we solve test_dep (B, A) looking downwind to the
	 lexicographically positive solution, that returns the
	 distance vector (1, -1).  */
      if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
	{
	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
	  if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
	    return false;
	  compute_subscript_distance (ddr);
	  if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
					    &index_carry))
	    return false;
	  save_dist_v (ddr, save_v);
	  DDR_REVERSED_P (ddr) = true;

	  /* In this case there is a dependence forward for all the
	     outer loops:

	     | for (k = 1; k < 100; k++)
	     |  for (i = 1; i < 100; i++)
	     |   for (j = 1; j < 100; j++)
	     |     {
	     |       t = T[j+1][i-1];  // A
	     |       T[j][i] = t + 2;  // B
	     |     }
	  */
	  if (DDR_NB_LOOPS (ddr) > 1)
	    {
	      add_outer_distances (ddr, save_v, index_carry);
	      add_outer_distances (ddr, dist_v, index_carry);
	    }
	}
      else
	{
	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
	  lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));

	  if (DDR_NB_LOOPS (ddr) > 1)
	    {
	      lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));

	      if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
		return false;
	      compute_subscript_distance (ddr);
	      if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
						&index_carry))
		return false;

	      save_dist_v (ddr, save_v);
	      add_outer_distances (ddr, dist_v, index_carry);
	      add_outer_distances (ddr, opposite_v, index_carry);
	    }
	  else
	    save_dist_v (ddr, save_v);
	}
    }
  else
    {
      /* There is a distance of 1 on all the outer loops: Example:
	 there is a dependence of distance 1 on loop_1 for the array A.

	 | loop_1
	 |   A[5] = ...
	 | endloop
      */
      add_outer_distances (ddr, dist_v,
			   lambda_vector_first_nz (dist_v,
						   DDR_NB_LOOPS (ddr), 0));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned i;

      fprintf (dump_file, "(build_classic_dist_vector\n");
      for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
	{
	  fprintf (dump_file, "  dist_vector = (");
	  print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
			       DDR_NB_LOOPS (ddr));
	  fprintf (dump_file, "  )\n");
	}
      fprintf (dump_file, ")\n");
    }

  return true;
}
/* Return the direction for a given distance.
   FIXME: Computing dir this way is suboptimal, since dir can catch
   cases that dist is unable to represent.  */
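/* I.e. dist = 2 maps to dir_positive, dist = -1 to dir_negative, and
   dist = 0 to dir_equal.  */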
static inline enum data_dependence_direction
dir_from_dist (int dist)
{
  if (dist > 0)
    return dir_positive;
  else if (dist < 0)
    return dir_negative;
  else
    return dir_equal;
}
/* Compute the classic per loop direction vector.  DDR is the data
   dependence relation to build a vector from.  */

static void
build_classic_dir_vector (struct data_dependence_relation *ddr)
{
  unsigned i, j;
  lambda_vector dist_v;

  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
    {
      lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));

      for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
	dir_v[j] = dir_from_dist (dist_v[j]);

      save_dir_v (ddr, dir_v);
    }
}
/* Helper function.  Returns true when there is a dependence between the
   data references.  A_INDEX is the index of the first reference (0 for
   DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference.  */

static bool
subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
			       unsigned int a_index, unsigned int b_index,
			       class loop *loop_nest)
{
  unsigned int i;
  tree last_conflicts;
  struct subscript *subscript;
  tree res = NULL_TREE;

  for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
    {
      conflict_function *overlaps_a, *overlaps_b;

      analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
				      SUB_ACCESS_FN (subscript, b_index),
				      &overlaps_a, &overlaps_b,
				      &last_conflicts, loop_nest);

      if (SUB_CONFLICTS_IN_A (subscript))
	free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
      if (SUB_CONFLICTS_IN_B (subscript))
	free_conflict_function (SUB_CONFLICTS_IN_B (subscript));

      SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
      SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
      SUB_LAST_CONFLICT (subscript) = last_conflicts;

      /* If there is any undetermined conflict function we have to
	 give a conservative answer in case we cannot prove that
	 no dependence exists when analyzing another subscript.  */
      if (CF_NOT_KNOWN_P (overlaps_a)
	  || CF_NOT_KNOWN_P (overlaps_b))
	{
	  res = chrec_dont_know;
	  continue;
	}

      /* When there is a subscript with no dependence we can stop.  */
      else if (CF_NO_DEPENDENCE_P (overlaps_a)
	       || CF_NO_DEPENDENCE_P (overlaps_b))
	{
	  res = chrec_known;
	  break;
	}
    }

  if (res == NULL_TREE)
    return true;

  if (res == chrec_known)
    dependence_stats.num_dependence_independent++;
  else
    dependence_stats.num_dependence_undetermined++;
  finalize_ddr_dependent (ddr, res);
  return false;
}
/* Computes the conflicting iterations in LOOP_NEST, and initialize DDR.  */

static void
subscript_dependence_tester (struct data_dependence_relation *ddr,
			     class loop *loop_nest)
{
  if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
    dependence_stats.num_dependence_dependent++;

  compute_subscript_distance (ddr);
  if (build_classic_dist_vector (ddr, loop_nest))
    build_classic_dir_vector (ddr);
}
/* Returns true when all the access functions of A are affine or
   constant with respect to LOOP_NEST.  */

static bool
access_functions_are_affine_or_constant_p (const struct data_reference *a,
					   const class loop *loop_nest)
{
  vec<tree> fns = DR_ACCESS_FNS (a);
  for (tree t : fns)
    if (!evolution_function_is_invariant_p (t, loop_nest->num)
	&& !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
      return false;

  return true;
}
/* This computes the affine dependence relation between A and B with
   respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
   independence between two accesses, while CHREC_DONT_KNOW is used
   for representing the unknown relation.

   Note that it is possible to stop the computation of the dependence
   relation the first time we detect a CHREC_KNOWN element for a given
   subscript.  */

void
compute_affine_dependence (struct data_dependence_relation *ddr,
			   class loop *loop_nest)
{
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "(compute_affine_dependence\n");
      fprintf (dump_file, "  ref_a: ");
      print_generic_expr (dump_file, DR_REF (dra));
      fprintf (dump_file, ", stmt_a: ");
      print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
      fprintf (dump_file, "  ref_b: ");
      print_generic_expr (dump_file, DR_REF (drb));
      fprintf (dump_file, ", stmt_b: ");
      print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
    }

  /* Analyze only when the dependence relation is not yet known.  */
  if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      dependence_stats.num_dependence_tests++;

      if (access_functions_are_affine_or_constant_p (dra, loop_nest)
	  && access_functions_are_affine_or_constant_p (drb, loop_nest))
	subscript_dependence_tester (ddr, loop_nest);

      /* As a last case, if the dependence cannot be determined, or if
	 the dependence is considered too difficult to determine, answer
	 "don't know".  */
      else
	{
	  dependence_stats.num_dependence_undetermined++;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Data ref a:\n");
	      dump_data_reference (dump_file, dra);
	      fprintf (dump_file, "Data ref b:\n");
	      dump_data_reference (dump_file, drb);
	      fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
	    }
	  finalize_ddr_dependent (ddr, chrec_dont_know);
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
	fprintf (dump_file, ") -> no dependence\n");
      else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	fprintf (dump_file, ") -> dependence analysis failed\n");
      else
	fprintf (dump_file, ")\n");
    }
}
/* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
   the data references in DATAREFS, in the LOOP_NEST.  When
   COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
   relations.  Return true when successful, i.e. when the number of
   data references is small enough to be handled.  */
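/* Note on cost (simple counting): for N data references this builds up
   to N * (N - 1) / 2 pairwise relations, plus N self relations when
   COMPUTE_SELF_AND_RR; e.g. 4 references give 6 + 4 = 10 relations,
   which is why the number of datarefs is capped below.  */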
bool
compute_all_dependences (const vec<data_reference_p> &datarefs,
			 vec<ddr_p> *dependence_relations,
			 const vec<loop_p> &loop_nest,
			 bool compute_self_and_rr)
{
  struct data_dependence_relation *ddr;
  struct data_reference *a, *b;
  unsigned int i, j;

  if ((int) datarefs.length ()
      > param_loop_max_datarefs_for_datadeps)
    {
      struct data_dependence_relation *ddr;

      /* Insert a single relation into dependence_relations:
	 chrec_dont_know.  */
      ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
      dependence_relations->safe_push (ddr);
      return false;
    }

  FOR_EACH_VEC_ELT (datarefs, i, a)
    for (j = i + 1; datarefs.iterate (j, &b); j++)
      if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
	{
	  ddr = initialize_data_dependence_relation (a, b, loop_nest);
	  dependence_relations->safe_push (ddr);
	  if (loop_nest.exists ())
	    compute_affine_dependence (ddr, loop_nest[0]);
	}

  if (compute_self_and_rr)
    FOR_EACH_VEC_ELT (datarefs, i, a)
      {
	ddr = initialize_data_dependence_relation (a, a, loop_nest);
	dependence_relations->safe_push (ddr);
	if (loop_nest.exists ())
	  compute_affine_dependence (ddr, loop_nest[0]);
      }

  return true;
}
/* Describes a location of a memory reference.  */

struct data_ref_loc
{
  /* The memory reference.  */
  tree ref;

  /* True if the memory reference is read.  */
  bool is_read;

  /* True if the data reference is conditional within the containing
     statement, i.e. if it might not occur even when the statement
     is executed and runs to completion.  */
  bool is_conditional_in_stmt;
};
/* Stores the locations of memory references in STMT to REFERENCES.  Returns
   true if STMT clobbers memory, false otherwise.  */

static bool
get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
{
  bool clobbers_memory = false;
  data_ref_loc ref;
  tree op0, op1;
  enum gimple_code stmt_code = gimple_code (stmt);

  /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
     As we cannot model data-references to not spelled out
     accesses give up if they may occur.  */
  if (stmt_code == GIMPLE_CALL
      && !(gimple_call_flags (stmt) & ECF_CONST))
    {
      /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
      if (gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_GOMP_SIMD_LANE:
	    {
	      class loop *loop = gimple_bb (stmt)->loop_father;
	      tree uid = gimple_call_arg (stmt, 0);
	      gcc_assert (TREE_CODE (uid) == SSA_NAME);
	      if (loop == NULL
		  || loop->simduid != SSA_NAME_VAR (uid))
		clobbers_memory = true;
	      break;
	    }
	  case IFN_MASK_LOAD:
	  case IFN_MASK_STORE:
	    break;
	  case IFN_MASK_CALL:
	    {
	      tree orig_fndecl
		= gimple_call_addr_fndecl (gimple_call_arg (stmt, 0));
	      if (!orig_fndecl
		  || (flags_from_decl_or_type (orig_fndecl) & ECF_CONST) == 0)
		clobbers_memory = true;
	    }
	    break;
	  default:
	    clobbers_memory = true;
	    break;
	  }
      else
	clobbers_memory = true;
    }
  else if (stmt_code == GIMPLE_ASM
	   && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
	       || gimple_vuse (stmt)))
    clobbers_memory = true;

  if (!gimple_vuse (stmt))
    return clobbers_memory;

  if (stmt_code == GIMPLE_ASSIGN)
    {
      tree base;
      op0 = gimple_assign_lhs (stmt);
      op1 = gimple_assign_rhs1 (stmt);

      if (DECL_P (op1)
	  || (REFERENCE_CLASS_P (op1)
	      && (base = get_base_address (op1))
	      && TREE_CODE (base) != SSA_NAME
	      && !is_gimple_min_invariant (base)))
	{
	  ref.ref = op1;
	  ref.is_read = true;
	  ref.is_conditional_in_stmt = false;
	  references->safe_push (ref);
	}
    }
  else if (stmt_code == GIMPLE_CALL)
    {
      unsigned i, n;
      tree ptr, type;
      unsigned int align;

      ref.is_read = false;
      if (gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_MASK_LOAD:
	    if (gimple_call_lhs (stmt) == NULL_TREE)
	      break;
	    ref.is_read = true;
	    /* FALLTHRU */
	  case IFN_MASK_STORE:
	    ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
	    align = tree_to_shwi (gimple_call_arg (stmt, 1));
	    if (ref.is_read)
	      type = TREE_TYPE (gimple_call_lhs (stmt));
	    else
	      type = TREE_TYPE (gimple_call_arg (stmt, 3));
	    if (TYPE_ALIGN (type) != align)
	      type = build_aligned_type (type, align);
	    ref.is_conditional_in_stmt = true;
	    ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
				   ptr);
	    references->safe_push (ref);
	    return false;
	  default:
	    break;
	  }

      op0 = gimple_call_lhs (stmt);
      n = gimple_call_num_args (stmt);
      for (i = 0; i < n; i++)
	{
	  op1 = gimple_call_arg (stmt, i);

	  if (DECL_P (op1)
	      || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
	    {
	      ref.ref = op1;
	      ref.is_read = true;
	      ref.is_conditional_in_stmt = false;
	      references->safe_push (ref);
	    }
	}
    }
  else
    return clobbers_memory;

  if (op0
      && (DECL_P (op0)
	  || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
    {
      ref.ref = op0;
      ref.is_read = false;
      ref.is_conditional_in_stmt = false;
      references->safe_push (ref);
    }
  return clobbers_memory;
}
/* Returns true if the loop-nest has any data reference.  */

bool
loop_nest_has_data_refs (loop_p loop)
{
  basic_block *bbs = get_loop_body (loop);
  auto_vec<data_ref_loc, 3> references;

  for (unsigned i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];
      gimple_stmt_iterator bsi;

      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	{
	  gimple *stmt = gsi_stmt (bsi);
	  get_references_in_stmt (stmt, &references);
	  if (references.length ())
	    {
	      free (bbs);
	      return true;
	    }
	}
    }
  free (bbs);
  return false;
}
/* Stores the data references in STMT to DATAREFS.  If there is an unanalyzable
   reference, returns false, otherwise returns true.  NEST is the outermost
   loop of the loop nest in which the references should be analyzed.  */

opt_result
find_data_references_in_stmt (class loop *nest, gimple *stmt,
			      vec<data_reference_p> *datarefs)
{
  auto_vec<data_ref_loc, 2> references;
  data_reference_p dr;

  if (get_references_in_stmt (stmt, &references))
    return opt_result::failure_at (stmt, "statement clobbers memory: %G",
				   stmt);

  for (const data_ref_loc &ref : references)
    {
      dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
			    loop_containing_stmt (stmt), ref.ref,
			    stmt, ref.is_read, ref.is_conditional_in_stmt);
      gcc_assert (dr != NULL);
      datarefs->safe_push (dr);
    }

  return opt_result::success ();
}
/* Stores the data references in STMT to DATAREFS.  If there is an
   unanalyzable reference, returns false, otherwise returns true.
   NEST is the outermost loop of the loop nest in which the references
   should be instantiated, LOOP is the loop in which the references
   should be analyzed.  */

bool
graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
				       vec<data_reference_p> *datarefs)
{
  auto_vec<data_ref_loc, 2> references;
  bool ret = true;
  data_reference_p dr;

  if (get_references_in_stmt (stmt, &references))
    return false;

  for (const data_ref_loc &ref : references)
    {
      dr = create_data_ref (nest, loop, ref.ref, stmt, ref.is_read,
			    ref.is_conditional_in_stmt);
      gcc_assert (dr != NULL);
      datarefs->safe_push (dr);
    }

  return ret;
}
/* Search the data references in basic block BB of LOOP, and record the
   information into DATAREFS.  Returns chrec_dont_know when failing to
   analyze a difficult case, returns NULL_TREE otherwise.  */

tree
find_data_references_in_bb (class loop *loop, basic_block bb,
			    vec<data_reference_p> *datarefs)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    {
      gimple *stmt = gsi_stmt (bsi);

      if (!find_data_references_in_stmt (loop, stmt, datarefs))
	{
	  struct data_reference *res;
	  res = XCNEW (struct data_reference);
	  datarefs->safe_push (res);

	  return chrec_dont_know;
	}
    }

  return NULL_TREE;
}
/* Search the data references in LOOP, and record the information into
   DATAREFS.  Returns chrec_dont_know when failing to analyze a
   difficult case, returns NULL_TREE otherwise.

   TODO: This function should be made smarter so that it can handle
   address arithmetic as if it were an array access, etc.  */

tree
find_data_references_in_loop (class loop *loop,
			      vec<data_reference_p> *datarefs)
{
  basic_block bb, *bbs;
  unsigned int i;

  bbs = get_loop_body_in_dom_order (loop);

  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = bbs[i];

      if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
	{
	  free (bbs);
	  return chrec_dont_know;
	}
    }
  free (bbs);

  return NULL_TREE;
}
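
/* A usage sketch (illustrative only; dump_loop_datarefs is not part of
   this file): gather and dump every data reference of LOOP, then
   release the memory.  Calling free_data_refs is safe even on the
   chrec_dont_know path, because the failure placeholder pushed by
   find_data_references_in_bb is released like any other entry.  */

static void
dump_loop_datarefs (class loop *loop, FILE *file)
{
  auto_vec<data_reference_p> datarefs;

  if (find_data_references_in_loop (loop, &datarefs) != chrec_dont_know)
    for (data_reference *dr : datarefs)
      dump_data_reference (file, dr);
  free_data_refs (datarefs);
}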
/* Return the alignment in bytes that DRB is guaranteed to have at all
   times.  */

unsigned int
dr_alignment (innermost_loop_behavior *drb)
{
  /* Get the alignment of BASE_ADDRESS + INIT.  */
  unsigned int alignment = drb->base_alignment;
  unsigned int misalignment = (drb->base_misalignment
			       + TREE_INT_CST_LOW (drb->init));
  if (misalignment != 0)
    alignment = MIN (alignment, misalignment & -misalignment);

  /* Cap it to the alignment of OFFSET.  */
  if (!integer_zerop (drb->offset))
    alignment = MIN (alignment, drb->offset_alignment);

  /* Cap it to the alignment of STEP.  */
  if (!integer_zerop (drb->step))
    alignment = MIN (alignment, drb->step_alignment);

  return alignment;
}
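
/* An illustrative standalone sketch (not GCC code) of the bit trick
   used above: X & -X isolates the lowest set bit of X, i.e. the largest
   power of two dividing X.  For example, with a 16-byte base alignment
   and a combined misalignment of 12, 12 & -12 == 4, so only 4-byte
   alignment can be guaranteed.  */

static unsigned int
lowest_pow2_divisor (unsigned int x)
{
  /* In two's complement, -X flips every bit above the lowest set bit
     of X, so the AND keeps exactly that bit.  */
  return x & -x;
}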
/* If BASE is a pointer-typed SSA name, try to find the object that it
   is based on.  Return this object X on success and store the alignment
   in bytes of BASE - &X in *ALIGNMENT_OUT.  */

static tree
get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
{
  if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL_TREE;

  gimple *def = SSA_NAME_DEF_STMT (base);
  base = analyze_scalar_evolution (loop_containing_stmt (def), base);

  /* Peel chrecs and record the minimum alignment preserved by
     all steps.  */
  unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
  while (TREE_CODE (base) == POLYNOMIAL_CHREC)
    {
      unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
      alignment = MIN (alignment, step_alignment);
      base = CHREC_LEFT (base);
    }

  /* Punt if the expression is too complicated to handle.  */
  if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL_TREE;

  /* The only useful cases are those for which a dereference folds to something
     other than an INDIRECT_REF.  */
  tree ref_type = TREE_TYPE (TREE_TYPE (base));
  tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
  if (!ref)
    return NULL_TREE;

  /* Analyze the base to which the steps we peeled were applied.  */
  poly_int64 bitsize, bitpos, bytepos;
  machine_mode mode;
  int unsignedp, reversep, volatilep;
  tree offset;
  base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
			      &unsignedp, &reversep, &volatilep);
  if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
    return NULL_TREE;

  /* Restrict the alignment to that guaranteed by the offsets.  */
  unsigned int bytepos_alignment = known_alignment (bytepos);
  if (bytepos_alignment != 0)
    alignment = MIN (alignment, bytepos_alignment);
  if (offset)
    {
      unsigned int offset_alignment = highest_pow2_factor (offset);
      alignment = MIN (alignment, offset_alignment);
    }

  *alignment_out = alignment;
  return base;
}
/* Return the object whose alignment would need to be changed in order
   to increase the alignment of ADDR.  Store the maximum achievable
   alignment in *MAX_ALIGNMENT.  */

tree
get_base_for_alignment (tree addr, unsigned int *max_alignment)
{
  tree base = get_base_for_alignment_1 (addr, max_alignment);
  if (base)
    return base;

  if (TREE_CODE (addr) == ADDR_EXPR)
    addr = TREE_OPERAND (addr, 0);
  *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
  return addr;
}
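
/* A usage sketch (illustrative only; can_align_address_p is not part of
   this file, and the vectorizer's real predicate is more involved): a
   pass that wants ADDR to be ALIGN-byte aligned asks which object would
   have to be realigned and whether that much alignment is achievable
   for it.  */

static bool
can_align_address_p (tree addr, unsigned int align)
{
  unsigned int max_alignment;
  tree base = get_base_for_alignment (addr, &max_alignment);

  /* Only declarations can have their alignment increased; bases such
     as MEM_REFs of unknown pointers cannot.  */
  return DECL_P (base) && align <= max_alignment;
}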
/* Recursive helper function.  */

static bool
find_loop_nest_1 (class loop *loop, vec<loop_p> *loop_nest)
{
  /* Inner loops of the nest should not contain siblings.  Example:
     when there are two consecutive loops,

     | loop_0
     |   loop_1
     |     A[{0, +, 1}_1]
     |   endloop_1
     |   loop_2
     |     A[{0, +, 1}_2]
     |   endloop_2
     | endloop_0

     the dependence relation cannot be captured by the distance
     abstraction.  */
  if (loop->next)
    return false;

  loop_nest->safe_push (loop);
  if (loop->inner)
    return find_loop_nest_1 (loop->inner, loop_nest);
  return true;
}
/* Return false when the LOOP is not well nested.  Otherwise return
   true and insert in LOOP_NEST the loops of the nest.  LOOP_NEST will
   contain the loops from the outermost to the innermost, as they will
   appear in the classic distance vector.  */

bool
find_loop_nest (class loop *loop, vec<loop_p> *loop_nest)
{
  loop_nest->safe_push (loop);
  if (loop->inner)
    return find_loop_nest_1 (loop->inner, loop_nest);
  return true;
}
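
/* A usage sketch (illustrative only; loop_is_analyzable_nest_p is not
   part of this file): build the nest vector before asking for distance
   vectors.  For the sibling example in find_loop_nest_1 above this
   returns false, and a client has to analyze each sibling loop
   separately.  */

static bool
loop_is_analyzable_nest_p (class loop *loop)
{
  auto_vec<loop_p, 3> loop_nest;
  return find_loop_nest (loop, &loop_nest);
}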
/* Returns true when the data dependences have been computed, false otherwise.
   Given a loop nest LOOP, the following vectors are returned:
   DATAREFS is initialized to all the array elements contained in this loop,
   DEPENDENCE_RELATIONS contains the relations between the data references.
   Compute read-read and self relations if
   COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */

bool
compute_data_dependences_for_loop (class loop *loop,
				   bool compute_self_and_read_read_dependences,
				   vec<loop_p> *loop_nest,
				   vec<data_reference_p> *datarefs,
				   vec<ddr_p> *dependence_relations)
{
  bool res = true;

  memset (&dependence_stats, 0, sizeof (dependence_stats));

  /* If the loop nest is not well formed, or one of the data references
     is not computable, give up without spending time to compute other
     dependences.  */
  if (!loop
      || !find_loop_nest (loop, loop_nest)
      || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
      || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
				   compute_self_and_read_read_dependences))
    res = false;

  if (dump_file && (dump_flags & TDF_STATS))
    {
      fprintf (dump_file, "Dependence tester statistics:\n");

      fprintf (dump_file, "Number of dependence tests: %d\n",
	       dependence_stats.num_dependence_tests);
      fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
	       dependence_stats.num_dependence_dependent);
      fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
	       dependence_stats.num_dependence_independent);
      fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
	       dependence_stats.num_dependence_undetermined);

      fprintf (dump_file, "Number of subscript tests: %d\n",
	       dependence_stats.num_subscript_tests);
      fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
	       dependence_stats.num_subscript_undetermined);
      fprintf (dump_file, "Number of same subscript function: %d\n",
	       dependence_stats.num_same_subscript_function);

      fprintf (dump_file, "Number of ziv tests: %d\n",
	       dependence_stats.num_ziv);
      fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
	       dependence_stats.num_ziv_dependent);
      fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
	       dependence_stats.num_ziv_independent);
      fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
	       dependence_stats.num_ziv_unimplemented);

      fprintf (dump_file, "Number of siv tests: %d\n",
	       dependence_stats.num_siv);
      fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
	       dependence_stats.num_siv_dependent);
      fprintf (dump_file, "Number of siv tests returning independent: %d\n",
	       dependence_stats.num_siv_independent);
      fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
	       dependence_stats.num_siv_unimplemented);

      fprintf (dump_file, "Number of miv tests: %d\n",
	       dependence_stats.num_miv);
      fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
	       dependence_stats.num_miv_dependent);
      fprintf (dump_file, "Number of miv tests returning independent: %d\n",
	       dependence_stats.num_miv_independent);
      fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
	       dependence_stats.num_miv_unimplemented);
    }

  return res;
}
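
/* A usage sketch (illustrative only; count_proven_dependences is not
   part of this file): the typical driver sequence.  DDR_ARE_DEPENDENT
   is chrec_known for a proven independence, chrec_dont_know when the
   tester gave up, and NULL_TREE for a concrete dependence with
   subscript information.  */

static unsigned
count_proven_dependences (class loop *loop)
{
  auto_vec<loop_p, 3> loop_nest;
  auto_vec<data_reference_p> datarefs;
  auto_vec<ddr_p> dependence_relations;
  unsigned count = 0;

  if (compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
					 &dependence_relations))
    for (ddr_p ddr : dependence_relations)
      if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
	count++;

  free_dependence_relations (dependence_relations);
  free_data_refs (datarefs);
  return count;
}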
/* Free the memory used by a data dependence relation DDR.  */

void
free_dependence_relation (struct data_dependence_relation *ddr)
{
  if (ddr == NULL)
    return;

  if (DDR_SUBSCRIPTS (ddr).exists ())
    free_subscripts (DDR_SUBSCRIPTS (ddr));
  DDR_DIST_VECTS (ddr).release ();
  DDR_DIR_VECTS (ddr).release ();

  free (ddr);
}
/* Free the memory used by the data dependence relations from
   DEPENDENCE_RELATIONS.  */

void
free_dependence_relations (vec<ddr_p>& dependence_relations)
{
  for (data_dependence_relation *ddr : dependence_relations)
    if (ddr)
      free_dependence_relation (ddr);

  dependence_relations.release ();
}
/* Free the memory used by the data references from DATAREFS.  */

void
free_data_refs (vec<data_reference_p>& datarefs)
{
  for (data_reference *dr : datarefs)
    free_data_ref (dr);
  datarefs.release ();
}
/* Common routine implementing both dr_direction_indicator and
   dr_zero_step_indicator.  Return USEFUL_MIN if the indicator is known
   to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
   Return the step as the indicator otherwise.  */

static tree
dr_step_indicator (struct data_reference *dr, int useful_min)
{
  tree step = DR_STEP (dr);
  STRIP_NOPS (step);
  /* Look for cases where the step is scaled by a positive constant
     integer, which will often be the access size.  If the multiplication
     doesn't change the sign (due to overflow effects) then we can
     test the unscaled value instead.  */
  if (TREE_CODE (step) == MULT_EXPR
      && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
      && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
    {
      tree factor = TREE_OPERAND (step, 1);
      step = TREE_OPERAND (step, 0);

      /* Strip widening and truncating conversions as well as nops.  */
      if (CONVERT_EXPR_P (step)
	  && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
	step = TREE_OPERAND (step, 0);
      tree type = TREE_TYPE (step);

      /* Get the range of step values that would not cause overflow.  */
      widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
			 / wi::to_widest (factor));
      widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
			 / wi::to_widest (factor));

      /* Get the range of values that the unconverted step actually has.  */
      wide_int step_min, step_max;
      value_range vr;
      if (TREE_CODE (step) != SSA_NAME
	  || !get_range_query (cfun)->range_of_expr (vr, step)
	  || vr.undefined_p ())
	{
	  step_min = wi::to_wide (TYPE_MIN_VALUE (type));
	  step_max = wi::to_wide (TYPE_MAX_VALUE (type));
	}
      else
	{
	  step_min = vr.lower_bound ();
	  step_max = vr.upper_bound ();
	}

      /* Check whether the unconverted step has an acceptable range.  */
      signop sgn = TYPE_SIGN (type);
      if (wi::les_p (minv, widest_int::from (step_min, sgn))
	  && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
	{
	  if (wi::ge_p (step_min, useful_min, sgn))
	    return ssize_int (useful_min);
	  else if (wi::lt_p (step_max, 0, sgn))
	    return ssize_int (-1);
	  else
	    return fold_convert (ssizetype, step);
	}
    }

  return DR_STEP (dr);
}
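
/* Worked example for the scaling logic above (illustrative numbers):
   suppose DR_STEP is (ssizetype) n * 4 with n of type int.  The
   divisions by FACTOR give the overflow-free band
   [SSIZE_MIN / 4, SSIZE_MAX / 4] for the unscaled step.  If range
   analysis proves n stays within that band, the multiplication cannot
   flip the sign, so n alone decides: n >= USEFUL_MIN yields
   ssize_int (USEFUL_MIN), a provably negative n yields ssize_int (-1),
   and a mixed range falls back to the unscaled step.  Without a usable
   range the function returns DR_STEP itself.  */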
/* Return a value that is negative iff DR has a negative step.  */

tree
dr_direction_indicator (struct data_reference *dr)
{
  return dr_step_indicator (dr, 0);
}
/* Return a value that is zero iff DR has a zero step.  */

tree
dr_zero_step_indicator (struct data_reference *dr)
{
  return dr_step_indicator (dr, 1);
}
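
/* A companion sketch (illustrative only; dr_known_nonzero_step_p is not
   part of this file): a nonzero constant indicator proves the step
   nonzero, mirroring dr_known_forward_stride_p below; a non-constant
   indicator is conservatively treated as "unknown".  */

static bool
dr_known_nonzero_step_p (struct data_reference *dr)
{
  tree indicator = dr_zero_step_indicator (dr);
  /* integer_nonzerop is false for non-constants, so this is
     conservative.  */
  return indicator && integer_nonzerop (indicator);
}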
/* Return true if DR is known to have a nonnegative (but possibly zero)
   step.  */

bool
dr_known_forward_stride_p (struct data_reference *dr)
{
  tree indicator = dr_direction_indicator (dr);
  tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
				   fold_convert (ssizetype, indicator),
				   ssize_int (0));
  return neg_step_val && integer_zerop (neg_step_val);
}