/* Data references and dependences detectors.
   Copyright (C) 2003-2023 Free Software Foundation, Inc.
   Contributed by Sebastian Pop <pop@cri.ensmp.fr>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass walks a given loop structure searching for array
   references.  The information about the array accesses is recorded
   in DATA_REFERENCE structures.

   The basic test for determining the dependences is:
   given two access functions chrec1 and chrec2 to the same array, and
   x and y two vectors from the iteration domain, the same element of
   the array is accessed twice at iterations x and y if and only if:
   |             chrec1 (x) == chrec2 (y).
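
   As a purely illustrative example (not part of the implementation),
   consider:

   | for (i = 0; i < N; i++)
   |   A[i + 2] = A[i];

   The write has access function chrec1 = {2, +, 1}_1 and the read has
   chrec2 = {0, +, 1}_1.  Solving chrec1 (x) == chrec2 (y) gives
   x + 2 == y: iteration y = x + 2 reads the element that iteration x
   wrote, i.e. a dependence of distance 2.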

   The goals of this analysis are:

   - to determine the independence: the relation between two
     independent accesses is qualified with the chrec_known (this
     information allows a loop parallelization),

   - when two data references access the same data, to qualify the
     dependence relation with classic dependence representations:

       - distance vectors
       - direction vectors
       - loop carried level dependence
       - polyhedron dependence
     or with the chains of recurrences based representation,

   - to define a knowledge base for storing the data dependence
     information,

   - to define an interface to access this data.


   Definitions:

   - subscript: given two array accesses a subscript is the tuple
   composed of the access functions for a given dimension.  Example:
   Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
   (f1, g1), (f2, g2), (f3, g3).

   - Diophantine equation: an equation whose coefficients and
   solutions are integer constants, for example the equation
   |   3*x + 2*y = 1
   has an integer solution x = 1 and y = -1.


   References:

   - "Advanced Compilation for High Performance Computing" by Randy
   Allen and Ken Kennedy.
   http://citeseer.ist.psu.edu/goff91practical.html

   - "Loop Transformations for Restructuring Compilers - The Foundations"
   by Utpal Banerjee.

*/

#include "coretypes.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "gimple-iterator.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-affine.h"
#include "internal-fn.h"
#include "vr-values.h"
#include "range-op.h"
#include "tree-ssa-loop-ivopts.h"

static struct datadep_stats
{
  int num_dependence_tests;
  int num_dependence_dependent;
  int num_dependence_independent;
  int num_dependence_undetermined;

  int num_subscript_tests;
  int num_subscript_undetermined;
  int num_same_subscript_function;

  int num_ziv;
  int num_ziv_independent;
  int num_ziv_dependent;
  int num_ziv_unimplemented;

  int num_siv;
  int num_siv_independent;
  int num_siv_dependent;
  int num_siv_unimplemented;

  int num_miv;
  int num_miv_independent;
  int num_miv_dependent;
  int num_miv_unimplemented;
} dependence_stats;

static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
                                           unsigned int, unsigned int,
                                           class loop *);

/* Returns true iff A divides B.  */

static inline bool
tree_fold_divides_p (const_tree a, const_tree b)
{
  gcc_assert (TREE_CODE (a) == INTEGER_CST);
  gcc_assert (TREE_CODE (b) == INTEGER_CST);
  return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
}

/* Returns true iff A divides B.  */

static inline bool
int_divides_p (lambda_int a, lambda_int b)
{
  return ((b % a) == 0);
}

/* Return true if reference REF contains a union access.  */

static bool
ref_contains_union_access_p (tree ref)
{
  while (handled_component_p (ref))
    {
      ref = TREE_OPERAND (ref, 0);
      if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
          || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
        return true;
    }
  return false;
}

/* Dump into FILE all the data references from DATAREFS.  */

static void
dump_data_references (FILE *file, vec<data_reference_p> datarefs)
{
  for (data_reference *dr : datarefs)
    dump_data_reference (file, dr);
}

/* Unified dump into FILE all the data references from DATAREFS.  */

DEBUG_FUNCTION void
debug (vec<data_reference_p> &ref)
{
  dump_data_references (stderr, ref);
}

DEBUG_FUNCTION void
debug (vec<data_reference_p> *ptr)
{
  if (ptr)
    debug (*ptr);
  else
    fprintf (stderr, "<nil>\n");
}

/* Dump into STDERR all the data references from DATAREFS.  */

DEBUG_FUNCTION void
debug_data_references (vec<data_reference_p> datarefs)
{
  dump_data_references (stderr, datarefs);
}

/* Print to STDERR the data_reference DR.  */

DEBUG_FUNCTION void
debug_data_reference (struct data_reference *dr)
{
  dump_data_reference (stderr, dr);
}

/* Dump function for a DATA_REFERENCE structure.  */

void
dump_data_reference (FILE *outf,
                     struct data_reference *dr)
{
  unsigned int i;

  fprintf (outf, "#(Data Ref: \n");
  fprintf (outf, "#  bb: %d \n", gimple_bb (DR_STMT (dr))->index);
  fprintf (outf, "#  stmt: ");
  print_gimple_stmt (outf, DR_STMT (dr), 0);
  fprintf (outf, "#  ref: ");
  print_generic_stmt (outf, DR_REF (dr));
  fprintf (outf, "#  base_object: ");
  print_generic_stmt (outf, DR_BASE_OBJECT (dr));

  for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
    {
      fprintf (outf, "#  Access function %d: ", i);
      print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
    }
  fprintf (outf, "#)\n");
}

/* Unified dump function for a DATA_REFERENCE structure.  */

DEBUG_FUNCTION void
debug (data_reference &ref)
{
  dump_data_reference (stderr, &ref);
}

DEBUG_FUNCTION void
debug (data_reference *ptr)
{
  if (ptr)
    debug (*ptr);
  else
    fprintf (stderr, "<nil>\n");
}

/* Dumps the affine function described by FN to the file OUTF.  */

static void
dump_affine_function (FILE *outf, affine_fn fn)
{
  unsigned i;
  tree coef;

  print_generic_expr (outf, fn[0], TDF_SLIM);
  for (i = 1; fn.iterate (i, &coef); i++)
    {
      fprintf (outf, " + ");
      print_generic_expr (outf, coef, TDF_SLIM);
      fprintf (outf, " * x_%u", i);
    }
}

/* Dumps the conflict function CF to the file OUTF.  */

static void
dump_conflict_function (FILE *outf, conflict_function *cf)
{
  unsigned i;

  if (cf->n == NO_DEPENDENCE)
    fprintf (outf, "no dependence");
  else if (cf->n == NOT_KNOWN)
    fprintf (outf, "not known");
  else
    {
      for (i = 0; i < cf->n; i++)
        {
          if (i != 0)
            fprintf (outf, " ");
          fprintf (outf, "[");
          dump_affine_function (outf, cf->fns[i]);
          fprintf (outf, "]");
        }
    }
}

/* Dump function for a SUBSCRIPT structure.  */

static void
dump_subscript (FILE *outf, struct subscript *subscript)
{
  conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);

  fprintf (outf, "\n (subscript \n");
  fprintf (outf, "  iterations_that_access_an_element_twice_in_A: ");
  dump_conflict_function (outf, cf);
  if (CF_NONTRIVIAL_P (cf))
    {
      tree last_iteration = SUB_LAST_CONFLICT (subscript);
      fprintf (outf, "\n  last_conflict: ");
      print_generic_expr (outf, last_iteration);
    }

  cf = SUB_CONFLICTS_IN_B (subscript);
  fprintf (outf, "\n  iterations_that_access_an_element_twice_in_B: ");
  dump_conflict_function (outf, cf);
  if (CF_NONTRIVIAL_P (cf))
    {
      tree last_iteration = SUB_LAST_CONFLICT (subscript);
      fprintf (outf, "\n  last_conflict: ");
      print_generic_expr (outf, last_iteration);
    }

  fprintf (outf, "\n  (Subscript distance: ");
  print_generic_expr (outf, SUB_DISTANCE (subscript));
  fprintf (outf, " ))\n");
}

/* Print the classic direction vector DIRV to OUTF.  */

DEBUG_FUNCTION void
print_direction_vector (FILE *outf,
                        lambda_vector dirv,
                        int length)
{
  int eq;

  for (eq = 0; eq < length; eq++)
    {
      enum data_dependence_direction dir = ((enum data_dependence_direction)
                                            dirv[eq]);
      switch (dir)
        {
        case dir_positive:
          fprintf (outf, "    +");
          break;
        case dir_negative:
          fprintf (outf, "    -");
          break;
        case dir_equal:
          fprintf (outf, "    =");
          break;
        case dir_positive_or_equal:
          fprintf (outf, "   +=");
          break;
        case dir_positive_or_negative:
          fprintf (outf, "   +-");
          break;
        case dir_negative_or_equal:
          fprintf (outf, "   -=");
          break;
        case dir_star:
          fprintf (outf, "    *");
          break;
        default:
          fprintf (outf, "indep");
          break;
        }
    }
  fprintf (outf, "\n");
}

/* Print a vector of direction vectors.  */

DEBUG_FUNCTION void
print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
                   int length)
{
  for (lambda_vector v : dir_vects)
    print_direction_vector (outf, v, length);
}

/* Print out a vector VEC of length N to OUTFILE.  */

DEBUG_FUNCTION void
print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
{
  int i;

  for (i = 0; i < n; i++)
    fprintf (outfile, HOST_WIDE_INT_PRINT_DEC " ", vector[i]);
  fprintf (outfile, "\n");
}

/* Print a vector of distance vectors.  */

DEBUG_FUNCTION void
print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
                    int length)
{
  for (lambda_vector v : dist_vects)
    print_lambda_vector (outf, v, length);
}

/* Dump function for a DATA_DEPENDENCE_RELATION structure.  */

DEBUG_FUNCTION void
dump_data_dependence_relation (FILE *outf, const data_dependence_relation *ddr)
{
  struct data_reference *dra, *drb;

  fprintf (outf, "(Data Dep: \n");

  if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (ddr)
        {
          dra = DDR_A (ddr);
          drb = DDR_B (ddr);
          if (dra)
            dump_data_reference (outf, dra);
          else
            fprintf (outf, "    (nil)\n");
          if (drb)
            dump_data_reference (outf, drb);
          else
            fprintf (outf, "    (nil)\n");
        }
      fprintf (outf, "    (don't know)\n)\n");
      return;
    }

  dra = DDR_A (ddr);
  drb = DDR_B (ddr);
  dump_data_reference (outf, dra);
  dump_data_reference (outf, drb);

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    fprintf (outf, "    (no dependence)\n");

  else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      unsigned int i;
      struct loop *loopi;

      subscript *sub;
      FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
        {
          fprintf (outf, "  access_fn_A: ");
          print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
          fprintf (outf, "  access_fn_B: ");
          print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
          dump_subscript (outf, sub);
        }

      fprintf (outf, "  loop nest: (");
      FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
        fprintf (outf, "%d ", loopi->num);
      fprintf (outf, ")\n");

      for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
        {
          fprintf (outf, "  distance_vector: ");
          print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
                               DDR_NB_LOOPS (ddr));
        }

      for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
        {
          fprintf (outf, "  direction_vector: ");
          print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
                                  DDR_NB_LOOPS (ddr));
        }
    }

  fprintf (outf, ")\n");
}

DEBUG_FUNCTION void
debug_data_dependence_relation (const struct data_dependence_relation *ddr)
{
  dump_data_dependence_relation (stderr, ddr);
}

/* Dump into FILE all the dependence relations from DDRS.  */

void
dump_data_dependence_relations (FILE *file, const vec<ddr_p> &ddrs)
{
  for (auto ddr : ddrs)
    dump_data_dependence_relation (file, ddr);
}

DEBUG_FUNCTION void
debug (vec<ddr_p> &ref)
{
  dump_data_dependence_relations (stderr, ref);
}

DEBUG_FUNCTION void
debug (vec<ddr_p> *ptr)
{
  if (ptr)
    debug (*ptr);
  else
    fprintf (stderr, "<nil>\n");
}

/* Dump to STDERR all the dependence relations from DDRS.  */

DEBUG_FUNCTION void
debug_data_dependence_relations (vec<ddr_p> ddrs)
{
  dump_data_dependence_relations (stderr, ddrs);
}

/* Dumps the distance and direction vectors in FILE.  DDRS contains
   the dependence relations, and VECT_SIZE is the size of the
   dependence vectors, or in other words the number of loops in the
   considered nest.  */

DEBUG_FUNCTION void
dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
{
  for (data_dependence_relation *ddr : ddrs)
    if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
      {
        for (lambda_vector v : DDR_DIST_VECTS (ddr))
          {
            fprintf (file, "DISTANCE_V (");
            print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
            fprintf (file, ")\n");
          }

        for (lambda_vector v : DDR_DIR_VECTS (ddr))
          {
            fprintf (file, "DIRECTION_V (");
            print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
            fprintf (file, ")\n");
          }
      }

  fprintf (file, "\n\n");
}

/* Dumps the data dependence relations DDRS in FILE.  */

DEBUG_FUNCTION void
dump_ddrs (FILE *file, vec<ddr_p> ddrs)
{
  for (data_dependence_relation *ddr : ddrs)
    dump_data_dependence_relation (file, ddr);
  fprintf (file, "\n\n");
}

DEBUG_FUNCTION void
debug_ddrs (vec<ddr_p> ddrs)
{
  dump_ddrs (stderr, ddrs);
}

/* If RESULT_RANGE is nonnull, set *RESULT_RANGE to the range of
   OP0 CODE OP1, where:

   - OP0 CODE OP1 has integral type TYPE
   - the range of OP0 is given by OP0_RANGE and
   - the range of OP1 is given by OP1_RANGE.

   Independently of RESULT_RANGE, try to compute:

     DELTA = ((sizetype) OP0 CODE (sizetype) OP1)
             - (sizetype) (OP0 CODE OP1)

   as a constant and subtract DELTA from the ssizetype constant in *OFF.
   Return true on success, or false if DELTA is not known at compile time.

   Truncation and sign changes are known to distribute over CODE, i.e.

     (itype) (A CODE B) == (itype) A CODE (itype) B

   for any integral type ITYPE whose precision is no greater than the
   precision of A and B.  */
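
/* A small worked example of DELTA, for illustration only: take a
   16-bit TYPE with wrapping overflow, CODE = PLUS_EXPR and
   OP0 = OP1 = 20000.  In TYPE, OP0 + OP1 wraps to -25536, whereas
   (sizetype) OP0 + (sizetype) OP1 is 40000.  Computed in the modular
   sizetype arithmetic, the difference is 65536, exactly one wrap of
   the 16-bit type, so DELTA = 65536 here.  */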

static bool
compute_distributive_range (tree type, value_range &op0_range,
                            tree_code code, value_range &op1_range,
                            tree *off, value_range *result_range)
{
  gcc_assert (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type));
  if (result_range)
    {
      range_op_handler op (code, type);
      if (!op.fold_range (*result_range, type, op0_range, op1_range))
        result_range->set_varying (type);
    }

  /* The distributive property guarantees that if TYPE is no narrower
     than SIZETYPE,

       (sizetype) (OP0 CODE OP1) == (sizetype) OP0 CODE (sizetype) OP1

     and so we can treat DELTA as zero.  */
  if (TYPE_PRECISION (type) >= TYPE_PRECISION (sizetype))
    return true;

  /* If overflow is undefined, we can assume that:

       X == (ssizetype) OP0 CODE (ssizetype) OP1

     is within the range of TYPE, i.e.:

       X == (ssizetype) (TYPE) X

     Distributing the (TYPE) truncation over X gives:

       X == (ssizetype) (OP0 CODE OP1)

     Casting both sides to sizetype and distributing the sizetype cast
     over X gives:

       (sizetype) OP0 CODE (sizetype) OP1 == (sizetype) (OP0 CODE OP1)

     and so we can treat DELTA as zero.  */
  if (TYPE_OVERFLOW_UNDEFINED (type))
    return true;

  /* Compute the range of:

       (ssizetype) OP0 CODE (ssizetype) OP1

     The distributive property guarantees that this has the same bitpattern as:

       (sizetype) OP0 CODE (sizetype) OP1

     but its range is more conducive to analysis.  */
  range_cast (op0_range, ssizetype);
  range_cast (op1_range, ssizetype);
  value_range wide_range;
  range_op_handler op (code, ssizetype);
  bool saved_flag_wrapv = flag_wrapv;
  flag_wrapv = 1;
  if (!op.fold_range (wide_range, ssizetype, op0_range, op1_range))
    wide_range.set_varying (ssizetype);
  flag_wrapv = saved_flag_wrapv;
  if (wide_range.num_pairs () != 1 || !range_int_cst_p (&wide_range))
    return false;

  wide_int lb = wide_range.lower_bound ();
  wide_int ub = wide_range.upper_bound ();

  /* Calculate the number of times that each end of the range overflows or
     underflows TYPE.  We can only calculate DELTA if the numbers match.  */
  unsigned int precision = TYPE_PRECISION (type);
  if (!TYPE_UNSIGNED (type))
    {
      wide_int type_min = wi::mask (precision - 1, true, lb.get_precision ());
      lb -= type_min;
      ub -= type_min;
    }
  wide_int upper_bits = wi::mask (precision, true, lb.get_precision ());
  lb &= upper_bits;
  ub &= upper_bits;
  if (lb != ub)
    return false;

  /* OP0 CODE OP1 overflows exactly arshift (LB, PRECISION) times, with
     negative values indicating underflow.  The low PRECISION bits of LB
     are clear, so DELTA is therefore LB (== UB).  */
  *off = wide_int_to_tree (ssizetype, wi::to_wide (*off) - lb);
  return true;
}

/* Return true if (sizetype) OP == (sizetype) (TO_TYPE) OP,
   given that OP has type FROM_TYPE and range RANGE.  Both TO_TYPE and
   FROM_TYPE are integral types.  */
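
/* For illustration (an assumed scenario, not derived from any caller):
   if FROM_TYPE is a 16-bit unsigned type and TO_TYPE is a 32-bit signed
   type, every FROM_TYPE value is representable in TO_TYPE, so
   (sizetype) (TO_TYPE) OP == (sizetype) OP regardless of RANGE, and the
   function below returns true via the representability check.  */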

static bool
nop_conversion_for_offset_p (tree to_type, tree from_type, value_range &range)
{
  gcc_assert (INTEGRAL_TYPE_P (to_type)
              && INTEGRAL_TYPE_P (from_type)
              && !TYPE_OVERFLOW_TRAPS (to_type)
              && !TYPE_OVERFLOW_TRAPS (from_type));

  /* Converting to something no narrower than sizetype and then to sizetype
     is equivalent to converting directly to sizetype.  */
  if (TYPE_PRECISION (to_type) >= TYPE_PRECISION (sizetype))
    return true;

  /* Check whether TO_TYPE can represent all values that FROM_TYPE can.  */
  if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type)
      && (TYPE_UNSIGNED (from_type) || !TYPE_UNSIGNED (to_type)))
    return true;

  /* For narrowing conversions, we could in principle test whether
     the bits in FROM_TYPE but not in TO_TYPE have a fixed value
     and apply a constant adjustment.

     For other conversions (which involve a sign change) we could
     check that the signs are always equal, and apply a constant
     adjustment if the signs are negative.

     However, both cases should be rare.  */
  return range_fits_type_p (&range, TYPE_PRECISION (to_type),
                            TYPE_SIGN (to_type));
}

static void
split_constant_offset (tree type, tree *var, tree *off,
                       value_range *result_range,
                       hash_map<tree, std::pair<tree, tree> > &cache,
                       unsigned *limit);

/* Helper function for split_constant_offset.  If TYPE is a pointer type,
   try to express OP0 CODE OP1 as:

     POINTER_PLUS <*VAR, (sizetype) *OFF>

   where:

   - *VAR has type TYPE
   - *OFF is a constant of type ssizetype.

   If TYPE is an integral type, try to express (sizetype) (OP0 CODE OP1) as:

     *VAR + (sizetype) *OFF

   where:

   - *VAR has type sizetype
   - *OFF is a constant of type ssizetype.

   In both cases, OP0 CODE OP1 has type TYPE.

   Return true on success.  A false return value indicates that we can't
   do better than set *OFF to zero.

   When returning true, set RESULT_RANGE to the range of OP0 CODE OP1,
   if RESULT_RANGE is nonnull and if we can do better than assume VR_VARYING.

   CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
   visited.  LIMIT counts down the number of SSA names that we are
   allowed to process before giving up.  */
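
/* Sketch of the common case, for illustration only: with TYPE = int,
   OP0 = i_1, CODE = PLUS_EXPR and OP1 = 4, the result is
   *VAR = (sizetype) i_1 and *OFF = 4, relying on the fact that
   (sizetype) (i_1 + 4) == (sizetype) i_1 + 4 whenever the addition
   does not wrap in TYPE (see compute_distributive_range).  */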

static bool
split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
                         tree *var, tree *off, value_range *result_range,
                         hash_map<tree, std::pair<tree, tree> > &cache,
                         unsigned *limit)
{
  tree var0, var1;
  tree off0, off1;
  value_range op0_range, op1_range;

  *var = NULL_TREE;
  *off = NULL_TREE;

  if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type))
    return false;

  switch (code)
    {
    case INTEGER_CST:
      *var = size_int (0);
      *off = fold_convert (ssizetype, op0);
      if (result_range)
        result_range->set (op0, op0);
      return true;

    case POINTER_PLUS_EXPR:
      split_constant_offset (op0, &var0, &off0, nullptr, cache, limit);
      split_constant_offset (op1, &var1, &off1, nullptr, cache, limit);
      *var = fold_build2 (POINTER_PLUS_EXPR, type, var0, var1);
      *off = size_binop (PLUS_EXPR, off0, off1);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
      split_constant_offset (op1, &var1, &off1, &op1_range, cache, limit);
      *off = size_binop (code, off0, off1);
      if (!compute_distributive_range (type, op0_range, code, op1_range,
                                       off, result_range))
        return false;
      *var = fold_build2 (code, sizetype, var0, var1);
      return true;

    case MULT_EXPR:
      if (TREE_CODE (op1) != INTEGER_CST)
        return false;

      split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
      op1_range.set (op1, op1);
      *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
      if (!compute_distributive_range (type, op0_range, code, op1_range,
                                       off, result_range))
        return false;
      *var = fold_build2 (MULT_EXPR, sizetype, var0,
                          fold_convert (sizetype, op1));
      return true;

    case ADDR_EXPR:
      {
        tree base, poffset;
        poly_int64 pbitsize, pbitpos, pbytepos;
        machine_mode pmode;
        int punsignedp, preversep, pvolatilep;

        op0 = TREE_OPERAND (op0, 0);
        base
          = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
                                 &punsignedp, &preversep, &pvolatilep);

        if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
          return false;
        base = build_fold_addr_expr (base);
        off0 = ssize_int (pbytepos);

        if (poffset)
          {
            split_constant_offset (poffset, &poffset, &off1, nullptr,
                                   cache, limit);
            off0 = size_binop (PLUS_EXPR, off0, off1);
            base = fold_build_pointer_plus (base, poffset);
          }

        var0 = fold_convert (type, base);

        /* If variable length types are involved, punt, otherwise casts
           might be converted into ARRAY_REFs in gimplify_conversion.
           To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
           possibly no longer appears in current GIMPLE, might resurface.
           This perhaps could run
           if (CONVERT_EXPR_P (var0))
             {
               gimplify_conversion (&var0);
               // Attempt to fill in any within var0 found ARRAY_REF's
               // element size from corresponding op embedded ARRAY_REF,
               // if unsuccessful, just punt.
             }  */
        while (POINTER_TYPE_P (type))
          type = TREE_TYPE (type);
        if (int_size_in_bytes (type) < 0)
          return false;

        *var = var0;
        *off = off0;
        return true;
      }

    case SSA_NAME:
      {
        if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
          return false;

        gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
        enum tree_code subcode;

        if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
          return false;

        subcode = gimple_assign_rhs_code (def_stmt);

        /* We are using a cache to avoid un-CSEing large amounts of code.  */
        bool use_cache = false;
        if (!has_single_use (op0)
            && (subcode == POINTER_PLUS_EXPR
                || subcode == PLUS_EXPR
                || subcode == MINUS_EXPR
                || subcode == MULT_EXPR
                || subcode == ADDR_EXPR
                || CONVERT_EXPR_CODE_P (subcode)))
          {
            use_cache = true;
            bool existed;
            std::pair<tree, tree> &e = cache.get_or_insert (op0, &existed);
            if (existed)
              {
                if (integer_zerop (e.second))
                  return false;
                *var = e.first;
                *off = e.second;
                /* The caller sets the range in this case.  */
                return true;
              }
            e = std::make_pair (op0, ssize_int (0));
          }

        if (*limit == 0)
          return false;
        --*limit;

        var0 = gimple_assign_rhs1 (def_stmt);
        var1 = gimple_assign_rhs2 (def_stmt);

        bool res = split_constant_offset_1 (type, var0, subcode, var1,
                                            var, off, nullptr, cache, limit);
        if (res && use_cache)
          *cache.get (op0) = std::make_pair (*var, *off);
        /* The caller sets the range in this case.  */
        return res;
      }

    CASE_CONVERT:
      {
        /* We can only handle the following conversions:

           - Conversions from one pointer type to another pointer type.

           - Conversions from one non-trapping integral type to another
             non-trapping integral type.  In this case, the recursive
             call makes sure that:

               (sizetype) OP0

             can be expressed as a sizetype operation involving VAR and OFF,
             and all we need to do is check whether:

               (sizetype) OP0 == (sizetype) (TYPE) OP0

           - Conversions from a non-trapping sizetype-size integral type to
             a like-sized pointer type.  In this case, the recursive call
             makes sure that:

               (sizetype) OP0 == *VAR + (sizetype) *OFF

             and we can convert that to:

               POINTER_PLUS <(TYPE) *VAR, (sizetype) *OFF>

           - Conversions from a sizetype-sized pointer type to a like-sized
             non-trapping integral type.  In this case, the recursive call
             makes sure that:

               OP0 == POINTER_PLUS <*VAR, (sizetype) *OFF>

             where the POINTER_PLUS and *VAR have the same precision as
             TYPE (and the same precision as sizetype).  Then:

               (sizetype) (TYPE) OP0 == (sizetype) *VAR + (sizetype) *OFF.  */
        tree itype = TREE_TYPE (op0);
        if ((POINTER_TYPE_P (itype)
             || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
            && (POINTER_TYPE_P (type)
                || (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type)))
            && (POINTER_TYPE_P (type) == POINTER_TYPE_P (itype)
                || (TYPE_PRECISION (type) == TYPE_PRECISION (sizetype)
                    && TYPE_PRECISION (itype) == TYPE_PRECISION (sizetype))))
          {
            if (POINTER_TYPE_P (type))
              {
                split_constant_offset (op0, var, off, nullptr, cache, limit);
                *var = fold_convert (type, *var);
              }
            else if (POINTER_TYPE_P (itype))
              {
                split_constant_offset (op0, var, off, nullptr, cache, limit);
                *var = fold_convert (sizetype, *var);
              }
            else
              {
                split_constant_offset (op0, var, off, &op0_range,
                                       cache, limit);
                if (!nop_conversion_for_offset_p (type, itype, op0_range))
                  return false;
                if (result_range)
                  {
                    *result_range = op0_range;
                    range_cast (*result_range, type);
                  }
              }
            return true;
          }
        return false;
      }

    default:
      return false;
    }
}

/* If EXP has pointer type, try to express it as:

     POINTER_PLUS <*VAR, (sizetype) *OFF>

   where:

   - *VAR has the same type as EXP
   - *OFF is a constant of type ssizetype.

   If EXP has an integral type, try to express (sizetype) EXP as:

     *VAR + (sizetype) *OFF

   where:

   - *VAR has type sizetype
   - *OFF is a constant of type ssizetype.

   If EXP_RANGE is nonnull, set it to the range of EXP.

   CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
   visited.  LIMIT counts down the number of SSA names that we are
   allowed to process before giving up.  */

static void
split_constant_offset (tree exp, tree *var, tree *off, value_range *exp_range,
                       hash_map<tree, std::pair<tree, tree> > &cache,
                       unsigned *limit)
{
  tree type = TREE_TYPE (exp), op0, op1;
  enum tree_code code;

  code = TREE_CODE (exp);
  if (exp_range)
    {
      exp_range->set_varying (type);
      if (code == SSA_NAME)
        {
          value_range vr;
          get_range_query (cfun)->range_of_expr (vr, exp);
          if (vr.undefined_p ())
            vr.set_varying (TREE_TYPE (exp));
          wide_int var_min = wi::to_wide (vr.min ());
          wide_int var_max = wi::to_wide (vr.max ());
          value_range_kind vr_kind = vr.kind ();
          wide_int var_nonzero = get_nonzero_bits (exp);
          vr_kind = intersect_range_with_nonzero_bits (vr_kind,
                                                       &var_min, &var_max,
                                                       var_nonzero,
                                                       TYPE_SIGN (type));
          /* This check for VR_VARYING is here because the old code
             using get_range_info would return VR_RANGE for the entire
             domain, instead of VR_VARYING.  The new code normalizes
             full-domain ranges to VR_VARYING.  */
          if (vr_kind == VR_RANGE || vr_kind == VR_VARYING)
            *exp_range = value_range (type, var_min, var_max);
        }
    }

  if (!tree_is_chrec (exp)
      && get_gimple_rhs_class (TREE_CODE (exp)) != GIMPLE_TERNARY_RHS)
    {
      extract_ops_from_tree (exp, &code, &op0, &op1);
      if (split_constant_offset_1 (type, op0, code, op1, var, off,
                                   exp_range, cache, limit))
        return;
    }

  *var = exp;
  if (INTEGRAL_TYPE_P (type))
    *var = fold_convert (sizetype, *var);
  *off = ssize_int (0);

  value_range r;
  if (exp_range && code != SSA_NAME
      && get_range_query (cfun)->range_of_expr (r, exp)
      && !r.undefined_p ())
    *exp_range = r;
}

/* Expresses EXP as VAR + OFF, where OFF is a constant.  VAR has the same
   type as EXP while OFF has type ssizetype.  */
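
/* Illustrative use (hypothetical values): splitting the address
   EXP = &a[i_2 + 7], where a is an array of 4-byte ints, yields
   *VAR = &a[i_2] (as a POINTER_PLUS expression) and *OFF = 28,
   i.e. the constant byte offset 7 * 4 has been peeled off.  */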

void
split_constant_offset (tree exp, tree *var, tree *off)
{
  unsigned limit = param_ssa_name_def_chain_limit;
  static hash_map<tree, std::pair<tree, tree> > *cache;
  if (!cache)
    cache = new hash_map<tree, std::pair<tree, tree> > (37);
  split_constant_offset (exp, var, off, nullptr, *cache, &limit);
  *var = fold_convert (TREE_TYPE (exp), *var);
  cache->empty ();
}

/* Returns the address ADDR of an object in a canonical shape (without nop
   casts, and with type of pointer to the object).  */

static tree
canonicalize_base_object_address (tree addr)
{
  STRIP_NOPS (addr);

  /* The base address may be obtained by casting from integer, in that case
     keep the cast.  */
  if (!POINTER_TYPE_P (TREE_TYPE (addr)))
    return addr;

  if (TREE_CODE (addr) != ADDR_EXPR)
    return addr;

  return build_fold_addr_expr (TREE_OPERAND (addr, 0));
}

/* Analyze the behavior of memory reference REF within STMT.
   There are two modes:

   - BB analysis.  In this case we simply split the address into base,
     init and offset components, without reference to any containing loop.
     The resulting base and offset are general expressions and they can
     vary arbitrarily from one iteration of the containing loop to the next.
     The step is always zero.

   - loop analysis.  In this case we analyze the reference both wrt LOOP
     and on the basis that the reference occurs (is "used") in LOOP;
     see the comment above analyze_scalar_evolution_in_loop for more
     information about this distinction.  The base, init, offset and
     step fields are all invariant in LOOP.

   Perform BB analysis if LOOP is null, or if LOOP is the function's
   dummy outermost loop.  In other cases perform loop analysis.

   Return true if the analysis succeeded and store the results in DRB if so.
   BB analysis can only fail for bitfield or reversed-storage accesses.  */
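
/* As an illustration (values assumed, not computed here): for a[i]
   with 4-byte elements in "for (i = 0; i < n; i++) ... a[i] ...",
   loop analysis produces base_address = &a, offset = 0, init = 0 and
   step = 4, while BB analysis of the same reference keeps i inside
   the (variable) offset and sets the step to zero.  */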

opt_result
dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
                      class loop *loop, const gimple *stmt)
{
  poly_int64 pbitsize, pbitpos;
  tree base, poffset;
  machine_mode pmode;
  int punsignedp, preversep, pvolatilep;
  affine_iv base_iv, offset_iv;
  tree init, dinit, step;
  bool in_loop = (loop && loop->num);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "analyze_innermost: ");

  base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
                              &punsignedp, &preversep, &pvolatilep);
  gcc_assert (base != NULL_TREE);

  poly_int64 pbytepos;
  if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
    return opt_result::failure_at (stmt,
                                   "failed: bit offset alignment.\n");

  if (preversep)
    return opt_result::failure_at (stmt,
                                   "failed: reverse storage order.\n");

  /* Calculate the alignment and misalignment for the inner reference.  */
  unsigned HOST_WIDE_INT bit_base_misalignment;
  unsigned int bit_base_alignment;
  get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);

  /* There are no bitfield references remaining in BASE, so the values
     we got back must be whole bytes.  */
  gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
              && bit_base_misalignment % BITS_PER_UNIT == 0);
  unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
  poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;

  if (TREE_CODE (base) == MEM_REF)
    {
      if (!integer_zerop (TREE_OPERAND (base, 1)))
        {
          /* Subtract MOFF from the base and add it to POFFSET instead.
             Adjust the misalignment to reflect the amount we subtracted.  */
          poly_offset_int moff = mem_ref_offset (base);
          base_misalignment -= moff.force_shwi ();
          tree mofft = wide_int_to_tree (sizetype, moff);
          if (!poffset)
            poffset = mofft;
          else
            poffset = size_binop (PLUS_EXPR, poffset, mofft);
        }
      base = TREE_OPERAND (base, 0);
    }
  else
    base = build_fold_addr_expr (base);

  if (in_loop)
    {
      if (!simple_iv (loop, loop, base, &base_iv, true))
        return opt_result::failure_at
          (stmt, "failed: evolution of base is not affine.\n");
    }
  else
    {
      base_iv.base = base;
      base_iv.step = ssize_int (0);
      base_iv.no_overflow = true;
    }

  if (!poffset)
    {
      offset_iv.base = ssize_int (0);
      offset_iv.step = ssize_int (0);
    }
  else
    {
      if (!in_loop)
        {
          offset_iv.base = poffset;
          offset_iv.step = ssize_int (0);
        }
      else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
        return opt_result::failure_at
          (stmt, "failed: evolution of offset is not affine.\n");
    }

  init = ssize_int (pbytepos);

  /* Subtract any constant component from the base and add it to INIT instead.
     Adjust the misalignment to reflect the amount we subtracted.  */
  split_constant_offset (base_iv.base, &base_iv.base, &dinit);
  init = size_binop (PLUS_EXPR, init, dinit);
  base_misalignment -= TREE_INT_CST_LOW (dinit);

  split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
  init = size_binop (PLUS_EXPR, init, dinit);

  step = size_binop (PLUS_EXPR,
                     fold_convert (ssizetype, base_iv.step),
                     fold_convert (ssizetype, offset_iv.step));

  base = canonicalize_base_object_address (base_iv.base);

  /* See if get_pointer_alignment can guarantee a higher alignment than
     the one we calculated above.  */
  unsigned HOST_WIDE_INT alt_misalignment;
  unsigned int alt_alignment;
  get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);

  /* As above, these values must be whole bytes.  */
  gcc_assert (alt_alignment % BITS_PER_UNIT == 0
              && alt_misalignment % BITS_PER_UNIT == 0);
  alt_alignment /= BITS_PER_UNIT;
  alt_misalignment /= BITS_PER_UNIT;

  if (base_alignment < alt_alignment)
    {
      base_alignment = alt_alignment;
      base_misalignment = alt_misalignment;
    }

  drb->base_address = base;
  drb->offset = fold_convert (ssizetype, offset_iv.base);
  drb->init = init;
  drb->step = step;
  if (known_misalignment (base_misalignment, base_alignment,
                          &drb->base_misalignment))
    drb->base_alignment = base_alignment;
  else
    {
      drb->base_alignment = known_alignment (base_misalignment);
      drb->base_misalignment = 0;
    }
  drb->offset_alignment = highest_pow2_factor (offset_iv.base);
  drb->step_alignment = highest_pow2_factor (step);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "success.\n");

  return opt_result::success ();
}

/* Return true if OP is a valid component reference for a DR access
   function.  This accepts a subset of what handled_component_p accepts.  */

static bool
access_fn_component_p (tree op)
{
  switch (TREE_CODE (op))
    {
    case REALPART_EXPR:
    case IMAGPART_EXPR:
    case ARRAY_REF:
      return true;

    case COMPONENT_REF:
      return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;

    default:
      return false;
    }
}

/* Returns whether BASE can have an access_fn_component_p with BASE
   as base.  */

static bool
base_supports_access_fn_components_p (tree base)
{
  switch (TREE_CODE (TREE_TYPE (base)))
    {
    case COMPLEX_TYPE:
    case ARRAY_TYPE:
    case RECORD_TYPE:
      return true;
    default:
      return false;
    }
}

/* Determines the base object and the list of indices of memory reference
   DR, analyzed in LOOP and instantiated before NEST.  */
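
/* Illustrative example (assumed shapes, not taken from a testcase):
   for a[i][j] analyzed in its loop nest, the base object becomes a
   MEM_REF of &a and the access functions are pushed innermost
   dimension first, e.g. {0, +, 1}_2 for j followed by {0, +, 1}_1
   for i.  */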

static void
dr_analyze_indices (struct indices *dri, tree ref, edge nest, loop_p loop)
{
  /* If analyzing a basic-block there are no indices to analyze
     and thus no access functions.  */
  if (!nest)
    {
      dri->base_object = ref;
      dri->access_fns.create (0);
      return;
    }

  vec<tree> access_fns = vNULL;

  /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
     into a two element array with a constant index.  The base is
     then just the immediate underlying object.  */
  if (TREE_CODE (ref) == REALPART_EXPR)
    {
      ref = TREE_OPERAND (ref, 0);
      access_fns.safe_push (integer_zero_node);
    }
  else if (TREE_CODE (ref) == IMAGPART_EXPR)
    {
      ref = TREE_OPERAND (ref, 0);
      access_fns.safe_push (integer_one_node);
    }

  /* Analyze access functions of dimensions we know to be independent.
     The list of component references handled here should be kept in
     sync with access_fn_component_p.  */
  while (handled_component_p (ref))
    {
      if (TREE_CODE (ref) == ARRAY_REF)
        {
          tree op = TREE_OPERAND (ref, 1);
          tree access_fn = analyze_scalar_evolution (loop, op);
          access_fn = instantiate_scev (nest, loop, access_fn);
          access_fns.safe_push (access_fn);
        }
      else if (TREE_CODE (ref) == COMPONENT_REF
               && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
        {
          /* For COMPONENT_REFs of records (but not unions!) use the
             FIELD_DECL offset as constant access function so we can
             disambiguate a[i].f1 and a[i].f2.  */
          tree off = component_ref_field_offset (ref);
          off = size_binop (PLUS_EXPR,
                            size_binop (MULT_EXPR,
                                        fold_convert (bitsizetype, off),
                                        bitsize_int (BITS_PER_UNIT)),
                            DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
          access_fns.safe_push (off);
        }
      else
        /* If we have an unhandled component we could not translate
           to an access function stop analyzing.  We have determined
           our base object in this case.  */
        break;

      ref = TREE_OPERAND (ref, 0);
    }

  /* If the address operand of a MEM_REF base has an evolution in the
     analyzed nest, add it as an additional independent access-function.  */
  if (TREE_CODE (ref) == MEM_REF)
    {
      tree op = TREE_OPERAND (ref, 0);
      tree access_fn = analyze_scalar_evolution (loop, op);
      access_fn = instantiate_scev (nest, loop, access_fn);
      STRIP_NOPS (access_fn);
      if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
        {
          tree memoff = TREE_OPERAND (ref, 1);
          tree base = initial_condition (access_fn);
          tree orig_type = TREE_TYPE (base);
          STRIP_USELESS_TYPE_CONVERSION (base);
          tree off;
          split_constant_offset (base, &base, &off);
          STRIP_USELESS_TYPE_CONVERSION (base);
          /* Fold the MEM_REF offset into the evolutions initial
             value to make more bases comparable.  */
          if (!integer_zerop (memoff))
            {
              off = size_binop (PLUS_EXPR, off,
                                fold_convert (ssizetype, memoff));
              memoff = build_int_cst (TREE_TYPE (memoff), 0);
            }
          /* Adjust the offset so it is a multiple of the access type
             size and thus we separate bases that can possibly be used
             to produce partial overlaps (which the access_fn machinery
             cannot handle).  */
          wide_int rem;
          if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
              && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
              && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
            rem = wi::mod_trunc
              (wi::to_wide (off),
               wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
               SIGNED);
          else
            /* If we can't compute the remainder simply force the initial
               condition to zero.  */
            rem = wi::to_wide (off);
          off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
          memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
          /* And finally replace the initial condition.  */
          access_fn = chrec_replace_initial_condition
              (access_fn, fold_convert (orig_type, off));
          /* ???  This is still not a suitable base object for
             dr_may_alias_p - the base object needs to be an
             access that covers the object as whole.  With
             an evolution in the pointer this cannot be
             guaranteed.
             As a band-aid, mark the access so we can special-case
             it in dr_may_alias_p.  */
          tree old = ref;
          ref = fold_build2_loc (EXPR_LOCATION (ref),
                                 MEM_REF, TREE_TYPE (ref),
                                 base, memoff);
          MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
          MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
          dri->unconstrained_base = true;
          access_fns.safe_push (access_fn);
        }
    }
  else if (DECL_P (ref))
    {
      /* Canonicalize DR_BASE_OBJECT to MEM_REF form.  */
      ref = build2 (MEM_REF, TREE_TYPE (ref),
                    build_fold_addr_expr (ref),
                    build_int_cst (reference_alias_ptr_type (ref), 0));
    }

  dri->base_object = ref;
  dri->access_fns = access_fns;
}

/* Extracts the alias analysis information from the memory reference DR.  */

static void
dr_analyze_alias (struct data_reference *dr)
{
  tree ref = DR_REF (dr);
  tree base = get_base_address (ref), addr;

  if (INDIRECT_REF_P (base)
      || TREE_CODE (base) == MEM_REF)
    {
      addr = TREE_OPERAND (base, 0);
      if (TREE_CODE (addr) == SSA_NAME)
        DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
    }
}

/* Frees data reference DR.  */

void
free_data_ref (data_reference_p dr)
{
  DR_ACCESS_FNS (dr).release ();
  if (dr->alt_indices.base_object)
    dr->alt_indices.access_fns.release ();
  free (dr);
}

/* Analyze memory reference MEMREF, which is accessed in STMT.
   The reference is a read if IS_READ is true, otherwise it is a write.
   IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
   within STMT, i.e. that it might not occur even if STMT is executed
   and runs to completion.

   Return the data_reference description of MEMREF.  NEST is the outermost
   loop in which the reference should be instantiated, LOOP is the loop
   in which the data reference should be analyzed.  */

struct data_reference *
create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
                 bool is_read, bool is_conditional_in_stmt)
{
  struct data_reference *dr;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Creating dr for ");
      print_generic_expr (dump_file, memref, TDF_SLIM);
      fprintf (dump_file, "\n");
    }

  dr = XCNEW (struct data_reference);
  DR_STMT (dr) = stmt;
  DR_REF (dr) = memref;
  DR_IS_READ (dr) = is_read;
  DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;

  dr_analyze_innermost (&DR_INNERMOST (dr), memref,
                        nest != NULL ? loop : NULL, stmt);
  dr_analyze_indices (&dr->indices, DR_REF (dr), nest, loop);
  dr_analyze_alias (dr);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned i;
      fprintf (dump_file, "\tbase_address: ");
      print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
      fprintf (dump_file, "\n\toffset from base address: ");
      print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tconstant offset from base address: ");
      print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tstep: ");
      print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tbase misalignment: %d",
               DR_BASE_MISALIGNMENT (dr));
      fprintf (dump_file, "\n\toffset alignment: %d",
               DR_OFFSET_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tbase_object: ");
      print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
      fprintf (dump_file, "\n");
      for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
        {
          fprintf (dump_file, "\tAccess function %d: ", i);
          print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
        }
    }

  return dr;
}

/* A helper function computes order between two tree expressions T1 and T2.
   This is used in comparator functions sorting objects based on the order
   of tree expressions.  The function returns -1, 0, or 1.  */

int
data_ref_compare_tree (tree t1, tree t2)
{
  int i, cmp;
  enum tree_code code;
  char tclass;

  if (t1 == t2)
    return 0;
  if (t1 == NULL)
    return -1;
  if (t2 == NULL)
    return 1;

  STRIP_USELESS_TYPE_CONVERSION (t1);
  STRIP_USELESS_TYPE_CONVERSION (t2);
  if (t1 == t2)
    return 0;

  if (TREE_CODE (t1) != TREE_CODE (t2)
      && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
    return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;

  code = TREE_CODE (t1);
  switch (code)
    {
    case INTEGER_CST:
      return tree_int_cst_compare (t1, t2);

    case STRING_CST:
      if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
        return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
      return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
                     TREE_STRING_LENGTH (t1));

    case SSA_NAME:
      if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
        return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
      break;

    default:
      if (POLY_INT_CST_P (t1))
        return compare_sizes_for_sort (wi::to_poly_widest (t1),
                                       wi::to_poly_widest (t2));

      tclass = TREE_CODE_CLASS (code);

      /* For decls, compare their UIDs.  */
      if (tclass == tcc_declaration)
        {
          if (DECL_UID (t1) != DECL_UID (t2))
            return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
          break;
        }
      /* For expressions, compare their operands recursively.  */
      else if (IS_EXPR_CODE_CLASS (tclass))
        {
          for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
            {
              cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
                                           TREE_OPERAND (t2, i));
              if (cmp != 0)
                return cmp;
            }
        }
      else
        gcc_unreachable ();
    }

  return 0;
}

/* Return true if it's possible to resolve data dependence DDR by a runtime
   alias check.  */

opt_result
runtime_alias_check_p (ddr_p ddr, class loop *loop, bool speed_p)
{
  if (dump_enabled_p ())
    dump_printf (MSG_NOTE,
                 "consider run-time aliasing test between %T and %T\n",
                 DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));

  if (!speed_p)
    return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
                                   "runtime alias check not supported when"
                                   " optimizing for size.\n");

  /* FORNOW: We don't support versioning with outer-loop in either
     vectorization or loop distribution.  */
  if (loop != NULL && loop->inner != NULL)
    return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
                                   "runtime alias check not supported for"
                                   " outer loop.\n");

  return opt_result::success ();
}

/* Operator == between two dr_with_seg_len objects.

   This equality operator is used to make sure two data refs
   are the same one so that we will consider to combine the
   aliasing checks of those two pairs of data dependent data
   refs.  */

static bool
operator == (const dr_with_seg_len& d1,
             const dr_with_seg_len& d2)
{
  return (operand_equal_p (DR_BASE_ADDRESS (d1.dr),
                           DR_BASE_ADDRESS (d2.dr), 0)
          && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
          && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
          && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0
          && known_eq (d1.access_size, d2.access_size)
          && d1.align == d2.align);
}

/* Comparison function for sorting objects of dr_with_seg_len_pair_t
   so that we can combine aliasing checks in one scan.  */

static int
comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
{
  const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
  const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
  const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
  const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;

  /* For DR pairs (a, b) and (c, d), we only consider merging the alias checks
     if a and c have the same base address and step, and b and d have the same
     address and step.  Therefore, if either a&c or b&d don't have the same
     address and step, we don't care about the order of those two pairs after
     sorting.  */
  int comp_res;

  if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
                                         DR_BASE_ADDRESS (b1.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
                                         DR_BASE_ADDRESS (b2.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
                                         DR_STEP (b1.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
                                         DR_STEP (b2.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
                                         DR_OFFSET (b1.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
                                         DR_INIT (b1.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
                                         DR_OFFSET (b2.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
                                         DR_INIT (b2.dr))) != 0)
    return comp_res;

  return 0;
}

/* Dump information about ALIAS_PAIR, indenting each line by INDENT.  */

static void
dump_alias_pair (dr_with_seg_len_pair_t *alias_pair, const char *indent)
{
  dump_printf (MSG_NOTE, "%sreference:      %T vs. %T\n", indent,
               DR_REF (alias_pair->first.dr),
               DR_REF (alias_pair->second.dr));

  dump_printf (MSG_NOTE, "%ssegment length: %T", indent,
               alias_pair->first.seg_len);
  if (!operand_equal_p (alias_pair->first.seg_len,
                        alias_pair->second.seg_len, 0))
    dump_printf (MSG_NOTE, " vs. %T", alias_pair->second.seg_len);

  dump_printf (MSG_NOTE, "\n%saccess size:    ", indent);
  dump_dec (MSG_NOTE, alias_pair->first.access_size);
  if (maybe_ne (alias_pair->first.access_size, alias_pair->second.access_size))
    {
      dump_printf (MSG_NOTE, " vs. ");
      dump_dec (MSG_NOTE, alias_pair->second.access_size);
    }

  dump_printf (MSG_NOTE, "\n%salignment:      %d", indent,
               alias_pair->first.align);
  if (alias_pair->first.align != alias_pair->second.align)
    dump_printf (MSG_NOTE, " vs. %d", alias_pair->second.align);

  dump_printf (MSG_NOTE, "\n%sflags:         ", indent);
  if (alias_pair->flags & DR_ALIAS_RAW)
    dump_printf (MSG_NOTE, " RAW");
  if (alias_pair->flags & DR_ALIAS_WAR)
    dump_printf (MSG_NOTE, " WAR");
  if (alias_pair->flags & DR_ALIAS_WAW)
    dump_printf (MSG_NOTE, " WAW");
  if (alias_pair->flags & DR_ALIAS_ARBITRARY)
    dump_printf (MSG_NOTE, " ARBITRARY");
  if (alias_pair->flags & DR_ALIAS_SWAPPED)
    dump_printf (MSG_NOTE, " SWAPPED");
  if (alias_pair->flags & DR_ALIAS_UNSWAPPED)
    dump_printf (MSG_NOTE, " UNSWAPPED");
  if (alias_pair->flags & DR_ALIAS_MIXED_STEPS)
    dump_printf (MSG_NOTE, " MIXED_STEPS");
  if (alias_pair->flags == 0)
    dump_printf (MSG_NOTE, " <none>");
  dump_printf (MSG_NOTE, "\n");
}

/* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
   FACTOR is the number of iterations that each data reference is accessed.

   Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
   we create an expression:

   ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
   || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))

   for aliasing checks.  However, in some cases we can decrease the number
   of checks by combining two checks into one.  For example, suppose we have
   another pair of data refs store_ptr_0 & load_ptr_1, and if the following
   condition is satisfied:

   load_ptr_0 < load_ptr_1  &&
   load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0

   (this condition means, in each iteration of vectorized loop, the accessed
   memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
   load_ptr_1.)

   we then can use only the following expression to finish the aliasing checks
   between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:

   ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
   || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))

   Note that we only consider that load_ptr_0 and load_ptr_1 have the same
   address.  */
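
/* A numeric illustration of the merge condition above (values are
   hypothetical): with load_ptr_1 - load_ptr_0 = 16,
   load_segment_length_0 = 16 and store_segment_length_0 = 64, we have
   16 - 16 = 0 < 64, so the two checks against store_ptr_0 can be
   combined into a single range test spanning both load segments.  */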

void
prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
                               poly_uint64)
{
  if (alias_pairs->is_empty ())
    return;

  /* Canonicalize each pair so that the base components are ordered wrt
     data_ref_compare_tree.  This allows the loop below to merge more
     cases.  */
  unsigned int i;
  dr_with_seg_len_pair_t *alias_pair;
  FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
    {
      data_reference_p dr_a = alias_pair->first.dr;
      data_reference_p dr_b = alias_pair->second.dr;
      int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
                                            DR_BASE_ADDRESS (dr_b));
      if (comp_res == 0)
        comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
      if (comp_res == 0)
        comp_res = data_ref_compare_tree (DR_INIT (dr_a), DR_INIT (dr_b));
      if (comp_res > 0)
        {
          std::swap (alias_pair->first, alias_pair->second);
          alias_pair->flags |= DR_ALIAS_SWAPPED;
        }
      else
        alias_pair->flags |= DR_ALIAS_UNSWAPPED;
    }

  /* Sort the collected data ref pairs so that we can scan them once to
     combine all possible aliasing checks.  */
  alias_pairs->qsort (comp_dr_with_seg_len_pair);

  /* Scan the sorted dr pairs and check if we can combine alias checks
     of two neighboring dr pairs.  */
  unsigned int last = 0;
  for (i = 1; i < alias_pairs->length (); ++i)
    {
      /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2).  */
      dr_with_seg_len_pair_t *alias_pair1 = &(*alias_pairs)[last];
      dr_with_seg_len_pair_t *alias_pair2 = &(*alias_pairs)[i];

      dr_with_seg_len *dr_a1 = &alias_pair1->first;
      dr_with_seg_len *dr_b1 = &alias_pair1->second;
      dr_with_seg_len *dr_a2 = &alias_pair2->first;
      dr_with_seg_len *dr_b2 = &alias_pair2->second;

      /* Remove duplicate data ref pairs.  */
      if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
        {
          if (dump_enabled_p ())
            dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
                         DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
                         DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
          alias_pair1->flags |= alias_pair2->flags;
          continue;
        }

      /* Assume that we won't be able to merge the pairs, then correct
         if we do.  */
      last += 1;
      if (last != i)
        (*alias_pairs)[last] = (*alias_pairs)[i];

      if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
        {
          /* We consider the case that DR_B1 and DR_B2 are same memrefs,
             and DR_A1 and DR_A2 are two consecutive memrefs.  */
          if (*dr_a1 == *dr_a2)
            {
              std::swap (dr_a1, dr_b1);
              std::swap (dr_a2, dr_b2);
            }

          poly_int64 init_a1, init_a2;
          /* Only consider cases in which the distance between the initial
             DR_A1 and the initial DR_A2 is known at compile time.  */
          if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
                                DR_BASE_ADDRESS (dr_a2->dr), 0)
              || !operand_equal_p (DR_OFFSET (dr_a1->dr),
                                   DR_OFFSET (dr_a2->dr), 0)
              || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
              || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
            continue;

          /* Don't combine if we can't tell which one comes first.  */
          if (!ordered_p (init_a1, init_a2))
            continue;

          /* Work out what the segment length would be if we did combine
             DR_A1 and DR_A2:

             - If DR_A1 and DR_A2 have equal lengths, that length is
               also the combined length.

             - If DR_A1 and DR_A2 both have negative "lengths", the combined
               length is the lower bound on those lengths.

             - If DR_A1 and DR_A2 both have positive lengths, the combined
               length is the upper bound on those lengths.

             Other cases are unlikely to give a useful combination.

             The lengths both have sizetype, so the sign is taken from
             the step instead.  */
          poly_uint64 new_seg_len = 0;
          bool new_seg_len_p = !operand_equal_p (dr_a1->seg_len,
                                                 dr_a2->seg_len, 0);
          if (new_seg_len_p)
            {
              poly_uint64 seg_len_a1, seg_len_a2;
              if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
                  || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
                continue;

              tree indicator_a = dr_direction_indicator (dr_a1->dr);
              if (TREE_CODE (indicator_a) != INTEGER_CST)
                continue;

              tree indicator_b = dr_direction_indicator (dr_a2->dr);
              if (TREE_CODE (indicator_b) != INTEGER_CST)
                continue;

              int sign_a = tree_int_cst_sgn (indicator_a);
              int sign_b = tree_int_cst_sgn (indicator_b);

              if (sign_a <= 0 && sign_b <= 0)
                new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
              else if (sign_a >= 0 && sign_b >= 0)
                new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
              else
                continue;
            }
          /* At this point we're committed to merging the refs.  */

          /* Make sure dr_a1 starts left of dr_a2.  */
          if (maybe_gt (init_a1, init_a2))
            {
              std::swap (*dr_a1, *dr_a2);
              std::swap (init_a1, init_a2);
            }

          /* The DR_Bs are equal, so only the DR_As can introduce
             mixed steps.  */
          if (!operand_equal_p (DR_STEP (dr_a1->dr), DR_STEP (dr_a2->dr), 0))
            alias_pair1->flags |= DR_ALIAS_MIXED_STEPS;

          if (new_seg_len_p)
            {
              dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
                                              new_seg_len);
              dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
            }

          /* This is always positive due to the swap above.  */
          poly_uint64 diff = init_a2 - init_a1;

          /* The new check will start at DR_A1.  Make sure that its access
             size encompasses the initial DR_A2.  */
          if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
            {
              dr_a1->access_size = upper_bound (dr_a1->access_size,
                                                diff + dr_a2->access_size);
              unsigned int new_align = known_alignment (dr_a1->access_size);
              dr_a1->align = MIN (dr_a1->align, new_align);
            }
          if (dump_enabled_p ())
            dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
                         DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
                         DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
          alias_pair1->flags |= alias_pair2->flags;
          last -= 1;
        }
    }
  alias_pairs->truncate (last + 1);

  /* Try to restore the original dr_with_seg_len order within each
     dr_with_seg_len_pair_t.  If we ended up combining swapped and
     unswapped pairs into the same check, we have to invalidate any
     RAW, WAR and WAW information for it.  */
  if (dump_enabled_p ())
    dump_printf (MSG_NOTE, "merged alias checks:\n");
  FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
    {
      unsigned int swap_mask = (DR_ALIAS_SWAPPED | DR_ALIAS_UNSWAPPED);
      unsigned int swapped = (alias_pair->flags & swap_mask);
      if (swapped == DR_ALIAS_SWAPPED)
        std::swap (alias_pair->first, alias_pair->second);
      else if (swapped != DR_ALIAS_UNSWAPPED)
        alias_pair->flags |= DR_ALIAS_ARBITRARY;
      alias_pair->flags &= ~swap_mask;
      if (dump_enabled_p ())
        dump_alias_pair (alias_pair, "  ");
    }
}
1981 /* A subroutine of create_intersect_range_checks, with a subset of the
1982 same arguments. Try to use IFN_CHECK_RAW_PTRS and IFN_CHECK_WAR_PTRS
1983 to optimize cases in which the references form a simple RAW, WAR or
1987 create_ifn_alias_checks (tree
*cond_expr
,
1988 const dr_with_seg_len_pair_t
&alias_pair
)
1990 const dr_with_seg_len
& dr_a
= alias_pair
.first
;
1991 const dr_with_seg_len
& dr_b
= alias_pair
.second
;
1993 /* Check for cases in which:
1995 (a) we have a known RAW, WAR or WAR dependence
1996 (b) the accesses are well-ordered in both the original and new code
1997 (see the comment above the DR_ALIAS_* flags for details); and
1998 (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR. */
1999 if (alias_pair
.flags
& ~(DR_ALIAS_RAW
| DR_ALIAS_WAR
| DR_ALIAS_WAW
))
  /* Make sure that both DRs access the same pattern of bytes,
     with a constant length and step.  */
  poly_uint64 seg_len;
  if (!operand_equal_p (dr_a.seg_len, dr_b.seg_len, 0)
      || !poly_int_tree_p (dr_a.seg_len, &seg_len)
      || maybe_ne (dr_a.access_size, dr_b.access_size)
      || !operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0)
      || !tree_fits_uhwi_p (DR_STEP (dr_a.dr)))
    return false;

  unsigned HOST_WIDE_INT bytes = tree_to_uhwi (DR_STEP (dr_a.dr));
  tree addr_a = DR_BASE_ADDRESS (dr_a.dr);
  tree addr_b = DR_BASE_ADDRESS (dr_b.dr);

  /* See whether the target supports what we want to do.  WAW checks are
     equivalent to WAR checks here.  */
  internal_fn ifn = (alias_pair.flags & DR_ALIAS_RAW
		     ? IFN_CHECK_RAW_PTRS
		     : IFN_CHECK_WAR_PTRS);
  unsigned int align = MIN (dr_a.align, dr_b.align);
  poly_uint64 full_length = seg_len + bytes;
  if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
					   full_length, align))
    {
      full_length = seg_len + dr_a.access_size;
      if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
					       full_length, align))
	return false;
    }

  /* Commit to using this form of test.  */
  addr_a = fold_build_pointer_plus (addr_a, DR_OFFSET (dr_a.dr));
  addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
  addr_b = fold_build_pointer_plus (addr_b, DR_OFFSET (dr_b.dr));
  addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
  *cond_expr = build_call_expr_internal_loc (UNKNOWN_LOCATION,
					     ifn, boolean_type_node,
					     4, addr_a, addr_b,
					     size_int (full_length),
					     size_int (align));

  if (dump_enabled_p ())
    {
      if (ifn == IFN_CHECK_RAW_PTRS)
	dump_printf (MSG_NOTE, "using an IFN_CHECK_RAW_PTRS test\n");
      else
	dump_printf (MSG_NOTE, "using an IFN_CHECK_WAR_PTRS test\n");
    }
  return true;
}
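/* Ignoring the alignment refinement above, the condition computed by
   either IFN is roughly equivalent to this scalar sketch (illustrative
   only; check_war_ptrs below is a plain C stand-in for the target's
   expansion of IFN_CHECK_WAR_PTRS, not a GCC API):

     #include <stdbool.h>
     #include <stdint.h>

     static bool
     check_war_ptrs (uintptr_t read_ptr, uintptr_t write_ptr,
		     uintptr_t full_length)
     {
       // Safe if the write pointer is at or below the read pointer,
       // or at least FULL_LENGTH bytes above it.
       return write_ptr <= read_ptr
	      || write_ptr - read_ptr >= full_length;
     }

   The RAW variant asks the analogous question for a read that follows
   a write.  */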
/* Try to generate a runtime condition that is true if ALIAS_PAIR is
   free of aliases, using a condition based on index values instead
   of a condition based on addresses.  Return true on success,
   storing the condition in *COND_EXPR.

   This can only be done if the two data references in ALIAS_PAIR access
   the same array object and the index is the only difference.  For example,
   if the two data references are DR_A and DR_B:

		       DR_A			  DR_B
      data-ref	       arr[i]			  arr[j]
      base_object      arr			  arr
      index	       {i_0, +, 1}_loop		  {j_0, +, 1}_loop

   The addresses and their index are like:

	|<- ADDR_A    ->|	   |<- ADDR_B	 ->|
     ------------------------------------------------------->
	|   |	|   |	|	   |   |   |   |   |
     ------------------------------------------------------->
	i_0 ...		i_0+4	   j_0 ...	   j_0+4

   We can create expression based on index rather than address:

     (unsigned) (i_0 - j_0 + 3) <= 6

   i.e. the indices are less than 4 apart.

   Note evolution step of index needs to be considered in comparison.  */
static bool
create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
				     const dr_with_seg_len_pair_t &alias_pair)
{
  const dr_with_seg_len &dr_a = alias_pair.first;
  const dr_with_seg_len &dr_b = alias_pair.second;
  if ((alias_pair.flags & DR_ALIAS_MIXED_STEPS)
      || integer_zerop (DR_STEP (dr_a.dr))
      || integer_zerop (DR_STEP (dr_b.dr))
      || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
    return false;
  poly_uint64 seg_len1, seg_len2;
  if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
      || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
    return false;

  if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
    return false;

  if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
    return false;

  if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
    return false;

  gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);

  bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
  unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
  if (neg_step)
    {
      abs_step = -abs_step;
      seg_len1 = (-wi::to_poly_wide (dr_a.seg_len)).force_uhwi ();
      seg_len2 = (-wi::to_poly_wide (dr_b.seg_len)).force_uhwi ();
    }

  /* Infer the number of iterations with which the memory segment is accessed
     by DR.  In other words, alias is checked if the memory segment accessed
     by DR_A in some iterations intersects with the memory segment accessed
     by DR_B in the same number of iterations.
     Note that segment length is a linear function of the number of
     iterations, with DR_STEP as the coefficient.  */
  poly_uint64 niter_len1, niter_len2;
  if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
      || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
    return false;

  /* Divide each access size by the byte step, rounding up.  */
  poly_uint64 niter_access1, niter_access2;
  if (!can_div_trunc_p (dr_a.access_size + abs_step - 1,
			abs_step, &niter_access1)
      || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
			   abs_step, &niter_access2))
    return false;

  bool waw_or_war_p = (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) == 0;
  int found = -1;
  for (unsigned int i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
    {
      tree access1 = DR_ACCESS_FN (dr_a.dr, i);
      tree access2 = DR_ACCESS_FN (dr_b.dr, i);
      /* Two indices must be the same if they are not SCEVs, or are not
	 SCEVs with respect to the current loop being vectorized.  */
      if (TREE_CODE (access1) != POLYNOMIAL_CHREC
	  || TREE_CODE (access2) != POLYNOMIAL_CHREC
	  || CHREC_VARIABLE (access1) != (unsigned) loop->num
	  || CHREC_VARIABLE (access2) != (unsigned) loop->num)
	return false;

      if (operand_equal_p (access1, access2, 0))
	continue;

      if (found >= 0)
	return false;
      found = i;
    }
  /* Ought not to happen in practice, since if all accesses are equal then the
     alias should be decidable at compile time.  */
  if (found < 0)
    return false;

  /* The two indices must have the same step.  */
  tree access1 = DR_ACCESS_FN (dr_a.dr, found);
  tree access2 = DR_ACCESS_FN (dr_b.dr, found);
  if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
    return false;

  tree idx_step = CHREC_RIGHT (access1);
  /* Index must have const step, otherwise DR_STEP won't be constant.  */
  gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
  /* Index must evaluate in the same direction as DR.  */
  gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
  tree min1 = CHREC_LEFT (access1);
  tree min2 = CHREC_LEFT (access2);
  if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
    return false;

  /* Ideally, alias can be checked against loop's control IV, but we
     need to prove linear mapping between control IV and reference
     index.  Although that should be true, we check against (array)
     index of data reference.  Like segment length, index length is
     linear function of the number of iterations with index_step as
     the coefficient, i.e, niter_len * idx_step.  */
  offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step),
					      SIGNED);
  if (neg_step)
    abs_idx_step = -abs_idx_step;
  poly_offset_int idx_len1 = abs_idx_step * niter_len1;
  poly_offset_int idx_len2 = abs_idx_step * niter_len2;
  poly_offset_int idx_access1 = abs_idx_step * niter_access1;
  poly_offset_int idx_access2 = abs_idx_step * niter_access2;

  gcc_assert (known_ge (idx_len1, 0)
	      && known_ge (idx_len2, 0)
	      && known_ge (idx_access1, 0)
	      && known_ge (idx_access2, 0));
  /* Each access has the following pattern, with lengths measured
     in units of INDEX:

	  <-- idx_len -->
	  <--- A: -ve step --->
	  +-----+-------+-----+-------+-----+
	  | n-1 | ..... |  0  | ..... | n-1 |
	  +-----+-------+-----+-------+-----+
		<--- B: +ve step --->
		<-- idx_len -->
		|
		min

     where "n" is the number of scalar iterations covered by the segment
     and where each access spans idx_access units.

     A is the range of bytes accessed when the step is negative,
     B is the range when the step is positive.

     When checking for general overlap, we need to test whether
     the range:

       [min1 + low_offset1, min1 + high_offset1 + idx_access1 - 1]

     overlaps:

       [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1]

     where:

	low_offsetN = +ve step ? 0 : -idx_lenN;
       high_offsetN = +ve step ? idx_lenN : 0;

     This is equivalent to testing whether:

       min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1
       && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1

     Converting this into a single test, there is an overlap if:

       0 <= min2 - min1 + bias <= limit

     where  bias = high_offset2 + idx_access2 - 1 - low_offset1
	   limit = (high_offset1 - low_offset1 + idx_access1 - 1)
		 + (high_offset2 - low_offset2 + idx_access2 - 1)
      i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1

     Combining the tests requires limit to be computable in an unsigned
     form of the index type; if it isn't, we fall back to the usual
     pointer-based checks.

     We can do better if DR_B is a write and if DR_A and DR_B are
     well-ordered in both the original and the new code (see the
     comment above the DR_ALIAS_* flags for details).  In this case
     we know that for each i in [0, n-1], the write performed by
     access i of DR_B occurs after access numbers j<=i of DR_A in
     both the original and the new code.  Any write or anti
     dependencies wrt those DR_A accesses are therefore maintained.

     We just need to make sure that each individual write in DR_B does not
     overlap any higher-indexed access in DR_A; such DR_A accesses happen
     after the DR_B access in the original code but happen before it in
     the new code.

     We know the steps for both accesses are equal, so by induction, we
     just need to test whether the first write of DR_B overlaps a later
     access of DR_A.  In other words, we need to move min1 along by
     one iteration:

       min1' = min1 + idx_step

     and use the ranges:

       [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1]

     and:

       [min2, min2 + idx_access2 - 1]

     where:

	low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|)
       high_offset1' = +ve step ? idx_len1 - |idx_step| : 0.  */
  if (waw_or_war_p)
    idx_len1 -= abs_idx_step;

  poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1;
  if (!waw_or_war_p)
    limit += idx_len2;

  tree utype = unsigned_type_for (TREE_TYPE (min1));
  if (!wi::fits_to_tree_p (limit, utype))
    return false;

  poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
  poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2;
  poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
  /* Equivalent to adding IDX_STEP to MIN1.  */
  if (waw_or_war_p)
    bias -= wi::to_offset (idx_step);

  tree subject = fold_build2 (MINUS_EXPR, utype,
			      fold_convert (utype, min2),
			      fold_convert (utype, min1));
  subject = fold_build2 (PLUS_EXPR, utype, subject,
			 wide_int_to_tree (utype, bias));
  tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject,
				     wide_int_to_tree (utype, limit));
  if (*cond_expr)
    *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
			      *cond_expr, part_cond_expr);
  else
    *cond_expr = part_cond_expr;
  if (dump_enabled_p ())
    {
      if (waw_or_war_p)
	dump_printf (MSG_NOTE, "using an index-based WAR/WAW test\n");
      else
	dump_printf (MSG_NOTE, "using an index-based overlap test\n");
    }
  return true;
}
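/* As a worked instance of the test built above: the arr[i] / arr[j]
   example in the function comment yields bias = 3 and limit = 6, so
   the emitted condition is equivalent to this scalar sketch (variable
   names hypothetical):

     unsigned int subject = (unsigned int) (j_0 - i_0) + 3;  // min2 - min1 + bias
     bool no_alias = subject > 6;			     // subject > limit

   i.e. the pair is considered alias-free exactly when the start
   indices are at least 4 apart, matching the function comment.  */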
/* A subroutine of create_intersect_range_checks, with a subset of the
   same arguments.  Try to optimize cases in which the second access
   is a write and in which some overlap is valid.  */

static bool
create_waw_or_war_checks (tree *cond_expr,
			  const dr_with_seg_len_pair_t &alias_pair)
{
  const dr_with_seg_len &dr_a = alias_pair.first;
  const dr_with_seg_len &dr_b = alias_pair.second;
  /* Check for cases in which:

     (a) DR_B is always a write;
     (b) the accesses are well-ordered in both the original and new code
	 (see the comment above the DR_ALIAS_* flags for details); and
     (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
  if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
    return false;

  /* Check for equal (but possibly variable) steps.  */
  tree step = DR_STEP (dr_a.dr);
  if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
    return false;

  /* Make sure that we can operate on sizetype without loss of precision.  */
  tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
  if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
    return false;

  /* All addresses involved are known to have a common alignment ALIGN.
     We can therefore subtract ALIGN from an exclusive endpoint to get
     an inclusive endpoint.  In the best (and common) case, ALIGN is the
     same as the access sizes of both DRs, and so subtracting ALIGN
     cancels out the addition of an access size.  */
  unsigned int align = MIN (dr_a.align, dr_b.align);
  poly_uint64 last_chunk_a = dr_a.access_size - align;
  poly_uint64 last_chunk_b = dr_b.access_size - align;

  /* Get a boolean expression that is true when the step is negative.  */
  tree indicator = dr_direction_indicator (dr_a.dr);
  tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
			       fold_convert (ssizetype, indicator),
			       ssize_int (0));

  /* Get lengths in sizetype.  */
  tree seg_len_a
    = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len));
  step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step));
  /* Each access has the following pattern:

	  <- |seg_len| ->
	  <--- A: -ve step --->
	  +-----+-------+-----+-------+-----+
	  | n-1 | ..... |  0  | ..... | n-1 |
	  +-----+-------+-----+-------+-----+
		<--- B: +ve step --->
		<- |seg_len| ->
		|
	   base address

     where "n" is the number of scalar iterations covered by the segment.

     A is the range of bytes accessed when the step is negative,
     B is the range when the step is positive.

     We know that DR_B is a write.  We also know (from checking that
     DR_A and DR_B are well-ordered) that for each i in [0, n-1],
     the write performed by access i of DR_B occurs after access numbers
     j<=i of DR_A in both the original and the new code.  Any write or
     anti dependencies wrt those DR_A accesses are therefore maintained.

     We just need to make sure that each individual write in DR_B does not
     overlap any higher-indexed access in DR_A; such DR_A accesses happen
     after the DR_B access in the original code but happen before it in
     the new code.

     We know the steps for both accesses are equal, so by induction, we
     just need to test whether the first write of DR_B overlaps a later
     access of DR_A.  In other words, we need to move addr_a along by
     one iteration:

       addr_a' = addr_a + step

     and check whether:

       [addr_b, addr_b + last_chunk_b]

     overlaps:

       [addr_a' + low_offset_a, addr_a' + high_offset_a + last_chunk_a]

     where [low_offset_a, high_offset_a] spans accesses [1, n-1].  I.e.:

	low_offset_a = +ve step ? 0 : seg_len_a - step
       high_offset_a = +ve step ? seg_len_a - step : 0

     This is equivalent to testing whether:

       addr_a' + low_offset_a <= addr_b + last_chunk_b
       && addr_b <= addr_a' + high_offset_a + last_chunk_a

     Converting this into a single test, there is an overlap if:

       0 <= addr_b + last_chunk_b - addr_a' - low_offset_a <= limit

     where limit = high_offset_a - low_offset_a + last_chunk_a + last_chunk_b

     If DR_A is performed, limit + |step| - last_chunk_b is known to be
     less than the size of the object underlying DR_A.  We also know
     that last_chunk_b <= |step|; this is checked elsewhere if it isn't
     guaranteed at compile time.  There can therefore be no overflow if
     "limit" is calculated in an unsigned type with pointer precision.  */
  tree addr_a = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr),
					 DR_OFFSET (dr_a.dr));
  addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));

  tree addr_b = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr),
					 DR_OFFSET (dr_b.dr));
  addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));

  /* Advance ADDR_A by one iteration and adjust the length to compensate.  */
  addr_a = fold_build_pointer_plus (addr_a, step);
  tree seg_len_a_minus_step = fold_build2 (MINUS_EXPR, sizetype,
					   seg_len_a, step);
  if (!CONSTANT_CLASS_P (seg_len_a_minus_step))
    seg_len_a_minus_step = build1 (SAVE_EXPR, sizetype, seg_len_a_minus_step);

  tree low_offset_a = fold_build3 (COND_EXPR, sizetype, neg_step,
				   seg_len_a_minus_step, size_zero_node);
  if (!CONSTANT_CLASS_P (low_offset_a))
    low_offset_a = build1 (SAVE_EXPR, sizetype, low_offset_a);

  /* We could use COND_EXPR <neg_step, size_zero_node, seg_len_a_minus_step>,
     but it's usually more efficient to reuse the LOW_OFFSET_A result.  */
  tree high_offset_a = fold_build2 (MINUS_EXPR, sizetype, seg_len_a_minus_step,
				    low_offset_a);

  /* The amount added to addr_b - addr_a'.  */
  tree bias = fold_build2 (MINUS_EXPR, sizetype,
			   size_int (last_chunk_b), low_offset_a);

  tree limit = fold_build2 (MINUS_EXPR, sizetype, high_offset_a, low_offset_a);
  limit = fold_build2 (PLUS_EXPR, sizetype, limit,
		       size_int (last_chunk_a + last_chunk_b));

  tree subject = fold_build2 (POINTER_DIFF_EXPR, ssizetype, addr_b, addr_a);
  subject = fold_build2 (PLUS_EXPR, sizetype,
			 fold_convert (sizetype, subject), bias);

  *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit);
  if (dump_enabled_p ())
    dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n");
  return true;
}
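/* A scalar sketch of the final test built above, under the simplifying
   assumption of a positive constant step (names hypothetical, not a
   GCC API):

     #include <stdbool.h>
     #include <stdint.h>

     static bool
     waw_or_war_no_alias (uintptr_t addr_a, uintptr_t addr_b,
			  uintptr_t step, uintptr_t seg_len_a,
			  uintptr_t last_chunk_a, uintptr_t last_chunk_b)
     {
       uintptr_t addr_a1 = addr_a + step;	     // addr_a'
       uintptr_t low_offset_a = 0;		     // +ve step
       uintptr_t high_offset_a = seg_len_a - step;
       uintptr_t bias = last_chunk_b - low_offset_a;
       uintptr_t limit = high_offset_a - low_offset_a
			 + last_chunk_a + last_chunk_b;
       // Unsigned wraparound lets a single comparison test both bounds.
       return (addr_b - addr_a1) + bias > limit;
     }
*/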
/* If ALIGN is nonzero, set up *SEG_MIN_OUT and *SEG_MAX_OUT so that for
   every address ADDR accessed by D:

     *SEG_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEG_MAX_OUT

   In this case, every element accessed by D is aligned to at least
   ALIGN bytes.

   If ALIGN is zero then instead set *SEG_MAX_OUT so that:

     *SEG_MIN_OUT <= ADDR < *SEG_MAX_OUT.  */

static void
get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
		     tree *seg_max_out, HOST_WIDE_INT align)
{
  /* Each access has the following pattern:

	  <- |seg_len| ->
	  <--- A: -ve step --->
	  +-----+-------+-----+-------+-----+
	  | n-1 | ..... |  0  | ..... | n-1 |
	  +-----+-------+-----+-------+-----+
		<--- B: +ve step --->
		<- |seg_len| ->
		|
	   base address

     where "n" is the number of scalar iterations covered by the segment.
     (This should be VF for a particular pair if we know that both steps
     are the same, otherwise it will be the full number of scalar loop
     iterations.)

     A is the range of bytes accessed when the step is negative,
     B is the range when the step is positive.

     If the access size is "access_size" bytes, the lowest addressed byte is:

       base + (step < 0 ? seg_len : 0)   [LB]

     and the highest addressed byte is always below:

       base + (step < 0 ? 0 : seg_len) + access_size   [UB]

     Thus:

       LB <= ADDR < UB

     If ALIGN is nonzero, all three values are aligned to at least ALIGN
     bytes, so:

       LB <= ADDR <= UB - ALIGN

     where "- ALIGN" folds naturally with the "+ access_size" and often
     cancels it out.

     We don't try to simplify LB and UB beyond this (e.g. by using
     MIN and MAX based on whether seg_len rather than the stride is
     negative) because it is possible for the absolute size of the
     segment to overflow the range of a ssize_t.

     Keeping the pointer_plus outside of the cond_expr should allow
     the cond_exprs to be shared with other alias checks.  */
  tree indicator = dr_direction_indicator (d.dr);
  tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
			       fold_convert (ssizetype, indicator),
			       ssize_int (0));
  tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
					    DR_OFFSET (d.dr));
  addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
  tree seg_len
    = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));

  tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
				seg_len, size_zero_node);
  tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
				size_zero_node, seg_len);
  max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
			   size_int (d.access_size - align));

  *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
  *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
}
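/* For example, a segment with seg_len 64, access_size 4 and align 4
   yields, for a positive step:

     *seg_min_out = addr_base			  (min_reach = 0)
     *seg_max_out = addr_base + 64 + (4 - 4)	  (the "- align" cancels
						   the "+ access_size")

   For a negative step, seg_len is itself negative, so the reaches swap:
   min = addr_base + seg_len and max = addr_base + (4 - 4).  */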
/* Generate a runtime condition that is true if ALIAS_PAIR is free of aliases,
   storing the condition in *COND_EXPR.  The fallback is to generate a
   test that the two accesses do not overlap:

     end_a <= start_b || end_b <= start_a.  */

static void
create_intersect_range_checks (class loop *loop, tree *cond_expr,
			       const dr_with_seg_len_pair_t &alias_pair)
{
  const dr_with_seg_len &dr_a = alias_pair.first;
  const dr_with_seg_len &dr_b = alias_pair.second;
  *cond_expr = NULL_TREE;
  if (create_intersect_range_checks_index (loop, cond_expr, alias_pair))
    return;

  if (create_ifn_alias_checks (cond_expr, alias_pair))
    return;

  if (create_waw_or_war_checks (cond_expr, alias_pair))
    return;

  unsigned HOST_WIDE_INT min_align;
  tree_code cmp_code;
  /* We don't have to check DR_ALIAS_MIXED_STEPS here, since both versions
     are equivalent.  This is just an optimization heuristic.  */
  if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
      && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
    {
      /* In this case adding access_size to seg_len is likely to give
	 a simple X * step, where X is either the number of scalar
	 iterations or the vectorization factor.  We're better off
	 keeping that, rather than subtracting an alignment from it.

	 In this case the maximum values are exclusive and so there is
	 no alias if the maximum of one segment equals the minimum
	 of another.  */
      min_align = 0;
      cmp_code = LE_EXPR;
    }
  else
    {
      /* Calculate the minimum alignment shared by all four pointers,
	 then arrange for this alignment to be subtracted from the
	 exclusive maximum values to get inclusive maximum values.
	 This "- min_align" is cumulative with a "+ access_size"
	 in the calculation of the maximum values.  In the best
	 (and common) case, the two cancel each other out, leaving
	 us with an inclusive bound based only on seg_len.  In the
	 worst case we're simply adding a smaller number than before.

	 Because the maximum values are inclusive, there is an alias
	 if the maximum value of one segment is equal to the minimum
	 value of the other.  */
      min_align = MIN (dr_a.align, dr_b.align);
      cmp_code = LT_EXPR;
    }

  tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
  get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
  get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);

  *cond_expr
    = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
		   fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
		   fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
  if (dump_enabled_p ())
    dump_printf (MSG_NOTE, "using an address-based overlap test\n");
}
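/* The fallback is thus the classic interval-disjointness test; in
   scalar form, with exclusive maxima (min_align == 0):

     bool no_alias = seg_a_max <= seg_b_min || seg_b_max <= seg_a_min;

   With inclusive maxima (min_align != 0) the comparisons use "<"
   instead of "<=".  */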
/* Create a conditional expression that represents the run-time checks for
   overlapping of address ranges represented by a list of data references
   pairs passed in ALIAS_PAIRS.  Data references are in LOOP.  The returned
   COND_EXPR is the conditional expression to be used in the if statement
   that controls which version of the loop gets executed at runtime.  */

void
create_runtime_alias_checks (class loop *loop,
			     const vec<dr_with_seg_len_pair_t> *alias_pairs,
			     tree *cond_expr)
{
  tree part_cond_expr;

  fold_defer_overflow_warnings ();
  for (const dr_with_seg_len_pair_t &alias_pair : alias_pairs)
    {
      gcc_assert (alias_pair.flags);
      if (dump_enabled_p ())
	dump_printf (MSG_NOTE,
		     "create runtime check for data references %T and %T\n",
		     DR_REF (alias_pair.first.dr),
		     DR_REF (alias_pair.second.dr));

      /* Create condition expression for each pair of data references.  */
      create_intersect_range_checks (loop, &part_cond_expr, alias_pair);
      if (*cond_expr)
	*cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
				  *cond_expr, part_cond_expr);
      else
	*cond_expr = part_cond_expr;
    }
  fold_undefer_and_ignore_overflow_warnings ();
}
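/* A typical caller sketch (hypothetical, mirroring how a loop
   transformation would use this interface):

     tree cond = NULL_TREE;
     create_runtime_alias_checks (loop, &alias_pairs, &cond);
     // COND is now true at runtime iff every pair is alias-free and
     // can guard the optimized version of the loop.
*/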
/* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
   expressions.  */

static bool
dr_equal_offsets_p1 (tree offset1, tree offset2)
{
  bool res;

  STRIP_NOPS (offset1);
  STRIP_NOPS (offset2);

  if (offset1 == offset2)
    return true;

  if (TREE_CODE (offset1) != TREE_CODE (offset2)
      || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
    return false;

  res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
			     TREE_OPERAND (offset2, 0));

  if (!res || !BINARY_CLASS_P (offset1))
    return res;

  res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
			     TREE_OPERAND (offset2, 1));

  return res;
}
/* Check if DRA and DRB have equal offsets.  */

bool
dr_equal_offsets_p (struct data_reference *dra,
		    struct data_reference *drb)
{
  tree offset1, offset2;

  offset1 = DR_OFFSET (dra);
  offset2 = DR_OFFSET (drb);

  return dr_equal_offsets_p1 (offset1, offset2);
}
/* Returns true if FNA == FNB.  */

static bool
affine_function_equal_p (affine_fn fna, affine_fn fnb)
{
  unsigned i, n = fna.length ();

  if (n != fnb.length ())
    return false;

  for (i = 0; i < n; i++)
    if (!operand_equal_p (fna[i], fnb[i], 0))
      return false;

  return true;
}
/* If all the functions in CF are the same, returns one of them,
   otherwise returns NULL.  */

static affine_fn
common_affine_function (conflict_function *cf)
{
  unsigned i;
  affine_fn comm;

  if (!CF_NONTRIVIAL_P (cf))
    return affine_fn ();

  comm = cf->fns[0];

  for (i = 1; i < cf->n; i++)
    if (!affine_function_equal_p (comm, cf->fns[i]))
      return affine_fn ();

  return comm;
}
/* Returns the base of the affine function FN.  */

static tree
affine_function_base (affine_fn fn)
{
  return fn[0];
}

/* Returns true if FN is a constant.  */

static bool
affine_function_constant_p (affine_fn fn)
{
  unsigned i;
  tree coef;

  for (i = 1; fn.iterate (i, &coef); i++)
    if (!integer_zerop (coef))
      return false;

  return true;
}

/* Returns true if FN is the zero constant function.  */

static bool
affine_function_zero_p (affine_fn fn)
{
  return (integer_zerop (affine_function_base (fn))
	  && affine_function_constant_p (fn));
}
/* Returns a signed integer type with the largest precision from TA
   and TB.  */

static tree
signed_type_for_types (tree ta, tree tb)
{
  if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
    return signed_type_for (ta);
  else
    return signed_type_for (tb);
}
/* Applies operation OP on affine functions FNA and FNB, and returns the
   result.  */

static affine_fn
affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
{
  unsigned i, n, m;
  affine_fn ret;
  tree coef;

  if (fnb.length () > fna.length ())
    {
      n = fna.length ();
      m = fnb.length ();
    }
  else
    {
      n = fnb.length ();
      m = fna.length ();
    }

  ret.create (m);
  for (i = 0; i < n; i++)
    {
      tree type = signed_type_for_types (TREE_TYPE (fna[i]),
					 TREE_TYPE (fnb[i]));
      ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
    }

  for (; fna.iterate (i, &coef); i++)
    ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
				 coef, integer_zero_node));
  for (; fnb.iterate (i, &coef); i++)
    ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
				 integer_zero_node, coef));

  return ret;
}
/* Returns the sum of affine functions FNA and FNB.  */

static affine_fn
affine_fn_plus (affine_fn fna, affine_fn fnb)
{
  return affine_fn_op (PLUS_EXPR, fna, fnb);
}

/* Returns the difference of affine functions FNA and FNB.  */

static affine_fn
affine_fn_minus (affine_fn fna, affine_fn fnb)
{
  return affine_fn_op (MINUS_EXPR, fna, fnb);
}
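/* An affine_fn is a vector of tree coefficients [c0, c1, ..., cn]
   denoting c0 + c1 * x_1 + ... + cn * x_n, with the base value in
   element 0.  For example (hypothetical values):

     fna = [3, 1]			    3 + x_1
     fnb = [10, 1]			    10 + x_1
     affine_fn_minus (fna, fnb) = [-7, 0]   the constant -7  */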
/* Frees affine function FN.  */

static void
affine_fn_free (affine_fn fn)
{
  fn.release ();
}
/* Determine for each subscript in the data dependence relation DDR
   the distance.  */

static void
compute_subscript_distance (struct data_dependence_relation *ddr)
{
  conflict_function *cf_a, *cf_b;
  affine_fn fn_a, fn_b, diff;

  if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      unsigned int i;

      for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
	{
	  struct subscript *subscript;

	  subscript = DDR_SUBSCRIPT (ddr, i);
	  cf_a = SUB_CONFLICTS_IN_A (subscript);
	  cf_b = SUB_CONFLICTS_IN_B (subscript);

	  fn_a = common_affine_function (cf_a);
	  fn_b = common_affine_function (cf_b);
	  if (!fn_a.exists () || !fn_b.exists ())
	    {
	      SUB_DISTANCE (subscript) = chrec_dont_know;
	      return;
	    }
	  diff = affine_fn_minus (fn_a, fn_b);

	  if (affine_function_constant_p (diff))
	    SUB_DISTANCE (subscript) = affine_function_base (diff);
	  else
	    SUB_DISTANCE (subscript) = chrec_dont_know;

	  affine_fn_free (diff);
	}
    }
}
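/* For example, for the subscript pair (A[i + 1], A[i]) the common
   affine functions are [1, 1] and [0, 1]; their difference [1, 0] is
   a constant function, so SUB_DISTANCE becomes its base value 1: a
   distance-1 dependence at this subscript.  */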
/* Returns the conflict function for "unknown".  */

static conflict_function *
conflict_fn_not_known (void)
{
  conflict_function *fn = XCNEW (conflict_function);
  fn->n = NOT_KNOWN;

  return fn;
}

/* Returns the conflict function for "independent".  */

static conflict_function *
conflict_fn_no_dependence (void)
{
  conflict_function *fn = XCNEW (conflict_function);
  fn->n = NO_DEPENDENCE;

  return fn;
}
2927 object_address_invariant_in_loop_p (const class loop
*loop
, const_tree obj
)
2929 while (handled_component_p (obj
))
2931 if (TREE_CODE (obj
) == ARRAY_REF
)
2933 for (int i
= 1; i
< 4; ++i
)
2934 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, i
),
2938 else if (TREE_CODE (obj
) == COMPONENT_REF
)
2940 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, 2),
2944 obj
= TREE_OPERAND (obj
, 0);
2947 if (!INDIRECT_REF_P (obj
)
2948 && TREE_CODE (obj
) != MEM_REF
)
2951 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, 0),
/* Returns false if we can prove that data references A and B do not alias,
   true otherwise.  If LOOP_NEST is false no cross-iteration aliases are
   considered.  */

bool
dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
		class loop *loop_nest)
{
  tree addr_a = DR_BASE_OBJECT (a);
  tree addr_b = DR_BASE_OBJECT (b);

  /* If we are not processing a loop nest but scalar code we
     do not need to care about possible cross-iteration dependences
     and thus can process the full original reference.  Do so,
     similar to how loop invariant motion applies extra offset-based
     disambiguation.  */
  if (!loop_nest)
    {
      tree tree_size_a = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (a)));
      tree tree_size_b = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (b)));

      if (DR_BASE_ADDRESS (a)
	  && DR_BASE_ADDRESS (b)
	  && operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b))
	  && operand_equal_p (DR_OFFSET (a), DR_OFFSET (b))
	  && poly_int_tree_p (tree_size_a)
	  && poly_int_tree_p (tree_size_b)
	  && !ranges_maybe_overlap_p (wi::to_poly_widest (DR_INIT (a)),
				      wi::to_poly_widest (tree_size_a),
				      wi::to_poly_widest (DR_INIT (b)),
				      wi::to_poly_widest (tree_size_b)))
	{
	  gcc_assert (integer_zerop (DR_STEP (a))
		      && integer_zerop (DR_STEP (b)));
	  return false;
	}

      aff_tree off1, off2;
      poly_widest_int size1, size2;
      get_inner_reference_aff (DR_REF (a), &off1, &size1);
      get_inner_reference_aff (DR_REF (b), &off2, &size2);
      aff_combination_scale (&off1, -1);
      aff_combination_add (&off2, &off1);
      if (aff_comb_cannot_overlap_p (&off2, size1, size2))
	return false;
    }

  if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
      && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
      /* For cross-iteration dependences the cliques must be valid for the
	 whole loop, not just individual iterations.  */
      && (!loop_nest
	  || MR_DEPENDENCE_CLIQUE (addr_a) == 1
	  || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique)
      && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
      && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
    return false;

  /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
     do not know the size of the base-object.  So we cannot do any
     offset/overlap based analysis but have to rely on points-to
     information only.  */
  if (TREE_CODE (addr_a) == MEM_REF
      && (DR_UNCONSTRAINED_BASE (a)
	  || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
    {
      /* For true dependences we can apply TBAA.  */
      if (flag_strict_aliasing
	  && DR_IS_WRITE (a) && DR_IS_READ (b)
	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
				     get_alias_set (DR_REF (b))))
	return false;
      if (TREE_CODE (addr_b) == MEM_REF)
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       TREE_OPERAND (addr_b, 0));
      else
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       build_fold_addr_expr (addr_b));
    }
  else if (TREE_CODE (addr_b) == MEM_REF
	   && (DR_UNCONSTRAINED_BASE (b)
	       || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
    {
      /* For true dependences we can apply TBAA.  */
      if (flag_strict_aliasing
	  && DR_IS_WRITE (a) && DR_IS_READ (b)
	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
				     get_alias_set (DR_REF (b))))
	return false;
      if (TREE_CODE (addr_a) == MEM_REF)
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       TREE_OPERAND (addr_b, 0));
      else
	return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
				       TREE_OPERAND (addr_b, 0));
    }

  /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
     that is being subsetted in the loop nest.  */
  if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
    return refs_output_dependent_p (addr_a, addr_b);
  else if (DR_IS_READ (a) && DR_IS_WRITE (b))
    return refs_anti_dependent_p (addr_a, addr_b);
  return refs_may_alias_p (addr_a, addr_b);
}
/* REF_A and REF_B both satisfy access_fn_component_p.  Return true
   if it is meaningful to compare their associated access functions
   when checking for dependencies.  */

static bool
access_fn_components_comparable_p (tree ref_a, tree ref_b)
{
  /* Allow pairs of component refs from the following sets:

       { REALPART_EXPR, IMAGPART_EXPR }
       { COMPONENT_REF }
       { ARRAY_REF }.  */
  tree_code code_a = TREE_CODE (ref_a);
  tree_code code_b = TREE_CODE (ref_b);
  if (code_a == IMAGPART_EXPR)
    code_a = REALPART_EXPR;
  if (code_b == IMAGPART_EXPR)
    code_b = REALPART_EXPR;
  if (code_a != code_b)
    return false;

  if (TREE_CODE (ref_a) == COMPONENT_REF)
    /* ??? We cannot simply use the type of operand #0 of the refs here as
       the Fortran compiler smuggles type punning into COMPONENT_REFs.
       Use the DECL_CONTEXT of the FIELD_DECLs instead.  */
    return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
	    == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));

  return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
			     TREE_TYPE (TREE_OPERAND (ref_b, 0)));
}
/* Initialize a data dependence relation RES in LOOP_NEST.  USE_ALT_INDICES
   is true when the main indices of A and B were not comparable so we try again
   with alternate indices computed on an indirect reference.  */

struct data_dependence_relation *
initialize_data_dependence_relation (struct data_dependence_relation *res,
				     vec<loop_p> loop_nest,
				     bool use_alt_indices)
{
  struct data_reference *a = DDR_A (res);
  struct data_reference *b = DDR_B (res);
  unsigned int i;

  struct indices *indices_a = &a->indices;
  struct indices *indices_b = &b->indices;
  if (use_alt_indices)
    {
      if (TREE_CODE (DR_REF (a)) != MEM_REF)
	indices_a = &a->alt_indices;
      if (TREE_CODE (DR_REF (b)) != MEM_REF)
	indices_b = &b->alt_indices;
    }
  unsigned int num_dimensions_a = indices_a->access_fns.length ();
  unsigned int num_dimensions_b = indices_b->access_fns.length ();
  if (num_dimensions_a == 0 || num_dimensions_b == 0)
    {
      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
      return res;
    }
  /* For unconstrained bases, the root (highest-indexed) subscript
     describes a variation in the base of the original DR_REF rather
     than a component access.  We have no type that accurately describes
     the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
     applying this subscript) so limit the search to the last real
     component access.

     E.g. for:

       void
       f (int a[][8], int b[][8])
       {
	 for (int i = 0; i < 8; ++i)
	   a[i * 2][0] = b[i][0];
       }

     the a and b accesses have a single ARRAY_REF component reference [0]
     but have two subscripts.  */
  if (indices_a->unconstrained_base)
    num_dimensions_a -= 1;
  if (indices_b->unconstrained_base)
    num_dimensions_b -= 1;
  /* These structures describe sequences of component references in
     DR_REF (A) and DR_REF (B).  Each component reference is tied to a
     specific access function.  */
  struct {
    /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
       DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
       indices.  In C notation, these are the indices of the rightmost
       component references; e.g. for a sequence .b.c.d, the start
       index is for .d.  */
    unsigned int start_a;
    unsigned int start_b;

    /* The sequence contains LENGTH consecutive access functions from
       each DR.  */
    unsigned int length;

    /* The enclosing objects for the A and B sequences respectively,
       i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
       and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied.  */
    tree object_a;
    tree object_b;
  } full_seq = {}, struct_seq = {};
  /* Before each iteration of the loop:

     - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
     - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B).  */
  unsigned int index_a = 0;
  unsigned int index_b = 0;
  tree ref_a = DR_REF (a);
  tree ref_b = DR_REF (b);

  /* Now walk the component references from the final DR_REFs back up to
     the enclosing base objects.  Each component reference corresponds
     to one access function in the DR, with access function 0 being for
     the final DR_REF and the highest-indexed access function being the
     one that is applied to the base of the DR.

     Look for a sequence of component references whose access functions
     are comparable (see access_fn_components_comparable_p).  If more
     than one such sequence exists, pick the one nearest the base
     (which is the leftmost sequence in C notation).  Store this sequence
     in FULL_SEQ.

     For example, if we have:

	struct foo { struct bar s; ... } (*a)[10], (*b)[10];

	A: a[0][i].s.c.d
	B: __real b[0][i].s.e[i].f

     (where d is the same type as the real component of f) then the access
     functions would be:

			 0   1   2   3
	A:              .d  .c  .s [i]

		 0   1   2   3   4   5
	B:  __real  .f [i]  .e  .s [i]

     The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
     and [i] is an ARRAY_REF.  However, the A1/B3 column contains two
     COMPONENT_REF accesses for struct bar, so is comparable.  Likewise
     the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
     so is comparable.  The A3/B5 column contains two ARRAY_REFs that
     index foo[10] arrays, so is again comparable.  The sequence is
     therefore:

	A: [1, 3]  (i.e. [i].s.c)
	B: [3, 5]  (i.e. [i].s.e)

     Also look for sequences of component references whose access
     functions are comparable and whose enclosing objects have the same
     RECORD_TYPE.  Store this sequence in STRUCT_SEQ.  In the above
     example, STRUCT_SEQ would be:

	A: [1, 2]  (i.e. s.c)
	B: [3, 4]  (i.e. s.e)  */
  while (index_a < num_dimensions_a && index_b < num_dimensions_b)
    {
      /* The alternate indices form always has a single dimension
	 with unconstrained base.  */
      gcc_assert (!use_alt_indices);

      /* REF_A and REF_B must be one of the component access types
	 allowed by dr_analyze_indices.  */
      gcc_checking_assert (access_fn_component_p (ref_a));
      gcc_checking_assert (access_fn_component_p (ref_b));

      /* Get the immediately-enclosing objects for REF_A and REF_B,
	 i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
	 and DR_ACCESS_FN (B, INDEX_B).  */
      tree object_a = TREE_OPERAND (ref_a, 0);
      tree object_b = TREE_OPERAND (ref_b, 0);

      tree type_a = TREE_TYPE (object_a);
      tree type_b = TREE_TYPE (object_b);
      if (access_fn_components_comparable_p (ref_a, ref_b))
	{
	  /* This pair of component accesses is comparable for dependence
	     analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
	     DR_ACCESS_FN (B, INDEX_B) in the sequence.  */
	  if (full_seq.start_a + full_seq.length != index_a
	      || full_seq.start_b + full_seq.length != index_b)
	    {
	      /* The accesses don't extend the current sequence,
		 so start a new one here.  */
	      full_seq.start_a = index_a;
	      full_seq.start_b = index_b;
	      full_seq.length = 0;
	    }

	  /* Add this pair of references to the sequence.  */
	  full_seq.length += 1;
	  full_seq.object_a = object_a;
	  full_seq.object_b = object_b;

	  /* If the enclosing objects are structures (and thus have the
	     same RECORD_TYPE), record the new sequence in STRUCT_SEQ.  */
	  if (TREE_CODE (type_a) == RECORD_TYPE)
	    struct_seq = full_seq;

	  /* Move to the next containing reference for both A and B.  */
	  ref_a = object_a;
	  ref_b = object_b;
	  index_a += 1;
	  index_b += 1;
	  continue;
	}
      /* Try to approach equal type sizes.  */
      if (!COMPLETE_TYPE_P (type_a)
	  || !COMPLETE_TYPE_P (type_b)
	  || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
	  || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
	break;

      unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
      unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
      if (size_a <= size_b)
	{
	  index_a += 1;
	  ref_a = object_a;
	}
      if (size_b <= size_a)
	{
	  index_b += 1;
	  ref_b = object_b;
	}
    }
  /* See whether FULL_SEQ ends at the base and whether the two bases
     are equal.  We do not care about TBAA or alignment info so we can
     use OEP_ADDRESS_OF to avoid false negatives.  */
  tree base_a = indices_a->base_object;
  tree base_b = indices_b->base_object;
  bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
		      && full_seq.start_b + full_seq.length == num_dimensions_b
		      && (indices_a->unconstrained_base
			  == indices_b->unconstrained_base)
		      && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
		      && (types_compatible_p (TREE_TYPE (base_a),
					      TREE_TYPE (base_b))
			  || (!base_supports_access_fn_components_p (base_a)
			      && !base_supports_access_fn_components_p (base_b)
			      && operand_equal_p
				   (TYPE_SIZE (TREE_TYPE (base_a)),
				    TYPE_SIZE (TREE_TYPE (base_b)), 0)))
		      && (!loop_nest.exists ()
			  || (object_address_invariant_in_loop_p
			      (loop_nest[0], base_a))));
  /* If the bases are the same, we can include the base variation too.
     E.g. the b accesses in:

       for (int i = 0; i < n; ++i)
	 b[i + 4][0] = b[i][0];

     have a definite dependence distance of 4, while for:

       for (int i = 0; i < n; ++i)
	 a[i + 4][0] = b[i][0];

     the dependence distance depends on the gap between a and b.

     If the bases are different then we can only rely on the sequence
     rooted at a structure access, since arrays are allowed to overlap
     arbitrarily and change shape arbitrarily.  E.g. we treat the
     following as valid code, for some array a:

       ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];

     where two lvalues with the same int[4][3] type overlap, and where
     both lvalues are distinct from the object's declared type.  */
  if (same_base_p)
    {
      if (indices_a->unconstrained_base)
	full_seq.length += 1;
    }
  else
    full_seq = struct_seq;
  /* Punt if we didn't find a suitable sequence.  */
  if (full_seq.length == 0)
    {
      if (use_alt_indices
	  || (TREE_CODE (DR_REF (a)) == MEM_REF
	      && TREE_CODE (DR_REF (b)) == MEM_REF)
	  || may_be_nonaddressable_p (DR_REF (a))
	  || may_be_nonaddressable_p (DR_REF (b)))
	{
	  /* Fully exhausted possibilities.  */
	  DDR_ARE_DEPENDENT (res) = chrec_dont_know;
	  return res;
	}
      /* Try evaluating both DRs as dereferences of pointers.  */
      if (!a->alt_indices.base_object
	  && TREE_CODE (DR_REF (a)) != MEM_REF)
	{
	  tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (a)),
				 build1 (ADDR_EXPR, ptr_type_node, DR_REF (a)),
				 build_int_cst
				   (reference_alias_ptr_type (DR_REF (a)), 0));
	  dr_analyze_indices (&a->alt_indices, alt_ref,
			      loop_preheader_edge (loop_nest[0]),
			      loop_containing_stmt (DR_STMT (a)));
	}
      if (!b->alt_indices.base_object
	  && TREE_CODE (DR_REF (b)) != MEM_REF)
	{
	  tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (b)),
				 build1 (ADDR_EXPR, ptr_type_node, DR_REF (b)),
				 build_int_cst
				   (reference_alias_ptr_type (DR_REF (b)), 0));
	  dr_analyze_indices (&b->alt_indices, alt_ref,
			      loop_preheader_edge (loop_nest[0]),
			      loop_containing_stmt (DR_STMT (b)));
	}
      return initialize_data_dependence_relation (res, loop_nest, true);
    }
  /* Partial overlap is possible for different bases when strict aliasing
     is not in effect.  It's also possible if either base involves a union
     access; e.g. for:

       struct s1 { int a[2]; };
       struct s2 { struct s1 b; int c; };
       struct s3 { int d; struct s1 e; };
       union u { struct s2 f; struct s3 g; } *p, *q;

     the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
     "p->g.e" (base "p->g") and might partially overlap the s1 at
     "q->g.e" (base "q->g").  */
  if (!same_base_p
      && (!flag_strict_aliasing
	  || ref_contains_union_access_p (full_seq.object_a)
	  || ref_contains_union_access_p (full_seq.object_b)))
    {
      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
      return res;
    }
  DDR_COULD_BE_INDEPENDENT_P (res) = true;
  if (!loop_nest.exists ()
      || (object_address_invariant_in_loop_p (loop_nest[0],
					      full_seq.object_a)
	  && object_address_invariant_in_loop_p (loop_nest[0],
						 full_seq.object_b)))
    {
      DDR_OBJECT_A (res) = full_seq.object_a;
      DDR_OBJECT_B (res) = full_seq.object_b;
    }

  DDR_AFFINE_P (res) = true;
  DDR_ARE_DEPENDENT (res) = NULL_TREE;
  DDR_SUBSCRIPTS (res).create (full_seq.length);
  DDR_LOOP_NEST (res) = loop_nest;
  DDR_SELF_REFERENCE (res) = false;

  for (i = 0; i < full_seq.length; ++i)
    {
      struct subscript *subscript;

      subscript = XNEW (struct subscript);
      SUB_ACCESS_FN (subscript, 0)
	= indices_a->access_fns[full_seq.start_a + i];
      SUB_ACCESS_FN (subscript, 1)
	= indices_b->access_fns[full_seq.start_b + i];
      SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
      SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
      SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
      SUB_DISTANCE (subscript) = chrec_dont_know;
      DDR_SUBSCRIPTS (res).safe_push (subscript);
    }

  return res;
}
/* Initialize a data dependence relation between data accesses A and
   B.  NB_LOOPS is the number of loops surrounding the references: the
   size of the classic distance/direction vectors.  */

struct data_dependence_relation *
initialize_data_dependence_relation (struct data_reference *a,
				     struct data_reference *b,
				     vec<loop_p> loop_nest)
{
  data_dependence_relation *res = XCNEW (struct data_dependence_relation);
  DDR_A (res) = a;
  DDR_B (res) = b;
  DDR_LOOP_NEST (res).create (0);
  DDR_SUBSCRIPTS (res).create (0);
  DDR_DIR_VECTS (res).create (0);
  DDR_DIST_VECTS (res).create (0);

  if (a == NULL || b == NULL)
    {
      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
      return res;
    }

  /* If the data references do not alias, then they are independent.  */
  if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL))
    {
      DDR_ARE_DEPENDENT (res) = chrec_known;
      return res;
    }

  return initialize_data_dependence_relation (res, loop_nest, false);
}
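/* A minimal usage sketch (assuming the data references and LOOP_NEST
   were built by the caller, e.g. via create_data_ref and
   find_loop_nest):

     struct data_dependence_relation *ddr
       = initialize_data_dependence_relation (dra, drb, loop_nest);
     if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
       ;  // proven independent
     else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
       ;  // analysis gave up; a dependence must be assumed
     else
       ;  // NULL_TREE: subscripts are set up for the dependence tests
*/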
/* Frees memory used by the conflict function F.  */

static void
free_conflict_function (conflict_function *f)
{
  unsigned i;

  if (CF_NONTRIVIAL_P (f))
    {
      for (i = 0; i < f->n; i++)
	affine_fn_free (f->fns[i]);
    }
  free (f);
}
/* Frees memory used by SUBSCRIPTS.  */

static void
free_subscripts (vec<subscript_p> subscripts)
{
  for (subscript_p s : subscripts)
    {
      free_conflict_function (s->conflicting_iterations_in_a);
      free_conflict_function (s->conflicting_iterations_in_b);
      free (s);
    }
  subscripts.release ();
}
/* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
   description.  */

static inline void
finalize_ddr_dependent (struct data_dependence_relation *ddr,
			tree chrec)
{
  DDR_ARE_DEPENDENT (ddr) = chrec;
  free_subscripts (DDR_SUBSCRIPTS (ddr));
  DDR_SUBSCRIPTS (ddr).create (0);
}
/* The dependence relation DDR cannot be represented by a distance
   vector.  */

static inline void
non_affine_dependence_relation (struct data_dependence_relation *ddr)
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file,
	     "(Dependence relation cannot be represented by distance vector.) \n");

  DDR_AFFINE_P (ddr) = false;
}
/* This section contains the classic Banerjee tests.  */

/* Returns true iff CHREC_A and CHREC_B are not dependent on any index
   variables, i.e., if the ZIV (Zero Index Variable) test is true.  */

static inline bool
ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
{
  return (evolution_function_is_constant_p (chrec_a)
	  && evolution_function_is_constant_p (chrec_b));
}
/* Returns true iff CHREC_A and CHREC_B are dependent on an index
   variable, i.e., if the SIV (Single Index Variable) test is true.  */

static bool
siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
{
  if ((evolution_function_is_constant_p (chrec_a)
       && evolution_function_is_univariate_p (chrec_b))
      || (evolution_function_is_constant_p (chrec_b)
	  && evolution_function_is_univariate_p (chrec_a)))
    return true;

  if (evolution_function_is_univariate_p (chrec_a)
      && evolution_function_is_univariate_p (chrec_b))
    {
      switch (TREE_CODE (chrec_a))
	{
	case POLYNOMIAL_CHREC:
	  switch (TREE_CODE (chrec_b))
	    {
	    case POLYNOMIAL_CHREC:
	      if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
		return false;
	      /* FALLTHRU */

	    default:
	      return true;
	    }

	default:
	  return true;
	}
    }

  return false;
}
/* Creates a conflict function with N dimensions.  The affine functions
   in each dimension follow.  */

static conflict_function *
conflict_fn (unsigned n, ...)
{
  unsigned i;
  conflict_function *ret = XCNEW (conflict_function);
  va_list ap;

  gcc_assert (n > 0 && n <= MAX_DIM);
  va_start (ap, n);

  ret->n = n;
  for (i = 0; i < n; i++)
    ret->fns[i] = va_arg (ap, affine_fn);
  va_end (ap);

  return ret;
}
/* Returns constant affine function with value CST.  */

static affine_fn
affine_fn_cst (tree cst)
{
  affine_fn fn;
  fn.create (1);
  fn.quick_push (cst);
  return fn;
}
/* Returns affine function with single variable, CST + COEF * x_DIM.  */

static affine_fn
affine_fn_univar (tree cst, unsigned dim, tree coef)
{
  affine_fn fn;
  fn.create (dim + 1);
  unsigned i;

  gcc_assert (dim > 0);
  fn.quick_push (cst);
  for (i = 1; i < dim; i++)
    fn.quick_push (integer_zero_node);
  fn.quick_push (coef);
  return fn;
}
/* Analyze a ZIV (Zero Index Variable) subscript.  *OVERLAPS_A and
   *OVERLAPS_B are initialized to the functions that describe the
   relation between the elements accessed twice by CHREC_A and
   CHREC_B.  For k >= 0, the following property is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_ziv_subscript (tree chrec_a,
		       tree chrec_b,
		       conflict_function **overlaps_a,
		       conflict_function **overlaps_b,
		       tree *last_conflicts)
{
  tree type, difference;
  dependence_stats.num_ziv++;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_ziv_subscript \n");

  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
  chrec_a = chrec_convert (type, chrec_a, NULL);
  chrec_b = chrec_convert (type, chrec_b, NULL);
  difference = chrec_fold_minus (type, chrec_a, chrec_b);

  switch (TREE_CODE (difference))
    {
    case INTEGER_CST:
      if (integer_zerop (difference))
	{
	  /* The difference is equal to zero: the accessed index
	     overlaps for each iteration in the loop.  */
	  *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
	  *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
	  *last_conflicts = chrec_dont_know;
	  dependence_stats.num_ziv_dependent++;
	}
      else
	{
	  /* The accesses do not overlap.  */
	  *overlaps_a = conflict_fn_no_dependence ();
	  *overlaps_b = conflict_fn_no_dependence ();
	  *last_conflicts = integer_zero_node;
	  dependence_stats.num_ziv_independent++;
	}
      break;

    default:
      /* We're not sure whether the indexes overlap.  For the moment,
	 conservatively answer "don't know".  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "ziv test failed: difference is non-integer.\n");

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      dependence_stats.num_ziv_unimplemented++;
      break;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, ")\n");
}
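/* For example, for the subscript pair (A[3], A[4]) the difference
   folds to the INTEGER_CST -1, so the two accesses can never touch
   the same element and the subscript is independent; for (A[3], A[3])
   the difference is 0 and the accesses conflict on every iteration.  */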
/* Similar to max_stmt_executions_int, but returns the bound as a tree,
   and only if it fits to the int type.  If this is not the case, or the
   bound on the number of iterations of LOOP could not be derived, returns
   chrec_dont_know.  */

static tree
max_stmt_executions_tree (class loop *loop)
{
  widest_int nit;

  if (!max_stmt_executions (loop, &nit))
    return chrec_dont_know;

  if (!wi::fits_to_tree_p (nit, unsigned_type_node))
    return chrec_dont_know;

  return wide_int_to_tree (unsigned_type_node, nit);
}
/* Determine whether the CHREC is always positive/negative.  If the expression
   cannot be statically analyzed, return false, otherwise set the answer into
   VALUE.  */

static bool
chrec_is_positive (tree chrec, bool *value)
{
  bool value0, value1, value2;
  tree end_value, nb_iter;

  switch (TREE_CODE (chrec))
    {
    case POLYNOMIAL_CHREC:
      if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
	  || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
	return false;

      /* FIXME -- overflows.  */
      if (value0 == value1)
	{
	  *value = value0;
	  return true;
	}

      /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
	 and the proof consists in showing that the sign never
	 changes during the execution of the loop, from 0 to
	 loop->nb_iterations.  */
      if (!evolution_function_is_affine_p (chrec))
	return false;

      nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
      if (chrec_contains_undetermined (nb_iter))
	return false;

#if 0
      /* TODO -- If the test is after the exit, we may decrease the number of
	 iterations by one.  */
      if (after_exit)
	nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
#endif

      end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);

      if (!chrec_is_positive (end_value, &value2))
	return false;

      *value = value2;
      return value0 == value1;

    case INTEGER_CST:
      switch (tree_int_cst_sgn (chrec))
	{
	case -1:
	  *value = false;
	  break;
	case 1:
	  *value = true;
	  break;
	default:
	  return false;
	}
      return true;

    default:
      return false;
    }
}
/* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
   constant, and CHREC_B is an affine function.  *OVERLAPS_A and
   *OVERLAPS_B are initialized to the functions that describe the
   relation between the elements accessed twice by CHREC_A and
   CHREC_B.  For k >= 0, the following property is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_siv_subscript_cst_affine (tree chrec_a,
				  tree chrec_b,
				  conflict_function **overlaps_a,
				  conflict_function **overlaps_b,
				  tree *last_conflicts)
{
  bool value0, value1, value2;
  tree type, difference, tmp;

  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
  chrec_a = chrec_convert (type, chrec_a, NULL);
  chrec_b = chrec_convert (type, chrec_b, NULL);
  difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
  /* Special case overlap in the first iteration.  */
  if (integer_zerop (difference))
    {
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = integer_one_node;
      return;
    }

  if (!chrec_is_positive (initial_condition (difference), &value0))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "siv test failed: chrec is not positive.\n");

      dependence_stats.num_siv_unimplemented++;
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      return;
    }
  else
    {
      if (value0 == false)
	{
	  if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
	      || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "siv test failed: chrec not positive.\n");

	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	      dependence_stats.num_siv_unimplemented++;
	      return;
	    }
	  else
	    {
	      if (value1 == true)
		{
		  /* Example:
		     chrec_a = 12
		     chrec_b = {10, +, 1}
		  */

		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
		    {
		      HOST_WIDE_INT numiter;
		      class loop *loop = get_chrec_loop (chrec_b);

		      *overlaps_a
			= conflict_fn (1, affine_fn_cst (integer_zero_node));
		      tmp = fold_build2 (EXACT_DIV_EXPR, type,
					 fold_build1 (ABS_EXPR, type,
						      difference),
					 CHREC_RIGHT (chrec_b));
		      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
		      *last_conflicts = integer_one_node;

		      /* Perform weak-zero siv test to see if overlap is
			 outside the loop bounds.  */
		      numiter = max_stmt_executions_int (loop);

		      if (numiter >= 0
			  && compare_tree_int (tmp, numiter) > 0)
			{
			  free_conflict_function (*overlaps_a);
			  free_conflict_function (*overlaps_b);
			  *overlaps_a = conflict_fn_no_dependence ();
			  *overlaps_b = conflict_fn_no_dependence ();
			  *last_conflicts = integer_zero_node;
			  dependence_stats.num_siv_independent++;
			  return;
			}
		      dependence_stats.num_siv_dependent++;
		      return;
		    }

		  /* When the step does not divide the difference, there are
		     no overlaps.  */
		  else
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      dependence_stats.num_siv_independent++;
		      return;
		    }
		}
	      else
		{
		  /* Example:
		     chrec_a = 12
		     chrec_b = {10, +, -1}

		     In this case, chrec_a will not overlap with chrec_b.  */
		  *overlaps_a = conflict_fn_no_dependence ();
		  *overlaps_b = conflict_fn_no_dependence ();
		  *last_conflicts = integer_zero_node;
		  dependence_stats.num_siv_independent++;
		  return;
		}
	    }
	}
      else
	{
	  if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
	      || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "siv test failed: chrec not positive.\n");

	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	      dependence_stats.num_siv_unimplemented++;
	      return;
	    }
	  else
	    {
	      if (value2 == false)
		{
		  /* Example:
		     chrec_a = 3
		     chrec_b = {10, +, -1}
		  */
		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
		    {
		      HOST_WIDE_INT numiter;
		      class loop *loop = get_chrec_loop (chrec_b);

		      *overlaps_a
			= conflict_fn (1, affine_fn_cst (integer_zero_node));
		      tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
					 CHREC_RIGHT (chrec_b));
		      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
		      *last_conflicts = integer_one_node;

		      /* Perform weak-zero siv test to see if overlap is
			 outside the loop bounds.  */
		      numiter = max_stmt_executions_int (loop);

		      if (numiter >= 0
			  && compare_tree_int (tmp, numiter) > 0)
			{
			  free_conflict_function (*overlaps_a);
			  free_conflict_function (*overlaps_b);
			  *overlaps_a = conflict_fn_no_dependence ();
			  *overlaps_b = conflict_fn_no_dependence ();
			  *last_conflicts = integer_zero_node;
			  dependence_stats.num_siv_independent++;
			  return;
			}
		      dependence_stats.num_siv_dependent++;
		      return;
		    }

		  /* When the step does not divide the difference, there
		     are no overlaps.  */
		  else
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      dependence_stats.num_siv_independent++;
		      return;
		    }
		}
	      else
		{
		  /* In this case, chrec_a will not overlap with chrec_b.  */
		  *overlaps_a = conflict_fn_no_dependence ();
		  *overlaps_b = conflict_fn_no_dependence ();
		  *last_conflicts = integer_zero_node;
		  dependence_stats.num_siv_independent++;
		  return;
		}
	    }
	}
    }
}
/* Helper recursive function for initializing the matrix A.  Returns
   the initial value of CHREC.  */

static tree
initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
{
  gcc_assert (chrec);

  switch (TREE_CODE (chrec))
    {
    case POLYNOMIAL_CHREC:
      HOST_WIDE_INT chrec_right;
      if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
	return chrec_dont_know;
      chrec_right = int_cst_value (CHREC_RIGHT (chrec));
      /* We want to be able to negate without overflow.  */
      if (chrec_right == HOST_WIDE_INT_MIN)
	return chrec_dont_know;
      A[index][0] = mult * chrec_right;
      return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);

    case PLUS_EXPR:
    case MULT_EXPR:
    case MINUS_EXPR:
      {
	tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
	tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);

	return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
      }

    CASE_CONVERT:
      {
	tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
	return chrec_convert (chrec_type (chrec), op, NULL);
      }

    case BIT_NOT_EXPR:
      {
	/* Handle ~X as -1 - X.  */
	tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
	return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
			      build_int_cst (TREE_TYPE (chrec), -1), op);
      }

    case INTEGER_CST:
      return chrec;

    default:
      gcc_unreachable ();
      return NULL_TREE;
    }
}
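
/* As an illustration of how the matrix A is filled (a worked example,
   not part of the original sources): for the two-variable chrec
   {{3, +, 5}_1, +, 7}_2 called with INDEX = 0 and MULT = 1, the
   outermost step 7 is stored first and the recursion then peels the
   inner chrec:

   | A[0][0] = 7,  A[1][0] = 5,  returned initial value = 3.

   With MULT = -1 (as used for the second access function below), the
   same chrec would instead store -7 and -5, placing the two access
   functions on opposite sides of the Diophantine equation.  */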
#define FLOOR_DIV(x,y) ((x) / (y))

/* Solves the special case of the Diophantine equation:
   | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)

   Computes the descriptions OVERLAPS_A and OVERLAPS_B.  NITER is the
   number of iterations that loops X and Y run.  The overlaps will be
   constructed as evolutions in dimension DIM.  */

static void
compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
					 HOST_WIDE_INT step_a,
					 HOST_WIDE_INT step_b,
					 affine_fn *overlaps_a,
					 affine_fn *overlaps_b,
					 tree *last_conflicts, int dim)
{
  if (((step_a > 0 && step_b > 0)
       || (step_a < 0 && step_b < 0)))
    {
      HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
      HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;

      gcd_steps_a_b = gcd (step_a, step_b);
      step_overlaps_a = step_b / gcd_steps_a_b;
      step_overlaps_b = step_a / gcd_steps_a_b;

      if (niter > 0)
	{
	  tau2 = FLOOR_DIV (niter, step_overlaps_a);
	  tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
	  last_conflict = tau2;
	  *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
	}
      else
	*last_conflicts = chrec_dont_know;

      *overlaps_a = affine_fn_univar (integer_zero_node, dim,
				      build_int_cst (NULL_TREE,
						     step_overlaps_a));
      *overlaps_b = affine_fn_univar (integer_zero_node, dim,
				      build_int_cst (NULL_TREE,
						     step_overlaps_b));
    }
  else
    {
      *overlaps_a = affine_fn_cst (integer_zero_node);
      *overlaps_b = affine_fn_cst (integer_zero_node);
      *last_conflicts = integer_zero_node;
    }
}
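
/* A worked example (illustration only): for STEP_A = 4, STEP_B = 6 and
   NITER = 100, we have gcd_steps_a_b = 2, hence

   | step_overlaps_a = 6 / 2 = 3
   | step_overlaps_b = 4 / 2 = 2
   | tau2 = MIN (100 / 3, 100 / 2) = 33

   i.e. the overlapping iterations are {0, +, 3} in loop X and
   {0, +, 2} in loop Y, and the last conflict happens at the 33rd
   occurrence: 3 * 33 * 4 == 2 * 33 * 6.  */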
/* Solves the special case of a Diophantine equation where CHREC_A is
   an affine bivariate function, and CHREC_B is an affine univariate
   function.  For example,

   | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z

   has the following overlapping functions:

   | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
   | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
   | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v

   FORNOW: This is a specialized implementation for a case occurring in
   a common benchmark.  Implement the general algorithm.  */

static void
compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
				      conflict_function **overlaps_a,
				      conflict_function **overlaps_b,
				      tree *last_conflicts)
{
  bool xz_p, yz_p, xyz_p;
  HOST_WIDE_INT step_x, step_y, step_z;
  HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
  affine_fn overlaps_a_xz, overlaps_b_xz;
  affine_fn overlaps_a_yz, overlaps_b_yz;
  affine_fn overlaps_a_xyz, overlaps_b_xyz;
  affine_fn ova1, ova2, ovb;
  tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;

  step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
  step_y = int_cst_value (CHREC_RIGHT (chrec_a));
  step_z = int_cst_value (CHREC_RIGHT (chrec_b));

  niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
  niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
  niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));

  if (niter_x < 0 || niter_y < 0 || niter_z < 0)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      return;
    }

  niter = MIN (niter_x, niter_z);
  compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
					   &overlaps_a_xz,
					   &overlaps_b_xz,
					   &last_conflicts_xz, 1);
  niter = MIN (niter_y, niter_z);
  compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
					   &overlaps_a_yz,
					   &overlaps_b_yz,
					   &last_conflicts_yz, 2);
  niter = MIN (niter_x, niter_z);
  niter = MIN (niter_y, niter);
  compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
					   &overlaps_a_xyz,
					   &overlaps_b_xyz,
					   &last_conflicts_xyz, 3);

  xz_p = !integer_zerop (last_conflicts_xz);
  yz_p = !integer_zerop (last_conflicts_yz);
  xyz_p = !integer_zerop (last_conflicts_xyz);

  if (xz_p || yz_p || xyz_p)
    {
      ova1 = affine_fn_cst (integer_zero_node);
      ova2 = affine_fn_cst (integer_zero_node);
      ovb = affine_fn_cst (integer_zero_node);
      if (xz_p)
	{
	  affine_fn t0 = ova1;
	  affine_fn t2 = ovb;

	  ova1 = affine_fn_plus (ova1, overlaps_a_xz);
	  ovb = affine_fn_plus (ovb, overlaps_b_xz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  *last_conflicts = last_conflicts_xz;
	}
      if (yz_p)
	{
	  affine_fn t0 = ova2;
	  affine_fn t2 = ovb;

	  ova2 = affine_fn_plus (ova2, overlaps_a_yz);
	  ovb = affine_fn_plus (ovb, overlaps_b_yz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  *last_conflicts = last_conflicts_yz;
	}
      if (xyz_p)
	{
	  affine_fn t0 = ova1;
	  affine_fn t2 = ova2;
	  affine_fn t4 = ovb;

	  ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
	  ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
	  ovb = affine_fn_plus (ovb, overlaps_b_xyz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  affine_fn_free (t4);
	  *last_conflicts = last_conflicts_xyz;
	}
      *overlaps_a = conflict_fn (2, ova1, ova2);
      *overlaps_b = conflict_fn (1, ovb);
    }
  else
    {
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = integer_zero_node;
    }

  affine_fn_free (overlaps_a_xz);
  affine_fn_free (overlaps_b_xz);
  affine_fn_free (overlaps_a_yz);
  affine_fn_free (overlaps_b_yz);
  affine_fn_free (overlaps_a_xyz);
  affine_fn_free (overlaps_b_xyz);
}
/* Copy the elements of vector VEC1 with length SIZE to VEC2.  */

static void
lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
		    int size)
{
  memcpy (vec2, vec1, size * sizeof (*vec1));
}

/* Copy the elements of M x N matrix MAT1 to MAT2.  */

static void
lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
		    int m, int n)
{
  int i;

  for (i = 0; i < m; i++)
    lambda_vector_copy (mat1[i], mat2[i], n);
}

/* Store the N x N identity matrix in MAT.  */

static void
lambda_matrix_id (lambda_matrix mat, int size)
{
  int i, j;

  for (i = 0; i < size; i++)
    for (j = 0; j < size; j++)
      mat[i][j] = (i == j) ? 1 : 0;
}

/* Return the index of the first nonzero element of vector VEC1 between
   START and N.  We must have START <= N.
   Returns N if VEC1 is the zero vector.  */

static int
lambda_vector_first_nz (lambda_vector vec1, int n, int start)
{
  int j = start;
  while (j < n && vec1[j] == 0)
    j++;
  return j;
}

/* Add a multiple of row R1 of matrix MAT with N columns to row R2:
   R2 = R2 + CONST1 * R1.  Returns false if the operation overflows.  */

static bool
lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2,
		       lambda_int const1)
{
  int i;

  if (const1 == 0)
    return true;

  for (i = 0; i < n; i++)
    {
      bool ovf;
      lambda_int tem = mul_hwi (mat[r1][i], const1, &ovf);
      if (ovf)
	return false;
      lambda_int tem2 = add_hwi (mat[r2][i], tem, &ovf);
      if (ovf || tem2 == HOST_WIDE_INT_MIN)
	return false;
      mat[r2][i] = tem2;
    }

  return true;
}

/* Multiply vector VEC1 of length SIZE by a constant CONST1,
   and store the result in VEC2.  */

static void
lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
			  int size, lambda_int const1)
{
  int i;

  if (const1 == 0)
    lambda_vector_clear (vec2, size);
  else
    for (i = 0; i < size; i++)
      vec2[i] = const1 * vec1[i];
}

/* Negate vector VEC1 with length SIZE and store it in VEC2.  */

static void
lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
		      int size)
{
  lambda_vector_mult_const (vec1, vec2, size, -1);
}

/* Negate row R1 of matrix MAT which has N columns.  */

static void
lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
{
  lambda_vector_negate (mat[r1], mat[r1], n);
}

/* Return true if two vectors are equal.  */

static bool
lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
{
  int i;

  for (i = 0; i < size; i++)
    if (vec1[i] != vec2[i])
      return false;
  return true;
}
/* Given an M x N integer matrix A, this function determines an M x
   M unimodular matrix U, and an M x N echelon matrix S such that
   "U.A = S".  This decomposition is also known as "right Hermite".

   Ref: Algorithm 2.1 page 33 in "Loop Transformations for
   Restructuring Compilers" Utpal Banerjee.  */

static bool
lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
			     lambda_matrix S, lambda_matrix U)
{
  int i, j, i0 = 0;

  lambda_matrix_copy (A, S, m, n);
  lambda_matrix_id (U, m);

  for (j = 0; j < n; j++)
    {
      if (lambda_vector_first_nz (S[j], m, i0) < m)
	{
	  ++i0;
	  for (i = m - 1; i >= i0; i--)
	    {
	      while (S[i][j] != 0)
		{
		  lambda_int factor, a, b;

		  a = S[i-1][j];
		  b = S[i][j];
		  gcc_assert (a != HOST_WIDE_INT_MIN);
		  factor = a / b;

		  if (!lambda_matrix_row_add (S, n, i, i-1, -factor))
		    return false;
		  std::swap (S[i], S[i-1]);

		  if (!lambda_matrix_row_add (U, m, i, i-1, -factor))
		    return false;
		  std::swap (U[i], U[i-1]);
		}
	    }
	}
    }
  return true;
}
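
/* Illustration (not from the original sources): for the 2 x 1 column
   matrix A = (4, -6)^T built from the steps 4 and 6 of two access
   functions, one valid right Hermite decomposition U.A = S is

   | U = [ -1 -1 ]       S = [ 2 ]
   |     [ -3 -2 ]           [ 0 ]

   where U is unimodular (det U = -1) and S[0][0] = 2 = gcd (4, 6),
   which is exactly the value the gcd-test below consumes.  */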
/* Determines the overlapping elements due to accesses CHREC_A and
   CHREC_B, that are affine functions.  This function cannot handle
   symbolic evolution functions, ie. when initial conditions are
   parameters, because it uses lambda matrices of integers.  */

static void
analyze_subscript_affine_affine (tree chrec_a,
				 tree chrec_b,
				 conflict_function **overlaps_a,
				 conflict_function **overlaps_b,
				 tree *last_conflicts)
{
  unsigned nb_vars_a, nb_vars_b, dim;
  lambda_int gamma, gcd_alpha_beta;
  lambda_matrix A, U, S;
  struct obstack scratch_obstack;

  if (eq_evolutions_p (chrec_a, chrec_b))
    {
      /* The accessed index overlaps for each iteration in the
	 loop.  */
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = chrec_dont_know;
      return;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_subscript_affine_affine \n");

  /* For determining the initial intersection, we have to solve a
     Diophantine equation.  This is the most time consuming part.

     For answering to the question: "Is there a dependence?" we have
     to prove that there exists a solution to the Diophantine
     equation, and that the solution is in the iteration domain,
     i.e. the solution is positive or zero, and that the solution
     happens before the upper bound loop.nb_iterations.  Otherwise
     there is no dependence.  This function outputs a description of
     the iterations that hold the intersections.  */

  nb_vars_a = nb_vars_in_chrec (chrec_a);
  nb_vars_b = nb_vars_in_chrec (chrec_b);

  gcc_obstack_init (&scratch_obstack);

  dim = nb_vars_a + nb_vars_b;
  U = lambda_matrix_new (dim, dim, &scratch_obstack);
  A = lambda_matrix_new (dim, 1, &scratch_obstack);
  S = lambda_matrix_new (dim, 1, &scratch_obstack);

  tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
  tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
  if (init_a == chrec_dont_know
      || init_b == chrec_dont_know)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "affine-affine test failed: "
		 "representation issue.\n");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }
  gamma = int_cst_value (init_b) - int_cst_value (init_a);

  /* Don't do all the hard work of solving the Diophantine equation
     when we already know the solution: for example,
     | {3, +, 1}_1
     | {3, +, 4}_2
     | gamma = 3 - 3 = 0.
     Then the first overlap occurs during the first iterations:
     | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
  */
  if (gamma == 0)
    {
      if (nb_vars_a == 1 && nb_vars_b == 1)
	{
	  HOST_WIDE_INT step_a, step_b;
	  HOST_WIDE_INT niter, niter_a, niter_b;
	  affine_fn ova, ovb;

	  niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
	  niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
	  niter = MIN (niter_a, niter_b);
	  step_a = int_cst_value (CHREC_RIGHT (chrec_a));
	  step_b = int_cst_value (CHREC_RIGHT (chrec_b));

	  compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
						   &ova, &ovb,
						   last_conflicts, 1);
	  *overlaps_a = conflict_fn (1, ova);
	  *overlaps_b = conflict_fn (1, ovb);
	}

      else if (nb_vars_a == 2 && nb_vars_b == 1)
	compute_overlap_steps_for_affine_1_2
	  (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);

      else if (nb_vars_a == 1 && nb_vars_b == 2)
	compute_overlap_steps_for_affine_1_2
	  (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);

      else
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "affine-affine test failed: too many variables.\n");
	  *overlaps_a = conflict_fn_not_known ();
	  *overlaps_b = conflict_fn_not_known ();
	  *last_conflicts = chrec_dont_know;
	}
      goto end_analyze_subs_aa;
    }

  /* U.A = S */
  if (!lambda_matrix_right_hermite (A, dim, 1, S, U))
    {
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }

  if (S[0][0] < 0)
    {
      S[0][0] *= -1;
      lambda_matrix_row_negate (U, dim, 0);
    }
  gcd_alpha_beta = S[0][0];

  /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
     but that is a quite strange case.  Instead of ICEing, answer
     chrec_dont_know.  */
  if (gcd_alpha_beta == 0)
    {
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }

  /* The classic "gcd-test".  */
  if (!int_divides_p (gcd_alpha_beta, gamma))
    {
      /* The "gcd-test" has determined that there is no integer
	 solution, i.e. there is no dependence.  */
      *overlaps_a = conflict_fn_no_dependence ();
      *overlaps_b = conflict_fn_no_dependence ();
      *last_conflicts = integer_zero_node;
    }

  /* Both access functions are univariate.  This includes SIV and MIV cases.  */
  else if (nb_vars_a == 1 && nb_vars_b == 1)
    {
      /* Both functions should have the same evolution sign.  */
      if (((A[0][0] > 0 && -A[1][0] > 0)
	   || (A[0][0] < 0 && -A[1][0] < 0)))
	{
	  /* The solutions are given by:
	     |
	     | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
	     |                           [u21 u22]    [y0]

	     For a given integer t.  Using the following variables,

	     | i0 = u11 * gamma / gcd_alpha_beta
	     | j0 = u12 * gamma / gcd_alpha_beta
	     | i1 = u21
	     | j1 = u22

	     the solutions are:

	     | x0 = i0 + i1 * t,
	     | y0 = j0 + j1 * t.  */
	  HOST_WIDE_INT i0, j0, i1, j1;

	  i0 = U[0][0] * gamma / gcd_alpha_beta;
	  j0 = U[0][1] * gamma / gcd_alpha_beta;
	  i1 = U[1][0];
	  j1 = U[1][1];

	  if ((i1 == 0 && i0 < 0)
	      || (j1 == 0 && j0 < 0))
	    {
	      /* There is no solution.
		 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
		 falls in here, but for the moment we don't look at the
		 upper bound of the iteration domain.  */
	      *overlaps_a = conflict_fn_no_dependence ();
	      *overlaps_b = conflict_fn_no_dependence ();
	      *last_conflicts = integer_zero_node;
	      goto end_analyze_subs_aa;
	    }

	  if (i1 > 0 && j1 > 0)
	    {
	      HOST_WIDE_INT niter_a
		= max_stmt_executions_int (get_chrec_loop (chrec_a));
	      HOST_WIDE_INT niter_b
		= max_stmt_executions_int (get_chrec_loop (chrec_b));
	      HOST_WIDE_INT niter = MIN (niter_a, niter_b);

	      /* (X0, Y0) is a solution of the Diophantine equation:
		 "chrec_a (X0) = chrec_b (Y0)".  */
	      HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
					CEIL (-j0, j1));
	      HOST_WIDE_INT x0 = i1 * tau1 + i0;
	      HOST_WIDE_INT y0 = j1 * tau1 + j0;

	      /* (X1, Y1) is the smallest positive solution of the eq
		 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
		 first conflict occurs.  */
	      HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
	      HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
	      HOST_WIDE_INT y1 = y0 - j1 * min_multiple;

	      if (niter > 0)
		{
		  /* If the overlap occurs outside of the bounds of the
		     loop, there is no dependence.  */
		  if (x1 >= niter_a || y1 >= niter_b)
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      goto end_analyze_subs_aa;
		    }

		  /* max stmt executions can get quite large, avoid
		     overflows by using wide ints here.  */
		  widest_int tau2
		    = wi::smin (wi::sdiv_floor (wi::sub (niter_a, i0), i1),
				wi::sdiv_floor (wi::sub (niter_b, j0), j1));
		  widest_int last_conflict = wi::sub (tau2, (x1 - i0)/i1);
		  if (wi::min_precision (last_conflict, SIGNED)
		      <= TYPE_PRECISION (integer_type_node))
		    *last_conflicts
		      = build_int_cst (integer_type_node,
				       last_conflict.to_shwi ());
		  else
		    *last_conflicts = chrec_dont_know;
		}
	      else
		*last_conflicts = chrec_dont_know;

	      *overlaps_a
		= conflict_fn (1,
			       affine_fn_univar (build_int_cst (NULL_TREE, x1),
						 1,
						 build_int_cst (NULL_TREE, i1)));
	      *overlaps_b
		= conflict_fn (1,
			       affine_fn_univar (build_int_cst (NULL_TREE, y1),
						 1,
						 build_int_cst (NULL_TREE, j1)));
	    }
	  else
	    {
	      /* FIXME: For the moment, the upper bound of the
		 iteration domain for i and j is not checked.  */
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	    }
	}
      else
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
	  *overlaps_a = conflict_fn_not_known ();
	  *overlaps_b = conflict_fn_not_known ();
	  *last_conflicts = chrec_dont_know;
	}
    }
  else
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
    }

end_analyze_subs_aa:
  obstack_free (&scratch_obstack, NULL);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  (overlaps_a = ");
      dump_conflict_function (dump_file, *overlaps_a);
      fprintf (dump_file, ")\n  (overlaps_b = ");
      dump_conflict_function (dump_file, *overlaps_b);
      fprintf (dump_file, "))\n");
    }
}
/* Returns true when analyze_subscript_affine_affine can be used for
   determining the dependence relation between chrec_a and chrec_b,
   that contain symbols.  This function modifies chrec_a and chrec_b
   such that the analysis result is the same, and such that they don't
   contain symbols, and then can safely be passed to the analyzer.

   Example: The analysis of the following tuples of evolutions produce
   the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
   vs. {0, +, 1}_1

   {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
   {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
*/

static bool
can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
{
  tree diff, type, left_a, left_b, right_b;

  if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
      || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
    /* FIXME: For the moment not handled.  Might be refined later.  */
    return false;

  type = chrec_type (*chrec_a);
  left_a = CHREC_LEFT (*chrec_a);
  left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
  diff = chrec_fold_minus (type, left_a, left_b);

  if (!evolution_function_is_constant_p (diff))
    return false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");

  *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
				     diff, CHREC_RIGHT (*chrec_a));
  right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
  *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
				     build_int_cst (type, 0),
				     right_b);
  return true;
}
/* Analyze a SIV (Single Index Variable) subscript.  *OVERLAPS_A and
   *OVERLAPS_B are initialized to the functions that describe the
   relation between the elements accessed twice by CHREC_A and
   CHREC_B.  For k >= 0, the following property is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_siv_subscript (tree chrec_a,
		       tree chrec_b,
		       conflict_function **overlaps_a,
		       conflict_function **overlaps_b,
		       tree *last_conflicts,
		       int loop_nest_num)
{
  dependence_stats.num_siv++;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_siv_subscript \n");

  if (evolution_function_is_constant_p (chrec_a)
      && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
    analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
				      overlaps_a, overlaps_b, last_conflicts);

  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
	   && evolution_function_is_constant_p (chrec_b))
    analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
				      overlaps_b, overlaps_a, last_conflicts);

  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
	   && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
    {
      if (!chrec_contains_symbols (chrec_a)
	  && !chrec_contains_symbols (chrec_b))
	{
	  analyze_subscript_affine_affine (chrec_a, chrec_b,
					   overlaps_a, overlaps_b,
					   last_conflicts);

	  if (CF_NOT_KNOWN_P (*overlaps_a)
	      || CF_NOT_KNOWN_P (*overlaps_b))
	    dependence_stats.num_siv_unimplemented++;
	  else if (CF_NO_DEPENDENCE_P (*overlaps_a)
		   || CF_NO_DEPENDENCE_P (*overlaps_b))
	    dependence_stats.num_siv_independent++;
	  else
	    dependence_stats.num_siv_dependent++;
	}
      else if (can_use_analyze_subscript_affine_affine (&chrec_a,
							&chrec_b))
	{
	  analyze_subscript_affine_affine (chrec_a, chrec_b,
					   overlaps_a, overlaps_b,
					   last_conflicts);

	  if (CF_NOT_KNOWN_P (*overlaps_a)
	      || CF_NOT_KNOWN_P (*overlaps_b))
	    dependence_stats.num_siv_unimplemented++;
	  else if (CF_NO_DEPENDENCE_P (*overlaps_a)
		   || CF_NO_DEPENDENCE_P (*overlaps_b))
	    dependence_stats.num_siv_independent++;
	  else
	    dependence_stats.num_siv_dependent++;
	}
      else
	goto siv_subscript_dontknow;
    }

  else
    {
    siv_subscript_dontknow:;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  siv test failed: unimplemented");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      dependence_stats.num_siv_unimplemented++;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, ")\n");
}
/* Returns false if we can prove that the greatest common divisor of the steps
   of CHREC does not divide CST, true otherwise.  */

static bool
gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
{
  HOST_WIDE_INT cd = 0, val;
  tree step;

  if (!tree_fits_shwi_p (cst))
    return true;
  val = tree_to_shwi (cst);

  while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
    {
      step = CHREC_RIGHT (chrec);
      if (!tree_fits_shwi_p (step))
	return true;
      cd = gcd (cd, tree_to_shwi (step));
      chrec = CHREC_LEFT (chrec);
    }

  return val % cd == 0;
}
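
/* For instance (illustration only): for CHREC = {{21, +, 2}_1, +, -2}_2
   and CST = 1, the gcd of the steps is gcd (2, -2) = 2 and 1 % 2 != 0,
   so the function can prove that no combination of the steps adds up
   to CST and returns false; for CST = 4 it would return true.  */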
/* Analyze a MIV (Multiple Index Variable) subscript with respect to
   LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
   functions that describe the relation between the elements accessed
   twice by CHREC_A and CHREC_B.  For k >= 0, the following property
   is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_miv_subscript (tree chrec_a,
		       tree chrec_b,
		       conflict_function **overlaps_a,
		       conflict_function **overlaps_b,
		       tree *last_conflicts,
		       class loop *loop_nest)
{
  tree type, difference;

  dependence_stats.num_miv++;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_miv_subscript \n");

  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
  chrec_a = chrec_convert (type, chrec_a, NULL);
  chrec_b = chrec_convert (type, chrec_b, NULL);
  difference = chrec_fold_minus (type, chrec_a, chrec_b);

  if (eq_evolutions_p (chrec_a, chrec_b))
    {
      /* Access functions are the same: all the elements are accessed
	 in the same order.  */
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
      dependence_stats.num_miv_dependent++;
    }

  else if (evolution_function_is_constant_p (difference)
	   && evolution_function_is_affine_multivariate_p (chrec_a,
							   loop_nest->num)
	   && !gcd_of_steps_may_divide_p (chrec_a, difference))
    {
      /* testsuite/.../ssa-chrec-33.c
	 {{21, +, 2}_1, +, -2}_2  vs.  {{20, +, 2}_1, +, -2}_2

	 The difference is 1, and all the evolution steps are multiples
	 of 2, consequently there are no overlapping elements.  */
      *overlaps_a = conflict_fn_no_dependence ();
      *overlaps_b = conflict_fn_no_dependence ();
      *last_conflicts = integer_zero_node;
      dependence_stats.num_miv_independent++;
    }

  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest->num)
	   && !chrec_contains_symbols (chrec_a, loop_nest)
	   && evolution_function_is_affine_in_loop (chrec_b, loop_nest->num)
	   && !chrec_contains_symbols (chrec_b, loop_nest))
    {
      /* testsuite/.../ssa-chrec-35.c
	 {0, +, 1}_2  vs.  {0, +, 1}_3
	 the overlapping elements are respectively located at iterations:
	 {0, +, 1}_x and {0, +, 1}_x,
	 in other words, we have the equality:
	 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)

	 Other examples:
	 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
	 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)

	 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
	 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
      */
      analyze_subscript_affine_affine (chrec_a, chrec_b,
				       overlaps_a, overlaps_b, last_conflicts);

      if (CF_NOT_KNOWN_P (*overlaps_a)
	  || CF_NOT_KNOWN_P (*overlaps_b))
	dependence_stats.num_miv_unimplemented++;
      else if (CF_NO_DEPENDENCE_P (*overlaps_a)
	       || CF_NO_DEPENDENCE_P (*overlaps_b))
	dependence_stats.num_miv_independent++;
      else
	dependence_stats.num_miv_dependent++;
    }

  else
    {
      /* When the analysis is too difficult, answer "don't know".  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      dependence_stats.num_miv_unimplemented++;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, ")\n");
}
/* Determines the iterations for which CHREC_A is equal to CHREC_B
   with respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
   OVERLAP_ITERATIONS_B are initialized with two functions that
   describe the iterations that contain conflicting elements.

   Remark: For an integer k >= 0, the following equality is true:

   CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
*/

static void
analyze_overlapping_iterations (tree chrec_a,
				tree chrec_b,
				conflict_function **overlap_iterations_a,
				conflict_function **overlap_iterations_b,
				tree *last_conflicts, class loop *loop_nest)
{
  unsigned int lnn = loop_nest->num;

  dependence_stats.num_subscript_tests++;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "(analyze_overlapping_iterations \n");
      fprintf (dump_file, "  (chrec_a = ");
      print_generic_expr (dump_file, chrec_a);
      fprintf (dump_file, ")\n  (chrec_b = ");
      print_generic_expr (dump_file, chrec_b);
      fprintf (dump_file, ")\n");
    }

  if (chrec_a == NULL_TREE
      || chrec_b == NULL_TREE
      || chrec_contains_undetermined (chrec_a)
      || chrec_contains_undetermined (chrec_b))
    {
      dependence_stats.num_subscript_undetermined++;

      *overlap_iterations_a = conflict_fn_not_known ();
      *overlap_iterations_b = conflict_fn_not_known ();
    }

  /* If they are the same chrec, and are affine, they overlap
     on every iteration.  */
  else if (eq_evolutions_p (chrec_a, chrec_b)
	   && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
	       || operand_equal_p (chrec_a, chrec_b, 0)))
    {
      dependence_stats.num_same_subscript_function++;
      *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = chrec_dont_know;
    }

  /* If they aren't the same, and aren't affine, we can't do anything
     yet.  */
  else if ((chrec_contains_symbols (chrec_a)
	    || chrec_contains_symbols (chrec_b))
	   && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
	       || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
    {
      dependence_stats.num_subscript_undetermined++;
      *overlap_iterations_a = conflict_fn_not_known ();
      *overlap_iterations_b = conflict_fn_not_known ();
    }

  else if (ziv_subscript_p (chrec_a, chrec_b))
    analyze_ziv_subscript (chrec_a, chrec_b,
			   overlap_iterations_a, overlap_iterations_b,
			   last_conflicts);

  else if (siv_subscript_p (chrec_a, chrec_b))
    analyze_siv_subscript (chrec_a, chrec_b,
			   overlap_iterations_a, overlap_iterations_b,
			   last_conflicts, lnn);

  else
    analyze_miv_subscript (chrec_a, chrec_b,
			   overlap_iterations_a, overlap_iterations_b,
			   last_conflicts, loop_nest);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  (overlap_iterations_a = ");
      dump_conflict_function (dump_file, *overlap_iterations_a);
      fprintf (dump_file, ")\n  (overlap_iterations_b = ");
      dump_conflict_function (dump_file, *overlap_iterations_b);
      fprintf (dump_file, "))\n");
    }
}
/* Helper function for uniquely inserting distance vectors.  */

static void
save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
{
  for (lambda_vector v : DDR_DIST_VECTS (ddr))
    if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
      return;

  DDR_DIST_VECTS (ddr).safe_push (dist_v);
}

/* Helper function for uniquely inserting direction vectors.  */

static void
save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
{
  for (lambda_vector v : DDR_DIR_VECTS (ddr))
    if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
      return;

  DDR_DIR_VECTS (ddr).safe_push (dir_v);
}

/* Add a distance of 1 on all the loops outer than INDEX.  If we
   haven't yet determined a distance for this outer loop, push a new
   distance vector composed of the previous distance, and a distance
   of 1 for this outer loop.  Example:

   | loop_1
   |   loop_2
   |     A[10]
   |   endloop_2
   | endloop_1

   Saved vectors are of the form (dist_in_1, dist_in_2).  First, we
   save (0, 1), then we have to save (1, 0).  */

static void
add_outer_distances (struct data_dependence_relation *ddr,
		     lambda_vector dist_v, int index)
{
  /* For each outer loop where init_v is not set, the accesses are
     in dependence of distance 1 in the loop.  */
  while (--index >= 0)
    {
      lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
      lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
      save_v[index] = 1;
      save_dist_v (ddr, save_v);
    }
}
/* Return false when we fail to represent the data dependence as a
   distance vector.  A_INDEX is the index of the first reference
   (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
   second reference.  INIT_B is set to true when a component has been
   added to the distance vector DIST_V.  INDEX_CARRY is then set to
   the index in DIST_V that carries the dependence.  */

static bool
build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
			     unsigned int a_index, unsigned int b_index,
			     lambda_vector dist_v, bool *init_b,
			     int *index_carry)
{
  unsigned i;
  lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  class loop *loop = DDR_LOOP_NEST (ddr)[0];

  for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
    {
      tree access_fn_a, access_fn_b;
      struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);

      if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
	{
	  non_affine_dependence_relation (ddr);
	  return false;
	}

      access_fn_a = SUB_ACCESS_FN (subscript, a_index);
      access_fn_b = SUB_ACCESS_FN (subscript, b_index);

      if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
	  && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
	{
	  HOST_WIDE_INT dist;
	  int index;
	  int var_a = CHREC_VARIABLE (access_fn_a);
	  int var_b = CHREC_VARIABLE (access_fn_b);

	  if (var_a != var_b
	      || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
	    {
	      non_affine_dependence_relation (ddr);
	      return false;
	    }

	  /* When data references are collected in a loop while data
	     dependences are analyzed in a loop nest nested in that loop,
	     we would have more access functions than loops.  Skip
	     access functions of loops not in the loop nest.

	     See PR89725 for more information.  */
	  if (flow_loop_nested_p (get_loop (cfun, var_a), loop))
	    continue;

	  dist = int_cst_value (SUB_DISTANCE (subscript));
	  index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
	  *index_carry = MIN (index, *index_carry);

	  /* This is the subscript coupling test.  If we have already
	     recorded a distance for this loop (a distance coming from
	     another subscript), it should be the same.  For example,
	     in the following code, there is no dependence:

	     | loop i = 0, N, 1
	     |   T[i+1][i] = ...
	     |   ... = T[i][i]
	     | endloop
	  */
	  if (init_v[index] != 0 && dist_v[index] != dist)
	    {
	      finalize_ddr_dependent (ddr, chrec_known);
	      return false;
	    }

	  dist_v[index] = dist;
	  init_v[index] = 1;
	  *init_b = true;
	}
      else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
	{
	  /* This can be for example an affine vs. constant dependence
	     (T[i] vs. T[3]) that is not an affine dependence and is
	     not representable as a distance vector.  */
	  non_affine_dependence_relation (ddr);
	  return false;
	}
    }

  return true;
}
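
/* A sketch of the subscript coupling test above (illustration only):
   for the accesses T[i+1][i] and T[i][i] in a loop over i, the first
   subscript pair ({1, +, 1}_1, {0, +, 1}_1) yields a distance of 1
   while the second pair ({0, +, 1}_1, {0, +, 1}_1) yields 0.  Both
   distances belong to the same loop, so the contradiction proves that
   no iteration accesses the same element twice: the relation is
   finalized to chrec_known (independence).  */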
/* Return true when the DDR contains only invariant access functions wrto. loop
   number LNUM.  */

static bool
invariant_access_functions (const struct data_dependence_relation *ddr,
			    int lnum)
{
  for (subscript *sub : DDR_SUBSCRIPTS (ddr))
    if (!evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 0), lnum)
	|| !evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 1), lnum))
      return false;

  return true;
}
/* Helper function for the case where DDR_A and DDR_B are the same
   multivariate access function with a constant step.  For an example
   see pr34635-1.c.  */

static void
add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
{
  int x_1, x_2;
  tree c_1 = CHREC_LEFT (c_2);
  tree c_0 = CHREC_LEFT (c_1);
  lambda_vector dist_v;
  HOST_WIDE_INT v1, v2, cd;

  /* Polynomials with more than 2 variables are not handled yet.  When
     the evolution steps are parameters, it is not possible to
     represent the dependence using classical distance vectors.  */
  if (TREE_CODE (c_0) != INTEGER_CST
      || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
      || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
    {
      DDR_AFFINE_P (ddr) = false;
      return;
    }

  x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
  x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));

  /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2).  */
  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  v1 = int_cst_value (CHREC_RIGHT (c_1));
  v2 = int_cst_value (CHREC_RIGHT (c_2));
  cd = gcd (v1, v2);
  v1 /= cd;
  v2 /= cd;

  if (v2 < 0)
    {
      v2 = -v2;
      v1 = -v1;
    }

  dist_v[x_1] = v2;
  dist_v[x_2] = -v1;
  save_dist_v (ddr, dist_v);

  add_outer_distances (ddr, dist_v, x_1);
}
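
/* Derivation of the example above (illustration only): for the self
   dependence of {{0, +, 2}_1, +, 3}_2, two iterations (i, j) and
   (i', j') touch the same element when 2*i + 3*j == 2*i' + 3*j'.
   With v1 = 2, v2 = 3 and cd = gcd (2, 3) = 1, the smallest nonzero
   solution is (i' - i, j' - j) = (v2, -v1) = (3, -2), which is the
   distance vector stored in DIST_V.  */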
/* Helper function for the case where DDR_A and DDR_B are the same
   access functions.  */

static void
add_other_self_distances (struct data_dependence_relation *ddr)
{
  lambda_vector dist_v;
  unsigned i;
  int index_carry = DDR_NB_LOOPS (ddr);
  subscript *sub;
  class loop *loop = DDR_LOOP_NEST (ddr)[0];

  FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
    {
      tree access_fun = SUB_ACCESS_FN (sub, 0);

      if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
	{
	  if (!evolution_function_is_univariate_p (access_fun, loop->num))
	    {
	      if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
		{
		  DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
		  return;
		}

	      access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);

	      if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
		add_multivariate_self_dist (ddr, access_fun);
	      else
		/* The evolution step is not constant: it varies in
		   the outer loop, so this cannot be represented by a
		   distance vector.  For example in pr34635.c the
		   evolution is {0, +, {0, +, 4}_1}_2.  */
		DDR_AFFINE_P (ddr) = false;

	      return;
	    }

	  /* When data references are collected in a loop while data
	     dependences are analyzed in a loop nest nested in that loop,
	     we would have more access functions than loops.  Skip
	     access functions of loops not in the loop nest.

	     See PR89725 for more information.  */
	  if (flow_loop_nested_p (get_loop (cfun, CHREC_VARIABLE (access_fun)),
				  loop))
	    continue;

	  index_carry = MIN (index_carry,
			     index_in_loop_nest (CHREC_VARIABLE (access_fun),
						 DDR_LOOP_NEST (ddr)));
	}
    }

  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  add_outer_distances (ddr, dist_v, index_carry);
}
static void
insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
{
  lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));

  dist_v[0] = 1;
  save_dist_v (ddr, dist_v);
}

/* Adds a unit distance vector to DDR when there is a 0 overlap.  This
   is the case for example when access functions are the same and
   equal to a constant, as in:

   | loop_1
   |   A[3] = ...
   |   ... = A[3]
   | endloop_1

   in which case the distance vectors are (0) and (1).  */

static void
add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
{
  unsigned i, j;

  for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
    {
      subscript_p sub = DDR_SUBSCRIPT (ddr, i);
      conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
      conflict_function *cb = SUB_CONFLICTS_IN_B (sub);

      for (j = 0; j < ca->n; j++)
	if (affine_function_zero_p (ca->fns[j]))
	  {
	    insert_innermost_unit_dist_vector (ddr);
	    return;
	  }

      for (j = 0; j < cb->n; j++)
	if (affine_function_zero_p (cb->fns[j]))
	  {
	    insert_innermost_unit_dist_vector (ddr);
	    return;
	  }
    }
}

/* Return true when the DDR contains two data references that have the
   same access functions.  */

static inline bool
same_access_functions (const struct data_dependence_relation *ddr)
{
  for (subscript *sub : DDR_SUBSCRIPTS (ddr))
    if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
			  SUB_ACCESS_FN (sub, 1)))
      return false;

  return true;
}
/* Compute the classic per loop distance vector.  DDR is the data
   dependence relation to build a vector from.  Return false when we
   fail to represent the data dependence as a distance vector.  */

static bool
build_classic_dist_vector (struct data_dependence_relation *ddr,
			   class loop *loop_nest)
{
  bool init_b = false;
  int index_carry = DDR_NB_LOOPS (ddr);
  lambda_vector dist_v;

  if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
    return false;

  if (same_access_functions (ddr))
    {
      /* Save the 0 vector.  */
      dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
      save_dist_v (ddr, dist_v);

      if (invariant_access_functions (ddr, loop_nest->num))
	add_distance_for_zero_overlaps (ddr);

      if (DDR_NB_LOOPS (ddr) > 1)
	add_other_self_distances (ddr);

      return true;
    }

  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
    return false;

  /* Save the distance vector if we initialized one.  */
  if (init_b)
    {
      /* Verify a basic constraint: classic distance vectors should
	 always be lexicographically positive.

	 Data references are collected in the order of execution of
	 the program, thus for the following loop

	 | for (i = 1; i < 100; i++)
	 |   for (j = 1; j < 100; j++)
	 |     {
	 |       t = T[j+1][i-1];  // A
	 |       T[j][i] = t + 2;  // B
	 |     }

	 references are collected following the direction of the wind:
	 A then B.  The data dependence tests are performed also
	 following this order, such that we're looking at the distance
	 separating the elements accessed by A from the elements later
	 accessed by B.  But in this example, the distance returned by
	 test_dep (A, B) is lexicographically negative (-1, 1), that
	 means that the access A occurs later than B with respect to
	 the outer loop, ie. we're actually looking upwind.  In this
	 case we solve test_dep (B, A) looking downwind to the
	 lexicographically positive solution, that returns the
	 distance vector (1, -1).  */
      if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
	{
	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
	  if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
	    return false;
	  compute_subscript_distance (ddr);
	  if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
					    &index_carry))
	    return false;
	  save_dist_v (ddr, save_v);
	  DDR_REVERSED_P (ddr) = true;

	  /* In this case there is a dependence forward for all the
	     outer loops:

	     | for (k = 1; k < 100; k++)
	     |  for (i = 1; i < 100; i++)
	     |   for (j = 1; j < 100; j++)
	     |     {
	     |       t = T[j+1][i-1];  // A
	     |       T[j][i] = t + 2;  // B
	     |     }
	  */
	  if (DDR_NB_LOOPS (ddr) > 1)
	    {
	      add_outer_distances (ddr, save_v, index_carry);
	      add_outer_distances (ddr, dist_v, index_carry);
	    }
	}
      else
	{
	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
	  lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));

	  if (DDR_NB_LOOPS (ddr) > 1)
	    {
	      lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));

	      if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
		return false;
	      compute_subscript_distance (ddr);
	      if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
						&index_carry))
		return false;

	      save_dist_v (ddr, save_v);
	      add_outer_distances (ddr, dist_v, index_carry);
	      add_outer_distances (ddr, opposite_v, index_carry);
	    }
	  else
	    save_dist_v (ddr, save_v);
	}
    }
  else
    {
      /* There is a distance of 1 on all the outer loops: Example:
	 there is a dependence of distance 1 on loop_1 for the array A.

	 | loop_1
	 |   A[5] = ...
	 | endloop
      */
      add_outer_distances (ddr, dist_v,
			   lambda_vector_first_nz (dist_v,
						   DDR_NB_LOOPS (ddr), 0));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned i;

      fprintf (dump_file, "(build_classic_dist_vector\n");
      for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
	{
	  fprintf (dump_file, "  dist_vector = (");
	  print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
			       DDR_NB_LOOPS (ddr));
	  fprintf (dump_file, "  )\n");
	}
      fprintf (dump_file, ")\n");
    }

  return true;
}
/* Return the direction for a given distance.
   FIXME: Computing dir this way is suboptimal, since dir can catch
   cases that dist is unable to represent.  */

static inline enum data_dependence_direction
dir_from_dist (int dist)
{
  if (dist > 0)
    return dir_positive;
  else if (dist < 0)
    return dir_negative;
  else
    return dir_equal;
}

/* Compute the classic per loop direction vector.  DDR is the data
   dependence relation to build a vector from.  */

static void
build_classic_dir_vector (struct data_dependence_relation *ddr)
{
  unsigned i, j;
  lambda_vector dist_v;

  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
    {
      lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));

      for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
	dir_v[j] = dir_from_dist (dist_v[j]);

      save_dir_v (ddr, dir_v);
    }
}
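
/* For instance (illustration only), the distance vector (1, -1)
   computed for the T[j+1][i-1] / T[j][i] example above is translated
   into the direction vector (dir_positive, dir_negative), and the
   self distance (0) becomes (dir_equal).  */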
/* Helper function.  Returns true when there is a dependence between the
   data references.  A_INDEX is the index of the first reference (0 for
   DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference.  */

static bool
subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
			       unsigned int a_index, unsigned int b_index,
			       class loop *loop_nest)
{
  unsigned int i;
  tree last_conflicts;
  struct subscript *subscript;
  tree res = NULL_TREE;

  for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
    {
      conflict_function *overlaps_a, *overlaps_b;

      analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
				      SUB_ACCESS_FN (subscript, b_index),
				      &overlaps_a, &overlaps_b,
				      &last_conflicts, loop_nest);

      if (SUB_CONFLICTS_IN_A (subscript))
	free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
      if (SUB_CONFLICTS_IN_B (subscript))
	free_conflict_function (SUB_CONFLICTS_IN_B (subscript));

      SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
      SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
      SUB_LAST_CONFLICT (subscript) = last_conflicts;

      /* If there is any undetermined conflict function we have to
	 give a conservative answer in case we cannot prove that
	 no dependence exists when analyzing another subscript.  */
      if (CF_NOT_KNOWN_P (overlaps_a)
	  || CF_NOT_KNOWN_P (overlaps_b))
	{
	  res = chrec_dont_know;
	  continue;
	}

      /* When there is a subscript with no dependence we can stop.  */
      else if (CF_NO_DEPENDENCE_P (overlaps_a)
	       || CF_NO_DEPENDENCE_P (overlaps_b))
	{
	  res = chrec_known;
	  break;
	}
    }

  if (res == NULL_TREE)
    return true;

  if (res == chrec_known)
    dependence_stats.num_dependence_independent++;
  else
    dependence_stats.num_dependence_undetermined++;
  finalize_ddr_dependent (ddr, res);
  return false;
}

/* Computes the conflicting iterations in LOOP_NEST, and initialize DDR.  */

static void
subscript_dependence_tester (struct data_dependence_relation *ddr,
			     class loop *loop_nest)
{
  if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
    dependence_stats.num_dependence_dependent++;

  compute_subscript_distance (ddr);
  if (build_classic_dist_vector (ddr, loop_nest))
    build_classic_dir_vector (ddr);
}
/* Returns true when all the access functions of A are affine or
   constant with respect to LOOP_NEST.  */

static bool
access_functions_are_affine_or_constant_p (const struct data_reference *a,
					   const class loop *loop_nest)
{
  vec<tree> fns = DR_ACCESS_FNS (a);
  for (tree t : fns)
    if (!evolution_function_is_invariant_p (t, loop_nest->num)
	&& !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
      return false;

  return true;
}

/* This computes the affine dependence relation between A and B with
   respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
   independence between two accesses, while CHREC_DONT_KNOW is used
   for representing the unknown relation.

   Note that it is possible to stop the computation of the dependence
   relation the first time we detect a CHREC_KNOWN element for a given
   subscript.  */

void
compute_affine_dependence (struct data_dependence_relation *ddr,
			   class loop *loop_nest)
{
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "(compute_affine_dependence\n");
      fprintf (dump_file, "  ref_a: ");
      print_generic_expr (dump_file, DR_REF (dra));
      fprintf (dump_file, ", stmt_a: ");
      print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
      fprintf (dump_file, "  ref_b: ");
      print_generic_expr (dump_file, DR_REF (drb));
      fprintf (dump_file, ", stmt_b: ");
      print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
    }

  /* Analyze only when the dependence relation is not yet known.  */
  if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      dependence_stats.num_dependence_tests++;

      if (access_functions_are_affine_or_constant_p (dra, loop_nest)
	  && access_functions_are_affine_or_constant_p (drb, loop_nest))
	subscript_dependence_tester (ddr, loop_nest);

      /* As a last case, if the dependence cannot be determined, or if
	 the dependence is considered too difficult to determine, answer
	 "don't know".  */
      else
	{
	  dependence_stats.num_dependence_undetermined++;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Data ref a:\n");
	      dump_data_reference (dump_file, dra);
	      fprintf (dump_file, "Data ref b:\n");
	      dump_data_reference (dump_file, drb);
	      fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
	    }
	  finalize_ddr_dependent (ddr, chrec_dont_know);
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
	fprintf (dump_file, ") -> no dependence\n");
      else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	fprintf (dump_file, ") -> dependence analysis failed\n");
      else
	fprintf (dump_file, ")\n");
    }
}
/* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
   the data references in DATAREFS, in the LOOP_NEST.  When
   COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
   relations.  Return true when successful, i.e. the number of data
   references is small enough to be handled.  */

bool
compute_all_dependences (const vec<data_reference_p> &datarefs,
			 vec<ddr_p> *dependence_relations,
			 const vec<loop_p> &loop_nest,
			 bool compute_self_and_rr)
{
  struct data_dependence_relation *ddr;
  struct data_reference *a, *b;
  unsigned int i, j;

  if ((int) datarefs.length ()
      > param_loop_max_datarefs_for_datadeps)
    {
      struct data_dependence_relation *ddr;

      /* Insert a single relation into dependence_relations:
	 chrec_dont_know.  */
      ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
      dependence_relations->safe_push (ddr);
      return false;
    }

  FOR_EACH_VEC_ELT (datarefs, i, a)
    for (j = i + 1; datarefs.iterate (j, &b); j++)
      if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
	{
	  ddr = initialize_data_dependence_relation (a, b, loop_nest);
	  dependence_relations->safe_push (ddr);
	  if (loop_nest.exists ())
	    compute_affine_dependence (ddr, loop_nest[0]);
	}

  if (compute_self_and_rr)
    FOR_EACH_VEC_ELT (datarefs, i, a)
      {
	ddr = initialize_data_dependence_relation (a, a, loop_nest);
	dependence_relations->safe_push (ddr);
	if (loop_nest.exists ())
	  compute_affine_dependence (ddr, loop_nest[0]);
      }

  return true;
}
/* Describes a location of a memory reference.  */

struct data_ref_loc
{
  /* The memory reference.  */
  tree ref;

  /* True if the memory reference is read.  */
  bool is_read;

  /* True if the data reference is conditional within the containing
     statement, i.e. if it might not occur even when the statement
     is executed and runs to completion.  */
  bool is_conditional_in_stmt;
};


/* Stores the locations of memory references in STMT to REFERENCES.  Returns
   true if STMT clobbers memory, false otherwise.  */

static bool
get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
{
  bool clobbers_memory = false;
  data_ref_loc ref;
  tree op0, op1;
  enum gimple_code stmt_code = gimple_code (stmt);

  /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
     As we cannot model data-references to not spelled out
     accesses give up if they may occur.  */
  if (stmt_code == GIMPLE_CALL
      && !(gimple_call_flags (stmt) & ECF_CONST))
    {
      /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
      if (gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_GOMP_SIMD_LANE:
	    {
	      class loop *loop = gimple_bb (stmt)->loop_father;
	      tree uid = gimple_call_arg (stmt, 0);
	      gcc_assert (TREE_CODE (uid) == SSA_NAME);
	      if (loop == NULL
		  || loop->simduid != SSA_NAME_VAR (uid))
		clobbers_memory = true;
	      break;
	    }
	  case IFN_MASK_LOAD:
	  case IFN_MASK_STORE:
	    break;
	  default:
	    clobbers_memory = true;
	    break;
	  }
      else
	clobbers_memory = true;
    }
  else if (stmt_code == GIMPLE_ASM
	   && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
	       || gimple_vuse (stmt)))
    clobbers_memory = true;

  if (!gimple_vuse (stmt))
    return clobbers_memory;

  if (stmt_code == GIMPLE_ASSIGN)
    {
      tree base;
      op0 = gimple_assign_lhs (stmt);
      op1 = gimple_assign_rhs1 (stmt);

      if (DECL_P (op1)
	  || (REFERENCE_CLASS_P (op1)
	      && (base = get_base_address (op1))
	      && TREE_CODE (base) != SSA_NAME
	      && !is_gimple_min_invariant (base)))
	{
	  ref.ref = op1;
	  ref.is_read = true;
	  ref.is_conditional_in_stmt = false;
	  references->safe_push (ref);
	}
    }
  else if (stmt_code == GIMPLE_CALL)
    {
      unsigned i, n;
      tree ptr, type;
      unsigned int align;

      ref.is_read = false;
      if (gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_MASK_LOAD:
	    if (gimple_call_lhs (stmt) == NULL_TREE)
	      break;
	    ref.is_read = true;
	    /* FALLTHRU */
	  case IFN_MASK_STORE:
	    ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
	    align = tree_to_shwi (gimple_call_arg (stmt, 1));
	    if (ref.is_read)
	      type = TREE_TYPE (gimple_call_lhs (stmt));
	    else
	      type = TREE_TYPE (gimple_call_arg (stmt, 3));
	    if (TYPE_ALIGN (type) != align)
	      type = build_aligned_type (type, align);
	    ref.is_conditional_in_stmt = true;
	    ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
				   ptr);
	    references->safe_push (ref);
	    return false;
	  default:
	    break;
	  }

      op0 = gimple_call_lhs (stmt);
      n = gimple_call_num_args (stmt);
      for (i = 0; i < n; i++)
	{
	  op1 = gimple_call_arg (stmt, i);

	  if (DECL_P (op1)
	      || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
	    {
	      ref.ref = op1;
	      ref.is_read = true;
	      ref.is_conditional_in_stmt = false;
	      references->safe_push (ref);
	    }
	}
    }
  else
    return clobbers_memory;

  if (op0
      && (DECL_P (op0)
	  || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
    {
      ref.ref = op0;
      ref.is_read = false;
      ref.is_conditional_in_stmt = false;
      references->safe_push (ref);
    }
  return clobbers_memory;
}
/* Returns true if the loop-nest has any data reference.  */

bool
loop_nest_has_data_refs (loop_p loop)
{
  basic_block *bbs = get_loop_body (loop);
  auto_vec<data_ref_loc, 3> references;

  for (unsigned i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];
      gimple_stmt_iterator bsi;

      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	{
	  gimple *stmt = gsi_stmt (bsi);
	  get_references_in_stmt (stmt, &references);
	  if (references.length ())
	    {
	      free (bbs);
	      return true;
	    }
	}
    }
  free (bbs);
  return false;
}
/* Stores the data references in STMT to DATAREFS.  If there is an
   unanalyzable reference, returns a failure, otherwise returns success.
   NEST is the outermost loop of the loop nest in which the references
   should be analyzed.  */

opt_result
find_data_references_in_stmt (class loop *nest, gimple *stmt,
			      vec<data_reference_p> *datarefs)
{
  auto_vec<data_ref_loc, 2> references;
  data_reference_p dr;

  if (get_references_in_stmt (stmt, &references))
    return opt_result::failure_at (stmt, "statement clobbers memory: %G",
				   stmt);

  for (const data_ref_loc &ref : references)
    {
      dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
			    loop_containing_stmt (stmt), ref.ref,
			    stmt, ref.is_read, ref.is_conditional_in_stmt);
      gcc_assert (dr != NULL);
      datarefs->safe_push (dr);
    }

  return opt_result::success ();
}

/* Stores the data references in STMT to DATAREFS.  If there is an
   unanalyzable reference, returns false, otherwise returns true.
   NEST is the outermost loop of the loop nest in which the references
   should be instantiated, LOOP is the loop in which the references
   should be analyzed.  */

bool
graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
				       vec<data_reference_p> *datarefs)
{
  auto_vec<data_ref_loc, 2> references;
  bool ret = true;
  data_reference_p dr;

  if (get_references_in_stmt (stmt, &references))
    return false;

  for (const data_ref_loc &ref : references)
    {
      dr = create_data_ref (nest, loop, ref.ref, stmt, ref.is_read,
			    ref.is_conditional_in_stmt);
      gcc_assert (dr != NULL);
      datarefs->safe_push (dr);
    }

  return ret;
}
/* Search the data references in LOOP, and record the information into
   DATAREFS.  Returns chrec_dont_know when failing to analyze a
   difficult case, returns NULL_TREE otherwise.  */

tree
find_data_references_in_bb (class loop *loop, basic_block bb,
			    vec<data_reference_p> *datarefs)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    {
      gimple *stmt = gsi_stmt (bsi);

      if (!find_data_references_in_stmt (loop, stmt, datarefs))
	{
	  struct data_reference *res;
	  res = XCNEW (struct data_reference);
	  datarefs->safe_push (res);

	  return chrec_dont_know;
	}
    }

  return NULL_TREE;
}

/* Search the data references in LOOP, and record the information into
   DATAREFS.  Returns chrec_dont_know when failing to analyze a
   difficult case, returns NULL_TREE otherwise.

   TODO: This function should be made smarter so that it can handle address
   arithmetic as if they were array accesses, etc.  */

tree
find_data_references_in_loop (class loop *loop,
			      vec<data_reference_p> *datarefs)
{
  basic_block bb, *bbs;
  unsigned int i;

  bbs = get_loop_body_in_dom_order (loop);

  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = bbs[i];

      if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
	{
	  free (bbs);
	  return chrec_dont_know;
	}
    }
  free (bbs);

  return NULL_TREE;
}
/* Return the alignment in bytes that DRB is guaranteed to have at all
   times.  */

unsigned int
dr_alignment (innermost_loop_behavior *drb)
{
  /* Get the alignment of BASE_ADDRESS + INIT.  */
  unsigned int alignment = drb->base_alignment;
  unsigned int misalignment = (drb->base_misalignment
                               + TREE_INT_CST_LOW (drb->init));
  if (misalignment != 0)
    alignment = MIN (alignment, misalignment & -misalignment);

  /* Cap it to the alignment of OFFSET.  */
  if (!integer_zerop (drb->offset))
    alignment = MIN (alignment, drb->offset_alignment);

  /* Cap it to the alignment of STEP.  */
  if (!integer_zerop (drb->step))
    alignment = MIN (alignment, drb->step_alignment);

  return alignment;
}
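/* Worked example, added for illustration: MISALIGNMENT & -MISALIGNMENT
   isolates the lowest set bit, i.e. the largest power of two that
   divides MISALIGNMENT.  With base_alignment == 16 and
   base_misalignment + init == 12 (binary 1100), 12 & -12 == 4, so the
   address is only guaranteed to be 4-byte aligned at all times.  */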
/* If BASE is a pointer-typed SSA name, try to find the object that it
   is based on.  Return this object X on success and store the alignment
   in bytes of BASE - &X in *ALIGNMENT_OUT.  */

static tree
get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
{
  if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL_TREE;

  gimple *def = SSA_NAME_DEF_STMT (base);
  base = analyze_scalar_evolution (loop_containing_stmt (def), base);

  /* Peel chrecs and record the minimum alignment preserved by
     all the steps.  */
  unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
  while (TREE_CODE (base) == POLYNOMIAL_CHREC)
    {
      unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
      alignment = MIN (alignment, step_alignment);
      base = CHREC_LEFT (base);
    }

  /* Punt if the expression is too complicated to handle.  */
  if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL_TREE;

  /* The only useful cases are those for which a dereference folds to something
     other than an INDIRECT_REF.  */
  tree ref_type = TREE_TYPE (TREE_TYPE (base));
  tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
  if (!ref)
    return NULL_TREE;

  /* Analyze the base to which the steps we peeled were applied.  */
  poly_int64 bitsize, bitpos, bytepos;
  machine_mode mode;
  int unsignedp, reversep, volatilep;
  tree offset;
  base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
                              &unsignedp, &reversep, &volatilep);
  if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
    return NULL_TREE;

  /* Restrict the alignment to that guaranteed by the offsets.  */
  unsigned int bytepos_alignment = known_alignment (bytepos);
  if (bytepos_alignment != 0)
    alignment = MIN (alignment, bytepos_alignment);
  if (offset)
    {
      unsigned int offset_alignment = highest_pow2_factor (offset);
      alignment = MIN (alignment, offset_alignment);
    }

  *alignment_out = alignment;
  return base;
}
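/* Illustration only (a sketch, not taken from the original source):
   for a BASE whose scalar evolution is {&x + 4, +, 8}_1, the peeling
   loop above caps the alignment at highest_pow2_factor (8) == 8 from
   the step, and the remaining base &x + 4 folds to a reference whose
   byte position 4 caps it further, so *ALIGNMENT_OUT would end up as 4
   with the underlying object X returned.  */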
/* Return the object whose alignment would need to be changed in order
   to increase the alignment of ADDR.  Store the maximum achievable
   alignment in *MAX_ALIGNMENT.  */

tree
get_base_for_alignment (tree addr, unsigned int *max_alignment)
{
  tree base = get_base_for_alignment_1 (addr, max_alignment);
  if (base)
    return base;

  if (TREE_CODE (addr) == ADDR_EXPR)
    addr = TREE_OPERAND (addr, 0);
  *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
  return addr;
}
/* Recursive helper function.  */

static bool
find_loop_nest_1 (class loop *loop, vec<loop_p> *loop_nest)
{
  /* Inner loops of the nest should not contain siblings.  Example:
     when there are two consecutive loops,

     | loop_0
     |   loop_1
     |     A[{0, +, 1}_1]
     |   endloop_1
     |   loop_2
     |     A[{0, +, 1}_2]
     |   endloop_2
     | endloop_0

     the dependence relation cannot be captured by the distance
     abstraction.  */
  if (loop->next)
    return false;

  loop_nest->safe_push (loop);
  if (loop->inner)
    return find_loop_nest_1 (loop->inner, loop_nest);
  return true;
}
/* Return false when the LOOP is not well nested.  Otherwise return
   true and insert in LOOP_NEST the loops of the nest.  LOOP_NEST will
   contain the loops from the outermost to the innermost, as they will
   appear in the classic distance vector.  */

bool
find_loop_nest (class loop *loop, vec<loop_p> *loop_nest)
{
  loop_nest->safe_push (loop);
  if (loop->inner)
    return find_loop_nest_1 (loop->inner, loop_nest);
  return true;
}
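/* Illustration only (not in the original source): collecting a
   perfectly nested loop tree from its outermost loop.  LOOP is
   hypothetical:

   | auto_vec<loop_p, 3> loop_nest;
   | if (!find_loop_nest (loop, &loop_nest))
   |   return false;  // a sibling loop defeats the distance abstraction
   | // loop_nest[0] is LOOP; deeper loops follow, outermost to innermost.
   */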
/* Returns true when the data dependences have been computed, false otherwise.
   Given a loop nest LOOP, the following vectors are returned:
   DATAREFS is initialized to all the array elements contained in this loop,
   DEPENDENCE_RELATIONS contains the relations between the data references.
   Compute read-read and self relations if
   COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */

bool
compute_data_dependences_for_loop (class loop *loop,
                                   bool compute_self_and_read_read_dependences,
                                   vec<loop_p> *loop_nest,
                                   vec<data_reference_p> *datarefs,
                                   vec<ddr_p> *dependence_relations)
{
  bool res = true;

  memset (&dependence_stats, 0, sizeof (dependence_stats));

  /* If the loop nest is not well formed, or one of the data references
     is not computable, give up without spending time to compute other
     dependences.  */
  if (!loop
      || !find_loop_nest (loop, loop_nest)
      || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
      || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
                                   compute_self_and_read_read_dependences))
    res = false;

  if (dump_file && (dump_flags & TDF_STATS))
    {
      fprintf (dump_file, "Dependence tester statistics:\n");

      fprintf (dump_file, "Number of dependence tests: %d\n",
               dependence_stats.num_dependence_tests);
      fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
               dependence_stats.num_dependence_dependent);
      fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
               dependence_stats.num_dependence_independent);
      fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
               dependence_stats.num_dependence_undetermined);

      fprintf (dump_file, "Number of subscript tests: %d\n",
               dependence_stats.num_subscript_tests);
      fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
               dependence_stats.num_subscript_undetermined);
      fprintf (dump_file, "Number of same subscript function: %d\n",
               dependence_stats.num_same_subscript_function);

      fprintf (dump_file, "Number of ziv tests: %d\n",
               dependence_stats.num_ziv);
      fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
               dependence_stats.num_ziv_dependent);
      fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
               dependence_stats.num_ziv_independent);
      fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
               dependence_stats.num_ziv_unimplemented);

      fprintf (dump_file, "Number of siv tests: %d\n",
               dependence_stats.num_siv);
      fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
               dependence_stats.num_siv_dependent);
      fprintf (dump_file, "Number of siv tests returning independent: %d\n",
               dependence_stats.num_siv_independent);
      fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
               dependence_stats.num_siv_unimplemented);

      fprintf (dump_file, "Number of miv tests: %d\n",
               dependence_stats.num_miv);
      fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
               dependence_stats.num_miv_dependent);
      fprintf (dump_file, "Number of miv tests returning independent: %d\n",
               dependence_stats.num_miv_independent);
      fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
               dependence_stats.num_miv_unimplemented);
    }

  return res;
}
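/* Illustration only (not in the original source): a typical call
   sequence, including the cleanup the caller owns.  All variable names
   are hypothetical:

   | auto_vec<loop_p, 3> loop_nest;
   | auto_vec<data_reference_p, 10> datarefs;
   | auto_vec<ddr_p, 10> ddrs;
   | bool ok = compute_data_dependences_for_loop (loop, true, &loop_nest,
   |                                              &datarefs, &ddrs);
   | ...
   | free_dependence_relations (ddrs);
   | free_data_refs (datarefs);

   Even on failure the vectors may be partially populated, so they are
   released unconditionally.  */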
/* Free the memory used by a data dependence relation DDR.  */

void
free_dependence_relation (struct data_dependence_relation *ddr)
{
  if (ddr == NULL)
    return;

  if (DDR_SUBSCRIPTS (ddr).exists ())
    free_subscripts (DDR_SUBSCRIPTS (ddr));
  DDR_DIST_VECTS (ddr).release ();
  DDR_DIR_VECTS (ddr).release ();

  free (ddr);
}
/* Free the memory used by the data dependence relations from
   DEPENDENCE_RELATIONS.  */

void
free_dependence_relations (vec<ddr_p>& dependence_relations)
{
  for (data_dependence_relation *ddr : dependence_relations)
    if (ddr)
      free_dependence_relation (ddr);

  dependence_relations.release ();
}
/* Free the memory used by the data references from DATAREFS.  */

void
free_data_refs (vec<data_reference_p>& datarefs)
{
  for (data_reference *dr : datarefs)
    free_data_ref (dr);
  datarefs.release ();
}
/* Common routine implementing both dr_direction_indicator and
   dr_zero_step_indicator.  Return USEFUL_MIN if the indicator is known
   to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
   Return the step as the indicator otherwise.  */

static tree
dr_step_indicator (struct data_reference *dr, int useful_min)
{
  tree step = DR_STEP (dr);
  STRIP_NOPS (step);
  /* Look for cases where the step is scaled by a positive constant
     integer, which will often be the access size.  If the multiplication
     doesn't change the sign (due to overflow effects) then we can
     test the unscaled value instead.  */
  if (TREE_CODE (step) == MULT_EXPR
      && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
      && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
    {
      tree factor = TREE_OPERAND (step, 1);
      step = TREE_OPERAND (step, 0);

      /* Strip widening and truncating conversions as well as nops.  */
      if (CONVERT_EXPR_P (step)
          && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
        step = TREE_OPERAND (step, 0);
      tree type = TREE_TYPE (step);

      /* Get the range of step values that would not cause overflow.  */
      widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
                         / wi::to_widest (factor));
      widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
                         / wi::to_widest (factor));

      /* Get the range of values that the unconverted step actually has.  */
      wide_int step_min, step_max;
      value_range vr;
      if (TREE_CODE (step) != SSA_NAME
          || !get_range_query (cfun)->range_of_expr (vr, step)
          || vr.kind () != VR_RANGE)
        {
          step_min = wi::to_wide (TYPE_MIN_VALUE (type));
          step_max = wi::to_wide (TYPE_MAX_VALUE (type));
        }
      else
        {
          step_min = vr.lower_bound ();
          step_max = vr.upper_bound ();
        }

      /* Check whether the unconverted step has an acceptable range.  */
      signop sgn = TYPE_SIGN (type);
      if (wi::les_p (minv, widest_int::from (step_min, sgn))
          && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
        {
          if (wi::ge_p (step_min, useful_min, sgn))
            return ssize_int (useful_min);
          else if (wi::lt_p (step_max, 0, sgn))
            return ssize_int (-1);
          else
            return fold_convert (ssizetype, step);
        }
    }
  return DR_STEP (dr);
}
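/* Worked example, added for illustration: suppose DR_STEP is n * 4
   where n is a signed int SSA name whose computed range is [1, 100].
   The factor 4 is a positive constant, scaling [1, 100] by 4 cannot
   overflow ssizetype, and with USEFUL_MIN == 1 we have step_min >= 1,
   so the function returns ssize_int (1): the step is known positive
   even though its exact runtime value is not.  */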
/* Return a value that is negative iff DR has a negative step.  */

tree
dr_direction_indicator (struct data_reference *dr)
{
  return dr_step_indicator (dr, 0);
}
/* Return a value that is zero iff DR has a zero step.  */

tree
dr_zero_step_indicator (struct data_reference *dr)
{
  return dr_step_indicator (dr, 1);
}
/* Return true if DR is known to have a nonnegative (but possibly zero)
   step.  */

bool
dr_known_forward_stride_p (struct data_reference *dr)
{
  tree indicator = dr_direction_indicator (dr);
  tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
                                   fold_convert (ssizetype, indicator),
                                   ssize_int (0));
  return neg_step_val && integer_zerop (neg_step_val);
}
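/* Illustration only (not in the original source): fold_binary returns
   NULL_TREE when the comparison cannot be decided at compile time, so
   the conjunction above is true only when "indicator < 0" folds to a
   constant false.  For a constant step of 4 the indicator is 4, the
   comparison folds to boolean false and the function returns true; for
   an SSA-name indicator of unknown sign nothing folds and the function
   conservatively returns false.  */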