/* Data references and dependences detectors.
   Copyright (C) 2003-2023 Free Software Foundation, Inc.
   Contributed by Sebastian Pop <pop@cri.ensmp.fr>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass walks a given loop structure searching for array
   references.  The information about the array accesses is recorded
   in DATA_REFERENCE structures.

   The basic test for determining the dependences is:
   given two access functions chrec1 and chrec2 to the same array, and
   x and y two vectors from the iteration domain, the same element of
   the array is accessed twice at iterations x and y if and only if:
   |             chrec1 (x) == chrec2 (y).

   The goals of this analysis are:

   - to determine the independence: the relation between two
     independent accesses is qualified with the chrec_known (this
     information allows a loop parallelization),

   - when two data references access the same data, to qualify the
     dependence relation with classic dependence representations:

       - distance vectors
       - direction vectors
       - loop carried level dependence
       - polyhedron dependence
     or with the chains of recurrences based representation,

   - to define a knowledge base for storing the data dependence
     information,

   - to define an interface to access this data.


   Definitions:

   - subscript: given two array accesses a subscript is the tuple
   composed of the access functions for a given dimension.  Example:
   Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
   (f1, g1), (f2, g2), (f3, g3).

   - Diophantine equation: an equation whose coefficients and
   solutions are integer constants, for example the equation
   |   3*x + 2*y = 1
   has an integer solution x = 1 and y = -1.

   References:

   - "Advanced Compilation for High Performance Computing" by Randy
   Allen and Ken Kennedy.
   http://citeseer.ist.psu.edu/goff91practical.html

   - "Loop Transformations for Restructuring Compilers - The Foundations"
   by Utpal Banerjee.

*/
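
/* As a worked illustration (not from the original sources): for the
   accesses A[2*i + 1] and A[2*j + 3] in the same loop, the access
   functions are chrec1 (x) = 2*x + 1 and chrec2 (y) = 2*y + 3.  The
   Diophantine equation 2*x + 1 == 2*y + 3 has the solutions x = y + 1,
   so the element written at iteration y is accessed again one
   iteration later, giving the distance vector (1).  */
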
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "fold-const.h"
#include "expr.h"
#include "gimple-iterator.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "dumpfile.h"
#include "tree-affine.h"
#include "builtins.h"
#include "tree-eh.h"
#include "ssa.h"
#include "internal-fn.h"
#include "vr-values.h"
#include "range-op.h"
#include "tree-ssa-loop-ivopts.h"

static struct datadep_stats
{
  int num_dependence_tests;
  int num_dependence_dependent;
  int num_dependence_independent;
  int num_dependence_undetermined;

  int num_subscript_tests;
  int num_subscript_undetermined;
  int num_same_subscript_function;

  int num_ziv;
  int num_ziv_independent;
  int num_ziv_dependent;
  int num_ziv_unimplemented;

  int num_siv;
  int num_siv_independent;
  int num_siv_dependent;
  int num_siv_unimplemented;

  int num_miv;
  int num_miv_independent;
  int num_miv_dependent;
  int num_miv_unimplemented;
} dependence_stats;

static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
					   unsigned int, unsigned int,
					   class loop *);

/* Returns true iff A divides B.  */

static inline bool
tree_fold_divides_p (const_tree a, const_tree b)
{
  gcc_assert (TREE_CODE (a) == INTEGER_CST);
  gcc_assert (TREE_CODE (b) == INTEGER_CST);
  return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
}

/* Returns true iff A divides B.  */

static inline bool
int_divides_p (lambda_int a, lambda_int b)
{
  return ((b % a) == 0);
}

/* Return true if reference REF contains a union access.  */

static bool
ref_contains_union_access_p (tree ref)
{
  while (handled_component_p (ref))
    {
      ref = TREE_OPERAND (ref, 0);
      if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
	  || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
	return true;
    }
  return false;
}

/* Dump into FILE all the data references from DATAREFS.  */

static void
dump_data_references (FILE *file, vec<data_reference_p> datarefs)
{
  for (data_reference *dr : datarefs)
    dump_data_reference (file, dr);
}

/* Unified dump function for a vec of data references, printing to
   stderr.  */

DEBUG_FUNCTION void
debug (vec<data_reference_p> &ref)
{
  dump_data_references (stderr, ref);
}

DEBUG_FUNCTION void
debug (vec<data_reference_p> *ptr)
{
  if (ptr)
    debug (*ptr);
  else
    fprintf (stderr, "<nil>\n");
}

/* Dump into STDERR all the data references from DATAREFS.  */

DEBUG_FUNCTION void
debug_data_references (vec<data_reference_p> datarefs)
{
  dump_data_references (stderr, datarefs);
}

/* Print to STDERR the data_reference DR.  */

DEBUG_FUNCTION void
debug_data_reference (struct data_reference *dr)
{
  dump_data_reference (stderr, dr);
}

/* Dump function for a DATA_REFERENCE structure.  */

void
dump_data_reference (FILE *outf,
		     struct data_reference *dr)
{
  unsigned int i;

  fprintf (outf, "#(Data Ref: \n");
  fprintf (outf, "#  bb: %d \n", gimple_bb (DR_STMT (dr))->index);
  fprintf (outf, "#  stmt: ");
  print_gimple_stmt (outf, DR_STMT (dr), 0);
  fprintf (outf, "#  ref: ");
  print_generic_stmt (outf, DR_REF (dr));
  fprintf (outf, "#  base_object: ");
  print_generic_stmt (outf, DR_BASE_OBJECT (dr));

  for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
    {
      fprintf (outf, "#  Access function %d: ", i);
      print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
    }
  fprintf (outf, "#)\n");
}

/* Unified dump function for a DATA_REFERENCE structure.  */

DEBUG_FUNCTION void
debug (data_reference &ref)
{
  dump_data_reference (stderr, &ref);
}

DEBUG_FUNCTION void
debug (data_reference *ptr)
{
  if (ptr)
    debug (*ptr);
  else
    fprintf (stderr, "<nil>\n");
}

/* Dumps the affine function described by FN to the file OUTF.  */

DEBUG_FUNCTION void
dump_affine_function (FILE *outf, affine_fn fn)
{
  unsigned i;
  tree coef;

  print_generic_expr (outf, fn[0], TDF_SLIM);
  for (i = 1; fn.iterate (i, &coef); i++)
    {
      fprintf (outf, " + ");
      print_generic_expr (outf, coef, TDF_SLIM);
      fprintf (outf, " * x_%u", i);
    }
}

/* Dumps the conflict function CF to the file OUTF.  */

DEBUG_FUNCTION void
dump_conflict_function (FILE *outf, conflict_function *cf)
{
  unsigned i;

  if (cf->n == NO_DEPENDENCE)
    fprintf (outf, "no dependence");
  else if (cf->n == NOT_KNOWN)
    fprintf (outf, "not known");
  else
    {
      for (i = 0; i < cf->n; i++)
	{
	  if (i != 0)
	    fprintf (outf, " ");
	  fprintf (outf, "[");
	  dump_affine_function (outf, cf->fns[i]);
	  fprintf (outf, "]");
	}
    }
}

/* Dump function for a SUBSCRIPT structure.  */

DEBUG_FUNCTION void
dump_subscript (FILE *outf, struct subscript *subscript)
{
  conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);

  fprintf (outf, "\n (subscript \n");
  fprintf (outf, "  iterations_that_access_an_element_twice_in_A: ");
  dump_conflict_function (outf, cf);
  if (CF_NONTRIVIAL_P (cf))
    {
      tree last_iteration = SUB_LAST_CONFLICT (subscript);
      fprintf (outf, "\n  last_conflict: ");
      print_generic_expr (outf, last_iteration);
    }

  cf = SUB_CONFLICTS_IN_B (subscript);
  fprintf (outf, "\n  iterations_that_access_an_element_twice_in_B: ");
  dump_conflict_function (outf, cf);
  if (CF_NONTRIVIAL_P (cf))
    {
      tree last_iteration = SUB_LAST_CONFLICT (subscript);
      fprintf (outf, "\n  last_conflict: ");
      print_generic_expr (outf, last_iteration);
    }

  fprintf (outf, "\n (Subscript distance: ");
  print_generic_expr (outf, SUB_DISTANCE (subscript));
  fprintf (outf, " ))\n");
}

/* Print the classic direction vector DIRV to OUTF.  */

DEBUG_FUNCTION void
print_direction_vector (FILE *outf,
			lambda_vector dirv,
			int length)
{
  int eq;

  for (eq = 0; eq < length; eq++)
    {
      enum data_dependence_direction dir = ((enum data_dependence_direction)
					    dirv[eq]);

      switch (dir)
	{
	case dir_positive:
	  fprintf (outf, "    +");
	  break;
	case dir_negative:
	  fprintf (outf, "    -");
	  break;
	case dir_equal:
	  fprintf (outf, "    =");
	  break;
	case dir_positive_or_equal:
	  fprintf (outf, "   +=");
	  break;
	case dir_positive_or_negative:
	  fprintf (outf, "   +-");
	  break;
	case dir_negative_or_equal:
	  fprintf (outf, "   -=");
	  break;
	case dir_star:
	  fprintf (outf, "    *");
	  break;
	default:
	  fprintf (outf, "indep");
	  break;
	}
    }
  fprintf (outf, "\n");
}

/* Print a vector of direction vectors.  */

DEBUG_FUNCTION void
print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
		   int length)
{
  for (lambda_vector v : dir_vects)
    print_direction_vector (outf, v, length);
}

/* Print out a vector VEC of length N to OUTFILE.  */

DEBUG_FUNCTION void
print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
{
  int i;

  for (i = 0; i < n; i++)
    fprintf (outfile, HOST_WIDE_INT_PRINT_DEC " ", vector[i]);
  fprintf (outfile, "\n");
}

/* Print a vector of distance vectors.  */

DEBUG_FUNCTION void
print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
		    int length)
{
  for (lambda_vector v : dist_vects)
    print_lambda_vector (outf, v, length);
}

/* Dump function for a DATA_DEPENDENCE_RELATION structure.  */

DEBUG_FUNCTION void
dump_data_dependence_relation (FILE *outf, const data_dependence_relation *ddr)
{
  struct data_reference *dra, *drb;

  fprintf (outf, "(Data Dep: \n");

  if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (ddr)
	{
	  dra = DDR_A (ddr);
	  drb = DDR_B (ddr);
	  if (dra)
	    dump_data_reference (outf, dra);
	  else
	    fprintf (outf, "    (nil)\n");
	  if (drb)
	    dump_data_reference (outf, drb);
	  else
	    fprintf (outf, "    (nil)\n");
	}
      fprintf (outf, "    (don't know)\n)\n");
      return;
    }

  dra = DDR_A (ddr);
  drb = DDR_B (ddr);
  dump_data_reference (outf, dra);
  dump_data_reference (outf, drb);

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    fprintf (outf, "    (no dependence)\n");

  else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      unsigned int i;
      class loop *loopi;

      subscript *sub;
      FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
	{
	  fprintf (outf, "  access_fn_A: ");
	  print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
	  fprintf (outf, "  access_fn_B: ");
	  print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
	  dump_subscript (outf, sub);
	}

      fprintf (outf, "  loop nest: (");
      FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
	fprintf (outf, "%d ", loopi->num);
      fprintf (outf, ")\n");

      for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
	{
	  fprintf (outf, "  distance_vector: ");
	  print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
			       DDR_NB_LOOPS (ddr));
	}

      for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
	{
	  fprintf (outf, "  direction_vector: ");
	  print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
				  DDR_NB_LOOPS (ddr));
	}
    }

  fprintf (outf, ")\n");
}

/* Debug version.  */

DEBUG_FUNCTION void
debug_data_dependence_relation (const struct data_dependence_relation *ddr)
{
  dump_data_dependence_relation (stderr, ddr);
}

/* Dump into FILE all the dependence relations from DDRS.  */

DEBUG_FUNCTION void
dump_data_dependence_relations (FILE *file, const vec<ddr_p> &ddrs)
{
  for (auto ddr : ddrs)
    dump_data_dependence_relation (file, ddr);
}

DEBUG_FUNCTION void
debug (vec<ddr_p> &ref)
{
  dump_data_dependence_relations (stderr, ref);
}

DEBUG_FUNCTION void
debug (vec<ddr_p> *ptr)
{
  if (ptr)
    debug (*ptr);
  else
    fprintf (stderr, "<nil>\n");
}

/* Dump to STDERR all the dependence relations from DDRS.  */

DEBUG_FUNCTION void
debug_data_dependence_relations (vec<ddr_p> ddrs)
{
  dump_data_dependence_relations (stderr, ddrs);
}

/* Dumps the distance and direction vectors in FILE.  DDRS contains
   the dependence relations; the length of each dependence vector is
   the number of loops in the considered nest (DDR_NB_LOOPS).  */

DEBUG_FUNCTION void
dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
{
  for (data_dependence_relation *ddr : ddrs)
    if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
      {
	for (lambda_vector v : DDR_DIST_VECTS (ddr))
	  {
	    fprintf (file, "DISTANCE_V (");
	    print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
	    fprintf (file, ")\n");
	  }

	for (lambda_vector v : DDR_DIR_VECTS (ddr))
	  {
	    fprintf (file, "DIRECTION_V (");
	    print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
	    fprintf (file, ")\n");
	  }
      }

  fprintf (file, "\n\n");
}

/* Dumps the data dependence relations DDRS in FILE.  */

DEBUG_FUNCTION void
dump_ddrs (FILE *file, vec<ddr_p> ddrs)
{
  for (data_dependence_relation *ddr : ddrs)
    dump_data_dependence_relation (file, ddr);

  fprintf (file, "\n\n");
}

DEBUG_FUNCTION void
debug_ddrs (vec<ddr_p> ddrs)
{
  dump_ddrs (stderr, ddrs);
}

/* If RESULT_RANGE is nonnull, set *RESULT_RANGE to the range of
   OP0 CODE OP1, where:

   - OP0 CODE OP1 has integral type TYPE
   - the range of OP0 is given by OP0_RANGE and
   - the range of OP1 is given by OP1_RANGE.

   Independently of RESULT_RANGE, try to compute:

     DELTA = ((sizetype) OP0 CODE (sizetype) OP1)
	     - (sizetype) (OP0 CODE OP1)

   as a constant and subtract DELTA from the ssizetype constant in *OFF.
   Return true on success, or false if DELTA is not known at compile time.

   Truncation and sign changes are known to distribute over CODE, i.e.

     (itype) (A CODE B) == (itype) A CODE (itype) B

   for any integral type ITYPE whose precision is no greater than the
   precision of A and B.  */

static bool
compute_distributive_range (tree type, value_range &op0_range,
			    tree_code code, value_range &op1_range,
			    tree *off, value_range *result_range)
{
  gcc_assert (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type));
  if (result_range)
    {
      range_op_handler op (code);
      if (!op.fold_range (*result_range, type, op0_range, op1_range))
	result_range->set_varying (type);
    }

  /* The distributive property guarantees that if TYPE is no narrower
     than SIZETYPE,

       (sizetype) (OP0 CODE OP1) == (sizetype) OP0 CODE (sizetype) OP1

     and so we can treat DELTA as zero.  */
  if (TYPE_PRECISION (type) >= TYPE_PRECISION (sizetype))
    return true;

  /* If overflow is undefined, we can assume that:

       X == (ssizetype) OP0 CODE (ssizetype) OP1

     is within the range of TYPE, i.e.:

       X == (ssizetype) (TYPE) X

     Distributing the (TYPE) truncation over X gives:

       X == (ssizetype) (OP0 CODE OP1)

     Casting both sides to sizetype and distributing the sizetype cast
     over X gives:

       (sizetype) OP0 CODE (sizetype) OP1 == (sizetype) (OP0 CODE OP1)

     and so we can treat DELTA as zero.  */
  if (TYPE_OVERFLOW_UNDEFINED (type))
    return true;

  /* Compute the range of:

       (ssizetype) OP0 CODE (ssizetype) OP1

     The distributive property guarantees that this has the same bitpattern as:

       (sizetype) OP0 CODE (sizetype) OP1

     but its range is more conducive to analysis.  */
  range_cast (op0_range, ssizetype);
  range_cast (op1_range, ssizetype);
  value_range wide_range;
  range_op_handler op (code);
  bool saved_flag_wrapv = flag_wrapv;
  flag_wrapv = 1;
  if (!op.fold_range (wide_range, ssizetype, op0_range, op1_range))
    wide_range.set_varying (ssizetype);
  flag_wrapv = saved_flag_wrapv;
  if (wide_range.num_pairs () != 1
      || wide_range.varying_p () || wide_range.undefined_p ())
    return false;

  wide_int lb = wide_range.lower_bound ();
  wide_int ub = wide_range.upper_bound ();

  /* Calculate the number of times that each end of the range overflows or
     underflows TYPE.  We can only calculate DELTA if the numbers match.  */
  unsigned int precision = TYPE_PRECISION (type);
  if (!TYPE_UNSIGNED (type))
    {
      wide_int type_min = wi::mask (precision - 1, true, lb.get_precision ());
      lb -= type_min;
      ub -= type_min;
    }
  wide_int upper_bits = wi::mask (precision, true, lb.get_precision ());
  lb &= upper_bits;
  ub &= upper_bits;
  if (lb != ub)
    return false;

  /* OP0 CODE OP1 overflows exactly arshift (LB, PRECISION) times, with
     negative values indicating underflow.  The low PRECISION bits of LB
     are clear, so DELTA is therefore LB (== UB).  */
  *off = wide_int_to_tree (ssizetype, wi::to_wide (*off) - lb);
  return true;
}

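/* Illustrative example (not from the original sources): with TYPE =
   unsigned char, OP0 = 200, OP1 = 100 and CODE = PLUS_EXPR,
   (sizetype) OP0 + (sizetype) OP1 == 300 while
   (sizetype) (OP0 + OP1) == 44, because the 8-bit addition wraps once.
   Here LB == UB == 300 with the low 8 bits cleared, i.e. 256, so
   DELTA = 256 is subtracted from *OFF.  */
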
/* Return true if (sizetype) OP == (sizetype) (TO_TYPE) OP,
   given that OP has type FROM_TYPE and range RANGE.  Both TO_TYPE and
   FROM_TYPE are integral types.  */

static bool
nop_conversion_for_offset_p (tree to_type, tree from_type, value_range &range)
{
  gcc_assert (INTEGRAL_TYPE_P (to_type)
	      && INTEGRAL_TYPE_P (from_type)
	      && !TYPE_OVERFLOW_TRAPS (to_type)
	      && !TYPE_OVERFLOW_TRAPS (from_type));

  /* Converting to something no narrower than sizetype and then to sizetype
     is equivalent to converting directly to sizetype.  */
  if (TYPE_PRECISION (to_type) >= TYPE_PRECISION (sizetype))
    return true;

  /* Check whether TO_TYPE can represent all values that FROM_TYPE can.  */
  if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type)
      && (TYPE_UNSIGNED (from_type) || !TYPE_UNSIGNED (to_type)))
    return true;

  /* For narrowing conversions, we could in principle test whether
     the bits in FROM_TYPE but not in TO_TYPE have a fixed value
     and apply a constant adjustment.

     For other conversions (which involve a sign change) we could
     check that the signs are always equal, and apply a constant
     adjustment if the signs are negative.

     However, both cases should be rare.  */
  return range_fits_type_p (&range, TYPE_PRECISION (to_type),
			    TYPE_SIGN (to_type));
}

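/* Illustrative example (not from the original sources): for TO_TYPE =
   short and FROM_TYPE = unsigned short with RANGE = [0, 100], neither
   precision test fires, but the range fits in short, so the conversion
   can be treated as a nop for offset purposes.  */
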
static void
split_constant_offset (tree type, tree *var, tree *off,
		       value_range *result_range,
		       hash_map<tree, std::pair<tree, tree> > &cache,
		       unsigned *limit);

/* Helper function for split_constant_offset.  If TYPE is a pointer type,
   try to express OP0 CODE OP1 as:

     POINTER_PLUS <*VAR, (sizetype) *OFF>

   where:

   - *VAR has type TYPE
   - *OFF is a constant of type ssizetype.

   If TYPE is an integral type, try to express (sizetype) (OP0 CODE OP1) as:

     *VAR + (sizetype) *OFF

   where:

   - *VAR has type sizetype
   - *OFF is a constant of type ssizetype.

   In both cases, OP0 CODE OP1 has type TYPE.

   Return true on success.  A false return value indicates that we can't
   do better than set *OFF to zero.

   When returning true, set RESULT_RANGE to the range of OP0 CODE OP1,
   if RESULT_RANGE is nonnull and if we can do better than assume VR_VARYING.

   CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
   visited.  LIMIT counts down the number of SSA names that we are
   allowed to process before giving up.  */

static bool
split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
			 tree *var, tree *off, value_range *result_range,
			 hash_map<tree, std::pair<tree, tree> > &cache,
			 unsigned *limit)
{
  tree var0, var1;
  tree off0, off1;
  value_range op0_range, op1_range;

  *var = NULL_TREE;
  *off = NULL_TREE;

  if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type))
    return false;

  switch (code)
    {
    case INTEGER_CST:
      *var = size_int (0);
      *off = fold_convert (ssizetype, op0);
      if (result_range)
	{
	  wide_int w = wi::to_wide (op0);
	  result_range->set (TREE_TYPE (op0), w, w);
	}
      return true;

    case POINTER_PLUS_EXPR:
      split_constant_offset (op0, &var0, &off0, nullptr, cache, limit);
      split_constant_offset (op1, &var1, &off1, nullptr, cache, limit);
      *var = fold_build2 (POINTER_PLUS_EXPR, type, var0, var1);
      *off = size_binop (PLUS_EXPR, off0, off1);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
      split_constant_offset (op1, &var1, &off1, &op1_range, cache, limit);
      *off = size_binop (code, off0, off1);
      if (!compute_distributive_range (type, op0_range, code, op1_range,
				       off, result_range))
	return false;
      *var = fold_build2 (code, sizetype, var0, var1);
      return true;

    case MULT_EXPR:
      if (TREE_CODE (op1) != INTEGER_CST)
	return false;

      split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
      op1_range.set (TREE_TYPE (op1), wi::to_wide (op1), wi::to_wide (op1));
      *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
      if (!compute_distributive_range (type, op0_range, code, op1_range,
				       off, result_range))
	return false;
      *var = fold_build2 (MULT_EXPR, sizetype, var0,
			  fold_convert (sizetype, op1));
      return true;

    case ADDR_EXPR:
      {
	tree base, poffset;
	poly_int64 pbitsize, pbitpos, pbytepos;
	machine_mode pmode;
	int punsignedp, preversep, pvolatilep;

	op0 = TREE_OPERAND (op0, 0);
	base
	  = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
				 &punsignedp, &preversep, &pvolatilep);

	if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
	  return false;
	base = build_fold_addr_expr (base);
	off0 = ssize_int (pbytepos);

	if (poffset)
	  {
	    split_constant_offset (poffset, &poffset, &off1, nullptr,
				   cache, limit);
	    off0 = size_binop (PLUS_EXPR, off0, off1);
	    base = fold_build_pointer_plus (base, poffset);
	  }

	var0 = fold_convert (type, base);

	/* If variable length types are involved, punt: otherwise casts
	   might be converted into ARRAY_REFs in gimplify_conversion,
	   and to compute such an ARRAY_REF's element size a
	   TYPE_SIZE_UNIT that possibly no longer appears in the current
	   GIMPLE might resurface.  This perhaps could run
	   if (CONVERT_EXPR_P (var0))
	     {
	       gimplify_conversion (&var0);
	       // Attempt to fill in any within var0 found ARRAY_REF's
	       // element size from corresponding op embedded ARRAY_REF,
	       // if unsuccessful, just punt.
	     }  */
	while (POINTER_TYPE_P (type))
	  type = TREE_TYPE (type);
	if (int_size_in_bytes (type) < 0)
	  return false;

	*var = var0;
	*off = off0;
	return true;
      }

    case SSA_NAME:
      {
	if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
	  return false;

	gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
	enum tree_code subcode;

	if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
	  return false;

	subcode = gimple_assign_rhs_code (def_stmt);

	/* We are using a cache to avoid un-CSEing large amounts of code.  */
	bool use_cache = false;
	if (!has_single_use (op0)
	    && (subcode == POINTER_PLUS_EXPR
		|| subcode == PLUS_EXPR
		|| subcode == MINUS_EXPR
		|| subcode == MULT_EXPR
		|| subcode == ADDR_EXPR
		|| CONVERT_EXPR_CODE_P (subcode)))
	  {
	    use_cache = true;
	    bool existed;
	    std::pair<tree, tree> &e = cache.get_or_insert (op0, &existed);
	    if (existed)
	      {
		if (integer_zerop (e.second))
		  return false;
		*var = e.first;
		*off = e.second;
		/* The caller sets the range in this case.  */
		return true;
	      }
	    e = std::make_pair (op0, ssize_int (0));
	  }

	if (*limit == 0)
	  return false;
	--*limit;

	var0 = gimple_assign_rhs1 (def_stmt);
	var1 = gimple_assign_rhs2 (def_stmt);

	bool res = split_constant_offset_1 (type, var0, subcode, var1,
					    var, off, nullptr, cache, limit);
	if (res && use_cache)
	  *cache.get (op0) = std::make_pair (*var, *off);
	/* The caller sets the range in this case.  */
	return res;
      }

    CASE_CONVERT:
      {
	/* We can only handle the following conversions:

	   - Conversions from one pointer type to another pointer type.

	   - Conversions from one non-trapping integral type to another
	     non-trapping integral type.  In this case, the recursive
	     call makes sure that:

	       (sizetype) OP0

	     can be expressed as a sizetype operation involving VAR and OFF,
	     and all we need to do is check whether:

	       (sizetype) OP0 == (sizetype) (TYPE) OP0

	   - Conversions from a non-trapping sizetype-size integral type to
	     a like-sized pointer type.  In this case, the recursive call
	     makes sure that:

	       (sizetype) OP0 == *VAR + (sizetype) *OFF

	     and we can convert that to:

	       POINTER_PLUS <(TYPE) *VAR, (sizetype) *OFF>

	   - Conversions from a sizetype-sized pointer type to a like-sized
	     non-trapping integral type.  In this case, the recursive call
	     makes sure that:

	       OP0 == POINTER_PLUS <*VAR, (sizetype) *OFF>

	     where the POINTER_PLUS and *VAR have the same precision as
	     TYPE (and the same precision as sizetype).  Then:

	       (sizetype) (TYPE) OP0 == (sizetype) *VAR + (sizetype) *OFF.  */
	tree itype = TREE_TYPE (op0);
	if ((POINTER_TYPE_P (itype)
	     || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
	    && (POINTER_TYPE_P (type)
		|| (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type)))
	    && (POINTER_TYPE_P (type) == POINTER_TYPE_P (itype)
		|| (TYPE_PRECISION (type) == TYPE_PRECISION (sizetype)
		    && TYPE_PRECISION (itype) == TYPE_PRECISION (sizetype))))
	  {
	    if (POINTER_TYPE_P (type))
	      {
		split_constant_offset (op0, var, off, nullptr, cache, limit);
		*var = fold_convert (type, *var);
	      }
	    else if (POINTER_TYPE_P (itype))
	      {
		split_constant_offset (op0, var, off, nullptr, cache, limit);
		*var = fold_convert (sizetype, *var);
	      }
	    else
	      {
		split_constant_offset (op0, var, off, &op0_range,
				       cache, limit);
		if (!nop_conversion_for_offset_p (type, itype, op0_range))
		  return false;
		if (result_range)
		  {
		    *result_range = op0_range;
		    range_cast (*result_range, type);
		  }
	      }
	    return true;
	  }
	return false;
      }

    default:
      return false;
    }
}

/* If EXP has pointer type, try to express it as:

     POINTER_PLUS <*VAR, (sizetype) *OFF>

   where:

   - *VAR has the same type as EXP
   - *OFF is a constant of type ssizetype.

   If EXP has an integral type, try to express (sizetype) EXP as:

     *VAR + (sizetype) *OFF

   where:

   - *VAR has type sizetype
   - *OFF is a constant of type ssizetype.

   If EXP_RANGE is nonnull, set it to the range of EXP.

   CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
   visited.  LIMIT counts down the number of SSA names that we are
   allowed to process before giving up.  */

static void
split_constant_offset (tree exp, tree *var, tree *off, value_range *exp_range,
		       hash_map<tree, std::pair<tree, tree> > &cache,
		       unsigned *limit)
{
  tree type = TREE_TYPE (exp), op0, op1;
  enum tree_code code;

  code = TREE_CODE (exp);
  if (exp_range)
    {
      *exp_range = type;
      if (code == SSA_NAME)
	{
	  value_range vr;
	  get_range_query (cfun)->range_of_expr (vr, exp);
	  if (vr.undefined_p ())
	    vr.set_varying (TREE_TYPE (exp));
	  tree vr_min, vr_max;
	  value_range_kind vr_kind = get_legacy_range (vr, vr_min, vr_max);
	  wide_int var_min = wi::to_wide (vr_min);
	  wide_int var_max = wi::to_wide (vr_max);
	  wide_int var_nonzero = get_nonzero_bits (exp);
	  vr_kind = intersect_range_with_nonzero_bits (vr_kind,
						       &var_min, &var_max,
						       var_nonzero,
						       TYPE_SIGN (type));
	  /* This check for VR_VARYING is here because the old code
	     using get_range_info would return VR_RANGE for the entire
	     domain, instead of VR_VARYING.  The new code normalizes
	     full-domain ranges to VR_VARYING.  */
	  if (vr_kind == VR_RANGE || vr_kind == VR_VARYING)
	    *exp_range = value_range (type, var_min, var_max);
	}
    }

  if (!tree_is_chrec (exp)
      && get_gimple_rhs_class (TREE_CODE (exp)) != GIMPLE_TERNARY_RHS)
    {
      extract_ops_from_tree (exp, &code, &op0, &op1);
      if (split_constant_offset_1 (type, op0, code, op1, var, off,
				   exp_range, cache, limit))
	return;
    }

  *var = exp;
  if (INTEGRAL_TYPE_P (type))
    *var = fold_convert (sizetype, *var);
  *off = ssize_int (0);

  value_range r;
  if (exp_range && code != SSA_NAME
      && get_range_query (cfun)->range_of_expr (r, exp)
      && !r.undefined_p ())
    *exp_range = r;
}

/* Expresses EXP as VAR + OFF, where OFF is a constant.  VAR has the same
   type as EXP while OFF has type ssizetype.  */

void
split_constant_offset (tree exp, tree *var, tree *off)
{
  unsigned limit = param_ssa_name_def_chain_limit;
  static hash_map<tree, std::pair<tree, tree> > *cache;
  if (!cache)
    cache = new hash_map<tree, std::pair<tree, tree> > (37);
  split_constant_offset (exp, var, off, nullptr, *cache, &limit);
  *var = fold_convert (TREE_TYPE (exp), *var);
  cache->empty ();
}

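/* Illustrative example (not from the original sources): if EXP is a
   pointer SSA name p_5 defined by "p_5 = q_3 + 12", the split yields
   *VAR = q_3 and *OFF = 12; for an integral EXP such as "i_2 + 7" it
   yields *VAR = i_2 (converted back to EXP's type) and *OFF = 7.  */
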
/* Returns the address ADDR of an object in a canonical shape (without nop
   casts, and with type of pointer to the object).  */

static tree
canonicalize_base_object_address (tree addr)
{
  tree orig = addr;

  STRIP_NOPS (addr);

  /* The base address may be obtained by casting from integer, in that case
     keep the cast.  */
  if (!POINTER_TYPE_P (TREE_TYPE (addr)))
    return orig;

  if (TREE_CODE (addr) != ADDR_EXPR)
    return addr;

  return build_fold_addr_expr (TREE_OPERAND (addr, 0));
}

/* Analyze the behavior of memory reference REF within STMT.
   There are two modes:

   - BB analysis.  In this case we simply split the address into base,
     init and offset components, without reference to any containing loop.
     The resulting base and offset are general expressions and they can
     vary arbitrarily from one iteration of the containing loop to the next.
     The step is always zero.

   - loop analysis.  In this case we analyze the reference both wrt LOOP
     and on the basis that the reference occurs (is "used") in LOOP;
     see the comment above analyze_scalar_evolution_in_loop for more
     information about this distinction.  The base, init, offset and
     step fields are all invariant in LOOP.

   Perform BB analysis if LOOP is null, or if LOOP is the function's
   dummy outermost loop.  In other cases perform loop analysis.

   Return true if the analysis succeeded and store the results in DRB if so.
   BB analysis can only fail for bitfield or reversed-storage accesses.  */

opt_result
dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
		      class loop *loop, const gimple *stmt)
{
  poly_int64 pbitsize, pbitpos;
  tree base, poffset;
  machine_mode pmode;
  int punsignedp, preversep, pvolatilep;
  affine_iv base_iv, offset_iv;
  tree init, dinit, step;
  bool in_loop = (loop && loop->num);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "analyze_innermost: ");

  base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
			      &punsignedp, &preversep, &pvolatilep);
  gcc_assert (base != NULL_TREE);

  poly_int64 pbytepos;
  if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
    return opt_result::failure_at (stmt,
				   "failed: bit offset alignment.\n");

  if (preversep)
    return opt_result::failure_at (stmt,
				   "failed: reverse storage order.\n");

  /* Calculate the alignment and misalignment for the inner reference.  */
  unsigned HOST_WIDE_INT bit_base_misalignment;
  unsigned int bit_base_alignment;
  get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);

  /* There are no bitfield references remaining in BASE, so the values
     we got back must be whole bytes.  */
  gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
	      && bit_base_misalignment % BITS_PER_UNIT == 0);
  unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
  poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;

  if (TREE_CODE (base) == MEM_REF)
    {
      if (!integer_zerop (TREE_OPERAND (base, 1)))
	{
	  /* Subtract MOFF from the base and add it to POFFSET instead.
	     Adjust the misalignment to reflect the amount we subtracted.  */
	  poly_offset_int moff = mem_ref_offset (base);
	  base_misalignment -= moff.force_shwi ();
	  tree mofft = wide_int_to_tree (sizetype, moff);
	  if (!poffset)
	    poffset = mofft;
	  else
	    poffset = size_binop (PLUS_EXPR, poffset, mofft);
	}
      base = TREE_OPERAND (base, 0);
    }
  else
    base = build_fold_addr_expr (base);

  if (in_loop)
    {
      if (!simple_iv (loop, loop, base, &base_iv, true))
	return opt_result::failure_at
	  (stmt, "failed: evolution of base is not affine.\n");
    }
  else
    {
      base_iv.base = base;
      base_iv.step = ssize_int (0);
      base_iv.no_overflow = true;
    }

  if (!poffset)
    {
      offset_iv.base = ssize_int (0);
      offset_iv.step = ssize_int (0);
    }
  else
    {
      if (!in_loop)
	{
	  offset_iv.base = poffset;
	  offset_iv.step = ssize_int (0);
	}
      else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
	return opt_result::failure_at
	  (stmt, "failed: evolution of offset is not affine.\n");
    }

  init = ssize_int (pbytepos);

  /* Subtract any constant component from the base and add it to INIT instead.
     Adjust the misalignment to reflect the amount we subtracted.  */
  split_constant_offset (base_iv.base, &base_iv.base, &dinit);
  init = size_binop (PLUS_EXPR, init, dinit);
  base_misalignment -= TREE_INT_CST_LOW (dinit);

  split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
  init = size_binop (PLUS_EXPR, init, dinit);

  step = size_binop (PLUS_EXPR,
		     fold_convert (ssizetype, base_iv.step),
		     fold_convert (ssizetype, offset_iv.step));

  base = canonicalize_base_object_address (base_iv.base);

  /* See if get_pointer_alignment can guarantee a higher alignment than
     the one we calculated above.  */
  unsigned HOST_WIDE_INT alt_misalignment;
  unsigned int alt_alignment;
  get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);

  /* As above, these values must be whole bytes.  */
  gcc_assert (alt_alignment % BITS_PER_UNIT == 0
	      && alt_misalignment % BITS_PER_UNIT == 0);
  alt_alignment /= BITS_PER_UNIT;
  alt_misalignment /= BITS_PER_UNIT;

  if (base_alignment < alt_alignment)
    {
      base_alignment = alt_alignment;
      base_misalignment = alt_misalignment;
    }

  drb->base_address = base;
  drb->offset = fold_convert (ssizetype, offset_iv.base);
  drb->init = init;
  drb->step = step;
  if (known_misalignment (base_misalignment, base_alignment,
			  &drb->base_misalignment))
    drb->base_alignment = base_alignment;
  else
    {
      drb->base_alignment = known_alignment (base_misalignment);
      drb->base_misalignment = 0;
    }
  drb->offset_alignment = highest_pow2_factor (offset_iv.base);
  drb->step_alignment = highest_pow2_factor (step);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "success.\n");

  return opt_result::success ();
}

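/* Illustrative decomposition (not from the original sources): for a
   reference a[i + 4] in a loop whose induction variable i steps by 1,
   with a an array of 4-byte ints, loop analysis yields
   base_address = &a, offset = 0, init = 16 and step = 4.  */
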
/* Return true if OP is a valid component reference for a DR access
   function.  This accepts a subset of what handled_component_p accepts.  */

static bool
access_fn_component_p (tree op)
{
  switch (TREE_CODE (op))
    {
    case REALPART_EXPR:
    case IMAGPART_EXPR:
    case ARRAY_REF:
      return true;

    case COMPONENT_REF:
      return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;

    default:
      return false;
    }
}

/* Returns whether BASE can have an access_fn_component_p with BASE
   as base.  */

static bool
base_supports_access_fn_components_p (tree base)
{
  switch (TREE_CODE (TREE_TYPE (base)))
    {
    case COMPLEX_TYPE:
    case ARRAY_TYPE:
    case RECORD_TYPE:
      return true;
    default:
      return false;
    }
}

/* Determines the base object and the list of indices of memory reference
   DR, analyzed in LOOP and instantiated before NEST.  */

static void
dr_analyze_indices (struct indices *dri, tree ref, edge nest, loop_p loop)
{
  /* If analyzing a basic-block there are no indices to analyze
     and thus no access functions.  */
  if (!nest)
    {
      dri->base_object = ref;
      dri->access_fns.create (0);
      return;
    }

  vec<tree> access_fns = vNULL;

  /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
     into a two element array with a constant index.  The base is
     then just the immediate underlying object.  */
  if (TREE_CODE (ref) == REALPART_EXPR)
    {
      ref = TREE_OPERAND (ref, 0);
      access_fns.safe_push (integer_zero_node);
    }
  else if (TREE_CODE (ref) == IMAGPART_EXPR)
    {
      ref = TREE_OPERAND (ref, 0);
      access_fns.safe_push (integer_one_node);
    }

  /* Analyze access functions of dimensions we know to be independent.
     The list of component references handled here should be kept in
     sync with access_fn_component_p.  */
  while (handled_component_p (ref))
    {
      if (TREE_CODE (ref) == ARRAY_REF)
	{
	  tree op = TREE_OPERAND (ref, 1);
	  tree access_fn = analyze_scalar_evolution (loop, op);
	  access_fn = instantiate_scev (nest, loop, access_fn);
	  access_fns.safe_push (access_fn);
	}
      else if (TREE_CODE (ref) == COMPONENT_REF
	       && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
	{
	  /* For COMPONENT_REFs of records (but not unions!) use the
	     FIELD_DECL offset as constant access function so we can
	     disambiguate a[i].f1 and a[i].f2.  */
	  tree off = component_ref_field_offset (ref);
	  off = size_binop (PLUS_EXPR,
			    size_binop (MULT_EXPR,
					fold_convert (bitsizetype, off),
					bitsize_int (BITS_PER_UNIT)),
			    DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
	  access_fns.safe_push (off);
	}
      else
	/* If we have an unhandled component we could not translate
	   to an access function stop analyzing.  We have determined
	   our base object in this case.  */
	break;

      ref = TREE_OPERAND (ref, 0);
    }

  /* If the address operand of a MEM_REF base has an evolution in the
     analyzed nest, add it as an additional independent access-function.  */
  if (TREE_CODE (ref) == MEM_REF)
    {
      tree op = TREE_OPERAND (ref, 0);
      tree access_fn = analyze_scalar_evolution (loop, op);
      access_fn = instantiate_scev (nest, loop, access_fn);
      STRIP_NOPS (access_fn);
      if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
	{
	  tree memoff = TREE_OPERAND (ref, 1);
	  tree base = initial_condition (access_fn);
	  tree orig_type = TREE_TYPE (base);
	  STRIP_USELESS_TYPE_CONVERSION (base);
	  tree off;
	  split_constant_offset (base, &base, &off);
	  STRIP_USELESS_TYPE_CONVERSION (base);
	  /* Fold the MEM_REF offset into the evolution's initial
	     value to make more bases comparable.  */
	  if (!integer_zerop (memoff))
	    {
	      off = size_binop (PLUS_EXPR, off,
				fold_convert (ssizetype, memoff));
	      memoff = build_int_cst (TREE_TYPE (memoff), 0);
	    }
	  /* Adjust the offset so it is a multiple of the access type
	     size and thus we separate bases that can possibly be used
	     to produce partial overlaps (which the access_fn machinery
	     cannot handle).  */
	  wide_int rem;
	  if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
	      && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
	      && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
	    rem = wi::mod_trunc
	      (wi::to_wide (off),
	       wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
	       SIGNED);
	  else
	    /* If we can't compute the remainder simply force the initial
	       condition to zero.  */
	    rem = wi::to_wide (off);
	  off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
	  memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
	  /* And finally replace the initial condition.  */
	  access_fn = chrec_replace_initial_condition
	    (access_fn, fold_convert (orig_type, off));
	  /* ???  This is still not a suitable base object for
	     dr_may_alias_p - the base object needs to be an
	     access that covers the object as a whole.  With
	     an evolution in the pointer this cannot be
	     guaranteed.
	     As a band-aid, mark the access so we can special-case
	     it in dr_may_alias_p.  */
	  tree old = ref;
	  ref = fold_build2_loc (EXPR_LOCATION (ref),
				 MEM_REF, TREE_TYPE (ref),
				 base, memoff);
	  MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
	  MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
	  dri->unconstrained_base = true;
	  access_fns.safe_push (access_fn);
	}
    }
  else if (DECL_P (ref))
    {
      /* Canonicalize DR_BASE_OBJECT to MEM_REF form.  */
      ref = build2 (MEM_REF, TREE_TYPE (ref),
		    build_fold_addr_expr (ref),
		    build_int_cst (reference_alias_ptr_type (ref), 0));
    }

  dri->base_object = ref;
  dri->access_fns = access_fns;
}

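/* Illustrative example (not from the original sources): for a[i][j]
   in a nest where i is the IV of loop 1 and j the IV of loop 2, the
   access functions are pushed innermost dimension first: {0, +, 1}_2
   for j, then {0, +, 1}_1 for i, with a as the base object.  */
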
/* Extracts the alias analysis information from the memory reference DR.  */

static void
dr_analyze_alias (struct data_reference *dr)
{
  tree ref = DR_REF (dr);
  tree base = get_base_address (ref), addr;

  if (INDIRECT_REF_P (base)
      || TREE_CODE (base) == MEM_REF)
    {
      addr = TREE_OPERAND (base, 0);
      if (TREE_CODE (addr) == SSA_NAME)
	DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
    }
}

/* Frees data reference DR.  */

void
free_data_ref (data_reference_p dr)
{
  DR_ACCESS_FNS (dr).release ();
  if (dr->alt_indices.base_object)
    dr->alt_indices.access_fns.release ();
  free (dr);
}

/* Analyze memory reference MEMREF, which is accessed in STMT.
   The reference is a read if IS_READ is true, otherwise it is a write.
   IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
   within STMT, i.e. that it might not occur even if STMT is executed
   and runs to completion.

   Return the data_reference description of MEMREF.  NEST is the outermost
   loop in which the reference should be instantiated, LOOP is the loop
   in which the data reference should be analyzed.  */

struct data_reference *
create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
		 bool is_read, bool is_conditional_in_stmt)
{
  struct data_reference *dr;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Creating dr for ");
      print_generic_expr (dump_file, memref, TDF_SLIM);
      fprintf (dump_file, "\n");
    }

  dr = XCNEW (struct data_reference);
  DR_STMT (dr) = stmt;
  DR_REF (dr) = memref;
  DR_IS_READ (dr) = is_read;
  DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;

  dr_analyze_innermost (&DR_INNERMOST (dr), memref,
			nest != NULL ? loop : NULL, stmt);
  dr_analyze_indices (&dr->indices, DR_REF (dr), nest, loop);
  dr_analyze_alias (dr);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned i;
      fprintf (dump_file, "\tbase_address: ");
      print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
      fprintf (dump_file, "\n\toffset from base address: ");
      print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tconstant offset from base address: ");
      print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tstep: ");
      print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tbase misalignment: %d",
	       DR_BASE_MISALIGNMENT (dr));
      fprintf (dump_file, "\n\toffset alignment: %d",
	       DR_OFFSET_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tbase_object: ");
      print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
      fprintf (dump_file, "\n");
      for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
	{
	  fprintf (dump_file, "\tAccess function %d: ", i);
	  print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
	}
    }

  return dr;
}

/* A helper function that computes the order between two tree expressions
   T1 and T2.  This is used in comparator functions sorting objects based
   on the order of tree expressions.  The function returns -1, 0, or 1.  */

int
data_ref_compare_tree (tree t1, tree t2)
{
  int i, cmp;
  enum tree_code code;
  char tclass;

  if (t1 == t2)
    return 0;
  if (t1 == NULL)
    return -1;
  if (t2 == NULL)
    return 1;

  STRIP_USELESS_TYPE_CONVERSION (t1);
  STRIP_USELESS_TYPE_CONVERSION (t2);
  if (t1 == t2)
    return 0;

  if (TREE_CODE (t1) != TREE_CODE (t2)
      && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
    return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;

  code = TREE_CODE (t1);
  switch (code)
    {
    case INTEGER_CST:
      return tree_int_cst_compare (t1, t2);

    case STRING_CST:
      if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
	return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
      return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
		     TREE_STRING_LENGTH (t1));

    case SSA_NAME:
      if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
	return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
      break;

    default:
      if (POLY_INT_CST_P (t1))
	return compare_sizes_for_sort (wi::to_poly_widest (t1),
				       wi::to_poly_widest (t2));

      tclass = TREE_CODE_CLASS (code);

      /* For decls, compare their UIDs.  */
      if (tclass == tcc_declaration)
	{
	  if (DECL_UID (t1) != DECL_UID (t2))
	    return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
	  break;
	}
      /* For expressions, compare their operands recursively.  */
      else if (IS_EXPR_CODE_CLASS (tclass))
	{
	  for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
	    {
	      cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
					   TREE_OPERAND (t2, i));
	      if (cmp != 0)
		return cmp;
	    }
	}
      else
	gcc_unreachable ();
    }

  return 0;
}

/* Return TRUE if it's possible to resolve data dependence DDR by a
   runtime alias check.  */

opt_result
runtime_alias_check_p (ddr_p ddr, class loop *loop, bool speed_p)
{
  if (dump_enabled_p ())
    dump_printf (MSG_NOTE,
		 "consider run-time aliasing test between %T and %T\n",
		 DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));

  if (!speed_p)
    return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
				   "runtime alias check not supported when"
				   " optimizing for size.\n");

  /* FORNOW: We don't support versioning with outer-loop in either
     vectorization or loop distribution.  */
  if (loop != NULL && loop->inner != NULL)
    return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
				   "runtime alias check not supported for"
				   " outer loop.\n");

  return opt_result::success ();
}

/* Operator == between two dr_with_seg_len objects.

   This equality operator is used to make sure two data refs
   are the same one so that we will consider combining the
   aliasing checks of those two pairs of data dependent data
   refs.  */

static bool
operator == (const dr_with_seg_len& d1,
	     const dr_with_seg_len& d2)
{
  return (operand_equal_p (DR_BASE_ADDRESS (d1.dr),
			   DR_BASE_ADDRESS (d2.dr), 0)
	  && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
	  && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
	  && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0
	  && known_eq (d1.access_size, d2.access_size)
	  && d1.align == d2.align);
}

/* Comparison function for sorting objects of dr_with_seg_len_pair_t
   so that we can combine aliasing checks in one scan.  */

static int
comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
{
  const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
  const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
  const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
  const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;

  /* For DR pairs (a, b) and (c, d), we only consider merging the alias
     checks if a and c have the same base address and step, and b and d
     have the same address and step.  Therefore, if either a&c or b&d
     don't have the same address and step, we don't care about the order
     of those two pairs after sorting.  */
  int comp_res;

  if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
					 DR_BASE_ADDRESS (b1.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
					 DR_BASE_ADDRESS (b2.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
					 DR_STEP (b1.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
					 DR_STEP (b2.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
					 DR_OFFSET (b1.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
					 DR_INIT (b1.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
					 DR_OFFSET (b2.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
					 DR_INIT (b2.dr))) != 0)
    return comp_res;

  return 0;
}

/* Dump information about ALIAS_PAIR, indenting each line by INDENT.  */

static void
dump_alias_pair (dr_with_seg_len_pair_t *alias_pair, const char *indent)
{
  dump_printf (MSG_NOTE, "%sreference: %T vs. %T\n", indent,
	       DR_REF (alias_pair->first.dr),
	       DR_REF (alias_pair->second.dr));

  dump_printf (MSG_NOTE, "%ssegment length: %T", indent,
	       alias_pair->first.seg_len);
  if (!operand_equal_p (alias_pair->first.seg_len,
			alias_pair->second.seg_len, 0))
    dump_printf (MSG_NOTE, " vs. %T", alias_pair->second.seg_len);

  dump_printf (MSG_NOTE, "\n%saccess size: ", indent);
  dump_dec (MSG_NOTE, alias_pair->first.access_size);
  if (maybe_ne (alias_pair->first.access_size, alias_pair->second.access_size))
    {
      dump_printf (MSG_NOTE, " vs. ");
      dump_dec (MSG_NOTE, alias_pair->second.access_size);
    }

  dump_printf (MSG_NOTE, "\n%salignment: %d", indent,
	       alias_pair->first.align);
  if (alias_pair->first.align != alias_pair->second.align)
    dump_printf (MSG_NOTE, " vs. %d", alias_pair->second.align);

  dump_printf (MSG_NOTE, "\n%sflags: ", indent);
  if (alias_pair->flags & DR_ALIAS_RAW)
    dump_printf (MSG_NOTE, " RAW");
  if (alias_pair->flags & DR_ALIAS_WAR)
    dump_printf (MSG_NOTE, " WAR");
  if (alias_pair->flags & DR_ALIAS_WAW)
    dump_printf (MSG_NOTE, " WAW");
  if (alias_pair->flags & DR_ALIAS_ARBITRARY)
    dump_printf (MSG_NOTE, " ARBITRARY");
  if (alias_pair->flags & DR_ALIAS_SWAPPED)
    dump_printf (MSG_NOTE, " SWAPPED");
  if (alias_pair->flags & DR_ALIAS_UNSWAPPED)
    dump_printf (MSG_NOTE, " UNSWAPPED");
  if (alias_pair->flags & DR_ALIAS_MIXED_STEPS)
    dump_printf (MSG_NOTE, " MIXED_STEPS");
  if (alias_pair->flags == 0)
    dump_printf (MSG_NOTE, " <none>");
  dump_printf (MSG_NOTE, "\n");
}

1758 /* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
1759 FACTOR is number of iterations that each data reference is accessed.
1761 Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
1762 we create an expression:
1764 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1765 || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
1767 for aliasing checks. However, in some cases we can decrease the number
1768 of checks by combining two checks into one. For example, suppose we have
1769 another pair of data refs store_ptr_0 & load_ptr_1, and if the following
1770 condition is satisfied:
1772 load_ptr_0 < load_ptr_1 &&
1773 load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
1775 (this condition means, in each iteration of vectorized loop, the accessed
1776 memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
1777 load_ptr_1.)
1779 we then can use only the following expression to finish the alising checks
1780 between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:
1782 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1783 || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))
1785 Note that we only consider that load_ptr_0 and load_ptr_1 have the same
1786 basic address. */
1788 void
1789 prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
1790 poly_uint64)
1792 if (alias_pairs->is_empty ())
1793 return;
1795 /* Canonicalize each pair so that the base components are ordered wrt
1796 data_ref_compare_tree. This allows the loop below to merge more
1797 cases. */
1798 unsigned int i;
1799 dr_with_seg_len_pair_t *alias_pair;
1800 FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1802 data_reference_p dr_a = alias_pair->first.dr;
1803 data_reference_p dr_b = alias_pair->second.dr;
1804 int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
1805 DR_BASE_ADDRESS (dr_b));
1806 if (comp_res == 0)
1807 comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
1808 if (comp_res == 0)
1809 comp_res = data_ref_compare_tree (DR_INIT (dr_a), DR_INIT (dr_b));
1810 if (comp_res > 0)
1812 std::swap (alias_pair->first, alias_pair->second);
1813 alias_pair->flags |= DR_ALIAS_SWAPPED;
1815 else
1816 alias_pair->flags |= DR_ALIAS_UNSWAPPED;
1819 /* Sort the collected data ref pairs so that we can scan them once to
1820 combine all possible aliasing checks. */
1821 alias_pairs->qsort (comp_dr_with_seg_len_pair);
1823 /* Scan the sorted dr pairs and check if we can combine alias checks
1824 of two neighboring dr pairs. */
1825 unsigned int last = 0;
1826 for (i = 1; i < alias_pairs->length (); ++i)
1828 /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2). */
1829 dr_with_seg_len_pair_t *alias_pair1 = &(*alias_pairs)[last];
1830 dr_with_seg_len_pair_t *alias_pair2 = &(*alias_pairs)[i];
1832 dr_with_seg_len *dr_a1 = &alias_pair1->first;
1833 dr_with_seg_len *dr_b1 = &alias_pair1->second;
1834 dr_with_seg_len *dr_a2 = &alias_pair2->first;
1835 dr_with_seg_len *dr_b2 = &alias_pair2->second;
1837 /* Remove duplicate data ref pairs. */
1838 if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
1840 if (dump_enabled_p ())
1841 dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
1842 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1843 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1844 alias_pair1->flags |= alias_pair2->flags;
1845 continue;
1848 /* Assume that we won't be able to merge the pairs, then correct
1849 if we do. */
1850 last += 1;
1851 if (last != i)
1852 (*alias_pairs)[last] = (*alias_pairs)[i];
1854 if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
1856 /* We consider the case that DR_B1 and DR_B2 are same memrefs,
1857 and DR_A1 and DR_A2 are two consecutive memrefs. */
1858 if (*dr_a1 == *dr_a2)
1860 std::swap (dr_a1, dr_b1);
1861 std::swap (dr_a2, dr_b2);
1864 poly_int64 init_a1, init_a2;
1865 /* Only consider cases in which the distance between the initial
1866 DR_A1 and the initial DR_A2 is known at compile time. */
1867 if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
1868 DR_BASE_ADDRESS (dr_a2->dr), 0)
1869 || !operand_equal_p (DR_OFFSET (dr_a1->dr),
1870 DR_OFFSET (dr_a2->dr), 0)
1871 || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
1872 || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
1873 continue;
1875 /* Don't combine if we can't tell which one comes first. */
1876 if (!ordered_p (init_a1, init_a2))
1877 continue;
1879 /* Work out what the segment length would be if we did combine
1880 DR_A1 and DR_A2:
1882 - If DR_A1 and DR_A2 have equal lengths, that length is
1883 also the combined length.
1885 - If DR_A1 and DR_A2 both have negative "lengths", the combined
1886 length is the lower bound on those lengths.
1888 - If DR_A1 and DR_A2 both have positive lengths, the combined
1889 length is the upper bound on those lengths.
1891 Other cases are unlikely to give a useful combination.
1893 The lengths both have sizetype, so the sign is taken from
1894 the step instead. */
1895 poly_uint64 new_seg_len = 0;
1896 bool new_seg_len_p = !operand_equal_p (dr_a1->seg_len,
1897 dr_a2->seg_len, 0);
1898 if (new_seg_len_p)
1900 poly_uint64 seg_len_a1, seg_len_a2;
1901 if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
1902 || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
1903 continue;
1905 tree indicator_a = dr_direction_indicator (dr_a1->dr);
1906 if (TREE_CODE (indicator_a) != INTEGER_CST)
1907 continue;
1909 tree indicator_b = dr_direction_indicator (dr_a2->dr);
1910 if (TREE_CODE (indicator_b) != INTEGER_CST)
1911 continue;
1913 int sign_a = tree_int_cst_sgn (indicator_a);
1914 int sign_b = tree_int_cst_sgn (indicator_b);
1916 if (sign_a <= 0 && sign_b <= 0)
1917 new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
1918 else if (sign_a >= 0 && sign_b >= 0)
1919 new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
1920 else
1921 continue;
1923 /* At this point we're committed to merging the refs. */
1925 /* Make sure dr_a1 starts left of dr_a2. */
1926 if (maybe_gt (init_a1, init_a2))
1928 std::swap (*dr_a1, *dr_a2);
1929 std::swap (init_a1, init_a2);
1932 /* The DR_Bs are equal, so only the DR_As can introduce
1933 mixed steps. */
1934 if (!operand_equal_p (DR_STEP (dr_a1->dr), DR_STEP (dr_a2->dr), 0))
1935 alias_pair1->flags |= DR_ALIAS_MIXED_STEPS;
1937 if (new_seg_len_p)
1939 dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
1940 new_seg_len);
1941 dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
1944 /* This is known to be non-negative because of the swap above. */
1945 poly_uint64 diff = init_a2 - init_a1;
1947 /* The new check will start at DR_A1. Make sure that its access
1948 size encompasses the initial DR_A2. */
1949 if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
1951 dr_a1->access_size = upper_bound (dr_a1->access_size,
1952 diff + dr_a2->access_size);
1953 unsigned int new_align = known_alignment (dr_a1->access_size);
1954 dr_a1->align = MIN (dr_a1->align, new_align);
1956 if (dump_enabled_p ())
1957 dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
1958 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1959 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1960 alias_pair1->flags |= alias_pair2->flags;
1961 last -= 1;
1964 alias_pairs->truncate (last + 1);
1966 /* Try to restore the original dr_with_seg_len order within each
1967 dr_with_seg_len_pair_t. If we ended up combining swapped and
1968 unswapped pairs into the same check, we have to invalidate any
1969 RAW, WAR and WAW information for it. */
1970 if (dump_enabled_p ())
1971 dump_printf (MSG_NOTE, "merged alias checks:\n");
1972 FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1974 unsigned int swap_mask = (DR_ALIAS_SWAPPED | DR_ALIAS_UNSWAPPED);
1975 unsigned int swapped = (alias_pair->flags & swap_mask);
1976 if (swapped == DR_ALIAS_SWAPPED)
1977 std::swap (alias_pair->first, alias_pair->second);
1978 else if (swapped != DR_ALIAS_UNSWAPPED)
1979 alias_pair->flags |= DR_ALIAS_ARBITRARY;
1980 alias_pair->flags &= ~swap_mask;
1981 if (dump_enabled_p ())
1982 dump_alias_pair (alias_pair, " ");
1986 /* A subroutine of create_intersect_range_checks, with a subset of the
1987 same arguments. Try to use IFN_CHECK_RAW_PTRS and IFN_CHECK_WAR_PTRS
1988 to optimize cases in which the references form a simple RAW, WAR or
1989 WAW dependence. */
1991 static bool
1992 create_ifn_alias_checks (tree *cond_expr,
1993 const dr_with_seg_len_pair_t &alias_pair)
1995 const dr_with_seg_len& dr_a = alias_pair.first;
1996 const dr_with_seg_len& dr_b = alias_pair.second;
1998 /* Check for cases in which:
2000 (a) we have a known RAW, WAR or WAW dependence;
2001 (b) the accesses are well-ordered in both the original and new code
2002 (see the comment above the DR_ALIAS_* flags for details); and
2003 (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR. */
2004 if (alias_pair.flags & ~(DR_ALIAS_RAW | DR_ALIAS_WAR | DR_ALIAS_WAW))
2005 return false;
2007 /* Make sure that both DRs access the same pattern of bytes,
2008 with a constant length and step. */
2009 poly_uint64 seg_len;
2010 if (!operand_equal_p (dr_a.seg_len, dr_b.seg_len, 0)
2011 || !poly_int_tree_p (dr_a.seg_len, &seg_len)
2012 || maybe_ne (dr_a.access_size, dr_b.access_size)
2013 || !operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0)
2014 || !tree_fits_uhwi_p (DR_STEP (dr_a.dr)))
2015 return false;
2017 unsigned HOST_WIDE_INT bytes = tree_to_uhwi (DR_STEP (dr_a.dr));
2018 tree addr_a = DR_BASE_ADDRESS (dr_a.dr);
2019 tree addr_b = DR_BASE_ADDRESS (dr_b.dr);
2021 /* See whether the target supports what we want to do. WAW checks are
2022 equivalent to WAR checks here. */
2023 internal_fn ifn = (alias_pair.flags & DR_ALIAS_RAW
2024 ? IFN_CHECK_RAW_PTRS
2025 : IFN_CHECK_WAR_PTRS);
2026 unsigned int align = MIN (dr_a.align, dr_b.align);
2027 poly_uint64 full_length = seg_len + bytes;
2028 if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
2029 full_length, align))
2031 full_length = seg_len + dr_a.access_size;
2032 if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
2033 full_length, align))
2034 return false;
2037 /* Commit to using this form of test. */
2038 addr_a = fold_build_pointer_plus (addr_a, DR_OFFSET (dr_a.dr));
2039 addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
2041 addr_b = fold_build_pointer_plus (addr_b, DR_OFFSET (dr_b.dr));
2042 addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
2044 *cond_expr = build_call_expr_internal_loc (UNKNOWN_LOCATION,
2045 ifn, boolean_type_node,
2046 4, addr_a, addr_b,
2047 size_int (full_length),
2048 size_int (align));
2050 if (dump_enabled_p ())
2052 if (ifn == IFN_CHECK_RAW_PTRS)
2053 dump_printf (MSG_NOTE, "using an IFN_CHECK_RAW_PTRS test\n");
2054 else
2055 dump_printf (MSG_NOTE, "using an IFN_CHECK_WAR_PTRS test\n");
2057 return true;
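/* For reference, the runtime condition built above dumps along the
   lines of (sketch only; the expansion is entirely target-specific):

     .CHECK_RAW_PTRS (addr_a, addr_b, full_length, align)

   or the IFN_CHECK_WAR_PTRS equivalent.  */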
2060 /* Try to generate a runtime condition that is true if ALIAS_PAIR is
2061 free of aliases, using a condition based on index values instead
2062 of a condition based on addresses. Return true on success,
2063 storing the condition in *COND_EXPR.
2065 This can only be done if the two data references in ALIAS_PAIR access
2066 the same array object and the index is the only difference. For example,
2067 if the two data references are DR_A and DR_B:
2069 DR_A DR_B
2070 data-ref arr[i] arr[j]
2071 base_object arr arr
2072 index {i_0, +, 1}_loop {j_0, +, 1}_loop
2074 The addresses and their indices look like:
2076 |<- ADDR_A ->| |<- ADDR_B ->|
2077 ------------------------------------------------------->
2078 | | | | | | | | | |
2079 ------------------------------------------------------->
2080 i_0 ... i_0+4 j_0 ... j_0+4
2082 We can create an expression based on the indices rather than the addresses:
2084 (unsigned) (i_0 - j_0 + 3) <= 6
2086 i.e. the indices are less than 4 apart.
2088 Note that the evolution step of the index needs to be considered in the comparison. */
2090 static bool
2091 create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
2092 const dr_with_seg_len_pair_t &alias_pair)
2094 const dr_with_seg_len &dr_a = alias_pair.first;
2095 const dr_with_seg_len &dr_b = alias_pair.second;
2096 if ((alias_pair.flags & DR_ALIAS_MIXED_STEPS)
2097 || integer_zerop (DR_STEP (dr_a.dr))
2098 || integer_zerop (DR_STEP (dr_b.dr))
2099 || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
2100 return false;
2102 poly_uint64 seg_len1, seg_len2;
2103 if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
2104 || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
2105 return false;
2107 if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
2108 return false;
2110 if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
2111 return false;
2113 if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
2114 return false;
2116 gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
2118 bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
2119 unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
2120 if (neg_step)
2122 abs_step = -abs_step;
2123 seg_len1 = (-wi::to_poly_wide (dr_a.seg_len)).force_uhwi ();
2124 seg_len2 = (-wi::to_poly_wide (dr_b.seg_len)).force_uhwi ();
2127 /* Infer the number of iterations with which the memory segment is accessed
2128 by DR. In other words, aliasing is checked if the memory segment accessed
2129 by DR_A in some iterations intersects with the memory segment accessed by
2130 DR_B in the same number of iterations.
2131 Note the segment length is a linear function of the number of iterations
2132 with DR_STEP as the coefficient. */
2133 poly_uint64 niter_len1, niter_len2;
2134 if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
2135 || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
2136 return false;
2138 /* Divide each access size by the byte step, rounding up. */
2139 poly_uint64 niter_access1, niter_access2;
2140 if (!can_div_trunc_p (dr_a.access_size + abs_step - 1,
2141 abs_step, &niter_access1)
2142 || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
2143 abs_step, &niter_access2))
2144 return false;
2146 bool waw_or_war_p = (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) == 0;
2148 int found = -1;
2149 for (unsigned int i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
2151 tree access1 = DR_ACCESS_FN (dr_a.dr, i);
2152 tree access2 = DR_ACCESS_FN (dr_b.dr, i);
2153 /* Two indices must be the same if they are not SCEVs, or not SCEVs wrt
2154 the current loop being vectorized. */
2155 if (TREE_CODE (access1) != POLYNOMIAL_CHREC
2156 || TREE_CODE (access2) != POLYNOMIAL_CHREC
2157 || CHREC_VARIABLE (access1) != (unsigned)loop->num
2158 || CHREC_VARIABLE (access2) != (unsigned)loop->num)
2160 if (operand_equal_p (access1, access2, 0))
2161 continue;
2163 return false;
2165 if (found >= 0)
2166 return false;
2167 found = i;
2170 /* Ought not to happen in practice, since if all accesses are equal then the
2171 alias should be decidable at compile time. */
2172 if (found < 0)
2173 return false;
2175 /* The two indices must have the same step. */
2176 tree access1 = DR_ACCESS_FN (dr_a.dr, found);
2177 tree access2 = DR_ACCESS_FN (dr_b.dr, found);
2178 if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
2179 return false;
2181 tree idx_step = CHREC_RIGHT (access1);
2182 /* Index must have const step, otherwise DR_STEP won't be constant. */
2183 gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
2184 /* Index must evaluate in the same direction as DR. */
2185 gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
2187 tree min1 = CHREC_LEFT (access1);
2188 tree min2 = CHREC_LEFT (access2);
2189 if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
2190 return false;
2192 /* Ideally, aliasing could be checked against the loop's control IV, but
2193 we would need to prove a linear mapping between the control IV and the
2194 reference index. Although that should be true, we instead check against
2195 the (array) index of the data reference. Like the segment length, the
2196 index length is a linear function of the number of iterations with
2197 idx_step as the coefficient, i.e. niter_len * idx_step. */
2198 offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step),
2199 SIGNED);
2200 if (neg_step)
2201 abs_idx_step = -abs_idx_step;
2202 poly_offset_int idx_len1 = abs_idx_step * niter_len1;
2203 poly_offset_int idx_len2 = abs_idx_step * niter_len2;
2204 poly_offset_int idx_access1 = abs_idx_step * niter_access1;
2205 poly_offset_int idx_access2 = abs_idx_step * niter_access2;
2207 gcc_assert (known_ge (idx_len1, 0)
2208 && known_ge (idx_len2, 0)
2209 && known_ge (idx_access1, 0)
2210 && known_ge (idx_access2, 0));
2212 /* Each access has the following pattern, with lengths measured
2213 in units of INDEX:
2215 <-- idx_len -->
2216 <--- A: -ve step --->
2217 +-----+-------+-----+-------+-----+
2218 | n-1 | ..... | 0 | ..... | n-1 |
2219 +-----+-------+-----+-------+-----+
2220 <--- B: +ve step --->
2221 <-- idx_len -->
2225 where "n" is the number of scalar iterations covered by the segment
2226 and where each access spans idx_access units.
2228 A is the range of bytes accessed when the step is negative,
2229 B is the range when the step is positive.
2231 When checking for general overlap, we need to test whether
2232 the range:
2234 [min1 + low_offset1, min1 + high_offset1 + idx_access1 - 1]
2236 overlaps:
2238 [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1]
2240 where:
2242 low_offsetN = +ve step ? 0 : -idx_lenN;
2243 high_offsetN = +ve step ? idx_lenN : 0;
2245 This is equivalent to testing whether:
2247 min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1
2248 && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1
2250 Converting this into a single test, there is an overlap if:
2252 0 <= min2 - min1 + bias <= limit
2254 where bias = high_offset2 + idx_access2 - 1 - low_offset1
2255 limit = (high_offset1 - low_offset1 + idx_access1 - 1)
2256 + (high_offset2 - low_offset2 + idx_access2 - 1)
2257 i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1
2259 Combining the tests requires limit to be computable in an unsigned
2260 form of the index type; if it isn't, we fall back to the usual
2261 pointer-based checks.
2263 We can do better if DR_B is a write and if DR_A and DR_B are
2264 well-ordered in both the original and the new code (see the
2265 comment above the DR_ALIAS_* flags for details). In this case
2266 we know that for each i in [0, n-1], the write performed by
2267 access i of DR_B occurs after access numbers j<=i of DR_A in
2268 both the original and the new code. Any write or anti
2269 dependencies wrt those DR_A accesses are therefore maintained.
2271 We just need to make sure that each individual write in DR_B does not
2272 overlap any higher-indexed access in DR_A; such DR_A accesses happen
2273 after the DR_B access in the original code but happen before it in
2274 the new code.
2276 We know the steps for both accesses are equal, so by induction, we
2277 just need to test whether the first write of DR_B overlaps a later
2278 access of DR_A. In other words, we need to move min1 along by
2279 one iteration:
2281 min1' = min1 + idx_step
2283 and use the ranges:
2285 [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1]
2287 and:
2289 [min2, min2 + idx_access2 - 1]
2291 where:
2293 low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|)
2294 high_offset1' = +ve step ? idx_len1 - |idx_step| : 0. */
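/* Worked example with invented numbers: for idx_len1 == idx_len2 == 4,
   idx_access1 == idx_access2 == 1 and a positive step, the general
   overlap test has low_offset1 == 0 and high_offset2 == 4, giving
   bias == 4 + 1 - 1 - 0 == 4 and limit == 4 + 1 - 1 + 4 + 1 - 1 == 8.
   The ranges [min1, min1 + 4] and [min2, min2 + 4] then overlap iff
   0 <= min2 - min1 + 4 <= 8, i.e. iff the start indices are at most
   4 apart.  */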
2295 if (waw_or_war_p)
2296 idx_len1 -= abs_idx_step;
2298 poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1;
2299 if (!waw_or_war_p)
2300 limit += idx_len2;
2302 tree utype = unsigned_type_for (TREE_TYPE (min1));
2303 if (!wi::fits_to_tree_p (limit, utype))
2304 return false;
2306 poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
2307 poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2;
2308 poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
2309 /* Equivalent to adding IDX_STEP to MIN1. */
2310 if (waw_or_war_p)
2311 bias -= wi::to_offset (idx_step);
2313 tree subject = fold_build2 (MINUS_EXPR, utype,
2314 fold_convert (utype, min2),
2315 fold_convert (utype, min1));
2316 subject = fold_build2 (PLUS_EXPR, utype, subject,
2317 wide_int_to_tree (utype, bias));
2318 tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject,
2319 wide_int_to_tree (utype, limit));
2320 if (*cond_expr)
2321 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2322 *cond_expr, part_cond_expr);
2323 else
2324 *cond_expr = part_cond_expr;
2325 if (dump_enabled_p ())
2327 if (waw_or_war_p)
2328 dump_printf (MSG_NOTE, "using an index-based WAR/WAW test\n");
2329 else
2330 dump_printf (MSG_NOTE, "using an index-based overlap test\n");
2332 return true;
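/* A standalone sketch of the unsigned-comparison trick used above
   (illustrative only; the helper below is not part of GCC and assumes
   the subtraction does not wrap):  */
#if 0
static bool
index_ranges_overlap (int i0, int j0, int len)
{
  /* Ranges of LEN consecutive indices starting at I0 and J0 overlap
     iff |i0 - j0| <= len - 1, which folds to one unsigned compare;
     with len == 4 this is the "(unsigned) (i_0 - j_0 + 3) <= 6" test
     from the comment above.  */
  return (unsigned) (i0 - j0 + (len - 1)) <= (unsigned) (2 * (len - 1));
}
#endif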
2335 /* A subroutine of create_intersect_range_checks, with a subset of the
2336 same arguments. Try to optimize cases in which the second access
2337 is a write and in which some overlap is valid. */
2339 static bool
2340 create_waw_or_war_checks (tree *cond_expr,
2341 const dr_with_seg_len_pair_t &alias_pair)
2343 const dr_with_seg_len& dr_a = alias_pair.first;
2344 const dr_with_seg_len& dr_b = alias_pair.second;
2346 /* Check for cases in which:
2348 (a) DR_B is always a write;
2349 (b) the accesses are well-ordered in both the original and new code
2350 (see the comment above the DR_ALIAS_* flags for details); and
2351 (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR. */
2352 if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
2353 return false;
2355 /* Check for equal (but possibly variable) steps. */
2356 tree step = DR_STEP (dr_a.dr);
2357 if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
2358 return false;
2360 /* Make sure that we can operate on sizetype without loss of precision. */
2361 tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
2362 if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
2363 return false;
2365 /* All addresses involved are known to have a common alignment ALIGN.
2366 We can therefore subtract ALIGN from an exclusive endpoint to get
2367 an inclusive endpoint. In the best (and common) case, ALIGN is the
2368 same as the access sizes of both DRs, and so subtracting ALIGN
2369 cancels out the addition of an access size. */
2370 unsigned int align = MIN (dr_a.align, dr_b.align);
2371 poly_uint64 last_chunk_a = dr_a.access_size - align;
2372 poly_uint64 last_chunk_b = dr_b.access_size - align;
2374 /* Get a boolean expression that is true when the step is negative. */
2375 tree indicator = dr_direction_indicator (dr_a.dr);
2376 tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2377 fold_convert (ssizetype, indicator),
2378 ssize_int (0));
2380 /* Get lengths in sizetype. */
2381 tree seg_len_a
2382 = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len));
2383 step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step));
2385 /* Each access has the following pattern:
2387 <- |seg_len| ->
2388 <--- A: -ve step --->
2389 +-----+-------+-----+-------+-----+
2390 | n-1 | ..... | 0 | ..... | n-1 |
2391 +-----+-------+-----+-------+-----+
2392 <--- B: +ve step --->
2393 <- |seg_len| ->
2395 base address
2397 where "n" is the number of scalar iterations covered by the segment.
2399 A is the range of bytes accessed when the step is negative,
2400 B is the range when the step is positive.
2402 We know that DR_B is a write. We also know (from checking that
2403 DR_A and DR_B are well-ordered) that for each i in [0, n-1],
2404 the write performed by access i of DR_B occurs after access numbers
2405 j<=i of DR_A in both the original and the new code. Any write or
2406 anti dependencies wrt those DR_A accesses are therefore maintained.
2408 We just need to make sure that each individual write in DR_B does not
2409 overlap any higher-indexed access in DR_A; such DR_A accesses happen
2410 after the DR_B access in the original code but happen before it in
2411 the new code.
2413 We know the steps for both accesses are equal, so by induction, we
2414 just need to test whether the first write of DR_B overlaps a later
2415 access of DR_A. In other words, we need to move addr_a along by
2416 one iteration:
2418 addr_a' = addr_a + step
2420 and check whether:
2422 [addr_b, addr_b + last_chunk_b]
2424 overlaps:
2426 [addr_a' + low_offset_a, addr_a' + high_offset_a + last_chunk_a]
2428 where [low_offset_a, high_offset_a] spans accesses [1, n-1]. I.e.:
2430 low_offset_a = +ve step ? 0 : seg_len_a - step
2431 high_offset_a = +ve step ? seg_len_a - step : 0
2433 This is equivalent to testing whether:
2435 addr_a' + low_offset_a <= addr_b + last_chunk_b
2436 && addr_b <= addr_a' + high_offset_a + last_chunk_a
2438 Converting this into a single test, there is an overlap if:
2440 0 <= addr_b + last_chunk_b - addr_a' - low_offset_a <= limit
2442 where limit = high_offset_a - low_offset_a + last_chunk_a + last_chunk_b
2444 If DR_A is performed, limit + |step| - last_chunk_b is known to be
2445 less than the size of the object underlying DR_A. We also know
2446 that last_chunk_b <= |step|; this is checked elsewhere if it isn't
2447 guaranteed at compile time. There can therefore be no overflow if
2448 "limit" is calculated in an unsigned type with pointer precision. */
2449 tree addr_a = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr),
2450 DR_OFFSET (dr_a.dr));
2451 addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
2453 tree addr_b = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr),
2454 DR_OFFSET (dr_b.dr));
2455 addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
2457 /* Advance ADDR_A by one iteration and adjust the length to compensate. */
2458 addr_a = fold_build_pointer_plus (addr_a, step);
2459 tree seg_len_a_minus_step = fold_build2 (MINUS_EXPR, sizetype,
2460 seg_len_a, step);
2461 if (!CONSTANT_CLASS_P (seg_len_a_minus_step))
2462 seg_len_a_minus_step = build1 (SAVE_EXPR, sizetype, seg_len_a_minus_step);
2464 tree low_offset_a = fold_build3 (COND_EXPR, sizetype, neg_step,
2465 seg_len_a_minus_step, size_zero_node);
2466 if (!CONSTANT_CLASS_P (low_offset_a))
2467 low_offset_a = build1 (SAVE_EXPR, sizetype, low_offset_a);
2469 /* We could use COND_EXPR <neg_step, size_zero_node, seg_len_a_minus_step>,
2470 but it's usually more efficient to reuse the LOW_OFFSET_A result. */
2471 tree high_offset_a = fold_build2 (MINUS_EXPR, sizetype, seg_len_a_minus_step,
2472 low_offset_a);
2474 /* The amount added to addr_b - addr_a'. */
2475 tree bias = fold_build2 (MINUS_EXPR, sizetype,
2476 size_int (last_chunk_b), low_offset_a);
2478 tree limit = fold_build2 (MINUS_EXPR, sizetype, high_offset_a, low_offset_a);
2479 limit = fold_build2 (PLUS_EXPR, sizetype, limit,
2480 size_int (last_chunk_a + last_chunk_b));
2482 tree subject = fold_build2 (POINTER_DIFF_EXPR, ssizetype, addr_b, addr_a);
2483 subject = fold_build2 (PLUS_EXPR, sizetype,
2484 fold_convert (sizetype, subject), bias);
2486 *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit);
2487 if (dump_enabled_p ())
2488 dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n");
2489 return true;
2492 /* If ALIGN is nonzero, set up *SEG_MIN_OUT and *SEG_MAX_OUT so that for
2493 every address ADDR accessed by D:
2495 *SEG_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEG_MAX_OUT
2497 In this case, every element accessed by D is aligned to at least
2498 ALIGN bytes.
2500 If ALIGN is zero then instead set *SEG_MAX_OUT so that:
2502 *SEG_MIN_OUT <= ADDR < *SEG_MAX_OUT. */
2504 static void
2505 get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
2506 tree *seg_max_out, HOST_WIDE_INT align)
2508 /* Each access has the following pattern:
2510 <- |seg_len| ->
2511 <--- A: -ve step --->
2512 +-----+-------+-----+-------+-----+
2513 | n-1 | ..... | 0 | ..... | n-1 |
2514 +-----+-------+-----+-------+-----+
2515 <--- B: +ve step --->
2516 <- |seg_len| ->
2518 base address
2520 where "n" is the number of scalar iterations covered by the segment.
2521 (This should be VF for a particular pair if we know that both steps
2522 are the same, otherwise it will be the full number of scalar loop
2523 iterations.)
2525 A is the range of bytes accessed when the step is negative,
2526 B is the range when the step is positive.
2528 If the access size is "access_size" bytes, the lowest addressed byte is:
2530 base + (step < 0 ? seg_len : 0) [LB]
2532 and the highest addressed byte is always below:
2534 base + (step < 0 ? 0 : seg_len) + access_size [UB]
2536 Thus:
2538 LB <= ADDR < UB
2540 If ALIGN is nonzero, all three values are aligned to at least ALIGN
2541 bytes, so:
2543 LB <= ADDR <= UB - ALIGN
2545 where "- ALIGN" folds naturally with the "+ access_size" and often
2546 cancels it out.
2548 We don't try to simplify LB and UB beyond this (e.g. by using
2549 MIN and MAX based on whether seg_len rather than the stride is
2550 negative) because it is possible for the absolute size of the
2551 segment to overflow the range of a ssize_t.
2553 Keeping the pointer_plus outside of the cond_expr should allow
2554 the cond_exprs to be shared with other alias checks. */
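/* Worked example with invented numbers: for a forward segment with
   base address 0x1000, seg_len 32, access_size 4 and ALIGN 4, the
   bounds are *SEG_MIN_OUT == 0x1000 and *SEG_MAX_OUT == 0x1000 + 32
   + 4 - 4 == 0x1020, an inclusive endpoint as described above.  */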
2555 tree indicator = dr_direction_indicator (d.dr);
2556 tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2557 fold_convert (ssizetype, indicator),
2558 ssize_int (0));
2559 tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
2560 DR_OFFSET (d.dr));
2561 addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
2562 tree seg_len
2563 = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));
2565 tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2566 seg_len, size_zero_node);
2567 tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2568 size_zero_node, seg_len);
2569 max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
2570 size_int (d.access_size - align));
2572 *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
2573 *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
2576 /* Generate a runtime condition that is true if ALIAS_PAIR is free of aliases,
2577 storing the condition in *COND_EXPR. The fallback is to generate
2578 a test that the two accesses do not overlap:
2580 end_a <= start_b || end_b <= start_a. */
2582 static void
2583 create_intersect_range_checks (class loop *loop, tree *cond_expr,
2584 const dr_with_seg_len_pair_t &alias_pair)
2586 const dr_with_seg_len& dr_a = alias_pair.first;
2587 const dr_with_seg_len& dr_b = alias_pair.second;
2588 *cond_expr = NULL_TREE;
2589 if (create_intersect_range_checks_index (loop, cond_expr, alias_pair))
2590 return;
2592 if (create_ifn_alias_checks (cond_expr, alias_pair))
2593 return;
2595 if (create_waw_or_war_checks (cond_expr, alias_pair))
2596 return;
2598 unsigned HOST_WIDE_INT min_align;
2599 tree_code cmp_code;
2600 /* We don't have to check DR_ALIAS_MIXED_STEPS here, since both versions
2601 are equivalent. This is just an optimization heuristic. */
2602 if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
2603 && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
2605 /* In this case adding access_size to seg_len is likely to give
2606 a simple X * step, where X is either the number of scalar
2607 iterations or the vectorization factor. We're better off
2608 keeping that, rather than subtracting an alignment from it.
2610 In this case the maximum values are exclusive and so there is
2611 no alias if the maximum of one segment equals the minimum
2612 of another. */
2613 min_align = 0;
2614 cmp_code = LE_EXPR;
2616 else
2618 /* Calculate the minimum alignment shared by all four pointers,
2619 then arrange for this alignment to be subtracted from the
2620 exclusive maximum values to get inclusive maximum values.
2621 This "- min_align" is cumulative with a "+ access_size"
2622 in the calculation of the maximum values. In the best
2623 (and common) case, the two cancel each other out, leaving
2624 us with an inclusive bound based only on seg_len. In the
2625 worst case we're simply adding a smaller number than before.
2627 Because the maximum values are inclusive, there is an alias
2628 if the maximum value of one segment is equal to the minimum
2629 value of the other. */
2630 min_align = MIN (dr_a.align, dr_b.align);
2631 cmp_code = LT_EXPR;
2634 tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
2635 get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
2636 get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);
2638 *cond_expr
2639 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2640 fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
2641 fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
2642 if (dump_enabled_p ())
2643 dump_printf (MSG_NOTE, "using an address-based overlap test\n");
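/* A standalone sketch of the fallback test (illustrative only; the
   helper below is not part of GCC), using inclusive maxima as in the
   LT_EXPR case above:  */
#if 0
static bool
segments_independent (const char *a_min, const char *a_max,
		      const char *b_min, const char *b_max)
{
  /* No alias if one segment ends before the other begins.  */
  return a_max < b_min || b_max < a_min;
}
#endif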
2646 /* Create a conditional expression that represents the run-time checks for
2647 overlapping of address ranges represented by a list of data reference
2648 pairs passed in ALIAS_PAIRS. Data references are in LOOP. The returned
2649 COND_EXPR is the conditional expression to be used in the if statement
2650 that controls which version of the loop gets executed at runtime. */
2652 void
2653 create_runtime_alias_checks (class loop *loop,
2654 const vec<dr_with_seg_len_pair_t> *alias_pairs,
2655 tree * cond_expr)
2657 tree part_cond_expr;
2659 fold_defer_overflow_warnings ();
2660 for (const dr_with_seg_len_pair_t &alias_pair : alias_pairs)
2662 gcc_assert (alias_pair.flags);
2663 if (dump_enabled_p ())
2664 dump_printf (MSG_NOTE,
2665 "create runtime check for data references %T and %T\n",
2666 DR_REF (alias_pair.first.dr),
2667 DR_REF (alias_pair.second.dr));
2669 /* Create a condition expression for each pair of data references. */
2670 create_intersect_range_checks (loop, &part_cond_expr, alias_pair);
2671 if (*cond_expr)
2672 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2673 *cond_expr, part_cond_expr);
2674 else
2675 *cond_expr = part_cond_expr;
2677 fold_undefer_and_ignore_overflow_warnings ();
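/* The caller typically uses *COND_EXPR as a loop-versioning condition,
   along the lines of (sketch only):

     if (cond_expr)
       ... transformed loop, aliasing ruled out at runtime ...
     else
       ... original loop ...  */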
2680 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
2681 expressions. */
2682 static bool
2683 dr_equal_offsets_p1 (tree offset1, tree offset2)
2685 bool res;
2687 STRIP_NOPS (offset1);
2688 STRIP_NOPS (offset2);
2690 if (offset1 == offset2)
2691 return true;
2693 if (TREE_CODE (offset1) != TREE_CODE (offset2)
2694 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
2695 return false;
2697 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
2698 TREE_OPERAND (offset2, 0));
2700 if (!res || !BINARY_CLASS_P (offset1))
2701 return res;
2703 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
2704 TREE_OPERAND (offset2, 1));
2706 return res;
2709 /* Check if DRA and DRB have equal offsets. */
2710 bool
2711 dr_equal_offsets_p (struct data_reference *dra,
2712 struct data_reference *drb)
2714 tree offset1, offset2;
2716 offset1 = DR_OFFSET (dra);
2717 offset2 = DR_OFFSET (drb);
2719 return dr_equal_offsets_p1 (offset1, offset2);
2722 /* Returns true if FNA == FNB. */
2724 static bool
2725 affine_function_equal_p (affine_fn fna, affine_fn fnb)
2727 unsigned i, n = fna.length ();
2729 if (n != fnb.length ())
2730 return false;
2732 for (i = 0; i < n; i++)
2733 if (!operand_equal_p (fna[i], fnb[i], 0))
2734 return false;
2736 return true;
2739 /* If all the functions in CF are the same, returns one of them,
2740 otherwise returns NULL. */
2742 static affine_fn
2743 common_affine_function (conflict_function *cf)
2745 unsigned i;
2746 affine_fn comm;
2748 if (!CF_NONTRIVIAL_P (cf))
2749 return affine_fn ();
2751 comm = cf->fns[0];
2753 for (i = 1; i < cf->n; i++)
2754 if (!affine_function_equal_p (comm, cf->fns[i]))
2755 return affine_fn ();
2757 return comm;
2760 /* Returns the base of the affine function FN. */
2762 static tree
2763 affine_function_base (affine_fn fn)
2765 return fn[0];
2768 /* Returns true if FN is a constant. */
2770 static bool
2771 affine_function_constant_p (affine_fn fn)
2773 unsigned i;
2774 tree coef;
2776 for (i = 1; fn.iterate (i, &coef); i++)
2777 if (!integer_zerop (coef))
2778 return false;
2780 return true;
2783 /* Returns true if FN is the zero constant function. */
2785 static bool
2786 affine_function_zero_p (affine_fn fn)
2788 return (integer_zerop (affine_function_base (fn))
2789 && affine_function_constant_p (fn));
2792 /* Returns a signed integer type with the largest precision from TA
2793 and TB. */
2795 static tree
2796 signed_type_for_types (tree ta, tree tb)
2798 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
2799 return signed_type_for (ta);
2800 else
2801 return signed_type_for (tb);
2804 /* Applies operation OP on affine functions FNA and FNB, and returns the
2805 result. */
2807 static affine_fn
2808 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
2810 unsigned i, n, m;
2811 affine_fn ret;
2812 tree coef;
2814 if (fnb.length () > fna.length ())
2816 n = fna.length ();
2817 m = fnb.length ();
2819 else
2821 n = fnb.length ();
2822 m = fna.length ();
2825 ret.create (m);
2826 for (i = 0; i < n; i++)
2828 tree type = signed_type_for_types (TREE_TYPE (fna[i]),
2829 TREE_TYPE (fnb[i]));
2830 ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
2833 for (; fna.iterate (i, &coef); i++)
2834 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2835 coef, integer_zero_node));
2836 for (; fnb.iterate (i, &coef); i++)
2837 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2838 integer_zero_node, coef));
2840 return ret;
2843 /* Returns the sum of affine functions FNA and FNB. */
2845 static affine_fn
2846 affine_fn_plus (affine_fn fna, affine_fn fnb)
2848 return affine_fn_op (PLUS_EXPR, fna, fnb);
2851 /* Returns the difference of affine functions FNA and FNB. */
2853 static affine_fn
2854 affine_fn_minus (affine_fn fna, affine_fn fnb)
2856 return affine_fn_op (MINUS_EXPR, fna, fnb);
2859 /* Frees affine function FN. */
2861 static void
2862 affine_fn_free (affine_fn fn)
2864 fn.release ();
2867 /* For each subscript in the data dependence relation DDR, determine
2868 the distance. */
2870 static void
2871 compute_subscript_distance (struct data_dependence_relation *ddr)
2873 conflict_function *cf_a, *cf_b;
2874 affine_fn fn_a, fn_b, diff;
2876 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
2878 unsigned int i;
2880 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2882 struct subscript *subscript;
2884 subscript = DDR_SUBSCRIPT (ddr, i);
2885 cf_a = SUB_CONFLICTS_IN_A (subscript);
2886 cf_b = SUB_CONFLICTS_IN_B (subscript);
2888 fn_a = common_affine_function (cf_a);
2889 fn_b = common_affine_function (cf_b);
2890 if (!fn_a.exists () || !fn_b.exists ())
2892 SUB_DISTANCE (subscript) = chrec_dont_know;
2893 return;
2895 diff = affine_fn_minus (fn_a, fn_b);
2897 if (affine_function_constant_p (diff))
2898 SUB_DISTANCE (subscript) = affine_function_base (diff);
2899 else
2900 SUB_DISTANCE (subscript) = chrec_dont_know;
2902 affine_fn_free (diff);
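/* Worked example (illustrative): for the accesses A[i + 2] and A[i]
   in the same loop, the common affine functions are fn_a == {2, 1}
   and fn_b == {0, 1} (constant term first, then the coefficient of
   the loop index), so diff == {2, 0}.  The difference is constant,
   hence SUB_DISTANCE is 2.  */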
2907 /* Returns the conflict function for "unknown". */
2909 static conflict_function *
2910 conflict_fn_not_known (void)
2912 conflict_function *fn = XCNEW (conflict_function);
2913 fn->n = NOT_KNOWN;
2915 return fn;
2918 /* Returns the conflict function for "independent". */
2920 static conflict_function *
2921 conflict_fn_no_dependence (void)
2923 conflict_function *fn = XCNEW (conflict_function);
2924 fn->n = NO_DEPENDENCE;
2926 return fn;
2929 /* Returns true if the address of OBJ is invariant in LOOP. */
2931 static bool
2932 object_address_invariant_in_loop_p (const class loop *loop, const_tree obj)
2934 while (handled_component_p (obj))
2936 if (TREE_CODE (obj) == ARRAY_REF)
2938 for (int i = 1; i < 4; ++i)
2939 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i),
2940 loop->num))
2941 return false;
2943 else if (TREE_CODE (obj) == COMPONENT_REF)
2945 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2946 loop->num))
2947 return false;
2949 obj = TREE_OPERAND (obj, 0);
2952 if (!INDIRECT_REF_P (obj)
2953 && TREE_CODE (obj) != MEM_REF)
2954 return true;
2956 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
2957 loop->num);
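/* For example, the address of the base object in "a[i].f" is
   invariant in any loop when "a" is a declared array, whereas for
   "(*p)[i]" it is invariant only if "p" is not defined inside
   LOOP.  */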
2960 /* Returns false if we can prove that data references A and B do not alias,
2961 true otherwise. If LOOP_NEST is NULL, no cross-iteration aliases are
2962 considered. */
2964 bool
2965 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
2966 class loop *loop_nest)
2968 tree addr_a = DR_BASE_OBJECT (a);
2969 tree addr_b = DR_BASE_OBJECT (b);
2971 /* If we are not processing a loop nest but scalar code we
2972 do not need to care about possible cross-iteration dependences
2973 and thus can process the full original reference. Do so,
2974 similar to how loop invariant motion applies extra offset-based
2975 disambiguation. */
2976 if (!loop_nest)
2978 tree tree_size_a = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (a)));
2979 tree tree_size_b = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (b)));
2981 if (DR_BASE_ADDRESS (a)
2982 && DR_BASE_ADDRESS (b)
2983 && operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b))
2984 && operand_equal_p (DR_OFFSET (a), DR_OFFSET (b))
2985 && poly_int_tree_p (tree_size_a)
2986 && poly_int_tree_p (tree_size_b)
2987 && !ranges_maybe_overlap_p (wi::to_poly_widest (DR_INIT (a)),
2988 wi::to_poly_widest (tree_size_a),
2989 wi::to_poly_widest (DR_INIT (b)),
2990 wi::to_poly_widest (tree_size_b)))
2992 gcc_assert (integer_zerop (DR_STEP (a))
2993 && integer_zerop (DR_STEP (b)));
2994 return false;
2997 aff_tree off1, off2;
2998 poly_widest_int size1, size2;
2999 get_inner_reference_aff (DR_REF (a), &off1, &size1);
3000 get_inner_reference_aff (DR_REF (b), &off2, &size2);
3001 aff_combination_scale (&off1, -1);
3002 aff_combination_add (&off2, &off1);
3003 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
3004 return false;
3007 if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
3008 && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
3009 /* For cross-iteration dependences the cliques must be valid for the
3010 whole loop, not just individual iterations. */
3011 && (!loop_nest
3012 || MR_DEPENDENCE_CLIQUE (addr_a) == 1
3013 || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique)
3014 && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
3015 && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
3016 return false;
3018 /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
3019 do not know the size of the base-object. So we cannot do any
3020 offset/overlap based analysis but have to rely on points-to
3021 information only. */
3022 if (TREE_CODE (addr_a) == MEM_REF
3023 && (DR_UNCONSTRAINED_BASE (a)
3024 || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
3026 /* For true dependences we can apply TBAA. */
3027 if (flag_strict_aliasing
3028 && DR_IS_WRITE (a) && DR_IS_READ (b)
3029 && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
3030 get_alias_set (DR_REF (b))))
3031 return false;
3032 if (TREE_CODE (addr_b) == MEM_REF)
3033 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3034 TREE_OPERAND (addr_b, 0));
3035 else
3036 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3037 build_fold_addr_expr (addr_b));
3039 else if (TREE_CODE (addr_b) == MEM_REF
3040 && (DR_UNCONSTRAINED_BASE (b)
3041 || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
3043 /* For true dependences we can apply TBAA. */
3044 if (flag_strict_aliasing
3045 && DR_IS_WRITE (a) && DR_IS_READ (b)
3046 && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
3047 get_alias_set (DR_REF (b))))
3048 return false;
3049 if (TREE_CODE (addr_a) == MEM_REF)
3050 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3051 TREE_OPERAND (addr_b, 0));
3052 else
3053 return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
3054 TREE_OPERAND (addr_b, 0));
3057 /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
3058 that is being subsetted in the loop nest. */
3059 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
3060 return refs_output_dependent_p (addr_a, addr_b);
3061 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
3062 return refs_anti_dependent_p (addr_a, addr_b);
3063 return refs_may_alias_p (addr_a, addr_b);
3066 /* REF_A and REF_B both satisfy access_fn_component_p. Return true
3067 if it is meaningful to compare their associated access functions
3068 when checking for dependencies. */
3070 static bool
3071 access_fn_components_comparable_p (tree ref_a, tree ref_b)
3073 /* Allow pairs of component refs from the following sets:
3075 { REALPART_EXPR, IMAGPART_EXPR }
3076 { COMPONENT_REF }
3077 { ARRAY_REF }. */
3078 tree_code code_a = TREE_CODE (ref_a);
3079 tree_code code_b = TREE_CODE (ref_b);
3080 if (code_a == IMAGPART_EXPR)
3081 code_a = REALPART_EXPR;
3082 if (code_b == IMAGPART_EXPR)
3083 code_b = REALPART_EXPR;
3084 if (code_a != code_b)
3085 return false;
3087 if (TREE_CODE (ref_a) == COMPONENT_REF)
3088 /* ??? We cannot simply use the type of operand #0 of the refs here as
3089 the Fortran compiler smuggles type punning into COMPONENT_REFs.
3090 Use the DECL_CONTEXT of the FIELD_DECLs instead. */
3091 return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
3092 == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
3094 return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
3095 TREE_TYPE (TREE_OPERAND (ref_b, 0)));
3098 /* Initialize a data dependence relation RES in LOOP_NEST. USE_ALT_INDICES
3099 is true when the main indices of A and B were not comparable so we try again
3100 with alternate indices computed on an indirect reference. */
3102 struct data_dependence_relation *
3103 initialize_data_dependence_relation (struct data_dependence_relation *res,
3104 vec<loop_p> loop_nest,
3105 bool use_alt_indices)
3107 struct data_reference *a = DDR_A (res);
3108 struct data_reference *b = DDR_B (res);
3109 unsigned int i;
3111 struct indices *indices_a = &a->indices;
3112 struct indices *indices_b = &b->indices;
3113 if (use_alt_indices)
3115 if (TREE_CODE (DR_REF (a)) != MEM_REF)
3116 indices_a = &a->alt_indices;
3117 if (TREE_CODE (DR_REF (b)) != MEM_REF)
3118 indices_b = &b->alt_indices;
3120 unsigned int num_dimensions_a = indices_a->access_fns.length ();
3121 unsigned int num_dimensions_b = indices_b->access_fns.length ();
3122 if (num_dimensions_a == 0 || num_dimensions_b == 0)
3124 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3125 return res;
3128 /* For unconstrained bases, the root (highest-indexed) subscript
3129 describes a variation in the base of the original DR_REF rather
3130 than a component access. We have no type that accurately describes
3131 the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
3132 applying this subscript) so limit the search to the last real
3133 component access.
3135 E.g. for:
3137 void
3138 f (int a[][8], int b[][8])
3140 for (int i = 0; i < 8; ++i)
3141 a[i * 2][0] = b[i][0];
3144 the a and b accesses have a single ARRAY_REF component reference [0]
3145 but have two subscripts. */
3146 if (indices_a->unconstrained_base)
3147 num_dimensions_a -= 1;
3148 if (indices_b->unconstrained_base)
3149 num_dimensions_b -= 1;
3151 /* These structures describe sequences of component references in
3152 DR_REF (A) and DR_REF (B). Each component reference is tied to a
3153 specific access function. */
3154 struct {
3155 /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
3156 DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
3157 indices. In C notation, these are the indices of the rightmost
3158 component references; e.g. for a sequence .b.c.d, the start
3159 index is for .d. */
3160 unsigned int start_a;
3161 unsigned int start_b;
3163 /* The sequence contains LENGTH consecutive access functions from
3164 each DR. */
3165 unsigned int length;
3167 /* The enclosing objects for the A and B sequences respectively,
3168 i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
3169 and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied. */
3170 tree object_a;
3171 tree object_b;
3172 } full_seq = {}, struct_seq = {};
3174 /* Before each iteration of the loop:
3176 - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
3177 - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B). */
3178 unsigned int index_a = 0;
3179 unsigned int index_b = 0;
3180 tree ref_a = DR_REF (a);
3181 tree ref_b = DR_REF (b);
3183 /* Now walk the component references from the final DR_REFs back up to
3184 the enclosing base objects. Each component reference corresponds
3185 to one access function in the DR, with access function 0 being for
3186 the final DR_REF and the highest-indexed access function being the
3187 one that is applied to the base of the DR.
3189 Look for a sequence of component references whose access functions
3190 are comparable (see access_fn_components_comparable_p). If more
3191 than one such sequence exists, pick the one nearest the base
3192 (which is the leftmost sequence in C notation). Store this sequence
3193 in FULL_SEQ.
3195 For example, if we have:
3197 struct foo { struct bar s; ... } (*a)[10], (*b)[10];
3199 A: a[0][i].s.c.d
3200 B: __real b[0][i].s.e[i].f
3202 (where d is the same type as the real component of f) then the access
3203 functions would be:
3205 0 1 2 3
3206 A: .d .c .s [i]
3208 0 1 2 3 4 5
3209 B: __real .f [i] .e .s [i]
3211 The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
3212 and [i] is an ARRAY_REF. However, the A1/B3 column contains two
3213 COMPONENT_REF accesses for struct bar, so is comparable. Likewise
3214 the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
3215 so is comparable. The A3/B5 column contains two ARRAY_REFs that
3216 index foo[10] arrays, so is again comparable. The sequence is
3217 therefore:
3219 A: [1, 3] (i.e. [i].s.c)
3220 B: [3, 5] (i.e. [i].s.e)
3222 Also look for sequences of component references whose access
3223 functions are comparable and whose enclosing objects have the same
3224 RECORD_TYPE. Store this sequence in STRUCT_SEQ. In the above
3225 example, STRUCT_SEQ would be:
3227 A: [1, 2] (i.e. s.c)
3228 B: [3, 4] (i.e. s.e) */
3229 while (index_a < num_dimensions_a && index_b < num_dimensions_b)
3231 /* The alternate indices form always has a single dimension
3232 with unconstrained base. */
3233 gcc_assert (!use_alt_indices);
3235 /* REF_A and REF_B must be one of the component access types
3236 allowed by dr_analyze_indices. */
3237 gcc_checking_assert (access_fn_component_p (ref_a));
3238 gcc_checking_assert (access_fn_component_p (ref_b));
3240 /* Get the immediately-enclosing objects for REF_A and REF_B,
3241 i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
3242 and DR_ACCESS_FN (B, INDEX_B). */
3243 tree object_a = TREE_OPERAND (ref_a, 0);
3244 tree object_b = TREE_OPERAND (ref_b, 0);
3246 tree type_a = TREE_TYPE (object_a);
3247 tree type_b = TREE_TYPE (object_b);
3248 if (access_fn_components_comparable_p (ref_a, ref_b))
3250 /* This pair of component accesses is comparable for dependence
3251 analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
3252 DR_ACCESS_FN (B, INDEX_B) in the sequence. */
3253 if (full_seq.start_a + full_seq.length != index_a
3254 || full_seq.start_b + full_seq.length != index_b)
3256 /* The accesses don't extend the current sequence,
3257 so start a new one here. */
3258 full_seq.start_a = index_a;
3259 full_seq.start_b = index_b;
3260 full_seq.length = 0;
3263 /* Add this pair of references to the sequence. */
3264 full_seq.length += 1;
3265 full_seq.object_a = object_a;
3266 full_seq.object_b = object_b;
3268 /* If the enclosing objects are structures (and thus have the
3269 same RECORD_TYPE), record the new sequence in STRUCT_SEQ. */
3270 if (TREE_CODE (type_a) == RECORD_TYPE)
3271 struct_seq = full_seq;
3273 /* Move to the next containing reference for both A and B. */
3274 ref_a = object_a;
3275 ref_b = object_b;
3276 index_a += 1;
3277 index_b += 1;
3278 continue;
3281 /* The components are not comparable: advance the reference with the smaller type, trying to approach equal type sizes. */
3282 if (!COMPLETE_TYPE_P (type_a)
3283 || !COMPLETE_TYPE_P (type_b)
3284 || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
3285 || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
3286 break;
3288 unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
3289 unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
3290 if (size_a <= size_b)
3292 index_a += 1;
3293 ref_a = object_a;
3295 if (size_b <= size_a)
3297 index_b += 1;
3298 ref_b = object_b;
3302 /* See whether FULL_SEQ ends at the base and whether the two bases
3303 are equal. We do not care about TBAA or alignment info so we can
3304 use OEP_ADDRESS_OF to avoid false negatives. */
3305 tree base_a = indices_a->base_object;
3306 tree base_b = indices_b->base_object;
3307 bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
3308 && full_seq.start_b + full_seq.length == num_dimensions_b
3309 && (indices_a->unconstrained_base
3310 == indices_b->unconstrained_base)
3311 && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
3312 && (types_compatible_p (TREE_TYPE (base_a),
3313 TREE_TYPE (base_b))
3314 || (!base_supports_access_fn_components_p (base_a)
3315 && !base_supports_access_fn_components_p (base_b)
3316 && operand_equal_p
3317 (TYPE_SIZE (TREE_TYPE (base_a)),
3318 TYPE_SIZE (TREE_TYPE (base_b)), 0)))
3319 && (!loop_nest.exists ()
3320 || (object_address_invariant_in_loop_p
3321 (loop_nest[0], base_a))));
3323 /* If the bases are the same, we can include the base variation too.
3324 E.g. the b accesses in:
3326 for (int i = 0; i < n; ++i)
3327 b[i + 4][0] = b[i][0];
3329 have a definite dependence distance of 4, while for:
3331 for (int i = 0; i < n; ++i)
3332 a[i + 4][0] = b[i][0];
3334 the dependence distance depends on the gap between a and b.
3336 If the bases are different then we can only rely on the sequence
3337 rooted at a structure access, since arrays are allowed to overlap
3338 arbitrarily and change shape arbitrarily. E.g. we treat this as
3339 valid code:
3341 int a[256];
3343 ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];
3345 where two lvalues with the same int[4][3] type overlap, and where
3346 both lvalues are distinct from the object's declared type. */
3347 if (same_base_p)
3349 if (indices_a->unconstrained_base)
3350 full_seq.length += 1;
3352 else
3353 full_seq = struct_seq;
3355 /* Punt if we didn't find a suitable sequence. */
3356 if (full_seq.length == 0)
3358 if (use_alt_indices
3359 || (TREE_CODE (DR_REF (a)) == MEM_REF
3360 && TREE_CODE (DR_REF (b)) == MEM_REF)
3361 || may_be_nonaddressable_p (DR_REF (a))
3362 || may_be_nonaddressable_p (DR_REF (b)))
3364 /* Fully exhausted possibilities. */
3365 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3366 return res;
3369 /* Try evaluating both DRs as dereferences of pointers. */
3370 if (!a->alt_indices.base_object
3371 && TREE_CODE (DR_REF (a)) != MEM_REF)
3373 tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (a)),
3374 build1 (ADDR_EXPR, ptr_type_node, DR_REF (a)),
3375 build_int_cst
3376 (reference_alias_ptr_type (DR_REF (a)), 0));
3377 dr_analyze_indices (&a->alt_indices, alt_ref,
3378 loop_preheader_edge (loop_nest[0]),
3379 loop_containing_stmt (DR_STMT (a)));
3381 if (!b->alt_indices.base_object
3382 && TREE_CODE (DR_REF (b)) != MEM_REF)
3384 tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (b)),
3385 build1 (ADDR_EXPR, ptr_type_node, DR_REF (b)),
3386 build_int_cst
3387 (reference_alias_ptr_type (DR_REF (b)), 0));
3388 dr_analyze_indices (&b->alt_indices, alt_ref,
3389 loop_preheader_edge (loop_nest[0]),
3390 loop_containing_stmt (DR_STMT (b)));
3392 return initialize_data_dependence_relation (res, loop_nest, true);
3395 if (!same_base_p)
3397 /* Partial overlap is possible for different bases when strict aliasing
3398 is not in effect. It's also possible if either base involves a union
3399 access; e.g. for:
3401 struct s1 { int a[2]; };
3402 struct s2 { struct s1 b; int c; };
3403 struct s3 { int d; struct s1 e; };
3404 union u { struct s2 f; struct s3 g; } *p, *q;
3406 the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
3407 "p->g.e" (base "p->g") and might partially overlap the s1 at
3408 "q->g.e" (base "q->g"). */
3409 if (!flag_strict_aliasing
3410 || ref_contains_union_access_p (full_seq.object_a)
3411 || ref_contains_union_access_p (full_seq.object_b))
3413 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3414 return res;
3417 DDR_COULD_BE_INDEPENDENT_P (res) = true;
3418 if (!loop_nest.exists ()
3419 || (object_address_invariant_in_loop_p (loop_nest[0],
3420 full_seq.object_a)
3421 && object_address_invariant_in_loop_p (loop_nest[0],
3422 full_seq.object_b)))
3424 DDR_OBJECT_A (res) = full_seq.object_a;
3425 DDR_OBJECT_B (res) = full_seq.object_b;
3429 DDR_AFFINE_P (res) = true;
3430 DDR_ARE_DEPENDENT (res) = NULL_TREE;
3431 DDR_SUBSCRIPTS (res).create (full_seq.length);
3432 DDR_LOOP_NEST (res) = loop_nest;
3433 DDR_SELF_REFERENCE (res) = false;
3435 for (i = 0; i < full_seq.length; ++i)
3437 struct subscript *subscript;
3439 subscript = XNEW (struct subscript);
3440 SUB_ACCESS_FN (subscript, 0) = indices_a->access_fns[full_seq.start_a + i];
3441 SUB_ACCESS_FN (subscript, 1) = indices_b->access_fns[full_seq.start_b + i];
3442 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
3443 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
3444 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
3445 SUB_DISTANCE (subscript) = chrec_dont_know;
3446 DDR_SUBSCRIPTS (res).safe_push (subscript);
3449 return res;
3452 /* Initialize a data dependence relation between data accesses A and
3453 B. LOOP_NEST is the vector of loops surrounding the references: its
3454 length gives the size of the classic distance/direction vectors. */
3456 struct data_dependence_relation *
3457 initialize_data_dependence_relation (struct data_reference *a,
3458 struct data_reference *b,
3459 vec<loop_p> loop_nest)
3461 data_dependence_relation *res = XCNEW (struct data_dependence_relation);
3462 DDR_A (res) = a;
3463 DDR_B (res) = b;
3464 DDR_LOOP_NEST (res).create (0);
3465 DDR_SUBSCRIPTS (res).create (0);
3466 DDR_DIR_VECTS (res).create (0);
3467 DDR_DIST_VECTS (res).create (0);
3469 if (a == NULL || b == NULL)
3471 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3472 return res;
3475 /* If the data references do not alias, then they are independent. */
3476 if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL))
3478 DDR_ARE_DEPENDENT (res) = chrec_known;
3479 return res;
3482 return initialize_data_dependence_relation (res, loop_nest, false);
3486 /* Frees memory used by the conflict function F. */
3488 static void
3489 free_conflict_function (conflict_function *f)
3491 unsigned i;
3493 if (CF_NONTRIVIAL_P (f))
3495 for (i = 0; i < f->n; i++)
3496 affine_fn_free (f->fns[i]);
3498 free (f);
3501 /* Frees memory used by SUBSCRIPTS. */
3503 static void
3504 free_subscripts (vec<subscript_p> subscripts)
3506 for (subscript_p s : subscripts)
3508 free_conflict_function (s->conflicting_iterations_in_a);
3509 free_conflict_function (s->conflicting_iterations_in_b);
3510 free (s);
3512 subscripts.release ();
3515 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
3516 description. */
3518 static inline void
3519 finalize_ddr_dependent (struct data_dependence_relation *ddr,
3520 tree chrec)
3522 DDR_ARE_DEPENDENT (ddr) = chrec;
3523 free_subscripts (DDR_SUBSCRIPTS (ddr));
3524 DDR_SUBSCRIPTS (ddr).create (0);
3527 /* The dependence relation DDR cannot be represented by a distance
3528 vector. */
3530 static inline void
3531 non_affine_dependence_relation (struct data_dependence_relation *ddr)
3533 if (dump_file && (dump_flags & TDF_DETAILS))
3534 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
3536 DDR_AFFINE_P (ddr) = false;
3541 /* This section contains the classic Banerjee tests. */
3543 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
3544 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
3546 static inline bool
3547 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3549 return (evolution_function_is_constant_p (chrec_a)
3550 && evolution_function_is_constant_p (chrec_b));
3553 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
3554 variable, i.e., if the SIV (Single Index Variable) test is true. */
3556 static bool
3557 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3559 if ((evolution_function_is_constant_p (chrec_a)
3560 && evolution_function_is_univariate_p (chrec_b))
3561 || (evolution_function_is_constant_p (chrec_b)
3562 && evolution_function_is_univariate_p (chrec_a)))
3563 return true;
3565 if (evolution_function_is_univariate_p (chrec_a)
3566 && evolution_function_is_univariate_p (chrec_b))
3568 switch (TREE_CODE (chrec_a))
3570 case POLYNOMIAL_CHREC:
3571 switch (TREE_CODE (chrec_b))
3573 case POLYNOMIAL_CHREC:
3574 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
3575 return false;
3576 /* FALLTHRU */
3578 default:
3579 return true;
3582 default:
3583 return true;
3587 return false;
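/* For example (illustrative), the subscript pair (5, 7) from A[5] and
   A[7] is a ZIV subscript, while ({0, +, 1}_1, {2, +, 1}_1) from A[i]
   and A[i + 2] in loop 1 is an SIV subscript.  */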
3590 /* Creates a conflict function with N dimensions. The affine functions
3591 in each dimension follow. */
3593 static conflict_function *
3594 conflict_fn (unsigned n, ...)
3596 unsigned i;
3597 conflict_function *ret = XCNEW (conflict_function);
3598 va_list ap;
3600 gcc_assert (n > 0 && n <= MAX_DIM);
3601 va_start (ap, n);
3603 ret->n = n;
3604 for (i = 0; i < n; i++)
3605 ret->fns[i] = va_arg (ap, affine_fn);
3606 va_end (ap);
3608 return ret;
3611 /* Returns constant affine function with value CST. */
3613 static affine_fn
3614 affine_fn_cst (tree cst)
3616 affine_fn fn;
3617 fn.create (1);
3618 fn.quick_push (cst);
3619 return fn;
3622 /* Returns affine function with single variable, CST + COEF * x_DIM. */
3624 static affine_fn
3625 affine_fn_univar (tree cst, unsigned dim, tree coef)
3627 affine_fn fn;
3628 fn.create (dim + 1);
3629 unsigned i;
3631 gcc_assert (dim > 0);
3632 fn.quick_push (cst);
3633 for (i = 1; i < dim; i++)
3634 fn.quick_push (integer_zero_node);
3635 fn.quick_push (coef);
3636 return fn;
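/* For instance (illustrative), affine_fn_cst (c) builds the vector
   [c], while affine_fn_univar (c, 2, k) builds [c, 0, k]; both store
   the constant term first, and the latter represents c + k * x_2.  */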
3639 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
3640 *OVERLAPS_B are initialized to the functions that describe the
3641 relation between the elements accessed twice by CHREC_A and
3642 CHREC_B. For k >= 0, the following property is verified:
3644 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
3646 static void
3647 analyze_ziv_subscript (tree chrec_a,
3648 tree chrec_b,
3649 conflict_function **overlaps_a,
3650 conflict_function **overlaps_b,
3651 tree *last_conflicts)
3653 tree type, difference;
3654 dependence_stats.num_ziv++;
3656 if (dump_file && (dump_flags & TDF_DETAILS))
3657 fprintf (dump_file, "(analyze_ziv_subscript \n");
3659 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3660 chrec_a = chrec_convert (type, chrec_a, NULL);
3661 chrec_b = chrec_convert (type, chrec_b, NULL);
3662 difference = chrec_fold_minus (type, chrec_a, chrec_b);
3664 switch (TREE_CODE (difference))
3666 case INTEGER_CST:
3667 if (integer_zerop (difference))
3669 /* The difference is equal to zero: the accessed index
3670 overlaps for each iteration in the loop. */
3671 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3672 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3673 *last_conflicts = chrec_dont_know;
3674 dependence_stats.num_ziv_dependent++;
3676 else
3678 /* The accesses do not overlap. */
3679 *overlaps_a = conflict_fn_no_dependence ();
3680 *overlaps_b = conflict_fn_no_dependence ();
3681 *last_conflicts = integer_zero_node;
3682 dependence_stats.num_ziv_independent++;
3684 break;
3686 default:
3687 /* We're not sure whether the indexes overlap. For the moment,
3688 conservatively answer "don't know". */
3689 if (dump_file && (dump_flags & TDF_DETAILS))
3690 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
3692 *overlaps_a = conflict_fn_not_known ();
3693 *overlaps_b = conflict_fn_not_known ();
3694 *last_conflicts = chrec_dont_know;
3695 dependence_stats.num_ziv_unimplemented++;
3696 break;
3699 if (dump_file && (dump_flags & TDF_DETAILS))
3700 fprintf (dump_file, ")\n");
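/* A standalone sketch of the ZIV decision above (illustration only,
   not part of GCC; the helper name is hypothetical): for two
   loop-invariant subscripts A[c1] and A[c2], the accesses conflict iff
   c1 - c2 == 0, and any nonzero constant difference proves
   independence.  */
#if 0
static bool
ziv_conflicts_p (HOST_WIDE_INT c1, HOST_WIDE_INT c2)
{
  /* A[5] vs. A[5] overlaps on every iteration; A[5] vs. A[7] never.  */
  return c1 - c2 == 0;
}
#endif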
3703 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
3704 and only if it fits the unsigned int type. If this is not the case, or the
3705 bound on the number of iterations of LOOP could not be derived, returns
3706 chrec_dont_know. */
3708 static tree
3709 max_stmt_executions_tree (class loop *loop)
3711 widest_int nit;
3713 if (!max_stmt_executions (loop, &nit))
3714 return chrec_dont_know;
3716 if (!wi::fits_to_tree_p (nit, unsigned_type_node))
3717 return chrec_dont_know;
3719 return wide_int_to_tree (unsigned_type_node, nit);
3722 /* Determine whether the CHREC is always positive/negative. If the expression
3723 cannot be statically analyzed, return false, otherwise set the answer into
3724 VALUE. */
3726 static bool
3727 chrec_is_positive (tree chrec, bool *value)
3729 bool value0, value1, value2;
3730 tree end_value, nb_iter;
3732 switch (TREE_CODE (chrec))
3734 case POLYNOMIAL_CHREC:
3735 if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
3736 || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
3737 return false;
3739 /* FIXME -- overflows. */
3740 if (value0 == value1)
3742 *value = value0;
3743 return true;
3746 /* Otherwise the chrec is of the form: "{-197, +, 2}_1",
3747 and the proof consists in showing that the sign never
3748 changes during the execution of the loop, from 0 to
3749 loop->nb_iterations. */
3750 if (!evolution_function_is_affine_p (chrec))
3751 return false;
3753 nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
3754 if (chrec_contains_undetermined (nb_iter))
3755 return false;
3757 #if 0
3758 /* TODO -- If the test is after the exit, we may decrease the number of
3759 iterations by one. */
3760 if (after_exit)
3761 nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
3762 #endif
3764 end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
3766 if (!chrec_is_positive (end_value, &value2))
3767 return false;
3769 *value = value0;
3770 return value0 == value1;
3772 case INTEGER_CST:
3773 switch (tree_int_cst_sgn (chrec))
3775 case -1:
3776 *value = false;
3777 break;
3778 case 1:
3779 *value = true;
3780 break;
3781 default:
3782 return false;
3784 return true;
3786 default:
3787 return false;
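/* Illustration only (not part of GCC) of the sign argument used above:
   for {-197, +, 2}_1 running 100 latch iterations, the initial value
   is negative while the end value is positive, so the sign is not
   invariant and the function answers false.  */
#if 0
static void
chrec_sign_example (void)
{
  HOST_WIDE_INT left = -197, step = 2, nb_iter = 100;
  HOST_WIDE_INT end_value = left + step * nb_iter;  /* 3 */
  /* The sign flips inside the loop: no invariant answer exists.  */
  gcc_assert (left < 0 && end_value > 0);
}
#endif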
3792 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
3793 constant, and CHREC_B is an affine function. *OVERLAPS_A and
3794 *OVERLAPS_B are initialized to the functions that describe the
3795 relation between the elements accessed twice by CHREC_A and
3796 CHREC_B. For k >= 0, the following property is verified:
3798 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
3800 static void
3801 analyze_siv_subscript_cst_affine (tree chrec_a,
3802 tree chrec_b,
3803 conflict_function **overlaps_a,
3804 conflict_function **overlaps_b,
3805 tree *last_conflicts)
3807 bool value0, value1, value2;
3808 tree type, difference, tmp;
3810 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3811 chrec_a = chrec_convert (type, chrec_a, NULL);
3812 chrec_b = chrec_convert (type, chrec_b, NULL);
3813 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
3815 /* Special case overlap in the first iteration. */
3816 if (integer_zerop (difference))
3818 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3819 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3820 *last_conflicts = integer_one_node;
3821 return;
3824 if (!chrec_is_positive (initial_condition (difference), &value0))
3826 if (dump_file && (dump_flags & TDF_DETAILS))
3827 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
3829 dependence_stats.num_siv_unimplemented++;
3830 *overlaps_a = conflict_fn_not_known ();
3831 *overlaps_b = conflict_fn_not_known ();
3832 *last_conflicts = chrec_dont_know;
3833 return;
3835 else
3837 if (value0 == false)
3839 if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3840 || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
3842 if (dump_file && (dump_flags & TDF_DETAILS))
3843 fprintf (dump_file, "siv test failed: chrec not positive.\n");
3845 *overlaps_a = conflict_fn_not_known ();
3846 *overlaps_b = conflict_fn_not_known ();
3847 *last_conflicts = chrec_dont_know;
3848 dependence_stats.num_siv_unimplemented++;
3849 return;
3851 else
3853 if (value1 == true)
3855 /* Example:
3856 chrec_a = 12
3857 chrec_b = {10, +, 1}
3860 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3862 HOST_WIDE_INT numiter;
3863 class loop *loop = get_chrec_loop (chrec_b);
3865 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3866 tmp = fold_build2 (EXACT_DIV_EXPR, type,
3867 fold_build1 (ABS_EXPR, type, difference),
3868 CHREC_RIGHT (chrec_b));
3869 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3870 *last_conflicts = integer_one_node;
3873 /* Perform weak-zero siv test to see if overlap is
3874 outside the loop bounds. */
3875 numiter = max_stmt_executions_int (loop);
3877 if (numiter >= 0
3878 && compare_tree_int (tmp, numiter) > 0)
3880 free_conflict_function (*overlaps_a);
3881 free_conflict_function (*overlaps_b);
3882 *overlaps_a = conflict_fn_no_dependence ();
3883 *overlaps_b = conflict_fn_no_dependence ();
3884 *last_conflicts = integer_zero_node;
3885 dependence_stats.num_siv_independent++;
3886 return;
3888 dependence_stats.num_siv_dependent++;
3889 return;
3892 /* When the step does not divide the difference, there are
3893 no overlaps. */
3894 else
3896 *overlaps_a = conflict_fn_no_dependence ();
3897 *overlaps_b = conflict_fn_no_dependence ();
3898 *last_conflicts = integer_zero_node;
3899 dependence_stats.num_siv_independent++;
3900 return;
3904 else
3906 /* Example:
3907 chrec_a = 12
3908 chrec_b = {10, +, -1}
3910 In this case, chrec_a will not overlap with chrec_b. */
3911 *overlaps_a = conflict_fn_no_dependence ();
3912 *overlaps_b = conflict_fn_no_dependence ();
3913 *last_conflicts = integer_zero_node;
3914 dependence_stats.num_siv_independent++;
3915 return;
3919 else
3921 if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3922 || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
3924 if (dump_file && (dump_flags & TDF_DETAILS))
3925 fprintf (dump_file, "siv test failed: chrec not positive.\n");
3927 *overlaps_a = conflict_fn_not_known ();
3928 *overlaps_b = conflict_fn_not_known ();
3929 *last_conflicts = chrec_dont_know;
3930 dependence_stats.num_siv_unimplemented++;
3931 return;
3933 else
3935 if (value2 == false)
3937 /* Example:
3938 chrec_a = 3
3939 chrec_b = {10, +, -1}
3941 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3943 HOST_WIDE_INT numiter;
3944 class loop *loop = get_chrec_loop (chrec_b);
3946 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3947 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
3948 CHREC_RIGHT (chrec_b));
3949 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3950 *last_conflicts = integer_one_node;
3952 /* Perform weak-zero siv test to see if overlap is
3953 outside the loop bounds. */
3954 numiter = max_stmt_executions_int (loop);
3956 if (numiter >= 0
3957 && compare_tree_int (tmp, numiter) > 0)
3959 free_conflict_function (*overlaps_a);
3960 free_conflict_function (*overlaps_b);
3961 *overlaps_a = conflict_fn_no_dependence ();
3962 *overlaps_b = conflict_fn_no_dependence ();
3963 *last_conflicts = integer_zero_node;
3964 dependence_stats.num_siv_independent++;
3965 return;
3967 dependence_stats.num_siv_dependent++;
3968 return;
3971 /* When the step does not divide the difference, there
3972 are no overlaps. */
3973 else
3975 *overlaps_a = conflict_fn_no_dependence ();
3976 *overlaps_b = conflict_fn_no_dependence ();
3977 *last_conflicts = integer_zero_node;
3978 dependence_stats.num_siv_independent++;
3979 return;
3982 else
3984 /* Example:
3985 chrec_a = 3
3986 chrec_b = {4, +, 1}
3988 In this case, chrec_a will not overlap with chrec_b. */
3989 *overlaps_a = conflict_fn_no_dependence ();
3990 *overlaps_b = conflict_fn_no_dependence ();
3991 *last_conflicts = integer_zero_node;
3992 dependence_stats.num_siv_independent++;
3993 return;
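/* A minimal standalone sketch of the constant vs. affine SIV test
   above, on plain integers (illustration only, not part of GCC; the
   helper name is hypothetical): for chrec_a = CST and
   chrec_b = {INIT, +, STEP}, the only candidate conflict is at
   iteration k = (CST - INIT) / STEP, and a dependence exists only if
   STEP divides the difference and k lies within the iteration domain
   bounded by NITER.  */
#if 0
static bool
siv_cst_affine_conflict_p (HOST_WIDE_INT cst, HOST_WIDE_INT init,
                           HOST_WIDE_INT step, HOST_WIDE_INT niter)
{
  HOST_WIDE_INT diff = cst - init;
  if (step == 0 || diff % step != 0)
    return false;  /* The evolution never reaches CST.  */
  HOST_WIDE_INT k = diff / step;
  /* E.g. cst = 12, init = 10, step = 1: conflict at iteration k = 2.  */
  return k >= 0 && k <= niter;
}
#endif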
4000 /* Recursive helper function for initializing the matrix A. Returns
4001 the initial value of CHREC. */
4003 static tree
4004 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
4006 gcc_assert (chrec);
4008 switch (TREE_CODE (chrec))
4010 case POLYNOMIAL_CHREC:
4011 HOST_WIDE_INT chrec_right;
4012 if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
4013 return chrec_dont_know;
4014 chrec_right = int_cst_value (CHREC_RIGHT (chrec));
4015 /* We want to be able to negate without overflow. */
4016 if (chrec_right == HOST_WIDE_INT_MIN)
4017 return chrec_dont_know;
4018 A[index][0] = mult * chrec_right;
4019 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
4021 case PLUS_EXPR:
4022 case MULT_EXPR:
4023 case MINUS_EXPR:
4025 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4026 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
4028 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
4031 CASE_CONVERT:
4033 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4034 return chrec_convert (chrec_type (chrec), op, NULL);
4037 case BIT_NOT_EXPR:
4039 /* Handle ~X as -1 - X. */
4040 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4041 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
4042 build_int_cst (TREE_TYPE (chrec), -1), op);
4045 case INTEGER_CST:
4046 return chrec;
4048 default:
4049 gcc_unreachable ();
4050 return NULL_TREE;
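/* Worked illustration of the recursion above (not part of GCC; the
   helper name is hypothetical): for the chrec {{3, +, 4}_1, +, 5}_2
   with MULT = 1 and INDEX = 0, the outer step 5 is stored first, the
   recursion on the left part stores the inner step 4 next, and the
   initial condition 3 is returned.  */
#if 0
static void
initialize_matrix_A_example (lambda_matrix A)
{
  /* After initialize_matrix_A (A, {{3, +, 4}_1, +, 5}_2, 0, 1)
     (chrec notation, not valid C), the steps are stored
     outermost-first and the call returned 3:  */
  gcc_assert (A[0][0] == 5);  /* step of the outermost chrec */
  gcc_assert (A[1][0] == 4);  /* step of the nested chrec */
}
#endif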
4054 #define FLOOR_DIV(x,y) ((x) / (y))
4056 /* Solves the special case of the Diophantine equation:
4057 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
4059 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
4060 number of iterations that loops X and Y run. The overlaps will be
4061 constructed as evolutions in dimension DIM. */
4063 static void
4064 compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
4065 HOST_WIDE_INT step_a,
4066 HOST_WIDE_INT step_b,
4067 affine_fn *overlaps_a,
4068 affine_fn *overlaps_b,
4069 tree *last_conflicts, int dim)
4071 if (((step_a > 0 && step_b > 0)
4072 || (step_a < 0 && step_b < 0)))
4074 HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
4075 HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;
4077 gcd_steps_a_b = gcd (step_a, step_b);
4078 step_overlaps_a = step_b / gcd_steps_a_b;
4079 step_overlaps_b = step_a / gcd_steps_a_b;
4081 if (niter > 0)
4083 tau2 = FLOOR_DIV (niter, step_overlaps_a);
4084 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
4085 last_conflict = tau2;
4086 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
4088 else
4089 *last_conflicts = chrec_dont_know;
4091 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
4092 build_int_cst (NULL_TREE,
4093 step_overlaps_a));
4094 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
4095 build_int_cst (NULL_TREE,
4096 step_overlaps_b));
4099 else
4101 *overlaps_a = affine_fn_cst (integer_zero_node);
4102 *overlaps_b = affine_fn_cst (integer_zero_node);
4103 *last_conflicts = integer_zero_node;
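/* Illustrative check of the step computation above (not part of GCC;
   the helper name is hypothetical): with STEP_A = 6 and STEP_B = 4,
   gcd = 2, so the conflicting iterations advance as x = 2*t and
   y = 3*t, since 6 * (2*t) == 4 * (3*t) == 12*t for every t >= 0.  */
#if 0
static void
overlap_steps_example (void)
{
  HOST_WIDE_INT step_a = 6, step_b = 4;
  HOST_WIDE_INT g = gcd (step_a, step_b);        /* 2 */
  HOST_WIDE_INT step_x = step_b / g;             /* x advances by 2 */
  HOST_WIDE_INT step_y = step_a / g;             /* y advances by 3 */
  gcc_assert (step_a * step_x == step_b * step_y);  /* 12 == 12 */
}
#endif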
4107 /* Solves the special case of a Diophantine equation where CHREC_A is
4108 an affine bivariate function, and CHREC_B is an affine univariate
4109 function. For example,
4111 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
4113 has the following overlapping functions:
4115 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
4116 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
4117 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
4119 FORNOW: This is a specialized implementation for a case occurring in
4120 a common benchmark. Implement the general algorithm. */
4122 static void
4123 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
4124 conflict_function **overlaps_a,
4125 conflict_function **overlaps_b,
4126 tree *last_conflicts)
4128 bool xz_p, yz_p, xyz_p;
4129 HOST_WIDE_INT step_x, step_y, step_z;
4130 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
4131 affine_fn overlaps_a_xz, overlaps_b_xz;
4132 affine_fn overlaps_a_yz, overlaps_b_yz;
4133 affine_fn overlaps_a_xyz, overlaps_b_xyz;
4134 affine_fn ova1, ova2, ovb;
4135 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
4137 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
4138 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
4139 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
4141 niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
4142 niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
4143 niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
4145 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
4147 if (dump_file && (dump_flags & TDF_DETAILS))
4148 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
4150 *overlaps_a = conflict_fn_not_known ();
4151 *overlaps_b = conflict_fn_not_known ();
4152 *last_conflicts = chrec_dont_know;
4153 return;
4156 niter = MIN (niter_x, niter_z);
4157 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
4158 &overlaps_a_xz,
4159 &overlaps_b_xz,
4160 &last_conflicts_xz, 1);
4161 niter = MIN (niter_y, niter_z);
4162 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
4163 &overlaps_a_yz,
4164 &overlaps_b_yz,
4165 &last_conflicts_yz, 2);
4166 niter = MIN (niter_x, niter_z);
4167 niter = MIN (niter_y, niter);
4168 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
4169 &overlaps_a_xyz,
4170 &overlaps_b_xyz,
4171 &last_conflicts_xyz, 3);
4173 xz_p = !integer_zerop (last_conflicts_xz);
4174 yz_p = !integer_zerop (last_conflicts_yz);
4175 xyz_p = !integer_zerop (last_conflicts_xyz);
4177 if (xz_p || yz_p || xyz_p)
4179 ova1 = affine_fn_cst (integer_zero_node);
4180 ova2 = affine_fn_cst (integer_zero_node);
4181 ovb = affine_fn_cst (integer_zero_node);
4182 if (xz_p)
4184 affine_fn t0 = ova1;
4185 affine_fn t2 = ovb;
4187 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
4188 ovb = affine_fn_plus (ovb, overlaps_b_xz);
4189 affine_fn_free (t0);
4190 affine_fn_free (t2);
4191 *last_conflicts = last_conflicts_xz;
4193 if (yz_p)
4195 affine_fn t0 = ova2;
4196 affine_fn t2 = ovb;
4198 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
4199 ovb = affine_fn_plus (ovb, overlaps_b_yz);
4200 affine_fn_free (t0);
4201 affine_fn_free (t2);
4202 *last_conflicts = last_conflicts_yz;
4204 if (xyz_p)
4206 affine_fn t0 = ova1;
4207 affine_fn t2 = ova2;
4208 affine_fn t4 = ovb;
4210 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
4211 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
4212 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
4213 affine_fn_free (t0);
4214 affine_fn_free (t2);
4215 affine_fn_free (t4);
4216 *last_conflicts = last_conflicts_xyz;
4218 *overlaps_a = conflict_fn (2, ova1, ova2);
4219 *overlaps_b = conflict_fn (1, ovb);
4221 else
4223 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4224 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4225 *last_conflicts = integer_zero_node;
4228 affine_fn_free (overlaps_a_xz);
4229 affine_fn_free (overlaps_b_xz);
4230 affine_fn_free (overlaps_a_yz);
4231 affine_fn_free (overlaps_b_yz);
4232 affine_fn_free (overlaps_a_xyz);
4233 affine_fn_free (overlaps_b_xyz);
4236 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
4238 static void
4239 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
4240 int size)
4242 memcpy (vec2, vec1, size * sizeof (*vec1));
4245 /* Copy the elements of M x N matrix MAT1 to MAT2. */
4247 static void
4248 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
4249 int m, int n)
4251 int i;
4253 for (i = 0; i < m; i++)
4254 lambda_vector_copy (mat1[i], mat2[i], n);
4257 /* Store the SIZE x SIZE identity matrix in MAT. */
4259 static void
4260 lambda_matrix_id (lambda_matrix mat, int size)
4262 int i, j;
4264 for (i = 0; i < size; i++)
4265 for (j = 0; j < size; j++)
4266 mat[i][j] = (i == j) ? 1 : 0;
4269 /* Return the index of the first nonzero element of vector VEC1 between
4270 START and N. We must have START <= N.
4271 Returns N if VEC1 is the zero vector. */
4273 static int
4274 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
4276 int j = start;
4277 while (j < n && vec1[j] == 0)
4278 j++;
4279 return j;
4282 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
4283 R2 = R2 + CONST1 * R1. */
4285 static bool
4286 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2,
4287 lambda_int const1)
4289 int i;
4291 if (const1 == 0)
4292 return true;
4294 for (i = 0; i < n; i++)
4296 bool ovf;
4297 lambda_int tem = mul_hwi (mat[r1][i], const1, &ovf);
4298 if (ovf)
4299 return false;
4300 lambda_int tem2 = add_hwi (mat[r2][i], tem, &ovf);
4301 if (ovf || tem2 == HOST_WIDE_INT_MIN)
4302 return false;
4303 mat[r2][i] = tem2;
4306 return true;
4309 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
4310 and store the result in VEC2. */
4312 static void
4313 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
4314 int size, lambda_int const1)
4316 int i;
4318 if (const1 == 0)
4319 lambda_vector_clear (vec2, size);
4320 else
4321 for (i = 0; i < size; i++)
4322 vec2[i] = const1 * vec1[i];
4325 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
4327 static void
4328 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
4329 int size)
4331 lambda_vector_mult_const (vec1, vec2, size, -1);
4334 /* Negate row R1 of matrix MAT which has N columns. */
4336 static void
4337 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
4339 lambda_vector_negate (mat[r1], mat[r1], n);
4342 /* Return true if two vectors are equal. */
4344 static bool
4345 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
4347 int i;
4348 for (i = 0; i < size; i++)
4349 if (vec1[i] != vec2[i])
4350 return false;
4351 return true;
4354 /* Given an M x N integer matrix A, this function determines an M x
4355 M unimodular matrix U, and an M x N echelon matrix S such that
4356 "U.A = S". This decomposition is also known as "right Hermite".
4358 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
4359 Restructuring Compilers" Utpal Banerjee. */
4361 static bool
4362 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
4363 lambda_matrix S, lambda_matrix U)
4365 int i, j, i0 = 0;
4367 lambda_matrix_copy (A, S, m, n);
4368 lambda_matrix_id (U, m);
4370 for (j = 0; j < n; j++)
4372 if (lambda_vector_first_nz (S[j], m, i0) < m)
4374 ++i0;
4375 for (i = m - 1; i >= i0; i--)
4377 while (S[i][j] != 0)
4379 lambda_int factor, a, b;
4381 a = S[i-1][j];
4382 b = S[i][j];
4383 gcc_assert (a != HOST_WIDE_INT_MIN);
4384 factor = a / b;
4386 if (!lambda_matrix_row_add (S, n, i, i-1, -factor))
4387 return false;
4388 std::swap (S[i], S[i-1]);
4390 if (!lambda_matrix_row_add (U, m, i, i-1, -factor))
4391 return false;
4392 std::swap (U[i], U[i-1]);
4398 return true;
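/* Worked illustration of the decomposition above (not part of GCC; the
   helper name is hypothetical): for the 2 x 1 matrix A = [[4], [6]],
   one right Hermite decomposition is U = [[-1, 1], [3, -2]] with
   S = U.A = [[2], [0]]: U is unimodular (det U = -1) and S is in
   echelon form with S[0][0] = gcd (4, 6) = 2.  */
#if 0
static void
right_hermite_example (void)
{
  lambda_int U[2][2] = { { -1, 1 }, { 3, -2 } };
  lambda_int A[2] = { 4, 6 };
  gcc_assert (U[0][0] * A[0] + U[0][1] * A[1] == 2);  /* gcd (4, 6) */
  gcc_assert (U[1][0] * A[0] + U[1][1] * A[1] == 0);  /* echelon tail */
  gcc_assert (U[0][0] * U[1][1] - U[0][1] * U[1][0] == -1);  /* unimodular */
}
#endif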
4401 /* Determines the overlapping elements due to accesses CHREC_A and
4402 CHREC_B, which are affine functions. This function cannot handle
4403 symbolic evolution functions, i.e. when the initial conditions are
4404 parameters, because it uses lambda matrices of integers. */
4406 static void
4407 analyze_subscript_affine_affine (tree chrec_a,
4408 tree chrec_b,
4409 conflict_function **overlaps_a,
4410 conflict_function **overlaps_b,
4411 tree *last_conflicts)
4413 unsigned nb_vars_a, nb_vars_b, dim;
4414 lambda_int gamma, gcd_alpha_beta;
4415 lambda_matrix A, U, S;
4416 struct obstack scratch_obstack;
4418 if (eq_evolutions_p (chrec_a, chrec_b))
4420 /* The accessed index overlaps for each iteration in the
4421 loop. */
4422 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4423 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4424 *last_conflicts = chrec_dont_know;
4425 return;
4427 if (dump_file && (dump_flags & TDF_DETAILS))
4428 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
4430 /* For determining the initial intersection, we have to solve a
4431 Diophantine equation. This is the most time-consuming part.
4433 To answer the question "Is there a dependence?", we have
4434 to prove that there exists a solution to the Diophantine
4435 equation, and that the solution is in the iteration domain,
4436 i.e. the solution is positive or zero, and that the solution
4437 happens before the upper bound loop.nb_iterations. Otherwise
4438 there is no dependence. This function outputs a description of
4439 the iterations that hold the intersections. */
4441 nb_vars_a = nb_vars_in_chrec (chrec_a);
4442 nb_vars_b = nb_vars_in_chrec (chrec_b);
4444 gcc_obstack_init (&scratch_obstack);
4446 dim = nb_vars_a + nb_vars_b;
4447 U = lambda_matrix_new (dim, dim, &scratch_obstack);
4448 A = lambda_matrix_new (dim, 1, &scratch_obstack);
4449 S = lambda_matrix_new (dim, 1, &scratch_obstack);
4451 tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
4452 tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
4453 if (init_a == chrec_dont_know
4454 || init_b == chrec_dont_know)
4456 if (dump_file && (dump_flags & TDF_DETAILS))
4457 fprintf (dump_file, "affine-affine test failed: "
4458 "representation issue.\n");
4459 *overlaps_a = conflict_fn_not_known ();
4460 *overlaps_b = conflict_fn_not_known ();
4461 *last_conflicts = chrec_dont_know;
4462 goto end_analyze_subs_aa;
4464 gamma = int_cst_value (init_b) - int_cst_value (init_a);
4466 /* Don't do all the hard work of solving the Diophantine equation
4467 when we already know the solution: for example,
4468 | {3, +, 1}_1
4469 | {3, +, 4}_2
4470 | gamma = 3 - 3 = 0.
4471 Then the first overlap occurs during the first iterations:
4472 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
4474 if (gamma == 0)
4476 if (nb_vars_a == 1 && nb_vars_b == 1)
4478 HOST_WIDE_INT step_a, step_b;
4479 HOST_WIDE_INT niter, niter_a, niter_b;
4480 affine_fn ova, ovb;
4482 niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
4483 niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
4484 niter = MIN (niter_a, niter_b);
4485 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
4486 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
4488 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
4489 &ova, &ovb,
4490 last_conflicts, 1);
4491 *overlaps_a = conflict_fn (1, ova);
4492 *overlaps_b = conflict_fn (1, ovb);
4495 else if (nb_vars_a == 2 && nb_vars_b == 1)
4496 compute_overlap_steps_for_affine_1_2
4497 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
4499 else if (nb_vars_a == 1 && nb_vars_b == 2)
4500 compute_overlap_steps_for_affine_1_2
4501 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
4503 else
4505 if (dump_file && (dump_flags & TDF_DETAILS))
4506 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
4507 *overlaps_a = conflict_fn_not_known ();
4508 *overlaps_b = conflict_fn_not_known ();
4509 *last_conflicts = chrec_dont_know;
4511 goto end_analyze_subs_aa;
4514 /* U.A = S */
4515 if (!lambda_matrix_right_hermite (A, dim, 1, S, U))
4517 *overlaps_a = conflict_fn_not_known ();
4518 *overlaps_b = conflict_fn_not_known ();
4519 *last_conflicts = chrec_dont_know;
4520 goto end_analyze_subs_aa;
4523 if (S[0][0] < 0)
4525 S[0][0] *= -1;
4526 lambda_matrix_row_negate (U, dim, 0);
4528 gcd_alpha_beta = S[0][0];
4530 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
4531 but that is quite a strange case. Instead of ICEing, answer
4532 don't know. */
4533 if (gcd_alpha_beta == 0)
4535 *overlaps_a = conflict_fn_not_known ();
4536 *overlaps_b = conflict_fn_not_known ();
4537 *last_conflicts = chrec_dont_know;
4538 goto end_analyze_subs_aa;
4541 /* The classic "gcd-test". */
4542 if (!int_divides_p (gcd_alpha_beta, gamma))
4544 /* The "gcd-test" has determined that there is no integer
4545 solution, i.e. there is no dependence. */
4546 *overlaps_a = conflict_fn_no_dependence ();
4547 *overlaps_b = conflict_fn_no_dependence ();
4548 *last_conflicts = integer_zero_node;
4551 /* Both access functions are univariate. This includes SIV and MIV cases. */
4552 else if (nb_vars_a == 1 && nb_vars_b == 1)
4554 /* Both functions should have the same evolution sign. */
4555 if (((A[0][0] > 0 && -A[1][0] > 0)
4556 || (A[0][0] < 0 && -A[1][0] < 0)))
4558 /* The solutions are given by:
4560 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
4561 | [u21 u22] [y0]
4563 For a given integer t. Using the following variables,
4565 | i0 = u11 * gamma / gcd_alpha_beta
4566 | j0 = u12 * gamma / gcd_alpha_beta
4567 | i1 = u21
4568 | j1 = u22
4570 the solutions are:
4572 | x0 = i0 + i1 * t,
4573 | y0 = j0 + j1 * t. */
4574 HOST_WIDE_INT i0, j0, i1, j1;
4576 i0 = U[0][0] * gamma / gcd_alpha_beta;
4577 j0 = U[0][1] * gamma / gcd_alpha_beta;
4578 i1 = U[1][0];
4579 j1 = U[1][1];
4581 if ((i1 == 0 && i0 < 0)
4582 || (j1 == 0 && j0 < 0))
4584 /* There is no solution.
4585 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
4586 falls in here, but for the moment we don't look at the
4587 upper bound of the iteration domain. */
4588 *overlaps_a = conflict_fn_no_dependence ();
4589 *overlaps_b = conflict_fn_no_dependence ();
4590 *last_conflicts = integer_zero_node;
4591 goto end_analyze_subs_aa;
4594 if (i1 > 0 && j1 > 0)
4596 HOST_WIDE_INT niter_a
4597 = max_stmt_executions_int (get_chrec_loop (chrec_a));
4598 HOST_WIDE_INT niter_b
4599 = max_stmt_executions_int (get_chrec_loop (chrec_b));
4600 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
4602 /* (X0, Y0) is a solution of the Diophantine equation:
4603 "chrec_a (X0) = chrec_b (Y0)". */
4604 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
4605 CEIL (-j0, j1));
4606 HOST_WIDE_INT x0 = i1 * tau1 + i0;
4607 HOST_WIDE_INT y0 = j1 * tau1 + j0;
4609 /* (X1, Y1) is the smallest positive solution of the equation
4610 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
4611 first conflict occurs. */
4612 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
4613 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
4614 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
4616 if (niter > 0)
4618 /* If the overlap occurs outside of the bounds of the
4619 loop, there is no dependence. */
4620 if (x1 >= niter_a || y1 >= niter_b)
4622 *overlaps_a = conflict_fn_no_dependence ();
4623 *overlaps_b = conflict_fn_no_dependence ();
4624 *last_conflicts = integer_zero_node;
4625 goto end_analyze_subs_aa;
4628 /* max stmt executions can get quite large, avoid
4629 overflows by using wide ints here. */
4630 widest_int tau2
4631 = wi::smin (wi::sdiv_floor (wi::sub (niter_a, i0), i1),
4632 wi::sdiv_floor (wi::sub (niter_b, j0), j1));
4633 widest_int last_conflict = wi::sub (tau2, (x1 - i0)/i1);
4634 if (wi::min_precision (last_conflict, SIGNED)
4635 <= TYPE_PRECISION (integer_type_node))
4636 *last_conflicts
4637 = build_int_cst (integer_type_node,
4638 last_conflict.to_shwi ());
4639 else
4640 *last_conflicts = chrec_dont_know;
4642 else
4643 *last_conflicts = chrec_dont_know;
4645 *overlaps_a
4646 = conflict_fn (1,
4647 affine_fn_univar (build_int_cst (NULL_TREE, x1),
4648 1,
4649 build_int_cst (NULL_TREE, i1)));
4650 *overlaps_b
4651 = conflict_fn (1,
4652 affine_fn_univar (build_int_cst (NULL_TREE, y1),
4653 1,
4654 build_int_cst (NULL_TREE, j1)));
4656 else
4658 /* FIXME: For the moment, the upper bound of the
4659 iteration domain for i and j is not checked. */
4660 if (dump_file && (dump_flags & TDF_DETAILS))
4661 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4662 *overlaps_a = conflict_fn_not_known ();
4663 *overlaps_b = conflict_fn_not_known ();
4664 *last_conflicts = chrec_dont_know;
4667 else
4669 if (dump_file && (dump_flags & TDF_DETAILS))
4670 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4671 *overlaps_a = conflict_fn_not_known ();
4672 *overlaps_b = conflict_fn_not_known ();
4673 *last_conflicts = chrec_dont_know;
4676 else
4678 if (dump_file && (dump_flags & TDF_DETAILS))
4679 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4680 *overlaps_a = conflict_fn_not_known ();
4681 *overlaps_b = conflict_fn_not_known ();
4682 *last_conflicts = chrec_dont_know;
4685 end_analyze_subs_aa:
4686 obstack_free (&scratch_obstack, NULL);
4687 if (dump_file && (dump_flags & TDF_DETAILS))
4689 fprintf (dump_file, " (overlaps_a = ");
4690 dump_conflict_function (dump_file, *overlaps_a);
4691 fprintf (dump_file, ")\n (overlaps_b = ");
4692 dump_conflict_function (dump_file, *overlaps_b);
4693 fprintf (dump_file, "))\n");
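/* A standalone sketch of the classic gcd test performed above
   (illustration only, not part of GCC; the helper name is
   hypothetical): the Diophantine equation ALPHA * x - BETA * y = GAMMA
   has an integer solution iff gcd (ALPHA, BETA) divides GAMMA.  For
   {3, +, 4}_1 vs. {5, +, 6}_1, gamma = 5 - 3 = 2 and gcd (4, 6) = 2
   divides it, so a dependence cannot be ruled out by this test
   alone.  */
#if 0
static bool
gcd_test_may_depend_p (HOST_WIDE_INT alpha, HOST_WIDE_INT beta,
                       HOST_WIDE_INT gamma)
{
  return gamma % gcd (alpha, beta) == 0;
}
#endif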
4697 /* Returns true when analyze_subscript_affine_affine can be used for
4698 determining the dependence relation between CHREC_A and CHREC_B,
4699 which contain symbols. This function modifies CHREC_A and CHREC_B
4700 such that the analysis result is the same, such that they no longer
4701 contain symbols, and such that they can safely be passed to the analyzer.
4703 Example: The analysis of the following tuples of evolutions produce
4704 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
4705 vs. {0, +, 1}_1
4707 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
4708 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
4711 static bool
4712 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
4714 tree diff, type, left_a, left_b, right_b;
4716 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
4717 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
4718 /* FIXME: For the moment not handled. Might be refined later. */
4719 return false;
4721 type = chrec_type (*chrec_a);
4722 left_a = CHREC_LEFT (*chrec_a);
4723 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
4724 diff = chrec_fold_minus (type, left_a, left_b);
4726 if (!evolution_function_is_constant_p (diff))
4727 return false;
4729 if (dump_file && (dump_flags & TDF_DETAILS))
4730 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
4732 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
4733 diff, CHREC_RIGHT (*chrec_a));
4734 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
4735 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
4736 build_int_cst (type, 0),
4737 right_b);
4738 return true;
4741 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
4742 *OVERLAPS_B are initialized to the functions that describe the
4743 relation between the elements accessed twice by CHREC_A and
4744 CHREC_B. For k >= 0, the following property is verified:
4746 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
4748 static void
4749 analyze_siv_subscript (tree chrec_a,
4750 tree chrec_b,
4751 conflict_function **overlaps_a,
4752 conflict_function **overlaps_b,
4753 tree *last_conflicts,
4754 int loop_nest_num)
4756 dependence_stats.num_siv++;
4758 if (dump_file && (dump_flags & TDF_DETAILS))
4759 fprintf (dump_file, "(analyze_siv_subscript \n");
4761 if (evolution_function_is_constant_p (chrec_a)
4762 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
4763 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
4764 overlaps_a, overlaps_b, last_conflicts);
4766 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
4767 && evolution_function_is_constant_p (chrec_b))
4768 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
4769 overlaps_b, overlaps_a, last_conflicts);
4771 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
4772 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
4774 if (!chrec_contains_symbols (chrec_a)
4775 && !chrec_contains_symbols (chrec_b))
4777 analyze_subscript_affine_affine (chrec_a, chrec_b,
4778 overlaps_a, overlaps_b,
4779 last_conflicts);
4781 if (CF_NOT_KNOWN_P (*overlaps_a)
4782 || CF_NOT_KNOWN_P (*overlaps_b))
4783 dependence_stats.num_siv_unimplemented++;
4784 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4785 || CF_NO_DEPENDENCE_P (*overlaps_b))
4786 dependence_stats.num_siv_independent++;
4787 else
4788 dependence_stats.num_siv_dependent++;
4790 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
4791 &chrec_b))
4793 analyze_subscript_affine_affine (chrec_a, chrec_b,
4794 overlaps_a, overlaps_b,
4795 last_conflicts);
4797 if (CF_NOT_KNOWN_P (*overlaps_a)
4798 || CF_NOT_KNOWN_P (*overlaps_b))
4799 dependence_stats.num_siv_unimplemented++;
4800 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4801 || CF_NO_DEPENDENCE_P (*overlaps_b))
4802 dependence_stats.num_siv_independent++;
4803 else
4804 dependence_stats.num_siv_dependent++;
4806 else
4807 goto siv_subscript_dontknow;
4810 else
4812 siv_subscript_dontknow:;
4813 if (dump_file && (dump_flags & TDF_DETAILS))
4814 fprintf (dump_file, " siv test failed: unimplemented.\n");
4815 *overlaps_a = conflict_fn_not_known ();
4816 *overlaps_b = conflict_fn_not_known ();
4817 *last_conflicts = chrec_dont_know;
4818 dependence_stats.num_siv_unimplemented++;
4821 if (dump_file && (dump_flags & TDF_DETAILS))
4822 fprintf (dump_file, ")\n");
4825 /* Returns false if we can prove that the greatest common divisor of the steps
4826 of CHREC does not divide CST, true otherwise. */
4828 static bool
4829 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
4831 HOST_WIDE_INT cd = 0, val;
4832 tree step;
4834 if (!tree_fits_shwi_p (cst))
4835 return true;
4836 val = tree_to_shwi (cst);
4838 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
4840 step = CHREC_RIGHT (chrec);
4841 if (!tree_fits_shwi_p (step))
4842 return true;
4843 cd = gcd (cd, tree_to_shwi (step));
4844 chrec = CHREC_LEFT (chrec);
4847 return val % cd == 0;
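/* Illustration only (not part of GCC; the helper name is hypothetical):
   for CHREC = {{0, +, 2}_1, +, 6}_2 the steps are 2 and 6, so
   cd = gcd (gcd (0, 2), 6) = 2; a constant difference of 3 can never
   be bridged by the evolution, while a difference of 4 might be.  */
#if 0
static void
gcd_of_steps_example (void)
{
  HOST_WIDE_INT cd = gcd (gcd (0, 2), 6);  /* 2 */
  gcc_assert (3 % cd != 0);  /* may-divide answer: false */
  gcc_assert (4 % cd == 0);  /* may-divide answer: true */
}
#endif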
4850 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
4851 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
4852 functions that describe the relation between the elements accessed
4853 twice by CHREC_A and CHREC_B. For k >= 0, the following property
4854 is verified:
4856 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
4858 static void
4859 analyze_miv_subscript (tree chrec_a,
4860 tree chrec_b,
4861 conflict_function **overlaps_a,
4862 conflict_function **overlaps_b,
4863 tree *last_conflicts,
4864 class loop *loop_nest)
4866 tree type, difference;
4868 dependence_stats.num_miv++;
4869 if (dump_file && (dump_flags & TDF_DETAILS))
4870 fprintf (dump_file, "(analyze_miv_subscript \n");
4872 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
4873 chrec_a = chrec_convert (type, chrec_a, NULL);
4874 chrec_b = chrec_convert (type, chrec_b, NULL);
4875 difference = chrec_fold_minus (type, chrec_a, chrec_b);
4877 if (eq_evolutions_p (chrec_a, chrec_b))
4879 /* Access functions are the same: all the elements are accessed
4880 in the same order. */
4881 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4882 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4883 *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
4884 dependence_stats.num_miv_dependent++;
4887 else if (evolution_function_is_constant_p (difference)
4888 && evolution_function_is_affine_multivariate_p (chrec_a,
4889 loop_nest->num)
4890 && !gcd_of_steps_may_divide_p (chrec_a, difference))
4892 /* testsuite/.../ssa-chrec-33.c
4893 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
4895 The difference is 1, and all the evolution steps are multiples
4896 of 2, consequently there are no overlapping elements. */
4897 *overlaps_a = conflict_fn_no_dependence ();
4898 *overlaps_b = conflict_fn_no_dependence ();
4899 *last_conflicts = integer_zero_node;
4900 dependence_stats.num_miv_independent++;
4903 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest->num)
4904 && !chrec_contains_symbols (chrec_a, loop_nest)
4905 && evolution_function_is_affine_in_loop (chrec_b, loop_nest->num)
4906 && !chrec_contains_symbols (chrec_b, loop_nest))
4908 /* testsuite/.../ssa-chrec-35.c
4909 {0, +, 1}_2 vs. {0, +, 1}_3
4910 the overlapping elements are respectively located at iterations:
4911 {0, +, 1}_x and {0, +, 1}_x,
4912 in other words, we have the equality:
4913 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
4915 Other examples:
4916 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
4917 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
4919 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
4920 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
4922 analyze_subscript_affine_affine (chrec_a, chrec_b,
4923 overlaps_a, overlaps_b, last_conflicts);
4925 if (CF_NOT_KNOWN_P (*overlaps_a)
4926 || CF_NOT_KNOWN_P (*overlaps_b))
4927 dependence_stats.num_miv_unimplemented++;
4928 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4929 || CF_NO_DEPENDENCE_P (*overlaps_b))
4930 dependence_stats.num_miv_independent++;
4931 else
4932 dependence_stats.num_miv_dependent++;
4935 else
4937 /* When the analysis is too difficult, answer "don't know". */
4938 if (dump_file && (dump_flags & TDF_DETAILS))
4939 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
4941 *overlaps_a = conflict_fn_not_known ();
4942 *overlaps_b = conflict_fn_not_known ();
4943 *last_conflicts = chrec_dont_know;
4944 dependence_stats.num_miv_unimplemented++;
4947 if (dump_file && (dump_flags & TDF_DETAILS))
4948 fprintf (dump_file, ")\n");
4951 /* Determines the iterations for which CHREC_A is equal to CHREC_B
4952 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
4953 OVERLAP_ITERATIONS_B are initialized with two functions that
4954 describe the iterations that contain conflicting elements.
4956 Remark: For an integer k >= 0, the following equality is true:
4958 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
4961 static void
4962 analyze_overlapping_iterations (tree chrec_a,
4963 tree chrec_b,
4964 conflict_function **overlap_iterations_a,
4965 conflict_function **overlap_iterations_b,
4966 tree *last_conflicts, class loop *loop_nest)
4968 unsigned int lnn = loop_nest->num;
4970 dependence_stats.num_subscript_tests++;
4972 if (dump_file && (dump_flags & TDF_DETAILS))
4974 fprintf (dump_file, "(analyze_overlapping_iterations \n");
4975 fprintf (dump_file, " (chrec_a = ");
4976 print_generic_expr (dump_file, chrec_a);
4977 fprintf (dump_file, ")\n (chrec_b = ");
4978 print_generic_expr (dump_file, chrec_b);
4979 fprintf (dump_file, ")\n");
4982 if (chrec_a == NULL_TREE
4983 || chrec_b == NULL_TREE
4984 || chrec_contains_undetermined (chrec_a)
4985 || chrec_contains_undetermined (chrec_b))
4987 dependence_stats.num_subscript_undetermined++;
4989 *overlap_iterations_a = conflict_fn_not_known ();
4990 *overlap_iterations_b = conflict_fn_not_known ();
4993 /* If they are the same chrec, and are affine, they overlap
4994 on every iteration. */
4995 else if (eq_evolutions_p (chrec_a, chrec_b)
4996 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4997 || operand_equal_p (chrec_a, chrec_b, 0)))
4999 dependence_stats.num_same_subscript_function++;
5000 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
5001 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
5002 *last_conflicts = chrec_dont_know;
5005 /* If they aren't the same, and aren't affine, we can't do anything
5006 yet. */
5007 else if ((chrec_contains_symbols (chrec_a)
5008 || chrec_contains_symbols (chrec_b))
5009 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
5010 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
5012 dependence_stats.num_subscript_undetermined++;
5013 *overlap_iterations_a = conflict_fn_not_known ();
5014 *overlap_iterations_b = conflict_fn_not_known ();
5017 else if (ziv_subscript_p (chrec_a, chrec_b))
5018 analyze_ziv_subscript (chrec_a, chrec_b,
5019 overlap_iterations_a, overlap_iterations_b,
5020 last_conflicts);
5022 else if (siv_subscript_p (chrec_a, chrec_b))
5023 analyze_siv_subscript (chrec_a, chrec_b,
5024 overlap_iterations_a, overlap_iterations_b,
5025 last_conflicts, lnn);
5027 else
5028 analyze_miv_subscript (chrec_a, chrec_b,
5029 overlap_iterations_a, overlap_iterations_b,
5030 last_conflicts, loop_nest);
5032 if (dump_file && (dump_flags & TDF_DETAILS))
5034 fprintf (dump_file, " (overlap_iterations_a = ");
5035 dump_conflict_function (dump_file, *overlap_iterations_a);
5036 fprintf (dump_file, ")\n (overlap_iterations_b = ");
5037 dump_conflict_function (dump_file, *overlap_iterations_b);
5038 fprintf (dump_file, "))\n");
5042 /* Helper function for uniquely inserting distance vectors. */
5044 static void
5045 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
5047 for (lambda_vector v : DDR_DIST_VECTS (ddr))
5048 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
5049 return;
5051 DDR_DIST_VECTS (ddr).safe_push (dist_v);
5054 /* Helper function for uniquely inserting direction vectors. */
5056 static void
5057 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
5059 for (lambda_vector v : DDR_DIR_VECTS (ddr))
5060 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
5061 return;
5063 DDR_DIR_VECTS (ddr).safe_push (dir_v);
5066 /* Add a distance of 1 to all the loops outer than INDEX. If we
5067 haven't yet determined a distance for this outer loop, push a new
5068 distance vector composed of the previous distance, and a distance
5069 of 1 for this outer loop. Example:
5071 | loop_1
5072 | loop_2
5073 | A[10]
5074 | endloop_2
5075 | endloop_1
5077 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
5078 save (0, 1), then we have to save (1, 0). */
5080 static void
5081 add_outer_distances (struct data_dependence_relation *ddr,
5082 lambda_vector dist_v, int index)
5084 /* For each outer loop where init_v is not set, the accesses are
5085 in dependence of distance 1 in the loop. */
5086 while (--index >= 0)
5088 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5089 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
5090 save_v[index] = 1;
5091 save_dist_v (ddr, save_v);
5095 /* Return false when we fail to represent the data dependence as a
5096 distance vector. A_INDEX is the index of the first reference
5097 (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
5098 second reference. INIT_B is set to true when a component has been
5099 added to the distance vector DIST_V. INDEX_CARRY is then set to
5100 the index in DIST_V that carries the dependence. */
5102 static bool
5103 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
5104 unsigned int a_index, unsigned int b_index,
5105 lambda_vector dist_v, bool *init_b,
5106 int *index_carry)
5108 unsigned i;
5109 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5110 class loop *loop = DDR_LOOP_NEST (ddr)[0];
5112 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
5114 tree access_fn_a, access_fn_b;
5115 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
5117 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
5119 non_affine_dependence_relation (ddr);
5120 return false;
5123 access_fn_a = SUB_ACCESS_FN (subscript, a_index);
5124 access_fn_b = SUB_ACCESS_FN (subscript, b_index);
5126 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
5127 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
5129 HOST_WIDE_INT dist;
5130 int index;
5131 int var_a = CHREC_VARIABLE (access_fn_a);
5132 int var_b = CHREC_VARIABLE (access_fn_b);
5134 if (var_a != var_b
5135 || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
5137 non_affine_dependence_relation (ddr);
5138 return false;
5141 /* When data references are collected in a loop while data
5142 dependences are analyzed in a loop nest nested in that loop, we
5143 would have more access functions than loops. Skip access
5144 functions of loops not in the loop nest.
5146 See PR89725 for more information. */
5147 if (flow_loop_nested_p (get_loop (cfun, var_a), loop))
5148 continue;
5150 dist = int_cst_value (SUB_DISTANCE (subscript));
5151 index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
5152 *index_carry = MIN (index, *index_carry);
5154 /* This is the subscript coupling test. If we have already
5155 recorded a distance for this loop (a distance coming from
5156 another subscript), it should be the same. For example,
5157 in the following code, there is no dependence:
5159 | loop i = 0, N, 1
5160 | T[i+1][i] = ...
5161 | ... = T[i][i]
5162 | endloop
5164 if (init_v[index] != 0 && dist_v[index] != dist)
5166 finalize_ddr_dependent (ddr, chrec_known);
5167 return false;
5170 dist_v[index] = dist;
5171 init_v[index] = 1;
5172 *init_b = true;
5174 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
5176 /* This can be for example an affine vs. constant dependence
5177 (T[i] vs. T[3]) that is not an affine dependence and is
5178 not representable as a distance vector. */
5179 non_affine_dependence_relation (ddr);
5180 return false;
5182 else
5183 *init_b = true;
5186 return true;
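/* Arithmetic illustration of the subscript coupling test above (not
   part of GCC; the helper name is hypothetical): for T[i+1][i] vs.
   T[i][i], the first subscript requires a distance of 1 while the
   second requires a distance of 0; no single distance satisfies both
   subscripts, so there is no dependence.  */
#if 0
static void
subscript_coupling_example (void)
{
  HOST_WIDE_INT dist_sub1 = 1, dist_sub2 = 0;
  /* Coupled subscripts demanding different distances cannot both be
     satisfied by one dependence.  */
  gcc_assert (dist_sub1 != dist_sub2);
}
#endif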
5189 /* Return true when the DDR contains only access functions that are
5190 invariant with respect to loop number LNUM. */
5192 static bool
5193 invariant_access_functions (const struct data_dependence_relation *ddr,
5194 int lnum)
5196 for (subscript *sub : DDR_SUBSCRIPTS (ddr))
5197 if (!evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 0), lnum)
5198 || !evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 1), lnum))
5199 return false;
5201 return true;
5204 /* Helper function for the case where DDR_A and DDR_B are the same
5205 multivariate access function with a constant step. For an example
5206 see pr34635-1.c. */
5208 static void
5209 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
5211 int x_1, x_2;
5212 tree c_1 = CHREC_LEFT (c_2);
5213 tree c_0 = CHREC_LEFT (c_1);
5214 lambda_vector dist_v;
5215 HOST_WIDE_INT v1, v2, cd;
5217 /* Polynomials with more than 2 variables are not handled yet. When
5218 the evolution steps are parameters, it is not possible to
5219 represent the dependence using classical distance vectors. */
5220 if (TREE_CODE (c_0) != INTEGER_CST
5221 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
5222 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
5224 DDR_AFFINE_P (ddr) = false;
5225 return;
5228 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
5229 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
5231 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
5232 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5233 v1 = int_cst_value (CHREC_RIGHT (c_1));
5234 v2 = int_cst_value (CHREC_RIGHT (c_2));
5235 cd = gcd (v1, v2);
5236 v1 /= cd;
5237 v2 /= cd;
5239 if (v2 < 0)
5241 v2 = -v2;
5242 v1 = -v1;
5245 dist_v[x_1] = v2;
5246 dist_v[x_2] = -v1;
5247 save_dist_v (ddr, dist_v);
5249 add_outer_distances (ddr, dist_v, x_1);
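/* Arithmetic check of the example above (illustration only, not part
   of GCC; the helper name is hypothetical): for {{0, +, 2}_1, +, 3}_2
   the accessed index is 2*i + 3*j, and moving +3 iterations in loop_1
   and -2 iterations in loop_2 reaches the same element, hence the self
   distance vector (3, -2).  */
#if 0
static void
multivariate_self_dist_example (void)
{
  HOST_WIDE_INT v1 = 2, v2 = 3;  /* inner and outer steps */
  /* index (i + v2, j - v1) - index (i, j) == v1 * v2 - v2 * v1 == 0.  */
  gcc_assert (v1 * v2 - v2 * v1 == 0);
}
#endif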
5252 /* Helper function for the case where DDR_A and DDR_B are the same
5253 access functions. */
5255 static void
5256 add_other_self_distances (struct data_dependence_relation *ddr)
5258 lambda_vector dist_v;
5259 unsigned i;
5260 int index_carry = DDR_NB_LOOPS (ddr);
5261 subscript *sub;
5262 class loop *loop = DDR_LOOP_NEST (ddr)[0];
5264 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
5266 tree access_fun = SUB_ACCESS_FN (sub, 0);
5268 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
5270 if (!evolution_function_is_univariate_p (access_fun, loop->num))
5272 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
5274 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
5275 return;
5278 access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);
5280 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
5281 add_multivariate_self_dist (ddr, access_fun);
5282 else
5283 /* The evolution step is not constant: it varies in
5284 the outer loop, so this cannot be represented by a
5285 distance vector. For example in pr34635.c the
5286 evolution is {0, +, {0, +, 4}_1}_2. */
5287 DDR_AFFINE_P (ddr) = false;
5289 return;
5292 /* When data references are collected in a loop while data
5293 dependences are analyzed in a loop nest nested in that loop, we
5294 would have more access functions than loops. Skip access
5295 functions of loops not in the loop nest.
5297 See PR89725 for more information. */
5298 if (flow_loop_nested_p (get_loop (cfun, CHREC_VARIABLE (access_fun)),
5299 loop))
5300 continue;
5302 index_carry = MIN (index_carry,
5303 index_in_loop_nest (CHREC_VARIABLE (access_fun),
5304 DDR_LOOP_NEST (ddr)));
5308 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5309 add_outer_distances (ddr, dist_v, index_carry);
5312 static void
5313 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
5315 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5317 dist_v[0] = 1;
5318 save_dist_v (ddr, dist_v);
5321 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
5322 is the case for example when access functions are the same and
5323 equal to a constant, as in:
5325 | loop_1
5326 | A[3] = ...
5327 | ... = A[3]
5328 | endloop_1
5330 in which case the distance vectors are (0) and (1). */
5332 static void
5333 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
5335 unsigned i, j;
5337 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
5339 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
5340 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
5341 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
5343 for (j = 0; j < ca->n; j++)
5344 if (affine_function_zero_p (ca->fns[j]))
5346 insert_innermost_unit_dist_vector (ddr);
5347 return;
5350 for (j = 0; j < cb->n; j++)
5351 if (affine_function_zero_p (cb->fns[j]))
5353 insert_innermost_unit_dist_vector (ddr);
5354 return;
5359 /* Return true when the DDR contains two data references that have the
5360 same access functions. */
5362 static inline bool
5363 same_access_functions (const struct data_dependence_relation *ddr)
5365 for (subscript *sub : DDR_SUBSCRIPTS (ddr))
5366 if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
5367 SUB_ACCESS_FN (sub, 1)))
5368 return false;
5370 return true;
5373 /* Compute the classic per loop distance vector. DDR is the data
5374 dependence relation to build a vector from. Return false when we fail
5375 to represent the data dependence as a distance vector. */
5377 static bool
5378 build_classic_dist_vector (struct data_dependence_relation *ddr,
5379 class loop *loop_nest)
5381 bool init_b = false;
5382 int index_carry = DDR_NB_LOOPS (ddr);
5383 lambda_vector dist_v;
5385 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
5386 return false;
5388 if (same_access_functions (ddr))
5390 /* Save the 0 vector. */
5391 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5392 save_dist_v (ddr, dist_v);
5394 if (invariant_access_functions (ddr, loop_nest->num))
5395 add_distance_for_zero_overlaps (ddr);
5397 if (DDR_NB_LOOPS (ddr) > 1)
5398 add_other_self_distances (ddr);
5400 return true;
5403 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5404 if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
5405 return false;
5407 /* Save the distance vector if we initialized one. */
5408 if (init_b)
5410 /* Verify a basic constraint: classic distance vectors should
5411 always be lexicographically positive.
5413 Data references are collected in the order of execution of
5414 the program, thus for the following loop
5416 | for (i = 1; i < 100; i++)
5417 | for (j = 1; j < 100; j++)
5419 | t = T[j+1][i-1]; // A
5420 | T[j][i] = t + 2; // B
5423 references are collected following the direction of the wind:
5424 A then B. The data dependence tests are performed also
5425 following this order, such that we're looking at the distance
5426 separating the elements accessed by A from the elements later
5427 accessed by B. But in this example, the distance returned by
5428 test_dep (A, B) is lexicographically negative (-1, 1), which
5429 means that the access A occurs later than B with respect to
5430 the outer loop, i.e. we're actually looking upwind. In this
5431 case we solve test_dep (B, A) looking downwind to the
5432 lexicographically positive solution, that returns the
5433 distance vector (1, -1). */
5434 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
5436 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5437 if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
5438 return false;
5439 compute_subscript_distance (ddr);
5440 if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
5441 &index_carry))
5442 return false;
5443 save_dist_v (ddr, save_v);
5444 DDR_REVERSED_P (ddr) = true;
5446 /* In this case there is a dependence forward for all the
5447 outer loops:
5449 | for (k = 1; k < 100; k++)
5450 | for (i = 1; i < 100; i++)
5451 | for (j = 1; j < 100; j++)
5453 | t = T[j+1][i-1]; // A
5454 | T[j][i] = t + 2; // B
5457 the vectors are:
5458 (0, 1, -1)
5459 (1, 1, -1)
5460 (1, -1, 1)
5462 if (DDR_NB_LOOPS (ddr) > 1)
5464 add_outer_distances (ddr, save_v, index_carry);
5465 add_outer_distances (ddr, dist_v, index_carry);
5468 else
5470 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5471 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
5473 if (DDR_NB_LOOPS (ddr) > 1)
5475 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5477 if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
5478 return false;
5479 compute_subscript_distance (ddr);
5480 if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
5481 &index_carry))
5482 return false;
5484 save_dist_v (ddr, save_v);
5485 add_outer_distances (ddr, dist_v, index_carry);
5486 add_outer_distances (ddr, opposite_v, index_carry);
5488 else
5489 save_dist_v (ddr, save_v);
5492 else
5494 /* There is a distance of 1 on all the outer loops: Example:
5495 there is a dependence of distance 1 on loop_1 for the array A.
5497 | loop_1
5498 | A[5] = ...
5499 | endloop
5501 add_outer_distances (ddr, dist_v,
5502 lambda_vector_first_nz (dist_v,
5503 DDR_NB_LOOPS (ddr), 0));
5506 if (dump_file && (dump_flags & TDF_DETAILS))
5508 unsigned i;
5510 fprintf (dump_file, "(build_classic_dist_vector\n");
5511 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
5513 fprintf (dump_file, " dist_vector = (");
5514 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
5515 DDR_NB_LOOPS (ddr));
5516 fprintf (dump_file, " )\n");
5518 fprintf (dump_file, ")\n");
5521 return true;
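/* A minimal sketch of the lexicographic positivity constraint
   discussed above (illustration only, not part of GCC; the helper name
   is hypothetical): a distance vector is kept only if its first
   nonzero component is positive; (-1, 1) fails the check, so the
   relation is re-tested in the opposite direction, yielding
   (1, -1).  */
#if 0
static bool
lexico_pos_example_p (lambda_int v0, lambda_int v1)
{
  lambda_int first_nz = v0 != 0 ? v0 : v1;
  return first_nz > 0;  /* false for (-1, 1), true for (1, -1) */
}
#endif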
5524 /* Return the direction for a given distance.
5525 FIXME: Computing dir this way is suboptimal, since dir can catch
5526 cases that dist is unable to represent. */
5528 static inline enum data_dependence_direction
5529 dir_from_dist (int dist)
5531 if (dist > 0)
5532 return dir_positive;
5533 else if (dist < 0)
5534 return dir_negative;
5535 else
5536 return dir_equal;
5539 /* Compute the classic per loop direction vector. DDR is the data
5540 dependence relation to build a vector from. */
5542 static void
5543 build_classic_dir_vector (struct data_dependence_relation *ddr)
5545 unsigned i, j;
5546 lambda_vector dist_v;
5548 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
5550 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5552 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
5553 dir_v[j] = dir_from_dist (dist_v[j]);
5555 save_dir_v (ddr, dir_v);

/* Helper function.  Returns true when there is a dependence between the
   data references.  A_INDEX is the index of the first reference (0 for
   DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference.  */

static bool
subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
                               unsigned int a_index, unsigned int b_index,
                               class loop *loop_nest)
{
  unsigned int i;
  tree last_conflicts;
  struct subscript *subscript;
  tree res = NULL_TREE;

  for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
    {
      conflict_function *overlaps_a, *overlaps_b;

      analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
                                      SUB_ACCESS_FN (subscript, b_index),
                                      &overlaps_a, &overlaps_b,
                                      &last_conflicts, loop_nest);

      if (SUB_CONFLICTS_IN_A (subscript))
        free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
      if (SUB_CONFLICTS_IN_B (subscript))
        free_conflict_function (SUB_CONFLICTS_IN_B (subscript));

      SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
      SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
      SUB_LAST_CONFLICT (subscript) = last_conflicts;

      /* If there is any undetermined conflict function we have to
         give a conservative answer in case we cannot prove that
         no dependence exists when analyzing another subscript.  */
      if (CF_NOT_KNOWN_P (overlaps_a)
          || CF_NOT_KNOWN_P (overlaps_b))
        {
          res = chrec_dont_know;
          continue;
        }

      /* When there is a subscript with no dependence we can stop.  */
      else if (CF_NO_DEPENDENCE_P (overlaps_a)
               || CF_NO_DEPENDENCE_P (overlaps_b))
        {
          res = chrec_known;
          break;
        }
    }

  if (res == NULL_TREE)
    return true;

  if (res == chrec_known)
    dependence_stats.num_dependence_independent++;
  else
    dependence_stats.num_dependence_undetermined++;

  finalize_ddr_dependent (ddr, res);
  return false;
}
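
/* A sketch of how the per-subscript results combine (illustrative):
   for the pair A[i][j] / A[i][j-1], the first subscript (i, i) always
   conflicts and the second (j, j-1) conflicts at distance 1, so the
   function returns true (dependent).  If instead any single subscript
   had been proved independent (CF_NO_DEPENDENCE_P), the loop breaks
   and the whole relation is finalized as chrec_known, even when an
   earlier subscript was undetermined.  */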

/* Computes the conflicting iterations in LOOP_NEST, and initializes DDR.  */

static void
subscript_dependence_tester (struct data_dependence_relation *ddr,
                             class loop *loop_nest)
{
  if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
    dependence_stats.num_dependence_dependent++;

  compute_subscript_distance (ddr);
  if (build_classic_dist_vector (ddr, loop_nest))
    build_classic_dir_vector (ddr);
}

/* Returns true when all the access functions of A are affine or
   constant with respect to LOOP_NEST.  */

static bool
access_functions_are_affine_or_constant_p (const struct data_reference *a,
                                           const class loop *loop_nest)
{
  vec<tree> fns = DR_ACCESS_FNS (a);
  for (tree t : fns)
    if (!evolution_function_is_invariant_p (t, loop_nest->num)
        && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
      return false;

  return true;
}
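
/* Examples (illustrative): the access function {0, +, 1}_1, i.e. A[i]
   in loop 1, is affine, and so is a multivariate chrec such as
   {{0, +, 20}_1, +, 1}_2 for a two-dimensional access; but something
   like A[i*i] has no affine evolution, so this predicate fails and
   compute_affine_dependence falls back to chrec_dont_know.  */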

/* This computes the affine dependence relation between A and B with
   respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
   independence between two accesses, while CHREC_DONT_KNOW is used
   for representing the unknown relation.

   Note that it is possible to stop the computation of the dependence
   relation the first time we detect a CHREC_KNOWN element for a given
   subscript.  */

void
compute_affine_dependence (struct data_dependence_relation *ddr,
                           class loop *loop_nest)
{
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "(compute_affine_dependence\n");
      fprintf (dump_file, "  ref_a: ");
      print_generic_expr (dump_file, DR_REF (dra));
      fprintf (dump_file, ", stmt_a: ");
      print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
      fprintf (dump_file, "  ref_b: ");
      print_generic_expr (dump_file, DR_REF (drb));
      fprintf (dump_file, ", stmt_b: ");
      print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
    }

  /* Analyze only when the dependence relation is not yet known.  */
  if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      dependence_stats.num_dependence_tests++;

      if (access_functions_are_affine_or_constant_p (dra, loop_nest)
          && access_functions_are_affine_or_constant_p (drb, loop_nest))
        subscript_dependence_tester (ddr, loop_nest);

      /* As a last case, if the dependence cannot be determined, or if
         the dependence is considered too difficult to determine, answer
         "don't know".  */
      else
        {
          dependence_stats.num_dependence_undetermined++;

          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Data ref a:\n");
              dump_data_reference (dump_file, dra);
              fprintf (dump_file, "Data ref b:\n");
              dump_data_reference (dump_file, drb);
              fprintf (dump_file, "affine dependence test not usable: "
                       "access function not affine or constant.\n");
            }
          finalize_ddr_dependent (ddr, chrec_dont_know);
        }
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
        fprintf (dump_file, ") -> no dependence\n");
      else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
        fprintf (dump_file, ") -> dependence analysis failed\n");
      else
        fprintf (dump_file, ")\n");
    }
}
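
/* With detailed dumping enabled, the calls above produce output of
   roughly this shape (an illustrative reconstruction from the fprintf
   calls, not a verbatim dump):

     (compute_affine_dependence
       ref_a: A[i_1], stmt_a: A[i_1] = 1;
       ref_b: B[i_1], stmt_b: t_2 = B[i_1];
     ) -> no dependence
*/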

/* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
   the data references in DATAREFS, in the LOOP_NEST.  When
   COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
   relations.  Return true when successful, i.e. when the number of
   data references is small enough to be handled.  */

bool
compute_all_dependences (const vec<data_reference_p> &datarefs,
                         vec<ddr_p> *dependence_relations,
                         const vec<loop_p> &loop_nest,
                         bool compute_self_and_rr)
{
  struct data_dependence_relation *ddr;
  struct data_reference *a, *b;
  unsigned int i, j;

  if ((int) datarefs.length ()
      > param_loop_max_datarefs_for_datadeps)
    {
      struct data_dependence_relation *ddr;

      /* Insert a single relation into dependence_relations:
         chrec_dont_know.  */
      ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
      dependence_relations->safe_push (ddr);
      return false;
    }

  FOR_EACH_VEC_ELT (datarefs, i, a)
    for (j = i + 1; datarefs.iterate (j, &b); j++)
      if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
        {
          ddr = initialize_data_dependence_relation (a, b, loop_nest);
          dependence_relations->safe_push (ddr);
          if (loop_nest.exists ())
            compute_affine_dependence (ddr, loop_nest[0]);
        }

  if (compute_self_and_rr)
    FOR_EACH_VEC_ELT (datarefs, i, a)
      {
        ddr = initialize_data_dependence_relation (a, a, loop_nest);
        dependence_relations->safe_push (ddr);
        if (loop_nest.exists ())
          compute_affine_dependence (ddr, loop_nest[0]);
      }

  return true;
}
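
/* Cost sketch: for N data references the loops above create at most
   N * (N - 1) / 2 pair relations, plus N self relations when
   COMPUTE_SELF_AND_RR is true; e.g. N = 4 gives up to 6 + 4 = 10 DDRs.
   This quadratic growth is why the number of datarefs is capped by
   param_loop_max_datarefs_for_datadeps beforehand.  */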

/* Describes a location of a memory reference.  */

struct data_ref_loc
{
  /* The memory reference.  */
  tree ref;

  /* True if the memory reference is read.  */
  bool is_read;

  /* True if the data reference is conditional within the containing
     statement, i.e. if it might not occur even when the statement
     is executed and runs to completion.  */
  bool is_conditional_in_stmt;
};

/* Stores the locations of memory references in STMT to REFERENCES.  Returns
   true if STMT clobbers memory, false otherwise.  */

static bool
get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
{
  bool clobbers_memory = false;
  data_ref_loc ref;
  tree op0, op1;
  enum gimple_code stmt_code = gimple_code (stmt);

  /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
     As we cannot model data references to accesses that are not
     spelled out explicitly, give up if such accesses may occur.  */
  if (stmt_code == GIMPLE_CALL
      && !(gimple_call_flags (stmt) & ECF_CONST))
    {
      /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
      if (gimple_call_internal_p (stmt))
        switch (gimple_call_internal_fn (stmt))
          {
          case IFN_GOMP_SIMD_LANE:
            {
              class loop *loop = gimple_bb (stmt)->loop_father;
              tree uid = gimple_call_arg (stmt, 0);
              gcc_assert (TREE_CODE (uid) == SSA_NAME);
              if (loop == NULL
                  || loop->simduid != SSA_NAME_VAR (uid))
                clobbers_memory = true;
              break;
            }
          case IFN_MASK_LOAD:
          case IFN_MASK_STORE:
            break;
          default:
            clobbers_memory = true;
            break;
          }
      else
        clobbers_memory = true;
    }
  else if (stmt_code == GIMPLE_ASM
           && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
               || gimple_vuse (stmt)))
    clobbers_memory = true;

  if (!gimple_vuse (stmt))
    return clobbers_memory;

  if (stmt_code == GIMPLE_ASSIGN)
    {
      tree base;
      op0 = gimple_assign_lhs (stmt);
      op1 = gimple_assign_rhs1 (stmt);

      if (DECL_P (op1)
          || (REFERENCE_CLASS_P (op1)
              && (base = get_base_address (op1))
              && TREE_CODE (base) != SSA_NAME
              && !is_gimple_min_invariant (base)))
        {
          ref.ref = op1;
          ref.is_read = true;
          ref.is_conditional_in_stmt = false;
          references->safe_push (ref);
        }
    }
  else if (stmt_code == GIMPLE_CALL)
    {
      unsigned i, n;
      tree ptr, type;
      unsigned int align;

      ref.is_read = false;
      if (gimple_call_internal_p (stmt))
        switch (gimple_call_internal_fn (stmt))
          {
          case IFN_MASK_LOAD:
            if (gimple_call_lhs (stmt) == NULL_TREE)
              break;
            ref.is_read = true;
            /* FALLTHRU */
          case IFN_MASK_STORE:
            ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
            align = tree_to_shwi (gimple_call_arg (stmt, 1));
            if (ref.is_read)
              type = TREE_TYPE (gimple_call_lhs (stmt));
            else
              type = TREE_TYPE (gimple_call_arg (stmt, 3));
            if (TYPE_ALIGN (type) != align)
              type = build_aligned_type (type, align);
            ref.is_conditional_in_stmt = true;
            ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
                                   ptr);
            references->safe_push (ref);
            return false;
          default:
            break;
          }

      op0 = gimple_call_lhs (stmt);
      n = gimple_call_num_args (stmt);
      for (i = 0; i < n; i++)
        {
          op1 = gimple_call_arg (stmt, i);

          if (DECL_P (op1)
              || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
            {
              ref.ref = op1;
              ref.is_read = true;
              ref.is_conditional_in_stmt = false;
              references->safe_push (ref);
            }
        }
    }
  else
    return clobbers_memory;

  if (op0
      && (DECL_P (op0)
          || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
    {
      ref.ref = op0;
      ref.is_read = false;
      ref.is_conditional_in_stmt = false;
      references->safe_push (ref);
    }
  return clobbers_memory;
}
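
/* Example of the masked-load handling above (illustrative GIMPLE):

     lhs_1 = .MASK_LOAD (ptr_2, align, mask_3);

   is recorded as a read of the artificial reference MEM_REF[ptr_2, 0],
   with is_conditional_in_stmt set, because the access only happens for
   the active lanes of mask_3 even when the statement executes.  */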

/* Returns true if the loop-nest has any data reference.  */

bool
loop_nest_has_data_refs (loop_p loop)
{
  basic_block *bbs = get_loop_body (loop);
  auto_vec<data_ref_loc, 3> references;

  for (unsigned i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];
      gimple_stmt_iterator bsi;

      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        {
          gimple *stmt = gsi_stmt (bsi);
          get_references_in_stmt (stmt, &references);
          if (references.length ())
            {
              free (bbs);
              return true;
            }
        }
    }
  free (bbs);
  return false;
}

/* Stores the data references in STMT to DATAREFS.  If there is an
   unanalyzable reference, returns a failure result, otherwise returns
   success.  NEST is the outermost loop of the loop nest in which the
   references should be analyzed.  */

opt_result
find_data_references_in_stmt (class loop *nest, gimple *stmt,
                              vec<data_reference_p> *datarefs)
{
  auto_vec<data_ref_loc, 2> references;
  data_reference_p dr;

  if (get_references_in_stmt (stmt, &references))
    return opt_result::failure_at (stmt, "statement clobbers memory: %G",
                                   stmt);

  for (const data_ref_loc &ref : references)
    {
      dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
                            loop_containing_stmt (stmt), ref.ref,
                            stmt, ref.is_read, ref.is_conditional_in_stmt);
      gcc_assert (dr != NULL);
      datarefs->safe_push (dr);
    }

  return opt_result::success ();
}

/* Stores the data references in STMT to DATAREFS.  If there is an
   unanalyzable reference, returns false, otherwise returns true.
   NEST is the outermost loop of the loop nest in which the references
   should be instantiated, LOOP is the loop in which the references
   should be analyzed.  */

bool
graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
                                       vec<data_reference_p> *datarefs)
{
  auto_vec<data_ref_loc, 2> references;
  bool ret = true;
  data_reference_p dr;

  if (get_references_in_stmt (stmt, &references))
    return false;

  for (const data_ref_loc &ref : references)
    {
      dr = create_data_ref (nest, loop, ref.ref, stmt, ref.is_read,
                            ref.is_conditional_in_stmt);
      gcc_assert (dr != NULL);
      datarefs->safe_push (dr);
    }

  return ret;
}

/* Search the data references in BB, and record the information into
   DATAREFS.  Returns chrec_dont_know when failing to analyze a
   difficult case, returns NULL_TREE otherwise.  */

tree
find_data_references_in_bb (class loop *loop, basic_block bb,
                            vec<data_reference_p> *datarefs)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    {
      gimple *stmt = gsi_stmt (bsi);

      if (!find_data_references_in_stmt (loop, stmt, datarefs))
        {
          struct data_reference *res;
          res = XCNEW (struct data_reference);
          datarefs->safe_push (res);

          return chrec_dont_know;
        }
    }

  return NULL_TREE;
}

/* Search the data references in LOOP, and record the information into
   DATAREFS.  Returns chrec_dont_know when failing to analyze a
   difficult case, returns NULL_TREE otherwise.

   TODO: This function should be made smarter so that it can handle
   address arithmetic as if it were array accesses, etc.  */

tree
find_data_references_in_loop (class loop *loop,
                              vec<data_reference_p> *datarefs)
{
  basic_block bb, *bbs;
  unsigned int i;

  bbs = get_loop_body_in_dom_order (loop);

  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = bbs[i];

      if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
        {
          free (bbs);
          return chrec_dont_know;
        }
    }
  free (bbs);

  return NULL_TREE;
}

/* Return the alignment in bytes that DRB is guaranteed to have at all
   times.  */

unsigned int
dr_alignment (innermost_loop_behavior *drb)
{
  /* Get the alignment of BASE_ADDRESS + INIT.  */
  unsigned int alignment = drb->base_alignment;
  unsigned int misalignment = (drb->base_misalignment
                               + TREE_INT_CST_LOW (drb->init));
  if (misalignment != 0)
    alignment = MIN (alignment, misalignment & -misalignment);

  /* Cap it to the alignment of OFFSET.  */
  if (!integer_zerop (drb->offset))
    alignment = MIN (alignment, drb->offset_alignment);

  /* Cap it to the alignment of STEP.  */
  if (!integer_zerop (drb->step))
    alignment = MIN (alignment, drb->step_alignment);

  return alignment;
}
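
/* Worked example (illustrative numbers): with base_alignment 16,
   base_misalignment 0 and init 4, the misalignment is 4 and
   4 & -4 == 4, so the guaranteed alignment drops from 16 to 4 bytes;
   a nonzero OFFSET or STEP can then only lower it further.  */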

/* If BASE is a pointer-typed SSA name, try to find the object that it
   is based on.  Return this object X on success and store the alignment
   in bytes of BASE - &X in *ALIGNMENT_OUT.  */

static tree
get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
{
  if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL_TREE;

  gimple *def = SSA_NAME_DEF_STMT (base);
  base = analyze_scalar_evolution (loop_containing_stmt (def), base);

  /* Peel chrecs and record the minimum alignment preserved by
     all steps.  */
  unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
  while (TREE_CODE (base) == POLYNOMIAL_CHREC)
    {
      unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
      alignment = MIN (alignment, step_alignment);
      base = CHREC_LEFT (base);
    }

  /* Punt if the expression is too complicated to handle.  */
  if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL_TREE;

  /* The only useful cases are those for which a dereference folds to something
     other than an INDIRECT_REF.  */
  tree ref_type = TREE_TYPE (TREE_TYPE (base));
  tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
  if (!ref)
    return NULL_TREE;

  /* Analyze the base to which the steps we peeled were applied.  */
  poly_int64 bitsize, bitpos, bytepos;
  machine_mode mode;
  int unsignedp, reversep, volatilep;
  tree offset;
  base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
                              &unsignedp, &reversep, &volatilep);
  if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
    return NULL_TREE;

  /* Restrict the alignment to that guaranteed by the offsets.  */
  unsigned int bytepos_alignment = known_alignment (bytepos);
  if (bytepos_alignment != 0)
    alignment = MIN (alignment, bytepos_alignment);
  if (offset)
    {
      unsigned int offset_alignment = highest_pow2_factor (offset);
      alignment = MIN (alignment, offset_alignment);
    }

  *alignment_out = alignment;
  return base;
}
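
/* A plausible walk-through of the peeling above (illustrative, with
   assumed values): if BASE has the scalar evolution {&a[1], +, 8}_1
   for an array of 4-byte elements, the loop records the step alignment
   8 and peels down to &a[1]; dereferencing folds to a[1], and
   get_inner_reference yields the object `a' with bytepos 4, so the
   final alignment is MIN (8, 4) = 4 bytes and `a' is returned.  */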

/* Return the object whose alignment would need to be changed in order
   to increase the alignment of ADDR.  Store the maximum achievable
   alignment in *MAX_ALIGNMENT.  */

tree
get_base_for_alignment (tree addr, unsigned int *max_alignment)
{
  tree base = get_base_for_alignment_1 (addr, max_alignment);
  if (base)
    return base;

  if (TREE_CODE (addr) == ADDR_EXPR)
    addr = TREE_OPERAND (addr, 0);
  *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
  return addr;
}

/* Recursive helper function.  */

static bool
find_loop_nest_1 (class loop *loop, vec<loop_p> *loop_nest)
{
  /* Inner loops of the nest should not contain siblings.  Example:
     when there are two consecutive loops,

     | loop_0
     |   loop_1
     |     A[{0, +, 1}_1]
     |   endloop_1
     |   loop_2
     |     A[{0, +, 1}_2]
     |   endloop_2
     | endloop_0

     the dependence relation cannot be captured by the distance
     abstraction.  */
  if (loop->next)
    return false;

  loop_nest->safe_push (loop);
  if (loop->inner)
    return find_loop_nest_1 (loop->inner, loop_nest);
  return true;
}

/* Return false when the LOOP is not well nested.  Otherwise return
   true and insert in LOOP_NEST the loops of the nest.  LOOP_NEST will
   contain the loops from the outermost to the innermost, as they will
   appear in the classic distance vector.  */

bool
find_loop_nest (class loop *loop, vec<loop_p> *loop_nest)
{
  loop_nest->safe_push (loop);
  if (loop->inner)
    return find_loop_nest_1 (loop->inner, loop_nest);
  return true;
}

/* Returns true when the data dependences have been computed, false otherwise.
   Given a loop nest LOOP, the following vectors are returned:
   DATAREFS is initialized to all the array elements contained in this loop,
   DEPENDENCE_RELATIONS contains the relations between the data references.
   Compute read-read and self relations if
   COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */

bool
compute_data_dependences_for_loop (class loop *loop,
                                   bool compute_self_and_read_read_dependences,
                                   vec<loop_p> *loop_nest,
                                   vec<data_reference_p> *datarefs,
                                   vec<ddr_p> *dependence_relations)
{
  bool res = true;

  memset (&dependence_stats, 0, sizeof (dependence_stats));

  /* If the loop nest is not well formed, or one of the data references
     is not computable, give up without spending time to compute other
     dependences.  */
  if (!loop
      || !find_loop_nest (loop, loop_nest)
      || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
      || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
                                   compute_self_and_read_read_dependences))
    res = false;

  if (dump_file && (dump_flags & TDF_STATS))
    {
      fprintf (dump_file, "Dependence tester statistics:\n");

      fprintf (dump_file, "Number of dependence tests: %d\n",
               dependence_stats.num_dependence_tests);
      fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
               dependence_stats.num_dependence_dependent);
      fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
               dependence_stats.num_dependence_independent);
      fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
               dependence_stats.num_dependence_undetermined);

      fprintf (dump_file, "Number of subscript tests: %d\n",
               dependence_stats.num_subscript_tests);
      fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
               dependence_stats.num_subscript_undetermined);
      fprintf (dump_file, "Number of same subscript function: %d\n",
               dependence_stats.num_same_subscript_function);

      fprintf (dump_file, "Number of ziv tests: %d\n",
               dependence_stats.num_ziv);
      fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
               dependence_stats.num_ziv_dependent);
      fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
               dependence_stats.num_ziv_independent);
      fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
               dependence_stats.num_ziv_unimplemented);

      fprintf (dump_file, "Number of siv tests: %d\n",
               dependence_stats.num_siv);
      fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
               dependence_stats.num_siv_dependent);
      fprintf (dump_file, "Number of siv tests returning independent: %d\n",
               dependence_stats.num_siv_independent);
      fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
               dependence_stats.num_siv_unimplemented);

      fprintf (dump_file, "Number of miv tests: %d\n",
               dependence_stats.num_miv);
      fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
               dependence_stats.num_miv_dependent);
      fprintf (dump_file, "Number of miv tests returning independent: %d\n",
               dependence_stats.num_miv_independent);
      fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
               dependence_stats.num_miv_unimplemented);
    }

  return res;
}

/* Free the memory used by a data dependence relation DDR.  */

void
free_dependence_relation (struct data_dependence_relation *ddr)
{
  if (ddr == NULL)
    return;

  if (DDR_SUBSCRIPTS (ddr).exists ())
    free_subscripts (DDR_SUBSCRIPTS (ddr));
  DDR_DIST_VECTS (ddr).release ();
  DDR_DIR_VECTS (ddr).release ();

  free (ddr);
}

/* Free the memory used by the data dependence relations from
   DEPENDENCE_RELATIONS.  */

void
free_dependence_relations (vec<ddr_p>& dependence_relations)
{
  for (data_dependence_relation *ddr : dependence_relations)
    if (ddr)
      free_dependence_relation (ddr);

  dependence_relations.release ();
}

/* Free the memory used by the data references from DATAREFS.  */

void
free_data_refs (vec<data_reference_p>& datarefs)
{
  for (data_reference *dr : datarefs)
    free_data_ref (dr);
  datarefs.release ();
}

/* Common routine implementing both dr_direction_indicator and
   dr_zero_step_indicator.  Return USEFUL_MIN if the indicator is known
   to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
   Return the step as the indicator otherwise.  */

static tree
dr_step_indicator (struct data_reference *dr, int useful_min)
{
  tree step = DR_STEP (dr);
  if (!step)
    return NULL_TREE;
  STRIP_NOPS (step);
  /* Look for cases where the step is scaled by a positive constant
     integer, which will often be the access size.  If the multiplication
     doesn't change the sign (due to overflow effects) then we can
     test the unscaled value instead.  */
  if (TREE_CODE (step) == MULT_EXPR
      && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
      && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
    {
      tree factor = TREE_OPERAND (step, 1);
      step = TREE_OPERAND (step, 0);

      /* Strip widening and truncating conversions as well as nops.  */
      if (CONVERT_EXPR_P (step)
          && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
        step = TREE_OPERAND (step, 0);
      tree type = TREE_TYPE (step);

      /* Get the range of step values that would not cause overflow.  */
      widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
                         / wi::to_widest (factor));
      widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
                         / wi::to_widest (factor));

      /* Get the range of values that the unconverted step actually has.  */
      wide_int step_min, step_max;
      value_range vr;
      if (TREE_CODE (step) != SSA_NAME
          || !get_range_query (cfun)->range_of_expr (vr, step)
          || vr.undefined_p ())
        {
          step_min = wi::to_wide (TYPE_MIN_VALUE (type));
          step_max = wi::to_wide (TYPE_MAX_VALUE (type));
        }
      else
        {
          step_min = vr.lower_bound ();
          step_max = vr.upper_bound ();
        }

      /* Check whether the unconverted step has an acceptable range.  */
      signop sgn = TYPE_SIGN (type);
      if (wi::les_p (minv, widest_int::from (step_min, sgn))
          && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
        {
          if (wi::ge_p (step_min, useful_min, sgn))
            return ssize_int (useful_min);
          else if (wi::lt_p (step_max, 0, sgn))
            return ssize_int (-1);
          else
            return fold_convert (ssizetype, step);
        }
    }
  return DR_STEP (dr);
}
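
/* Example (illustrative ranges): if DR_STEP is i_1 * 4 and the range
   of i_1 is known to be [1, 64], the scaling by 4 cannot overflow and
   the unscaled step is at least any USEFUL_MIN <= 1, so the function
   returns ssize_int (USEFUL_MIN) -- a compile-time constant -- rather
   than the symbolic step.  */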

/* Return a value that is negative iff DR has a negative step.  */

tree
dr_direction_indicator (struct data_reference *dr)
{
  return dr_step_indicator (dr, 0);
}

/* Return a value that is zero iff DR has a zero step.  */

tree
dr_zero_step_indicator (struct data_reference *dr)
{
  return dr_step_indicator (dr, 1);
}

/* Return true if DR is known to have a nonnegative (but possibly zero)
   step.  */

bool
dr_known_forward_stride_p (struct data_reference *dr)
{
  tree indicator = dr_direction_indicator (dr);
  tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
                                   fold_convert (ssizetype, indicator),
                                   ssize_int (0));
  return neg_step_val && integer_zerop (neg_step_val);
}
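
/* Usage sketch (illustrative): for a constant DR_STEP of -4 the
   indicator is negative, the LT_EXPR folds to true, and the function
   returns false; for the [1, 64] example above the indicator is
   ssize_int (0), the comparison folds to false, and the function
   returns true.  */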