gccrs: dump: Fix module dumping
[official-gcc.git] / gcc / tree-data-ref.cc
blob23602518665bc7c85c7fc9bf77583890230a3c16
1 /* Data references and dependences detectors.
2 Copyright (C) 2003-2023 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass walks a given loop structure searching for array
22 references. The information about the array accesses is recorded
23 in DATA_REFERENCE structures.
25 The basic test for determining the dependences is:
26 given two access functions chrec1 and chrec2 to a same array, and
27 x and y two vectors from the iteration domain, the same element of
28 the array is accessed twice at iterations x and y if and only if:
29 | chrec1 (x) == chrec2 (y).
31 The goals of this analysis are:
33 - to determine the independence: the relation between two
34 independent accesses is qualified with the chrec_known (this
35 information allows a loop parallelization),
37 - when two data references access the same data, to qualify the
38 dependence relation with classic dependence representations:
40 - distance vectors
41 - direction vectors
42 - loop carried level dependence
43 - polyhedron dependence
44 or with the chains of recurrences based representation,
46 - to define a knowledge base for storing the data dependence
47 information,
49 - to define an interface to access this data.
52 Definitions:
54 - subscript: given two array accesses a subscript is the tuple
55 composed of the access functions for a given dimension. Example:
56 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57 (f1, g1), (f2, g2), (f3, g3).
59 - Diophantine equation: an equation whose coefficients and
60 solutions are integer constants, for example the equation
61 | 3*x + 2*y = 1
62 has an integer solution x = 1 and y = -1.
64 References:
66 - "Advanced Compilation for High Performance Computing" by Randy
67 Allen and Ken Kennedy.
68 http://citeseer.ist.psu.edu/goff91practical.html
70 - "Loop Transformations for Restructuring Compilers - The Foundations"
71 by Utpal Banerjee.
76 #include "config.h"
77 #include "system.h"
78 #include "coretypes.h"
79 #include "backend.h"
80 #include "rtl.h"
81 #include "tree.h"
82 #include "gimple.h"
83 #include "gimple-pretty-print.h"
84 #include "alias.h"
85 #include "fold-const.h"
86 #include "expr.h"
87 #include "gimple-iterator.h"
88 #include "tree-ssa-loop-niter.h"
89 #include "tree-ssa-loop.h"
90 #include "tree-ssa.h"
91 #include "cfgloop.h"
92 #include "tree-data-ref.h"
93 #include "tree-scalar-evolution.h"
94 #include "dumpfile.h"
95 #include "tree-affine.h"
96 #include "builtins.h"
97 #include "tree-eh.h"
98 #include "ssa.h"
99 #include "internal-fn.h"
100 #include "vr-values.h"
101 #include "range-op.h"
102 #include "tree-ssa-loop-ivopts.h"
104 static struct datadep_stats
106 int num_dependence_tests;
107 int num_dependence_dependent;
108 int num_dependence_independent;
109 int num_dependence_undetermined;
111 int num_subscript_tests;
112 int num_subscript_undetermined;
113 int num_same_subscript_function;
115 int num_ziv;
116 int num_ziv_independent;
117 int num_ziv_dependent;
118 int num_ziv_unimplemented;
120 int num_siv;
121 int num_siv_independent;
122 int num_siv_dependent;
123 int num_siv_unimplemented;
125 int num_miv;
126 int num_miv_independent;
127 int num_miv_dependent;
128 int num_miv_unimplemented;
129 } dependence_stats;
131 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
132 unsigned int, unsigned int,
133 class loop *);
134 /* Returns true iff A divides B. */
136 static inline bool
137 tree_fold_divides_p (const_tree a, const_tree b)
139 gcc_assert (TREE_CODE (a) == INTEGER_CST);
140 gcc_assert (TREE_CODE (b) == INTEGER_CST);
141 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
144 /* Returns true iff A divides B. */
146 static inline bool
147 int_divides_p (lambda_int a, lambda_int b)
149 return ((b % a) == 0);
152 /* Return true if reference REF contains a union access. */
154 static bool
155 ref_contains_union_access_p (tree ref)
157 while (handled_component_p (ref))
159 ref = TREE_OPERAND (ref, 0);
160 if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
161 || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
162 return true;
164 return false;
169 /* Dump into FILE all the data references from DATAREFS. */
171 static void
172 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
174 for (data_reference *dr : datarefs)
175 dump_data_reference (file, dr);
178 /* Unified dump into FILE all the data references from DATAREFS. */
180 DEBUG_FUNCTION void
181 debug (vec<data_reference_p> &ref)
183 dump_data_references (stderr, ref);
186 DEBUG_FUNCTION void
187 debug (vec<data_reference_p> *ptr)
189 if (ptr)
190 debug (*ptr);
191 else
192 fprintf (stderr, "<nil>\n");
196 /* Dump into STDERR all the data references from DATAREFS. */
198 DEBUG_FUNCTION void
199 debug_data_references (vec<data_reference_p> datarefs)
201 dump_data_references (stderr, datarefs);
204 /* Print to STDERR the data_reference DR. */
206 DEBUG_FUNCTION void
207 debug_data_reference (struct data_reference *dr)
209 dump_data_reference (stderr, dr);
212 /* Dump function for a DATA_REFERENCE structure. */
214 void
215 dump_data_reference (FILE *outf,
216 struct data_reference *dr)
218 unsigned int i;
220 fprintf (outf, "#(Data Ref: \n");
221 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
222 fprintf (outf, "# stmt: ");
223 print_gimple_stmt (outf, DR_STMT (dr), 0);
224 fprintf (outf, "# ref: ");
225 print_generic_stmt (outf, DR_REF (dr));
226 fprintf (outf, "# base_object: ");
227 print_generic_stmt (outf, DR_BASE_OBJECT (dr));
229 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
231 fprintf (outf, "# Access function %d: ", i);
232 print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
234 fprintf (outf, "#)\n");
237 /* Unified dump function for a DATA_REFERENCE structure. */
239 DEBUG_FUNCTION void
240 debug (data_reference &ref)
242 dump_data_reference (stderr, &ref);
245 DEBUG_FUNCTION void
246 debug (data_reference *ptr)
248 if (ptr)
249 debug (*ptr);
250 else
251 fprintf (stderr, "<nil>\n");
255 /* Dumps the affine function described by FN to the file OUTF. */
257 DEBUG_FUNCTION void
258 dump_affine_function (FILE *outf, affine_fn fn)
260 unsigned i;
261 tree coef;
263 print_generic_expr (outf, fn[0], TDF_SLIM);
264 for (i = 1; fn.iterate (i, &coef); i++)
266 fprintf (outf, " + ");
267 print_generic_expr (outf, coef, TDF_SLIM);
268 fprintf (outf, " * x_%u", i);
272 /* Dumps the conflict function CF to the file OUTF. */
274 DEBUG_FUNCTION void
275 dump_conflict_function (FILE *outf, conflict_function *cf)
277 unsigned i;
279 if (cf->n == NO_DEPENDENCE)
280 fprintf (outf, "no dependence");
281 else if (cf->n == NOT_KNOWN)
282 fprintf (outf, "not known");
283 else
285 for (i = 0; i < cf->n; i++)
287 if (i != 0)
288 fprintf (outf, " ");
289 fprintf (outf, "[");
290 dump_affine_function (outf, cf->fns[i]);
291 fprintf (outf, "]");
296 /* Dump function for a SUBSCRIPT structure. */
298 DEBUG_FUNCTION void
299 dump_subscript (FILE *outf, struct subscript *subscript)
301 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
303 fprintf (outf, "\n (subscript \n");
304 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
305 dump_conflict_function (outf, cf);
306 if (CF_NONTRIVIAL_P (cf))
308 tree last_iteration = SUB_LAST_CONFLICT (subscript);
309 fprintf (outf, "\n last_conflict: ");
310 print_generic_expr (outf, last_iteration);
313 cf = SUB_CONFLICTS_IN_B (subscript);
314 fprintf (outf, "\n iterations_that_access_an_element_twice_in_B: ");
315 dump_conflict_function (outf, cf);
316 if (CF_NONTRIVIAL_P (cf))
318 tree last_iteration = SUB_LAST_CONFLICT (subscript);
319 fprintf (outf, "\n last_conflict: ");
320 print_generic_expr (outf, last_iteration);
323 fprintf (outf, "\n (Subscript distance: ");
324 print_generic_expr (outf, SUB_DISTANCE (subscript));
325 fprintf (outf, " ))\n");
328 /* Print the classic direction vector DIRV to OUTF. */
330 DEBUG_FUNCTION void
331 print_direction_vector (FILE *outf,
332 lambda_vector dirv,
333 int length)
335 int eq;
337 for (eq = 0; eq < length; eq++)
339 enum data_dependence_direction dir = ((enum data_dependence_direction)
340 dirv[eq]);
342 switch (dir)
344 case dir_positive:
345 fprintf (outf, " +");
346 break;
347 case dir_negative:
348 fprintf (outf, " -");
349 break;
350 case dir_equal:
351 fprintf (outf, " =");
352 break;
353 case dir_positive_or_equal:
354 fprintf (outf, " +=");
355 break;
356 case dir_positive_or_negative:
357 fprintf (outf, " +-");
358 break;
359 case dir_negative_or_equal:
360 fprintf (outf, " -=");
361 break;
362 case dir_star:
363 fprintf (outf, " *");
364 break;
365 default:
366 fprintf (outf, "indep");
367 break;
370 fprintf (outf, "\n");
373 /* Print a vector of direction vectors. */
375 DEBUG_FUNCTION void
376 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
377 int length)
379 for (lambda_vector v : dir_vects)
380 print_direction_vector (outf, v, length);
383 /* Print out a vector VEC of length N to OUTFILE. */
385 DEBUG_FUNCTION void
386 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
388 int i;
390 for (i = 0; i < n; i++)
391 fprintf (outfile, HOST_WIDE_INT_PRINT_DEC " ", vector[i]);
392 fprintf (outfile, "\n");
395 /* Print a vector of distance vectors. */
397 DEBUG_FUNCTION void
398 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
399 int length)
401 for (lambda_vector v : dist_vects)
402 print_lambda_vector (outf, v, length);
405 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
407 DEBUG_FUNCTION void
408 dump_data_dependence_relation (FILE *outf, const data_dependence_relation *ddr)
410 struct data_reference *dra, *drb;
412 fprintf (outf, "(Data Dep: \n");
414 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
416 if (ddr)
418 dra = DDR_A (ddr);
419 drb = DDR_B (ddr);
420 if (dra)
421 dump_data_reference (outf, dra);
422 else
423 fprintf (outf, " (nil)\n");
424 if (drb)
425 dump_data_reference (outf, drb);
426 else
427 fprintf (outf, " (nil)\n");
429 fprintf (outf, " (don't know)\n)\n");
430 return;
433 dra = DDR_A (ddr);
434 drb = DDR_B (ddr);
435 dump_data_reference (outf, dra);
436 dump_data_reference (outf, drb);
438 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
439 fprintf (outf, " (no dependence)\n");
441 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
443 unsigned int i;
444 class loop *loopi;
446 subscript *sub;
447 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
449 fprintf (outf, " access_fn_A: ");
450 print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
451 fprintf (outf, " access_fn_B: ");
452 print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
453 dump_subscript (outf, sub);
456 fprintf (outf, " loop nest: (");
457 FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
458 fprintf (outf, "%d ", loopi->num);
459 fprintf (outf, ")\n");
461 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
463 fprintf (outf, " distance_vector: ");
464 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
465 DDR_NB_LOOPS (ddr));
468 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
470 fprintf (outf, " direction_vector: ");
471 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
472 DDR_NB_LOOPS (ddr));
476 fprintf (outf, ")\n");
479 /* Debug version. */
481 DEBUG_FUNCTION void
482 debug_data_dependence_relation (const struct data_dependence_relation *ddr)
484 dump_data_dependence_relation (stderr, ddr);
487 /* Dump into FILE all the dependence relations from DDRS. */
489 DEBUG_FUNCTION void
490 dump_data_dependence_relations (FILE *file, const vec<ddr_p> &ddrs)
492 for (auto ddr : ddrs)
493 dump_data_dependence_relation (file, ddr);
496 DEBUG_FUNCTION void
497 debug (vec<ddr_p> &ref)
499 dump_data_dependence_relations (stderr, ref);
502 DEBUG_FUNCTION void
503 debug (vec<ddr_p> *ptr)
505 if (ptr)
506 debug (*ptr);
507 else
508 fprintf (stderr, "<nil>\n");
512 /* Dump to STDERR all the dependence relations from DDRS. */
514 DEBUG_FUNCTION void
515 debug_data_dependence_relations (vec<ddr_p> ddrs)
517 dump_data_dependence_relations (stderr, ddrs);
520 /* Dumps the distance and direction vectors in FILE. DDRS contains
521 the dependence relations, and VECT_SIZE is the size of the
522 dependence vectors, or in other words the number of loops in the
523 considered nest. */
525 DEBUG_FUNCTION void
526 dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
528 for (data_dependence_relation *ddr : ddrs)
529 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
531 for (lambda_vector v : DDR_DIST_VECTS (ddr))
533 fprintf (file, "DISTANCE_V (");
534 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
535 fprintf (file, ")\n");
538 for (lambda_vector v : DDR_DIR_VECTS (ddr))
540 fprintf (file, "DIRECTION_V (");
541 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
542 fprintf (file, ")\n");
546 fprintf (file, "\n\n");
549 /* Dumps the data dependence relations DDRS in FILE. */
551 DEBUG_FUNCTION void
552 dump_ddrs (FILE *file, vec<ddr_p> ddrs)
554 for (data_dependence_relation *ddr : ddrs)
555 dump_data_dependence_relation (file, ddr);
557 fprintf (file, "\n\n");
560 DEBUG_FUNCTION void
561 debug_ddrs (vec<ddr_p> ddrs)
563 dump_ddrs (stderr, ddrs);
566 /* If RESULT_RANGE is nonnull, set *RESULT_RANGE to the range of
567 OP0 CODE OP1, where:
569 - OP0 CODE OP1 has integral type TYPE
570 - the range of OP0 is given by OP0_RANGE and
571 - the range of OP1 is given by OP1_RANGE.
573 Independently of RESULT_RANGE, try to compute:
575 DELTA = ((sizetype) OP0 CODE (sizetype) OP1)
576 - (sizetype) (OP0 CODE OP1)
578 as a constant and subtract DELTA from the ssizetype constant in *OFF.
579 Return true on success, or false if DELTA is not known at compile time.
581 Truncation and sign changes are known to distribute over CODE, i.e.
583 (itype) (A CODE B) == (itype) A CODE (itype) B
585 for any integral type ITYPE whose precision is no greater than the
586 precision of A and B. */
588 static bool
589 compute_distributive_range (tree type, value_range &op0_range,
590 tree_code code, value_range &op1_range,
591 tree *off, value_range *result_range)
593 gcc_assert (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type));
594 if (result_range)
596 range_op_handler op (code, type);
597 if (!op.fold_range (*result_range, type, op0_range, op1_range))
598 result_range->set_varying (type);
601 /* The distributive property guarantees that if TYPE is no narrower
602 than SIZETYPE,
604 (sizetype) (OP0 CODE OP1) == (sizetype) OP0 CODE (sizetype) OP1
606 and so we can treat DELTA as zero. */
607 if (TYPE_PRECISION (type) >= TYPE_PRECISION (sizetype))
608 return true;
610 /* If overflow is undefined, we can assume that:
612 X == (ssizetype) OP0 CODE (ssizetype) OP1
614 is within the range of TYPE, i.e.:
616 X == (ssizetype) (TYPE) X
618 Distributing the (TYPE) truncation over X gives:
620 X == (ssizetype) (OP0 CODE OP1)
622 Casting both sides to sizetype and distributing the sizetype cast
623 over X gives:
625 (sizetype) OP0 CODE (sizetype) OP1 == (sizetype) (OP0 CODE OP1)
627 and so we can treat DELTA as zero. */
628 if (TYPE_OVERFLOW_UNDEFINED (type))
629 return true;
631 /* Compute the range of:
633 (ssizetype) OP0 CODE (ssizetype) OP1
635 The distributive property guarantees that this has the same bitpattern as:
637 (sizetype) OP0 CODE (sizetype) OP1
639 but its range is more conducive to analysis. */
640 range_cast (op0_range, ssizetype);
641 range_cast (op1_range, ssizetype);
642 value_range wide_range;
643 range_op_handler op (code, ssizetype);
644 bool saved_flag_wrapv = flag_wrapv;
645 flag_wrapv = 1;
646 if (!op.fold_range (wide_range, ssizetype, op0_range, op1_range))
647 wide_range.set_varying (ssizetype);;
648 flag_wrapv = saved_flag_wrapv;
649 if (wide_range.num_pairs () != 1 || !range_int_cst_p (&wide_range))
650 return false;
652 wide_int lb = wide_range.lower_bound ();
653 wide_int ub = wide_range.upper_bound ();
655 /* Calculate the number of times that each end of the range overflows or
656 underflows TYPE. We can only calculate DELTA if the numbers match. */
657 unsigned int precision = TYPE_PRECISION (type);
658 if (!TYPE_UNSIGNED (type))
660 wide_int type_min = wi::mask (precision - 1, true, lb.get_precision ());
661 lb -= type_min;
662 ub -= type_min;
664 wide_int upper_bits = wi::mask (precision, true, lb.get_precision ());
665 lb &= upper_bits;
666 ub &= upper_bits;
667 if (lb != ub)
668 return false;
670 /* OP0 CODE OP1 overflows exactly arshift (LB, PRECISION) times, with
671 negative values indicating underflow. The low PRECISION bits of LB
672 are clear, so DELTA is therefore LB (== UB). */
673 *off = wide_int_to_tree (ssizetype, wi::to_wide (*off) - lb);
674 return true;
677 /* Return true if (sizetype) OP == (sizetype) (TO_TYPE) OP,
678 given that OP has type FROM_TYPE and range RANGE. Both TO_TYPE and
679 FROM_TYPE are integral types. */
681 static bool
682 nop_conversion_for_offset_p (tree to_type, tree from_type, value_range &range)
684 gcc_assert (INTEGRAL_TYPE_P (to_type)
685 && INTEGRAL_TYPE_P (from_type)
686 && !TYPE_OVERFLOW_TRAPS (to_type)
687 && !TYPE_OVERFLOW_TRAPS (from_type));
689 /* Converting to something no narrower than sizetype and then to sizetype
690 is equivalent to converting directly to sizetype. */
691 if (TYPE_PRECISION (to_type) >= TYPE_PRECISION (sizetype))
692 return true;
694 /* Check whether TO_TYPE can represent all values that FROM_TYPE can. */
695 if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type)
696 && (TYPE_UNSIGNED (from_type) || !TYPE_UNSIGNED (to_type)))
697 return true;
699 /* For narrowing conversions, we could in principle test whether
700 the bits in FROM_TYPE but not in TO_TYPE have a fixed value
701 and apply a constant adjustment.
703 For other conversions (which involve a sign change) we could
704 check that the signs are always equal, and apply a constant
705 adjustment if the signs are negative.
707 However, both cases should be rare. */
708 return range_fits_type_p (&range, TYPE_PRECISION (to_type),
709 TYPE_SIGN (to_type));
712 static void
713 split_constant_offset (tree type, tree *var, tree *off,
714 value_range *result_range,
715 hash_map<tree, std::pair<tree, tree> > &cache,
716 unsigned *limit);
718 /* Helper function for split_constant_offset. If TYPE is a pointer type,
719 try to express OP0 CODE OP1 as:
721 POINTER_PLUS <*VAR, (sizetype) *OFF>
723 where:
725 - *VAR has type TYPE
726 - *OFF is a constant of type ssizetype.
728 If TYPE is an integral type, try to express (sizetype) (OP0 CODE OP1) as:
730 *VAR + (sizetype) *OFF
732 where:
734 - *VAR has type sizetype
735 - *OFF is a constant of type ssizetype.
737 In both cases, OP0 CODE OP1 has type TYPE.
739 Return true on success. A false return value indicates that we can't
740 do better than set *OFF to zero.
742 When returning true, set RESULT_RANGE to the range of OP0 CODE OP1,
743 if RESULT_RANGE is nonnull and if we can do better than assume VR_VARYING.
745 CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
746 visited. LIMIT counts down the number of SSA names that we are
747 allowed to process before giving up. */
749 static bool
750 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
751 tree *var, tree *off, value_range *result_range,
752 hash_map<tree, std::pair<tree, tree> > &cache,
753 unsigned *limit)
755 tree var0, var1;
756 tree off0, off1;
757 value_range op0_range, op1_range;
759 *var = NULL_TREE;
760 *off = NULL_TREE;
762 if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type))
763 return false;
765 switch (code)
767 case INTEGER_CST:
768 *var = size_int (0);
769 *off = fold_convert (ssizetype, op0);
770 if (result_range)
771 result_range->set (op0, op0);
772 return true;
774 case POINTER_PLUS_EXPR:
775 split_constant_offset (op0, &var0, &off0, nullptr, cache, limit);
776 split_constant_offset (op1, &var1, &off1, nullptr, cache, limit);
777 *var = fold_build2 (POINTER_PLUS_EXPR, type, var0, var1);
778 *off = size_binop (PLUS_EXPR, off0, off1);
779 return true;
781 case PLUS_EXPR:
782 case MINUS_EXPR:
783 split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
784 split_constant_offset (op1, &var1, &off1, &op1_range, cache, limit);
785 *off = size_binop (code, off0, off1);
786 if (!compute_distributive_range (type, op0_range, code, op1_range,
787 off, result_range))
788 return false;
789 *var = fold_build2 (code, sizetype, var0, var1);
790 return true;
792 case MULT_EXPR:
793 if (TREE_CODE (op1) != INTEGER_CST)
794 return false;
796 split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
797 op1_range.set (op1, op1);
798 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
799 if (!compute_distributive_range (type, op0_range, code, op1_range,
800 off, result_range))
801 return false;
802 *var = fold_build2 (MULT_EXPR, sizetype, var0,
803 fold_convert (sizetype, op1));
804 return true;
806 case ADDR_EXPR:
808 tree base, poffset;
809 poly_int64 pbitsize, pbitpos, pbytepos;
810 machine_mode pmode;
811 int punsignedp, preversep, pvolatilep;
813 op0 = TREE_OPERAND (op0, 0);
814 base
815 = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
816 &punsignedp, &preversep, &pvolatilep);
818 if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
819 return false;
820 base = build_fold_addr_expr (base);
821 off0 = ssize_int (pbytepos);
823 if (poffset)
825 split_constant_offset (poffset, &poffset, &off1, nullptr,
826 cache, limit);
827 off0 = size_binop (PLUS_EXPR, off0, off1);
828 base = fold_build_pointer_plus (base, poffset);
831 var0 = fold_convert (type, base);
833 /* If variable length types are involved, punt, otherwise casts
834 might be converted into ARRAY_REFs in gimplify_conversion.
835 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
836 possibly no longer appears in current GIMPLE, might resurface.
837 This perhaps could run
838 if (CONVERT_EXPR_P (var0))
840 gimplify_conversion (&var0);
841 // Attempt to fill in any within var0 found ARRAY_REF's
842 // element size from corresponding op embedded ARRAY_REF,
843 // if unsuccessful, just punt.
844 } */
845 while (POINTER_TYPE_P (type))
846 type = TREE_TYPE (type);
847 if (int_size_in_bytes (type) < 0)
848 return false;
850 *var = var0;
851 *off = off0;
852 return true;
855 case SSA_NAME:
857 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
858 return false;
860 gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
861 enum tree_code subcode;
863 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
864 return false;
866 subcode = gimple_assign_rhs_code (def_stmt);
868 /* We are using a cache to avoid un-CSEing large amounts of code. */
869 bool use_cache = false;
870 if (!has_single_use (op0)
871 && (subcode == POINTER_PLUS_EXPR
872 || subcode == PLUS_EXPR
873 || subcode == MINUS_EXPR
874 || subcode == MULT_EXPR
875 || subcode == ADDR_EXPR
876 || CONVERT_EXPR_CODE_P (subcode)))
878 use_cache = true;
879 bool existed;
880 std::pair<tree, tree> &e = cache.get_or_insert (op0, &existed);
881 if (existed)
883 if (integer_zerop (e.second))
884 return false;
885 *var = e.first;
886 *off = e.second;
887 /* The caller sets the range in this case. */
888 return true;
890 e = std::make_pair (op0, ssize_int (0));
893 if (*limit == 0)
894 return false;
895 --*limit;
897 var0 = gimple_assign_rhs1 (def_stmt);
898 var1 = gimple_assign_rhs2 (def_stmt);
900 bool res = split_constant_offset_1 (type, var0, subcode, var1,
901 var, off, nullptr, cache, limit);
902 if (res && use_cache)
903 *cache.get (op0) = std::make_pair (*var, *off);
904 /* The caller sets the range in this case. */
905 return res;
907 CASE_CONVERT:
909 /* We can only handle the following conversions:
911 - Conversions from one pointer type to another pointer type.
913 - Conversions from one non-trapping integral type to another
914 non-trapping integral type. In this case, the recursive
915 call makes sure that:
917 (sizetype) OP0
919 can be expressed as a sizetype operation involving VAR and OFF,
920 and all we need to do is check whether:
922 (sizetype) OP0 == (sizetype) (TYPE) OP0
924 - Conversions from a non-trapping sizetype-size integral type to
925 a like-sized pointer type. In this case, the recursive call
926 makes sure that:
928 (sizetype) OP0 == *VAR + (sizetype) *OFF
930 and we can convert that to:
932 POINTER_PLUS <(TYPE) *VAR, (sizetype) *OFF>
934 - Conversions from a sizetype-sized pointer type to a like-sized
935 non-trapping integral type. In this case, the recursive call
936 makes sure that:
938 OP0 == POINTER_PLUS <*VAR, (sizetype) *OFF>
940 where the POINTER_PLUS and *VAR have the same precision as
941 TYPE (and the same precision as sizetype). Then:
943 (sizetype) (TYPE) OP0 == (sizetype) *VAR + (sizetype) *OFF. */
944 tree itype = TREE_TYPE (op0);
945 if ((POINTER_TYPE_P (itype)
946 || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
947 && (POINTER_TYPE_P (type)
948 || (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type)))
949 && (POINTER_TYPE_P (type) == POINTER_TYPE_P (itype)
950 || (TYPE_PRECISION (type) == TYPE_PRECISION (sizetype)
951 && TYPE_PRECISION (itype) == TYPE_PRECISION (sizetype))))
953 if (POINTER_TYPE_P (type))
955 split_constant_offset (op0, var, off, nullptr, cache, limit);
956 *var = fold_convert (type, *var);
958 else if (POINTER_TYPE_P (itype))
960 split_constant_offset (op0, var, off, nullptr, cache, limit);
961 *var = fold_convert (sizetype, *var);
963 else
965 split_constant_offset (op0, var, off, &op0_range,
966 cache, limit);
967 if (!nop_conversion_for_offset_p (type, itype, op0_range))
968 return false;
969 if (result_range)
971 *result_range = op0_range;
972 range_cast (*result_range, type);
975 return true;
977 return false;
980 default:
981 return false;
985 /* If EXP has pointer type, try to express it as:
987 POINTER_PLUS <*VAR, (sizetype) *OFF>
989 where:
991 - *VAR has the same type as EXP
992 - *OFF is a constant of type ssizetype.
994 If EXP has an integral type, try to express (sizetype) EXP as:
996 *VAR + (sizetype) *OFF
998 where:
1000 - *VAR has type sizetype
1001 - *OFF is a constant of type ssizetype.
1003 If EXP_RANGE is nonnull, set it to the range of EXP.
1005 CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
1006 visited. LIMIT counts down the number of SSA names that we are
1007 allowed to process before giving up. */
1009 static void
1010 split_constant_offset (tree exp, tree *var, tree *off, value_range *exp_range,
1011 hash_map<tree, std::pair<tree, tree> > &cache,
1012 unsigned *limit)
1014 tree type = TREE_TYPE (exp), op0, op1;
1015 enum tree_code code;
1017 code = TREE_CODE (exp);
1018 if (exp_range)
1020 *exp_range = type;
1021 if (code == SSA_NAME)
1023 value_range vr;
1024 get_range_query (cfun)->range_of_expr (vr, exp);
1025 if (vr.undefined_p ())
1026 vr.set_varying (TREE_TYPE (exp));
1027 wide_int var_min = wi::to_wide (vr.min ());
1028 wide_int var_max = wi::to_wide (vr.max ());
1029 value_range_kind vr_kind = vr.kind ();
1030 wide_int var_nonzero = get_nonzero_bits (exp);
1031 vr_kind = intersect_range_with_nonzero_bits (vr_kind,
1032 &var_min, &var_max,
1033 var_nonzero,
1034 TYPE_SIGN (type));
1035 /* This check for VR_VARYING is here because the old code
1036 using get_range_info would return VR_RANGE for the entire
1037 domain, instead of VR_VARYING. The new code normalizes
1038 full-domain ranges to VR_VARYING. */
1039 if (vr_kind == VR_RANGE || vr_kind == VR_VARYING)
1040 *exp_range = value_range (type, var_min, var_max);
1044 if (!tree_is_chrec (exp)
1045 && get_gimple_rhs_class (TREE_CODE (exp)) != GIMPLE_TERNARY_RHS)
1047 extract_ops_from_tree (exp, &code, &op0, &op1);
1048 if (split_constant_offset_1 (type, op0, code, op1, var, off,
1049 exp_range, cache, limit))
1050 return;
1053 *var = exp;
1054 if (INTEGRAL_TYPE_P (type))
1055 *var = fold_convert (sizetype, *var);
1056 *off = ssize_int (0);
1058 value_range r;
1059 if (exp_range && code != SSA_NAME
1060 && get_range_query (cfun)->range_of_expr (r, exp)
1061 && !r.undefined_p ())
1062 *exp_range = r;
1065 /* Expresses EXP as VAR + OFF, where OFF is a constant. VAR has the same
1066 type as EXP while OFF has type ssizetype. */
1068 void
1069 split_constant_offset (tree exp, tree *var, tree *off)
1071 unsigned limit = param_ssa_name_def_chain_limit;
1072 static hash_map<tree, std::pair<tree, tree> > *cache;
1073 if (!cache)
1074 cache = new hash_map<tree, std::pair<tree, tree> > (37);
1075 split_constant_offset (exp, var, off, nullptr, *cache, &limit);
1076 *var = fold_convert (TREE_TYPE (exp), *var);
1077 cache->empty ();
1080 /* Returns the address ADDR of an object in a canonical shape (without nop
1081 casts, and with type of pointer to the object). */
1083 static tree
1084 canonicalize_base_object_address (tree addr)
1086 tree orig = addr;
1088 STRIP_NOPS (addr);
1090 /* The base address may be obtained by casting from integer, in that case
1091 keep the cast. */
1092 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
1093 return orig;
1095 if (TREE_CODE (addr) != ADDR_EXPR)
1096 return addr;
1098 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
1101 /* Analyze the behavior of memory reference REF within STMT.
1102 There are two modes:
1104 - BB analysis. In this case we simply split the address into base,
1105 init and offset components, without reference to any containing loop.
1106 The resulting base and offset are general expressions and they can
1107 vary arbitrarily from one iteration of the containing loop to the next.
1108 The step is always zero.
1110 - loop analysis. In this case we analyze the reference both wrt LOOP
1111 and on the basis that the reference occurs (is "used") in LOOP;
1112 see the comment above analyze_scalar_evolution_in_loop for more
1113 information about this distinction. The base, init, offset and
1114 step fields are all invariant in LOOP.
1116 Perform BB analysis if LOOP is null, or if LOOP is the function's
1117 dummy outermost loop. In other cases perform loop analysis.
1119 Return true if the analysis succeeded and store the results in DRB if so.
1120 BB analysis can only fail for bitfield or reversed-storage accesses. */
1122 opt_result
1123 dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
1124 class loop *loop, const gimple *stmt)
1126 poly_int64 pbitsize, pbitpos;
1127 tree base, poffset;
1128 machine_mode pmode;
1129 int punsignedp, preversep, pvolatilep;
1130 affine_iv base_iv, offset_iv;
1131 tree init, dinit, step;
1132 bool in_loop = (loop && loop->num);
1134 if (dump_file && (dump_flags & TDF_DETAILS))
1135 fprintf (dump_file, "analyze_innermost: ");
1137 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
1138 &punsignedp, &preversep, &pvolatilep);
1139 gcc_assert (base != NULL_TREE);
1141 poly_int64 pbytepos;
1142 if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
1143 return opt_result::failure_at (stmt,
1144 "failed: bit offset alignment.\n");
1146 if (preversep)
1147 return opt_result::failure_at (stmt,
1148 "failed: reverse storage order.\n");
1150 /* Calculate the alignment and misalignment for the inner reference. */
1151 unsigned int HOST_WIDE_INT bit_base_misalignment;
1152 unsigned int bit_base_alignment;
1153 get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);
1155 /* There are no bitfield references remaining in BASE, so the values
1156 we got back must be whole bytes. */
1157 gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
1158 && bit_base_misalignment % BITS_PER_UNIT == 0);
1159 unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
1160 poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;
1162 if (TREE_CODE (base) == MEM_REF)
1164 if (!integer_zerop (TREE_OPERAND (base, 1)))
1166 /* Subtract MOFF from the base and add it to POFFSET instead.
1167 Adjust the misalignment to reflect the amount we subtracted. */
1168 poly_offset_int moff = mem_ref_offset (base);
1169 base_misalignment -= moff.force_shwi ();
1170 tree mofft = wide_int_to_tree (sizetype, moff);
1171 if (!poffset)
1172 poffset = mofft;
1173 else
1174 poffset = size_binop (PLUS_EXPR, poffset, mofft);
1176 base = TREE_OPERAND (base, 0);
1178 else
1179 base = build_fold_addr_expr (base);
1181 if (in_loop)
1183 if (!simple_iv (loop, loop, base, &base_iv, true))
1184 return opt_result::failure_at
1185 (stmt, "failed: evolution of base is not affine.\n");
1187 else
1189 base_iv.base = base;
1190 base_iv.step = ssize_int (0);
1191 base_iv.no_overflow = true;
1194 if (!poffset)
1196 offset_iv.base = ssize_int (0);
1197 offset_iv.step = ssize_int (0);
1199 else
1201 if (!in_loop)
1203 offset_iv.base = poffset;
1204 offset_iv.step = ssize_int (0);
1206 else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
1207 return opt_result::failure_at
1208 (stmt, "failed: evolution of offset is not affine.\n");
1211 init = ssize_int (pbytepos);
1213 /* Subtract any constant component from the base and add it to INIT instead.
1214 Adjust the misalignment to reflect the amount we subtracted. */
1215 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
1216 init = size_binop (PLUS_EXPR, init, dinit);
1217 base_misalignment -= TREE_INT_CST_LOW (dinit);
1219 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
1220 init = size_binop (PLUS_EXPR, init, dinit);
1222 step = size_binop (PLUS_EXPR,
1223 fold_convert (ssizetype, base_iv.step),
1224 fold_convert (ssizetype, offset_iv.step));
1226 base = canonicalize_base_object_address (base_iv.base);
1228 /* See if get_pointer_alignment can guarantee a higher alignment than
1229 the one we calculated above. */
1230 unsigned int HOST_WIDE_INT alt_misalignment;
1231 unsigned int alt_alignment;
1232 get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);
1234 /* As above, these values must be whole bytes. */
1235 gcc_assert (alt_alignment % BITS_PER_UNIT == 0
1236 && alt_misalignment % BITS_PER_UNIT == 0);
1237 alt_alignment /= BITS_PER_UNIT;
1238 alt_misalignment /= BITS_PER_UNIT;
1240 if (base_alignment < alt_alignment)
1242 base_alignment = alt_alignment;
1243 base_misalignment = alt_misalignment;
1246 drb->base_address = base;
1247 drb->offset = fold_convert (ssizetype, offset_iv.base);
1248 drb->init = init;
1249 drb->step = step;
1250 if (known_misalignment (base_misalignment, base_alignment,
1251 &drb->base_misalignment))
1252 drb->base_alignment = base_alignment;
1253 else
1255 drb->base_alignment = known_alignment (base_misalignment);
1256 drb->base_misalignment = 0;
1258 drb->offset_alignment = highest_pow2_factor (offset_iv.base);
1259 drb->step_alignment = highest_pow2_factor (step);
1261 if (dump_file && (dump_flags & TDF_DETAILS))
1262 fprintf (dump_file, "success.\n");
1264 return opt_result::success ();
1267 /* Return true if OP is a valid component reference for a DR access
1268 function. This accepts a subset of what handled_component_p accepts. */
1270 static bool
1271 access_fn_component_p (tree op)
1273 switch (TREE_CODE (op))
1275 case REALPART_EXPR:
1276 case IMAGPART_EXPR:
1277 case ARRAY_REF:
1278 return true;
1280 case COMPONENT_REF:
1281 return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;
1283 default:
1284 return false;
1288 /* Returns whether BASE can have a access_fn_component_p with BASE
1289 as base. */
1291 static bool
1292 base_supports_access_fn_components_p (tree base)
1294 switch (TREE_CODE (TREE_TYPE (base)))
1296 case COMPLEX_TYPE:
1297 case ARRAY_TYPE:
1298 case RECORD_TYPE:
1299 return true;
1300 default:
1301 return false;
1305 /* Determines the base object and the list of indices of memory reference
1306 DR, analyzed in LOOP and instantiated before NEST. */
1308 static void
1309 dr_analyze_indices (struct indices *dri, tree ref, edge nest, loop_p loop)
1311 /* If analyzing a basic-block there are no indices to analyze
1312 and thus no access functions. */
1313 if (!nest)
1315 dri->base_object = ref;
1316 dri->access_fns.create (0);
1317 return;
1320 vec<tree> access_fns = vNULL;
1322 /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
1323 into a two element array with a constant index. The base is
1324 then just the immediate underlying object. */
1325 if (TREE_CODE (ref) == REALPART_EXPR)
1327 ref = TREE_OPERAND (ref, 0);
1328 access_fns.safe_push (integer_zero_node);
1330 else if (TREE_CODE (ref) == IMAGPART_EXPR)
1332 ref = TREE_OPERAND (ref, 0);
1333 access_fns.safe_push (integer_one_node);
1336 /* Analyze access functions of dimensions we know to be independent.
1337 The list of component references handled here should be kept in
1338 sync with access_fn_component_p. */
1339 while (handled_component_p (ref))
1341 if (TREE_CODE (ref) == ARRAY_REF)
1343 tree op = TREE_OPERAND (ref, 1);
1344 tree access_fn = analyze_scalar_evolution (loop, op);
1345 access_fn = instantiate_scev (nest, loop, access_fn);
1346 access_fns.safe_push (access_fn);
1348 else if (TREE_CODE (ref) == COMPONENT_REF
1349 && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
1351 /* For COMPONENT_REFs of records (but not unions!) use the
1352 FIELD_DECL offset as constant access function so we can
1353 disambiguate a[i].f1 and a[i].f2. */
1354 tree off = component_ref_field_offset (ref);
1355 off = size_binop (PLUS_EXPR,
1356 size_binop (MULT_EXPR,
1357 fold_convert (bitsizetype, off),
1358 bitsize_int (BITS_PER_UNIT)),
1359 DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
1360 access_fns.safe_push (off);
1362 else
1363 /* If we have an unhandled component we could not translate
1364 to an access function stop analyzing. We have determined
1365 our base object in this case. */
1366 break;
1368 ref = TREE_OPERAND (ref, 0);
1371 /* If the address operand of a MEM_REF base has an evolution in the
1372 analyzed nest, add it as an additional independent access-function. */
1373 if (TREE_CODE (ref) == MEM_REF)
1375 tree op = TREE_OPERAND (ref, 0);
1376 tree access_fn = analyze_scalar_evolution (loop, op);
1377 access_fn = instantiate_scev (nest, loop, access_fn);
1378 STRIP_NOPS (access_fn);
1379 if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
1381 tree memoff = TREE_OPERAND (ref, 1);
1382 tree base = initial_condition (access_fn);
1383 tree orig_type = TREE_TYPE (base);
1384 STRIP_USELESS_TYPE_CONVERSION (base);
1385 tree off;
1386 split_constant_offset (base, &base, &off);
1387 STRIP_USELESS_TYPE_CONVERSION (base);
1388 /* Fold the MEM_REF offset into the evolutions initial
1389 value to make more bases comparable. */
1390 if (!integer_zerop (memoff))
1392 off = size_binop (PLUS_EXPR, off,
1393 fold_convert (ssizetype, memoff));
1394 memoff = build_int_cst (TREE_TYPE (memoff), 0);
1396 /* Adjust the offset so it is a multiple of the access type
1397 size and thus we separate bases that can possibly be used
1398 to produce partial overlaps (which the access_fn machinery
1399 cannot handle). */
1400 wide_int rem;
1401 if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
1402 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
1403 && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
1404 rem = wi::mod_trunc
1405 (wi::to_wide (off),
1406 wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
1407 SIGNED);
1408 else
1409 /* If we can't compute the remainder simply force the initial
1410 condition to zero. */
1411 rem = wi::to_wide (off);
1412 off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
1413 memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
1414 /* And finally replace the initial condition. */
1415 access_fn = chrec_replace_initial_condition
1416 (access_fn, fold_convert (orig_type, off));
1417 /* ??? This is still not a suitable base object for
1418 dr_may_alias_p - the base object needs to be an
1419 access that covers the object as whole. With
1420 an evolution in the pointer this cannot be
1421 guaranteed.
1422 As a band-aid, mark the access so we can special-case
1423 it in dr_may_alias_p. */
1424 tree old = ref;
1425 ref = fold_build2_loc (EXPR_LOCATION (ref),
1426 MEM_REF, TREE_TYPE (ref),
1427 base, memoff);
1428 MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
1429 MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
1430 dri->unconstrained_base = true;
1431 access_fns.safe_push (access_fn);
1434 else if (DECL_P (ref))
1436 /* Canonicalize DR_BASE_OBJECT to MEM_REF form. */
1437 ref = build2 (MEM_REF, TREE_TYPE (ref),
1438 build_fold_addr_expr (ref),
1439 build_int_cst (reference_alias_ptr_type (ref), 0));
1442 dri->base_object = ref;
1443 dri->access_fns = access_fns;
1446 /* Extracts the alias analysis information from the memory reference DR. */
1448 static void
1449 dr_analyze_alias (struct data_reference *dr)
1451 tree ref = DR_REF (dr);
1452 tree base = get_base_address (ref), addr;
1454 if (INDIRECT_REF_P (base)
1455 || TREE_CODE (base) == MEM_REF)
1457 addr = TREE_OPERAND (base, 0);
1458 if (TREE_CODE (addr) == SSA_NAME)
1459 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1463 /* Frees data reference DR. */
1465 void
1466 free_data_ref (data_reference_p dr)
1468 DR_ACCESS_FNS (dr).release ();
1469 if (dr->alt_indices.base_object)
1470 dr->alt_indices.access_fns.release ();
1471 free (dr);
1474 /* Analyze memory reference MEMREF, which is accessed in STMT.
1475 The reference is a read if IS_READ is true, otherwise it is a write.
1476 IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
1477 within STMT, i.e. that it might not occur even if STMT is executed
1478 and runs to completion.
1480 Return the data_reference description of MEMREF. NEST is the outermost
1481 loop in which the reference should be instantiated, LOOP is the loop
1482 in which the data reference should be analyzed. */
1484 struct data_reference *
1485 create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
1486 bool is_read, bool is_conditional_in_stmt)
1488 struct data_reference *dr;
1490 if (dump_file && (dump_flags & TDF_DETAILS))
1492 fprintf (dump_file, "Creating dr for ");
1493 print_generic_expr (dump_file, memref, TDF_SLIM);
1494 fprintf (dump_file, "\n");
1497 dr = XCNEW (struct data_reference);
1498 DR_STMT (dr) = stmt;
1499 DR_REF (dr) = memref;
1500 DR_IS_READ (dr) = is_read;
1501 DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;
1503 dr_analyze_innermost (&DR_INNERMOST (dr), memref,
1504 nest != NULL ? loop : NULL, stmt);
1505 dr_analyze_indices (&dr->indices, DR_REF (dr), nest, loop);
1506 dr_analyze_alias (dr);
1508 if (dump_file && (dump_flags & TDF_DETAILS))
1510 unsigned i;
1511 fprintf (dump_file, "\tbase_address: ");
1512 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1513 fprintf (dump_file, "\n\toffset from base address: ");
1514 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1515 fprintf (dump_file, "\n\tconstant offset from base address: ");
1516 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1517 fprintf (dump_file, "\n\tstep: ");
1518 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1519 fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
1520 fprintf (dump_file, "\n\tbase misalignment: %d",
1521 DR_BASE_MISALIGNMENT (dr));
1522 fprintf (dump_file, "\n\toffset alignment: %d",
1523 DR_OFFSET_ALIGNMENT (dr));
1524 fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
1525 fprintf (dump_file, "\n\tbase_object: ");
1526 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1527 fprintf (dump_file, "\n");
1528 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1530 fprintf (dump_file, "\tAccess function %d: ", i);
1531 print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1535 return dr;
1538 /* A helper function computes order between two tree expressions T1 and T2.
1539 This is used in comparator functions sorting objects based on the order
1540 of tree expressions. The function returns -1, 0, or 1. */
1543 data_ref_compare_tree (tree t1, tree t2)
1545 int i, cmp;
1546 enum tree_code code;
1547 char tclass;
1549 if (t1 == t2)
1550 return 0;
1551 if (t1 == NULL)
1552 return -1;
1553 if (t2 == NULL)
1554 return 1;
1556 STRIP_USELESS_TYPE_CONVERSION (t1);
1557 STRIP_USELESS_TYPE_CONVERSION (t2);
1558 if (t1 == t2)
1559 return 0;
1561 if (TREE_CODE (t1) != TREE_CODE (t2)
1562 && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
1563 return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
1565 code = TREE_CODE (t1);
1566 switch (code)
1568 case INTEGER_CST:
1569 return tree_int_cst_compare (t1, t2);
1571 case STRING_CST:
1572 if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
1573 return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
1574 return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
1575 TREE_STRING_LENGTH (t1));
1577 case SSA_NAME:
1578 if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
1579 return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
1580 break;
1582 default:
1583 if (POLY_INT_CST_P (t1))
1584 return compare_sizes_for_sort (wi::to_poly_widest (t1),
1585 wi::to_poly_widest (t2));
1587 tclass = TREE_CODE_CLASS (code);
1589 /* For decls, compare their UIDs. */
1590 if (tclass == tcc_declaration)
1592 if (DECL_UID (t1) != DECL_UID (t2))
1593 return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
1594 break;
1596 /* For expressions, compare their operands recursively. */
1597 else if (IS_EXPR_CODE_CLASS (tclass))
1599 for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
1601 cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
1602 TREE_OPERAND (t2, i));
1603 if (cmp != 0)
1604 return cmp;
1607 else
1608 gcc_unreachable ();
1611 return 0;
1614 /* Return TRUE it's possible to resolve data dependence DDR by runtime alias
1615 check. */
1617 opt_result
1618 runtime_alias_check_p (ddr_p ddr, class loop *loop, bool speed_p)
1620 if (dump_enabled_p ())
1621 dump_printf (MSG_NOTE,
1622 "consider run-time aliasing test between %T and %T\n",
1623 DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));
1625 if (!speed_p)
1626 return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1627 "runtime alias check not supported when"
1628 " optimizing for size.\n");
1630 /* FORNOW: We don't support versioning with outer-loop in either
1631 vectorization or loop distribution. */
1632 if (loop != NULL && loop->inner != NULL)
1633 return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1634 "runtime alias check not supported for"
1635 " outer loop.\n");
1637 return opt_result::success ();
1640 /* Operator == between two dr_with_seg_len objects.
1642 This equality operator is used to make sure two data refs
1643 are the same one so that we will consider to combine the
1644 aliasing checks of those two pairs of data dependent data
1645 refs. */
1647 static bool
1648 operator == (const dr_with_seg_len& d1,
1649 const dr_with_seg_len& d2)
1651 return (operand_equal_p (DR_BASE_ADDRESS (d1.dr),
1652 DR_BASE_ADDRESS (d2.dr), 0)
1653 && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
1654 && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
1655 && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0
1656 && known_eq (d1.access_size, d2.access_size)
1657 && d1.align == d2.align);
1660 /* Comparison function for sorting objects of dr_with_seg_len_pair_t
1661 so that we can combine aliasing checks in one scan. */
1663 static int
1664 comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
1666 const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
1667 const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
1668 const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
1669 const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;
1671 /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks
1672 if a and c have the same basic address snd step, and b and d have the same
1673 address and step. Therefore, if any a&c or b&d don't have the same address
1674 and step, we don't care the order of those two pairs after sorting. */
1675 int comp_res;
1677 if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
1678 DR_BASE_ADDRESS (b1.dr))) != 0)
1679 return comp_res;
1680 if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
1681 DR_BASE_ADDRESS (b2.dr))) != 0)
1682 return comp_res;
1683 if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
1684 DR_STEP (b1.dr))) != 0)
1685 return comp_res;
1686 if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
1687 DR_STEP (b2.dr))) != 0)
1688 return comp_res;
1689 if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
1690 DR_OFFSET (b1.dr))) != 0)
1691 return comp_res;
1692 if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
1693 DR_INIT (b1.dr))) != 0)
1694 return comp_res;
1695 if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
1696 DR_OFFSET (b2.dr))) != 0)
1697 return comp_res;
1698 if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
1699 DR_INIT (b2.dr))) != 0)
1700 return comp_res;
1702 return 0;
1705 /* Dump information about ALIAS_PAIR, indenting each line by INDENT. */
1707 static void
1708 dump_alias_pair (dr_with_seg_len_pair_t *alias_pair, const char *indent)
1710 dump_printf (MSG_NOTE, "%sreference: %T vs. %T\n", indent,
1711 DR_REF (alias_pair->first.dr),
1712 DR_REF (alias_pair->second.dr));
1714 dump_printf (MSG_NOTE, "%ssegment length: %T", indent,
1715 alias_pair->first.seg_len);
1716 if (!operand_equal_p (alias_pair->first.seg_len,
1717 alias_pair->second.seg_len, 0))
1718 dump_printf (MSG_NOTE, " vs. %T", alias_pair->second.seg_len);
1720 dump_printf (MSG_NOTE, "\n%saccess size: ", indent);
1721 dump_dec (MSG_NOTE, alias_pair->first.access_size);
1722 if (maybe_ne (alias_pair->first.access_size, alias_pair->second.access_size))
1724 dump_printf (MSG_NOTE, " vs. ");
1725 dump_dec (MSG_NOTE, alias_pair->second.access_size);
1728 dump_printf (MSG_NOTE, "\n%salignment: %d", indent,
1729 alias_pair->first.align);
1730 if (alias_pair->first.align != alias_pair->second.align)
1731 dump_printf (MSG_NOTE, " vs. %d", alias_pair->second.align);
1733 dump_printf (MSG_NOTE, "\n%sflags: ", indent);
1734 if (alias_pair->flags & DR_ALIAS_RAW)
1735 dump_printf (MSG_NOTE, " RAW");
1736 if (alias_pair->flags & DR_ALIAS_WAR)
1737 dump_printf (MSG_NOTE, " WAR");
1738 if (alias_pair->flags & DR_ALIAS_WAW)
1739 dump_printf (MSG_NOTE, " WAW");
1740 if (alias_pair->flags & DR_ALIAS_ARBITRARY)
1741 dump_printf (MSG_NOTE, " ARBITRARY");
1742 if (alias_pair->flags & DR_ALIAS_SWAPPED)
1743 dump_printf (MSG_NOTE, " SWAPPED");
1744 if (alias_pair->flags & DR_ALIAS_UNSWAPPED)
1745 dump_printf (MSG_NOTE, " UNSWAPPED");
1746 if (alias_pair->flags & DR_ALIAS_MIXED_STEPS)
1747 dump_printf (MSG_NOTE, " MIXED_STEPS");
1748 if (alias_pair->flags == 0)
1749 dump_printf (MSG_NOTE, " <none>");
1750 dump_printf (MSG_NOTE, "\n");
1753 /* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
1754 FACTOR is number of iterations that each data reference is accessed.
1756 Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
1757 we create an expression:
1759 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1760 || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
1762 for aliasing checks. However, in some cases we can decrease the number
1763 of checks by combining two checks into one. For example, suppose we have
1764 another pair of data refs store_ptr_0 & load_ptr_1, and if the following
1765 condition is satisfied:
1767 load_ptr_0 < load_ptr_1 &&
1768 load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
1770 (this condition means, in each iteration of vectorized loop, the accessed
1771 memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
1772 load_ptr_1.)
1774 we then can use only the following expression to finish the alising checks
1775 between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:
1777 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1778 || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))
1780 Note that we only consider that load_ptr_0 and load_ptr_1 have the same
1781 basic address. */
1783 void
1784 prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
1785 poly_uint64)
1787 if (alias_pairs->is_empty ())
1788 return;
1790 /* Canonicalize each pair so that the base components are ordered wrt
1791 data_ref_compare_tree. This allows the loop below to merge more
1792 cases. */
1793 unsigned int i;
1794 dr_with_seg_len_pair_t *alias_pair;
1795 FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1797 data_reference_p dr_a = alias_pair->first.dr;
1798 data_reference_p dr_b = alias_pair->second.dr;
1799 int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
1800 DR_BASE_ADDRESS (dr_b));
1801 if (comp_res == 0)
1802 comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
1803 if (comp_res == 0)
1804 comp_res = data_ref_compare_tree (DR_INIT (dr_a), DR_INIT (dr_b));
1805 if (comp_res > 0)
1807 std::swap (alias_pair->first, alias_pair->second);
1808 alias_pair->flags |= DR_ALIAS_SWAPPED;
1810 else
1811 alias_pair->flags |= DR_ALIAS_UNSWAPPED;
1814 /* Sort the collected data ref pairs so that we can scan them once to
1815 combine all possible aliasing checks. */
1816 alias_pairs->qsort (comp_dr_with_seg_len_pair);
1818 /* Scan the sorted dr pairs and check if we can combine alias checks
1819 of two neighboring dr pairs. */
1820 unsigned int last = 0;
1821 for (i = 1; i < alias_pairs->length (); ++i)
1823 /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2). */
1824 dr_with_seg_len_pair_t *alias_pair1 = &(*alias_pairs)[last];
1825 dr_with_seg_len_pair_t *alias_pair2 = &(*alias_pairs)[i];
1827 dr_with_seg_len *dr_a1 = &alias_pair1->first;
1828 dr_with_seg_len *dr_b1 = &alias_pair1->second;
1829 dr_with_seg_len *dr_a2 = &alias_pair2->first;
1830 dr_with_seg_len *dr_b2 = &alias_pair2->second;
1832 /* Remove duplicate data ref pairs. */
1833 if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
1835 if (dump_enabled_p ())
1836 dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
1837 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1838 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1839 alias_pair1->flags |= alias_pair2->flags;
1840 continue;
1843 /* Assume that we won't be able to merge the pairs, then correct
1844 if we do. */
1845 last += 1;
1846 if (last != i)
1847 (*alias_pairs)[last] = (*alias_pairs)[i];
1849 if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
1851 /* We consider the case that DR_B1 and DR_B2 are same memrefs,
1852 and DR_A1 and DR_A2 are two consecutive memrefs. */
1853 if (*dr_a1 == *dr_a2)
1855 std::swap (dr_a1, dr_b1);
1856 std::swap (dr_a2, dr_b2);
1859 poly_int64 init_a1, init_a2;
1860 /* Only consider cases in which the distance between the initial
1861 DR_A1 and the initial DR_A2 is known at compile time. */
1862 if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
1863 DR_BASE_ADDRESS (dr_a2->dr), 0)
1864 || !operand_equal_p (DR_OFFSET (dr_a1->dr),
1865 DR_OFFSET (dr_a2->dr), 0)
1866 || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
1867 || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
1868 continue;
1870 /* Don't combine if we can't tell which one comes first. */
1871 if (!ordered_p (init_a1, init_a2))
1872 continue;
1874 /* Work out what the segment length would be if we did combine
1875 DR_A1 and DR_A2:
1877 - If DR_A1 and DR_A2 have equal lengths, that length is
1878 also the combined length.
1880 - If DR_A1 and DR_A2 both have negative "lengths", the combined
1881 length is the lower bound on those lengths.
1883 - If DR_A1 and DR_A2 both have positive lengths, the combined
1884 length is the upper bound on those lengths.
1886 Other cases are unlikely to give a useful combination.
1888 The lengths both have sizetype, so the sign is taken from
1889 the step instead. */
1890 poly_uint64 new_seg_len = 0;
1891 bool new_seg_len_p = !operand_equal_p (dr_a1->seg_len,
1892 dr_a2->seg_len, 0);
1893 if (new_seg_len_p)
1895 poly_uint64 seg_len_a1, seg_len_a2;
1896 if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
1897 || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
1898 continue;
1900 tree indicator_a = dr_direction_indicator (dr_a1->dr);
1901 if (TREE_CODE (indicator_a) != INTEGER_CST)
1902 continue;
1904 tree indicator_b = dr_direction_indicator (dr_a2->dr);
1905 if (TREE_CODE (indicator_b) != INTEGER_CST)
1906 continue;
1908 int sign_a = tree_int_cst_sgn (indicator_a);
1909 int sign_b = tree_int_cst_sgn (indicator_b);
1911 if (sign_a <= 0 && sign_b <= 0)
1912 new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
1913 else if (sign_a >= 0 && sign_b >= 0)
1914 new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
1915 else
1916 continue;
1918 /* At this point we're committed to merging the refs. */
1920 /* Make sure dr_a1 starts left of dr_a2. */
1921 if (maybe_gt (init_a1, init_a2))
1923 std::swap (*dr_a1, *dr_a2);
1924 std::swap (init_a1, init_a2);
1927 /* The DR_Bs are equal, so only the DR_As can introduce
1928 mixed steps. */
1929 if (!operand_equal_p (DR_STEP (dr_a1->dr), DR_STEP (dr_a2->dr), 0))
1930 alias_pair1->flags |= DR_ALIAS_MIXED_STEPS;
1932 if (new_seg_len_p)
1934 dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
1935 new_seg_len);
1936 dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
1939 /* This is always positive due to the swap above. */
1940 poly_uint64 diff = init_a2 - init_a1;
1942 /* The new check will start at DR_A1. Make sure that its access
1943 size encompasses the initial DR_A2. */
1944 if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
1946 dr_a1->access_size = upper_bound (dr_a1->access_size,
1947 diff + dr_a2->access_size);
1948 unsigned int new_align = known_alignment (dr_a1->access_size);
1949 dr_a1->align = MIN (dr_a1->align, new_align);
1951 if (dump_enabled_p ())
1952 dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
1953 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1954 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1955 alias_pair1->flags |= alias_pair2->flags;
1956 last -= 1;
1959 alias_pairs->truncate (last + 1);
1961 /* Try to restore the original dr_with_seg_len order within each
1962 dr_with_seg_len_pair_t. If we ended up combining swapped and
1963 unswapped pairs into the same check, we have to invalidate any
1964 RAW, WAR and WAW information for it. */
1965 if (dump_enabled_p ())
1966 dump_printf (MSG_NOTE, "merged alias checks:\n");
1967 FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1969 unsigned int swap_mask = (DR_ALIAS_SWAPPED | DR_ALIAS_UNSWAPPED);
1970 unsigned int swapped = (alias_pair->flags & swap_mask);
1971 if (swapped == DR_ALIAS_SWAPPED)
1972 std::swap (alias_pair->first, alias_pair->second);
1973 else if (swapped != DR_ALIAS_UNSWAPPED)
1974 alias_pair->flags |= DR_ALIAS_ARBITRARY;
1975 alias_pair->flags &= ~swap_mask;
1976 if (dump_enabled_p ())
1977 dump_alias_pair (alias_pair, " ");
1981 /* A subroutine of create_intersect_range_checks, with a subset of the
1982 same arguments. Try to use IFN_CHECK_RAW_PTRS and IFN_CHECK_WAR_PTRS
1983 to optimize cases in which the references form a simple RAW, WAR or
1984 WAR dependence. */
1986 static bool
1987 create_ifn_alias_checks (tree *cond_expr,
1988 const dr_with_seg_len_pair_t &alias_pair)
1990 const dr_with_seg_len& dr_a = alias_pair.first;
1991 const dr_with_seg_len& dr_b = alias_pair.second;
1993 /* Check for cases in which:
1995 (a) we have a known RAW, WAR or WAR dependence
1996 (b) the accesses are well-ordered in both the original and new code
1997 (see the comment above the DR_ALIAS_* flags for details); and
1998 (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR. */
1999 if (alias_pair.flags & ~(DR_ALIAS_RAW | DR_ALIAS_WAR | DR_ALIAS_WAW))
2000 return false;
2002 /* Make sure that both DRs access the same pattern of bytes,
2003 with a constant length and step. */
2004 poly_uint64 seg_len;
2005 if (!operand_equal_p (dr_a.seg_len, dr_b.seg_len, 0)
2006 || !poly_int_tree_p (dr_a.seg_len, &seg_len)
2007 || maybe_ne (dr_a.access_size, dr_b.access_size)
2008 || !operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0)
2009 || !tree_fits_uhwi_p (DR_STEP (dr_a.dr)))
2010 return false;
2012 unsigned HOST_WIDE_INT bytes = tree_to_uhwi (DR_STEP (dr_a.dr));
2013 tree addr_a = DR_BASE_ADDRESS (dr_a.dr);
2014 tree addr_b = DR_BASE_ADDRESS (dr_b.dr);
2016 /* See whether the target suports what we want to do. WAW checks are
2017 equivalent to WAR checks here. */
2018 internal_fn ifn = (alias_pair.flags & DR_ALIAS_RAW
2019 ? IFN_CHECK_RAW_PTRS
2020 : IFN_CHECK_WAR_PTRS);
2021 unsigned int align = MIN (dr_a.align, dr_b.align);
2022 poly_uint64 full_length = seg_len + bytes;
2023 if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
2024 full_length, align))
2026 full_length = seg_len + dr_a.access_size;
2027 if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
2028 full_length, align))
2029 return false;
2032 /* Commit to using this form of test. */
2033 addr_a = fold_build_pointer_plus (addr_a, DR_OFFSET (dr_a.dr));
2034 addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
2036 addr_b = fold_build_pointer_plus (addr_b, DR_OFFSET (dr_b.dr));
2037 addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
2039 *cond_expr = build_call_expr_internal_loc (UNKNOWN_LOCATION,
2040 ifn, boolean_type_node,
2041 4, addr_a, addr_b,
2042 size_int (full_length),
2043 size_int (align));
2045 if (dump_enabled_p ())
2047 if (ifn == IFN_CHECK_RAW_PTRS)
2048 dump_printf (MSG_NOTE, "using an IFN_CHECK_RAW_PTRS test\n");
2049 else
2050 dump_printf (MSG_NOTE, "using an IFN_CHECK_WAR_PTRS test\n");
2052 return true;
2055 /* Try to generate a runtime condition that is true if ALIAS_PAIR is
2056 free of aliases, using a condition based on index values instead
2057 of a condition based on addresses. Return true on success,
2058 storing the condition in *COND_EXPR.
2060 This can only be done if the two data references in ALIAS_PAIR access
2061 the same array object and the index is the only difference. For example,
2062 if the two data references are DR_A and DR_B:
2064 DR_A DR_B
2065 data-ref arr[i] arr[j]
2066 base_object arr arr
2067 index {i_0, +, 1}_loop {j_0, +, 1}_loop
2069 The addresses and their index are like:
2071 |<- ADDR_A ->| |<- ADDR_B ->|
2072 ------------------------------------------------------->
2073 | | | | | | | | | |
2074 ------------------------------------------------------->
2075 i_0 ... i_0+4 j_0 ... j_0+4
2077 We can create expression based on index rather than address:
2079 (unsigned) (i_0 - j_0 + 3) <= 6
2081 i.e. the indices are less than 4 apart.
2083 Note evolution step of index needs to be considered in comparison. */
2085 static bool
2086 create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
2087 const dr_with_seg_len_pair_t &alias_pair)
2089 const dr_with_seg_len &dr_a = alias_pair.first;
2090 const dr_with_seg_len &dr_b = alias_pair.second;
2091 if ((alias_pair.flags & DR_ALIAS_MIXED_STEPS)
2092 || integer_zerop (DR_STEP (dr_a.dr))
2093 || integer_zerop (DR_STEP (dr_b.dr))
2094 || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
2095 return false;
2097 poly_uint64 seg_len1, seg_len2;
2098 if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
2099 || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
2100 return false;
2102 if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
2103 return false;
2105 if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
2106 return false;
2108 if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
2109 return false;
2111 gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
2113 bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
2114 unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
2115 if (neg_step)
2117 abs_step = -abs_step;
2118 seg_len1 = (-wi::to_poly_wide (dr_a.seg_len)).force_uhwi ();
2119 seg_len2 = (-wi::to_poly_wide (dr_b.seg_len)).force_uhwi ();
2122 /* Infer the number of iterations with which the memory segment is accessed
2123 by DR. In other words, alias is checked if memory segment accessed by
2124 DR_A in some iterations intersect with memory segment accessed by DR_B
2125 in the same amount iterations.
2126 Note segnment length is a linear function of number of iterations with
2127 DR_STEP as the coefficient. */
2128 poly_uint64 niter_len1, niter_len2;
2129 if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
2130 || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
2131 return false;
2133 /* Divide each access size by the byte step, rounding up. */
2134 poly_uint64 niter_access1, niter_access2;
2135 if (!can_div_trunc_p (dr_a.access_size + abs_step - 1,
2136 abs_step, &niter_access1)
2137 || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
2138 abs_step, &niter_access2))
2139 return false;
2141 bool waw_or_war_p = (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) == 0;
2143 int found = -1;
2144 for (unsigned int i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
2146 tree access1 = DR_ACCESS_FN (dr_a.dr, i);
2147 tree access2 = DR_ACCESS_FN (dr_b.dr, i);
2148 /* Two indices must be the same if they are not scev, or not scev wrto
2149 current loop being vecorized. */
2150 if (TREE_CODE (access1) != POLYNOMIAL_CHREC
2151 || TREE_CODE (access2) != POLYNOMIAL_CHREC
2152 || CHREC_VARIABLE (access1) != (unsigned)loop->num
2153 || CHREC_VARIABLE (access2) != (unsigned)loop->num)
2155 if (operand_equal_p (access1, access2, 0))
2156 continue;
2158 return false;
2160 if (found >= 0)
2161 return false;
2162 found = i;
2165 /* Ought not to happen in practice, since if all accesses are equal then the
2166 alias should be decidable at compile time. */
2167 if (found < 0)
2168 return false;
2170 /* The two indices must have the same step. */
2171 tree access1 = DR_ACCESS_FN (dr_a.dr, found);
2172 tree access2 = DR_ACCESS_FN (dr_b.dr, found);
2173 if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
2174 return false;
2176 tree idx_step = CHREC_RIGHT (access1);
2177 /* Index must have const step, otherwise DR_STEP won't be constant. */
2178 gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
2179 /* Index must evaluate in the same direction as DR. */
2180 gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
2182 tree min1 = CHREC_LEFT (access1);
2183 tree min2 = CHREC_LEFT (access2);
2184 if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
2185 return false;
2187 /* Ideally, alias can be checked against loop's control IV, but we
2188 need to prove linear mapping between control IV and reference
2189 index. Although that should be true, we check against (array)
2190 index of data reference. Like segment length, index length is
2191 linear function of the number of iterations with index_step as
2192 the coefficient, i.e, niter_len * idx_step. */
2193 offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step),
2194 SIGNED);
2195 if (neg_step)
2196 abs_idx_step = -abs_idx_step;
2197 poly_offset_int idx_len1 = abs_idx_step * niter_len1;
2198 poly_offset_int idx_len2 = abs_idx_step * niter_len2;
2199 poly_offset_int idx_access1 = abs_idx_step * niter_access1;
2200 poly_offset_int idx_access2 = abs_idx_step * niter_access2;
2202 gcc_assert (known_ge (idx_len1, 0)
2203 && known_ge (idx_len2, 0)
2204 && known_ge (idx_access1, 0)
2205 && known_ge (idx_access2, 0));
2207 /* Each access has the following pattern, with lengths measured
2208 in units of INDEX:
2210 <-- idx_len -->
2211 <--- A: -ve step --->
2212 +-----+-------+-----+-------+-----+
2213 | n-1 | ..... | 0 | ..... | n-1 |
2214 +-----+-------+-----+-------+-----+
2215 <--- B: +ve step --->
2216 <-- idx_len -->
2220 where "n" is the number of scalar iterations covered by the segment
2221 and where each access spans idx_access units.
2223 A is the range of bytes accessed when the step is negative,
2224 B is the range when the step is positive.
2226 When checking for general overlap, we need to test whether
2227 the range:
2229 [min1 + low_offset1, min1 + high_offset1 + idx_access1 - 1]
2231 overlaps:
2233 [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1]
2235 where:
2237 low_offsetN = +ve step ? 0 : -idx_lenN;
2238 high_offsetN = +ve step ? idx_lenN : 0;
2240 This is equivalent to testing whether:
2242 min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1
2243 && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1
2245 Converting this into a single test, there is an overlap if:
2247 0 <= min2 - min1 + bias <= limit
2249 where bias = high_offset2 + idx_access2 - 1 - low_offset1
2250 limit = (high_offset1 - low_offset1 + idx_access1 - 1)
2251 + (high_offset2 - low_offset2 + idx_access2 - 1)
2252 i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1
2254 Combining the tests requires limit to be computable in an unsigned
2255 form of the index type; if it isn't, we fall back to the usual
2256 pointer-based checks.
2258 We can do better if DR_B is a write and if DR_A and DR_B are
2259 well-ordered in both the original and the new code (see the
2260 comment above the DR_ALIAS_* flags for details). In this case
2261 we know that for each i in [0, n-1], the write performed by
2262 access i of DR_B occurs after access numbers j<=i of DR_A in
2263 both the original and the new code. Any write or anti
2264 dependencies wrt those DR_A accesses are therefore maintained.
2266 We just need to make sure that each individual write in DR_B does not
2267 overlap any higher-indexed access in DR_A; such DR_A accesses happen
2268 after the DR_B access in the original code but happen before it in
2269 the new code.
2271 We know the steps for both accesses are equal, so by induction, we
2272 just need to test whether the first write of DR_B overlaps a later
2273 access of DR_A. In other words, we need to move min1 along by
2274 one iteration:
2276 min1' = min1 + idx_step
2278 and use the ranges:
2280 [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1]
2282 and:
2284 [min2, min2 + idx_access2 - 1]
2286 where:
2288 low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|)
2289 high_offset1' = +ve_step ? idx_len1 - |idx_step| : 0. */
2290 if (waw_or_war_p)
2291 idx_len1 -= abs_idx_step;
2293 poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1;
2294 if (!waw_or_war_p)
2295 limit += idx_len2;
2297 tree utype = unsigned_type_for (TREE_TYPE (min1));
2298 if (!wi::fits_to_tree_p (limit, utype))
2299 return false;
2301 poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
2302 poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2;
2303 poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
2304 /* Equivalent to adding IDX_STEP to MIN1. */
2305 if (waw_or_war_p)
2306 bias -= wi::to_offset (idx_step);
2308 tree subject = fold_build2 (MINUS_EXPR, utype,
2309 fold_convert (utype, min2),
2310 fold_convert (utype, min1));
2311 subject = fold_build2 (PLUS_EXPR, utype, subject,
2312 wide_int_to_tree (utype, bias));
2313 tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject,
2314 wide_int_to_tree (utype, limit));
2315 if (*cond_expr)
2316 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2317 *cond_expr, part_cond_expr);
2318 else
2319 *cond_expr = part_cond_expr;
2320 if (dump_enabled_p ())
2322 if (waw_or_war_p)
2323 dump_printf (MSG_NOTE, "using an index-based WAR/WAW test\n");
2324 else
2325 dump_printf (MSG_NOTE, "using an index-based overlap test\n");
2327 return true;
2330 /* A subroutine of create_intersect_range_checks, with a subset of the
2331 same arguments. Try to optimize cases in which the second access
2332 is a write and in which some overlap is valid. */
2334 static bool
2335 create_waw_or_war_checks (tree *cond_expr,
2336 const dr_with_seg_len_pair_t &alias_pair)
2338 const dr_with_seg_len& dr_a = alias_pair.first;
2339 const dr_with_seg_len& dr_b = alias_pair.second;
2341 /* Check for cases in which:
2343 (a) DR_B is always a write;
2344 (b) the accesses are well-ordered in both the original and new code
2345 (see the comment above the DR_ALIAS_* flags for details); and
2346 (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR. */
2347 if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
2348 return false;
2350 /* Check for equal (but possibly variable) steps. */
2351 tree step = DR_STEP (dr_a.dr);
2352 if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
2353 return false;
2355 /* Make sure that we can operate on sizetype without loss of precision. */
2356 tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
2357 if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
2358 return false;
2360 /* All addresses involved are known to have a common alignment ALIGN.
2361 We can therefore subtract ALIGN from an exclusive endpoint to get
2362 an inclusive endpoint. In the best (and common) case, ALIGN is the
2363 same as the access sizes of both DRs, and so subtracting ALIGN
2364 cancels out the addition of an access size. */
2365 unsigned int align = MIN (dr_a.align, dr_b.align);
2366 poly_uint64 last_chunk_a = dr_a.access_size - align;
2367 poly_uint64 last_chunk_b = dr_b.access_size - align;
2369 /* Get a boolean expression that is true when the step is negative. */
2370 tree indicator = dr_direction_indicator (dr_a.dr);
2371 tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2372 fold_convert (ssizetype, indicator),
2373 ssize_int (0));
2375 /* Get lengths in sizetype. */
2376 tree seg_len_a
2377 = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len));
2378 step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step));
2380 /* Each access has the following pattern:
2382 <- |seg_len| ->
2383 <--- A: -ve step --->
2384 +-----+-------+-----+-------+-----+
2385 | n-1 | ..... | 0 | ..... | n-1 |
2386 +-----+-------+-----+-------+-----+
2387 <--- B: +ve step --->
2388 <- |seg_len| ->
2390 base address
2392 where "n" is the number of scalar iterations covered by the segment.
2394 A is the range of bytes accessed when the step is negative,
2395 B is the range when the step is positive.
2397 We know that DR_B is a write. We also know (from checking that
2398 DR_A and DR_B are well-ordered) that for each i in [0, n-1],
2399 the write performed by access i of DR_B occurs after access numbers
2400 j<=i of DR_A in both the original and the new code. Any write or
2401 anti dependencies wrt those DR_A accesses are therefore maintained.
2403 We just need to make sure that each individual write in DR_B does not
2404 overlap any higher-indexed access in DR_A; such DR_A accesses happen
2405 after the DR_B access in the original code but happen before it in
2406 the new code.
2408 We know the steps for both accesses are equal, so by induction, we
2409 just need to test whether the first write of DR_B overlaps a later
2410 access of DR_A. In other words, we need to move addr_a along by
2411 one iteration:
2413 addr_a' = addr_a + step
2415 and check whether:
2417 [addr_b, addr_b + last_chunk_b]
2419 overlaps:
2421 [addr_a' + low_offset_a, addr_a' + high_offset_a + last_chunk_a]
2423 where [low_offset_a, high_offset_a] spans accesses [1, n-1]. I.e.:
2425 low_offset_a = +ve step ? 0 : seg_len_a - step
2426 high_offset_a = +ve step ? seg_len_a - step : 0
2428 This is equivalent to testing whether:
2430 addr_a' + low_offset_a <= addr_b + last_chunk_b
2431 && addr_b <= addr_a' + high_offset_a + last_chunk_a
2433 Converting this into a single test, there is an overlap if:
2435 0 <= addr_b + last_chunk_b - addr_a' - low_offset_a <= limit
2437 where limit = high_offset_a - low_offset_a + last_chunk_a + last_chunk_b
2439 If DR_A is performed, limit + |step| - last_chunk_b is known to be
2440 less than the size of the object underlying DR_A. We also know
2441 that last_chunk_b <= |step|; this is checked elsewhere if it isn't
2442 guaranteed at compile time. There can therefore be no overflow if
2443 "limit" is calculated in an unsigned type with pointer precision. */
2444 tree addr_a = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr),
2445 DR_OFFSET (dr_a.dr));
2446 addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
2448 tree addr_b = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr),
2449 DR_OFFSET (dr_b.dr));
2450 addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
2452 /* Advance ADDR_A by one iteration and adjust the length to compensate. */
2453 addr_a = fold_build_pointer_plus (addr_a, step);
2454 tree seg_len_a_minus_step = fold_build2 (MINUS_EXPR, sizetype,
2455 seg_len_a, step);
2456 if (!CONSTANT_CLASS_P (seg_len_a_minus_step))
2457 seg_len_a_minus_step = build1 (SAVE_EXPR, sizetype, seg_len_a_minus_step);
2459 tree low_offset_a = fold_build3 (COND_EXPR, sizetype, neg_step,
2460 seg_len_a_minus_step, size_zero_node);
2461 if (!CONSTANT_CLASS_P (low_offset_a))
2462 low_offset_a = build1 (SAVE_EXPR, sizetype, low_offset_a);
2464 /* We could use COND_EXPR <neg_step, size_zero_node, seg_len_a_minus_step>,
2465 but it's usually more efficient to reuse the LOW_OFFSET_A result. */
2466 tree high_offset_a = fold_build2 (MINUS_EXPR, sizetype, seg_len_a_minus_step,
2467 low_offset_a);
2469 /* The amount added to addr_b - addr_a'. */
2470 tree bias = fold_build2 (MINUS_EXPR, sizetype,
2471 size_int (last_chunk_b), low_offset_a);
2473 tree limit = fold_build2 (MINUS_EXPR, sizetype, high_offset_a, low_offset_a);
2474 limit = fold_build2 (PLUS_EXPR, sizetype, limit,
2475 size_int (last_chunk_a + last_chunk_b));
2477 tree subject = fold_build2 (POINTER_DIFF_EXPR, ssizetype, addr_b, addr_a);
2478 subject = fold_build2 (PLUS_EXPR, sizetype,
2479 fold_convert (sizetype, subject), bias);
2481 *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit);
2482 if (dump_enabled_p ())
2483 dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n");
2484 return true;
2487 /* If ALIGN is nonzero, set up *SEQ_MIN_OUT and *SEQ_MAX_OUT so that for
2488 every address ADDR accessed by D:
2490 *SEQ_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEQ_MAX_OUT
2492 In this case, every element accessed by D is aligned to at least
2493 ALIGN bytes.
2495 If ALIGN is zero then instead set *SEG_MAX_OUT so that:
2497 *SEQ_MIN_OUT <= ADDR < *SEQ_MAX_OUT. */
2499 static void
2500 get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
2501 tree *seg_max_out, HOST_WIDE_INT align)
2503 /* Each access has the following pattern:
2505 <- |seg_len| ->
2506 <--- A: -ve step --->
2507 +-----+-------+-----+-------+-----+
2508 | n-1 | ,.... | 0 | ..... | n-1 |
2509 +-----+-------+-----+-------+-----+
2510 <--- B: +ve step --->
2511 <- |seg_len| ->
2513 base address
2515 where "n" is the number of scalar iterations covered by the segment.
2516 (This should be VF for a particular pair if we know that both steps
2517 are the same, otherwise it will be the full number of scalar loop
2518 iterations.)
2520 A is the range of bytes accessed when the step is negative,
2521 B is the range when the step is positive.
2523 If the access size is "access_size" bytes, the lowest addressed byte is:
2525 base + (step < 0 ? seg_len : 0) [LB]
2527 and the highest addressed byte is always below:
2529 base + (step < 0 ? 0 : seg_len) + access_size [UB]
2531 Thus:
2533 LB <= ADDR < UB
2535 If ALIGN is nonzero, all three values are aligned to at least ALIGN
2536 bytes, so:
2538 LB <= ADDR <= UB - ALIGN
2540 where "- ALIGN" folds naturally with the "+ access_size" and often
2541 cancels it out.
2543 We don't try to simplify LB and UB beyond this (e.g. by using
2544 MIN and MAX based on whether seg_len rather than the stride is
2545 negative) because it is possible for the absolute size of the
2546 segment to overflow the range of a ssize_t.
2548 Keeping the pointer_plus outside of the cond_expr should allow
2549 the cond_exprs to be shared with other alias checks. */
2550 tree indicator = dr_direction_indicator (d.dr);
2551 tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2552 fold_convert (ssizetype, indicator),
2553 ssize_int (0));
2554 tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
2555 DR_OFFSET (d.dr));
2556 addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
2557 tree seg_len
2558 = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));
2560 tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2561 seg_len, size_zero_node);
2562 tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2563 size_zero_node, seg_len);
2564 max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
2565 size_int (d.access_size - align));
2567 *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
2568 *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
2571 /* Generate a runtime condition that is true if ALIAS_PAIR is free of aliases,
2572 storing the condition in *COND_EXPR. The fallback is to generate a
2573 a test that the two accesses do not overlap:
2575 end_a <= start_b || end_b <= start_a. */
2577 static void
2578 create_intersect_range_checks (class loop *loop, tree *cond_expr,
2579 const dr_with_seg_len_pair_t &alias_pair)
2581 const dr_with_seg_len& dr_a = alias_pair.first;
2582 const dr_with_seg_len& dr_b = alias_pair.second;
2583 *cond_expr = NULL_TREE;
2584 if (create_intersect_range_checks_index (loop, cond_expr, alias_pair))
2585 return;
2587 if (create_ifn_alias_checks (cond_expr, alias_pair))
2588 return;
2590 if (create_waw_or_war_checks (cond_expr, alias_pair))
2591 return;
2593 unsigned HOST_WIDE_INT min_align;
2594 tree_code cmp_code;
2595 /* We don't have to check DR_ALIAS_MIXED_STEPS here, since both versions
2596 are equivalent. This is just an optimization heuristic. */
2597 if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
2598 && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
2600 /* In this case adding access_size to seg_len is likely to give
2601 a simple X * step, where X is either the number of scalar
2602 iterations or the vectorization factor. We're better off
2603 keeping that, rather than subtracting an alignment from it.
2605 In this case the maximum values are exclusive and so there is
2606 no alias if the maximum of one segment equals the minimum
2607 of another. */
2608 min_align = 0;
2609 cmp_code = LE_EXPR;
2611 else
2613 /* Calculate the minimum alignment shared by all four pointers,
2614 then arrange for this alignment to be subtracted from the
2615 exclusive maximum values to get inclusive maximum values.
2616 This "- min_align" is cumulative with a "+ access_size"
2617 in the calculation of the maximum values. In the best
2618 (and common) case, the two cancel each other out, leaving
2619 us with an inclusive bound based only on seg_len. In the
2620 worst case we're simply adding a smaller number than before.
2622 Because the maximum values are inclusive, there is an alias
2623 if the maximum value of one segment is equal to the minimum
2624 value of the other. */
2625 min_align = MIN (dr_a.align, dr_b.align);
2626 cmp_code = LT_EXPR;
2629 tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
2630 get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
2631 get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);
2633 *cond_expr
2634 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2635 fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
2636 fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
2637 if (dump_enabled_p ())
2638 dump_printf (MSG_NOTE, "using an address-based overlap test\n");
2641 /* Create a conditional expression that represents the run-time checks for
2642 overlapping of address ranges represented by a list of data references
2643 pairs passed in ALIAS_PAIRS. Data references are in LOOP. The returned
2644 COND_EXPR is the conditional expression to be used in the if statement
2645 that controls which version of the loop gets executed at runtime. */
2647 void
2648 create_runtime_alias_checks (class loop *loop,
2649 const vec<dr_with_seg_len_pair_t> *alias_pairs,
2650 tree * cond_expr)
2652 tree part_cond_expr;
2654 fold_defer_overflow_warnings ();
2655 for (const dr_with_seg_len_pair_t &alias_pair : alias_pairs)
2657 gcc_assert (alias_pair.flags);
2658 if (dump_enabled_p ())
2659 dump_printf (MSG_NOTE,
2660 "create runtime check for data references %T and %T\n",
2661 DR_REF (alias_pair.first.dr),
2662 DR_REF (alias_pair.second.dr));
2664 /* Create condition expression for each pair data references. */
2665 create_intersect_range_checks (loop, &part_cond_expr, alias_pair);
2666 if (*cond_expr)
2667 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2668 *cond_expr, part_cond_expr);
2669 else
2670 *cond_expr = part_cond_expr;
2672 fold_undefer_and_ignore_overflow_warnings ();
2675 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
2676 expressions. */
2677 static bool
2678 dr_equal_offsets_p1 (tree offset1, tree offset2)
2680 bool res;
2682 STRIP_NOPS (offset1);
2683 STRIP_NOPS (offset2);
2685 if (offset1 == offset2)
2686 return true;
2688 if (TREE_CODE (offset1) != TREE_CODE (offset2)
2689 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
2690 return false;
2692 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
2693 TREE_OPERAND (offset2, 0));
2695 if (!res || !BINARY_CLASS_P (offset1))
2696 return res;
2698 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
2699 TREE_OPERAND (offset2, 1));
2701 return res;
2704 /* Check if DRA and DRB have equal offsets. */
2705 bool
2706 dr_equal_offsets_p (struct data_reference *dra,
2707 struct data_reference *drb)
2709 tree offset1, offset2;
2711 offset1 = DR_OFFSET (dra);
2712 offset2 = DR_OFFSET (drb);
2714 return dr_equal_offsets_p1 (offset1, offset2);
2717 /* Returns true if FNA == FNB. */
2719 static bool
2720 affine_function_equal_p (affine_fn fna, affine_fn fnb)
2722 unsigned i, n = fna.length ();
2724 if (n != fnb.length ())
2725 return false;
2727 for (i = 0; i < n; i++)
2728 if (!operand_equal_p (fna[i], fnb[i], 0))
2729 return false;
2731 return true;
2734 /* If all the functions in CF are the same, returns one of them,
2735 otherwise returns NULL. */
2737 static affine_fn
2738 common_affine_function (conflict_function *cf)
2740 unsigned i;
2741 affine_fn comm;
2743 if (!CF_NONTRIVIAL_P (cf))
2744 return affine_fn ();
2746 comm = cf->fns[0];
2748 for (i = 1; i < cf->n; i++)
2749 if (!affine_function_equal_p (comm, cf->fns[i]))
2750 return affine_fn ();
2752 return comm;
2755 /* Returns the base of the affine function FN. */
2757 static tree
2758 affine_function_base (affine_fn fn)
2760 return fn[0];
2763 /* Returns true if FN is a constant. */
2765 static bool
2766 affine_function_constant_p (affine_fn fn)
2768 unsigned i;
2769 tree coef;
2771 for (i = 1; fn.iterate (i, &coef); i++)
2772 if (!integer_zerop (coef))
2773 return false;
2775 return true;
2778 /* Returns true if FN is the zero constant function. */
2780 static bool
2781 affine_function_zero_p (affine_fn fn)
2783 return (integer_zerop (affine_function_base (fn))
2784 && affine_function_constant_p (fn));
2787 /* Returns a signed integer type with the largest precision from TA
2788 and TB. */
2790 static tree
2791 signed_type_for_types (tree ta, tree tb)
2793 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
2794 return signed_type_for (ta);
2795 else
2796 return signed_type_for (tb);
2799 /* Applies operation OP on affine functions FNA and FNB, and returns the
2800 result. */
2802 static affine_fn
2803 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
2805 unsigned i, n, m;
2806 affine_fn ret;
2807 tree coef;
2809 if (fnb.length () > fna.length ())
2811 n = fna.length ();
2812 m = fnb.length ();
2814 else
2816 n = fnb.length ();
2817 m = fna.length ();
2820 ret.create (m);
2821 for (i = 0; i < n; i++)
2823 tree type = signed_type_for_types (TREE_TYPE (fna[i]),
2824 TREE_TYPE (fnb[i]));
2825 ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
2828 for (; fna.iterate (i, &coef); i++)
2829 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2830 coef, integer_zero_node));
2831 for (; fnb.iterate (i, &coef); i++)
2832 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2833 integer_zero_node, coef));
2835 return ret;
2838 /* Returns the sum of affine functions FNA and FNB. */
2840 static affine_fn
2841 affine_fn_plus (affine_fn fna, affine_fn fnb)
2843 return affine_fn_op (PLUS_EXPR, fna, fnb);
2846 /* Returns the difference of affine functions FNA and FNB. */
2848 static affine_fn
2849 affine_fn_minus (affine_fn fna, affine_fn fnb)
2851 return affine_fn_op (MINUS_EXPR, fna, fnb);
2854 /* Frees affine function FN. */
2856 static void
2857 affine_fn_free (affine_fn fn)
2859 fn.release ();
2862 /* Determine for each subscript in the data dependence relation DDR
2863 the distance. */
2865 static void
2866 compute_subscript_distance (struct data_dependence_relation *ddr)
2868 conflict_function *cf_a, *cf_b;
2869 affine_fn fn_a, fn_b, diff;
2871 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
2873 unsigned int i;
2875 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2877 struct subscript *subscript;
2879 subscript = DDR_SUBSCRIPT (ddr, i);
2880 cf_a = SUB_CONFLICTS_IN_A (subscript);
2881 cf_b = SUB_CONFLICTS_IN_B (subscript);
2883 fn_a = common_affine_function (cf_a);
2884 fn_b = common_affine_function (cf_b);
2885 if (!fn_a.exists () || !fn_b.exists ())
2887 SUB_DISTANCE (subscript) = chrec_dont_know;
2888 return;
2890 diff = affine_fn_minus (fn_a, fn_b);
2892 if (affine_function_constant_p (diff))
2893 SUB_DISTANCE (subscript) = affine_function_base (diff);
2894 else
2895 SUB_DISTANCE (subscript) = chrec_dont_know;
2897 affine_fn_free (diff);
2902 /* Returns the conflict function for "unknown". */
2904 static conflict_function *
2905 conflict_fn_not_known (void)
2907 conflict_function *fn = XCNEW (conflict_function);
2908 fn->n = NOT_KNOWN;
2910 return fn;
2913 /* Returns the conflict function for "independent". */
2915 static conflict_function *
2916 conflict_fn_no_dependence (void)
2918 conflict_function *fn = XCNEW (conflict_function);
2919 fn->n = NO_DEPENDENCE;
2921 return fn;
2924 /* Returns true if the address of OBJ is invariant in LOOP. */
2926 static bool
2927 object_address_invariant_in_loop_p (const class loop *loop, const_tree obj)
2929 while (handled_component_p (obj))
2931 if (TREE_CODE (obj) == ARRAY_REF)
2933 for (int i = 1; i < 4; ++i)
2934 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i),
2935 loop->num))
2936 return false;
2938 else if (TREE_CODE (obj) == COMPONENT_REF)
2940 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2941 loop->num))
2942 return false;
2944 obj = TREE_OPERAND (obj, 0);
2947 if (!INDIRECT_REF_P (obj)
2948 && TREE_CODE (obj) != MEM_REF)
2949 return true;
2951 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
2952 loop->num);
2955 /* Returns false if we can prove that data references A and B do not alias,
2956 true otherwise. If LOOP_NEST is false no cross-iteration aliases are
2957 considered. */
2959 bool
2960 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
2961 class loop *loop_nest)
2963 tree addr_a = DR_BASE_OBJECT (a);
2964 tree addr_b = DR_BASE_OBJECT (b);
2966 /* If we are not processing a loop nest but scalar code we
2967 do not need to care about possible cross-iteration dependences
2968 and thus can process the full original reference. Do so,
2969 similar to how loop invariant motion applies extra offset-based
2970 disambiguation. */
2971 if (!loop_nest)
2973 tree tree_size_a = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (a)));
2974 tree tree_size_b = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (b)));
2976 if (DR_BASE_ADDRESS (a)
2977 && DR_BASE_ADDRESS (b)
2978 && operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b))
2979 && operand_equal_p (DR_OFFSET (a), DR_OFFSET (b))
2980 && poly_int_tree_p (tree_size_a)
2981 && poly_int_tree_p (tree_size_b)
2982 && !ranges_maybe_overlap_p (wi::to_poly_widest (DR_INIT (a)),
2983 wi::to_poly_widest (tree_size_a),
2984 wi::to_poly_widest (DR_INIT (b)),
2985 wi::to_poly_widest (tree_size_b)))
2987 gcc_assert (integer_zerop (DR_STEP (a))
2988 && integer_zerop (DR_STEP (b)));
2989 return false;
2992 aff_tree off1, off2;
2993 poly_widest_int size1, size2;
2994 get_inner_reference_aff (DR_REF (a), &off1, &size1);
2995 get_inner_reference_aff (DR_REF (b), &off2, &size2);
2996 aff_combination_scale (&off1, -1);
2997 aff_combination_add (&off2, &off1);
2998 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
2999 return false;
3002 if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
3003 && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
3004 /* For cross-iteration dependences the cliques must be valid for the
3005 whole loop, not just individual iterations. */
3006 && (!loop_nest
3007 || MR_DEPENDENCE_CLIQUE (addr_a) == 1
3008 || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique)
3009 && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
3010 && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
3011 return false;
3013 /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
3014 do not know the size of the base-object. So we cannot do any
3015 offset/overlap based analysis but have to rely on points-to
3016 information only. */
3017 if (TREE_CODE (addr_a) == MEM_REF
3018 && (DR_UNCONSTRAINED_BASE (a)
3019 || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
3021 /* For true dependences we can apply TBAA. */
3022 if (flag_strict_aliasing
3023 && DR_IS_WRITE (a) && DR_IS_READ (b)
3024 && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
3025 get_alias_set (DR_REF (b))))
3026 return false;
3027 if (TREE_CODE (addr_b) == MEM_REF)
3028 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3029 TREE_OPERAND (addr_b, 0));
3030 else
3031 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3032 build_fold_addr_expr (addr_b));
3034 else if (TREE_CODE (addr_b) == MEM_REF
3035 && (DR_UNCONSTRAINED_BASE (b)
3036 || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
3038 /* For true dependences we can apply TBAA. */
3039 if (flag_strict_aliasing
3040 && DR_IS_WRITE (a) && DR_IS_READ (b)
3041 && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
3042 get_alias_set (DR_REF (b))))
3043 return false;
3044 if (TREE_CODE (addr_a) == MEM_REF)
3045 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3046 TREE_OPERAND (addr_b, 0));
3047 else
3048 return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
3049 TREE_OPERAND (addr_b, 0));
3052 /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
3053 that is being subsetted in the loop nest. */
3054 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
3055 return refs_output_dependent_p (addr_a, addr_b);
3056 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
3057 return refs_anti_dependent_p (addr_a, addr_b);
3058 return refs_may_alias_p (addr_a, addr_b);
3061 /* REF_A and REF_B both satisfy access_fn_component_p. Return true
3062 if it is meaningful to compare their associated access functions
3063 when checking for dependencies. */
3065 static bool
3066 access_fn_components_comparable_p (tree ref_a, tree ref_b)
3068 /* Allow pairs of component refs from the following sets:
3070 { REALPART_EXPR, IMAGPART_EXPR }
3071 { COMPONENT_REF }
3072 { ARRAY_REF }. */
3073 tree_code code_a = TREE_CODE (ref_a);
3074 tree_code code_b = TREE_CODE (ref_b);
3075 if (code_a == IMAGPART_EXPR)
3076 code_a = REALPART_EXPR;
3077 if (code_b == IMAGPART_EXPR)
3078 code_b = REALPART_EXPR;
3079 if (code_a != code_b)
3080 return false;
3082 if (TREE_CODE (ref_a) == COMPONENT_REF)
3083 /* ??? We cannot simply use the type of operand #0 of the refs here as
3084 the Fortran compiler smuggles type punning into COMPONENT_REFs.
3085 Use the DECL_CONTEXT of the FIELD_DECLs instead. */
3086 return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
3087 == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
3089 return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
3090 TREE_TYPE (TREE_OPERAND (ref_b, 0)));
3093 /* Initialize a data dependence relation RES in LOOP_NEST. USE_ALT_INDICES
3094 is true when the main indices of A and B were not comparable so we try again
3095 with alternate indices computed on an indirect reference. */
3097 struct data_dependence_relation *
3098 initialize_data_dependence_relation (struct data_dependence_relation *res,
3099 vec<loop_p> loop_nest,
3100 bool use_alt_indices)
3102 struct data_reference *a = DDR_A (res);
3103 struct data_reference *b = DDR_B (res);
3104 unsigned int i;
3106 struct indices *indices_a = &a->indices;
3107 struct indices *indices_b = &b->indices;
3108 if (use_alt_indices)
3110 if (TREE_CODE (DR_REF (a)) != MEM_REF)
3111 indices_a = &a->alt_indices;
3112 if (TREE_CODE (DR_REF (b)) != MEM_REF)
3113 indices_b = &b->alt_indices;
3115 unsigned int num_dimensions_a = indices_a->access_fns.length ();
3116 unsigned int num_dimensions_b = indices_b->access_fns.length ();
3117 if (num_dimensions_a == 0 || num_dimensions_b == 0)
3119 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3120 return res;
3123 /* For unconstrained bases, the root (highest-indexed) subscript
3124 describes a variation in the base of the original DR_REF rather
3125 than a component access. We have no type that accurately describes
3126 the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
3127 applying this subscript) so limit the search to the last real
3128 component access.
3130 E.g. for:
3132 void
3133 f (int a[][8], int b[][8])
3135 for (int i = 0; i < 8; ++i)
3136 a[i * 2][0] = b[i][0];
3139 the a and b accesses have a single ARRAY_REF component reference [0]
3140 but have two subscripts. */
3141 if (indices_a->unconstrained_base)
3142 num_dimensions_a -= 1;
3143 if (indices_b->unconstrained_base)
3144 num_dimensions_b -= 1;
3146 /* These structures describe sequences of component references in
3147 DR_REF (A) and DR_REF (B). Each component reference is tied to a
3148 specific access function. */
3149 struct {
3150 /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
3151 DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
3152 indices. In C notation, these are the indices of the rightmost
3153 component references; e.g. for a sequence .b.c.d, the start
3154 index is for .d. */
3155 unsigned int start_a;
3156 unsigned int start_b;
3158 /* The sequence contains LENGTH consecutive access functions from
3159 each DR. */
3160 unsigned int length;
3162 /* The enclosing objects for the A and B sequences respectively,
3163 i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
3164 and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied. */
3165 tree object_a;
3166 tree object_b;
3167 } full_seq = {}, struct_seq = {};
3169 /* Before each iteration of the loop:
3171 - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
3172 - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B). */
3173 unsigned int index_a = 0;
3174 unsigned int index_b = 0;
3175 tree ref_a = DR_REF (a);
3176 tree ref_b = DR_REF (b);
3178 /* Now walk the component references from the final DR_REFs back up to
3179 the enclosing base objects. Each component reference corresponds
3180 to one access function in the DR, with access function 0 being for
3181 the final DR_REF and the highest-indexed access function being the
3182 one that is applied to the base of the DR.
3184 Look for a sequence of component references whose access functions
3185 are comparable (see access_fn_components_comparable_p). If more
3186 than one such sequence exists, pick the one nearest the base
3187 (which is the leftmost sequence in C notation). Store this sequence
3188 in FULL_SEQ.
3190 For example, if we have:
3192 struct foo { struct bar s; ... } (*a)[10], (*b)[10];
3194 A: a[0][i].s.c.d
3195 B: __real b[0][i].s.e[i].f
3197 (where d is the same type as the real component of f) then the access
3198 functions would be:
3200 0 1 2 3
3201 A: .d .c .s [i]
3203 0 1 2 3 4 5
3204 B: __real .f [i] .e .s [i]
3206 The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
3207 and [i] is an ARRAY_REF. However, the A1/B3 column contains two
3208 COMPONENT_REF accesses for struct bar, so is comparable. Likewise
3209 the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
3210 so is comparable. The A3/B5 column contains two ARRAY_REFs that
3211 index foo[10] arrays, so is again comparable. The sequence is
3212 therefore:
3214 A: [1, 3] (i.e. [i].s.c)
3215 B: [3, 5] (i.e. [i].s.e)
3217 Also look for sequences of component references whose access
3218 functions are comparable and whose enclosing objects have the same
3219 RECORD_TYPE. Store this sequence in STRUCT_SEQ. In the above
3220 example, STRUCT_SEQ would be:
3222 A: [1, 2] (i.e. s.c)
3223 B: [3, 4] (i.e. s.e) */
3224 while (index_a < num_dimensions_a && index_b < num_dimensions_b)
3226 /* The alternate indices form always has a single dimension
3227 with unconstrained base. */
3228 gcc_assert (!use_alt_indices);
3230 /* REF_A and REF_B must be one of the component access types
3231 allowed by dr_analyze_indices. */
3232 gcc_checking_assert (access_fn_component_p (ref_a));
3233 gcc_checking_assert (access_fn_component_p (ref_b));
3235 /* Get the immediately-enclosing objects for REF_A and REF_B,
3236 i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
3237 and DR_ACCESS_FN (B, INDEX_B). */
3238 tree object_a = TREE_OPERAND (ref_a, 0);
3239 tree object_b = TREE_OPERAND (ref_b, 0);
3241 tree type_a = TREE_TYPE (object_a);
3242 tree type_b = TREE_TYPE (object_b);
3243 if (access_fn_components_comparable_p (ref_a, ref_b))
3245 /* This pair of component accesses is comparable for dependence
3246 analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
3247 DR_ACCESS_FN (B, INDEX_B) in the sequence. */
3248 if (full_seq.start_a + full_seq.length != index_a
3249 || full_seq.start_b + full_seq.length != index_b)
3251 /* The accesses don't extend the current sequence,
3252 so start a new one here. */
3253 full_seq.start_a = index_a;
3254 full_seq.start_b = index_b;
3255 full_seq.length = 0;
3258 /* Add this pair of references to the sequence. */
3259 full_seq.length += 1;
3260 full_seq.object_a = object_a;
3261 full_seq.object_b = object_b;
3263 /* If the enclosing objects are structures (and thus have the
3264 same RECORD_TYPE), record the new sequence in STRUCT_SEQ. */
3265 if (TREE_CODE (type_a) == RECORD_TYPE)
3266 struct_seq = full_seq;
3268 /* Move to the next containing reference for both A and B. */
3269 ref_a = object_a;
3270 ref_b = object_b;
3271 index_a += 1;
3272 index_b += 1;
3273 continue;
3276 /* Try to approach equal type sizes. */
3277 if (!COMPLETE_TYPE_P (type_a)
3278 || !COMPLETE_TYPE_P (type_b)
3279 || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
3280 || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
3281 break;
3283 unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
3284 unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
3285 if (size_a <= size_b)
3287 index_a += 1;
3288 ref_a = object_a;
3290 if (size_b <= size_a)
3292 index_b += 1;
3293 ref_b = object_b;
3297 /* See whether FULL_SEQ ends at the base and whether the two bases
3298 are equal. We do not care about TBAA or alignment info so we can
3299 use OEP_ADDRESS_OF to avoid false negatives. */
3300 tree base_a = indices_a->base_object;
3301 tree base_b = indices_b->base_object;
3302 bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
3303 && full_seq.start_b + full_seq.length == num_dimensions_b
3304 && (indices_a->unconstrained_base
3305 == indices_b->unconstrained_base)
3306 && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
3307 && (types_compatible_p (TREE_TYPE (base_a),
3308 TREE_TYPE (base_b))
3309 || (!base_supports_access_fn_components_p (base_a)
3310 && !base_supports_access_fn_components_p (base_b)
3311 && operand_equal_p
3312 (TYPE_SIZE (TREE_TYPE (base_a)),
3313 TYPE_SIZE (TREE_TYPE (base_b)), 0)))
3314 && (!loop_nest.exists ()
3315 || (object_address_invariant_in_loop_p
3316 (loop_nest[0], base_a))));
3318 /* If the bases are the same, we can include the base variation too.
3319 E.g. the b accesses in:
3321 for (int i = 0; i < n; ++i)
3322 b[i + 4][0] = b[i][0];
3324 have a definite dependence distance of 4, while for:
3326 for (int i = 0; i < n; ++i)
3327 a[i + 4][0] = b[i][0];
3329 the dependence distance depends on the gap between a and b.
3331 If the bases are different then we can only rely on the sequence
3332 rooted at a structure access, since arrays are allowed to overlap
3333 arbitrarily and change shape arbitrarily. E.g. we treat this as
3334 valid code:
3336 int a[256];
3338 ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];
3340 where two lvalues with the same int[4][3] type overlap, and where
3341 both lvalues are distinct from the object's declared type. */
3342 if (same_base_p)
3344 if (indices_a->unconstrained_base)
3345 full_seq.length += 1;
3347 else
3348 full_seq = struct_seq;
3350 /* Punt if we didn't find a suitable sequence. */
3351 if (full_seq.length == 0)
3353 if (use_alt_indices
3354 || (TREE_CODE (DR_REF (a)) == MEM_REF
3355 && TREE_CODE (DR_REF (b)) == MEM_REF)
3356 || may_be_nonaddressable_p (DR_REF (a))
3357 || may_be_nonaddressable_p (DR_REF (b)))
3359 /* Fully exhausted possibilities. */
3360 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3361 return res;
3364 /* Try evaluating both DRs as dereferences of pointers. */
3365 if (!a->alt_indices.base_object
3366 && TREE_CODE (DR_REF (a)) != MEM_REF)
3368 tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (a)),
3369 build1 (ADDR_EXPR, ptr_type_node, DR_REF (a)),
3370 build_int_cst
3371 (reference_alias_ptr_type (DR_REF (a)), 0));
3372 dr_analyze_indices (&a->alt_indices, alt_ref,
3373 loop_preheader_edge (loop_nest[0]),
3374 loop_containing_stmt (DR_STMT (a)));
3376 if (!b->alt_indices.base_object
3377 && TREE_CODE (DR_REF (b)) != MEM_REF)
3379 tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (b)),
3380 build1 (ADDR_EXPR, ptr_type_node, DR_REF (b)),
3381 build_int_cst
3382 (reference_alias_ptr_type (DR_REF (b)), 0));
3383 dr_analyze_indices (&b->alt_indices, alt_ref,
3384 loop_preheader_edge (loop_nest[0]),
3385 loop_containing_stmt (DR_STMT (b)));
3387 return initialize_data_dependence_relation (res, loop_nest, true);
3390 if (!same_base_p)
3392 /* Partial overlap is possible for different bases when strict aliasing
3393 is not in effect. It's also possible if either base involves a union
3394 access; e.g. for:
3396 struct s1 { int a[2]; };
3397 struct s2 { struct s1 b; int c; };
3398 struct s3 { int d; struct s1 e; };
3399 union u { struct s2 f; struct s3 g; } *p, *q;
3401 the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
3402 "p->g.e" (base "p->g") and might partially overlap the s1 at
3403 "q->g.e" (base "q->g"). */
3404 if (!flag_strict_aliasing
3405 || ref_contains_union_access_p (full_seq.object_a)
3406 || ref_contains_union_access_p (full_seq.object_b))
3408 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3409 return res;
3412 DDR_COULD_BE_INDEPENDENT_P (res) = true;
3413 if (!loop_nest.exists ()
3414 || (object_address_invariant_in_loop_p (loop_nest[0],
3415 full_seq.object_a)
3416 && object_address_invariant_in_loop_p (loop_nest[0],
3417 full_seq.object_b)))
3419 DDR_OBJECT_A (res) = full_seq.object_a;
3420 DDR_OBJECT_B (res) = full_seq.object_b;
3424 DDR_AFFINE_P (res) = true;
3425 DDR_ARE_DEPENDENT (res) = NULL_TREE;
3426 DDR_SUBSCRIPTS (res).create (full_seq.length);
3427 DDR_LOOP_NEST (res) = loop_nest;
3428 DDR_SELF_REFERENCE (res) = false;
3430 for (i = 0; i < full_seq.length; ++i)
3432 struct subscript *subscript;
3434 subscript = XNEW (struct subscript);
3435 SUB_ACCESS_FN (subscript, 0) = indices_a->access_fns[full_seq.start_a + i];
3436 SUB_ACCESS_FN (subscript, 1) = indices_b->access_fns[full_seq.start_b + i];
3437 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
3438 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
3439 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
3440 SUB_DISTANCE (subscript) = chrec_dont_know;
3441 DDR_SUBSCRIPTS (res).safe_push (subscript);
3444 return res;
3447 /* Initialize a data dependence relation between data accesses A and
3448 B. NB_LOOPS is the number of loops surrounding the references: the
3449 size of the classic distance/direction vectors. */
3451 struct data_dependence_relation *
3452 initialize_data_dependence_relation (struct data_reference *a,
3453 struct data_reference *b,
3454 vec<loop_p> loop_nest)
3456 data_dependence_relation *res = XCNEW (struct data_dependence_relation);
3457 DDR_A (res) = a;
3458 DDR_B (res) = b;
3459 DDR_LOOP_NEST (res).create (0);
3460 DDR_SUBSCRIPTS (res).create (0);
3461 DDR_DIR_VECTS (res).create (0);
3462 DDR_DIST_VECTS (res).create (0);
3464 if (a == NULL || b == NULL)
3466 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3467 return res;
3470 /* If the data references do not alias, then they are independent. */
3471 if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL))
3473 DDR_ARE_DEPENDENT (res) = chrec_known;
3474 return res;
3477 return initialize_data_dependence_relation (res, loop_nest, false);
3481 /* Frees memory used by the conflict function F. */
3483 static void
3484 free_conflict_function (conflict_function *f)
3486 unsigned i;
3488 if (CF_NONTRIVIAL_P (f))
3490 for (i = 0; i < f->n; i++)
3491 affine_fn_free (f->fns[i]);
3493 free (f);
3496 /* Frees memory used by SUBSCRIPTS. */
3498 static void
3499 free_subscripts (vec<subscript_p> subscripts)
3501 for (subscript_p s : subscripts)
3503 free_conflict_function (s->conflicting_iterations_in_a);
3504 free_conflict_function (s->conflicting_iterations_in_b);
3505 free (s);
3507 subscripts.release ();
3510 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
3511 description. */
3513 static inline void
3514 finalize_ddr_dependent (struct data_dependence_relation *ddr,
3515 tree chrec)
3517 DDR_ARE_DEPENDENT (ddr) = chrec;
3518 free_subscripts (DDR_SUBSCRIPTS (ddr));
3519 DDR_SUBSCRIPTS (ddr).create (0);
3522 /* The dependence relation DDR cannot be represented by a distance
3523 vector. */
3525 static inline void
3526 non_affine_dependence_relation (struct data_dependence_relation *ddr)
3528 if (dump_file && (dump_flags & TDF_DETAILS))
3529 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
3531 DDR_AFFINE_P (ddr) = false;
3536 /* This section contains the classic Banerjee tests. */
3538 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
3539 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
3541 static inline bool
3542 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3544 return (evolution_function_is_constant_p (chrec_a)
3545 && evolution_function_is_constant_p (chrec_b));
3548 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
3549 variable, i.e., if the SIV (Single Index Variable) test is true. */
3551 static bool
3552 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3554 if ((evolution_function_is_constant_p (chrec_a)
3555 && evolution_function_is_univariate_p (chrec_b))
3556 || (evolution_function_is_constant_p (chrec_b)
3557 && evolution_function_is_univariate_p (chrec_a)))
3558 return true;
3560 if (evolution_function_is_univariate_p (chrec_a)
3561 && evolution_function_is_univariate_p (chrec_b))
3563 switch (TREE_CODE (chrec_a))
3565 case POLYNOMIAL_CHREC:
3566 switch (TREE_CODE (chrec_b))
3568 case POLYNOMIAL_CHREC:
3569 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
3570 return false;
3571 /* FALLTHRU */
3573 default:
3574 return true;
3577 default:
3578 return true;
3582 return false;
3585 /* Creates a conflict function with N dimensions. The affine functions
3586 in each dimension follow. */
3588 static conflict_function *
3589 conflict_fn (unsigned n, ...)
3591 unsigned i;
3592 conflict_function *ret = XCNEW (conflict_function);
3593 va_list ap;
3595 gcc_assert (n > 0 && n <= MAX_DIM);
3596 va_start (ap, n);
3598 ret->n = n;
3599 for (i = 0; i < n; i++)
3600 ret->fns[i] = va_arg (ap, affine_fn);
3601 va_end (ap);
3603 return ret;
3606 /* Returns constant affine function with value CST. */
3608 static affine_fn
3609 affine_fn_cst (tree cst)
3611 affine_fn fn;
3612 fn.create (1);
3613 fn.quick_push (cst);
3614 return fn;
3617 /* Returns affine function with single variable, CST + COEF * x_DIM. */
3619 static affine_fn
3620 affine_fn_univar (tree cst, unsigned dim, tree coef)
3622 affine_fn fn;
3623 fn.create (dim + 1);
3624 unsigned i;
3626 gcc_assert (dim > 0);
3627 fn.quick_push (cst);
3628 for (i = 1; i < dim; i++)
3629 fn.quick_push (integer_zero_node);
3630 fn.quick_push (coef);
3631 return fn;
3634 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
3635 *OVERLAPS_B are initialized to the functions that describe the
3636 relation between the elements accessed twice by CHREC_A and
3637 CHREC_B. For k >= 0, the following property is verified:
3639 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
3641 static void
3642 analyze_ziv_subscript (tree chrec_a,
3643 tree chrec_b,
3644 conflict_function **overlaps_a,
3645 conflict_function **overlaps_b,
3646 tree *last_conflicts)
3648 tree type, difference;
3649 dependence_stats.num_ziv++;
3651 if (dump_file && (dump_flags & TDF_DETAILS))
3652 fprintf (dump_file, "(analyze_ziv_subscript \n");
3654 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3655 chrec_a = chrec_convert (type, chrec_a, NULL);
3656 chrec_b = chrec_convert (type, chrec_b, NULL);
3657 difference = chrec_fold_minus (type, chrec_a, chrec_b);
3659 switch (TREE_CODE (difference))
3661 case INTEGER_CST:
3662 if (integer_zerop (difference))
3664 /* The difference is equal to zero: the accessed index
3665 overlaps for each iteration in the loop. */
3666 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3667 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3668 *last_conflicts = chrec_dont_know;
3669 dependence_stats.num_ziv_dependent++;
3671 else
3673 /* The accesses do not overlap. */
3674 *overlaps_a = conflict_fn_no_dependence ();
3675 *overlaps_b = conflict_fn_no_dependence ();
3676 *last_conflicts = integer_zero_node;
3677 dependence_stats.num_ziv_independent++;
3679 break;
3681 default:
3682 /* We're not sure whether the indexes overlap. For the moment,
3683 conservatively answer "don't know". */
3684 if (dump_file && (dump_flags & TDF_DETAILS))
3685 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
3687 *overlaps_a = conflict_fn_not_known ();
3688 *overlaps_b = conflict_fn_not_known ();
3689 *last_conflicts = chrec_dont_know;
3690 dependence_stats.num_ziv_unimplemented++;
3691 break;
3694 if (dump_file && (dump_flags & TDF_DETAILS))
3695 fprintf (dump_file, ")\n");
3698 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
3699 and only if it fits to the int type. If this is not the case, or the
3700 bound on the number of iterations of LOOP could not be derived, returns
3701 chrec_dont_know. */
3703 static tree
3704 max_stmt_executions_tree (class loop *loop)
3706 widest_int nit;
3708 if (!max_stmt_executions (loop, &nit))
3709 return chrec_dont_know;
3711 if (!wi::fits_to_tree_p (nit, unsigned_type_node))
3712 return chrec_dont_know;
3714 return wide_int_to_tree (unsigned_type_node, nit);
3717 /* Determine whether the CHREC is always positive/negative. If the expression
3718 cannot be statically analyzed, return false, otherwise set the answer into
3719 VALUE. */
3721 static bool
3722 chrec_is_positive (tree chrec, bool *value)
3724 bool value0, value1, value2;
3725 tree end_value, nb_iter;
3727 switch (TREE_CODE (chrec))
3729 case POLYNOMIAL_CHREC:
3730 if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
3731 || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
3732 return false;
3734 /* FIXME -- overflows. */
3735 if (value0 == value1)
3737 *value = value0;
3738 return true;
3741 /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
3742 and the proof consists in showing that the sign never
3743 changes during the execution of the loop, from 0 to
3744 loop->nb_iterations. */
3745 if (!evolution_function_is_affine_p (chrec))
3746 return false;
3748 nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
3749 if (chrec_contains_undetermined (nb_iter))
3750 return false;
3752 #if 0
3753 /* TODO -- If the test is after the exit, we may decrease the number of
3754 iterations by one. */
3755 if (after_exit)
3756 nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
3757 #endif
3759 end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
3761 if (!chrec_is_positive (end_value, &value2))
3762 return false;
3764 *value = value0;
3765 return value0 == value1;
3767 case INTEGER_CST:
3768 switch (tree_int_cst_sgn (chrec))
3770 case -1:
3771 *value = false;
3772 break;
3773 case 1:
3774 *value = true;
3775 break;
3776 default:
3777 return false;
3779 return true;
3781 default:
3782 return false;
3787 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
3788 constant, and CHREC_B is an affine function. *OVERLAPS_A and
3789 *OVERLAPS_B are initialized to the functions that describe the
3790 relation between the elements accessed twice by CHREC_A and
3791 CHREC_B. For k >= 0, the following property is verified:
3793 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
3795 static void
3796 analyze_siv_subscript_cst_affine (tree chrec_a,
3797 tree chrec_b,
3798 conflict_function **overlaps_a,
3799 conflict_function **overlaps_b,
3800 tree *last_conflicts)
3802 bool value0, value1, value2;
3803 tree type, difference, tmp;
3805 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3806 chrec_a = chrec_convert (type, chrec_a, NULL);
3807 chrec_b = chrec_convert (type, chrec_b, NULL);
3808 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
3810 /* Special case overlap in the first iteration. */
3811 if (integer_zerop (difference))
3813 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3814 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3815 *last_conflicts = integer_one_node;
3816 return;
3819 if (!chrec_is_positive (initial_condition (difference), &value0))
3821 if (dump_file && (dump_flags & TDF_DETAILS))
3822 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
3824 dependence_stats.num_siv_unimplemented++;
3825 *overlaps_a = conflict_fn_not_known ();
3826 *overlaps_b = conflict_fn_not_known ();
3827 *last_conflicts = chrec_dont_know;
3828 return;
3830 else
3832 if (value0 == false)
3834 if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3835 || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
3837 if (dump_file && (dump_flags & TDF_DETAILS))
3838 fprintf (dump_file, "siv test failed: chrec not positive.\n");
3840 *overlaps_a = conflict_fn_not_known ();
3841 *overlaps_b = conflict_fn_not_known ();
3842 *last_conflicts = chrec_dont_know;
3843 dependence_stats.num_siv_unimplemented++;
3844 return;
3846 else
3848 if (value1 == true)
3850 /* Example:
3851 chrec_a = 12
3852 chrec_b = {10, +, 1}
3855 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3857 HOST_WIDE_INT numiter;
3858 class loop *loop = get_chrec_loop (chrec_b);
3860 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3861 tmp = fold_build2 (EXACT_DIV_EXPR, type,
3862 fold_build1 (ABS_EXPR, type, difference),
3863 CHREC_RIGHT (chrec_b));
3864 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3865 *last_conflicts = integer_one_node;
3868 /* Perform weak-zero siv test to see if overlap is
3869 outside the loop bounds. */
3870 numiter = max_stmt_executions_int (loop);
3872 if (numiter >= 0
3873 && compare_tree_int (tmp, numiter) > 0)
3875 free_conflict_function (*overlaps_a);
3876 free_conflict_function (*overlaps_b);
3877 *overlaps_a = conflict_fn_no_dependence ();
3878 *overlaps_b = conflict_fn_no_dependence ();
3879 *last_conflicts = integer_zero_node;
3880 dependence_stats.num_siv_independent++;
3881 return;
3883 dependence_stats.num_siv_dependent++;
3884 return;
3887 /* When the step does not divide the difference, there are
3888 no overlaps. */
3889 else
3891 *overlaps_a = conflict_fn_no_dependence ();
3892 *overlaps_b = conflict_fn_no_dependence ();
3893 *last_conflicts = integer_zero_node;
3894 dependence_stats.num_siv_independent++;
3895 return;
3899 else
3901 /* Example:
3902 chrec_a = 12
3903 chrec_b = {10, +, -1}
3905 In this case, chrec_a will not overlap with chrec_b. */
3906 *overlaps_a = conflict_fn_no_dependence ();
3907 *overlaps_b = conflict_fn_no_dependence ();
3908 *last_conflicts = integer_zero_node;
3909 dependence_stats.num_siv_independent++;
3910 return;
3914 else
3916 if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3917 || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
3919 if (dump_file && (dump_flags & TDF_DETAILS))
3920 fprintf (dump_file, "siv test failed: chrec not positive.\n");
3922 *overlaps_a = conflict_fn_not_known ();
3923 *overlaps_b = conflict_fn_not_known ();
3924 *last_conflicts = chrec_dont_know;
3925 dependence_stats.num_siv_unimplemented++;
3926 return;
3928 else
3930 if (value2 == false)
3932 /* Example:
3933 chrec_a = 3
3934 chrec_b = {10, +, -1}
3936 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3938 HOST_WIDE_INT numiter;
3939 class loop *loop = get_chrec_loop (chrec_b);
3941 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3942 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
3943 CHREC_RIGHT (chrec_b));
3944 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3945 *last_conflicts = integer_one_node;
3947 /* Perform weak-zero siv test to see if overlap is
3948 outside the loop bounds. */
3949 numiter = max_stmt_executions_int (loop);
3951 if (numiter >= 0
3952 && compare_tree_int (tmp, numiter) > 0)
3954 free_conflict_function (*overlaps_a);
3955 free_conflict_function (*overlaps_b);
3956 *overlaps_a = conflict_fn_no_dependence ();
3957 *overlaps_b = conflict_fn_no_dependence ();
3958 *last_conflicts = integer_zero_node;
3959 dependence_stats.num_siv_independent++;
3960 return;
3962 dependence_stats.num_siv_dependent++;
3963 return;
3966 /* When the step does not divide the difference, there
3967 are no overlaps. */
3968 else
3970 *overlaps_a = conflict_fn_no_dependence ();
3971 *overlaps_b = conflict_fn_no_dependence ();
3972 *last_conflicts = integer_zero_node;
3973 dependence_stats.num_siv_independent++;
3974 return;
3977 else
3979 /* Example:
3980 chrec_a = 3
3981 chrec_b = {4, +, 1}
3983 In this case, chrec_a will not overlap with chrec_b. */
3984 *overlaps_a = conflict_fn_no_dependence ();
3985 *overlaps_b = conflict_fn_no_dependence ();
3986 *last_conflicts = integer_zero_node;
3987 dependence_stats.num_siv_independent++;
3988 return;
3995 /* Helper recursive function for initializing the matrix A. Returns
3996 the initial value of CHREC. */
3998 static tree
3999 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
4001 gcc_assert (chrec);
4003 switch (TREE_CODE (chrec))
4005 case POLYNOMIAL_CHREC:
4006 HOST_WIDE_INT chrec_right;
4007 if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
4008 return chrec_dont_know;
4009 chrec_right = int_cst_value (CHREC_RIGHT (chrec));
4010 /* We want to be able to negate without overflow. */
4011 if (chrec_right == HOST_WIDE_INT_MIN)
4012 return chrec_dont_know;
4013 A[index][0] = mult * chrec_right;
4014 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
4016 case PLUS_EXPR:
4017 case MULT_EXPR:
4018 case MINUS_EXPR:
4020 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4021 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
4023 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
4026 CASE_CONVERT:
4028 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4029 return chrec_convert (chrec_type (chrec), op, NULL);
4032 case BIT_NOT_EXPR:
4034 /* Handle ~X as -1 - X. */
4035 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4036 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
4037 build_int_cst (TREE_TYPE (chrec), -1), op);
4040 case INTEGER_CST:
4041 return chrec;
4043 default:
4044 gcc_unreachable ();
4045 return NULL_TREE;
4049 #define FLOOR_DIV(x,y) ((x) / (y))
4051 /* Solves the special case of the Diophantine equation:
4052 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
4054 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
4055 number of iterations that loops X and Y run. The overlaps will be
4056 constructed as evolutions in dimension DIM. */
4058 static void
4059 compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
4060 HOST_WIDE_INT step_a,
4061 HOST_WIDE_INT step_b,
4062 affine_fn *overlaps_a,
4063 affine_fn *overlaps_b,
4064 tree *last_conflicts, int dim)
4066 if (((step_a > 0 && step_b > 0)
4067 || (step_a < 0 && step_b < 0)))
4069 HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
4070 HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;
4072 gcd_steps_a_b = gcd (step_a, step_b);
4073 step_overlaps_a = step_b / gcd_steps_a_b;
4074 step_overlaps_b = step_a / gcd_steps_a_b;
4076 if (niter > 0)
4078 tau2 = FLOOR_DIV (niter, step_overlaps_a);
4079 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
4080 last_conflict = tau2;
4081 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
4083 else
4084 *last_conflicts = chrec_dont_know;
4086 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
4087 build_int_cst (NULL_TREE,
4088 step_overlaps_a));
4089 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
4090 build_int_cst (NULL_TREE,
4091 step_overlaps_b));
4094 else
4096 *overlaps_a = affine_fn_cst (integer_zero_node);
4097 *overlaps_b = affine_fn_cst (integer_zero_node);
4098 *last_conflicts = integer_zero_node;
4102 /* Solves the special case of a Diophantine equation where CHREC_A is
4103 an affine bivariate function, and CHREC_B is an affine univariate
4104 function. For example,
4106 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
4108 has the following overlapping functions:
4110 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
4111 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
4112 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
4114 FORNOW: This is a specialized implementation for a case occurring in
4115 a common benchmark. Implement the general algorithm. */
4117 static void
4118 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
4119 conflict_function **overlaps_a,
4120 conflict_function **overlaps_b,
4121 tree *last_conflicts)
4123 bool xz_p, yz_p, xyz_p;
4124 HOST_WIDE_INT step_x, step_y, step_z;
4125 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
4126 affine_fn overlaps_a_xz, overlaps_b_xz;
4127 affine_fn overlaps_a_yz, overlaps_b_yz;
4128 affine_fn overlaps_a_xyz, overlaps_b_xyz;
4129 affine_fn ova1, ova2, ovb;
4130 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
4132 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
4133 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
4134 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
4136 niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
4137 niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
4138 niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
4140 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
4142 if (dump_file && (dump_flags & TDF_DETAILS))
4143 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
4145 *overlaps_a = conflict_fn_not_known ();
4146 *overlaps_b = conflict_fn_not_known ();
4147 *last_conflicts = chrec_dont_know;
4148 return;
4151 niter = MIN (niter_x, niter_z);
4152 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
4153 &overlaps_a_xz,
4154 &overlaps_b_xz,
4155 &last_conflicts_xz, 1);
4156 niter = MIN (niter_y, niter_z);
4157 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
4158 &overlaps_a_yz,
4159 &overlaps_b_yz,
4160 &last_conflicts_yz, 2);
4161 niter = MIN (niter_x, niter_z);
4162 niter = MIN (niter_y, niter);
4163 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
4164 &overlaps_a_xyz,
4165 &overlaps_b_xyz,
4166 &last_conflicts_xyz, 3);
4168 xz_p = !integer_zerop (last_conflicts_xz);
4169 yz_p = !integer_zerop (last_conflicts_yz);
4170 xyz_p = !integer_zerop (last_conflicts_xyz);
4172 if (xz_p || yz_p || xyz_p)
4174 ova1 = affine_fn_cst (integer_zero_node);
4175 ova2 = affine_fn_cst (integer_zero_node);
4176 ovb = affine_fn_cst (integer_zero_node);
4177 if (xz_p)
4179 affine_fn t0 = ova1;
4180 affine_fn t2 = ovb;
4182 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
4183 ovb = affine_fn_plus (ovb, overlaps_b_xz);
4184 affine_fn_free (t0);
4185 affine_fn_free (t2);
4186 *last_conflicts = last_conflicts_xz;
4188 if (yz_p)
4190 affine_fn t0 = ova2;
4191 affine_fn t2 = ovb;
4193 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
4194 ovb = affine_fn_plus (ovb, overlaps_b_yz);
4195 affine_fn_free (t0);
4196 affine_fn_free (t2);
4197 *last_conflicts = last_conflicts_yz;
4199 if (xyz_p)
4201 affine_fn t0 = ova1;
4202 affine_fn t2 = ova2;
4203 affine_fn t4 = ovb;
4205 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
4206 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
4207 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
4208 affine_fn_free (t0);
4209 affine_fn_free (t2);
4210 affine_fn_free (t4);
4211 *last_conflicts = last_conflicts_xyz;
4213 *overlaps_a = conflict_fn (2, ova1, ova2);
4214 *overlaps_b = conflict_fn (1, ovb);
4216 else
4218 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4219 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4220 *last_conflicts = integer_zero_node;
4223 affine_fn_free (overlaps_a_xz);
4224 affine_fn_free (overlaps_b_xz);
4225 affine_fn_free (overlaps_a_yz);
4226 affine_fn_free (overlaps_b_yz);
4227 affine_fn_free (overlaps_a_xyz);
4228 affine_fn_free (overlaps_b_xyz);
4231 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
4233 static void
4234 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
4235 int size)
4237 memcpy (vec2, vec1, size * sizeof (*vec1));
4240 /* Copy the elements of M x N matrix MAT1 to MAT2. */
4242 static void
4243 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
4244 int m, int n)
4246 int i;
4248 for (i = 0; i < m; i++)
4249 lambda_vector_copy (mat1[i], mat2[i], n);
4252 /* Store the N x N identity matrix in MAT. */
4254 static void
4255 lambda_matrix_id (lambda_matrix mat, int size)
4257 int i, j;
4259 for (i = 0; i < size; i++)
4260 for (j = 0; j < size; j++)
4261 mat[i][j] = (i == j) ? 1 : 0;
4264 /* Return the index of the first nonzero element of vector VEC1 between
4265 START and N. We must have START <= N.
4266 Returns N if VEC1 is the zero vector. */
4268 static int
4269 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
4271 int j = start;
4272 while (j < n && vec1[j] == 0)
4273 j++;
4274 return j;
4277 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
4278 R2 = R2 + CONST1 * R1. */
4280 static bool
4281 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2,
4282 lambda_int const1)
4284 int i;
4286 if (const1 == 0)
4287 return true;
4289 for (i = 0; i < n; i++)
4291 bool ovf;
4292 lambda_int tem = mul_hwi (mat[r1][i], const1, &ovf);
4293 if (ovf)
4294 return false;
4295 lambda_int tem2 = add_hwi (mat[r2][i], tem, &ovf);
4296 if (ovf || tem2 == HOST_WIDE_INT_MIN)
4297 return false;
4298 mat[r2][i] = tem2;
4301 return true;
4304 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
4305 and store the result in VEC2. */
4307 static void
4308 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
4309 int size, lambda_int const1)
4311 int i;
4313 if (const1 == 0)
4314 lambda_vector_clear (vec2, size);
4315 else
4316 for (i = 0; i < size; i++)
4317 vec2[i] = const1 * vec1[i];
4320 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
4322 static void
4323 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
4324 int size)
4326 lambda_vector_mult_const (vec1, vec2, size, -1);
4329 /* Negate row R1 of matrix MAT which has N columns. */
4331 static void
4332 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
4334 lambda_vector_negate (mat[r1], mat[r1], n);
4337 /* Return true if two vectors are equal. */
4339 static bool
4340 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
4342 int i;
4343 for (i = 0; i < size; i++)
4344 if (vec1[i] != vec2[i])
4345 return false;
4346 return true;
4349 /* Given an M x N integer matrix A, this function determines an M x
4350 M unimodular matrix U, and an M x N echelon matrix S such that
4351 "U.A = S". This decomposition is also known as "right Hermite".
4353 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
4354 Restructuring Compilers" Utpal Banerjee. */
4356 static bool
4357 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
4358 lambda_matrix S, lambda_matrix U)
4360 int i, j, i0 = 0;
4362 lambda_matrix_copy (A, S, m, n);
4363 lambda_matrix_id (U, m);
4365 for (j = 0; j < n; j++)
4367 if (lambda_vector_first_nz (S[j], m, i0) < m)
4369 ++i0;
4370 for (i = m - 1; i >= i0; i--)
4372 while (S[i][j] != 0)
4374 lambda_int factor, a, b;
4376 a = S[i-1][j];
4377 b = S[i][j];
4378 gcc_assert (a != HOST_WIDE_INT_MIN);
4379 factor = a / b;
4381 if (!lambda_matrix_row_add (S, n, i, i-1, -factor))
4382 return false;
4383 std::swap (S[i], S[i-1]);
4385 if (!lambda_matrix_row_add (U, m, i, i-1, -factor))
4386 return false;
4387 std::swap (U[i], U[i-1]);
4393 return true;
4396 /* Determines the overlapping elements due to accesses CHREC_A and
4397 CHREC_B, that are affine functions. This function cannot handle
4398 symbolic evolution functions, ie. when initial conditions are
4399 parameters, because it uses lambda matrices of integers. */
4401 static void
4402 analyze_subscript_affine_affine (tree chrec_a,
4403 tree chrec_b,
4404 conflict_function **overlaps_a,
4405 conflict_function **overlaps_b,
4406 tree *last_conflicts)
4408 unsigned nb_vars_a, nb_vars_b, dim;
4409 lambda_int gamma, gcd_alpha_beta;
4410 lambda_matrix A, U, S;
4411 struct obstack scratch_obstack;
4413 if (eq_evolutions_p (chrec_a, chrec_b))
4415 /* The accessed index overlaps for each iteration in the
4416 loop. */
4417 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4418 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4419 *last_conflicts = chrec_dont_know;
4420 return;
4422 if (dump_file && (dump_flags & TDF_DETAILS))
4423 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
4425 /* For determining the initial intersection, we have to solve a
4426 Diophantine equation. This is the most time consuming part.
4428 For answering to the question: "Is there a dependence?" we have
4429 to prove that there exists a solution to the Diophantine
4430 equation, and that the solution is in the iteration domain,
4431 i.e. the solution is positive or zero, and that the solution
4432 happens before the upper bound loop.nb_iterations. Otherwise
4433 there is no dependence. This function outputs a description of
4434 the iterations that hold the intersections. */
4436 nb_vars_a = nb_vars_in_chrec (chrec_a);
4437 nb_vars_b = nb_vars_in_chrec (chrec_b);
4439 gcc_obstack_init (&scratch_obstack);
4441 dim = nb_vars_a + nb_vars_b;
4442 U = lambda_matrix_new (dim, dim, &scratch_obstack);
4443 A = lambda_matrix_new (dim, 1, &scratch_obstack);
4444 S = lambda_matrix_new (dim, 1, &scratch_obstack);
4446 tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
4447 tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
4448 if (init_a == chrec_dont_know
4449 || init_b == chrec_dont_know)
4451 if (dump_file && (dump_flags & TDF_DETAILS))
4452 fprintf (dump_file, "affine-affine test failed: "
4453 "representation issue.\n");
4454 *overlaps_a = conflict_fn_not_known ();
4455 *overlaps_b = conflict_fn_not_known ();
4456 *last_conflicts = chrec_dont_know;
4457 goto end_analyze_subs_aa;
4459 gamma = int_cst_value (init_b) - int_cst_value (init_a);
4461 /* Don't do all the hard work of solving the Diophantine equation
4462 when we already know the solution: for example,
4463 | {3, +, 1}_1
4464 | {3, +, 4}_2
4465 | gamma = 3 - 3 = 0.
4466 Then the first overlap occurs during the first iterations:
4467 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
4469 if (gamma == 0)
4471 if (nb_vars_a == 1 && nb_vars_b == 1)
4473 HOST_WIDE_INT step_a, step_b;
4474 HOST_WIDE_INT niter, niter_a, niter_b;
4475 affine_fn ova, ovb;
4477 niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
4478 niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
4479 niter = MIN (niter_a, niter_b);
4480 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
4481 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
4483 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
4484 &ova, &ovb,
4485 last_conflicts, 1);
4486 *overlaps_a = conflict_fn (1, ova);
4487 *overlaps_b = conflict_fn (1, ovb);
4490 else if (nb_vars_a == 2 && nb_vars_b == 1)
4491 compute_overlap_steps_for_affine_1_2
4492 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
4494 else if (nb_vars_a == 1 && nb_vars_b == 2)
4495 compute_overlap_steps_for_affine_1_2
4496 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
4498 else
4500 if (dump_file && (dump_flags & TDF_DETAILS))
4501 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
4502 *overlaps_a = conflict_fn_not_known ();
4503 *overlaps_b = conflict_fn_not_known ();
4504 *last_conflicts = chrec_dont_know;
4506 goto end_analyze_subs_aa;
4509 /* U.A = S */
4510 if (!lambda_matrix_right_hermite (A, dim, 1, S, U))
4512 *overlaps_a = conflict_fn_not_known ();
4513 *overlaps_b = conflict_fn_not_known ();
4514 *last_conflicts = chrec_dont_know;
4515 goto end_analyze_subs_aa;
4518 if (S[0][0] < 0)
4520 S[0][0] *= -1;
4521 lambda_matrix_row_negate (U, dim, 0);
4523 gcd_alpha_beta = S[0][0];
4525 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
4526 but that is a quite strange case. Instead of ICEing, answer
4527 don't know. */
4528 if (gcd_alpha_beta == 0)
4530 *overlaps_a = conflict_fn_not_known ();
4531 *overlaps_b = conflict_fn_not_known ();
4532 *last_conflicts = chrec_dont_know;
4533 goto end_analyze_subs_aa;
4536 /* The classic "gcd-test". */
4537 if (!int_divides_p (gcd_alpha_beta, gamma))
4539 /* The "gcd-test" has determined that there is no integer
4540 solution, i.e. there is no dependence. */
4541 *overlaps_a = conflict_fn_no_dependence ();
4542 *overlaps_b = conflict_fn_no_dependence ();
4543 *last_conflicts = integer_zero_node;
4546 /* Both access functions are univariate. This includes SIV and MIV cases. */
4547 else if (nb_vars_a == 1 && nb_vars_b == 1)
4549 /* Both functions should have the same evolution sign. */
4550 if (((A[0][0] > 0 && -A[1][0] > 0)
4551 || (A[0][0] < 0 && -A[1][0] < 0)))
4553 /* The solutions are given by:
4555 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
4556 | [u21 u22] [y0]
4558 For a given integer t. Using the following variables,
4560 | i0 = u11 * gamma / gcd_alpha_beta
4561 | j0 = u12 * gamma / gcd_alpha_beta
4562 | i1 = u21
4563 | j1 = u22
4565 the solutions are:
4567 | x0 = i0 + i1 * t,
4568 | y0 = j0 + j1 * t. */
4569 HOST_WIDE_INT i0, j0, i1, j1;
4571 i0 = U[0][0] * gamma / gcd_alpha_beta;
4572 j0 = U[0][1] * gamma / gcd_alpha_beta;
4573 i1 = U[1][0];
4574 j1 = U[1][1];
4576 if ((i1 == 0 && i0 < 0)
4577 || (j1 == 0 && j0 < 0))
4579 /* There is no solution.
4580 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
4581 falls in here, but for the moment we don't look at the
4582 upper bound of the iteration domain. */
4583 *overlaps_a = conflict_fn_no_dependence ();
4584 *overlaps_b = conflict_fn_no_dependence ();
4585 *last_conflicts = integer_zero_node;
4586 goto end_analyze_subs_aa;
4589 if (i1 > 0 && j1 > 0)
4591 HOST_WIDE_INT niter_a
4592 = max_stmt_executions_int (get_chrec_loop (chrec_a));
4593 HOST_WIDE_INT niter_b
4594 = max_stmt_executions_int (get_chrec_loop (chrec_b));
4595 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
4597 /* (X0, Y0) is a solution of the Diophantine equation:
4598 "chrec_a (X0) = chrec_b (Y0)". */
4599 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
4600 CEIL (-j0, j1));
4601 HOST_WIDE_INT x0 = i1 * tau1 + i0;
4602 HOST_WIDE_INT y0 = j1 * tau1 + j0;
4604 /* (X1, Y1) is the smallest positive solution of the eq
4605 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
4606 first conflict occurs. */
4607 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
4608 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
4609 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
4611 if (niter > 0)
4613 /* If the overlap occurs outside of the bounds of the
4614 loop, there is no dependence. */
4615 if (x1 >= niter_a || y1 >= niter_b)
4617 *overlaps_a = conflict_fn_no_dependence ();
4618 *overlaps_b = conflict_fn_no_dependence ();
4619 *last_conflicts = integer_zero_node;
4620 goto end_analyze_subs_aa;
4623 /* max stmt executions can get quite large, avoid
4624 overflows by using wide ints here. */
4625 widest_int tau2
4626 = wi::smin (wi::sdiv_floor (wi::sub (niter_a, i0), i1),
4627 wi::sdiv_floor (wi::sub (niter_b, j0), j1));
4628 widest_int last_conflict = wi::sub (tau2, (x1 - i0)/i1);
4629 if (wi::min_precision (last_conflict, SIGNED)
4630 <= TYPE_PRECISION (integer_type_node))
4631 *last_conflicts
4632 = build_int_cst (integer_type_node,
4633 last_conflict.to_shwi ());
4634 else
4635 *last_conflicts = chrec_dont_know;
4637 else
4638 *last_conflicts = chrec_dont_know;
4640 *overlaps_a
4641 = conflict_fn (1,
4642 affine_fn_univar (build_int_cst (NULL_TREE, x1),
4644 build_int_cst (NULL_TREE, i1)));
4645 *overlaps_b
4646 = conflict_fn (1,
4647 affine_fn_univar (build_int_cst (NULL_TREE, y1),
4649 build_int_cst (NULL_TREE, j1)));
4651 else
4653 /* FIXME: For the moment, the upper bound of the
4654 iteration domain for i and j is not checked. */
4655 if (dump_file && (dump_flags & TDF_DETAILS))
4656 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4657 *overlaps_a = conflict_fn_not_known ();
4658 *overlaps_b = conflict_fn_not_known ();
4659 *last_conflicts = chrec_dont_know;
4662 else
4664 if (dump_file && (dump_flags & TDF_DETAILS))
4665 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4666 *overlaps_a = conflict_fn_not_known ();
4667 *overlaps_b = conflict_fn_not_known ();
4668 *last_conflicts = chrec_dont_know;
4671 else
4673 if (dump_file && (dump_flags & TDF_DETAILS))
4674 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4675 *overlaps_a = conflict_fn_not_known ();
4676 *overlaps_b = conflict_fn_not_known ();
4677 *last_conflicts = chrec_dont_know;
4680 end_analyze_subs_aa:
4681 obstack_free (&scratch_obstack, NULL);
4682 if (dump_file && (dump_flags & TDF_DETAILS))
4684 fprintf (dump_file, " (overlaps_a = ");
4685 dump_conflict_function (dump_file, *overlaps_a);
4686 fprintf (dump_file, ")\n (overlaps_b = ");
4687 dump_conflict_function (dump_file, *overlaps_b);
4688 fprintf (dump_file, "))\n");
4692 /* Returns true when analyze_subscript_affine_affine can be used for
4693 determining the dependence relation between chrec_a and chrec_b,
4694 that contain symbols. This function modifies chrec_a and chrec_b
4695 such that the analysis result is the same, and such that they don't
4696 contain symbols, and then can safely be passed to the analyzer.
4698 Example: The analysis of the following tuples of evolutions produce
4699 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
4700 vs. {0, +, 1}_1
4702 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
4703 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
4706 static bool
4707 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
4709 tree diff, type, left_a, left_b, right_b;
4711 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
4712 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
4713 /* FIXME: For the moment not handled. Might be refined later. */
4714 return false;
4716 type = chrec_type (*chrec_a);
4717 left_a = CHREC_LEFT (*chrec_a);
4718 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
4719 diff = chrec_fold_minus (type, left_a, left_b);
4721 if (!evolution_function_is_constant_p (diff))
4722 return false;
4724 if (dump_file && (dump_flags & TDF_DETAILS))
4725 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
4727 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
4728 diff, CHREC_RIGHT (*chrec_a));
4729 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
4730 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
4731 build_int_cst (type, 0),
4732 right_b);
4733 return true;
4736 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
4737 *OVERLAPS_B are initialized to the functions that describe the
4738 relation between the elements accessed twice by CHREC_A and
4739 CHREC_B. For k >= 0, the following property is verified:
4741 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
4743 static void
4744 analyze_siv_subscript (tree chrec_a,
4745 tree chrec_b,
4746 conflict_function **overlaps_a,
4747 conflict_function **overlaps_b,
4748 tree *last_conflicts,
4749 int loop_nest_num)
4751 dependence_stats.num_siv++;
4753 if (dump_file && (dump_flags & TDF_DETAILS))
4754 fprintf (dump_file, "(analyze_siv_subscript \n");
4756 if (evolution_function_is_constant_p (chrec_a)
4757 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
4758 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
4759 overlaps_a, overlaps_b, last_conflicts);
4761 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
4762 && evolution_function_is_constant_p (chrec_b))
4763 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
4764 overlaps_b, overlaps_a, last_conflicts);
4766 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
4767 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
4769 if (!chrec_contains_symbols (chrec_a)
4770 && !chrec_contains_symbols (chrec_b))
4772 analyze_subscript_affine_affine (chrec_a, chrec_b,
4773 overlaps_a, overlaps_b,
4774 last_conflicts);
4776 if (CF_NOT_KNOWN_P (*overlaps_a)
4777 || CF_NOT_KNOWN_P (*overlaps_b))
4778 dependence_stats.num_siv_unimplemented++;
4779 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4780 || CF_NO_DEPENDENCE_P (*overlaps_b))
4781 dependence_stats.num_siv_independent++;
4782 else
4783 dependence_stats.num_siv_dependent++;
4785 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
4786 &chrec_b))
4788 analyze_subscript_affine_affine (chrec_a, chrec_b,
4789 overlaps_a, overlaps_b,
4790 last_conflicts);
4792 if (CF_NOT_KNOWN_P (*overlaps_a)
4793 || CF_NOT_KNOWN_P (*overlaps_b))
4794 dependence_stats.num_siv_unimplemented++;
4795 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4796 || CF_NO_DEPENDENCE_P (*overlaps_b))
4797 dependence_stats.num_siv_independent++;
4798 else
4799 dependence_stats.num_siv_dependent++;
4801 else
4802 goto siv_subscript_dontknow;
4805 else
4807 siv_subscript_dontknow:;
4808 if (dump_file && (dump_flags & TDF_DETAILS))
4809 fprintf (dump_file, " siv test failed: unimplemented");
4810 *overlaps_a = conflict_fn_not_known ();
4811 *overlaps_b = conflict_fn_not_known ();
4812 *last_conflicts = chrec_dont_know;
4813 dependence_stats.num_siv_unimplemented++;
4816 if (dump_file && (dump_flags & TDF_DETAILS))
4817 fprintf (dump_file, ")\n");
4820 /* Returns false if we can prove that the greatest common divisor of the steps
4821 of CHREC does not divide CST, false otherwise. */
4823 static bool
4824 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
4826 HOST_WIDE_INT cd = 0, val;
4827 tree step;
4829 if (!tree_fits_shwi_p (cst))
4830 return true;
4831 val = tree_to_shwi (cst);
4833 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
4835 step = CHREC_RIGHT (chrec);
4836 if (!tree_fits_shwi_p (step))
4837 return true;
4838 cd = gcd (cd, tree_to_shwi (step));
4839 chrec = CHREC_LEFT (chrec);
4842 return val % cd == 0;
4845 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
4846 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
4847 functions that describe the relation between the elements accessed
4848 twice by CHREC_A and CHREC_B. For k >= 0, the following property
4849 is verified:
4851 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
4853 static void
4854 analyze_miv_subscript (tree chrec_a,
4855 tree chrec_b,
4856 conflict_function **overlaps_a,
4857 conflict_function **overlaps_b,
4858 tree *last_conflicts,
4859 class loop *loop_nest)
4861 tree type, difference;
4863 dependence_stats.num_miv++;
4864 if (dump_file && (dump_flags & TDF_DETAILS))
4865 fprintf (dump_file, "(analyze_miv_subscript \n");
4867 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
4868 chrec_a = chrec_convert (type, chrec_a, NULL);
4869 chrec_b = chrec_convert (type, chrec_b, NULL);
4870 difference = chrec_fold_minus (type, chrec_a, chrec_b);
4872 if (eq_evolutions_p (chrec_a, chrec_b))
4874 /* Access functions are the same: all the elements are accessed
4875 in the same order. */
4876 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4877 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4878 *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
4879 dependence_stats.num_miv_dependent++;
4882 else if (evolution_function_is_constant_p (difference)
4883 && evolution_function_is_affine_multivariate_p (chrec_a,
4884 loop_nest->num)
4885 && !gcd_of_steps_may_divide_p (chrec_a, difference))
4887 /* testsuite/.../ssa-chrec-33.c
4888 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
4890 The difference is 1, and all the evolution steps are multiples
4891 of 2, consequently there are no overlapping elements. */
4892 *overlaps_a = conflict_fn_no_dependence ();
4893 *overlaps_b = conflict_fn_no_dependence ();
4894 *last_conflicts = integer_zero_node;
4895 dependence_stats.num_miv_independent++;
4898 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest->num)
4899 && !chrec_contains_symbols (chrec_a, loop_nest)
4900 && evolution_function_is_affine_in_loop (chrec_b, loop_nest->num)
4901 && !chrec_contains_symbols (chrec_b, loop_nest))
4903 /* testsuite/.../ssa-chrec-35.c
4904 {0, +, 1}_2 vs. {0, +, 1}_3
4905 the overlapping elements are respectively located at iterations:
4906 {0, +, 1}_x and {0, +, 1}_x,
4907 in other words, we have the equality:
4908 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
4910 Other examples:
4911 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
4912 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
4914 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
4915 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
4917 analyze_subscript_affine_affine (chrec_a, chrec_b,
4918 overlaps_a, overlaps_b, last_conflicts);
4920 if (CF_NOT_KNOWN_P (*overlaps_a)
4921 || CF_NOT_KNOWN_P (*overlaps_b))
4922 dependence_stats.num_miv_unimplemented++;
4923 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4924 || CF_NO_DEPENDENCE_P (*overlaps_b))
4925 dependence_stats.num_miv_independent++;
4926 else
4927 dependence_stats.num_miv_dependent++;
4930 else
4932 /* When the analysis is too difficult, answer "don't know". */
4933 if (dump_file && (dump_flags & TDF_DETAILS))
4934 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
4936 *overlaps_a = conflict_fn_not_known ();
4937 *overlaps_b = conflict_fn_not_known ();
4938 *last_conflicts = chrec_dont_know;
4939 dependence_stats.num_miv_unimplemented++;
4942 if (dump_file && (dump_flags & TDF_DETAILS))
4943 fprintf (dump_file, ")\n");
4946 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
4947 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
4948 OVERLAP_ITERATIONS_B are initialized with two functions that
4949 describe the iterations that contain conflicting elements.
4951 Remark: For an integer k >= 0, the following equality is true:
4953 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
4956 static void
4957 analyze_overlapping_iterations (tree chrec_a,
4958 tree chrec_b,
4959 conflict_function **overlap_iterations_a,
4960 conflict_function **overlap_iterations_b,
4961 tree *last_conflicts, class loop *loop_nest)
4963 unsigned int lnn = loop_nest->num;
4965 dependence_stats.num_subscript_tests++;
4967 if (dump_file && (dump_flags & TDF_DETAILS))
4969 fprintf (dump_file, "(analyze_overlapping_iterations \n");
4970 fprintf (dump_file, " (chrec_a = ");
4971 print_generic_expr (dump_file, chrec_a);
4972 fprintf (dump_file, ")\n (chrec_b = ");
4973 print_generic_expr (dump_file, chrec_b);
4974 fprintf (dump_file, ")\n");
4977 if (chrec_a == NULL_TREE
4978 || chrec_b == NULL_TREE
4979 || chrec_contains_undetermined (chrec_a)
4980 || chrec_contains_undetermined (chrec_b))
4982 dependence_stats.num_subscript_undetermined++;
4984 *overlap_iterations_a = conflict_fn_not_known ();
4985 *overlap_iterations_b = conflict_fn_not_known ();
4988 /* If they are the same chrec, and are affine, they overlap
4989 on every iteration. */
4990 else if (eq_evolutions_p (chrec_a, chrec_b)
4991 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4992 || operand_equal_p (chrec_a, chrec_b, 0)))
4994 dependence_stats.num_same_subscript_function++;
4995 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4996 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4997 *last_conflicts = chrec_dont_know;
5000 /* If they aren't the same, and aren't affine, we can't do anything
5001 yet. */
5002 else if ((chrec_contains_symbols (chrec_a)
5003 || chrec_contains_symbols (chrec_b))
5004 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
5005 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
5007 dependence_stats.num_subscript_undetermined++;
5008 *overlap_iterations_a = conflict_fn_not_known ();
5009 *overlap_iterations_b = conflict_fn_not_known ();
5012 else if (ziv_subscript_p (chrec_a, chrec_b))
5013 analyze_ziv_subscript (chrec_a, chrec_b,
5014 overlap_iterations_a, overlap_iterations_b,
5015 last_conflicts);
5017 else if (siv_subscript_p (chrec_a, chrec_b))
5018 analyze_siv_subscript (chrec_a, chrec_b,
5019 overlap_iterations_a, overlap_iterations_b,
5020 last_conflicts, lnn);
5022 else
5023 analyze_miv_subscript (chrec_a, chrec_b,
5024 overlap_iterations_a, overlap_iterations_b,
5025 last_conflicts, loop_nest);
5027 if (dump_file && (dump_flags & TDF_DETAILS))
5029 fprintf (dump_file, " (overlap_iterations_a = ");
5030 dump_conflict_function (dump_file, *overlap_iterations_a);
5031 fprintf (dump_file, ")\n (overlap_iterations_b = ");
5032 dump_conflict_function (dump_file, *overlap_iterations_b);
5033 fprintf (dump_file, "))\n");
5037 /* Helper function for uniquely inserting distance vectors. */
5039 static void
5040 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
5042 for (lambda_vector v : DDR_DIST_VECTS (ddr))
5043 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
5044 return;
5046 DDR_DIST_VECTS (ddr).safe_push (dist_v);
5049 /* Helper function for uniquely inserting direction vectors. */
5051 static void
5052 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
5054 for (lambda_vector v : DDR_DIR_VECTS (ddr))
5055 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
5056 return;
5058 DDR_DIR_VECTS (ddr).safe_push (dir_v);
5061 /* Add a distance of 1 on all the loops outer than INDEX. If we
5062 haven't yet determined a distance for this outer loop, push a new
5063 distance vector composed of the previous distance, and a distance
5064 of 1 for this outer loop. Example:
5066 | loop_1
5067 | loop_2
5068 | A[10]
5069 | endloop_2
5070 | endloop_1
5072 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
5073 save (0, 1), then we have to save (1, 0). */
5075 static void
5076 add_outer_distances (struct data_dependence_relation *ddr,
5077 lambda_vector dist_v, int index)
5079 /* For each outer loop where init_v is not set, the accesses are
5080 in dependence of distance 1 in the loop. */
5081 while (--index >= 0)
5083 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5084 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
5085 save_v[index] = 1;
5086 save_dist_v (ddr, save_v);
5090 /* Return false when fail to represent the data dependence as a
5091 distance vector. A_INDEX is the index of the first reference
5092 (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
5093 second reference. INIT_B is set to true when a component has been
5094 added to the distance vector DIST_V. INDEX_CARRY is then set to
5095 the index in DIST_V that carries the dependence. */
5097 static bool
5098 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
5099 unsigned int a_index, unsigned int b_index,
5100 lambda_vector dist_v, bool *init_b,
5101 int *index_carry)
5103 unsigned i;
5104 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5105 class loop *loop = DDR_LOOP_NEST (ddr)[0];
5107 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
5109 tree access_fn_a, access_fn_b;
5110 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
5112 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
5114 non_affine_dependence_relation (ddr);
5115 return false;
5118 access_fn_a = SUB_ACCESS_FN (subscript, a_index);
5119 access_fn_b = SUB_ACCESS_FN (subscript, b_index);
5121 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
5122 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
5124 HOST_WIDE_INT dist;
5125 int index;
5126 int var_a = CHREC_VARIABLE (access_fn_a);
5127 int var_b = CHREC_VARIABLE (access_fn_b);
5129 if (var_a != var_b
5130 || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
5132 non_affine_dependence_relation (ddr);
5133 return false;
5136 /* When data references are collected in a loop while data
5137 dependences are analyzed in loop nest nested in the loop, we
5138 would have more number of access functions than number of
5139 loops. Skip access functions of loops not in the loop nest.
5141 See PR89725 for more information. */
5142 if (flow_loop_nested_p (get_loop (cfun, var_a), loop))
5143 continue;
5145 dist = int_cst_value (SUB_DISTANCE (subscript));
5146 index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
5147 *index_carry = MIN (index, *index_carry);
5149 /* This is the subscript coupling test. If we have already
5150 recorded a distance for this loop (a distance coming from
5151 another subscript), it should be the same. For example,
5152 in the following code, there is no dependence:
5154 | loop i = 0, N, 1
5155 | T[i+1][i] = ...
5156 | ... = T[i][i]
5157 | endloop
5159 if (init_v[index] != 0 && dist_v[index] != dist)
5161 finalize_ddr_dependent (ddr, chrec_known);
5162 return false;
5165 dist_v[index] = dist;
5166 init_v[index] = 1;
5167 *init_b = true;
5169 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
5171 /* This can be for example an affine vs. constant dependence
5172 (T[i] vs. T[3]) that is not an affine dependence and is
5173 not representable as a distance vector. */
5174 non_affine_dependence_relation (ddr);
5175 return false;
5177 else
5178 *init_b = true;
5181 return true;
5184 /* Return true when the DDR contains only invariant access functions wrto. loop
5185 number LNUM. */
5187 static bool
5188 invariant_access_functions (const struct data_dependence_relation *ddr,
5189 int lnum)
5191 for (subscript *sub : DDR_SUBSCRIPTS (ddr))
5192 if (!evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 0), lnum)
5193 || !evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 1), lnum))
5194 return false;
5196 return true;
5199 /* Helper function for the case where DDR_A and DDR_B are the same
5200 multivariate access function with a constant step. For an example
5201 see pr34635-1.c. */
5203 static void
5204 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
5206 int x_1, x_2;
5207 tree c_1 = CHREC_LEFT (c_2);
5208 tree c_0 = CHREC_LEFT (c_1);
5209 lambda_vector dist_v;
5210 HOST_WIDE_INT v1, v2, cd;
5212 /* Polynomials with more than 2 variables are not handled yet. When
5213 the evolution steps are parameters, it is not possible to
5214 represent the dependence using classical distance vectors. */
5215 if (TREE_CODE (c_0) != INTEGER_CST
5216 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
5217 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
5219 DDR_AFFINE_P (ddr) = false;
5220 return;
5223 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
5224 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
5226 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
5227 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5228 v1 = int_cst_value (CHREC_RIGHT (c_1));
5229 v2 = int_cst_value (CHREC_RIGHT (c_2));
5230 cd = gcd (v1, v2);
5231 v1 /= cd;
5232 v2 /= cd;
5234 if (v2 < 0)
5236 v2 = -v2;
5237 v1 = -v1;
5240 dist_v[x_1] = v2;
5241 dist_v[x_2] = -v1;
5242 save_dist_v (ddr, dist_v);
5244 add_outer_distances (ddr, dist_v, x_1);
5247 /* Helper function for the case where DDR_A and DDR_B are the same
5248 access functions. */
5250 static void
5251 add_other_self_distances (struct data_dependence_relation *ddr)
5253 lambda_vector dist_v;
5254 unsigned i;
5255 int index_carry = DDR_NB_LOOPS (ddr);
5256 subscript *sub;
5257 class loop *loop = DDR_LOOP_NEST (ddr)[0];
5259 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
5261 tree access_fun = SUB_ACCESS_FN (sub, 0);
5263 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
5265 if (!evolution_function_is_univariate_p (access_fun, loop->num))
5267 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
5269 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
5270 return;
5273 access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);
5275 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
5276 add_multivariate_self_dist (ddr, access_fun);
5277 else
5278 /* The evolution step is not constant: it varies in
5279 the outer loop, so this cannot be represented by a
5280 distance vector. For example in pr34635.c the
5281 evolution is {0, +, {0, +, 4}_1}_2. */
5282 DDR_AFFINE_P (ddr) = false;
5284 return;
5287 /* When data references are collected in a loop while data
5288 dependences are analyzed in loop nest nested in the loop, we
5289 would have more number of access functions than number of
5290 loops. Skip access functions of loops not in the loop nest.
5292 See PR89725 for more information. */
5293 if (flow_loop_nested_p (get_loop (cfun, CHREC_VARIABLE (access_fun)),
5294 loop))
5295 continue;
5297 index_carry = MIN (index_carry,
5298 index_in_loop_nest (CHREC_VARIABLE (access_fun),
5299 DDR_LOOP_NEST (ddr)));
5303 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5304 add_outer_distances (ddr, dist_v, index_carry);
5307 static void
5308 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
5310 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5312 dist_v[0] = 1;
5313 save_dist_v (ddr, dist_v);
5316 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
5317 is the case for example when access functions are the same and
5318 equal to a constant, as in:
5320 | loop_1
5321 | A[3] = ...
5322 | ... = A[3]
5323 | endloop_1
5325 in which case the distance vectors are (0) and (1). */
5327 static void
5328 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
5330 unsigned i, j;
5332 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
5334 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
5335 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
5336 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
5338 for (j = 0; j < ca->n; j++)
5339 if (affine_function_zero_p (ca->fns[j]))
5341 insert_innermost_unit_dist_vector (ddr);
5342 return;
5345 for (j = 0; j < cb->n; j++)
5346 if (affine_function_zero_p (cb->fns[j]))
5348 insert_innermost_unit_dist_vector (ddr);
5349 return;
5354 /* Return true when the DDR contains two data references that have the
5355 same access functions. */
5357 static inline bool
5358 same_access_functions (const struct data_dependence_relation *ddr)
5360 for (subscript *sub : DDR_SUBSCRIPTS (ddr))
5361 if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
5362 SUB_ACCESS_FN (sub, 1)))
5363 return false;
5365 return true;
5368 /* Compute the classic per loop distance vector. DDR is the data
5369 dependence relation to build a vector from. Return false when fail
5370 to represent the data dependence as a distance vector. */
5372 static bool
5373 build_classic_dist_vector (struct data_dependence_relation *ddr,
5374 class loop *loop_nest)
5376 bool init_b = false;
5377 int index_carry = DDR_NB_LOOPS (ddr);
5378 lambda_vector dist_v;
5380 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
5381 return false;
5383 if (same_access_functions (ddr))
5385 /* Save the 0 vector. */
5386 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5387 save_dist_v (ddr, dist_v);
5389 if (invariant_access_functions (ddr, loop_nest->num))
5390 add_distance_for_zero_overlaps (ddr);
5392 if (DDR_NB_LOOPS (ddr) > 1)
5393 add_other_self_distances (ddr);
5395 return true;
5398 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5399 if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
5400 return false;
5402 /* Save the distance vector if we initialized one. */
5403 if (init_b)
5405 /* Verify a basic constraint: classic distance vectors should
5406 always be lexicographically positive.
5408 Data references are collected in the order of execution of
5409 the program, thus for the following loop
5411 | for (i = 1; i < 100; i++)
5412 | for (j = 1; j < 100; j++)
5414 | t = T[j+1][i-1]; // A
5415 | T[j][i] = t + 2; // B
5418 references are collected following the direction of the wind:
5419 A then B. The data dependence tests are performed also
5420 following this order, such that we're looking at the distance
5421 separating the elements accessed by A from the elements later
5422 accessed by B. But in this example, the distance returned by
5423 test_dep (A, B) is lexicographically negative (-1, 1), that
5424 means that the access A occurs later than B with respect to
5425 the outer loop, ie. we're actually looking upwind. In this
5426 case we solve test_dep (B, A) looking downwind to the
5427 lexicographically positive solution, that returns the
5428 distance vector (1, -1). */
5429 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
5431 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5432 if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
5433 return false;
5434 compute_subscript_distance (ddr);
5435 if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
5436 &index_carry))
5437 return false;
5438 save_dist_v (ddr, save_v);
5439 DDR_REVERSED_P (ddr) = true;
5441 /* In this case there is a dependence forward for all the
5442 outer loops:
5444 | for (k = 1; k < 100; k++)
5445 | for (i = 1; i < 100; i++)
5446 | for (j = 1; j < 100; j++)
5448 | t = T[j+1][i-1]; // A
5449 | T[j][i] = t + 2; // B
5452 the vectors are:
5453 (0, 1, -1)
5454 (1, 1, -1)
5455 (1, -1, 1)
5457 if (DDR_NB_LOOPS (ddr) > 1)
5459 add_outer_distances (ddr, save_v, index_carry);
5460 add_outer_distances (ddr, dist_v, index_carry);
5463 else
5465 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5466 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
5468 if (DDR_NB_LOOPS (ddr) > 1)
5470 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5472 if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
5473 return false;
5474 compute_subscript_distance (ddr);
5475 if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
5476 &index_carry))
5477 return false;
5479 save_dist_v (ddr, save_v);
5480 add_outer_distances (ddr, dist_v, index_carry);
5481 add_outer_distances (ddr, opposite_v, index_carry);
5483 else
5484 save_dist_v (ddr, save_v);
5487 else
5489 /* There is a distance of 1 on all the outer loops: Example:
5490 there is a dependence of distance 1 on loop_1 for the array A.
5492 | loop_1
5493 | A[5] = ...
5494 | endloop
5496 add_outer_distances (ddr, dist_v,
5497 lambda_vector_first_nz (dist_v,
5498 DDR_NB_LOOPS (ddr), 0));
5501 if (dump_file && (dump_flags & TDF_DETAILS))
5503 unsigned i;
5505 fprintf (dump_file, "(build_classic_dist_vector\n");
5506 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
5508 fprintf (dump_file, " dist_vector = (");
5509 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
5510 DDR_NB_LOOPS (ddr));
5511 fprintf (dump_file, " )\n");
5513 fprintf (dump_file, ")\n");
5516 return true;
5519 /* Return the direction for a given distance.
5520 FIXME: Computing dir this way is suboptimal, since dir can catch
5521 cases that dist is unable to represent. */
5523 static inline enum data_dependence_direction
5524 dir_from_dist (int dist)
5526 if (dist > 0)
5527 return dir_positive;
5528 else if (dist < 0)
5529 return dir_negative;
5530 else
5531 return dir_equal;
5534 /* Compute the classic per loop direction vector. DDR is the data
5535 dependence relation to build a vector from. */
5537 static void
5538 build_classic_dir_vector (struct data_dependence_relation *ddr)
5540 unsigned i, j;
5541 lambda_vector dist_v;
5543 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
5545 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5547 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
5548 dir_v[j] = dir_from_dist (dist_v[j]);
5550 save_dir_v (ddr, dir_v);
5554 /* Helper function. Returns true when there is a dependence between the
5555 data references. A_INDEX is the index of the first reference (0 for
5556 DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference. */
5558 static bool
5559 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
5560 unsigned int a_index, unsigned int b_index,
5561 class loop *loop_nest)
5563 unsigned int i;
5564 tree last_conflicts;
5565 struct subscript *subscript;
5566 tree res = NULL_TREE;
5568 for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
5570 conflict_function *overlaps_a, *overlaps_b;
5572 analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
5573 SUB_ACCESS_FN (subscript, b_index),
5574 &overlaps_a, &overlaps_b,
5575 &last_conflicts, loop_nest);
5577 if (SUB_CONFLICTS_IN_A (subscript))
5578 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
5579 if (SUB_CONFLICTS_IN_B (subscript))
5580 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
5582 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
5583 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
5584 SUB_LAST_CONFLICT (subscript) = last_conflicts;
5586 /* If there is any undetermined conflict function we have to
5587 give a conservative answer in case we cannot prove that
5588 no dependence exists when analyzing another subscript. */
5589 if (CF_NOT_KNOWN_P (overlaps_a)
5590 || CF_NOT_KNOWN_P (overlaps_b))
5592 res = chrec_dont_know;
5593 continue;
5596 /* When there is a subscript with no dependence we can stop. */
5597 else if (CF_NO_DEPENDENCE_P (overlaps_a)
5598 || CF_NO_DEPENDENCE_P (overlaps_b))
5600 res = chrec_known;
5601 break;
5605 if (res == NULL_TREE)
5606 return true;
5608 if (res == chrec_known)
5609 dependence_stats.num_dependence_independent++;
5610 else
5611 dependence_stats.num_dependence_undetermined++;
5612 finalize_ddr_dependent (ddr, res);
5613 return false;
5616 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
5618 static void
5619 subscript_dependence_tester (struct data_dependence_relation *ddr,
5620 class loop *loop_nest)
5622 if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
5623 dependence_stats.num_dependence_dependent++;
5625 compute_subscript_distance (ddr);
5626 if (build_classic_dist_vector (ddr, loop_nest))
5627 build_classic_dir_vector (ddr);
5630 /* Returns true when all the access functions of A are affine or
5631 constant with respect to LOOP_NEST. */
5633 static bool
5634 access_functions_are_affine_or_constant_p (const struct data_reference *a,
5635 const class loop *loop_nest)
5637 vec<tree> fns = DR_ACCESS_FNS (a);
5638 for (tree t : fns)
5639 if (!evolution_function_is_invariant_p (t, loop_nest->num)
5640 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
5641 return false;
5643 return true;
5646 /* This computes the affine dependence relation between A and B with
5647 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
5648 independence between two accesses, while CHREC_DONT_KNOW is used
5649 for representing the unknown relation.
5651 Note that it is possible to stop the computation of the dependence
5652 relation the first time we detect a CHREC_KNOWN element for a given
5653 subscript. */
5655 void
5656 compute_affine_dependence (struct data_dependence_relation *ddr,
5657 class loop *loop_nest)
5659 struct data_reference *dra = DDR_A (ddr);
5660 struct data_reference *drb = DDR_B (ddr);
5662 if (dump_file && (dump_flags & TDF_DETAILS))
5664 fprintf (dump_file, "(compute_affine_dependence\n");
5665 fprintf (dump_file, " ref_a: ");
5666 print_generic_expr (dump_file, DR_REF (dra));
5667 fprintf (dump_file, ", stmt_a: ");
5668 print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
5669 fprintf (dump_file, " ref_b: ");
5670 print_generic_expr (dump_file, DR_REF (drb));
5671 fprintf (dump_file, ", stmt_b: ");
5672 print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
5675 /* Analyze only when the dependence relation is not yet known. */
5676 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
5678 dependence_stats.num_dependence_tests++;
5680 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
5681 && access_functions_are_affine_or_constant_p (drb, loop_nest))
5682 subscript_dependence_tester (ddr, loop_nest);
5684 /* As a last case, if the dependence cannot be determined, or if
5685 the dependence is considered too difficult to determine, answer
5686 "don't know". */
5687 else
5689 dependence_stats.num_dependence_undetermined++;
5691 if (dump_file && (dump_flags & TDF_DETAILS))
5693 fprintf (dump_file, "Data ref a:\n");
5694 dump_data_reference (dump_file, dra);
5695 fprintf (dump_file, "Data ref b:\n");
5696 dump_data_reference (dump_file, drb);
5697 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
5699 finalize_ddr_dependent (ddr, chrec_dont_know);
5703 if (dump_file && (dump_flags & TDF_DETAILS))
5705 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
5706 fprintf (dump_file, ") -> no dependence\n");
5707 else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
5708 fprintf (dump_file, ") -> dependence analysis failed\n");
5709 else
5710 fprintf (dump_file, ")\n");
5714 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
5715 the data references in DATAREFS, in the LOOP_NEST. When
5716 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
5717 relations. Return true when successful, i.e. data references number
5718 is small enough to be handled. */
5720 bool
5721 compute_all_dependences (const vec<data_reference_p> &datarefs,
5722 vec<ddr_p> *dependence_relations,
5723 const vec<loop_p> &loop_nest,
5724 bool compute_self_and_rr)
5726 struct data_dependence_relation *ddr;
5727 struct data_reference *a, *b;
5728 unsigned int i, j;
5730 if ((int) datarefs.length ()
5731 > param_loop_max_datarefs_for_datadeps)
5733 struct data_dependence_relation *ddr;
5735 /* Insert a single relation into dependence_relations:
5736 chrec_dont_know. */
5737 ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
5738 dependence_relations->safe_push (ddr);
5739 return false;
5742 FOR_EACH_VEC_ELT (datarefs, i, a)
5743 for (j = i + 1; datarefs.iterate (j, &b); j++)
5744 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
5746 ddr = initialize_data_dependence_relation (a, b, loop_nest);
5747 dependence_relations->safe_push (ddr);
5748 if (loop_nest.exists ())
5749 compute_affine_dependence (ddr, loop_nest[0]);
5752 if (compute_self_and_rr)
5753 FOR_EACH_VEC_ELT (datarefs, i, a)
5755 ddr = initialize_data_dependence_relation (a, a, loop_nest);
5756 dependence_relations->safe_push (ddr);
5757 if (loop_nest.exists ())
5758 compute_affine_dependence (ddr, loop_nest[0]);
5761 return true;
5764 /* Describes a location of a memory reference. */
5766 struct data_ref_loc
5768 /* The memory reference. */
5769 tree ref;
5771 /* True if the memory reference is read. */
5772 bool is_read;
5774 /* True if the data reference is conditional within the containing
5775 statement, i.e. if it might not occur even when the statement
5776 is executed and runs to completion. */
5777 bool is_conditional_in_stmt;
5781 /* Stores the locations of memory references in STMT to REFERENCES. Returns
5782 true if STMT clobbers memory, false otherwise. */
5784 static bool
5785 get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
5787 bool clobbers_memory = false;
5788 data_ref_loc ref;
5789 tree op0, op1;
5790 enum gimple_code stmt_code = gimple_code (stmt);
5792 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
5793 As we cannot model data-references to not spelled out
5794 accesses give up if they may occur. */
5795 if (stmt_code == GIMPLE_CALL
5796 && !(gimple_call_flags (stmt) & ECF_CONST))
5798 /* Allow IFN_GOMP_SIMD_LANE in their own loops. */
5799 if (gimple_call_internal_p (stmt))
5800 switch (gimple_call_internal_fn (stmt))
5802 case IFN_GOMP_SIMD_LANE:
5804 class loop *loop = gimple_bb (stmt)->loop_father;
5805 tree uid = gimple_call_arg (stmt, 0);
5806 gcc_assert (TREE_CODE (uid) == SSA_NAME);
5807 if (loop == NULL
5808 || loop->simduid != SSA_NAME_VAR (uid))
5809 clobbers_memory = true;
5810 break;
5812 case IFN_MASK_LOAD:
5813 case IFN_MASK_STORE:
5814 break;
5815 default:
5816 clobbers_memory = true;
5817 break;
5819 else
5820 clobbers_memory = true;
5822 else if (stmt_code == GIMPLE_ASM
5823 && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
5824 || gimple_vuse (stmt)))
5825 clobbers_memory = true;
5827 if (!gimple_vuse (stmt))
5828 return clobbers_memory;
5830 if (stmt_code == GIMPLE_ASSIGN)
5832 tree base;
5833 op0 = gimple_assign_lhs (stmt);
5834 op1 = gimple_assign_rhs1 (stmt);
5836 if (DECL_P (op1)
5837 || (REFERENCE_CLASS_P (op1)
5838 && (base = get_base_address (op1))
5839 && TREE_CODE (base) != SSA_NAME
5840 && !is_gimple_min_invariant (base)))
5842 ref.ref = op1;
5843 ref.is_read = true;
5844 ref.is_conditional_in_stmt = false;
5845 references->safe_push (ref);
5848 else if (stmt_code == GIMPLE_CALL)
5850 unsigned i, n;
5851 tree ptr, type;
5852 unsigned int align;
5854 ref.is_read = false;
5855 if (gimple_call_internal_p (stmt))
5856 switch (gimple_call_internal_fn (stmt))
5858 case IFN_MASK_LOAD:
5859 if (gimple_call_lhs (stmt) == NULL_TREE)
5860 break;
5861 ref.is_read = true;
5862 /* FALLTHRU */
5863 case IFN_MASK_STORE:
5864 ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
5865 align = tree_to_shwi (gimple_call_arg (stmt, 1));
5866 if (ref.is_read)
5867 type = TREE_TYPE (gimple_call_lhs (stmt));
5868 else
5869 type = TREE_TYPE (gimple_call_arg (stmt, 3));
5870 if (TYPE_ALIGN (type) != align)
5871 type = build_aligned_type (type, align);
5872 ref.is_conditional_in_stmt = true;
5873 ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
5874 ptr);
5875 references->safe_push (ref);
5876 return false;
5877 default:
5878 break;
5881 op0 = gimple_call_lhs (stmt);
5882 n = gimple_call_num_args (stmt);
5883 for (i = 0; i < n; i++)
5885 op1 = gimple_call_arg (stmt, i);
5887 if (DECL_P (op1)
5888 || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
5890 ref.ref = op1;
5891 ref.is_read = true;
5892 ref.is_conditional_in_stmt = false;
5893 references->safe_push (ref);
5897 else
5898 return clobbers_memory;
5900 if (op0
5901 && (DECL_P (op0)
5902 || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
5904 ref.ref = op0;
5905 ref.is_read = false;
5906 ref.is_conditional_in_stmt = false;
5907 references->safe_push (ref);
5909 return clobbers_memory;
5913 /* Returns true if the loop-nest has any data reference. */
5915 bool
5916 loop_nest_has_data_refs (loop_p loop)
5918 basic_block *bbs = get_loop_body (loop);
5919 auto_vec<data_ref_loc, 3> references;
5921 for (unsigned i = 0; i < loop->num_nodes; i++)
5923 basic_block bb = bbs[i];
5924 gimple_stmt_iterator bsi;
5926 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5928 gimple *stmt = gsi_stmt (bsi);
5929 get_references_in_stmt (stmt, &references);
5930 if (references.length ())
5932 free (bbs);
5933 return true;
5937 free (bbs);
5938 return false;
5941 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
5942 reference, returns false, otherwise returns true. NEST is the outermost
5943 loop of the loop nest in which the references should be analyzed. */
5945 opt_result
5946 find_data_references_in_stmt (class loop *nest, gimple *stmt,
5947 vec<data_reference_p> *datarefs)
5949 auto_vec<data_ref_loc, 2> references;
5950 data_reference_p dr;
5952 if (get_references_in_stmt (stmt, &references))
5953 return opt_result::failure_at (stmt, "statement clobbers memory: %G",
5954 stmt);
5956 for (const data_ref_loc &ref : references)
5958 dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
5959 loop_containing_stmt (stmt), ref.ref,
5960 stmt, ref.is_read, ref.is_conditional_in_stmt);
5961 gcc_assert (dr != NULL);
5962 datarefs->safe_push (dr);
5965 return opt_result::success ();
5968 /* Stores the data references in STMT to DATAREFS. If there is an
5969 unanalyzable reference, returns false, otherwise returns true.
5970 NEST is the outermost loop of the loop nest in which the references
5971 should be instantiated, LOOP is the loop in which the references
5972 should be analyzed. */
5974 bool
5975 graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
5976 vec<data_reference_p> *datarefs)
5978 auto_vec<data_ref_loc, 2> references;
5979 bool ret = true;
5980 data_reference_p dr;
5982 if (get_references_in_stmt (stmt, &references))
5983 return false;
5985 for (const data_ref_loc &ref : references)
5987 dr = create_data_ref (nest, loop, ref.ref, stmt, ref.is_read,
5988 ref.is_conditional_in_stmt);
5989 gcc_assert (dr != NULL);
5990 datarefs->safe_push (dr);
5993 return ret;
5996 /* Search the data references in LOOP, and record the information into
5997 DATAREFS. Returns chrec_dont_know when failing to analyze a
5998 difficult case, returns NULL_TREE otherwise. */
6000 tree
6001 find_data_references_in_bb (class loop *loop, basic_block bb,
6002 vec<data_reference_p> *datarefs)
6004 gimple_stmt_iterator bsi;
6006 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
6008 gimple *stmt = gsi_stmt (bsi);
6010 if (!find_data_references_in_stmt (loop, stmt, datarefs))
6012 struct data_reference *res;
6013 res = XCNEW (struct data_reference);
6014 datarefs->safe_push (res);
6016 return chrec_dont_know;
6020 return NULL_TREE;
6023 /* Search the data references in LOOP, and record the information into
6024 DATAREFS. Returns chrec_dont_know when failing to analyze a
6025 difficult case, returns NULL_TREE otherwise.
6027 TODO: This function should be made smarter so that it can handle address
6028 arithmetic as if they were array accesses, etc. */
6030 tree
6031 find_data_references_in_loop (class loop *loop,
6032 vec<data_reference_p> *datarefs)
6034 basic_block bb, *bbs;
6035 unsigned int i;
6037 bbs = get_loop_body_in_dom_order (loop);
6039 for (i = 0; i < loop->num_nodes; i++)
6041 bb = bbs[i];
6043 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
6045 free (bbs);
6046 return chrec_dont_know;
6049 free (bbs);
6051 return NULL_TREE;
6054 /* Return the alignment in bytes that DRB is guaranteed to have at all
6055 times. */
6057 unsigned int
6058 dr_alignment (innermost_loop_behavior *drb)
6060 /* Get the alignment of BASE_ADDRESS + INIT. */
6061 unsigned int alignment = drb->base_alignment;
6062 unsigned int misalignment = (drb->base_misalignment
6063 + TREE_INT_CST_LOW (drb->init));
6064 if (misalignment != 0)
6065 alignment = MIN (alignment, misalignment & -misalignment);
6067 /* Cap it to the alignment of OFFSET. */
6068 if (!integer_zerop (drb->offset))
6069 alignment = MIN (alignment, drb->offset_alignment);
6071 /* Cap it to the alignment of STEP. */
6072 if (!integer_zerop (drb->step))
6073 alignment = MIN (alignment, drb->step_alignment);
6075 return alignment;
6078 /* If BASE is a pointer-typed SSA name, try to find the object that it
6079 is based on. Return this object X on success and store the alignment
6080 in bytes of BASE - &X in *ALIGNMENT_OUT. */
6082 static tree
6083 get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
6085 if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
6086 return NULL_TREE;
6088 gimple *def = SSA_NAME_DEF_STMT (base);
6089 base = analyze_scalar_evolution (loop_containing_stmt (def), base);
6091 /* Peel chrecs and record the minimum alignment preserved by
6092 all steps. */
6093 unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
6094 while (TREE_CODE (base) == POLYNOMIAL_CHREC)
6096 unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
6097 alignment = MIN (alignment, step_alignment);
6098 base = CHREC_LEFT (base);
6101 /* Punt if the expression is too complicated to handle. */
6102 if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
6103 return NULL_TREE;
6105 /* The only useful cases are those for which a dereference folds to something
6106 other than an INDIRECT_REF. */
6107 tree ref_type = TREE_TYPE (TREE_TYPE (base));
6108 tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
6109 if (!ref)
6110 return NULL_TREE;
6112 /* Analyze the base to which the steps we peeled were applied. */
6113 poly_int64 bitsize, bitpos, bytepos;
6114 machine_mode mode;
6115 int unsignedp, reversep, volatilep;
6116 tree offset;
6117 base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
6118 &unsignedp, &reversep, &volatilep);
6119 if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
6120 return NULL_TREE;
6122 /* Restrict the alignment to that guaranteed by the offsets. */
6123 unsigned int bytepos_alignment = known_alignment (bytepos);
6124 if (bytepos_alignment != 0)
6125 alignment = MIN (alignment, bytepos_alignment);
6126 if (offset)
6128 unsigned int offset_alignment = highest_pow2_factor (offset);
6129 alignment = MIN (alignment, offset_alignment);
6132 *alignment_out = alignment;
6133 return base;
6136 /* Return the object whose alignment would need to be changed in order
6137 to increase the alignment of ADDR. Store the maximum achievable
6138 alignment in *MAX_ALIGNMENT. */
6140 tree
6141 get_base_for_alignment (tree addr, unsigned int *max_alignment)
6143 tree base = get_base_for_alignment_1 (addr, max_alignment);
6144 if (base)
6145 return base;
6147 if (TREE_CODE (addr) == ADDR_EXPR)
6148 addr = TREE_OPERAND (addr, 0);
6149 *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
6150 return addr;
6153 /* Recursive helper function. */
6155 static bool
6156 find_loop_nest_1 (class loop *loop, vec<loop_p> *loop_nest)
6158 /* Inner loops of the nest should not contain siblings. Example:
6159 when there are two consecutive loops,
6161 | loop_0
6162 | loop_1
6163 | A[{0, +, 1}_1]
6164 | endloop_1
6165 | loop_2
6166 | A[{0, +, 1}_2]
6167 | endloop_2
6168 | endloop_0
6170 the dependence relation cannot be captured by the distance
6171 abstraction. */
6172 if (loop->next)
6173 return false;
6175 loop_nest->safe_push (loop);
6176 if (loop->inner)
6177 return find_loop_nest_1 (loop->inner, loop_nest);
6178 return true;
6181 /* Return false when the LOOP is not well nested. Otherwise return
6182 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
6183 contain the loops from the outermost to the innermost, as they will
6184 appear in the classic distance vector. */
6186 bool
6187 find_loop_nest (class loop *loop, vec<loop_p> *loop_nest)
6189 loop_nest->safe_push (loop);
6190 if (loop->inner)
6191 return find_loop_nest_1 (loop->inner, loop_nest);
6192 return true;
6195 /* Returns true when the data dependences have been computed, false otherwise.
6196 Given a loop nest LOOP, the following vectors are returned:
6197 DATAREFS is initialized to all the array elements contained in this loop,
6198 DEPENDENCE_RELATIONS contains the relations between the data references.
6199 Compute read-read and self relations if
6200 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
6202 bool
6203 compute_data_dependences_for_loop (class loop *loop,
6204 bool compute_self_and_read_read_dependences,
6205 vec<loop_p> *loop_nest,
6206 vec<data_reference_p> *datarefs,
6207 vec<ddr_p> *dependence_relations)
6209 bool res = true;
6211 memset (&dependence_stats, 0, sizeof (dependence_stats));
6213 /* If the loop nest is not well formed, or one of the data references
6214 is not computable, give up without spending time to compute other
6215 dependences. */
6216 if (!loop
6217 || !find_loop_nest (loop, loop_nest)
6218 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
6219 || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
6220 compute_self_and_read_read_dependences))
6221 res = false;
6223 if (dump_file && (dump_flags & TDF_STATS))
6225 fprintf (dump_file, "Dependence tester statistics:\n");
6227 fprintf (dump_file, "Number of dependence tests: %d\n",
6228 dependence_stats.num_dependence_tests);
6229 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
6230 dependence_stats.num_dependence_dependent);
6231 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
6232 dependence_stats.num_dependence_independent);
6233 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
6234 dependence_stats.num_dependence_undetermined);
6236 fprintf (dump_file, "Number of subscript tests: %d\n",
6237 dependence_stats.num_subscript_tests);
6238 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
6239 dependence_stats.num_subscript_undetermined);
6240 fprintf (dump_file, "Number of same subscript function: %d\n",
6241 dependence_stats.num_same_subscript_function);
6243 fprintf (dump_file, "Number of ziv tests: %d\n",
6244 dependence_stats.num_ziv);
6245 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
6246 dependence_stats.num_ziv_dependent);
6247 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
6248 dependence_stats.num_ziv_independent);
6249 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
6250 dependence_stats.num_ziv_unimplemented);
6252 fprintf (dump_file, "Number of siv tests: %d\n",
6253 dependence_stats.num_siv);
6254 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
6255 dependence_stats.num_siv_dependent);
6256 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
6257 dependence_stats.num_siv_independent);
6258 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
6259 dependence_stats.num_siv_unimplemented);
6261 fprintf (dump_file, "Number of miv tests: %d\n",
6262 dependence_stats.num_miv);
6263 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
6264 dependence_stats.num_miv_dependent);
6265 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
6266 dependence_stats.num_miv_independent);
6267 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
6268 dependence_stats.num_miv_unimplemented);
6271 return res;
6274 /* Free the memory used by a data dependence relation DDR. */
6276 void
6277 free_dependence_relation (struct data_dependence_relation *ddr)
6279 if (ddr == NULL)
6280 return;
6282 if (DDR_SUBSCRIPTS (ddr).exists ())
6283 free_subscripts (DDR_SUBSCRIPTS (ddr));
6284 DDR_DIST_VECTS (ddr).release ();
6285 DDR_DIR_VECTS (ddr).release ();
6287 free (ddr);
6290 /* Free the memory used by the data dependence relations from
6291 DEPENDENCE_RELATIONS. */
6293 void
6294 free_dependence_relations (vec<ddr_p>& dependence_relations)
6296 for (data_dependence_relation *ddr : dependence_relations)
6297 if (ddr)
6298 free_dependence_relation (ddr);
6300 dependence_relations.release ();
6303 /* Free the memory used by the data references from DATAREFS. */
6305 void
6306 free_data_refs (vec<data_reference_p>& datarefs)
6308 for (data_reference *dr : datarefs)
6309 free_data_ref (dr);
6310 datarefs.release ();
6313 /* Common routine implementing both dr_direction_indicator and
6314 dr_zero_step_indicator. Return USEFUL_MIN if the indicator is known
6315 to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
6316 Return the step as the indicator otherwise. */
6318 static tree
6319 dr_step_indicator (struct data_reference *dr, int useful_min)
6321 tree step = DR_STEP (dr);
6322 if (!step)
6323 return NULL_TREE;
6324 STRIP_NOPS (step);
6325 /* Look for cases where the step is scaled by a positive constant
6326 integer, which will often be the access size. If the multiplication
6327 doesn't change the sign (due to overflow effects) then we can
6328 test the unscaled value instead. */
6329 if (TREE_CODE (step) == MULT_EXPR
6330 && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
6331 && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
6333 tree factor = TREE_OPERAND (step, 1);
6334 step = TREE_OPERAND (step, 0);
6336 /* Strip widening and truncating conversions as well as nops. */
6337 if (CONVERT_EXPR_P (step)
6338 && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
6339 step = TREE_OPERAND (step, 0);
6340 tree type = TREE_TYPE (step);
6342 /* Get the range of step values that would not cause overflow. */
6343 widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
6344 / wi::to_widest (factor));
6345 widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
6346 / wi::to_widest (factor));
6348 /* Get the range of values that the unconverted step actually has. */
6349 wide_int step_min, step_max;
6350 value_range vr;
6351 if (TREE_CODE (step) != SSA_NAME
6352 || !get_range_query (cfun)->range_of_expr (vr, step)
6353 || vr.kind () != VR_RANGE)
6355 step_min = wi::to_wide (TYPE_MIN_VALUE (type));
6356 step_max = wi::to_wide (TYPE_MAX_VALUE (type));
6358 else
6360 step_min = vr.lower_bound ();
6361 step_max = vr.upper_bound ();
6364 /* Check whether the unconverted step has an acceptable range. */
6365 signop sgn = TYPE_SIGN (type);
6366 if (wi::les_p (minv, widest_int::from (step_min, sgn))
6367 && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
6369 if (wi::ge_p (step_min, useful_min, sgn))
6370 return ssize_int (useful_min);
6371 else if (wi::lt_p (step_max, 0, sgn))
6372 return ssize_int (-1);
6373 else
6374 return fold_convert (ssizetype, step);
6377 return DR_STEP (dr);
6380 /* Return a value that is negative iff DR has a negative step. */
6382 tree
6383 dr_direction_indicator (struct data_reference *dr)
6385 return dr_step_indicator (dr, 0);
6388 /* Return a value that is zero iff DR has a zero step. */
6390 tree
6391 dr_zero_step_indicator (struct data_reference *dr)
6393 return dr_step_indicator (dr, 1);
6396 /* Return true if DR is known to have a nonnegative (but possibly zero)
6397 step. */
6399 bool
6400 dr_known_forward_stride_p (struct data_reference *dr)
6402 tree indicator = dr_direction_indicator (dr);
6403 tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
6404 fold_convert (ssizetype, indicator),
6405 ssize_int (0));
6406 return neg_step_val && integer_zerop (neg_step_val);