/* Data references and dependences detectors.
   Copyright (C) 2003-2023 Free Software Foundation, Inc.
   Contributed by Sebastian Pop <pop@cri.ensmp.fr>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass walks a given loop structure searching for array
   references.  The information about the array accesses is recorded
   in DATA_REFERENCE structures.

   The basic test for determining the dependences is:
   given two access functions chrec1 and chrec2 to a same array, and
   x and y two vectors from the iteration domain, the same element of
   the array is accessed twice at iterations x and y if and only if:
   |   chrec1 (x) == chrec2 (y).

   The goals of this analysis are:

   - to determine the independence: the relation between two
     independent accesses is qualified with the chrec_known (this
     information allows a loop parallelization),

   - when two data references access the same data, to qualify the
     dependence relation with classic dependence representations:

       - distance vectors
       - direction vectors
       - loop carried level dependence
       - polyhedron dependence
     or with the chains of recurrences based representation,

   - to define a knowledge base for storing the data dependence
     information,

   - to define an interface to access this data.

   Definitions:

   - subscript: given two array accesses a subscript is the tuple
   composed of the access functions for a given dimension.  Example:
   Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
   (f1, g1), (f2, g2), (f3, g3).

   - Diophantine equation: an equation whose coefficients and
   solutions are integer constants, for example the equation
   |   3*x + 2*y = 1
   has an integer solution x = 1 and y = -1.

   References:

   - "Advanced Compilation for High Performance Computing" by Randy
   Allen and Ken Kennedy.
   http://citeseer.ist.psu.edu/goff91practical.html

   - "Loop Transformations for Restructuring Compilers - The Foundations"
   by Utpal Banerjee.

*/
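
/* A worked example of the basic test above (illustrative; the loop and
   array names are made up):

   | for (i = 0; i < N; i++)
   |   A[i + 1] = A[i];

   The write has access function chrec1 = {1, +, 1}_1 and the read has
   chrec2 = {0, +, 1}_1.  Solving chrec1 (x) == chrec2 (y), that is
   x + 1 == y, shows that iteration y reads the element stored at
   iteration x = y - 1: a loop carried dependence of distance 1,
   represented by the distance vector (1).  */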
76 #include "config.h"
77 #include "system.h"
78 #include "coretypes.h"
79 #include "backend.h"
80 #include "rtl.h"
81 #include "tree.h"
82 #include "gimple.h"
83 #include "gimple-pretty-print.h"
84 #include "alias.h"
85 #include "fold-const.h"
86 #include "expr.h"
87 #include "gimple-iterator.h"
88 #include "tree-ssa-loop-niter.h"
89 #include "tree-ssa-loop.h"
90 #include "tree-ssa.h"
91 #include "cfgloop.h"
92 #include "tree-data-ref.h"
93 #include "tree-scalar-evolution.h"
94 #include "dumpfile.h"
95 #include "tree-affine.h"
96 #include "builtins.h"
97 #include "tree-eh.h"
98 #include "ssa.h"
99 #include "internal-fn.h"
100 #include "vr-values.h"
101 #include "range-op.h"
102 #include "tree-ssa-loop-ivopts.h"
103 #include "calls.h"
static struct datadep_stats
{
  int num_dependence_tests;
  int num_dependence_dependent;
  int num_dependence_independent;
  int num_dependence_undetermined;

  int num_subscript_tests;
  int num_subscript_undetermined;
  int num_same_subscript_function;

  int num_ziv;
  int num_ziv_independent;
  int num_ziv_dependent;
  int num_ziv_unimplemented;

  int num_siv;
  int num_siv_independent;
  int num_siv_dependent;
  int num_siv_unimplemented;

  int num_miv;
  int num_miv_independent;
  int num_miv_dependent;
  int num_miv_unimplemented;
} dependence_stats;

static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
					   unsigned int, unsigned int,
					   class loop *);
/* Returns true iff A divides B.  */

static inline bool
tree_fold_divides_p (const_tree a, const_tree b)
{
  gcc_assert (TREE_CODE (a) == INTEGER_CST);
  gcc_assert (TREE_CODE (b) == INTEGER_CST);
  return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
}

/* Returns true iff A divides B.  */

static inline bool
int_divides_p (lambda_int a, lambda_int b)
{
  return ((b % a) == 0);
}

/* Return true if reference REF contains a union access.  */

static bool
ref_contains_union_access_p (tree ref)
{
  while (handled_component_p (ref))
    {
      ref = TREE_OPERAND (ref, 0);
      if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
	  || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
	return true;
    }
  return false;
}
/* Dump into FILE all the data references from DATAREFS.  */

static void
dump_data_references (FILE *file, vec<data_reference_p> datarefs)
{
  for (data_reference *dr : datarefs)
    dump_data_reference (file, dr);
}

/* Unified dump of all the data references in REF to STDERR.  */

DEBUG_FUNCTION void
debug (vec<data_reference_p> &ref)
{
  dump_data_references (stderr, ref);
}

DEBUG_FUNCTION void
debug (vec<data_reference_p> *ptr)
{
  if (ptr)
    debug (*ptr);
  else
    fprintf (stderr, "<nil>\n");
}

/* Dump into STDERR all the data references from DATAREFS.  */

DEBUG_FUNCTION void
debug_data_references (vec<data_reference_p> datarefs)
{
  dump_data_references (stderr, datarefs);
}

/* Print to STDERR the data_reference DR.  */

DEBUG_FUNCTION void
debug_data_reference (struct data_reference *dr)
{
  dump_data_reference (stderr, dr);
}

/* Dump function for a DATA_REFERENCE structure.  */

void
dump_data_reference (FILE *outf,
		     struct data_reference *dr)
{
  unsigned int i;

  fprintf (outf, "#(Data Ref: \n");
  fprintf (outf, "#  bb: %d \n", gimple_bb (DR_STMT (dr))->index);
  fprintf (outf, "#  stmt: ");
  print_gimple_stmt (outf, DR_STMT (dr), 0);
  fprintf (outf, "#  ref: ");
  print_generic_stmt (outf, DR_REF (dr));
  fprintf (outf, "#  base_object: ");
  print_generic_stmt (outf, DR_BASE_OBJECT (dr));

  for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
    {
      fprintf (outf, "#  Access function %d: ", i);
      print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
    }
  fprintf (outf, "#)\n");
}

/* Unified dump function for a DATA_REFERENCE structure.  */

DEBUG_FUNCTION void
debug (data_reference &ref)
{
  dump_data_reference (stderr, &ref);
}

DEBUG_FUNCTION void
debug (data_reference *ptr)
{
  if (ptr)
    debug (*ptr);
  else
    fprintf (stderr, "<nil>\n");
}
/* Dumps the affine function described by FN to the file OUTF.  */

DEBUG_FUNCTION void
dump_affine_function (FILE *outf, affine_fn fn)
{
  unsigned i;
  tree coef;

  print_generic_expr (outf, fn[0], TDF_SLIM);
  for (i = 1; fn.iterate (i, &coef); i++)
    {
      fprintf (outf, " + ");
      print_generic_expr (outf, coef, TDF_SLIM);
      fprintf (outf, " * x_%u", i);
    }
}

/* Dumps the conflict function CF to the file OUTF.  */

DEBUG_FUNCTION void
dump_conflict_function (FILE *outf, conflict_function *cf)
{
  unsigned i;

  if (cf->n == NO_DEPENDENCE)
    fprintf (outf, "no dependence");
  else if (cf->n == NOT_KNOWN)
    fprintf (outf, "not known");
  else
    {
      for (i = 0; i < cf->n; i++)
	{
	  if (i != 0)
	    fprintf (outf, " ");
	  fprintf (outf, "[");
	  dump_affine_function (outf, cf->fns[i]);
	  fprintf (outf, "]");
	}
    }
}

/* Dump function for a SUBSCRIPT structure.  */

DEBUG_FUNCTION void
dump_subscript (FILE *outf, struct subscript *subscript)
{
  conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);

  fprintf (outf, "\n (subscript \n");
  fprintf (outf, "  iterations_that_access_an_element_twice_in_A: ");
  dump_conflict_function (outf, cf);
  if (CF_NONTRIVIAL_P (cf))
    {
      tree last_iteration = SUB_LAST_CONFLICT (subscript);
      fprintf (outf, "\n  last_conflict: ");
      print_generic_expr (outf, last_iteration);
    }

  cf = SUB_CONFLICTS_IN_B (subscript);
  fprintf (outf, "\n  iterations_that_access_an_element_twice_in_B: ");
  dump_conflict_function (outf, cf);
  if (CF_NONTRIVIAL_P (cf))
    {
      tree last_iteration = SUB_LAST_CONFLICT (subscript);
      fprintf (outf, "\n  last_conflict: ");
      print_generic_expr (outf, last_iteration);
    }

  fprintf (outf, "\n (Subscript distance: ");
  print_generic_expr (outf, SUB_DISTANCE (subscript));
  fprintf (outf, " ))\n");
}
/* Print the classic direction vector DIRV to OUTF.  */

DEBUG_FUNCTION void
print_direction_vector (FILE *outf,
			lambda_vector dirv,
			int length)
{
  int eq;

  for (eq = 0; eq < length; eq++)
    {
      enum data_dependence_direction dir = ((enum data_dependence_direction)
					    dirv[eq]);

      switch (dir)
	{
	case dir_positive:
	  fprintf (outf, " +");
	  break;
	case dir_negative:
	  fprintf (outf, " -");
	  break;
	case dir_equal:
	  fprintf (outf, " =");
	  break;
	case dir_positive_or_equal:
	  fprintf (outf, " +=");
	  break;
	case dir_positive_or_negative:
	  fprintf (outf, " +-");
	  break;
	case dir_negative_or_equal:
	  fprintf (outf, " -=");
	  break;
	case dir_star:
	  fprintf (outf, " *");
	  break;
	default:
	  fprintf (outf, "indep");
	  break;
	}
    }
  fprintf (outf, "\n");
}

/* Print a vector of direction vectors.  */

DEBUG_FUNCTION void
print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
		   int length)
{
  for (lambda_vector v : dir_vects)
    print_direction_vector (outf, v, length);
}

/* Print out a vector VEC of length N to OUTFILE.  */

DEBUG_FUNCTION void
print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
{
  int i;

  for (i = 0; i < n; i++)
    fprintf (outfile, HOST_WIDE_INT_PRINT_DEC " ", vector[i]);
  fprintf (outfile, "\n");
}

/* Print a vector of distance vectors.  */

DEBUG_FUNCTION void
print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
		    int length)
{
  for (lambda_vector v : dist_vects)
    print_lambda_vector (outf, v, length);
}
/* Dump function for a DATA_DEPENDENCE_RELATION structure.  */

DEBUG_FUNCTION void
dump_data_dependence_relation (FILE *outf, const data_dependence_relation *ddr)
{
  struct data_reference *dra, *drb;

  fprintf (outf, "(Data Dep: \n");

  if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (ddr)
	{
	  dra = DDR_A (ddr);
	  drb = DDR_B (ddr);
	  if (dra)
	    dump_data_reference (outf, dra);
	  else
	    fprintf (outf, "    (nil)\n");
	  if (drb)
	    dump_data_reference (outf, drb);
	  else
	    fprintf (outf, "    (nil)\n");
	}
      fprintf (outf, "    (don't know)\n)\n");
      return;
    }

  dra = DDR_A (ddr);
  drb = DDR_B (ddr);
  dump_data_reference (outf, dra);
  dump_data_reference (outf, drb);

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    fprintf (outf, "    (no dependence)\n");

  else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      unsigned int i;
      class loop *loopi;

      subscript *sub;
      FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
	{
	  fprintf (outf, "  access_fn_A: ");
	  print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
	  fprintf (outf, "  access_fn_B: ");
	  print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
	  dump_subscript (outf, sub);
	}

      fprintf (outf, "  loop nest: (");
      FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
	fprintf (outf, "%d ", loopi->num);
      fprintf (outf, ")\n");

      for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
	{
	  fprintf (outf, "  distance_vector: ");
	  print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
			       DDR_NB_LOOPS (ddr));
	}

      for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
	{
	  fprintf (outf, "  direction_vector: ");
	  print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
				  DDR_NB_LOOPS (ddr));
	}
    }

  fprintf (outf, ")\n");
}

/* Debug version.  */

DEBUG_FUNCTION void
debug_data_dependence_relation (const struct data_dependence_relation *ddr)
{
  dump_data_dependence_relation (stderr, ddr);
}

/* Dump into FILE all the dependence relations from DDRS.  */

DEBUG_FUNCTION void
dump_data_dependence_relations (FILE *file, const vec<ddr_p> &ddrs)
{
  for (auto ddr : ddrs)
    dump_data_dependence_relation (file, ddr);
}

DEBUG_FUNCTION void
debug (vec<ddr_p> &ref)
{
  dump_data_dependence_relations (stderr, ref);
}

DEBUG_FUNCTION void
debug (vec<ddr_p> *ptr)
{
  if (ptr)
    debug (*ptr);
  else
    fprintf (stderr, "<nil>\n");
}

/* Dump to STDERR all the dependence relations from DDRS.  */

DEBUG_FUNCTION void
debug_data_dependence_relations (vec<ddr_p> ddrs)
{
  dump_data_dependence_relations (stderr, ddrs);
}
/* Dumps the distance and direction vectors of the dependence relations
   in DDRS to FILE.  The length of each vector is the number of loops in
   the considered nest (DDR_NB_LOOPS).  */

DEBUG_FUNCTION void
dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
{
  for (data_dependence_relation *ddr : ddrs)
    if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
      {
	for (lambda_vector v : DDR_DIST_VECTS (ddr))
	  {
	    fprintf (file, "DISTANCE_V (");
	    print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
	    fprintf (file, ")\n");
	  }

	for (lambda_vector v : DDR_DIR_VECTS (ddr))
	  {
	    fprintf (file, "DIRECTION_V (");
	    print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
	    fprintf (file, ")\n");
	  }
      }

  fprintf (file, "\n\n");
}

/* Dumps the data dependence relations DDRS in FILE.  */

DEBUG_FUNCTION void
dump_ddrs (FILE *file, vec<ddr_p> ddrs)
{
  for (data_dependence_relation *ddr : ddrs)
    dump_data_dependence_relation (file, ddr);

  fprintf (file, "\n\n");
}

/* Dumps the data dependence relations DDRS to STDERR.  */

DEBUG_FUNCTION void
debug_ddrs (vec<ddr_p> ddrs)
{
  dump_ddrs (stderr, ddrs);
}
/* If RESULT_RANGE is nonnull, set *RESULT_RANGE to the range of
   OP0 CODE OP1, where:

   - OP0 CODE OP1 has integral type TYPE
   - the range of OP0 is given by OP0_RANGE and
   - the range of OP1 is given by OP1_RANGE.

   Independently of RESULT_RANGE, try to compute:

     DELTA = ((sizetype) OP0 CODE (sizetype) OP1)
	     - (sizetype) (OP0 CODE OP1)

   as a constant and subtract DELTA from the ssizetype constant in *OFF.
   Return true on success, or false if DELTA is not known at compile time.

   Truncation and sign changes are known to distribute over CODE, i.e.

     (itype) (A CODE B) == (itype) A CODE (itype) B

   for any integral type ITYPE whose precision is no greater than the
   precision of A and B.  */

static bool
compute_distributive_range (tree type, value_range &op0_range,
			    tree_code code, value_range &op1_range,
			    tree *off, value_range *result_range)
{
  gcc_assert (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type));
  if (result_range)
    {
      range_op_handler op (code);
      if (!op.fold_range (*result_range, type, op0_range, op1_range))
	result_range->set_varying (type);
    }

  /* The distributive property guarantees that if TYPE is no narrower
     than SIZETYPE,

       (sizetype) (OP0 CODE OP1) == (sizetype) OP0 CODE (sizetype) OP1

     and so we can treat DELTA as zero.  */
  if (TYPE_PRECISION (type) >= TYPE_PRECISION (sizetype))
    return true;

  /* If overflow is undefined, we can assume that:

       X == (ssizetype) OP0 CODE (ssizetype) OP1

     is within the range of TYPE, i.e.:

       X == (ssizetype) (TYPE) X

     Distributing the (TYPE) truncation over X gives:

       X == (ssizetype) (OP0 CODE OP1)

     Casting both sides to sizetype and distributing the sizetype cast
     over X gives:

       (sizetype) OP0 CODE (sizetype) OP1 == (sizetype) (OP0 CODE OP1)

     and so we can treat DELTA as zero.  */
  if (TYPE_OVERFLOW_UNDEFINED (type))
    return true;

  /* Compute the range of:

       (ssizetype) OP0 CODE (ssizetype) OP1

     The distributive property guarantees that this has the same bitpattern as:

       (sizetype) OP0 CODE (sizetype) OP1

     but its range is more conducive to analysis.  */
  range_cast (op0_range, ssizetype);
  range_cast (op1_range, ssizetype);
  value_range wide_range;
  range_op_handler op (code);
  bool saved_flag_wrapv = flag_wrapv;
  flag_wrapv = 1;
  if (!op.fold_range (wide_range, ssizetype, op0_range, op1_range))
    wide_range.set_varying (ssizetype);
  flag_wrapv = saved_flag_wrapv;
  if (wide_range.num_pairs () != 1
      || wide_range.varying_p () || wide_range.undefined_p ())
    return false;

  wide_int lb = wide_range.lower_bound ();
  wide_int ub = wide_range.upper_bound ();

  /* Calculate the number of times that each end of the range overflows or
     underflows TYPE.  We can only calculate DELTA if the numbers match.  */
  unsigned int precision = TYPE_PRECISION (type);
  if (!TYPE_UNSIGNED (type))
    {
      wide_int type_min = wi::mask (precision - 1, true, lb.get_precision ());
      lb -= type_min;
      ub -= type_min;
    }
  wide_int upper_bits = wi::mask (precision, true, lb.get_precision ());
  lb &= upper_bits;
  ub &= upper_bits;
  if (lb != ub)
    return false;

  /* OP0 CODE OP1 overflows exactly arshift (LB, PRECISION) times, with
     negative values indicating underflow.  The low PRECISION bits of LB
     are clear, so DELTA is therefore LB (== UB).  */
  *off = wide_int_to_tree (ssizetype, wi::to_wide (*off) - lb);
  return true;
}
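
/* A worked numeric example for compute_distributive_range (illustrative):
   with TYPE = unsigned char, CODE = PLUS_EXPR and both operand ranges
   [200, 210], the ssizetype sum is in [400, 420], so both ends of the
   range overflow TYPE exactly once and DELTA = 256.  E.g. for
   OP0 = OP1 = 200: (sizetype) 200 + (sizetype) 200 == 400, while
   (sizetype) (unsigned char) (200 + 200) == 144, and 400 - 144 == 256,
   which is then subtracted from the constant in *OFF.  */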
/* Return true if (sizetype) OP == (sizetype) (TO_TYPE) OP,
   given that OP has type FROM_TYPE and range RANGE.  Both TO_TYPE and
   FROM_TYPE are integral types.  */

static bool
nop_conversion_for_offset_p (tree to_type, tree from_type, value_range &range)
{
  gcc_assert (INTEGRAL_TYPE_P (to_type)
	      && INTEGRAL_TYPE_P (from_type)
	      && !TYPE_OVERFLOW_TRAPS (to_type)
	      && !TYPE_OVERFLOW_TRAPS (from_type));

  /* Converting to something no narrower than sizetype and then to sizetype
     is equivalent to converting directly to sizetype.  */
  if (TYPE_PRECISION (to_type) >= TYPE_PRECISION (sizetype))
    return true;

  /* Check whether TO_TYPE can represent all values that FROM_TYPE can.  */
  if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type)
      && (TYPE_UNSIGNED (from_type) || !TYPE_UNSIGNED (to_type)))
    return true;

  /* For narrowing conversions, we could in principle test whether
     the bits in FROM_TYPE but not in TO_TYPE have a fixed value
     and apply a constant adjustment.

     For other conversions (which involve a sign change) we could
     check that the signs are always equal, and apply a constant
     adjustment if the signs are negative.

     However, both cases should be rare.  */
  return range_fits_type_p (&range, TYPE_PRECISION (to_type),
			    TYPE_SIGN (to_type));
}
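
/* For instance (illustrative): converting an int OP with known range
   [0, 100] to unsigned short is a nop for offset purposes, since
   the range fits in the target type and therefore
   (sizetype) OP == (sizetype) (unsigned short) OP.  If the range
   could exceed [0, 65535], the function would have to return false.  */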
static void
split_constant_offset (tree type, tree *var, tree *off,
		       value_range *result_range,
		       hash_map<tree, std::pair<tree, tree> > &cache,
		       unsigned *limit);
/* Helper function for split_constant_offset.  If TYPE is a pointer type,
   try to express OP0 CODE OP1 as:

     POINTER_PLUS <*VAR, (sizetype) *OFF>

   where:

   - *VAR has type TYPE
   - *OFF is a constant of type ssizetype.

   If TYPE is an integral type, try to express (sizetype) (OP0 CODE OP1) as:

     *VAR + (sizetype) *OFF

   where:

   - *VAR has type sizetype
   - *OFF is a constant of type ssizetype.

   In both cases, OP0 CODE OP1 has type TYPE.

   Return true on success.  A false return value indicates that we can't
   do better than set *OFF to zero.

   When returning true, set RESULT_RANGE to the range of OP0 CODE OP1,
   if RESULT_RANGE is nonnull and if we can do better than assume VR_VARYING.

   CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
   visited.  LIMIT counts down the number of SSA names that we are
   allowed to process before giving up.  */

static bool
split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
			 tree *var, tree *off, value_range *result_range,
			 hash_map<tree, std::pair<tree, tree> > &cache,
			 unsigned *limit)
{
  tree var0, var1;
  tree off0, off1;
  value_range op0_range, op1_range;

  *var = NULL_TREE;
  *off = NULL_TREE;

  if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type))
    return false;

  switch (code)
    {
    case INTEGER_CST:
      *var = size_int (0);
      *off = fold_convert (ssizetype, op0);
      if (result_range)
	{
	  wide_int w = wi::to_wide (op0);
	  result_range->set (TREE_TYPE (op0), w, w);
	}
      return true;

    case POINTER_PLUS_EXPR:
      split_constant_offset (op0, &var0, &off0, nullptr, cache, limit);
      split_constant_offset (op1, &var1, &off1, nullptr, cache, limit);
      *var = fold_build2 (POINTER_PLUS_EXPR, type, var0, var1);
      *off = size_binop (PLUS_EXPR, off0, off1);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
      split_constant_offset (op1, &var1, &off1, &op1_range, cache, limit);
      *off = size_binop (code, off0, off1);
      if (!compute_distributive_range (type, op0_range, code, op1_range,
				       off, result_range))
	return false;
      *var = fold_build2 (code, sizetype, var0, var1);
      return true;

    case MULT_EXPR:
      if (TREE_CODE (op1) != INTEGER_CST)
	return false;

      split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
      op1_range.set (TREE_TYPE (op1), wi::to_wide (op1), wi::to_wide (op1));
      *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
      if (!compute_distributive_range (type, op0_range, code, op1_range,
				       off, result_range))
	return false;
      *var = fold_build2 (MULT_EXPR, sizetype, var0,
			  fold_convert (sizetype, op1));
      return true;

    case ADDR_EXPR:
      {
	tree base, poffset;
	poly_int64 pbitsize, pbitpos, pbytepos;
	machine_mode pmode;
	int punsignedp, preversep, pvolatilep;

	op0 = TREE_OPERAND (op0, 0);
	base
	  = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
				 &punsignedp, &preversep, &pvolatilep);

	if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
	  return false;
	base = build_fold_addr_expr (base);
	off0 = ssize_int (pbytepos);

	if (poffset)
	  {
	    split_constant_offset (poffset, &poffset, &off1, nullptr,
				   cache, limit);
	    off0 = size_binop (PLUS_EXPR, off0, off1);
	    base = fold_build_pointer_plus (base, poffset);
	  }

	var0 = fold_convert (type, base);

	/* If variable length types are involved, punt, otherwise casts
	   might be converted into ARRAY_REFs in gimplify_conversion.
	   To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
	   possibly no longer appears in current GIMPLE, might resurface.
	   This perhaps could run
	   if (CONVERT_EXPR_P (var0))
	     {
	       gimplify_conversion (&var0);
	       // Attempt to fill in any within var0 found ARRAY_REF's
	       // element size from corresponding op embedded ARRAY_REF,
	       // if unsuccessful, just punt.
	     }  */
	while (POINTER_TYPE_P (type))
	  type = TREE_TYPE (type);
	if (int_size_in_bytes (type) < 0)
	  return false;

	*var = var0;
	*off = off0;
	return true;
      }

    case SSA_NAME:
      {
	if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
	  return false;

	gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
	enum tree_code subcode;

	if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
	  return false;

	subcode = gimple_assign_rhs_code (def_stmt);

	/* We are using a cache to avoid un-CSEing large amounts of code.  */
	bool use_cache = false;
	if (!has_single_use (op0)
	    && (subcode == POINTER_PLUS_EXPR
		|| subcode == PLUS_EXPR
		|| subcode == MINUS_EXPR
		|| subcode == MULT_EXPR
		|| subcode == ADDR_EXPR
		|| CONVERT_EXPR_CODE_P (subcode)))
	  {
	    use_cache = true;
	    bool existed;
	    std::pair<tree, tree> &e = cache.get_or_insert (op0, &existed);
	    if (existed)
	      {
		if (integer_zerop (e.second))
		  return false;
		*var = e.first;
		*off = e.second;
		/* The caller sets the range in this case.  */
		return true;
	      }
	    e = std::make_pair (op0, ssize_int (0));
	  }

	if (*limit == 0)
	  return false;
	--*limit;

	var0 = gimple_assign_rhs1 (def_stmt);
	var1 = gimple_assign_rhs2 (def_stmt);

	bool res = split_constant_offset_1 (type, var0, subcode, var1,
					    var, off, nullptr, cache, limit);
	if (res && use_cache)
	  *cache.get (op0) = std::make_pair (*var, *off);
	/* The caller sets the range in this case.  */
	return res;
      }

    CASE_CONVERT:
      {
	/* We can only handle the following conversions:

	   - Conversions from one pointer type to another pointer type.

	   - Conversions from one non-trapping integral type to another
	     non-trapping integral type.  In this case, the recursive
	     call makes sure that:

	       (sizetype) OP0

	     can be expressed as a sizetype operation involving VAR and OFF,
	     and all we need to do is check whether:

	       (sizetype) OP0 == (sizetype) (TYPE) OP0

	   - Conversions from a non-trapping sizetype-size integral type to
	     a like-sized pointer type.  In this case, the recursive call
	     makes sure that:

	       (sizetype) OP0 == *VAR + (sizetype) *OFF

	     and we can convert that to:

	       POINTER_PLUS <(TYPE) *VAR, (sizetype) *OFF>

	   - Conversions from a sizetype-sized pointer type to a like-sized
	     non-trapping integral type.  In this case, the recursive call
	     makes sure that:

	       OP0 == POINTER_PLUS <*VAR, (sizetype) *OFF>

	     where the POINTER_PLUS and *VAR have the same precision as
	     TYPE (and the same precision as sizetype).  Then:

	       (sizetype) (TYPE) OP0 == (sizetype) *VAR + (sizetype) *OFF.  */
	tree itype = TREE_TYPE (op0);
	if ((POINTER_TYPE_P (itype)
	     || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
	    && (POINTER_TYPE_P (type)
		|| (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type)))
	    && (POINTER_TYPE_P (type) == POINTER_TYPE_P (itype)
		|| (TYPE_PRECISION (type) == TYPE_PRECISION (sizetype)
		    && TYPE_PRECISION (itype) == TYPE_PRECISION (sizetype))))
	  {
	    if (POINTER_TYPE_P (type))
	      {
		split_constant_offset (op0, var, off, nullptr, cache, limit);
		*var = fold_convert (type, *var);
	      }
	    else if (POINTER_TYPE_P (itype))
	      {
		split_constant_offset (op0, var, off, nullptr, cache, limit);
		*var = fold_convert (sizetype, *var);
	      }
	    else
	      {
		split_constant_offset (op0, var, off, &op0_range,
				       cache, limit);
		if (!nop_conversion_for_offset_p (type, itype, op0_range))
		  return false;
		if (result_range)
		  {
		    *result_range = op0_range;
		    range_cast (*result_range, type);
		  }
	      }
	    return true;
	  }
	return false;
      }

    default:
      return false;
    }
}
/* If EXP has pointer type, try to express it as:

     POINTER_PLUS <*VAR, (sizetype) *OFF>

   where:

   - *VAR has the same type as EXP
   - *OFF is a constant of type ssizetype.

   If EXP has an integral type, try to express (sizetype) EXP as:

     *VAR + (sizetype) *OFF

   where:

   - *VAR has type sizetype
   - *OFF is a constant of type ssizetype.

   If EXP_RANGE is nonnull, set it to the range of EXP.

   CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
   visited.  LIMIT counts down the number of SSA names that we are
   allowed to process before giving up.  */

static void
split_constant_offset (tree exp, tree *var, tree *off, value_range *exp_range,
		       hash_map<tree, std::pair<tree, tree> > &cache,
		       unsigned *limit)
{
  tree type = TREE_TYPE (exp), op0, op1;
  enum tree_code code;

  code = TREE_CODE (exp);
  if (exp_range)
    {
      *exp_range = type;
      if (code == SSA_NAME)
	{
	  value_range vr;
	  get_range_query (cfun)->range_of_expr (vr, exp);
	  if (vr.undefined_p ())
	    vr.set_varying (TREE_TYPE (exp));
	  tree vr_min, vr_max;
	  value_range_kind vr_kind = get_legacy_range (vr, vr_min, vr_max);
	  wide_int var_min = wi::to_wide (vr_min);
	  wide_int var_max = wi::to_wide (vr_max);
	  wide_int var_nonzero = get_nonzero_bits (exp);
	  vr_kind = intersect_range_with_nonzero_bits (vr_kind,
						       &var_min, &var_max,
						       var_nonzero,
						       TYPE_SIGN (type));
	  /* This check for VR_VARYING is here because the old code
	     using get_range_info would return VR_RANGE for the entire
	     domain, instead of VR_VARYING.  The new code normalizes
	     full-domain ranges to VR_VARYING.  */
	  if (vr_kind == VR_RANGE || vr_kind == VR_VARYING)
	    *exp_range = value_range (type, var_min, var_max);
	}
    }

  if (!tree_is_chrec (exp)
      && get_gimple_rhs_class (TREE_CODE (exp)) != GIMPLE_TERNARY_RHS)
    {
      extract_ops_from_tree (exp, &code, &op0, &op1);
      if (split_constant_offset_1 (type, op0, code, op1, var, off,
				   exp_range, cache, limit))
	return;
    }

  *var = exp;
  if (INTEGRAL_TYPE_P (type))
    *var = fold_convert (sizetype, *var);
  *off = ssize_int (0);

  value_range r;
  if (exp_range && code != SSA_NAME
      && get_range_query (cfun)->range_of_expr (r, exp)
      && !r.undefined_p ())
    *exp_range = r;
}
/* Expresses EXP as VAR + OFF, where OFF is a constant.  VAR has the same
   type as EXP while OFF has type ssizetype.  */

void
split_constant_offset (tree exp, tree *var, tree *off)
{
  unsigned limit = param_ssa_name_def_chain_limit;
  static hash_map<tree, std::pair<tree, tree> > *cache;
  if (!cache)
    cache = new hash_map<tree, std::pair<tree, tree> > (37);
  split_constant_offset (exp, var, off, nullptr, *cache, &limit);
  *var = fold_convert (TREE_TYPE (exp), *var);
  cache->empty ();
}
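
/* Example use (illustrative, with made-up SSA names): for an address
   like &a[i_1 + 4] with 4-byte array elements, split_constant_offset
   returns a *VAR equivalent to &a[i_1] and *OFF = 16, exposing the
   constant part of the address to the data-reference machinery.  */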
/* Returns the address ADDR of an object in a canonical shape (without nop
   casts, and with type of pointer to the object).  */

static tree
canonicalize_base_object_address (tree addr)
{
  tree orig = addr;

  STRIP_NOPS (addr);

  /* The base address may be obtained by casting from integer, in that case
     keep the cast.  */
  if (!POINTER_TYPE_P (TREE_TYPE (addr)))
    return orig;

  if (TREE_CODE (addr) != ADDR_EXPR)
    return addr;

  return build_fold_addr_expr (TREE_OPERAND (addr, 0));
}
/* Analyze the behavior of memory reference REF within STMT.
   There are two modes:

   - BB analysis.  In this case we simply split the address into base,
     init and offset components, without reference to any containing loop.
     The resulting base and offset are general expressions and they can
     vary arbitrarily from one iteration of the containing loop to the next.
     The step is always zero.

   - loop analysis.  In this case we analyze the reference both wrt LOOP
     and on the basis that the reference occurs (is "used") in LOOP;
     see the comment above analyze_scalar_evolution_in_loop for more
     information about this distinction.  The base, init, offset and
     step fields are all invariant in LOOP.

   Perform BB analysis if LOOP is null, or if LOOP is the function's
   dummy outermost loop.  In other cases perform loop analysis.

   Return true if the analysis succeeded and store the results in DRB if so.
   BB analysis can only fail for bitfield or reversed-storage accesses.  */

opt_result
dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
		      class loop *loop, const gimple *stmt)
{
  poly_int64 pbitsize, pbitpos;
  tree base, poffset;
  machine_mode pmode;
  int punsignedp, preversep, pvolatilep;
  affine_iv base_iv, offset_iv;
  tree init, dinit, step;
  bool in_loop = (loop && loop->num);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "analyze_innermost: ");

  base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
			      &punsignedp, &preversep, &pvolatilep);
  gcc_assert (base != NULL_TREE);

  poly_int64 pbytepos;
  if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
    return opt_result::failure_at (stmt,
				   "failed: bit offset alignment.\n");

  if (preversep)
    return opt_result::failure_at (stmt,
				   "failed: reverse storage order.\n");

  /* Calculate the alignment and misalignment for the inner reference.  */
  unsigned HOST_WIDE_INT bit_base_misalignment;
  unsigned int bit_base_alignment;
  get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);

  /* There are no bitfield references remaining in BASE, so the values
     we got back must be whole bytes.  */
  gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
	      && bit_base_misalignment % BITS_PER_UNIT == 0);
  unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
  poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;

  if (TREE_CODE (base) == MEM_REF)
    {
      if (!integer_zerop (TREE_OPERAND (base, 1)))
	{
	  /* Subtract MOFF from the base and add it to POFFSET instead.
	     Adjust the misalignment to reflect the amount we subtracted.  */
	  poly_offset_int moff = mem_ref_offset (base);
	  base_misalignment -= moff.force_shwi ();
	  tree mofft = wide_int_to_tree (sizetype, moff);
	  if (!poffset)
	    poffset = mofft;
	  else
	    poffset = size_binop (PLUS_EXPR, poffset, mofft);
	}
      base = TREE_OPERAND (base, 0);
    }
  else
    base = build_fold_addr_expr (base);

  if (in_loop)
    {
      if (!simple_iv (loop, loop, base, &base_iv, true))
	return opt_result::failure_at
	  (stmt, "failed: evolution of base is not affine.\n");
    }
  else
    {
      base_iv.base = base;
      base_iv.step = ssize_int (0);
      base_iv.no_overflow = true;
    }

  if (!poffset)
    {
      offset_iv.base = ssize_int (0);
      offset_iv.step = ssize_int (0);
    }
  else
    {
      if (!in_loop)
	{
	  offset_iv.base = poffset;
	  offset_iv.step = ssize_int (0);
	}
      else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
	return opt_result::failure_at
	  (stmt, "failed: evolution of offset is not affine.\n");
    }

  init = ssize_int (pbytepos);

  /* Subtract any constant component from the base and add it to INIT instead.
     Adjust the misalignment to reflect the amount we subtracted.  */
  split_constant_offset (base_iv.base, &base_iv.base, &dinit);
  init = size_binop (PLUS_EXPR, init, dinit);
  base_misalignment -= TREE_INT_CST_LOW (dinit);

  split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
  init = size_binop (PLUS_EXPR, init, dinit);

  step = size_binop (PLUS_EXPR,
		     fold_convert (ssizetype, base_iv.step),
		     fold_convert (ssizetype, offset_iv.step));

  base = canonicalize_base_object_address (base_iv.base);

  /* See if get_pointer_alignment can guarantee a higher alignment than
     the one we calculated above.  */
  unsigned HOST_WIDE_INT alt_misalignment;
  unsigned int alt_alignment;
  get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);

  /* As above, these values must be whole bytes.  */
  gcc_assert (alt_alignment % BITS_PER_UNIT == 0
	      && alt_misalignment % BITS_PER_UNIT == 0);
  alt_alignment /= BITS_PER_UNIT;
  alt_misalignment /= BITS_PER_UNIT;

  if (base_alignment < alt_alignment)
    {
      base_alignment = alt_alignment;
      base_misalignment = alt_misalignment;
    }

  drb->base_address = base;
  drb->offset = fold_convert (ssizetype, offset_iv.base);
  drb->init = init;
  drb->step = step;
  if (known_misalignment (base_misalignment, base_alignment,
			  &drb->base_misalignment))
    drb->base_alignment = base_alignment;
  else
    {
      drb->base_alignment = known_alignment (base_misalignment);
      drb->base_misalignment = 0;
    }
  drb->offset_alignment = highest_pow2_factor (offset_iv.base);
  drb->step_alignment = highest_pow2_factor (step);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "success.\n");

  return opt_result::success ();
}
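
/* For example (illustrative): when analyzing p[i + 7] with 4-byte
   elements in a loop over i, loop analysis would typically produce
   base_address = p, offset = 0, init = 28 and step = 4, all of them
   invariant in the loop; BB analysis would instead record the whole
   variable offset with a zero step.  */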
/* Return true if OP is a valid component reference for a DR access
   function.  This accepts a subset of what handled_component_p accepts.  */

static bool
access_fn_component_p (tree op)
{
  switch (TREE_CODE (op))
    {
    case REALPART_EXPR:
    case IMAGPART_EXPR:
    case ARRAY_REF:
      return true;

    case COMPONENT_REF:
      return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;

    default:
      return false;
    }
}

/* Return whether a component reference accepted by access_fn_component_p
   can be built with BASE as its base.  */

static bool
base_supports_access_fn_components_p (tree base)
{
  switch (TREE_CODE (TREE_TYPE (base)))
    {
    case COMPLEX_TYPE:
    case ARRAY_TYPE:
    case RECORD_TYPE:
      return true;
    default:
      return false;
    }
}
/* Determines the base object and the list of indices of memory reference
   DR, analyzed in LOOP and instantiated before NEST.  */

static void
dr_analyze_indices (struct indices *dri, tree ref, edge nest, loop_p loop)
{
  /* If analyzing a basic-block there are no indices to analyze
     and thus no access functions.  */
  if (!nest)
    {
      dri->base_object = ref;
      dri->access_fns.create (0);
      return;
    }

  vec<tree> access_fns = vNULL;

  /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
     into a two element array with a constant index.  The base is
     then just the immediate underlying object.  */
  if (TREE_CODE (ref) == REALPART_EXPR)
    {
      ref = TREE_OPERAND (ref, 0);
      access_fns.safe_push (integer_zero_node);
    }
  else if (TREE_CODE (ref) == IMAGPART_EXPR)
    {
      ref = TREE_OPERAND (ref, 0);
      access_fns.safe_push (integer_one_node);
    }

  /* Analyze access functions of dimensions we know to be independent.
     The list of component references handled here should be kept in
     sync with access_fn_component_p.  */
  while (handled_component_p (ref))
    {
      if (TREE_CODE (ref) == ARRAY_REF)
	{
	  tree op = TREE_OPERAND (ref, 1);
	  tree access_fn = analyze_scalar_evolution (loop, op);
	  access_fn = instantiate_scev (nest, loop, access_fn);
	  access_fns.safe_push (access_fn);
	}
      else if (TREE_CODE (ref) == COMPONENT_REF
	       && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
	{
	  /* For COMPONENT_REFs of records (but not unions!) use the
	     FIELD_DECL offset as constant access function so we can
	     disambiguate a[i].f1 and a[i].f2.  */
	  tree off = component_ref_field_offset (ref);
	  off = size_binop (PLUS_EXPR,
			    size_binop (MULT_EXPR,
					fold_convert (bitsizetype, off),
					bitsize_int (BITS_PER_UNIT)),
			    DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
	  access_fns.safe_push (off);
	}
      else
	/* If we have an unhandled component we could not translate
	   to an access function stop analyzing.  We have determined
	   our base object in this case.  */
	break;

      ref = TREE_OPERAND (ref, 0);
    }

  /* If the address operand of a MEM_REF base has an evolution in the
     analyzed nest, add it as an additional independent access-function.  */
  if (TREE_CODE (ref) == MEM_REF)
    {
      tree op = TREE_OPERAND (ref, 0);
      tree access_fn = analyze_scalar_evolution (loop, op);
      access_fn = instantiate_scev (nest, loop, access_fn);
      STRIP_NOPS (access_fn);
      if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
	{
	  tree memoff = TREE_OPERAND (ref, 1);
	  tree base = initial_condition (access_fn);
	  tree orig_type = TREE_TYPE (base);
	  STRIP_USELESS_TYPE_CONVERSION (base);
	  tree off;
	  split_constant_offset (base, &base, &off);
	  STRIP_USELESS_TYPE_CONVERSION (base);
	  /* Fold the MEM_REF offset into the evolution's initial
	     value to make more bases comparable.  */
	  if (!integer_zerop (memoff))
	    {
	      off = size_binop (PLUS_EXPR, off,
				fold_convert (ssizetype, memoff));
	      memoff = build_int_cst (TREE_TYPE (memoff), 0);
	    }
	  /* Adjust the offset so it is a multiple of the access type
	     size and thus we separate bases that can possibly be used
	     to produce partial overlaps (which the access_fn machinery
	     cannot handle).  */
	  wide_int rem;
	  if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
	      && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
	      && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
	    rem = wi::mod_trunc
	      (wi::to_wide (off),
	       wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
	       SIGNED);
	  else
	    /* If we can't compute the remainder simply force the initial
	       condition to zero.  */
	    rem = wi::to_wide (off);
	  off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
	  memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
	  /* And finally replace the initial condition.  */
	  access_fn = chrec_replace_initial_condition
	    (access_fn, fold_convert (orig_type, off));
	  /* ??? This is still not a suitable base object for
	     dr_may_alias_p - the base object needs to be an
	     access that covers the object as a whole.  With
	     an evolution in the pointer this cannot be
	     guaranteed.
	     As a band-aid, mark the access so we can special-case
	     it in dr_may_alias_p.  */
	  tree old = ref;
	  ref = fold_build2_loc (EXPR_LOCATION (ref),
				 MEM_REF, TREE_TYPE (ref),
				 base, memoff);
	  MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
	  MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
	  dri->unconstrained_base = true;
	  access_fns.safe_push (access_fn);
	}
    }
  else if (DECL_P (ref))
    {
      /* Canonicalize DR_BASE_OBJECT to MEM_REF form.  */
      ref = build2 (MEM_REF, TREE_TYPE (ref),
		    build_fold_addr_expr (ref),
		    build_int_cst (reference_alias_ptr_type (ref), 0));
    }

  dri->base_object = ref;
  dri->access_fns = access_fns;
}
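
/* For example (illustrative): for an access like A[i][j].f in a loop
   nest over i and j, the code above records one access function per
   dimension (the constant field offset of f, {0, +, 1}_2 for j, and
   {0, +, 1}_1 for i), with A as the base object, so that A[i][j].f1
   and A[i][j].f2 can be disambiguated dimension by dimension.  */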
/* Extracts the alias analysis information from the memory reference DR.  */

static void
dr_analyze_alias (struct data_reference *dr)
{
  tree ref = DR_REF (dr);
  tree base = get_base_address (ref), addr;

  if (INDIRECT_REF_P (base)
      || TREE_CODE (base) == MEM_REF)
    {
      addr = TREE_OPERAND (base, 0);
      if (TREE_CODE (addr) == SSA_NAME)
	DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
    }
}

/* Frees data reference DR.  */

void
free_data_ref (data_reference_p dr)
{
  DR_ACCESS_FNS (dr).release ();
  if (dr->alt_indices.base_object)
    dr->alt_indices.access_fns.release ();
  free (dr);
}
/* Analyze memory reference MEMREF, which is accessed in STMT.
   The reference is a read if IS_READ is true, otherwise it is a write.
   IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
   within STMT, i.e. that it might not occur even if STMT is executed
   and runs to completion.

   Return the data_reference description of MEMREF.  NEST is the outermost
   loop in which the reference should be instantiated, LOOP is the loop
   in which the data reference should be analyzed.  */

struct data_reference *
create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
		 bool is_read, bool is_conditional_in_stmt)
{
  struct data_reference *dr;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Creating dr for ");
      print_generic_expr (dump_file, memref, TDF_SLIM);
      fprintf (dump_file, "\n");
    }

  dr = XCNEW (struct data_reference);
  DR_STMT (dr) = stmt;
  DR_REF (dr) = memref;
  DR_IS_READ (dr) = is_read;
  DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;

  dr_analyze_innermost (&DR_INNERMOST (dr), memref,
			nest != NULL ? loop : NULL, stmt);
  dr_analyze_indices (&dr->indices, DR_REF (dr), nest, loop);
  dr_analyze_alias (dr);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned i;
      fprintf (dump_file, "\tbase_address: ");
      print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
      fprintf (dump_file, "\n\toffset from base address: ");
      print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tconstant offset from base address: ");
      print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tstep: ");
      print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tbase misalignment: %d",
	       DR_BASE_MISALIGNMENT (dr));
      fprintf (dump_file, "\n\toffset alignment: %d",
	       DR_OFFSET_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tbase_object: ");
      print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
      fprintf (dump_file, "\n");
      for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
	{
	  fprintf (dump_file, "\tAccess function %d: ", i);
	  print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
	}
    }

  return dr;
}
/* A helper function that computes the order between two tree expressions
   T1 and T2.  This is used in comparator functions when sorting objects
   based on the order of tree expressions.  Returns -1, 0, or 1.  */

int
data_ref_compare_tree (tree t1, tree t2)
{
  int i, cmp;
  enum tree_code code;
  char tclass;

  if (t1 == t2)
    return 0;
  if (t1 == NULL)
    return -1;
  if (t2 == NULL)
    return 1;

  STRIP_USELESS_TYPE_CONVERSION (t1);
  STRIP_USELESS_TYPE_CONVERSION (t2);
  if (t1 == t2)
    return 0;

  if (TREE_CODE (t1) != TREE_CODE (t2)
      && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
    return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;

  code = TREE_CODE (t1);
  switch (code)
    {
    case INTEGER_CST:
      return tree_int_cst_compare (t1, t2);

    case STRING_CST:
      if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
	return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
      return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
		     TREE_STRING_LENGTH (t1));

    case SSA_NAME:
      if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
	return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
      break;

    default:
      if (POLY_INT_CST_P (t1))
	return compare_sizes_for_sort (wi::to_poly_widest (t1),
				       wi::to_poly_widest (t2));

      tclass = TREE_CODE_CLASS (code);

      /* For decls, compare their UIDs.  */
      if (tclass == tcc_declaration)
	{
	  if (DECL_UID (t1) != DECL_UID (t2))
	    return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
	  break;
	}
      /* For expressions, compare their operands recursively.  */
      else if (IS_EXPR_CODE_CLASS (tclass))
	{
	  for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
	    {
	      cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
					   TREE_OPERAND (t2, i));
	      if (cmp != 0)
		return cmp;
	    }
	}
      else
	gcc_unreachable ();
    }

  return 0;
}
/* Return TRUE if it's possible to resolve data dependence DDR by a runtime
   alias check.  */

opt_result
runtime_alias_check_p (ddr_p ddr, class loop *loop, bool speed_p)
{
  if (dump_enabled_p ())
    dump_printf (MSG_NOTE,
		 "consider run-time aliasing test between %T and %T\n",
		 DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));

  if (!speed_p)
    return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
				   "runtime alias check not supported when"
				   " optimizing for size.\n");

  /* FORNOW: We don't support versioning with outer-loop in either
     vectorization or loop distribution.  */
  if (loop != NULL && loop->inner != NULL)
    return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
				   "runtime alias check not supported for"
				   " outer loop.\n");

  return opt_result::success ();
}
/* Operator == between two dr_with_seg_len objects.

   This equality operator is used to check that two data refs are the
   same, so that we can consider combining the aliasing checks of the
   two pairs of dependent data refs.  */

static bool
operator == (const dr_with_seg_len& d1,
	     const dr_with_seg_len& d2)
{
  return (operand_equal_p (DR_BASE_ADDRESS (d1.dr),
			   DR_BASE_ADDRESS (d2.dr), 0)
	  && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
	  && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
	  && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0
	  && known_eq (d1.access_size, d2.access_size)
	  && d1.align == d2.align);
}
/* Comparison function for sorting objects of dr_with_seg_len_pair_t
   so that we can combine aliasing checks in one scan.  */

static int
comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
{
  const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
  const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
  const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
  const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;

  /* For DR pairs (a, b) and (c, d), we only consider merging the alias
     checks if a and c have the same base address and step, and b and d
     have the same base address and step.  Therefore, if either a&c or
     b&d don't have the same address and step, we don't care about the
     order of those two pairs after sorting.  */
  int comp_res;

  if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
					 DR_BASE_ADDRESS (b1.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
					 DR_BASE_ADDRESS (b2.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
					 DR_STEP (b1.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
					 DR_STEP (b2.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
					 DR_OFFSET (b1.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
					 DR_INIT (b1.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
					 DR_OFFSET (b2.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
					 DR_INIT (b2.dr))) != 0)
    return comp_res;

  return 0;
}
/* Dump information about ALIAS_PAIR, indenting each line by INDENT.  */

static void
dump_alias_pair (dr_with_seg_len_pair_t *alias_pair, const char *indent)
{
  dump_printf (MSG_NOTE, "%sreference: %T vs. %T\n", indent,
	       DR_REF (alias_pair->first.dr),
	       DR_REF (alias_pair->second.dr));

  dump_printf (MSG_NOTE, "%ssegment length: %T", indent,
	       alias_pair->first.seg_len);
  if (!operand_equal_p (alias_pair->first.seg_len,
			alias_pair->second.seg_len, 0))
    dump_printf (MSG_NOTE, " vs. %T", alias_pair->second.seg_len);

  dump_printf (MSG_NOTE, "\n%saccess size: ", indent);
  dump_dec (MSG_NOTE, alias_pair->first.access_size);
  if (maybe_ne (alias_pair->first.access_size, alias_pair->second.access_size))
    {
      dump_printf (MSG_NOTE, " vs. ");
      dump_dec (MSG_NOTE, alias_pair->second.access_size);
    }

  dump_printf (MSG_NOTE, "\n%salignment: %d", indent,
	       alias_pair->first.align);
  if (alias_pair->first.align != alias_pair->second.align)
    dump_printf (MSG_NOTE, " vs. %d", alias_pair->second.align);

  dump_printf (MSG_NOTE, "\n%sflags: ", indent);
  if (alias_pair->flags & DR_ALIAS_RAW)
    dump_printf (MSG_NOTE, " RAW");
  if (alias_pair->flags & DR_ALIAS_WAR)
    dump_printf (MSG_NOTE, " WAR");
  if (alias_pair->flags & DR_ALIAS_WAW)
    dump_printf (MSG_NOTE, " WAW");
  if (alias_pair->flags & DR_ALIAS_ARBITRARY)
    dump_printf (MSG_NOTE, " ARBITRARY");
  if (alias_pair->flags & DR_ALIAS_SWAPPED)
    dump_printf (MSG_NOTE, " SWAPPED");
  if (alias_pair->flags & DR_ALIAS_UNSWAPPED)
    dump_printf (MSG_NOTE, " UNSWAPPED");
  if (alias_pair->flags & DR_ALIAS_MIXED_STEPS)
    dump_printf (MSG_NOTE, " MIXED_STEPS");
  if (alias_pair->flags == 0)
    dump_printf (MSG_NOTE, " <none>");
  dump_printf (MSG_NOTE, "\n");
}
/* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
   FACTOR is the number of iterations that each data reference is accessed.

   Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
   we create an expression:

   ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
   || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))

   for aliasing checks.  However, in some cases we can decrease the number
   of checks by combining two checks into one.  For example, suppose we have
   another pair of data refs store_ptr_0 & load_ptr_1, and if the following
   condition is satisfied:

   load_ptr_0 < load_ptr_1  &&
   load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0

   (this condition means, in each iteration of vectorized loop, the accessed
   memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
   load_ptr_1.)

   we then can use only the following expression to finish the aliasing
   checks between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:

   ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
   || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))

   Note that we only consider that load_ptr_0 and load_ptr_1 have the same
   basic address.  */
1789 void
1790 prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
1791 poly_uint64)
1793 if (alias_pairs->is_empty ())
1794 return;
1796 /* Canonicalize each pair so that the base components are ordered wrt
1797 data_ref_compare_tree. This allows the loop below to merge more
1798 cases. */
1799 unsigned int i;
1800 dr_with_seg_len_pair_t *alias_pair;
1801 FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1803 data_reference_p dr_a = alias_pair->first.dr;
1804 data_reference_p dr_b = alias_pair->second.dr;
1805 int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
1806 DR_BASE_ADDRESS (dr_b));
1807 if (comp_res == 0)
1808 comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
1809 if (comp_res == 0)
1810 comp_res = data_ref_compare_tree (DR_INIT (dr_a), DR_INIT (dr_b));
1811 if (comp_res > 0)
1813 std::swap (alias_pair->first, alias_pair->second);
1814 alias_pair->flags |= DR_ALIAS_SWAPPED;
1816 else
1817 alias_pair->flags |= DR_ALIAS_UNSWAPPED;
1820 /* Sort the collected data ref pairs so that we can scan them once to
1821 combine all possible aliasing checks. */
1822 alias_pairs->qsort (comp_dr_with_seg_len_pair);
1824 /* Scan the sorted dr pairs and check if we can combine alias checks
1825 of two neighboring dr pairs. */
1826 unsigned int last = 0;
1827 for (i = 1; i < alias_pairs->length (); ++i)
1829 /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2). */
1830 dr_with_seg_len_pair_t *alias_pair1 = &(*alias_pairs)[last];
1831 dr_with_seg_len_pair_t *alias_pair2 = &(*alias_pairs)[i];
1833 dr_with_seg_len *dr_a1 = &alias_pair1->first;
1834 dr_with_seg_len *dr_b1 = &alias_pair1->second;
1835 dr_with_seg_len *dr_a2 = &alias_pair2->first;
1836 dr_with_seg_len *dr_b2 = &alias_pair2->second;
1838 /* Remove duplicate data ref pairs. */
1839 if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
1841 if (dump_enabled_p ())
1842 dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
1843 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1844 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1845 alias_pair1->flags |= alias_pair2->flags;
1846 continue;
1849 /* Assume that we won't be able to merge the pairs, then correct
1850 if we do. */
1851 last += 1;
1852 if (last != i)
1853 (*alias_pairs)[last] = (*alias_pairs)[i];
1855 if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
1857 /* We consider the case that DR_B1 and DR_B2 are same memrefs,
1858 and DR_A1 and DR_A2 are two consecutive memrefs. */
1859 if (*dr_a1 == *dr_a2)
1861 std::swap (dr_a1, dr_b1);
1862 std::swap (dr_a2, dr_b2);
1865 poly_int64 init_a1, init_a2;
1866 /* Only consider cases in which the distance between the initial
1867 DR_A1 and the initial DR_A2 is known at compile time. */
1868 if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
1869 DR_BASE_ADDRESS (dr_a2->dr), 0)
1870 || !operand_equal_p (DR_OFFSET (dr_a1->dr),
1871 DR_OFFSET (dr_a2->dr), 0)
1872 || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
1873 || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
1874 continue;
1876 /* Don't combine if we can't tell which one comes first. */
1877 if (!ordered_p (init_a1, init_a2))
1878 continue;
1880 /* Work out what the segment length would be if we did combine
1881 DR_A1 and DR_A2:
1883 - If DR_A1 and DR_A2 have equal lengths, that length is
1884 also the combined length.
1886 - If DR_A1 and DR_A2 both have negative "lengths", the combined
1887 length is the lower bound on those lengths.
1889 - If DR_A1 and DR_A2 both have positive lengths, the combined
1890 length is the upper bound on those lengths.
1892 Other cases are unlikely to give a useful combination.
1894 The lengths both have sizetype, so the sign is taken from
1895 the step instead. */
1896 poly_uint64 new_seg_len = 0;
1897 bool new_seg_len_p = !operand_equal_p (dr_a1->seg_len,
1898 dr_a2->seg_len, 0);
1899 if (new_seg_len_p)
1901 poly_uint64 seg_len_a1, seg_len_a2;
1902 if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
1903 || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
1904 continue;
1906 tree indicator_a = dr_direction_indicator (dr_a1->dr);
1907 if (TREE_CODE (indicator_a) != INTEGER_CST)
1908 continue;
1910 tree indicator_b = dr_direction_indicator (dr_a2->dr);
1911 if (TREE_CODE (indicator_b) != INTEGER_CST)
1912 continue;
1914 int sign_a = tree_int_cst_sgn (indicator_a);
1915 int sign_b = tree_int_cst_sgn (indicator_b);
1917 if (sign_a <= 0 && sign_b <= 0)
1918 new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
1919 else if (sign_a >= 0 && sign_b >= 0)
1920 new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
1921 else
1922 continue;
1924 /* At this point we're committed to merging the refs. */
1926 /* Make sure dr_a1 starts left of dr_a2. */
1927 if (maybe_gt (init_a1, init_a2))
1929 std::swap (*dr_a1, *dr_a2);
1930 std::swap (init_a1, init_a2);
1933 /* The DR_Bs are equal, so only the DR_As can introduce
1934 mixed steps. */
1935 if (!operand_equal_p (DR_STEP (dr_a1->dr), DR_STEP (dr_a2->dr), 0))
1936 alias_pair1->flags |= DR_ALIAS_MIXED_STEPS;
1938 if (new_seg_len_p)
1940 dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
1941 new_seg_len);
1942 dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
1945 /* This is known to be nonnegative due to the swap above. */
1946 poly_uint64 diff = init_a2 - init_a1;
1948 /* The new check will start at DR_A1. Make sure that its access
1949 size encompasses the initial DR_A2. */
1950 if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
1952 dr_a1->access_size = upper_bound (dr_a1->access_size,
1953 diff + dr_a2->access_size);
1954 unsigned int new_align = known_alignment (dr_a1->access_size);
1955 dr_a1->align = MIN (dr_a1->align, new_align);
1957 if (dump_enabled_p ())
1958 dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
1959 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1960 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1961 alias_pair1->flags |= alias_pair2->flags;
1962 last -= 1;
1965 alias_pairs->truncate (last + 1);
1967 /* Try to restore the original dr_with_seg_len order within each
1968 dr_with_seg_len_pair_t. If we ended up combining swapped and
1969 unswapped pairs into the same check, we have to invalidate any
1970 RAW, WAR and WAW information for it. */
1971 if (dump_enabled_p ())
1972 dump_printf (MSG_NOTE, "merged alias checks:\n");
1973 FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1975 unsigned int swap_mask = (DR_ALIAS_SWAPPED | DR_ALIAS_UNSWAPPED);
1976 unsigned int swapped = (alias_pair->flags & swap_mask);
1977 if (swapped == DR_ALIAS_SWAPPED)
1978 std::swap (alias_pair->first, alias_pair->second);
1979 else if (swapped != DR_ALIAS_UNSWAPPED)
1980 alias_pair->flags |= DR_ALIAS_ARBITRARY;
1981 alias_pair->flags &= ~swap_mask;
1982 if (dump_enabled_p ())
1983 dump_alias_pair (alias_pair, " ");
1987 /* A subroutine of create_intersect_range_checks, with a subset of the
1988 same arguments. Try to use IFN_CHECK_RAW_PTRS and IFN_CHECK_WAR_PTRS
1989 to optimize cases in which the references form a simple RAW, WAR or
1990 WAW dependence. */
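/* As an assumed example: for a WAR pair with 4-byte accesses, a constant
   step of 4 and a 64-byte segment, the condition built below would appear
   in dumps roughly as

     .CHECK_WAR_PTRS (addr_a, addr_b, 68, 4)

   with full_length = seg_len + bytes = 64 + 4 = 68 and alignment 4.  */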
1992 static bool
1993 create_ifn_alias_checks (tree *cond_expr,
1994 const dr_with_seg_len_pair_t &alias_pair)
1996 const dr_with_seg_len& dr_a = alias_pair.first;
1997 const dr_with_seg_len& dr_b = alias_pair.second;
1999 /* Check for cases in which:
2001 (a) we have a known RAW, WAR or WAW dependence;
2002 (b) the accesses are well-ordered in both the original and new code
2003 (see the comment above the DR_ALIAS_* flags for details); and
2004 (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR. */
2005 if (alias_pair.flags & ~(DR_ALIAS_RAW | DR_ALIAS_WAR | DR_ALIAS_WAW))
2006 return false;
2008 /* Make sure that both DRs access the same pattern of bytes,
2009 with a constant length and step. */
2010 poly_uint64 seg_len;
2011 if (!operand_equal_p (dr_a.seg_len, dr_b.seg_len, 0)
2012 || !poly_int_tree_p (dr_a.seg_len, &seg_len)
2013 || maybe_ne (dr_a.access_size, dr_b.access_size)
2014 || !operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0)
2015 || !tree_fits_uhwi_p (DR_STEP (dr_a.dr)))
2016 return false;
2018 unsigned HOST_WIDE_INT bytes = tree_to_uhwi (DR_STEP (dr_a.dr));
2019 tree addr_a = DR_BASE_ADDRESS (dr_a.dr);
2020 tree addr_b = DR_BASE_ADDRESS (dr_b.dr);
2022 /* See whether the target supports what we want to do. WAW checks are
2023 equivalent to WAR checks here. */
2024 internal_fn ifn = (alias_pair.flags & DR_ALIAS_RAW
2025 ? IFN_CHECK_RAW_PTRS
2026 : IFN_CHECK_WAR_PTRS);
2027 unsigned int align = MIN (dr_a.align, dr_b.align);
2028 poly_uint64 full_length = seg_len + bytes;
2029 if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
2030 full_length, align))
2032 full_length = seg_len + dr_a.access_size;
2033 if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
2034 full_length, align))
2035 return false;
2038 /* Commit to using this form of test. */
2039 addr_a = fold_build_pointer_plus (addr_a, DR_OFFSET (dr_a.dr));
2040 addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
2042 addr_b = fold_build_pointer_plus (addr_b, DR_OFFSET (dr_b.dr));
2043 addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
2045 *cond_expr = build_call_expr_internal_loc (UNKNOWN_LOCATION,
2046 ifn, boolean_type_node,
2047 4, addr_a, addr_b,
2048 size_int (full_length),
2049 size_int (align));
2051 if (dump_enabled_p ())
2053 if (ifn == IFN_CHECK_RAW_PTRS)
2054 dump_printf (MSG_NOTE, "using an IFN_CHECK_RAW_PTRS test\n");
2055 else
2056 dump_printf (MSG_NOTE, "using an IFN_CHECK_WAR_PTRS test\n");
2058 return true;
2061 /* Try to generate a runtime condition that is true if ALIAS_PAIR is
2062 free of aliases, using a condition based on index values instead
2063 of a condition based on addresses. Return true on success,
2064 storing the condition in *COND_EXPR.
2066 This can only be done if the two data references in ALIAS_PAIR access
2067 the same array object and the index is the only difference. For example,
2068 if the two data references are DR_A and DR_B:
2070 DR_A DR_B
2071 data-ref arr[i] arr[j]
2072 base_object arr arr
2073 index {i_0, +, 1}_loop {j_0, +, 1}_loop
2075 The addresses and their indices look like:
2077 |<- ADDR_A ->| |<- ADDR_B ->|
2078 ------------------------------------------------------->
2079 | | | | | | | | | |
2080 ------------------------------------------------------->
2081 i_0 ... i_0+4 j_0 ... j_0+4
2083 We can create an expression based on the index rather than the address:
2085 (unsigned) (i_0 - j_0 + 3) <= 6
2087 i.e. the indices are less than 4 apart.
2089 Note the evolution step of the index needs to be considered in the comparison. */
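/* Working through the example above as a sketch: the two index ranges
   intersect iff -3 <= i_0 - j_0 <= 3, which the single unsigned
   comparison (i_0 - j_0 + 3) <= 6 encodes; the runtime condition
   emitted below is the negation of that overlap test.  */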
2091 static bool
2092 create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
2093 const dr_with_seg_len_pair_t &alias_pair)
2095 const dr_with_seg_len &dr_a = alias_pair.first;
2096 const dr_with_seg_len &dr_b = alias_pair.second;
2097 if ((alias_pair.flags & DR_ALIAS_MIXED_STEPS)
2098 || integer_zerop (DR_STEP (dr_a.dr))
2099 || integer_zerop (DR_STEP (dr_b.dr))
2100 || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
2101 return false;
2103 poly_uint64 seg_len1, seg_len2;
2104 if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
2105 || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
2106 return false;
2108 if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
2109 return false;
2111 if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
2112 return false;
2114 if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
2115 return false;
2117 gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
2119 bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
2120 unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
2121 if (neg_step)
2123 abs_step = -abs_step;
2124 seg_len1 = (-wi::to_poly_wide (dr_a.seg_len)).force_uhwi ();
2125 seg_len2 = (-wi::to_poly_wide (dr_b.seg_len)).force_uhwi ();
2128 /* Infer the number of iterations with which the memory segment is accessed
2129 by DR. In other words, aliasing is checked if the memory segment accessed
2130 by DR_A in some iterations intersects with the memory segment accessed by
2131 DR_B in the same number of iterations.
2132 Note the segment length is a linear function of the number of iterations
2133 with DR_STEP as the coefficient. */
2134 poly_uint64 niter_len1, niter_len2;
2135 if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
2136 || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
2137 return false;
2139 /* Divide each access size by the byte step, rounding up. */
2140 poly_uint64 niter_access1, niter_access2;
2141 if (!can_div_trunc_p (dr_a.access_size + abs_step - 1,
2142 abs_step, &niter_access1)
2143 || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
2144 abs_step, &niter_access2))
2145 return false;
2147 bool waw_or_war_p = (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) == 0;
2149 int found = -1;
2150 for (unsigned int i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
2152 tree access1 = DR_ACCESS_FN (dr_a.dr, i);
2153 tree access2 = DR_ACCESS_FN (dr_b.dr, i);
2154 /* Two indices must be the same if they are not scev, or not scev wrt
2155 the current loop being vectorized. */
2156 if (TREE_CODE (access1) != POLYNOMIAL_CHREC
2157 || TREE_CODE (access2) != POLYNOMIAL_CHREC
2158 || CHREC_VARIABLE (access1) != (unsigned)loop->num
2159 || CHREC_VARIABLE (access2) != (unsigned)loop->num)
2161 if (operand_equal_p (access1, access2, 0))
2162 continue;
2164 return false;
2166 if (found >= 0)
2167 return false;
2168 found = i;
2171 /* Ought not to happen in practice, since if all accesses are equal then the
2172 alias should be decidable at compile time. */
2173 if (found < 0)
2174 return false;
2176 /* The two indices must have the same step. */
2177 tree access1 = DR_ACCESS_FN (dr_a.dr, found);
2178 tree access2 = DR_ACCESS_FN (dr_b.dr, found);
2179 if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
2180 return false;
2182 tree idx_step = CHREC_RIGHT (access1);
2183 /* Index must have const step, otherwise DR_STEP won't be constant. */
2184 gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
2185 /* Index must evaluate in the same direction as DR. */
2186 gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
2188 tree min1 = CHREC_LEFT (access1);
2189 tree min2 = CHREC_LEFT (access2);
2190 if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
2191 return false;
2193 /* Ideally, aliasing could be checked against the loop's control IV, but
2194 we would need to prove a linear mapping between the control IV and the
2195 reference index. Although that should be true, we check against the
2196 (array) index of the data reference instead. Like the segment length,
2197 the index length is a linear function of the number of iterations with
2198 idx_step as the coefficient, i.e., niter_len * idx_step. */
2199 offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step),
2200 SIGNED);
2201 if (neg_step)
2202 abs_idx_step = -abs_idx_step;
2203 poly_offset_int idx_len1 = abs_idx_step * niter_len1;
2204 poly_offset_int idx_len2 = abs_idx_step * niter_len2;
2205 poly_offset_int idx_access1 = abs_idx_step * niter_access1;
2206 poly_offset_int idx_access2 = abs_idx_step * niter_access2;
2208 gcc_assert (known_ge (idx_len1, 0)
2209 && known_ge (idx_len2, 0)
2210 && known_ge (idx_access1, 0)
2211 && known_ge (idx_access2, 0));
2213 /* Each access has the following pattern, with lengths measured
2214 in units of INDEX:
2216 <-- idx_len -->
2217 <--- A: -ve step --->
2218 +-----+-------+-----+-------+-----+
2219 | n-1 | ..... | 0 | ..... | n-1 |
2220 +-----+-------+-----+-------+-----+
2221 <--- B: +ve step --->
2222 <-- idx_len -->
2226 where "n" is the number of scalar iterations covered by the segment
2227 and where each access spans idx_access units.
2229 A is the range of index units accessed when the step is negative,
2230 B is the range when the step is positive.
2232 When checking for general overlap, we need to test whether
2233 the range:
2235 [min1 + low_offset1, min1 + high_offset1 + idx_access1 - 1]
2237 overlaps:
2239 [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1]
2241 where:
2243 low_offsetN = +ve step ? 0 : -idx_lenN;
2244 high_offsetN = +ve step ? idx_lenN : 0;
2246 This is equivalent to testing whether:
2248 min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1
2249 && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1
2251 Converting this into a single test, there is an overlap if:
2253 0 <= min2 - min1 + bias <= limit
2255 where bias = high_offset2 + idx_access2 - 1 - low_offset1
2256 limit = (high_offset1 - low_offset1 + idx_access1 - 1)
2257 + (high_offset2 - low_offset2 + idx_access2 - 1)
2258 i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1
2260 Combining the tests requires limit to be computable in an unsigned
2261 form of the index type; if it isn't, we fall back to the usual
2262 pointer-based checks.
2264 We can do better if DR_B is a write and if DR_A and DR_B are
2265 well-ordered in both the original and the new code (see the
2266 comment above the DR_ALIAS_* flags for details). In this case
2267 we know that for each i in [0, n-1], the write performed by
2268 access i of DR_B occurs after access numbers j<=i of DR_A in
2269 both the original and the new code. Any write or anti
2270 dependencies wrt those DR_A accesses are therefore maintained.
2272 We just need to make sure that each individual write in DR_B does not
2273 overlap any higher-indexed access in DR_A; such DR_A accesses happen
2274 after the DR_B access in the original code but happen before it in
2275 the new code.
2277 We know the steps for both accesses are equal, so by induction, we
2278 just need to test whether the first write of DR_B overlaps a later
2279 access of DR_A. In other words, we need to move min1 along by
2280 one iteration:
2282 min1' = min1 + idx_step
2284 and use the ranges:
2286 [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1]
2288 and:
2290 [min2, min2 + idx_access2 - 1]
2292 where:
2294 low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|)
2295 high_offset1' = +ve_step ? idx_len1 - |idx_step| : 0. */
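/* A worked instance of the general (non-WAR/WAW) formulas above, with
   assumed values idx_step = 1, idx_access1 = idx_access2 = 1 and
   idx_len1 = idx_len2 = 3, all steps positive:

     low_offset1 = low_offset2 = 0
     high_offset1 = high_offset2 = 3
     bias = 3 + 1 - 1 - 0 = 3
     limit = (3 + 1 - 1) + (3 + 1 - 1) = 6

   so the ranges [min1, min1 + 3] and [min2, min2 + 3] overlap iff
   0 <= min2 - min1 + 3 <= 6, matching the example in the function
   comment.  */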
2296 if (waw_or_war_p)
2297 idx_len1 -= abs_idx_step;
2299 poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1;
2300 if (!waw_or_war_p)
2301 limit += idx_len2;
2303 tree utype = unsigned_type_for (TREE_TYPE (min1));
2304 if (!wi::fits_to_tree_p (limit, utype))
2305 return false;
2307 poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
2308 poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2;
2309 poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
2310 /* Equivalent to adding IDX_STEP to MIN1. */
2311 if (waw_or_war_p)
2312 bias -= wi::to_offset (idx_step);
2314 tree subject = fold_build2 (MINUS_EXPR, utype,
2315 fold_convert (utype, min2),
2316 fold_convert (utype, min1));
2317 subject = fold_build2 (PLUS_EXPR, utype, subject,
2318 wide_int_to_tree (utype, bias));
2319 tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject,
2320 wide_int_to_tree (utype, limit));
2321 if (*cond_expr)
2322 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2323 *cond_expr, part_cond_expr);
2324 else
2325 *cond_expr = part_cond_expr;
2326 if (dump_enabled_p ())
2328 if (waw_or_war_p)
2329 dump_printf (MSG_NOTE, "using an index-based WAR/WAW test\n");
2330 else
2331 dump_printf (MSG_NOTE, "using an index-based overlap test\n");
2333 return true;
2336 /* A subroutine of create_intersect_range_checks, with a subset of the
2337 same arguments. Try to optimize cases in which the second access
2338 is a write and in which some overlap is valid. */
2340 static bool
2341 create_waw_or_war_checks (tree *cond_expr,
2342 const dr_with_seg_len_pair_t &alias_pair)
2344 const dr_with_seg_len& dr_a = alias_pair.first;
2345 const dr_with_seg_len& dr_b = alias_pair.second;
2347 /* Check for cases in which:
2349 (a) DR_B is always a write;
2350 (b) the accesses are well-ordered in both the original and new code
2351 (see the comment above the DR_ALIAS_* flags for details); and
2352 (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR. */
2353 if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
2354 return false;
2356 /* Check for equal (but possibly variable) steps. */
2357 tree step = DR_STEP (dr_a.dr);
2358 if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
2359 return false;
2361 /* Make sure that we can operate on sizetype without loss of precision. */
2362 tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
2363 if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
2364 return false;
2366 /* All addresses involved are known to have a common alignment ALIGN.
2367 We can therefore subtract ALIGN from an exclusive endpoint to get
2368 an inclusive endpoint. In the best (and common) case, ALIGN is the
2369 same as the access sizes of both DRs, and so subtracting ALIGN
2370 cancels out the addition of an access size. */
2371 unsigned int align = MIN (dr_a.align, dr_b.align);
2372 poly_uint64 last_chunk_a = dr_a.access_size - align;
2373 poly_uint64 last_chunk_b = dr_b.access_size - align;
2375 /* Get a boolean expression that is true when the step is negative. */
2376 tree indicator = dr_direction_indicator (dr_a.dr);
2377 tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2378 fold_convert (ssizetype, indicator),
2379 ssize_int (0));
2381 /* Get lengths in sizetype. */
2382 tree seg_len_a
2383 = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len));
2384 step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step));
2386 /* Each access has the following pattern:
2388 <- |seg_len| ->
2389 <--- A: -ve step --->
2390 +-----+-------+-----+-------+-----+
2391 | n-1 | ..... | 0 | ..... | n-1 |
2392 +-----+-------+-----+-------+-----+
2393 <--- B: +ve step --->
2394 <- |seg_len| ->
2396 base address
2398 where "n" is the number of scalar iterations covered by the segment.
2400 A is the range of bytes accessed when the step is negative,
2401 B is the range when the step is positive.
2403 We know that DR_B is a write. We also know (from checking that
2404 DR_A and DR_B are well-ordered) that for each i in [0, n-1],
2405 the write performed by access i of DR_B occurs after access numbers
2406 j<=i of DR_A in both the original and the new code. Any write or
2407 anti dependencies wrt those DR_A accesses are therefore maintained.
2409 We just need to make sure that each individual write in DR_B does not
2410 overlap any higher-indexed access in DR_A; such DR_A accesses happen
2411 after the DR_B access in the original code but happen before it in
2412 the new code.
2414 We know the steps for both accesses are equal, so by induction, we
2415 just need to test whether the first write of DR_B overlaps a later
2416 access of DR_A. In other words, we need to move addr_a along by
2417 one iteration:
2419 addr_a' = addr_a + step
2421 and check whether:
2423 [addr_b, addr_b + last_chunk_b]
2425 overlaps:
2427 [addr_a' + low_offset_a, addr_a' + high_offset_a + last_chunk_a]
2429 where [low_offset_a, high_offset_a] spans accesses [1, n-1]. I.e.:
2431 low_offset_a = +ve step ? 0 : seg_len_a - step
2432 high_offset_a = +ve step ? seg_len_a - step : 0
2434 This is equivalent to testing whether:
2436 addr_a' + low_offset_a <= addr_b + last_chunk_b
2437 && addr_b <= addr_a' + high_offset_a + last_chunk_a
2439 Converting this into a single test, there is an overlap if:
2441 0 <= addr_b + last_chunk_b - addr_a' - low_offset_a <= limit
2443 where limit = high_offset_a - low_offset_a + last_chunk_a + last_chunk_b
2445 If DR_A is performed, limit + |step| - last_chunk_b is known to be
2446 less than the size of the object underlying DR_A. We also know
2447 that last_chunk_b <= |step|; this is checked elsewhere if it isn't
2448 guaranteed at compile time. There can therefore be no overflow if
2449 "limit" is calculated in an unsigned type with pointer precision. */
2450 tree addr_a = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr),
2451 DR_OFFSET (dr_a.dr));
2452 addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
2454 tree addr_b = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr),
2455 DR_OFFSET (dr_b.dr));
2456 addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
2458 /* Advance ADDR_A by one iteration and adjust the length to compensate. */
2459 addr_a = fold_build_pointer_plus (addr_a, step);
2460 tree seg_len_a_minus_step = fold_build2 (MINUS_EXPR, sizetype,
2461 seg_len_a, step);
2462 if (!CONSTANT_CLASS_P (seg_len_a_minus_step))
2463 seg_len_a_minus_step = build1 (SAVE_EXPR, sizetype, seg_len_a_minus_step);
2465 tree low_offset_a = fold_build3 (COND_EXPR, sizetype, neg_step,
2466 seg_len_a_minus_step, size_zero_node);
2467 if (!CONSTANT_CLASS_P (low_offset_a))
2468 low_offset_a = build1 (SAVE_EXPR, sizetype, low_offset_a);
2470 /* We could use COND_EXPR <neg_step, size_zero_node, seg_len_a_minus_step>,
2471 but it's usually more efficient to reuse the LOW_OFFSET_A result. */
2472 tree high_offset_a = fold_build2 (MINUS_EXPR, sizetype, seg_len_a_minus_step,
2473 low_offset_a);
2475 /* The amount added to addr_b - addr_a'. */
2476 tree bias = fold_build2 (MINUS_EXPR, sizetype,
2477 size_int (last_chunk_b), low_offset_a);
2479 tree limit = fold_build2 (MINUS_EXPR, sizetype, high_offset_a, low_offset_a);
2480 limit = fold_build2 (PLUS_EXPR, sizetype, limit,
2481 size_int (last_chunk_a + last_chunk_b));
2483 tree subject = fold_build2 (POINTER_DIFF_EXPR, ssizetype, addr_b, addr_a);
2484 subject = fold_build2 (PLUS_EXPR, sizetype,
2485 fold_convert (sizetype, subject), bias);
2487 *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit);
2488 if (dump_enabled_p ())
2489 dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n");
2490 return true;
2493 /* If ALIGN is nonzero, set up *SEG_MIN_OUT and *SEG_MAX_OUT so that for
2494 every address ADDR accessed by D:
2496 *SEG_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEG_MAX_OUT
2498 In this case, every element accessed by D is aligned to at least
2499 ALIGN bytes.
2501 If ALIGN is zero then instead set *SEG_MIN_OUT and *SEG_MAX_OUT so that:
2503 *SEG_MIN_OUT <= ADDR < *SEG_MAX_OUT. */
2505 static void
2506 get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
2507 tree *seg_max_out, HOST_WIDE_INT align)
2509 /* Each access has the following pattern:
2511 <- |seg_len| ->
2512 <--- A: -ve step --->
2513 +-----+-------+-----+-------+-----+
2514 | n-1 | ..... | 0 | ..... | n-1 |
2515 +-----+-------+-----+-------+-----+
2516 <--- B: +ve step --->
2517 <- |seg_len| ->
2519 base address
2521 where "n" is the number of scalar iterations covered by the segment.
2522 (This should be VF for a particular pair if we know that both steps
2523 are the same, otherwise it will be the full number of scalar loop
2524 iterations.)
2526 A is the range of bytes accessed when the step is negative,
2527 B is the range when the step is positive.
2529 If the access size is "access_size" bytes, the lowest addressed byte is:
2531 base + (step < 0 ? seg_len : 0) [LB]
2533 and the highest addressed byte is always below:
2535 base + (step < 0 ? 0 : seg_len) + access_size [UB]
2537 Thus:
2539 LB <= ADDR < UB
2541 If ALIGN is nonzero, all three values are aligned to at least ALIGN
2542 bytes, so:
2544 LB <= ADDR <= UB - ALIGN
2546 where "- ALIGN" folds naturally with the "+ access_size" and often
2547 cancels it out.
2549 We don't try to simplify LB and UB beyond this (e.g. by using
2550 MIN and MAX based on whether seg_len rather than the stride is
2551 negative) because it is possible for the absolute size of the
2552 segment to overflow the range of a ssize_t.
2554 Keeping the pointer_plus outside of the cond_expr should allow
2555 the cond_exprs to be shared with other alias checks. */
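/* For instance (assumed values): with a positive step, seg_len == 16
   and access_size == 4, LB == base and UB == base + 20; with ALIGN == 4
   the inclusive maximum becomes base + 16, the start of the last
   access.  */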
2556 tree indicator = dr_direction_indicator (d.dr);
2557 tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2558 fold_convert (ssizetype, indicator),
2559 ssize_int (0));
2560 tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
2561 DR_OFFSET (d.dr));
2562 addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
2563 tree seg_len
2564 = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));
2566 tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2567 seg_len, size_zero_node);
2568 tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2569 size_zero_node, seg_len);
2570 max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
2571 size_int (d.access_size - align));
2573 *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
2574 *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
2577 /* Generate a runtime condition that is true if ALIAS_PAIR is free of aliases,
2578 storing the condition in *COND_EXPR. The fallback is to generate
2579 a test that the two accesses do not overlap:
2581 end_a <= start_b || end_b <= start_a. */
2583 static void
2584 create_intersect_range_checks (class loop *loop, tree *cond_expr,
2585 const dr_with_seg_len_pair_t &alias_pair)
2587 const dr_with_seg_len& dr_a = alias_pair.first;
2588 const dr_with_seg_len& dr_b = alias_pair.second;
2589 *cond_expr = NULL_TREE;
2590 if (create_intersect_range_checks_index (loop, cond_expr, alias_pair))
2591 return;
2593 if (create_ifn_alias_checks (cond_expr, alias_pair))
2594 return;
2596 if (create_waw_or_war_checks (cond_expr, alias_pair))
2597 return;
2599 unsigned HOST_WIDE_INT min_align;
2600 tree_code cmp_code;
2601 /* We don't have to check DR_ALIAS_MIXED_STEPS here, since both versions
2602 are equivalent. This is just an optimization heuristic. */
2603 if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
2604 && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
2606 /* In this case adding access_size to seg_len is likely to give
2607 a simple X * step, where X is either the number of scalar
2608 iterations or the vectorization factor. We're better off
2609 keeping that, rather than subtracting an alignment from it.
2611 In this case the maximum values are exclusive and so there is
2612 no alias if the maximum of one segment equals the minimum
2613 of another. */
2614 min_align = 0;
2615 cmp_code = LE_EXPR;
2617 else
2619 /* Calculate the minimum alignment shared by all four pointers,
2620 then arrange for this alignment to be subtracted from the
2621 exclusive maximum values to get inclusive maximum values.
2622 This "- min_align" is cumulative with a "+ access_size"
2623 in the calculation of the maximum values. In the best
2624 (and common) case, the two cancel each other out, leaving
2625 us with an inclusive bound based only on seg_len. In the
2626 worst case we're simply adding a smaller number than before.
2628 Because the maximum values are inclusive, there is an alias
2629 if the maximum value of one segment is equal to the minimum
2630 value of the other. */
2631 min_align = MIN (dr_a.align, dr_b.align);
2632 cmp_code = LT_EXPR;
2635 tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
2636 get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
2637 get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);
2639 *cond_expr
2640 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2641 fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
2642 fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
2643 if (dump_enabled_p ())
2644 dump_printf (MSG_NOTE, "using an address-based overlap test\n");
2647 /* Create a conditional expression that represents the run-time checks for
2648 overlapping of address ranges represented by a list of data references
2649 pairs passed in ALIAS_PAIRS. Data references are in LOOP. The returned
2650 COND_EXPR is the conditional expression to be used in the if statement
2651 that controls which version of the loop gets executed at runtime. */
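/* As an illustrative sketch, for a loop that writes a[i] and reads b[i]
   over n 4-byte elements, the returned *COND_EXPR amounts to

     a + n * 4 <= b || b + n * 4 <= a

   and guards the vectorized version of the loop, with the scalar loop
   executed when the test fails.  */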
2653 void
2654 create_runtime_alias_checks (class loop *loop,
2655 const vec<dr_with_seg_len_pair_t> *alias_pairs,
2656 tree * cond_expr)
2658 tree part_cond_expr;
2660 fold_defer_overflow_warnings ();
2661 for (const dr_with_seg_len_pair_t &alias_pair : alias_pairs)
2663 gcc_assert (alias_pair.flags);
2664 if (dump_enabled_p ())
2665 dump_printf (MSG_NOTE,
2666 "create runtime check for data references %T and %T\n",
2667 DR_REF (alias_pair.first.dr),
2668 DR_REF (alias_pair.second.dr));
2670 /* Create the condition expression for each pair of data references. */
2671 create_intersect_range_checks (loop, &part_cond_expr, alias_pair);
2672 if (*cond_expr)
2673 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2674 *cond_expr, part_cond_expr);
2675 else
2676 *cond_expr = part_cond_expr;
2678 fold_undefer_and_ignore_overflow_warnings ();
2681 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
2682 expressions. */
2683 static bool
2684 dr_equal_offsets_p1 (tree offset1, tree offset2)
2686 bool res;
2688 STRIP_NOPS (offset1);
2689 STRIP_NOPS (offset2);
2691 if (offset1 == offset2)
2692 return true;
2694 if (TREE_CODE (offset1) != TREE_CODE (offset2)
2695 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
2696 return false;
2698 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
2699 TREE_OPERAND (offset2, 0));
2701 if (!res || !BINARY_CLASS_P (offset1))
2702 return res;
2704 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
2705 TREE_OPERAND (offset2, 1));
2707 return res;
2710 /* Check if DRA and DRB have equal offsets. */
2711 bool
2712 dr_equal_offsets_p (struct data_reference *dra,
2713 struct data_reference *drb)
2715 tree offset1, offset2;
2717 offset1 = DR_OFFSET (dra);
2718 offset2 = DR_OFFSET (drb);
2720 return dr_equal_offsets_p1 (offset1, offset2);
2723 /* Returns true if FNA == FNB. */
2725 static bool
2726 affine_function_equal_p (affine_fn fna, affine_fn fnb)
2728 unsigned i, n = fna.length ();
2730 if (n != fnb.length ())
2731 return false;
2733 for (i = 0; i < n; i++)
2734 if (!operand_equal_p (fna[i], fnb[i], 0))
2735 return false;
2737 return true;
2740 /* If all the functions in CF are the same, returns one of them,
2741 otherwise returns NULL. */
2743 static affine_fn
2744 common_affine_function (conflict_function *cf)
2746 unsigned i;
2747 affine_fn comm;
2749 if (!CF_NONTRIVIAL_P (cf))
2750 return affine_fn ();
2752 comm = cf->fns[0];
2754 for (i = 1; i < cf->n; i++)
2755 if (!affine_function_equal_p (comm, cf->fns[i]))
2756 return affine_fn ();
2758 return comm;
2761 /* Returns the base of the affine function FN. */
2763 static tree
2764 affine_function_base (affine_fn fn)
2766 return fn[0];
2769 /* Returns true if FN is a constant. */
2771 static bool
2772 affine_function_constant_p (affine_fn fn)
2774 unsigned i;
2775 tree coef;
2777 for (i = 1; fn.iterate (i, &coef); i++)
2778 if (!integer_zerop (coef))
2779 return false;
2781 return true;
2784 /* Returns true if FN is the zero constant function. */
2786 static bool
2787 affine_function_zero_p (affine_fn fn)
2789 return (integer_zerop (affine_function_base (fn))
2790 && affine_function_constant_p (fn));
2793 /* Returns a signed integer type with the largest precision from TA
2794 and TB. */
2796 static tree
2797 signed_type_for_types (tree ta, tree tb)
2799 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
2800 return signed_type_for (ta);
2801 else
2802 return signed_type_for (tb);
2805 /* Applies operation OP on affine functions FNA and FNB, and returns the
2806 result. */
2808 static affine_fn
2809 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
2811 unsigned i, n, m;
2812 affine_fn ret;
2813 tree coef;
2815 if (fnb.length () > fna.length ())
2817 n = fna.length ();
2818 m = fnb.length ();
2820 else
2822 n = fnb.length ();
2823 m = fna.length ();
2826 ret.create (m);
2827 for (i = 0; i < n; i++)
2829 tree type = signed_type_for_types (TREE_TYPE (fna[i]),
2830 TREE_TYPE (fnb[i]));
2831 ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
2834 for (; fna.iterate (i, &coef); i++)
2835 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2836 coef, integer_zero_node));
2837 for (; fnb.iterate (i, &coef); i++)
2838 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2839 integer_zero_node, coef));
2841 return ret;
2844 /* Returns the sum of affine functions FNA and FNB. */
2846 static affine_fn
2847 affine_fn_plus (affine_fn fna, affine_fn fnb)
2849 return affine_fn_op (PLUS_EXPR, fna, fnb);
2852 /* Returns the difference of affine functions FNA and FNB. */
2854 static affine_fn
2855 affine_fn_minus (affine_fn fna, affine_fn fnb)
2857 return affine_fn_op (MINUS_EXPR, fna, fnb);
2860 /* Frees affine function FN. */
2862 static void
2863 affine_fn_free (affine_fn fn)
2865 fn.release ();
2868 /* Determine for each subscript in the data dependence relation DDR
2869 the distance. */
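/* For example, for the assumed accesses a[i + 2] and a[i] the common
   affine functions reduce to i + 2 and i, their difference is the
   constant 2, and 2 is recorded as the SUB_DISTANCE of the
   subscript.  */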
2871 static void
2872 compute_subscript_distance (struct data_dependence_relation *ddr)
2874 conflict_function *cf_a, *cf_b;
2875 affine_fn fn_a, fn_b, diff;
2877 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
2879 unsigned int i;
2881 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2883 struct subscript *subscript;
2885 subscript = DDR_SUBSCRIPT (ddr, i);
2886 cf_a = SUB_CONFLICTS_IN_A (subscript);
2887 cf_b = SUB_CONFLICTS_IN_B (subscript);
2889 fn_a = common_affine_function (cf_a);
2890 fn_b = common_affine_function (cf_b);
2891 if (!fn_a.exists () || !fn_b.exists ())
2893 SUB_DISTANCE (subscript) = chrec_dont_know;
2894 return;
2896 diff = affine_fn_minus (fn_a, fn_b);
2898 if (affine_function_constant_p (diff))
2899 SUB_DISTANCE (subscript) = affine_function_base (diff);
2900 else
2901 SUB_DISTANCE (subscript) = chrec_dont_know;
2903 affine_fn_free (diff);
2908 /* Returns the conflict function for "unknown". */
2910 static conflict_function *
2911 conflict_fn_not_known (void)
2913 conflict_function *fn = XCNEW (conflict_function);
2914 fn->n = NOT_KNOWN;
2916 return fn;
2919 /* Returns the conflict function for "independent". */
2921 static conflict_function *
2922 conflict_fn_no_dependence (void)
2924 conflict_function *fn = XCNEW (conflict_function);
2925 fn->n = NO_DEPENDENCE;
2927 return fn;
2930 /* Returns true if the address of OBJ is invariant in LOOP. */
2932 static bool
2933 object_address_invariant_in_loop_p (const class loop *loop, const_tree obj)
2935 while (handled_component_p (obj))
2937 if (TREE_CODE (obj) == ARRAY_REF)
2939 for (int i = 1; i < 4; ++i)
2940 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i),
2941 loop->num))
2942 return false;
2944 else if (TREE_CODE (obj) == COMPONENT_REF)
2946 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2947 loop->num))
2948 return false;
2950 obj = TREE_OPERAND (obj, 0);
2953 if (!INDIRECT_REF_P (obj)
2954 && TREE_CODE (obj) != MEM_REF)
2955 return true;
2957 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
2958 loop->num);
2961 /* Returns false if we can prove that data references A and B do not alias,
2962 true otherwise. If LOOP_NEST is NULL, no cross-iteration aliases are
2963 considered. */
2965 bool
2966 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
2967 class loop *loop_nest)
2969 tree addr_a = DR_BASE_OBJECT (a);
2970 tree addr_b = DR_BASE_OBJECT (b);
2972 /* If we are not processing a loop nest but scalar code we
2973 do not need to care about possible cross-iteration dependences
2974 and thus can process the full original reference. Do so,
2975 similar to how loop invariant motion applies extra offset-based
2976 disambiguation. */
2977 if (!loop_nest)
2979 tree tree_size_a = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (a)));
2980 tree tree_size_b = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (b)));
2982 if (DR_BASE_ADDRESS (a)
2983 && DR_BASE_ADDRESS (b)
2984 && operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b))
2985 && operand_equal_p (DR_OFFSET (a), DR_OFFSET (b))
2986 && poly_int_tree_p (tree_size_a)
2987 && poly_int_tree_p (tree_size_b)
2988 && !ranges_maybe_overlap_p (wi::to_poly_widest (DR_INIT (a)),
2989 wi::to_poly_widest (tree_size_a),
2990 wi::to_poly_widest (DR_INIT (b)),
2991 wi::to_poly_widest (tree_size_b)))
2993 gcc_assert (integer_zerop (DR_STEP (a))
2994 && integer_zerop (DR_STEP (b)));
2995 return false;
2998 aff_tree off1, off2;
2999 poly_widest_int size1, size2;
3000 get_inner_reference_aff (DR_REF (a), &off1, &size1);
3001 get_inner_reference_aff (DR_REF (b), &off2, &size2);
3002 aff_combination_scale (&off1, -1);
3003 aff_combination_add (&off2, &off1);
3004 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
3005 return false;
3008 if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
3009 && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
3010 /* For cross-iteration dependences the cliques must be valid for the
3011 whole loop, not just individual iterations. */
3012 && (!loop_nest
3013 || MR_DEPENDENCE_CLIQUE (addr_a) == 1
3014 || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique)
3015 && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
3016 && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
3017 return false;
3019 /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
3020 do not know the size of the base-object. So we cannot do any
3021 offset/overlap based analysis but have to rely on points-to
3022 information only. */
3023 if (TREE_CODE (addr_a) == MEM_REF
3024 && (DR_UNCONSTRAINED_BASE (a)
3025 || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
3027 /* For true dependences we can apply TBAA. */
3028 if (flag_strict_aliasing
3029 && DR_IS_WRITE (a) && DR_IS_READ (b)
3030 && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
3031 get_alias_set (DR_REF (b))))
3032 return false;
3033 if (TREE_CODE (addr_b) == MEM_REF)
3034 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3035 TREE_OPERAND (addr_b, 0));
3036 else
3037 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3038 build_fold_addr_expr (addr_b));
3040 else if (TREE_CODE (addr_b) == MEM_REF
3041 && (DR_UNCONSTRAINED_BASE (b)
3042 || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
3044 /* For true dependences we can apply TBAA. */
3045 if (flag_strict_aliasing
3046 && DR_IS_WRITE (a) && DR_IS_READ (b)
3047 && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
3048 get_alias_set (DR_REF (b))))
3049 return false;
3050 if (TREE_CODE (addr_a) == MEM_REF)
3051 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3052 TREE_OPERAND (addr_b, 0));
3053 else
3054 return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
3055 TREE_OPERAND (addr_b, 0));
3058 /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
3059 that is being subsetted in the loop nest. */
3060 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
3061 return refs_output_dependent_p (addr_a, addr_b);
3062 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
3063 return refs_anti_dependent_p (addr_a, addr_b);
3064 return refs_may_alias_p (addr_a, addr_b);
3067 /* REF_A and REF_B both satisfy access_fn_component_p. Return true
3068 if it is meaningful to compare their associated access functions
3069 when checking for dependencies. */
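/* E.g. __real x.c and __imag x.c form a comparable
   REALPART_EXPR/IMAGPART_EXPR pair, whereas a COMPONENT_REF such as
   x.a is never comparable with an ARRAY_REF such as y[i].  */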
3071 static bool
3072 access_fn_components_comparable_p (tree ref_a, tree ref_b)
3074 /* Allow pairs of component refs from the following sets:
3076 { REALPART_EXPR, IMAGPART_EXPR }
3077 { COMPONENT_REF }
3078 { ARRAY_REF }. */
3079 tree_code code_a = TREE_CODE (ref_a);
3080 tree_code code_b = TREE_CODE (ref_b);
3081 if (code_a == IMAGPART_EXPR)
3082 code_a = REALPART_EXPR;
3083 if (code_b == IMAGPART_EXPR)
3084 code_b = REALPART_EXPR;
3085 if (code_a != code_b)
3086 return false;
3088 if (TREE_CODE (ref_a) == COMPONENT_REF)
3089 /* ??? We cannot simply use the type of operand #0 of the refs here as
3090 the Fortran compiler smuggles type punning into COMPONENT_REFs.
3091 Use the DECL_CONTEXT of the FIELD_DECLs instead. */
3092 return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
3093 == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
3095 return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
3096 TREE_TYPE (TREE_OPERAND (ref_b, 0)));
3099 /* Initialize a data dependence relation RES in LOOP_NEST. USE_ALT_INDICES
3100 is true when the main indices of A and B were not comparable so we try again
3101 with alternate indices computed on an indirect reference. */
3103 struct data_dependence_relation *
3104 initialize_data_dependence_relation (struct data_dependence_relation *res,
3105 vec<loop_p> loop_nest,
3106 bool use_alt_indices)
3108 struct data_reference *a = DDR_A (res);
3109 struct data_reference *b = DDR_B (res);
3110 unsigned int i;
3112 struct indices *indices_a = &a->indices;
3113 struct indices *indices_b = &b->indices;
3114 if (use_alt_indices)
3116 if (TREE_CODE (DR_REF (a)) != MEM_REF)
3117 indices_a = &a->alt_indices;
3118 if (TREE_CODE (DR_REF (b)) != MEM_REF)
3119 indices_b = &b->alt_indices;
3121 unsigned int num_dimensions_a = indices_a->access_fns.length ();
3122 unsigned int num_dimensions_b = indices_b->access_fns.length ();
3123 if (num_dimensions_a == 0 || num_dimensions_b == 0)
3125 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3126 return res;
3129 /* For unconstrained bases, the root (highest-indexed) subscript
3130 describes a variation in the base of the original DR_REF rather
3131 than a component access. We have no type that accurately describes
3132 the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
3133 applying this subscript) so limit the search to the last real
3134 component access.
3136 E.g. for:
3138 void
3139 f (int a[][8], int b[][8])
3141 for (int i = 0; i < 8; ++i)
3142 a[i * 2][0] = b[i][0];
3145 the a and b accesses have a single ARRAY_REF component reference [0]
3146 but have two subscripts. */
3147 if (indices_a->unconstrained_base)
3148 num_dimensions_a -= 1;
3149 if (indices_b->unconstrained_base)
3150 num_dimensions_b -= 1;
3152 /* These structures describe sequences of component references in
3153 DR_REF (A) and DR_REF (B). Each component reference is tied to a
3154 specific access function. */
3155 struct {
3156 /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
3157 DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
3158 indices. In C notation, these are the indices of the rightmost
3159 component references; e.g. for a sequence .b.c.d, the start
3160 index is for .d. */
3161 unsigned int start_a;
3162 unsigned int start_b;
3164 /* The sequence contains LENGTH consecutive access functions from
3165 each DR. */
3166 unsigned int length;
3168 /* The enclosing objects for the A and B sequences respectively,
3169 i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
3170 and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied. */
3171 tree object_a;
3172 tree object_b;
3173 } full_seq = {}, struct_seq = {};
3175 /* Before each iteration of the loop:
3177 - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
3178 - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B). */
3179 unsigned int index_a = 0;
3180 unsigned int index_b = 0;
3181 tree ref_a = DR_REF (a);
3182 tree ref_b = DR_REF (b);
3184 /* Now walk the component references from the final DR_REFs back up to
3185 the enclosing base objects. Each component reference corresponds
3186 to one access function in the DR, with access function 0 being for
3187 the final DR_REF and the highest-indexed access function being the
3188 one that is applied to the base of the DR.
3190 Look for a sequence of component references whose access functions
3191 are comparable (see access_fn_components_comparable_p). If more
3192 than one such sequence exists, pick the one nearest the base
3193 (which is the leftmost sequence in C notation). Store this sequence
3194 in FULL_SEQ.
3196 For example, if we have:
3198 struct foo { struct bar s; ... } (*a)[10], (*b)[10];
3200 A: a[0][i].s.c.d
3201 B: __real b[0][i].s.e[i].f
3203 (where d is the same type as the real component of f) then the access
3204 functions would be:
3206 0 1 2 3
3207 A: .d .c .s [i]
3209 0 1 2 3 4 5
3210 B: __real .f [i] .e .s [i]
3212 The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
3213 and [i] is an ARRAY_REF. However, the A1/B3 column contains two
3214 COMPONENT_REF accesses for struct bar, so is comparable. Likewise
3215 the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
3216 so is comparable. The A3/B5 column contains two ARRAY_REFs that
3217 index foo[10] arrays, so is again comparable. The sequence is
3218 therefore:
3220 A: [1, 3] (i.e. [i].s.c)
3221 B: [3, 5] (i.e. [i].s.e)
3223 Also look for sequences of component references whose access
3224 functions are comparable and whose enclosing objects have the same
3225 RECORD_TYPE. Store this sequence in STRUCT_SEQ. In the above
3226 example, STRUCT_SEQ would be:
3228 A: [1, 2] (i.e. s.c)
3229 B: [3, 4] (i.e. s.e) */
3230 while (index_a < num_dimensions_a && index_b < num_dimensions_b)
3232 /* The alternate indices form always has a single dimension
3233 with unconstrained base. */
3234 gcc_assert (!use_alt_indices);
3236 /* REF_A and REF_B must be one of the component access types
3237 allowed by dr_analyze_indices. */
3238 gcc_checking_assert (access_fn_component_p (ref_a));
3239 gcc_checking_assert (access_fn_component_p (ref_b));
3241 /* Get the immediately-enclosing objects for REF_A and REF_B,
3242 i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
3243 and DR_ACCESS_FN (B, INDEX_B). */
3244 tree object_a = TREE_OPERAND (ref_a, 0);
3245 tree object_b = TREE_OPERAND (ref_b, 0);
3247 tree type_a = TREE_TYPE (object_a);
3248 tree type_b = TREE_TYPE (object_b);
3249 if (access_fn_components_comparable_p (ref_a, ref_b))
3251 /* This pair of component accesses is comparable for dependence
3252 analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
3253 DR_ACCESS_FN (B, INDEX_B) in the sequence. */
3254 if (full_seq.start_a + full_seq.length != index_a
3255 || full_seq.start_b + full_seq.length != index_b)
3257 /* The accesses don't extend the current sequence,
3258 so start a new one here. */
3259 full_seq.start_a = index_a;
3260 full_seq.start_b = index_b;
3261 full_seq.length = 0;
3264 /* Add this pair of references to the sequence. */
3265 full_seq.length += 1;
3266 full_seq.object_a = object_a;
3267 full_seq.object_b = object_b;
3269 /* If the enclosing objects are structures (and thus have the
3270 same RECORD_TYPE), record the new sequence in STRUCT_SEQ. */
3271 if (TREE_CODE (type_a) == RECORD_TYPE)
3272 struct_seq = full_seq;
3274 /* Move to the next containing reference for both A and B. */
3275 ref_a = object_a;
3276 ref_b = object_b;
3277 index_a += 1;
3278 index_b += 1;
3279 continue;
3282 /* Try to approach equal type sizes: advance whichever reference has the
smaller enclosing type (both if the sizes are equal). */
3283 if (!COMPLETE_TYPE_P (type_a)
3284 || !COMPLETE_TYPE_P (type_b)
3285 || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
3286 || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
3287 break;
3289 unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
3290 unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
3291 if (size_a <= size_b)
3293 index_a += 1;
3294 ref_a = object_a;
3296 if (size_b <= size_a)
3298 index_b += 1;
3299 ref_b = object_b;
3303 /* See whether FULL_SEQ ends at the base and whether the two bases
3304 are equal. We do not care about TBAA or alignment info so we can
3305 use OEP_ADDRESS_OF to avoid false negatives. */
3306 tree base_a = indices_a->base_object;
3307 tree base_b = indices_b->base_object;
3308 bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
3309 && full_seq.start_b + full_seq.length == num_dimensions_b
3310 && (indices_a->unconstrained_base
3311 == indices_b->unconstrained_base)
3312 && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
3313 && (types_compatible_p (TREE_TYPE (base_a),
3314 TREE_TYPE (base_b))
3315 || (!base_supports_access_fn_components_p (base_a)
3316 && !base_supports_access_fn_components_p (base_b)
3317 && operand_equal_p
3318 (TYPE_SIZE (TREE_TYPE (base_a)),
3319 TYPE_SIZE (TREE_TYPE (base_b)), 0)))
3320 && (!loop_nest.exists ()
3321 || (object_address_invariant_in_loop_p
3322 (loop_nest[0], base_a))));
3324 /* If the bases are the same, we can include the base variation too.
3325 E.g. the b accesses in:
3327 for (int i = 0; i < n; ++i)
3328 b[i + 4][0] = b[i][0];
3330 have a definite dependence distance of 4, while for:
3332 for (int i = 0; i < n; ++i)
3333 a[i + 4][0] = b[i][0];
3335 the dependence distance depends on the gap between a and b.
3337 If the bases are different then we can only rely on the sequence
3338 rooted at a structure access, since arrays are allowed to overlap
3339 arbitrarily and change shape arbitrarily. E.g. we treat this as
3340 valid code:
3342 int a[256];
3344 ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];
3346 where two lvalues with the same int[4][3] type overlap, and where
3347 both lvalues are distinct from the object's declared type. */
3348 if (same_base_p)
3350 if (indices_a->unconstrained_base)
3351 full_seq.length += 1;
3353 else
3354 full_seq = struct_seq;
3356 /* Punt if we didn't find a suitable sequence. */
3357 if (full_seq.length == 0)
3359 if (use_alt_indices
3360 || (TREE_CODE (DR_REF (a)) == MEM_REF
3361 && TREE_CODE (DR_REF (b)) == MEM_REF)
3362 || may_be_nonaddressable_p (DR_REF (a))
3363 || may_be_nonaddressable_p (DR_REF (b)))
3365 /* Fully exhausted possibilities. */
3366 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3367 return res;
3370 /* Try evaluating both DRs as dereferences of pointers. */
3371 if (!a->alt_indices.base_object
3372 && TREE_CODE (DR_REF (a)) != MEM_REF)
3374 tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (a)),
3375 build1 (ADDR_EXPR, ptr_type_node, DR_REF (a)),
3376 build_int_cst
3377 (reference_alias_ptr_type (DR_REF (a)), 0));
3378 dr_analyze_indices (&a->alt_indices, alt_ref,
3379 loop_preheader_edge (loop_nest[0]),
3380 loop_containing_stmt (DR_STMT (a)));
3382 if (!b->alt_indices.base_object
3383 && TREE_CODE (DR_REF (b)) != MEM_REF)
3385 tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (b)),
3386 build1 (ADDR_EXPR, ptr_type_node, DR_REF (b)),
3387 build_int_cst
3388 (reference_alias_ptr_type (DR_REF (b)), 0));
3389 dr_analyze_indices (&b->alt_indices, alt_ref,
3390 loop_preheader_edge (loop_nest[0]),
3391 loop_containing_stmt (DR_STMT (b)));
3393 return initialize_data_dependence_relation (res, loop_nest, true);
3396 if (!same_base_p)
3398 /* Partial overlap is possible for different bases when strict aliasing
3399 is not in effect. It's also possible if either base involves a union
3400 access; e.g. for:
3402 struct s1 { int a[2]; };
3403 struct s2 { struct s1 b; int c; };
3404 struct s3 { int d; struct s1 e; };
3405 union u { struct s2 f; struct s3 g; } *p, *q;
3407 the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
3408 "p->g.e" (base "p->g") and might partially overlap the s1 at
3409 "q->g.e" (base "q->g"). */
3410 if (!flag_strict_aliasing
3411 || ref_contains_union_access_p (full_seq.object_a)
3412 || ref_contains_union_access_p (full_seq.object_b))
3414 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3415 return res;
3418 DDR_COULD_BE_INDEPENDENT_P (res) = true;
3419 if (!loop_nest.exists ()
3420 || (object_address_invariant_in_loop_p (loop_nest[0],
3421 full_seq.object_a)
3422 && object_address_invariant_in_loop_p (loop_nest[0],
3423 full_seq.object_b)))
3425 DDR_OBJECT_A (res) = full_seq.object_a;
3426 DDR_OBJECT_B (res) = full_seq.object_b;
3430 DDR_AFFINE_P (res) = true;
3431 DDR_ARE_DEPENDENT (res) = NULL_TREE;
3432 DDR_SUBSCRIPTS (res).create (full_seq.length);
3433 DDR_LOOP_NEST (res) = loop_nest;
3434 DDR_SELF_REFERENCE (res) = false;
3436 for (i = 0; i < full_seq.length; ++i)
3438 struct subscript *subscript;
3440 subscript = XNEW (struct subscript);
3441 SUB_ACCESS_FN (subscript, 0) = indices_a->access_fns[full_seq.start_a + i];
3442 SUB_ACCESS_FN (subscript, 1) = indices_b->access_fns[full_seq.start_b + i];
3443 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
3444 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
3445 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
3446 SUB_DISTANCE (subscript) = chrec_dont_know;
3447 DDR_SUBSCRIPTS (res).safe_push (subscript);
3450 return res;
3453 /* Initialize a data dependence relation between data accesses A and
3454 B. NB_LOOPS is the number of loops surrounding the references: the
3455 size of the classic distance/direction vectors. */
3457 struct data_dependence_relation *
3458 initialize_data_dependence_relation (struct data_reference *a,
3459 struct data_reference *b,
3460 vec<loop_p> loop_nest)
3462 data_dependence_relation *res = XCNEW (struct data_dependence_relation);
3463 DDR_A (res) = a;
3464 DDR_B (res) = b;
3465 DDR_LOOP_NEST (res).create (0);
3466 DDR_SUBSCRIPTS (res).create (0);
3467 DDR_DIR_VECTS (res).create (0);
3468 DDR_DIST_VECTS (res).create (0);
3470 if (a == NULL || b == NULL)
3472 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3473 return res;
3476 /* If the data references do not alias, then they are independent. */
3477 if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL))
3479 DDR_ARE_DEPENDENT (res) = chrec_known;
3480 return res;
3483 return initialize_data_dependence_relation (res, loop_nest, false);
3487 /* Frees memory used by the conflict function F. */
3489 static void
3490 free_conflict_function (conflict_function *f)
3492 unsigned i;
3494 if (CF_NONTRIVIAL_P (f))
3496 for (i = 0; i < f->n; i++)
3497 affine_fn_free (f->fns[i]);
3499 free (f);
3502 /* Frees memory used by SUBSCRIPTS. */
3504 static void
3505 free_subscripts (vec<subscript_p> subscripts)
3507 for (subscript_p s : subscripts)
3509 free_conflict_function (s->conflicting_iterations_in_a);
3510 free_conflict_function (s->conflicting_iterations_in_b);
3511 free (s);
3513 subscripts.release ();
3516 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
3517 description. */
3519 static inline void
3520 finalize_ddr_dependent (struct data_dependence_relation *ddr,
3521 tree chrec)
3523 DDR_ARE_DEPENDENT (ddr) = chrec;
3524 free_subscripts (DDR_SUBSCRIPTS (ddr));
3525 DDR_SUBSCRIPTS (ddr).create (0);
3528 /* The dependence relation DDR cannot be represented by a distance
3529 vector. */
3531 static inline void
3532 non_affine_dependence_relation (struct data_dependence_relation *ddr)
3534 if (dump_file && (dump_flags & TDF_DETAILS))
3535 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
3537 DDR_AFFINE_P (ddr) = false;
3542 /* This section contains the classic Banerjee tests. */
3544 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
3545 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
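/* E.g. the subscript pair (3, 5) taken from accesses a[3] and a[5] is
   a ZIV subscript: neither access function depends on a loop index.  */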
3547 static inline bool
3548 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3550 return (evolution_function_is_constant_p (chrec_a)
3551 && evolution_function_is_constant_p (chrec_b));
3554 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
3555 variable, i.e., if the SIV (Single Index Variable) test is true. */
3557 static bool
3558 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3560 if ((evolution_function_is_constant_p (chrec_a)
3561 && evolution_function_is_univariate_p (chrec_b))
3562 || (evolution_function_is_constant_p (chrec_b)
3563 && evolution_function_is_univariate_p (chrec_a)))
3564 return true;
3566 if (evolution_function_is_univariate_p (chrec_a)
3567 && evolution_function_is_univariate_p (chrec_b))
3569 switch (TREE_CODE (chrec_a))
3571 case POLYNOMIAL_CHREC:
3572 switch (TREE_CODE (chrec_b))
3574 case POLYNOMIAL_CHREC:
3575 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
3576 return false;
3577 /* FALLTHRU */
3579 default:
3580 return true;
3583 default:
3584 return true;
3588 return false;
3591 /* Creates a conflict function with N dimensions. The affine functions
3592 in each dimension follow. */
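/* E.g. conflict_fn (1, affine_fn_cst (integer_zero_node)) builds a
   one-dimensional conflict function whose only affine function is the
   constant 0, as used by analyze_ziv_subscript below.  */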
3594 static conflict_function *
3595 conflict_fn (unsigned n, ...)
3597 unsigned i;
3598 conflict_function *ret = XCNEW (conflict_function);
3599 va_list ap;
3601 gcc_assert (n > 0 && n <= MAX_DIM);
3602 va_start (ap, n);
3604 ret->n = n;
3605 for (i = 0; i < n; i++)
3606 ret->fns[i] = va_arg (ap, affine_fn);
3607 va_end (ap);
3609 return ret;
3612 /* Returns constant affine function with value CST. */
3614 static affine_fn
3615 affine_fn_cst (tree cst)
3617 affine_fn fn;
3618 fn.create (1);
3619 fn.quick_push (cst);
3620 return fn;
3623 /* Returns affine function with single variable, CST + COEF * x_DIM. */
3625 static affine_fn
3626 affine_fn_univar (tree cst, unsigned dim, tree coef)
3628 affine_fn fn;
3629 fn.create (dim + 1);
3630 unsigned i;
3632 gcc_assert (dim > 0);
3633 fn.quick_push (cst);
3634 for (i = 1; i < dim; i++)
3635 fn.quick_push (integer_zero_node);
3636 fn.quick_push (coef);
3637 return fn;
3640 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
3641 *OVERLAPS_B are initialized to the functions that describe the
3642 relation between the elements accessed twice by CHREC_A and
3643 CHREC_B. For k >= 0, the following property is verified:
3645 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
3647 static void
3648 analyze_ziv_subscript (tree chrec_a,
3649 tree chrec_b,
3650 conflict_function **overlaps_a,
3651 conflict_function **overlaps_b,
3652 tree *last_conflicts)
3654 tree type, difference;
3655 dependence_stats.num_ziv++;
3657 if (dump_file && (dump_flags & TDF_DETAILS))
3658 fprintf (dump_file, "(analyze_ziv_subscript \n");
3660 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3661 chrec_a = chrec_convert (type, chrec_a, NULL);
3662 chrec_b = chrec_convert (type, chrec_b, NULL);
3663 difference = chrec_fold_minus (type, chrec_a, chrec_b);
3665 switch (TREE_CODE (difference))
3667 case INTEGER_CST:
3668 if (integer_zerop (difference))
3670 /* The difference is equal to zero: the accessed index
3671 overlaps for each iteration in the loop. */
3672 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3673 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3674 *last_conflicts = chrec_dont_know;
3675 dependence_stats.num_ziv_dependent++;
3677 else
3679 /* The accesses do not overlap. */
3680 *overlaps_a = conflict_fn_no_dependence ();
3681 *overlaps_b = conflict_fn_no_dependence ();
3682 *last_conflicts = integer_zero_node;
3683 dependence_stats.num_ziv_independent++;
3685 break;
3687 default:
3688 /* We're not sure whether the indexes overlap. For the moment,
3689 conservatively answer "don't know". */
3690 if (dump_file && (dump_flags & TDF_DETAILS))
3691 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
3693 *overlaps_a = conflict_fn_not_known ();
3694 *overlaps_b = conflict_fn_not_known ();
3695 *last_conflicts = chrec_dont_know;
3696 dependence_stats.num_ziv_unimplemented++;
3697 break;
3700 if (dump_file && (dump_flags & TDF_DETAILS))
3701 fprintf (dump_file, ")\n");
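/* Editorial worked example (not from the GCC sources): for the
   accesses A[3] and A[5] we get

   | chrec_a = 3, chrec_b = 5, difference = -2

   a nonzero INTEGER_CST, so the accesses never overlap and the
   subscripts are independent.  */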
3704 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
3705 and only if it fits the unsigned int type. If this is not the case, or
3706 the bound on the number of iterations of LOOP could not be derived,
3707 returns chrec_dont_know. */
3709 static tree
3710 max_stmt_executions_tree (class loop *loop)
3712 widest_int nit;
3714 if (!max_stmt_executions (loop, &nit))
3715 return chrec_dont_know;
3717 if (!wi::fits_to_tree_p (nit, unsigned_type_node))
3718 return chrec_dont_know;
3720 return wide_int_to_tree (unsigned_type_node, nit);
3723 /* Determine whether the CHREC is always positive or always negative. If
3724 this cannot be statically determined, return false; otherwise store the
3725 answer in *VALUE and return true. */
3727 static bool
3728 chrec_is_positive (tree chrec, bool *value)
3730 bool value0, value1, value2;
3731 tree end_value, nb_iter;
3733 switch (TREE_CODE (chrec))
3735 case POLYNOMIAL_CHREC:
3736 if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
3737 || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
3738 return false;
3740 /* FIXME -- overflows. */
3741 if (value0 == value1)
3743 *value = value0;
3744 return true;
3747 /* Otherwise the chrec is of the form "{-197, +, 2}_1",
3748 and the proof consists in showing that the sign never
3749 changes during the execution of the loop, from 0 to
3750 loop->nb_iterations. */
3751 if (!evolution_function_is_affine_p (chrec))
3752 return false;
3754 nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
3755 if (chrec_contains_undetermined (nb_iter))
3756 return false;
3758 #if 0
3759 /* TODO -- If the test is after the exit, we may decrease the number of
3760 iterations by one. */
3761 if (after_exit)
3762 nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
3763 #endif
3765 end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
3767 if (!chrec_is_positive (end_value, &value2))
3768 return false;
3770 *value = value0;
3771 return value0 == value2;
3773 case INTEGER_CST:
3774 switch (tree_int_cst_sgn (chrec))
3776 case -1:
3777 *value = false;
3778 break;
3779 case 1:
3780 *value = true;
3781 break;
3782 default:
3783 return false;
3785 return true;
3787 default:
3788 return false;
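/* Editorial worked example, assuming a loop with 50 latch
   executions: for {-197, +, 2}_1 we get value0 = false (from -197)
   and value1 = true (from 2), and the end value is

   | -197 + 2 * 50 = -97

   which is also negative, so value2 == value0 and the chrec is
   known to stay negative throughout the loop.  */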
3793 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
3794 constant, and CHREC_B is an affine function. *OVERLAPS_A and
3795 *OVERLAPS_B are initialized to the functions that describe the
3796 relation between the elements accessed twice by CHREC_A and
3797 CHREC_B. For k >= 0, the following property is verified:
3799 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
3801 static void
3802 analyze_siv_subscript_cst_affine (tree chrec_a,
3803 tree chrec_b,
3804 conflict_function **overlaps_a,
3805 conflict_function **overlaps_b,
3806 tree *last_conflicts)
3808 bool value0, value1, value2;
3809 tree type, difference, tmp;
3811 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3812 chrec_a = chrec_convert (type, chrec_a, NULL);
3813 chrec_b = chrec_convert (type, chrec_b, NULL);
3814 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
3816 /* Special case overlap in the first iteration. */
3817 if (integer_zerop (difference))
3819 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3820 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3821 *last_conflicts = integer_one_node;
3822 return;
3825 if (!chrec_is_positive (initial_condition (difference), &value0))
3827 if (dump_file && (dump_flags & TDF_DETAILS))
3828 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
3830 dependence_stats.num_siv_unimplemented++;
3831 *overlaps_a = conflict_fn_not_known ();
3832 *overlaps_b = conflict_fn_not_known ();
3833 *last_conflicts = chrec_dont_know;
3834 return;
3836 else
3838 if (value0 == false)
3840 if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3841 || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
3843 if (dump_file && (dump_flags & TDF_DETAILS))
3844 fprintf (dump_file, "siv test failed: chrec not positive.\n");
3846 *overlaps_a = conflict_fn_not_known ();
3847 *overlaps_b = conflict_fn_not_known ();
3848 *last_conflicts = chrec_dont_know;
3849 dependence_stats.num_siv_unimplemented++;
3850 return;
3852 else
3854 if (value1 == true)
3856 /* Example:
3857 chrec_a = 12
3858 chrec_b = {10, +, 1}
3861 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3863 HOST_WIDE_INT numiter;
3864 class loop *loop = get_chrec_loop (chrec_b);
3866 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3867 tmp = fold_build2 (EXACT_DIV_EXPR, type,
3868 fold_build1 (ABS_EXPR, type, difference),
3869 CHREC_RIGHT (chrec_b));
3870 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3871 *last_conflicts = integer_one_node;
3874 /* Perform weak-zero siv test to see if overlap is
3875 outside the loop bounds. */
3876 numiter = max_stmt_executions_int (loop);
3878 if (numiter >= 0
3879 && compare_tree_int (tmp, numiter) > 0)
3881 free_conflict_function (*overlaps_a);
3882 free_conflict_function (*overlaps_b);
3883 *overlaps_a = conflict_fn_no_dependence ();
3884 *overlaps_b = conflict_fn_no_dependence ();
3885 *last_conflicts = integer_zero_node;
3886 dependence_stats.num_siv_independent++;
3887 return;
3889 dependence_stats.num_siv_dependent++;
3890 return;
3893 /* When the step does not divide the difference, there are
3894 no overlaps. */
3895 else
3897 *overlaps_a = conflict_fn_no_dependence ();
3898 *overlaps_b = conflict_fn_no_dependence ();
3899 *last_conflicts = integer_zero_node;
3900 dependence_stats.num_siv_independent++;
3901 return;
3905 else
3907 /* Example:
3908 chrec_a = 12
3909 chrec_b = {10, +, -1}
3911 In this case, chrec_a will not overlap with chrec_b. */
3912 *overlaps_a = conflict_fn_no_dependence ();
3913 *overlaps_b = conflict_fn_no_dependence ();
3914 *last_conflicts = integer_zero_node;
3915 dependence_stats.num_siv_independent++;
3916 return;
3920 else
3922 if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3923 || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
3925 if (dump_file && (dump_flags & TDF_DETAILS))
3926 fprintf (dump_file, "siv test failed: chrec not positive.\n");
3928 *overlaps_a = conflict_fn_not_known ();
3929 *overlaps_b = conflict_fn_not_known ();
3930 *last_conflicts = chrec_dont_know;
3931 dependence_stats.num_siv_unimplemented++;
3932 return;
3934 else
3936 if (value2 == false)
3938 /* Example:
3939 chrec_a = 3
3940 chrec_b = {10, +, -1}
3942 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3944 HOST_WIDE_INT numiter;
3945 class loop *loop = get_chrec_loop (chrec_b);
3947 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3948 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
3949 CHREC_RIGHT (chrec_b));
3950 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3951 *last_conflicts = integer_one_node;
3953 /* Perform weak-zero siv test to see if overlap is
3954 outside the loop bounds. */
3955 numiter = max_stmt_executions_int (loop);
3957 if (numiter >= 0
3958 && compare_tree_int (tmp, numiter) > 0)
3960 free_conflict_function (*overlaps_a);
3961 free_conflict_function (*overlaps_b);
3962 *overlaps_a = conflict_fn_no_dependence ();
3963 *overlaps_b = conflict_fn_no_dependence ();
3964 *last_conflicts = integer_zero_node;
3965 dependence_stats.num_siv_independent++;
3966 return;
3968 dependence_stats.num_siv_dependent++;
3969 return;
3972 /* When the step does not divide the difference, there
3973 are no overlaps. */
3974 else
3976 *overlaps_a = conflict_fn_no_dependence ();
3977 *overlaps_b = conflict_fn_no_dependence ();
3978 *last_conflicts = integer_zero_node;
3979 dependence_stats.num_siv_independent++;
3980 return;
3983 else
3985 /* Example:
3986 chrec_a = 3
3987 chrec_b = {4, +, 1}
3989 In this case, chrec_a will not overlap with chrec_b. */
3990 *overlaps_a = conflict_fn_no_dependence ();
3991 *overlaps_b = conflict_fn_no_dependence ();
3992 *last_conflicts = integer_zero_node;
3993 dependence_stats.num_siv_independent++;
3994 return;
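/* Editorial worked example for the weak-zero test above: with
   chrec_a = 12 and chrec_b = {10, +, 1}, the only conflict would be
   at iteration (12 - 10) / 1 = 2 of chrec_b.  If
   max_stmt_executions_int shows that the loop runs at most once,
   the conflict lies outside the loop bounds and the accesses are
   independent.  */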
4001 /* Helper recursive function for initializing the matrix A. Returns
4002 the initial value of CHREC. */
4004 static tree
4005 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
4007 gcc_assert (chrec);
4009 switch (TREE_CODE (chrec))
4011 case POLYNOMIAL_CHREC:
4012 HOST_WIDE_INT chrec_right;
4013 if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
4014 return chrec_dont_know;
4015 chrec_right = int_cst_value (CHREC_RIGHT (chrec));
4016 /* We want to be able to negate without overflow. */
4017 if (chrec_right == HOST_WIDE_INT_MIN)
4018 return chrec_dont_know;
4019 A[index][0] = mult * chrec_right;
4020 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
4022 case PLUS_EXPR:
4023 case MULT_EXPR:
4024 case MINUS_EXPR:
4026 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4027 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
4029 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
4032 CASE_CONVERT:
4034 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4035 return chrec_convert (chrec_type (chrec), op, NULL);
4038 case BIT_NOT_EXPR:
4040 /* Handle ~X as -1 - X. */
4041 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4042 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
4043 build_int_cst (TREE_TYPE (chrec), -1), op);
4046 case INTEGER_CST:
4047 return chrec;
4049 default:
4050 gcc_unreachable ();
4051 return NULL_TREE;
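/* Editorial worked example: for the chrec {{3, +, 5}_1, +, 7}_2
   with INDEX = 0 and MULT = 1 the recursion fills

   | A[0][0] = 7   (step in loop_2)
   | A[1][0] = 5   (step in loop_1)

   and returns the initial value 3.  */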
/* Truncating division: for the nonnegative operands this file uses
   it with, this coincides with floor division.  */
4055 #define FLOOR_DIV(x,y) ((x) / (y))
4057 /* Solves the special case of the Diophantine equation:
4058 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
4060 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
4061 number of iterations that loops X and Y run. The overlaps will be
4062 constructed as evolutions in dimension DIM. */
4064 static void
4065 compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
4066 HOST_WIDE_INT step_a,
4067 HOST_WIDE_INT step_b,
4068 affine_fn *overlaps_a,
4069 affine_fn *overlaps_b,
4070 tree *last_conflicts, int dim)
4072 if (((step_a > 0 && step_b > 0)
4073 || (step_a < 0 && step_b < 0)))
4075 HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
4076 HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;
4078 gcd_steps_a_b = gcd (step_a, step_b);
4079 step_overlaps_a = step_b / gcd_steps_a_b;
4080 step_overlaps_b = step_a / gcd_steps_a_b;
4082 if (niter > 0)
4084 tau2 = FLOOR_DIV (niter, step_overlaps_a);
4085 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
4086 last_conflict = tau2;
4087 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
4089 else
4090 *last_conflicts = chrec_dont_know;
4092 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
4093 build_int_cst (NULL_TREE,
4094 step_overlaps_a));
4095 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
4096 build_int_cst (NULL_TREE,
4097 step_overlaps_b));
4100 else
4102 *overlaps_a = affine_fn_cst (integer_zero_node);
4103 *overlaps_b = affine_fn_cst (integer_zero_node);
4104 *last_conflicts = integer_zero_node;
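/* Editorial worked example: for STEP_A = 6 and STEP_B = 9 the gcd
   is 3, so *OVERLAPS_A evolves with step 9/3 = 3 and *OVERLAPS_B
   with step 6/3 = 2; indeed 6 * 3 == 9 * 2 == 18 is the first
   nonzero intersection.  With NITER = 10 the last conflict is
   MIN (10/3, 10/2) = 3.  */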
4108 /* Solves the special case of a Diophantine equation where CHREC_A is
4109 an affine bivariate function, and CHREC_B is an affine univariate
4110 function. For example,
4112 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
4114 has the following overlapping functions:
4116 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
4117 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
4118 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
4120 FORNOW: This is a specialized implementation for a case occurring in
4121 a common benchmark. Implement the general algorithm. */
4123 static void
4124 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
4125 conflict_function **overlaps_a,
4126 conflict_function **overlaps_b,
4127 tree *last_conflicts)
4129 bool xz_p, yz_p, xyz_p;
4130 HOST_WIDE_INT step_x, step_y, step_z;
4131 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
4132 affine_fn overlaps_a_xz, overlaps_b_xz;
4133 affine_fn overlaps_a_yz, overlaps_b_yz;
4134 affine_fn overlaps_a_xyz, overlaps_b_xyz;
4135 affine_fn ova1, ova2, ovb;
4136 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
4138 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
4139 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
4140 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
4142 niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
4143 niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
4144 niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
4146 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
4148 if (dump_file && (dump_flags & TDF_DETAILS))
4149 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
4151 *overlaps_a = conflict_fn_not_known ();
4152 *overlaps_b = conflict_fn_not_known ();
4153 *last_conflicts = chrec_dont_know;
4154 return;
4157 niter = MIN (niter_x, niter_z);
4158 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
4159 &overlaps_a_xz,
4160 &overlaps_b_xz,
4161 &last_conflicts_xz, 1);
4162 niter = MIN (niter_y, niter_z);
4163 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
4164 &overlaps_a_yz,
4165 &overlaps_b_yz,
4166 &last_conflicts_yz, 2);
4167 niter = MIN (niter_x, niter_z);
4168 niter = MIN (niter_y, niter);
4169 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
4170 &overlaps_a_xyz,
4171 &overlaps_b_xyz,
4172 &last_conflicts_xyz, 3);
4174 xz_p = !integer_zerop (last_conflicts_xz);
4175 yz_p = !integer_zerop (last_conflicts_yz);
4176 xyz_p = !integer_zerop (last_conflicts_xyz);
4178 if (xz_p || yz_p || xyz_p)
4180 ova1 = affine_fn_cst (integer_zero_node);
4181 ova2 = affine_fn_cst (integer_zero_node);
4182 ovb = affine_fn_cst (integer_zero_node);
4183 if (xz_p)
4185 affine_fn t0 = ova1;
4186 affine_fn t2 = ovb;
4188 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
4189 ovb = affine_fn_plus (ovb, overlaps_b_xz);
4190 affine_fn_free (t0);
4191 affine_fn_free (t2);
4192 *last_conflicts = last_conflicts_xz;
4194 if (yz_p)
4196 affine_fn t0 = ova2;
4197 affine_fn t2 = ovb;
4199 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
4200 ovb = affine_fn_plus (ovb, overlaps_b_yz);
4201 affine_fn_free (t0);
4202 affine_fn_free (t2);
4203 *last_conflicts = last_conflicts_yz;
4205 if (xyz_p)
4207 affine_fn t0 = ova1;
4208 affine_fn t2 = ova2;
4209 affine_fn t4 = ovb;
4211 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
4212 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
4213 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
4214 affine_fn_free (t0);
4215 affine_fn_free (t2);
4216 affine_fn_free (t4);
4217 *last_conflicts = last_conflicts_xyz;
4219 *overlaps_a = conflict_fn (2, ova1, ova2);
4220 *overlaps_b = conflict_fn (1, ovb);
4222 else
4224 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4225 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4226 *last_conflicts = integer_zero_node;
4229 affine_fn_free (overlaps_a_xz);
4230 affine_fn_free (overlaps_b_xz);
4231 affine_fn_free (overlaps_a_yz);
4232 affine_fn_free (overlaps_b_yz);
4233 affine_fn_free (overlaps_a_xyz);
4234 affine_fn_free (overlaps_b_xyz);
4237 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
4239 static void
4240 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
4241 int size)
4243 memcpy (vec2, vec1, size * sizeof (*vec1));
4246 /* Copy the elements of M x N matrix MAT1 to MAT2. */
4248 static void
4249 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
4250 int m, int n)
4252 int i;
4254 for (i = 0; i < m; i++)
4255 lambda_vector_copy (mat1[i], mat2[i], n);
4258 /* Store the N x N identity matrix in MAT. */
4260 static void
4261 lambda_matrix_id (lambda_matrix mat, int size)
4263 int i, j;
4265 for (i = 0; i < size; i++)
4266 for (j = 0; j < size; j++)
4267 mat[i][j] = (i == j) ? 1 : 0;
4270 /* Return the index of the first nonzero element of vector VEC1 between
4271 START and N. We must have START <= N.
4272 Returns N if VEC1 is the zero vector. */
4274 static int
4275 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
4277 int j = start;
4278 while (j < n && vec1[j] == 0)
4279 j++;
4280 return j;
4283 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
4284 R2 = R2 + CONST1 * R1. */
4286 static bool
4287 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2,
4288 lambda_int const1)
4290 int i;
4292 if (const1 == 0)
4293 return true;
4295 for (i = 0; i < n; i++)
4297 bool ovf;
4298 lambda_int tem = mul_hwi (mat[r1][i], const1, &ovf);
4299 if (ovf)
4300 return false;
4301 lambda_int tem2 = add_hwi (mat[r2][i], tem, &ovf);
4302 if (ovf || tem2 == HOST_WIDE_INT_MIN)
4303 return false;
4304 mat[r2][i] = tem2;
4307 return true;
4310 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
4311 and store the result in VEC2. */
4313 static void
4314 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
4315 int size, lambda_int const1)
4317 int i;
4319 if (const1 == 0)
4320 lambda_vector_clear (vec2, size);
4321 else
4322 for (i = 0; i < size; i++)
4323 vec2[i] = const1 * vec1[i];
4326 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
4328 static void
4329 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
4330 int size)
4332 lambda_vector_mult_const (vec1, vec2, size, -1);
4335 /* Negate row R1 of matrix MAT which has N columns. */
4337 static void
4338 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
4340 lambda_vector_negate (mat[r1], mat[r1], n);
4343 /* Return true if two vectors are equal. */
4345 static bool
4346 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
4348 int i;
4349 for (i = 0; i < size; i++)
4350 if (vec1[i] != vec2[i])
4351 return false;
4352 return true;
4355 /* Given an M x N integer matrix A, this function determines an M x
4356 M unimodular matrix U, and an M x N echelon matrix S such that
4357 "U.A = S". This decomposition is also known as "right Hermite".
4359 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
4360 Restructuring Compilers" Utpal Banerjee. */
4362 static bool
4363 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
4364 lambda_matrix S, lambda_matrix U)
4366 int i, j, i0 = 0;
4368 lambda_matrix_copy (A, S, m, n);
4369 lambda_matrix_id (U, m);
4371 for (j = 0; j < n; j++)
4373 if (lambda_vector_first_nz (S[j], m, i0) < m)
4375 ++i0;
4376 for (i = m - 1; i >= i0; i--)
4378 while (S[i][j] != 0)
4380 lambda_int factor, a, b;
4382 a = S[i-1][j];
4383 b = S[i][j];
4384 gcc_assert (a != HOST_WIDE_INT_MIN);
4385 factor = a / b;
4387 if (!lambda_matrix_row_add (S, n, i, i-1, -factor))
4388 return false;
4389 std::swap (S[i], S[i-1]);
4391 if (!lambda_matrix_row_add (U, m, i, i-1, -factor))
4392 return false;
4393 std::swap (U[i], U[i-1]);
4399 return true;
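/* Editorial worked example (one valid decomposition): for the
   2 x 1 matrix A = ((2), (3)), a right Hermite decomposition is

   | U = ((-1,  1),     S = ((1),
   |      ( 3, -2))          (0))

   where U is unimodular (det U = -1) and U.A = S is in echelon
   form.  */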
4402 /* Determines the overlapping elements due to accesses CHREC_A and
4403 CHREC_B, which are affine functions. This function cannot handle
4404 symbolic evolution functions, i.e. when the initial conditions are
4405 parameters, because it uses lambda matrices of integers. */
4407 static void
4408 analyze_subscript_affine_affine (tree chrec_a,
4409 tree chrec_b,
4410 conflict_function **overlaps_a,
4411 conflict_function **overlaps_b,
4412 tree *last_conflicts)
4414 unsigned nb_vars_a, nb_vars_b, dim;
4415 lambda_int gamma, gcd_alpha_beta;
4416 lambda_matrix A, U, S;
4417 struct obstack scratch_obstack;
4419 if (eq_evolutions_p (chrec_a, chrec_b))
4421 /* The accessed index overlaps for each iteration in the
4422 loop. */
4423 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4424 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4425 *last_conflicts = chrec_dont_know;
4426 return;
4428 if (dump_file && (dump_flags & TDF_DETAILS))
4429 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
4431 /* For determining the initial intersection, we have to solve a
4432 Diophantine equation. This is the most time consuming part.
4434 To answer the question "Is there a dependence?" we have to
4435 prove that there exists a solution to the Diophantine
4436 equation, and that the solution is in the iteration domain,
4437 i.e. the solution is nonnegative and occurs before the upper
4438 bound loop.nb_iterations. Otherwise there is no dependence.
4439 This function outputs a description of the iterations that
4440 hold the intersections. */
4442 nb_vars_a = nb_vars_in_chrec (chrec_a);
4443 nb_vars_b = nb_vars_in_chrec (chrec_b);
4445 gcc_obstack_init (&scratch_obstack);
4447 dim = nb_vars_a + nb_vars_b;
4448 U = lambda_matrix_new (dim, dim, &scratch_obstack);
4449 A = lambda_matrix_new (dim, 1, &scratch_obstack);
4450 S = lambda_matrix_new (dim, 1, &scratch_obstack);
4452 tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
4453 tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
4454 if (init_a == chrec_dont_know
4455 || init_b == chrec_dont_know)
4457 if (dump_file && (dump_flags & TDF_DETAILS))
4458 fprintf (dump_file, "affine-affine test failed: "
4459 "representation issue.\n");
4460 *overlaps_a = conflict_fn_not_known ();
4461 *overlaps_b = conflict_fn_not_known ();
4462 *last_conflicts = chrec_dont_know;
4463 goto end_analyze_subs_aa;
4465 gamma = int_cst_value (init_b) - int_cst_value (init_a);
4467 /* Don't do all the hard work of solving the Diophantine equation
4468 when we already know the solution: for example,
4469 | {3, +, 1}_1
4470 | {3, +, 4}_2
4471 | gamma = 3 - 3 = 0.
4472 Then the first overlap occurs during the first iterations:
4473 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
4475 if (gamma == 0)
4477 if (nb_vars_a == 1 && nb_vars_b == 1)
4479 HOST_WIDE_INT step_a, step_b;
4480 HOST_WIDE_INT niter, niter_a, niter_b;
4481 affine_fn ova, ovb;
4483 niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
4484 niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
4485 niter = MIN (niter_a, niter_b);
4486 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
4487 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
4489 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
4490 &ova, &ovb,
4491 last_conflicts, 1);
4492 *overlaps_a = conflict_fn (1, ova);
4493 *overlaps_b = conflict_fn (1, ovb);
4496 else if (nb_vars_a == 2 && nb_vars_b == 1)
4497 compute_overlap_steps_for_affine_1_2
4498 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
4500 else if (nb_vars_a == 1 && nb_vars_b == 2)
4501 compute_overlap_steps_for_affine_1_2
4502 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
4504 else
4506 if (dump_file && (dump_flags & TDF_DETAILS))
4507 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
4508 *overlaps_a = conflict_fn_not_known ();
4509 *overlaps_b = conflict_fn_not_known ();
4510 *last_conflicts = chrec_dont_know;
4512 goto end_analyze_subs_aa;
4515 /* U.A = S */
4516 if (!lambda_matrix_right_hermite (A, dim, 1, S, U))
4518 *overlaps_a = conflict_fn_not_known ();
4519 *overlaps_b = conflict_fn_not_known ();
4520 *last_conflicts = chrec_dont_know;
4521 goto end_analyze_subs_aa;
4524 if (S[0][0] < 0)
4526 S[0][0] *= -1;
4527 lambda_matrix_row_negate (U, dim, 0);
4529 gcd_alpha_beta = S[0][0];
4531 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
4532 but that is quite a strange case. Instead of ICEing, answer
4533 "don't know". */
4534 if (gcd_alpha_beta == 0)
4536 *overlaps_a = conflict_fn_not_known ();
4537 *overlaps_b = conflict_fn_not_known ();
4538 *last_conflicts = chrec_dont_know;
4539 goto end_analyze_subs_aa;
4542 /* The classic "gcd-test". */
4543 if (!int_divides_p (gcd_alpha_beta, gamma))
4545 /* The "gcd-test" has determined that there is no integer
4546 solution, i.e. there is no dependence. */
4547 *overlaps_a = conflict_fn_no_dependence ();
4548 *overlaps_b = conflict_fn_no_dependence ();
4549 *last_conflicts = integer_zero_node;
4552 /* Both access functions are univariate. This includes SIV and MIV cases. */
4553 else if (nb_vars_a == 1 && nb_vars_b == 1)
4555 /* Both functions should have the same evolution sign. */
4556 if (((A[0][0] > 0 && -A[1][0] > 0)
4557 || (A[0][0] < 0 && -A[1][0] < 0)))
4559 /* The solutions are given by:
4561 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
4562 | [u21 u22] [y0]
4564 For a given integer t. Using the following variables,
4566 | i0 = u11 * gamma / gcd_alpha_beta
4567 | j0 = u12 * gamma / gcd_alpha_beta
4568 | i1 = u21
4569 | j1 = u22
4571 the solutions are:
4573 | x0 = i0 + i1 * t,
4574 | y0 = j0 + j1 * t. */
4575 HOST_WIDE_INT i0, j0, i1, j1;
4577 i0 = U[0][0] * gamma / gcd_alpha_beta;
4578 j0 = U[0][1] * gamma / gcd_alpha_beta;
4579 i1 = U[1][0];
4580 j1 = U[1][1];
4582 if ((i1 == 0 && i0 < 0)
4583 || (j1 == 0 && j0 < 0))
4585 /* There is no solution.
4586 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
4587 falls in here, but for the moment we don't look at the
4588 upper bound of the iteration domain. */
4589 *overlaps_a = conflict_fn_no_dependence ();
4590 *overlaps_b = conflict_fn_no_dependence ();
4591 *last_conflicts = integer_zero_node;
4592 goto end_analyze_subs_aa;
4595 if (i1 > 0 && j1 > 0)
4597 HOST_WIDE_INT niter_a
4598 = max_stmt_executions_int (get_chrec_loop (chrec_a));
4599 HOST_WIDE_INT niter_b
4600 = max_stmt_executions_int (get_chrec_loop (chrec_b));
4601 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
4603 /* (X0, Y0) is a solution of the Diophantine equation:
4604 "chrec_a (X0) = chrec_b (Y0)". */
4605 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
4606 CEIL (-j0, j1));
4607 HOST_WIDE_INT x0 = i1 * tau1 + i0;
4608 HOST_WIDE_INT y0 = j1 * tau1 + j0;
4610 /* (X1, Y1) is the smallest positive solution of the eq
4611 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
4612 first conflict occurs. */
4613 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
4614 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
4615 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
4617 if (niter > 0)
4619 /* If the overlap occurs outside of the bounds of the
4620 loop, there is no dependence. */
4621 if (x1 >= niter_a || y1 >= niter_b)
4623 *overlaps_a = conflict_fn_no_dependence ();
4624 *overlaps_b = conflict_fn_no_dependence ();
4625 *last_conflicts = integer_zero_node;
4626 goto end_analyze_subs_aa;
4629 /* max stmt executions can get quite large, avoid
4630 overflows by using wide ints here. */
4631 widest_int tau2
4632 = wi::smin (wi::sdiv_floor (wi::sub (niter_a, i0), i1),
4633 wi::sdiv_floor (wi::sub (niter_b, j0), j1));
4634 widest_int last_conflict = wi::sub (tau2, (x1 - i0)/i1);
4635 if (wi::min_precision (last_conflict, SIGNED)
4636 <= TYPE_PRECISION (integer_type_node))
4637 *last_conflicts
4638 = build_int_cst (integer_type_node,
4639 last_conflict.to_shwi ());
4640 else
4641 *last_conflicts = chrec_dont_know;
4643 else
4644 *last_conflicts = chrec_dont_know;
4646 *overlaps_a
4647 = conflict_fn (1,
4648 affine_fn_univar (build_int_cst (NULL_TREE, x1),
4650 build_int_cst (NULL_TREE, i1)));
4651 *overlaps_b
4652 = conflict_fn (1,
4653 affine_fn_univar (build_int_cst (NULL_TREE, y1),
4655 build_int_cst (NULL_TREE, j1)));
4657 else
4659 /* FIXME: For the moment, the upper bound of the
4660 iteration domain for i and j is not checked. */
4661 if (dump_file && (dump_flags & TDF_DETAILS))
4662 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4663 *overlaps_a = conflict_fn_not_known ();
4664 *overlaps_b = conflict_fn_not_known ();
4665 *last_conflicts = chrec_dont_know;
4668 else
4670 if (dump_file && (dump_flags & TDF_DETAILS))
4671 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4672 *overlaps_a = conflict_fn_not_known ();
4673 *overlaps_b = conflict_fn_not_known ();
4674 *last_conflicts = chrec_dont_know;
4677 else
4679 if (dump_file && (dump_flags & TDF_DETAILS))
4680 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4681 *overlaps_a = conflict_fn_not_known ();
4682 *overlaps_b = conflict_fn_not_known ();
4683 *last_conflicts = chrec_dont_know;
4686 end_analyze_subs_aa:
4687 obstack_free (&scratch_obstack, NULL);
4688 if (dump_file && (dump_flags & TDF_DETAILS))
4690 fprintf (dump_file, " (overlaps_a = ");
4691 dump_conflict_function (dump_file, *overlaps_a);
4692 fprintf (dump_file, ")\n (overlaps_b = ");
4693 dump_conflict_function (dump_file, *overlaps_b);
4694 fprintf (dump_file, "))\n");
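/* Editorial worked example of the gcd-test above: for {3, +, 4}_1
   vs. {0, +, 6}_1 we get gamma = 0 - 3 = -3 and gcd_alpha_beta =
   gcd (4, 6) = 2.  Since 2 does not divide -3, the Diophantine
   equation 3 + 4x = 6y has no integer solution: the left-hand side
   is always odd and the right-hand side always even, hence there is
   no dependence.  */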
4698 /* Returns true when analyze_subscript_affine_affine can be used for
4699 determining the dependence relation between chrec_a and chrec_b,
4700 which contain symbols. This function modifies CHREC_A and CHREC_B
4701 so that the analysis result is unchanged, and so that they no longer
4702 contain symbols and can safely be passed to the analyzer.
4704 Example: The analysis of the following tuples of evolutions produce
4705 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
4706 vs. {0, +, 1}_1
4708 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
4709 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
4712 static bool
4713 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
4715 tree diff, type, left_a, left_b, right_b;
4717 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
4718 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
4719 /* FIXME: For the moment not handled. Might be refined later. */
4720 return false;
4722 type = chrec_type (*chrec_a);
4723 left_a = CHREC_LEFT (*chrec_a);
4724 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
4725 diff = chrec_fold_minus (type, left_a, left_b);
4727 if (!evolution_function_is_constant_p (diff))
4728 return false;
4730 if (dump_file && (dump_flags & TDF_DETAILS))
4731 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
4733 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
4734 diff, CHREC_RIGHT (*chrec_a));
4735 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
4736 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
4737 build_int_cst (type, 0),
4738 right_b);
4739 return true;
4742 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
4743 *OVERLAPS_B are initialized to the functions that describe the
4744 relation between the elements accessed twice by CHREC_A and
4745 CHREC_B. For k >= 0, the following property is verified:
4747 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
4749 static void
4750 analyze_siv_subscript (tree chrec_a,
4751 tree chrec_b,
4752 conflict_function **overlaps_a,
4753 conflict_function **overlaps_b,
4754 tree *last_conflicts,
4755 int loop_nest_num)
4757 dependence_stats.num_siv++;
4759 if (dump_file && (dump_flags & TDF_DETAILS))
4760 fprintf (dump_file, "(analyze_siv_subscript \n");
4762 if (evolution_function_is_constant_p (chrec_a)
4763 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
4764 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
4765 overlaps_a, overlaps_b, last_conflicts);
4767 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
4768 && evolution_function_is_constant_p (chrec_b))
4769 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
4770 overlaps_b, overlaps_a, last_conflicts);
4772 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
4773 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
4775 if (!chrec_contains_symbols (chrec_a)
4776 && !chrec_contains_symbols (chrec_b))
4778 analyze_subscript_affine_affine (chrec_a, chrec_b,
4779 overlaps_a, overlaps_b,
4780 last_conflicts);
4782 if (CF_NOT_KNOWN_P (*overlaps_a)
4783 || CF_NOT_KNOWN_P (*overlaps_b))
4784 dependence_stats.num_siv_unimplemented++;
4785 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4786 || CF_NO_DEPENDENCE_P (*overlaps_b))
4787 dependence_stats.num_siv_independent++;
4788 else
4789 dependence_stats.num_siv_dependent++;
4791 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
4792 &chrec_b))
4794 analyze_subscript_affine_affine (chrec_a, chrec_b,
4795 overlaps_a, overlaps_b,
4796 last_conflicts);
4798 if (CF_NOT_KNOWN_P (*overlaps_a)
4799 || CF_NOT_KNOWN_P (*overlaps_b))
4800 dependence_stats.num_siv_unimplemented++;
4801 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4802 || CF_NO_DEPENDENCE_P (*overlaps_b))
4803 dependence_stats.num_siv_independent++;
4804 else
4805 dependence_stats.num_siv_dependent++;
4807 else
4808 goto siv_subscript_dontknow;
4811 else
4813 siv_subscript_dontknow:;
4814 if (dump_file && (dump_flags & TDF_DETAILS))
4815 fprintf (dump_file, "siv test failed: unimplemented.\n");
4816 *overlaps_a = conflict_fn_not_known ();
4817 *overlaps_b = conflict_fn_not_known ();
4818 *last_conflicts = chrec_dont_know;
4819 dependence_stats.num_siv_unimplemented++;
4822 if (dump_file && (dump_flags & TDF_DETAILS))
4823 fprintf (dump_file, ")\n");
4826 /* Returns false if we can prove that the greatest common divisor of the steps
4827 of CHREC does not divide CST, true otherwise. */
4829 static bool
4830 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
4832 HOST_WIDE_INT cd = 0, val;
4833 tree step;
4835 if (!tree_fits_shwi_p (cst))
4836 return true;
4837 val = tree_to_shwi (cst);
4839 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
4841 step = CHREC_RIGHT (chrec);
4842 if (!tree_fits_shwi_p (step))
4843 return true;
4844 cd = gcd (cd, tree_to_shwi (step));
4845 chrec = CHREC_LEFT (chrec);
4848 return val % cd == 0;
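/* Editorial worked example: for CHREC = {{0, +, 4}_1, +, 6}_2 and
   CST = 3, the gcd of the steps is gcd (4, 6) = 2; since 3 is not a
   multiple of 2, the function returns false: the difference of 3
   can never be covered by the evolution steps.  */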
4851 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
4852 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
4853 functions that describe the relation between the elements accessed
4854 twice by CHREC_A and CHREC_B. For k >= 0, the following property
4855 is verified:
4857 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
4859 static void
4860 analyze_miv_subscript (tree chrec_a,
4861 tree chrec_b,
4862 conflict_function **overlaps_a,
4863 conflict_function **overlaps_b,
4864 tree *last_conflicts,
4865 class loop *loop_nest)
4867 tree type, difference;
4869 dependence_stats.num_miv++;
4870 if (dump_file && (dump_flags & TDF_DETAILS))
4871 fprintf (dump_file, "(analyze_miv_subscript \n");
4873 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
4874 chrec_a = chrec_convert (type, chrec_a, NULL);
4875 chrec_b = chrec_convert (type, chrec_b, NULL);
4876 difference = chrec_fold_minus (type, chrec_a, chrec_b);
4878 if (eq_evolutions_p (chrec_a, chrec_b))
4880 /* Access functions are the same: all the elements are accessed
4881 in the same order. */
4882 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4883 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4884 *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
4885 dependence_stats.num_miv_dependent++;
4888 else if (evolution_function_is_constant_p (difference)
4889 && evolution_function_is_affine_multivariate_p (chrec_a,
4890 loop_nest->num)
4891 && !gcd_of_steps_may_divide_p (chrec_a, difference))
4893 /* testsuite/.../ssa-chrec-33.c
4894 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
4896 The difference is 1, and all the evolution steps are multiples
4897 of 2, consequently there are no overlapping elements. */
4898 *overlaps_a = conflict_fn_no_dependence ();
4899 *overlaps_b = conflict_fn_no_dependence ();
4900 *last_conflicts = integer_zero_node;
4901 dependence_stats.num_miv_independent++;
4904 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest->num)
4905 && !chrec_contains_symbols (chrec_a, loop_nest)
4906 && evolution_function_is_affine_in_loop (chrec_b, loop_nest->num)
4907 && !chrec_contains_symbols (chrec_b, loop_nest))
4909 /* testsuite/.../ssa-chrec-35.c
4910 {0, +, 1}_2 vs. {0, +, 1}_3
4911 the overlapping elements are respectively located at iterations:
4912 {0, +, 1}_x and {0, +, 1}_x,
4913 in other words, we have the equality:
4914 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
4916 Other examples:
4917 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
4918 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
4920 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
4921 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
4923 analyze_subscript_affine_affine (chrec_a, chrec_b,
4924 overlaps_a, overlaps_b, last_conflicts);
4926 if (CF_NOT_KNOWN_P (*overlaps_a)
4927 || CF_NOT_KNOWN_P (*overlaps_b))
4928 dependence_stats.num_miv_unimplemented++;
4929 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4930 || CF_NO_DEPENDENCE_P (*overlaps_b))
4931 dependence_stats.num_miv_independent++;
4932 else
4933 dependence_stats.num_miv_dependent++;
4936 else
4938 /* When the analysis is too difficult, answer "don't know". */
4939 if (dump_file && (dump_flags & TDF_DETAILS))
4940 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
4942 *overlaps_a = conflict_fn_not_known ();
4943 *overlaps_b = conflict_fn_not_known ();
4944 *last_conflicts = chrec_dont_know;
4945 dependence_stats.num_miv_unimplemented++;
4948 if (dump_file && (dump_flags & TDF_DETAILS))
4949 fprintf (dump_file, ")\n");
4952 /* Determines the iterations for which CHREC_A is equal to CHREC_B
4953 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
4954 OVERLAP_ITERATIONS_B are initialized with two functions that
4955 describe the iterations that contain conflicting elements.
4957 Remark: For an integer k >= 0, the following equality is true:
4959 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
4962 static void
4963 analyze_overlapping_iterations (tree chrec_a,
4964 tree chrec_b,
4965 conflict_function **overlap_iterations_a,
4966 conflict_function **overlap_iterations_b,
4967 tree *last_conflicts, class loop *loop_nest)
4969 unsigned int lnn = loop_nest->num;
4971 dependence_stats.num_subscript_tests++;
4973 if (dump_file && (dump_flags & TDF_DETAILS))
4975 fprintf (dump_file, "(analyze_overlapping_iterations \n");
4976 fprintf (dump_file, " (chrec_a = ");
4977 print_generic_expr (dump_file, chrec_a);
4978 fprintf (dump_file, ")\n (chrec_b = ");
4979 print_generic_expr (dump_file, chrec_b);
4980 fprintf (dump_file, ")\n");
4983 if (chrec_a == NULL_TREE
4984 || chrec_b == NULL_TREE
4985 || chrec_contains_undetermined (chrec_a)
4986 || chrec_contains_undetermined (chrec_b))
4988 dependence_stats.num_subscript_undetermined++;
4990 *overlap_iterations_a = conflict_fn_not_known ();
4991 *overlap_iterations_b = conflict_fn_not_known ();
4994 /* If they are the same chrec, and are affine, they overlap
4995 on every iteration. */
4996 else if (eq_evolutions_p (chrec_a, chrec_b)
4997 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4998 || operand_equal_p (chrec_a, chrec_b, 0)))
5000 dependence_stats.num_same_subscript_function++;
5001 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
5002 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
5003 *last_conflicts = chrec_dont_know;
5006 /* If they aren't the same, and aren't affine, we can't do anything
5007 yet. */
5008 else if ((chrec_contains_symbols (chrec_a)
5009 || chrec_contains_symbols (chrec_b))
5010 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
5011 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
5013 dependence_stats.num_subscript_undetermined++;
5014 *overlap_iterations_a = conflict_fn_not_known ();
5015 *overlap_iterations_b = conflict_fn_not_known ();
5018 else if (ziv_subscript_p (chrec_a, chrec_b))
5019 analyze_ziv_subscript (chrec_a, chrec_b,
5020 overlap_iterations_a, overlap_iterations_b,
5021 last_conflicts);
5023 else if (siv_subscript_p (chrec_a, chrec_b))
5024 analyze_siv_subscript (chrec_a, chrec_b,
5025 overlap_iterations_a, overlap_iterations_b,
5026 last_conflicts, lnn);
5028 else
5029 analyze_miv_subscript (chrec_a, chrec_b,
5030 overlap_iterations_a, overlap_iterations_b,
5031 last_conflicts, loop_nest);
5033 if (dump_file && (dump_flags & TDF_DETAILS))
5035 fprintf (dump_file, " (overlap_iterations_a = ");
5036 dump_conflict_function (dump_file, *overlap_iterations_a);
5037 fprintf (dump_file, ")\n (overlap_iterations_b = ");
5038 dump_conflict_function (dump_file, *overlap_iterations_b);
5039 fprintf (dump_file, "))\n");
5043 /* Helper function for uniquely inserting distance vectors. */
5045 static void
5046 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
5048 for (lambda_vector v : DDR_DIST_VECTS (ddr))
5049 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
5050 return;
5052 DDR_DIST_VECTS (ddr).safe_push (dist_v);
5055 /* Helper function for uniquely inserting direction vectors. */
5057 static void
5058 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
5060 for (lambda_vector v : DDR_DIR_VECTS (ddr))
5061 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
5062 return;
5064 DDR_DIR_VECTS (ddr).safe_push (dir_v);
5067 /* Add a distance of 1 on all the loops outer than INDEX. If we
5068 haven't yet determined a distance for this outer loop, push a new
5069 distance vector composed of the previous distance, and a distance
5070 of 1 for this outer loop. Example:
5072 | loop_1
5073 | loop_2
5074 | A[10]
5075 | endloop_2
5076 | endloop_1
5078 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
5079 save (0, 1), then we have to save (1, 0). */
5081 static void
5082 add_outer_distances (struct data_dependence_relation *ddr,
5083 lambda_vector dist_v, int index)
5085 /* For each outer loop where init_v is not set, the accesses are
5086 in dependence of distance 1 in the loop. */
5087 while (--index >= 0)
5089 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5090 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
5091 save_v[index] = 1;
5092 save_dist_v (ddr, save_v);
5096 /* Return false when we fail to represent the data dependence as a
5097 distance vector. A_INDEX is the index of the first reference
5098 (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
5099 second reference. INIT_B is set to true when a component has been
5100 added to the distance vector DIST_V. INDEX_CARRY is then set to
5101 the index in DIST_V that carries the dependence. */
5103 static bool
5104 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
5105 unsigned int a_index, unsigned int b_index,
5106 lambda_vector dist_v, bool *init_b,
5107 int *index_carry)
5109 unsigned i;
5110 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5111 class loop *loop = DDR_LOOP_NEST (ddr)[0];
5113 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
5115 tree access_fn_a, access_fn_b;
5116 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
5118 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
5120 non_affine_dependence_relation (ddr);
5121 return false;
5124 access_fn_a = SUB_ACCESS_FN (subscript, a_index);
5125 access_fn_b = SUB_ACCESS_FN (subscript, b_index);
5127 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
5128 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
5130 HOST_WIDE_INT dist;
5131 int index;
5132 int var_a = CHREC_VARIABLE (access_fn_a);
5133 int var_b = CHREC_VARIABLE (access_fn_b);
5135 if (var_a != var_b
5136 || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
5138 non_affine_dependence_relation (ddr);
5139 return false;
5142 /* When data references are collected in a loop while data
5143 dependences are analyzed in a loop nest nested in that loop,
5144 we can have more access functions than loops. Skip access
5145 functions of loops that are not in the loop nest.
5147 See PR89725 for more information. */
5148 if (flow_loop_nested_p (get_loop (cfun, var_a), loop))
5149 continue;
5151 dist = int_cst_value (SUB_DISTANCE (subscript));
5152 index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
5153 *index_carry = MIN (index, *index_carry);
5155 /* This is the subscript coupling test. If we have already
5156 recorded a distance for this loop (a distance coming from
5157 another subscript), it should be the same. For example,
5158 in the following code, there is no dependence:
5160 | loop i = 0, N, 1
5161 | T[i+1][i] = ...
5162 | ... = T[i][i]
5163 | endloop
5165 if (init_v[index] != 0 && dist_v[index] != dist)
5167 finalize_ddr_dependent (ddr, chrec_known);
5168 return false;
5171 dist_v[index] = dist;
5172 init_v[index] = 1;
5173 *init_b = true;
5175 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
5177 /* This can be for example an affine vs. constant dependence
5178 (T[i] vs. T[3]) that is not an affine dependence and is
5179 not representable as a distance vector. */
5180 non_affine_dependence_relation (ddr);
5181 return false;
5183 else
5184 *init_b = true;
5187 return true;
5190 /* Return true when the DDR contains only access functions that are
5191 invariant with respect to loop number LNUM. */
5193 static bool
5194 invariant_access_functions (const struct data_dependence_relation *ddr,
5195 int lnum)
5197 for (subscript *sub : DDR_SUBSCRIPTS (ddr))
5198 if (!evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 0), lnum)
5199 || !evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 1), lnum))
5200 return false;
5202 return true;
5205 /* Helper function for the case where DDR_A and DDR_B are the same
5206 multivariate access function with a constant step. For an example
5207 see pr34635-1.c. */
5209 static void
5210 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
5212 int x_1, x_2;
5213 tree c_1 = CHREC_LEFT (c_2);
5214 tree c_0 = CHREC_LEFT (c_1);
5215 lambda_vector dist_v;
5216 HOST_WIDE_INT v1, v2, cd;
5218 /* Polynomials with more than 2 variables are not handled yet. When
5219 the evolution steps are parameters, it is not possible to
5220 represent the dependence using classical distance vectors. */
5221 if (TREE_CODE (c_0) != INTEGER_CST
5222 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
5223 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
5225 DDR_AFFINE_P (ddr) = false;
5226 return;
5229 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
5230 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
5232 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
5233 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5234 v1 = int_cst_value (CHREC_RIGHT (c_1));
5235 v2 = int_cst_value (CHREC_RIGHT (c_2));
5236 cd = gcd (v1, v2);
5237 v1 /= cd;
5238 v2 /= cd;
5240 if (v2 < 0)
5242 v2 = -v2;
5243 v1 = -v1;
5246 dist_v[x_1] = v2;
5247 dist_v[x_2] = -v1;
5248 save_dist_v (ddr, dist_v);
5250 add_outer_distances (ddr, dist_v, x_1);
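/* Editorial check of the example above: for "{{0, +, 2}_1, +, 3}_2"
   the accessed element is 2*i_1 + 3*i_2, and the distance vector
   (3, -2) indeed revisits it, since 2*3 + 3*(-2) == 0.  */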
5253 /* Helper function for the case where DDR_A and DDR_B are the same
5254 access functions. */
5256 static void
5257 add_other_self_distances (struct data_dependence_relation *ddr)
5259 lambda_vector dist_v;
5260 unsigned i;
5261 int index_carry = DDR_NB_LOOPS (ddr);
5262 subscript *sub;
5263 class loop *loop = DDR_LOOP_NEST (ddr)[0];
5265 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
5267 tree access_fun = SUB_ACCESS_FN (sub, 0);
5269 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
5271 if (!evolution_function_is_univariate_p (access_fun, loop->num))
5273 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
5275 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
5276 return;
5279 access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);
5281 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
5282 add_multivariate_self_dist (ddr, access_fun);
5283 else
5284 /* The evolution step is not constant: it varies in
5285 the outer loop, so this cannot be represented by a
5286 distance vector. For example in pr34635.c the
5287 evolution is {0, +, {0, +, 4}_1}_2. */
5288 DDR_AFFINE_P (ddr) = false;
5290 return;
5293 /* When data references are collected in a loop while data
5294 dependences are analyzed in a loop nest nested in that loop,
5295 we can have more access functions than loops. Skip access
5296 functions of loops that are not in the loop nest.
5298 See PR89725 for more information. */
5299 if (flow_loop_nested_p (get_loop (cfun, CHREC_VARIABLE (access_fun)),
5300 loop))
5301 continue;
5303 index_carry = MIN (index_carry,
5304 index_in_loop_nest (CHREC_VARIABLE (access_fun),
5305 DDR_LOOP_NEST (ddr)));
5309 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5310 add_outer_distances (ddr, dist_v, index_carry);
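/* Helper for add_distance_for_zero_overlaps: record the unit
   distance vector (1, 0, ..., 0).  */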
5313 static void
5314 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
5316 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5318 dist_v[0] = 1;
5319 save_dist_v (ddr, dist_v);
5322 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
5323 is the case for example when access functions are the same and
5324 equal to a constant, as in:
5326 | loop_1
5327 | A[3] = ...
5328 | ... = A[3]
5329 | endloop_1
5331 in which case the distance vectors are (0) and (1). */
5333 static void
5334 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
5336 unsigned i, j;
5338 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
5340 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
5341 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
5342 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
5344 for (j = 0; j < ca->n; j++)
5345 if (affine_function_zero_p (ca->fns[j]))
5347 insert_innermost_unit_dist_vector (ddr);
5348 return;
5351 for (j = 0; j < cb->n; j++)
5352 if (affine_function_zero_p (cb->fns[j]))
5354 insert_innermost_unit_dist_vector (ddr);
5355 return;
5360 /* Return true when the DDR contains two data references that have the
5361 same access functions. */
5363 static inline bool
5364 same_access_functions (const struct data_dependence_relation *ddr)
5366 for (subscript *sub : DDR_SUBSCRIPTS (ddr))
5367 if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
5368 SUB_ACCESS_FN (sub, 1)))
5369 return false;
5371 return true;
5374 /* Compute the classic per loop distance vector. DDR is the data
5375 dependence relation to build a vector from. Return false when we
5376 fail to represent the data dependence as a distance vector. */
5378 static bool
5379 build_classic_dist_vector (struct data_dependence_relation *ddr,
5380 class loop *loop_nest)
5382 bool init_b = false;
5383 int index_carry = DDR_NB_LOOPS (ddr);
5384 lambda_vector dist_v;
5386 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
5387 return false;
5389 if (same_access_functions (ddr))
5391 /* Save the 0 vector. */
5392 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5393 save_dist_v (ddr, dist_v);
5395 if (invariant_access_functions (ddr, loop_nest->num))
5396 add_distance_for_zero_overlaps (ddr);
5398 if (DDR_NB_LOOPS (ddr) > 1)
5399 add_other_self_distances (ddr);
5401 return true;
5404 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5405 if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
5406 return false;
5408 /* Save the distance vector if we initialized one. */
5409 if (init_b)
5411 /* Verify a basic constraint: classic distance vectors should
5412 always be lexicographically positive.
5414 Data references are collected in the order of execution of
5415 the program, thus for the following loop
5417 | for (i = 1; i < 100; i++)
5418 | for (j = 1; j < 100; j++)
5420 | t = T[j+1][i-1]; // A
5421 | T[j][i] = t + 2; // B
5424 references are collected in execution order: A then B. The
5425 data dependence tests are also performed in this order, so
5426 that we're looking at the distance separating the elements
5427 accessed by A from the elements later accessed by B. But in
5428 this example, the distance returned by test_dep (A, B) is
5429 lexicographically negative (-1, 1), which means that the
5430 access A occurs later than B with respect to the outer loop,
5431 i.e. we're actually looking upwind. In this case we solve
5432 test_dep (B, A) looking downwind for the lexicographically
5433 positive solution, which returns the distance
5434 vector (1, -1). */
5435 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
5437 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5438 if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
5439 return false;
5440 compute_subscript_distance (ddr);
5441 if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
5442 &index_carry))
5443 return false;
5444 save_dist_v (ddr, save_v);
5445 DDR_REVERSED_P (ddr) = true;
5447 /* In this case there is a dependence forward for all the
5448 outer loops:
5450 | for (k = 1; k < 100; k++)
5451 | for (i = 1; i < 100; i++)
5452 | for (j = 1; j < 100; j++)
5454 | t = T[j+1][i-1]; // A
5455 | T[j][i] = t + 2; // B
5458 the vectors are:
5459 (0, 1, -1)
5460 (1, 1, -1)
5461 (1, -1, 1)
5463 if (DDR_NB_LOOPS (ddr) > 1)
5465 add_outer_distances (ddr, save_v, index_carry);
5466 add_outer_distances (ddr, dist_v, index_carry);
5469 else
5471 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5472 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
5474 if (DDR_NB_LOOPS (ddr) > 1)
5476 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5478 if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
5479 return false;
5480 compute_subscript_distance (ddr);
5481 if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
5482 &index_carry))
5483 return false;
5485 save_dist_v (ddr, save_v);
5486 add_outer_distances (ddr, dist_v, index_carry);
5487 add_outer_distances (ddr, opposite_v, index_carry);
5489 else
5490 save_dist_v (ddr, save_v);
5493 else
5495 /* There is a distance of 1 on all the outer loops: Example:
5496 there is a dependence of distance 1 on loop_1 for the array A.
5498 | loop_1
5499 | A[5] = ...
5500 | endloop
5502 add_outer_distances (ddr, dist_v,
5503 lambda_vector_first_nz (dist_v,
5504 DDR_NB_LOOPS (ddr), 0));
5507 if (dump_file && (dump_flags & TDF_DETAILS))
5509 unsigned i;
5511 fprintf (dump_file, "(build_classic_dist_vector\n");
5512 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
5514 fprintf (dump_file, " dist_vector = (");
5515 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
5516 DDR_NB_LOOPS (ddr));
5517 fprintf (dump_file, " )\n");
5519 fprintf (dump_file, ")\n");
5522 return true;
5525 /* Return the direction for a given distance.
5526 FIXME: Computing dir this way is suboptimal, since dir can catch
5527 cases that dist is unable to represent. */
5529 static inline enum data_dependence_direction
5530 dir_from_dist (int dist)
5532 if (dist > 0)
5533 return dir_positive;
5534 else if (dist < 0)
5535 return dir_negative;
5536 else
5537 return dir_equal;
/* Compute the classic per loop direction vector.  DDR is the data
   dependence relation to build a vector from.  */

static void
build_classic_dir_vector (struct data_dependence_relation *ddr)
{
  unsigned i, j;
  lambda_vector dist_v;

  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
    {
      lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));

      for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
	dir_v[j] = dir_from_dist (dist_v[j]);

      save_dir_v (ddr, dir_v);
    }
}
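
/* Illustrative sketch (not part of GCC): building a direction vector
   from a distance vector component by component, as
   build_classic_dir_vector does via dir_from_dist.  The enum and names
   below are hypothetical stand-ins for the GCC types.  */

enum demo_dir { DEMO_POSITIVE, DEMO_NEGATIVE, DEMO_EQUAL };

static void
demo_dir_from_dist_vector (const int *dist_v, enum demo_dir *dir_v,
			   unsigned nb_loops)
{
  for (unsigned j = 0; j < nb_loops; j++)
    dir_v[j] = (dist_v[j] > 0 ? DEMO_POSITIVE
		: dist_v[j] < 0 ? DEMO_NEGATIVE : DEMO_EQUAL);
}

/* The distance vector (1, -1, 0) thus maps to the direction vector
   (positive, negative, equal).  */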
/* Helper function.  Returns true when there is a dependence between the
   data references.  A_INDEX is the index of the first reference (0 for
   DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference.  */

static bool
subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
			       unsigned int a_index, unsigned int b_index,
			       class loop *loop_nest)
{
  unsigned int i;
  tree last_conflicts;
  struct subscript *subscript;
  tree res = NULL_TREE;

  for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
    {
      conflict_function *overlaps_a, *overlaps_b;

      analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
				      SUB_ACCESS_FN (subscript, b_index),
				      &overlaps_a, &overlaps_b,
				      &last_conflicts, loop_nest);

      if (SUB_CONFLICTS_IN_A (subscript))
	free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
      if (SUB_CONFLICTS_IN_B (subscript))
	free_conflict_function (SUB_CONFLICTS_IN_B (subscript));

      SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
      SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
      SUB_LAST_CONFLICT (subscript) = last_conflicts;

      /* If there is any undetermined conflict function we have to
	 give a conservative answer in case we cannot prove that
	 no dependence exists when analyzing another subscript.  */
      if (CF_NOT_KNOWN_P (overlaps_a)
	  || CF_NOT_KNOWN_P (overlaps_b))
	{
	  res = chrec_dont_know;
	  continue;
	}

      /* When there is a subscript with no dependence we can stop.  */
      else if (CF_NO_DEPENDENCE_P (overlaps_a)
	       || CF_NO_DEPENDENCE_P (overlaps_b))
	{
	  res = chrec_known;
	  break;
	}
    }

  if (res == NULL_TREE)
    return true;

  if (res == chrec_known)
    dependence_stats.num_dependence_independent++;
  else
    dependence_stats.num_dependence_undetermined++;
  finalize_ddr_dependent (ddr, res);
  return false;
}
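
/* Illustrative sketch (not part of GCC): the tri-state combination rule
   used above.  One subscript proven conflict-free decides the whole
   relation (the references are independent, so stop early); otherwise a
   single undetermined subscript forces the conservative "don't know".
   The enum and names below are hypothetical.  */

enum demo_overlap { DEMO_OVERLAP, DEMO_NO_OVERLAP, DEMO_UNKNOWN };

/* Returns true and sets *DEPENDENT when the answer is exact; returns
   false when the caller must conservatively assume a dependence.  */
static bool
demo_combine_subscripts (const demo_overlap *sub, int n, bool *dependent)
{
  bool saw_unknown = false;
  for (int i = 0; i < n; i++)
    {
      if (sub[i] == DEMO_NO_OVERLAP)
	{
	  /* Independence in one dimension implies independence.  */
	  *dependent = false;
	  return true;
	}
      if (sub[i] == DEMO_UNKNOWN)
	saw_unknown = true;
    }
  if (saw_unknown)
    return false;
  *dependent = true;	/* Every subscript overlapped.  */
  return true;
}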
/* Computes the conflicting iterations in LOOP_NEST, and initializes DDR.  */

static void
subscript_dependence_tester (struct data_dependence_relation *ddr,
			     class loop *loop_nest)
{
  if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
    dependence_stats.num_dependence_dependent++;

  compute_subscript_distance (ddr);
  if (build_classic_dist_vector (ddr, loop_nest))
    build_classic_dir_vector (ddr);
}
/* Returns true when all the access functions of A are affine or
   constant with respect to LOOP_NEST.  */

static bool
access_functions_are_affine_or_constant_p (const struct data_reference *a,
					   const class loop *loop_nest)
{
  vec<tree> fns = DR_ACCESS_FNS (a);
  for (tree t : fns)
    if (!evolution_function_is_invariant_p (t, loop_nest->num)
	&& !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
      return false;

  return true;
}
/* This computes the affine dependence relation between A and B with
   respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
   independence between two accesses, while CHREC_DONT_KNOW is used
   for representing the unknown relation.

   Note that it is possible to stop the computation of the dependence
   relation the first time we detect a CHREC_KNOWN element for a given
   subscript.  */

void
compute_affine_dependence (struct data_dependence_relation *ddr,
			   class loop *loop_nest)
{
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "(compute_affine_dependence\n");
      fprintf (dump_file, "  ref_a: ");
      print_generic_expr (dump_file, DR_REF (dra));
      fprintf (dump_file, ", stmt_a: ");
      print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
      fprintf (dump_file, "  ref_b: ");
      print_generic_expr (dump_file, DR_REF (drb));
      fprintf (dump_file, ", stmt_b: ");
      print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
    }

  /* Analyze only when the dependence relation is not yet known.  */
  if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      dependence_stats.num_dependence_tests++;

      if (access_functions_are_affine_or_constant_p (dra, loop_nest)
	  && access_functions_are_affine_or_constant_p (drb, loop_nest))
	subscript_dependence_tester (ddr, loop_nest);

      /* As a last case, if the dependence cannot be determined, or if
	 the dependence is considered too difficult to determine, answer
	 "don't know".  */
      else
	{
	  dependence_stats.num_dependence_undetermined++;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Data ref a:\n");
	      dump_data_reference (dump_file, dra);
	      fprintf (dump_file, "Data ref b:\n");
	      dump_data_reference (dump_file, drb);
	      fprintf (dump_file,
		       "affine dependence test not usable: access function not affine or constant.\n");
	    }
	  finalize_ddr_dependent (ddr, chrec_dont_know);
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
	fprintf (dump_file, ") -> no dependence\n");
      else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	fprintf (dump_file, ") -> dependence analysis failed\n");
      else
	fprintf (dump_file, ")\n");
    }
}
/* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
   the data references in DATAREFS, in the LOOP_NEST.  When
   COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
   relations.  Return true when successful, i.e. when the number of
   data references is small enough to be handled.  */

bool
compute_all_dependences (const vec<data_reference_p> &datarefs,
			 vec<ddr_p> *dependence_relations,
			 const vec<loop_p> &loop_nest,
			 bool compute_self_and_rr)
{
  struct data_dependence_relation *ddr;
  struct data_reference *a, *b;
  unsigned int i, j;

  if ((int) datarefs.length ()
      > param_loop_max_datarefs_for_datadeps)
    {
      struct data_dependence_relation *ddr;

      /* Insert a single relation into dependence_relations:
	 chrec_dont_know.  */
      ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
      dependence_relations->safe_push (ddr);
      return false;
    }

  FOR_EACH_VEC_ELT (datarefs, i, a)
    for (j = i + 1; datarefs.iterate (j, &b); j++)
      if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
	{
	  ddr = initialize_data_dependence_relation (a, b, loop_nest);
	  dependence_relations->safe_push (ddr);
	  if (loop_nest.exists ())
	    compute_affine_dependence (ddr, loop_nest[0]);
	}

  if (compute_self_and_rr)
    FOR_EACH_VEC_ELT (datarefs, i, a)
      {
	ddr = initialize_data_dependence_relation (a, a, loop_nest);
	dependence_relations->safe_push (ddr);
	if (loop_nest.exists ())
	  compute_affine_dependence (ddr, loop_nest[0]);
      }

  return true;
}
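
/* Illustrative sketch (not part of GCC): the pair enumeration used
   above.  For N data references it visits the N*(N-1)/2 unordered
   pairs, skipping read-read pairs unless self and read-read relations
   were requested.  The struct and names below are hypothetical.  */

struct demo_ref { bool is_write; };

static int
demo_count_tested_pairs (const demo_ref *refs, int n, bool self_and_rr)
{
  int tested = 0;
  for (int i = 0; i < n; i++)
    for (int j = i + 1; j < n; j++)
      if (refs[i].is_write || refs[j].is_write || self_and_rr)
	tested++;	/* One dependence relation would be built here.  */
  if (self_and_rr)
    tested += n;	/* Plus one self relation per reference.  */
  return tested;
}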
/* Describes a location of a memory reference.  */

struct data_ref_loc
{
  /* The memory reference.  */
  tree ref;

  /* True if the memory reference is read.  */
  bool is_read;

  /* True if the data reference is conditional within the containing
     statement, i.e. if it might not occur even when the statement
     is executed and runs to completion.  */
  bool is_conditional_in_stmt;
};
/* Stores the locations of memory references in STMT to REFERENCES.  Returns
   true if STMT clobbers memory, false otherwise.  */

static bool
get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
{
  bool clobbers_memory = false;
  data_ref_loc ref;
  tree op0, op1;
  enum gimple_code stmt_code = gimple_code (stmt);

  /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
     Since we cannot model data references to accesses that are not
     spelled out explicitly, give up if such accesses may occur.  */
  if (stmt_code == GIMPLE_CALL
      && !(gimple_call_flags (stmt) & ECF_CONST))
    {
      /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
      if (gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_GOMP_SIMD_LANE:
	    {
	      class loop *loop = gimple_bb (stmt)->loop_father;
	      tree uid = gimple_call_arg (stmt, 0);
	      gcc_assert (TREE_CODE (uid) == SSA_NAME);
	      if (loop == NULL
		  || loop->simduid != SSA_NAME_VAR (uid))
		clobbers_memory = true;
	      break;
	    }
	  case IFN_MASK_LOAD:
	  case IFN_MASK_STORE:
	    break;
	  case IFN_MASK_CALL:
	    {
	      tree orig_fndecl
		= gimple_call_addr_fndecl (gimple_call_arg (stmt, 0));
	      if (!orig_fndecl
		  || (flags_from_decl_or_type (orig_fndecl) & ECF_CONST) == 0)
		clobbers_memory = true;
	    }
	    break;
	  default:
	    clobbers_memory = true;
	    break;
	  }
      else
	clobbers_memory = true;
    }
  else if (stmt_code == GIMPLE_ASM
	   && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
	       || gimple_vuse (stmt)))
    clobbers_memory = true;

  if (!gimple_vuse (stmt))
    return clobbers_memory;

  if (stmt_code == GIMPLE_ASSIGN)
    {
      tree base;
      op0 = gimple_assign_lhs (stmt);
      op1 = gimple_assign_rhs1 (stmt);

      if (DECL_P (op1)
	  || (REFERENCE_CLASS_P (op1)
	      && (base = get_base_address (op1))
	      && TREE_CODE (base) != SSA_NAME
	      && !is_gimple_min_invariant (base)))
	{
	  ref.ref = op1;
	  ref.is_read = true;
	  ref.is_conditional_in_stmt = false;
	  references->safe_push (ref);
	}
    }
  else if (stmt_code == GIMPLE_CALL)
    {
      unsigned i = 0, n;
      tree ptr, type;
      unsigned int align;

      ref.is_read = false;
      if (gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_MASK_LOAD:
	    if (gimple_call_lhs (stmt) == NULL_TREE)
	      break;
	    ref.is_read = true;
	    /* FALLTHRU */
	  case IFN_MASK_STORE:
	    ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
	    align = tree_to_shwi (gimple_call_arg (stmt, 1));
	    if (ref.is_read)
	      type = TREE_TYPE (gimple_call_lhs (stmt));
	    else
	      type = TREE_TYPE (gimple_call_arg (stmt, 3));
	    if (TYPE_ALIGN (type) != align)
	      type = build_aligned_type (type, align);
	    ref.is_conditional_in_stmt = true;
	    ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
				   ptr);
	    references->safe_push (ref);
	    return false;
	  case IFN_MASK_CALL:
	    i = 1;
	    gcc_fallthrough ();
	  default:
	    break;
	  }

      op0 = gimple_call_lhs (stmt);
      n = gimple_call_num_args (stmt);
      for (; i < n; i++)
	{
	  op1 = gimple_call_arg (stmt, i);

	  if (DECL_P (op1)
	      || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
	    {
	      ref.ref = op1;
	      ref.is_read = true;
	      ref.is_conditional_in_stmt = false;
	      references->safe_push (ref);
	    }
	}
    }
  else
    return clobbers_memory;

  if (op0
      && (DECL_P (op0)
	  || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
    {
      ref.ref = op0;
      ref.is_read = false;
      ref.is_conditional_in_stmt = false;
      references->safe_push (ref);
    }
  return clobbers_memory;
}
/* Returns true if the loop-nest has any data reference.  */

bool
loop_nest_has_data_refs (loop_p loop)
{
  basic_block *bbs = get_loop_body (loop);
  auto_vec<data_ref_loc, 3> references;

  for (unsigned i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];
      gimple_stmt_iterator bsi;

      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	{
	  gimple *stmt = gsi_stmt (bsi);
	  get_references_in_stmt (stmt, &references);
	  if (references.length ())
	    {
	      free (bbs);
	      return true;
	    }
	}
    }
  free (bbs);
  return false;
}
/* Stores the data references in STMT to DATAREFS.  If there is an
   unanalyzable reference, returns a failure result, otherwise returns
   success.  NEST is the outermost loop of the loop nest in which the
   references should be analyzed.  */

opt_result
find_data_references_in_stmt (class loop *nest, gimple *stmt,
			      vec<data_reference_p> *datarefs)
{
  auto_vec<data_ref_loc, 2> references;
  data_reference_p dr;

  if (get_references_in_stmt (stmt, &references))
    return opt_result::failure_at (stmt, "statement clobbers memory: %G",
				   stmt);

  for (const data_ref_loc &ref : references)
    {
      dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
			    loop_containing_stmt (stmt), ref.ref,
			    stmt, ref.is_read, ref.is_conditional_in_stmt);
      gcc_assert (dr != NULL);
      datarefs->safe_push (dr);
    }

  return opt_result::success ();
}
/* Stores the data references in STMT to DATAREFS.  If there is an
   unanalyzable reference, returns false, otherwise returns true.
   NEST is the outermost loop of the loop nest in which the references
   should be instantiated, LOOP is the loop in which the references
   should be analyzed.  */

bool
graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
				       vec<data_reference_p> *datarefs)
{
  auto_vec<data_ref_loc, 2> references;
  bool ret = true;
  data_reference_p dr;

  if (get_references_in_stmt (stmt, &references))
    return false;

  for (const data_ref_loc &ref : references)
    {
      dr = create_data_ref (nest, loop, ref.ref, stmt, ref.is_read,
			    ref.is_conditional_in_stmt);
      gcc_assert (dr != NULL);
      datarefs->safe_push (dr);
    }

  return ret;
}
/* Search the data references in basic block BB of LOOP, and record the
   information into DATAREFS.  Returns chrec_dont_know when failing to
   analyze a difficult case, returns NULL_TREE otherwise.  */

tree
find_data_references_in_bb (class loop *loop, basic_block bb,
			    vec<data_reference_p> *datarefs)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    {
      gimple *stmt = gsi_stmt (bsi);

      if (!find_data_references_in_stmt (loop, stmt, datarefs))
	{
	  struct data_reference *res;
	  res = XCNEW (struct data_reference);
	  datarefs->safe_push (res);

	  return chrec_dont_know;
	}
    }

  return NULL_TREE;
}
/* Search the data references in LOOP, and record the information into
   DATAREFS.  Returns chrec_dont_know when failing to analyze a
   difficult case, returns NULL_TREE otherwise.

   TODO: This function should be made smarter so that it can handle
   address arithmetic as if it were a set of array accesses, etc.  */

tree
find_data_references_in_loop (class loop *loop,
			      vec<data_reference_p> *datarefs)
{
  basic_block bb, *bbs;
  unsigned int i;

  bbs = get_loop_body_in_dom_order (loop);

  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = bbs[i];

      if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
	{
	  free (bbs);
	  return chrec_dont_know;
	}
    }
  free (bbs);

  return NULL_TREE;
}
/* Return the alignment in bytes that DRB is guaranteed to have at all
   times.  */

unsigned int
dr_alignment (innermost_loop_behavior *drb)
{
  /* Get the alignment of BASE_ADDRESS + INIT.  */
  unsigned int alignment = drb->base_alignment;
  unsigned int misalignment = (drb->base_misalignment
			       + TREE_INT_CST_LOW (drb->init));
  if (misalignment != 0)
    alignment = MIN (alignment, misalignment & -misalignment);

  /* Cap it to the alignment of OFFSET.  */
  if (!integer_zerop (drb->offset))
    alignment = MIN (alignment, drb->offset_alignment);

  /* Cap it to the alignment of STEP.  */
  if (!integer_zerop (drb->step))
    alignment = MIN (alignment, drb->step_alignment);

  return alignment;
}
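
/* Illustrative sketch (not part of GCC): the bit trick used above.
   x & -x extracts the lowest set bit of x, i.e. the largest power of
   two dividing x, so a base aligned to BASE_ALIGNMENT but misaligned
   by MISALIGNMENT bytes is only guaranteed the smaller of the two.
   The function name below is hypothetical.  */

static unsigned int
demo_guaranteed_alignment (unsigned int base_alignment,
			   unsigned int misalignment)
{
  if (misalignment == 0)
    return base_alignment;	/* No misalignment: keep base alignment.  */
  unsigned int lowbit = misalignment & -misalignment;
  return base_alignment < lowbit ? base_alignment : lowbit;
}

/* For example, base alignment 16 with misalignment 4 yields 4, while
   misalignment 8 yields 8.  */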
/* If BASE is a pointer-typed SSA name, try to find the object that it
   is based on.  Return this object X on success and store the alignment
   in bytes of BASE - &X in *ALIGNMENT_OUT.  */

static tree
get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
{
  if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL_TREE;

  gimple *def = SSA_NAME_DEF_STMT (base);
  base = analyze_scalar_evolution (loop_containing_stmt (def), base);

  /* Peel chrecs and record the minimum alignment preserved by
     all steps.  */
  unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
  while (TREE_CODE (base) == POLYNOMIAL_CHREC)
    {
      unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
      alignment = MIN (alignment, step_alignment);
      base = CHREC_LEFT (base);
    }

  /* Punt if the expression is too complicated to handle.  */
  if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL_TREE;

  /* The only useful cases are those for which a dereference folds to something
     other than an INDIRECT_REF.  */
  tree ref_type = TREE_TYPE (TREE_TYPE (base));
  tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
  if (!ref)
    return NULL_TREE;

  /* Analyze the base to which the steps we peeled were applied.  */
  poly_int64 bitsize, bitpos, bytepos;
  machine_mode mode;
  int unsignedp, reversep, volatilep;
  tree offset;
  base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
			      &unsignedp, &reversep, &volatilep);
  if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
    return NULL_TREE;

  /* Restrict the alignment to that guaranteed by the offsets.  */
  unsigned int bytepos_alignment = known_alignment (bytepos);
  if (bytepos_alignment != 0)
    alignment = MIN (alignment, bytepos_alignment);
  if (offset)
    {
      unsigned int offset_alignment = highest_pow2_factor (offset);
      alignment = MIN (alignment, offset_alignment);
    }

  *alignment_out = alignment;
  return base;
}
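
/* Illustrative sketch (not part of GCC): the alignment bookkeeping in
   the chrec-peeling loop above.  Each peeled step contributes the
   largest power of two dividing it (a standalone stand-in for
   highest_pow2_factor on constant steps), and the guaranteed alignment
   is the minimum over all steps.  The names below are hypothetical.  */

static unsigned int
demo_pow2_factor (unsigned int x)
{
  /* x & -x extracts the lowest set bit: the largest power of two
     dividing x.  Treat 0 as "divisible by anything" (capped).  */
  return x ? x & -x : 1u << 30;
}

static unsigned int
demo_min_step_alignment (const unsigned int *steps, unsigned n,
			 unsigned int max_alignment)
{
  unsigned int alignment = max_alignment;
  for (unsigned i = 0; i < n; i++)
    {
      unsigned int step_alignment = demo_pow2_factor (steps[i]);
      if (step_alignment < alignment)
	alignment = step_alignment;
    }
  return alignment;
}

/* For steps {8, 12} the result is min (8, 4) = 4, capped by
   MAX_ALIGNMENT.  */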
/* Return the object whose alignment would need to be changed in order
   to increase the alignment of ADDR.  Store the maximum achievable
   alignment in *MAX_ALIGNMENT.  */

tree
get_base_for_alignment (tree addr, unsigned int *max_alignment)
{
  tree base = get_base_for_alignment_1 (addr, max_alignment);
  if (base)
    return base;

  if (TREE_CODE (addr) == ADDR_EXPR)
    addr = TREE_OPERAND (addr, 0);
  *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
  return addr;
}
/* Recursive helper function.  */

static bool
find_loop_nest_1 (class loop *loop, vec<loop_p> *loop_nest)
{
  /* Inner loops of the nest should not contain siblings.  Example:
     when there are two consecutive loops,

     | loop_0
     |   loop_1
     |     A[{0, +, 1}_1]
     |   endloop_1
     |   loop_2
     |     A[{0, +, 1}_2]
     |   endloop_2
     | endloop_0

     the dependence relation cannot be captured by the distance
     abstraction.  */
  if (loop->next)
    return false;

  loop_nest->safe_push (loop);
  if (loop->inner)
    return find_loop_nest_1 (loop->inner, loop_nest);
  return true;
}
/* Return false when the LOOP is not well nested.  Otherwise return
   true and insert in LOOP_NEST the loops of the nest.  LOOP_NEST will
   contain the loops from the outermost to the innermost, as they will
   appear in the classic distance vector.  */

bool
find_loop_nest (class loop *loop, vec<loop_p> *loop_nest)
{
  loop_nest->safe_push (loop);
  if (loop->inner)
    return find_loop_nest_1 (loop->inner, loop_nest);
  return true;
}
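
/* Illustrative sketch (not part of GCC): the shape check performed by
   find_loop_nest.  A nest qualifies only when each level below the
   outermost loop has no sibling loops, so the nest can be listed
   outermost-first.  The struct and names below are hypothetical.  */

struct demo_loop
{
  demo_loop *inner;	/* First loop nested inside this one.  */
  demo_loop *next;	/* Next sibling loop at the same depth.  */
};

static bool
demo_is_perfect_nest (const demo_loop *loop)
{
  /* Siblings are allowed only for the outermost loop itself.  */
  for (const demo_loop *l = loop->inner; l; l = l->inner)
    if (l->next)
      return false;
  return true;
}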
/* Returns true when the data dependences have been computed, false otherwise.
   Given a loop nest LOOP, the following vectors are returned:
   DATAREFS is initialized to all the array elements contained in this loop,
   DEPENDENCE_RELATIONS contains the relations between the data references.
   Compute read-read and self relations if
   COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */

bool
compute_data_dependences_for_loop (class loop *loop,
				   bool compute_self_and_read_read_dependences,
				   vec<loop_p> *loop_nest,
				   vec<data_reference_p> *datarefs,
				   vec<ddr_p> *dependence_relations)
{
  bool res = true;

  memset (&dependence_stats, 0, sizeof (dependence_stats));

  /* If the loop nest is not well formed, or one of the data references
     is not computable, give up without spending time to compute other
     dependences.  */
  if (!loop
      || !find_loop_nest (loop, loop_nest)
      || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
      || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
				   compute_self_and_read_read_dependences))
    res = false;

  if (dump_file && (dump_flags & TDF_STATS))
    {
      fprintf (dump_file, "Dependence tester statistics:\n");

      fprintf (dump_file, "Number of dependence tests: %d\n",
	       dependence_stats.num_dependence_tests);
      fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
	       dependence_stats.num_dependence_dependent);
      fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
	       dependence_stats.num_dependence_independent);
      fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
	       dependence_stats.num_dependence_undetermined);

      fprintf (dump_file, "Number of subscript tests: %d\n",
	       dependence_stats.num_subscript_tests);
      fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
	       dependence_stats.num_subscript_undetermined);
      fprintf (dump_file, "Number of same subscript function: %d\n",
	       dependence_stats.num_same_subscript_function);

      fprintf (dump_file, "Number of ziv tests: %d\n",
	       dependence_stats.num_ziv);
      fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
	       dependence_stats.num_ziv_dependent);
      fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
	       dependence_stats.num_ziv_independent);
      fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
	       dependence_stats.num_ziv_unimplemented);

      fprintf (dump_file, "Number of siv tests: %d\n",
	       dependence_stats.num_siv);
      fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
	       dependence_stats.num_siv_dependent);
      fprintf (dump_file, "Number of siv tests returning independent: %d\n",
	       dependence_stats.num_siv_independent);
      fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
	       dependence_stats.num_siv_unimplemented);

      fprintf (dump_file, "Number of miv tests: %d\n",
	       dependence_stats.num_miv);
      fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
	       dependence_stats.num_miv_dependent);
      fprintf (dump_file, "Number of miv tests returning independent: %d\n",
	       dependence_stats.num_miv_independent);
      fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
	       dependence_stats.num_miv_unimplemented);
    }

  return res;
}
/* Free the memory used by a data dependence relation DDR.  */

void
free_dependence_relation (struct data_dependence_relation *ddr)
{
  if (ddr == NULL)
    return;

  if (DDR_SUBSCRIPTS (ddr).exists ())
    free_subscripts (DDR_SUBSCRIPTS (ddr));
  DDR_DIST_VECTS (ddr).release ();
  DDR_DIR_VECTS (ddr).release ();

  free (ddr);
}

/* Free the memory used by the data dependence relations from
   DEPENDENCE_RELATIONS.  */

void
free_dependence_relations (vec<ddr_p>& dependence_relations)
{
  for (data_dependence_relation *ddr : dependence_relations)
    if (ddr)
      free_dependence_relation (ddr);

  dependence_relations.release ();
}

/* Free the memory used by the data references from DATAREFS.  */

void
free_data_refs (vec<data_reference_p>& datarefs)
{
  for (data_reference *dr : datarefs)
    free_data_ref (dr);
  datarefs.release ();
}
/* Common routine implementing both dr_direction_indicator and
   dr_zero_step_indicator.  Return USEFUL_MIN if the indicator is known
   to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
   Return the step as the indicator otherwise.  */

static tree
dr_step_indicator (struct data_reference *dr, int useful_min)
{
  tree step = DR_STEP (dr);
  if (!step)
    return NULL_TREE;
  STRIP_NOPS (step);
  /* Look for cases where the step is scaled by a positive constant
     integer, which will often be the access size.  If the multiplication
     doesn't change the sign (due to overflow effects) then we can
     test the unscaled value instead.  */
  if (TREE_CODE (step) == MULT_EXPR
      && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
      && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
    {
      tree factor = TREE_OPERAND (step, 1);
      step = TREE_OPERAND (step, 0);

      /* Strip widening and truncating conversions as well as nops.  */
      if (CONVERT_EXPR_P (step)
	  && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
	step = TREE_OPERAND (step, 0);
      tree type = TREE_TYPE (step);

      /* Get the range of step values that would not cause overflow.  */
      widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
			 / wi::to_widest (factor));
      widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
			 / wi::to_widest (factor));

      /* Get the range of values that the unconverted step actually has.  */
      wide_int step_min, step_max;
      value_range vr;
      if (TREE_CODE (step) != SSA_NAME
	  || !get_range_query (cfun)->range_of_expr (vr, step)
	  || vr.undefined_p ())
	{
	  step_min = wi::to_wide (TYPE_MIN_VALUE (type));
	  step_max = wi::to_wide (TYPE_MAX_VALUE (type));
	}
      else
	{
	  step_min = vr.lower_bound ();
	  step_max = vr.upper_bound ();
	}

      /* Check whether the unconverted step has an acceptable range.  */
      signop sgn = TYPE_SIGN (type);
      if (wi::les_p (minv, widest_int::from (step_min, sgn))
	  && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
	{
	  if (wi::ge_p (step_min, useful_min, sgn))
	    return ssize_int (useful_min);
	  else if (wi::lt_p (step_max, 0, sgn))
	    return ssize_int (-1);
	  else
	    return fold_convert (ssizetype, step);
	}
    }
  return DR_STEP (dr);
}
/* Return a value that is negative iff DR has a negative step.  */

tree
dr_direction_indicator (struct data_reference *dr)
{
  return dr_step_indicator (dr, 0);
}

/* Return a value that is zero iff DR has a zero step.  */

tree
dr_zero_step_indicator (struct data_reference *dr)
{
  return dr_step_indicator (dr, 1);
}

/* Return true if DR is known to have a nonnegative (but possibly zero)
   step.  */

bool
dr_known_forward_stride_p (struct data_reference *dr)
{
  tree indicator = dr_direction_indicator (dr);
  tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
				   fold_convert (ssizetype, indicator),
				   ssize_int (0));
  return neg_step_val && integer_zerop (neg_step_val);
}
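
/* Illustrative sketch (not part of GCC): the "indicator" idea behind
   dr_step_indicator and its wrappers.  Given a known [min, max] range
   for a step, collapse it to a representative value: USEFUL_MIN when
   the whole range is known to be at least USEFUL_MIN, -1 when the step
   is known negative, and "undecided" otherwise, in which case the step
   value itself must be tested.  The names below are hypothetical.  */

struct demo_range { long min, max; };

/* Returns true and sets *INDICATOR when the range decides the question;
   returns false when a check on the actual step value is needed.  */
static bool
demo_step_indicator (demo_range step, int useful_min, long *indicator)
{
  if (step.min >= useful_min)
    {
      *indicator = useful_min;	/* Step known to be >= USEFUL_MIN.  */
      return true;
    }
  if (step.max < 0)
    {
      *indicator = -1;		/* Step known to be negative.  */
      return true;
    }
  return false;
}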