/* Extraction artifact: this chunk is gcc/tree-data-ref.cc from official-gcc.git
   (blob 3549485d2513fcc79bebf5f8a3a0f6e0a7db982e, commit "Daily bump.").  */
1 /* Data references and dependences detectors.
2 Copyright (C) 2003-2023 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass walks a given loop structure searching for array
22 references. The information about the array accesses is recorded
23 in DATA_REFERENCE structures.
25 The basic test for determining the dependences is:
26 given two access functions chrec1 and chrec2 to a same array, and
27 x and y two vectors from the iteration domain, the same element of
28 the array is accessed twice at iterations x and y if and only if:
29 | chrec1 (x) == chrec2 (y).
31 The goals of this analysis are:
33 - to determine the independence: the relation between two
34 independent accesses is qualified with the chrec_known (this
35 information allows a loop parallelization),
37 - when two data references access the same data, to qualify the
38 dependence relation with classic dependence representations:
40 - distance vectors
41 - direction vectors
42 - loop carried level dependence
43 - polyhedron dependence
44 or with the chains of recurrences based representation,
46 - to define a knowledge base for storing the data dependence
47 information,
49 - to define an interface to access this data.
52 Definitions:
54 - subscript: given two array accesses a subscript is the tuple
55 composed of the access functions for a given dimension. Example:
56 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57 (f1, g1), (f2, g2), (f3, g3).
59 - Diophantine equation: an equation whose coefficients and
60 solutions are integer constants, for example the equation
61 | 3*x + 2*y = 1
62 has an integer solution x = 1 and y = -1.
64 References:
66 - "Advanced Compilation for High Performance Computing" by Randy
67 Allen and Ken Kennedy.
68 http://citeseer.ist.psu.edu/goff91practical.html
70 - "Loop Transformations for Restructuring Compilers - The Foundations"
71 by Utpal Banerjee.
76 #include "config.h"
77 #include "system.h"
78 #include "coretypes.h"
79 #include "backend.h"
80 #include "rtl.h"
81 #include "tree.h"
82 #include "gimple.h"
83 #include "gimple-pretty-print.h"
84 #include "alias.h"
85 #include "fold-const.h"
86 #include "expr.h"
87 #include "gimple-iterator.h"
88 #include "tree-ssa-loop-niter.h"
89 #include "tree-ssa-loop.h"
90 #include "tree-ssa.h"
91 #include "cfgloop.h"
92 #include "tree-data-ref.h"
93 #include "tree-scalar-evolution.h"
94 #include "dumpfile.h"
95 #include "tree-affine.h"
96 #include "builtins.h"
97 #include "tree-eh.h"
98 #include "ssa.h"
99 #include "internal-fn.h"
100 #include "vr-values.h"
101 #include "range-op.h"
102 #include "tree-ssa-loop-ivopts.h"
103 #include "calls.h"
/* Counters summarizing the outcome of the dependence analyses; dumped
   with the pass statistics.  Grouped by analysis level: whole-relation
   tests, per-subscript tests, and the ZIV/SIV/MIV classifications.  */

static struct datadep_stats
{
  /* Whole data-dependence-relation tests.  */
  int num_dependence_tests;
  int num_dependence_dependent;
  int num_dependence_independent;
  int num_dependence_undetermined;

  /* Individual subscript tests.  */
  int num_subscript_tests;
  int num_subscript_undetermined;
  int num_same_subscript_function;

  /* Zero-index-variable subscripts.  */
  int num_ziv;
  int num_ziv_independent;
  int num_ziv_dependent;
  int num_ziv_unimplemented;

  /* Single-index-variable subscripts.  */
  int num_siv;
  int num_siv_independent;
  int num_siv_dependent;
  int num_siv_unimplemented;

  /* Multiple-index-variable subscripts.  */
  int num_miv;
  int num_miv_independent;
  int num_miv_dependent;
  int num_miv_unimplemented;
} dependence_stats;
132 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
133 unsigned int, unsigned int,
134 class loop *);
135 /* Returns true iff A divides B. */
137 static inline bool
138 tree_fold_divides_p (const_tree a, const_tree b)
140 gcc_assert (TREE_CODE (a) == INTEGER_CST);
141 gcc_assert (TREE_CODE (b) == INTEGER_CST);
142 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
145 /* Returns true iff A divides B. */
147 static inline bool
148 int_divides_p (lambda_int a, lambda_int b)
150 return ((b % a) == 0);
153 /* Return true if reference REF contains a union access. */
155 static bool
156 ref_contains_union_access_p (tree ref)
158 while (handled_component_p (ref))
160 ref = TREE_OPERAND (ref, 0);
161 if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
162 || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
163 return true;
165 return false;
170 /* Dump into FILE all the data references from DATAREFS. */
172 static void
173 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
175 for (data_reference *dr : datarefs)
176 dump_data_reference (file, dr);
179 /* Unified dump into FILE all the data references from DATAREFS. */
181 DEBUG_FUNCTION void
182 debug (vec<data_reference_p> &ref)
184 dump_data_references (stderr, ref);
187 DEBUG_FUNCTION void
188 debug (vec<data_reference_p> *ptr)
190 if (ptr)
191 debug (*ptr);
192 else
193 fprintf (stderr, "<nil>\n");
197 /* Dump into STDERR all the data references from DATAREFS. */
199 DEBUG_FUNCTION void
200 debug_data_references (vec<data_reference_p> datarefs)
202 dump_data_references (stderr, datarefs);
205 /* Print to STDERR the data_reference DR. */
207 DEBUG_FUNCTION void
208 debug_data_reference (struct data_reference *dr)
210 dump_data_reference (stderr, dr);
213 /* Dump function for a DATA_REFERENCE structure. */
215 void
216 dump_data_reference (FILE *outf,
217 struct data_reference *dr)
219 unsigned int i;
221 fprintf (outf, "#(Data Ref: \n");
222 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
223 fprintf (outf, "# stmt: ");
224 print_gimple_stmt (outf, DR_STMT (dr), 0);
225 fprintf (outf, "# ref: ");
226 print_generic_stmt (outf, DR_REF (dr));
227 fprintf (outf, "# base_object: ");
228 print_generic_stmt (outf, DR_BASE_OBJECT (dr));
230 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
232 fprintf (outf, "# Access function %d: ", i);
233 print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
235 fprintf (outf, "#)\n");
238 /* Unified dump function for a DATA_REFERENCE structure. */
240 DEBUG_FUNCTION void
241 debug (data_reference &ref)
243 dump_data_reference (stderr, &ref);
246 DEBUG_FUNCTION void
247 debug (data_reference *ptr)
249 if (ptr)
250 debug (*ptr);
251 else
252 fprintf (stderr, "<nil>\n");
256 /* Dumps the affine function described by FN to the file OUTF. */
258 DEBUG_FUNCTION void
259 dump_affine_function (FILE *outf, affine_fn fn)
261 unsigned i;
262 tree coef;
264 print_generic_expr (outf, fn[0], TDF_SLIM);
265 for (i = 1; fn.iterate (i, &coef); i++)
267 fprintf (outf, " + ");
268 print_generic_expr (outf, coef, TDF_SLIM);
269 fprintf (outf, " * x_%u", i);
273 /* Dumps the conflict function CF to the file OUTF. */
275 DEBUG_FUNCTION void
276 dump_conflict_function (FILE *outf, conflict_function *cf)
278 unsigned i;
280 if (cf->n == NO_DEPENDENCE)
281 fprintf (outf, "no dependence");
282 else if (cf->n == NOT_KNOWN)
283 fprintf (outf, "not known");
284 else
286 for (i = 0; i < cf->n; i++)
288 if (i != 0)
289 fprintf (outf, " ");
290 fprintf (outf, "[");
291 dump_affine_function (outf, cf->fns[i]);
292 fprintf (outf, "]");
297 /* Dump function for a SUBSCRIPT structure. */
299 DEBUG_FUNCTION void
300 dump_subscript (FILE *outf, struct subscript *subscript)
302 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
304 fprintf (outf, "\n (subscript \n");
305 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
306 dump_conflict_function (outf, cf);
307 if (CF_NONTRIVIAL_P (cf))
309 tree last_iteration = SUB_LAST_CONFLICT (subscript);
310 fprintf (outf, "\n last_conflict: ");
311 print_generic_expr (outf, last_iteration);
314 cf = SUB_CONFLICTS_IN_B (subscript);
315 fprintf (outf, "\n iterations_that_access_an_element_twice_in_B: ");
316 dump_conflict_function (outf, cf);
317 if (CF_NONTRIVIAL_P (cf))
319 tree last_iteration = SUB_LAST_CONFLICT (subscript);
320 fprintf (outf, "\n last_conflict: ");
321 print_generic_expr (outf, last_iteration);
324 fprintf (outf, "\n (Subscript distance: ");
325 print_generic_expr (outf, SUB_DISTANCE (subscript));
326 fprintf (outf, " ))\n");
329 /* Print the classic direction vector DIRV to OUTF. */
331 DEBUG_FUNCTION void
332 print_direction_vector (FILE *outf,
333 lambda_vector dirv,
334 int length)
336 int eq;
338 for (eq = 0; eq < length; eq++)
340 enum data_dependence_direction dir = ((enum data_dependence_direction)
341 dirv[eq]);
343 switch (dir)
345 case dir_positive:
346 fprintf (outf, " +");
347 break;
348 case dir_negative:
349 fprintf (outf, " -");
350 break;
351 case dir_equal:
352 fprintf (outf, " =");
353 break;
354 case dir_positive_or_equal:
355 fprintf (outf, " +=");
356 break;
357 case dir_positive_or_negative:
358 fprintf (outf, " +-");
359 break;
360 case dir_negative_or_equal:
361 fprintf (outf, " -=");
362 break;
363 case dir_star:
364 fprintf (outf, " *");
365 break;
366 default:
367 fprintf (outf, "indep");
368 break;
371 fprintf (outf, "\n");
374 /* Print a vector of direction vectors. */
376 DEBUG_FUNCTION void
377 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
378 int length)
380 for (lambda_vector v : dir_vects)
381 print_direction_vector (outf, v, length);
384 /* Print out a vector VEC of length N to OUTFILE. */
386 DEBUG_FUNCTION void
387 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
389 int i;
391 for (i = 0; i < n; i++)
392 fprintf (outfile, HOST_WIDE_INT_PRINT_DEC " ", vector[i]);
393 fprintf (outfile, "\n");
396 /* Print a vector of distance vectors. */
398 DEBUG_FUNCTION void
399 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
400 int length)
402 for (lambda_vector v : dist_vects)
403 print_lambda_vector (outf, v, length);
406 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
408 DEBUG_FUNCTION void
409 dump_data_dependence_relation (FILE *outf, const data_dependence_relation *ddr)
411 struct data_reference *dra, *drb;
413 fprintf (outf, "(Data Dep: \n");
415 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
417 if (ddr)
419 dra = DDR_A (ddr);
420 drb = DDR_B (ddr);
421 if (dra)
422 dump_data_reference (outf, dra);
423 else
424 fprintf (outf, " (nil)\n");
425 if (drb)
426 dump_data_reference (outf, drb);
427 else
428 fprintf (outf, " (nil)\n");
430 fprintf (outf, " (don't know)\n)\n");
431 return;
434 dra = DDR_A (ddr);
435 drb = DDR_B (ddr);
436 dump_data_reference (outf, dra);
437 dump_data_reference (outf, drb);
439 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
440 fprintf (outf, " (no dependence)\n");
442 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
444 unsigned int i;
445 class loop *loopi;
447 subscript *sub;
448 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
450 fprintf (outf, " access_fn_A: ");
451 print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
452 fprintf (outf, " access_fn_B: ");
453 print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
454 dump_subscript (outf, sub);
457 fprintf (outf, " loop nest: (");
458 FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
459 fprintf (outf, "%d ", loopi->num);
460 fprintf (outf, ")\n");
462 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
464 fprintf (outf, " distance_vector: ");
465 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
466 DDR_NB_LOOPS (ddr));
469 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
471 fprintf (outf, " direction_vector: ");
472 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
473 DDR_NB_LOOPS (ddr));
477 fprintf (outf, ")\n");
480 /* Debug version. */
482 DEBUG_FUNCTION void
483 debug_data_dependence_relation (const struct data_dependence_relation *ddr)
485 dump_data_dependence_relation (stderr, ddr);
488 /* Dump into FILE all the dependence relations from DDRS. */
490 DEBUG_FUNCTION void
491 dump_data_dependence_relations (FILE *file, const vec<ddr_p> &ddrs)
493 for (auto ddr : ddrs)
494 dump_data_dependence_relation (file, ddr);
497 DEBUG_FUNCTION void
498 debug (vec<ddr_p> &ref)
500 dump_data_dependence_relations (stderr, ref);
503 DEBUG_FUNCTION void
504 debug (vec<ddr_p> *ptr)
506 if (ptr)
507 debug (*ptr);
508 else
509 fprintf (stderr, "<nil>\n");
513 /* Dump to STDERR all the dependence relations from DDRS. */
515 DEBUG_FUNCTION void
516 debug_data_dependence_relations (vec<ddr_p> ddrs)
518 dump_data_dependence_relations (stderr, ddrs);
521 /* Dumps the distance and direction vectors in FILE. DDRS contains
522 the dependence relations, and VECT_SIZE is the size of the
523 dependence vectors, or in other words the number of loops in the
524 considered nest. */
526 DEBUG_FUNCTION void
527 dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
529 for (data_dependence_relation *ddr : ddrs)
530 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
532 for (lambda_vector v : DDR_DIST_VECTS (ddr))
534 fprintf (file, "DISTANCE_V (");
535 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
536 fprintf (file, ")\n");
539 for (lambda_vector v : DDR_DIR_VECTS (ddr))
541 fprintf (file, "DIRECTION_V (");
542 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
543 fprintf (file, ")\n");
547 fprintf (file, "\n\n");
550 /* Dumps the data dependence relations DDRS in FILE. */
552 DEBUG_FUNCTION void
553 dump_ddrs (FILE *file, vec<ddr_p> ddrs)
555 for (data_dependence_relation *ddr : ddrs)
556 dump_data_dependence_relation (file, ddr);
558 fprintf (file, "\n\n");
561 DEBUG_FUNCTION void
562 debug_ddrs (vec<ddr_p> ddrs)
564 dump_ddrs (stderr, ddrs);
567 /* If RESULT_RANGE is nonnull, set *RESULT_RANGE to the range of
568 OP0 CODE OP1, where:
570 - OP0 CODE OP1 has integral type TYPE
571 - the range of OP0 is given by OP0_RANGE and
572 - the range of OP1 is given by OP1_RANGE.
574 Independently of RESULT_RANGE, try to compute:
576 DELTA = ((sizetype) OP0 CODE (sizetype) OP1)
577 - (sizetype) (OP0 CODE OP1)
579 as a constant and subtract DELTA from the ssizetype constant in *OFF.
580 Return true on success, or false if DELTA is not known at compile time.
582 Truncation and sign changes are known to distribute over CODE, i.e.
584 (itype) (A CODE B) == (itype) A CODE (itype) B
586 for any integral type ITYPE whose precision is no greater than the
587 precision of A and B. */
589 static bool
590 compute_distributive_range (tree type, value_range &op0_range,
591 tree_code code, value_range &op1_range,
592 tree *off, value_range *result_range)
594 gcc_assert (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type));
595 if (result_range)
597 range_op_handler op (code);
598 if (!op.fold_range (*result_range, type, op0_range, op1_range))
599 result_range->set_varying (type);
602 /* The distributive property guarantees that if TYPE is no narrower
603 than SIZETYPE,
605 (sizetype) (OP0 CODE OP1) == (sizetype) OP0 CODE (sizetype) OP1
607 and so we can treat DELTA as zero. */
608 if (TYPE_PRECISION (type) >= TYPE_PRECISION (sizetype))
609 return true;
611 /* If overflow is undefined, we can assume that:
613 X == (ssizetype) OP0 CODE (ssizetype) OP1
615 is within the range of TYPE, i.e.:
617 X == (ssizetype) (TYPE) X
619 Distributing the (TYPE) truncation over X gives:
621 X == (ssizetype) (OP0 CODE OP1)
623 Casting both sides to sizetype and distributing the sizetype cast
624 over X gives:
626 (sizetype) OP0 CODE (sizetype) OP1 == (sizetype) (OP0 CODE OP1)
628 and so we can treat DELTA as zero. */
629 if (TYPE_OVERFLOW_UNDEFINED (type))
630 return true;
632 /* Compute the range of:
634 (ssizetype) OP0 CODE (ssizetype) OP1
636 The distributive property guarantees that this has the same bitpattern as:
638 (sizetype) OP0 CODE (sizetype) OP1
640 but its range is more conducive to analysis. */
641 range_cast (op0_range, ssizetype);
642 range_cast (op1_range, ssizetype);
643 value_range wide_range;
644 range_op_handler op (code);
645 bool saved_flag_wrapv = flag_wrapv;
646 flag_wrapv = 1;
647 if (!op.fold_range (wide_range, ssizetype, op0_range, op1_range))
648 wide_range.set_varying (ssizetype);;
649 flag_wrapv = saved_flag_wrapv;
650 if (wide_range.num_pairs () != 1
651 || wide_range.varying_p () || wide_range.undefined_p ())
652 return false;
654 wide_int lb = wide_range.lower_bound ();
655 wide_int ub = wide_range.upper_bound ();
657 /* Calculate the number of times that each end of the range overflows or
658 underflows TYPE. We can only calculate DELTA if the numbers match. */
659 unsigned int precision = TYPE_PRECISION (type);
660 if (!TYPE_UNSIGNED (type))
662 wide_int type_min = wi::mask (precision - 1, true, lb.get_precision ());
663 lb -= type_min;
664 ub -= type_min;
666 wide_int upper_bits = wi::mask (precision, true, lb.get_precision ());
667 lb &= upper_bits;
668 ub &= upper_bits;
669 if (lb != ub)
670 return false;
672 /* OP0 CODE OP1 overflows exactly arshift (LB, PRECISION) times, with
673 negative values indicating underflow. The low PRECISION bits of LB
674 are clear, so DELTA is therefore LB (== UB). */
675 *off = wide_int_to_tree (ssizetype, wi::to_wide (*off) - lb);
676 return true;
679 /* Return true if (sizetype) OP == (sizetype) (TO_TYPE) OP,
680 given that OP has type FROM_TYPE and range RANGE. Both TO_TYPE and
681 FROM_TYPE are integral types. */
683 static bool
684 nop_conversion_for_offset_p (tree to_type, tree from_type, value_range &range)
686 gcc_assert (INTEGRAL_TYPE_P (to_type)
687 && INTEGRAL_TYPE_P (from_type)
688 && !TYPE_OVERFLOW_TRAPS (to_type)
689 && !TYPE_OVERFLOW_TRAPS (from_type));
691 /* Converting to something no narrower than sizetype and then to sizetype
692 is equivalent to converting directly to sizetype. */
693 if (TYPE_PRECISION (to_type) >= TYPE_PRECISION (sizetype))
694 return true;
696 /* Check whether TO_TYPE can represent all values that FROM_TYPE can. */
697 if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type)
698 && (TYPE_UNSIGNED (from_type) || !TYPE_UNSIGNED (to_type)))
699 return true;
701 /* For narrowing conversions, we could in principle test whether
702 the bits in FROM_TYPE but not in TO_TYPE have a fixed value
703 and apply a constant adjustment.
705 For other conversions (which involve a sign change) we could
706 check that the signs are always equal, and apply a constant
707 adjustment if the signs are negative.
709 However, both cases should be rare. */
710 return range_fits_type_p (&range, TYPE_PRECISION (to_type),
711 TYPE_SIGN (to_type));
714 static void
715 split_constant_offset (tree type, tree *var, tree *off,
716 value_range *result_range,
717 hash_map<tree, std::pair<tree, tree> > &cache,
718 unsigned *limit);
720 /* Helper function for split_constant_offset. If TYPE is a pointer type,
721 try to express OP0 CODE OP1 as:
723 POINTER_PLUS <*VAR, (sizetype) *OFF>
725 where:
727 - *VAR has type TYPE
728 - *OFF is a constant of type ssizetype.
730 If TYPE is an integral type, try to express (sizetype) (OP0 CODE OP1) as:
732 *VAR + (sizetype) *OFF
734 where:
736 - *VAR has type sizetype
737 - *OFF is a constant of type ssizetype.
739 In both cases, OP0 CODE OP1 has type TYPE.
741 Return true on success. A false return value indicates that we can't
742 do better than set *OFF to zero.
744 When returning true, set RESULT_RANGE to the range of OP0 CODE OP1,
745 if RESULT_RANGE is nonnull and if we can do better than assume VR_VARYING.
747 CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
748 visited. LIMIT counts down the number of SSA names that we are
749 allowed to process before giving up. */
751 static bool
752 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
753 tree *var, tree *off, value_range *result_range,
754 hash_map<tree, std::pair<tree, tree> > &cache,
755 unsigned *limit)
757 tree var0, var1;
758 tree off0, off1;
759 value_range op0_range, op1_range;
761 *var = NULL_TREE;
762 *off = NULL_TREE;
764 if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type))
765 return false;
767 switch (code)
769 case INTEGER_CST:
770 *var = size_int (0);
771 *off = fold_convert (ssizetype, op0);
772 if (result_range)
774 wide_int w = wi::to_wide (op0);
775 result_range->set (TREE_TYPE (op0), w, w);
777 return true;
779 case POINTER_PLUS_EXPR:
780 split_constant_offset (op0, &var0, &off0, nullptr, cache, limit);
781 split_constant_offset (op1, &var1, &off1, nullptr, cache, limit);
782 *var = fold_build2 (POINTER_PLUS_EXPR, type, var0, var1);
783 *off = size_binop (PLUS_EXPR, off0, off1);
784 return true;
786 case PLUS_EXPR:
787 case MINUS_EXPR:
788 split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
789 split_constant_offset (op1, &var1, &off1, &op1_range, cache, limit);
790 *off = size_binop (code, off0, off1);
791 if (!compute_distributive_range (type, op0_range, code, op1_range,
792 off, result_range))
793 return false;
794 *var = fold_build2 (code, sizetype, var0, var1);
795 return true;
797 case MULT_EXPR:
798 if (TREE_CODE (op1) != INTEGER_CST)
799 return false;
801 split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
802 op1_range.set (TREE_TYPE (op1), wi::to_wide (op1), wi::to_wide (op1));
803 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
804 if (!compute_distributive_range (type, op0_range, code, op1_range,
805 off, result_range))
806 return false;
807 *var = fold_build2 (MULT_EXPR, sizetype, var0,
808 fold_convert (sizetype, op1));
809 return true;
811 case ADDR_EXPR:
813 tree base, poffset;
814 poly_int64 pbitsize, pbitpos, pbytepos;
815 machine_mode pmode;
816 int punsignedp, preversep, pvolatilep;
818 op0 = TREE_OPERAND (op0, 0);
819 base
820 = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
821 &punsignedp, &preversep, &pvolatilep);
823 if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
824 return false;
825 base = build_fold_addr_expr (base);
826 off0 = ssize_int (pbytepos);
828 if (poffset)
830 split_constant_offset (poffset, &poffset, &off1, nullptr,
831 cache, limit);
832 off0 = size_binop (PLUS_EXPR, off0, off1);
833 base = fold_build_pointer_plus (base, poffset);
836 var0 = fold_convert (type, base);
838 /* If variable length types are involved, punt, otherwise casts
839 might be converted into ARRAY_REFs in gimplify_conversion.
840 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
841 possibly no longer appears in current GIMPLE, might resurface.
842 This perhaps could run
843 if (CONVERT_EXPR_P (var0))
845 gimplify_conversion (&var0);
846 // Attempt to fill in any within var0 found ARRAY_REF's
847 // element size from corresponding op embedded ARRAY_REF,
848 // if unsuccessful, just punt.
849 } */
850 while (POINTER_TYPE_P (type))
851 type = TREE_TYPE (type);
852 if (int_size_in_bytes (type) < 0)
853 return false;
855 *var = var0;
856 *off = off0;
857 return true;
860 case SSA_NAME:
862 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
863 return false;
865 gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
866 enum tree_code subcode;
868 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
869 return false;
871 subcode = gimple_assign_rhs_code (def_stmt);
873 /* We are using a cache to avoid un-CSEing large amounts of code. */
874 bool use_cache = false;
875 if (!has_single_use (op0)
876 && (subcode == POINTER_PLUS_EXPR
877 || subcode == PLUS_EXPR
878 || subcode == MINUS_EXPR
879 || subcode == MULT_EXPR
880 || subcode == ADDR_EXPR
881 || CONVERT_EXPR_CODE_P (subcode)))
883 use_cache = true;
884 bool existed;
885 std::pair<tree, tree> &e = cache.get_or_insert (op0, &existed);
886 if (existed)
888 if (integer_zerop (e.second))
889 return false;
890 *var = e.first;
891 *off = e.second;
892 /* The caller sets the range in this case. */
893 return true;
895 e = std::make_pair (op0, ssize_int (0));
898 if (*limit == 0)
899 return false;
900 --*limit;
902 var0 = gimple_assign_rhs1 (def_stmt);
903 var1 = gimple_assign_rhs2 (def_stmt);
905 bool res = split_constant_offset_1 (type, var0, subcode, var1,
906 var, off, nullptr, cache, limit);
907 if (res && use_cache)
908 *cache.get (op0) = std::make_pair (*var, *off);
909 /* The caller sets the range in this case. */
910 return res;
912 CASE_CONVERT:
914 /* We can only handle the following conversions:
916 - Conversions from one pointer type to another pointer type.
918 - Conversions from one non-trapping integral type to another
919 non-trapping integral type. In this case, the recursive
920 call makes sure that:
922 (sizetype) OP0
924 can be expressed as a sizetype operation involving VAR and OFF,
925 and all we need to do is check whether:
927 (sizetype) OP0 == (sizetype) (TYPE) OP0
929 - Conversions from a non-trapping sizetype-size integral type to
930 a like-sized pointer type. In this case, the recursive call
931 makes sure that:
933 (sizetype) OP0 == *VAR + (sizetype) *OFF
935 and we can convert that to:
937 POINTER_PLUS <(TYPE) *VAR, (sizetype) *OFF>
939 - Conversions from a sizetype-sized pointer type to a like-sized
940 non-trapping integral type. In this case, the recursive call
941 makes sure that:
943 OP0 == POINTER_PLUS <*VAR, (sizetype) *OFF>
945 where the POINTER_PLUS and *VAR have the same precision as
946 TYPE (and the same precision as sizetype). Then:
948 (sizetype) (TYPE) OP0 == (sizetype) *VAR + (sizetype) *OFF. */
949 tree itype = TREE_TYPE (op0);
950 if ((POINTER_TYPE_P (itype)
951 || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
952 && (POINTER_TYPE_P (type)
953 || (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type)))
954 && (POINTER_TYPE_P (type) == POINTER_TYPE_P (itype)
955 || (TYPE_PRECISION (type) == TYPE_PRECISION (sizetype)
956 && TYPE_PRECISION (itype) == TYPE_PRECISION (sizetype))))
958 if (POINTER_TYPE_P (type))
960 split_constant_offset (op0, var, off, nullptr, cache, limit);
961 *var = fold_convert (type, *var);
963 else if (POINTER_TYPE_P (itype))
965 split_constant_offset (op0, var, off, nullptr, cache, limit);
966 *var = fold_convert (sizetype, *var);
968 else
970 split_constant_offset (op0, var, off, &op0_range,
971 cache, limit);
972 if (!nop_conversion_for_offset_p (type, itype, op0_range))
973 return false;
974 if (result_range)
976 *result_range = op0_range;
977 range_cast (*result_range, type);
980 return true;
982 return false;
985 default:
986 return false;
990 /* If EXP has pointer type, try to express it as:
992 POINTER_PLUS <*VAR, (sizetype) *OFF>
994 where:
996 - *VAR has the same type as EXP
997 - *OFF is a constant of type ssizetype.
999 If EXP has an integral type, try to express (sizetype) EXP as:
1001 *VAR + (sizetype) *OFF
1003 where:
1005 - *VAR has type sizetype
1006 - *OFF is a constant of type ssizetype.
1008 If EXP_RANGE is nonnull, set it to the range of EXP.
1010 CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
1011 visited. LIMIT counts down the number of SSA names that we are
1012 allowed to process before giving up. */
1014 static void
1015 split_constant_offset (tree exp, tree *var, tree *off, value_range *exp_range,
1016 hash_map<tree, std::pair<tree, tree> > &cache,
1017 unsigned *limit)
1019 tree type = TREE_TYPE (exp), op0, op1;
1020 enum tree_code code;
1022 code = TREE_CODE (exp);
1023 if (exp_range)
1025 *exp_range = type;
1026 if (code == SSA_NAME)
1028 value_range vr;
1029 get_range_query (cfun)->range_of_expr (vr, exp);
1030 if (vr.undefined_p ())
1031 vr.set_varying (TREE_TYPE (exp));
1032 tree vr_min, vr_max;
1033 value_range_kind vr_kind = get_legacy_range (vr, vr_min, vr_max);
1034 wide_int var_min = wi::to_wide (vr_min);
1035 wide_int var_max = wi::to_wide (vr_max);
1036 wide_int var_nonzero = get_nonzero_bits (exp);
1037 vr_kind = intersect_range_with_nonzero_bits (vr_kind,
1038 &var_min, &var_max,
1039 var_nonzero,
1040 TYPE_SIGN (type));
1041 /* This check for VR_VARYING is here because the old code
1042 using get_range_info would return VR_RANGE for the entire
1043 domain, instead of VR_VARYING. The new code normalizes
1044 full-domain ranges to VR_VARYING. */
1045 if (vr_kind == VR_RANGE || vr_kind == VR_VARYING)
1046 *exp_range = value_range (type, var_min, var_max);
1050 if (!tree_is_chrec (exp)
1051 && get_gimple_rhs_class (TREE_CODE (exp)) != GIMPLE_TERNARY_RHS)
1053 extract_ops_from_tree (exp, &code, &op0, &op1);
1054 if (split_constant_offset_1 (type, op0, code, op1, var, off,
1055 exp_range, cache, limit))
1056 return;
1059 *var = exp;
1060 if (INTEGRAL_TYPE_P (type))
1061 *var = fold_convert (sizetype, *var);
1062 *off = ssize_int (0);
1064 value_range r;
1065 if (exp_range && code != SSA_NAME
1066 && get_range_query (cfun)->range_of_expr (r, exp)
1067 && !r.undefined_p ())
1068 *exp_range = r;
1071 /* Expresses EXP as VAR + OFF, where OFF is a constant. VAR has the same
1072 type as EXP while OFF has type ssizetype. */
1074 void
1075 split_constant_offset (tree exp, tree *var, tree *off)
1077 unsigned limit = param_ssa_name_def_chain_limit;
1078 static hash_map<tree, std::pair<tree, tree> > *cache;
1079 if (!cache)
1080 cache = new hash_map<tree, std::pair<tree, tree> > (37);
1081 split_constant_offset (exp, var, off, nullptr, *cache, &limit);
1082 *var = fold_convert (TREE_TYPE (exp), *var);
1083 cache->empty ();
1086 /* Returns the address ADDR of an object in a canonical shape (without nop
1087 casts, and with type of pointer to the object). */
1089 static tree
1090 canonicalize_base_object_address (tree addr)
1092 tree orig = addr;
1094 STRIP_NOPS (addr);
1096 /* The base address may be obtained by casting from integer, in that case
1097 keep the cast. */
1098 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
1099 return orig;
1101 if (TREE_CODE (addr) != ADDR_EXPR)
1102 return addr;
1104 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
/* Analyze the behavior of memory reference REF within STMT.
   There are two modes:

   - BB analysis.  In this case we simply split the address into base,
     init and offset components, without reference to any containing loop.
     The resulting base and offset are general expressions and they can
     vary arbitrarily from one iteration of the containing loop to the next.
     The step is always zero.

   - loop analysis.  In this case we analyze the reference both wrt LOOP
     and on the basis that the reference occurs (is "used") in LOOP;
     see the comment above analyze_scalar_evolution_in_loop for more
     information about this distinction.  The base, init, offset and
     step fields are all invariant in LOOP.

   Perform BB analysis if LOOP is null, or if LOOP is the function's
   dummy outermost loop.  In other cases perform loop analysis.

   Return true if the analysis succeeded and store the results in DRB if so.
   BB analysis can only fail for bitfield or reversed-storage accesses.  */

opt_result
dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
		      class loop *loop, const gimple *stmt)
{
  poly_int64 pbitsize, pbitpos;
  tree base, poffset;
  machine_mode pmode;
  int punsignedp, preversep, pvolatilep;
  affine_iv base_iv, offset_iv;
  tree init, dinit, step;
  /* loop->num == 0 is the dummy outermost loop, which gets BB analysis.  */
  bool in_loop = (loop && loop->num);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "analyze_innermost: ");

  base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
			      &punsignedp, &preversep, &pvolatilep);
  gcc_assert (base != NULL_TREE);

  /* The reference must start on a byte boundary; a bitfield access
     cannot be represented by the byte-based behavior fields.  */
  poly_int64 pbytepos;
  if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
    return opt_result::failure_at (stmt,
				   "failed: bit offset alignment.\n");

  if (preversep)
    return opt_result::failure_at (stmt,
				   "failed: reverse storage order.\n");

  /* Calculate the alignment and misalignment for the inner reference.  */
  unsigned HOST_WIDE_INT bit_base_misalignment;
  unsigned int bit_base_alignment;
  get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);

  /* There are no bitfield references remaining in BASE, so the values
     we got back must be whole bytes.  */
  gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
	      && bit_base_misalignment % BITS_PER_UNIT == 0);
  unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
  poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;

  if (TREE_CODE (base) == MEM_REF)
    {
      if (!integer_zerop (TREE_OPERAND (base, 1)))
	{
	  /* Subtract MOFF from the base and add it to POFFSET instead.
	     Adjust the misalignment to reflect the amount we subtracted.  */
	  poly_offset_int moff = mem_ref_offset (base);
	  base_misalignment -= moff.force_shwi ();
	  tree mofft = wide_int_to_tree (sizetype, moff);
	  if (!poffset)
	    poffset = mofft;
	  else
	    poffset = size_binop (PLUS_EXPR, poffset, mofft);
	}
      base = TREE_OPERAND (base, 0);
    }
  else
    base = build_fold_addr_expr (base);

  if (in_loop)
    {
      if (!simple_iv (loop, loop, base, &base_iv, true))
	return opt_result::failure_at
	  (stmt, "failed: evolution of base is not affine.\n");
    }
  else
    {
      /* BB analysis: the base is treated as invariant.  */
      base_iv.base = base;
      base_iv.step = ssize_int (0);
      base_iv.no_overflow = true;
    }

  if (!poffset)
    {
      offset_iv.base = ssize_int (0);
      offset_iv.step = ssize_int (0);
    }
  else
    {
      if (!in_loop)
	{
	  offset_iv.base = poffset;
	  offset_iv.step = ssize_int (0);
	}
      else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
	return opt_result::failure_at
	  (stmt, "failed: evolution of offset is not affine.\n");
    }

  init = ssize_int (pbytepos);

  /* Subtract any constant component from the base and add it to INIT instead.
     Adjust the misalignment to reflect the amount we subtracted.  */
  split_constant_offset (base_iv.base, &base_iv.base, &dinit);
  init = size_binop (PLUS_EXPR, init, dinit);
  base_misalignment -= TREE_INT_CST_LOW (dinit);

  split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
  init = size_binop (PLUS_EXPR, init, dinit);

  /* The overall step is the sum of the base and offset evolutions.  */
  step = size_binop (PLUS_EXPR,
		     fold_convert (ssizetype, base_iv.step),
		     fold_convert (ssizetype, offset_iv.step));

  base = canonicalize_base_object_address (base_iv.base);

  /* See if get_pointer_alignment can guarantee a higher alignment than
     the one we calculated above.  */
  unsigned HOST_WIDE_INT alt_misalignment;
  unsigned int alt_alignment;
  get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);

  /* As above, these values must be whole bytes.  */
  gcc_assert (alt_alignment % BITS_PER_UNIT == 0
	      && alt_misalignment % BITS_PER_UNIT == 0);
  alt_alignment /= BITS_PER_UNIT;
  alt_misalignment /= BITS_PER_UNIT;

  if (base_alignment < alt_alignment)
    {
      base_alignment = alt_alignment;
      base_misalignment = alt_misalignment;
    }

  drb->base_address = base;
  drb->offset = fold_convert (ssizetype, offset_iv.base);
  drb->init = init;
  drb->step = step;
  if (known_misalignment (base_misalignment, base_alignment,
			  &drb->base_misalignment))
    drb->base_alignment = base_alignment;
  else
    {
      /* The misalignment is not known at compile time; fall back to the
	 largest alignment that the misalignment is known to be a multiple
	 of, with misalignment zero wrt that alignment.  */
      drb->base_alignment = known_alignment (base_misalignment);
      drb->base_misalignment = 0;
    }
  drb->offset_alignment = highest_pow2_factor (offset_iv.base);
  drb->step_alignment = highest_pow2_factor (step);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "success.\n");

  return opt_result::success ();
}
1273 /* Return true if OP is a valid component reference for a DR access
1274 function. This accepts a subset of what handled_component_p accepts. */
1276 static bool
1277 access_fn_component_p (tree op)
1279 switch (TREE_CODE (op))
1281 case REALPART_EXPR:
1282 case IMAGPART_EXPR:
1283 case ARRAY_REF:
1284 return true;
1286 case COMPONENT_REF:
1287 return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;
1289 default:
1290 return false;
1294 /* Returns whether BASE can have a access_fn_component_p with BASE
1295 as base. */
1297 static bool
1298 base_supports_access_fn_components_p (tree base)
1300 switch (TREE_CODE (TREE_TYPE (base)))
1302 case COMPLEX_TYPE:
1303 case ARRAY_TYPE:
1304 case RECORD_TYPE:
1305 return true;
1306 default:
1307 return false;
/* Determines the base object and the list of indices of memory reference
   DR, analyzed in LOOP and instantiated before NEST.  */

static void
dr_analyze_indices (struct indices *dri, tree ref, edge nest, loop_p loop)
{
  /* If analyzing a basic-block there are no indices to analyze
     and thus no access functions.  */
  if (!nest)
    {
      dri->base_object = ref;
      dri->access_fns.create (0);
      return;
    }

  vec<tree> access_fns = vNULL;

  /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
     into a two element array with a constant index.  The base is
     then just the immediate underlying object.  */
  if (TREE_CODE (ref) == REALPART_EXPR)
    {
      ref = TREE_OPERAND (ref, 0);
      access_fns.safe_push (integer_zero_node);
    }
  else if (TREE_CODE (ref) == IMAGPART_EXPR)
    {
      ref = TREE_OPERAND (ref, 0);
      access_fns.safe_push (integer_one_node);
    }

  /* Analyze access functions of dimensions we know to be independent.
     The list of component references handled here should be kept in
     sync with access_fn_component_p.  */
  while (handled_component_p (ref))
    {
      if (TREE_CODE (ref) == ARRAY_REF)
	{
	  /* The index becomes an access function after scalar-evolution
	     analysis and instantiation before NEST.  */
	  tree op = TREE_OPERAND (ref, 1);
	  tree access_fn = analyze_scalar_evolution (loop, op);
	  access_fn = instantiate_scev (nest, loop, access_fn);
	  access_fns.safe_push (access_fn);
	}
      else if (TREE_CODE (ref) == COMPONENT_REF
	       && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
	{
	  /* For COMPONENT_REFs of records (but not unions!) use the
	     FIELD_DECL offset as constant access function so we can
	     disambiguate a[i].f1 and a[i].f2.  The offset is expressed
	     in bits: byte offset * BITS_PER_UNIT + bit offset.  */
	  tree off = component_ref_field_offset (ref);
	  off = size_binop (PLUS_EXPR,
			    size_binop (MULT_EXPR,
					fold_convert (bitsizetype, off),
					bitsize_int (BITS_PER_UNIT)),
			    DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
	  access_fns.safe_push (off);
	}
      else
	/* If we have an unhandled component we could not translate
	   to an access function stop analyzing.  We have determined
	   our base object in this case.  */
	break;

      ref = TREE_OPERAND (ref, 0);
    }

  /* If the address operand of a MEM_REF base has an evolution in the
     analyzed nest, add it as an additional independent access-function.  */
  if (TREE_CODE (ref) == MEM_REF)
    {
      tree op = TREE_OPERAND (ref, 0);
      tree access_fn = analyze_scalar_evolution (loop, op);
      access_fn = instantiate_scev (nest, loop, access_fn);
      STRIP_NOPS (access_fn);
      if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
	{
	  tree memoff = TREE_OPERAND (ref, 1);
	  tree base = initial_condition (access_fn);
	  tree orig_type = TREE_TYPE (base);
	  STRIP_USELESS_TYPE_CONVERSION (base);
	  tree off;
	  split_constant_offset (base, &base, &off);
	  STRIP_USELESS_TYPE_CONVERSION (base);
	  /* Fold the MEM_REF offset into the evolutions initial
	     value to make more bases comparable.  */
	  if (!integer_zerop (memoff))
	    {
	      off = size_binop (PLUS_EXPR, off,
				fold_convert (ssizetype, memoff));
	      memoff = build_int_cst (TREE_TYPE (memoff), 0);
	    }
	  /* Adjust the offset so it is a multiple of the access type
	     size and thus we separate bases that can possibly be used
	     to produce partial overlaps (which the access_fn machinery
	     cannot handle).  */
	  wide_int rem;
	  if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
	      && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
	      && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
	    rem = wi::mod_trunc
	      (wi::to_wide (off),
	       wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
	       SIGNED);
	  else
	    /* If we can't compute the remainder simply force the initial
	       condition to zero.  */
	    rem = wi::to_wide (off);
	  off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
	  memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
	  /* And finally replace the initial condition.  */
	  access_fn = chrec_replace_initial_condition
	    (access_fn, fold_convert (orig_type, off));
	  /* ??? This is still not a suitable base object for
	     dr_may_alias_p - the base object needs to be an
	     access that covers the object as whole.  With
	     an evolution in the pointer this cannot be
	     guaranteed.
	     As a band-aid, mark the access so we can special-case
	     it in dr_may_alias_p.  */
	  tree old = ref;
	  ref = fold_build2_loc (EXPR_LOCATION (ref),
				 MEM_REF, TREE_TYPE (ref),
				 base, memoff);
	  /* Preserve the dependence info of the original MEM_REF.  */
	  MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
	  MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
	  dri->unconstrained_base = true;
	  access_fns.safe_push (access_fn);
	}
    }
  else if (DECL_P (ref))
    {
      /* Canonicalize DR_BASE_OBJECT to MEM_REF form.  */
      ref = build2 (MEM_REF, TREE_TYPE (ref),
		    build_fold_addr_expr (ref),
		    build_int_cst (reference_alias_ptr_type (ref), 0));
    }

  dri->base_object = ref;
  dri->access_fns = access_fns;
}
1452 /* Extracts the alias analysis information from the memory reference DR. */
1454 static void
1455 dr_analyze_alias (struct data_reference *dr)
1457 tree ref = DR_REF (dr);
1458 tree base = get_base_address (ref), addr;
1460 if (INDIRECT_REF_P (base)
1461 || TREE_CODE (base) == MEM_REF)
1463 addr = TREE_OPERAND (base, 0);
1464 if (TREE_CODE (addr) == SSA_NAME)
1465 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1469 /* Frees data reference DR. */
1471 void
1472 free_data_ref (data_reference_p dr)
1474 DR_ACCESS_FNS (dr).release ();
1475 if (dr->alt_indices.base_object)
1476 dr->alt_indices.access_fns.release ();
1477 free (dr);
/* Analyze memory reference MEMREF, which is accessed in STMT.
   The reference is a read if IS_READ is true, otherwise it is a write.
   IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
   within STMT, i.e. that it might not occur even if STMT is executed
   and runs to completion.

   Return the data_reference description of MEMREF.  NEST is the outermost
   loop in which the reference should be instantiated, LOOP is the loop
   in which the data reference should be analyzed.  */

struct data_reference *
create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
		 bool is_read, bool is_conditional_in_stmt)
{
  struct data_reference *dr;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Creating dr for ");
      print_generic_expr (dump_file, memref, TDF_SLIM);
      fprintf (dump_file, "\n");
    }

  /* XCNEW zero-initializes, so unset fields start out null/zero.  */
  dr = XCNEW (struct data_reference);
  DR_STMT (dr) = stmt;
  DR_REF (dr) = memref;
  DR_IS_READ (dr) = is_read;
  DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;

  /* A null NEST requests BB analysis; pass a null loop in that case.  */
  dr_analyze_innermost (&DR_INNERMOST (dr), memref,
			nest != NULL ? loop : NULL, stmt);
  dr_analyze_indices (&dr->indices, DR_REF (dr), nest, loop);
  dr_analyze_alias (dr);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned i;
      fprintf (dump_file, "\tbase_address: ");
      print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
      fprintf (dump_file, "\n\toffset from base address: ");
      print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tconstant offset from base address: ");
      print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tstep: ");
      print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tbase misalignment: %d",
	       DR_BASE_MISALIGNMENT (dr));
      fprintf (dump_file, "\n\toffset alignment: %d",
	       DR_OFFSET_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tbase_object: ");
      print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
      fprintf (dump_file, "\n");
      for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
	{
	  fprintf (dump_file, "\tAccess function %d: ", i);
	  print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
	}
    }

  return dr;
}
/* A helper function computes order between two tree expressions T1 and T2.
   This is used in comparator functions sorting objects based on the order
   of tree expressions.  The function returns -1, 0, or 1.  */

int
data_ref_compare_tree (tree t1, tree t2)
{
  int i, cmp;
  enum tree_code code;
  char tclass;

  /* NULL sorts before any non-NULL tree.  */
  if (t1 == t2)
    return 0;
  if (t1 == NULL)
    return -1;
  if (t2 == NULL)
    return 1;

  STRIP_USELESS_TYPE_CONVERSION (t1);
  STRIP_USELESS_TYPE_CONVERSION (t2);
  if (t1 == t2)
    return 0;

  /* Different tree codes order by code, except that all conversions
     are considered comparable with each other.  */
  if (TREE_CODE (t1) != TREE_CODE (t2)
      && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
    return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;

  code = TREE_CODE (t1);
  switch (code)
    {
    case INTEGER_CST:
      return tree_int_cst_compare (t1, t2);

    case STRING_CST:
      if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
	return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
      return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
		     TREE_STRING_LENGTH (t1));

    case SSA_NAME:
      if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
	return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
      break;

    default:
      if (POLY_INT_CST_P (t1))
	return compare_sizes_for_sort (wi::to_poly_widest (t1),
				       wi::to_poly_widest (t2));

      tclass = TREE_CODE_CLASS (code);

      /* For decls, compare their UIDs.  */
      if (tclass == tcc_declaration)
	{
	  if (DECL_UID (t1) != DECL_UID (t2))
	    return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
	  break;
	}
      /* For expressions, compare their operands recursively.  */
      else if (IS_EXPR_CODE_CLASS (tclass))
	{
	  for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
	    {
	      cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
					   TREE_OPERAND (t2, i));
	      if (cmp != 0)
		return cmp;
	    }
	}
      else
	gcc_unreachable ();
    }

  return 0;
}
1620 /* Return TRUE it's possible to resolve data dependence DDR by runtime alias
1621 check. */
1623 opt_result
1624 runtime_alias_check_p (ddr_p ddr, class loop *loop, bool speed_p)
1626 if (dump_enabled_p ())
1627 dump_printf (MSG_NOTE,
1628 "consider run-time aliasing test between %T and %T\n",
1629 DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));
1631 if (!speed_p)
1632 return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1633 "runtime alias check not supported when"
1634 " optimizing for size.\n");
1636 /* FORNOW: We don't support versioning with outer-loop in either
1637 vectorization or loop distribution. */
1638 if (loop != NULL && loop->inner != NULL)
1639 return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1640 "runtime alias check not supported for"
1641 " outer loop.\n");
1643 /* FORNOW: We don't support handling different address spaces. */
1644 if (TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (DR_BASE_ADDRESS (DDR_A (ddr)))))
1645 != TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (DR_BASE_ADDRESS (DDR_B (ddr))))))
1646 return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1647 "runtime alias check between different "
1648 "address spaces not supported.\n");
1650 return opt_result::success ();
1653 /* Operator == between two dr_with_seg_len objects.
1655 This equality operator is used to make sure two data refs
1656 are the same one so that we will consider to combine the
1657 aliasing checks of those two pairs of data dependent data
1658 refs. */
1660 static bool
1661 operator == (const dr_with_seg_len& d1,
1662 const dr_with_seg_len& d2)
1664 return (operand_equal_p (DR_BASE_ADDRESS (d1.dr),
1665 DR_BASE_ADDRESS (d2.dr), 0)
1666 && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
1667 && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
1668 && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0
1669 && known_eq (d1.access_size, d2.access_size)
1670 && d1.align == d2.align);
1673 /* Comparison function for sorting objects of dr_with_seg_len_pair_t
1674 so that we can combine aliasing checks in one scan. */
1676 static int
1677 comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
1679 const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
1680 const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
1681 const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
1682 const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;
1684 /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks
1685 if a and c have the same basic address snd step, and b and d have the same
1686 address and step. Therefore, if any a&c or b&d don't have the same address
1687 and step, we don't care the order of those two pairs after sorting. */
1688 int comp_res;
1690 if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
1691 DR_BASE_ADDRESS (b1.dr))) != 0)
1692 return comp_res;
1693 if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
1694 DR_BASE_ADDRESS (b2.dr))) != 0)
1695 return comp_res;
1696 if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
1697 DR_STEP (b1.dr))) != 0)
1698 return comp_res;
1699 if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
1700 DR_STEP (b2.dr))) != 0)
1701 return comp_res;
1702 if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
1703 DR_OFFSET (b1.dr))) != 0)
1704 return comp_res;
1705 if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
1706 DR_INIT (b1.dr))) != 0)
1707 return comp_res;
1708 if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
1709 DR_OFFSET (b2.dr))) != 0)
1710 return comp_res;
1711 if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
1712 DR_INIT (b2.dr))) != 0)
1713 return comp_res;
1715 return 0;
1718 /* Dump information about ALIAS_PAIR, indenting each line by INDENT. */
1720 static void
1721 dump_alias_pair (dr_with_seg_len_pair_t *alias_pair, const char *indent)
1723 dump_printf (MSG_NOTE, "%sreference: %T vs. %T\n", indent,
1724 DR_REF (alias_pair->first.dr),
1725 DR_REF (alias_pair->second.dr));
1727 dump_printf (MSG_NOTE, "%ssegment length: %T", indent,
1728 alias_pair->first.seg_len);
1729 if (!operand_equal_p (alias_pair->first.seg_len,
1730 alias_pair->second.seg_len, 0))
1731 dump_printf (MSG_NOTE, " vs. %T", alias_pair->second.seg_len);
1733 dump_printf (MSG_NOTE, "\n%saccess size: ", indent);
1734 dump_dec (MSG_NOTE, alias_pair->first.access_size);
1735 if (maybe_ne (alias_pair->first.access_size, alias_pair->second.access_size))
1737 dump_printf (MSG_NOTE, " vs. ");
1738 dump_dec (MSG_NOTE, alias_pair->second.access_size);
1741 dump_printf (MSG_NOTE, "\n%salignment: %d", indent,
1742 alias_pair->first.align);
1743 if (alias_pair->first.align != alias_pair->second.align)
1744 dump_printf (MSG_NOTE, " vs. %d", alias_pair->second.align);
1746 dump_printf (MSG_NOTE, "\n%sflags: ", indent);
1747 if (alias_pair->flags & DR_ALIAS_RAW)
1748 dump_printf (MSG_NOTE, " RAW");
1749 if (alias_pair->flags & DR_ALIAS_WAR)
1750 dump_printf (MSG_NOTE, " WAR");
1751 if (alias_pair->flags & DR_ALIAS_WAW)
1752 dump_printf (MSG_NOTE, " WAW");
1753 if (alias_pair->flags & DR_ALIAS_ARBITRARY)
1754 dump_printf (MSG_NOTE, " ARBITRARY");
1755 if (alias_pair->flags & DR_ALIAS_SWAPPED)
1756 dump_printf (MSG_NOTE, " SWAPPED");
1757 if (alias_pair->flags & DR_ALIAS_UNSWAPPED)
1758 dump_printf (MSG_NOTE, " UNSWAPPED");
1759 if (alias_pair->flags & DR_ALIAS_MIXED_STEPS)
1760 dump_printf (MSG_NOTE, " MIXED_STEPS");
1761 if (alias_pair->flags == 0)
1762 dump_printf (MSG_NOTE, " <none>");
1763 dump_printf (MSG_NOTE, "\n");
/* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
   FACTOR is number of iterations that each data reference is accessed.

   Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
   we create an expression:

   ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
   || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))

   for aliasing checks.  However, in some cases we can decrease the number
   of checks by combining two checks into one.  For example, suppose we have
   another pair of data refs store_ptr_0 & load_ptr_1, and if the following
   condition is satisfied:

   load_ptr_0 < load_ptr_1  &&
   load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0

   (this condition means, in each iteration of vectorized loop, the accessed
   memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
   load_ptr_1.)

   we then can use only the following expression to finish the alising checks
   between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:

   ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
   || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))

   Note that we only consider that load_ptr_0 and load_ptr_1 have the same
   basic address.  */

void
prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
			       poly_uint64)
{
  if (alias_pairs->is_empty ())
    return;

  /* Canonicalize each pair so that the base components are ordered wrt
     data_ref_compare_tree.  This allows the loop below to merge more
     cases.  */
  unsigned int i;
  dr_with_seg_len_pair_t *alias_pair;
  FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
    {
      data_reference_p dr_a = alias_pair->first.dr;
      data_reference_p dr_b = alias_pair->second.dr;
      int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
					    DR_BASE_ADDRESS (dr_b));
      if (comp_res == 0)
	comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
      if (comp_res == 0)
	comp_res = data_ref_compare_tree (DR_INIT (dr_a), DR_INIT (dr_b));
      if (comp_res > 0)
	{
	  std::swap (alias_pair->first, alias_pair->second);
	  alias_pair->flags |= DR_ALIAS_SWAPPED;
	}
      else
	alias_pair->flags |= DR_ALIAS_UNSWAPPED;
    }

  /* Sort the collected data ref pairs so that we can scan them once to
     combine all possible aliasing checks.  */
  alias_pairs->qsort (comp_dr_with_seg_len_pair);

  /* Scan the sorted dr pairs and check if we can combine alias checks
     of two neighboring dr pairs.  LAST is the index of the last pair
     kept so far.  */
  unsigned int last = 0;
  for (i = 1; i < alias_pairs->length (); ++i)
    {
      /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2).  */
      dr_with_seg_len_pair_t *alias_pair1 = &(*alias_pairs)[last];
      dr_with_seg_len_pair_t *alias_pair2 = &(*alias_pairs)[i];

      dr_with_seg_len *dr_a1 = &alias_pair1->first;
      dr_with_seg_len *dr_b1 = &alias_pair1->second;
      dr_with_seg_len *dr_a2 = &alias_pair2->first;
      dr_with_seg_len *dr_b2 = &alias_pair2->second;

      /* Remove duplicate data ref pairs.  */
      if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
	{
	  if (dump_enabled_p ())
	    dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
			 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
			 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
	  alias_pair1->flags |= alias_pair2->flags;
	  continue;
	}

      /* Assume that we won't be able to merge the pairs, then correct
	 if we do.  */
      last += 1;
      if (last != i)
	(*alias_pairs)[last] = (*alias_pairs)[i];

      if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
	{
	  /* We consider the case that DR_B1 and DR_B2 are same memrefs,
	     and DR_A1 and DR_A2 are two consecutive memrefs.  */
	  if (*dr_a1 == *dr_a2)
	    {
	      std::swap (dr_a1, dr_b1);
	      std::swap (dr_a2, dr_b2);
	    }

	  poly_int64 init_a1, init_a2;
	  /* Only consider cases in which the distance between the initial
	     DR_A1 and the initial DR_A2 is known at compile time.  */
	  if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
				DR_BASE_ADDRESS (dr_a2->dr), 0)
	      || !operand_equal_p (DR_OFFSET (dr_a1->dr),
				   DR_OFFSET (dr_a2->dr), 0)
	      || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
	      || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
	    continue;

	  /* Don't combine if we can't tell which one comes first.  */
	  if (!ordered_p (init_a1, init_a2))
	    continue;

	  /* Work out what the segment length would be if we did combine
	     DR_A1 and DR_A2:

	     - If DR_A1 and DR_A2 have equal lengths, that length is
	       also the combined length.

	     - If DR_A1 and DR_A2 both have negative "lengths", the combined
	       length is the lower bound on those lengths.

	     - If DR_A1 and DR_A2 both have positive lengths, the combined
	       length is the upper bound on those lengths.

	     Other cases are unlikely to give a useful combination.

	     The lengths both have sizetype, so the sign is taken from
	     the step instead.  */
	  poly_uint64 new_seg_len = 0;
	  bool new_seg_len_p = !operand_equal_p (dr_a1->seg_len,
						 dr_a2->seg_len, 0);
	  if (new_seg_len_p)
	    {
	      poly_uint64 seg_len_a1, seg_len_a2;
	      if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
		  || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
		continue;

	      tree indicator_a = dr_direction_indicator (dr_a1->dr);
	      if (TREE_CODE (indicator_a) != INTEGER_CST)
		continue;

	      tree indicator_b = dr_direction_indicator (dr_a2->dr);
	      if (TREE_CODE (indicator_b) != INTEGER_CST)
		continue;

	      int sign_a = tree_int_cst_sgn (indicator_a);
	      int sign_b = tree_int_cst_sgn (indicator_b);

	      if (sign_a <= 0 && sign_b <= 0)
		new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
	      else if (sign_a >= 0 && sign_b >= 0)
		new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
	      else
		continue;
	    }
	  /* At this point we're committed to merging the refs.  */

	  /* Make sure dr_a1 starts left of dr_a2.  */
	  if (maybe_gt (init_a1, init_a2))
	    {
	      std::swap (*dr_a1, *dr_a2);
	      std::swap (init_a1, init_a2);
	    }

	  /* The DR_Bs are equal, so only the DR_As can introduce
	     mixed steps.  */
	  if (!operand_equal_p (DR_STEP (dr_a1->dr), DR_STEP (dr_a2->dr), 0))
	    alias_pair1->flags |= DR_ALIAS_MIXED_STEPS;

	  if (new_seg_len_p)
	    {
	      dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
					      new_seg_len);
	      dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
	    }

	  /* This is always positive due to the swap above.  */
	  poly_uint64 diff = init_a2 - init_a1;

	  /* The new check will start at DR_A1.  Make sure that its access
	     size encompasses the initial DR_A2.  */
	  if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
	    {
	      dr_a1->access_size = upper_bound (dr_a1->access_size,
						diff + dr_a2->access_size);
	      unsigned int new_align = known_alignment (dr_a1->access_size);
	      dr_a1->align = MIN (dr_a1->align, new_align);
	    }
	  if (dump_enabled_p ())
	    dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
			 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
			 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
	  alias_pair1->flags |= alias_pair2->flags;
	  /* Undo the speculative "won't merge" step above.  */
	  last -= 1;
	}
    }
  alias_pairs->truncate (last + 1);

  /* Try to restore the original dr_with_seg_len order within each
     dr_with_seg_len_pair_t.  If we ended up combining swapped and
     unswapped pairs into the same check, we have to invalidate any
     RAW, WAR and WAW information for it.  */
  if (dump_enabled_p ())
    dump_printf (MSG_NOTE, "merged alias checks:\n");
  FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
    {
      unsigned int swap_mask = (DR_ALIAS_SWAPPED | DR_ALIAS_UNSWAPPED);
      unsigned int swapped = (alias_pair->flags & swap_mask);
      if (swapped == DR_ALIAS_SWAPPED)
	std::swap (alias_pair->first, alias_pair->second);
      else if (swapped != DR_ALIAS_UNSWAPPED)
	alias_pair->flags |= DR_ALIAS_ARBITRARY;
      alias_pair->flags &= ~swap_mask;
      if (dump_enabled_p ())
	dump_alias_pair (alias_pair, "  ");
    }
}
/* A subroutine of create_intersect_range_checks, with a subset of the
   same arguments.  Try to use IFN_CHECK_RAW_PTRS and IFN_CHECK_WAR_PTRS
   to optimize cases in which the references form a simple RAW, WAR or
   WAW dependence.  */

static bool
create_ifn_alias_checks (tree *cond_expr,
			 const dr_with_seg_len_pair_t &alias_pair)
{
  const dr_with_seg_len& dr_a = alias_pair.first;
  const dr_with_seg_len& dr_b = alias_pair.second;

  /* Check for cases in which:

     (a) we have a known RAW, WAR or WAW dependence
     (b) the accesses are well-ordered in both the original and new code
	 (see the comment above the DR_ALIAS_* flags for details); and
     (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
  if (alias_pair.flags & ~(DR_ALIAS_RAW | DR_ALIAS_WAR | DR_ALIAS_WAW))
    return false;

  /* Make sure that both DRs access the same pattern of bytes,
     with a constant length and step.  */
  poly_uint64 seg_len;
  if (!operand_equal_p (dr_a.seg_len, dr_b.seg_len, 0)
      || !poly_int_tree_p (dr_a.seg_len, &seg_len)
      || maybe_ne (dr_a.access_size, dr_b.access_size)
      || !operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0)
      || !tree_fits_uhwi_p (DR_STEP (dr_a.dr)))
    return false;

  unsigned HOST_WIDE_INT bytes = tree_to_uhwi (DR_STEP (dr_a.dr));
  tree addr_a = DR_BASE_ADDRESS (dr_a.dr);
  tree addr_b = DR_BASE_ADDRESS (dr_b.dr);

  /* See whether the target supports what we want to do.  WAW checks are
     equivalent to WAR checks here.  */
  internal_fn ifn = (alias_pair.flags & DR_ALIAS_RAW
		     ? IFN_CHECK_RAW_PTRS
		     : IFN_CHECK_WAR_PTRS);
  unsigned int align = MIN (dr_a.align, dr_b.align);
  poly_uint64 full_length = seg_len + bytes;
  if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
					   full_length, align))
    {
      /* Retry with the smaller length based on the access size instead
	 of the step.  */
      full_length = seg_len + dr_a.access_size;
      if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
					       full_length, align))
	return false;
    }

  /* Commit to using this form of test.  Build the full start address
     of each reference: base + offset + init.  */
  addr_a = fold_build_pointer_plus (addr_a, DR_OFFSET (dr_a.dr));
  addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));

  addr_b = fold_build_pointer_plus (addr_b, DR_OFFSET (dr_b.dr));
  addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));

  *cond_expr = build_call_expr_internal_loc (UNKNOWN_LOCATION,
					     ifn, boolean_type_node,
					     4, addr_a, addr_b,
					     size_int (full_length),
					     size_int (align));

  if (dump_enabled_p ())
    {
      if (ifn == IFN_CHECK_RAW_PTRS)
	dump_printf (MSG_NOTE, "using an IFN_CHECK_RAW_PTRS test\n");
      else
	dump_printf (MSG_NOTE, "using an IFN_CHECK_WAR_PTRS test\n");
    }
  return true;
}
/* Try to generate a runtime condition that is true if ALIAS_PAIR is
   free of aliases, using a condition based on index values instead
   of a condition based on addresses.  Return true on success,
   storing the condition in *COND_EXPR.

   This can only be done if the two data references in ALIAS_PAIR access
   the same array object and the index is the only difference.  For example,
   if the two data references are DR_A and DR_B:

                       DR_A                           DR_B
      data-ref         arr[i]                         arr[j]
      base_object      arr                            arr
      index            {i_0, +, 1}_loop               {j_0, +, 1}_loop

   The addresses and their index are like:

        |<- ADDR_A    ->|          |<- ADDR_B    ->|
     ------------------------------------------------------->
        |   |   |   |   |          |   |   |   |   |
     ------------------------------------------------------->
        i_0 ...         i_0+4      j_0 ...         j_0+4

   We can create expression based on index rather than address:

     (unsigned) (i_0 - j_0 + 3) <= 6

   i.e. the indices are less than 4 apart.

   Note evolution step of index needs to be considered in comparison.  */

static bool
create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
				     const dr_with_seg_len_pair_t &alias_pair)
{
  const dr_with_seg_len &dr_a = alias_pair.first;
  const dr_with_seg_len &dr_b = alias_pair.second;
  /* The index-based test only makes sense for same-dimension accesses
     with known, nonzero and unmixed steps.  */
  if ((alias_pair.flags & DR_ALIAS_MIXED_STEPS)
      || integer_zerop (DR_STEP (dr_a.dr))
      || integer_zerop (DR_STEP (dr_b.dr))
      || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
    return false;

  poly_uint64 seg_len1, seg_len2;
  if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
      || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
    return false;

  if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
    return false;

  if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
    return false;

  if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
    return false;

  gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);

  bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
  unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
  if (neg_step)
    {
      /* Negative steps imply negative segment lengths; normalize both so
	 the arithmetic below can work with nonnegative quantities.  */
      abs_step = -abs_step;
      seg_len1 = (-wi::to_poly_wide (dr_a.seg_len)).force_uhwi ();
      seg_len2 = (-wi::to_poly_wide (dr_b.seg_len)).force_uhwi ();
    }

  /* Infer the number of iterations with which the memory segment is accessed
     by DR.  In other words, alias is checked if memory segment accessed by
     DR_A in some iterations intersect with memory segment accessed by DR_B
     in the same amount of iterations.
     Note segment length is a linear function of number of iterations with
     DR_STEP as the coefficient.  */
  poly_uint64 niter_len1, niter_len2;
  if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
      || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
    return false;

  /* Divide each access size by the byte step, rounding up.  */
  poly_uint64 niter_access1, niter_access2;
  if (!can_div_trunc_p (dr_a.access_size + abs_step - 1,
			abs_step, &niter_access1)
      || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
			   abs_step, &niter_access2))
    return false;

  /* True if only write-after-read and write-after-write dependences need
     to be preserved; this enables the cheaper one-sided test below.  */
  bool waw_or_war_p = (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) == 0;

  /* Find the single dimension in which the two access functions differ;
     all other dimensions must be identical.  */
  int found = -1;
  for (unsigned int i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
    {
      tree access1 = DR_ACCESS_FN (dr_a.dr, i);
      tree access2 = DR_ACCESS_FN (dr_b.dr, i);
      /* Two indices must be the same if they are not scev, or not scev wrto
	 current loop being vectorized.  */
      if (TREE_CODE (access1) != POLYNOMIAL_CHREC
	  || TREE_CODE (access2) != POLYNOMIAL_CHREC
	  || CHREC_VARIABLE (access1) != (unsigned) loop->num
	  || CHREC_VARIABLE (access2) != (unsigned) loop->num)
	{
	  if (operand_equal_p (access1, access2, 0))
	    continue;

	  return false;
	}
      if (found >= 0)
	return false;
      found = i;
    }

  /* Ought not to happen in practice, since if all accesses are equal then the
     alias should be decidable at compile time.  */
  if (found < 0)
    return false;

  /* The two indices must have the same step.  */
  tree access1 = DR_ACCESS_FN (dr_a.dr, found);
  tree access2 = DR_ACCESS_FN (dr_b.dr, found);
  if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
    return false;

  tree idx_step = CHREC_RIGHT (access1);
  /* Index must have const step, otherwise DR_STEP won't be constant.  */
  gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
  /* Index must evaluate in the same direction as DR.  */
  gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);

  tree min1 = CHREC_LEFT (access1);
  tree min2 = CHREC_LEFT (access2);
  if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
    return false;

  /* Ideally, alias can be checked against loop's control IV, but we
     need to prove linear mapping between control IV and reference
     index.  Although that should be true, we check against (array)
     index of data reference.  Like segment length, index length is
     linear function of the number of iterations with index_step as
     the coefficient, i.e, niter_len * idx_step.  */
  offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step),
					      SIGNED);
  if (neg_step)
    abs_idx_step = -abs_idx_step;
  poly_offset_int idx_len1 = abs_idx_step * niter_len1;
  poly_offset_int idx_len2 = abs_idx_step * niter_len2;
  poly_offset_int idx_access1 = abs_idx_step * niter_access1;
  poly_offset_int idx_access2 = abs_idx_step * niter_access2;

  gcc_assert (known_ge (idx_len1, 0)
	      && known_ge (idx_len2, 0)
	      && known_ge (idx_access1, 0)
	      && known_ge (idx_access2, 0));

  /* Each access has the following pattern, with lengths measured
     in units of INDEX:

	      <-- idx_len -->
	      <--- A: -ve step --->
	      +-----+-------+-----+-------+-----+
	      | n-1 | ..... |  0  | ..... | n-1 |
	      +-----+-------+-----+-------+-----+
			    <--- B: +ve step --->
			    <-- idx_len -->
			    |
			   min

     where "n" is the number of scalar iterations covered by the segment
     and where each access spans idx_access units.

     A is the range of bytes accessed when the step is negative,
     B is the range when the step is positive.

     When checking for general overlap, we need to test whether
     the range:

       [min1 + low_offset1, min1 + high_offset1 + idx_access1 - 1]

     overlaps:

       [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1]

     where:

       low_offsetN = +ve step ? 0 : -idx_lenN;
       high_offsetN = +ve step ? idx_lenN : 0;

     This is equivalent to testing whether:

       min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1
       && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1

     Converting this into a single test, there is an overlap if:

       0 <= min2 - min1 + bias <= limit

     where  bias = high_offset2 + idx_access2 - 1 - low_offset1
	    limit = (high_offset1 - low_offset1 + idx_access1 - 1)
		  + (high_offset2 - low_offset2 + idx_access2 - 1)
      i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1

     Combining the tests requires limit to be computable in an unsigned
     form of the index type; if it isn't, we fall back to the usual
     pointer-based checks.

     We can do better if DR_B is a write and if DR_A and DR_B are
     well-ordered in both the original and the new code (see the
     comment above the DR_ALIAS_* flags for details).  In this case
     we know that for each i in [0, n-1], the write performed by
     access i of DR_B occurs after access numbers j<=i of DR_A in
     both the original and the new code.  Any write or anti
     dependencies wrt those DR_A accesses are therefore maintained.

     We just need to make sure that each individual write in DR_B does not
     overlap any higher-indexed access in DR_A; such DR_A accesses happen
     after the DR_B access in the original code but happen before it in
     the new code.

     We know the steps for both accesses are equal, so by induction, we
     just need to test whether the first write of DR_B overlaps a later
     access of DR_A.  In other words, we need to move min1 along by
     one iteration:

       min1' = min1 + idx_step

     and use the ranges:

       [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1]

     and:

       [min2, min2 + idx_access2 - 1]

     where:

       low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|)
       high_offset1' = +ve_step ? idx_len1 - |idx_step| : 0.  */
  if (waw_or_war_p)
    idx_len1 -= abs_idx_step;

  poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1;
  if (!waw_or_war_p)
    limit += idx_len2;

  tree utype = unsigned_type_for (TREE_TYPE (min1));
  if (!wi::fits_to_tree_p (limit, utype))
    return false;

  poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
  poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2;
  poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
  /* Equivalent to adding IDX_STEP to MIN1.  */
  if (waw_or_war_p)
    bias -= wi::to_offset (idx_step);

  /* Emit the combined unsigned range test described above:
     there is no alias iff (unsigned) (min2 - min1 + bias) > limit.  */
  tree subject = fold_build2 (MINUS_EXPR, utype,
			      fold_convert (utype, min2),
			      fold_convert (utype, min1));
  subject = fold_build2 (PLUS_EXPR, utype, subject,
			 wide_int_to_tree (utype, bias));
  tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject,
				     wide_int_to_tree (utype, limit));
  if (*cond_expr)
    *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
			      *cond_expr, part_cond_expr);
  else
    *cond_expr = part_cond_expr;
  if (dump_enabled_p ())
    {
      if (waw_or_war_p)
	dump_printf (MSG_NOTE, "using an index-based WAR/WAW test\n");
      else
	dump_printf (MSG_NOTE, "using an index-based overlap test\n");
    }
  return true;
}
/* A subroutine of create_intersect_range_checks, with a subset of the
   same arguments.  Try to optimize cases in which the second access
   is a write and in which some overlap is valid.  */

static bool
create_waw_or_war_checks (tree *cond_expr,
			  const dr_with_seg_len_pair_t &alias_pair)
{
  const dr_with_seg_len& dr_a = alias_pair.first;
  const dr_with_seg_len& dr_b = alias_pair.second;

  /* Check for cases in which:

     (a) DR_B is always a write;
     (b) the accesses are well-ordered in both the original and new code
	 (see the comment above the DR_ALIAS_* flags for details); and
     (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
  if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
    return false;

  /* Check for equal (but possibly variable) steps.  */
  tree step = DR_STEP (dr_a.dr);
  if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
    return false;

  /* Make sure that we can operate on sizetype without loss of precision.  */
  tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
  if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
    return false;

  /* All addresses involved are known to have a common alignment ALIGN.
     We can therefore subtract ALIGN from an exclusive endpoint to get
     an inclusive endpoint.  In the best (and common) case, ALIGN is the
     same as the access sizes of both DRs, and so subtracting ALIGN
     cancels out the addition of an access size.  */
  unsigned int align = MIN (dr_a.align, dr_b.align);
  poly_uint64 last_chunk_a = dr_a.access_size - align;
  poly_uint64 last_chunk_b = dr_b.access_size - align;

  /* Get a boolean expression that is true when the step is negative.  */
  tree indicator = dr_direction_indicator (dr_a.dr);
  tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
			       fold_convert (ssizetype, indicator),
			       ssize_int (0));

  /* Get lengths in sizetype.  */
  tree seg_len_a
    = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len));
  step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step));

  /* Each access has the following pattern:

	  <- |seg_len| ->
	  <--- A: -ve step --->
	  +-----+-------+-----+-------+-----+
	  | n-1 | ..... |  0  | ..... | n-1 |
	  +-----+-------+-----+-------+-----+
			<--- B: +ve step --->
			<- |seg_len| ->
			|
		   base address

     where "n" is the number of scalar iterations covered by the segment.

     A is the range of bytes accessed when the step is negative,
     B is the range when the step is positive.

     We know that DR_B is a write.  We also know (from checking that
     DR_A and DR_B are well-ordered) that for each i in [0, n-1],
     the write performed by access i of DR_B occurs after access numbers
     j<=i of DR_A in both the original and the new code.  Any write or
     anti dependencies wrt those DR_A accesses are therefore maintained.

     We just need to make sure that each individual write in DR_B does not
     overlap any higher-indexed access in DR_A; such DR_A accesses happen
     after the DR_B access in the original code but happen before it in
     the new code.

     We know the steps for both accesses are equal, so by induction, we
     just need to test whether the first write of DR_B overlaps a later
     access of DR_A.  In other words, we need to move addr_a along by
     one iteration:

       addr_a' = addr_a + step

     and check whether:

       [addr_b, addr_b + last_chunk_b]

     overlaps:

       [addr_a' + low_offset_a, addr_a' + high_offset_a + last_chunk_a]

     where [low_offset_a, high_offset_a] spans accesses [1, n-1].  I.e.:

	low_offset_a = +ve step ? 0 : seg_len_a - step
	high_offset_a = +ve step ? seg_len_a - step : 0

     This is equivalent to testing whether:

       addr_a' + low_offset_a <= addr_b + last_chunk_b
       && addr_b <= addr_a' + high_offset_a + last_chunk_a

     Converting this into a single test, there is an overlap if:

       0 <= addr_b + last_chunk_b - addr_a' - low_offset_a <= limit

     where limit = high_offset_a - low_offset_a + last_chunk_a + last_chunk_b

     If DR_A is performed, limit + |step| - last_chunk_b is known to be
     less than the size of the object underlying DR_A.  We also know
     that last_chunk_b <= |step|; this is checked elsewhere if it isn't
     guaranteed at compile time.  There can therefore be no overflow if
     "limit" is calculated in an unsigned type with pointer precision.  */
  tree addr_a = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr),
					 DR_OFFSET (dr_a.dr));
  addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));

  tree addr_b = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr),
					 DR_OFFSET (dr_b.dr));
  addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));

  /* Advance ADDR_A by one iteration and adjust the length to compensate.  */
  addr_a = fold_build_pointer_plus (addr_a, step);
  tree seg_len_a_minus_step = fold_build2 (MINUS_EXPR, sizetype,
					   seg_len_a, step);
  /* Wrap non-constant subexpressions in SAVE_EXPRs so that they are
     evaluated only once when the condition is expanded.  */
  if (!CONSTANT_CLASS_P (seg_len_a_minus_step))
    seg_len_a_minus_step = build1 (SAVE_EXPR, sizetype, seg_len_a_minus_step);

  tree low_offset_a = fold_build3 (COND_EXPR, sizetype, neg_step,
				   seg_len_a_minus_step, size_zero_node);
  if (!CONSTANT_CLASS_P (low_offset_a))
    low_offset_a = build1 (SAVE_EXPR, sizetype, low_offset_a);

  /* We could use COND_EXPR <neg_step, size_zero_node, seg_len_a_minus_step>,
     but it's usually more efficient to reuse the LOW_OFFSET_A result.  */
  tree high_offset_a = fold_build2 (MINUS_EXPR, sizetype, seg_len_a_minus_step,
				    low_offset_a);

  /* The amount added to addr_b - addr_a'.  */
  tree bias = fold_build2 (MINUS_EXPR, sizetype,
			   size_int (last_chunk_b), low_offset_a);

  tree limit = fold_build2 (MINUS_EXPR, sizetype, high_offset_a, low_offset_a);
  limit = fold_build2 (PLUS_EXPR, sizetype, limit,
		       size_int (last_chunk_a + last_chunk_b));

  tree subject = fold_build2 (POINTER_DIFF_EXPR, ssizetype, addr_b, addr_a);
  subject = fold_build2 (PLUS_EXPR, sizetype,
			 fold_convert (sizetype, subject), bias);

  /* No alias iff (unsigned) (addr_b - addr_a' + bias) > limit.  */
  *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit);
  if (dump_enabled_p ())
    dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n");
  return true;
}
/* If ALIGN is nonzero, set up *SEG_MIN_OUT and *SEG_MAX_OUT so that for
   every address ADDR accessed by D:

     *SEG_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEG_MAX_OUT

   In this case, every element accessed by D is aligned to at least
   ALIGN bytes.

   If ALIGN is zero then instead set *SEG_MAX_OUT so that:

     *SEG_MIN_OUT <= ADDR < *SEG_MAX_OUT.  */

static void
get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
		     tree *seg_max_out, HOST_WIDE_INT align)
{
  /* Each access has the following pattern:

	  <- |seg_len| ->
	  <--- A: -ve step --->
	  +-----+-------+-----+-------+-----+
	  | n-1 | ..... |  0  | ..... | n-1 |
	  +-----+-------+-----+-------+-----+
			<--- B: +ve step --->
			<- |seg_len| ->
			|
		   base address

     where "n" is the number of scalar iterations covered by the segment.
     (This should be VF for a particular pair if we know that both steps
     are the same, otherwise it will be the full number of scalar loop
     iterations.)

     A is the range of bytes accessed when the step is negative,
     B is the range when the step is positive.

     If the access size is "access_size" bytes, the lowest addressed byte is:

	 base + (step < 0 ? seg_len : 0)   [LB]

     and the highest addressed byte is always below:

	 base + (step < 0 ? 0 : seg_len) + access_size   [UB]

     Thus:

	 LB <= ADDR < UB

     If ALIGN is nonzero, all three values are aligned to at least ALIGN
     bytes, so:

	 LB <= ADDR <= UB - ALIGN

     where "- ALIGN" folds naturally with the "+ access_size" and often
     cancels it out.

     We don't try to simplify LB and UB beyond this (e.g. by using
     MIN and MAX based on whether seg_len rather than the stride is
     negative) because it is possible for the absolute size of the
     segment to overflow the range of a ssize_t.

     Keeping the pointer_plus outside of the cond_expr should allow
     the cond_exprs to be shared with other alias checks.  */
  tree indicator = dr_direction_indicator (d.dr);
  tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
			       fold_convert (ssizetype, indicator),
			       ssize_int (0));
  tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
					    DR_OFFSET (d.dr));
  addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
  tree seg_len
    = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));

  tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
				seg_len, size_zero_node);
  tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
				size_zero_node, seg_len);
  max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
			   size_int (d.access_size - align));

  *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
  *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
}
2584 /* Generate a runtime condition that is true if ALIAS_PAIR is free of aliases,
2585 storing the condition in *COND_EXPR. The fallback is to generate a
2586 a test that the two accesses do not overlap:
2588 end_a <= start_b || end_b <= start_a. */
2590 static void
2591 create_intersect_range_checks (class loop *loop, tree *cond_expr,
2592 const dr_with_seg_len_pair_t &alias_pair)
2594 const dr_with_seg_len& dr_a = alias_pair.first;
2595 const dr_with_seg_len& dr_b = alias_pair.second;
2596 *cond_expr = NULL_TREE;
2597 if (create_intersect_range_checks_index (loop, cond_expr, alias_pair))
2598 return;
2600 if (create_ifn_alias_checks (cond_expr, alias_pair))
2601 return;
2603 if (create_waw_or_war_checks (cond_expr, alias_pair))
2604 return;
2606 unsigned HOST_WIDE_INT min_align;
2607 tree_code cmp_code;
2608 /* We don't have to check DR_ALIAS_MIXED_STEPS here, since both versions
2609 are equivalent. This is just an optimization heuristic. */
2610 if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
2611 && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
2613 /* In this case adding access_size to seg_len is likely to give
2614 a simple X * step, where X is either the number of scalar
2615 iterations or the vectorization factor. We're better off
2616 keeping that, rather than subtracting an alignment from it.
2618 In this case the maximum values are exclusive and so there is
2619 no alias if the maximum of one segment equals the minimum
2620 of another. */
2621 min_align = 0;
2622 cmp_code = LE_EXPR;
2624 else
2626 /* Calculate the minimum alignment shared by all four pointers,
2627 then arrange for this alignment to be subtracted from the
2628 exclusive maximum values to get inclusive maximum values.
2629 This "- min_align" is cumulative with a "+ access_size"
2630 in the calculation of the maximum values. In the best
2631 (and common) case, the two cancel each other out, leaving
2632 us with an inclusive bound based only on seg_len. In the
2633 worst case we're simply adding a smaller number than before.
2635 Because the maximum values are inclusive, there is an alias
2636 if the maximum value of one segment is equal to the minimum
2637 value of the other. */
2638 min_align = MIN (dr_a.align, dr_b.align);
2639 cmp_code = LT_EXPR;
2642 tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
2643 get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
2644 get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);
2646 *cond_expr
2647 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2648 fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
2649 fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
2650 if (dump_enabled_p ())
2651 dump_printf (MSG_NOTE, "using an address-based overlap test\n");
2654 /* Create a conditional expression that represents the run-time checks for
2655 overlapping of address ranges represented by a list of data references
2656 pairs passed in ALIAS_PAIRS. Data references are in LOOP. The returned
2657 COND_EXPR is the conditional expression to be used in the if statement
2658 that controls which version of the loop gets executed at runtime. */
2660 void
2661 create_runtime_alias_checks (class loop *loop,
2662 const vec<dr_with_seg_len_pair_t> *alias_pairs,
2663 tree * cond_expr)
2665 tree part_cond_expr;
2667 fold_defer_overflow_warnings ();
2668 for (const dr_with_seg_len_pair_t &alias_pair : alias_pairs)
2670 gcc_assert (alias_pair.flags);
2671 if (dump_enabled_p ())
2672 dump_printf (MSG_NOTE,
2673 "create runtime check for data references %T and %T\n",
2674 DR_REF (alias_pair.first.dr),
2675 DR_REF (alias_pair.second.dr));
2677 /* Create condition expression for each pair data references. */
2678 create_intersect_range_checks (loop, &part_cond_expr, alias_pair);
2679 if (*cond_expr)
2680 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2681 *cond_expr, part_cond_expr);
2682 else
2683 *cond_expr = part_cond_expr;
2685 fold_undefer_and_ignore_overflow_warnings ();
2688 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
2689 expressions. */
2690 static bool
2691 dr_equal_offsets_p1 (tree offset1, tree offset2)
2693 bool res;
2695 STRIP_NOPS (offset1);
2696 STRIP_NOPS (offset2);
2698 if (offset1 == offset2)
2699 return true;
2701 if (TREE_CODE (offset1) != TREE_CODE (offset2)
2702 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
2703 return false;
2705 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
2706 TREE_OPERAND (offset2, 0));
2708 if (!res || !BINARY_CLASS_P (offset1))
2709 return res;
2711 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
2712 TREE_OPERAND (offset2, 1));
2714 return res;
2717 /* Check if DRA and DRB have equal offsets. */
2718 bool
2719 dr_equal_offsets_p (struct data_reference *dra,
2720 struct data_reference *drb)
2722 tree offset1, offset2;
2724 offset1 = DR_OFFSET (dra);
2725 offset2 = DR_OFFSET (drb);
2727 return dr_equal_offsets_p1 (offset1, offset2);
2730 /* Returns true if FNA == FNB. */
2732 static bool
2733 affine_function_equal_p (affine_fn fna, affine_fn fnb)
2735 unsigned i, n = fna.length ();
2737 if (n != fnb.length ())
2738 return false;
2740 for (i = 0; i < n; i++)
2741 if (!operand_equal_p (fna[i], fnb[i], 0))
2742 return false;
2744 return true;
2747 /* If all the functions in CF are the same, returns one of them,
2748 otherwise returns NULL. */
2750 static affine_fn
2751 common_affine_function (conflict_function *cf)
2753 unsigned i;
2754 affine_fn comm;
2756 if (!CF_NONTRIVIAL_P (cf))
2757 return affine_fn ();
2759 comm = cf->fns[0];
2761 for (i = 1; i < cf->n; i++)
2762 if (!affine_function_equal_p (comm, cf->fns[i]))
2763 return affine_fn ();
2765 return comm;
/* Returns the base of the affine function FN, i.e. its constant term
   (coefficient 0).  */

static tree
affine_function_base (affine_fn fn)
{
  return fn[0];
}
2776 /* Returns true if FN is a constant. */
2778 static bool
2779 affine_function_constant_p (affine_fn fn)
2781 unsigned i;
2782 tree coef;
2784 for (i = 1; fn.iterate (i, &coef); i++)
2785 if (!integer_zerop (coef))
2786 return false;
2788 return true;
2791 /* Returns true if FN is the zero constant function. */
2793 static bool
2794 affine_function_zero_p (affine_fn fn)
2796 return (integer_zerop (affine_function_base (fn))
2797 && affine_function_constant_p (fn));
2800 /* Returns a signed integer type with the largest precision from TA
2801 and TB. */
2803 static tree
2804 signed_type_for_types (tree ta, tree tb)
2806 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
2807 return signed_type_for (ta);
2808 else
2809 return signed_type_for (tb);
2812 /* Applies operation OP on affine functions FNA and FNB, and returns the
2813 result. */
2815 static affine_fn
2816 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
2818 unsigned i, n, m;
2819 affine_fn ret;
2820 tree coef;
2822 if (fnb.length () > fna.length ())
2824 n = fna.length ();
2825 m = fnb.length ();
2827 else
2829 n = fnb.length ();
2830 m = fna.length ();
2833 ret.create (m);
2834 for (i = 0; i < n; i++)
2836 tree type = signed_type_for_types (TREE_TYPE (fna[i]),
2837 TREE_TYPE (fnb[i]));
2838 ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
2841 for (; fna.iterate (i, &coef); i++)
2842 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2843 coef, integer_zero_node));
2844 for (; fnb.iterate (i, &coef); i++)
2845 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2846 integer_zero_node, coef));
2848 return ret;
/* Returns the sum of affine functions FNA and FNB.  The caller owns the
   returned vector and must release it with affine_fn_free.  */

static affine_fn
affine_fn_plus (affine_fn fna, affine_fn fnb)
{
  return affine_fn_op (PLUS_EXPR, fna, fnb);
}
/* Returns the difference of affine functions FNA and FNB.  The caller owns
   the returned vector and must release it with affine_fn_free.  */

static affine_fn
affine_fn_minus (affine_fn fna, affine_fn fnb)
{
  return affine_fn_op (MINUS_EXPR, fna, fnb);
}
/* Frees affine function FN (releases the underlying vector storage).  */

static void
affine_fn_free (affine_fn fn)
{
  fn.release ();
}
2875 /* Determine for each subscript in the data dependence relation DDR
2876 the distance. */
2878 static void
2879 compute_subscript_distance (struct data_dependence_relation *ddr)
2881 conflict_function *cf_a, *cf_b;
2882 affine_fn fn_a, fn_b, diff;
2884 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
2886 unsigned int i;
2888 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2890 struct subscript *subscript;
2892 subscript = DDR_SUBSCRIPT (ddr, i);
2893 cf_a = SUB_CONFLICTS_IN_A (subscript);
2894 cf_b = SUB_CONFLICTS_IN_B (subscript);
2896 fn_a = common_affine_function (cf_a);
2897 fn_b = common_affine_function (cf_b);
2898 if (!fn_a.exists () || !fn_b.exists ())
2900 SUB_DISTANCE (subscript) = chrec_dont_know;
2901 return;
2903 diff = affine_fn_minus (fn_a, fn_b);
2905 if (affine_function_constant_p (diff))
2906 SUB_DISTANCE (subscript) = affine_function_base (diff);
2907 else
2908 SUB_DISTANCE (subscript) = chrec_dont_know;
2910 affine_fn_free (diff);
2915 /* Returns the conflict function for "unknown". */
2917 static conflict_function *
2918 conflict_fn_not_known (void)
2920 conflict_function *fn = XCNEW (conflict_function);
2921 fn->n = NOT_KNOWN;
2923 return fn;
2926 /* Returns the conflict function for "independent". */
2928 static conflict_function *
2929 conflict_fn_no_dependence (void)
2931 conflict_function *fn = XCNEW (conflict_function);
2932 fn->n = NO_DEPENDENCE;
2934 return fn;
2937 /* Returns true if the address of OBJ is invariant in LOOP. */
2939 static bool
2940 object_address_invariant_in_loop_p (const class loop *loop, const_tree obj)
2942 while (handled_component_p (obj))
2944 if (TREE_CODE (obj) == ARRAY_REF)
2946 for (int i = 1; i < 4; ++i)
2947 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i),
2948 loop->num))
2949 return false;
2951 else if (TREE_CODE (obj) == COMPONENT_REF)
2953 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2954 loop->num))
2955 return false;
2957 obj = TREE_OPERAND (obj, 0);
2960 if (!INDIRECT_REF_P (obj)
2961 && TREE_CODE (obj) != MEM_REF)
2962 return true;
2964 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
2965 loop->num);
/* Returns false if we can prove that data references A and B do not alias,
   true otherwise.  If LOOP_NEST is false no cross-iteration aliases are
   considered.  */

bool
dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
		class loop *loop_nest)
{
  tree addr_a = DR_BASE_OBJECT (a);
  tree addr_b = DR_BASE_OBJECT (b);

  /* If we are not processing a loop nest but scalar code we
     do not need to care about possible cross-iteration dependences
     and thus can process the full original reference.  Do so,
     similar to how loop invariant motion applies extra offset-based
     disambiguation.  */
  if (!loop_nest)
    {
      tree tree_size_a = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (a)));
      tree tree_size_b = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (b)));

      /* Same base and offset: disambiguate purely on the constant
	 DR_INIT byte ranges.  */
      if (DR_BASE_ADDRESS (a)
	  && DR_BASE_ADDRESS (b)
	  && operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b))
	  && operand_equal_p (DR_OFFSET (a), DR_OFFSET (b))
	  && poly_int_tree_p (tree_size_a)
	  && poly_int_tree_p (tree_size_b)
	  && !ranges_maybe_overlap_p (wi::to_poly_widest (DR_INIT (a)),
				      wi::to_poly_widest (tree_size_a),
				      wi::to_poly_widest (DR_INIT (b)),
				      wi::to_poly_widest (tree_size_b)))
	{
	  gcc_assert (integer_zerop (DR_STEP (a))
		      && integer_zerop (DR_STEP (b)));
	  return false;
	}

      /* Otherwise compare the affine combinations of the two references:
	 if the difference of the offsets cannot fall inside either access
	 size, the references cannot overlap.  */
      aff_tree off1, off2;
      poly_widest_int size1, size2;
      get_inner_reference_aff (DR_REF (a), &off1, &size1);
      get_inner_reference_aff (DR_REF (b), &off2, &size2);
      aff_combination_scale (&off1, -1);
      aff_combination_add (&off2, &off1);
      if (aff_comb_cannot_overlap_p (&off2, size1, size2))
	return false;
    }

  /* Disambiguate using restrict-style dependence cliques: members of
     the same clique with different bases are known not to alias.  */
  if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
      && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
      /* For cross-iteration dependences the cliques must be valid for the
	 whole loop, not just individual iterations.  */
      && (!loop_nest
	  || MR_DEPENDENCE_CLIQUE (addr_a) == 1
	  || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique)
      && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
      && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
    return false;

  /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
     do not know the size of the base-object.  So we cannot do any
     offset/overlap based analysis but have to rely on points-to
     information only.  */
  if (TREE_CODE (addr_a) == MEM_REF
      && (DR_UNCONSTRAINED_BASE (a)
	  || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
    {
      /* For true dependences we can apply TBAA.  */
      if (flag_strict_aliasing
	  && DR_IS_WRITE (a) && DR_IS_READ (b)
	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
				     get_alias_set (DR_REF (b))))
	return false;
      if (TREE_CODE (addr_b) == MEM_REF)
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       TREE_OPERAND (addr_b, 0));
      else
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       build_fold_addr_expr (addr_b));
    }
  else if (TREE_CODE (addr_b) == MEM_REF
	   && (DR_UNCONSTRAINED_BASE (b)
	       || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
    {
      /* For true dependences we can apply TBAA.  */
      if (flag_strict_aliasing
	  && DR_IS_WRITE (a) && DR_IS_READ (b)
	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
				     get_alias_set (DR_REF (b))))
	return false;
      if (TREE_CODE (addr_a) == MEM_REF)
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       TREE_OPERAND (addr_b, 0));
      else
	return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
				       TREE_OPERAND (addr_b, 0));
    }

  /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
     that is being subsetted in the loop nest.  Pick the oracle query
     matching the kind of dependence (output, anti or flow/input).  */
  if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
    return refs_output_dependent_p (addr_a, addr_b);
  else if (DR_IS_READ (a) && DR_IS_WRITE (b))
    return refs_anti_dependent_p (addr_a, addr_b);
  return refs_may_alias_p (addr_a, addr_b);
}
3074 /* REF_A and REF_B both satisfy access_fn_component_p. Return true
3075 if it is meaningful to compare their associated access functions
3076 when checking for dependencies. */
3078 static bool
3079 access_fn_components_comparable_p (tree ref_a, tree ref_b)
3081 /* Allow pairs of component refs from the following sets:
3083 { REALPART_EXPR, IMAGPART_EXPR }
3084 { COMPONENT_REF }
3085 { ARRAY_REF }. */
3086 tree_code code_a = TREE_CODE (ref_a);
3087 tree_code code_b = TREE_CODE (ref_b);
3088 if (code_a == IMAGPART_EXPR)
3089 code_a = REALPART_EXPR;
3090 if (code_b == IMAGPART_EXPR)
3091 code_b = REALPART_EXPR;
3092 if (code_a != code_b)
3093 return false;
3095 if (TREE_CODE (ref_a) == COMPONENT_REF)
3096 /* ??? We cannot simply use the type of operand #0 of the refs here as
3097 the Fortran compiler smuggles type punning into COMPONENT_REFs.
3098 Use the DECL_CONTEXT of the FIELD_DECLs instead. */
3099 return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
3100 == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
3102 return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
3103 TREE_TYPE (TREE_OPERAND (ref_b, 0)));
3106 /* Initialize a data dependence relation RES in LOOP_NEST. USE_ALT_INDICES
3107 is true when the main indices of A and B were not comparable so we try again
3108 with alternate indices computed on an indirect reference. */
3110 struct data_dependence_relation *
3111 initialize_data_dependence_relation (struct data_dependence_relation *res,
3112 vec<loop_p> loop_nest,
3113 bool use_alt_indices)
3115 struct data_reference *a = DDR_A (res);
3116 struct data_reference *b = DDR_B (res);
3117 unsigned int i;
/* Select the main access-function indices, or on the retry pass the
   alternate indices computed for an artificial pointer dereference.  */
3119 struct indices *indices_a = &a->indices;
3120 struct indices *indices_b = &b->indices;
3121 if (use_alt_indices)
3123 if (TREE_CODE (DR_REF (a)) != MEM_REF)
3124 indices_a = &a->alt_indices;
3125 if (TREE_CODE (DR_REF (b)) != MEM_REF)
3126 indices_b = &b->alt_indices;
3128 unsigned int num_dimensions_a = indices_a->access_fns.length ();
3129 unsigned int num_dimensions_b = indices_b->access_fns.length ();
/* Without access functions on both sides there is nothing to compare.  */
3130 if (num_dimensions_a == 0 || num_dimensions_b == 0)
3132 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3133 return res;
3136 /* For unconstrained bases, the root (highest-indexed) subscript
3137 describes a variation in the base of the original DR_REF rather
3138 than a component access. We have no type that accurately describes
3139 the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
3140 applying this subscript) so limit the search to the last real
3141 component access.
3143 E.g. for:
3145 void
3146 f (int a[][8], int b[][8])
3148 for (int i = 0; i < 8; ++i)
3149 a[i * 2][0] = b[i][0];
3152 the a and b accesses have a single ARRAY_REF component reference [0]
3153 but have two subscripts. */
3154 if (indices_a->unconstrained_base)
3155 num_dimensions_a -= 1;
3156 if (indices_b->unconstrained_base)
3157 num_dimensions_b -= 1;
3159 /* These structures describe sequences of component references in
3160 DR_REF (A) and DR_REF (B). Each component reference is tied to a
3161 specific access function. */
3162 struct {
3163 /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
3164 DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
3165 indices. In C notation, these are the indices of the rightmost
3166 component references; e.g. for a sequence .b.c.d, the start
3167 index is for .d. */
3168 unsigned int start_a;
3169 unsigned int start_b;
3171 /* The sequence contains LENGTH consecutive access functions from
3172 each DR. */
3173 unsigned int length;
3175 /* The enclosing objects for the A and B sequences respectively,
3176 i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
3177 and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied. */
3178 tree object_a;
3179 tree object_b;
3180 } full_seq = {}, struct_seq = {};
3182 /* Before each iteration of the loop:
3184 - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
3185 - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B). */
3186 unsigned int index_a = 0;
3187 unsigned int index_b = 0;
3188 tree ref_a = DR_REF (a);
3189 tree ref_b = DR_REF (b);
3191 /* Now walk the component references from the final DR_REFs back up to
3192 the enclosing base objects. Each component reference corresponds
3193 to one access function in the DR, with access function 0 being for
3194 the final DR_REF and the highest-indexed access function being the
3195 one that is applied to the base of the DR.
3197 Look for a sequence of component references whose access functions
3198 are comparable (see access_fn_components_comparable_p). If more
3199 than one such sequence exists, pick the one nearest the base
3200 (which is the leftmost sequence in C notation). Store this sequence
3201 in FULL_SEQ.
3203 For example, if we have:
3205 struct foo { struct bar s; ... } (*a)[10], (*b)[10];
3207 A: a[0][i].s.c.d
3208 B: __real b[0][i].s.e[i].f
3210 (where d is the same type as the real component of f) then the access
3211 functions would be:
3213 0 1 2 3
3214 A: .d .c .s [i]
3216 0 1 2 3 4 5
3217 B: __real .f [i] .e .s [i]
3219 The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
3220 and [i] is an ARRAY_REF. However, the A1/B3 column contains two
3221 COMPONENT_REF accesses for struct bar, so is comparable. Likewise
3222 the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
3223 so is comparable. The A3/B5 column contains two ARRAY_REFs that
3224 index foo[10] arrays, so is again comparable. The sequence is
3225 therefore:
3227 A: [1, 3] (i.e. [i].s.c)
3228 B: [3, 5] (i.e. [i].s.e)
3230 Also look for sequences of component references whose access
3231 functions are comparable and whose enclosing objects have the same
3232 RECORD_TYPE. Store this sequence in STRUCT_SEQ. In the above
3233 example, STRUCT_SEQ would be:
3235 A: [1, 2] (i.e. s.c)
3236 B: [3, 4] (i.e. s.e) */
3237 while (index_a < num_dimensions_a && index_b < num_dimensions_b)
3239 /* The alternate indices form always has a single dimension
3240 with unconstrained base. */
3241 gcc_assert (!use_alt_indices);
3243 /* REF_A and REF_B must be one of the component access types
3244 allowed by dr_analyze_indices. */
3245 gcc_checking_assert (access_fn_component_p (ref_a));
3246 gcc_checking_assert (access_fn_component_p (ref_b));
3248 /* Get the immediately-enclosing objects for REF_A and REF_B,
3249 i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
3250 and DR_ACCESS_FN (B, INDEX_B). */
3251 tree object_a = TREE_OPERAND (ref_a, 0);
3252 tree object_b = TREE_OPERAND (ref_b, 0);
3254 tree type_a = TREE_TYPE (object_a);
3255 tree type_b = TREE_TYPE (object_b);
3256 if (access_fn_components_comparable_p (ref_a, ref_b))
3258 /* This pair of component accesses is comparable for dependence
3259 analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
3260 DR_ACCESS_FN (B, INDEX_B) in the sequence. */
3261 if (full_seq.start_a + full_seq.length != index_a
3262 || full_seq.start_b + full_seq.length != index_b)
3264 /* The accesses don't extend the current sequence,
3265 so start a new one here. */
3266 full_seq.start_a = index_a;
3267 full_seq.start_b = index_b;
3268 full_seq.length = 0;
3271 /* Add this pair of references to the sequence. */
3272 full_seq.length += 1;
3273 full_seq.object_a = object_a;
3274 full_seq.object_b = object_b;
3276 /* If the enclosing objects are structures (and thus have the
3277 same RECORD_TYPE), record the new sequence in STRUCT_SEQ. */
3278 if (TREE_CODE (type_a) == RECORD_TYPE)
3279 struct_seq = full_seq;
3281 /* Move to the next containing reference for both A and B. */
3282 ref_a = object_a;
3283 ref_b = object_b;
3284 index_a += 1;
3285 index_b += 1;
3286 continue;
3289 /* Try to approach equal type sizes. */
3290 if (!COMPLETE_TYPE_P (type_a)
3291 || !COMPLETE_TYPE_P (type_b)
3292 || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
3293 || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
3294 break;
3296 unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
3297 unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
/* Advance the walk on the side with the smaller enclosing type so the
   two sides converge; advance both when the sizes match.  */
3298 if (size_a <= size_b)
3300 index_a += 1;
3301 ref_a = object_a;
3303 if (size_b <= size_a)
3305 index_b += 1;
3306 ref_b = object_b;
3310 /* See whether FULL_SEQ ends at the base and whether the two bases
3311 are equal. We do not care about TBAA or alignment info so we can
3312 use OEP_ADDRESS_OF to avoid false negatives. */
3313 tree base_a = indices_a->base_object;
3314 tree base_b = indices_b->base_object;
3315 bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
3316 && full_seq.start_b + full_seq.length == num_dimensions_b
3317 && (indices_a->unconstrained_base
3318 == indices_b->unconstrained_base)
3319 && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
3320 && (types_compatible_p (TREE_TYPE (base_a),
3321 TREE_TYPE (base_b))
3322 || (!base_supports_access_fn_components_p (base_a)
3323 && !base_supports_access_fn_components_p (base_b)
3324 && operand_equal_p
3325 (TYPE_SIZE (TREE_TYPE (base_a)),
3326 TYPE_SIZE (TREE_TYPE (base_b)), 0)))
3327 && (!loop_nest.exists ()
3328 || (object_address_invariant_in_loop_p
3329 (loop_nest[0], base_a))));
3331 /* If the bases are the same, we can include the base variation too.
3332 E.g. the b accesses in:
3334 for (int i = 0; i < n; ++i)
3335 b[i + 4][0] = b[i][0];
3337 have a definite dependence distance of 4, while for:
3339 for (int i = 0; i < n; ++i)
3340 a[i + 4][0] = b[i][0];
3342 the dependence distance depends on the gap between a and b.
3344 If the bases are different then we can only rely on the sequence
3345 rooted at a structure access, since arrays are allowed to overlap
3346 arbitrarily and change shape arbitrarily. E.g. we treat this as
3347 valid code:
3349 int a[256];
3351 ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];
3353 where two lvalues with the same int[4][3] type overlap, and where
3354 both lvalues are distinct from the object's declared type. */
3355 if (same_base_p)
3357 if (indices_a->unconstrained_base)
3358 full_seq.length += 1;
3360 else
3361 full_seq = struct_seq;
3363 /* Punt if we didn't find a suitable sequence. */
3364 if (full_seq.length == 0)
3366 if (use_alt_indices
3367 || (TREE_CODE (DR_REF (a)) == MEM_REF
3368 && TREE_CODE (DR_REF (b)) == MEM_REF)
3369 || may_be_nonaddressable_p (DR_REF (a))
3370 || may_be_nonaddressable_p (DR_REF (b)))
3372 /* Fully exhausted possibilities. */
3373 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3374 return res;
3377 /* Try evaluating both DRs as dereferences of pointers. */
3378 if (!a->alt_indices.base_object
3379 && TREE_CODE (DR_REF (a)) != MEM_REF)
3381 tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (a)),
3382 build1 (ADDR_EXPR, ptr_type_node, DR_REF (a)),
3383 build_int_cst
3384 (reference_alias_ptr_type (DR_REF (a)), 0));
3385 dr_analyze_indices (&a->alt_indices, alt_ref,
3386 loop_preheader_edge (loop_nest[0]),
3387 loop_containing_stmt (DR_STMT (a)));
3389 if (!b->alt_indices.base_object
3390 && TREE_CODE (DR_REF (b)) != MEM_REF)
3392 tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (b)),
3393 build1 (ADDR_EXPR, ptr_type_node, DR_REF (b)),
3394 build_int_cst
3395 (reference_alias_ptr_type (DR_REF (b)), 0));
3396 dr_analyze_indices (&b->alt_indices, alt_ref,
3397 loop_preheader_edge (loop_nest[0]),
3398 loop_containing_stmt (DR_STMT (b)));
/* Retry once with the alternate (pointer-deref) indices.  */
3400 return initialize_data_dependence_relation (res, loop_nest, true);
3403 if (!same_base_p)
3405 /* Partial overlap is possible for different bases when strict aliasing
3406 is not in effect. It's also possible if either base involves a union
3407 access; e.g. for:
3409 struct s1 { int a[2]; };
3410 struct s2 { struct s1 b; int c; };
3411 struct s3 { int d; struct s1 e; };
3412 union u { struct s2 f; struct s3 g; } *p, *q;
3414 the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
3415 "p->g.e" (base "p->g") and might partially overlap the s1 at
3416 "q->g.e" (base "q->g"). */
3417 if (!flag_strict_aliasing
3418 || ref_contains_union_access_p (full_seq.object_a)
3419 || ref_contains_union_access_p (full_seq.object_b))
3421 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3422 return res;
3425 DDR_COULD_BE_INDEPENDENT_P (res) = true;
3426 if (!loop_nest.exists ()
3427 || (object_address_invariant_in_loop_p (loop_nest[0],
3428 full_seq.object_a)
3429 && object_address_invariant_in_loop_p (loop_nest[0],
3430 full_seq.object_b)))
3432 DDR_OBJECT_A (res) = full_seq.object_a;
3433 DDR_OBJECT_B (res) = full_seq.object_b;
/* All checks passed: record an affine relation with one subscript per
   matched dimension; the actual dependence test runs later.  */
3437 DDR_AFFINE_P (res) = true;
3438 DDR_ARE_DEPENDENT (res) = NULL_TREE;
3439 DDR_SUBSCRIPTS (res).create (full_seq.length);
3440 DDR_LOOP_NEST (res) = loop_nest;
3441 DDR_SELF_REFERENCE (res) = false;
3443 for (i = 0; i < full_seq.length; ++i)
3445 struct subscript *subscript;
3447 subscript = XNEW (struct subscript);
3448 SUB_ACCESS_FN (subscript, 0) = indices_a->access_fns[full_seq.start_a + i];
3449 SUB_ACCESS_FN (subscript, 1) = indices_b->access_fns[full_seq.start_b + i];
3450 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
3451 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
3452 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
3453 SUB_DISTANCE (subscript) = chrec_dont_know;
3454 DDR_SUBSCRIPTS (res).safe_push (subscript);
3457 return res;
3460 /* Initialize a data dependence relation between data accesses A and
3461 B. NB_LOOPS is the number of loops surrounding the references: the
3462 size of the classic distance/direction vectors. */
3464 struct data_dependence_relation *
3465 initialize_data_dependence_relation (struct data_reference *a,
3466 struct data_reference *b,
3467 vec<loop_p> loop_nest)
3469 data_dependence_relation *res = XCNEW (struct data_dependence_relation);
3470 DDR_A (res) = a;
3471 DDR_B (res) = b;
3472 DDR_LOOP_NEST (res).create (0);
3473 DDR_SUBSCRIPTS (res).create (0);
3474 DDR_DIR_VECTS (res).create (0);
3475 DDR_DIST_VECTS (res).create (0);
3477 if (a == NULL || b == NULL)
3479 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3480 return res;
3483 /* If the data references do not alias, then they are independent. */
3484 if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL))
3486 DDR_ARE_DEPENDENT (res) = chrec_known;
3487 return res;
3490 return initialize_data_dependence_relation (res, loop_nest, false);
3494 /* Frees memory used by the conflict function F. */
3496 static void
3497 free_conflict_function (conflict_function *f)
3499 unsigned i;
3501 if (CF_NONTRIVIAL_P (f))
3503 for (i = 0; i < f->n; i++)
3504 affine_fn_free (f->fns[i]);
3506 free (f);
3509 /* Frees memory used by SUBSCRIPTS. */
3511 static void
3512 free_subscripts (vec<subscript_p> subscripts)
3514 for (subscript_p s : subscripts)
3516 free_conflict_function (s->conflicting_iterations_in_a);
3517 free_conflict_function (s->conflicting_iterations_in_b);
3518 free (s);
3520 subscripts.release ();
3523 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
3524 description. */
3526 static inline void
3527 finalize_ddr_dependent (struct data_dependence_relation *ddr,
3528 tree chrec)
3530 DDR_ARE_DEPENDENT (ddr) = chrec;
3531 free_subscripts (DDR_SUBSCRIPTS (ddr));
3532 DDR_SUBSCRIPTS (ddr).create (0);
3535 /* The dependence relation DDR cannot be represented by a distance
3536 vector. */
3538 static inline void
3539 non_affine_dependence_relation (struct data_dependence_relation *ddr)
3541 if (dump_file && (dump_flags & TDF_DETAILS))
3542 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
3544 DDR_AFFINE_P (ddr) = false;
3549 /* This section contains the classic Banerjee tests. */
3551 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
3552 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
3554 static inline bool
3555 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3557 return (evolution_function_is_constant_p (chrec_a)
3558 && evolution_function_is_constant_p (chrec_b));
3561 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
3562 variable, i.e., if the SIV (Single Index Variable) test is true. */
3564 static bool
3565 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3567 if ((evolution_function_is_constant_p (chrec_a)
3568 && evolution_function_is_univariate_p (chrec_b))
3569 || (evolution_function_is_constant_p (chrec_b)
3570 && evolution_function_is_univariate_p (chrec_a)))
3571 return true;
3573 if (evolution_function_is_univariate_p (chrec_a)
3574 && evolution_function_is_univariate_p (chrec_b))
3576 switch (TREE_CODE (chrec_a))
3578 case POLYNOMIAL_CHREC:
3579 switch (TREE_CODE (chrec_b))
3581 case POLYNOMIAL_CHREC:
3582 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
3583 return false;
3584 /* FALLTHRU */
3586 default:
3587 return true;
3590 default:
3591 return true;
3595 return false;
3598 /* Creates a conflict function with N dimensions. The affine functions
3599 in each dimension follow. */
3601 static conflict_function *
3602 conflict_fn (unsigned n, ...)
3604 unsigned i;
3605 conflict_function *ret = XCNEW (conflict_function);
3606 va_list ap;
3608 gcc_assert (n > 0 && n <= MAX_DIM);
3609 va_start (ap, n);
3611 ret->n = n;
3612 for (i = 0; i < n; i++)
3613 ret->fns[i] = va_arg (ap, affine_fn);
3614 va_end (ap);
3616 return ret;
3619 /* Returns constant affine function with value CST. */
3621 static affine_fn
3622 affine_fn_cst (tree cst)
3624 affine_fn fn;
3625 fn.create (1);
3626 fn.quick_push (cst);
3627 return fn;
3630 /* Returns affine function with single variable, CST + COEF * x_DIM. */
3632 static affine_fn
3633 affine_fn_univar (tree cst, unsigned dim, tree coef)
3635 affine_fn fn;
3636 fn.create (dim + 1);
3637 unsigned i;
3639 gcc_assert (dim > 0);
3640 fn.quick_push (cst);
3641 for (i = 1; i < dim; i++)
3642 fn.quick_push (integer_zero_node);
3643 fn.quick_push (coef);
3644 return fn;
3647 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
3648 *OVERLAPS_B are initialized to the functions that describe the
3649 relation between the elements accessed twice by CHREC_A and
3650 CHREC_B. For k >= 0, the following property is verified:
3652 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
3654 static void
3655 analyze_ziv_subscript (tree chrec_a,
3656 tree chrec_b,
3657 conflict_function **overlaps_a,
3658 conflict_function **overlaps_b,
3659 tree *last_conflicts)
3661 tree type, difference;
3662 dependence_stats.num_ziv++;
3664 if (dump_file && (dump_flags & TDF_DETAILS))
3665 fprintf (dump_file, "(analyze_ziv_subscript \n");
3667 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3668 chrec_a = chrec_convert (type, chrec_a, NULL);
3669 chrec_b = chrec_convert (type, chrec_b, NULL);
3670 difference = chrec_fold_minus (type, chrec_a, chrec_b);
3672 switch (TREE_CODE (difference))
3674 case INTEGER_CST:
3675 if (integer_zerop (difference))
3677 /* The difference is equal to zero: the accessed index
3678 overlaps for each iteration in the loop. */
3679 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3680 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3681 *last_conflicts = chrec_dont_know;
3682 dependence_stats.num_ziv_dependent++;
3684 else
3686 /* The accesses do not overlap. */
3687 *overlaps_a = conflict_fn_no_dependence ();
3688 *overlaps_b = conflict_fn_no_dependence ();
3689 *last_conflicts = integer_zero_node;
3690 dependence_stats.num_ziv_independent++;
3692 break;
3694 default:
3695 /* We're not sure whether the indexes overlap. For the moment,
3696 conservatively answer "don't know". */
3697 if (dump_file && (dump_flags & TDF_DETAILS))
3698 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
3700 *overlaps_a = conflict_fn_not_known ();
3701 *overlaps_b = conflict_fn_not_known ();
3702 *last_conflicts = chrec_dont_know;
3703 dependence_stats.num_ziv_unimplemented++;
3704 break;
3707 if (dump_file && (dump_flags & TDF_DETAILS))
3708 fprintf (dump_file, ")\n");
3711 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
3712 and only if it fits to the int type. If this is not the case, or the
3713 bound on the number of iterations of LOOP could not be derived, returns
3714 chrec_dont_know. */
3716 static tree
3717 max_stmt_executions_tree (class loop *loop)
3719 widest_int nit;
3721 if (!max_stmt_executions (loop, &nit))
3722 return chrec_dont_know;
3724 if (!wi::fits_to_tree_p (nit, unsigned_type_node))
3725 return chrec_dont_know;
3727 return wide_int_to_tree (unsigned_type_node, nit);
3730 /* Determine whether the CHREC is always positive/negative. If the expression
3731 cannot be statically analyzed, return false, otherwise set the answer into
3732 VALUE. */
3734 static bool
3735 chrec_is_positive (tree chrec, bool *value)
3737 bool value0, value1, value2;
3738 tree end_value, nb_iter;
3740 switch (TREE_CODE (chrec))
3742 case POLYNOMIAL_CHREC:
/* Recurse on the initial value and on the step; both must have a
   statically-known sign for the chrec to have one.  */
3743 if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
3744 || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
3745 return false;
3747 /* FIXME -- overflows. */
3748 if (value0 == value1)
3750 *value = value0;
3751 return true;
3754 /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
3755 and the proof consists in showing that the sign never
3756 changes during the execution of the loop, from 0 to
3757 loop->nb_iterations. */
3758 if (!evolution_function_is_affine_p (chrec))
3759 return false;
3761 nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
3762 if (chrec_contains_undetermined (nb_iter))
3763 return false;
3765 #if 0
3766 /* TODO -- If the test is after the exit, we may decrease the number of
3767 iterations by one. */
3768 if (after_exit)
3769 nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
3770 #endif
/* Evaluate the chrec at the final iteration to learn the sign at the
   end of the loop.  */
3772 end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
3774 if (!chrec_is_positive (end_value, &value2))
3775 return false;
/* NOTE(review): value2 (the end sign) is computed but the return
   compares value0 with value1, which are known unequal at this point,
   so this path always returns false; presumably value0 == value2 was
   intended — confirm against upstream history before changing.  */
3777 *value = value0;
3778 return value0 == value1;
3780 case INTEGER_CST:
/* A literal constant: its sign is given by tree_int_cst_sgn; zero
   has no sign, so report failure for it.  */
3781 switch (tree_int_cst_sgn (chrec))
3783 case -1:
3784 *value = false;
3785 break;
3786 case 1:
3787 *value = true;
3788 break;
3789 default:
3790 return false;
3792 return true;
3794 default:
3795 return false;
3800 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
3801 constant, and CHREC_B is an affine function. *OVERLAPS_A and
3802 *OVERLAPS_B are initialized to the functions that describe the
3803 relation between the elements accessed twice by CHREC_A and
3804 CHREC_B. For k >= 0, the following property is verified:
3806 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
3808 static void
3809 analyze_siv_subscript_cst_affine (tree chrec_a,
3810 tree chrec_b,
3811 conflict_function **overlaps_a,
3812 conflict_function **overlaps_b,
3813 tree *last_conflicts)
3815 bool value0, value1, value2;
3816 tree type, difference, tmp;
/* Work in a common signed type so the difference below is meaningful.  */
3818 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3819 chrec_a = chrec_convert (type, chrec_a, NULL);
3820 chrec_b = chrec_convert (type, chrec_b, NULL);
3821 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
3823 /* Special case overlap in the first iteration. */
3824 if (integer_zerop (difference))
3826 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3827 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3828 *last_conflicts = integer_one_node;
3829 return;
/* The sign of (initial of CHREC_B) - CHREC_A decides in which direction
   CHREC_B must evolve to ever reach the constant CHREC_A; the two outer
   branches below are the symmetric cases for that sign.  */
3832 if (!chrec_is_positive (initial_condition (difference), &value0))
3834 if (dump_file && (dump_flags & TDF_DETAILS))
3835 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
3837 dependence_stats.num_siv_unimplemented++;
3838 *overlaps_a = conflict_fn_not_known ();
3839 *overlaps_b = conflict_fn_not_known ();
3840 *last_conflicts = chrec_dont_know;
3841 return;
3843 else
3845 if (value0 == false)
3847 if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3848 || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
3850 if (dump_file && (dump_flags & TDF_DETAILS))
3851 fprintf (dump_file, "siv test failed: chrec not positive.\n");
3853 *overlaps_a = conflict_fn_not_known ();
3854 *overlaps_b = conflict_fn_not_known ();
3855 *last_conflicts = chrec_dont_know;
3856 dependence_stats.num_siv_unimplemented++;
3857 return;
3859 else
3861 if (value1 == true)
3863 /* Example:
3864 chrec_a = 12
3865 chrec_b = {10, +, 1}
3868 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3870 HOST_WIDE_INT numiter;
3871 class loop *loop = get_chrec_loop (chrec_b);
3873 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3874 tmp = fold_build2 (EXACT_DIV_EXPR, type,
3875 fold_build1 (ABS_EXPR, type, difference),
3876 CHREC_RIGHT (chrec_b));
3877 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3878 *last_conflicts = integer_one_node;
3881 /* Perform weak-zero siv test to see if overlap is
3882 outside the loop bounds. */
3883 numiter = max_stmt_executions_int (loop);
3885 if (numiter >= 0
3886 && compare_tree_int (tmp, numiter) > 0)
3888 free_conflict_function (*overlaps_a);
3889 free_conflict_function (*overlaps_b);
3890 *overlaps_a = conflict_fn_no_dependence ();
3891 *overlaps_b = conflict_fn_no_dependence ();
3892 *last_conflicts = integer_zero_node;
3893 dependence_stats.num_siv_independent++;
3894 return;
3896 dependence_stats.num_siv_dependent++;
3897 return;
3900 /* When the step does not divide the difference, there are
3901 no overlaps. */
3902 else
3904 *overlaps_a = conflict_fn_no_dependence ();
3905 *overlaps_b = conflict_fn_no_dependence ();
3906 *last_conflicts = integer_zero_node;
3907 dependence_stats.num_siv_independent++;
3908 return;
3912 else
3914 /* Example:
3915 chrec_a = 12
3916 chrec_b = {10, +, -1}
3918 In this case, chrec_a will not overlap with chrec_b. */
3919 *overlaps_a = conflict_fn_no_dependence ();
3920 *overlaps_b = conflict_fn_no_dependence ();
3921 *last_conflicts = integer_zero_node;
3922 dependence_stats.num_siv_independent++;
3923 return;
3927 else
/* Symmetric case: the initial difference is positive, so CHREC_B must
   step downwards (negative step) to ever meet CHREC_A.  */
3929 if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3930 || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
3932 if (dump_file && (dump_flags & TDF_DETAILS))
3933 fprintf (dump_file, "siv test failed: chrec not positive.\n");
3935 *overlaps_a = conflict_fn_not_known ();
3936 *overlaps_b = conflict_fn_not_known ();
3937 *last_conflicts = chrec_dont_know;
3938 dependence_stats.num_siv_unimplemented++;
3939 return;
3941 else
3943 if (value2 == false)
3945 /* Example:
3946 chrec_a = 3
3947 chrec_b = {10, +, -1}
3949 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3951 HOST_WIDE_INT numiter;
3952 class loop *loop = get_chrec_loop (chrec_b);
3954 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3955 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
3956 CHREC_RIGHT (chrec_b));
3957 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3958 *last_conflicts = integer_one_node;
3960 /* Perform weak-zero siv test to see if overlap is
3961 outside the loop bounds. */
3962 numiter = max_stmt_executions_int (loop);
3964 if (numiter >= 0
3965 && compare_tree_int (tmp, numiter) > 0)
3967 free_conflict_function (*overlaps_a);
3968 free_conflict_function (*overlaps_b);
3969 *overlaps_a = conflict_fn_no_dependence ();
3970 *overlaps_b = conflict_fn_no_dependence ();
3971 *last_conflicts = integer_zero_node;
3972 dependence_stats.num_siv_independent++;
3973 return;
3975 dependence_stats.num_siv_dependent++;
3976 return;
3979 /* When the step does not divide the difference, there
3980 are no overlaps. */
3981 else
3983 *overlaps_a = conflict_fn_no_dependence ();
3984 *overlaps_b = conflict_fn_no_dependence ();
3985 *last_conflicts = integer_zero_node;
3986 dependence_stats.num_siv_independent++;
3987 return;
3990 else
3992 /* Example:
3993 chrec_a = 3
3994 chrec_b = {4, +, 1}
3996 In this case, chrec_a will not overlap with chrec_b. */
3997 *overlaps_a = conflict_fn_no_dependence ();
3998 *overlaps_b = conflict_fn_no_dependence ();
3999 *last_conflicts = integer_zero_node;
4000 dependence_stats.num_siv_independent++;
4001 return;
4008 /* Helper recursive function for initializing the matrix A. Returns
4009 the initial value of CHREC. */
4011 static tree
4012 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
4014 gcc_assert (chrec);
4016 switch (TREE_CODE (chrec))
4018 case POLYNOMIAL_CHREC:
4019 HOST_WIDE_INT chrec_right;
4020 if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
4021 return chrec_dont_know;
4022 chrec_right = int_cst_value (CHREC_RIGHT (chrec));
4023 /* We want to be able to negate without overflow. */
4024 if (chrec_right == HOST_WIDE_INT_MIN)
4025 return chrec_dont_know;
4026 A[index][0] = mult * chrec_right;
4027 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
4029 case PLUS_EXPR:
4030 case MULT_EXPR:
4031 case MINUS_EXPR:
4033 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4034 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
4036 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
4039 CASE_CONVERT:
4041 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4042 return chrec_convert (chrec_type (chrec), op, NULL);
4045 case BIT_NOT_EXPR:
4047 /* Handle ~X as -1 - X. */
4048 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4049 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
4050 build_int_cst (TREE_TYPE (chrec), -1), op);
4053 case INTEGER_CST:
4054 return chrec;
4056 default:
4057 gcc_unreachable ();
4058 return NULL_TREE;
/* NOTE(review): C integer division truncates toward zero, so this is a
   true floor only when both operands have the same sign; the callers in
   this file pass nonnegative values — confirm before reusing.  */
4062 #define FLOOR_DIV(x,y) ((x) / (y))
4064 /* Solves the special case of the Diophantine equation:
4065 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
4067 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
4068 number of iterations that loops X and Y run. The overlaps will be
4069 constructed as evolutions in dimension DIM. */
4071 static void
4072 compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
4073 HOST_WIDE_INT step_a,
4074 HOST_WIDE_INT step_b,
4075 affine_fn *overlaps_a,
4076 affine_fn *overlaps_b,
4077 tree *last_conflicts, int dim)
4079 if (((step_a > 0 && step_b > 0)
4080 || (step_a < 0 && step_b < 0)))
4082 HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
4083 HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;
4085 gcd_steps_a_b = gcd (step_a, step_b);
4086 step_overlaps_a = step_b / gcd_steps_a_b;
4087 step_overlaps_b = step_a / gcd_steps_a_b;
4089 if (niter > 0)
4091 tau2 = FLOOR_DIV (niter, step_overlaps_a);
4092 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
4093 last_conflict = tau2;
4094 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
4096 else
4097 *last_conflicts = chrec_dont_know;
4099 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
4100 build_int_cst (NULL_TREE,
4101 step_overlaps_a));
4102 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
4103 build_int_cst (NULL_TREE,
4104 step_overlaps_b));
4107 else
4109 *overlaps_a = affine_fn_cst (integer_zero_node);
4110 *overlaps_b = affine_fn_cst (integer_zero_node);
4111 *last_conflicts = integer_zero_node;
/* Solves the special case of a Diophantine equation where CHREC_A is
   an affine bivariate function, and CHREC_B is an affine univariate
   function.  For example,

   | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z

   has the following overlapping functions:

   | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
   | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
   | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v

   FORNOW: This is a specialized implementation for a case occurring in
   a common benchmark.  Implement the general algorithm.  */

static void
compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
				      conflict_function **overlaps_a,
				      conflict_function **overlaps_b,
				      tree *last_conflicts)
{
  bool xz_p, yz_p, xyz_p;
  HOST_WIDE_INT step_x, step_y, step_z;
  HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
  affine_fn overlaps_a_xz, overlaps_b_xz;
  affine_fn overlaps_a_yz, overlaps_b_yz;
  affine_fn overlaps_a_xyz, overlaps_b_xyz;
  affine_fn ova1, ova2, ovb;
  tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;

  /* CHREC_A is bivariate {{., +, step_x}_x, +, step_y}_y; CHREC_B is
     univariate {., +, step_z}_z.  */
  step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
  step_y = int_cst_value (CHREC_RIGHT (chrec_a));
  step_z = int_cst_value (CHREC_RIGHT (chrec_b));

  niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
  niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
  niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));

  /* Without known iteration counts for all three loops the sub-problems
     below cannot be bounded; answer "don't know".  */
  if (niter_x < 0 || niter_y < 0 || niter_z < 0)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      return;
    }

  /* Solve the three univariate sub-problems: x against z, y against z,
     and the combined x+y step against z (dimensions 1, 2, 3).  */
  niter = MIN (niter_x, niter_z);
  compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
					   &overlaps_a_xz,
					   &overlaps_b_xz,
					   &last_conflicts_xz, 1);
  niter = MIN (niter_y, niter_z);
  compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
					   &overlaps_a_yz,
					   &overlaps_b_yz,
					   &last_conflicts_yz, 2);
  niter = MIN (niter_x, niter_z);
  niter = MIN (niter_y, niter);
  compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
					   &overlaps_a_xyz,
					   &overlaps_b_xyz,
					   &last_conflicts_xyz, 3);

  /* A sub-problem contributes iff its last-conflict count is nonzero.  */
  xz_p = !integer_zerop (last_conflicts_xz);
  yz_p = !integer_zerop (last_conflicts_yz);
  xyz_p = !integer_zerop (last_conflicts_xyz);

  if (xz_p || yz_p || xyz_p)
    {
      /* OVA1/OVA2 accumulate the overlap functions for the two
	 dimensions of CHREC_A, OVB for the single dimension of
	 CHREC_B.  affine_fn_plus allocates, so free the operands.  */
      ova1 = affine_fn_cst (integer_zero_node);
      ova2 = affine_fn_cst (integer_zero_node);
      ovb = affine_fn_cst (integer_zero_node);
      if (xz_p)
	{
	  affine_fn t0 = ova1;
	  affine_fn t2 = ovb;

	  ova1 = affine_fn_plus (ova1, overlaps_a_xz);
	  ovb = affine_fn_plus (ovb, overlaps_b_xz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  *last_conflicts = last_conflicts_xz;
	}
      if (yz_p)
	{
	  affine_fn t0 = ova2;
	  affine_fn t2 = ovb;

	  ova2 = affine_fn_plus (ova2, overlaps_a_yz);
	  ovb = affine_fn_plus (ovb, overlaps_b_yz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  *last_conflicts = last_conflicts_yz;
	}
      if (xyz_p)
	{
	  affine_fn t0 = ova1;
	  affine_fn t2 = ova2;
	  affine_fn t4 = ovb;

	  ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
	  ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
	  ovb = affine_fn_plus (ovb, overlaps_b_xyz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  affine_fn_free (t4);
	  *last_conflicts = last_conflicts_xyz;
	}
      /* NOTE(review): when several of the xz/yz/xyz cases conflict,
	 *LAST_CONFLICTS keeps the value of the last case set above.  */
      *overlaps_a = conflict_fn (2, ova1, ova2);
      *overlaps_b = conflict_fn (1, ovb);
    }
  else
    {
      /* No overlap in any sub-problem: independent accesses.  */
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = integer_zero_node;
    }

  affine_fn_free (overlaps_a_xz);
  affine_fn_free (overlaps_b_xz);
  affine_fn_free (overlaps_a_yz);
  affine_fn_free (overlaps_b_yz);
  affine_fn_free (overlaps_a_xyz);
  affine_fn_free (overlaps_b_xyz);
}
4244 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
4246 static void
4247 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
4248 int size)
4250 memcpy (vec2, vec1, size * sizeof (*vec1));
4253 /* Copy the elements of M x N matrix MAT1 to MAT2. */
4255 static void
4256 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
4257 int m, int n)
4259 int i;
4261 for (i = 0; i < m; i++)
4262 lambda_vector_copy (mat1[i], mat2[i], n);
4265 /* Store the N x N identity matrix in MAT. */
4267 static void
4268 lambda_matrix_id (lambda_matrix mat, int size)
4270 int i, j;
4272 for (i = 0; i < size; i++)
4273 for (j = 0; j < size; j++)
4274 mat[i][j] = (i == j) ? 1 : 0;
4277 /* Return the index of the first nonzero element of vector VEC1 between
4278 START and N. We must have START <= N.
4279 Returns N if VEC1 is the zero vector. */
4281 static int
4282 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
4284 int j = start;
4285 while (j < n && vec1[j] == 0)
4286 j++;
4287 return j;
4290 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
4291 R2 = R2 + CONST1 * R1. */
4293 static bool
4294 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2,
4295 lambda_int const1)
4297 int i;
4299 if (const1 == 0)
4300 return true;
4302 for (i = 0; i < n; i++)
4304 bool ovf;
4305 lambda_int tem = mul_hwi (mat[r1][i], const1, &ovf);
4306 if (ovf)
4307 return false;
4308 lambda_int tem2 = add_hwi (mat[r2][i], tem, &ovf);
4309 if (ovf || tem2 == HOST_WIDE_INT_MIN)
4310 return false;
4311 mat[r2][i] = tem2;
4314 return true;
4317 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
4318 and store the result in VEC2. */
4320 static void
4321 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
4322 int size, lambda_int const1)
4324 int i;
4326 if (const1 == 0)
4327 lambda_vector_clear (vec2, size);
4328 else
4329 for (i = 0; i < size; i++)
4330 vec2[i] = const1 * vec1[i];
4333 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
4335 static void
4336 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
4337 int size)
4339 lambda_vector_mult_const (vec1, vec2, size, -1);
/* Negate row R1 of matrix MAT which has N columns, in place.  */

static void
lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
{
  /* Source and destination alias: lambda_vector_negate supports
     in-place operation.  */
  lambda_vector_negate (mat[r1], mat[r1], n);
}
4350 /* Return true if two vectors are equal. */
4352 static bool
4353 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
4355 int i;
4356 for (i = 0; i < size; i++)
4357 if (vec1[i] != vec2[i])
4358 return false;
4359 return true;
/* Given an M x N integer matrix A, this function determines an M x
   M unimodular matrix U, and an M x N echelon matrix S such that
   "U.A = S".  This decomposition is also known as "right Hermite".

   Returns false if one of the elimination steps would overflow a
   HOST_WIDE_INT (see lambda_matrix_row_add), true on success.

   Ref: Algorithm 2.1 page 33 in "Loop Transformations for
   Restructuring Compilers" Utpal Banerjee.  */

static bool
lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
			     lambda_matrix S, lambda_matrix U)
{
  int i, j, i0 = 0;

  /* Start from S = A and U = identity; the row operations below are
     applied to both so that U.A = S stays invariant.  */
  lambda_matrix_copy (A, S, m, n);
  lambda_matrix_id (U, m);

  for (j = 0; j < n; j++)
    {
      if (lambda_vector_first_nz (S[j], m, i0) < m)
	{
	  ++i0;
	  /* Euclid-style elimination: repeatedly subtract the quotient
	     multiple of row I from row I-1 and swap, until S[i][j]
	     becomes zero.  */
	  for (i = m - 1; i >= i0; i--)
	    {
	      while (S[i][j] != 0)
		{
		  lambda_int factor, a, b;

		  a = S[i-1][j];
		  b = S[i][j];
		  /* a = HOST_WIDE_INT_MIN would make a / b undefined
		     for b = -1; lambda_matrix_row_add already refuses
		     to produce that value.  */
		  gcc_assert (a != HOST_WIDE_INT_MIN);
		  factor = a / b;

		  if (!lambda_matrix_row_add (S, n, i, i-1, -factor))
		    return false;
		  std::swap (S[i], S[i-1]);

		  /* Mirror the same row operation on U.  */
		  if (!lambda_matrix_row_add (U, m, i, i-1, -factor))
		    return false;
		  std::swap (U[i], U[i-1]);
		}
	    }
	}
    }

  return true;
}
/* Determines the overlapping elements due to accesses CHREC_A and
   CHREC_B, that are affine functions.  This function cannot handle
   symbolic evolution functions, ie. when initial conditions are
   parameters, because it uses lambda matrices of integers.  */

static void
analyze_subscript_affine_affine (tree chrec_a,
				 tree chrec_b,
				 conflict_function **overlaps_a,
				 conflict_function **overlaps_b,
				 tree *last_conflicts)
{
  unsigned nb_vars_a, nb_vars_b, dim;
  lambda_int gamma, gcd_alpha_beta;
  lambda_matrix A, U, S;
  struct obstack scratch_obstack;

  if (eq_evolutions_p (chrec_a, chrec_b))
    {
      /* The accessed index overlaps for each iteration in the
	 loop.  */
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = chrec_dont_know;
      return;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_subscript_affine_affine \n");

  /* For determining the initial intersection, we have to solve a
     Diophantine equation.  This is the most time consuming part.

     For answering to the question: "Is there a dependence?" we have
     to prove that there exists a solution to the Diophantine
     equation, and that the solution is in the iteration domain,
     i.e. the solution is positive or zero, and that the solution
     happens before the upper bound loop.nb_iterations.  Otherwise
     there is no dependence.  This function outputs a description of
     the iterations that hold the intersections.  */

  nb_vars_a = nb_vars_in_chrec (chrec_a);
  nb_vars_b = nb_vars_in_chrec (chrec_b);

  /* All matrices live on a scratch obstack released at the single
     exit point below.  */
  gcc_obstack_init (&scratch_obstack);

  dim = nb_vars_a + nb_vars_b;
  U = lambda_matrix_new (dim, dim, &scratch_obstack);
  A = lambda_matrix_new (dim, 1, &scratch_obstack);
  S = lambda_matrix_new (dim, 1, &scratch_obstack);

  tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
  tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
  if (init_a == chrec_dont_know
      || init_b == chrec_dont_know)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "affine-affine test failed: "
		 "representation issue.\n");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }
  /* GAMMA is the constant difference of the initial values.  */
  gamma = int_cst_value (init_b) - int_cst_value (init_a);

  /* Don't do all the hard work of solving the Diophantine equation
     when we already know the solution: for example,
     | {3, +, 1}_1
     | {3, +, 4}_2
     | gamma = 3 - 3 = 0.
     Then the first overlap occurs during the first iterations:
     | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
  */
  if (gamma == 0)
    {
      if (nb_vars_a == 1 && nb_vars_b == 1)
	{
	  HOST_WIDE_INT step_a, step_b;
	  HOST_WIDE_INT niter, niter_a, niter_b;
	  affine_fn ova, ovb;

	  niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
	  niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
	  niter = MIN (niter_a, niter_b);
	  step_a = int_cst_value (CHREC_RIGHT (chrec_a));
	  step_b = int_cst_value (CHREC_RIGHT (chrec_b));

	  compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
						   &ova, &ovb,
						   last_conflicts, 1);
	  *overlaps_a = conflict_fn (1, ova);
	  *overlaps_b = conflict_fn (1, ovb);
	}

      else if (nb_vars_a == 2 && nb_vars_b == 1)
	compute_overlap_steps_for_affine_1_2
	  (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);

      else if (nb_vars_a == 1 && nb_vars_b == 2)
	/* Symmetric case: swap the roles of A and B.  */
	compute_overlap_steps_for_affine_1_2
	  (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);

      else
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "affine-affine test failed: too many variables.\n");
	  *overlaps_a = conflict_fn_not_known ();
	  *overlaps_b = conflict_fn_not_known ();
	  *last_conflicts = chrec_dont_know;
	}
      goto end_analyze_subs_aa;
    }

  /* U.A = S */
  if (!lambda_matrix_right_hermite (A, dim, 1, S, U))
    {
      /* Overflow during the decomposition: answer "don't know".  */
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }

  /* Normalize the sign so that GCD_ALPHA_BETA is nonnegative,
     mirroring the change on U to keep U.A = S.  */
  if (S[0][0] < 0)
    {
      S[0][0] *= -1;
      lambda_matrix_row_negate (U, dim, 0);
    }
  gcd_alpha_beta = S[0][0];

  /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
     but that is a quite strange case.  Instead of ICEing, answer
     don't know.  */
  if (gcd_alpha_beta == 0)
    {
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }

  /* The classic "gcd-test".  */
  if (!int_divides_p (gcd_alpha_beta, gamma))
    {
      /* The "gcd-test" has determined that there is no integer
	 solution, i.e. there is no dependence.  */
      *overlaps_a = conflict_fn_no_dependence ();
      *overlaps_b = conflict_fn_no_dependence ();
      *last_conflicts = integer_zero_node;
    }

  /* Both access functions are univariate.  This includes SIV and MIV cases.  */
  else if (nb_vars_a == 1 && nb_vars_b == 1)
    {
      /* Both functions should have the same evolution sign.  */
      if (((A[0][0] > 0 && -A[1][0] > 0)
	   || (A[0][0] < 0 && -A[1][0] < 0)))
	{
	  /* The solutions are given by:
	     |
	     | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
	     |                           [u21 u22]    [y0]

	     For a given integer t.  Using the following variables,

	     | i0 = u11 * gamma / gcd_alpha_beta
	     | j0 = u12 * gamma / gcd_alpha_beta
	     | i1 = u21
	     | j1 = u22

	     the solutions are:

	     | x0 = i0 + i1 * t,
	     | y0 = j0 + j1 * t.  */
	  HOST_WIDE_INT i0, j0, i1, j1;

	  i0 = U[0][0] * gamma / gcd_alpha_beta;
	  j0 = U[0][1] * gamma / gcd_alpha_beta;
	  i1 = U[1][0];
	  j1 = U[1][1];

	  if ((i1 == 0 && i0 < 0)
	      || (j1 == 0 && j0 < 0))
	    {
	      /* There is no solution.
		 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
		 falls in here, but for the moment we don't look at the
		 upper bound of the iteration domain.  */
	      *overlaps_a = conflict_fn_no_dependence ();
	      *overlaps_b = conflict_fn_no_dependence ();
	      *last_conflicts = integer_zero_node;
	      goto end_analyze_subs_aa;
	    }

	  if (i1 > 0 && j1 > 0)
	    {
	      HOST_WIDE_INT niter_a
		= max_stmt_executions_int (get_chrec_loop (chrec_a));
	      HOST_WIDE_INT niter_b
		= max_stmt_executions_int (get_chrec_loop (chrec_b));
	      HOST_WIDE_INT niter = MIN (niter_a, niter_b);

	      /* (X0, Y0) is a solution of the Diophantine equation:
		 "chrec_a (X0) = chrec_b (Y0)".  */
	      HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
					CEIL (-j0, j1));
	      HOST_WIDE_INT x0 = i1 * tau1 + i0;
	      HOST_WIDE_INT y0 = j1 * tau1 + j0;

	      /* (X1, Y1) is the smallest positive solution of the eq
		 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
		 first conflict occurs.  */
	      HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
	      HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
	      HOST_WIDE_INT y1 = y0 - j1 * min_multiple;

	      if (niter > 0)
		{
		  /* If the overlap occurs outside of the bounds of the
		     loop, there is no dependence.  */
		  if (x1 >= niter_a || y1 >= niter_b)
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      goto end_analyze_subs_aa;
		    }

		  /* max stmt executions can get quite large, avoid
		     overflows by using wide ints here.  */
		  widest_int tau2
		    = wi::smin (wi::sdiv_floor (wi::sub (niter_a, i0), i1),
				wi::sdiv_floor (wi::sub (niter_b, j0), j1));
		  widest_int last_conflict = wi::sub (tau2, (x1 - i0)/i1);
		  if (wi::min_precision (last_conflict, SIGNED)
		      <= TYPE_PRECISION (integer_type_node))
		    *last_conflicts
		       = build_int_cst (integer_type_node,
					last_conflict.to_shwi ());
		  else
		    *last_conflicts = chrec_dont_know;
		}
	      else
		*last_conflicts = chrec_dont_know;

	      /* The conflicts happen at X1 + I1*k and Y1 + J1*k.  */
	      *overlaps_a
		= conflict_fn (1,
			       affine_fn_univar (build_int_cst (NULL_TREE, x1),
						 1,
						 build_int_cst (NULL_TREE, i1)));
	      *overlaps_b
		= conflict_fn (1,
			       affine_fn_univar (build_int_cst (NULL_TREE, y1),
						 1,
						 build_int_cst (NULL_TREE, j1)));
	    }
	  else
	    {
	      /* FIXME: For the moment, the upper bound of the
		 iteration domain for i and j is not checked.  */
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	    }
	}
      else
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
	  *overlaps_a = conflict_fn_not_known ();
	  *overlaps_b = conflict_fn_not_known ();
	  *last_conflicts = chrec_dont_know;
	}
    }
  else
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
    }

end_analyze_subs_aa:
  obstack_free (&scratch_obstack, NULL);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  (overlaps_a = ");
      dump_conflict_function (dump_file, *overlaps_a);
      fprintf (dump_file, ")\n  (overlaps_b = ");
      dump_conflict_function (dump_file, *overlaps_b);
      fprintf (dump_file, "))\n");
    }
}
/* Returns true when analyze_subscript_affine_affine can be used for
   determining the dependence relation between chrec_a and chrec_b,
   that contain symbols.  This function modifies chrec_a and chrec_b
   such that the analysis result is the same, and such that they don't
   contain symbols, and then can safely be passed to the analyzer.

   Example: The analysis of the following tuples of evolutions produce
   the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
   vs. {0, +, 1}_1

   {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
   {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
*/

static bool
can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
{
  tree diff, type, left_a, left_b, right_b;

  if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
      || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
    /* FIXME: For the moment not handled.  Might be refined later.  */
    return false;

  /* The transformation is valid only when the difference of the
     initial values is a known constant.  */
  type = chrec_type (*chrec_a);
  left_a = CHREC_LEFT (*chrec_a);
  left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
  diff = chrec_fold_minus (type, left_a, left_b);

  if (!evolution_function_is_constant_p (diff))
    return false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");

  /* Rewrite *CHREC_A with initial value DIFF and *CHREC_B with
     initial value 0: the relative analysis result is unchanged.  */
  *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
				     diff, CHREC_RIGHT (*chrec_a));
  right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
  *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
				     build_int_cst (type, 0),
				     right_b);
  return true;
}
/* Analyze a SIV (Single Index Variable) subscript.  *OVERLAPS_A and
   *OVERLAPS_B are initialized to the functions that describe the
   relation between the elements accessed twice by CHREC_A and
   CHREC_B.  For k >= 0, the following property is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_siv_subscript (tree chrec_a,
		       tree chrec_b,
		       conflict_function **overlaps_a,
		       conflict_function **overlaps_b,
		       tree *last_conflicts,
		       int loop_nest_num)
{
  dependence_stats.num_siv++;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_siv_subscript \n");

  if (evolution_function_is_constant_p (chrec_a)
      && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
    analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
				      overlaps_a, overlaps_b, last_conflicts);

  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
	   && evolution_function_is_constant_p (chrec_b))
    /* Symmetric case: swap the roles of A and B.  */
    analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
				      overlaps_b, overlaps_a, last_conflicts);

  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
	   && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
    {
      if (!chrec_contains_symbols (chrec_a)
	  && !chrec_contains_symbols (chrec_b))
	{
	  analyze_subscript_affine_affine (chrec_a, chrec_b,
					   overlaps_a, overlaps_b,
					   last_conflicts);

	  if (CF_NOT_KNOWN_P (*overlaps_a)
	      || CF_NOT_KNOWN_P (*overlaps_b))
	    dependence_stats.num_siv_unimplemented++;
	  else if (CF_NO_DEPENDENCE_P (*overlaps_a)
		   || CF_NO_DEPENDENCE_P (*overlaps_b))
	    dependence_stats.num_siv_independent++;
	  else
	    dependence_stats.num_siv_dependent++;
	}
      else if (can_use_analyze_subscript_affine_affine (&chrec_a,
							&chrec_b))
	{
	  /* The chrecs were rewritten symbol-free above, so the
	     affine-affine analyzer can handle them now.  */
	  analyze_subscript_affine_affine (chrec_a, chrec_b,
					   overlaps_a, overlaps_b,
					   last_conflicts);

	  if (CF_NOT_KNOWN_P (*overlaps_a)
	      || CF_NOT_KNOWN_P (*overlaps_b))
	    dependence_stats.num_siv_unimplemented++;
	  else if (CF_NO_DEPENDENCE_P (*overlaps_a)
		   || CF_NO_DEPENDENCE_P (*overlaps_b))
	    dependence_stats.num_siv_independent++;
	  else
	    dependence_stats.num_siv_dependent++;
	}
      else
	/* Jumps into the "unimplemented" handling in the else branch
	   below; the label is inside that block on purpose.  */
	goto siv_subscript_dontknow;
    }

  else
    {
    siv_subscript_dontknow:;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "siv test failed: unimplemented");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      dependence_stats.num_siv_unimplemented++;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, ")\n");
}
4833 /* Returns false if we can prove that the greatest common divisor of the steps
4834 of CHREC does not divide CST, false otherwise. */
4836 static bool
4837 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
4839 HOST_WIDE_INT cd = 0, val;
4840 tree step;
4842 if (!tree_fits_shwi_p (cst))
4843 return true;
4844 val = tree_to_shwi (cst);
4846 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
4848 step = CHREC_RIGHT (chrec);
4849 if (!tree_fits_shwi_p (step))
4850 return true;
4851 cd = gcd (cd, tree_to_shwi (step));
4852 chrec = CHREC_LEFT (chrec);
4855 return val % cd == 0;
/* Analyze a MIV (Multiple Index Variable) subscript with respect to
   LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
   functions that describe the relation between the elements accessed
   twice by CHREC_A and CHREC_B.  For k >= 0, the following property
   is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_miv_subscript (tree chrec_a,
		       tree chrec_b,
		       conflict_function **overlaps_a,
		       conflict_function **overlaps_b,
		       tree *last_conflicts,
		       class loop *loop_nest)
{
  tree type, difference;

  dependence_stats.num_miv++;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_miv_subscript \n");

  /* Compare the two chrecs in a common signed type so that the
     difference below is meaningful.  */
  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
  chrec_a = chrec_convert (type, chrec_a, NULL);
  chrec_b = chrec_convert (type, chrec_b, NULL);
  difference = chrec_fold_minus (type, chrec_a, chrec_b);

  if (eq_evolutions_p (chrec_a, chrec_b))
    {
      /* Access functions are the same: all the elements are accessed
	 in the same order.  */
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
      dependence_stats.num_miv_dependent++;
    }

  else if (evolution_function_is_constant_p (difference)
	   && evolution_function_is_affine_multivariate_p (chrec_a,
							   loop_nest->num)
	   && !gcd_of_steps_may_divide_p (chrec_a, difference))
    {
      /* testsuite/.../ssa-chrec-33.c
	 {{21, +, 2}_1, +, -2}_2  vs.  {{20, +, 2}_1, +, -2}_2

	 The difference is 1, and all the evolution steps are multiples
	 of 2, consequently there are no overlapping elements.  */
      *overlaps_a = conflict_fn_no_dependence ();
      *overlaps_b = conflict_fn_no_dependence ();
      *last_conflicts = integer_zero_node;
      dependence_stats.num_miv_independent++;
    }

  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest->num)
	   && !chrec_contains_symbols (chrec_a, loop_nest)
	   && evolution_function_is_affine_in_loop (chrec_b, loop_nest->num)
	   && !chrec_contains_symbols (chrec_b, loop_nest))
    {
      /* testsuite/.../ssa-chrec-35.c
	 {0, +, 1}_2  vs.  {0, +, 1}_3
	 the overlapping elements are respectively located at iterations:
	 {0, +, 1}_x and {0, +, 1}_x,
	 in other words, we have the equality:
	 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)

	 Other examples:
	 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
	 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)

	 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
	 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
      */
      analyze_subscript_affine_affine (chrec_a, chrec_b,
				       overlaps_a, overlaps_b, last_conflicts);

      if (CF_NOT_KNOWN_P (*overlaps_a)
	  || CF_NOT_KNOWN_P (*overlaps_b))
	dependence_stats.num_miv_unimplemented++;
      else if (CF_NO_DEPENDENCE_P (*overlaps_a)
	       || CF_NO_DEPENDENCE_P (*overlaps_b))
	dependence_stats.num_miv_independent++;
      else
	dependence_stats.num_miv_dependent++;
    }

  else
    {
      /* When the analysis is too difficult, answer "don't know".  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      dependence_stats.num_miv_unimplemented++;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, ")\n");
}
/* Determines the iterations for which CHREC_A is equal to CHREC_B in
   with respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
   OVERLAP_ITERATIONS_B are initialized with two functions that
   describe the iterations that contain conflicting elements.

   Remark: For an integer k >= 0, the following equality is true:

   CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
*/

static void
analyze_overlapping_iterations (tree chrec_a,
				tree chrec_b,
				conflict_function **overlap_iterations_a,
				conflict_function **overlap_iterations_b,
				tree *last_conflicts, class loop *loop_nest)
{
  unsigned int lnn = loop_nest->num;

  dependence_stats.num_subscript_tests++;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "(analyze_overlapping_iterations \n");
      fprintf (dump_file, "  (chrec_a = ");
      print_generic_expr (dump_file, chrec_a);
      fprintf (dump_file, ")\n  (chrec_b = ");
      print_generic_expr (dump_file, chrec_b);
      fprintf (dump_file, ")\n");
    }

  if (chrec_a == NULL_TREE
      || chrec_b == NULL_TREE
      || chrec_contains_undetermined (chrec_a)
      || chrec_contains_undetermined (chrec_b))
    {
      dependence_stats.num_subscript_undetermined++;

      *overlap_iterations_a = conflict_fn_not_known ();
      *overlap_iterations_b = conflict_fn_not_known ();
    }

  /* If they are the same chrec, and are affine, they overlap
     on every iteration.  */
  else if (eq_evolutions_p (chrec_a, chrec_b)
	   && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
	       || operand_equal_p (chrec_a, chrec_b, 0)))
    {
      dependence_stats.num_same_subscript_function++;
      *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = chrec_dont_know;
    }

  /* If they aren't the same, and aren't affine, we can't do anything
     yet.  */
  else if ((chrec_contains_symbols (chrec_a)
	    || chrec_contains_symbols (chrec_b))
	   && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
	       || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
    {
      dependence_stats.num_subscript_undetermined++;
      *overlap_iterations_a = conflict_fn_not_known ();
      *overlap_iterations_b = conflict_fn_not_known ();
    }

  /* Dispatch on the subscript class: ZIV (zero), SIV (single), or
     MIV (multiple index variables).  */
  else if (ziv_subscript_p (chrec_a, chrec_b))
    analyze_ziv_subscript (chrec_a, chrec_b,
			   overlap_iterations_a, overlap_iterations_b,
			   last_conflicts);

  else if (siv_subscript_p (chrec_a, chrec_b))
    analyze_siv_subscript (chrec_a, chrec_b,
			   overlap_iterations_a, overlap_iterations_b,
			   last_conflicts, lnn);

  else
    analyze_miv_subscript (chrec_a, chrec_b,
			   overlap_iterations_a, overlap_iterations_b,
			   last_conflicts, loop_nest);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  (overlap_iterations_a = ");
      dump_conflict_function (dump_file, *overlap_iterations_a);
      fprintf (dump_file, ")\n  (overlap_iterations_b = ");
      dump_conflict_function (dump_file, *overlap_iterations_b);
      fprintf (dump_file, "))\n");
    }
}
5050 /* Helper function for uniquely inserting distance vectors. */
5052 static void
5053 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
5055 for (lambda_vector v : DDR_DIST_VECTS (ddr))
5056 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
5057 return;
5059 DDR_DIST_VECTS (ddr).safe_push (dist_v);
5062 /* Helper function for uniquely inserting direction vectors. */
5064 static void
5065 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
5067 for (lambda_vector v : DDR_DIR_VECTS (ddr))
5068 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
5069 return;
5071 DDR_DIR_VECTS (ddr).safe_push (dir_v);
5074 /* Add a distance of 1 on all the loops outer than INDEX. If we
5075 haven't yet determined a distance for this outer loop, push a new
5076 distance vector composed of the previous distance, and a distance
5077 of 1 for this outer loop. Example:
5079 | loop_1
5080 | loop_2
5081 | A[10]
5082 | endloop_2
5083 | endloop_1
5085 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
5086 save (0, 1), then we have to save (1, 0). */
5088 static void
5089 add_outer_distances (struct data_dependence_relation *ddr,
5090 lambda_vector dist_v, int index)
5092 /* For each outer loop where init_v is not set, the accesses are
5093 in dependence of distance 1 in the loop. */
5094 while (--index >= 0)
5096 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5097 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
5098 save_v[index] = 1;
5099 save_dist_v (ddr, save_v);
/* Return false when fail to represent the data dependence as a
   distance vector.  A_INDEX is the index of the first reference
   (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
   second reference.  INIT_B is set to true when a component has been
   added to the distance vector DIST_V.  INDEX_CARRY is then set to
   the index in DIST_V that carries the dependence.  */

static bool
build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
			     unsigned int a_index, unsigned int b_index,
			     lambda_vector dist_v, bool *init_b,
			     int *index_carry)
{
  unsigned i;
  /* INIT_V[k] records whether DIST_V[k] has already been set by a
     previous subscript, for the coupling test below.  */
  lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  class loop *loop = DDR_LOOP_NEST (ddr)[0];

  for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
    {
      tree access_fn_a, access_fn_b;
      struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);

      if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
	{
	  non_affine_dependence_relation (ddr);
	  return false;
	}

      access_fn_a = SUB_ACCESS_FN (subscript, a_index);
      access_fn_b = SUB_ACCESS_FN (subscript, b_index);

      if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
	  && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
	{
	  HOST_WIDE_INT dist;
	  int index;
	  int var_a = CHREC_VARIABLE (access_fn_a);
	  int var_b = CHREC_VARIABLE (access_fn_b);

	  /* Both access functions must evolve in the same loop.  */
	  if (var_a != var_b
	      || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
	    {
	      non_affine_dependence_relation (ddr);
	      return false;
	    }

	  /* When data references are collected in a loop while data
	     dependences are analyzed in loop nest nested in the loop, we
	     would have more number of access functions than number of
	     loops.  Skip access functions of loops not in the loop nest.

	     See PR89725 for more information.  */
	  if (flow_loop_nested_p (get_loop (cfun, var_a), loop))
	    continue;

	  dist = int_cst_value (SUB_DISTANCE (subscript));
	  index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
	  *index_carry = MIN (index, *index_carry);

	  /* This is the subscript coupling test.  If we have already
	     recorded a distance for this loop (a distance coming from
	     another subscript), it should be the same.  For example,
	     in the following code, there is no dependence:

	     | loop i = 0, N, 1
	     |   T[i+1][i] = ...
	     |   ... = T[i][i]
	     | endloop
	  */
	  if (init_v[index] != 0 && dist_v[index] != dist)
	    {
	      finalize_ddr_dependent (ddr, chrec_known);
	      return false;
	    }

	  dist_v[index] = dist;
	  init_v[index] = 1;
	  *init_b = true;
	}
      else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
	{
	  /* This can be for example an affine vs. constant dependence
	     (T[i] vs. T[3]) that is not an affine dependence and is
	     not representable as a distance vector.  */
	  non_affine_dependence_relation (ddr);
	  return false;
	}
      else
	/* Equal non-polynomial access functions: distance 0, nothing
	   to record in DIST_V.  */
	*init_b = true;
    }

  return true;
}
5197 /* Return true when the DDR contains only invariant access functions wrto. loop
5198 number LNUM. */
5200 static bool
5201 invariant_access_functions (const struct data_dependence_relation *ddr,
5202 int lnum)
5204 for (subscript *sub : DDR_SUBSCRIPTS (ddr))
5205 if (!evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 0), lnum)
5206 || !evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 1), lnum))
5207 return false;
5209 return true;
/* Helper function for the case where DDR_A and DDR_B are the same
   multivariate access function with a constant step.  For an example
   see pr34635-1.c.  */

static void
add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
{
  int x_1, x_2;
  /* C_2 is the outermost chrec; peel off its two inner levels.  */
  tree c_1 = CHREC_LEFT (c_2);
  tree c_0 = CHREC_LEFT (c_1);
  lambda_vector dist_v;
  HOST_WIDE_INT v1, v2, cd;

  /* Polynomials with more than 2 variables are not handled yet.  When
     the evolution steps are parameters, it is not possible to
     represent the dependence using classical distance vectors.  */
  if (TREE_CODE (c_0) != INTEGER_CST
      || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
      || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
    {
      DDR_AFFINE_P (ddr) = false;
      return;
    }

  x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
  x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));

  /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2).  */
  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  v1 = int_cst_value (CHREC_RIGHT (c_1));
  v2 = int_cst_value (CHREC_RIGHT (c_2));
  /* Reduce the step pair by their gcd so the distance vector is in
     lowest terms.  */
  cd = gcd (v1, v2);
  v1 /= cd;
  v2 /= cd;

  /* Normalize the sign so the component stored at X_1 is positive.  */
  if (v2 < 0)
    {
      v2 = -v2;
      v1 = -v1;
    }

  dist_v[x_1] = v2;
  dist_v[x_2] = -v1;
  save_dist_v (ddr, dist_v);

  add_outer_distances (ddr, dist_v, x_1);
}
/* Helper function for the case where DDR_A and DDR_B are the same
   access functions.  */

static void
add_other_self_distances (struct data_dependence_relation *ddr)
{
  lambda_vector dist_v;
  unsigned i;
  /* Smallest loop-nest index that carries the dependence; starts past
     the end so MIN below can only lower it.  */
  int index_carry = DDR_NB_LOOPS (ddr);
  subscript *sub;
  class loop *loop = DDR_LOOP_NEST (ddr)[0];

  FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
    {
      tree access_fun = SUB_ACCESS_FN (sub, 0);

      if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
	{
	  if (!evolution_function_is_univariate_p (access_fun, loop->num))
	    {
	      /* Multivariate self dependences are only handled for a
		 single-subscript relation; otherwise give up.  */
	      if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
		{
		  DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
		  return;
		}

	      access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);

	      if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
		add_multivariate_self_dist (ddr, access_fun);
	      else
		/* The evolution step is not constant: it varies in
		   the outer loop, so this cannot be represented by a
		   distance vector.  For example in pr34635.c the
		   evolution is {0, +, {0, +, 4}_1}_2.  */
		DDR_AFFINE_P (ddr) = false;

	      return;
	    }

	  /* When data references are collected in a loop while data
	     dependences are analyzed in loop nest nested in the loop, we
	     would have more number of access functions than number of
	     loops.  Skip access functions of loops not in the loop nest.

	     See PR89725 for more information.  */
	  if (flow_loop_nested_p (get_loop (cfun, CHREC_VARIABLE (access_fun)),
				  loop))
	    continue;

	  index_carry = MIN (index_carry,
			     index_in_loop_nest (CHREC_VARIABLE (access_fun),
						 DDR_LOOP_NEST (ddr)));
	}
    }

  /* Record the all-zero vector plus distances for the outer loops.  */
  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  add_outer_distances (ddr, dist_v, index_carry);
}
5320 static void
5321 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
5323 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5325 dist_v[0] = 1;
5326 save_dist_v (ddr, dist_v);
5329 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
5330 is the case for example when access functions are the same and
5331 equal to a constant, as in:
5333 | loop_1
5334 | A[3] = ...
5335 | ... = A[3]
5336 | endloop_1
5338 in which case the distance vectors are (0) and (1). */
5340 static void
5341 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
5343 unsigned i, j;
5345 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
5347 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
5348 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
5349 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
5351 for (j = 0; j < ca->n; j++)
5352 if (affine_function_zero_p (ca->fns[j]))
5354 insert_innermost_unit_dist_vector (ddr);
5355 return;
5358 for (j = 0; j < cb->n; j++)
5359 if (affine_function_zero_p (cb->fns[j]))
5361 insert_innermost_unit_dist_vector (ddr);
5362 return;
5367 /* Return true when the DDR contains two data references that have the
5368 same access functions. */
5370 static inline bool
5371 same_access_functions (const struct data_dependence_relation *ddr)
5373 for (subscript *sub : DDR_SUBSCRIPTS (ddr))
5374 if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
5375 SUB_ACCESS_FN (sub, 1)))
5376 return false;
5378 return true;
/* Compute the classic per loop distance vector.  DDR is the data
   dependence relation to build a vector from.  Return false when fail
   to represent the data dependence as a distance vector.  */

static bool
build_classic_dist_vector (struct data_dependence_relation *ddr,
			   class loop *loop_nest)
{
  bool init_b = false;
  int index_carry = DDR_NB_LOOPS (ddr);
  lambda_vector dist_v;

  /* Nothing to do when the relation is already finalized.  */
  if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
    return false;

  if (same_access_functions (ddr))
    {
      /* Save the 0 vector.  */
      dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
      save_dist_v (ddr, dist_v);

      if (invariant_access_functions (ddr, loop_nest->num))
	add_distance_for_zero_overlaps (ddr);

      if (DDR_NB_LOOPS (ddr) > 1)
	add_other_self_distances (ddr);

      return true;
    }

  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
    return false;

  /* Save the distance vector if we initialized one.  */
  if (init_b)
    {
      /* Verify a basic constraint: classic distance vectors should
	 always be lexicographically positive.

	 Data references are collected in the order of execution of
	 the program, thus for the following loop

	 | for (i = 1; i < 100; i++)
	 |   for (j = 1; j < 100; j++)
	 |     {
	 |       t = T[j+1][i-1];  // A
	 |       T[j][i] = t + 2;  // B
	 |     }

	 references are collected following the direction of the wind:
	 A then B.  The data dependence tests are performed also
	 following this order, such that we're looking at the distance
	 separating the elements accessed by A from the elements later
	 accessed by B.  But in this example, the distance returned by
	 test_dep (A, B) is lexicographically negative (-1, 1), that
	 means that the access A occurs later than B with respect to
	 the outer loop, ie. we're actually looking upwind.  In this
	 case we solve test_dep (B, A) looking downwind to the
	 lexicographically positive solution, that returns the
	 distance vector (1, -1).  */
      if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
	{
	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
	  /* Re-run the subscript test with the references swapped to
	     obtain the lexicographically positive vector.  */
	  if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
	    return false;
	  compute_subscript_distance (ddr);
	  if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
					    &index_carry))
	    return false;
	  save_dist_v (ddr, save_v);
	  DDR_REVERSED_P (ddr) = true;

	  /* In this case there is a dependence forward for all the
	     outer loops:

	     | for (k = 1; k < 100; k++)
	     |  for (i = 1; i < 100; i++)
	     |   for (j = 1; j < 100; j++)
	     |     {
	     |       t = T[j+1][i-1];  // A
	     |       T[j][i] = t + 2;  // B
	     |     }

	     the vectors are:
	     (0,  1, -1)
	     (1,  1, -1)
	     (1, -1,  1)
	  */
	  if (DDR_NB_LOOPS (ddr) > 1)
	    {
	      add_outer_distances (ddr, save_v, index_carry);
	      add_outer_distances (ddr, dist_v, index_carry);
	    }
	}
      else
	{
	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
	  lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));

	  if (DDR_NB_LOOPS (ddr) > 1)
	    {
	      /* Also compute the vector for the swapped direction so
		 both orientations of the dependence are recorded.  */
	      lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));

	      if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
		return false;
	      compute_subscript_distance (ddr);
	      if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
						&index_carry))
		return false;

	      save_dist_v (ddr, save_v);
	      add_outer_distances (ddr, dist_v, index_carry);
	      add_outer_distances (ddr, opposite_v, index_carry);
	    }
	  else
	    save_dist_v (ddr, save_v);
	}
    }
  else
    {
      /* There is a distance of 1 on all the outer loops: Example:
	 there is a dependence of distance 1 on loop_1 for the array A.

	 | loop_1
	 |   A[5] = ...
	 | endloop
      */
      add_outer_distances (ddr, dist_v,
			   lambda_vector_first_nz (dist_v,
						   DDR_NB_LOOPS (ddr), 0));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned i;

      fprintf (dump_file, "(build_classic_dist_vector\n");
      for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
	{
	  fprintf (dump_file, "  dist_vector = (");
	  print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
			       DDR_NB_LOOPS (ddr));
	  fprintf (dump_file, "  )\n");
	}
      fprintf (dump_file, ")\n");
    }

  return true;
}
5532 /* Return the direction for a given distance.
5533 FIXME: Computing dir this way is suboptimal, since dir can catch
5534 cases that dist is unable to represent. */
5536 static inline enum data_dependence_direction
5537 dir_from_dist (int dist)
5539 if (dist > 0)
5540 return dir_positive;
5541 else if (dist < 0)
5542 return dir_negative;
5543 else
5544 return dir_equal;
5547 /* Compute the classic per loop direction vector. DDR is the data
5548 dependence relation to build a vector from. */
5550 static void
5551 build_classic_dir_vector (struct data_dependence_relation *ddr)
5553 unsigned i, j;
5554 lambda_vector dist_v;
5556 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
5558 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5560 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
5561 dir_v[j] = dir_from_dist (dist_v[j]);
5563 save_dir_v (ddr, dir_v);
/* Helper function.  Returns true when there is a dependence between the
   data references.  A_INDEX is the index of the first reference (0 for
   DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference.  */

static bool
subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
			       unsigned int a_index, unsigned int b_index,
			       class loop *loop_nest)
{
  unsigned int i;
  tree last_conflicts;
  struct subscript *subscript;
  /* NULL_TREE means "dependent so far"; may become chrec_dont_know or
     chrec_known as subscripts are analyzed.  */
  tree res = NULL_TREE;

  for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
    {
      conflict_function *overlaps_a, *overlaps_b;

      analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
				      SUB_ACCESS_FN (subscript, b_index),
				      &overlaps_a, &overlaps_b,
				      &last_conflicts, loop_nest);

      /* Release any conflict functions from a previous run before
	 overwriting them.  */
      if (SUB_CONFLICTS_IN_A (subscript))
	free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
      if (SUB_CONFLICTS_IN_B (subscript))
	free_conflict_function (SUB_CONFLICTS_IN_B (subscript));

      SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
      SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
      SUB_LAST_CONFLICT (subscript) = last_conflicts;

      /* If there is any undetermined conflict function we have to
	 give a conservative answer in case we cannot prove that
	 no dependence exists when analyzing another subscript.  */
      if (CF_NOT_KNOWN_P (overlaps_a)
	  || CF_NOT_KNOWN_P (overlaps_b))
	{
	  res = chrec_dont_know;
	  continue;
	}

      /* When there is a subscript with no dependence we can stop.  */
      else if (CF_NO_DEPENDENCE_P (overlaps_a)
	       || CF_NO_DEPENDENCE_P (overlaps_b))
	{
	  res = chrec_known;
	  break;
	}
    }

  if (res == NULL_TREE)
    return true;

  if (res == chrec_known)
    dependence_stats.num_dependence_independent++;
  else
    dependence_stats.num_dependence_undetermined++;
  finalize_ddr_dependent (ddr, res);
  return false;
}
/* Computes the conflicting iterations in LOOP_NEST, and initialize DDR.  */

static void
subscript_dependence_tester (struct data_dependence_relation *ddr,
			     class loop *loop_nest)
{
  /* A true result means the references are dependent; count it.  */
  if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
    dependence_stats.num_dependence_dependent++;

  /* Distances must be computed before classic dist/dir vectors can be
     built from them.  */
  compute_subscript_distance (ddr);
  if (build_classic_dist_vector (ddr, loop_nest))
    build_classic_dir_vector (ddr);
}
5643 /* Returns true when all the access functions of A are affine or
5644 constant with respect to LOOP_NEST. */
5646 static bool
5647 access_functions_are_affine_or_constant_p (const struct data_reference *a,
5648 const class loop *loop_nest)
5650 vec<tree> fns = DR_ACCESS_FNS (a);
5651 for (tree t : fns)
5652 if (!evolution_function_is_invariant_p (t, loop_nest->num)
5653 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
5654 return false;
5656 return true;
/* This computes the affine dependence relation between A and B with
   respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
   independence between two accesses, while CHREC_DONT_KNOW is used
   for representing the unknown relation.

   Note that it is possible to stop the computation of the dependence
   relation the first time we detect a CHREC_KNOWN element for a given
   subscript.  */

void
compute_affine_dependence (struct data_dependence_relation *ddr,
			   class loop *loop_nest)
{
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "(compute_affine_dependence\n");
      fprintf (dump_file, "  ref_a: ");
      print_generic_expr (dump_file, DR_REF (dra));
      fprintf (dump_file, ", stmt_a: ");
      print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
      fprintf (dump_file, "  ref_b: ");
      print_generic_expr (dump_file, DR_REF (drb));
      fprintf (dump_file, ", stmt_b: ");
      print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
    }

  /* Analyze only when the dependence relation is not yet known.  */
  if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      dependence_stats.num_dependence_tests++;

      /* The subscript tester only handles access functions that are
	 affine or constant in LOOP_NEST.  */
      if (access_functions_are_affine_or_constant_p (dra, loop_nest)
	  && access_functions_are_affine_or_constant_p (drb, loop_nest))
	subscript_dependence_tester (ddr, loop_nest);

      /* As a last case, if the dependence cannot be determined, or if
	 the dependence is considered too difficult to determine, answer
	 "don't know".  */
      else
	{
	  dependence_stats.num_dependence_undetermined++;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Data ref a:\n");
	      dump_data_reference (dump_file, dra);
	      fprintf (dump_file, "Data ref b:\n");
	      dump_data_reference (dump_file, drb);
	      fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
	    }
	  finalize_ddr_dependent (ddr, chrec_dont_know);
	}
    }

  /* Close the dump parenthesis opened above, annotated with the
     outcome of the analysis.  */
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
	fprintf (dump_file, ") -> no dependence\n");
      else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	fprintf (dump_file, ") -> dependence analysis failed\n");
      else
	fprintf (dump_file, ")\n");
    }
}
5727 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
5728 the data references in DATAREFS, in the LOOP_NEST. When
5729 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
5730 relations. Return true when successful, i.e. data references number
5731 is small enough to be handled. */
5733 bool
5734 compute_all_dependences (const vec<data_reference_p> &datarefs,
5735 vec<ddr_p> *dependence_relations,
5736 const vec<loop_p> &loop_nest,
5737 bool compute_self_and_rr)
5739 struct data_dependence_relation *ddr;
5740 struct data_reference *a, *b;
5741 unsigned int i, j;
5743 if ((int) datarefs.length ()
5744 > param_loop_max_datarefs_for_datadeps)
5746 struct data_dependence_relation *ddr;
5748 /* Insert a single relation into dependence_relations:
5749 chrec_dont_know. */
5750 ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
5751 dependence_relations->safe_push (ddr);
5752 return false;
5755 FOR_EACH_VEC_ELT (datarefs, i, a)
5756 for (j = i + 1; datarefs.iterate (j, &b); j++)
5757 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
5759 ddr = initialize_data_dependence_relation (a, b, loop_nest);
5760 dependence_relations->safe_push (ddr);
5761 if (loop_nest.exists ())
5762 compute_affine_dependence (ddr, loop_nest[0]);
5765 if (compute_self_and_rr)
5766 FOR_EACH_VEC_ELT (datarefs, i, a)
5768 ddr = initialize_data_dependence_relation (a, a, loop_nest);
5769 dependence_relations->safe_push (ddr);
5770 if (loop_nest.exists ())
5771 compute_affine_dependence (ddr, loop_nest[0]);
5774 return true;
/* Describes a location of a memory reference.  Filled in by
   get_references_in_stmt and consumed when creating data_reference
   objects.  */

struct data_ref_loc
{
  /* The memory reference (a tree, e.g. a DECL or a REFERENCE_CLASS_P
     expression).  */
  tree ref;

  /* True if the memory reference is read.  */
  bool is_read;

  /* True if the data reference is conditional within the containing
     statement, i.e. if it might not occur even when the statement
     is executed and runs to completion.  */
  bool is_conditional_in_stmt;
};
/* Stores the locations of memory references in STMT to REFERENCES.  Returns
   true if STMT clobbers memory, false otherwise.  */

static bool
get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
{
  bool clobbers_memory = false;
  data_ref_loc ref;
  tree op0, op1;
  enum gimple_code stmt_code = gimple_code (stmt);

  /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
     As we cannot model data-references to not spelled out
     accesses give up if they may occur.  */
  if (stmt_code == GIMPLE_CALL
      && !(gimple_call_flags (stmt) & ECF_CONST))
    {
      /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
      if (gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_GOMP_SIMD_LANE:
	    {
	      class loop *loop = gimple_bb (stmt)->loop_father;
	      tree uid = gimple_call_arg (stmt, 0);
	      gcc_assert (TREE_CODE (uid) == SSA_NAME);
	      /* Only harmless when the call belongs to its own simd
		 loop, identified by the simduid.  */
	      if (loop == NULL
		  || loop->simduid != SSA_NAME_VAR (uid))
		clobbers_memory = true;
	      break;
	    }
	  case IFN_MASK_LOAD:
	  case IFN_MASK_STORE:
	    /* Modeled explicitly below as conditional references.  */
	    break;
	  case IFN_MASK_CALL:
	    {
	      tree orig_fndecl
		= gimple_call_addr_fndecl (gimple_call_arg (stmt, 0));
	      if (!orig_fndecl
		  || (flags_from_decl_or_type (orig_fndecl) & ECF_CONST) == 0)
		clobbers_memory = true;
	    }
	    break;
	  default:
	    clobbers_memory = true;
	    break;
	  }
      else
	clobbers_memory = true;
    }
  else if (stmt_code == GIMPLE_ASM
	   && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
	       || gimple_vuse (stmt)))
    clobbers_memory = true;

  /* Without a virtual use the statement touches no memory we need to
     record.  */
  if (!gimple_vuse (stmt))
    return clobbers_memory;

  if (stmt_code == GIMPLE_ASSIGN)
    {
      tree base;
      op0 = gimple_assign_lhs (stmt);
      op1 = gimple_assign_rhs1 (stmt);

      /* Record the RHS as a read when it is a memory reference with a
	 non-SSA, non-invariant base.  */
      if (DECL_P (op1)
	  || (REFERENCE_CLASS_P (op1)
	      && (base = get_base_address (op1))
	      && TREE_CODE (base) != SSA_NAME
	      && !is_gimple_min_invariant (base)))
	{
	  ref.ref = op1;
	  ref.is_read = true;
	  ref.is_conditional_in_stmt = false;
	  references->safe_push (ref);
	}
    }
  else if (stmt_code == GIMPLE_CALL)
    {
      unsigned i = 0, n;
      tree ptr, type;
      unsigned int align;

      ref.is_read = false;
      if (gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_MASK_LOAD:
	    if (gimple_call_lhs (stmt) == NULL_TREE)
	      break;
	    ref.is_read = true;
	    /* FALLTHRU */
	  case IFN_MASK_STORE:
	    /* Build a MEM_REF representing the masked access; it is
	       conditional because the mask may suppress it.  */
	    ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
	    align = tree_to_shwi (gimple_call_arg (stmt, 1));
	    if (ref.is_read)
	      type = TREE_TYPE (gimple_call_lhs (stmt));
	    else
	      type = TREE_TYPE (gimple_call_arg (stmt, 3));
	    if (TYPE_ALIGN (type) != align)
	      type = build_aligned_type (type, align);
	    ref.is_conditional_in_stmt = true;
	    ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
				   ptr);
	    references->safe_push (ref);
	    return false;
	  case IFN_MASK_CALL:
	    /* Argument 0 is the function address; start scanning real
	       arguments at index 1.  */
	    i = 1;
	    gcc_fallthrough ();
	  default:
	    break;
	  }

      op0 = gimple_call_lhs (stmt);
      n = gimple_call_num_args (stmt);
      for (; i < n; i++)
	{
	  op1 = gimple_call_arg (stmt, i);

	  if (DECL_P (op1)
	      || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
	    {
	      ref.ref = op1;
	      ref.is_read = true;
	      ref.is_conditional_in_stmt = false;
	      references->safe_push (ref);
	    }
	}
    }
  else
    return clobbers_memory;

  /* The LHS of an assignment or call is a write when it is a memory
     reference.  */
  if (op0
      && (DECL_P (op0)
	  || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
    {
      ref.ref = op0;
      ref.is_read = false;
      ref.is_conditional_in_stmt = false;
      references->safe_push (ref);
    }
  return clobbers_memory;
}
5938 /* Returns true if the loop-nest has any data reference. */
5940 bool
5941 loop_nest_has_data_refs (loop_p loop)
5943 basic_block *bbs = get_loop_body (loop);
5944 auto_vec<data_ref_loc, 3> references;
5946 for (unsigned i = 0; i < loop->num_nodes; i++)
5948 basic_block bb = bbs[i];
5949 gimple_stmt_iterator bsi;
5951 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5953 gimple *stmt = gsi_stmt (bsi);
5954 get_references_in_stmt (stmt, &references);
5955 if (references.length ())
5957 free (bbs);
5958 return true;
5962 free (bbs);
5963 return false;
/* Stores the data references in STMT to DATAREFS.  If there is an unanalyzable
   reference, returns false, otherwise returns true.  NEST is the outermost
   loop of the loop nest in which the references should be analyzed.  */

opt_result
find_data_references_in_stmt (class loop *nest, gimple *stmt,
			      vec<data_reference_p> *datarefs)
{
  auto_vec<data_ref_loc, 2> references;
  data_reference_p dr;

  /* A memory-clobbering statement cannot be modeled as data refs.  */
  if (get_references_in_stmt (stmt, &references))
    return opt_result::failure_at (stmt, "statement clobbers memory: %G",
				   stmt);

  for (const data_ref_loc &ref : references)
    {
      /* When NEST is given, analyze relative to its preheader edge;
	 otherwise analyze within the containing loop only.  */
      dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
			    loop_containing_stmt (stmt), ref.ref,
			    stmt, ref.is_read, ref.is_conditional_in_stmt);
      gcc_assert (dr != NULL);
      datarefs->safe_push (dr);
    }

  return opt_result::success ();
}
5993 /* Stores the data references in STMT to DATAREFS. If there is an
5994 unanalyzable reference, returns false, otherwise returns true.
5995 NEST is the outermost loop of the loop nest in which the references
5996 should be instantiated, LOOP is the loop in which the references
5997 should be analyzed. */
5999 bool
6000 graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
6001 vec<data_reference_p> *datarefs)
6003 auto_vec<data_ref_loc, 2> references;
6004 bool ret = true;
6005 data_reference_p dr;
6007 if (get_references_in_stmt (stmt, &references))
6008 return false;
6010 for (const data_ref_loc &ref : references)
6012 dr = create_data_ref (nest, loop, ref.ref, stmt, ref.is_read,
6013 ref.is_conditional_in_stmt);
6014 gcc_assert (dr != NULL);
6015 datarefs->safe_push (dr);
6018 return ret;
/* Search the data references in LOOP, and record the information into
   DATAREFS.  Returns chrec_dont_know when failing to analyze a
   difficult case, returns NULL_TREE otherwise.  */

tree
find_data_references_in_bb (class loop *loop, basic_block bb,
			    vec<data_reference_p> *datarefs)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    {
      gimple *stmt = gsi_stmt (bsi);

      if (!find_data_references_in_stmt (loop, stmt, datarefs))
	{
	  /* Push a zeroed data_reference as a marker so callers see
	     that an unanalyzable reference was encountered.  */
	  struct data_reference *res;
	  res = XCNEW (struct data_reference);
	  datarefs->safe_push (res);

	  return chrec_dont_know;
	}
    }

  return NULL_TREE;
}
6048 /* Search the data references in LOOP, and record the information into
6049 DATAREFS. Returns chrec_dont_know when failing to analyze a
6050 difficult case, returns NULL_TREE otherwise.
6052 TODO: This function should be made smarter so that it can handle address
6053 arithmetic as if they were array accesses, etc. */
6055 tree
6056 find_data_references_in_loop (class loop *loop,
6057 vec<data_reference_p> *datarefs)
6059 basic_block bb, *bbs;
6060 unsigned int i;
6062 bbs = get_loop_body_in_dom_order (loop);
6064 for (i = 0; i < loop->num_nodes; i++)
6066 bb = bbs[i];
6068 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
6070 free (bbs);
6071 return chrec_dont_know;
6074 free (bbs);
6076 return NULL_TREE;
6079 /* Return the alignment in bytes that DRB is guaranteed to have at all
6080 times. */
6082 unsigned int
6083 dr_alignment (innermost_loop_behavior *drb)
6085 /* Get the alignment of BASE_ADDRESS + INIT. */
6086 unsigned int alignment = drb->base_alignment;
6087 unsigned int misalignment = (drb->base_misalignment
6088 + TREE_INT_CST_LOW (drb->init));
6089 if (misalignment != 0)
6090 alignment = MIN (alignment, misalignment & -misalignment);
6092 /* Cap it to the alignment of OFFSET. */
6093 if (!integer_zerop (drb->offset))
6094 alignment = MIN (alignment, drb->offset_alignment);
6096 /* Cap it to the alignment of STEP. */
6097 if (!integer_zerop (drb->step))
6098 alignment = MIN (alignment, drb->step_alignment);
6100 return alignment;
/* If BASE is a pointer-typed SSA name, try to find the object that it
   is based on.  Return this object X on success and store the alignment
   in bytes of BASE - &X in *ALIGNMENT_OUT.  */

static tree
get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
{
  if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL_TREE;

  gimple *def = SSA_NAME_DEF_STMT (base);
  base = analyze_scalar_evolution (loop_containing_stmt (def), base);

  /* Peel chrecs and record the minimum alignment preserved by
     all steps.  */
  unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
  while (TREE_CODE (base) == POLYNOMIAL_CHREC)
    {
      unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
      alignment = MIN (alignment, step_alignment);
      base = CHREC_LEFT (base);
    }

  /* Punt if the expression is too complicated to handle.  */
  if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL_TREE;

  /* The only useful cases are those for which a dereference folds to something
     other than an INDIRECT_REF.  */
  tree ref_type = TREE_TYPE (TREE_TYPE (base));
  tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
  if (!ref)
    return NULL_TREE;

  /* Analyze the base to which the steps we peeled were applied.  */
  poly_int64 bitsize, bitpos, bytepos;
  machine_mode mode;
  int unsignedp, reversep, volatilep;
  tree offset;
  base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
			      &unsignedp, &reversep, &volatilep);
  /* A bit position that is not a whole number of bytes cannot be
     expressed as a byte alignment.  */
  if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
    return NULL_TREE;

  /* Restrict the alignment to that guaranteed by the offsets.  */
  unsigned int bytepos_alignment = known_alignment (bytepos);
  if (bytepos_alignment != 0)
    alignment = MIN (alignment, bytepos_alignment);
  if (offset)
    {
      unsigned int offset_alignment = highest_pow2_factor (offset);
      alignment = MIN (alignment, offset_alignment);
    }

  *alignment_out = alignment;
  return base;
}
6161 /* Return the object whose alignment would need to be changed in order
6162 to increase the alignment of ADDR. Store the maximum achievable
6163 alignment in *MAX_ALIGNMENT. */
6165 tree
6166 get_base_for_alignment (tree addr, unsigned int *max_alignment)
6168 tree base = get_base_for_alignment_1 (addr, max_alignment);
6169 if (base)
6170 return base;
6172 if (TREE_CODE (addr) == ADDR_EXPR)
6173 addr = TREE_OPERAND (addr, 0);
6174 *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
6175 return addr;
6178 /* Recursive helper function. */
6180 static bool
6181 find_loop_nest_1 (class loop *loop, vec<loop_p> *loop_nest)
6183 /* Inner loops of the nest should not contain siblings. Example:
6184 when there are two consecutive loops,
6186 | loop_0
6187 | loop_1
6188 | A[{0, +, 1}_1]
6189 | endloop_1
6190 | loop_2
6191 | A[{0, +, 1}_2]
6192 | endloop_2
6193 | endloop_0
6195 the dependence relation cannot be captured by the distance
6196 abstraction. */
6197 if (loop->next)
6198 return false;
6200 loop_nest->safe_push (loop);
6201 if (loop->inner)
6202 return find_loop_nest_1 (loop->inner, loop_nest);
6203 return true;
6206 /* Return false when the LOOP is not well nested. Otherwise return
6207 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
6208 contain the loops from the outermost to the innermost, as they will
6209 appear in the classic distance vector. */
6211 bool
6212 find_loop_nest (class loop *loop, vec<loop_p> *loop_nest)
6214 loop_nest->safe_push (loop);
6215 if (loop->inner)
6216 return find_loop_nest_1 (loop->inner, loop_nest);
6217 return true;
/* Returns true when the data dependences have been computed, false otherwise.
   Given a loop nest LOOP, the following vectors are returned:
   DATAREFS is initialized to all the array elements contained in this loop,
   DEPENDENCE_RELATIONS contains the relations between the data references.
   Compute read-read and self relations if
   COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */

bool
compute_data_dependences_for_loop (class loop *loop,
				   bool compute_self_and_read_read_dependences,
				   vec<loop_p> *loop_nest,
				   vec<data_reference_p> *datarefs,
				   vec<ddr_p> *dependence_relations)
{
  bool res = true;

  memset (&dependence_stats, 0, sizeof (dependence_stats));

  /* If the loop nest is not well formed, or one of the data references
     is not computable, give up without spending time to compute other
     dependences.  */
  if (!loop
      || !find_loop_nest (loop, loop_nest)
      || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
      || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
				   compute_self_and_read_read_dependences))
    res = false;

  /* Dump the counters accumulated in dependence_stats during the
     analysis above.  */
  if (dump_file && (dump_flags & TDF_STATS))
    {
      fprintf (dump_file, "Dependence tester statistics:\n");

      fprintf (dump_file, "Number of dependence tests: %d\n",
	       dependence_stats.num_dependence_tests);
      fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
	       dependence_stats.num_dependence_dependent);
      fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
	       dependence_stats.num_dependence_independent);
      fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
	       dependence_stats.num_dependence_undetermined);

      fprintf (dump_file, "Number of subscript tests: %d\n",
	       dependence_stats.num_subscript_tests);
      fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
	       dependence_stats.num_subscript_undetermined);
      fprintf (dump_file, "Number of same subscript function: %d\n",
	       dependence_stats.num_same_subscript_function);

      fprintf (dump_file, "Number of ziv tests: %d\n",
	       dependence_stats.num_ziv);
      fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
	       dependence_stats.num_ziv_dependent);
      fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
	       dependence_stats.num_ziv_independent);
      fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
	       dependence_stats.num_ziv_unimplemented);

      fprintf (dump_file, "Number of siv tests: %d\n",
	       dependence_stats.num_siv);
      fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
	       dependence_stats.num_siv_dependent);
      fprintf (dump_file, "Number of siv tests returning independent: %d\n",
	       dependence_stats.num_siv_independent);
      fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
	       dependence_stats.num_siv_unimplemented);

      fprintf (dump_file, "Number of miv tests: %d\n",
	       dependence_stats.num_miv);
      fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
	       dependence_stats.num_miv_dependent);
      fprintf (dump_file, "Number of miv tests returning independent: %d\n",
	       dependence_stats.num_miv_independent);
      fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
	       dependence_stats.num_miv_unimplemented);
    }

  return res;
}
6299 /* Free the memory used by a data dependence relation DDR. */
6301 void
6302 free_dependence_relation (struct data_dependence_relation *ddr)
6304 if (ddr == NULL)
6305 return;
6307 if (DDR_SUBSCRIPTS (ddr).exists ())
6308 free_subscripts (DDR_SUBSCRIPTS (ddr));
6309 DDR_DIST_VECTS (ddr).release ();
6310 DDR_DIR_VECTS (ddr).release ();
6312 free (ddr);
6315 /* Free the memory used by the data dependence relations from
6316 DEPENDENCE_RELATIONS. */
6318 void
6319 free_dependence_relations (vec<ddr_p>& dependence_relations)
6321 for (data_dependence_relation *ddr : dependence_relations)
6322 if (ddr)
6323 free_dependence_relation (ddr);
6325 dependence_relations.release ();
6328 /* Free the memory used by the data references from DATAREFS. */
6330 void
6331 free_data_refs (vec<data_reference_p>& datarefs)
6333 for (data_reference *dr : datarefs)
6334 free_data_ref (dr);
6335 datarefs.release ();
/* Common routine implementing both dr_direction_indicator and
   dr_zero_step_indicator.  Return USEFUL_MIN if the indicator is known
   to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
   Return the step as the indicator otherwise.  */

static tree
dr_step_indicator (struct data_reference *dr, int useful_min)
{
  tree step = DR_STEP (dr);
  if (!step)
    return NULL_TREE;
  STRIP_NOPS (step);
  /* Look for cases where the step is scaled by a positive constant
     integer, which will often be the access size.  If the multiplication
     doesn't change the sign (due to overflow effects) then we can
     test the unscaled value instead.  */
  if (TREE_CODE (step) == MULT_EXPR
      && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
      && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
    {
      tree factor = TREE_OPERAND (step, 1);
      step = TREE_OPERAND (step, 0);

      /* Strip widening and truncating conversions as well as nops.  */
      if (CONVERT_EXPR_P (step)
	  && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
	step = TREE_OPERAND (step, 0);
      tree type = TREE_TYPE (step);

      /* Get the range of step values that would not cause overflow.
	 I.e. the largest unscaled range [minv, maxv] such that scaling
	 by FACTOR still fits in ssizetype.  */
      widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
			 / wi::to_widest (factor));
      widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
			 / wi::to_widest (factor));

      /* Get the range of values that the unconverted step actually has.
	 Fall back to the full range of its type when no range info is
	 available (not an SSA name, query failure, or undefined range).  */
      wide_int step_min, step_max;
      value_range vr;
      if (TREE_CODE (step) != SSA_NAME
	  || !get_range_query (cfun)->range_of_expr (vr, step)
	  || vr.undefined_p ())
	{
	  step_min = wi::to_wide (TYPE_MIN_VALUE (type));
	  step_max = wi::to_wide (TYPE_MAX_VALUE (type));
	}
      else
	{
	  step_min = vr.lower_bound ();
	  step_max = vr.upper_bound ();
	}

      /* Check whether the unconverted step has an acceptable range:
	 only when scaling cannot overflow does the unscaled step's sign
	 match the scaled step's sign.  */
      signop sgn = TYPE_SIGN (type);
      if (wi::les_p (minv, widest_int::from (step_min, sgn))
	  && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
	{
	  if (wi::ge_p (step_min, useful_min, sgn))
	    return ssize_int (useful_min);
	  else if (wi::lt_p (step_max, 0, sgn))
	    return ssize_int (-1);
	  else
	    return fold_convert (ssizetype, step);
	}
    }
  /* Nothing provable about the sign; let callers test DR_STEP itself.  */
  return DR_STEP (dr);
}
/* Return a value that is negative iff DR has a negative step.  */

tree
dr_direction_indicator (struct data_reference *dr)
{
  /* USEFUL_MIN of 0: a step known to be >= 0 folds to ssize_int (0),
     a known-negative step to ssize_int (-1); otherwise the step itself
     (or NULL_TREE when DR has no step) is returned.  */
  return dr_step_indicator (dr, 0);
}
/* Return a value that is zero iff DR has a zero step.  */

tree
dr_zero_step_indicator (struct data_reference *dr)
{
  /* USEFUL_MIN of 1: a step known to be >= 1 folds to the nonzero
     ssize_int (1), a known-negative step to ssize_int (-1); otherwise
     the step itself (or NULL_TREE when DR has no step) is returned.  */
  return dr_step_indicator (dr, 1);
}
6421 /* Return true if DR is known to have a nonnegative (but possibly zero)
6422 step. */
6424 bool
6425 dr_known_forward_stride_p (struct data_reference *dr)
6427 tree indicator = dr_direction_indicator (dr);
6428 tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
6429 fold_convert (ssizetype, indicator),
6430 ssize_int (0));
6431 return neg_step_val && integer_zerop (neg_step_val);