gcov: make profile merging smarter
[official-gcc.git] / gcc / tree-data-ref.c
blob18307a554fc03f471cfa2f5e99f6b201ef667002
1 /* Data references and dependences detectors.
2 Copyright (C) 2003-2021 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass walks a given loop structure searching for array
22 references. The information about the array accesses is recorded
23 in DATA_REFERENCE structures.
25 The basic test for determining the dependences is:
26 given two access functions chrec1 and chrec2 to a same array, and
27 x and y two vectors from the iteration domain, the same element of
28 the array is accessed twice at iterations x and y if and only if:
29 | chrec1 (x) == chrec2 (y).
31 The goals of this analysis are:
33 - to determine the independence: the relation between two
34 independent accesses is qualified with the chrec_known (this
35 information allows a loop parallelization),
37 - when two data references access the same data, to qualify the
38 dependence relation with classic dependence representations:
40 - distance vectors
41 - direction vectors
42 - loop carried level dependence
43 - polyhedron dependence
44 or with the chains of recurrences based representation,
46 - to define a knowledge base for storing the data dependence
47 information,
49 - to define an interface to access this data.
52 Definitions:
54 - subscript: given two array accesses a subscript is the tuple
55 composed of the access functions for a given dimension. Example:
56 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57 (f1, g1), (f2, g2), (f3, g3).
59 - Diophantine equation: an equation whose coefficients and
60 solutions are integer constants, for example the equation
61 | 3*x + 2*y = 1
62 has an integer solution x = 1 and y = -1.
64 References:
66 - "Advanced Compilation for High Performance Computing" by Randy
67 Allen and Ken Kennedy.
68 http://citeseer.ist.psu.edu/goff91practical.html
70 - "Loop Transformations for Restructuring Compilers - The Foundations"
71 by Utpal Banerjee.
76 #include "config.h"
77 #include "system.h"
78 #include "coretypes.h"
79 #include "backend.h"
80 #include "rtl.h"
81 #include "tree.h"
82 #include "gimple.h"
83 #include "gimple-pretty-print.h"
84 #include "alias.h"
85 #include "fold-const.h"
86 #include "expr.h"
87 #include "gimple-iterator.h"
88 #include "tree-ssa-loop-niter.h"
89 #include "tree-ssa-loop.h"
90 #include "tree-ssa.h"
91 #include "cfgloop.h"
92 #include "tree-data-ref.h"
93 #include "tree-scalar-evolution.h"
94 #include "dumpfile.h"
95 #include "tree-affine.h"
96 #include "builtins.h"
97 #include "tree-eh.h"
98 #include "ssa.h"
99 #include "internal-fn.h"
100 #include "vr-values.h"
101 #include "range-op.h"
102 #include "tree-ssa-loop-ivopts.h"
/* Counters describing the outcome of the various dependence tests.
   They are accumulated while the analysis runs; ZIV/SIV/MIV refer to
   the classic Zero/Single/Multiple Index Variable subscript tests
   (see the Allen & Kennedy reference in the file header).  */

static struct datadep_stats
{
  /* Whole data-dependence-relation tests.  */
  int num_dependence_tests;
  int num_dependence_dependent;
  int num_dependence_independent;
  int num_dependence_undetermined;

  /* Per-subscript tests.  */
  int num_subscript_tests;
  int num_subscript_undetermined;
  int num_same_subscript_function;

  /* Zero Index Variable tests.  */
  int num_ziv;
  int num_ziv_independent;
  int num_ziv_dependent;
  int num_ziv_unimplemented;

  /* Single Index Variable tests.  */
  int num_siv;
  int num_siv_independent;
  int num_siv_dependent;
  int num_siv_unimplemented;

  /* Multiple Index Variable tests.  */
  int num_miv;
  int num_miv_independent;
  int num_miv_dependent;
  int num_miv_unimplemented;
} dependence_stats;
131 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
132 unsigned int, unsigned int,
133 class loop *);
/* Returns true iff A divides B.  Both A and B must be INTEGER_CST
   nodes; the check is performed with exact constant arithmetic.  */

static inline bool
tree_fold_divides_p (const_tree a, const_tree b)
{
  gcc_assert (TREE_CODE (a) == INTEGER_CST);
  gcc_assert (TREE_CODE (b) == INTEGER_CST);
  /* A divides B exactly when B mod A == 0.  */
  return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
}
/* Returns true iff A divides B.  A must be nonzero: the remainder
   would otherwise invoke undefined behavior, so callers are expected
   to have checked that already.  */

static inline bool
int_divides_p (lambda_int a, lambda_int b)
{
  return ((b % a) == 0);
}
152 /* Return true if reference REF contains a union access. */
154 static bool
155 ref_contains_union_access_p (tree ref)
157 while (handled_component_p (ref))
159 ref = TREE_OPERAND (ref, 0);
160 if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
161 || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
162 return true;
164 return false;
169 /* Dump into FILE all the data references from DATAREFS. */
171 static void
172 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
174 for (data_reference *dr : datarefs)
175 dump_data_reference (file, dr);
178 /* Unified dump into FILE all the data references from DATAREFS. */
180 DEBUG_FUNCTION void
181 debug (vec<data_reference_p> &ref)
183 dump_data_references (stderr, ref);
186 DEBUG_FUNCTION void
187 debug (vec<data_reference_p> *ptr)
189 if (ptr)
190 debug (*ptr);
191 else
192 fprintf (stderr, "<nil>\n");
/* Dump into STDERR all the data references from DATAREFS.  */

DEBUG_FUNCTION void
debug_data_references (vec<data_reference_p> datarefs)
{
  dump_data_references (stderr, datarefs);
}
/* Print to STDERR the data_reference DR.  */

DEBUG_FUNCTION void
debug_data_reference (struct data_reference *dr)
{
  dump_data_reference (stderr, dr);
}
212 /* Dump function for a DATA_REFERENCE structure. */
214 void
215 dump_data_reference (FILE *outf,
216 struct data_reference *dr)
218 unsigned int i;
220 fprintf (outf, "#(Data Ref: \n");
221 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
222 fprintf (outf, "# stmt: ");
223 print_gimple_stmt (outf, DR_STMT (dr), 0);
224 fprintf (outf, "# ref: ");
225 print_generic_stmt (outf, DR_REF (dr));
226 fprintf (outf, "# base_object: ");
227 print_generic_stmt (outf, DR_BASE_OBJECT (dr));
229 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
231 fprintf (outf, "# Access function %d: ", i);
232 print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
234 fprintf (outf, "#)\n");
/* Unified dump function for the DATA_REFERENCE structure REF,
   printed to STDERR.  */

DEBUG_FUNCTION void
debug (data_reference &ref)
{
  dump_data_reference (stderr, &ref);
}
245 DEBUG_FUNCTION void
246 debug (data_reference *ptr)
248 if (ptr)
249 debug (*ptr);
250 else
251 fprintf (stderr, "<nil>\n");
/* Dumps the affine function described by FN to the file OUTF.
   FN[0] is the constant term; element I (for I >= 1) is printed as
   the coefficient of the variable x_I.  */

DEBUG_FUNCTION void
dump_affine_function (FILE *outf, affine_fn fn)
{
  unsigned i;
  tree coef;

  print_generic_expr (outf, fn[0], TDF_SLIM);
  for (i = 1; fn.iterate (i, &coef); i++)
    {
      fprintf (outf, " + ");
      print_generic_expr (outf, coef, TDF_SLIM);
      fprintf (outf, " * x_%u", i);
    }
}
/* Dumps the conflict function CF to the file OUTF.  The special
   counts NO_DEPENDENCE and NOT_KNOWN are printed symbolically;
   otherwise each of the CF->n affine functions is printed between
   square brackets.  */

DEBUG_FUNCTION void
dump_conflict_function (FILE *outf, conflict_function *cf)
{
  unsigned i;

  if (cf->n == NO_DEPENDENCE)
    fprintf (outf, "no dependence");
  else if (cf->n == NOT_KNOWN)
    fprintf (outf, "not known");
  else
    {
      for (i = 0; i < cf->n; i++)
	{
	  if (i != 0)
	    fprintf (outf, " ");
	  fprintf (outf, "[");
	  dump_affine_function (outf, cf->fns[i]);
	  fprintf (outf, "]");
	}
    }
}
/* Dump function for a SUBSCRIPT structure: prints the conflict
   functions for both sides of the subscript, the last conflicting
   iteration for each nontrivial conflict function, and the subscript
   distance.  */

DEBUG_FUNCTION void
dump_subscript (FILE *outf, struct subscript *subscript)
{
  conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);

  fprintf (outf, "\n (subscript \n");
  fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
  dump_conflict_function (outf, cf);
  if (CF_NONTRIVIAL_P (cf))
    {
      tree last_iteration = SUB_LAST_CONFLICT (subscript);
      fprintf (outf, "\n last_conflict: ");
      print_generic_expr (outf, last_iteration);
    }

  cf = SUB_CONFLICTS_IN_B (subscript);
  fprintf (outf, "\n iterations_that_access_an_element_twice_in_B: ");
  dump_conflict_function (outf, cf);
  if (CF_NONTRIVIAL_P (cf))
    {
      tree last_iteration = SUB_LAST_CONFLICT (subscript);
      fprintf (outf, "\n last_conflict: ");
      print_generic_expr (outf, last_iteration);
    }

  fprintf (outf, "\n (Subscript distance: ");
  print_generic_expr (outf, SUB_DISTANCE (subscript));
  fprintf (outf, " ))\n");
}
/* Print the classic direction vector DIRV of LENGTH elements to OUTF,
   one symbol per loop level: +, -, =, their combinations, * for
   "any direction", and "indep" for no dependence.  */

DEBUG_FUNCTION void
print_direction_vector (FILE *outf,
			lambda_vector dirv,
			int length)
{
  int eq;

  for (eq = 0; eq < length; eq++)
    {
      enum data_dependence_direction dir = ((enum data_dependence_direction)
					    dirv[eq]);

      switch (dir)
	{
	case dir_positive:
	  fprintf (outf, " +");
	  break;
	case dir_negative:
	  fprintf (outf, " -");
	  break;
	case dir_equal:
	  fprintf (outf, " =");
	  break;
	case dir_positive_or_equal:
	  fprintf (outf, " +=");
	  break;
	case dir_positive_or_negative:
	  fprintf (outf, " +-");
	  break;
	case dir_negative_or_equal:
	  fprintf (outf, " -=");
	  break;
	case dir_star:
	  fprintf (outf, " *");
	  break;
	default:
	  /* dir_independent and any unexpected value.  */
	  fprintf (outf, "indep");
	  break;
	}
    }
  fprintf (outf, "\n");
}
373 /* Print a vector of direction vectors. */
375 DEBUG_FUNCTION void
376 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
377 int length)
379 for (lambda_vector v : dir_vects)
380 print_direction_vector (outf, v, length);
383 /* Print out a vector VEC of length N to OUTFILE. */
385 DEBUG_FUNCTION void
386 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
388 int i;
390 for (i = 0; i < n; i++)
391 fprintf (outfile, "%3d ", (int)vector[i]);
392 fprintf (outfile, "\n");
/* Print to OUTF every distance vector from DIST_VECTS; each vector
   has LENGTH entries.  */

DEBUG_FUNCTION void
print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
		    int length)
{
  for (lambda_vector v : dist_vects)
    print_lambda_vector (outf, v, length);
}
/* Dump function for a DATA_DEPENDENCE_RELATION structure.  Three
   outcomes are distinguished: "don't know" (DDR is null or
   chrec_dont_know), "no dependence" (chrec_known), and a known
   dependence (NULL_TREE), for which the subscripts, loop nest and the
   distance/direction vectors are printed.  */

DEBUG_FUNCTION void
dump_data_dependence_relation (FILE *outf, const data_dependence_relation *ddr)
{
  struct data_reference *dra, *drb;

  fprintf (outf, "(Data Dep: \n");

  if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      /* Undetermined dependence: still dump whichever data references
	 are available before the "(don't know)" marker.  */
      if (ddr)
	{
	  dra = DDR_A (ddr);
	  drb = DDR_B (ddr);
	  if (dra)
	    dump_data_reference (outf, dra);
	  else
	    fprintf (outf, " (nil)\n");
	  if (drb)
	    dump_data_reference (outf, drb);
	  else
	    fprintf (outf, " (nil)\n");
	}
      fprintf (outf, " (don't know)\n)\n");
      return;
    }

  dra = DDR_A (ddr);
  drb = DDR_B (ddr);
  dump_data_reference (outf, dra);
  dump_data_reference (outf, drb);

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    fprintf (outf, " (no dependence)\n");

  else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      unsigned int i;
      class loop *loopi;

      /* Per-subscript access functions and conflict information.  */
      subscript *sub;
      FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
	{
	  fprintf (outf, " access_fn_A: ");
	  print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
	  fprintf (outf, " access_fn_B: ");
	  print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
	  dump_subscript (outf, sub);
	}

      fprintf (outf, " loop nest: (");
      FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
	fprintf (outf, "%d ", loopi->num);
      fprintf (outf, ")\n");

      for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
	{
	  fprintf (outf, " distance_vector: ");
	  print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
			       DDR_NB_LOOPS (ddr));
	}

      for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
	{
	  fprintf (outf, " direction_vector: ");
	  print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
				  DDR_NB_LOOPS (ddr));
	}
    }

  fprintf (outf, ")\n");
}
/* Debug version: dump the dependence relation DDR to STDERR.  */

DEBUG_FUNCTION void
debug_data_dependence_relation (const struct data_dependence_relation *ddr)
{
  dump_data_dependence_relation (stderr, ddr);
}
/* Dump into FILE all the dependence relations from DDRS.  */

DEBUG_FUNCTION void
dump_data_dependence_relations (FILE *file, const vec<ddr_p> &ddrs)
{
  for (auto ddr : ddrs)
    dump_data_dependence_relation (file, ddr);
}
/* Unified dump to STDERR of all the dependence relations from REF.  */

DEBUG_FUNCTION void
debug (vec<ddr_p> &ref)
{
  dump_data_dependence_relations (stderr, ref);
}
502 DEBUG_FUNCTION void
503 debug (vec<ddr_p> *ptr)
505 if (ptr)
506 debug (*ptr);
507 else
508 fprintf (stderr, "<nil>\n");
/* Dump to STDERR all the dependence relations from DDRS.  */

DEBUG_FUNCTION void
debug_data_dependence_relations (vec<ddr_p> ddrs)
{
  dump_data_dependence_relations (stderr, ddrs);
}
/* Dumps the distance and direction vectors in FILE.  DDRS contains
   the dependence relations; only affine relations with a known
   dependence are considered, and each vector is printed with one
   column per loop of the relation's nest (DDR_NB_LOOPS).  */

DEBUG_FUNCTION void
dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
{
  for (data_dependence_relation *ddr : ddrs)
    if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
      {
	for (lambda_vector v : DDR_DIST_VECTS (ddr))
	  {
	    fprintf (file, "DISTANCE_V (");
	    print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
	    fprintf (file, ")\n");
	  }

	for (lambda_vector v : DDR_DIR_VECTS (ddr))
	  {
	    fprintf (file, "DIRECTION_V (");
	    print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
	    fprintf (file, ")\n");
	  }
      }

  fprintf (file, "\n\n");
}
/* Dumps the data dependence relations DDRS in FILE.  */

DEBUG_FUNCTION void
dump_ddrs (FILE *file, vec<ddr_p> ddrs)
{
  for (data_dependence_relation *ddr : ddrs)
    dump_data_dependence_relation (file, ddr);

  fprintf (file, "\n\n");
}
/* Dump the data dependence relations DDRS to STDERR.  */

DEBUG_FUNCTION void
debug_ddrs (vec<ddr_p> ddrs)
{
  dump_ddrs (stderr, ddrs);
}
/* If RESULT_RANGE is nonnull, set *RESULT_RANGE to the range of
   OP0 CODE OP1, where:

   - OP0 CODE OP1 has integral type TYPE
   - the range of OP0 is given by OP0_RANGE and
   - the range of OP1 is given by OP1_RANGE.

   Independently of RESULT_RANGE, try to compute:

     DELTA = ((sizetype) OP0 CODE (sizetype) OP1)
	     - (sizetype) (OP0 CODE OP1)

   as a constant and subtract DELTA from the ssizetype constant in *OFF.
   Return true on success, or false if DELTA is not known at compile time.

   Truncation and sign changes are known to distribute over CODE, i.e.

     (itype) (A CODE B) == (itype) A CODE (itype) B

   for any integral type ITYPE whose precision is no greater than the
   precision of A and B.  */

static bool
compute_distributive_range (tree type, value_range &op0_range,
			    tree_code code, value_range &op1_range,
			    tree *off, value_range *result_range)
{
  /* Only non-trapping integral arithmetic is handled here.  */
  gcc_assert (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type));
  if (result_range)
    {
      range_operator *op = range_op_handler (code, type);
      op->fold_range (*result_range, type, op0_range, op1_range);
    }

  /* The distributive property guarantees that if TYPE is no narrower
     than SIZETYPE,

       (sizetype) (OP0 CODE OP1) == (sizetype) OP0 CODE (sizetype) OP1

     and so we can treat DELTA as zero.  */
  if (TYPE_PRECISION (type) >= TYPE_PRECISION (sizetype))
    return true;

  /* If overflow is undefined, we can assume that:

       X == (ssizetype) OP0 CODE (ssizetype) OP1

     is within the range of TYPE, i.e.:

       X == (ssizetype) (TYPE) X

     Distributing the (TYPE) truncation over X gives:

       X == (ssizetype) (OP0 CODE OP1)

     Casting both sides to sizetype and distributing the sizetype cast
     over X gives:

       (sizetype) OP0 CODE (sizetype) OP1 == (sizetype) (OP0 CODE OP1)

     and so we can treat DELTA as zero.  */
  if (TYPE_OVERFLOW_UNDEFINED (type))
    return true;

  /* Compute the range of:

       (ssizetype) OP0 CODE (ssizetype) OP1

     The distributive property guarantees that this has the same bitpattern as:

       (sizetype) OP0 CODE (sizetype) OP1

     but its range is more conducive to analysis.  */
  range_cast (op0_range, ssizetype);
  range_cast (op1_range, ssizetype);
  value_range wide_range;
  range_operator *op = range_op_handler (code, ssizetype);
  /* Temporarily enable flag_wrapv around fold_range — presumably so
     that the operation is folded with wrapping semantics; verify
     against range-op behavior.  */
  bool saved_flag_wrapv = flag_wrapv;
  flag_wrapv = 1;
  op->fold_range (wide_range, ssizetype, op0_range, op1_range);
  flag_wrapv = saved_flag_wrapv;
  /* Give up unless the result is a single constant range.  */
  if (wide_range.num_pairs () != 1 || !range_int_cst_p (&wide_range))
    return false;

  wide_int lb = wide_range.lower_bound ();
  wide_int ub = wide_range.upper_bound ();

  /* Calculate the number of times that each end of the range overflows or
     underflows TYPE.  We can only calculate DELTA if the numbers match.  */
  unsigned int precision = TYPE_PRECISION (type);
  if (!TYPE_UNSIGNED (type))
    {
      /* Bias signed bounds so that the overflow count can be read from
	 the bits above PRECISION, as in the unsigned case.  */
      wide_int type_min = wi::mask (precision - 1, true, lb.get_precision ());
      lb -= type_min;
      ub -= type_min;
    }
  wide_int upper_bits = wi::mask (precision, true, lb.get_precision ());
  lb &= upper_bits;
  ub &= upper_bits;
  if (lb != ub)
    return false;

  /* OP0 CODE OP1 overflows exactly arshift (LB, PRECISION) times, with
     negative values indicating underflow.  The low PRECISION bits of LB
     are clear, so DELTA is therefore LB (== UB).  */
  *off = wide_int_to_tree (ssizetype, wi::to_wide (*off) - lb);
  return true;
}
/* Return true if (sizetype) OP == (sizetype) (TO_TYPE) OP,
   given that OP has type FROM_TYPE and range RANGE.  Both TO_TYPE and
   FROM_TYPE are integral types.  */

static bool
nop_conversion_for_offset_p (tree to_type, tree from_type, value_range &range)
{
  gcc_assert (INTEGRAL_TYPE_P (to_type)
	      && INTEGRAL_TYPE_P (from_type)
	      && !TYPE_OVERFLOW_TRAPS (to_type)
	      && !TYPE_OVERFLOW_TRAPS (from_type));

  /* Converting to something no narrower than sizetype and then to sizetype
     is equivalent to converting directly to sizetype.  */
  if (TYPE_PRECISION (to_type) >= TYPE_PRECISION (sizetype))
    return true;

  /* Check whether TO_TYPE can represent all values that FROM_TYPE can.  */
  if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type)
      && (TYPE_UNSIGNED (from_type) || !TYPE_UNSIGNED (to_type)))
    return true;

  /* For narrowing conversions, we could in principle test whether
     the bits in FROM_TYPE but not in TO_TYPE have a fixed value
     and apply a constant adjustment.

     For other conversions (which involve a sign change) we could
     check that the signs are always equal, and apply a constant
     adjustment if the signs are negative.

     However, both cases should be rare.  */
  return range_fits_type_p (&range, TYPE_PRECISION (to_type),
			    TYPE_SIGN (to_type));
}
710 static void
711 split_constant_offset (tree type, tree *var, tree *off,
712 value_range *result_range,
713 hash_map<tree, std::pair<tree, tree> > &cache,
714 unsigned *limit);
/* Helper function for split_constant_offset.  If TYPE is a pointer type,
   try to express OP0 CODE OP1 as:

     POINTER_PLUS <*VAR, (sizetype) *OFF>

   where:

   - *VAR has type TYPE
   - *OFF is a constant of type ssizetype.

   If TYPE is an integral type, try to express (sizetype) (OP0 CODE OP1) as:

     *VAR + (sizetype) *OFF

   where:

   - *VAR has type sizetype
   - *OFF is a constant of type ssizetype.

   In both cases, OP0 CODE OP1 has type TYPE.

   Return true on success.  A false return value indicates that we can't
   do better than set *OFF to zero.

   When returning true, set RESULT_RANGE to the range of OP0 CODE OP1,
   if RESULT_RANGE is nonnull and if we can do better than assume VR_VARYING.

   CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
   visited.  LIMIT counts down the number of SSA names that we are
   allowed to process before giving up.  */

static bool
split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
			 tree *var, tree *off, value_range *result_range,
			 hash_map<tree, std::pair<tree, tree> > &cache,
			 unsigned *limit)
{
  tree var0, var1;
  tree off0, off1;
  value_range op0_range, op1_range;

  *var = NULL_TREE;
  *off = NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      /* A pure constant: zero variable part, the constant as offset.  */
      *var = size_int (0);
      *off = fold_convert (ssizetype, op0);
      if (result_range)
	result_range->set (op0, op0);
      return true;

    case POINTER_PLUS_EXPR:
      /* Split both operands and recombine the variable and constant
	 parts separately.  */
      split_constant_offset (op0, &var0, &off0, nullptr, cache, limit);
      split_constant_offset (op1, &var1, &off1, nullptr, cache, limit);
      *var = fold_build2 (POINTER_PLUS_EXPR, type, var0, var1);
      *off = size_binop (PLUS_EXPR, off0, off1);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
      split_constant_offset (op1, &var1, &off1, &op1_range, cache, limit);
      *off = size_binop (code, off0, off1);
      /* Check that the split is valid under sizetype arithmetic and
	 adjust *OFF for any overflow delta.  */
      if (!compute_distributive_range (type, op0_range, code, op1_range,
				       off, result_range))
	return false;
      *var = fold_build2 (code, sizetype, var0, var1);
      return true;

    case MULT_EXPR:
      /* Only multiplications by a constant factor are handled.  */
      if (TREE_CODE (op1) != INTEGER_CST)
	return false;

      split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
      op1_range.set (op1, op1);
      *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
      if (!compute_distributive_range (type, op0_range, code, op1_range,
				       off, result_range))
	return false;
      *var = fold_build2 (MULT_EXPR, sizetype, var0,
			  fold_convert (sizetype, op1));
      return true;

    case ADDR_EXPR:
      {
	tree base, poffset;
	poly_int64 pbitsize, pbitpos, pbytepos;
	machine_mode pmode;
	int punsignedp, preversep, pvolatilep;

	/* Decompose the addressed reference into base, bit position and
	   variable offset.  */
	op0 = TREE_OPERAND (op0, 0);
	base
	  = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
				 &punsignedp, &preversep, &pvolatilep);

	/* Punt on bit positions that are not a whole number of bytes.  */
	if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
	  return false;
	base = build_fold_addr_expr (base);
	off0 = ssize_int (pbytepos);

	if (poffset)
	  {
	    /* Fold the variable offset's own constant part into OFF0.  */
	    split_constant_offset (poffset, &poffset, &off1, nullptr,
				   cache, limit);
	    off0 = size_binop (PLUS_EXPR, off0, off1);
	    base = fold_build_pointer_plus (base, poffset);
	  }

	var0 = fold_convert (type, base);

	/* If variable length types are involved, punt, otherwise casts
	   might be converted into ARRAY_REFs in gimplify_conversion.
	   To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
	   possibly no longer appears in current GIMPLE, might resurface.
	   This perhaps could run
	   if (CONVERT_EXPR_P (var0))
	     {
	       gimplify_conversion (&var0);
	       // Attempt to fill in any within var0 found ARRAY_REF's
	       // element size from corresponding op embedded ARRAY_REF,
	       // if unsuccessful, just punt.
	     }  */
	while (POINTER_TYPE_P (type))
	  type = TREE_TYPE (type);
	if (int_size_in_bytes (type) < 0)
	  return false;

	*var = var0;
	*off = off0;
	return true;
      }

    case SSA_NAME:
      {
	if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
	  return false;

	gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
	enum tree_code subcode;

	if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
	  return false;

	subcode = gimple_assign_rhs_code (def_stmt);

	/* We are using a cache to avoid un-CSEing large amounts of code.  */
	bool use_cache = false;
	if (!has_single_use (op0)
	    && (subcode == POINTER_PLUS_EXPR
		|| subcode == PLUS_EXPR
		|| subcode == MINUS_EXPR
		|| subcode == MULT_EXPR
		|| subcode == ADDR_EXPR
		|| CONVERT_EXPR_CODE_P (subcode)))
	  {
	    use_cache = true;
	    bool existed;
	    std::pair<tree, tree> &e = cache.get_or_insert (op0, &existed);
	    if (existed)
	      {
		/* A zero cached offset marks an SSA name we have already
		   visited without finding a split (or are currently
		   visiting); fail in that case.  */
		if (integer_zerop (e.second))
		  return false;
		*var = e.first;
		*off = e.second;
		/* The caller sets the range in this case.  */
		return true;
	      }
	    /* Insert a placeholder before recursing, so that cycles in
	       the definition chain terminate.  */
	    e = std::make_pair (op0, ssize_int (0));
	  }

	if (*limit == 0)
	  return false;
	--*limit;

	var0 = gimple_assign_rhs1 (def_stmt);
	var1 = gimple_assign_rhs2 (def_stmt);

	bool res = split_constant_offset_1 (type, var0, subcode, var1,
					    var, off, nullptr, cache, limit);
	if (res && use_cache)
	  *cache.get (op0) = std::make_pair (*var, *off);
	/* The caller sets the range in this case.  */
	return res;
      }

    CASE_CONVERT:
      {
	/* We can only handle the following conversions:

	   - Conversions from one pointer type to another pointer type.

	   - Conversions from one non-trapping integral type to another
	     non-trapping integral type.  In this case, the recursive
	     call makes sure that:

	       (sizetype) OP0

	     can be expressed as a sizetype operation involving VAR and OFF,
	     and all we need to do is check whether:

	       (sizetype) OP0 == (sizetype) (TYPE) OP0

	   - Conversions from a non-trapping sizetype-size integral type to
	     a like-sized pointer type.  In this case, the recursive call
	     makes sure that:

	       (sizetype) OP0 == *VAR + (sizetype) *OFF

	     and we can convert that to:

	       POINTER_PLUS <(TYPE) *VAR, (sizetype) *OFF>

	   - Conversions from a sizetype-sized pointer type to a like-sized
	     non-trapping integral type.  In this case, the recursive call
	     makes sure that:

	       OP0 == POINTER_PLUS <*VAR, (sizetype) *OFF>

	     where the POINTER_PLUS and *VAR have the same precision as
	     TYPE (and the same precision as sizetype).  Then:

	       (sizetype) (TYPE) OP0 == (sizetype) *VAR + (sizetype) *OFF.  */
	tree itype = TREE_TYPE (op0);
	if ((POINTER_TYPE_P (itype)
	     || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
	    && (POINTER_TYPE_P (type)
		|| (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type)))
	    && (POINTER_TYPE_P (type) == POINTER_TYPE_P (itype)
		|| (TYPE_PRECISION (type) == TYPE_PRECISION (sizetype)
		    && TYPE_PRECISION (itype) == TYPE_PRECISION (sizetype))))
	  {
	    if (POINTER_TYPE_P (type))
	      {
		/* Pointer-to-pointer or integer-to-pointer.  */
		split_constant_offset (op0, var, off, nullptr, cache, limit);
		*var = fold_convert (type, *var);
	      }
	    else if (POINTER_TYPE_P (itype))
	      {
		/* Pointer-to-integer.  */
		split_constant_offset (op0, var, off, nullptr, cache, limit);
		*var = fold_convert (sizetype, *var);
	      }
	    else
	      {
		/* Integer-to-integer: only valid if the conversion is a
		   no-op as far as sizetype arithmetic is concerned.  */
		split_constant_offset (op0, var, off, &op0_range,
				       cache, limit);
		if (!nop_conversion_for_offset_p (type, itype, op0_range))
		  return false;
		if (result_range)
		  {
		    *result_range = op0_range;
		    range_cast (*result_range, type);
		  }
	      }
	    return true;
	  }
	return false;
      }

    default:
      return false;
    }
}
/* If EXP has pointer type, try to express it as:

     POINTER_PLUS <*VAR, (sizetype) *OFF>

   where:

   - *VAR has the same type as EXP
   - *OFF is a constant of type ssizetype.

   If EXP has an integral type, try to express (sizetype) EXP as:

     *VAR + (sizetype) *OFF

   where:

   - *VAR has type sizetype
   - *OFF is a constant of type ssizetype.

   If EXP_RANGE is nonnull, set it to the range of EXP.

   CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
   visited.  LIMIT counts down the number of SSA names that we are
   allowed to process before giving up.  */

static void
split_constant_offset (tree exp, tree *var, tree *off, value_range *exp_range,
		       hash_map<tree, std::pair<tree, tree> > &cache,
		       unsigned *limit)
{
  tree type = TREE_TYPE (exp), op0, op1;
  enum tree_code code;

  code = TREE_CODE (exp);
  if (exp_range)
    {
      /* Start from the full range of EXP's type and refine it for SSA
	 names using the range query and known nonzero bits.  */
      *exp_range = type;
      if (code == SSA_NAME)
	{
	  value_range vr;
	  get_range_query (cfun)->range_of_expr (vr, exp);
	  if (vr.undefined_p ())
	    vr.set_varying (TREE_TYPE (exp));
	  wide_int var_min = wi::to_wide (vr.min ());
	  wide_int var_max = wi::to_wide (vr.max ());
	  value_range_kind vr_kind = vr.kind ();
	  wide_int var_nonzero = get_nonzero_bits (exp);
	  vr_kind = intersect_range_with_nonzero_bits (vr_kind,
						       &var_min, &var_max,
						       var_nonzero,
						       TYPE_SIGN (type));
	  /* This check for VR_VARYING is here because the old code
	     using get_range_info would return VR_RANGE for the entire
	     domain, instead of VR_VARYING.  The new code normalizes
	     full-domain ranges to VR_VARYING.  */
	  if (vr_kind == VR_RANGE || vr_kind == VR_VARYING)
	    *exp_range = value_range (type, var_min, var_max);
	}
    }

  if (!tree_is_chrec (exp)
      && get_gimple_rhs_class (TREE_CODE (exp)) != GIMPLE_TERNARY_RHS)
    {
      extract_ops_from_tree (exp, &code, &op0, &op1);
      if (split_constant_offset_1 (type, op0, code, op1, var, off,
				   exp_range, cache, limit))
	return;
    }

  /* Fallback: no split found, so EXP itself is the variable part and
     the constant offset is zero.  */
  *var = exp;
  if (INTEGRAL_TYPE_P (type))
    *var = fold_convert (sizetype, *var);
  *off = ssize_int (0);

  value_range r;
  if (exp_range && code != SSA_NAME
      && get_range_query (cfun)->range_of_expr (r, exp)
      && !r.undefined_p ())
    *exp_range = r;
}
/* Expresses EXP as VAR + OFF, where OFF is a constant.  VAR has the same
   type as EXP while OFF has type ssizetype.

   The SSA-name cache is a function-local static that is lazily
   allocated, shared across calls and emptied before returning, so it
   only persists within a single top-level invocation.  NOTE(review):
   this makes the function non-reentrant; fine for GCC's
   single-threaded middle end.  */

void
split_constant_offset (tree exp, tree *var, tree *off)
{
  unsigned limit = param_ssa_name_def_chain_limit;
  static hash_map<tree, std::pair<tree, tree> > *cache;
  if (!cache)
    cache = new hash_map<tree, std::pair<tree, tree> > (37);
  split_constant_offset (exp, var, off, nullptr, *cache, &limit);
  *var = fold_convert (TREE_TYPE (exp), *var);
  cache->empty ();
}
1075 /* Returns the address ADDR of an object in a canonical shape (without nop
1076 casts, and with type of pointer to the object). */
1078 static tree
1079 canonicalize_base_object_address (tree addr)
1081 tree orig = addr;
1083 STRIP_NOPS (addr);
1085 /* The base address may be obtained by casting from integer, in that case
1086 keep the cast. */
1087 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
1088 return orig;
1090 if (TREE_CODE (addr) != ADDR_EXPR)
1091 return addr;
1093 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
1096 /* Analyze the behavior of memory reference REF within STMT.
1097 There are two modes:
1099 - BB analysis. In this case we simply split the address into base,
1100 init and offset components, without reference to any containing loop.
1101 The resulting base and offset are general expressions and they can
1102 vary arbitrarily from one iteration of the containing loop to the next.
1103 The step is always zero.
1105 - loop analysis. In this case we analyze the reference both wrt LOOP
1106 and on the basis that the reference occurs (is "used") in LOOP;
1107 see the comment above analyze_scalar_evolution_in_loop for more
1108 information about this distinction. The base, init, offset and
1109 step fields are all invariant in LOOP.
1111 Perform BB analysis if LOOP is null, or if LOOP is the function's
1112 dummy outermost loop. In other cases perform loop analysis.
1114 Return true if the analysis succeeded and store the results in DRB if so.
1115 BB analysis can only fail for bitfield or reversed-storage accesses. */
1117 opt_result
1118 dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
1119 class loop *loop, const gimple *stmt)
1121 poly_int64 pbitsize, pbitpos;
1122 tree base, poffset;
1123 machine_mode pmode;
1124 int punsignedp, preversep, pvolatilep;
1125 affine_iv base_iv, offset_iv;
1126 tree init, dinit, step;
/* Loop analysis only when a real loop (nonzero number) was given;
   a null LOOP or the dummy outermost loop means BB analysis.  */
1127 bool in_loop = (loop && loop->num);
1129 if (dump_file && (dump_flags & TDF_DETAILS))
1130 fprintf (dump_file, "analyze_innermost: ");
/* Decompose REF into a base object plus a constant bit position and a
   variable byte offset POFFSET.  */
1132 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
1133 &punsignedp, &preversep, &pvolatilep);
1134 gcc_assert (base != NULL_TREE);
/* The bit position must be a whole number of bytes; otherwise this is
   a bitfield-like access we cannot describe.  */
1136 poly_int64 pbytepos;
1137 if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
1138 return opt_result::failure_at (stmt,
1139 "failed: bit offset alignment.\n");
1141 if (preversep)
1142 return opt_result::failure_at (stmt,
1143 "failed: reverse storage order.\n");
1145 /* Calculate the alignment and misalignment for the inner reference. */
1146 unsigned int HOST_WIDE_INT bit_base_misalignment;
1147 unsigned int bit_base_alignment;
1148 get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);
1150 /* There are no bitfield references remaining in BASE, so the values
1151 we got back must be whole bytes. */
1152 gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
1153 && bit_base_misalignment % BITS_PER_UNIT == 0);
1154 unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
1155 poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;
/* Turn BASE into an address value: for a MEM_REF fold any nonzero
   constant offset into POFFSET and use the pointer operand directly,
   otherwise take the address of the object.  */
1157 if (TREE_CODE (base) == MEM_REF)
1159 if (!integer_zerop (TREE_OPERAND (base, 1)))
1161 /* Subtract MOFF from the base and add it to POFFSET instead.
1162 Adjust the misalignment to reflect the amount we subtracted. */
1163 poly_offset_int moff = mem_ref_offset (base);
1164 base_misalignment -= moff.force_shwi ();
1165 tree mofft = wide_int_to_tree (sizetype, moff);
1166 if (!poffset)
1167 poffset = mofft;
1168 else
1169 poffset = size_binop (PLUS_EXPR, poffset, mofft);
1171 base = TREE_OPERAND (base, 0);
1173 else
1174 base = build_fold_addr_expr (base);
/* For loop analysis the base address must evolve as a simple affine
   IV in LOOP; for BB analysis treat it as invariant (zero step).  */
1176 if (in_loop)
1178 if (!simple_iv (loop, loop, base, &base_iv, true))
1179 return opt_result::failure_at
1180 (stmt, "failed: evolution of base is not affine.\n");
1182 else
1184 base_iv.base = base;
1185 base_iv.step = ssize_int (0);
1186 base_iv.no_overflow = true;
/* Likewise for the variable offset, which may be absent entirely.  */
1189 if (!poffset)
1191 offset_iv.base = ssize_int (0);
1192 offset_iv.step = ssize_int (0);
1194 else
1196 if (!in_loop)
1198 offset_iv.base = poffset;
1199 offset_iv.step = ssize_int (0);
1201 else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
1202 return opt_result::failure_at
1203 (stmt, "failed: evolution of offset is not affine.\n");
/* INIT accumulates every compile-time-constant byte component.  */
1206 init = ssize_int (pbytepos);
1208 /* Subtract any constant component from the base and add it to INIT instead.
1209 Adjust the misalignment to reflect the amount we subtracted. */
1210 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
1211 init = size_binop (PLUS_EXPR, init, dinit);
1212 base_misalignment -= TREE_INT_CST_LOW (dinit);
1214 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
1215 init = size_binop (PLUS_EXPR, init, dinit);
/* The overall step is the sum of the base and offset evolutions.  */
1217 step = size_binop (PLUS_EXPR,
1218 fold_convert (ssizetype, base_iv.step),
1219 fold_convert (ssizetype, offset_iv.step));
1221 base = canonicalize_base_object_address (base_iv.base);
1223 /* See if get_pointer_alignment can guarantee a higher alignment than
1224 the one we calculated above. */
1225 unsigned int HOST_WIDE_INT alt_misalignment;
1226 unsigned int alt_alignment;
1227 get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);
1229 /* As above, these values must be whole bytes. */
1230 gcc_assert (alt_alignment % BITS_PER_UNIT == 0
1231 && alt_misalignment % BITS_PER_UNIT == 0);
1232 alt_alignment /= BITS_PER_UNIT;
1233 alt_misalignment /= BITS_PER_UNIT;
1235 if (base_alignment < alt_alignment)
1237 base_alignment = alt_alignment;
1238 base_misalignment = alt_misalignment;
/* Commit the results to DRB.  */
1241 drb->base_address = base;
1242 drb->offset = fold_convert (ssizetype, offset_iv.base);
1243 drb->init = init;
1244 drb->step = step;
/* If the misalignment is not known at this alignment, fall back to
   the alignment the misalignment value itself guarantees.  */
1245 if (known_misalignment (base_misalignment, base_alignment,
1246 &drb->base_misalignment))
1247 drb->base_alignment = base_alignment;
1248 else
1250 drb->base_alignment = known_alignment (base_misalignment);
1251 drb->base_misalignment = 0;
1253 drb->offset_alignment = highest_pow2_factor (offset_iv.base);
1254 drb->step_alignment = highest_pow2_factor (step);
1256 if (dump_file && (dump_flags & TDF_DETAILS))
1257 fprintf (dump_file, "success.\n");
1259 return opt_result::success ();
1262 /* Return true if OP is a valid component reference for a DR access
1263 function. This accepts a subset of what handled_component_p accepts. */
1265 static bool
1266 access_fn_component_p (tree op)
1268 switch (TREE_CODE (op))
1270 case REALPART_EXPR:
1271 case IMAGPART_EXPR:
1272 case ARRAY_REF:
1273 return true;
1275 case COMPONENT_REF:
1276 return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;
1278 default:
1279 return false;
1283 /* Returns whether BASE can have a access_fn_component_p with BASE
1284 as base. */
1286 static bool
1287 base_supports_access_fn_components_p (tree base)
1289 switch (TREE_CODE (TREE_TYPE (base)))
1291 case COMPLEX_TYPE:
1292 case ARRAY_TYPE:
1293 case RECORD_TYPE:
1294 return true;
1295 default:
1296 return false;
1300 /* Determines the base object and the list of indices of memory reference
1301 DR, analyzed in LOOP and instantiated before NEST. */
1303 static void
1304 dr_analyze_indices (struct indices *dri, tree ref, edge nest, loop_p loop)
1306 /* If analyzing a basic-block there are no indices to analyze
1307 and thus no access functions. */
1308 if (!nest)
1310 dri->base_object = ref;
1311 dri->access_fns.create (0);
1312 return;
1315 vec<tree> access_fns = vNULL;
1317 /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
1318 into a two element array with a constant index. The base is
1319 then just the immediate underlying object. */
1320 if (TREE_CODE (ref) == REALPART_EXPR)
1322 ref = TREE_OPERAND (ref, 0);
1323 access_fns.safe_push (integer_zero_node);
1325 else if (TREE_CODE (ref) == IMAGPART_EXPR)
1327 ref = TREE_OPERAND (ref, 0);
1328 access_fns.safe_push (integer_one_node);
1331 /* Analyze access functions of dimensions we know to be independent.
1332 The list of component references handled here should be kept in
1333 sync with access_fn_component_p. */
/* The loop peels REF from the outside in, so ACCESS_FNS receives the
   innermost dimension first.  */
1334 while (handled_component_p (ref))
1336 if (TREE_CODE (ref) == ARRAY_REF)
/* The array index, analyzed as a scalar evolution and instantiated
   before NEST, is the access function for this dimension.  */
1338 tree op = TREE_OPERAND (ref, 1);
1339 tree access_fn = analyze_scalar_evolution (loop, op);
1340 access_fn = instantiate_scev (nest, loop, access_fn);
1341 access_fns.safe_push (access_fn);
1343 else if (TREE_CODE (ref) == COMPONENT_REF
1344 && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
1346 /* For COMPONENT_REFs of records (but not unions!) use the
1347 FIELD_DECL offset as constant access function so we can
1348 disambiguate a[i].f1 and a[i].f2. */
1349 tree off = component_ref_field_offset (ref);
1350 off = size_binop (PLUS_EXPR,
1351 size_binop (MULT_EXPR,
1352 fold_convert (bitsizetype, off),
1353 bitsize_int (BITS_PER_UNIT)),
1354 DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
1355 access_fns.safe_push (off);
1357 else
1358 /* If we have an unhandled component we could not translate
1359 to an access function stop analyzing. We have determined
1360 our base object in this case. */
1361 break;
1363 ref = TREE_OPERAND (ref, 0);
1366 /* If the address operand of a MEM_REF base has an evolution in the
1367 analyzed nest, add it as an additional independent access-function. */
1368 if (TREE_CODE (ref) == MEM_REF)
1370 tree op = TREE_OPERAND (ref, 0);
1371 tree access_fn = analyze_scalar_evolution (loop, op);
1372 access_fn = instantiate_scev (nest, loop, access_fn);
1373 if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
1375 tree memoff = TREE_OPERAND (ref, 1);
1376 tree base = initial_condition (access_fn);
1377 tree orig_type = TREE_TYPE (base);
1378 STRIP_USELESS_TYPE_CONVERSION (base);
1379 tree off;
1380 split_constant_offset (base, &base, &off);
1381 STRIP_USELESS_TYPE_CONVERSION (base);
1382 /* Fold the MEM_REF offset into the evolutions initial
1383 value to make more bases comparable. */
1384 if (!integer_zerop (memoff))
1386 off = size_binop (PLUS_EXPR, off,
1387 fold_convert (ssizetype, memoff));
1388 memoff = build_int_cst (TREE_TYPE (memoff), 0);
1390 /* Adjust the offset so it is a multiple of the access type
1391 size and thus we separate bases that can possibly be used
1392 to produce partial overlaps (which the access_fn machinery
1393 cannot handle). */
1394 wide_int rem;
1395 if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
1396 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
1397 && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
1398 rem = wi::mod_trunc
1399 (wi::to_wide (off),
1400 wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
1401 SIGNED);
1402 else
1403 /* If we can't compute the remainder simply force the initial
1404 condition to zero. */
1405 rem = wi::to_wide (off);
1406 off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
1407 memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
1408 /* And finally replace the initial condition. */
1409 access_fn = chrec_replace_initial_condition
1410 (access_fn, fold_convert (orig_type, off));
1411 /* ??? This is still not a suitable base object for
1412 dr_may_alias_p - the base object needs to be an
1413 access that covers the object as whole. With
1414 an evolution in the pointer this cannot be
1415 guaranteed.
1416 As a band-aid, mark the access so we can special-case
1417 it in dr_may_alias_p. */
1418 tree old = ref;
1419 ref = fold_build2_loc (EXPR_LOCATION (ref),
1420 MEM_REF, TREE_TYPE (ref),
1421 base, memoff);
/* Preserve the dependence clique/base info of the original MEM_REF
   on the rebuilt one.  */
1422 MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
1423 MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
1424 dri->unconstrained_base = true;
1425 access_fns.safe_push (access_fn);
1428 else if (DECL_P (ref))
1430 /* Canonicalize DR_BASE_OBJECT to MEM_REF form. */
1431 ref = build2 (MEM_REF, TREE_TYPE (ref),
1432 build_fold_addr_expr (ref),
1433 build_int_cst (reference_alias_ptr_type (ref), 0));
/* Record the remaining base object and the collected access
   functions (innermost dimension first).  */
1436 dri->base_object = ref;
1437 dri->access_fns = access_fns;
1440 /* Extracts the alias analysis information from the memory reference DR. */
1442 static void
1443 dr_analyze_alias (struct data_reference *dr)
1445 tree ref = DR_REF (dr);
1446 tree base = get_base_address (ref), addr;
1448 if (INDIRECT_REF_P (base)
1449 || TREE_CODE (base) == MEM_REF)
1451 addr = TREE_OPERAND (base, 0);
1452 if (TREE_CODE (addr) == SSA_NAME)
1453 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1457 /* Frees data reference DR. */
1459 void
1460 free_data_ref (data_reference_p dr)
1462 DR_ACCESS_FNS (dr).release ();
1463 if (dr->alt_indices.base_object)
1464 dr->alt_indices.access_fns.release ();
1465 free (dr);
1468 /* Analyze memory reference MEMREF, which is accessed in STMT.
1469 The reference is a read if IS_READ is true, otherwise it is a write.
1470 IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
1471 within STMT, i.e. that it might not occur even if STMT is executed
1472 and runs to completion.
1474 Return the data_reference description of MEMREF. NEST is the outermost
1475 loop in which the reference should be instantiated, LOOP is the loop
1476 in which the data reference should be analyzed. */
1478 struct data_reference *
1479 create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
1480 bool is_read, bool is_conditional_in_stmt)
1482 struct data_reference *dr;
1484 if (dump_file && (dump_flags & TDF_DETAILS))
1486 fprintf (dump_file, "Creating dr for ");
1487 print_generic_expr (dump_file, memref, TDF_SLIM);
1488 fprintf (dump_file, "\n");
/* Allocate a zero-initialized descriptor and record the reference.  */
1491 dr = XCNEW (struct data_reference);
1492 DR_STMT (dr) = stmt;
1493 DR_REF (dr) = memref;
1494 DR_IS_READ (dr) = is_read;
1495 DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;
/* Run the three analyses: innermost behavior (loop analysis only when
   NEST is given), index/access functions, and alias information.  */
1497 dr_analyze_innermost (&DR_INNERMOST (dr), memref,
1498 nest != NULL ? loop : NULL, stmt);
1499 dr_analyze_indices (&dr->indices, DR_REF (dr), nest, loop);
1500 dr_analyze_alias (dr);
1502 if (dump_file && (dump_flags & TDF_DETAILS))
1504 unsigned i;
1505 fprintf (dump_file, "\tbase_address: ");
1506 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1507 fprintf (dump_file, "\n\toffset from base address: ");
1508 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1509 fprintf (dump_file, "\n\tconstant offset from base address: ");
1510 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1511 fprintf (dump_file, "\n\tstep: ");
1512 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1513 fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
1514 fprintf (dump_file, "\n\tbase misalignment: %d",
1515 DR_BASE_MISALIGNMENT (dr));
1516 fprintf (dump_file, "\n\toffset alignment: %d",
1517 DR_OFFSET_ALIGNMENT (dr));
1518 fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
1519 fprintf (dump_file, "\n\tbase_object: ");
1520 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1521 fprintf (dump_file, "\n");
1522 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1524 fprintf (dump_file, "\tAccess function %d: ", i);
1525 print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1529 return dr;
1532 /* A helper function computes order between two tree expressions T1 and T2.
1533 This is used in comparator functions sorting objects based on the order
1534 of tree expressions. The function returns -1, 0, or 1. */
1537 data_ref_compare_tree (tree t1, tree t2)
1539 int i, cmp;
1540 enum tree_code code;
1541 char tclass;
1543 if (t1 == t2)
1544 return 0;
1545 if (t1 == NULL)
1546 return -1;
1547 if (t2 == NULL)
1548 return 1;
1550 STRIP_USELESS_TYPE_CONVERSION (t1);
1551 STRIP_USELESS_TYPE_CONVERSION (t2);
1552 if (t1 == t2)
1553 return 0;
1555 if (TREE_CODE (t1) != TREE_CODE (t2)
1556 && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
1557 return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
1559 code = TREE_CODE (t1);
1560 switch (code)
1562 case INTEGER_CST:
1563 return tree_int_cst_compare (t1, t2);
1565 case STRING_CST:
1566 if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
1567 return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
1568 return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
1569 TREE_STRING_LENGTH (t1));
1571 case SSA_NAME:
1572 if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
1573 return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
1574 break;
1576 default:
1577 if (POLY_INT_CST_P (t1))
1578 return compare_sizes_for_sort (wi::to_poly_widest (t1),
1579 wi::to_poly_widest (t2));
1581 tclass = TREE_CODE_CLASS (code);
1583 /* For decls, compare their UIDs. */
1584 if (tclass == tcc_declaration)
1586 if (DECL_UID (t1) != DECL_UID (t2))
1587 return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
1588 break;
1590 /* For expressions, compare their operands recursively. */
1591 else if (IS_EXPR_CODE_CLASS (tclass))
1593 for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
1595 cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
1596 TREE_OPERAND (t2, i));
1597 if (cmp != 0)
1598 return cmp;
1601 else
1602 gcc_unreachable ();
1605 return 0;
1608 /* Return TRUE it's possible to resolve data dependence DDR by runtime alias
1609 check. */
1611 opt_result
1612 runtime_alias_check_p (ddr_p ddr, class loop *loop, bool speed_p)
1614 if (dump_enabled_p ())
1615 dump_printf (MSG_NOTE,
1616 "consider run-time aliasing test between %T and %T\n",
1617 DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));
1619 if (!speed_p)
1620 return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1621 "runtime alias check not supported when"
1622 " optimizing for size.\n");
1624 /* FORNOW: We don't support versioning with outer-loop in either
1625 vectorization or loop distribution. */
1626 if (loop != NULL && loop->inner != NULL)
1627 return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1628 "runtime alias check not supported for"
1629 " outer loop.\n");
1631 return opt_result::success ();
1634 /* Operator == between two dr_with_seg_len objects.
1636 This equality operator is used to make sure two data refs
1637 are the same one so that we will consider to combine the
1638 aliasing checks of those two pairs of data dependent data
1639 refs. */
1641 static bool
1642 operator == (const dr_with_seg_len& d1,
1643 const dr_with_seg_len& d2)
1645 return (operand_equal_p (DR_BASE_ADDRESS (d1.dr),
1646 DR_BASE_ADDRESS (d2.dr), 0)
1647 && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
1648 && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
1649 && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0
1650 && known_eq (d1.access_size, d2.access_size)
1651 && d1.align == d2.align);
1654 /* Comparison function for sorting objects of dr_with_seg_len_pair_t
1655 so that we can combine aliasing checks in one scan. */
1657 static int
1658 comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
1660 const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
1661 const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
1662 const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
1663 const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;
1665 /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks
1666 if a and c have the same basic address snd step, and b and d have the same
1667 address and step. Therefore, if any a&c or b&d don't have the same address
1668 and step, we don't care the order of those two pairs after sorting. */
1669 int comp_res;
1671 if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
1672 DR_BASE_ADDRESS (b1.dr))) != 0)
1673 return comp_res;
1674 if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
1675 DR_BASE_ADDRESS (b2.dr))) != 0)
1676 return comp_res;
1677 if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
1678 DR_STEP (b1.dr))) != 0)
1679 return comp_res;
1680 if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
1681 DR_STEP (b2.dr))) != 0)
1682 return comp_res;
1683 if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
1684 DR_OFFSET (b1.dr))) != 0)
1685 return comp_res;
1686 if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
1687 DR_INIT (b1.dr))) != 0)
1688 return comp_res;
1689 if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
1690 DR_OFFSET (b2.dr))) != 0)
1691 return comp_res;
1692 if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
1693 DR_INIT (b2.dr))) != 0)
1694 return comp_res;
1696 return 0;
1699 /* Dump information about ALIAS_PAIR, indenting each line by INDENT. */
1701 static void
1702 dump_alias_pair (dr_with_seg_len_pair_t *alias_pair, const char *indent)
1704 dump_printf (MSG_NOTE, "%sreference: %T vs. %T\n", indent,
1705 DR_REF (alias_pair->first.dr),
1706 DR_REF (alias_pair->second.dr));
1708 dump_printf (MSG_NOTE, "%ssegment length: %T", indent,
1709 alias_pair->first.seg_len);
1710 if (!operand_equal_p (alias_pair->first.seg_len,
1711 alias_pair->second.seg_len, 0))
1712 dump_printf (MSG_NOTE, " vs. %T", alias_pair->second.seg_len);
1714 dump_printf (MSG_NOTE, "\n%saccess size: ", indent);
1715 dump_dec (MSG_NOTE, alias_pair->first.access_size);
1716 if (maybe_ne (alias_pair->first.access_size, alias_pair->second.access_size))
1718 dump_printf (MSG_NOTE, " vs. ");
1719 dump_dec (MSG_NOTE, alias_pair->second.access_size);
1722 dump_printf (MSG_NOTE, "\n%salignment: %d", indent,
1723 alias_pair->first.align);
1724 if (alias_pair->first.align != alias_pair->second.align)
1725 dump_printf (MSG_NOTE, " vs. %d", alias_pair->second.align);
1727 dump_printf (MSG_NOTE, "\n%sflags: ", indent);
1728 if (alias_pair->flags & DR_ALIAS_RAW)
1729 dump_printf (MSG_NOTE, " RAW");
1730 if (alias_pair->flags & DR_ALIAS_WAR)
1731 dump_printf (MSG_NOTE, " WAR");
1732 if (alias_pair->flags & DR_ALIAS_WAW)
1733 dump_printf (MSG_NOTE, " WAW");
1734 if (alias_pair->flags & DR_ALIAS_ARBITRARY)
1735 dump_printf (MSG_NOTE, " ARBITRARY");
1736 if (alias_pair->flags & DR_ALIAS_SWAPPED)
1737 dump_printf (MSG_NOTE, " SWAPPED");
1738 if (alias_pair->flags & DR_ALIAS_UNSWAPPED)
1739 dump_printf (MSG_NOTE, " UNSWAPPED");
1740 if (alias_pair->flags & DR_ALIAS_MIXED_STEPS)
1741 dump_printf (MSG_NOTE, " MIXED_STEPS");
1742 if (alias_pair->flags == 0)
1743 dump_printf (MSG_NOTE, " <none>");
1744 dump_printf (MSG_NOTE, "\n");
1747 /* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
1748 FACTOR is number of iterations that each data reference is accessed.
1750 Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
1751 we create an expression:
1753 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1754 || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
1756 for aliasing checks. However, in some cases we can decrease the number
1757 of checks by combining two checks into one. For example, suppose we have
1758 another pair of data refs store_ptr_0 & load_ptr_1, and if the following
1759 condition is satisfied:
1761 load_ptr_0 < load_ptr_1 &&
1762 load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
1764 (this condition means, in each iteration of vectorized loop, the accessed
1765 memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
1766 load_ptr_1.)
1768 we then can use only the following expression to finish the aliasing checks
1769 between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:
1771 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1772 || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))
1774 Note that we only consider that load_ptr_0 and load_ptr_1 have the same
1775 basic address. */
1777 void
1778 prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
/* The second (poly_uint64) parameter is unnamed and unused here.  */
1779 poly_uint64)
1781 if (alias_pairs->is_empty ())
1782 return;
1784 /* Canonicalize each pair so that the base components are ordered wrt
1785 data_ref_compare_tree. This allows the loop below to merge more
1786 cases. */
1787 unsigned int i;
1788 dr_with_seg_len_pair_t *alias_pair;
1789 FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1791 data_reference_p dr_a = alias_pair->first.dr;
1792 data_reference_p dr_b = alias_pair->second.dr;
1793 int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
1794 DR_BASE_ADDRESS (dr_b));
1795 if (comp_res == 0)
1796 comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
1797 if (comp_res == 0)
1798 comp_res = data_ref_compare_tree (DR_INIT (dr_a), DR_INIT (dr_b));
/* Record which pairs we swapped so the original order can be
   restored (or invalidated) after merging.  */
1799 if (comp_res > 0)
1801 std::swap (alias_pair->first, alias_pair->second);
1802 alias_pair->flags |= DR_ALIAS_SWAPPED;
1804 else
1805 alias_pair->flags |= DR_ALIAS_UNSWAPPED;
1808 /* Sort the collected data ref pairs so that we can scan them once to
1809 combine all possible aliasing checks. */
1810 alias_pairs->qsort (comp_dr_with_seg_len_pair);
1812 /* Scan the sorted dr pairs and check if we can combine alias checks
1813 of two neighboring dr pairs. */
/* LAST is the index of the most recent pair we decided to keep; the
   vector is compacted in place and truncated to LAST + 1 below.  */
1814 unsigned int last = 0;
1815 for (i = 1; i < alias_pairs->length (); ++i)
1817 /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2). */
1818 dr_with_seg_len_pair_t *alias_pair1 = &(*alias_pairs)[last];
1819 dr_with_seg_len_pair_t *alias_pair2 = &(*alias_pairs)[i];
1821 dr_with_seg_len *dr_a1 = &alias_pair1->first;
1822 dr_with_seg_len *dr_b1 = &alias_pair1->second;
1823 dr_with_seg_len *dr_a2 = &alias_pair2->first;
1824 dr_with_seg_len *dr_b2 = &alias_pair2->second;
1826 /* Remove duplicate data ref pairs. */
1827 if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
1829 if (dump_enabled_p ())
1830 dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
1831 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1832 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1833 alias_pair1->flags |= alias_pair2->flags;
1834 continue;
1837 /* Assume that we won't be able to merge the pairs, then correct
1838 if we do. */
1839 last += 1;
1840 if (last != i)
1841 (*alias_pairs)[last] = (*alias_pairs)[i];
1843 if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
1845 /* We consider the case that DR_B1 and DR_B2 are same memrefs,
1846 and DR_A1 and DR_A2 are two consecutive memrefs. */
1847 if (*dr_a1 == *dr_a2)
1849 std::swap (dr_a1, dr_b1);
1850 std::swap (dr_a2, dr_b2);
1853 poly_int64 init_a1, init_a2;
1854 /* Only consider cases in which the distance between the initial
1855 DR_A1 and the initial DR_A2 is known at compile time. */
1856 if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
1857 DR_BASE_ADDRESS (dr_a2->dr), 0)
1858 || !operand_equal_p (DR_OFFSET (dr_a1->dr),
1859 DR_OFFSET (dr_a2->dr), 0)
1860 || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
1861 || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
1862 continue;
1864 /* Don't combine if we can't tell which one comes first. */
1865 if (!ordered_p (init_a1, init_a2))
1866 continue;
1868 /* Work out what the segment length would be if we did combine
1869 DR_A1 and DR_A2:
1871 - If DR_A1 and DR_A2 have equal lengths, that length is
1872 also the combined length.
1874 - If DR_A1 and DR_A2 both have negative "lengths", the combined
1875 length is the lower bound on those lengths.
1877 - If DR_A1 and DR_A2 both have positive lengths, the combined
1878 length is the upper bound on those lengths.
1880 Other cases are unlikely to give a useful combination.
1882 The lengths both have sizetype, so the sign is taken from
1883 the step instead. */
1884 poly_uint64 new_seg_len = 0;
1885 bool new_seg_len_p = !operand_equal_p (dr_a1->seg_len,
1886 dr_a2->seg_len, 0);
1887 if (new_seg_len_p)
1889 poly_uint64 seg_len_a1, seg_len_a2;
1890 if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
1891 || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
1892 continue;
1894 tree indicator_a = dr_direction_indicator (dr_a1->dr);
1895 if (TREE_CODE (indicator_a) != INTEGER_CST)
1896 continue;
1898 tree indicator_b = dr_direction_indicator (dr_a2->dr);
1899 if (TREE_CODE (indicator_b) != INTEGER_CST)
1900 continue;
1902 int sign_a = tree_int_cst_sgn (indicator_a);
1903 int sign_b = tree_int_cst_sgn (indicator_b);
/* Only merge when both references move in the same direction.  */
1905 if (sign_a <= 0 && sign_b <= 0)
1906 new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
1907 else if (sign_a >= 0 && sign_b >= 0)
1908 new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
1909 else
1910 continue;
1912 /* At this point we're committed to merging the refs. */
1914 /* Make sure dr_a1 starts left of dr_a2. */
1915 if (maybe_gt (init_a1, init_a2))
1917 std::swap (*dr_a1, *dr_a2);
1918 std::swap (init_a1, init_a2);
1921 /* The DR_Bs are equal, so only the DR_As can introduce
1922 mixed steps. */
1923 if (!operand_equal_p (DR_STEP (dr_a1->dr), DR_STEP (dr_a2->dr), 0))
1924 alias_pair1->flags |= DR_ALIAS_MIXED_STEPS;
1926 if (new_seg_len_p)
1928 dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
1929 new_seg_len);
1930 dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
1933 /* This is always positive due to the swap above. */
1934 poly_uint64 diff = init_a2 - init_a1;
1936 /* The new check will start at DR_A1. Make sure that its access
1937 size encompasses the initial DR_A2. */
1938 if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
1940 dr_a1->access_size = upper_bound (dr_a1->access_size,
1941 diff + dr_a2->access_size);
1942 unsigned int new_align = known_alignment (dr_a1->access_size);
1943 dr_a1->align = MIN (dr_a1->align, new_align);
1945 if (dump_enabled_p ())
1946 dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
1947 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1948 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1949 alias_pair1->flags |= alias_pair2->flags;
/* Undo the speculative "keep this pair" above: the merge absorbed
   the current pair into the previous one.  */
1950 last -= 1;
1953 alias_pairs->truncate (last + 1);
1955 /* Try to restore the original dr_with_seg_len order within each
1956 dr_with_seg_len_pair_t. If we ended up combining swapped and
1957 unswapped pairs into the same check, we have to invalidate any
1958 RAW, WAR and WAW information for it. */
1959 if (dump_enabled_p ())
1960 dump_printf (MSG_NOTE, "merged alias checks:\n");
1961 FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1963 unsigned int swap_mask = (DR_ALIAS_SWAPPED | DR_ALIAS_UNSWAPPED);
1964 unsigned int swapped = (alias_pair->flags & swap_mask);
1965 if (swapped == DR_ALIAS_SWAPPED)
1966 std::swap (alias_pair->first, alias_pair->second);
1967 else if (swapped != DR_ALIAS_UNSWAPPED)
1968 alias_pair->flags |= DR_ALIAS_ARBITRARY;
1969 alias_pair->flags &= ~swap_mask;
1970 if (dump_enabled_p ())
1971 dump_alias_pair (alias_pair, " ");
1975 /* A subroutine of create_intersect_range_checks, with a subset of the
1976 same arguments. Try to use IFN_CHECK_RAW_PTRS and IFN_CHECK_WAR_PTRS
1977 to optimize cases in which the references form a simple RAW, WAR or
1978 WAW dependence. */
1980 static bool
1981 create_ifn_alias_checks (tree *cond_expr,
1982 const dr_with_seg_len_pair_t &alias_pair)
1984 const dr_with_seg_len& dr_a = alias_pair.first;
1985 const dr_with_seg_len& dr_b = alias_pair.second;
1987 /* Check for cases in which:
1989 (a) we have a known RAW, WAR or WAW dependence
1990 (b) the accesses are well-ordered in both the original and new code
1991 (see the comment above the DR_ALIAS_* flags for details); and
1992 (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR. */
/* Any flag bit outside RAW/WAR/WAW (e.g. ARBITRARY or MIXED_STEPS)
   makes the IFN form unusable.  */
1993 if (alias_pair.flags & ~(DR_ALIAS_RAW | DR_ALIAS_WAR | DR_ALIAS_WAW))
1994 return false;
1996 /* Make sure that both DRs access the same pattern of bytes,
1997 with a constant length and step. */
1998 poly_uint64 seg_len;
1999 if (!operand_equal_p (dr_a.seg_len, dr_b.seg_len, 0)
2000 || !poly_int_tree_p (dr_a.seg_len, &seg_len)
2001 || maybe_ne (dr_a.access_size, dr_b.access_size)
2002 || !operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0)
2003 || !tree_fits_uhwi_p (DR_STEP (dr_a.dr)))
2004 return false;
2006 unsigned HOST_WIDE_INT bytes = tree_to_uhwi (DR_STEP (dr_a.dr));
2007 tree addr_a = DR_BASE_ADDRESS (dr_a.dr);
2008 tree addr_b = DR_BASE_ADDRESS (dr_b.dr);
2010 /* See whether the target supports what we want to do. WAW checks are
2011 equivalent to WAR checks here. */
2012 internal_fn ifn = (alias_pair.flags & DR_ALIAS_RAW
2013 ? IFN_CHECK_RAW_PTRS
2014 : IFN_CHECK_WAR_PTRS);
2015 unsigned int align = MIN (dr_a.align, dr_b.align);
2016 poly_uint64 full_length = seg_len + bytes;
/* Try the step-padded length first, then fall back to padding by the
   access size.  */
2017 if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
2018 full_length, align))
2020 full_length = seg_len + dr_a.access_size;
2021 if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
2022 full_length, align))
2023 return false;
2026 /* Commit to using this form of test. */
2027 addr_a = fold_build_pointer_plus (addr_a, DR_OFFSET (dr_a.dr));
2028 addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
2030 addr_b = fold_build_pointer_plus (addr_b, DR_OFFSET (dr_b.dr));
2031 addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
2033 *cond_expr = build_call_expr_internal_loc (UNKNOWN_LOCATION,
2034 ifn, boolean_type_node,
2035 4, addr_a, addr_b,
2036 size_int (full_length),
2037 size_int (align));
2039 if (dump_enabled_p ())
2041 if (ifn == IFN_CHECK_RAW_PTRS)
2042 dump_printf (MSG_NOTE, "using an IFN_CHECK_RAW_PTRS test\n");
2043 else
2044 dump_printf (MSG_NOTE, "using an IFN_CHECK_WAR_PTRS test\n");
2046 return true;
/* Try to generate a runtime condition that is true if ALIAS_PAIR is
   free of aliases, using a condition based on index values instead
   of a condition based on addresses.  Return true on success,
   storing the condition in *COND_EXPR.

   This can only be done if the two data references in ALIAS_PAIR access
   the same array object and the index is the only difference.  For example,
   if the two data references are DR_A and DR_B:

                       DR_A                           DR_B
      data-ref         arr[i]                         arr[j]
      base_object      arr                            arr
      index            {i_0, +, 1}_loop               {j_0, +, 1}_loop

   The addresses and their index are like:

        |<- ADDR_A    ->|          |<- ADDR_B    ->|
     ------------------------------------------------------->
        |   |   |   |   |          |   |   |   |   |
     ------------------------------------------------------->
        i_0 ...         i_0+4      j_0 ...         j_0+4

   We can create expression based on index rather than address:

     (unsigned) (i_0 - j_0 + 3) <= 6

   i.e. the indices are less than 4 apart.

   Note the evolution step of the index needs to be considered in the
   comparison.  */

static bool
create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
				     const dr_with_seg_len_pair_t &alias_pair)
{
  const dr_with_seg_len &dr_a = alias_pair.first;
  const dr_with_seg_len &dr_b = alias_pair.second;
  /* The index-based test requires a single common step; bail out for
     mixed steps, invariant references, or mismatched dimensionality.  */
  if ((alias_pair.flags & DR_ALIAS_MIXED_STEPS)
      || integer_zerop (DR_STEP (dr_a.dr))
      || integer_zerop (DR_STEP (dr_b.dr))
      || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
    return false;

  /* Both segment lengths must be (poly-)compile-time constants.  */
  poly_uint64 seg_len1, seg_len2;
  if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
      || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
    return false;

  if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
    return false;

  /* The references must be to the same underlying object ...  */
  if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
    return false;

  /* ... and advance by the same amount each iteration.  */
  if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
    return false;

  /* tree_fits_shwi_p above guarantees this.  */
  gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);

  bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
  unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
  if (neg_step)
    {
      /* Work with the magnitudes; negative-step segments grow downwards,
	 so their lengths are negated too.  */
      abs_step = -abs_step;
      seg_len1 = (-wi::to_poly_wide (dr_a.seg_len)).force_uhwi ();
      seg_len2 = (-wi::to_poly_wide (dr_b.seg_len)).force_uhwi ();
    }

  /* Infer the number of iterations with which the memory segment is accessed
     by DR.  In other words, alias is checked if memory segment accessed by
     DR_A in some iterations intersect with memory segment accessed by DR_B
     in the same number of iterations.
     Note segment length is a linear function of number of iterations with
     DR_STEP as the coefficient.  */
  poly_uint64 niter_len1, niter_len2;
  if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
      || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
    return false;

  /* Divide each access size by the byte step, rounding up.  */
  poly_uint64 niter_access1, niter_access2;
  if (!can_div_trunc_p (dr_a.access_size + abs_step - 1,
			abs_step, &niter_access1)
      || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
			   abs_step, &niter_access2))
    return false;

  /* True if only write-after-write and/or write-after-read dependencies
     are possible, which permits a cheaper one-sided test below.  */
  bool waw_or_war_p = (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) == 0;

  /* Find the single dimension in which the two access functions differ;
     all other dimensions must be identical.  */
  int found = -1;
  for (unsigned int i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
    {
      tree access1 = DR_ACCESS_FN (dr_a.dr, i);
      tree access2 = DR_ACCESS_FN (dr_b.dr, i);
      /* Two indices must be the same if they are not scev, or not scev wrto
	 current loop being vectorized.  */
      if (TREE_CODE (access1) != POLYNOMIAL_CHREC
	  || TREE_CODE (access2) != POLYNOMIAL_CHREC
	  || CHREC_VARIABLE (access1) != (unsigned) loop->num
	  || CHREC_VARIABLE (access2) != (unsigned) loop->num)
	{
	  if (operand_equal_p (access1, access2, 0))
	    continue;

	  return false;
	}
      if (found >= 0)
	return false;
      found = i;
    }

  /* Ought not to happen in practice, since if all accesses are equal then the
     alias should be decidable at compile time.  */
  if (found < 0)
    return false;

  /* The two indices must have the same step.  */
  tree access1 = DR_ACCESS_FN (dr_a.dr, found);
  tree access2 = DR_ACCESS_FN (dr_b.dr, found);
  if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
    return false;

  tree idx_step = CHREC_RIGHT (access1);
  /* Index must have const step, otherwise DR_STEP won't be constant.  */
  gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
  /* Index must evaluate in the same direction as DR.  */
  gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);

  tree min1 = CHREC_LEFT (access1);
  tree min2 = CHREC_LEFT (access2);
  if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
    return false;

  /* Ideally, alias can be checked against loop's control IV, but we
     need to prove linear mapping between control IV and reference
     index.  Although that should be true, we check against (array)
     index of data reference.  Like segment length, index length is
     linear function of the number of iterations with index_step as
     the coefficient, i.e, niter_len * idx_step.  */
  offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step),
					      SIGNED);
  if (neg_step)
    abs_idx_step = -abs_idx_step;
  poly_offset_int idx_len1 = abs_idx_step * niter_len1;
  poly_offset_int idx_len2 = abs_idx_step * niter_len2;
  poly_offset_int idx_access1 = abs_idx_step * niter_access1;
  poly_offset_int idx_access2 = abs_idx_step * niter_access2;

  gcc_assert (known_ge (idx_len1, 0)
	      && known_ge (idx_len2, 0)
	      && known_ge (idx_access1, 0)
	      && known_ge (idx_access2, 0));

  /* Each access has the following pattern, with lengths measured
     in units of INDEX:

	  <-- idx_len -->
	  <--- A: -ve step --->
	  +-----+-------+-----+-------+-----+
	  | n-1 | ..... |  0  | ..... | n-1 |
	  +-----+-------+-----+-------+-----+
			<--- B: +ve step --->
			<-- idx_len -->

     where "n" is the number of scalar iterations covered by the segment
     and where each access spans idx_access units.

     A is the range of bytes accessed when the step is negative,
     B is the range when the step is positive.

     When checking for general overlap, we need to test whether
     the range:

       [min1 + low_offset1, min1 + high_offset1 + idx_access1 - 1]

     overlaps:

       [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1]

     where:

       low_offsetN = +ve step ? 0 : -idx_lenN;
       high_offsetN = +ve step ? idx_lenN : 0;

     This is equivalent to testing whether:

       min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1
       && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1

     Converting this into a single test, there is an overlap if:

       0 <= min2 - min1 + bias <= limit

     where  bias = high_offset2 + idx_access2 - 1 - low_offset1
	    limit = (high_offset1 - low_offset1 + idx_access1 - 1)
		  + (high_offset2 - low_offset2 + idx_access2 - 1)
      i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1

     Combining the tests requires limit to be computable in an unsigned
     form of the index type; if it isn't, we fall back to the usual
     pointer-based checks.

     We can do better if DR_B is a write and if DR_A and DR_B are
     well-ordered in both the original and the new code (see the
     comment above the DR_ALIAS_* flags for details).  In this case
     we know that for each i in [0, n-1], the write performed by
     access i of DR_B occurs after access numbers j<=i of DR_A in
     both the original and the new code.  Any write or anti
     dependencies wrt those DR_A accesses are therefore maintained.

     We just need to make sure that each individual write in DR_B does not
     overlap any higher-indexed access in DR_A; such DR_A accesses happen
     after the DR_B access in the original code but happen before it in
     the new code.

     We know the steps for both accesses are equal, so by induction, we
     just need to test whether the first write of DR_B overlaps a later
     access of DR_A.  In other words, we need to move min1 along by
     one iteration:

       min1' = min1 + idx_step

     and use the ranges:

       [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1]

     and:

       [min2, min2 + idx_access2 - 1]

     where:

       low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|)
       high_offset1' = +ve_step ? idx_len1 - |idx_step| : 0.  */
  if (waw_or_war_p)
    idx_len1 -= abs_idx_step;

  poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1;
  if (!waw_or_war_p)
    limit += idx_len2;

  tree utype = unsigned_type_for (TREE_TYPE (min1));
  if (!wi::fits_to_tree_p (limit, utype))
    return false;

  poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
  poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2;
  poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
  /* Equivalent to adding IDX_STEP to MIN1.  */
  if (waw_or_war_p)
    bias -= wi::to_offset (idx_step);

  /* Build (unsigned) (min2 - min1 + bias) > limit, i.e. "no overlap".  */
  tree subject = fold_build2 (MINUS_EXPR, utype,
			      fold_convert (utype, min2),
			      fold_convert (utype, min1));
  subject = fold_build2 (PLUS_EXPR, utype, subject,
			 wide_int_to_tree (utype, bias));
  tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject,
				     wide_int_to_tree (utype, limit));
  /* AND the new check onto any condition built for earlier pairs.  */
  if (*cond_expr)
    *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
			      *cond_expr, part_cond_expr);
  else
    *cond_expr = part_cond_expr;
  if (dump_enabled_p ())
    {
      if (waw_or_war_p)
	dump_printf (MSG_NOTE, "using an index-based WAR/WAW test\n");
      else
	dump_printf (MSG_NOTE, "using an index-based overlap test\n");
    }
  return true;
}
/* A subroutine of create_intersect_range_checks, with a subset of the
   same arguments.  Try to optimize cases in which the second access
   is a write and in which some overlap is valid.  */

static bool
create_waw_or_war_checks (tree *cond_expr,
			  const dr_with_seg_len_pair_t &alias_pair)
{
  const dr_with_seg_len& dr_a = alias_pair.first;
  const dr_with_seg_len& dr_b = alias_pair.second;

  /* Check for cases in which:

     (a) DR_B is always a write;
     (b) the accesses are well-ordered in both the original and new code
	 (see the comment above the DR_ALIAS_* flags for details); and
     (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
  if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
    return false;

  /* Check for equal (but possibly variable) steps.  */
  tree step = DR_STEP (dr_a.dr);
  if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
    return false;

  /* Make sure that we can operate on sizetype without loss of precision.  */
  tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
  if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
    return false;

  /* All addresses involved are known to have a common alignment ALIGN.
     We can therefore subtract ALIGN from an exclusive endpoint to get
     an inclusive endpoint.  In the best (and common) case, ALIGN is the
     same as the access sizes of both DRs, and so subtracting ALIGN
     cancels out the addition of an access size.  */
  unsigned int align = MIN (dr_a.align, dr_b.align);
  poly_uint64 last_chunk_a = dr_a.access_size - align;
  poly_uint64 last_chunk_b = dr_b.access_size - align;

  /* Get a boolean expression that is true when the step is negative.  */
  tree indicator = dr_direction_indicator (dr_a.dr);
  tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
			       fold_convert (ssizetype, indicator),
			       ssize_int (0));

  /* Get lengths in sizetype.  */
  tree seg_len_a
    = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len));
  step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step));

  /* Each access has the following pattern:

	  <- |seg_len| ->
	  <--- A: -ve step --->
	  +-----+-------+-----+-------+-----+
	  | n-1 | ..... |  0  | ..... | n-1 |
	  +-----+-------+-----+-------+-----+
			<--- B: +ve step --->
			<- |seg_len| ->
			|
		   base address

     where "n" is the number of scalar iterations covered by the segment.

     A is the range of bytes accessed when the step is negative,
     B is the range when the step is positive.

     We know that DR_B is a write.  We also know (from checking that
     DR_A and DR_B are well-ordered) that for each i in [0, n-1],
     the write performed by access i of DR_B occurs after access numbers
     j<=i of DR_A in both the original and the new code.  Any write or
     anti dependencies wrt those DR_A accesses are therefore maintained.

     We just need to make sure that each individual write in DR_B does not
     overlap any higher-indexed access in DR_A; such DR_A accesses happen
     after the DR_B access in the original code but happen before it in
     the new code.

     We know the steps for both accesses are equal, so by induction, we
     just need to test whether the first write of DR_B overlaps a later
     access of DR_A.  In other words, we need to move addr_a along by
     one iteration:

       addr_a' = addr_a + step

     and check whether:

       [addr_b, addr_b + last_chunk_b]

     overlaps:

       [addr_a' + low_offset_a, addr_a' + high_offset_a + last_chunk_a]

     where [low_offset_a, high_offset_a] spans accesses [1, n-1].  I.e.:

       low_offset_a = +ve step ? 0 : seg_len_a - step
       high_offset_a = +ve step ? seg_len_a - step : 0

     This is equivalent to testing whether:

       addr_a' + low_offset_a <= addr_b + last_chunk_b
       && addr_b <= addr_a' + high_offset_a + last_chunk_a

     Converting this into a single test, there is an overlap if:

       0 <= addr_b + last_chunk_b - addr_a' - low_offset_a <= limit

     where limit = high_offset_a - low_offset_a + last_chunk_a + last_chunk_b

     If DR_A is performed, limit + |step| - last_chunk_b is known to be
     less than the size of the object underlying DR_A.  We also know
     that last_chunk_b <= |step|; this is checked elsewhere if it isn't
     guaranteed at compile time.  There can therefore be no overflow if
     "limit" is calculated in an unsigned type with pointer precision.  */
  tree addr_a = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr),
					 DR_OFFSET (dr_a.dr));
  addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));

  tree addr_b = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr),
					 DR_OFFSET (dr_b.dr));
  addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));

  /* Advance ADDR_A by one iteration and adjust the length to compensate.  */
  addr_a = fold_build_pointer_plus (addr_a, step);
  tree seg_len_a_minus_step = fold_build2 (MINUS_EXPR, sizetype,
					   seg_len_a, step);
  /* Wrap non-constant subexpressions in SAVE_EXPRs so they are only
     evaluated once in the generated condition.  */
  if (!CONSTANT_CLASS_P (seg_len_a_minus_step))
    seg_len_a_minus_step = build1 (SAVE_EXPR, sizetype, seg_len_a_minus_step);

  tree low_offset_a = fold_build3 (COND_EXPR, sizetype, neg_step,
				   seg_len_a_minus_step, size_zero_node);
  if (!CONSTANT_CLASS_P (low_offset_a))
    low_offset_a = build1 (SAVE_EXPR, sizetype, low_offset_a);

  /* We could use COND_EXPR <neg_step, size_zero_node, seg_len_a_minus_step>,
     but it's usually more efficient to reuse the LOW_OFFSET_A result.  */
  tree high_offset_a = fold_build2 (MINUS_EXPR, sizetype, seg_len_a_minus_step,
				    low_offset_a);

  /* The amount added to addr_b - addr_a'.  */
  tree bias = fold_build2 (MINUS_EXPR, sizetype,
			   size_int (last_chunk_b), low_offset_a);

  tree limit = fold_build2 (MINUS_EXPR, sizetype, high_offset_a, low_offset_a);
  limit = fold_build2 (PLUS_EXPR, sizetype, limit,
		       size_int (last_chunk_a + last_chunk_b));

  /* "No overlap" holds when the biased difference exceeds LIMIT (the
     unsigned comparison also rejects negative biased differences).  */
  tree subject = fold_build2 (POINTER_DIFF_EXPR, ssizetype, addr_b, addr_a);
  subject = fold_build2 (PLUS_EXPR, sizetype,
			 fold_convert (sizetype, subject), bias);

  *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit);
  if (dump_enabled_p ())
    dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n");
  return true;
}
/* If ALIGN is nonzero, set up *SEG_MIN_OUT and *SEG_MAX_OUT so that for
   every address ADDR accessed by D:

     *SEG_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEG_MAX_OUT

   In this case, every element accessed by D is aligned to at least
   ALIGN bytes.

   If ALIGN is zero then instead set *SEG_MAX_OUT so that:

     *SEG_MIN_OUT <= ADDR < *SEG_MAX_OUT.  */

static void
get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
		     tree *seg_max_out, HOST_WIDE_INT align)
{
  /* Each access has the following pattern:

	  <- |seg_len| ->
	  <--- A: -ve step --->
	  +-----+-------+-----+-------+-----+
	  | n-1 | ..... |  0  | ..... | n-1 |
	  +-----+-------+-----+-------+-----+
			<--- B: +ve step --->
			<- |seg_len| ->
			|
		   base address

     where "n" is the number of scalar iterations covered by the segment.
     (This should be VF for a particular pair if we know that both steps
     are the same, otherwise it will be the full number of scalar loop
     iterations.)

     A is the range of bytes accessed when the step is negative,
     B is the range when the step is positive.

     If the access size is "access_size" bytes, the lowest addressed byte is:

	 base + (step < 0 ? seg_len : 0)   [LB]

     and the highest addressed byte is always below:

	 base + (step < 0 ? 0 : seg_len) + access_size   [UB]

     Thus:

	 LB <= ADDR < UB

     If ALIGN is nonzero, all three values are aligned to at least ALIGN
     bytes, so:

	 LB <= ADDR <= UB - ALIGN

     where "- ALIGN" folds naturally with the "+ access_size" and often
     cancels it out.

     We don't try to simplify LB and UB beyond this (e.g. by using
     MIN and MAX based on whether seg_len rather than the stride is
     negative) because it is possible for the absolute size of the
     segment to overflow the range of a ssize_t.

     Keeping the pointer_plus outside of the cond_expr should allow
     the cond_exprs to be shared with other alias checks.  */
  tree indicator = dr_direction_indicator (d.dr);
  /* True at runtime when D's step is negative.  */
  tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
			       fold_convert (ssizetype, indicator),
			       ssize_int (0));
  tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
					    DR_OFFSET (d.dr));
  addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
  tree seg_len
    = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));

  /* LB - base and (UB or UB - ALIGN) - base, per the formulas above.  */
  tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
				seg_len, size_zero_node);
  tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
				size_zero_node, seg_len);
  max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
			   size_int (d.access_size - align));

  *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
  *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
}
/* Generate a runtime condition that is true if ALIAS_PAIR is free of aliases,
   storing the condition in *COND_EXPR.  The fallback is to generate a
   test that the two accesses do not overlap:

     end_a <= start_b || end_b <= start_a.  */

static void
create_intersect_range_checks (class loop *loop, tree *cond_expr,
			       const dr_with_seg_len_pair_t &alias_pair)
{
  const dr_with_seg_len& dr_a = alias_pair.first;
  const dr_with_seg_len& dr_b = alias_pair.second;
  *cond_expr = NULL_TREE;
  /* Try the cheaper specialized tests first, from most to least
     preferred; fall through to the generic address-range test.  */
  if (create_intersect_range_checks_index (loop, cond_expr, alias_pair))
    return;

  if (create_ifn_alias_checks (cond_expr, alias_pair))
    return;

  if (create_waw_or_war_checks (cond_expr, alias_pair))
    return;

  unsigned HOST_WIDE_INT min_align;
  tree_code cmp_code;
  /* We don't have to check DR_ALIAS_MIXED_STEPS here, since both versions
     are equivalent.  This is just an optimization heuristic.  */
  if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
      && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
    {
      /* In this case adding access_size to seg_len is likely to give
	 a simple X * step, where X is either the number of scalar
	 iterations or the vectorization factor.  We're better off
	 keeping that, rather than subtracting an alignment from it.

	 In this case the maximum values are exclusive and so there is
	 no alias if the maximum of one segment equals the minimum
	 of another.  */
      min_align = 0;
      cmp_code = LE_EXPR;
    }
  else
    {
      /* Calculate the minimum alignment shared by all four pointers,
	 then arrange for this alignment to be subtracted from the
	 exclusive maximum values to get inclusive maximum values.
	 This "- min_align" is cumulative with a "+ access_size"
	 in the calculation of the maximum values.  In the best
	 (and common) case, the two cancel each other out, leaving
	 us with an inclusive bound based only on seg_len.  In the
	 worst case we're simply adding a smaller number than before.

	 Because the maximum values are inclusive, there is an alias
	 if the maximum value of one segment is equal to the minimum
	 value of the other.  */
      min_align = MIN (dr_a.align, dr_b.align);
      cmp_code = LT_EXPR;
    }

  tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
  get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
  get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);

  *cond_expr
    = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
	fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
	fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
  if (dump_enabled_p ())
    dump_printf (MSG_NOTE, "using an address-based overlap test\n");
}
/* Create a conditional expression that represents the run-time checks for
   overlapping of address ranges represented by a list of data references
   pairs passed in ALIAS_PAIRS.  Data references are in LOOP.  The returned
   COND_EXPR is the conditional expression to be used in the if statement
   that controls which version of the loop gets executed at runtime.

   NOTE(review): *COND_EXPR is read before it is first written here, so
   callers presumably initialize it (e.g. to NULL_TREE) — confirm.  */

void
create_runtime_alias_checks (class loop *loop,
			     const vec<dr_with_seg_len_pair_t> *alias_pairs,
			     tree * cond_expr)
{
  tree part_cond_expr;

  /* Suppress spurious overflow warnings for the folded conditions.  */
  fold_defer_overflow_warnings ();
  for (const dr_with_seg_len_pair_t &alias_pair : alias_pairs)
    {
      gcc_assert (alias_pair.flags);
      if (dump_enabled_p ())
	dump_printf (MSG_NOTE,
		     "create runtime check for data references %T and %T\n",
		     DR_REF (alias_pair.first.dr),
		     DR_REF (alias_pair.second.dr));

      /* Create condition expression for each pair data references.  */
      create_intersect_range_checks (loop, &part_cond_expr, alias_pair);
      /* AND each pair's check into the combined condition.  */
      if (*cond_expr)
	*cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
				  *cond_expr, part_cond_expr);
      else
	*cond_expr = part_cond_expr;
    }
  fold_undefer_and_ignore_overflow_warnings ();
}
2669 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
2670 expressions. */
2671 static bool
2672 dr_equal_offsets_p1 (tree offset1, tree offset2)
2674 bool res;
2676 STRIP_NOPS (offset1);
2677 STRIP_NOPS (offset2);
2679 if (offset1 == offset2)
2680 return true;
2682 if (TREE_CODE (offset1) != TREE_CODE (offset2)
2683 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
2684 return false;
2686 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
2687 TREE_OPERAND (offset2, 0));
2689 if (!res || !BINARY_CLASS_P (offset1))
2690 return res;
2692 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
2693 TREE_OPERAND (offset2, 1));
2695 return res;
2698 /* Check if DRA and DRB have equal offsets. */
2699 bool
2700 dr_equal_offsets_p (struct data_reference *dra,
2701 struct data_reference *drb)
2703 tree offset1, offset2;
2705 offset1 = DR_OFFSET (dra);
2706 offset2 = DR_OFFSET (drb);
2708 return dr_equal_offsets_p1 (offset1, offset2);
2711 /* Returns true if FNA == FNB. */
2713 static bool
2714 affine_function_equal_p (affine_fn fna, affine_fn fnb)
2716 unsigned i, n = fna.length ();
2718 if (n != fnb.length ())
2719 return false;
2721 for (i = 0; i < n; i++)
2722 if (!operand_equal_p (fna[i], fnb[i], 0))
2723 return false;
2725 return true;
2728 /* If all the functions in CF are the same, returns one of them,
2729 otherwise returns NULL. */
2731 static affine_fn
2732 common_affine_function (conflict_function *cf)
2734 unsigned i;
2735 affine_fn comm;
2737 if (!CF_NONTRIVIAL_P (cf))
2738 return affine_fn ();
2740 comm = cf->fns[0];
2742 for (i = 1; i < cf->n; i++)
2743 if (!affine_function_equal_p (comm, cf->fns[i]))
2744 return affine_fn ();
2746 return comm;
/* Returns the base (constant term, coefficient 0) of the affine
   function FN.  */

static tree
affine_function_base (affine_fn fn)
{
  return fn[0];
}
2757 /* Returns true if FN is a constant. */
2759 static bool
2760 affine_function_constant_p (affine_fn fn)
2762 unsigned i;
2763 tree coef;
2765 for (i = 1; fn.iterate (i, &coef); i++)
2766 if (!integer_zerop (coef))
2767 return false;
2769 return true;
2772 /* Returns true if FN is the zero constant function. */
2774 static bool
2775 affine_function_zero_p (affine_fn fn)
2777 return (integer_zerop (affine_function_base (fn))
2778 && affine_function_constant_p (fn));
2781 /* Returns a signed integer type with the largest precision from TA
2782 and TB. */
2784 static tree
2785 signed_type_for_types (tree ta, tree tb)
2787 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
2788 return signed_type_for (ta);
2789 else
2790 return signed_type_for (tb);
2793 /* Applies operation OP on affine functions FNA and FNB, and returns the
2794 result. */
2796 static affine_fn
2797 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
2799 unsigned i, n, m;
2800 affine_fn ret;
2801 tree coef;
2803 if (fnb.length () > fna.length ())
2805 n = fna.length ();
2806 m = fnb.length ();
2808 else
2810 n = fnb.length ();
2811 m = fna.length ();
2814 ret.create (m);
2815 for (i = 0; i < n; i++)
2817 tree type = signed_type_for_types (TREE_TYPE (fna[i]),
2818 TREE_TYPE (fnb[i]));
2819 ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
2822 for (; fna.iterate (i, &coef); i++)
2823 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2824 coef, integer_zero_node));
2825 for (; fnb.iterate (i, &coef); i++)
2826 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2827 integer_zero_node, coef));
2829 return ret;
2832 /* Returns the sum of affine functions FNA and FNB. */
2834 static affine_fn
2835 affine_fn_plus (affine_fn fna, affine_fn fnb)
2837 return affine_fn_op (PLUS_EXPR, fna, fnb);
2840 /* Returns the difference of affine functions FNA and FNB. */
2842 static affine_fn
2843 affine_fn_minus (affine_fn fna, affine_fn fnb)
2845 return affine_fn_op (MINUS_EXPR, fna, fnb);
2848 /* Frees affine function FN. */
2850 static void
2851 affine_fn_free (affine_fn fn)
2853 fn.release ();
/* Determine for each subscript in the data dependence relation DDR
   the distance.  The distance of a subscript is the constant
   difference of its two conflict functions when both are known and
   affine; otherwise it is chrec_dont_know.  */

static void
compute_subscript_distance (struct data_dependence_relation *ddr)
{
  conflict_function *cf_a, *cf_b;
  affine_fn fn_a, fn_b, diff;

  /* Nothing to do unless a dependence is actually possible.  */
  if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      unsigned int i;

      for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
	{
	  struct subscript *subscript;

	  subscript = DDR_SUBSCRIPT (ddr, i);
	  cf_a = SUB_CONFLICTS_IN_A (subscript);
	  cf_b = SUB_CONFLICTS_IN_B (subscript);

	  fn_a = common_affine_function (cf_a);
	  fn_b = common_affine_function (cf_b);
	  /* Give up for the whole relation if either side has no single
	     common affine function.  */
	  if (!fn_a.exists () || !fn_b.exists ())
	    {
	      SUB_DISTANCE (subscript) = chrec_dont_know;
	      return;
	    }
	  diff = affine_fn_minus (fn_a, fn_b);

	  /* Only a constant difference yields a usable distance.  */
	  if (affine_function_constant_p (diff))
	    SUB_DISTANCE (subscript) = affine_function_base (diff);
	  else
	    SUB_DISTANCE (subscript) = chrec_dont_know;

	  affine_fn_free (diff);
	}
    }
}
2896 /* Returns the conflict function for "unknown". */
2898 static conflict_function *
2899 conflict_fn_not_known (void)
2901 conflict_function *fn = XCNEW (conflict_function);
2902 fn->n = NOT_KNOWN;
2904 return fn;
2907 /* Returns the conflict function for "independent". */
2909 static conflict_function *
2910 conflict_fn_no_dependence (void)
2912 conflict_function *fn = XCNEW (conflict_function);
2913 fn->n = NO_DEPENDENCE;
2915 return fn;
2918 /* Returns true if the address of OBJ is invariant in LOOP. */
2920 static bool
2921 object_address_invariant_in_loop_p (const class loop *loop, const_tree obj)
2923 while (handled_component_p (obj))
2925 if (TREE_CODE (obj) == ARRAY_REF)
2927 for (int i = 1; i < 4; ++i)
2928 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i),
2929 loop->num))
2930 return false;
2932 else if (TREE_CODE (obj) == COMPONENT_REF)
2934 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2935 loop->num))
2936 return false;
2938 obj = TREE_OPERAND (obj, 0);
2941 if (!INDIRECT_REF_P (obj)
2942 && TREE_CODE (obj) != MEM_REF)
2943 return true;
2945 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
2946 loop->num);
/* Returns false if we can prove that data references A and B do not alias,
   true otherwise.  If LOOP_NEST is false no cross-iteration aliases are
   considered.  */

bool
dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
		class loop *loop_nest)
{
  tree addr_a = DR_BASE_OBJECT (a);
  tree addr_b = DR_BASE_OBJECT (b);

  /* If we are not processing a loop nest but scalar code we
     do not need to care about possible cross-iteration dependences
     and thus can process the full original reference.  Do so,
     similar to how loop invariant motion applies extra offset-based
     disambiguation.  */
  if (!loop_nest)
    {
      aff_tree off1, off2;
      poly_widest_int size1, size2;
      get_inner_reference_aff (DR_REF (a), &off1, &size1);
      get_inner_reference_aff (DR_REF (b), &off2, &size2);
      /* Compute off2 - off1 and test the ranges for disjointness.  */
      aff_combination_scale (&off1, -1);
      aff_combination_add (&off2, &off1);
      if (aff_comb_cannot_overlap_p (&off2, size1, size2))
	return false;
    }

  /* Same clique but different base means the accesses are known not
     to overlap (restrict-style disambiguation).  */
  if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
      && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
      /* For cross-iteration dependences the cliques must be valid for the
	 whole loop, not just individual iterations.  */
      && (!loop_nest
	  || MR_DEPENDENCE_CLIQUE (addr_a) == 1
	  || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique)
      && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
      && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
    return false;

  /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
     do not know the size of the base-object.  So we cannot do any
     offset/overlap based analysis but have to rely on points-to
     information only.  */
  if (TREE_CODE (addr_a) == MEM_REF
      && (DR_UNCONSTRAINED_BASE (a)
	  || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
    {
      /* For true dependences we can apply TBAA.  */
      if (flag_strict_aliasing
	  && DR_IS_WRITE (a) && DR_IS_READ (b)
	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
				     get_alias_set (DR_REF (b))))
	return false;
      if (TREE_CODE (addr_b) == MEM_REF)
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       TREE_OPERAND (addr_b, 0));
      else
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       build_fold_addr_expr (addr_b));
    }
  else if (TREE_CODE (addr_b) == MEM_REF
	   && (DR_UNCONSTRAINED_BASE (b)
	       || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
    {
      /* For true dependences we can apply TBAA.  */
      if (flag_strict_aliasing
	  && DR_IS_WRITE (a) && DR_IS_READ (b)
	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
				     get_alias_set (DR_REF (b))))
	return false;
      if (TREE_CODE (addr_a) == MEM_REF)
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       TREE_OPERAND (addr_b, 0));
      else
	return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
				       TREE_OPERAND (addr_b, 0));
    }

  /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
     that is being subsetted in the loop nest.  */
  if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
    return refs_output_dependent_p (addr_a, addr_b);
  else if (DR_IS_READ (a) && DR_IS_WRITE (b))
    return refs_anti_dependent_p (addr_a, addr_b);
  return refs_may_alias_p (addr_a, addr_b);
}
3036 /* REF_A and REF_B both satisfy access_fn_component_p. Return true
3037 if it is meaningful to compare their associated access functions
3038 when checking for dependencies. */
3040 static bool
3041 access_fn_components_comparable_p (tree ref_a, tree ref_b)
3043 /* Allow pairs of component refs from the following sets:
3045 { REALPART_EXPR, IMAGPART_EXPR }
3046 { COMPONENT_REF }
3047 { ARRAY_REF }. */
3048 tree_code code_a = TREE_CODE (ref_a);
3049 tree_code code_b = TREE_CODE (ref_b);
3050 if (code_a == IMAGPART_EXPR)
3051 code_a = REALPART_EXPR;
3052 if (code_b == IMAGPART_EXPR)
3053 code_b = REALPART_EXPR;
3054 if (code_a != code_b)
3055 return false;
3057 if (TREE_CODE (ref_a) == COMPONENT_REF)
3058 /* ??? We cannot simply use the type of operand #0 of the refs here as
3059 the Fortran compiler smuggles type punning into COMPONENT_REFs.
3060 Use the DECL_CONTEXT of the FIELD_DECLs instead. */
3061 return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
3062 == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
3064 return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
3065 TREE_TYPE (TREE_OPERAND (ref_b, 0)));
/* Initialize a data dependence relation RES in LOOP_NEST.  USE_ALT_INDICES
   is true when the main indices of A and B were not comparable so we try again
   with alternate indices computed on an indirect reference.

   On return, RES either has DDR_ARE_DEPENDENT set to chrec_dont_know
   (analysis failed) or has one subscript per comparable access-function
   pair, ready for the classic dependence tests.  Always returns RES.  */

struct data_dependence_relation *
initialize_data_dependence_relation (struct data_dependence_relation *res,
				     vec<loop_p> loop_nest,
				     bool use_alt_indices)
{
  struct data_reference *a = DDR_A (res);
  struct data_reference *b = DDR_B (res);
  unsigned int i;

  /* Select either the primary or the alternate (MEM_REF-based) index
     form for each reference.  A DR that already is a MEM_REF has no
     separate alternate form.  */
  struct indices *indices_a = &a->indices;
  struct indices *indices_b = &b->indices;
  if (use_alt_indices)
    {
      if (TREE_CODE (DR_REF (a)) != MEM_REF)
	indices_a = &a->alt_indices;
      if (TREE_CODE (DR_REF (b)) != MEM_REF)
	indices_b = &b->alt_indices;
    }
  unsigned int num_dimensions_a = indices_a->access_fns.length ();
  unsigned int num_dimensions_b = indices_b->access_fns.length ();
  if (num_dimensions_a == 0 || num_dimensions_b == 0)
    {
      /* Nothing to compare: give up.  */
      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
      return res;
    }

  /* For unconstrained bases, the root (highest-indexed) subscript
     describes a variation in the base of the original DR_REF rather
     than a component access.  We have no type that accurately describes
     the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
     applying this subscript) so limit the search to the last real
     component access.

     E.g. for:

       void
       f (int a[][8], int b[][8])
       {
	 for (int i = 0; i < 8; ++i)
	   a[i * 2][0] = b[i][0];
       }

     the a and b accesses have a single ARRAY_REF component reference [0]
     but have two subscripts.  */
  if (indices_a->unconstrained_base)
    num_dimensions_a -= 1;
  if (indices_b->unconstrained_base)
    num_dimensions_b -= 1;

  /* These structures describe sequences of component references in
     DR_REF (A) and DR_REF (B).  Each component reference is tied to a
     specific access function.  */
  struct {
    /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
       DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
       indices.  In C notation, these are the indices of the rightmost
       component references; e.g. for a sequence .b.c.d, the start
       index is for .d.  */
    unsigned int start_a;
    unsigned int start_b;

    /* The sequence contains LENGTH consecutive access functions from
       each DR.  */
    unsigned int length;

    /* The enclosing objects for the A and B sequences respectively,
       i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
       and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied.  */
    tree object_a;
    tree object_b;
  } full_seq = {}, struct_seq = {};

  /* Before each iteration of the loop:

     - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
     - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B).  */
  unsigned int index_a = 0;
  unsigned int index_b = 0;
  tree ref_a = DR_REF (a);
  tree ref_b = DR_REF (b);

  /* Now walk the component references from the final DR_REFs back up to
     the enclosing base objects.  Each component reference corresponds
     to one access function in the DR, with access function 0 being for
     the final DR_REF and the highest-indexed access function being the
     one that is applied to the base of the DR.

     Look for a sequence of component references whose access functions
     are comparable (see access_fn_components_comparable_p).  If more
     than one such sequence exists, pick the one nearest the base
     (which is the leftmost sequence in C notation).  Store this sequence
     in FULL_SEQ.

     For example, if we have:

	struct foo { struct bar s; ... } (*a)[10], (*b)[10];

	A: a[0][i].s.c.d
	B: __real b[0][i].s.e[i].f

     (where d is the same type as the real component of f) then the access
     functions would be:

	 0   1   2   3
     A:  .d  .c  .s  [i]

	 0   1   2   3   4   5
     B:  __real  .f  [i]  .e  .s  [i]

     The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
     and [i] is an ARRAY_REF.  However, the A1/B3 column contains two
     COMPONENT_REF accesses for struct bar, so is comparable.  Likewise
     the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
     so is comparable.  The A3/B5 column contains two ARRAY_REFs that
     index foo[10] arrays, so is again comparable.  The sequence is
     therefore:

     A: [1, 3]  (i.e. [i].s.c)
     B: [3, 5]  (i.e. [i].s.e)

     Also look for sequences of component references whose access
     functions are comparable and whose enclosing objects have the same
     RECORD_TYPE.  Store this sequence in STRUCT_SEQ.  In the above
     example, STRUCT_SEQ would be:

     A: [1, 2]  (i.e. s.c)
     B: [3, 4]  (i.e. s.e)  */
  while (index_a < num_dimensions_a && index_b < num_dimensions_b)
    {
      /* The alternate indices form always has a single dimension
	 with unconstrained base.  */
      gcc_assert (!use_alt_indices);

      /* REF_A and REF_B must be one of the component access types
	 allowed by dr_analyze_indices.  */
      gcc_checking_assert (access_fn_component_p (ref_a));
      gcc_checking_assert (access_fn_component_p (ref_b));

      /* Get the immediately-enclosing objects for REF_A and REF_B,
	 i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
	 and DR_ACCESS_FN (B, INDEX_B).  */
      tree object_a = TREE_OPERAND (ref_a, 0);
      tree object_b = TREE_OPERAND (ref_b, 0);

      tree type_a = TREE_TYPE (object_a);
      tree type_b = TREE_TYPE (object_b);
      if (access_fn_components_comparable_p (ref_a, ref_b))
	{
	  /* This pair of component accesses is comparable for dependence
	     analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
	     DR_ACCESS_FN (B, INDEX_B) in the sequence.  */
	  if (full_seq.start_a + full_seq.length != index_a
	      || full_seq.start_b + full_seq.length != index_b)
	    {
	      /* The accesses don't extend the current sequence,
		 so start a new one here.  */
	      full_seq.start_a = index_a;
	      full_seq.start_b = index_b;
	      full_seq.length = 0;
	    }

	  /* Add this pair of references to the sequence.  */
	  full_seq.length += 1;
	  full_seq.object_a = object_a;
	  full_seq.object_b = object_b;

	  /* If the enclosing objects are structures (and thus have the
	     same RECORD_TYPE), record the new sequence in STRUCT_SEQ.  */
	  if (TREE_CODE (type_a) == RECORD_TYPE)
	    struct_seq = full_seq;

	  /* Move to the next containing reference for both A and B.  */
	  ref_a = object_a;
	  ref_b = object_b;
	  index_a += 1;
	  index_b += 1;
	  continue;
	}

      /* Try to approach equal type sizes.  */
      if (!COMPLETE_TYPE_P (type_a)
	  || !COMPLETE_TYPE_P (type_b)
	  || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
	  || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
	break;

      /* Step past the smaller object (or both, when equal) so that the
	 two walks stay roughly in sync.  */
      unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
      unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
      if (size_a <= size_b)
	{
	  index_a += 1;
	  ref_a = object_a;
	}
      if (size_b <= size_a)
	{
	  index_b += 1;
	  ref_b = object_b;
	}
    }

  /* See whether FULL_SEQ ends at the base and whether the two bases
     are equal.  We do not care about TBAA or alignment info so we can
     use OEP_ADDRESS_OF to avoid false negatives.  */
  tree base_a = indices_a->base_object;
  tree base_b = indices_b->base_object;
  bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
		      && full_seq.start_b + full_seq.length == num_dimensions_b
		      && (indices_a->unconstrained_base
			  == indices_b->unconstrained_base)
		      && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
		      && (types_compatible_p (TREE_TYPE (base_a),
					      TREE_TYPE (base_b))
			  || (!base_supports_access_fn_components_p (base_a)
			      && !base_supports_access_fn_components_p (base_b)
			      && operand_equal_p
				   (TYPE_SIZE (TREE_TYPE (base_a)),
				    TYPE_SIZE (TREE_TYPE (base_b)), 0)))
		      && (!loop_nest.exists ()
			  || (object_address_invariant_in_loop_p
			      (loop_nest[0], base_a))));

  /* If the bases are the same, we can include the base variation too.
     E.g. the b accesses in:

       for (int i = 0; i < n; ++i)
	 b[i + 4][0] = b[i][0];

     have a definite dependence distance of 4, while for:

       for (int i = 0; i < n; ++i)
	 a[i + 4][0] = b[i][0];

     the dependence distance depends on the gap between a and b.

     If the bases are different then we can only rely on the sequence
     rooted at a structure access, since arrays are allowed to overlap
     arbitrarily and change shape arbitrarily.  E.g. we treat this as
     valid code:

       int a[256];
       ...
       ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];

     where two lvalues with the same int[4][3] type overlap, and where
     both lvalues are distinct from the object's declared type.  */
  if (same_base_p)
    {
      if (indices_a->unconstrained_base)
	full_seq.length += 1;
    }
  else
    full_seq = struct_seq;

  /* Punt if we didn't find a suitable sequence.  */
  if (full_seq.length == 0)
    {
      if (use_alt_indices
	  || (TREE_CODE (DR_REF (a)) == MEM_REF
	      && TREE_CODE (DR_REF (b)) == MEM_REF)
	  || may_be_nonaddressable_p (DR_REF (a))
	  || may_be_nonaddressable_p (DR_REF (b)))
	{
	  /* Fully exhausted possibilities.  */
	  DDR_ARE_DEPENDENT (res) = chrec_dont_know;
	  return res;
	}

      /* Try evaluating both DRs as dereferences of pointers.  The
	 alternate index forms are cached in the DRs, so compute each
	 at most once.  */
      if (!a->alt_indices.base_object
	  && TREE_CODE (DR_REF (a)) != MEM_REF)
	{
	  tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (a)),
				 build1 (ADDR_EXPR, ptr_type_node, DR_REF (a)),
				 build_int_cst
				   (reference_alias_ptr_type (DR_REF (a)), 0));
	  dr_analyze_indices (&a->alt_indices, alt_ref,
			      loop_preheader_edge (loop_nest[0]),
			      loop_containing_stmt (DR_STMT (a)));
	}
      if (!b->alt_indices.base_object
	  && TREE_CODE (DR_REF (b)) != MEM_REF)
	{
	  tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (b)),
				 build1 (ADDR_EXPR, ptr_type_node, DR_REF (b)),
				 build_int_cst
				   (reference_alias_ptr_type (DR_REF (b)), 0));
	  dr_analyze_indices (&b->alt_indices, alt_ref,
			      loop_preheader_edge (loop_nest[0]),
			      loop_containing_stmt (DR_STMT (b)));
	}
      /* Retry the analysis with the alternate index forms.  */
      return initialize_data_dependence_relation (res, loop_nest, true);
    }

  if (!same_base_p)
    {
      /* Partial overlap is possible for different bases when strict aliasing
	 is not in effect.  It's also possible if either base involves a union
	 access; e.g. for:

	   struct s1 { int a[2]; };
	   struct s2 { struct s1 b; int c; };
	   struct s3 { int d; struct s1 e; };
	   union u { struct s2 f; struct s3 g; } *p, *q;

	 the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
	 "p->g.e" (base "p->g") and might partially overlap the s1 at
	 "q->g.e" (base "q->g").  */
      if (!flag_strict_aliasing
	  || ref_contains_union_access_p (full_seq.object_a)
	  || ref_contains_union_access_p (full_seq.object_b))
	{
	  DDR_ARE_DEPENDENT (res) = chrec_dont_know;
	  return res;
	}

      DDR_COULD_BE_INDEPENDENT_P (res) = true;
      if (!loop_nest.exists ()
	  || (object_address_invariant_in_loop_p (loop_nest[0],
						  full_seq.object_a)
	      && object_address_invariant_in_loop_p (loop_nest[0],
						     full_seq.object_b)))
	{
	  DDR_OBJECT_A (res) = full_seq.object_a;
	  DDR_OBJECT_B (res) = full_seq.object_b;
	}
    }

  DDR_AFFINE_P (res) = true;
  DDR_ARE_DEPENDENT (res) = NULL_TREE;
  DDR_SUBSCRIPTS (res).create (full_seq.length);
  DDR_LOOP_NEST (res) = loop_nest;
  DDR_SELF_REFERENCE (res) = false;

  /* Create one subscript per comparable access-function pair; the
     conflict functions start out unknown and are refined by the
     dependence tests.  */
  for (i = 0; i < full_seq.length; ++i)
    {
      struct subscript *subscript;

      subscript = XNEW (struct subscript);
      SUB_ACCESS_FN (subscript, 0) = indices_a->access_fns[full_seq.start_a + i];
      SUB_ACCESS_FN (subscript, 1) = indices_b->access_fns[full_seq.start_b + i];
      SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
      SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
      SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
      SUB_DISTANCE (subscript) = chrec_dont_know;
      DDR_SUBSCRIPTS (res).safe_push (subscript);
    }

  return res;
}
3422 /* Initialize a data dependence relation between data accesses A and
3423 B. NB_LOOPS is the number of loops surrounding the references: the
3424 size of the classic distance/direction vectors. */
3426 struct data_dependence_relation *
3427 initialize_data_dependence_relation (struct data_reference *a,
3428 struct data_reference *b,
3429 vec<loop_p> loop_nest)
3431 data_dependence_relation *res = XCNEW (struct data_dependence_relation);
3432 DDR_A (res) = a;
3433 DDR_B (res) = b;
3434 DDR_LOOP_NEST (res).create (0);
3435 DDR_SUBSCRIPTS (res).create (0);
3436 DDR_DIR_VECTS (res).create (0);
3437 DDR_DIST_VECTS (res).create (0);
3439 if (a == NULL || b == NULL)
3441 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3442 return res;
3445 /* If the data references do not alias, then they are independent. */
3446 if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL))
3448 DDR_ARE_DEPENDENT (res) = chrec_known;
3449 return res;
3452 return initialize_data_dependence_relation (res, loop_nest, false);
3456 /* Frees memory used by the conflict function F. */
3458 static void
3459 free_conflict_function (conflict_function *f)
3461 unsigned i;
3463 if (CF_NONTRIVIAL_P (f))
3465 for (i = 0; i < f->n; i++)
3466 affine_fn_free (f->fns[i]);
3468 free (f);
3471 /* Frees memory used by SUBSCRIPTS. */
3473 static void
3474 free_subscripts (vec<subscript_p> subscripts)
3476 for (subscript_p s : subscripts)
3478 free_conflict_function (s->conflicting_iterations_in_a);
3479 free_conflict_function (s->conflicting_iterations_in_b);
3480 free (s);
3482 subscripts.release ();
3485 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
3486 description. */
3488 static inline void
3489 finalize_ddr_dependent (struct data_dependence_relation *ddr,
3490 tree chrec)
3492 DDR_ARE_DEPENDENT (ddr) = chrec;
3493 free_subscripts (DDR_SUBSCRIPTS (ddr));
3494 DDR_SUBSCRIPTS (ddr).create (0);
3497 /* The dependence relation DDR cannot be represented by a distance
3498 vector. */
3500 static inline void
3501 non_affine_dependence_relation (struct data_dependence_relation *ddr)
3503 if (dump_file && (dump_flags & TDF_DETAILS))
3504 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
3506 DDR_AFFINE_P (ddr) = false;
3511 /* This section contains the classic Banerjee tests. */
3513 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
3514 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
3516 static inline bool
3517 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3519 return (evolution_function_is_constant_p (chrec_a)
3520 && evolution_function_is_constant_p (chrec_b));
3523 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
3524 variable, i.e., if the SIV (Single Index Variable) test is true. */
3526 static bool
3527 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3529 if ((evolution_function_is_constant_p (chrec_a)
3530 && evolution_function_is_univariate_p (chrec_b))
3531 || (evolution_function_is_constant_p (chrec_b)
3532 && evolution_function_is_univariate_p (chrec_a)))
3533 return true;
3535 if (evolution_function_is_univariate_p (chrec_a)
3536 && evolution_function_is_univariate_p (chrec_b))
3538 switch (TREE_CODE (chrec_a))
3540 case POLYNOMIAL_CHREC:
3541 switch (TREE_CODE (chrec_b))
3543 case POLYNOMIAL_CHREC:
3544 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
3545 return false;
3546 /* FALLTHRU */
3548 default:
3549 return true;
3552 default:
3553 return true;
3557 return false;
3560 /* Creates a conflict function with N dimensions. The affine functions
3561 in each dimension follow. */
3563 static conflict_function *
3564 conflict_fn (unsigned n, ...)
3566 unsigned i;
3567 conflict_function *ret = XCNEW (conflict_function);
3568 va_list ap;
3570 gcc_assert (n > 0 && n <= MAX_DIM);
3571 va_start (ap, n);
3573 ret->n = n;
3574 for (i = 0; i < n; i++)
3575 ret->fns[i] = va_arg (ap, affine_fn);
3576 va_end (ap);
3578 return ret;
3581 /* Returns constant affine function with value CST. */
3583 static affine_fn
3584 affine_fn_cst (tree cst)
3586 affine_fn fn;
3587 fn.create (1);
3588 fn.quick_push (cst);
3589 return fn;
3592 /* Returns affine function with single variable, CST + COEF * x_DIM. */
3594 static affine_fn
3595 affine_fn_univar (tree cst, unsigned dim, tree coef)
3597 affine_fn fn;
3598 fn.create (dim + 1);
3599 unsigned i;
3601 gcc_assert (dim > 0);
3602 fn.quick_push (cst);
3603 for (i = 1; i < dim; i++)
3604 fn.quick_push (integer_zero_node);
3605 fn.quick_push (coef);
3606 return fn;
3609 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
3610 *OVERLAPS_B are initialized to the functions that describe the
3611 relation between the elements accessed twice by CHREC_A and
3612 CHREC_B. For k >= 0, the following property is verified:
3614 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
3616 static void
3617 analyze_ziv_subscript (tree chrec_a,
3618 tree chrec_b,
3619 conflict_function **overlaps_a,
3620 conflict_function **overlaps_b,
3621 tree *last_conflicts)
3623 tree type, difference;
3624 dependence_stats.num_ziv++;
3626 if (dump_file && (dump_flags & TDF_DETAILS))
3627 fprintf (dump_file, "(analyze_ziv_subscript \n");
3629 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3630 chrec_a = chrec_convert (type, chrec_a, NULL);
3631 chrec_b = chrec_convert (type, chrec_b, NULL);
3632 difference = chrec_fold_minus (type, chrec_a, chrec_b);
3634 switch (TREE_CODE (difference))
3636 case INTEGER_CST:
3637 if (integer_zerop (difference))
3639 /* The difference is equal to zero: the accessed index
3640 overlaps for each iteration in the loop. */
3641 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3642 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3643 *last_conflicts = chrec_dont_know;
3644 dependence_stats.num_ziv_dependent++;
3646 else
3648 /* The accesses do not overlap. */
3649 *overlaps_a = conflict_fn_no_dependence ();
3650 *overlaps_b = conflict_fn_no_dependence ();
3651 *last_conflicts = integer_zero_node;
3652 dependence_stats.num_ziv_independent++;
3654 break;
3656 default:
3657 /* We're not sure whether the indexes overlap. For the moment,
3658 conservatively answer "don't know". */
3659 if (dump_file && (dump_flags & TDF_DETAILS))
3660 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
3662 *overlaps_a = conflict_fn_not_known ();
3663 *overlaps_b = conflict_fn_not_known ();
3664 *last_conflicts = chrec_dont_know;
3665 dependence_stats.num_ziv_unimplemented++;
3666 break;
3669 if (dump_file && (dump_flags & TDF_DETAILS))
3670 fprintf (dump_file, ")\n");
3673 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
3674 and only if it fits to the int type. If this is not the case, or the
3675 bound on the number of iterations of LOOP could not be derived, returns
3676 chrec_dont_know. */
3678 static tree
3679 max_stmt_executions_tree (class loop *loop)
3681 widest_int nit;
3683 if (!max_stmt_executions (loop, &nit))
3684 return chrec_dont_know;
3686 if (!wi::fits_to_tree_p (nit, unsigned_type_node))
3687 return chrec_dont_know;
3689 return wide_int_to_tree (unsigned_type_node, nit);
3692 /* Determine whether the CHREC is always positive/negative. If the expression
3693 cannot be statically analyzed, return false, otherwise set the answer into
3694 VALUE. */
3696 static bool
3697 chrec_is_positive (tree chrec, bool *value)
3699 bool value0, value1, value2;
3700 tree end_value, nb_iter;
3702 switch (TREE_CODE (chrec))
3704 case POLYNOMIAL_CHREC:
3705 if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
3706 || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
3707 return false;
3709 /* FIXME -- overflows. */
3710 if (value0 == value1)
3712 *value = value0;
3713 return true;
3716 /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
3717 and the proof consists in showing that the sign never
3718 changes during the execution of the loop, from 0 to
3719 loop->nb_iterations. */
3720 if (!evolution_function_is_affine_p (chrec))
3721 return false;
3723 nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
3724 if (chrec_contains_undetermined (nb_iter))
3725 return false;
3727 #if 0
3728 /* TODO -- If the test is after the exit, we may decrease the number of
3729 iterations by one. */
3730 if (after_exit)
3731 nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
3732 #endif
3734 end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
3736 if (!chrec_is_positive (end_value, &value2))
3737 return false;
3739 *value = value0;
3740 return value0 == value1;
3742 case INTEGER_CST:
3743 switch (tree_int_cst_sgn (chrec))
3745 case -1:
3746 *value = false;
3747 break;
3748 case 1:
3749 *value = true;
3750 break;
3751 default:
3752 return false;
3754 return true;
3756 default:
3757 return false;
/* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
   constant, and CHREC_B is an affine function.  *OVERLAPS_A and
   *OVERLAPS_B are initialized to the functions that describe the
   relation between the elements accessed twice by CHREC_A and
   CHREC_B.  For k >= 0, the following property is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).

   *LAST_CONFLICTS is set to the number of conflicting iterations:
   integer_one_node for a single conflict, integer_zero_node for none,
   or chrec_dont_know when the test is inconclusive.  */

static void
analyze_siv_subscript_cst_affine (tree chrec_a,
				  tree chrec_b,
				  conflict_function **overlaps_a,
				  conflict_function **overlaps_b,
				  tree *last_conflicts)
{
  bool value0, value1, value2;
  tree type, difference, tmp;

  /* Work in a common signed type so the difference is meaningful.  */
  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
  chrec_a = chrec_convert (type, chrec_a, NULL);
  chrec_b = chrec_convert (type, chrec_b, NULL);
  difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);

  /* Special case overlap in the first iteration.  */
  if (integer_zerop (difference))
    {
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = integer_one_node;
      return;
    }

  /* VALUE0 is the sign of (initial value of CHREC_B) - CHREC_A; the
     following case analysis pairs it with the sign of CHREC_B's step
     to decide whether CHREC_B ever reaches CHREC_A.  */
  if (!chrec_is_positive (initial_condition (difference), &value0))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "siv test failed: chrec is not positive.\n");

      dependence_stats.num_siv_unimplemented++;
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      return;
    }
  else
    {
      if (value0 == false)
	{
	  /* CHREC_B starts below CHREC_A: it can only catch up if its
	     step is positive.  */
	  if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
	      || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "siv test failed: chrec not positive.\n");

	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	      dependence_stats.num_siv_unimplemented++;
	      return;
	    }
	  else
	    {
	      if (value1 == true)
		{
		  /* Example:
		     chrec_a = 12
		     chrec_b = {10, +, 1}
		  */

		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
		    {
		      HOST_WIDE_INT numiter;
		      class loop *loop = get_chrec_loop (chrec_b);

		      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
		      /* TMP is the iteration of CHREC_B's loop at which
			 the single conflict occurs.  */
		      tmp = fold_build2 (EXACT_DIV_EXPR, type,
					 fold_build1 (ABS_EXPR, type, difference),
					 CHREC_RIGHT (chrec_b));
		      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
		      *last_conflicts = integer_one_node;

		      /* Perform weak-zero siv test to see if overlap is
			 outside the loop bounds.  */
		      numiter = max_stmt_executions_int (loop);

		      if (numiter >= 0
			  && compare_tree_int (tmp, numiter) > 0)
			{
			  free_conflict_function (*overlaps_a);
			  free_conflict_function (*overlaps_b);
			  *overlaps_a = conflict_fn_no_dependence ();
			  *overlaps_b = conflict_fn_no_dependence ();
			  *last_conflicts = integer_zero_node;
			  dependence_stats.num_siv_independent++;
			  return;
			}
		      dependence_stats.num_siv_dependent++;
		      return;
		    }

		  /* When the step does not divide the difference, there are
		     no overlaps.  */
		  else
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      dependence_stats.num_siv_independent++;
		      return;
		    }
		}

	      else
		{
		  /* Example:
		     chrec_a = 12
		     chrec_b = {10, +, -1}

		     In this case, chrec_a will not overlap with chrec_b.  */
		  *overlaps_a = conflict_fn_no_dependence ();
		  *overlaps_b = conflict_fn_no_dependence ();
		  *last_conflicts = integer_zero_node;
		  dependence_stats.num_siv_independent++;
		  return;
		}
	    }
	}
      else
	{
	  /* CHREC_B starts above CHREC_A: it can only come back down
	     if its step is negative.  (Mirror image of the case
	     above.)  */
	  if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
	      || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "siv test failed: chrec not positive.\n");

	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	      dependence_stats.num_siv_unimplemented++;
	      return;
	    }
	  else
	    {
	      if (value2 == false)
		{
		  /* Example:
		     chrec_a = 3
		     chrec_b = {10, +, -1}
		  */
		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
		    {
		      HOST_WIDE_INT numiter;
		      class loop *loop = get_chrec_loop (chrec_b);

		      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
		      tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
					 CHREC_RIGHT (chrec_b));
		      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
		      *last_conflicts = integer_one_node;

		      /* Perform weak-zero siv test to see if overlap is
			 outside the loop bounds.  */
		      numiter = max_stmt_executions_int (loop);

		      if (numiter >= 0
			  && compare_tree_int (tmp, numiter) > 0)
			{
			  free_conflict_function (*overlaps_a);
			  free_conflict_function (*overlaps_b);
			  *overlaps_a = conflict_fn_no_dependence ();
			  *overlaps_b = conflict_fn_no_dependence ();
			  *last_conflicts = integer_zero_node;
			  dependence_stats.num_siv_independent++;
			  return;
			}
		      dependence_stats.num_siv_dependent++;
		      return;
		    }

		  /* When the step does not divide the difference, there
		     are no overlaps.  */
		  else
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      dependence_stats.num_siv_independent++;
		      return;
		    }
		}
	      else
		{
		  /* Example:
		     chrec_a = 3
		     chrec_b = {4, +, 1}

		     In this case, chrec_a will not overlap with chrec_b.  */
		  *overlaps_a = conflict_fn_no_dependence ();
		  *overlaps_b = conflict_fn_no_dependence ();
		  *last_conflicts = integer_zero_node;
		  dependence_stats.num_siv_independent++;
		  return;
		}
	    }
	}
    }
}
/* Helper recursive function for initializing the matrix A.  Returns
   the initial value of CHREC.

   Flattens the affine parts of CHREC into row INDEX (and above, for
   nested chrecs) of matrix A: the step of each polynomial chrec,
   scaled by MULT, is stored in column 0 of successive rows, and the
   remaining non-evolving part is returned as a tree.  Returns
   chrec_dont_know when a step is not a host-word-sized constant.  */

static tree
initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
{
  gcc_assert (chrec);

  switch (TREE_CODE (chrec))
    {
    case POLYNOMIAL_CHREC:
      HOST_WIDE_INT chrec_right;
      if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
	return chrec_dont_know;
      chrec_right = int_cst_value (CHREC_RIGHT (chrec));
      /* We want to be able to negate without overflow.  */
      if (chrec_right == HOST_WIDE_INT_MIN)
	return chrec_dont_know;
      A[index][0] = mult * chrec_right;
      /* Recurse into the initial value, filling the next row.  */
      return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);

    case PLUS_EXPR:
    case MULT_EXPR:
    case MINUS_EXPR:
      {
	/* Fold the operation over the recursively-computed initial
	   values of both operands.  */
	tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
	tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);

	return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
      }

    CASE_CONVERT:
      {
	tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
	return chrec_convert (chrec_type (chrec), op, NULL);
      }

    case BIT_NOT_EXPR:
      {
	/* Handle ~X as -1 - X.  */
	tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
	return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
			      build_int_cst (TREE_TYPE (chrec), -1), op);
      }

    case INTEGER_CST:
      return chrec;

    default:
      gcc_unreachable ();
      return NULL_TREE;
    }
}
4024 #define FLOOR_DIV(x,y) ((x) / (y))
4026 /* Solves the special case of the Diophantine equation:
4027 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
4029 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
4030 number of iterations that loops X and Y run. The overlaps will be
4031 constructed as evolutions in dimension DIM. */
4033 static void
4034 compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
4035 HOST_WIDE_INT step_a,
4036 HOST_WIDE_INT step_b,
4037 affine_fn *overlaps_a,
4038 affine_fn *overlaps_b,
4039 tree *last_conflicts, int dim)
4041 if (((step_a > 0 && step_b > 0)
4042 || (step_a < 0 && step_b < 0)))
4044 HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
4045 HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;
4047 gcd_steps_a_b = gcd (step_a, step_b);
4048 step_overlaps_a = step_b / gcd_steps_a_b;
4049 step_overlaps_b = step_a / gcd_steps_a_b;
4051 if (niter > 0)
4053 tau2 = FLOOR_DIV (niter, step_overlaps_a);
4054 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
4055 last_conflict = tau2;
4056 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
4058 else
4059 *last_conflicts = chrec_dont_know;
4061 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
4062 build_int_cst (NULL_TREE,
4063 step_overlaps_a));
4064 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
4065 build_int_cst (NULL_TREE,
4066 step_overlaps_b));
4069 else
4071 *overlaps_a = affine_fn_cst (integer_zero_node);
4072 *overlaps_b = affine_fn_cst (integer_zero_node);
4073 *last_conflicts = integer_zero_node;
/* Solves the special case of a Diophantine equation where CHREC_A is
   an affine bivariate function, and CHREC_B is an affine univariate
   function.  For example,

   | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z

   has the following overlapping functions:

   | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
   | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
   | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v

   FORNOW: This is a specialized implementation for a case occurring in
   a common benchmark.  Implement the general algorithm.  */

static void
compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
				      conflict_function **overlaps_a,
				      conflict_function **overlaps_b,
				      tree *last_conflicts)
{
  bool xz_p, yz_p, xyz_p;
  HOST_WIDE_INT step_x, step_y, step_z;
  HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
  affine_fn overlaps_a_xz, overlaps_b_xz;
  affine_fn overlaps_a_yz, overlaps_b_yz;
  affine_fn overlaps_a_xyz, overlaps_b_xyz;
  affine_fn ova1, ova2, ovb;
  tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;

  /* X is the outer evolution of CHREC_A, Y its inner one, and Z the
     evolution of CHREC_B.  */
  step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
  step_y = int_cst_value (CHREC_RIGHT (chrec_a));
  step_z = int_cst_value (CHREC_RIGHT (chrec_b));

  niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
  niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
  niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));

  /* All three loop bounds are needed to bound the conflict search.  */
  if (niter_x < 0 || niter_y < 0 || niter_z < 0)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      return;
    }

  /* Solve the three univariate sub-problems: X against Z, Y against Z,
     and X+Y against Z, each in its own dimension.  */
  niter = MIN (niter_x, niter_z);
  compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
					   &overlaps_a_xz,
					   &overlaps_b_xz,
					   &last_conflicts_xz, 1);
  niter = MIN (niter_y, niter_z);
  compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
					   &overlaps_a_yz,
					   &overlaps_b_yz,
					   &last_conflicts_yz, 2);
  niter = MIN (niter_x, niter_z);
  niter = MIN (niter_y, niter);
  compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
					   &overlaps_a_xyz,
					   &overlaps_b_xyz,
					   &last_conflicts_xyz, 3);

  xz_p = !integer_zerop (last_conflicts_xz);
  yz_p = !integer_zerop (last_conflicts_yz);
  xyz_p = !integer_zerop (last_conflicts_xyz);

  if (xz_p || yz_p || xyz_p)
    {
      /* Combine the sub-solutions by summing the affine functions;
	 the old functions are freed as they are replaced.  */
      ova1 = affine_fn_cst (integer_zero_node);
      ova2 = affine_fn_cst (integer_zero_node);
      ovb = affine_fn_cst (integer_zero_node);
      if (xz_p)
	{
	  affine_fn t0 = ova1;
	  affine_fn t2 = ovb;

	  ova1 = affine_fn_plus (ova1, overlaps_a_xz);
	  ovb = affine_fn_plus (ovb, overlaps_b_xz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  *last_conflicts = last_conflicts_xz;
	}
      if (yz_p)
	{
	  affine_fn t0 = ova2;
	  affine_fn t2 = ovb;

	  ova2 = affine_fn_plus (ova2, overlaps_a_yz);
	  ovb = affine_fn_plus (ovb, overlaps_b_yz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  *last_conflicts = last_conflicts_yz;
	}
      if (xyz_p)
	{
	  affine_fn t0 = ova1;
	  affine_fn t2 = ova2;
	  affine_fn t4 = ovb;

	  ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
	  ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
	  ovb = affine_fn_plus (ovb, overlaps_b_xyz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  affine_fn_free (t4);
	  *last_conflicts = last_conflicts_xyz;
	}
      *overlaps_a = conflict_fn (2, ova1, ova2);
      *overlaps_b = conflict_fn (1, ovb);
    }
  else
    {
      /* No conflicts in any sub-problem: only the trivial solution.  */
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = integer_zero_node;
    }

  /* The sub-solutions were copied into the combined functions above,
     so the temporaries can be released.  */
  affine_fn_free (overlaps_a_xz);
  affine_fn_free (overlaps_b_xz);
  affine_fn_free (overlaps_a_yz);
  affine_fn_free (overlaps_b_yz);
  affine_fn_free (overlaps_a_xyz);
  affine_fn_free (overlaps_b_xyz);
}
4206 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
4208 static void
4209 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
4210 int size)
4212 memcpy (vec2, vec1, size * sizeof (*vec1));
4215 /* Copy the elements of M x N matrix MAT1 to MAT2. */
4217 static void
4218 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
4219 int m, int n)
4221 int i;
4223 for (i = 0; i < m; i++)
4224 lambda_vector_copy (mat1[i], mat2[i], n);
4227 /* Store the N x N identity matrix in MAT. */
4229 static void
4230 lambda_matrix_id (lambda_matrix mat, int size)
4232 int i, j;
4234 for (i = 0; i < size; i++)
4235 for (j = 0; j < size; j++)
4236 mat[i][j] = (i == j) ? 1 : 0;
4239 /* Return the index of the first nonzero element of vector VEC1 between
4240 START and N. We must have START <= N.
4241 Returns N if VEC1 is the zero vector. */
4243 static int
4244 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
4246 int j = start;
4247 while (j < n && vec1[j] == 0)
4248 j++;
4249 return j;
4252 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
4253 R2 = R2 + CONST1 * R1. */
4255 static bool
4256 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2,
4257 lambda_int const1)
4259 int i;
4261 if (const1 == 0)
4262 return true;
4264 for (i = 0; i < n; i++)
4266 bool ovf;
4267 lambda_int tem = mul_hwi (mat[r1][i], const1, &ovf);
4268 if (ovf)
4269 return false;
4270 lambda_int tem2 = add_hwi (mat[r2][i], tem, &ovf);
4271 if (ovf || tem2 == HOST_WIDE_INT_MIN)
4272 return false;
4273 mat[r2][i] = tem2;
4276 return true;
4279 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
4280 and store the result in VEC2. */
4282 static void
4283 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
4284 int size, lambda_int const1)
4286 int i;
4288 if (const1 == 0)
4289 lambda_vector_clear (vec2, size);
4290 else
4291 for (i = 0; i < size; i++)
4292 vec2[i] = const1 * vec1[i];
4295 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
4297 static void
4298 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
4299 int size)
4301 lambda_vector_mult_const (vec1, vec2, size, -1);
4304 /* Negate row R1 of matrix MAT which has N columns. */
4306 static void
4307 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
4309 lambda_vector_negate (mat[r1], mat[r1], n);
4312 /* Return true if two vectors are equal. */
4314 static bool
4315 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
4317 int i;
4318 for (i = 0; i < size; i++)
4319 if (vec1[i] != vec2[i])
4320 return false;
4321 return true;
/* Given an M x N integer matrix A, this function determines an M x
   M unimodular matrix U, and an M x N echelon matrix S such that
   "U.A = S".  This decomposition is also known as "right Hermite".

   Returns false if any intermediate row operation overflows a
   HOST_WIDE_INT (S and U may be partially computed in that case),
   true on success.

   Ref: Algorithm 2.1 page 33 in "Loop Transformations for
   Restructuring Compilers" Utpal Banerjee.  */

static bool
lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
			     lambda_matrix S, lambda_matrix U)
{
  int i, j, i0 = 0;

  /* Start from S = A and U = identity; every row operation is applied
     to both so the invariant U.A = S is maintained.  */
  lambda_matrix_copy (A, S, m, n);
  lambda_matrix_id (U, m);

  for (j = 0; j < n; j++)
    {
      if (lambda_vector_first_nz (S[j], m, i0) < m)
	{
	  ++i0;
	  /* Eliminate entries below the pivot by a Euclidean-style
	     sequence of "subtract multiple and swap" row operations.  */
	  for (i = m - 1; i >= i0; i--)
	    {
	      while (S[i][j] != 0)
		{
		  lambda_int factor, a, b;

		  a = S[i-1][j];
		  b = S[i][j];
		  /* HOST_WIDE_INT_MIN / -1 would overflow; row
		     operations guarantee it never appears.  */
		  gcc_assert (a != HOST_WIDE_INT_MIN);
		  factor = a / b;

		  if (!lambda_matrix_row_add (S, n, i, i-1, -factor))
		    return false;
		  std::swap (S[i], S[i-1]);

		  /* Mirror the same operation on U.  */
		  if (!lambda_matrix_row_add (U, m, i, i-1, -factor))
		    return false;
		  std::swap (U[i], U[i-1]);
		}
	    }
	}
    }

  return true;
}
/* Determines the overlapping elements due to accesses CHREC_A and
   CHREC_B, that are affine functions.  This function cannot handle
   symbolic evolution functions, ie. when initial conditions are
   parameters, because it uses lambda matrices of integers.  */

static void
analyze_subscript_affine_affine (tree chrec_a,
				 tree chrec_b,
				 conflict_function **overlaps_a,
				 conflict_function **overlaps_b,
				 tree *last_conflicts)
{
  unsigned nb_vars_a, nb_vars_b, dim;
  lambda_int gamma, gcd_alpha_beta;
  lambda_matrix A, U, S;
  struct obstack scratch_obstack;

  if (eq_evolutions_p (chrec_a, chrec_b))
    {
      /* The accessed index overlaps for each iteration in the
	 loop.  */
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = chrec_dont_know;
      return;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_subscript_affine_affine \n");

  /* For determining the initial intersection, we have to solve a
     Diophantine equation.  This is the most time consuming part.

     For answering to the question: "Is there a dependence?" we have
     to prove that there exists a solution to the Diophantine
     equation, and that the solution is in the iteration domain,
     i.e. the solution is positive or zero, and that the solution
     happens before the upper bound loop.nb_iterations.  Otherwise
     there is no dependence.  This function outputs a description of
     the iterations that hold the intersections.  */

  nb_vars_a = nb_vars_in_chrec (chrec_a);
  nb_vars_b = nb_vars_in_chrec (chrec_b);

  /* All matrix memory lives on this obstack and is released at
     end_analyze_subs_aa on every exit path.  */
  gcc_obstack_init (&scratch_obstack);

  dim = nb_vars_a + nb_vars_b;
  U = lambda_matrix_new (dim, dim, &scratch_obstack);
  A = lambda_matrix_new (dim, 1, &scratch_obstack);
  S = lambda_matrix_new (dim, 1, &scratch_obstack);

  /* A's rows hold CHREC_A's coefficients followed by the negated
     coefficients of CHREC_B, building the homogeneous equation.  */
  tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
  tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
  if (init_a == chrec_dont_know
      || init_b == chrec_dont_know)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "affine-affine test failed: "
		 "representation issue.\n");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }
  /* GAMMA is the difference of the initial conditions.  */
  gamma = int_cst_value (init_b) - int_cst_value (init_a);

  /* Don't do all the hard work of solving the Diophantine equation
     when we already know the solution: for example,
     | {3, +, 1}_1
     | {3, +, 4}_2
     | gamma = 3 - 3 = 0.
     Then the first overlap occurs during the first iterations:
     | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)  */
  if (gamma == 0)
    {
      if (nb_vars_a == 1 && nb_vars_b == 1)
	{
	  HOST_WIDE_INT step_a, step_b;
	  HOST_WIDE_INT niter, niter_a, niter_b;
	  affine_fn ova, ovb;

	  niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
	  niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
	  niter = MIN (niter_a, niter_b);
	  step_a = int_cst_value (CHREC_RIGHT (chrec_a));
	  step_b = int_cst_value (CHREC_RIGHT (chrec_b));

	  compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
						   &ova, &ovb,
						   last_conflicts, 1);
	  *overlaps_a = conflict_fn (1, ova);
	  *overlaps_b = conflict_fn (1, ovb);
	}

      /* The 2-vs-1 helper is asymmetric: pass the bivariate chrec
	 first and swap the output slots accordingly.  */
      else if (nb_vars_a == 2 && nb_vars_b == 1)
	compute_overlap_steps_for_affine_1_2
	  (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);

      else if (nb_vars_a == 1 && nb_vars_b == 2)
	compute_overlap_steps_for_affine_1_2
	  (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);

      else
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "affine-affine test failed: too many variables.\n");
	  *overlaps_a = conflict_fn_not_known ();
	  *overlaps_b = conflict_fn_not_known ();
	  *last_conflicts = chrec_dont_know;
	}
      goto end_analyze_subs_aa;
    }

  /* U.A = S */
  if (!lambda_matrix_right_hermite (A, dim, 1, S, U))
    {
      /* Overflow during the decomposition: answer "don't know".  */
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }

  /* Normalize the pivot to be positive, mirroring the sign change
     into U to keep U.A = S.  */
  if (S[0][0] < 0)
    {
      S[0][0] *= -1;
      lambda_matrix_row_negate (U, dim, 0);
    }
  gcd_alpha_beta = S[0][0];

  /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
     but that is a quite strange case.  Instead of ICEing, answer
     don't know.  */
  if (gcd_alpha_beta == 0)
    {
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }

  /* The classic "gcd-test".  */
  if (!int_divides_p (gcd_alpha_beta, gamma))
    {
      /* The "gcd-test" has determined that there is no integer
	 solution, i.e. there is no dependence.  */
      *overlaps_a = conflict_fn_no_dependence ();
      *overlaps_b = conflict_fn_no_dependence ();
      *last_conflicts = integer_zero_node;
    }

  /* Both access functions are univariate.  This includes SIV and MIV cases.  */
  else if (nb_vars_a == 1 && nb_vars_b == 1)
    {
      /* Both functions should have the same evolution sign.  */
      if (((A[0][0] > 0 && -A[1][0] > 0)
	   || (A[0][0] < 0 && -A[1][0] < 0)))
	{
	  /* The solutions are given by:
	     |
	     | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
	     |                           [u21 u22]    [y0]
	     |
	     For a given integer t.  Using the following variables,
	     |
	     | i0 = u11 * gamma / gcd_alpha_beta
	     | j0 = u12 * gamma / gcd_alpha_beta
	     | i1 = u21
	     | j1 = u22
	     |
	     the solutions are:
	     |
	     | x0 = i0 + i1 * t,
	     | y0 = j0 + j1 * t.  */
	  HOST_WIDE_INT i0, j0, i1, j1;

	  i0 = U[0][0] * gamma / gcd_alpha_beta;
	  j0 = U[0][1] * gamma / gcd_alpha_beta;
	  i1 = U[1][0];
	  j1 = U[1][1];

	  if ((i1 == 0 && i0 < 0)
	      || (j1 == 0 && j0 < 0))
	    {
	      /* There is no solution.
		 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
		 falls in here, but for the moment we don't look at the
		 upper bound of the iteration domain.  */
	      *overlaps_a = conflict_fn_no_dependence ();
	      *overlaps_b = conflict_fn_no_dependence ();
	      *last_conflicts = integer_zero_node;
	      goto end_analyze_subs_aa;
	    }

	  if (i1 > 0 && j1 > 0)
	    {
	      HOST_WIDE_INT niter_a
		= max_stmt_executions_int (get_chrec_loop (chrec_a));
	      HOST_WIDE_INT niter_b
		= max_stmt_executions_int (get_chrec_loop (chrec_b));
	      HOST_WIDE_INT niter = MIN (niter_a, niter_b);

	      /* (X0, Y0) is a solution of the Diophantine equation:
		 "chrec_a (X0) = chrec_b (Y0)".  */
	      HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
					CEIL (-j0, j1));
	      HOST_WIDE_INT x0 = i1 * tau1 + i0;
	      HOST_WIDE_INT y0 = j1 * tau1 + j0;

	      /* (X1, Y1) is the smallest positive solution of the eq
		 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
		 first conflict occurs.  */
	      HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
	      HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
	      HOST_WIDE_INT y1 = y0 - j1 * min_multiple;

	      if (niter > 0)
		{
		  /* If the overlap occurs outside of the bounds of the
		     loop, there is no dependence.  */
		  if (x1 >= niter_a || y1 >= niter_b)
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      goto end_analyze_subs_aa;
		    }

		  /* max stmt executions can get quite large, avoid
		     overflows by using wide ints here.  */
		  widest_int tau2
		    = wi::smin (wi::sdiv_floor (wi::sub (niter_a, i0), i1),
				wi::sdiv_floor (wi::sub (niter_b, j0), j1));
		  widest_int last_conflict = wi::sub (tau2, (x1 - i0)/i1);
		  if (wi::min_precision (last_conflict, SIGNED)
		      <= TYPE_PRECISION (integer_type_node))
		    *last_conflicts
		       = build_int_cst (integer_type_node,
					last_conflict.to_shwi ());
		  else
		    *last_conflicts = chrec_dont_know;
		}
	      else
		*last_conflicts = chrec_dont_know;

	      /* The conflicting iterations are x1 + i1*k for A and
		 y1 + j1*k for B, k >= 0.  */
	      *overlaps_a
		= conflict_fn (1,
			       affine_fn_univar (build_int_cst (NULL_TREE, x1),
						 1,
						 build_int_cst (NULL_TREE, i1)));
	      *overlaps_b
		= conflict_fn (1,
			       affine_fn_univar (build_int_cst (NULL_TREE, y1),
						 1,
						 build_int_cst (NULL_TREE, j1)));
	    }
	  else
	    {
	      /* FIXME: For the moment, the upper bound of the
		 iteration domain for i and j is not checked.  */
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	    }
	}
      else
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
	  *overlaps_a = conflict_fn_not_known ();
	  *overlaps_b = conflict_fn_not_known ();
	  *last_conflicts = chrec_dont_know;
	}
    }
  else
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
    }

end_analyze_subs_aa:
  obstack_free (&scratch_obstack, NULL);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  (overlaps_a = ");
      dump_conflict_function (dump_file, *overlaps_a);
      fprintf (dump_file, ")\n  (overlaps_b = ");
      dump_conflict_function (dump_file, *overlaps_b);
      fprintf (dump_file, "))\n");
    }
}
/* Returns true when analyze_subscript_affine_affine can be used for
   determining the dependence relation between chrec_a and chrec_b,
   that contain symbols.  This function modifies chrec_a and chrec_b
   such that the analysis result is the same, and such that they don't
   contain symbols, and then can safely be passed to the analyzer.

   Example: The analysis of the following tuples of evolutions produce
   the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
   vs. {0, +, 1}_1

   {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
   {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
*/

static bool
can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
{
  tree diff, type, left_a, left_b, right_b;

  if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
      || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
    /* FIXME: For the moment not handled.  Might be refined later.  */
    return false;

  /* Compute the difference of the two initial conditions in the type
     of CHREC_A; only a constant difference lets the symbols cancel.  */
  type = chrec_type (*chrec_a);
  left_a = CHREC_LEFT (*chrec_a);
  left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
  diff = chrec_fold_minus (type, left_a, left_b);

  if (!evolution_function_is_constant_p (diff))
    return false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");

  /* Shift both chrecs by -CHREC_LEFT (*chrec_b): A starts at DIFF and
     B starts at zero, preserving the relative overlap analysis while
     removing the symbolic initial conditions.  Both trees are
     rewritten in place for the caller.  */
  *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
				     diff, CHREC_RIGHT (*chrec_a));
  right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
  *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
				     build_int_cst (type, 0),
				     right_b);
  return true;
}
/* Analyze a SIV (Single Index Variable) subscript.  *OVERLAPS_A and
   *OVERLAPS_B are initialized to the functions that describe the
   relation between the elements accessed twice by CHREC_A and
   CHREC_B.  For k >= 0, the following property is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_siv_subscript (tree chrec_a,
		       tree chrec_b,
		       conflict_function **overlaps_a,
		       conflict_function **overlaps_b,
		       tree *last_conflicts,
		       int loop_nest_num)
{
  dependence_stats.num_siv++;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_siv_subscript \n");

  /* Constant vs. affine: delegate, swapping the output slots so they
     still match the original argument order.  */
  if (evolution_function_is_constant_p (chrec_a)
      && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
    analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
				      overlaps_a, overlaps_b, last_conflicts);

  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
	   && evolution_function_is_constant_p (chrec_b))
    analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
				      overlaps_b, overlaps_a, last_conflicts);

  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
	   && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
    {
      if (!chrec_contains_symbols (chrec_a)
	  && !chrec_contains_symbols (chrec_b))
	{
	  analyze_subscript_affine_affine (chrec_a, chrec_b,
					   overlaps_a, overlaps_b,
					   last_conflicts);

	  if (CF_NOT_KNOWN_P (*overlaps_a)
	      || CF_NOT_KNOWN_P (*overlaps_b))
	    dependence_stats.num_siv_unimplemented++;
	  else if (CF_NO_DEPENDENCE_P (*overlaps_a)
		   || CF_NO_DEPENDENCE_P (*overlaps_b))
	    dependence_stats.num_siv_independent++;
	  else
	    dependence_stats.num_siv_dependent++;
	}
      /* Symbolic chrecs may still be analyzable after rewriting them
	 into symbol-free form; note this rewrites chrec_a/chrec_b.  */
      else if (can_use_analyze_subscript_affine_affine (&chrec_a,
							&chrec_b))
	{
	  analyze_subscript_affine_affine (chrec_a, chrec_b,
					   overlaps_a, overlaps_b,
					   last_conflicts);

	  if (CF_NOT_KNOWN_P (*overlaps_a)
	      || CF_NOT_KNOWN_P (*overlaps_b))
	    dependence_stats.num_siv_unimplemented++;
	  else if (CF_NO_DEPENDENCE_P (*overlaps_a)
		   || CF_NO_DEPENDENCE_P (*overlaps_b))
	    dependence_stats.num_siv_independent++;
	  else
	    dependence_stats.num_siv_dependent++;
	}
      else
	/* Jump into the final else branch to share its "don't know"
	   handling.  */
	goto siv_subscript_dontknow;
    }

  else
    {
    siv_subscript_dontknow:;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "siv test failed: unimplemented");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      dependence_stats.num_siv_unimplemented++;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, ")\n");
}
4795 /* Returns false if we can prove that the greatest common divisor of the steps
4796 of CHREC does not divide CST, false otherwise. */
4798 static bool
4799 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
4801 HOST_WIDE_INT cd = 0, val;
4802 tree step;
4804 if (!tree_fits_shwi_p (cst))
4805 return true;
4806 val = tree_to_shwi (cst);
4808 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
4810 step = CHREC_RIGHT (chrec);
4811 if (!tree_fits_shwi_p (step))
4812 return true;
4813 cd = gcd (cd, tree_to_shwi (step));
4814 chrec = CHREC_LEFT (chrec);
4817 return val % cd == 0;
/* Analyze a MIV (Multiple Index Variable) subscript with respect to
   LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
   functions that describe the relation between the elements accessed
   twice by CHREC_A and CHREC_B.  For k >= 0, the following property
   is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_miv_subscript (tree chrec_a,
		       tree chrec_b,
		       conflict_function **overlaps_a,
		       conflict_function **overlaps_b,
		       tree *last_conflicts,
		       class loop *loop_nest)
{
  tree type, difference;

  dependence_stats.num_miv++;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_miv_subscript \n");

  /* Bring both chrecs to a common signed type before subtracting, so
     the difference is well defined.  */
  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
  chrec_a = chrec_convert (type, chrec_a, NULL);
  chrec_b = chrec_convert (type, chrec_b, NULL);
  difference = chrec_fold_minus (type, chrec_a, chrec_b);

  if (eq_evolutions_p (chrec_a, chrec_b))
    {
      /* Access functions are the same: all the elements are accessed
	 in the same order.  */
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
      dependence_stats.num_miv_dependent++;
    }

  else if (evolution_function_is_constant_p (difference)
	   && evolution_function_is_affine_multivariate_p (chrec_a,
							   loop_nest->num)
	   && !gcd_of_steps_may_divide_p (chrec_a, difference))
    {
      /* testsuite/.../ssa-chrec-33.c
	 {{21, +, 2}_1, +, -2}_2  vs.  {{20, +, 2}_1, +, -2}_2

	 The difference is 1, and all the evolution steps are multiples
	 of 2, consequently there are no overlapping elements.  */
      *overlaps_a = conflict_fn_no_dependence ();
      *overlaps_b = conflict_fn_no_dependence ();
      *last_conflicts = integer_zero_node;
      dependence_stats.num_miv_independent++;
    }

  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest->num)
	   && !chrec_contains_symbols (chrec_a, loop_nest)
	   && evolution_function_is_affine_in_loop (chrec_b, loop_nest->num)
	   && !chrec_contains_symbols (chrec_b, loop_nest))
    {
      /* testsuite/.../ssa-chrec-35.c
	 {0, +, 1}_2  vs.  {0, +, 1}_3
	 the overlapping elements are respectively located at iterations:
	 {0, +, 1}_x and {0, +, 1}_x,
	 in other words, we have the equality:
	 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)

	 Other examples:
	 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
	 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)

	 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
	 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)  */
      analyze_subscript_affine_affine (chrec_a, chrec_b,
				       overlaps_a, overlaps_b, last_conflicts);

      if (CF_NOT_KNOWN_P (*overlaps_a)
	  || CF_NOT_KNOWN_P (*overlaps_b))
	dependence_stats.num_miv_unimplemented++;
      else if (CF_NO_DEPENDENCE_P (*overlaps_a)
	       || CF_NO_DEPENDENCE_P (*overlaps_b))
	dependence_stats.num_miv_independent++;
      else
	dependence_stats.num_miv_dependent++;
    }

  else
    {
      /* When the analysis is too difficult, answer "don't know".  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      dependence_stats.num_miv_unimplemented++;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, ")\n");
}
/* Determines the iterations for which CHREC_A is equal to CHREC_B in
   with respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
   OVERLAP_ITERATIONS_B are initialized with two functions that
   describe the iterations that contain conflicting elements.

   Remark: For an integer k >= 0, the following equality is true:

   CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
*/

static void
analyze_overlapping_iterations (tree chrec_a,
				tree chrec_b,
				conflict_function **overlap_iterations_a,
				conflict_function **overlap_iterations_b,
				tree *last_conflicts, class loop *loop_nest)
{
  unsigned int lnn = loop_nest->num;

  dependence_stats.num_subscript_tests++;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "(analyze_overlapping_iterations \n");
      fprintf (dump_file, "  (chrec_a = ");
      print_generic_expr (dump_file, chrec_a);
      fprintf (dump_file, ")\n  (chrec_b = ");
      print_generic_expr (dump_file, chrec_b);
      fprintf (dump_file, ")\n");
    }

  /* NOTE(review): the first and third branches below do not set
     *last_conflicts — presumably callers only read it when the
     conflict functions are known; confirm before relying on it.  */
  if (chrec_a == NULL_TREE
      || chrec_b == NULL_TREE
      || chrec_contains_undetermined (chrec_a)
      || chrec_contains_undetermined (chrec_b))
    {
      dependence_stats.num_subscript_undetermined++;

      *overlap_iterations_a = conflict_fn_not_known ();
      *overlap_iterations_b = conflict_fn_not_known ();
    }

  /* If they are the same chrec, and are affine, they overlap
     on every iteration.  */
  else if (eq_evolutions_p (chrec_a, chrec_b)
	   && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
	       || operand_equal_p (chrec_a, chrec_b, 0)))
    {
      dependence_stats.num_same_subscript_function++;
      *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = chrec_dont_know;
    }

  /* If they aren't the same, and aren't affine, we can't do anything
     yet.  */
  else if ((chrec_contains_symbols (chrec_a)
	    || chrec_contains_symbols (chrec_b))
	   && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
	       || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
    {
      dependence_stats.num_subscript_undetermined++;
      *overlap_iterations_a = conflict_fn_not_known ();
      *overlap_iterations_b = conflict_fn_not_known ();
    }

  /* Dispatch on the number of index variables involved: zero (ZIV),
     one (SIV), or several (MIV).  */
  else if (ziv_subscript_p (chrec_a, chrec_b))
    analyze_ziv_subscript (chrec_a, chrec_b,
			   overlap_iterations_a, overlap_iterations_b,
			   last_conflicts);

  else if (siv_subscript_p (chrec_a, chrec_b))
    analyze_siv_subscript (chrec_a, chrec_b,
			   overlap_iterations_a, overlap_iterations_b,
			   last_conflicts, lnn);

  else
    analyze_miv_subscript (chrec_a, chrec_b,
			   overlap_iterations_a, overlap_iterations_b,
			   last_conflicts, loop_nest);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  (overlap_iterations_a = ");
      dump_conflict_function (dump_file, *overlap_iterations_a);
      fprintf (dump_file, ")\n  (overlap_iterations_b = ");
      dump_conflict_function (dump_file, *overlap_iterations_b);
      fprintf (dump_file, "))\n");
    }
}
5012 /* Helper function for uniquely inserting distance vectors. */
5014 static void
5015 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
5017 for (lambda_vector v : DDR_DIST_VECTS (ddr))
5018 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
5019 return;
5021 DDR_DIST_VECTS (ddr).safe_push (dist_v);
5024 /* Helper function for uniquely inserting direction vectors. */
5026 static void
5027 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
5029 for (lambda_vector v : DDR_DIR_VECTS (ddr))
5030 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
5031 return;
5033 DDR_DIR_VECTS (ddr).safe_push (dir_v);
5036 /* Add a distance of 1 on all the loops outer than INDEX. If we
5037 haven't yet determined a distance for this outer loop, push a new
5038 distance vector composed of the previous distance, and a distance
5039 of 1 for this outer loop. Example:
5041 | loop_1
5042 | loop_2
5043 | A[10]
5044 | endloop_2
5045 | endloop_1
5047 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
5048 save (0, 1), then we have to save (1, 0). */
5050 static void
5051 add_outer_distances (struct data_dependence_relation *ddr,
5052 lambda_vector dist_v, int index)
5054 /* For each outer loop where init_v is not set, the accesses are
5055 in dependence of distance 1 in the loop. */
5056 while (--index >= 0)
5058 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5059 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
5060 save_v[index] = 1;
5061 save_dist_v (ddr, save_v);
/* Return false when fail to represent the data dependence as a
   distance vector.  A_INDEX is the index of the first reference
   (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
   second reference.  INIT_B is set to true when a component has been
   added to the distance vector DIST_V.  INDEX_CARRY is then set to
   the index in DIST_V that carries the dependence.  */

static bool
build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
			     unsigned int a_index, unsigned int b_index,
			     lambda_vector dist_v, bool *init_b,
			     int *index_carry)
{
  unsigned i;
  /* INIT_V tracks which components of DIST_V have already been
     assigned by a previous subscript.  */
  lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  class loop *loop = DDR_LOOP_NEST (ddr)[0];

  for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
    {
      tree access_fn_a, access_fn_b;
      struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);

      if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
	{
	  non_affine_dependence_relation (ddr);
	  return false;
	}

      access_fn_a = SUB_ACCESS_FN (subscript, a_index);
      access_fn_b = SUB_ACCESS_FN (subscript, b_index);

      if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
	  && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
	{
	  HOST_WIDE_INT dist;
	  int index;
	  int var_a = CHREC_VARIABLE (access_fn_a);
	  int var_b = CHREC_VARIABLE (access_fn_b);

	  /* Both access functions must evolve in the same loop for
	     the distance to be meaningful.  */
	  if (var_a != var_b
	      || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
	    {
	      non_affine_dependence_relation (ddr);
	      return false;
	    }

	  /* When data references are collected in a loop while data
	     dependences are analyzed in loop nest nested in the loop, we
	     would have more number of access functions than number of
	     loops.  Skip access functions of loops not in the loop nest.

	     See PR89725 for more information.  */
	  if (flow_loop_nested_p (get_loop (cfun, var_a), loop))
	    continue;

	  dist = int_cst_value (SUB_DISTANCE (subscript));
	  index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
	  *index_carry = MIN (index, *index_carry);

	  /* This is the subscript coupling test.  If we have already
	     recorded a distance for this loop (a distance coming from
	     another subscript), it should be the same.  For example,
	     in the following code, there is no dependence:

	     | loop i = 0, N, 1
	     |   T[i+1][i] = ...
	     |   ... = T[i][i]
	     | endloop
	  */
	  if (init_v[index] != 0 && dist_v[index] != dist)
	    {
	      finalize_ddr_dependent (ddr, chrec_known);
	      return false;
	    }

	  dist_v[index] = dist;
	  init_v[index] = 1;
	  *init_b = true;
	}
      else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
	{
	  /* This can be for example an affine vs. constant dependence
	     (T[i] vs. T[3]) that is not an affine dependence and is
	     not representable as a distance vector.  */
	  non_affine_dependence_relation (ddr);
	  return false;
	}
      else
	/* Equal non-polynomial access functions contribute a zero
	   component, which DIST_V already holds.  */
	*init_b = true;
    }

  return true;
}
5159 /* Return true when the DDR contains only invariant access functions wrto. loop
5160 number LNUM. */
5162 static bool
5163 invariant_access_functions (const struct data_dependence_relation *ddr,
5164 int lnum)
5166 for (subscript *sub : DDR_SUBSCRIPTS (ddr))
5167 if (!evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 0), lnum)
5168 || !evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 1), lnum))
5169 return false;
5171 return true;
/* Helper function for the case where DDR_A and DDR_B are the same
   multivariate access function with a constant step.  For an example
   see pr34635-1.c.  C_2 is the outer chrec {{c_0, +, s_1}_1, +, s_2}_2.  */

static void
add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
{
  int x_1, x_2;
  tree c_1 = CHREC_LEFT (c_2);
  tree c_0 = CHREC_LEFT (c_1);
  lambda_vector dist_v;
  HOST_WIDE_INT v1, v2, cd;

  /* Polynomials with more than 2 variables are not handled yet.  When
     the evolution steps are parameters, it is not possible to
     represent the dependence using classical distance vectors.  */
  if (TREE_CODE (c_0) != INTEGER_CST
      || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
      || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
    {
      DDR_AFFINE_P (ddr) = false;
      return;
    }

  /* Positions of the two loops in the analyzed loop nest.  */
  x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
  x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));

  /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2).  */
  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  v1 = int_cst_value (CHREC_RIGHT (c_1));
  v2 = int_cst_value (CHREC_RIGHT (c_2));
  /* Reduce the steps by their gcd so the distance vector is minimal.  */
  cd = gcd (v1, v2);
  v1 /= cd;
  v2 /= cd;

  /* Normalize the sign so the inner-loop component v2 is positive.  */
  if (v2 < 0)
    {
      v2 = -v2;
      v1 = -v1;
    }

  dist_v[x_1] = v2;
  dist_v[x_2] = -v1;
  save_dist_v (ddr, dist_v);

  /* Loops outer than x_1 also carry a distance-1 dependence.  */
  add_outer_distances (ddr, dist_v, x_1);
}
/* Helper function for the case where DDR_A and DDR_B are the same
   access functions.  Records self-distance vectors for DDR, or marks
   the relation as non-affine/don't-know when no distance vector can
   represent it.  */

static void
add_other_self_distances (struct data_dependence_relation *ddr)
{
  lambda_vector dist_v;
  unsigned i;
  int index_carry = DDR_NB_LOOPS (ddr);
  subscript *sub;
  class loop *loop = DDR_LOOP_NEST (ddr)[0];

  FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
    {
      tree access_fun = SUB_ACCESS_FN (sub, 0);

      if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
	{
	  if (!evolution_function_is_univariate_p (access_fun, loop->num))
	    {
	      /* Multivariate evolutions are only handled for
		 single-subscript relations.  */
	      if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
		{
		  DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
		  return;
		}

	      access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);

	      if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
		add_multivariate_self_dist (ddr, access_fun);
	      else
		/* The evolution step is not constant: it varies in
		   the outer loop, so this cannot be represented by a
		   distance vector.  For example in pr34635.c the
		   evolution is {0, +, {0, +, 4}_1}_2.  */
		DDR_AFFINE_P (ddr) = false;

	      return;
	    }

	  /* When data references are collected in a loop while data
	     dependences are analyzed in loop nest nested in the loop, we
	     would have more number of access functions than number of
	     loops.  Skip access functions of loops not in the loop nest.

	     See PR89725 for more information.  */
	  if (flow_loop_nested_p (get_loop (cfun, CHREC_VARIABLE (access_fun)),
				  loop))
	    continue;

	  /* Track the outermost loop whose induction variable appears
	     in any access function.  */
	  index_carry = MIN (index_carry,
			     index_in_loop_nest (CHREC_VARIABLE (access_fun),
						 DDR_LOOP_NEST (ddr)));
	}
    }

  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  add_outer_distances (ddr, dist_v, index_carry);
}
5282 static void
5283 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
5285 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5287 dist_v[0] = 1;
5288 save_dist_v (ddr, dist_v);
5291 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
5292 is the case for example when access functions are the same and
5293 equal to a constant, as in:
5295 | loop_1
5296 | A[3] = ...
5297 | ... = A[3]
5298 | endloop_1
5300 in which case the distance vectors are (0) and (1). */
5302 static void
5303 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
5305 unsigned i, j;
5307 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
5309 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
5310 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
5311 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
5313 for (j = 0; j < ca->n; j++)
5314 if (affine_function_zero_p (ca->fns[j]))
5316 insert_innermost_unit_dist_vector (ddr);
5317 return;
5320 for (j = 0; j < cb->n; j++)
5321 if (affine_function_zero_p (cb->fns[j]))
5323 insert_innermost_unit_dist_vector (ddr);
5324 return;
5329 /* Return true when the DDR contains two data references that have the
5330 same access functions. */
5332 static inline bool
5333 same_access_functions (const struct data_dependence_relation *ddr)
5335 for (subscript *sub : DDR_SUBSCRIPTS (ddr))
5336 if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
5337 SUB_ACCESS_FN (sub, 1)))
5338 return false;
5340 return true;
/* Compute the classic per loop distance vector.  DDR is the data
   dependence relation to build a vector from.  Return false when fail
   to represent the data dependence as a distance vector.  */

static bool
build_classic_dist_vector (struct data_dependence_relation *ddr,
			   class loop *loop_nest)
{
  bool init_b = false;
  int index_carry = DDR_NB_LOOPS (ddr);
  lambda_vector dist_v;

  /* Nothing to do for relations already classified as known
     independent or don't-know.  */
  if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
    return false;

  if (same_access_functions (ddr))
    {
      /* Save the 0 vector.  */
      dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
      save_dist_v (ddr, dist_v);

      if (invariant_access_functions (ddr, loop_nest->num))
	add_distance_for_zero_overlaps (ddr);

      if (DDR_NB_LOOPS (ddr) > 1)
	add_other_self_distances (ddr);

      return true;
    }

  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
    return false;

  /* Save the distance vector if we initialized one.  */
  if (init_b)
    {
      /* Verify a basic constraint: classic distance vectors should
	 always be lexicographically positive.

	 Data references are collected in the order of execution of
	 the program, thus for the following loop

	 | for (i = 1; i < 100; i++)
	 |   for (j = 1; j < 100; j++)
	 |     {
	 |       t = T[j+1][i-1];  // A
	 |       T[j][i] = t + 2;  // B
	 |     }

	 references are collected following the direction of the wind:
	 A then B.  The data dependence tests are performed also
	 following this order, such that we're looking at the distance
	 separating the elements accessed by A from the elements later
	 accessed by B.  But in this example, the distance returned by
	 test_dep (A, B) is lexicographically negative (-1, 1), that
	 means that the access A occurs later than B with respect to
	 the outer loop, ie. we're actually looking upwind.  In this
	 case we solve test_dep (B, A) looking downwind to the
	 lexicographically positive solution, that returns the
	 distance vector (1, -1).  */
      if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
	{
	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
	  /* Re-run the subscript analysis with the references
	     swapped to obtain the downwind distance.  */
	  if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
	    return false;
	  compute_subscript_distance (ddr);
	  if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
					    &index_carry))
	    return false;
	  save_dist_v (ddr, save_v);
	  DDR_REVERSED_P (ddr) = true;

	  /* In this case there is a dependence forward for all the
	     outer loops:

	     | for (k = 1; k < 100; k++)
	     |  for (i = 1; i < 100; i++)
	     |   for (j = 1; j < 100; j++)
	     |     {
	     |       t = T[j+1][i-1];  // A
	     |       T[j][i] = t + 2;  // B
	     |     }

	     the vectors are:
	     (0,  1, -1)
	     (1,  1, -1)
	     (1, -1,  1)
	  */
	  if (DDR_NB_LOOPS (ddr) > 1)
	    {
	      add_outer_distances (ddr, save_v, index_carry);
	      add_outer_distances (ddr, dist_v, index_carry);
	    }
	}
      else
	{
	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
	  lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));

	  if (DDR_NB_LOOPS (ddr) > 1)
	    {
	      /* Also record the distance for the opposite direction
		 (B to A) in a multi-loop nest.  */
	      lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));

	      if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
		return false;
	      compute_subscript_distance (ddr);
	      if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
						&index_carry))
		return false;

	      save_dist_v (ddr, save_v);
	      add_outer_distances (ddr, dist_v, index_carry);
	      add_outer_distances (ddr, opposite_v, index_carry);
	    }
	  else
	    save_dist_v (ddr, save_v);
	}
    }
  else
    {
      /* There is a distance of 1 on all the outer loops: Example:
	 there is a dependence of distance 1 on loop_1 for the array A.

	 | loop_1
	 |   A[5] = ...
	 | endloop
      */
      add_outer_distances (ddr, dist_v,
			   lambda_vector_first_nz (dist_v,
						   DDR_NB_LOOPS (ddr), 0));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned i;

      fprintf (dump_file, "(build_classic_dist_vector\n");
      for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
	{
	  fprintf (dump_file, "  dist_vector = (");
	  print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
			       DDR_NB_LOOPS (ddr));
	  fprintf (dump_file, "  )\n");
	}
      fprintf (dump_file, ")\n");
    }

  return true;
}
5494 /* Return the direction for a given distance.
5495 FIXME: Computing dir this way is suboptimal, since dir can catch
5496 cases that dist is unable to represent. */
5498 static inline enum data_dependence_direction
5499 dir_from_dist (int dist)
5501 if (dist > 0)
5502 return dir_positive;
5503 else if (dist < 0)
5504 return dir_negative;
5505 else
5506 return dir_equal;
5509 /* Compute the classic per loop direction vector. DDR is the data
5510 dependence relation to build a vector from. */
5512 static void
5513 build_classic_dir_vector (struct data_dependence_relation *ddr)
5515 unsigned i, j;
5516 lambda_vector dist_v;
5518 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
5520 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5522 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
5523 dir_v[j] = dir_from_dist (dist_v[j]);
5525 save_dir_v (ddr, dir_v);
/* Helper function.  Returns true when there is a dependence between the
   data references.  A_INDEX is the index of the first reference (0 for
   DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference.  */

static bool
subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
			       unsigned int a_index, unsigned int b_index,
			       class loop *loop_nest)
{
  unsigned int i;
  tree last_conflicts;
  struct subscript *subscript;
  tree res = NULL_TREE;

  for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
    {
      conflict_function *overlaps_a, *overlaps_b;

      analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
				      SUB_ACCESS_FN (subscript, b_index),
				      &overlaps_a, &overlaps_b,
				      &last_conflicts, loop_nest);

      /* Release conflict functions left over from a previous run
	 before storing the fresh ones.  */
      if (SUB_CONFLICTS_IN_A (subscript))
	free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
      if (SUB_CONFLICTS_IN_B (subscript))
	free_conflict_function (SUB_CONFLICTS_IN_B (subscript));

      SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
      SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
      SUB_LAST_CONFLICT (subscript) = last_conflicts;

      /* If there is any undetermined conflict function we have to
	 give a conservative answer in case we cannot prove that
	 no dependence exists when analyzing another subscript.  */
      if (CF_NOT_KNOWN_P (overlaps_a)
	  || CF_NOT_KNOWN_P (overlaps_b))
	{
	  res = chrec_dont_know;
	  continue;
	}

      /* When there is a subscript with no dependence we can stop.  */
      else if (CF_NO_DEPENDENCE_P (overlaps_a)
	       || CF_NO_DEPENDENCE_P (overlaps_b))
	{
	  res = chrec_known;
	  break;
	}
    }

  /* RES still NULL_TREE means every subscript had a known conflict:
     the references are dependent.  */
  if (res == NULL_TREE)
    return true;

  if (res == chrec_known)
    dependence_stats.num_dependence_independent++;
  else
    dependence_stats.num_dependence_undetermined++;
  finalize_ddr_dependent (ddr, res);
  return false;
}
5591 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
5593 static void
5594 subscript_dependence_tester (struct data_dependence_relation *ddr,
5595 class loop *loop_nest)
5597 if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
5598 dependence_stats.num_dependence_dependent++;
5600 compute_subscript_distance (ddr);
5601 if (build_classic_dist_vector (ddr, loop_nest))
5602 build_classic_dir_vector (ddr);
5605 /* Returns true when all the access functions of A are affine or
5606 constant with respect to LOOP_NEST. */
5608 static bool
5609 access_functions_are_affine_or_constant_p (const struct data_reference *a,
5610 const class loop *loop_nest)
5612 vec<tree> fns = DR_ACCESS_FNS (a);
5613 for (tree t : fns)
5614 if (!evolution_function_is_invariant_p (t, loop_nest->num)
5615 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
5616 return false;
5618 return true;
/* This computes the affine dependence relation between A and B with
   respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
   independence between two accesses, while CHREC_DONT_KNOW is used
   for representing the unknown relation.

   Note that it is possible to stop the computation of the dependence
   relation the first time we detect a CHREC_KNOWN element for a given
   subscript.  */

void
compute_affine_dependence (struct data_dependence_relation *ddr,
			   class loop *loop_nest)
{
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "(compute_affine_dependence\n");
      fprintf (dump_file, "  ref_a: ");
      print_generic_expr (dump_file, DR_REF (dra));
      fprintf (dump_file, ", stmt_a: ");
      print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
      fprintf (dump_file, "  ref_b: ");
      print_generic_expr (dump_file, DR_REF (drb));
      fprintf (dump_file, ", stmt_b: ");
      print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
    }

  /* Analyze only when the dependence relation is not yet known.  */
  if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      dependence_stats.num_dependence_tests++;

      if (access_functions_are_affine_or_constant_p (dra, loop_nest)
	  && access_functions_are_affine_or_constant_p (drb, loop_nest))
	subscript_dependence_tester (ddr, loop_nest);

      /* As a last case, if the dependence cannot be determined, or if
	 the dependence is considered too difficult to determine, answer
	 "don't know".  */
      else
	{
	  dependence_stats.num_dependence_undetermined++;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Data ref a:\n");
	      dump_data_reference (dump_file, dra);
	      fprintf (dump_file, "Data ref b:\n");
	      dump_data_reference (dump_file, drb);
	      fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
	    }
	  finalize_ddr_dependent (ddr, chrec_dont_know);
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
	fprintf (dump_file, ") -> no dependence\n");
      else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	fprintf (dump_file, ") -> dependence analysis failed\n");
      else
	fprintf (dump_file, ")\n");
    }
}
5689 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
5690 the data references in DATAREFS, in the LOOP_NEST. When
5691 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
5692 relations. Return true when successful, i.e. data references number
5693 is small enough to be handled. */
5695 bool
5696 compute_all_dependences (const vec<data_reference_p> &datarefs,
5697 vec<ddr_p> *dependence_relations,
5698 const vec<loop_p> &loop_nest,
5699 bool compute_self_and_rr)
5701 struct data_dependence_relation *ddr;
5702 struct data_reference *a, *b;
5703 unsigned int i, j;
5705 if ((int) datarefs.length ()
5706 > param_loop_max_datarefs_for_datadeps)
5708 struct data_dependence_relation *ddr;
5710 /* Insert a single relation into dependence_relations:
5711 chrec_dont_know. */
5712 ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
5713 dependence_relations->safe_push (ddr);
5714 return false;
5717 FOR_EACH_VEC_ELT (datarefs, i, a)
5718 for (j = i + 1; datarefs.iterate (j, &b); j++)
5719 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
5721 ddr = initialize_data_dependence_relation (a, b, loop_nest);
5722 dependence_relations->safe_push (ddr);
5723 if (loop_nest.exists ())
5724 compute_affine_dependence (ddr, loop_nest[0]);
5727 if (compute_self_and_rr)
5728 FOR_EACH_VEC_ELT (datarefs, i, a)
5730 ddr = initialize_data_dependence_relation (a, a, loop_nest);
5731 dependence_relations->safe_push (ddr);
5732 if (loop_nest.exists ())
5733 compute_affine_dependence (ddr, loop_nest[0]);
5736 return true;
/* Describes a location of a memory reference.  */

struct data_ref_loc
{
  /* The memory reference.  */
  tree ref;

  /* True if the memory reference is read.  */
  bool is_read;

  /* True if the data reference is conditional within the containing
     statement, i.e. if it might not occur even when the statement
     is executed and runs to completion.  */
  bool is_conditional_in_stmt;
};
/* Stores the locations of memory references in STMT to REFERENCES.  Returns
   true if STMT clobbers memory, false otherwise.  */

static bool
get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
{
  bool clobbers_memory = false;
  data_ref_loc ref;
  tree op0, op1;
  enum gimple_code stmt_code = gimple_code (stmt);

  /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
     As we cannot model data-references to not spelled out
     accesses give up if they may occur.  */
  if (stmt_code == GIMPLE_CALL
      && !(gimple_call_flags (stmt) & ECF_CONST))
    {
      /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
      if (gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_GOMP_SIMD_LANE:
	    {
	      class loop *loop = gimple_bb (stmt)->loop_father;
	      tree uid = gimple_call_arg (stmt, 0);
	      gcc_assert (TREE_CODE (uid) == SSA_NAME);
	      /* Only transparent in the loop the simduid belongs to.  */
	      if (loop == NULL
		  || loop->simduid != SSA_NAME_VAR (uid))
		clobbers_memory = true;
	      break;
	    }
	  case IFN_MASK_LOAD:
	  case IFN_MASK_STORE:
	    break;
	  default:
	    clobbers_memory = true;
	    break;
	  }
      else
	clobbers_memory = true;
    }
  else if (stmt_code == GIMPLE_ASM
	   && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
	       || gimple_vuse (stmt)))
    clobbers_memory = true;

  /* With no virtual use the statement accesses no memory we model.  */
  if (!gimple_vuse (stmt))
    return clobbers_memory;

  if (stmt_code == GIMPLE_ASSIGN)
    {
      tree base;
      op0 = gimple_assign_lhs (stmt);
      op1 = gimple_assign_rhs1 (stmt);

      /* Record the RHS as a read when it is a memory reference with an
	 analyzable base (not an SSA name or invariant address).  */
      if (DECL_P (op1)
	  || (REFERENCE_CLASS_P (op1)
	      && (base = get_base_address (op1))
	      && TREE_CODE (base) != SSA_NAME
	      && !is_gimple_min_invariant (base)))
	{
	  ref.ref = op1;
	  ref.is_read = true;
	  ref.is_conditional_in_stmt = false;
	  references->safe_push (ref);
	}
    }
  else if (stmt_code == GIMPLE_CALL)
    {
      unsigned i, n;
      tree ptr, type;
      unsigned int align;

      ref.is_read = false;
      if (gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_MASK_LOAD:
	    if (gimple_call_lhs (stmt) == NULL_TREE)
	      break;
	    ref.is_read = true;
	    /* FALLTHRU */
	  case IFN_MASK_STORE:
	    /* Build a MEM_REF representing the masked access; its type
	       carries the alignment from argument 1.  */
	    ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
	    align = tree_to_shwi (gimple_call_arg (stmt, 1));
	    if (ref.is_read)
	      type = TREE_TYPE (gimple_call_lhs (stmt));
	    else
	      type = TREE_TYPE (gimple_call_arg (stmt, 3));
	    if (TYPE_ALIGN (type) != align)
	      type = build_aligned_type (type, align);
	    /* Masked accesses may not happen on every execution.  */
	    ref.is_conditional_in_stmt = true;
	    ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
				   ptr);
	    references->safe_push (ref);
	    return false;
	  default:
	    break;
	  }

      op0 = gimple_call_lhs (stmt);
      n = gimple_call_num_args (stmt);
      /* Each memory argument of the call is a read.  */
      for (i = 0; i < n; i++)
	{
	  op1 = gimple_call_arg (stmt, i);

	  if (DECL_P (op1)
	      || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
	    {
	      ref.ref = op1;
	      ref.is_read = true;
	      ref.is_conditional_in_stmt = false;
	      references->safe_push (ref);
	    }
	}
    }
  else
    return clobbers_memory;

  /* The LHS of an assignment or call is a write when it is a memory
     reference.  */
  if (op0
      && (DECL_P (op0)
	  || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
    {
      ref.ref = op0;
      ref.is_read = false;
      ref.is_conditional_in_stmt = false;
      references->safe_push (ref);
    }
  return clobbers_memory;
}
5888 /* Returns true if the loop-nest has any data reference. */
5890 bool
5891 loop_nest_has_data_refs (loop_p loop)
5893 basic_block *bbs = get_loop_body (loop);
5894 auto_vec<data_ref_loc, 3> references;
5896 for (unsigned i = 0; i < loop->num_nodes; i++)
5898 basic_block bb = bbs[i];
5899 gimple_stmt_iterator bsi;
5901 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5903 gimple *stmt = gsi_stmt (bsi);
5904 get_references_in_stmt (stmt, &references);
5905 if (references.length ())
5907 free (bbs);
5908 return true;
5912 free (bbs);
5913 return false;
5916 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
5917 reference, returns false, otherwise returns true. NEST is the outermost
5918 loop of the loop nest in which the references should be analyzed. */
5920 opt_result
5921 find_data_references_in_stmt (class loop *nest, gimple *stmt,
5922 vec<data_reference_p> *datarefs)
5924 auto_vec<data_ref_loc, 2> references;
5925 data_reference_p dr;
5927 if (get_references_in_stmt (stmt, &references))
5928 return opt_result::failure_at (stmt, "statement clobbers memory: %G",
5929 stmt);
5931 for (const data_ref_loc &ref : references)
5933 dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
5934 loop_containing_stmt (stmt), ref.ref,
5935 stmt, ref.is_read, ref.is_conditional_in_stmt);
5936 gcc_assert (dr != NULL);
5937 datarefs->safe_push (dr);
5940 return opt_result::success ();
5943 /* Stores the data references in STMT to DATAREFS. If there is an
5944 unanalyzable reference, returns false, otherwise returns true.
5945 NEST is the outermost loop of the loop nest in which the references
5946 should be instantiated, LOOP is the loop in which the references
5947 should be analyzed. */
5949 bool
5950 graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
5951 vec<data_reference_p> *datarefs)
5953 auto_vec<data_ref_loc, 2> references;
5954 bool ret = true;
5955 data_reference_p dr;
5957 if (get_references_in_stmt (stmt, &references))
5958 return false;
5960 for (const data_ref_loc &ref : references)
5962 dr = create_data_ref (nest, loop, ref.ref, stmt, ref.is_read,
5963 ref.is_conditional_in_stmt);
5964 gcc_assert (dr != NULL);
5965 datarefs->safe_push (dr);
5968 return ret;
5971 /* Search the data references in LOOP, and record the information into
5972 DATAREFS. Returns chrec_dont_know when failing to analyze a
5973 difficult case, returns NULL_TREE otherwise. */
5975 tree
5976 find_data_references_in_bb (class loop *loop, basic_block bb,
5977 vec<data_reference_p> *datarefs)
5979 gimple_stmt_iterator bsi;
5981 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5983 gimple *stmt = gsi_stmt (bsi);
5985 if (!find_data_references_in_stmt (loop, stmt, datarefs))
5987 struct data_reference *res;
5988 res = XCNEW (struct data_reference);
5989 datarefs->safe_push (res);
5991 return chrec_dont_know;
5995 return NULL_TREE;
5998 /* Search the data references in LOOP, and record the information into
5999 DATAREFS. Returns chrec_dont_know when failing to analyze a
6000 difficult case, returns NULL_TREE otherwise.
6002 TODO: This function should be made smarter so that it can handle address
6003 arithmetic as if they were array accesses, etc. */
6005 tree
6006 find_data_references_in_loop (class loop *loop,
6007 vec<data_reference_p> *datarefs)
6009 basic_block bb, *bbs;
6010 unsigned int i;
6012 bbs = get_loop_body_in_dom_order (loop);
6014 for (i = 0; i < loop->num_nodes; i++)
6016 bb = bbs[i];
6018 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
6020 free (bbs);
6021 return chrec_dont_know;
6024 free (bbs);
6026 return NULL_TREE;
6029 /* Return the alignment in bytes that DRB is guaranteed to have at all
6030 times. */
6032 unsigned int
6033 dr_alignment (innermost_loop_behavior *drb)
6035 /* Get the alignment of BASE_ADDRESS + INIT. */
6036 unsigned int alignment = drb->base_alignment;
6037 unsigned int misalignment = (drb->base_misalignment
6038 + TREE_INT_CST_LOW (drb->init));
6039 if (misalignment != 0)
6040 alignment = MIN (alignment, misalignment & -misalignment);
6042 /* Cap it to the alignment of OFFSET. */
6043 if (!integer_zerop (drb->offset))
6044 alignment = MIN (alignment, drb->offset_alignment);
6046 /* Cap it to the alignment of STEP. */
6047 if (!integer_zerop (drb->step))
6048 alignment = MIN (alignment, drb->step_alignment);
6050 return alignment;
/* If BASE is a pointer-typed SSA name, try to find the object that it
   is based on.  Return this object X on success and store the alignment
   in bytes of BASE - &X in *ALIGNMENT_OUT.  */

static tree
get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
{
  if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL_TREE;

  gimple *def = SSA_NAME_DEF_STMT (base);
  base = analyze_scalar_evolution (loop_containing_stmt (def), base);

  /* Peel chrecs and record the minimum alignment preserved by
     all steps.  */
  unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
  while (TREE_CODE (base) == POLYNOMIAL_CHREC)
    {
      unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
      alignment = MIN (alignment, step_alignment);
      base = CHREC_LEFT (base);
    }

  /* Punt if the expression is too complicated to handle.  */
  if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL_TREE;

  /* The only useful cases are those for which a dereference folds to something
     other than an INDIRECT_REF.  */
  tree ref_type = TREE_TYPE (TREE_TYPE (base));
  tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
  if (!ref)
    return NULL_TREE;

  /* Analyze the base to which the steps we peeled were applied.  */
  poly_int64 bitsize, bitpos, bytepos;
  machine_mode mode;
  int unsignedp, reversep, volatilep;
  tree offset;
  base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
			      &unsignedp, &reversep, &volatilep);
  if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
    return NULL_TREE;

  /* Restrict the alignment to that guaranteed by the offsets.  */
  unsigned int bytepos_alignment = known_alignment (bytepos);
  if (bytepos_alignment != 0)
    alignment = MIN (alignment, bytepos_alignment);
  if (offset)
    {
      unsigned int offset_alignment = highest_pow2_factor (offset);
      alignment = MIN (alignment, offset_alignment);
    }

  *alignment_out = alignment;
  return base;
}
6111 /* Return the object whose alignment would need to be changed in order
6112 to increase the alignment of ADDR. Store the maximum achievable
6113 alignment in *MAX_ALIGNMENT. */
6115 tree
6116 get_base_for_alignment (tree addr, unsigned int *max_alignment)
6118 tree base = get_base_for_alignment_1 (addr, max_alignment);
6119 if (base)
6120 return base;
6122 if (TREE_CODE (addr) == ADDR_EXPR)
6123 addr = TREE_OPERAND (addr, 0);
6124 *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
6125 return addr;
6128 /* Recursive helper function. */
6130 static bool
6131 find_loop_nest_1 (class loop *loop, vec<loop_p> *loop_nest)
6133 /* Inner loops of the nest should not contain siblings. Example:
6134 when there are two consecutive loops,
6136 | loop_0
6137 | loop_1
6138 | A[{0, +, 1}_1]
6139 | endloop_1
6140 | loop_2
6141 | A[{0, +, 1}_2]
6142 | endloop_2
6143 | endloop_0
6145 the dependence relation cannot be captured by the distance
6146 abstraction. */
6147 if (loop->next)
6148 return false;
6150 loop_nest->safe_push (loop);
6151 if (loop->inner)
6152 return find_loop_nest_1 (loop->inner, loop_nest);
6153 return true;
6156 /* Return false when the LOOP is not well nested. Otherwise return
6157 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
6158 contain the loops from the outermost to the innermost, as they will
6159 appear in the classic distance vector. */
6161 bool
6162 find_loop_nest (class loop *loop, vec<loop_p> *loop_nest)
6164 loop_nest->safe_push (loop);
6165 if (loop->inner)
6166 return find_loop_nest_1 (loop->inner, loop_nest);
6167 return true;
/* Returns true when the data dependences have been computed, false otherwise.
   Given a loop nest LOOP, the following vectors are returned:
   DATAREFS is initialized to all the array elements contained in this loop,
   DEPENDENCE_RELATIONS contains the relations between the data references.
   Compute read-read and self relations if
   COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */

bool
compute_data_dependences_for_loop (class loop *loop,
				   bool compute_self_and_read_read_dependences,
				   vec<loop_p> *loop_nest,
				   vec<data_reference_p> *datarefs,
				   vec<ddr_p> *dependence_relations)
{
  bool res = true;

  memset (&dependence_stats, 0, sizeof (dependence_stats));

  /* If the loop nest is not well formed, or one of the data references
     is not computable, give up without spending time to compute other
     dependences.  */
  if (!loop
      || !find_loop_nest (loop, loop_nest)
      || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
      || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
				   compute_self_and_read_read_dependences))
    res = false;

  if (dump_file && (dump_flags & TDF_STATS))
    {
      fprintf (dump_file, "Dependence tester statistics:\n");

      fprintf (dump_file, "Number of dependence tests: %d\n",
	       dependence_stats.num_dependence_tests);
      fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
	       dependence_stats.num_dependence_dependent);
      fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
	       dependence_stats.num_dependence_independent);
      fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
	       dependence_stats.num_dependence_undetermined);

      fprintf (dump_file, "Number of subscript tests: %d\n",
	       dependence_stats.num_subscript_tests);
      fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
	       dependence_stats.num_subscript_undetermined);
      fprintf (dump_file, "Number of same subscript function: %d\n",
	       dependence_stats.num_same_subscript_function);

      fprintf (dump_file, "Number of ziv tests: %d\n",
	       dependence_stats.num_ziv);
      fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
	       dependence_stats.num_ziv_dependent);
      fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
	       dependence_stats.num_ziv_independent);
      fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
	       dependence_stats.num_ziv_unimplemented);

      fprintf (dump_file, "Number of siv tests: %d\n",
	       dependence_stats.num_siv);
      fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
	       dependence_stats.num_siv_dependent);
      fprintf (dump_file, "Number of siv tests returning independent: %d\n",
	       dependence_stats.num_siv_independent);
      fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
	       dependence_stats.num_siv_unimplemented);

      fprintf (dump_file, "Number of miv tests: %d\n",
	       dependence_stats.num_miv);
      fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
	       dependence_stats.num_miv_dependent);
      fprintf (dump_file, "Number of miv tests returning independent: %d\n",
	       dependence_stats.num_miv_independent);
      fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
	       dependence_stats.num_miv_unimplemented);
    }

  return res;
}
6249 /* Free the memory used by a data dependence relation DDR. */
6251 void
6252 free_dependence_relation (struct data_dependence_relation *ddr)
6254 if (ddr == NULL)
6255 return;
6257 if (DDR_SUBSCRIPTS (ddr).exists ())
6258 free_subscripts (DDR_SUBSCRIPTS (ddr));
6259 DDR_DIST_VECTS (ddr).release ();
6260 DDR_DIR_VECTS (ddr).release ();
6262 free (ddr);
6265 /* Free the memory used by the data dependence relations from
6266 DEPENDENCE_RELATIONS. */
6268 void
6269 free_dependence_relations (vec<ddr_p>& dependence_relations)
6271 for (data_dependence_relation *ddr : dependence_relations)
6272 if (ddr)
6273 free_dependence_relation (ddr);
6275 dependence_relations.release ();
6278 /* Free the memory used by the data references from DATAREFS. */
6280 void
6281 free_data_refs (vec<data_reference_p>& datarefs)
6283 for (data_reference *dr : datarefs)
6284 free_data_ref (dr);
6285 datarefs.release ();
6288 /* Common routine implementing both dr_direction_indicator and
6289 dr_zero_step_indicator. Return USEFUL_MIN if the indicator is known
6290 to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
6291 Return the step as the indicator otherwise. */
6293 static tree
6294 dr_step_indicator (struct data_reference *dr, int useful_min)
6296 tree step = DR_STEP (dr);
6297 if (!step)
6298 return NULL_TREE;
6299 STRIP_NOPS (step);
6300 /* Look for cases where the step is scaled by a positive constant
6301 integer, which will often be the access size. If the multiplication
6302 doesn't change the sign (due to overflow effects) then we can
6303 test the unscaled value instead. */
6304 if (TREE_CODE (step) == MULT_EXPR
6305 && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
6306 && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
6308 tree factor = TREE_OPERAND (step, 1);
6309 step = TREE_OPERAND (step, 0);
6311 /* Strip widening and truncating conversions as well as nops. */
6312 if (CONVERT_EXPR_P (step)
6313 && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
6314 step = TREE_OPERAND (step, 0);
6315 tree type = TREE_TYPE (step);
6317 /* Get the range of step values that would not cause overflow. */
6318 widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
6319 / wi::to_widest (factor));
6320 widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
6321 / wi::to_widest (factor));
6323 /* Get the range of values that the unconverted step actually has. */
6324 wide_int step_min, step_max;
6325 value_range vr;
6326 if (TREE_CODE (step) != SSA_NAME
6327 || !get_range_query (cfun)->range_of_expr (vr, step)
6328 || vr.kind () != VR_RANGE)
6330 step_min = wi::to_wide (TYPE_MIN_VALUE (type));
6331 step_max = wi::to_wide (TYPE_MAX_VALUE (type));
6333 else
6335 step_min = vr.lower_bound ();
6336 step_max = vr.upper_bound ();
6339 /* Check whether the unconverted step has an acceptable range. */
6340 signop sgn = TYPE_SIGN (type);
6341 if (wi::les_p (minv, widest_int::from (step_min, sgn))
6342 && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
6344 if (wi::ge_p (step_min, useful_min, sgn))
6345 return ssize_int (useful_min);
6346 else if (wi::lt_p (step_max, 0, sgn))
6347 return ssize_int (-1);
6348 else
6349 return fold_convert (ssizetype, step);
6352 return DR_STEP (dr);
6355 /* Return a value that is negative iff DR has a negative step. */
6357 tree
6358 dr_direction_indicator (struct data_reference *dr)
6360 return dr_step_indicator (dr, 0);
6363 /* Return a value that is zero iff DR has a zero step. */
6365 tree
6366 dr_zero_step_indicator (struct data_reference *dr)
6368 return dr_step_indicator (dr, 1);
6371 /* Return true if DR is known to have a nonnegative (but possibly zero)
6372 step. */
6374 bool
6375 dr_known_forward_stride_p (struct data_reference *dr)
6377 tree indicator = dr_direction_indicator (dr);
6378 tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
6379 fold_convert (ssizetype, indicator),
6380 ssize_int (0));
6381 return neg_step_val && integer_zerop (neg_step_val);