/* Data references and dependences detectors.
   Copyright (C) 2003-2022 Free Software Foundation, Inc.
   Contributed by Sebastian Pop <pop@cri.ensmp.fr>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass walks a given loop structure searching for array
   references.  The information about the array accesses is recorded
   in DATA_REFERENCE structures.

   The basic test for determining the dependences is:
   given two access functions chrec1 and chrec2 to the same array, and
   x and y two vectors from the iteration domain, the same element of
   the array is accessed twice at iterations x and y if and only if:
   | chrec1 (x) == chrec2 (y).

   The goals of this analysis are:

   - to determine the independence: the relation between two
     independent accesses is qualified with the chrec_known (this
     information allows a loop parallelization),

   - when two data references access the same data, to qualify the
     dependence relation with classic dependence representations:

       - distance vectors
       - direction vectors
       - loop carried level dependence
       - polyhedron dependence
     or with the chains of recurrences based representation,

   - to define a knowledge base for storing the data dependence
     information,

   - to define an interface to access this data.

   Definitions:

   - subscript: given two array accesses a subscript is the tuple
   composed of the access functions for a given dimension.  Example:
   Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
   (f1, g1), (f2, g2), (f3, g3).

   - Diophantine equation: an equation whose coefficients and
   solutions are integer constants, for example the equation
   | 3*x + 2*y = 1
   has an integer solution x = 1 and y = -1.
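   Such an equation a*x + b*y = c has an integer solution exactly when
   gcd (a, b) divides c; here gcd (3, 2) = 1 divides 1, which is why
   the solution above exists.  This divisibility condition is the core
   of the GCD-style dependence tests described in the references below.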

   References:

   - "Advanced Compilation for High Performance Computing" by Randy
   Allen and Ken Kennedy.
   http://citeseer.ist.psu.edu/goff91practical.html

   - "Loop Transformations for Restructuring Compilers - The Foundations"
   by Utpal Banerjee.
*/

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "fold-const.h"
#include "expr.h"
#include "gimple-iterator.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "dumpfile.h"
#include "tree-affine.h"
#include "builtins.h"
#include "tree-eh.h"
#include "ssa.h"
#include "internal-fn.h"
#include "vr-values.h"
#include "range-op.h"
#include "tree-ssa-loop-ivopts.h"

static struct datadep_stats
{
  int num_dependence_tests;
  int num_dependence_dependent;
  int num_dependence_independent;
  int num_dependence_undetermined;

  int num_subscript_tests;
  int num_subscript_undetermined;
  int num_same_subscript_function;

  int num_ziv;
  int num_ziv_independent;
  int num_ziv_dependent;
  int num_ziv_unimplemented;

  int num_siv;
  int num_siv_independent;
  int num_siv_dependent;
  int num_siv_unimplemented;

  int num_miv;
  int num_miv_independent;
  int num_miv_dependent;
  int num_miv_unimplemented;
} dependence_stats;

static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
                                           unsigned int, unsigned int,
                                           class loop *);
/* Returns true iff A divides B.  */

static inline bool
tree_fold_divides_p (const_tree a, const_tree b)
{
  gcc_assert (TREE_CODE (a) == INTEGER_CST);
  gcc_assert (TREE_CODE (b) == INTEGER_CST);
  return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
}

/* Returns true iff A divides B.  */

static inline bool
int_divides_p (lambda_int a, lambda_int b)
{
  return ((b % a) == 0);
}
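/* For example, int_divides_p (3, 9) is true because 9 % 3 == 0, while
   int_divides_p (2, 9) is false; tree_fold_divides_p is the
   INTEGER_CST tree counterpart of the same test.  */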

/* Return true if reference REF contains a union access.  */

static bool
ref_contains_union_access_p (tree ref)
{
  while (handled_component_p (ref))
    {
      ref = TREE_OPERAND (ref, 0);
      if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
          || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
        return true;
    }
  return false;
}


/* Dump into FILE all the data references from DATAREFS.  */

static void
dump_data_references (FILE *file, vec<data_reference_p> datarefs)
{
  for (data_reference *dr : datarefs)
    dump_data_reference (file, dr);
}

/* Unified dump into FILE all the data references from DATAREFS.  */

DEBUG_FUNCTION void
debug (vec<data_reference_p> &ref)
{
  dump_data_references (stderr, ref);
}

DEBUG_FUNCTION void
debug (vec<data_reference_p> *ptr)
{
  if (ptr)
    debug (*ptr);
  else
    fprintf (stderr, "<nil>\n");
}


/* Dump into STDERR all the data references from DATAREFS.  */

DEBUG_FUNCTION void
debug_data_references (vec<data_reference_p> datarefs)
{
  dump_data_references (stderr, datarefs);
}

/* Print to STDERR the data_reference DR.  */

DEBUG_FUNCTION void
debug_data_reference (struct data_reference *dr)
{
  dump_data_reference (stderr, dr);
}

/* Dump function for a DATA_REFERENCE structure.  */

void
dump_data_reference (FILE *outf,
                     struct data_reference *dr)
{
  unsigned int i;

  fprintf (outf, "#(Data Ref: \n");
  fprintf (outf, "#  bb: %d \n", gimple_bb (DR_STMT (dr))->index);
  fprintf (outf, "#  stmt: ");
  print_gimple_stmt (outf, DR_STMT (dr), 0);
  fprintf (outf, "#  ref: ");
  print_generic_stmt (outf, DR_REF (dr));
  fprintf (outf, "#  base_object: ");
  print_generic_stmt (outf, DR_BASE_OBJECT (dr));

  for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
    {
      fprintf (outf, "#  Access function %d: ", i);
      print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
    }
  fprintf (outf, "#)\n");
}

/* Unified dump function for a DATA_REFERENCE structure.  */

DEBUG_FUNCTION void
debug (data_reference &ref)
{
  dump_data_reference (stderr, &ref);
}

DEBUG_FUNCTION void
debug (data_reference *ptr)
{
  if (ptr)
    debug (*ptr);
  else
    fprintf (stderr, "<nil>\n");
}


/* Dumps the affine function described by FN to the file OUTF.  */

DEBUG_FUNCTION void
dump_affine_function (FILE *outf, affine_fn fn)
{
  unsigned i;
  tree coef;

  print_generic_expr (outf, fn[0], TDF_SLIM);
  for (i = 1; fn.iterate (i, &coef); i++)
    {
      fprintf (outf, " + ");
      print_generic_expr (outf, coef, TDF_SLIM);
      fprintf (outf, " * x_%u", i);
    }
}

/* Dumps the conflict function CF to the file OUTF.  */

DEBUG_FUNCTION void
dump_conflict_function (FILE *outf, conflict_function *cf)
{
  unsigned i;

  if (cf->n == NO_DEPENDENCE)
    fprintf (outf, "no dependence");
  else if (cf->n == NOT_KNOWN)
    fprintf (outf, "not known");
  else
    {
      for (i = 0; i < cf->n; i++)
        {
          if (i != 0)
            fprintf (outf, " ");
          fprintf (outf, "[");
          dump_affine_function (outf, cf->fns[i]);
          fprintf (outf, "]");
        }
    }
}

/* Dump function for a SUBSCRIPT structure.  */

DEBUG_FUNCTION void
dump_subscript (FILE *outf, struct subscript *subscript)
{
  conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);

  fprintf (outf, "\n (subscript \n");
  fprintf (outf, "  iterations_that_access_an_element_twice_in_A: ");
  dump_conflict_function (outf, cf);
  if (CF_NONTRIVIAL_P (cf))
    {
      tree last_iteration = SUB_LAST_CONFLICT (subscript);
      fprintf (outf, "\n  last_conflict: ");
      print_generic_expr (outf, last_iteration);
    }

  cf = SUB_CONFLICTS_IN_B (subscript);
  fprintf (outf, "\n  iterations_that_access_an_element_twice_in_B: ");
  dump_conflict_function (outf, cf);
  if (CF_NONTRIVIAL_P (cf))
    {
      tree last_iteration = SUB_LAST_CONFLICT (subscript);
      fprintf (outf, "\n  last_conflict: ");
      print_generic_expr (outf, last_iteration);
    }

  fprintf (outf, "\n  (Subscript distance: ");
  print_generic_expr (outf, SUB_DISTANCE (subscript));
  fprintf (outf, " ))\n");
}

/* Print the classic direction vector DIRV to OUTF.  */

DEBUG_FUNCTION void
print_direction_vector (FILE *outf,
                        lambda_vector dirv,
                        int length)
{
  int eq;

  for (eq = 0; eq < length; eq++)
    {
      enum data_dependence_direction dir = ((enum data_dependence_direction)
                                            dirv[eq]);

      switch (dir)
        {
        case dir_positive:
          fprintf (outf, " +");
          break;
        case dir_negative:
          fprintf (outf, " -");
          break;
        case dir_equal:
          fprintf (outf, " =");
          break;
        case dir_positive_or_equal:
          fprintf (outf, " +=");
          break;
        case dir_positive_or_negative:
          fprintf (outf, " +-");
          break;
        case dir_negative_or_equal:
          fprintf (outf, " -=");
          break;
        case dir_star:
          fprintf (outf, " *");
          break;
        default:
          fprintf (outf, "indep");
          break;
        }
    }
  fprintf (outf, "\n");
}

/* Print a vector of direction vectors.  */

DEBUG_FUNCTION void
print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
                   int length)
{
  for (lambda_vector v : dir_vects)
    print_direction_vector (outf, v, length);
}

/* Print out a vector VEC of length N to OUTFILE.  */

DEBUG_FUNCTION void
print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
{
  int i;

  for (i = 0; i < n; i++)
    fprintf (outfile, HOST_WIDE_INT_PRINT_DEC " ", vector[i]);
  fprintf (outfile, "\n");
}

/* Print a vector of distance vectors.  */

DEBUG_FUNCTION void
print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
                    int length)
{
  for (lambda_vector v : dist_vects)
    print_lambda_vector (outf, v, length);
}

/* Dump function for a DATA_DEPENDENCE_RELATION structure.  */

DEBUG_FUNCTION void
dump_data_dependence_relation (FILE *outf, const data_dependence_relation *ddr)
{
  struct data_reference *dra, *drb;

  fprintf (outf, "(Data Dep: \n");

  if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (ddr)
        {
          dra = DDR_A (ddr);
          drb = DDR_B (ddr);
          if (dra)
            dump_data_reference (outf, dra);
          else
            fprintf (outf, " (nil)\n");
          if (drb)
            dump_data_reference (outf, drb);
          else
            fprintf (outf, " (nil)\n");
        }
      fprintf (outf, " (don't know)\n)\n");
      return;
    }

  dra = DDR_A (ddr);
  drb = DDR_B (ddr);
  dump_data_reference (outf, dra);
  dump_data_reference (outf, drb);

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    fprintf (outf, " (no dependence)\n");

  else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      unsigned int i;
      class loop *loopi;

      subscript *sub;
      FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
        {
          fprintf (outf, " access_fn_A: ");
          print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
          fprintf (outf, " access_fn_B: ");
          print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
          dump_subscript (outf, sub);
        }

      fprintf (outf, " loop nest: (");
      FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
        fprintf (outf, "%d ", loopi->num);
      fprintf (outf, ")\n");

      for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
        {
          fprintf (outf, " distance_vector: ");
          print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
                               DDR_NB_LOOPS (ddr));
        }

      for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
        {
          fprintf (outf, " direction_vector: ");
          print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
                                  DDR_NB_LOOPS (ddr));
        }
    }

  fprintf (outf, ")\n");
}

/* Debug version.  */

DEBUG_FUNCTION void
debug_data_dependence_relation (const struct data_dependence_relation *ddr)
{
  dump_data_dependence_relation (stderr, ddr);
}

/* Dump into FILE all the dependence relations from DDRS.  */

DEBUG_FUNCTION void
dump_data_dependence_relations (FILE *file, const vec<ddr_p> &ddrs)
{
  for (auto ddr : ddrs)
    dump_data_dependence_relation (file, ddr);
}

DEBUG_FUNCTION void
debug (vec<ddr_p> &ref)
{
  dump_data_dependence_relations (stderr, ref);
}

DEBUG_FUNCTION void
debug (vec<ddr_p> *ptr)
{
  if (ptr)
    debug (*ptr);
  else
    fprintf (stderr, "<nil>\n");
}


/* Dump to STDERR all the dependence relations from DDRS.  */

DEBUG_FUNCTION void
debug_data_dependence_relations (vec<ddr_p> ddrs)
{
  dump_data_dependence_relations (stderr, ddrs);
}

/* Dumps the distance and direction vectors in FILE.  DDRS contains
   the dependence relations, and VECT_SIZE is the size of the
   dependence vectors, or in other words the number of loops in the
   considered nest.  */

DEBUG_FUNCTION void
dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
{
  for (data_dependence_relation *ddr : ddrs)
    if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
      {
        for (lambda_vector v : DDR_DIST_VECTS (ddr))
          {
            fprintf (file, "DISTANCE_V (");
            print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
            fprintf (file, ")\n");
          }

        for (lambda_vector v : DDR_DIR_VECTS (ddr))
          {
            fprintf (file, "DIRECTION_V (");
            print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
            fprintf (file, ")\n");
          }
      }

  fprintf (file, "\n\n");
}

/* Dumps the data dependence relations DDRS in FILE.  */

DEBUG_FUNCTION void
dump_ddrs (FILE *file, vec<ddr_p> ddrs)
{
  for (data_dependence_relation *ddr : ddrs)
    dump_data_dependence_relation (file, ddr);

  fprintf (file, "\n\n");
}

DEBUG_FUNCTION void
debug_ddrs (vec<ddr_p> ddrs)
{
  dump_ddrs (stderr, ddrs);
}

/* If RESULT_RANGE is nonnull, set *RESULT_RANGE to the range of
   OP0 CODE OP1, where:

   - OP0 CODE OP1 has integral type TYPE
   - the range of OP0 is given by OP0_RANGE and
   - the range of OP1 is given by OP1_RANGE.

   Independently of RESULT_RANGE, try to compute:

     DELTA = ((sizetype) OP0 CODE (sizetype) OP1)
             - (sizetype) (OP0 CODE OP1)

   as a constant and subtract DELTA from the ssizetype constant in *OFF.
   Return true on success, or false if DELTA is not known at compile time.

   Truncation and sign changes are known to distribute over CODE, i.e.

     (itype) (A CODE B) == (itype) A CODE (itype) B

   for any integral type ITYPE whose precision is no greater than the
   precision of A and B.  */
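/* A worked example of DELTA: if TYPE is unsigned char, CODE is
   PLUS_EXPR, OP0 == 200 and OP1 == 100, then

     (sizetype) OP0 CODE (sizetype) OP1 == 300
     (sizetype) (OP0 CODE OP1) == (sizetype) (unsigned char) 300 == 44

   so DELTA == 256: the 8-bit result wrapped exactly once, and 256 is
   subtracted from *OFF to compensate.  */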

static bool
compute_distributive_range (tree type, value_range &op0_range,
                            tree_code code, value_range &op1_range,
                            tree *off, value_range *result_range)
{
  gcc_assert (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type));
  if (result_range)
    {
      range_op_handler op (code, type);
      op.fold_range (*result_range, type, op0_range, op1_range);
    }

  /* The distributive property guarantees that if TYPE is no narrower
     than SIZETYPE,

       (sizetype) (OP0 CODE OP1) == (sizetype) OP0 CODE (sizetype) OP1

     and so we can treat DELTA as zero.  */
  if (TYPE_PRECISION (type) >= TYPE_PRECISION (sizetype))
    return true;

  /* If overflow is undefined, we can assume that:

       X == (ssizetype) OP0 CODE (ssizetype) OP1

     is within the range of TYPE, i.e.:

       X == (ssizetype) (TYPE) X

     Distributing the (TYPE) truncation over X gives:

       X == (ssizetype) (OP0 CODE OP1)

     Casting both sides to sizetype and distributing the sizetype cast
     over X gives:

       (sizetype) OP0 CODE (sizetype) OP1 == (sizetype) (OP0 CODE OP1)

     and so we can treat DELTA as zero.  */
  if (TYPE_OVERFLOW_UNDEFINED (type))
    return true;

  /* Compute the range of:

       (ssizetype) OP0 CODE (ssizetype) OP1

     The distributive property guarantees that this has the same bitpattern as:

       (sizetype) OP0 CODE (sizetype) OP1

     but its range is more conducive to analysis.  */
  range_cast (op0_range, ssizetype);
  range_cast (op1_range, ssizetype);
  value_range wide_range;
  range_op_handler op (code, ssizetype);
  bool saved_flag_wrapv = flag_wrapv;
  flag_wrapv = 1;
  op.fold_range (wide_range, ssizetype, op0_range, op1_range);
  flag_wrapv = saved_flag_wrapv;
  if (wide_range.num_pairs () != 1 || !range_int_cst_p (&wide_range))
    return false;

  wide_int lb = wide_range.lower_bound ();
  wide_int ub = wide_range.upper_bound ();

  /* Calculate the number of times that each end of the range overflows or
     underflows TYPE.  We can only calculate DELTA if the numbers match.  */
  unsigned int precision = TYPE_PRECISION (type);
  if (!TYPE_UNSIGNED (type))
    {
      wide_int type_min = wi::mask (precision - 1, true, lb.get_precision ());
      lb -= type_min;
      ub -= type_min;
    }
  wide_int upper_bits = wi::mask (precision, true, lb.get_precision ());
  lb &= upper_bits;
  ub &= upper_bits;
  if (lb != ub)
    return false;

  /* OP0 CODE OP1 overflows exactly arshift (LB, PRECISION) times, with
     negative values indicating underflow.  The low PRECISION bits of LB
     are clear, so DELTA is therefore LB (== UB).  */
  *off = wide_int_to_tree (ssizetype, wi::to_wide (*off) - lb);
  return true;
}

/* Return true if (sizetype) OP == (sizetype) (TO_TYPE) OP,
   given that OP has type FROM_TYPE and range RANGE.  Both TO_TYPE and
   FROM_TYPE are integral types.  */

static bool
nop_conversion_for_offset_p (tree to_type, tree from_type, value_range &range)
{
  gcc_assert (INTEGRAL_TYPE_P (to_type)
              && INTEGRAL_TYPE_P (from_type)
              && !TYPE_OVERFLOW_TRAPS (to_type)
              && !TYPE_OVERFLOW_TRAPS (from_type));

  /* Converting to something no narrower than sizetype and then to sizetype
     is equivalent to converting directly to sizetype.  */
  if (TYPE_PRECISION (to_type) >= TYPE_PRECISION (sizetype))
    return true;

  /* Check whether TO_TYPE can represent all values that FROM_TYPE can.  */
  if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type)
      && (TYPE_UNSIGNED (from_type) || !TYPE_UNSIGNED (to_type)))
    return true;

  /* For narrowing conversions, we could in principle test whether
     the bits in FROM_TYPE but not in TO_TYPE have a fixed value
     and apply a constant adjustment.

     For other conversions (which involve a sign change) we could
     check that the signs are always equal, and apply a constant
     adjustment if the signs are negative.

     However, both cases should be rare.  */
  return range_fits_type_p (&range, TYPE_PRECISION (to_type),
                            TYPE_SIGN (to_type));
}
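/* For example, if TO_TYPE is a 16-bit unsigned short, FROM_TYPE a
   32-bit int and RANGE is known to be [0, 100], every value in RANGE
   survives the narrowing unchanged, so the conversion can be treated
   as a nop when computing offsets.  */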

static void
split_constant_offset (tree type, tree *var, tree *off,
                       value_range *result_range,
                       hash_map<tree, std::pair<tree, tree> > &cache,
                       unsigned *limit);

/* Helper function for split_constant_offset.  If TYPE is a pointer type,
   try to express OP0 CODE OP1 as:

     POINTER_PLUS <*VAR, (sizetype) *OFF>

   where:

   - *VAR has type TYPE
   - *OFF is a constant of type ssizetype.

   If TYPE is an integral type, try to express (sizetype) (OP0 CODE OP1) as:

     *VAR + (sizetype) *OFF

   where:

   - *VAR has type sizetype
   - *OFF is a constant of type ssizetype.

   In both cases, OP0 CODE OP1 has type TYPE.

   Return true on success.  A false return value indicates that we can't
   do better than set *OFF to zero.

   When returning true, set RESULT_RANGE to the range of OP0 CODE OP1,
   if RESULT_RANGE is nonnull and if we can do better than assume VR_VARYING.

   CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
   visited.  LIMIT counts down the number of SSA names that we are
   allowed to process before giving up.  */

static bool
split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
                         tree *var, tree *off, value_range *result_range,
                         hash_map<tree, std::pair<tree, tree> > &cache,
                         unsigned *limit)
{
  tree var0, var1;
  tree off0, off1;
  value_range op0_range, op1_range;

  *var = NULL_TREE;
  *off = NULL_TREE;

  if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type))
    return false;

  switch (code)
    {
    case INTEGER_CST:
      *var = size_int (0);
      *off = fold_convert (ssizetype, op0);
      if (result_range)
        result_range->set (op0, op0);
      return true;

    case POINTER_PLUS_EXPR:
      split_constant_offset (op0, &var0, &off0, nullptr, cache, limit);
      split_constant_offset (op1, &var1, &off1, nullptr, cache, limit);
      *var = fold_build2 (POINTER_PLUS_EXPR, type, var0, var1);
      *off = size_binop (PLUS_EXPR, off0, off1);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
      split_constant_offset (op1, &var1, &off1, &op1_range, cache, limit);
      *off = size_binop (code, off0, off1);
      if (!compute_distributive_range (type, op0_range, code, op1_range,
                                       off, result_range))
        return false;
      *var = fold_build2 (code, sizetype, var0, var1);
      return true;

    case MULT_EXPR:
      if (TREE_CODE (op1) != INTEGER_CST)
        return false;

      split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
      op1_range.set (op1, op1);
      *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
      if (!compute_distributive_range (type, op0_range, code, op1_range,
                                       off, result_range))
        return false;
      *var = fold_build2 (MULT_EXPR, sizetype, var0,
                          fold_convert (sizetype, op1));
      return true;

    case ADDR_EXPR:
      {
        tree base, poffset;
        poly_int64 pbitsize, pbitpos, pbytepos;
        machine_mode pmode;
        int punsignedp, preversep, pvolatilep;

        op0 = TREE_OPERAND (op0, 0);
        base
          = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
                                 &punsignedp, &preversep, &pvolatilep);

        if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
          return false;
        base = build_fold_addr_expr (base);
        off0 = ssize_int (pbytepos);

        if (poffset)
          {
            split_constant_offset (poffset, &poffset, &off1, nullptr,
                                   cache, limit);
            off0 = size_binop (PLUS_EXPR, off0, off1);
            base = fold_build_pointer_plus (base, poffset);
          }

        var0 = fold_convert (type, base);

        /* If variable length types are involved, punt, otherwise casts
           might be converted into ARRAY_REFs in gimplify_conversion.
           To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
           possibly no longer appears in current GIMPLE, might resurface.
           This perhaps could run
           if (CONVERT_EXPR_P (var0))
             {
               gimplify_conversion (&var0);
               // Attempt to fill in any within var0 found ARRAY_REF's
               // element size from corresponding op embedded ARRAY_REF,
               // if unsuccessful, just punt.
             }  */
        while (POINTER_TYPE_P (type))
          type = TREE_TYPE (type);
        if (int_size_in_bytes (type) < 0)
          return false;

        *var = var0;
        *off = off0;
        return true;
      }

    case SSA_NAME:
      {
        if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
          return false;

        gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
        enum tree_code subcode;

        if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
          return false;

        subcode = gimple_assign_rhs_code (def_stmt);

        /* We are using a cache to avoid un-CSEing large amounts of code.  */
        bool use_cache = false;
        if (!has_single_use (op0)
            && (subcode == POINTER_PLUS_EXPR
                || subcode == PLUS_EXPR
                || subcode == MINUS_EXPR
                || subcode == MULT_EXPR
                || subcode == ADDR_EXPR
                || CONVERT_EXPR_CODE_P (subcode)))
          {
            use_cache = true;
            bool existed;
            std::pair<tree, tree> &e = cache.get_or_insert (op0, &existed);
            if (existed)
              {
                if (integer_zerop (e.second))
                  return false;
                *var = e.first;
                *off = e.second;
                /* The caller sets the range in this case.  */
                return true;
              }
            e = std::make_pair (op0, ssize_int (0));
          }

        if (*limit == 0)
          return false;
        --*limit;

        var0 = gimple_assign_rhs1 (def_stmt);
        var1 = gimple_assign_rhs2 (def_stmt);

        bool res = split_constant_offset_1 (type, var0, subcode, var1,
                                            var, off, nullptr, cache, limit);
        if (res && use_cache)
          *cache.get (op0) = std::make_pair (*var, *off);
        /* The caller sets the range in this case.  */
        return res;
      }
    CASE_CONVERT:
      {
        /* We can only handle the following conversions:

           - Conversions from one pointer type to another pointer type.

           - Conversions from one non-trapping integral type to another
             non-trapping integral type.  In this case, the recursive
             call makes sure that:

               (sizetype) OP0

             can be expressed as a sizetype operation involving VAR and OFF,
             and all we need to do is check whether:

               (sizetype) OP0 == (sizetype) (TYPE) OP0

           - Conversions from a non-trapping sizetype-size integral type to
             a like-sized pointer type.  In this case, the recursive call
             makes sure that:

               (sizetype) OP0 == *VAR + (sizetype) *OFF

             and we can convert that to:

               POINTER_PLUS <(TYPE) *VAR, (sizetype) *OFF>

           - Conversions from a sizetype-sized pointer type to a like-sized
             non-trapping integral type.  In this case, the recursive call
             makes sure that:

               OP0 == POINTER_PLUS <*VAR, (sizetype) *OFF>

             where the POINTER_PLUS and *VAR have the same precision as
             TYPE (and the same precision as sizetype).  Then:

               (sizetype) (TYPE) OP0 == (sizetype) *VAR + (sizetype) *OFF.  */
        tree itype = TREE_TYPE (op0);
        if ((POINTER_TYPE_P (itype)
             || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
            && (POINTER_TYPE_P (type)
                || (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type)))
            && (POINTER_TYPE_P (type) == POINTER_TYPE_P (itype)
                || (TYPE_PRECISION (type) == TYPE_PRECISION (sizetype)
                    && TYPE_PRECISION (itype) == TYPE_PRECISION (sizetype))))
          {
            if (POINTER_TYPE_P (type))
              {
                split_constant_offset (op0, var, off, nullptr, cache, limit);
                *var = fold_convert (type, *var);
              }
            else if (POINTER_TYPE_P (itype))
              {
                split_constant_offset (op0, var, off, nullptr, cache, limit);
                *var = fold_convert (sizetype, *var);
              }
            else
              {
                split_constant_offset (op0, var, off, &op0_range,
                                       cache, limit);
                if (!nop_conversion_for_offset_p (type, itype, op0_range))
                  return false;
                if (result_range)
                  {
                    *result_range = op0_range;
                    range_cast (*result_range, type);
                  }
              }
            return true;
          }
        return false;
      }

    default:
      return false;
    }
}

/* If EXP has pointer type, try to express it as:

     POINTER_PLUS <*VAR, (sizetype) *OFF>

   where:

   - *VAR has the same type as EXP
   - *OFF is a constant of type ssizetype.

   If EXP has an integral type, try to express (sizetype) EXP as:

     *VAR + (sizetype) *OFF

   where:

   - *VAR has type sizetype
   - *OFF is a constant of type ssizetype.

   If EXP_RANGE is nonnull, set it to the range of EXP.

   CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
   visited.  LIMIT counts down the number of SSA names that we are
   allowed to process before giving up.  */

static void
split_constant_offset (tree exp, tree *var, tree *off, value_range *exp_range,
                       hash_map<tree, std::pair<tree, tree> > &cache,
                       unsigned *limit)
{
  tree type = TREE_TYPE (exp), op0, op1;
  enum tree_code code;

  code = TREE_CODE (exp);
  if (exp_range)
    {
      *exp_range = type;
      if (code == SSA_NAME)
        {
          value_range vr;
          get_range_query (cfun)->range_of_expr (vr, exp);
          if (vr.undefined_p ())
            vr.set_varying (TREE_TYPE (exp));
          wide_int var_min = wi::to_wide (vr.min ());
          wide_int var_max = wi::to_wide (vr.max ());
          value_range_kind vr_kind = vr.kind ();
          wide_int var_nonzero = get_nonzero_bits (exp);
          vr_kind = intersect_range_with_nonzero_bits (vr_kind,
                                                       &var_min, &var_max,
                                                       var_nonzero,
                                                       TYPE_SIGN (type));
          /* This check for VR_VARYING is here because the old code
             using get_range_info would return VR_RANGE for the entire
             domain, instead of VR_VARYING.  The new code normalizes
             full-domain ranges to VR_VARYING.  */
          if (vr_kind == VR_RANGE || vr_kind == VR_VARYING)
            *exp_range = value_range (type, var_min, var_max);
        }
    }

  if (!tree_is_chrec (exp)
      && get_gimple_rhs_class (TREE_CODE (exp)) != GIMPLE_TERNARY_RHS)
    {
      extract_ops_from_tree (exp, &code, &op0, &op1);
      if (split_constant_offset_1 (type, op0, code, op1, var, off,
                                   exp_range, cache, limit))
        return;
    }

  *var = exp;
  if (INTEGRAL_TYPE_P (type))
    *var = fold_convert (sizetype, *var);
  *off = ssize_int (0);

  value_range r;
  if (exp_range && code != SSA_NAME
      && get_range_query (cfun)->range_of_expr (r, exp)
      && !r.undefined_p ())
    *exp_range = r;
}

/* Expresses EXP as VAR + OFF, where OFF is a constant.  VAR has the same
   type as EXP while OFF has type ssizetype.  */

void
split_constant_offset (tree exp, tree *var, tree *off)
{
  unsigned limit = param_ssa_name_def_chain_limit;
  static hash_map<tree, std::pair<tree, tree> > *cache;
  if (!cache)
    cache = new hash_map<tree, std::pair<tree, tree> > (37);
  split_constant_offset (exp, var, off, nullptr, *cache, &limit);
  *var = fold_convert (TREE_TYPE (exp), *var);
  cache->empty ();
}
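/* For instance, for an integral EXP such as i_1 + 12 this returns
   *VAR = i_1 and *OFF = 12, and for a pointer EXP such as &a[4] with
   4-byte array elements it returns *VAR = &a and *OFF = 16.  */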

/* Returns the address ADDR of an object in a canonical shape (without nop
   casts, and with type of pointer to the object).  */

static tree
canonicalize_base_object_address (tree addr)
{
  tree orig = addr;

  STRIP_NOPS (addr);

  /* The base address may be obtained by casting from integer, in that case
     keep the cast.  */
  if (!POINTER_TYPE_P (TREE_TYPE (addr)))
    return orig;

  if (TREE_CODE (addr) != ADDR_EXPR)
    return addr;

  return build_fold_addr_expr (TREE_OPERAND (addr, 0));
}

/* Analyze the behavior of memory reference REF within STMT.
   There are two modes:

   - BB analysis.  In this case we simply split the address into base,
     init and offset components, without reference to any containing loop.
     The resulting base and offset are general expressions and they can
     vary arbitrarily from one iteration of the containing loop to the next.
     The step is always zero.

   - loop analysis.  In this case we analyze the reference both wrt LOOP
     and on the basis that the reference occurs (is "used") in LOOP;
     see the comment above analyze_scalar_evolution_in_loop for more
     information about this distinction.  The base, init, offset and
     step fields are all invariant in LOOP.

   Perform BB analysis if LOOP is null, or if LOOP is the function's
   dummy outermost loop.  In other cases perform loop analysis.

   Return true if the analysis succeeded and store the results in DRB if so.
   BB analysis can only fail for bitfield or reversed-storage accesses.  */

opt_result
dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
                      class loop *loop, const gimple *stmt)
{
  poly_int64 pbitsize, pbitpos;
  tree base, poffset;
  machine_mode pmode;
  int punsignedp, preversep, pvolatilep;
  affine_iv base_iv, offset_iv;
  tree init, dinit, step;
  bool in_loop = (loop && loop->num);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "analyze_innermost: ");

  base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
                              &punsignedp, &preversep, &pvolatilep);
  gcc_assert (base != NULL_TREE);

  poly_int64 pbytepos;
  if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
    return opt_result::failure_at (stmt,
                                   "failed: bit offset alignment.\n");

  if (preversep)
    return opt_result::failure_at (stmt,
                                   "failed: reverse storage order.\n");

  /* Calculate the alignment and misalignment for the inner reference.  */
  unsigned HOST_WIDE_INT bit_base_misalignment;
  unsigned int bit_base_alignment;
  get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);

  /* There are no bitfield references remaining in BASE, so the values
     we got back must be whole bytes.  */
  gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
              && bit_base_misalignment % BITS_PER_UNIT == 0);
  unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
  poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;

  if (TREE_CODE (base) == MEM_REF)
    {
      if (!integer_zerop (TREE_OPERAND (base, 1)))
        {
          /* Subtract MOFF from the base and add it to POFFSET instead.
             Adjust the misalignment to reflect the amount we subtracted.  */
          poly_offset_int moff = mem_ref_offset (base);
          base_misalignment -= moff.force_shwi ();
          tree mofft = wide_int_to_tree (sizetype, moff);
          if (!poffset)
            poffset = mofft;
          else
            poffset = size_binop (PLUS_EXPR, poffset, mofft);
        }
      base = TREE_OPERAND (base, 0);
    }
  else
    base = build_fold_addr_expr (base);

  if (in_loop)
    {
      if (!simple_iv (loop, loop, base, &base_iv, true))
        return opt_result::failure_at
          (stmt, "failed: evolution of base is not affine.\n");
    }
  else
    {
      base_iv.base = base;
      base_iv.step = ssize_int (0);
      base_iv.no_overflow = true;
    }

  if (!poffset)
    {
      offset_iv.base = ssize_int (0);
      offset_iv.step = ssize_int (0);
    }
  else
    {
      if (!in_loop)
        {
          offset_iv.base = poffset;
          offset_iv.step = ssize_int (0);
        }
      else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
        return opt_result::failure_at
          (stmt, "failed: evolution of offset is not affine.\n");
    }

  init = ssize_int (pbytepos);

  /* Subtract any constant component from the base and add it to INIT instead.
     Adjust the misalignment to reflect the amount we subtracted.  */
  split_constant_offset (base_iv.base, &base_iv.base, &dinit);
  init = size_binop (PLUS_EXPR, init, dinit);
  base_misalignment -= TREE_INT_CST_LOW (dinit);

  split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
  init = size_binop (PLUS_EXPR, init, dinit);

  step = size_binop (PLUS_EXPR,
                     fold_convert (ssizetype, base_iv.step),
                     fold_convert (ssizetype, offset_iv.step));

  base = canonicalize_base_object_address (base_iv.base);

  /* See if get_pointer_alignment can guarantee a higher alignment than
     the one we calculated above.  */
  unsigned HOST_WIDE_INT alt_misalignment;
  unsigned int alt_alignment;
  get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);

  /* As above, these values must be whole bytes.  */
  gcc_assert (alt_alignment % BITS_PER_UNIT == 0
              && alt_misalignment % BITS_PER_UNIT == 0);
  alt_alignment /= BITS_PER_UNIT;
  alt_misalignment /= BITS_PER_UNIT;

  if (base_alignment < alt_alignment)
    {
      base_alignment = alt_alignment;
      base_misalignment = alt_misalignment;
    }

  drb->base_address = base;
  drb->offset = fold_convert (ssizetype, offset_iv.base);
  drb->init = init;
  drb->step = step;
  if (known_misalignment (base_misalignment, base_alignment,
                          &drb->base_misalignment))
    drb->base_alignment = base_alignment;
  else
    {
      drb->base_alignment = known_alignment (base_misalignment);
      drb->base_misalignment = 0;
    }
  drb->offset_alignment = highest_pow2_factor (offset_iv.base);
  drb->step_alignment = highest_pow2_factor (step);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "success.\n");

  return opt_result::success ();
}
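/* As an illustration, for a reference a[i + 4] to an array of 4-byte
   ints, analyzed in a loop where I steps by 1, the decomposition above
   yields roughly base_address &a, offset 0, init 16 and step 4: the
   compile-time constant distance ends up in INIT and the per-iteration
   advance in STEP.  */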

/* Return true if OP is a valid component reference for a DR access
   function.  This accepts a subset of what handled_component_p accepts.  */

static bool
access_fn_component_p (tree op)
{
  switch (TREE_CODE (op))
    {
    case REALPART_EXPR:
    case IMAGPART_EXPR:
    case ARRAY_REF:
      return true;

    case COMPONENT_REF:
      return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;

    default:
      return false;
    }
}

/* Returns whether BASE can have an access_fn_component_p with BASE
   as base.  */

static bool
base_supports_access_fn_components_p (tree base)
{
  switch (TREE_CODE (TREE_TYPE (base)))
    {
    case COMPLEX_TYPE:
    case ARRAY_TYPE:
    case RECORD_TYPE:
      return true;
    default:
      return false;
    }
}

/* Determines the base object and the list of indices of memory reference
   DR, analyzed in LOOP and instantiated before NEST.  */

static void
dr_analyze_indices (struct indices *dri, tree ref, edge nest, loop_p loop)
{
  /* If analyzing a basic-block there are no indices to analyze
     and thus no access functions.  */
  if (!nest)
    {
      dri->base_object = ref;
      dri->access_fns.create (0);
      return;
    }

  vec<tree> access_fns = vNULL;

  /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
     into a two element array with a constant index.  The base is
     then just the immediate underlying object.  */
  if (TREE_CODE (ref) == REALPART_EXPR)
    {
      ref = TREE_OPERAND (ref, 0);
      access_fns.safe_push (integer_zero_node);
    }
  else if (TREE_CODE (ref) == IMAGPART_EXPR)
    {
      ref = TREE_OPERAND (ref, 0);
      access_fns.safe_push (integer_one_node);
    }

  /* Analyze access functions of dimensions we know to be independent.
     The list of component references handled here should be kept in
     sync with access_fn_component_p.  */
  while (handled_component_p (ref))
    {
      if (TREE_CODE (ref) == ARRAY_REF)
        {
          tree op = TREE_OPERAND (ref, 1);
          tree access_fn = analyze_scalar_evolution (loop, op);
          access_fn = instantiate_scev (nest, loop, access_fn);
          access_fns.safe_push (access_fn);
        }
      else if (TREE_CODE (ref) == COMPONENT_REF
               && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
        {
          /* For COMPONENT_REFs of records (but not unions!) use the
             FIELD_DECL offset as constant access function so we can
             disambiguate a[i].f1 and a[i].f2.  */
          tree off = component_ref_field_offset (ref);
          off = size_binop (PLUS_EXPR,
                            size_binop (MULT_EXPR,
                                        fold_convert (bitsizetype, off),
                                        bitsize_int (BITS_PER_UNIT)),
                            DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
          access_fns.safe_push (off);
        }
      else
        /* If we have an unhandled component we could not translate
           to an access function stop analyzing.  We have determined
           our base object in this case.  */
        break;

      ref = TREE_OPERAND (ref, 0);
    }

  /* If the address operand of a MEM_REF base has an evolution in the
     analyzed nest, add it as an additional independent access-function.  */
  if (TREE_CODE (ref) == MEM_REF)
    {
      tree op = TREE_OPERAND (ref, 0);
      tree access_fn = analyze_scalar_evolution (loop, op);
      access_fn = instantiate_scev (nest, loop, access_fn);
      STRIP_NOPS (access_fn);
      if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
        {
          tree memoff = TREE_OPERAND (ref, 1);
          tree base = initial_condition (access_fn);
          tree orig_type = TREE_TYPE (base);
          STRIP_USELESS_TYPE_CONVERSION (base);
          tree off;
          split_constant_offset (base, &base, &off);
          STRIP_USELESS_TYPE_CONVERSION (base);
          /* Fold the MEM_REF offset into the evolutions initial
             value to make more bases comparable.  */
          if (!integer_zerop (memoff))
            {
              off = size_binop (PLUS_EXPR, off,
                                fold_convert (ssizetype, memoff));
              memoff = build_int_cst (TREE_TYPE (memoff), 0);
            }
          /* Adjust the offset so it is a multiple of the access type
             size and thus we separate bases that can possibly be used
             to produce partial overlaps (which the access_fn machinery
             cannot handle).  */
          wide_int rem;
          if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
              && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
              && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
            rem = wi::mod_trunc
              (wi::to_wide (off),
               wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
               SIGNED);
          else
            /* If we can't compute the remainder simply force the initial
               condition to zero.  */
            rem = wi::to_wide (off);
          off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
          memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
          /* And finally replace the initial condition.  */
          access_fn = chrec_replace_initial_condition
            (access_fn, fold_convert (orig_type, off));
          /* ??? This is still not a suitable base object for
             dr_may_alias_p - the base object needs to be an
             access that covers the object as whole.  With
             an evolution in the pointer this cannot be
             guaranteed.
             As a band-aid, mark the access so we can special-case
             it in dr_may_alias_p.  */
          tree old = ref;
          ref = fold_build2_loc (EXPR_LOCATION (ref),
                                 MEM_REF, TREE_TYPE (ref),
                                 base, memoff);
          MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
          MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
          dri->unconstrained_base = true;
          access_fns.safe_push (access_fn);
        }
    }
  else if (DECL_P (ref))
    {
      /* Canonicalize DR_BASE_OBJECT to MEM_REF form.  */
      ref = build2 (MEM_REF, TREE_TYPE (ref),
                    build_fold_addr_expr (ref),
                    build_int_cst (reference_alias_ptr_type (ref), 0));
    }

  dri->base_object = ref;
  dri->access_fns = access_fns;
}
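/* For instance, for a[i][j] the subscripts are pushed as they are
   peeled off the reference, so access function 0 is the innermost
   subscript j and access function 1 is i.  */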

/* Extracts the alias analysis information from the memory reference DR.  */

static void
dr_analyze_alias (struct data_reference *dr)
{
  tree ref = DR_REF (dr);
  tree base = get_base_address (ref), addr;

  if (INDIRECT_REF_P (base)
      || TREE_CODE (base) == MEM_REF)
    {
      addr = TREE_OPERAND (base, 0);
      if (TREE_CODE (addr) == SSA_NAME)
        DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
    }
}

/* Frees data reference DR.  */

void
free_data_ref (data_reference_p dr)
{
  DR_ACCESS_FNS (dr).release ();
  if (dr->alt_indices.base_object)
    dr->alt_indices.access_fns.release ();
  free (dr);
}

/* Analyze memory reference MEMREF, which is accessed in STMT.
   The reference is a read if IS_READ is true, otherwise it is a write.
   IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
   within STMT, i.e. that it might not occur even if STMT is executed
   and runs to completion.

   Return the data_reference description of MEMREF.  NEST is the outermost
   loop in which the reference should be instantiated, LOOP is the loop
   in which the data reference should be analyzed.  */

struct data_reference *
create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
                 bool is_read, bool is_conditional_in_stmt)
{
  struct data_reference *dr;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Creating dr for ");
      print_generic_expr (dump_file, memref, TDF_SLIM);
      fprintf (dump_file, "\n");
    }

  dr = XCNEW (struct data_reference);
  DR_STMT (dr) = stmt;
  DR_REF (dr) = memref;
  DR_IS_READ (dr) = is_read;
  DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;

  dr_analyze_innermost (&DR_INNERMOST (dr), memref,
                        nest != NULL ? loop : NULL, stmt);
  dr_analyze_indices (&dr->indices, DR_REF (dr), nest, loop);
  dr_analyze_alias (dr);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned i;
      fprintf (dump_file, "\tbase_address: ");
      print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
      fprintf (dump_file, "\n\toffset from base address: ");
      print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tconstant offset from base address: ");
      print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tstep: ");
      print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tbase misalignment: %d",
               DR_BASE_MISALIGNMENT (dr));
      fprintf (dump_file, "\n\toffset alignment: %d",
               DR_OFFSET_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tbase_object: ");
      print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
      fprintf (dump_file, "\n");
      for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
        {
          fprintf (dump_file, "\tAccess function %d: ", i);
          print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
        }
    }

  return dr;
}

/* A helper function that computes the order between two tree expressions
   T1 and T2.  This is used in comparator functions sorting objects based
   on the order of tree expressions.  The function returns -1, 0, or 1.  */

int
data_ref_compare_tree (tree t1, tree t2)
{
  int i, cmp;
  enum tree_code code;
  char tclass;

  if (t1 == t2)
    return 0;
  if (t1 == NULL)
    return -1;
  if (t2 == NULL)
    return 1;

  STRIP_USELESS_TYPE_CONVERSION (t1);
  STRIP_USELESS_TYPE_CONVERSION (t2);
  if (t1 == t2)
    return 0;

  if (TREE_CODE (t1) != TREE_CODE (t2)
      && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
    return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;

  code = TREE_CODE (t1);
  switch (code)
    {
    case INTEGER_CST:
      return tree_int_cst_compare (t1, t2);

    case STRING_CST:
      if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
        return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
      return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
                     TREE_STRING_LENGTH (t1));

    case SSA_NAME:
      if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
        return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
      break;

    default:
      if (POLY_INT_CST_P (t1))
        return compare_sizes_for_sort (wi::to_poly_widest (t1),
                                       wi::to_poly_widest (t2));

      tclass = TREE_CODE_CLASS (code);

      /* For decls, compare their UIDs.  */
      if (tclass == tcc_declaration)
        {
          if (DECL_UID (t1) != DECL_UID (t2))
            return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
          break;
        }
      /* For expressions, compare their operands recursively.  */
      else if (IS_EXPR_CODE_CLASS (tclass))
        {
          for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
            {
              cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
                                           TREE_OPERAND (t2, i));
              if (cmp != 0)
                return cmp;
            }
        }
      else
        gcc_unreachable ();
    }

  return 0;
}

/* Return TRUE if it's possible to resolve data dependence DDR by runtime
   alias check.  */

opt_result
runtime_alias_check_p (ddr_p ddr, class loop *loop, bool speed_p)
{
  if (dump_enabled_p ())
    dump_printf (MSG_NOTE,
                 "consider run-time aliasing test between %T and %T\n",
                 DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));

  if (!speed_p)
    return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
                                   "runtime alias check not supported when"
                                   " optimizing for size.\n");

  /* FORNOW: We don't support versioning with outer-loop in either
     vectorization or loop distribution.  */
  if (loop != NULL && loop->inner != NULL)
    return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
                                   "runtime alias check not supported for"
                                   " outer loop.\n");

  return opt_result::success ();
}

/* Operator == between two dr_with_seg_len objects.

   This equality operator is used to make sure two data refs
   are the same one so that we will consider combining the
   aliasing checks of those two pairs of data dependent data
   refs.  */

static bool
operator == (const dr_with_seg_len& d1,
             const dr_with_seg_len& d2)
{
  return (operand_equal_p (DR_BASE_ADDRESS (d1.dr),
                           DR_BASE_ADDRESS (d2.dr), 0)
          && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
          && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
          && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0
          && known_eq (d1.access_size, d2.access_size)
          && d1.align == d2.align);
}

/* Comparison function for sorting objects of dr_with_seg_len_pair_t
   so that we can combine aliasing checks in one scan.  */

static int
comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
{
  const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
  const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
  const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
  const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;

  /* For DR pairs (a, b) and (c, d), we only consider merging the alias
     checks if a and c have the same base address and step, and b and d
     have the same address and step.  Therefore, if any a&c or b&d don't
     have the same address and step, we don't care about the order of
     those two pairs after sorting.  */
  int comp_res;

  if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
                                         DR_BASE_ADDRESS (b1.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
                                         DR_BASE_ADDRESS (b2.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
                                         DR_STEP (b1.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
                                         DR_STEP (b2.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
                                         DR_OFFSET (b1.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
                                         DR_INIT (b1.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
                                         DR_OFFSET (b2.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
                                         DR_INIT (b2.dr))) != 0)
    return comp_res;

  return 0;
}

/* Dump information about ALIAS_PAIR, indenting each line by INDENT.  */

static void
dump_alias_pair (dr_with_seg_len_pair_t *alias_pair, const char *indent)
{
  dump_printf (MSG_NOTE, "%sreference: %T vs. %T\n", indent,
               DR_REF (alias_pair->first.dr),
               DR_REF (alias_pair->second.dr));

  dump_printf (MSG_NOTE, "%ssegment length: %T", indent,
               alias_pair->first.seg_len);
  if (!operand_equal_p (alias_pair->first.seg_len,
                        alias_pair->second.seg_len, 0))
    dump_printf (MSG_NOTE, " vs. %T", alias_pair->second.seg_len);

  dump_printf (MSG_NOTE, "\n%saccess size: ", indent);
  dump_dec (MSG_NOTE, alias_pair->first.access_size);
  if (maybe_ne (alias_pair->first.access_size, alias_pair->second.access_size))
    {
      dump_printf (MSG_NOTE, " vs. ");
      dump_dec (MSG_NOTE, alias_pair->second.access_size);
    }

  dump_printf (MSG_NOTE, "\n%salignment: %d", indent,
               alias_pair->first.align);
  if (alias_pair->first.align != alias_pair->second.align)
    dump_printf (MSG_NOTE, " vs. %d", alias_pair->second.align);

  dump_printf (MSG_NOTE, "\n%sflags: ", indent);
  if (alias_pair->flags & DR_ALIAS_RAW)
    dump_printf (MSG_NOTE, " RAW");
  if (alias_pair->flags & DR_ALIAS_WAR)
    dump_printf (MSG_NOTE, " WAR");
  if (alias_pair->flags & DR_ALIAS_WAW)
    dump_printf (MSG_NOTE, " WAW");
  if (alias_pair->flags & DR_ALIAS_ARBITRARY)
    dump_printf (MSG_NOTE, " ARBITRARY");
  if (alias_pair->flags & DR_ALIAS_SWAPPED)
    dump_printf (MSG_NOTE, " SWAPPED");
  if (alias_pair->flags & DR_ALIAS_UNSWAPPED)
    dump_printf (MSG_NOTE, " UNSWAPPED");
  if (alias_pair->flags & DR_ALIAS_MIXED_STEPS)
    dump_printf (MSG_NOTE, " MIXED_STEPS");
  if (alias_pair->flags == 0)
    dump_printf (MSG_NOTE, " <none>");
  dump_printf (MSG_NOTE, "\n");
}

/* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
   FACTOR is the number of iterations that each data reference is accessed.

   Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
   we create an expression:

   ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
   || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))

   for aliasing checks.  However, in some cases we can decrease the number
   of checks by combining two checks into one.  For example, suppose we have
   another pair of data refs store_ptr_0 & load_ptr_1, and if the following
   condition is satisfied:

   load_ptr_0 < load_ptr_1 &&
   load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0

   (this condition means, in each iteration of the vectorized loop, the
   accessed memory of store_ptr_0 cannot be between the memory of load_ptr_0
   and load_ptr_1.)

   we can then use only the following expression to finish the aliasing
   checks between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:

   ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
   || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))

   Note that we only consider the case in which load_ptr_0 and load_ptr_1
   have the same base address.  */
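/* For instance, if one pair checks a[0..15] against b and another
   checks a[4..19] against the same b, the loop below merges them into
   a single check of a[0..19] against b, halving the number of runtime
   comparisons.  */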
1781 void
1782 prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
1783 poly_uint64)
1785 if (alias_pairs->is_empty ())
1786 return;
1788 /* Canonicalize each pair so that the base components are ordered wrt
1789 data_ref_compare_tree. This allows the loop below to merge more
1790 cases. */
1791 unsigned int i;
1792 dr_with_seg_len_pair_t *alias_pair;
1793 FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1795 data_reference_p dr_a = alias_pair->first.dr;
1796 data_reference_p dr_b = alias_pair->second.dr;
1797 int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
1798 DR_BASE_ADDRESS (dr_b));
1799 if (comp_res == 0)
1800 comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
1801 if (comp_res == 0)
1802 comp_res = data_ref_compare_tree (DR_INIT (dr_a), DR_INIT (dr_b));
1803 if (comp_res > 0)
1805 std::swap (alias_pair->first, alias_pair->second);
1806 alias_pair->flags |= DR_ALIAS_SWAPPED;
1808 else
1809 alias_pair->flags |= DR_ALIAS_UNSWAPPED;
1812 /* Sort the collected data ref pairs so that we can scan them once to
1813 combine all possible aliasing checks. */
1814 alias_pairs->qsort (comp_dr_with_seg_len_pair);
1816 /* Scan the sorted dr pairs and check if we can combine alias checks
1817 of two neighboring dr pairs. */
1818 unsigned int last = 0;
1819 for (i = 1; i < alias_pairs->length (); ++i)
1821 /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2). */
1822 dr_with_seg_len_pair_t *alias_pair1 = &(*alias_pairs)[last];
1823 dr_with_seg_len_pair_t *alias_pair2 = &(*alias_pairs)[i];
1825 dr_with_seg_len *dr_a1 = &alias_pair1->first;
1826 dr_with_seg_len *dr_b1 = &alias_pair1->second;
1827 dr_with_seg_len *dr_a2 = &alias_pair2->first;
1828 dr_with_seg_len *dr_b2 = &alias_pair2->second;
1830 /* Remove duplicate data ref pairs. */
1831 if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
1833 if (dump_enabled_p ())
1834 dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
1835 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1836 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1837 alias_pair1->flags |= alias_pair2->flags;
1838 continue;
1841 /* Assume that we won't be able to merge the pairs, then correct
1842 if we do. */
1843 last += 1;
1844 if (last != i)
1845 (*alias_pairs)[last] = (*alias_pairs)[i];
1847 if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
1849 /* We consider the case that DR_B1 and DR_B2 are same memrefs,
1850 and DR_A1 and DR_A2 are two consecutive memrefs. */
1851 if (*dr_a1 == *dr_a2)
1853 std::swap (dr_a1, dr_b1);
1854 std::swap (dr_a2, dr_b2);
1857 poly_int64 init_a1, init_a2;
1858 /* Only consider cases in which the distance between the initial
1859 DR_A1 and the initial DR_A2 is known at compile time. */
1860 if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
1861 DR_BASE_ADDRESS (dr_a2->dr), 0)
1862 || !operand_equal_p (DR_OFFSET (dr_a1->dr),
1863 DR_OFFSET (dr_a2->dr), 0)
1864 || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
1865 || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
1866 continue;
1868 /* Don't combine if we can't tell which one comes first. */
1869 if (!ordered_p (init_a1, init_a2))
1870 continue;
1872 /* Work out what the segment length would be if we did combine
1873 DR_A1 and DR_A2:
1875 - If DR_A1 and DR_A2 have equal lengths, that length is
1876 also the combined length.
1878 - If DR_A1 and DR_A2 both have negative "lengths", the combined
1879 length is the lower bound on those lengths.
1881 - If DR_A1 and DR_A2 both have positive lengths, the combined
1882 length is the upper bound on those lengths.
1884 Other cases are unlikely to give a useful combination.
1886 The lengths both have sizetype, so the sign is taken from
1887 the step instead. */
1888 poly_uint64 new_seg_len = 0;
1889 bool new_seg_len_p = !operand_equal_p (dr_a1->seg_len,
1890 dr_a2->seg_len, 0);
1891 if (new_seg_len_p)
1893 poly_uint64 seg_len_a1, seg_len_a2;
1894 if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
1895 || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
1896 continue;
1898 tree indicator_a = dr_direction_indicator (dr_a1->dr);
1899 if (TREE_CODE (indicator_a) != INTEGER_CST)
1900 continue;
1902 tree indicator_b = dr_direction_indicator (dr_a2->dr);
1903 if (TREE_CODE (indicator_b) != INTEGER_CST)
1904 continue;
1906 int sign_a = tree_int_cst_sgn (indicator_a);
1907 int sign_b = tree_int_cst_sgn (indicator_b);
1909 if (sign_a <= 0 && sign_b <= 0)
1910 new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
1911 else if (sign_a >= 0 && sign_b >= 0)
1912 new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
1913 else
1914 continue;
1916 /* At this point we're committed to merging the refs. */
1918 /* Make sure dr_a1 starts left of dr_a2. */
1919 if (maybe_gt (init_a1, init_a2))
1921 std::swap (*dr_a1, *dr_a2);
1922 std::swap (init_a1, init_a2);
1925 /* The DR_Bs are equal, so only the DR_As can introduce
1926 mixed steps. */
1927 if (!operand_equal_p (DR_STEP (dr_a1->dr), DR_STEP (dr_a2->dr), 0))
1928 alias_pair1->flags |= DR_ALIAS_MIXED_STEPS;
1930 if (new_seg_len_p)
1932 dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
1933 new_seg_len);
1934 dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
1937 /* This is known to be nonnegative due to the swap above. */
1938 poly_uint64 diff = init_a2 - init_a1;
1940 /* The new check will start at DR_A1. Make sure that its access
1941 size encompasses the initial DR_A2. */
1942 if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
1944 dr_a1->access_size = upper_bound (dr_a1->access_size,
1945 diff + dr_a2->access_size);
1946 unsigned int new_align = known_alignment (dr_a1->access_size);
1947 dr_a1->align = MIN (dr_a1->align, new_align);
1949 if (dump_enabled_p ())
1950 dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
1951 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1952 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1953 alias_pair1->flags |= alias_pair2->flags;
1954 last -= 1;
1957 alias_pairs->truncate (last + 1);
1959 /* Try to restore the original dr_with_seg_len order within each
1960 dr_with_seg_len_pair_t. If we ended up combining swapped and
1961 unswapped pairs into the same check, we have to invalidate any
1962 RAW, WAR and WAW information for it. */
1963 if (dump_enabled_p ())
1964 dump_printf (MSG_NOTE, "merged alias checks:\n");
1965 FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1967 unsigned int swap_mask = (DR_ALIAS_SWAPPED | DR_ALIAS_UNSWAPPED);
1968 unsigned int swapped = (alias_pair->flags & swap_mask);
1969 if (swapped == DR_ALIAS_SWAPPED)
1970 std::swap (alias_pair->first, alias_pair->second);
1971 else if (swapped != DR_ALIAS_UNSWAPPED)
1972 alias_pair->flags |= DR_ALIAS_ARBITRARY;
1973 alias_pair->flags &= ~swap_mask;
1974 if (dump_enabled_p ())
1975 dump_alias_pair (alias_pair, " ");
1979 /* A subroutine of create_intersect_range_checks, with a subset of the
1980 same arguments. Try to use IFN_CHECK_RAW_PTRS and IFN_CHECK_WAR_PTRS
1981 to optimize cases in which the references form a simple RAW, WAR or
1982 WAW dependence. */
1984 static bool
1985 create_ifn_alias_checks (tree *cond_expr,
1986 const dr_with_seg_len_pair_t &alias_pair)
1988 const dr_with_seg_len& dr_a = alias_pair.first;
1989 const dr_with_seg_len& dr_b = alias_pair.second;
1991 /* Check for cases in which:
1993 (a) we have a known RAW, WAR or WAW dependence;
1994 (b) the accesses are well-ordered in both the original and new code
1995 (see the comment above the DR_ALIAS_* flags for details); and
1996 (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR. */
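/* The flags test below enforces this: any bit outside DR_ALIAS_RAW,
   DR_ALIAS_WAR and DR_ALIAS_WAW (e.g. DR_ALIAS_ARBITRARY or
   DR_ALIAS_MIXED_STEPS) would contradict (b) or (c). */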
1997 if (alias_pair.flags & ~(DR_ALIAS_RAW | DR_ALIAS_WAR | DR_ALIAS_WAW))
1998 return false;
2000 /* Make sure that both DRs access the same pattern of bytes,
2001 with a constant length and step. */
2002 poly_uint64 seg_len;
2003 if (!operand_equal_p (dr_a.seg_len, dr_b.seg_len, 0)
2004 || !poly_int_tree_p (dr_a.seg_len, &seg_len)
2005 || maybe_ne (dr_a.access_size, dr_b.access_size)
2006 || !operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0)
2007 || !tree_fits_uhwi_p (DR_STEP (dr_a.dr)))
2008 return false;
2010 unsigned HOST_WIDE_INT bytes = tree_to_uhwi (DR_STEP (dr_a.dr));
2011 tree addr_a = DR_BASE_ADDRESS (dr_a.dr);
2012 tree addr_b = DR_BASE_ADDRESS (dr_b.dr);
2014 /* See whether the target supports what we want to do. WAW checks are
2015 equivalent to WAR checks here. */
2016 internal_fn ifn = (alias_pair.flags & DR_ALIAS_RAW
2017 ? IFN_CHECK_RAW_PTRS
2018 : IFN_CHECK_WAR_PTRS);
2019 unsigned int align = MIN (dr_a.align, dr_b.align);
2020 poly_uint64 full_length = seg_len + bytes;
2021 if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
2022 full_length, align))
2024 full_length = seg_len + dr_a.access_size;
2025 if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
2026 full_length, align))
2027 return false;
2030 /* Commit to using this form of test. */
2031 addr_a = fold_build_pointer_plus (addr_a, DR_OFFSET (dr_a.dr));
2032 addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
2034 addr_b = fold_build_pointer_plus (addr_b, DR_OFFSET (dr_b.dr));
2035 addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
2037 *cond_expr = build_call_expr_internal_loc (UNKNOWN_LOCATION,
2038 ifn, boolean_type_node,
2039 4, addr_a, addr_b,
2040 size_int (full_length),
2041 size_int (align));
2043 if (dump_enabled_p ())
2045 if (ifn == IFN_CHECK_RAW_PTRS)
2046 dump_printf (MSG_NOTE, "using an IFN_CHECK_RAW_PTRS test\n");
2047 else
2048 dump_printf (MSG_NOTE, "using an IFN_CHECK_WAR_PTRS test\n");
2050 return true;
2053 /* Try to generate a runtime condition that is true if ALIAS_PAIR is
2054 free of aliases, using a condition based on index values instead
2055 of a condition based on addresses. Return true on success,
2056 storing the condition in *COND_EXPR.
2058 This can only be done if the two data references in ALIAS_PAIR access
2059 the same array object and the index is the only difference. For example,
2060 if the two data references are DR_A and DR_B:
2062 DR_A DR_B
2063 data-ref arr[i] arr[j]
2064 base_object arr arr
2065 index {i_0, +, 1}_loop {j_0, +, 1}_loop
2067 The addresses and their indices look like:
2069 |<- ADDR_A ->| |<- ADDR_B ->|
2070 ------------------------------------------------------->
2071 | | | | | | | | | |
2072 ------------------------------------------------------->
2073 i_0 ... i_0+4 j_0 ... j_0+4
2075 We can create an expression based on the index rather than the address:
2077 (unsigned) (i_0 - j_0 + 3) <= 6
2079 i.e. the indices are less than 4 apart.
2081 Note that the evolution step of the index needs to be considered in the comparison. */
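/* To spell out the arithmetic of the example above: with segments
   covering 4 iterations each, the accesses overlap only if
   -3 <= i_0 - j_0 <= 3; adding 3 to both sides and using an unsigned
   comparison folds the two bounds into the single test
   (unsigned) (i_0 - j_0 + 3) <= 6. */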
2083 static bool
2084 create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
2085 const dr_with_seg_len_pair_t &alias_pair)
2087 const dr_with_seg_len &dr_a = alias_pair.first;
2088 const dr_with_seg_len &dr_b = alias_pair.second;
2089 if ((alias_pair.flags & DR_ALIAS_MIXED_STEPS)
2090 || integer_zerop (DR_STEP (dr_a.dr))
2091 || integer_zerop (DR_STEP (dr_b.dr))
2092 || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
2093 return false;
2095 poly_uint64 seg_len1, seg_len2;
2096 if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
2097 || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
2098 return false;
2100 if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
2101 return false;
2103 if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
2104 return false;
2106 if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
2107 return false;
2109 gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
2111 bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
2112 unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
2113 if (neg_step)
2115 abs_step = -abs_step;
2116 seg_len1 = (-wi::to_poly_wide (dr_a.seg_len)).force_uhwi ();
2117 seg_len2 = (-wi::to_poly_wide (dr_b.seg_len)).force_uhwi ();
2120 /* Infer the number of iterations with which the memory segment is accessed
2121 by DR. In other words, an alias is checked if the memory segment accessed
2122 by DR_A in some iterations intersects with the memory segment accessed by
2123 DR_B in the same number of iterations.
2124 Note the segment length is a linear function of the number of iterations,
2125 with DR_STEP as the coefficient. */
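/* E.g. (illustrative values) a segment length of 17 bytes with
   abs_step == 4 gives niter_len == (17 + 4 - 1) / 4 == 5 iterations. */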
2126 poly_uint64 niter_len1, niter_len2;
2127 if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
2128 || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
2129 return false;
2131 /* Divide each access size by the byte step, rounding up. */
2132 poly_uint64 niter_access1, niter_access2;
2133 if (!can_div_trunc_p (dr_a.access_size + abs_step - 1,
2134 abs_step, &niter_access1)
2135 || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
2136 abs_step, &niter_access2))
2137 return false;
2139 bool waw_or_war_p = (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) == 0;
2141 int found = -1;
2142 for (unsigned int i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
2144 tree access1 = DR_ACCESS_FN (dr_a.dr, i);
2145 tree access2 = DR_ACCESS_FN (dr_b.dr, i);
2146 /* Two indices must be the same if they are not SCEVs, or are not SCEVs
2147 with respect to the current loop being vectorized. */
2148 if (TREE_CODE (access1) != POLYNOMIAL_CHREC
2149 || TREE_CODE (access2) != POLYNOMIAL_CHREC
2150 || CHREC_VARIABLE (access1) != (unsigned)loop->num
2151 || CHREC_VARIABLE (access2) != (unsigned)loop->num)
2153 if (operand_equal_p (access1, access2, 0))
2154 continue;
2156 return false;
2158 if (found >= 0)
2159 return false;
2160 found = i;
2163 /* Ought not to happen in practice, since if all accesses are equal then the
2164 alias should be decidable at compile time. */
2165 if (found < 0)
2166 return false;
2168 /* The two indices must have the same step. */
2169 tree access1 = DR_ACCESS_FN (dr_a.dr, found);
2170 tree access2 = DR_ACCESS_FN (dr_b.dr, found);
2171 if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
2172 return false;
2174 tree idx_step = CHREC_RIGHT (access1);
2175 /* Index must have const step, otherwise DR_STEP won't be constant. */
2176 gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
2177 /* Index must evaluate in the same direction as DR. */
2178 gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
2180 tree min1 = CHREC_LEFT (access1);
2181 tree min2 = CHREC_LEFT (access2);
2182 if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
2183 return false;
2185 /* Ideally, the alias could be checked against the loop's control IV, but
2186 we would need to prove a linear mapping between the control IV and the
2187 reference index. Although that should be true, we instead check against
2188 the (array) index of the data reference. Like the segment length, the
2189 index length is a linear function of the number of iterations, with
2190 idx_step as the coefficient, i.e. niter_len * idx_step. */
2191 offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step),
2192 SIGNED);
2193 if (neg_step)
2194 abs_idx_step = -abs_idx_step;
2195 poly_offset_int idx_len1 = abs_idx_step * niter_len1;
2196 poly_offset_int idx_len2 = abs_idx_step * niter_len2;
2197 poly_offset_int idx_access1 = abs_idx_step * niter_access1;
2198 poly_offset_int idx_access2 = abs_idx_step * niter_access2;
2200 gcc_assert (known_ge (idx_len1, 0)
2201 && known_ge (idx_len2, 0)
2202 && known_ge (idx_access1, 0)
2203 && known_ge (idx_access2, 0));
2205 /* Each access has the following pattern, with lengths measured
2206 in units of INDEX:
2208 <-- idx_len -->
2209 <--- A: -ve step --->
2210 +-----+-------+-----+-------+-----+
2211 | n-1 | ..... | 0 | ..... | n-1 |
2212 +-----+-------+-----+-------+-----+
2213 <--- B: +ve step --->
2214 <-- idx_len -->
2218 where "n" is the number of scalar iterations covered by the segment
2219 and where each access spans idx_access units.
2221 A is the range of bytes accessed when the step is negative,
2222 B is the range when the step is positive.
2224 When checking for general overlap, we need to test whether
2225 the range:
2227 [min1 + low_offset1, min1 + high_offset1 + idx_access1 - 1]
2229 overlaps:
2231 [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1]
2233 where:
2235 low_offsetN = +ve step ? 0 : -idx_lenN;
2236 high_offsetN = +ve step ? idx_lenN : 0;
2238 This is equivalent to testing whether:
2240 min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1
2241 && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1
2243 Converting this into a single test, there is an overlap if:
2245 0 <= min2 - min1 + bias <= limit
2247 where bias = high_offset2 + idx_access2 - 1 - low_offset1
2248 limit = (high_offset1 - low_offset1 + idx_access1 - 1)
2249 + (high_offset2 - low_offset2 + idx_access2 - 1)
2250 i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1
2252 Combining the tests requires limit to be computable in an unsigned
2253 form of the index type; if it isn't, we fall back to the usual
2254 pointer-based checks.
2256 We can do better if DR_B is a write and if DR_A and DR_B are
2257 well-ordered in both the original and the new code (see the
2258 comment above the DR_ALIAS_* flags for details). In this case
2259 we know that for each i in [0, n-1], the write performed by
2260 access i of DR_B occurs after access numbers j<=i of DR_A in
2261 both the original and the new code. Any write or anti
2262 dependencies wrt those DR_A accesses are therefore maintained.
2264 We just need to make sure that each individual write in DR_B does not
2265 overlap any higher-indexed access in DR_A; such DR_A accesses happen
2266 after the DR_B access in the original code but happen before it in
2267 the new code.
2269 We know the steps for both accesses are equal, so by induction, we
2270 just need to test whether the first write of DR_B overlaps a later
2271 access of DR_A. In other words, we need to move min1 along by
2272 one iteration:
2274 min1' = min1 + idx_step
2276 and use the ranges:
2278 [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1]
2280 and:
2282 [min2, min2 + idx_access2 - 1]
2284 where:
2286 low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|)
2287 high_offset1' = +ve step ? idx_len1 - |idx_step| : 0. */
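/* A worked example with hypothetical values: for a positive step with
   idx_len1 == idx_len2 == 8, idx_access1 == idx_access2 == 1 and
   !waw_or_war_p, we get low_offset1 == 0, high_offset2 == 8,
   bias == 8 + 1 - 1 - 0 == 8 and limit == 8 + 8 + 1 - 1 + 1 - 1 == 16,
   so there is an overlap iff 0 <= min2 - min1 + 8 <= 16, i.e. iff the
   two initial indices are at most 8 apart. */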
2288 if (waw_or_war_p)
2289 idx_len1 -= abs_idx_step;
2291 poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1;
2292 if (!waw_or_war_p)
2293 limit += idx_len2;
2295 tree utype = unsigned_type_for (TREE_TYPE (min1));
2296 if (!wi::fits_to_tree_p (limit, utype))
2297 return false;
2299 poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
2300 poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2;
2301 poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
2302 /* Equivalent to adding IDX_STEP to MIN1. */
2303 if (waw_or_war_p)
2304 bias -= wi::to_offset (idx_step);
2306 tree subject = fold_build2 (MINUS_EXPR, utype,
2307 fold_convert (utype, min2),
2308 fold_convert (utype, min1));
2309 subject = fold_build2 (PLUS_EXPR, utype, subject,
2310 wide_int_to_tree (utype, bias));
2311 tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject,
2312 wide_int_to_tree (utype, limit));
2313 if (*cond_expr)
2314 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2315 *cond_expr, part_cond_expr);
2316 else
2317 *cond_expr = part_cond_expr;
2318 if (dump_enabled_p ())
2320 if (waw_or_war_p)
2321 dump_printf (MSG_NOTE, "using an index-based WAR/WAW test\n");
2322 else
2323 dump_printf (MSG_NOTE, "using an index-based overlap test\n");
2325 return true;
2328 /* A subroutine of create_intersect_range_checks, with a subset of the
2329 same arguments. Try to optimize cases in which the second access
2330 is a write and in which some overlap is valid. */
2332 static bool
2333 create_waw_or_war_checks (tree *cond_expr,
2334 const dr_with_seg_len_pair_t &alias_pair)
2336 const dr_with_seg_len& dr_a = alias_pair.first;
2337 const dr_with_seg_len& dr_b = alias_pair.second;
2339 /* Check for cases in which:
2341 (a) DR_B is always a write;
2342 (b) the accesses are well-ordered in both the original and new code
2343 (see the comment above the DR_ALIAS_* flags for details); and
2344 (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR. */
2345 if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
2346 return false;
2348 /* Check for equal (but possibly variable) steps. */
2349 tree step = DR_STEP (dr_a.dr);
2350 if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
2351 return false;
2353 /* Make sure that we can operate on sizetype without loss of precision. */
2354 tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
2355 if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
2356 return false;
2358 /* All addresses involved are known to have a common alignment ALIGN.
2359 We can therefore subtract ALIGN from an exclusive endpoint to get
2360 an inclusive endpoint. In the best (and common) case, ALIGN is the
2361 same as the access sizes of both DRs, and so subtracting ALIGN
2362 cancels out the addition of an access size. */
2363 unsigned int align = MIN (dr_a.align, dr_b.align);
2364 poly_uint64 last_chunk_a = dr_a.access_size - align;
2365 poly_uint64 last_chunk_b = dr_b.access_size - align;
2367 /* Get a boolean expression that is true when the step is negative. */
2368 tree indicator = dr_direction_indicator (dr_a.dr);
2369 tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2370 fold_convert (ssizetype, indicator),
2371 ssize_int (0));
2373 /* Get lengths in sizetype. */
2374 tree seg_len_a
2375 = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len));
2376 step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step));
2378 /* Each access has the following pattern:
2380 <- |seg_len| ->
2381 <--- A: -ve step --->
2382 +-----+-------+-----+-------+-----+
2383 | n-1 | ..... | 0 | ..... | n-1 |
2384 +-----+-------+-----+-------+-----+
2385 <--- B: +ve step --->
2386 <- |seg_len| ->
2388 base address
2390 where "n" is the number of scalar iterations covered by the segment.
2392 A is the range of bytes accessed when the step is negative,
2393 B is the range when the step is positive.
2395 We know that DR_B is a write. We also know (from checking that
2396 DR_A and DR_B are well-ordered) that for each i in [0, n-1],
2397 the write performed by access i of DR_B occurs after access numbers
2398 j<=i of DR_A in both the original and the new code. Any write or
2399 anti dependencies wrt those DR_A accesses are therefore maintained.
2401 We just need to make sure that each individual write in DR_B does not
2402 overlap any higher-indexed access in DR_A; such DR_A accesses happen
2403 after the DR_B access in the original code but happen before it in
2404 the new code.
2406 We know the steps for both accesses are equal, so by induction, we
2407 just need to test whether the first write of DR_B overlaps a later
2408 access of DR_A. In other words, we need to move addr_a along by
2409 one iteration:
2411 addr_a' = addr_a + step
2413 and check whether:
2415 [addr_b, addr_b + last_chunk_b]
2417 overlaps:
2419 [addr_a' + low_offset_a, addr_a' + high_offset_a + last_chunk_a]
2421 where [low_offset_a, high_offset_a] spans accesses [1, n-1]. I.e.:
2423 low_offset_a = +ve step ? 0 : seg_len_a - step
2424 high_offset_a = +ve step ? seg_len_a - step : 0
2426 This is equivalent to testing whether:
2428 addr_a' + low_offset_a <= addr_b + last_chunk_b
2429 && addr_b <= addr_a' + high_offset_a + last_chunk_a
2431 Converting this into a single test, there is an overlap if:
2433 0 <= addr_b + last_chunk_b - addr_a' - low_offset_a <= limit
2435 where limit = high_offset_a - low_offset_a + last_chunk_a + last_chunk_b
2437 If DR_A is performed, limit + |step| - last_chunk_b is known to be
2438 less than the size of the object underlying DR_A. We also know
2439 that last_chunk_b <= |step|; this is checked elsewhere if it isn't
2440 guaranteed at compile time. There can therefore be no overflow if
2441 "limit" is calculated in an unsigned type with pointer precision. */
2442 tree addr_a = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr),
2443 DR_OFFSET (dr_a.dr));
2444 addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
2446 tree addr_b = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr),
2447 DR_OFFSET (dr_b.dr));
2448 addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
2450 /* Advance ADDR_A by one iteration and adjust the length to compensate. */
2451 addr_a = fold_build_pointer_plus (addr_a, step);
2452 tree seg_len_a_minus_step = fold_build2 (MINUS_EXPR, sizetype,
2453 seg_len_a, step);
2454 if (!CONSTANT_CLASS_P (seg_len_a_minus_step))
2455 seg_len_a_minus_step = build1 (SAVE_EXPR, sizetype, seg_len_a_minus_step);
2457 tree low_offset_a = fold_build3 (COND_EXPR, sizetype, neg_step,
2458 seg_len_a_minus_step, size_zero_node);
2459 if (!CONSTANT_CLASS_P (low_offset_a))
2460 low_offset_a = build1 (SAVE_EXPR, sizetype, low_offset_a);
2462 /* We could use COND_EXPR <neg_step, size_zero_node, seg_len_a_minus_step>,
2463 but it's usually more efficient to reuse the LOW_OFFSET_A result. */
2464 tree high_offset_a = fold_build2 (MINUS_EXPR, sizetype, seg_len_a_minus_step,
2465 low_offset_a);
2467 /* The amount added to addr_b - addr_a'. */
2468 tree bias = fold_build2 (MINUS_EXPR, sizetype,
2469 size_int (last_chunk_b), low_offset_a);
2471 tree limit = fold_build2 (MINUS_EXPR, sizetype, high_offset_a, low_offset_a);
2472 limit = fold_build2 (PLUS_EXPR, sizetype, limit,
2473 size_int (last_chunk_a + last_chunk_b));
2475 tree subject = fold_build2 (POINTER_DIFF_EXPR, ssizetype, addr_b, addr_a);
2476 subject = fold_build2 (PLUS_EXPR, sizetype,
2477 fold_convert (sizetype, subject), bias);
2479 *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit);
2480 if (dump_enabled_p ())
2481 dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n");
2482 return true;
2485 /* If ALIGN is nonzero, set up *SEG_MIN_OUT and *SEG_MAX_OUT so that for
2486 every address ADDR accessed by D:
2488 *SEG_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEG_MAX_OUT
2490 In this case, every element accessed by D is aligned to at least
2491 ALIGN bytes.
2493 If ALIGN is zero then instead set *SEG_MAX_OUT so that:
2495 *SEG_MIN_OUT <= ADDR < *SEG_MAX_OUT. */
2497 static void
2498 get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
2499 tree *seg_max_out, HOST_WIDE_INT align)
2501 /* Each access has the following pattern:
2503 <- |seg_len| ->
2504 <--- A: -ve step --->
2505 +-----+-------+-----+-------+-----+
2506 | n-1 | ..... | 0 | ..... | n-1 |
2507 +-----+-------+-----+-------+-----+
2508 <--- B: +ve step --->
2509 <- |seg_len| ->
2511 base address
2513 where "n" is the number of scalar iterations covered by the segment.
2514 (This should be VF for a particular pair if we know that both steps
2515 are the same, otherwise it will be the full number of scalar loop
2516 iterations.)
2518 A is the range of bytes accessed when the step is negative,
2519 B is the range when the step is positive.
2521 If the access size is "access_size" bytes, the lowest addressed byte is:
2523 base + (step < 0 ? seg_len : 0) [LB]
2525 and the highest addressed byte is always below:
2527 base + (step < 0 ? 0 : seg_len) + access_size [UB]
2529 Thus:
2531 LB <= ADDR < UB
2533 If ALIGN is nonzero, all three values are aligned to at least ALIGN
2534 bytes, so:
2536 LB <= ADDR <= UB - ALIGN
2538 where "- ALIGN" folds naturally with the "+ access_size" and often
2539 cancels it out.
2541 We don't try to simplify LB and UB beyond this (e.g. by using
2542 MIN and MAX based on whether seg_len rather than the stride is
2543 negative) because it is possible for the absolute size of the
2544 segment to overflow the range of a ssize_t.
2546 Keeping the pointer_plus outside of the cond_expr should allow
2547 the cond_exprs to be shared with other alias checks. */
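/* E.g. (illustrative values) for a 16-byte segment with a positive step,
   access_size == 4 and align == 4, the bounds are LB == base and
   UB - ALIGN == base + 16 + 4 - 4 == base + 16, i.e. the "- align"
   cancels the "+ access_size" as described above. */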
2548 tree indicator = dr_direction_indicator (d.dr);
2549 tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2550 fold_convert (ssizetype, indicator),
2551 ssize_int (0));
2552 tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
2553 DR_OFFSET (d.dr));
2554 addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
2555 tree seg_len
2556 = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));
2558 tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2559 seg_len, size_zero_node);
2560 tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2561 size_zero_node, seg_len);
2562 max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
2563 size_int (d.access_size - align));
2565 *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
2566 *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
2569 /* Generate a runtime condition that is true if ALIAS_PAIR is free of aliases,
2570 storing the condition in *COND_EXPR. The fallback is to generate
2571 a test that the two accesses do not overlap:
2573 end_a <= start_b || end_b <= start_a. */
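/* E.g. (illustrative addresses) segments [0, 16) and [16, 32) satisfy
   end_a <= start_b, so the fallback proves they cannot alias. */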
2575 static void
2576 create_intersect_range_checks (class loop *loop, tree *cond_expr,
2577 const dr_with_seg_len_pair_t &alias_pair)
2579 const dr_with_seg_len& dr_a = alias_pair.first;
2580 const dr_with_seg_len& dr_b = alias_pair.second;
2581 *cond_expr = NULL_TREE;
2582 if (create_intersect_range_checks_index (loop, cond_expr, alias_pair))
2583 return;
2585 if (create_ifn_alias_checks (cond_expr, alias_pair))
2586 return;
2588 if (create_waw_or_war_checks (cond_expr, alias_pair))
2589 return;
2591 unsigned HOST_WIDE_INT min_align;
2592 tree_code cmp_code;
2593 /* We don't have to check DR_ALIAS_MIXED_STEPS here, since both versions
2594 are equivalent. This is just an optimization heuristic. */
2595 if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
2596 && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
2598 /* In this case adding access_size to seg_len is likely to give
2599 a simple X * step, where X is either the number of scalar
2600 iterations or the vectorization factor. We're better off
2601 keeping that, rather than subtracting an alignment from it.
2603 In this case the maximum values are exclusive and so there is
2604 no alias if the maximum of one segment equals the minimum
2605 of another. */
2606 min_align = 0;
2607 cmp_code = LE_EXPR;
2609 else
2611 /* Calculate the minimum alignment shared by all four pointers,
2612 then arrange for this alignment to be subtracted from the
2613 exclusive maximum values to get inclusive maximum values.
2614 This "- min_align" is cumulative with a "+ access_size"
2615 in the calculation of the maximum values. In the best
2616 (and common) case, the two cancel each other out, leaving
2617 us with an inclusive bound based only on seg_len. In the
2618 worst case we're simply adding a smaller number than before.
2620 Because the maximum values are inclusive, there is an alias
2621 if the maximum value of one segment is equal to the minimum
2622 value of the other. */
2623 min_align = MIN (dr_a.align, dr_b.align);
2624 cmp_code = LT_EXPR;
2627 tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
2628 get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
2629 get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);
2631 *cond_expr
2632 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2633 fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
2634 fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
2635 if (dump_enabled_p ())
2636 dump_printf (MSG_NOTE, "using an address-based overlap test\n");
2639 /* Create a conditional expression that represents the run-time checks for
2640 overlapping of address ranges represented by a list of data references
2641 pairs passed in ALIAS_PAIRS. Data references are in LOOP. The returned
2642 COND_EXPR is the conditional expression to be used in the if statement
2643 that controls which version of the loop gets executed at runtime. */
2645 void
2646 create_runtime_alias_checks (class loop *loop,
2647 const vec<dr_with_seg_len_pair_t> *alias_pairs,
2648 tree * cond_expr)
2650 tree part_cond_expr;
2652 fold_defer_overflow_warnings ();
2653 for (const dr_with_seg_len_pair_t &alias_pair : alias_pairs)
2655 gcc_assert (alias_pair.flags);
2656 if (dump_enabled_p ())
2657 dump_printf (MSG_NOTE,
2658 "create runtime check for data references %T and %T\n",
2659 DR_REF (alias_pair.first.dr),
2660 DR_REF (alias_pair.second.dr));
2662 /* Create a condition expression for each pair of data references. */
2663 create_intersect_range_checks (loop, &part_cond_expr, alias_pair);
2664 if (*cond_expr)
2665 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2666 *cond_expr, part_cond_expr);
2667 else
2668 *cond_expr = part_cond_expr;
2670 fold_undefer_and_ignore_overflow_warnings ();
2673 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
2674 expressions. */
2675 static bool
2676 dr_equal_offsets_p1 (tree offset1, tree offset2)
2678 bool res;
2680 STRIP_NOPS (offset1);
2681 STRIP_NOPS (offset2);
2683 if (offset1 == offset2)
2684 return true;
2686 if (TREE_CODE (offset1) != TREE_CODE (offset2)
2687 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
2688 return false;
2690 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
2691 TREE_OPERAND (offset2, 0));
2693 if (!res || !BINARY_CLASS_P (offset1))
2694 return res;
2696 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
2697 TREE_OPERAND (offset2, 1));
2699 return res;
2702 /* Check if DRA and DRB have equal offsets. */
2703 bool
2704 dr_equal_offsets_p (struct data_reference *dra,
2705 struct data_reference *drb)
2707 tree offset1, offset2;
2709 offset1 = DR_OFFSET (dra);
2710 offset2 = DR_OFFSET (drb);
2712 return dr_equal_offsets_p1 (offset1, offset2);
2715 /* Returns true if FNA == FNB. */
2717 static bool
2718 affine_function_equal_p (affine_fn fna, affine_fn fnb)
2720 unsigned i, n = fna.length ();
2722 if (n != fnb.length ())
2723 return false;
2725 for (i = 0; i < n; i++)
2726 if (!operand_equal_p (fna[i], fnb[i], 0))
2727 return false;
2729 return true;
2732 /* If all the functions in CF are the same, returns one of them,
2733 otherwise returns NULL. */
2735 static affine_fn
2736 common_affine_function (conflict_function *cf)
2738 unsigned i;
2739 affine_fn comm;
2741 if (!CF_NONTRIVIAL_P (cf))
2742 return affine_fn ();
2744 comm = cf->fns[0];
2746 for (i = 1; i < cf->n; i++)
2747 if (!affine_function_equal_p (comm, cf->fns[i]))
2748 return affine_fn ();
2750 return comm;
2753 /* Returns the base of the affine function FN. */
2755 static tree
2756 affine_function_base (affine_fn fn)
2758 return fn[0];
2761 /* Returns true if FN is a constant. */
2763 static bool
2764 affine_function_constant_p (affine_fn fn)
2766 unsigned i;
2767 tree coef;
2769 for (i = 1; fn.iterate (i, &coef); i++)
2770 if (!integer_zerop (coef))
2771 return false;
2773 return true;
2776 /* Returns true if FN is the zero constant function. */
2778 static bool
2779 affine_function_zero_p (affine_fn fn)
2781 return (integer_zerop (affine_function_base (fn))
2782 && affine_function_constant_p (fn));
2785 /* Returns a signed integer type with the largest precision from TA
2786 and TB. */
2788 static tree
2789 signed_type_for_types (tree ta, tree tb)
2791 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
2792 return signed_type_for (ta);
2793 else
2794 return signed_type_for (tb);
2797 /* Applies operation OP on affine functions FNA and FNB, and returns the
2798 result. */
2800 static affine_fn
2801 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
2803 unsigned i, n, m;
2804 affine_fn ret;
2805 tree coef;
2807 if (fnb.length () > fna.length ())
2809 n = fna.length ();
2810 m = fnb.length ();
2812 else
2814 n = fnb.length ();
2815 m = fna.length ();
2818 ret.create (m);
2819 for (i = 0; i < n; i++)
2821 tree type = signed_type_for_types (TREE_TYPE (fna[i]),
2822 TREE_TYPE (fnb[i]));
2823 ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
2826 for (; fna.iterate (i, &coef); i++)
2827 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2828 coef, integer_zero_node));
2829 for (; fnb.iterate (i, &coef); i++)
2830 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2831 integer_zero_node, coef));
2833 return ret;
2836 /* Returns the sum of affine functions FNA and FNB. */
2838 static affine_fn
2839 affine_fn_plus (affine_fn fna, affine_fn fnb)
2841 return affine_fn_op (PLUS_EXPR, fna, fnb);
2844 /* Returns the difference of affine functions FNA and FNB. */
2846 static affine_fn
2847 affine_fn_minus (affine_fn fna, affine_fn fnb)
2849 return affine_fn_op (MINUS_EXPR, fna, fnb);
2852 /* Frees affine function FN. */
2854 static void
2855 affine_fn_free (affine_fn fn)
2857 fn.release ();
2860 /* For each subscript in the data dependence relation DDR, determine
2861 the distance. */
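/* E.g. if the elements of A and B conflict at iterations x == k + 3 and
   y == k respectively, the two affine functions differ by the constant
   3, so SUB_DISTANCE is 3; when the difference is not constant,
   SUB_DISTANCE becomes chrec_dont_know. */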
2863 static void
2864 compute_subscript_distance (struct data_dependence_relation *ddr)
2866 conflict_function *cf_a, *cf_b;
2867 affine_fn fn_a, fn_b, diff;
2869 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
2871 unsigned int i;
2873 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2875 struct subscript *subscript;
2877 subscript = DDR_SUBSCRIPT (ddr, i);
2878 cf_a = SUB_CONFLICTS_IN_A (subscript);
2879 cf_b = SUB_CONFLICTS_IN_B (subscript);
2881 fn_a = common_affine_function (cf_a);
2882 fn_b = common_affine_function (cf_b);
2883 if (!fn_a.exists () || !fn_b.exists ())
2885 SUB_DISTANCE (subscript) = chrec_dont_know;
2886 return;
2888 diff = affine_fn_minus (fn_a, fn_b);
2890 if (affine_function_constant_p (diff))
2891 SUB_DISTANCE (subscript) = affine_function_base (diff);
2892 else
2893 SUB_DISTANCE (subscript) = chrec_dont_know;
2895 affine_fn_free (diff);
2900 /* Returns the conflict function for "unknown". */
2902 static conflict_function *
2903 conflict_fn_not_known (void)
2905 conflict_function *fn = XCNEW (conflict_function);
2906 fn->n = NOT_KNOWN;
2908 return fn;
2911 /* Returns the conflict function for "independent". */
2913 static conflict_function *
2914 conflict_fn_no_dependence (void)
2916 conflict_function *fn = XCNEW (conflict_function);
2917 fn->n = NO_DEPENDENCE;
2919 return fn;
2922 /* Returns true if the address of OBJ is invariant in LOOP. */
2924 static bool
2925 object_address_invariant_in_loop_p (const class loop *loop, const_tree obj)
2927 while (handled_component_p (obj))
2929 if (TREE_CODE (obj) == ARRAY_REF)
2931 for (int i = 1; i < 4; ++i)
2932 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i),
2933 loop->num))
2934 return false;
2936 else if (TREE_CODE (obj) == COMPONENT_REF)
2938 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2939 loop->num))
2940 return false;
2942 obj = TREE_OPERAND (obj, 0);
2945 if (!INDIRECT_REF_P (obj)
2946 && TREE_CODE (obj) != MEM_REF)
2947 return true;
2949 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
2950 loop->num);
2953 /* Returns false if we can prove that data references A and B do not alias,
2954 true otherwise. If LOOP_NEST is NULL, no cross-iteration aliases are
2955 considered. */
2957 bool
2958 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
2959 class loop *loop_nest)
2961 tree addr_a = DR_BASE_OBJECT (a);
2962 tree addr_b = DR_BASE_OBJECT (b);
2964 /* If we are not processing a loop nest but scalar code, we
2965 do not need to care about possible cross-iteration dependences
2966 and thus can process the full original reference. Do so,
2967 similar to how loop invariant motion applies extra offset-based
2968 disambiguation. */
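/* E.g. (hypothetical fields) reads of p->a and p->b with equal base
   address and offset but DR_INITs 0 and 4 and 4-byte types occupy the
   disjoint ranges [0, 4) and [4, 8), so they cannot overlap. */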
2969 if (!loop_nest)
2971 tree tree_size_a = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (a)));
2972 tree tree_size_b = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (b)));
2974 if (DR_BASE_ADDRESS (a)
2975 && DR_BASE_ADDRESS (b)
2976 && operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b))
2977 && operand_equal_p (DR_OFFSET (a), DR_OFFSET (b))
2978 && poly_int_tree_p (tree_size_a)
2979 && poly_int_tree_p (tree_size_b)
2980 && !ranges_maybe_overlap_p (wi::to_widest (DR_INIT (a)),
2981 wi::to_widest (tree_size_a),
2982 wi::to_widest (DR_INIT (b)),
2983 wi::to_widest (tree_size_b)))
2985 gcc_assert (integer_zerop (DR_STEP (a))
2986 && integer_zerop (DR_STEP (b)));
2987 return false;
2990 aff_tree off1, off2;
2991 poly_widest_int size1, size2;
2992 get_inner_reference_aff (DR_REF (a), &off1, &size1);
2993 get_inner_reference_aff (DR_REF (b), &off2, &size2);
2994 aff_combination_scale (&off1, -1);
2995 aff_combination_add (&off2, &off1);
2996 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
2997 return false;
3000 if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
3001 && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
3002 /* For cross-iteration dependences the cliques must be valid for the
3003 whole loop, not just individual iterations. */
3004 && (!loop_nest
3005 || MR_DEPENDENCE_CLIQUE (addr_a) == 1
3006 || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique)
3007 && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
3008 && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
3009 return false;
3011 /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
3012 do not know the size of the base-object. So we cannot do any
3013 offset/overlap based analysis but have to rely on points-to
3014 information only. */
3015 if (TREE_CODE (addr_a) == MEM_REF
3016 && (DR_UNCONSTRAINED_BASE (a)
3017 || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
3019 /* For true dependences we can apply TBAA. */
3020 if (flag_strict_aliasing
3021 && DR_IS_WRITE (a) && DR_IS_READ (b)
3022 && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
3023 get_alias_set (DR_REF (b))))
3024 return false;
3025 if (TREE_CODE (addr_b) == MEM_REF)
3026 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3027 TREE_OPERAND (addr_b, 0));
3028 else
3029 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3030 build_fold_addr_expr (addr_b));
3032 else if (TREE_CODE (addr_b) == MEM_REF
3033 && (DR_UNCONSTRAINED_BASE (b)
3034 || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
3036 /* For true dependences we can apply TBAA. */
3037 if (flag_strict_aliasing
3038 && DR_IS_WRITE (a) && DR_IS_READ (b)
3039 && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
3040 get_alias_set (DR_REF (b))))
3041 return false;
3042 if (TREE_CODE (addr_a) == MEM_REF)
3043 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3044 TREE_OPERAND (addr_b, 0));
3045 else
3046 return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
3047 TREE_OPERAND (addr_b, 0));
3050 /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
3051 that is being subsetted in the loop nest. */
3052 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
3053 return refs_output_dependent_p (addr_a, addr_b);
3054 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
3055 return refs_anti_dependent_p (addr_a, addr_b);
3056 return refs_may_alias_p (addr_a, addr_b);
3059 /* REF_A and REF_B both satisfy access_fn_component_p. Return true
3060 if it is meaningful to compare their associated access functions
3061 when checking for dependencies. */
3063 static bool
3064 access_fn_components_comparable_p (tree ref_a, tree ref_b)
3066 /* Allow pairs of component refs from the following sets:
3068 { REALPART_EXPR, IMAGPART_EXPR }
3069 { COMPONENT_REF }
3070 { ARRAY_REF }. */
3071 tree_code code_a = TREE_CODE (ref_a);
3072 tree_code code_b = TREE_CODE (ref_b);
3073 if (code_a == IMAGPART_EXPR)
3074 code_a = REALPART_EXPR;
3075 if (code_b == IMAGPART_EXPR)
3076 code_b = REALPART_EXPR;
3077 if (code_a != code_b)
3078 return false;
3080 if (TREE_CODE (ref_a) == COMPONENT_REF)
3081 /* ??? We cannot simply use the type of operand #0 of the refs here as
3082 the Fortran compiler smuggles type punning into COMPONENT_REFs.
3083 Use the DECL_CONTEXT of the FIELD_DECLs instead. */
3084 return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
3085 == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
3087 return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
3088 TREE_TYPE (TREE_OPERAND (ref_b, 0)));
3091 /* Initialize a data dependence relation RES in LOOP_NEST. USE_ALT_INDICES
3092 is true when the main indices of A and B were not comparable so we try again
3093 with alternate indices computed on an indirect reference. */
3095 struct data_dependence_relation *
3096 initialize_data_dependence_relation (struct data_dependence_relation *res,
3097 vec<loop_p> loop_nest,
3098 bool use_alt_indices)
3100 struct data_reference *a = DDR_A (res);
3101 struct data_reference *b = DDR_B (res);
3102 unsigned int i;
3104 struct indices *indices_a = &a->indices;
3105 struct indices *indices_b = &b->indices;
3106 if (use_alt_indices)
3108 if (TREE_CODE (DR_REF (a)) != MEM_REF)
3109 indices_a = &a->alt_indices;
3110 if (TREE_CODE (DR_REF (b)) != MEM_REF)
3111 indices_b = &b->alt_indices;
3113 unsigned int num_dimensions_a = indices_a->access_fns.length ();
3114 unsigned int num_dimensions_b = indices_b->access_fns.length ();
3115 if (num_dimensions_a == 0 || num_dimensions_b == 0)
3117 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3118 return res;
3121 /* For unconstrained bases, the root (highest-indexed) subscript
3122 describes a variation in the base of the original DR_REF rather
3123 than a component access. We have no type that accurately describes
3124 the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
3125 applying this subscript) so limit the search to the last real
3126 component access.
3128 E.g. for:
3130 void
3131 f (int a[][8], int b[][8])
3133 for (int i = 0; i < 8; ++i)
3134 a[i * 2][0] = b[i][0];
3137 the a and b accesses have a single ARRAY_REF component reference [0]
3138 but have two subscripts. */
3139 if (indices_a->unconstrained_base)
3140 num_dimensions_a -= 1;
3141 if (indices_b->unconstrained_base)
3142 num_dimensions_b -= 1;
3144 /* These structures describe sequences of component references in
3145 DR_REF (A) and DR_REF (B). Each component reference is tied to a
3146 specific access function. */
3147 struct {
3148 /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
3149 DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
3150 indices. In C notation, these are the indices of the rightmost
3151 component references; e.g. for a sequence .b.c.d, the start
3152 index is for .d. */
3153 unsigned int start_a;
3154 unsigned int start_b;
3156 /* The sequence contains LENGTH consecutive access functions from
3157 each DR. */
3158 unsigned int length;
3160 /* The enclosing objects for the A and B sequences respectively,
3161 i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
3162 and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied. */
3163 tree object_a;
3164 tree object_b;
3165 } full_seq = {}, struct_seq = {};
3167 /* Before each iteration of the loop:
3169 - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
3170 - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B). */
3171 unsigned int index_a = 0;
3172 unsigned int index_b = 0;
3173 tree ref_a = DR_REF (a);
3174 tree ref_b = DR_REF (b);
3176 /* Now walk the component references from the final DR_REFs back up to
3177 the enclosing base objects. Each component reference corresponds
3178 to one access function in the DR, with access function 0 being for
3179 the final DR_REF and the highest-indexed access function being the
3180 one that is applied to the base of the DR.
3182 Look for a sequence of component references whose access functions
3183 are comparable (see access_fn_components_comparable_p). If more
3184 than one such sequence exists, pick the one nearest the base
3185 (which is the leftmost sequence in C notation). Store this sequence
3186 in FULL_SEQ.
3188 For example, if we have:
3190 struct foo { struct bar s; ... } (*a)[10], (*b)[10];
3192 A: a[0][i].s.c.d
3193 B: __real b[0][i].s.e[i].f
3195 (where d is the same type as the real component of f) then the access
3196 functions would be:
3198 0 1 2 3
3199 A: .d .c .s [i]
3201 0 1 2 3 4 5
3202 B: __real .f [i] .e .s [i]
3204 The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
3205 and [i] is an ARRAY_REF. However, the A1/B3 column contains two
3206 COMPONENT_REF accesses for struct bar, so is comparable. Likewise
3207 the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
3208 so is comparable. The A3/B5 column contains two ARRAY_REFs that
3209 index foo[10] arrays, so is again comparable. The sequence is
3210 therefore:
3212 A: [1, 3] (i.e. [i].s.c)
3213 B: [3, 5] (i.e. [i].s.e)
3215 Also look for sequences of component references whose access
3216 functions are comparable and whose enclosing objects have the same
3217 RECORD_TYPE. Store this sequence in STRUCT_SEQ. In the above
3218 example, STRUCT_SEQ would be:
3220 A: [1, 2] (i.e. s.c)
3221 B: [3, 4] (i.e. s.e) */
3222 while (index_a < num_dimensions_a && index_b < num_dimensions_b)
3224 /* The alternate indices form always has a single dimension
3225 with unconstrained base. */
3226 gcc_assert (!use_alt_indices);
3228 /* REF_A and REF_B must be one of the component access types
3229 allowed by dr_analyze_indices. */
3230 gcc_checking_assert (access_fn_component_p (ref_a));
3231 gcc_checking_assert (access_fn_component_p (ref_b));
3233 /* Get the immediately-enclosing objects for REF_A and REF_B,
3234 i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
3235 and DR_ACCESS_FN (B, INDEX_B). */
3236 tree object_a = TREE_OPERAND (ref_a, 0);
3237 tree object_b = TREE_OPERAND (ref_b, 0);
3239 tree type_a = TREE_TYPE (object_a);
3240 tree type_b = TREE_TYPE (object_b);
3241 if (access_fn_components_comparable_p (ref_a, ref_b))
3243 /* This pair of component accesses is comparable for dependence
3244 analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
3245 DR_ACCESS_FN (B, INDEX_B) in the sequence. */
3246 if (full_seq.start_a + full_seq.length != index_a
3247 || full_seq.start_b + full_seq.length != index_b)
3249 /* The accesses don't extend the current sequence,
3250 so start a new one here. */
3251 full_seq.start_a = index_a;
3252 full_seq.start_b = index_b;
3253 full_seq.length = 0;
3256 /* Add this pair of references to the sequence. */
3257 full_seq.length += 1;
3258 full_seq.object_a = object_a;
3259 full_seq.object_b = object_b;
3261 /* If the enclosing objects are structures (and thus have the
3262 same RECORD_TYPE), record the new sequence in STRUCT_SEQ. */
3263 if (TREE_CODE (type_a) == RECORD_TYPE)
3264 struct_seq = full_seq;
3266 /* Move to the next containing reference for both A and B. */
3267 ref_a = object_a;
3268 ref_b = object_b;
3269 index_a += 1;
3270 index_b += 1;
3271 continue;
3274 /* Try to approach equal type sizes. */
3275 if (!COMPLETE_TYPE_P (type_a)
3276 || !COMPLETE_TYPE_P (type_b)
3277 || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
3278 || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
3279 break;
3281 unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
3282 unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
3283 if (size_a <= size_b)
3285 index_a += 1;
3286 ref_a = object_a;
3288 if (size_b <= size_a)
3290 index_b += 1;
3291 ref_b = object_b;
3295 /* See whether FULL_SEQ ends at the base and whether the two bases
3296 are equal. We do not care about TBAA or alignment info so we can
3297 use OEP_ADDRESS_OF to avoid false negatives. */
3298 tree base_a = indices_a->base_object;
3299 tree base_b = indices_b->base_object;
3300 bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
3301 && full_seq.start_b + full_seq.length == num_dimensions_b
3302 && (indices_a->unconstrained_base
3303 == indices_b->unconstrained_base)
3304 && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
3305 && (types_compatible_p (TREE_TYPE (base_a),
3306 TREE_TYPE (base_b))
3307 || (!base_supports_access_fn_components_p (base_a)
3308 && !base_supports_access_fn_components_p (base_b)
3309 && operand_equal_p
3310 (TYPE_SIZE (TREE_TYPE (base_a)),
3311 TYPE_SIZE (TREE_TYPE (base_b)), 0)))
3312 && (!loop_nest.exists ()
3313 || (object_address_invariant_in_loop_p
3314 (loop_nest[0], base_a))));
3316 /* If the bases are the same, we can include the base variation too.
3317 E.g. the b accesses in:
3319 for (int i = 0; i < n; ++i)
3320 b[i + 4][0] = b[i][0];
3322 have a definite dependence distance of 4, while for:
3324 for (int i = 0; i < n; ++i)
3325 a[i + 4][0] = b[i][0];
3327 the dependence distance depends on the gap between a and b.
3329 If the bases are different then we can only rely on the sequence
3330 rooted at a structure access, since arrays are allowed to overlap
3331 arbitrarily and change shape arbitrarily. E.g. we treat this as
3332 valid code:
3334 int a[256];
3336 ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];
3338 where two lvalues with the same int[4][3] type overlap, and where
3339 both lvalues are distinct from the object's declared type. */
3340 if (same_base_p)
3342 if (indices_a->unconstrained_base)
3343 full_seq.length += 1;
3345 else
3346 full_seq = struct_seq;
3348 /* Punt if we didn't find a suitable sequence. */
3349 if (full_seq.length == 0)
3351 if (use_alt_indices
3352 || (TREE_CODE (DR_REF (a)) == MEM_REF
3353 && TREE_CODE (DR_REF (b)) == MEM_REF)
3354 || may_be_nonaddressable_p (DR_REF (a))
3355 || may_be_nonaddressable_p (DR_REF (b)))
3357 /* Fully exhausted possibilities. */
3358 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3359 return res;
3362 /* Try evaluating both DRs as dereferences of pointers. */
3363 if (!a->alt_indices.base_object
3364 && TREE_CODE (DR_REF (a)) != MEM_REF)
3366 tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (a)),
3367 build1 (ADDR_EXPR, ptr_type_node, DR_REF (a)),
3368 build_int_cst
3369 (reference_alias_ptr_type (DR_REF (a)), 0));
3370 dr_analyze_indices (&a->alt_indices, alt_ref,
3371 loop_preheader_edge (loop_nest[0]),
3372 loop_containing_stmt (DR_STMT (a)));
3374 if (!b->alt_indices.base_object
3375 && TREE_CODE (DR_REF (b)) != MEM_REF)
3377 tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (b)),
3378 build1 (ADDR_EXPR, ptr_type_node, DR_REF (b)),
3379 build_int_cst
3380 (reference_alias_ptr_type (DR_REF (b)), 0));
3381 dr_analyze_indices (&b->alt_indices, alt_ref,
3382 loop_preheader_edge (loop_nest[0]),
3383 loop_containing_stmt (DR_STMT (b)));
3385 return initialize_data_dependence_relation (res, loop_nest, true);
3388 if (!same_base_p)
3390 /* Partial overlap is possible for different bases when strict aliasing
3391 is not in effect. It's also possible if either base involves a union
3392 access; e.g. for:
3394 struct s1 { int a[2]; };
3395 struct s2 { struct s1 b; int c; };
3396 struct s3 { int d; struct s1 e; };
3397 union u { struct s2 f; struct s3 g; } *p, *q;
3399 the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
3400 "p->g.e" (base "p->g") and might partially overlap the s1 at
3401 "q->g.e" (base "q->g"). */
3402 if (!flag_strict_aliasing
3403 || ref_contains_union_access_p (full_seq.object_a)
3404 || ref_contains_union_access_p (full_seq.object_b))
3406 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3407 return res;
3410 DDR_COULD_BE_INDEPENDENT_P (res) = true;
3411 if (!loop_nest.exists ()
3412 || (object_address_invariant_in_loop_p (loop_nest[0],
3413 full_seq.object_a)
3414 && object_address_invariant_in_loop_p (loop_nest[0],
3415 full_seq.object_b)))
3417 DDR_OBJECT_A (res) = full_seq.object_a;
3418 DDR_OBJECT_B (res) = full_seq.object_b;
3422 DDR_AFFINE_P (res) = true;
3423 DDR_ARE_DEPENDENT (res) = NULL_TREE;
3424 DDR_SUBSCRIPTS (res).create (full_seq.length);
3425 DDR_LOOP_NEST (res) = loop_nest;
3426 DDR_SELF_REFERENCE (res) = false;
3428 for (i = 0; i < full_seq.length; ++i)
3430 struct subscript *subscript;
3432 subscript = XNEW (struct subscript);
3433 SUB_ACCESS_FN (subscript, 0) = indices_a->access_fns[full_seq.start_a + i];
3434 SUB_ACCESS_FN (subscript, 1) = indices_b->access_fns[full_seq.start_b + i];
3435 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
3436 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
3437 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
3438 SUB_DISTANCE (subscript) = chrec_dont_know;
3439 DDR_SUBSCRIPTS (res).safe_push (subscript);
3442 return res;
3445 /* Initialize a data dependence relation between data accesses A and
3446 B. NB_LOOPS is the number of loops surrounding the references: the
3447 size of the classic distance/direction vectors. */
3449 struct data_dependence_relation *
3450 initialize_data_dependence_relation (struct data_reference *a,
3451 struct data_reference *b,
3452 vec<loop_p> loop_nest)
3454 data_dependence_relation *res = XCNEW (struct data_dependence_relation);
3455 DDR_A (res) = a;
3456 DDR_B (res) = b;
3457 DDR_LOOP_NEST (res).create (0);
3458 DDR_SUBSCRIPTS (res).create (0);
3459 DDR_DIR_VECTS (res).create (0);
3460 DDR_DIST_VECTS (res).create (0);
3462 if (a == NULL || b == NULL)
3464 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3465 return res;
3468 /* If the data references do not alias, then they are independent. */
3469 if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL))
3471 DDR_ARE_DEPENDENT (res) = chrec_known;
3472 return res;
3475 return initialize_data_dependence_relation (res, loop_nest, false);
3479 /* Frees memory used by the conflict function F. */
3481 static void
3482 free_conflict_function (conflict_function *f)
3484 unsigned i;
3486 if (CF_NONTRIVIAL_P (f))
3488 for (i = 0; i < f->n; i++)
3489 affine_fn_free (f->fns[i]);
3491 free (f);
3494 /* Frees memory used by SUBSCRIPTS. */
3496 static void
3497 free_subscripts (vec<subscript_p> subscripts)
3499 for (subscript_p s : subscripts)
3501 free_conflict_function (s->conflicting_iterations_in_a);
3502 free_conflict_function (s->conflicting_iterations_in_b);
3503 free (s);
3505 subscripts.release ();
3508 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
3509 description. */
3511 static inline void
3512 finalize_ddr_dependent (struct data_dependence_relation *ddr,
3513 tree chrec)
3515 DDR_ARE_DEPENDENT (ddr) = chrec;
3516 free_subscripts (DDR_SUBSCRIPTS (ddr));
3517 DDR_SUBSCRIPTS (ddr).create (0);
3520 /* The dependence relation DDR cannot be represented by a distance
3521 vector. */
3523 static inline void
3524 non_affine_dependence_relation (struct data_dependence_relation *ddr)
3526 if (dump_file && (dump_flags & TDF_DETAILS))
3527 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
3529 DDR_AFFINE_P (ddr) = false;
3534 /* This section contains the classic Banerjee tests. */
3536 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
3537 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
3539 static inline bool
3540 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3542 return (evolution_function_is_constant_p (chrec_a)
3543 && evolution_function_is_constant_p (chrec_b));
3546 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
3547 variable, i.e., if the SIV (Single Index Variable) test is true. */
3549 static bool
3550 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3552 if ((evolution_function_is_constant_p (chrec_a)
3553 && evolution_function_is_univariate_p (chrec_b))
3554 || (evolution_function_is_constant_p (chrec_b)
3555 && evolution_function_is_univariate_p (chrec_a)))
3556 return true;
3558 if (evolution_function_is_univariate_p (chrec_a)
3559 && evolution_function_is_univariate_p (chrec_b))
3561 switch (TREE_CODE (chrec_a))
3563 case POLYNOMIAL_CHREC:
3564 switch (TREE_CODE (chrec_b))
3566 case POLYNOMIAL_CHREC:
3567 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
3568 return false;
3569 /* FALLTHRU */
3571 default:
3572 return true;
3575 default:
3576 return true;
3580 return false;
3583 /* Creates a conflict function with N dimensions. The affine functions
3584 in each dimension follow. */
3586 static conflict_function *
3587 conflict_fn (unsigned n, ...)
3589 unsigned i;
3590 conflict_function *ret = XCNEW (conflict_function);
3591 va_list ap;
3593 gcc_assert (n > 0 && n <= MAX_DIM);
3594 va_start (ap, n);
3596 ret->n = n;
3597 for (i = 0; i < n; i++)
3598 ret->fns[i] = va_arg (ap, affine_fn);
3599 va_end (ap);
3601 return ret;
3604 /* Returns constant affine function with value CST. */
3606 static affine_fn
3607 affine_fn_cst (tree cst)
3609 affine_fn fn;
3610 fn.create (1);
3611 fn.quick_push (cst);
3612 return fn;
3615 /* Returns affine function with single variable, CST + COEF * x_DIM. */
3617 static affine_fn
3618 affine_fn_univar (tree cst, unsigned dim, tree coef)
3620 affine_fn fn;
3621 fn.create (dim + 1);
3622 unsigned i;
3624 gcc_assert (dim > 0);
3625 fn.quick_push (cst);
3626 for (i = 1; i < dim; i++)
3627 fn.quick_push (integer_zero_node);
3628 fn.quick_push (coef);
3629 return fn;
3632 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
3633 *OVERLAPS_B are initialized to the functions that describe the
3634 relation between the elements accessed twice by CHREC_A and
3635 CHREC_B. For k >= 0, the following property is verified:
3637 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
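/* E.g. for the constant subscripts 3 and 5 the difference is the
   non-zero constant -2, so the accesses never overlap; for 3 and 3 the
   difference is zero and they overlap on every iteration. */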
3639 static void
3640 analyze_ziv_subscript (tree chrec_a,
3641 tree chrec_b,
3642 conflict_function **overlaps_a,
3643 conflict_function **overlaps_b,
3644 tree *last_conflicts)
3646 tree type, difference;
3647 dependence_stats.num_ziv++;
3649 if (dump_file && (dump_flags & TDF_DETAILS))
3650 fprintf (dump_file, "(analyze_ziv_subscript \n");
3652 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3653 chrec_a = chrec_convert (type, chrec_a, NULL);
3654 chrec_b = chrec_convert (type, chrec_b, NULL);
3655 difference = chrec_fold_minus (type, chrec_a, chrec_b);
3657 switch (TREE_CODE (difference))
3659 case INTEGER_CST:
3660 if (integer_zerop (difference))
3662 /* The difference is equal to zero: the accessed index
3663 overlaps for each iteration in the loop. */
3664 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3665 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3666 *last_conflicts = chrec_dont_know;
3667 dependence_stats.num_ziv_dependent++;
3669 else
3671 /* The accesses do not overlap. */
3672 *overlaps_a = conflict_fn_no_dependence ();
3673 *overlaps_b = conflict_fn_no_dependence ();
3674 *last_conflicts = integer_zero_node;
3675 dependence_stats.num_ziv_independent++;
3677 break;
3679 default:
3680 /* We're not sure whether the indexes overlap. For the moment,
3681 conservatively answer "don't know". */
3682 if (dump_file && (dump_flags & TDF_DETAILS))
3683 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
3685 *overlaps_a = conflict_fn_not_known ();
3686 *overlaps_b = conflict_fn_not_known ();
3687 *last_conflicts = chrec_dont_know;
3688 dependence_stats.num_ziv_unimplemented++;
3689 break;
3692 if (dump_file && (dump_flags & TDF_DETAILS))
3693 fprintf (dump_file, ")\n");
/* Similar to max_stmt_executions_int, but returns the bound as a tree,
   and only if it fits the unsigned int type.  If this is not the case,
   or the bound on the number of iterations of LOOP could not be
   derived, returns chrec_dont_know.  */
3701 static tree
3702 max_stmt_executions_tree (class loop *loop)
3704 widest_int nit;
3706 if (!max_stmt_executions (loop, &nit))
3707 return chrec_dont_know;
3709 if (!wi::fits_to_tree_p (nit, unsigned_type_node))
3710 return chrec_dont_know;
3712 return wide_int_to_tree (unsigned_type_node, nit);
3715 /* Determine whether the CHREC is always positive/negative. If the expression
3716 cannot be statically analyzed, return false, otherwise set the answer into
3717 VALUE. */
3719 static bool
3720 chrec_is_positive (tree chrec, bool *value)
3722 bool value0, value1, value2;
3723 tree end_value, nb_iter;
3725 switch (TREE_CODE (chrec))
3727 case POLYNOMIAL_CHREC:
3728 if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
3729 || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
3730 return false;
3732 /* FIXME -- overflows. */
3733 if (value0 == value1)
3735 *value = value0;
3736 return true;
      /* Otherwise the chrec is of the form "{-197, +, 2}_1", and the
	 proof consists in showing that the sign never changes during
	 the execution of the loop, from 0 to
	 loop->nb_iterations.  */
3743 if (!evolution_function_is_affine_p (chrec))
3744 return false;
3746 nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
3747 if (chrec_contains_undetermined (nb_iter))
3748 return false;
3750 #if 0
3751 /* TODO -- If the test is after the exit, we may decrease the number of
3752 iterations by one. */
3753 if (after_exit)
3754 nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
3755 #endif
3757 end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
3759 if (!chrec_is_positive (end_value, &value2))
3760 return false;
3762 *value = value0;
3763 return value0 == value1;
3765 case INTEGER_CST:
3766 switch (tree_int_cst_sgn (chrec))
3768 case -1:
3769 *value = false;
3770 break;
3771 case 1:
3772 *value = true;
3773 break;
3774 default:
3775 return false;
3777 return true;
3779 default:
3780 return false;
3785 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
3786 constant, and CHREC_B is an affine function. *OVERLAPS_A and
3787 *OVERLAPS_B are initialized to the functions that describe the
3788 relation between the elements accessed twice by CHREC_A and
3789 CHREC_B. For k >= 0, the following property is verified:
3791 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
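/* For example, with chrec_a = 22 and chrec_b = {10, +, 4}_1, the
   difference 10 - 22 = -12 is negative while the step 4 is positive,
   and 4 divides 12, so the conflict happens at iteration 12 / 4 = 3
   of loop_1, where {10, +, 4}_1 reaches 10 + 4 * 3 = 22, provided
   the loop runs at least that far.  */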
3793 static void
3794 analyze_siv_subscript_cst_affine (tree chrec_a,
3795 tree chrec_b,
3796 conflict_function **overlaps_a,
3797 conflict_function **overlaps_b,
3798 tree *last_conflicts)
3800 bool value0, value1, value2;
3801 tree type, difference, tmp;
3803 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3804 chrec_a = chrec_convert (type, chrec_a, NULL);
3805 chrec_b = chrec_convert (type, chrec_b, NULL);
3806 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
3808 /* Special case overlap in the first iteration. */
3809 if (integer_zerop (difference))
3811 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3812 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3813 *last_conflicts = integer_one_node;
3814 return;
3817 if (!chrec_is_positive (initial_condition (difference), &value0))
3819 if (dump_file && (dump_flags & TDF_DETAILS))
3820 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
3822 dependence_stats.num_siv_unimplemented++;
3823 *overlaps_a = conflict_fn_not_known ();
3824 *overlaps_b = conflict_fn_not_known ();
3825 *last_conflicts = chrec_dont_know;
3826 return;
3828 else
3830 if (value0 == false)
3832 if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3833 || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
3835 if (dump_file && (dump_flags & TDF_DETAILS))
3836 fprintf (dump_file, "siv test failed: chrec not positive.\n");
3838 *overlaps_a = conflict_fn_not_known ();
3839 *overlaps_b = conflict_fn_not_known ();
3840 *last_conflicts = chrec_dont_know;
3841 dependence_stats.num_siv_unimplemented++;
3842 return;
3844 else
3846 if (value1 == true)
3848 /* Example:
3849 chrec_a = 12
chrec_b = {10, +, 1}  */
3853 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3855 HOST_WIDE_INT numiter;
3856 class loop *loop = get_chrec_loop (chrec_b);
3858 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3859 tmp = fold_build2 (EXACT_DIV_EXPR, type,
3860 fold_build1 (ABS_EXPR, type, difference),
3861 CHREC_RIGHT (chrec_b));
3862 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3863 *last_conflicts = integer_one_node;
3866 /* Perform weak-zero siv test to see if overlap is
3867 outside the loop bounds. */
3868 numiter = max_stmt_executions_int (loop);
3870 if (numiter >= 0
3871 && compare_tree_int (tmp, numiter) > 0)
3873 free_conflict_function (*overlaps_a);
3874 free_conflict_function (*overlaps_b);
3875 *overlaps_a = conflict_fn_no_dependence ();
3876 *overlaps_b = conflict_fn_no_dependence ();
3877 *last_conflicts = integer_zero_node;
3878 dependence_stats.num_siv_independent++;
3879 return;
3881 dependence_stats.num_siv_dependent++;
3882 return;
3885 /* When the step does not divide the difference, there are
3886 no overlaps. */
3887 else
3889 *overlaps_a = conflict_fn_no_dependence ();
3890 *overlaps_b = conflict_fn_no_dependence ();
3891 *last_conflicts = integer_zero_node;
3892 dependence_stats.num_siv_independent++;
3893 return;
3897 else
3899 /* Example:
3900 chrec_a = 12
3901 chrec_b = {10, +, -1}
3903 In this case, chrec_a will not overlap with chrec_b. */
3904 *overlaps_a = conflict_fn_no_dependence ();
3905 *overlaps_b = conflict_fn_no_dependence ();
3906 *last_conflicts = integer_zero_node;
3907 dependence_stats.num_siv_independent++;
3908 return;
3912 else
3914 if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3915 || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
3917 if (dump_file && (dump_flags & TDF_DETAILS))
3918 fprintf (dump_file, "siv test failed: chrec not positive.\n");
3920 *overlaps_a = conflict_fn_not_known ();
3921 *overlaps_b = conflict_fn_not_known ();
3922 *last_conflicts = chrec_dont_know;
3923 dependence_stats.num_siv_unimplemented++;
3924 return;
3926 else
3928 if (value2 == false)
3930 /* Example:
3931 chrec_a = 3
chrec_b = {10, +, -1}  */
3934 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3936 HOST_WIDE_INT numiter;
3937 class loop *loop = get_chrec_loop (chrec_b);
3939 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3940 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
3941 CHREC_RIGHT (chrec_b));
3942 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3943 *last_conflicts = integer_one_node;
3945 /* Perform weak-zero siv test to see if overlap is
3946 outside the loop bounds. */
3947 numiter = max_stmt_executions_int (loop);
3949 if (numiter >= 0
3950 && compare_tree_int (tmp, numiter) > 0)
3952 free_conflict_function (*overlaps_a);
3953 free_conflict_function (*overlaps_b);
3954 *overlaps_a = conflict_fn_no_dependence ();
3955 *overlaps_b = conflict_fn_no_dependence ();
3956 *last_conflicts = integer_zero_node;
3957 dependence_stats.num_siv_independent++;
3958 return;
3960 dependence_stats.num_siv_dependent++;
3961 return;
3964 /* When the step does not divide the difference, there
3965 are no overlaps. */
3966 else
3968 *overlaps_a = conflict_fn_no_dependence ();
3969 *overlaps_b = conflict_fn_no_dependence ();
3970 *last_conflicts = integer_zero_node;
3971 dependence_stats.num_siv_independent++;
3972 return;
3975 else
3977 /* Example:
3978 chrec_a = 3
3979 chrec_b = {4, +, 1}
3981 In this case, chrec_a will not overlap with chrec_b. */
3982 *overlaps_a = conflict_fn_no_dependence ();
3983 *overlaps_b = conflict_fn_no_dependence ();
3984 *last_conflicts = integer_zero_node;
3985 dependence_stats.num_siv_independent++;
3986 return;
3993 /* Helper recursive function for initializing the matrix A. Returns
3994 the initial value of CHREC. */
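/* For example, for the chrec {{3, +, 5}_1, +, 7}_2 with INDEX = 0 and
   MULT = 1, this stores A[0][0] = 7 and A[1][0] = 5, and returns the
   initial value 3.  */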
3996 static tree
3997 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
3999 gcc_assert (chrec);
4001 switch (TREE_CODE (chrec))
4003 case POLYNOMIAL_CHREC:
4004 HOST_WIDE_INT chrec_right;
4005 if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
4006 return chrec_dont_know;
4007 chrec_right = int_cst_value (CHREC_RIGHT (chrec));
4008 /* We want to be able to negate without overflow. */
4009 if (chrec_right == HOST_WIDE_INT_MIN)
4010 return chrec_dont_know;
4011 A[index][0] = mult * chrec_right;
4012 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
4014 case PLUS_EXPR:
4015 case MULT_EXPR:
4016 case MINUS_EXPR:
4018 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4019 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
4021 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
4024 CASE_CONVERT:
4026 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4027 return chrec_convert (chrec_type (chrec), op, NULL);
4030 case BIT_NOT_EXPR:
4032 /* Handle ~X as -1 - X. */
4033 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4034 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
4035 build_int_cst (TREE_TYPE (chrec), -1), op);
4038 case INTEGER_CST:
4039 return chrec;
4041 default:
4042 gcc_unreachable ();
4043 return NULL_TREE;
4047 #define FLOOR_DIV(x,y) ((x) / (y))
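/* Note: this is truncating division, which agrees with floor division
   only when both operands are non-negative, as they are at the uses
   below.  */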
4049 /* Solves the special case of the Diophantine equation:
4050 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
4052 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
4053 number of iterations that loops X and Y run. The overlaps will be
4054 constructed as evolutions in dimension DIM. */
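/* For example, with STEP_A = 6 and STEP_B = 9 we have gcd = 3, and the
   conflicting iterations are described by

   | OVERLAPS_A (t) = {0, +, 3}  (iterations 0, 3, 6, ...)
   | OVERLAPS_B (t) = {0, +, 2}  (iterations 0, 2, 4, ...)

   since 6 * 3 = 9 * 2 = 18 is the first nonzero common element.  */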
4056 static void
4057 compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
4058 HOST_WIDE_INT step_a,
4059 HOST_WIDE_INT step_b,
4060 affine_fn *overlaps_a,
4061 affine_fn *overlaps_b,
4062 tree *last_conflicts, int dim)
4064 if (((step_a > 0 && step_b > 0)
4065 || (step_a < 0 && step_b < 0)))
4067 HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
4068 HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;
4070 gcd_steps_a_b = gcd (step_a, step_b);
4071 step_overlaps_a = step_b / gcd_steps_a_b;
4072 step_overlaps_b = step_a / gcd_steps_a_b;
4074 if (niter > 0)
4076 tau2 = FLOOR_DIV (niter, step_overlaps_a);
4077 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
4078 last_conflict = tau2;
4079 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
4081 else
4082 *last_conflicts = chrec_dont_know;
4084 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
4085 build_int_cst (NULL_TREE,
4086 step_overlaps_a));
4087 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
4088 build_int_cst (NULL_TREE,
4089 step_overlaps_b));
4092 else
4094 *overlaps_a = affine_fn_cst (integer_zero_node);
4095 *overlaps_b = affine_fn_cst (integer_zero_node);
4096 *last_conflicts = integer_zero_node;
4100 /* Solves the special case of a Diophantine equation where CHREC_A is
4101 an affine bivariate function, and CHREC_B is an affine univariate
4102 function. For example,
4104 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
4106 has the following overlapping functions:
4108 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
4109 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
4110 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
4112 FORNOW: This is a specialized implementation for a case occurring in
4113 a common benchmark. Implement the general algorithm. */
4115 static void
4116 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
4117 conflict_function **overlaps_a,
4118 conflict_function **overlaps_b,
4119 tree *last_conflicts)
4121 bool xz_p, yz_p, xyz_p;
4122 HOST_WIDE_INT step_x, step_y, step_z;
4123 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
4124 affine_fn overlaps_a_xz, overlaps_b_xz;
4125 affine_fn overlaps_a_yz, overlaps_b_yz;
4126 affine_fn overlaps_a_xyz, overlaps_b_xyz;
4127 affine_fn ova1, ova2, ovb;
4128 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
4130 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
4131 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
4132 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
4134 niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
4135 niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
4136 niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
4138 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
4140 if (dump_file && (dump_flags & TDF_DETAILS))
4141 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
4143 *overlaps_a = conflict_fn_not_known ();
4144 *overlaps_b = conflict_fn_not_known ();
4145 *last_conflicts = chrec_dont_know;
4146 return;
4149 niter = MIN (niter_x, niter_z);
4150 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
4151 &overlaps_a_xz,
4152 &overlaps_b_xz,
4153 &last_conflicts_xz, 1);
4154 niter = MIN (niter_y, niter_z);
4155 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
4156 &overlaps_a_yz,
4157 &overlaps_b_yz,
4158 &last_conflicts_yz, 2);
4159 niter = MIN (niter_x, niter_z);
4160 niter = MIN (niter_y, niter);
4161 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
4162 &overlaps_a_xyz,
4163 &overlaps_b_xyz,
4164 &last_conflicts_xyz, 3);
4166 xz_p = !integer_zerop (last_conflicts_xz);
4167 yz_p = !integer_zerop (last_conflicts_yz);
4168 xyz_p = !integer_zerop (last_conflicts_xyz);
4170 if (xz_p || yz_p || xyz_p)
4172 ova1 = affine_fn_cst (integer_zero_node);
4173 ova2 = affine_fn_cst (integer_zero_node);
4174 ovb = affine_fn_cst (integer_zero_node);
4175 if (xz_p)
4177 affine_fn t0 = ova1;
4178 affine_fn t2 = ovb;
4180 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
4181 ovb = affine_fn_plus (ovb, overlaps_b_xz);
4182 affine_fn_free (t0);
4183 affine_fn_free (t2);
4184 *last_conflicts = last_conflicts_xz;
4186 if (yz_p)
4188 affine_fn t0 = ova2;
4189 affine_fn t2 = ovb;
4191 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
4192 ovb = affine_fn_plus (ovb, overlaps_b_yz);
4193 affine_fn_free (t0);
4194 affine_fn_free (t2);
4195 *last_conflicts = last_conflicts_yz;
4197 if (xyz_p)
4199 affine_fn t0 = ova1;
4200 affine_fn t2 = ova2;
4201 affine_fn t4 = ovb;
4203 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
4204 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
4205 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
4206 affine_fn_free (t0);
4207 affine_fn_free (t2);
4208 affine_fn_free (t4);
4209 *last_conflicts = last_conflicts_xyz;
4211 *overlaps_a = conflict_fn (2, ova1, ova2);
4212 *overlaps_b = conflict_fn (1, ovb);
4214 else
4216 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4217 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4218 *last_conflicts = integer_zero_node;
4221 affine_fn_free (overlaps_a_xz);
4222 affine_fn_free (overlaps_b_xz);
4223 affine_fn_free (overlaps_a_yz);
4224 affine_fn_free (overlaps_b_yz);
4225 affine_fn_free (overlaps_a_xyz);
4226 affine_fn_free (overlaps_b_xyz);
4229 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
4231 static void
4232 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
4233 int size)
4235 memcpy (vec2, vec1, size * sizeof (*vec1));
4238 /* Copy the elements of M x N matrix MAT1 to MAT2. */
4240 static void
4241 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
4242 int m, int n)
4244 int i;
4246 for (i = 0; i < m; i++)
4247 lambda_vector_copy (mat1[i], mat2[i], n);
/* Store the SIZE x SIZE identity matrix in MAT.  */
4252 static void
4253 lambda_matrix_id (lambda_matrix mat, int size)
4255 int i, j;
4257 for (i = 0; i < size; i++)
4258 for (j = 0; j < size; j++)
4259 mat[i][j] = (i == j) ? 1 : 0;
4262 /* Return the index of the first nonzero element of vector VEC1 between
4263 START and N. We must have START <= N.
4264 Returns N if VEC1 is the zero vector. */
4266 static int
4267 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
4269 int j = start;
4270 while (j < n && vec1[j] == 0)
4271 j++;
4272 return j;
4275 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
4276 R2 = R2 + CONST1 * R1. */
4278 static bool
4279 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2,
4280 lambda_int const1)
4282 int i;
4284 if (const1 == 0)
4285 return true;
4287 for (i = 0; i < n; i++)
4289 bool ovf;
4290 lambda_int tem = mul_hwi (mat[r1][i], const1, &ovf);
4291 if (ovf)
4292 return false;
4293 lambda_int tem2 = add_hwi (mat[r2][i], tem, &ovf);
4294 if (ovf || tem2 == HOST_WIDE_INT_MIN)
4295 return false;
4296 mat[r2][i] = tem2;
4299 return true;
4302 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
4303 and store the result in VEC2. */
4305 static void
4306 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
4307 int size, lambda_int const1)
4309 int i;
4311 if (const1 == 0)
4312 lambda_vector_clear (vec2, size);
4313 else
4314 for (i = 0; i < size; i++)
4315 vec2[i] = const1 * vec1[i];
4318 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
4320 static void
4321 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
4322 int size)
4324 lambda_vector_mult_const (vec1, vec2, size, -1);
4327 /* Negate row R1 of matrix MAT which has N columns. */
4329 static void
4330 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
4332 lambda_vector_negate (mat[r1], mat[r1], n);
4335 /* Return true if two vectors are equal. */
4337 static bool
4338 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
4340 int i;
4341 for (i = 0; i < size; i++)
4342 if (vec1[i] != vec2[i])
4343 return false;
4344 return true;
4347 /* Given an M x N integer matrix A, this function determines an M x
4348 M unimodular matrix U, and an M x N echelon matrix S such that
4349 "U.A = S". This decomposition is also known as "right Hermite".
4351 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
4352 Restructuring Compilers" Utpal Banerjee. */
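/* For example, for the 2 x 1 matrix A = [[6], [4]] the elimination
   below produces S = [[2], [0]], with S[0][0] = 2 = gcd (6, 4), while
   U accumulates the row operations so that U.A = S holds.  */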
4354 static bool
4355 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
4356 lambda_matrix S, lambda_matrix U)
4358 int i, j, i0 = 0;
4360 lambda_matrix_copy (A, S, m, n);
4361 lambda_matrix_id (U, m);
4363 for (j = 0; j < n; j++)
4365 if (lambda_vector_first_nz (S[j], m, i0) < m)
4367 ++i0;
4368 for (i = m - 1; i >= i0; i--)
4370 while (S[i][j] != 0)
4372 lambda_int factor, a, b;
4374 a = S[i-1][j];
4375 b = S[i][j];
4376 gcc_assert (a != HOST_WIDE_INT_MIN);
4377 factor = a / b;
4379 if (!lambda_matrix_row_add (S, n, i, i-1, -factor))
4380 return false;
4381 std::swap (S[i], S[i-1]);
4383 if (!lambda_matrix_row_add (U, m, i, i-1, -factor))
4384 return false;
4385 std::swap (U[i], U[i-1]);
4391 return true;
4394 /* Determines the overlapping elements due to accesses CHREC_A and
4395 CHREC_B, that are affine functions. This function cannot handle
symbolic evolution functions, i.e. when initial conditions are
4397 parameters, because it uses lambda matrices of integers. */
4399 static void
4400 analyze_subscript_affine_affine (tree chrec_a,
4401 tree chrec_b,
4402 conflict_function **overlaps_a,
4403 conflict_function **overlaps_b,
4404 tree *last_conflicts)
4406 unsigned nb_vars_a, nb_vars_b, dim;
4407 lambda_int gamma, gcd_alpha_beta;
4408 lambda_matrix A, U, S;
4409 struct obstack scratch_obstack;
4411 if (eq_evolutions_p (chrec_a, chrec_b))
4413 /* The accessed index overlaps for each iteration in the
4414 loop. */
4415 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4416 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4417 *last_conflicts = chrec_dont_know;
4418 return;
4420 if (dump_file && (dump_flags & TDF_DETAILS))
4421 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
  /* For determining the initial intersection, we have to solve a
     Diophantine equation.  This is the most time-consuming part.

     To answer the question "Is there a dependence?" we have to prove
     that there exists a solution to the Diophantine equation, and that
     the solution is in the iteration domain, i.e. the solution is
     positive or zero, and happens before the upper bound
     loop.nb_iterations.  Otherwise there is no dependence.  This
     function outputs a description of the iterations that hold the
     intersections.  */
4434 nb_vars_a = nb_vars_in_chrec (chrec_a);
4435 nb_vars_b = nb_vars_in_chrec (chrec_b);
4437 gcc_obstack_init (&scratch_obstack);
4439 dim = nb_vars_a + nb_vars_b;
4440 U = lambda_matrix_new (dim, dim, &scratch_obstack);
4441 A = lambda_matrix_new (dim, 1, &scratch_obstack);
4442 S = lambda_matrix_new (dim, 1, &scratch_obstack);
4444 tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
4445 tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
4446 if (init_a == chrec_dont_know
4447 || init_b == chrec_dont_know)
4449 if (dump_file && (dump_flags & TDF_DETAILS))
4450 fprintf (dump_file, "affine-affine test failed: "
4451 "representation issue.\n");
4452 *overlaps_a = conflict_fn_not_known ();
4453 *overlaps_b = conflict_fn_not_known ();
4454 *last_conflicts = chrec_dont_know;
4455 goto end_analyze_subs_aa;
4457 gamma = int_cst_value (init_b) - int_cst_value (init_a);
4459 /* Don't do all the hard work of solving the Diophantine equation
4460 when we already know the solution: for example,
4461 | {3, +, 1}_1
4462 | {3, +, 4}_2
4463 | gamma = 3 - 3 = 0.
4464 Then the first overlap occurs during the first iterations:
| {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)  */
4467 if (gamma == 0)
4469 if (nb_vars_a == 1 && nb_vars_b == 1)
4471 HOST_WIDE_INT step_a, step_b;
4472 HOST_WIDE_INT niter, niter_a, niter_b;
4473 affine_fn ova, ovb;
4475 niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
4476 niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
4477 niter = MIN (niter_a, niter_b);
4478 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
4479 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
4481 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
4482 &ova, &ovb,
4483 last_conflicts, 1);
4484 *overlaps_a = conflict_fn (1, ova);
4485 *overlaps_b = conflict_fn (1, ovb);
4488 else if (nb_vars_a == 2 && nb_vars_b == 1)
4489 compute_overlap_steps_for_affine_1_2
4490 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
4492 else if (nb_vars_a == 1 && nb_vars_b == 2)
4493 compute_overlap_steps_for_affine_1_2
4494 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
4496 else
4498 if (dump_file && (dump_flags & TDF_DETAILS))
4499 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
4500 *overlaps_a = conflict_fn_not_known ();
4501 *overlaps_b = conflict_fn_not_known ();
4502 *last_conflicts = chrec_dont_know;
4504 goto end_analyze_subs_aa;
4507 /* U.A = S */
4508 if (!lambda_matrix_right_hermite (A, dim, 1, S, U))
4510 *overlaps_a = conflict_fn_not_known ();
4511 *overlaps_b = conflict_fn_not_known ();
4512 *last_conflicts = chrec_dont_know;
4513 goto end_analyze_subs_aa;
4516 if (S[0][0] < 0)
4518 S[0][0] *= -1;
4519 lambda_matrix_row_negate (U, dim, 0);
4521 gcd_alpha_beta = S[0][0];
  /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
     but that is quite a strange case.  Instead of ICEing, answer
     "don't know".  */
4526 if (gcd_alpha_beta == 0)
4528 *overlaps_a = conflict_fn_not_known ();
4529 *overlaps_b = conflict_fn_not_known ();
4530 *last_conflicts = chrec_dont_know;
4531 goto end_analyze_subs_aa;
4534 /* The classic "gcd-test". */
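/* For example, for {3, +, 8}_1 vs. {1, +, 12}_1 we have
   gamma = 1 - 3 = -2 and gcd (8, 12) = 4; since 4 does not divide 2,
   the equation 3 + 8*x = 1 + 12*y has no integer solution, and the
   accesses are independent.  */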
4535 if (!int_divides_p (gcd_alpha_beta, gamma))
4537 /* The "gcd-test" has determined that there is no integer
4538 solution, i.e. there is no dependence. */
4539 *overlaps_a = conflict_fn_no_dependence ();
4540 *overlaps_b = conflict_fn_no_dependence ();
4541 *last_conflicts = integer_zero_node;
4544 /* Both access functions are univariate. This includes SIV and MIV cases. */
4545 else if (nb_vars_a == 1 && nb_vars_b == 1)
4547 /* Both functions should have the same evolution sign. */
4548 if (((A[0][0] > 0 && -A[1][0] > 0)
4549 || (A[0][0] < 0 && -A[1][0] < 0)))
4551 /* The solutions are given by:
4553 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
4554 | [u21 u22] [y0]
4556 For a given integer t. Using the following variables,
4558 | i0 = u11 * gamma / gcd_alpha_beta
4559 | j0 = u12 * gamma / gcd_alpha_beta
4560 | i1 = u21
4561 | j1 = u22
4563 the solutions are:
4565 | x0 = i0 + i1 * t,
4566 | y0 = j0 + j1 * t. */
4567 HOST_WIDE_INT i0, j0, i1, j1;
4569 i0 = U[0][0] * gamma / gcd_alpha_beta;
4570 j0 = U[0][1] * gamma / gcd_alpha_beta;
4571 i1 = U[1][0];
4572 j1 = U[1][1];
4574 if ((i1 == 0 && i0 < 0)
4575 || (j1 == 0 && j0 < 0))
4577 /* There is no solution.
4578 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
4579 falls in here, but for the moment we don't look at the
4580 upper bound of the iteration domain. */
4581 *overlaps_a = conflict_fn_no_dependence ();
4582 *overlaps_b = conflict_fn_no_dependence ();
4583 *last_conflicts = integer_zero_node;
4584 goto end_analyze_subs_aa;
4587 if (i1 > 0 && j1 > 0)
4589 HOST_WIDE_INT niter_a
4590 = max_stmt_executions_int (get_chrec_loop (chrec_a));
4591 HOST_WIDE_INT niter_b
4592 = max_stmt_executions_int (get_chrec_loop (chrec_b));
4593 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
4595 /* (X0, Y0) is a solution of the Diophantine equation:
4596 "chrec_a (X0) = chrec_b (Y0)". */
4597 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
4598 CEIL (-j0, j1));
4599 HOST_WIDE_INT x0 = i1 * tau1 + i0;
4600 HOST_WIDE_INT y0 = j1 * tau1 + j0;
4602 /* (X1, Y1) is the smallest positive solution of the eq
4603 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
4604 first conflict occurs. */
4605 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
4606 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
4607 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
4609 if (niter > 0)
4611 /* If the overlap occurs outside of the bounds of the
4612 loop, there is no dependence. */
4613 if (x1 >= niter_a || y1 >= niter_b)
4615 *overlaps_a = conflict_fn_no_dependence ();
4616 *overlaps_b = conflict_fn_no_dependence ();
4617 *last_conflicts = integer_zero_node;
4618 goto end_analyze_subs_aa;
4621 /* max stmt executions can get quite large, avoid
4622 overflows by using wide ints here. */
4623 widest_int tau2
4624 = wi::smin (wi::sdiv_floor (wi::sub (niter_a, i0), i1),
4625 wi::sdiv_floor (wi::sub (niter_b, j0), j1));
4626 widest_int last_conflict = wi::sub (tau2, (x1 - i0)/i1);
4627 if (wi::min_precision (last_conflict, SIGNED)
4628 <= TYPE_PRECISION (integer_type_node))
4629 *last_conflicts
4630 = build_int_cst (integer_type_node,
4631 last_conflict.to_shwi ());
4632 else
4633 *last_conflicts = chrec_dont_know;
4635 else
4636 *last_conflicts = chrec_dont_know;
	  *overlaps_a
	    = conflict_fn (1,
			   affine_fn_univar (build_int_cst (NULL_TREE, x1),
					     1,
					     build_int_cst (NULL_TREE, i1)));
	  *overlaps_b
	    = conflict_fn (1,
			   affine_fn_univar (build_int_cst (NULL_TREE, y1),
					     1,
					     build_int_cst (NULL_TREE, j1)));
4649 else
4651 /* FIXME: For the moment, the upper bound of the
4652 iteration domain for i and j is not checked. */
4653 if (dump_file && (dump_flags & TDF_DETAILS))
4654 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4655 *overlaps_a = conflict_fn_not_known ();
4656 *overlaps_b = conflict_fn_not_known ();
4657 *last_conflicts = chrec_dont_know;
4660 else
4662 if (dump_file && (dump_flags & TDF_DETAILS))
4663 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4664 *overlaps_a = conflict_fn_not_known ();
4665 *overlaps_b = conflict_fn_not_known ();
4666 *last_conflicts = chrec_dont_know;
4669 else
4671 if (dump_file && (dump_flags & TDF_DETAILS))
4672 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4673 *overlaps_a = conflict_fn_not_known ();
4674 *overlaps_b = conflict_fn_not_known ();
4675 *last_conflicts = chrec_dont_know;
4678 end_analyze_subs_aa:
4679 obstack_free (&scratch_obstack, NULL);
4680 if (dump_file && (dump_flags & TDF_DETAILS))
4682 fprintf (dump_file, " (overlaps_a = ");
4683 dump_conflict_function (dump_file, *overlaps_a);
4684 fprintf (dump_file, ")\n (overlaps_b = ");
4685 dump_conflict_function (dump_file, *overlaps_b);
4686 fprintf (dump_file, "))\n");
4690 /* Returns true when analyze_subscript_affine_affine can be used for
4691 determining the dependence relation between chrec_a and chrec_b,
4692 that contain symbols. This function modifies chrec_a and chrec_b
4693 such that the analysis result is the same, and such that they don't
4694 contain symbols, and then can safely be passed to the analyzer.
4696 Example: The analysis of the following tuples of evolutions produce
4697 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
4698 vs. {0, +, 1}_1
4700 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
{-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)  */
4704 static bool
4705 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
4707 tree diff, type, left_a, left_b, right_b;
4709 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
4710 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
4711 /* FIXME: For the moment not handled. Might be refined later. */
4712 return false;
4714 type = chrec_type (*chrec_a);
4715 left_a = CHREC_LEFT (*chrec_a);
4716 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
4717 diff = chrec_fold_minus (type, left_a, left_b);
4719 if (!evolution_function_is_constant_p (diff))
4720 return false;
4722 if (dump_file && (dump_flags & TDF_DETAILS))
4723 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
4725 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
4726 diff, CHREC_RIGHT (*chrec_a));
4727 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
4728 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
4729 build_int_cst (type, 0),
4730 right_b);
4731 return true;
4734 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
4735 *OVERLAPS_B are initialized to the functions that describe the
4736 relation between the elements accessed twice by CHREC_A and
4737 CHREC_B. For k >= 0, the following property is verified:
4739 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
4741 static void
4742 analyze_siv_subscript (tree chrec_a,
4743 tree chrec_b,
4744 conflict_function **overlaps_a,
4745 conflict_function **overlaps_b,
4746 tree *last_conflicts,
4747 int loop_nest_num)
4749 dependence_stats.num_siv++;
4751 if (dump_file && (dump_flags & TDF_DETAILS))
4752 fprintf (dump_file, "(analyze_siv_subscript \n");
4754 if (evolution_function_is_constant_p (chrec_a)
4755 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
4756 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
4757 overlaps_a, overlaps_b, last_conflicts);
4759 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
4760 && evolution_function_is_constant_p (chrec_b))
4761 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
4762 overlaps_b, overlaps_a, last_conflicts);
4764 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
4765 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
4767 if (!chrec_contains_symbols (chrec_a)
4768 && !chrec_contains_symbols (chrec_b))
4770 analyze_subscript_affine_affine (chrec_a, chrec_b,
4771 overlaps_a, overlaps_b,
4772 last_conflicts);
4774 if (CF_NOT_KNOWN_P (*overlaps_a)
4775 || CF_NOT_KNOWN_P (*overlaps_b))
4776 dependence_stats.num_siv_unimplemented++;
4777 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4778 || CF_NO_DEPENDENCE_P (*overlaps_b))
4779 dependence_stats.num_siv_independent++;
4780 else
4781 dependence_stats.num_siv_dependent++;
4783 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
4784 &chrec_b))
4786 analyze_subscript_affine_affine (chrec_a, chrec_b,
4787 overlaps_a, overlaps_b,
4788 last_conflicts);
4790 if (CF_NOT_KNOWN_P (*overlaps_a)
4791 || CF_NOT_KNOWN_P (*overlaps_b))
4792 dependence_stats.num_siv_unimplemented++;
4793 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4794 || CF_NO_DEPENDENCE_P (*overlaps_b))
4795 dependence_stats.num_siv_independent++;
4796 else
4797 dependence_stats.num_siv_dependent++;
4799 else
4800 goto siv_subscript_dontknow;
4803 else
4805 siv_subscript_dontknow:;
4806 if (dump_file && (dump_flags & TDF_DETAILS))
4807 fprintf (dump_file, " siv test failed: unimplemented");
4808 *overlaps_a = conflict_fn_not_known ();
4809 *overlaps_b = conflict_fn_not_known ();
4810 *last_conflicts = chrec_dont_know;
4811 dependence_stats.num_siv_unimplemented++;
4814 if (dump_file && (dump_flags & TDF_DETAILS))
4815 fprintf (dump_file, ")\n");
/* Returns false if we can prove that the greatest common divisor of the steps
   of CHREC does not divide CST, true otherwise.  */
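/* For example, for CHREC = {{5, +, 6}_1, +, 9}_2 and CST = 7, the gcd
   of the steps is gcd (6, 9) = 3; since 7 % 3 != 0, no combination of
   steps can add up to 7, and the function returns false.  */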
4821 static bool
4822 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
4824 HOST_WIDE_INT cd = 0, val;
4825 tree step;
4827 if (!tree_fits_shwi_p (cst))
4828 return true;
4829 val = tree_to_shwi (cst);
4831 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
4833 step = CHREC_RIGHT (chrec);
4834 if (!tree_fits_shwi_p (step))
4835 return true;
4836 cd = gcd (cd, tree_to_shwi (step));
4837 chrec = CHREC_LEFT (chrec);
4840 return val % cd == 0;
4843 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
4844 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
4845 functions that describe the relation between the elements accessed
4846 twice by CHREC_A and CHREC_B. For k >= 0, the following property
4847 is verified:
4849 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
4851 static void
4852 analyze_miv_subscript (tree chrec_a,
4853 tree chrec_b,
4854 conflict_function **overlaps_a,
4855 conflict_function **overlaps_b,
4856 tree *last_conflicts,
4857 class loop *loop_nest)
4859 tree type, difference;
4861 dependence_stats.num_miv++;
4862 if (dump_file && (dump_flags & TDF_DETAILS))
4863 fprintf (dump_file, "(analyze_miv_subscript \n");
4865 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
4866 chrec_a = chrec_convert (type, chrec_a, NULL);
4867 chrec_b = chrec_convert (type, chrec_b, NULL);
4868 difference = chrec_fold_minus (type, chrec_a, chrec_b);
4870 if (eq_evolutions_p (chrec_a, chrec_b))
4872 /* Access functions are the same: all the elements are accessed
4873 in the same order. */
4874 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4875 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4876 *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
4877 dependence_stats.num_miv_dependent++;
4880 else if (evolution_function_is_constant_p (difference)
4881 && evolution_function_is_affine_multivariate_p (chrec_a,
4882 loop_nest->num)
4883 && !gcd_of_steps_may_divide_p (chrec_a, difference))
4885 /* testsuite/.../ssa-chrec-33.c
4886 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
4888 The difference is 1, and all the evolution steps are multiples
4889 of 2, consequently there are no overlapping elements. */
4890 *overlaps_a = conflict_fn_no_dependence ();
4891 *overlaps_b = conflict_fn_no_dependence ();
4892 *last_conflicts = integer_zero_node;
4893 dependence_stats.num_miv_independent++;
4896 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest->num)
4897 && !chrec_contains_symbols (chrec_a, loop_nest)
4898 && evolution_function_is_affine_in_loop (chrec_b, loop_nest->num)
4899 && !chrec_contains_symbols (chrec_b, loop_nest))
4901 /* testsuite/.../ssa-chrec-35.c
4902 {0, +, 1}_2 vs. {0, +, 1}_3
4903 the overlapping elements are respectively located at iterations:
4904 {0, +, 1}_x and {0, +, 1}_x,
4905 in other words, we have the equality:
4906 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
4908 Other examples:
4909 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
4910 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
4912 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
{{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)  */
4915 analyze_subscript_affine_affine (chrec_a, chrec_b,
4916 overlaps_a, overlaps_b, last_conflicts);
4918 if (CF_NOT_KNOWN_P (*overlaps_a)
4919 || CF_NOT_KNOWN_P (*overlaps_b))
4920 dependence_stats.num_miv_unimplemented++;
4921 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4922 || CF_NO_DEPENDENCE_P (*overlaps_b))
4923 dependence_stats.num_miv_independent++;
4924 else
4925 dependence_stats.num_miv_dependent++;
4928 else
4930 /* When the analysis is too difficult, answer "don't know". */
4931 if (dump_file && (dump_flags & TDF_DETAILS))
4932 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
4934 *overlaps_a = conflict_fn_not_known ();
4935 *overlaps_b = conflict_fn_not_known ();
4936 *last_conflicts = chrec_dont_know;
4937 dependence_stats.num_miv_unimplemented++;
4940 if (dump_file && (dump_flags & TDF_DETAILS))
4941 fprintf (dump_file, ")\n");
/* Determines the iterations for which CHREC_A is equal to CHREC_B with
   respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
4946 OVERLAP_ITERATIONS_B are initialized with two functions that
4947 describe the iterations that contain conflicting elements.
4949 Remark: For an integer k >= 0, the following equality is true:
CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).  */
4954 static void
4955 analyze_overlapping_iterations (tree chrec_a,
4956 tree chrec_b,
4957 conflict_function **overlap_iterations_a,
4958 conflict_function **overlap_iterations_b,
4959 tree *last_conflicts, class loop *loop_nest)
4961 unsigned int lnn = loop_nest->num;
4963 dependence_stats.num_subscript_tests++;
4965 if (dump_file && (dump_flags & TDF_DETAILS))
4967 fprintf (dump_file, "(analyze_overlapping_iterations \n");
4968 fprintf (dump_file, " (chrec_a = ");
4969 print_generic_expr (dump_file, chrec_a);
4970 fprintf (dump_file, ")\n (chrec_b = ");
4971 print_generic_expr (dump_file, chrec_b);
4972 fprintf (dump_file, ")\n");
4975 if (chrec_a == NULL_TREE
4976 || chrec_b == NULL_TREE
4977 || chrec_contains_undetermined (chrec_a)
4978 || chrec_contains_undetermined (chrec_b))
4980 dependence_stats.num_subscript_undetermined++;
4982 *overlap_iterations_a = conflict_fn_not_known ();
4983 *overlap_iterations_b = conflict_fn_not_known ();
4986 /* If they are the same chrec, and are affine, they overlap
4987 on every iteration. */
4988 else if (eq_evolutions_p (chrec_a, chrec_b)
4989 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4990 || operand_equal_p (chrec_a, chrec_b, 0)))
4992 dependence_stats.num_same_subscript_function++;
4993 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4994 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4995 *last_conflicts = chrec_dont_know;
4998 /* If they aren't the same, and aren't affine, we can't do anything
4999 yet. */
5000 else if ((chrec_contains_symbols (chrec_a)
5001 || chrec_contains_symbols (chrec_b))
5002 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
5003 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
5005 dependence_stats.num_subscript_undetermined++;
5006 *overlap_iterations_a = conflict_fn_not_known ();
5007 *overlap_iterations_b = conflict_fn_not_known ();
5010 else if (ziv_subscript_p (chrec_a, chrec_b))
5011 analyze_ziv_subscript (chrec_a, chrec_b,
5012 overlap_iterations_a, overlap_iterations_b,
5013 last_conflicts);
5015 else if (siv_subscript_p (chrec_a, chrec_b))
5016 analyze_siv_subscript (chrec_a, chrec_b,
5017 overlap_iterations_a, overlap_iterations_b,
5018 last_conflicts, lnn);
5020 else
5021 analyze_miv_subscript (chrec_a, chrec_b,
5022 overlap_iterations_a, overlap_iterations_b,
5023 last_conflicts, loop_nest);
5025 if (dump_file && (dump_flags & TDF_DETAILS))
5027 fprintf (dump_file, " (overlap_iterations_a = ");
5028 dump_conflict_function (dump_file, *overlap_iterations_a);
5029 fprintf (dump_file, ")\n (overlap_iterations_b = ");
5030 dump_conflict_function (dump_file, *overlap_iterations_b);
5031 fprintf (dump_file, "))\n");
5035 /* Helper function for uniquely inserting distance vectors. */
5037 static void
5038 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
5040 for (lambda_vector v : DDR_DIST_VECTS (ddr))
5041 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
5042 return;
5044 DDR_DIST_VECTS (ddr).safe_push (dist_v);
5047 /* Helper function for uniquely inserting direction vectors. */
5049 static void
5050 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
5052 for (lambda_vector v : DDR_DIR_VECTS (ddr))
5053 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
5054 return;
5056 DDR_DIR_VECTS (ddr).safe_push (dir_v);
5059 /* Add a distance of 1 on all the loops outer than INDEX. If we
5060 haven't yet determined a distance for this outer loop, push a new
5061 distance vector composed of the previous distance, and a distance
5062 of 1 for this outer loop. Example:
5064 | loop_1
5065 | loop_2
5066 | A[10]
5067 | endloop_2
5068 | endloop_1
5070 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
5071 save (0, 1), then we have to save (1, 0). */
5073 static void
5074 add_outer_distances (struct data_dependence_relation *ddr,
5075 lambda_vector dist_v, int index)
5077 /* For each outer loop where init_v is not set, the accesses are
5078 in dependence of distance 1 in the loop. */
5079 while (--index >= 0)
5081 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5082 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
5083 save_v[index] = 1;
5084 save_dist_v (ddr, save_v);
5088 /* Return false when fail to represent the data dependence as a
5089 distance vector. A_INDEX is the index of the first reference
5090 (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
5091 second reference. INIT_B is set to true when a component has been
5092 added to the distance vector DIST_V. INDEX_CARRY is then set to
5093 the index in DIST_V that carries the dependence. */
5095 static bool
5096 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
5097 unsigned int a_index, unsigned int b_index,
5098 lambda_vector dist_v, bool *init_b,
5099 int *index_carry)
5101 unsigned i;
5102 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5103 class loop *loop = DDR_LOOP_NEST (ddr)[0];
5105 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
5107 tree access_fn_a, access_fn_b;
5108 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
5110 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
5112 non_affine_dependence_relation (ddr);
5113 return false;
5116 access_fn_a = SUB_ACCESS_FN (subscript, a_index);
5117 access_fn_b = SUB_ACCESS_FN (subscript, b_index);
5119 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
5120 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
5122 HOST_WIDE_INT dist;
5123 int index;
5124 int var_a = CHREC_VARIABLE (access_fn_a);
5125 int var_b = CHREC_VARIABLE (access_fn_b);
5127 if (var_a != var_b
5128 || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
5130 non_affine_dependence_relation (ddr);
5131 return false;
	  /* When data references are collected in a loop while data
	     dependences are analyzed in a loop nest nested in that loop,
	     there are more access functions than loops.  Skip access
	     functions of loops not in the loop nest.

	     See PR89725 for more information.  */
5140 if (flow_loop_nested_p (get_loop (cfun, var_a), loop))
5141 continue;
5143 dist = int_cst_value (SUB_DISTANCE (subscript));
5144 index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
5145 *index_carry = MIN (index, *index_carry);
5147 /* This is the subscript coupling test. If we have already
5148 recorded a distance for this loop (a distance coming from
5149 another subscript), it should be the same. For example,
5150 in the following code, there is no dependence:
5152 | loop i = 0, N, 1
5153 | T[i+1][i] = ...
5154 | ... = T[i][i]
| endloop  */
5157 if (init_v[index] != 0 && dist_v[index] != dist)
5159 finalize_ddr_dependent (ddr, chrec_known);
5160 return false;
5163 dist_v[index] = dist;
5164 init_v[index] = 1;
5165 *init_b = true;
5167 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
5169 /* This can be for example an affine vs. constant dependence
5170 (T[i] vs. T[3]) that is not an affine dependence and is
5171 not representable as a distance vector. */
5172 non_affine_dependence_relation (ddr);
5173 return false;
5175 else
5176 *init_b = true;
5179 return true;
/* Return true when the DDR contains only access functions that are
   invariant with respect to loop number LNUM.  */
5185 static bool
5186 invariant_access_functions (const struct data_dependence_relation *ddr,
5187 int lnum)
5189 for (subscript *sub : DDR_SUBSCRIPTS (ddr))
5190 if (!evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 0), lnum)
5191 || !evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 1), lnum))
5192 return false;
5194 return true;
5197 /* Helper function for the case where DDR_A and DDR_B are the same
5198 multivariate access function with a constant step. For an example
5199 see pr34635-1.c. */
5201 static void
5202 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
5204 int x_1, x_2;
5205 tree c_1 = CHREC_LEFT (c_2);
5206 tree c_0 = CHREC_LEFT (c_1);
5207 lambda_vector dist_v;
5208 HOST_WIDE_INT v1, v2, cd;
5210 /* Polynomials with more than 2 variables are not handled yet. When
5211 the evolution steps are parameters, it is not possible to
5212 represent the dependence using classical distance vectors. */
5213 if (TREE_CODE (c_0) != INTEGER_CST
5214 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
5215 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
5217 DDR_AFFINE_P (ddr) = false;
5218 return;
5221 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
5222 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
5224 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
5225 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5226 v1 = int_cst_value (CHREC_RIGHT (c_1));
5227 v2 = int_cst_value (CHREC_RIGHT (c_2));
5228 cd = gcd (v1, v2);
5229 v1 /= cd;
5230 v2 /= cd;
5232 if (v2 < 0)
5234 v2 = -v2;
5235 v1 = -v1;
5238 dist_v[x_1] = v2;
5239 dist_v[x_2] = -v1;
5240 save_dist_v (ddr, dist_v);
5242 add_outer_distances (ddr, dist_v, x_1);
5245 /* Helper function for the case where DDR_A and DDR_B are the same
5246 access functions. */
5248 static void
5249 add_other_self_distances (struct data_dependence_relation *ddr)
5251 lambda_vector dist_v;
5252 unsigned i;
5253 int index_carry = DDR_NB_LOOPS (ddr);
5254 subscript *sub;
5255 class loop *loop = DDR_LOOP_NEST (ddr)[0];
5257 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
5259 tree access_fun = SUB_ACCESS_FN (sub, 0);
5261 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
5263 if (!evolution_function_is_univariate_p (access_fun, loop->num))
5265 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
5267 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
5268 return;
5271 access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);
5273 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
5274 add_multivariate_self_dist (ddr, access_fun);
5275 else
5276 /* The evolution step is not constant: it varies in
5277 the outer loop, so this cannot be represented by a
5278 distance vector. For example in pr34635.c the
5279 evolution is {0, +, {0, +, 4}_1}_2. */
5280 DDR_AFFINE_P (ddr) = false;
5282 return;
      /* When data references are collected in a loop while data
	 dependences are analyzed in a loop nest nested in that loop,
	 there are more access functions than loops.  Skip access
	 functions of loops not in the loop nest.

	 See PR89725 for more information.  */
5291 if (flow_loop_nested_p (get_loop (cfun, CHREC_VARIABLE (access_fun)),
5292 loop))
5293 continue;
5295 index_carry = MIN (index_carry,
5296 index_in_loop_nest (CHREC_VARIABLE (access_fun),
5297 DDR_LOOP_NEST (ddr)));
5301 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5302 add_outer_distances (ddr, dist_v, index_carry);
5305 static void
5306 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
5308 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5310 dist_v[0] = 1;
5311 save_dist_v (ddr, dist_v);
5314 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
5315 is the case for example when access functions are the same and
5316 equal to a constant, as in:
5318 | loop_1
5319 | A[3] = ...
5320 | ... = A[3]
5321 | endloop_1
5323 in which case the distance vectors are (0) and (1). */
5325 static void
5326 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
5328 unsigned i, j;
5330 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
5332 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
5333 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
5334 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
5336 for (j = 0; j < ca->n; j++)
5337 if (affine_function_zero_p (ca->fns[j]))
5339 insert_innermost_unit_dist_vector (ddr);
5340 return;
5343 for (j = 0; j < cb->n; j++)
5344 if (affine_function_zero_p (cb->fns[j]))
5346 insert_innermost_unit_dist_vector (ddr);
5347 return;
5352 /* Return true when the DDR contains two data references that have the
5353 same access functions. */
5355 static inline bool
5356 same_access_functions (const struct data_dependence_relation *ddr)
5358 for (subscript *sub : DDR_SUBSCRIPTS (ddr))
5359 if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
5360 SUB_ACCESS_FN (sub, 1)))
5361 return false;
5363 return true;
5366 /* Compute the classic per loop distance vector. DDR is the data
5367 dependence relation to build a vector from. Return false when fail
5368 to represent the data dependence as a distance vector. */
5370 static bool
5371 build_classic_dist_vector (struct data_dependence_relation *ddr,
5372 class loop *loop_nest)
5374 bool init_b = false;
5375 int index_carry = DDR_NB_LOOPS (ddr);
5376 lambda_vector dist_v;
5378 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
5379 return false;
5381 if (same_access_functions (ddr))
5383 /* Save the 0 vector. */
5384 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5385 save_dist_v (ddr, dist_v);
5387 if (invariant_access_functions (ddr, loop_nest->num))
5388 add_distance_for_zero_overlaps (ddr);
5390 if (DDR_NB_LOOPS (ddr) > 1)
5391 add_other_self_distances (ddr);
5393 return true;
5396 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5397 if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
5398 return false;
5400 /* Save the distance vector if we initialized one. */
5401 if (init_b)
5403 /* Verify a basic constraint: classic distance vectors should
5404 always be lexicographically positive.
5406 Data references are collected in the order of execution of
5407 the program, thus for the following loop
5409 | for (i = 1; i < 100; i++)
5410 | for (j = 1; j < 100; j++)
5412 | t = T[j+1][i-1]; // A
5413 | T[j][i] = t + 2; // B
5416 references are collected following the direction of the wind:
5417 A then B. The data dependence tests are performed also
5418 following this order, such that we're looking at the distance
5419 separating the elements accessed by A from the elements later
5420 accessed by B. But in this example, the distance returned by
	 test_dep (A, B) is lexicographically negative (-1, 1), which
	 means that the access A occurs later than B with respect to
	 the outer loop, i.e. we're actually looking upwind.  In this
5424 case we solve test_dep (B, A) looking downwind to the
5425 lexicographically positive solution, that returns the
5426 distance vector (1, -1). */
5427 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
5429 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5430 if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
5431 return false;
5432 compute_subscript_distance (ddr);
5433 if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
5434 &index_carry))
5435 return false;
5436 save_dist_v (ddr, save_v);
5437 DDR_REVERSED_P (ddr) = true;
5439 /* In this case there is a dependence forward for all the
5440 outer loops:
5442 | for (k = 1; k < 100; k++)
5443 | for (i = 1; i < 100; i++)
5444 | for (j = 1; j < 100; j++)
5446 | t = T[j+1][i-1]; // A
5447 | T[j][i] = t + 2; // B
5450 the vectors are:
5451 (0, 1, -1)
5452 (1, 1, -1)
(1, -1, 1)  */
5455 if (DDR_NB_LOOPS (ddr) > 1)
5457 add_outer_distances (ddr, save_v, index_carry);
5458 add_outer_distances (ddr, dist_v, index_carry);
5461 else
5463 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5464 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
5466 if (DDR_NB_LOOPS (ddr) > 1)
5468 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5470 if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
5471 return false;
5472 compute_subscript_distance (ddr);
5473 if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
5474 &index_carry))
5475 return false;
5477 save_dist_v (ddr, save_v);
5478 add_outer_distances (ddr, dist_v, index_carry);
5479 add_outer_distances (ddr, opposite_v, index_carry);
5481 else
5482 save_dist_v (ddr, save_v);
5485 else
5487 /* There is a distance of 1 on all the outer loops: Example:
5488 there is a dependence of distance 1 on loop_1 for the array A.
5490 | loop_1
5491 | A[5] = ...
5492 | endloop
5494 add_outer_distances (ddr, dist_v,
5495 lambda_vector_first_nz (dist_v,
5496 DDR_NB_LOOPS (ddr), 0));
5499 if (dump_file && (dump_flags & TDF_DETAILS))
5501 unsigned i;
5503 fprintf (dump_file, "(build_classic_dist_vector\n");
5504 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
5506 fprintf (dump_file, " dist_vector = (");
5507 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
5508 DDR_NB_LOOPS (ddr));
5509 fprintf (dump_file, " )\n");
5511 fprintf (dump_file, ")\n");
5514 return true;
5517 /* Return the direction for a given distance.
5518 FIXME: Computing dir this way is suboptimal, since dir can catch
5519 cases that dist is unable to represent. */
5521 static inline enum data_dependence_direction
5522 dir_from_dist (int dist)
5524 if (dist > 0)
5525 return dir_positive;
5526 else if (dist < 0)
5527 return dir_negative;
5528 else
5529 return dir_equal;
5532 /* Compute the classic per loop direction vector. DDR is the data
5533 dependence relation to build a vector from. */
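/* For example, the distance vector (2, -1) gives the direction
   vector (dir_positive, dir_negative).  */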
5535 static void
5536 build_classic_dir_vector (struct data_dependence_relation *ddr)
5538 unsigned i, j;
5539 lambda_vector dist_v;
5541 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
5543 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5545 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
5546 dir_v[j] = dir_from_dist (dist_v[j]);
5548 save_dir_v (ddr, dir_v);
/* Helper function.  Returns true when there is a dependence between the
   data references.  A_INDEX is the index of the first reference (0 for
   DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference.  */

static bool
subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
			       unsigned int a_index, unsigned int b_index,
			       class loop *loop_nest)
{
  unsigned int i;
  tree last_conflicts;
  struct subscript *subscript;
  tree res = NULL_TREE;

  for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
    {
      conflict_function *overlaps_a, *overlaps_b;

      analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
				      SUB_ACCESS_FN (subscript, b_index),
				      &overlaps_a, &overlaps_b,
				      &last_conflicts, loop_nest);

      if (SUB_CONFLICTS_IN_A (subscript))
	free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
      if (SUB_CONFLICTS_IN_B (subscript))
	free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
      SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
      SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
      SUB_LAST_CONFLICT (subscript) = last_conflicts;

      /* If there is any undetermined conflict function we have to
	 give a conservative answer in case we cannot prove that
	 no dependence exists when analyzing another subscript.  */
      if (CF_NOT_KNOWN_P (overlaps_a)
	  || CF_NOT_KNOWN_P (overlaps_b))
	{
	  res = chrec_dont_know;
	  continue;
	}

      /* When there is a subscript with no dependence we can stop.  */
      else if (CF_NO_DEPENDENCE_P (overlaps_a)
	       || CF_NO_DEPENDENCE_P (overlaps_b))
	{
	  res = chrec_known;
	  break;
	}
    }

  if (res == NULL_TREE)
    return true;

  if (res == chrec_known)
    dependence_stats.num_dependence_independent++;
  else
    dependence_stats.num_dependence_undetermined++;
  finalize_ddr_dependent (ddr, res);
  return false;
}
/* Computes the conflicting iterations in LOOP_NEST, and initializes DDR.  */

static void
subscript_dependence_tester (struct data_dependence_relation *ddr,
			     class loop *loop_nest)
{
  if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
    dependence_stats.num_dependence_dependent++;

  compute_subscript_distance (ddr);
  if (build_classic_dist_vector (ddr, loop_nest))
    build_classic_dir_vector (ddr);
}
/* Returns true when all the access functions of A are affine or
   constant with respect to LOOP_NEST.  */

static bool
access_functions_are_affine_or_constant_p (const struct data_reference *a,
					   const class loop *loop_nest)
{
  vec<tree> fns = DR_ACCESS_FNS (a);
  for (tree t : fns)
    if (!evolution_function_is_invariant_p (t, loop_nest->num)
	&& !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
      return false;

  return true;
}
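/* As an illustration: an access function that is a constant such as 3,
   or an affine chrec such as {0, +, 1}_1, passes this test, while a
   non-affine access function such as the one computed for A[i*i]
   typically does not.  */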
/* This computes the affine dependence relation between A and B with
   respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
   independence between two accesses, while CHREC_DONT_KNOW is used
   for representing the unknown relation.

   Note that it is possible to stop the computation of the dependence
   relation the first time we detect a CHREC_KNOWN element for a given
   subscript.  */

void
compute_affine_dependence (struct data_dependence_relation *ddr,
			   class loop *loop_nest)
{
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "(compute_affine_dependence\n");
      fprintf (dump_file, "  ref_a: ");
      print_generic_expr (dump_file, DR_REF (dra));
      fprintf (dump_file, ", stmt_a: ");
      print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
      fprintf (dump_file, "  ref_b: ");
      print_generic_expr (dump_file, DR_REF (drb));
      fprintf (dump_file, ", stmt_b: ");
      print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
    }

  /* Analyze only when the dependence relation is not yet known.  */
  if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      dependence_stats.num_dependence_tests++;

      if (access_functions_are_affine_or_constant_p (dra, loop_nest)
	  && access_functions_are_affine_or_constant_p (drb, loop_nest))
	subscript_dependence_tester (ddr, loop_nest);

      /* As a last case, if the dependence cannot be determined, or if
	 the dependence is considered too difficult to determine, answer
	 "don't know".  */
      else
	{
	  dependence_stats.num_dependence_undetermined++;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Data ref a:\n");
	      dump_data_reference (dump_file, dra);
	      fprintf (dump_file, "Data ref b:\n");
	      dump_data_reference (dump_file, drb);
	      fprintf (dump_file,
		       "affine dependence test not usable: access function not affine or constant.\n");
	    }
	  finalize_ddr_dependent (ddr, chrec_dont_know);
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
	fprintf (dump_file, ") -> no dependence\n");
      else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	fprintf (dump_file, ") -> dependence analysis failed\n");
      else
	fprintf (dump_file, ")\n");
    }
}
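/* On return from compute_affine_dependence, DDR_ARE_DEPENDENT (ddr) is
   chrec_known when independence was proved, chrec_dont_know when the
   analysis failed, and NULL_TREE when a dependence was found and
   characterized by the subscript, distance and direction information
   recorded in DDR.  */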
/* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
   the data references in DATAREFS, in the LOOP_NEST.  When
   COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
   relations.  Return true when successful, i.e. when the number of
   data references is small enough to be handled.  */

bool
compute_all_dependences (const vec<data_reference_p> &datarefs,
			 vec<ddr_p> *dependence_relations,
			 const vec<loop_p> &loop_nest,
			 bool compute_self_and_rr)
{
  struct data_dependence_relation *ddr;
  struct data_reference *a, *b;
  unsigned int i, j;

  if ((int) datarefs.length ()
      > param_loop_max_datarefs_for_datadeps)
    {
      struct data_dependence_relation *ddr;

      /* Insert a single relation into dependence_relations:
	 chrec_dont_know.  */
      ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
      dependence_relations->safe_push (ddr);
      return false;
    }

  FOR_EACH_VEC_ELT (datarefs, i, a)
    for (j = i + 1; datarefs.iterate (j, &b); j++)
      if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
	{
	  ddr = initialize_data_dependence_relation (a, b, loop_nest);
	  dependence_relations->safe_push (ddr);
	  if (loop_nest.exists ())
	    compute_affine_dependence (ddr, loop_nest[0]);
	}

  if (compute_self_and_rr)
    FOR_EACH_VEC_ELT (datarefs, i, a)
      {
	ddr = initialize_data_dependence_relation (a, a, loop_nest);
	dependence_relations->safe_push (ddr);
	if (loop_nest.exists ())
	  compute_affine_dependence (ddr, loop_nest[0]);
      }

  return true;
}
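/* compute_data_dependences_for_loop below is the canonical driver for
   this function: it builds the loop nest with find_loop_nest, collects
   the data references with find_data_references_in_loop, and then calls
   compute_all_dependences on the result.  */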
/* Describes a location of a memory reference.  */

struct data_ref_loc
{
  /* The memory reference.  */
  tree ref;

  /* True if the memory reference is read.  */
  bool is_read;

  /* True if the data reference is conditional within the containing
     statement, i.e. if it might not occur even when the statement
     is executed and runs to completion.  This is the case for the
     accesses performed by IFN_MASK_LOAD and IFN_MASK_STORE, which
     depend on their mask argument.  */
  bool is_conditional_in_stmt;
};

/* Stores the locations of memory references in STMT to REFERENCES.  Returns
   true if STMT clobbers memory, false otherwise.  */

static bool
get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
{
  bool clobbers_memory = false;
  data_ref_loc ref;
  tree op0, op1;
  enum gimple_code stmt_code = gimple_code (stmt);

  /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
     As we cannot model data references for accesses that are not
     spelled out, give up if they may occur.  */
  if (stmt_code == GIMPLE_CALL
      && !(gimple_call_flags (stmt) & ECF_CONST))
    {
      /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
      if (gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_GOMP_SIMD_LANE:
	    {
	      class loop *loop = gimple_bb (stmt)->loop_father;
	      tree uid = gimple_call_arg (stmt, 0);
	      gcc_assert (TREE_CODE (uid) == SSA_NAME);
	      if (loop == NULL
		  || loop->simduid != SSA_NAME_VAR (uid))
		clobbers_memory = true;
	      break;
	    }
	  case IFN_MASK_LOAD:
	  case IFN_MASK_STORE:
	    break;
	  default:
	    clobbers_memory = true;
	    break;
	  }
      else
	clobbers_memory = true;
    }
  else if (stmt_code == GIMPLE_ASM
	   && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
	       || gimple_vuse (stmt)))
    clobbers_memory = true;

  if (!gimple_vuse (stmt))
    return clobbers_memory;

  if (stmt_code == GIMPLE_ASSIGN)
    {
      tree base;
      op0 = gimple_assign_lhs (stmt);
      op1 = gimple_assign_rhs1 (stmt);

      if (DECL_P (op1)
	  || (REFERENCE_CLASS_P (op1)
	      && (base = get_base_address (op1))
	      && TREE_CODE (base) != SSA_NAME
	      && !is_gimple_min_invariant (base)))
	{
	  ref.ref = op1;
	  ref.is_read = true;
	  ref.is_conditional_in_stmt = false;
	  references->safe_push (ref);
	}
    }
  else if (stmt_code == GIMPLE_CALL)
    {
      unsigned i, n;
      tree ptr, type;
      unsigned int align;

      ref.is_read = false;
      if (gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_MASK_LOAD:
	    if (gimple_call_lhs (stmt) == NULL_TREE)
	      break;
	    ref.is_read = true;
	    /* FALLTHRU */
	  case IFN_MASK_STORE:
	    ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
	    align = tree_to_shwi (gimple_call_arg (stmt, 1));
	    if (ref.is_read)
	      type = TREE_TYPE (gimple_call_lhs (stmt));
	    else
	      type = TREE_TYPE (gimple_call_arg (stmt, 3));
	    if (TYPE_ALIGN (type) != align)
	      type = build_aligned_type (type, align);
	    ref.is_conditional_in_stmt = true;
	    ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
				   ptr);
	    references->safe_push (ref);
	    return false;
	  default:
	    break;
	  }

      op0 = gimple_call_lhs (stmt);
      n = gimple_call_num_args (stmt);
      for (i = 0; i < n; i++)
	{
	  op1 = gimple_call_arg (stmt, i);

	  if (DECL_P (op1)
	      || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
	    {
	      ref.ref = op1;
	      ref.is_read = true;
	      ref.is_conditional_in_stmt = false;
	      references->safe_push (ref);
	    }
	}
    }
  else
    return clobbers_memory;

  if (op0
      && (DECL_P (op0)
	  || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
    {
      ref.ref = op0;
      ref.is_read = false;
      ref.is_conditional_in_stmt = false;
      references->safe_push (ref);
    }
  return clobbers_memory;
}
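/* As an example of the above: for a load "_1 = b[i_2]" the reference
   b[i_2] is recorded as a read, and for a store "a[i_2] = _1" the
   reference a[i_2] is recorded as a write; plain SSA name operands
   never produce a data_ref_loc.  */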
/* Returns true if the loop-nest has any data reference.  */

bool
loop_nest_has_data_refs (loop_p loop)
{
  basic_block *bbs = get_loop_body (loop);
  auto_vec<data_ref_loc, 3> references;

  for (unsigned i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];
      gimple_stmt_iterator bsi;

      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	{
	  gimple *stmt = gsi_stmt (bsi);
	  get_references_in_stmt (stmt, &references);
	  if (references.length ())
	    {
	      free (bbs);
	      return true;
	    }
	}
    }
  free (bbs);
  return false;
}
/* Stores the data references in STMT to DATAREFS.  If there is an
   unanalyzable reference, returns an opt_result failure, otherwise
   returns success.  NEST is the outermost loop of the loop nest in
   which the references should be analyzed.  */

opt_result
find_data_references_in_stmt (class loop *nest, gimple *stmt,
			      vec<data_reference_p> *datarefs)
{
  auto_vec<data_ref_loc, 2> references;
  data_reference_p dr;

  if (get_references_in_stmt (stmt, &references))
    return opt_result::failure_at (stmt, "statement clobbers memory: %G",
				   stmt);

  for (const data_ref_loc &ref : references)
    {
      dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
			    loop_containing_stmt (stmt), ref.ref,
			    stmt, ref.is_read, ref.is_conditional_in_stmt);
      gcc_assert (dr != NULL);
      datarefs->safe_push (dr);
    }

  return opt_result::success ();
}
/* Stores the data references in STMT to DATAREFS.  If there is an
   unanalyzable reference, returns false, otherwise returns true.
   NEST is the outermost loop of the loop nest in which the references
   should be instantiated, LOOP is the loop in which the references
   should be analyzed.  */

bool
graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
				       vec<data_reference_p> *datarefs)
{
  auto_vec<data_ref_loc, 2> references;
  bool ret = true;
  data_reference_p dr;

  if (get_references_in_stmt (stmt, &references))
    return false;

  for (const data_ref_loc &ref : references)
    {
      dr = create_data_ref (nest, loop, ref.ref, stmt, ref.is_read,
			    ref.is_conditional_in_stmt);
      gcc_assert (dr != NULL);
      datarefs->safe_push (dr);
    }

  return ret;
}
/* Search the data references in basic block BB of LOOP, and record the
   information into DATAREFS.  Returns chrec_dont_know when failing to
   analyze a difficult case, returns NULL_TREE otherwise.  */

tree
find_data_references_in_bb (class loop *loop, basic_block bb,
			    vec<data_reference_p> *datarefs)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    {
      gimple *stmt = gsi_stmt (bsi);

      if (!find_data_references_in_stmt (loop, stmt, datarefs))
	{
	  struct data_reference *res;
	  res = XCNEW (struct data_reference);
	  datarefs->safe_push (res);

	  return chrec_dont_know;
	}
    }

  return NULL_TREE;
}
/* Search the data references in LOOP, and record the information into
   DATAREFS.  Returns chrec_dont_know when failing to analyze a
   difficult case, returns NULL_TREE otherwise.

   TODO: This function should be made smarter so that it can handle
   address arithmetic expressions as if they were array accesses, etc.  */

tree
find_data_references_in_loop (class loop *loop,
			      vec<data_reference_p> *datarefs)
{
  basic_block bb, *bbs;
  unsigned int i;

  bbs = get_loop_body_in_dom_order (loop);

  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = bbs[i];

      if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
	{
	  free (bbs);
	  return chrec_dont_know;
	}
    }
  free (bbs);

  return NULL_TREE;
}
/* Return the alignment in bytes that DRB is guaranteed to have at all
   times.  */

unsigned int
dr_alignment (innermost_loop_behavior *drb)
{
  /* Get the alignment of BASE_ADDRESS + INIT.  */
  unsigned int alignment = drb->base_alignment;
  unsigned int misalignment = (drb->base_misalignment
			       + TREE_INT_CST_LOW (drb->init));
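  /* The lowest set bit of the misalignment, computed below as
     misalignment & -misalignment, is the largest power of two known to
     divide BASE_ADDRESS + INIT; e.g. a misalignment of 12 from a 16-byte
     aligned base still guarantees 4-byte alignment.  */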
  if (misalignment != 0)
    alignment = MIN (alignment, misalignment & -misalignment);

  /* Cap it to the alignment of OFFSET.  */
  if (!integer_zerop (drb->offset))
    alignment = MIN (alignment, drb->offset_alignment);

  /* Cap it to the alignment of STEP.  */
  if (!integer_zerop (drb->step))
    alignment = MIN (alignment, drb->step_alignment);

  return alignment;
}
/* If BASE is a pointer-typed SSA name, try to find the object that it
   is based on.  Return this object X on success and store the alignment
   in bytes of BASE - &X in *ALIGNMENT_OUT.  */

static tree
get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
{
  if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL_TREE;

  gimple *def = SSA_NAME_DEF_STMT (base);
  base = analyze_scalar_evolution (loop_containing_stmt (def), base);

  /* Peel chrecs and record the minimum alignment preserved by
     all steps.  */
  unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
  while (TREE_CODE (base) == POLYNOMIAL_CHREC)
    {
      unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
      alignment = MIN (alignment, step_alignment);
      base = CHREC_LEFT (base);
    }

  /* Punt if the expression is too complicated to handle.  */
  if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL_TREE;

  /* The only useful cases are those for which a dereference folds to something
     other than an INDIRECT_REF.  */
  tree ref_type = TREE_TYPE (TREE_TYPE (base));
  tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
  if (!ref)
    return NULL_TREE;

  /* Analyze the base to which the steps we peeled were applied.  */
  poly_int64 bitsize, bitpos, bytepos;
  machine_mode mode;
  int unsignedp, reversep, volatilep;
  tree offset;
  base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
			      &unsignedp, &reversep, &volatilep);
  if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
    return NULL_TREE;

  /* Restrict the alignment to that guaranteed by the offsets.  */
  unsigned int bytepos_alignment = known_alignment (bytepos);
  if (bytepos_alignment != 0)
    alignment = MIN (alignment, bytepos_alignment);
  if (offset)
    {
      unsigned int offset_alignment = highest_pow2_factor (offset);
      alignment = MIN (alignment, offset_alignment);
    }

  *alignment_out = alignment;
  return base;
}
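/* For instance, if BASE is found to evolve as {&x, +, 4}_1 for some
   object x, the peeled step preserves 4-byte alignment, so x is returned
   with *ALIGNMENT_OUT == 4, i.e. BASE is known to equal &x plus a
   multiple of 4.  */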
/* Return the object whose alignment would need to be changed in order
   to increase the alignment of ADDR.  Store the maximum achievable
   alignment in *MAX_ALIGNMENT.  */

tree
get_base_for_alignment (tree addr, unsigned int *max_alignment)
{
  tree base = get_base_for_alignment_1 (addr, max_alignment);
  if (base)
    return base;

  if (TREE_CODE (addr) == ADDR_EXPR)
    addr = TREE_OPERAND (addr, 0);
  *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
  return addr;
}
/* Recursive helper function.  */

static bool
find_loop_nest_1 (class loop *loop, vec<loop_p> *loop_nest)
{
  /* Inner loops of the nest should not contain siblings.  Example:
     when there are two consecutive loops,

     | loop_0
     |   loop_1
     |     A[{0, +, 1}_1]
     |   endloop_1
     |   loop_2
     |     A[{0, +, 1}_2]
     |   endloop_2
     | endloop_0

     the dependence relation cannot be captured by the distance
     abstraction.  */
  if (loop->next)
    return false;

  loop_nest->safe_push (loop);
  if (loop->inner)
    return find_loop_nest_1 (loop->inner, loop_nest);
  return true;
}
/* Return false when the LOOP is not well nested.  Otherwise return
   true and insert in LOOP_NEST the loops of the nest.  LOOP_NEST will
   contain the loops from the outermost to the innermost, as they will
   appear in the classic distance vector.  */

bool
find_loop_nest (class loop *loop, vec<loop_p> *loop_nest)
{
  loop_nest->safe_push (loop);
  if (loop->inner)
    return find_loop_nest_1 (loop->inner, loop_nest);
  return true;
}
/* Returns true when the data dependences have been computed, false otherwise.
   Given a loop nest LOOP, the following vectors are returned:
   DATAREFS is initialized to all the array elements contained in this loop,
   DEPENDENCE_RELATIONS contains the relations between the data references.
   Compute read-read and self relations if
   COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */

bool
compute_data_dependences_for_loop (class loop *loop,
				   bool compute_self_and_read_read_dependences,
				   vec<loop_p> *loop_nest,
				   vec<data_reference_p> *datarefs,
				   vec<ddr_p> *dependence_relations)
{
  bool res = true;

  memset (&dependence_stats, 0, sizeof (dependence_stats));

  /* If the loop nest is not well formed, or one of the data references
     is not computable, give up without spending time to compute other
     dependences.  */
  if (!loop
      || !find_loop_nest (loop, loop_nest)
      || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
      || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
				   compute_self_and_read_read_dependences))
    res = false;

  if (dump_file && (dump_flags & TDF_STATS))
    {
      fprintf (dump_file, "Dependence tester statistics:\n");

      fprintf (dump_file, "Number of dependence tests: %d\n",
	       dependence_stats.num_dependence_tests);
      fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
	       dependence_stats.num_dependence_dependent);
      fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
	       dependence_stats.num_dependence_independent);
      fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
	       dependence_stats.num_dependence_undetermined);

      fprintf (dump_file, "Number of subscript tests: %d\n",
	       dependence_stats.num_subscript_tests);
      fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
	       dependence_stats.num_subscript_undetermined);
      fprintf (dump_file, "Number of same subscript function: %d\n",
	       dependence_stats.num_same_subscript_function);

      fprintf (dump_file, "Number of ziv tests: %d\n",
	       dependence_stats.num_ziv);
      fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
	       dependence_stats.num_ziv_dependent);
      fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
	       dependence_stats.num_ziv_independent);
      fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
	       dependence_stats.num_ziv_unimplemented);

      fprintf (dump_file, "Number of siv tests: %d\n",
	       dependence_stats.num_siv);
      fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
	       dependence_stats.num_siv_dependent);
      fprintf (dump_file, "Number of siv tests returning independent: %d\n",
	       dependence_stats.num_siv_independent);
      fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
	       dependence_stats.num_siv_unimplemented);

      fprintf (dump_file, "Number of miv tests: %d\n",
	       dependence_stats.num_miv);
      fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
	       dependence_stats.num_miv_dependent);
      fprintf (dump_file, "Number of miv tests returning independent: %d\n",
	       dependence_stats.num_miv_independent);
      fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
	       dependence_stats.num_miv_unimplemented);
    }

  return res;
}
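/* A minimal sketch of how a client might drive the analysis above,
   assuming LOOP is a well-nested loop taken from the loop tree of the
   current function:

     auto_vec<loop_p> loop_nest;
     auto_vec<data_reference_p> datarefs;
     auto_vec<ddr_p> ddrs;
     if (compute_data_dependences_for_loop (loop, false, &loop_nest,
					    &datarefs, &ddrs))
       {
	 ... inspect DDR_ARE_DEPENDENT and DDR_DIST_VECTS of each ddr ...
       }
     free_dependence_relations (ddrs);
     free_data_refs (datarefs);  */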
/* Free the memory used by a data dependence relation DDR.  */

void
free_dependence_relation (struct data_dependence_relation *ddr)
{
  if (ddr == NULL)
    return;

  if (DDR_SUBSCRIPTS (ddr).exists ())
    free_subscripts (DDR_SUBSCRIPTS (ddr));
  DDR_DIST_VECTS (ddr).release ();
  DDR_DIR_VECTS (ddr).release ();

  free (ddr);
}

/* Free the memory used by the data dependence relations from
   DEPENDENCE_RELATIONS.  */

void
free_dependence_relations (vec<ddr_p>& dependence_relations)
{
  for (data_dependence_relation *ddr : dependence_relations)
    if (ddr)
      free_dependence_relation (ddr);

  dependence_relations.release ();
}

/* Free the memory used by the data references from DATAREFS.  */

void
free_data_refs (vec<data_reference_p>& datarefs)
{
  for (data_reference *dr : datarefs)
    free_data_ref (dr);
  datarefs.release ();
}
/* Common routine implementing both dr_direction_indicator and
   dr_zero_step_indicator.  Return USEFUL_MIN if the indicator is known
   to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
   Return the step as the indicator otherwise.  */

static tree
dr_step_indicator (struct data_reference *dr, int useful_min)
{
  tree step = DR_STEP (dr);
  if (!step)
    return NULL_TREE;
  STRIP_NOPS (step);
  /* Look for cases where the step is scaled by a positive constant
     integer, which will often be the access size.  If the multiplication
     doesn't change the sign (due to overflow effects) then we can
     test the unscaled value instead.  */
  if (TREE_CODE (step) == MULT_EXPR
      && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
      && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
    {
      tree factor = TREE_OPERAND (step, 1);
      step = TREE_OPERAND (step, 0);

      /* Strip widening and truncating conversions as well as nops.  */
      if (CONVERT_EXPR_P (step)
	  && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
	step = TREE_OPERAND (step, 0);
      tree type = TREE_TYPE (step);

      /* Get the range of step values that would not cause overflow.  */
      widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
			 / wi::to_widest (factor));
      widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
			 / wi::to_widest (factor));

      /* Get the range of values that the unconverted step actually has.  */
      wide_int step_min, step_max;
      value_range vr;
      if (TREE_CODE (step) != SSA_NAME
	  || !get_range_query (cfun)->range_of_expr (vr, step)
	  || vr.kind () != VR_RANGE)
	{
	  step_min = wi::to_wide (TYPE_MIN_VALUE (type));
	  step_max = wi::to_wide (TYPE_MAX_VALUE (type));
	}
      else
	{
	  step_min = vr.lower_bound ();
	  step_max = vr.upper_bound ();
	}

      /* Check whether the unconverted step has an acceptable range.  */
      signop sgn = TYPE_SIGN (type);
      if (wi::les_p (minv, widest_int::from (step_min, sgn))
	  && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
	{
	  if (wi::ge_p (step_min, useful_min, sgn))
	    return ssize_int (useful_min);
	  else if (wi::lt_p (step_max, 0, sgn))
	    return ssize_int (-1);
	  else
	    return fold_convert (ssizetype, step);
	}
    }
  return DR_STEP (dr);
}
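/* For example, if DR's step is n * 4 and the range of n is known to be
   [1, 100], dr_direction_indicator below returns ssize_int (0) (a
   nonnegative step) and dr_zero_step_indicator returns ssize_int (1)
   (a nonzero step), sparing callers from reasoning about the scaled
   value.  */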
/* Return a value that is negative iff DR has a negative step.  */

tree
dr_direction_indicator (struct data_reference *dr)
{
  return dr_step_indicator (dr, 0);
}

/* Return a value that is zero iff DR has a zero step.  */

tree
dr_zero_step_indicator (struct data_reference *dr)
{
  return dr_step_indicator (dr, 1);
}

/* Return true if DR is known to have a nonnegative (but possibly zero)
   step.  */

bool
dr_known_forward_stride_p (struct data_reference *dr)
{
  tree indicator = dr_direction_indicator (dr);
  tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
				   fold_convert (ssizetype, indicator),
				   ssize_int (0));
  return neg_step_val && integer_zerop (neg_step_val);
}