Move canonicalisation of dr_with_seg_len_pair_ts
[official-gcc.git] / gcc / tree-data-ref.c
blobb31ed4247dd948a05ee11cffda9fceb1d2b5c9bf
1 /* Data references and dependences detectors.
2 Copyright (C) 2003-2019 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass walks a given loop structure searching for array
22 references. The information about the array accesses is recorded
23 in DATA_REFERENCE structures.
25 The basic test for determining the dependences is:
26 given two access functions chrec1 and chrec2 to a same array, and
27 x and y two vectors from the iteration domain, the same element of
28 the array is accessed twice at iterations x and y if and only if:
29 | chrec1 (x) == chrec2 (y).
31 The goals of this analysis are:
33 - to determine the independence: the relation between two
34 independent accesses is qualified with the chrec_known (this
35 information allows a loop parallelization),
37 - when two data references access the same data, to qualify the
38 dependence relation with classic dependence representations:
40 - distance vectors
41 - direction vectors
42 - loop carried level dependence
43 - polyhedron dependence
44 or with the chains of recurrences based representation,
46 - to define a knowledge base for storing the data dependence
47 information,
49 - to define an interface to access this data.
52 Definitions:
54 - subscript: given two array accesses a subscript is the tuple
55 composed of the access functions for a given dimension. Example:
56 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57 (f1, g1), (f2, g2), (f3, g3).
59 - Diophantine equation: an equation whose coefficients and
60 solutions are integer constants, for example the equation
61 | 3*x + 2*y = 1
62 has an integer solution x = 1 and y = -1.
64 References:
66 - "Advanced Compilation for High Performance Computing" by Randy
67 Allen and Ken Kennedy.
68 http://citeseer.ist.psu.edu/goff91practical.html
70 - "Loop Transformations for Restructuring Compilers - The Foundations"
71 by Utpal Banerjee.
76 #include "config.h"
77 #include "system.h"
78 #include "coretypes.h"
79 #include "backend.h"
80 #include "rtl.h"
81 #include "tree.h"
82 #include "gimple.h"
83 #include "gimple-pretty-print.h"
84 #include "alias.h"
85 #include "fold-const.h"
86 #include "expr.h"
87 #include "gimple-iterator.h"
88 #include "tree-ssa-loop-niter.h"
89 #include "tree-ssa-loop.h"
90 #include "tree-ssa.h"
91 #include "cfgloop.h"
92 #include "tree-data-ref.h"
93 #include "tree-scalar-evolution.h"
94 #include "dumpfile.h"
95 #include "tree-affine.h"
96 #include "builtins.h"
97 #include "tree-eh.h"
98 #include "ssa.h"
/* Counters for the dependence analyzer; dumped with the pass statistics
   so the effectiveness of the ZIV/SIV/MIV tests can be measured.  */

static struct datadep_stats
{
  int num_dependence_tests;
  int num_dependence_dependent;
  int num_dependence_independent;
  int num_dependence_undetermined;

  int num_subscript_tests;
  int num_subscript_undetermined;
  int num_same_subscript_function;

  int num_ziv;
  int num_ziv_independent;
  int num_ziv_dependent;
  int num_ziv_unimplemented;

  int num_siv;
  int num_siv_independent;
  int num_siv_dependent;
  int num_siv_unimplemented;

  int num_miv;
  int num_miv_independent;
  int num_miv_dependent;
  int num_miv_unimplemented;
} dependence_stats;
/* Forward declaration; defined later in this file.  Tests the dependence
   between DDR's two references, using the access functions starting at
   the given subscript positions, within the loop nest rooted at LOOP.  */

static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
					   unsigned int, unsigned int,
					   class loop *);
130 /* Returns true iff A divides B. */
132 static inline bool
133 tree_fold_divides_p (const_tree a, const_tree b)
135 gcc_assert (TREE_CODE (a) == INTEGER_CST);
136 gcc_assert (TREE_CODE (b) == INTEGER_CST);
137 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
/* Returns true iff A divides B exactly.  A must be nonzero, otherwise
   the modulo operation is undefined.  */

static inline bool
int_divides_p (int a, int b)
{
  return ((b % a) == 0);
}
148 /* Return true if reference REF contains a union access. */
150 static bool
151 ref_contains_union_access_p (tree ref)
153 while (handled_component_p (ref))
155 ref = TREE_OPERAND (ref, 0);
156 if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
157 || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
158 return true;
160 return false;
165 /* Dump into FILE all the data references from DATAREFS. */
167 static void
168 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
170 unsigned int i;
171 struct data_reference *dr;
173 FOR_EACH_VEC_ELT (datarefs, i, dr)
174 dump_data_reference (file, dr);
177 /* Unified dump into FILE all the data references from DATAREFS. */
179 DEBUG_FUNCTION void
180 debug (vec<data_reference_p> &ref)
182 dump_data_references (stderr, ref);
185 DEBUG_FUNCTION void
186 debug (vec<data_reference_p> *ptr)
188 if (ptr)
189 debug (*ptr);
190 else
191 fprintf (stderr, "<nil>\n");
195 /* Dump into STDERR all the data references from DATAREFS. */
197 DEBUG_FUNCTION void
198 debug_data_references (vec<data_reference_p> datarefs)
200 dump_data_references (stderr, datarefs);
203 /* Print to STDERR the data_reference DR. */
205 DEBUG_FUNCTION void
206 debug_data_reference (struct data_reference *dr)
208 dump_data_reference (stderr, dr);
211 /* Dump function for a DATA_REFERENCE structure. */
213 void
214 dump_data_reference (FILE *outf,
215 struct data_reference *dr)
217 unsigned int i;
219 fprintf (outf, "#(Data Ref: \n");
220 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
221 fprintf (outf, "# stmt: ");
222 print_gimple_stmt (outf, DR_STMT (dr), 0);
223 fprintf (outf, "# ref: ");
224 print_generic_stmt (outf, DR_REF (dr));
225 fprintf (outf, "# base_object: ");
226 print_generic_stmt (outf, DR_BASE_OBJECT (dr));
228 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
230 fprintf (outf, "# Access function %d: ", i);
231 print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
233 fprintf (outf, "#)\n");
236 /* Unified dump function for a DATA_REFERENCE structure. */
238 DEBUG_FUNCTION void
239 debug (data_reference &ref)
241 dump_data_reference (stderr, &ref);
244 DEBUG_FUNCTION void
245 debug (data_reference *ptr)
247 if (ptr)
248 debug (*ptr);
249 else
250 fprintf (stderr, "<nil>\n");
254 /* Dumps the affine function described by FN to the file OUTF. */
256 DEBUG_FUNCTION void
257 dump_affine_function (FILE *outf, affine_fn fn)
259 unsigned i;
260 tree coef;
262 print_generic_expr (outf, fn[0], TDF_SLIM);
263 for (i = 1; fn.iterate (i, &coef); i++)
265 fprintf (outf, " + ");
266 print_generic_expr (outf, coef, TDF_SLIM);
267 fprintf (outf, " * x_%u", i);
271 /* Dumps the conflict function CF to the file OUTF. */
273 DEBUG_FUNCTION void
274 dump_conflict_function (FILE *outf, conflict_function *cf)
276 unsigned i;
278 if (cf->n == NO_DEPENDENCE)
279 fprintf (outf, "no dependence");
280 else if (cf->n == NOT_KNOWN)
281 fprintf (outf, "not known");
282 else
284 for (i = 0; i < cf->n; i++)
286 if (i != 0)
287 fprintf (outf, " ");
288 fprintf (outf, "[");
289 dump_affine_function (outf, cf->fns[i]);
290 fprintf (outf, "]");
295 /* Dump function for a SUBSCRIPT structure. */
297 DEBUG_FUNCTION void
298 dump_subscript (FILE *outf, struct subscript *subscript)
300 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
302 fprintf (outf, "\n (subscript \n");
303 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
304 dump_conflict_function (outf, cf);
305 if (CF_NONTRIVIAL_P (cf))
307 tree last_iteration = SUB_LAST_CONFLICT (subscript);
308 fprintf (outf, "\n last_conflict: ");
309 print_generic_expr (outf, last_iteration);
312 cf = SUB_CONFLICTS_IN_B (subscript);
313 fprintf (outf, "\n iterations_that_access_an_element_twice_in_B: ");
314 dump_conflict_function (outf, cf);
315 if (CF_NONTRIVIAL_P (cf))
317 tree last_iteration = SUB_LAST_CONFLICT (subscript);
318 fprintf (outf, "\n last_conflict: ");
319 print_generic_expr (outf, last_iteration);
322 fprintf (outf, "\n (Subscript distance: ");
323 print_generic_expr (outf, SUB_DISTANCE (subscript));
324 fprintf (outf, " ))\n");
327 /* Print the classic direction vector DIRV to OUTF. */
329 DEBUG_FUNCTION void
330 print_direction_vector (FILE *outf,
331 lambda_vector dirv,
332 int length)
334 int eq;
336 for (eq = 0; eq < length; eq++)
338 enum data_dependence_direction dir = ((enum data_dependence_direction)
339 dirv[eq]);
341 switch (dir)
343 case dir_positive:
344 fprintf (outf, " +");
345 break;
346 case dir_negative:
347 fprintf (outf, " -");
348 break;
349 case dir_equal:
350 fprintf (outf, " =");
351 break;
352 case dir_positive_or_equal:
353 fprintf (outf, " +=");
354 break;
355 case dir_positive_or_negative:
356 fprintf (outf, " +-");
357 break;
358 case dir_negative_or_equal:
359 fprintf (outf, " -=");
360 break;
361 case dir_star:
362 fprintf (outf, " *");
363 break;
364 default:
365 fprintf (outf, "indep");
366 break;
369 fprintf (outf, "\n");
372 /* Print a vector of direction vectors. */
374 DEBUG_FUNCTION void
375 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
376 int length)
378 unsigned j;
379 lambda_vector v;
381 FOR_EACH_VEC_ELT (dir_vects, j, v)
382 print_direction_vector (outf, v, length);
385 /* Print out a vector VEC of length N to OUTFILE. */
387 DEBUG_FUNCTION void
388 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
390 int i;
392 for (i = 0; i < n; i++)
393 fprintf (outfile, "%3d ", (int)vector[i]);
394 fprintf (outfile, "\n");
397 /* Print a vector of distance vectors. */
399 DEBUG_FUNCTION void
400 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
401 int length)
403 unsigned j;
404 lambda_vector v;
406 FOR_EACH_VEC_ELT (dist_vects, j, v)
407 print_lambda_vector (outf, v, length);
410 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
412 DEBUG_FUNCTION void
413 dump_data_dependence_relation (FILE *outf,
414 struct data_dependence_relation *ddr)
416 struct data_reference *dra, *drb;
418 fprintf (outf, "(Data Dep: \n");
420 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
422 if (ddr)
424 dra = DDR_A (ddr);
425 drb = DDR_B (ddr);
426 if (dra)
427 dump_data_reference (outf, dra);
428 else
429 fprintf (outf, " (nil)\n");
430 if (drb)
431 dump_data_reference (outf, drb);
432 else
433 fprintf (outf, " (nil)\n");
435 fprintf (outf, " (don't know)\n)\n");
436 return;
439 dra = DDR_A (ddr);
440 drb = DDR_B (ddr);
441 dump_data_reference (outf, dra);
442 dump_data_reference (outf, drb);
444 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
445 fprintf (outf, " (no dependence)\n");
447 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
449 unsigned int i;
450 class loop *loopi;
452 subscript *sub;
453 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
455 fprintf (outf, " access_fn_A: ");
456 print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
457 fprintf (outf, " access_fn_B: ");
458 print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
459 dump_subscript (outf, sub);
462 fprintf (outf, " loop nest: (");
463 FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
464 fprintf (outf, "%d ", loopi->num);
465 fprintf (outf, ")\n");
467 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
469 fprintf (outf, " distance_vector: ");
470 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
471 DDR_NB_LOOPS (ddr));
474 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
476 fprintf (outf, " direction_vector: ");
477 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
478 DDR_NB_LOOPS (ddr));
482 fprintf (outf, ")\n");
485 /* Debug version. */
487 DEBUG_FUNCTION void
488 debug_data_dependence_relation (struct data_dependence_relation *ddr)
490 dump_data_dependence_relation (stderr, ddr);
493 /* Dump into FILE all the dependence relations from DDRS. */
495 DEBUG_FUNCTION void
496 dump_data_dependence_relations (FILE *file,
497 vec<ddr_p> ddrs)
499 unsigned int i;
500 struct data_dependence_relation *ddr;
502 FOR_EACH_VEC_ELT (ddrs, i, ddr)
503 dump_data_dependence_relation (file, ddr);
506 DEBUG_FUNCTION void
507 debug (vec<ddr_p> &ref)
509 dump_data_dependence_relations (stderr, ref);
512 DEBUG_FUNCTION void
513 debug (vec<ddr_p> *ptr)
515 if (ptr)
516 debug (*ptr);
517 else
518 fprintf (stderr, "<nil>\n");
522 /* Dump to STDERR all the dependence relations from DDRS. */
524 DEBUG_FUNCTION void
525 debug_data_dependence_relations (vec<ddr_p> ddrs)
527 dump_data_dependence_relations (stderr, ddrs);
530 /* Dumps the distance and direction vectors in FILE. DDRS contains
531 the dependence relations, and VECT_SIZE is the size of the
532 dependence vectors, or in other words the number of loops in the
533 considered nest. */
535 DEBUG_FUNCTION void
536 dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
538 unsigned int i, j;
539 struct data_dependence_relation *ddr;
540 lambda_vector v;
542 FOR_EACH_VEC_ELT (ddrs, i, ddr)
543 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
545 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), j, v)
547 fprintf (file, "DISTANCE_V (");
548 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
549 fprintf (file, ")\n");
552 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), j, v)
554 fprintf (file, "DIRECTION_V (");
555 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
556 fprintf (file, ")\n");
560 fprintf (file, "\n\n");
563 /* Dumps the data dependence relations DDRS in FILE. */
565 DEBUG_FUNCTION void
566 dump_ddrs (FILE *file, vec<ddr_p> ddrs)
568 unsigned int i;
569 struct data_dependence_relation *ddr;
571 FOR_EACH_VEC_ELT (ddrs, i, ddr)
572 dump_data_dependence_relation (file, ddr);
574 fprintf (file, "\n\n");
577 DEBUG_FUNCTION void
578 debug_ddrs (vec<ddr_p> ddrs)
580 dump_ddrs (stderr, ddrs);
583 static void
584 split_constant_offset (tree exp, tree *var, tree *off,
585 hash_map<tree, std::pair<tree, tree> > &cache,
586 unsigned *limit);
588 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
589 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
590 constant of type ssizetype, and returns true. If we cannot do this
591 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
592 is returned. */
594 static bool
595 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
596 tree *var, tree *off,
597 hash_map<tree, std::pair<tree, tree> > &cache,
598 unsigned *limit)
600 tree var0, var1;
601 tree off0, off1;
602 enum tree_code ocode = code;
604 *var = NULL_TREE;
605 *off = NULL_TREE;
607 switch (code)
609 case INTEGER_CST:
610 *var = build_int_cst (type, 0);
611 *off = fold_convert (ssizetype, op0);
612 return true;
614 case POINTER_PLUS_EXPR:
615 ocode = PLUS_EXPR;
616 /* FALLTHROUGH */
617 case PLUS_EXPR:
618 case MINUS_EXPR:
619 if (TREE_CODE (op1) == INTEGER_CST)
621 split_constant_offset (op0, &var0, &off0, cache, limit);
622 *var = var0;
623 *off = size_binop (ocode, off0, fold_convert (ssizetype, op1));
624 return true;
626 split_constant_offset (op0, &var0, &off0, cache, limit);
627 split_constant_offset (op1, &var1, &off1, cache, limit);
628 *var = fold_build2 (code, type, var0, var1);
629 *off = size_binop (ocode, off0, off1);
630 return true;
632 case MULT_EXPR:
633 if (TREE_CODE (op1) != INTEGER_CST)
634 return false;
636 split_constant_offset (op0, &var0, &off0, cache, limit);
637 *var = fold_build2 (MULT_EXPR, type, var0, op1);
638 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
639 return true;
641 case ADDR_EXPR:
643 tree base, poffset;
644 poly_int64 pbitsize, pbitpos, pbytepos;
645 machine_mode pmode;
646 int punsignedp, preversep, pvolatilep;
648 op0 = TREE_OPERAND (op0, 0);
649 base
650 = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
651 &punsignedp, &preversep, &pvolatilep);
653 if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
654 return false;
655 base = build_fold_addr_expr (base);
656 off0 = ssize_int (pbytepos);
658 if (poffset)
660 split_constant_offset (poffset, &poffset, &off1, cache, limit);
661 off0 = size_binop (PLUS_EXPR, off0, off1);
662 if (POINTER_TYPE_P (TREE_TYPE (base)))
663 base = fold_build_pointer_plus (base, poffset);
664 else
665 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
666 fold_convert (TREE_TYPE (base), poffset));
669 var0 = fold_convert (type, base);
671 /* If variable length types are involved, punt, otherwise casts
672 might be converted into ARRAY_REFs in gimplify_conversion.
673 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
674 possibly no longer appears in current GIMPLE, might resurface.
675 This perhaps could run
676 if (CONVERT_EXPR_P (var0))
678 gimplify_conversion (&var0);
679 // Attempt to fill in any within var0 found ARRAY_REF's
680 // element size from corresponding op embedded ARRAY_REF,
681 // if unsuccessful, just punt.
682 } */
683 while (POINTER_TYPE_P (type))
684 type = TREE_TYPE (type);
685 if (int_size_in_bytes (type) < 0)
686 return false;
688 *var = var0;
689 *off = off0;
690 return true;
693 case SSA_NAME:
695 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
696 return false;
698 gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
699 enum tree_code subcode;
701 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
702 return false;
704 subcode = gimple_assign_rhs_code (def_stmt);
706 /* We are using a cache to avoid un-CSEing large amounts of code. */
707 bool use_cache = false;
708 if (!has_single_use (op0)
709 && (subcode == POINTER_PLUS_EXPR
710 || subcode == PLUS_EXPR
711 || subcode == MINUS_EXPR
712 || subcode == MULT_EXPR
713 || subcode == ADDR_EXPR
714 || CONVERT_EXPR_CODE_P (subcode)))
716 use_cache = true;
717 bool existed;
718 std::pair<tree, tree> &e = cache.get_or_insert (op0, &existed);
719 if (existed)
721 if (integer_zerop (e.second))
722 return false;
723 *var = e.first;
724 *off = e.second;
725 return true;
727 e = std::make_pair (op0, ssize_int (0));
730 if (*limit == 0)
731 return false;
732 --*limit;
734 var0 = gimple_assign_rhs1 (def_stmt);
735 var1 = gimple_assign_rhs2 (def_stmt);
737 bool res = split_constant_offset_1 (type, var0, subcode, var1,
738 var, off, cache, limit);
739 if (res && use_cache)
740 *cache.get (op0) = std::make_pair (*var, *off);
741 return res;
743 CASE_CONVERT:
745 /* We must not introduce undefined overflow, and we must not change
746 the value. Hence we're okay if the inner type doesn't overflow
747 to start with (pointer or signed), the outer type also is an
748 integer or pointer and the outer precision is at least as large
749 as the inner. */
750 tree itype = TREE_TYPE (op0);
751 if ((POINTER_TYPE_P (itype)
752 || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
753 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
754 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
756 if (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_WRAPS (itype))
758 /* Split the unconverted operand and try to prove that
759 wrapping isn't a problem. */
760 tree tmp_var, tmp_off;
761 split_constant_offset (op0, &tmp_var, &tmp_off, cache, limit);
763 /* See whether we have an SSA_NAME whose range is known
764 to be [A, B]. */
765 if (TREE_CODE (tmp_var) != SSA_NAME)
766 return false;
767 wide_int var_min, var_max;
768 value_range_kind vr_type = get_range_info (tmp_var, &var_min,
769 &var_max);
770 wide_int var_nonzero = get_nonzero_bits (tmp_var);
771 signop sgn = TYPE_SIGN (itype);
772 if (intersect_range_with_nonzero_bits (vr_type, &var_min,
773 &var_max, var_nonzero,
774 sgn) != VR_RANGE)
775 return false;
777 /* See whether the range of OP0 (i.e. TMP_VAR + TMP_OFF)
778 is known to be [A + TMP_OFF, B + TMP_OFF], with all
779 operations done in ITYPE. The addition must overflow
780 at both ends of the range or at neither. */
781 wi::overflow_type overflow[2];
782 unsigned int prec = TYPE_PRECISION (itype);
783 wide_int woff = wi::to_wide (tmp_off, prec);
784 wide_int op0_min = wi::add (var_min, woff, sgn, &overflow[0]);
785 wi::add (var_max, woff, sgn, &overflow[1]);
786 if ((overflow[0] != wi::OVF_NONE) != (overflow[1] != wi::OVF_NONE))
787 return false;
789 /* Calculate (ssizetype) OP0 - (ssizetype) TMP_VAR. */
790 widest_int diff = (widest_int::from (op0_min, sgn)
791 - widest_int::from (var_min, sgn));
792 var0 = tmp_var;
793 *off = wide_int_to_tree (ssizetype, diff);
795 else
796 split_constant_offset (op0, &var0, off, cache, limit);
797 *var = fold_convert (type, var0);
798 return true;
800 return false;
803 default:
804 return false;
808 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
809 will be ssizetype. */
811 static void
812 split_constant_offset (tree exp, tree *var, tree *off,
813 hash_map<tree, std::pair<tree, tree> > &cache,
814 unsigned *limit)
816 tree type = TREE_TYPE (exp), op0, op1, e, o;
817 enum tree_code code;
819 *var = exp;
820 *off = ssize_int (0);
822 if (tree_is_chrec (exp)
823 || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
824 return;
826 code = TREE_CODE (exp);
827 extract_ops_from_tree (exp, &code, &op0, &op1);
828 if (split_constant_offset_1 (type, op0, code, op1, &e, &o, cache, limit))
830 *var = e;
831 *off = o;
835 void
836 split_constant_offset (tree exp, tree *var, tree *off)
838 unsigned limit = param_ssa_name_def_chain_limit;
839 static hash_map<tree, std::pair<tree, tree> > *cache;
840 if (!cache)
841 cache = new hash_map<tree, std::pair<tree, tree> > (37);
842 split_constant_offset (exp, var, off, *cache, &limit);
843 cache->empty ();
846 /* Returns the address ADDR of an object in a canonical shape (without nop
847 casts, and with type of pointer to the object). */
849 static tree
850 canonicalize_base_object_address (tree addr)
852 tree orig = addr;
854 STRIP_NOPS (addr);
856 /* The base address may be obtained by casting from integer, in that case
857 keep the cast. */
858 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
859 return orig;
861 if (TREE_CODE (addr) != ADDR_EXPR)
862 return addr;
864 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
867 /* Analyze the behavior of memory reference REF within STMT.
868 There are two modes:
870 - BB analysis. In this case we simply split the address into base,
871 init and offset components, without reference to any containing loop.
872 The resulting base and offset are general expressions and they can
873 vary arbitrarily from one iteration of the containing loop to the next.
874 The step is always zero.
876 - loop analysis. In this case we analyze the reference both wrt LOOP
877 and on the basis that the reference occurs (is "used") in LOOP;
878 see the comment above analyze_scalar_evolution_in_loop for more
879 information about this distinction. The base, init, offset and
880 step fields are all invariant in LOOP.
882 Perform BB analysis if LOOP is null, or if LOOP is the function's
883 dummy outermost loop. In other cases perform loop analysis.
885 Return true if the analysis succeeded and store the results in DRB if so.
886 BB analysis can only fail for bitfield or reversed-storage accesses. */
888 opt_result
889 dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
890 class loop *loop, const gimple *stmt)
892 poly_int64 pbitsize, pbitpos;
893 tree base, poffset;
894 machine_mode pmode;
895 int punsignedp, preversep, pvolatilep;
896 affine_iv base_iv, offset_iv;
897 tree init, dinit, step;
898 bool in_loop = (loop && loop->num);
900 if (dump_file && (dump_flags & TDF_DETAILS))
901 fprintf (dump_file, "analyze_innermost: ");
903 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
904 &punsignedp, &preversep, &pvolatilep);
905 gcc_assert (base != NULL_TREE);
907 poly_int64 pbytepos;
908 if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
909 return opt_result::failure_at (stmt,
910 "failed: bit offset alignment.\n");
912 if (preversep)
913 return opt_result::failure_at (stmt,
914 "failed: reverse storage order.\n");
916 /* Calculate the alignment and misalignment for the inner reference. */
917 unsigned int HOST_WIDE_INT bit_base_misalignment;
918 unsigned int bit_base_alignment;
919 get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);
921 /* There are no bitfield references remaining in BASE, so the values
922 we got back must be whole bytes. */
923 gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
924 && bit_base_misalignment % BITS_PER_UNIT == 0);
925 unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
926 poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;
928 if (TREE_CODE (base) == MEM_REF)
930 if (!integer_zerop (TREE_OPERAND (base, 1)))
932 /* Subtract MOFF from the base and add it to POFFSET instead.
933 Adjust the misalignment to reflect the amount we subtracted. */
934 poly_offset_int moff = mem_ref_offset (base);
935 base_misalignment -= moff.force_shwi ();
936 tree mofft = wide_int_to_tree (sizetype, moff);
937 if (!poffset)
938 poffset = mofft;
939 else
940 poffset = size_binop (PLUS_EXPR, poffset, mofft);
942 base = TREE_OPERAND (base, 0);
944 else
945 base = build_fold_addr_expr (base);
947 if (in_loop)
949 if (!simple_iv (loop, loop, base, &base_iv, true))
950 return opt_result::failure_at
951 (stmt, "failed: evolution of base is not affine.\n");
953 else
955 base_iv.base = base;
956 base_iv.step = ssize_int (0);
957 base_iv.no_overflow = true;
960 if (!poffset)
962 offset_iv.base = ssize_int (0);
963 offset_iv.step = ssize_int (0);
965 else
967 if (!in_loop)
969 offset_iv.base = poffset;
970 offset_iv.step = ssize_int (0);
972 else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
973 return opt_result::failure_at
974 (stmt, "failed: evolution of offset is not affine.\n");
977 init = ssize_int (pbytepos);
979 /* Subtract any constant component from the base and add it to INIT instead.
980 Adjust the misalignment to reflect the amount we subtracted. */
981 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
982 init = size_binop (PLUS_EXPR, init, dinit);
983 base_misalignment -= TREE_INT_CST_LOW (dinit);
985 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
986 init = size_binop (PLUS_EXPR, init, dinit);
988 step = size_binop (PLUS_EXPR,
989 fold_convert (ssizetype, base_iv.step),
990 fold_convert (ssizetype, offset_iv.step));
992 base = canonicalize_base_object_address (base_iv.base);
994 /* See if get_pointer_alignment can guarantee a higher alignment than
995 the one we calculated above. */
996 unsigned int HOST_WIDE_INT alt_misalignment;
997 unsigned int alt_alignment;
998 get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);
1000 /* As above, these values must be whole bytes. */
1001 gcc_assert (alt_alignment % BITS_PER_UNIT == 0
1002 && alt_misalignment % BITS_PER_UNIT == 0);
1003 alt_alignment /= BITS_PER_UNIT;
1004 alt_misalignment /= BITS_PER_UNIT;
1006 if (base_alignment < alt_alignment)
1008 base_alignment = alt_alignment;
1009 base_misalignment = alt_misalignment;
1012 drb->base_address = base;
1013 drb->offset = fold_convert (ssizetype, offset_iv.base);
1014 drb->init = init;
1015 drb->step = step;
1016 if (known_misalignment (base_misalignment, base_alignment,
1017 &drb->base_misalignment))
1018 drb->base_alignment = base_alignment;
1019 else
1021 drb->base_alignment = known_alignment (base_misalignment);
1022 drb->base_misalignment = 0;
1024 drb->offset_alignment = highest_pow2_factor (offset_iv.base);
1025 drb->step_alignment = highest_pow2_factor (step);
1027 if (dump_file && (dump_flags & TDF_DETAILS))
1028 fprintf (dump_file, "success.\n");
1030 return opt_result::success ();
1033 /* Return true if OP is a valid component reference for a DR access
1034 function. This accepts a subset of what handled_component_p accepts. */
1036 static bool
1037 access_fn_component_p (tree op)
1039 switch (TREE_CODE (op))
1041 case REALPART_EXPR:
1042 case IMAGPART_EXPR:
1043 case ARRAY_REF:
1044 return true;
1046 case COMPONENT_REF:
1047 return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;
1049 default:
1050 return false;
1054 /* Determines the base object and the list of indices of memory reference
1055 DR, analyzed in LOOP and instantiated before NEST. */
1057 static void
1058 dr_analyze_indices (struct data_reference *dr, edge nest, loop_p loop)
1060 vec<tree> access_fns = vNULL;
1061 tree ref, op;
1062 tree base, off, access_fn;
1064 /* If analyzing a basic-block there are no indices to analyze
1065 and thus no access functions. */
1066 if (!nest)
1068 DR_BASE_OBJECT (dr) = DR_REF (dr);
1069 DR_ACCESS_FNS (dr).create (0);
1070 return;
1073 ref = DR_REF (dr);
1075 /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
1076 into a two element array with a constant index. The base is
1077 then just the immediate underlying object. */
1078 if (TREE_CODE (ref) == REALPART_EXPR)
1080 ref = TREE_OPERAND (ref, 0);
1081 access_fns.safe_push (integer_zero_node);
1083 else if (TREE_CODE (ref) == IMAGPART_EXPR)
1085 ref = TREE_OPERAND (ref, 0);
1086 access_fns.safe_push (integer_one_node);
1089 /* Analyze access functions of dimensions we know to be independent.
1090 The list of component references handled here should be kept in
1091 sync with access_fn_component_p. */
1092 while (handled_component_p (ref))
1094 if (TREE_CODE (ref) == ARRAY_REF)
1096 op = TREE_OPERAND (ref, 1);
1097 access_fn = analyze_scalar_evolution (loop, op);
1098 access_fn = instantiate_scev (nest, loop, access_fn);
1099 access_fns.safe_push (access_fn);
1101 else if (TREE_CODE (ref) == COMPONENT_REF
1102 && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
1104 /* For COMPONENT_REFs of records (but not unions!) use the
1105 FIELD_DECL offset as constant access function so we can
1106 disambiguate a[i].f1 and a[i].f2. */
1107 tree off = component_ref_field_offset (ref);
1108 off = size_binop (PLUS_EXPR,
1109 size_binop (MULT_EXPR,
1110 fold_convert (bitsizetype, off),
1111 bitsize_int (BITS_PER_UNIT)),
1112 DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
1113 access_fns.safe_push (off);
1115 else
1116 /* If we have an unhandled component we could not translate
1117 to an access function stop analyzing. We have determined
1118 our base object in this case. */
1119 break;
1121 ref = TREE_OPERAND (ref, 0);
1124 /* If the address operand of a MEM_REF base has an evolution in the
1125 analyzed nest, add it as an additional independent access-function. */
1126 if (TREE_CODE (ref) == MEM_REF)
1128 op = TREE_OPERAND (ref, 0);
1129 access_fn = analyze_scalar_evolution (loop, op);
1130 access_fn = instantiate_scev (nest, loop, access_fn);
1131 if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
1133 tree orig_type;
1134 tree memoff = TREE_OPERAND (ref, 1);
1135 base = initial_condition (access_fn);
1136 orig_type = TREE_TYPE (base);
1137 STRIP_USELESS_TYPE_CONVERSION (base);
1138 split_constant_offset (base, &base, &off);
1139 STRIP_USELESS_TYPE_CONVERSION (base);
1140 /* Fold the MEM_REF offset into the evolutions initial
1141 value to make more bases comparable. */
1142 if (!integer_zerop (memoff))
1144 off = size_binop (PLUS_EXPR, off,
1145 fold_convert (ssizetype, memoff));
1146 memoff = build_int_cst (TREE_TYPE (memoff), 0);
1148 /* Adjust the offset so it is a multiple of the access type
1149 size and thus we separate bases that can possibly be used
1150 to produce partial overlaps (which the access_fn machinery
1151 cannot handle). */
1152 wide_int rem;
1153 if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
1154 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
1155 && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
1156 rem = wi::mod_trunc
1157 (wi::to_wide (off),
1158 wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
1159 SIGNED);
1160 else
1161 /* If we can't compute the remainder simply force the initial
1162 condition to zero. */
1163 rem = wi::to_wide (off);
1164 off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
1165 memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
1166 /* And finally replace the initial condition. */
1167 access_fn = chrec_replace_initial_condition
1168 (access_fn, fold_convert (orig_type, off));
1169 /* ??? This is still not a suitable base object for
1170 dr_may_alias_p - the base object needs to be an
1171 access that covers the object as whole. With
1172 an evolution in the pointer this cannot be
1173 guaranteed.
1174 As a band-aid, mark the access so we can special-case
1175 it in dr_may_alias_p. */
1176 tree old = ref;
1177 ref = fold_build2_loc (EXPR_LOCATION (ref),
1178 MEM_REF, TREE_TYPE (ref),
1179 base, memoff);
1180 MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
1181 MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
1182 DR_UNCONSTRAINED_BASE (dr) = true;
1183 access_fns.safe_push (access_fn);
1186 else if (DECL_P (ref))
1188 /* Canonicalize DR_BASE_OBJECT to MEM_REF form. */
1189 ref = build2 (MEM_REF, TREE_TYPE (ref),
1190 build_fold_addr_expr (ref),
1191 build_int_cst (reference_alias_ptr_type (ref), 0));
1194 DR_BASE_OBJECT (dr) = ref;
1195 DR_ACCESS_FNS (dr) = access_fns;
1198 /* Extracts the alias analysis information from the memory reference DR. */
1200 static void
1201 dr_analyze_alias (struct data_reference *dr)
1203 tree ref = DR_REF (dr);
1204 tree base = get_base_address (ref), addr;
1206 if (INDIRECT_REF_P (base)
1207 || TREE_CODE (base) == MEM_REF)
1209 addr = TREE_OPERAND (base, 0);
1210 if (TREE_CODE (addr) == SSA_NAME)
1211 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1215 /* Frees data reference DR. */
1217 void
1218 free_data_ref (data_reference_p dr)
1220 DR_ACCESS_FNS (dr).release ();
1221 free (dr);
1224 /* Analyze memory reference MEMREF, which is accessed in STMT.
1225 The reference is a read if IS_READ is true, otherwise it is a write.
1226 IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
1227 within STMT, i.e. that it might not occur even if STMT is executed
1228 and runs to completion.
1230 Return the data_reference description of MEMREF. NEST is the outermost
1231 loop in which the reference should be instantiated, LOOP is the loop
1232 in which the data reference should be analyzed. */
1234 struct data_reference *
1235 create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
1236 bool is_read, bool is_conditional_in_stmt)
1238 struct data_reference *dr;
1240 if (dump_file && (dump_flags & TDF_DETAILS))
1242 fprintf (dump_file, "Creating dr for ");
1243 print_generic_expr (dump_file, memref, TDF_SLIM);
1244 fprintf (dump_file, "\n");
1247 dr = XCNEW (struct data_reference);
1248 DR_STMT (dr) = stmt;
1249 DR_REF (dr) = memref;
1250 DR_IS_READ (dr) = is_read;
1251 DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;
1253 dr_analyze_innermost (&DR_INNERMOST (dr), memref,
1254 nest != NULL ? loop : NULL, stmt);
1255 dr_analyze_indices (dr, nest, loop);
1256 dr_analyze_alias (dr);
1258 if (dump_file && (dump_flags & TDF_DETAILS))
1260 unsigned i;
1261 fprintf (dump_file, "\tbase_address: ");
1262 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1263 fprintf (dump_file, "\n\toffset from base address: ");
1264 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1265 fprintf (dump_file, "\n\tconstant offset from base address: ");
1266 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1267 fprintf (dump_file, "\n\tstep: ");
1268 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1269 fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
1270 fprintf (dump_file, "\n\tbase misalignment: %d",
1271 DR_BASE_MISALIGNMENT (dr));
1272 fprintf (dump_file, "\n\toffset alignment: %d",
1273 DR_OFFSET_ALIGNMENT (dr));
1274 fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
1275 fprintf (dump_file, "\n\tbase_object: ");
1276 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1277 fprintf (dump_file, "\n");
1278 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1280 fprintf (dump_file, "\tAccess function %d: ", i);
1281 print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1285 return dr;
1288 /* A helper function computes order between two tree expressions T1 and T2.
1289 This is used in comparator functions sorting objects based on the order
1290 of tree expressions. The function returns -1, 0, or 1. */
1293 data_ref_compare_tree (tree t1, tree t2)
1295 int i, cmp;
1296 enum tree_code code;
1297 char tclass;
1299 if (t1 == t2)
1300 return 0;
1301 if (t1 == NULL)
1302 return -1;
1303 if (t2 == NULL)
1304 return 1;
1306 STRIP_USELESS_TYPE_CONVERSION (t1);
1307 STRIP_USELESS_TYPE_CONVERSION (t2);
1308 if (t1 == t2)
1309 return 0;
1311 if (TREE_CODE (t1) != TREE_CODE (t2)
1312 && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
1313 return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
1315 code = TREE_CODE (t1);
1316 switch (code)
1318 case INTEGER_CST:
1319 return tree_int_cst_compare (t1, t2);
1321 case STRING_CST:
1322 if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
1323 return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
1324 return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
1325 TREE_STRING_LENGTH (t1));
1327 case SSA_NAME:
1328 if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
1329 return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
1330 break;
1332 default:
1333 if (POLY_INT_CST_P (t1))
1334 return compare_sizes_for_sort (wi::to_poly_widest (t1),
1335 wi::to_poly_widest (t2));
1337 tclass = TREE_CODE_CLASS (code);
1339 /* For decls, compare their UIDs. */
1340 if (tclass == tcc_declaration)
1342 if (DECL_UID (t1) != DECL_UID (t2))
1343 return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
1344 break;
1346 /* For expressions, compare their operands recursively. */
1347 else if (IS_EXPR_CODE_CLASS (tclass))
1349 for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
1351 cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
1352 TREE_OPERAND (t2, i));
1353 if (cmp != 0)
1354 return cmp;
1357 else
1358 gcc_unreachable ();
1361 return 0;
1364 /* Return TRUE it's possible to resolve data dependence DDR by runtime alias
1365 check. */
1367 opt_result
1368 runtime_alias_check_p (ddr_p ddr, class loop *loop, bool speed_p)
1370 if (dump_enabled_p ())
1371 dump_printf (MSG_NOTE,
1372 "consider run-time aliasing test between %T and %T\n",
1373 DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));
1375 if (!speed_p)
1376 return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1377 "runtime alias check not supported when"
1378 " optimizing for size.\n");
1380 /* FORNOW: We don't support versioning with outer-loop in either
1381 vectorization or loop distribution. */
1382 if (loop != NULL && loop->inner != NULL)
1383 return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1384 "runtime alias check not supported for"
1385 " outer loop.\n");
1387 return opt_result::success ();
1390 /* Operator == between two dr_with_seg_len objects.
1392 This equality operator is used to make sure two data refs
1393 are the same one so that we will consider to combine the
1394 aliasing checks of those two pairs of data dependent data
1395 refs. */
1397 static bool
1398 operator == (const dr_with_seg_len& d1,
1399 const dr_with_seg_len& d2)
1401 return (operand_equal_p (DR_BASE_ADDRESS (d1.dr),
1402 DR_BASE_ADDRESS (d2.dr), 0)
1403 && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
1404 && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
1405 && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0
1406 && known_eq (d1.access_size, d2.access_size)
1407 && d1.align == d2.align);
1410 /* Comparison function for sorting objects of dr_with_seg_len_pair_t
1411 so that we can combine aliasing checks in one scan. */
1413 static int
1414 comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
1416 const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
1417 const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
1418 const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
1419 const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;
1421 /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks
1422 if a and c have the same basic address snd step, and b and d have the same
1423 address and step. Therefore, if any a&c or b&d don't have the same address
1424 and step, we don't care the order of those two pairs after sorting. */
1425 int comp_res;
1427 if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
1428 DR_BASE_ADDRESS (b1.dr))) != 0)
1429 return comp_res;
1430 if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
1431 DR_BASE_ADDRESS (b2.dr))) != 0)
1432 return comp_res;
1433 if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
1434 DR_STEP (b1.dr))) != 0)
1435 return comp_res;
1436 if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
1437 DR_STEP (b2.dr))) != 0)
1438 return comp_res;
1439 if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
1440 DR_OFFSET (b1.dr))) != 0)
1441 return comp_res;
1442 if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
1443 DR_INIT (b1.dr))) != 0)
1444 return comp_res;
1445 if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
1446 DR_OFFSET (b2.dr))) != 0)
1447 return comp_res;
1448 if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
1449 DR_INIT (b2.dr))) != 0)
1450 return comp_res;
1452 return 0;
1455 /* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
1456 FACTOR is number of iterations that each data reference is accessed.
1458 Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
1459 we create an expression:
1461 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1462 || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
1464 for aliasing checks. However, in some cases we can decrease the number
1465 of checks by combining two checks into one. For example, suppose we have
1466 another pair of data refs store_ptr_0 & load_ptr_1, and if the following
1467 condition is satisfied:
1469 load_ptr_0 < load_ptr_1 &&
1470 load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
1472 (this condition means, in each iteration of vectorized loop, the accessed
1473 memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
1474 load_ptr_1.)
1476 we then can use only the following expression to finish the alising checks
1477 between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:
1479 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1480 || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))
1482 Note that we only consider that load_ptr_0 and load_ptr_1 have the same
1483 basic address. */
1485 void
1486 prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
1487 poly_uint64)
1489 /* Canonicalize each pair so that the base components are ordered wrt
1490 data_ref_compare_tree. This allows the loop below to merge more
1491 cases. */
1492 unsigned int i;
1493 dr_with_seg_len_pair_t *alias_pair;
1494 FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1496 data_reference_p dr_a = alias_pair->first.dr;
1497 data_reference_p dr_b = alias_pair->second.dr;
1498 int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
1499 DR_BASE_ADDRESS (dr_b));
1500 if (comp_res == 0)
1501 comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
1502 if (comp_res == 0)
1503 comp_res = data_ref_compare_tree (DR_INIT (dr_a), DR_INIT (dr_b));
1504 if (comp_res > 0)
1505 std::swap (alias_pair->first, alias_pair->second);
1508 /* Sort the collected data ref pairs so that we can scan them once to
1509 combine all possible aliasing checks. */
1510 alias_pairs->qsort (comp_dr_with_seg_len_pair);
1512 /* Scan the sorted dr pairs and check if we can combine alias checks
1513 of two neighboring dr pairs. */
1514 for (i = 1; i < alias_pairs->length (); ++i)
1516 /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2). */
1517 dr_with_seg_len *dr_a1 = &(*alias_pairs)[i-1].first,
1518 *dr_b1 = &(*alias_pairs)[i-1].second,
1519 *dr_a2 = &(*alias_pairs)[i].first,
1520 *dr_b2 = &(*alias_pairs)[i].second;
1522 /* Remove duplicate data ref pairs. */
1523 if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
1525 if (dump_enabled_p ())
1526 dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
1527 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1528 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1529 alias_pairs->ordered_remove (i--);
1530 continue;
1533 if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
1535 /* We consider the case that DR_B1 and DR_B2 are same memrefs,
1536 and DR_A1 and DR_A2 are two consecutive memrefs. */
1537 if (*dr_a1 == *dr_a2)
1539 std::swap (dr_a1, dr_b1);
1540 std::swap (dr_a2, dr_b2);
1543 poly_int64 init_a1, init_a2;
1544 /* Only consider cases in which the distance between the initial
1545 DR_A1 and the initial DR_A2 is known at compile time. */
1546 if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
1547 DR_BASE_ADDRESS (dr_a2->dr), 0)
1548 || !operand_equal_p (DR_OFFSET (dr_a1->dr),
1549 DR_OFFSET (dr_a2->dr), 0)
1550 || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
1551 || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
1552 continue;
1554 /* Don't combine if we can't tell which one comes first. */
1555 if (!ordered_p (init_a1, init_a2))
1556 continue;
1558 /* Make sure dr_a1 starts left of dr_a2. */
1559 if (maybe_gt (init_a1, init_a2))
1561 std::swap (*dr_a1, *dr_a2);
1562 std::swap (init_a1, init_a2);
1565 /* Work out what the segment length would be if we did combine
1566 DR_A1 and DR_A2:
1568 - If DR_A1 and DR_A2 have equal lengths, that length is
1569 also the combined length.
1571 - If DR_A1 and DR_A2 both have negative "lengths", the combined
1572 length is the lower bound on those lengths.
1574 - If DR_A1 and DR_A2 both have positive lengths, the combined
1575 length is the upper bound on those lengths.
1577 Other cases are unlikely to give a useful combination.
1579 The lengths both have sizetype, so the sign is taken from
1580 the step instead. */
1581 if (!operand_equal_p (dr_a1->seg_len, dr_a2->seg_len, 0))
1583 poly_uint64 seg_len_a1, seg_len_a2;
1584 if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
1585 || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
1586 continue;
1588 tree indicator_a = dr_direction_indicator (dr_a1->dr);
1589 if (TREE_CODE (indicator_a) != INTEGER_CST)
1590 continue;
1592 tree indicator_b = dr_direction_indicator (dr_a2->dr);
1593 if (TREE_CODE (indicator_b) != INTEGER_CST)
1594 continue;
1596 int sign_a = tree_int_cst_sgn (indicator_a);
1597 int sign_b = tree_int_cst_sgn (indicator_b);
1599 poly_uint64 new_seg_len;
1600 if (sign_a <= 0 && sign_b <= 0)
1601 new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
1602 else if (sign_a >= 0 && sign_b >= 0)
1603 new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
1604 else
1605 continue;
1607 dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
1608 new_seg_len);
1609 dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
1612 /* This is always positive due to the swap above. */
1613 poly_uint64 diff = init_a2 - init_a1;
1615 /* The new check will start at DR_A1. Make sure that its access
1616 size encompasses the initial DR_A2. */
1617 if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
1619 dr_a1->access_size = upper_bound (dr_a1->access_size,
1620 diff + dr_a2->access_size);
1621 unsigned int new_align = known_alignment (dr_a1->access_size);
1622 dr_a1->align = MIN (dr_a1->align, new_align);
1624 if (dump_enabled_p ())
1625 dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
1626 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1627 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1628 alias_pairs->ordered_remove (i);
1629 i--;
1634 /* Given LOOP's two data references and segment lengths described by DR_A
1635 and DR_B, create expression checking if the two addresses ranges intersect
1636 with each other based on index of the two addresses. This can only be
1637 done if DR_A and DR_B referring to the same (array) object and the index
1638 is the only difference. For example:
1640 DR_A DR_B
1641 data-ref arr[i] arr[j]
1642 base_object arr arr
1643 index {i_0, +, 1}_loop {j_0, +, 1}_loop
1645 The addresses and their index are like:
1647 |<- ADDR_A ->| |<- ADDR_B ->|
1648 ------------------------------------------------------->
1649 | | | | | | | | | |
1650 ------------------------------------------------------->
1651 i_0 ... i_0+4 j_0 ... j_0+4
1653 We can create expression based on index rather than address:
1655 (i_0 + 4 < j_0 || j_0 + 4 < i_0)
1657 Note evolution step of index needs to be considered in comparison. */
1659 static bool
1660 create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
1661 const dr_with_seg_len& dr_a,
1662 const dr_with_seg_len& dr_b)
1664 if (integer_zerop (DR_STEP (dr_a.dr))
1665 || integer_zerop (DR_STEP (dr_b.dr))
1666 || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
1667 return false;
1669 poly_uint64 seg_len1, seg_len2;
1670 if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
1671 || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
1672 return false;
1674 if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
1675 return false;
1677 if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
1678 return false;
1680 if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
1681 return false;
1683 gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
1685 bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
1686 unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
1687 if (neg_step)
1689 abs_step = -abs_step;
1690 seg_len1 = -seg_len1;
1691 seg_len2 = -seg_len2;
1693 else
1695 /* Include the access size in the length, so that we only have one
1696 tree addition below. */
1697 seg_len1 += dr_a.access_size;
1698 seg_len2 += dr_b.access_size;
1701 /* Infer the number of iterations with which the memory segment is accessed
1702 by DR. In other words, alias is checked if memory segment accessed by
1703 DR_A in some iterations intersect with memory segment accessed by DR_B
1704 in the same amount iterations.
1705 Note segnment length is a linear function of number of iterations with
1706 DR_STEP as the coefficient. */
1707 poly_uint64 niter_len1, niter_len2;
1708 if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
1709 || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
1710 return false;
1712 poly_uint64 niter_access1 = 0, niter_access2 = 0;
1713 if (neg_step)
1715 /* Divide each access size by the byte step, rounding up. */
1716 if (!can_div_trunc_p (dr_a.access_size - abs_step - 1,
1717 abs_step, &niter_access1)
1718 || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
1719 abs_step, &niter_access2))
1720 return false;
1723 unsigned int i;
1724 for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
1726 tree access1 = DR_ACCESS_FN (dr_a.dr, i);
1727 tree access2 = DR_ACCESS_FN (dr_b.dr, i);
1728 /* Two indices must be the same if they are not scev, or not scev wrto
1729 current loop being vecorized. */
1730 if (TREE_CODE (access1) != POLYNOMIAL_CHREC
1731 || TREE_CODE (access2) != POLYNOMIAL_CHREC
1732 || CHREC_VARIABLE (access1) != (unsigned)loop->num
1733 || CHREC_VARIABLE (access2) != (unsigned)loop->num)
1735 if (operand_equal_p (access1, access2, 0))
1736 continue;
1738 return false;
1740 /* The two indices must have the same step. */
1741 if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
1742 return false;
1744 tree idx_step = CHREC_RIGHT (access1);
1745 /* Index must have const step, otherwise DR_STEP won't be constant. */
1746 gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
1747 /* Index must evaluate in the same direction as DR. */
1748 gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
1750 tree min1 = CHREC_LEFT (access1);
1751 tree min2 = CHREC_LEFT (access2);
1752 if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
1753 return false;
1755 /* Ideally, alias can be checked against loop's control IV, but we
1756 need to prove linear mapping between control IV and reference
1757 index. Although that should be true, we check against (array)
1758 index of data reference. Like segment length, index length is
1759 linear function of the number of iterations with index_step as
1760 the coefficient, i.e, niter_len * idx_step. */
1761 tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
1762 build_int_cst (TREE_TYPE (min1),
1763 niter_len1));
1764 tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
1765 build_int_cst (TREE_TYPE (min2),
1766 niter_len2));
1767 tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1);
1768 tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2);
1769 /* Adjust ranges for negative step. */
1770 if (neg_step)
1772 /* IDX_LEN1 and IDX_LEN2 are negative in this case. */
1773 std::swap (min1, max1);
1774 std::swap (min2, max2);
1776 /* As with the lengths just calculated, we've measured the access
1777 sizes in iterations, so multiply them by the index step. */
1778 tree idx_access1
1779 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
1780 build_int_cst (TREE_TYPE (min1), niter_access1));
1781 tree idx_access2
1782 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
1783 build_int_cst (TREE_TYPE (min2), niter_access2));
1785 /* MINUS_EXPR because the above values are negative. */
1786 max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (max1), max1, idx_access1);
1787 max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (max2), max2, idx_access2);
1789 tree part_cond_expr
1790 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1791 fold_build2 (LE_EXPR, boolean_type_node, max1, min2),
1792 fold_build2 (LE_EXPR, boolean_type_node, max2, min1));
1793 if (*cond_expr)
1794 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1795 *cond_expr, part_cond_expr);
1796 else
1797 *cond_expr = part_cond_expr;
1799 return true;
1802 /* If ALIGN is nonzero, set up *SEQ_MIN_OUT and *SEQ_MAX_OUT so that for
1803 every address ADDR accessed by D:
1805 *SEQ_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEQ_MAX_OUT
1807 In this case, every element accessed by D is aligned to at least
1808 ALIGN bytes.
1810 If ALIGN is zero then instead set *SEG_MAX_OUT so that:
1812 *SEQ_MIN_OUT <= ADDR < *SEQ_MAX_OUT. */
1814 static void
1815 get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
1816 tree *seg_max_out, HOST_WIDE_INT align)
1818 /* Each access has the following pattern:
1820 <- |seg_len| ->
1821 <--- A: -ve step --->
1822 +-----+-------+-----+-------+-----+
1823 | n-1 | ,.... | 0 | ..... | n-1 |
1824 +-----+-------+-----+-------+-----+
1825 <--- B: +ve step --->
1826 <- |seg_len| ->
1828 base address
1830 where "n" is the number of scalar iterations covered by the segment.
1831 (This should be VF for a particular pair if we know that both steps
1832 are the same, otherwise it will be the full number of scalar loop
1833 iterations.)
1835 A is the range of bytes accessed when the step is negative,
1836 B is the range when the step is positive.
1838 If the access size is "access_size" bytes, the lowest addressed byte is:
1840 base + (step < 0 ? seg_len : 0) [LB]
1842 and the highest addressed byte is always below:
1844 base + (step < 0 ? 0 : seg_len) + access_size [UB]
1846 Thus:
1848 LB <= ADDR < UB
1850 If ALIGN is nonzero, all three values are aligned to at least ALIGN
1851 bytes, so:
1853 LB <= ADDR <= UB - ALIGN
1855 where "- ALIGN" folds naturally with the "+ access_size" and often
1856 cancels it out.
1858 We don't try to simplify LB and UB beyond this (e.g. by using
1859 MIN and MAX based on whether seg_len rather than the stride is
1860 negative) because it is possible for the absolute size of the
1861 segment to overflow the range of a ssize_t.
1863 Keeping the pointer_plus outside of the cond_expr should allow
1864 the cond_exprs to be shared with other alias checks. */
1865 tree indicator = dr_direction_indicator (d.dr);
1866 tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
1867 fold_convert (ssizetype, indicator),
1868 ssize_int (0));
1869 tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
1870 DR_OFFSET (d.dr));
1871 addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
1872 tree seg_len
1873 = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));
1875 tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
1876 seg_len, size_zero_node);
1877 tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
1878 size_zero_node, seg_len);
1879 max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
1880 size_int (d.access_size - align));
1882 *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
1883 *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
1886 /* Given two data references and segment lengths described by DR_A and DR_B,
1887 create expression checking if the two addresses ranges intersect with
1888 each other:
1890 ((DR_A_addr_0 + DR_A_segment_length_0) <= DR_B_addr_0)
1891 || (DR_B_addr_0 + DER_B_segment_length_0) <= DR_A_addr_0)) */
1893 static void
1894 create_intersect_range_checks (class loop *loop, tree *cond_expr,
1895 const dr_with_seg_len& dr_a,
1896 const dr_with_seg_len& dr_b)
1898 *cond_expr = NULL_TREE;
1899 if (create_intersect_range_checks_index (loop, cond_expr, dr_a, dr_b))
1900 return;
1902 unsigned HOST_WIDE_INT min_align;
1903 tree_code cmp_code;
1904 if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
1905 && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
1907 /* In this case adding access_size to seg_len is likely to give
1908 a simple X * step, where X is either the number of scalar
1909 iterations or the vectorization factor. We're better off
1910 keeping that, rather than subtracting an alignment from it.
1912 In this case the maximum values are exclusive and so there is
1913 no alias if the maximum of one segment equals the minimum
1914 of another. */
1915 min_align = 0;
1916 cmp_code = LE_EXPR;
1918 else
1920 /* Calculate the minimum alignment shared by all four pointers,
1921 then arrange for this alignment to be subtracted from the
1922 exclusive maximum values to get inclusive maximum values.
1923 This "- min_align" is cumulative with a "+ access_size"
1924 in the calculation of the maximum values. In the best
1925 (and common) case, the two cancel each other out, leaving
1926 us with an inclusive bound based only on seg_len. In the
1927 worst case we're simply adding a smaller number than before.
1929 Because the maximum values are inclusive, there is an alias
1930 if the maximum value of one segment is equal to the minimum
1931 value of the other. */
1932 min_align = MIN (dr_a.align, dr_b.align);
1933 cmp_code = LT_EXPR;
1936 tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
1937 get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
1938 get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);
1940 *cond_expr
1941 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1942 fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
1943 fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
1946 /* Create a conditional expression that represents the run-time checks for
1947 overlapping of address ranges represented by a list of data references
1948 pairs passed in ALIAS_PAIRS. Data references are in LOOP. The returned
1949 COND_EXPR is the conditional expression to be used in the if statement
1950 that controls which version of the loop gets executed at runtime. */
1952 void
1953 create_runtime_alias_checks (class loop *loop,
1954 vec<dr_with_seg_len_pair_t> *alias_pairs,
1955 tree * cond_expr)
1957 tree part_cond_expr;
1959 fold_defer_overflow_warnings ();
1960 for (size_t i = 0, s = alias_pairs->length (); i < s; ++i)
1962 const dr_with_seg_len& dr_a = (*alias_pairs)[i].first;
1963 const dr_with_seg_len& dr_b = (*alias_pairs)[i].second;
1965 if (dump_enabled_p ())
1966 dump_printf (MSG_NOTE,
1967 "create runtime check for data references %T and %T\n",
1968 DR_REF (dr_a.dr), DR_REF (dr_b.dr));
1970 /* Create condition expression for each pair data references. */
1971 create_intersect_range_checks (loop, &part_cond_expr, dr_a, dr_b);
1972 if (*cond_expr)
1973 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1974 *cond_expr, part_cond_expr);
1975 else
1976 *cond_expr = part_cond_expr;
1978 fold_undefer_and_ignore_overflow_warnings ();
1981 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1982 expressions. */
1983 static bool
1984 dr_equal_offsets_p1 (tree offset1, tree offset2)
1986 bool res;
1988 STRIP_NOPS (offset1);
1989 STRIP_NOPS (offset2);
1991 if (offset1 == offset2)
1992 return true;
1994 if (TREE_CODE (offset1) != TREE_CODE (offset2)
1995 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1996 return false;
1998 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1999 TREE_OPERAND (offset2, 0));
2001 if (!res || !BINARY_CLASS_P (offset1))
2002 return res;
2004 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
2005 TREE_OPERAND (offset2, 1));
2007 return res;
2010 /* Check if DRA and DRB have equal offsets. */
2011 bool
2012 dr_equal_offsets_p (struct data_reference *dra,
2013 struct data_reference *drb)
2015 tree offset1, offset2;
2017 offset1 = DR_OFFSET (dra);
2018 offset2 = DR_OFFSET (drb);
2020 return dr_equal_offsets_p1 (offset1, offset2);
2023 /* Returns true if FNA == FNB. */
2025 static bool
2026 affine_function_equal_p (affine_fn fna, affine_fn fnb)
2028 unsigned i, n = fna.length ();
2030 if (n != fnb.length ())
2031 return false;
2033 for (i = 0; i < n; i++)
2034 if (!operand_equal_p (fna[i], fnb[i], 0))
2035 return false;
2037 return true;
2040 /* If all the functions in CF are the same, returns one of them,
2041 otherwise returns NULL. */
2043 static affine_fn
2044 common_affine_function (conflict_function *cf)
2046 unsigned i;
2047 affine_fn comm;
2049 if (!CF_NONTRIVIAL_P (cf))
2050 return affine_fn ();
2052 comm = cf->fns[0];
2054 for (i = 1; i < cf->n; i++)
2055 if (!affine_function_equal_p (comm, cf->fns[i]))
2056 return affine_fn ();
2058 return comm;
2061 /* Returns the base of the affine function FN. */
2063 static tree
2064 affine_function_base (affine_fn fn)
2066 return fn[0];
2069 /* Returns true if FN is a constant. */
2071 static bool
2072 affine_function_constant_p (affine_fn fn)
2074 unsigned i;
2075 tree coef;
2077 for (i = 1; fn.iterate (i, &coef); i++)
2078 if (!integer_zerop (coef))
2079 return false;
2081 return true;
2084 /* Returns true if FN is the zero constant function. */
2086 static bool
2087 affine_function_zero_p (affine_fn fn)
2089 return (integer_zerop (affine_function_base (fn))
2090 && affine_function_constant_p (fn));
2093 /* Returns a signed integer type with the largest precision from TA
2094 and TB. */
2096 static tree
2097 signed_type_for_types (tree ta, tree tb)
2099 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
2100 return signed_type_for (ta);
2101 else
2102 return signed_type_for (tb);
2105 /* Applies operation OP on affine functions FNA and FNB, and returns the
2106 result. */
2108 static affine_fn
2109 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
2111 unsigned i, n, m;
2112 affine_fn ret;
2113 tree coef;
2115 if (fnb.length () > fna.length ())
2117 n = fna.length ();
2118 m = fnb.length ();
2120 else
2122 n = fnb.length ();
2123 m = fna.length ();
2126 ret.create (m);
2127 for (i = 0; i < n; i++)
2129 tree type = signed_type_for_types (TREE_TYPE (fna[i]),
2130 TREE_TYPE (fnb[i]));
2131 ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
2134 for (; fna.iterate (i, &coef); i++)
2135 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2136 coef, integer_zero_node));
2137 for (; fnb.iterate (i, &coef); i++)
2138 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2139 integer_zero_node, coef));
2141 return ret;
2144 /* Returns the sum of affine functions FNA and FNB. */
2146 static affine_fn
2147 affine_fn_plus (affine_fn fna, affine_fn fnb)
2149 return affine_fn_op (PLUS_EXPR, fna, fnb);
2152 /* Returns the difference of affine functions FNA and FNB. */
2154 static affine_fn
2155 affine_fn_minus (affine_fn fna, affine_fn fnb)
2157 return affine_fn_op (MINUS_EXPR, fna, fnb);
2160 /* Frees affine function FN. */
2162 static void
2163 affine_fn_free (affine_fn fn)
2165 fn.release ();
2168 /* Determine for each subscript in the data dependence relation DDR
2169 the distance. */
2171 static void
2172 compute_subscript_distance (struct data_dependence_relation *ddr)
2174 conflict_function *cf_a, *cf_b;
2175 affine_fn fn_a, fn_b, diff;
2177 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
2179 unsigned int i;
2181 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2183 struct subscript *subscript;
2185 subscript = DDR_SUBSCRIPT (ddr, i);
2186 cf_a = SUB_CONFLICTS_IN_A (subscript);
2187 cf_b = SUB_CONFLICTS_IN_B (subscript);
2189 fn_a = common_affine_function (cf_a);
2190 fn_b = common_affine_function (cf_b);
2191 if (!fn_a.exists () || !fn_b.exists ())
2193 SUB_DISTANCE (subscript) = chrec_dont_know;
2194 return;
2196 diff = affine_fn_minus (fn_a, fn_b);
2198 if (affine_function_constant_p (diff))
2199 SUB_DISTANCE (subscript) = affine_function_base (diff);
2200 else
2201 SUB_DISTANCE (subscript) = chrec_dont_know;
2203 affine_fn_free (diff);
2208 /* Returns the conflict function for "unknown". */
2210 static conflict_function *
2211 conflict_fn_not_known (void)
2213 conflict_function *fn = XCNEW (conflict_function);
2214 fn->n = NOT_KNOWN;
2216 return fn;
2219 /* Returns the conflict function for "independent". */
2221 static conflict_function *
2222 conflict_fn_no_dependence (void)
2224 conflict_function *fn = XCNEW (conflict_function);
2225 fn->n = NO_DEPENDENCE;
2227 return fn;
2230 /* Returns true if the address of OBJ is invariant in LOOP. */
2232 static bool
2233 object_address_invariant_in_loop_p (const class loop *loop, const_tree obj)
2235 while (handled_component_p (obj))
2237 if (TREE_CODE (obj) == ARRAY_REF)
2239 for (int i = 1; i < 4; ++i)
2240 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i),
2241 loop->num))
2242 return false;
2244 else if (TREE_CODE (obj) == COMPONENT_REF)
2246 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2247 loop->num))
2248 return false;
2250 obj = TREE_OPERAND (obj, 0);
2253 if (!INDIRECT_REF_P (obj)
2254 && TREE_CODE (obj) != MEM_REF)
2255 return true;
2257 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
2258 loop->num);
2261 /* Returns false if we can prove that data references A and B do not alias,
2262 true otherwise. If LOOP_NEST is false no cross-iteration aliases are
2263 considered. */
2265 bool
2266 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
2267 class loop *loop_nest)
2269 tree addr_a = DR_BASE_OBJECT (a);
2270 tree addr_b = DR_BASE_OBJECT (b);
2272 /* If we are not processing a loop nest but scalar code we
2273 do not need to care about possible cross-iteration dependences
2274 and thus can process the full original reference. Do so,
2275 similar to how loop invariant motion applies extra offset-based
2276 disambiguation. */
2277 if (!loop_nest)
2279 aff_tree off1, off2;
2280 poly_widest_int size1, size2;
2281 get_inner_reference_aff (DR_REF (a), &off1, &size1);
2282 get_inner_reference_aff (DR_REF (b), &off2, &size2);
2283 aff_combination_scale (&off1, -1);
2284 aff_combination_add (&off2, &off1);
2285 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
2286 return false;
2289 if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
2290 && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
2291 /* For cross-iteration dependences the cliques must be valid for the
2292 whole loop, not just individual iterations. */
2293 && (!loop_nest
2294 || MR_DEPENDENCE_CLIQUE (addr_a) == 1
2295 || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique)
2296 && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
2297 && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
2298 return false;
2300 /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
2301 do not know the size of the base-object. So we cannot do any
2302 offset/overlap based analysis but have to rely on points-to
2303 information only. */
2304 if (TREE_CODE (addr_a) == MEM_REF
2305 && (DR_UNCONSTRAINED_BASE (a)
2306 || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
2308 /* For true dependences we can apply TBAA. */
2309 if (flag_strict_aliasing
2310 && DR_IS_WRITE (a) && DR_IS_READ (b)
2311 && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
2312 get_alias_set (DR_REF (b))))
2313 return false;
2314 if (TREE_CODE (addr_b) == MEM_REF)
2315 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2316 TREE_OPERAND (addr_b, 0));
2317 else
2318 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2319 build_fold_addr_expr (addr_b));
2321 else if (TREE_CODE (addr_b) == MEM_REF
2322 && (DR_UNCONSTRAINED_BASE (b)
2323 || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
2325 /* For true dependences we can apply TBAA. */
2326 if (flag_strict_aliasing
2327 && DR_IS_WRITE (a) && DR_IS_READ (b)
2328 && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
2329 get_alias_set (DR_REF (b))))
2330 return false;
2331 if (TREE_CODE (addr_a) == MEM_REF)
2332 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2333 TREE_OPERAND (addr_b, 0));
2334 else
2335 return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
2336 TREE_OPERAND (addr_b, 0));
2339 /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
2340 that is being subsetted in the loop nest. */
2341 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
2342 return refs_output_dependent_p (addr_a, addr_b);
2343 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
2344 return refs_anti_dependent_p (addr_a, addr_b);
2345 return refs_may_alias_p (addr_a, addr_b);
2348 /* REF_A and REF_B both satisfy access_fn_component_p. Return true
2349 if it is meaningful to compare their associated access functions
2350 when checking for dependencies. */
2352 static bool
2353 access_fn_components_comparable_p (tree ref_a, tree ref_b)
2355 /* Allow pairs of component refs from the following sets:
2357 { REALPART_EXPR, IMAGPART_EXPR }
2358 { COMPONENT_REF }
2359 { ARRAY_REF }. */
2360 tree_code code_a = TREE_CODE (ref_a);
2361 tree_code code_b = TREE_CODE (ref_b);
2362 if (code_a == IMAGPART_EXPR)
2363 code_a = REALPART_EXPR;
2364 if (code_b == IMAGPART_EXPR)
2365 code_b = REALPART_EXPR;
2366 if (code_a != code_b)
2367 return false;
2369 if (TREE_CODE (ref_a) == COMPONENT_REF)
2370 /* ??? We cannot simply use the type of operand #0 of the refs here as
2371 the Fortran compiler smuggles type punning into COMPONENT_REFs.
2372 Use the DECL_CONTEXT of the FIELD_DECLs instead. */
2373 return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
2374 == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
2376 return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
2377 TREE_TYPE (TREE_OPERAND (ref_b, 0)));
2380 /* Initialize a data dependence relation between data accesses A and
2381 B. NB_LOOPS is the number of loops surrounding the references: the
2382 size of the classic distance/direction vectors. */
2384 struct data_dependence_relation *
2385 initialize_data_dependence_relation (struct data_reference *a,
2386 struct data_reference *b,
2387 vec<loop_p> loop_nest)
2389 struct data_dependence_relation *res;
2390 unsigned int i;
2392 res = XCNEW (struct data_dependence_relation);
2393 DDR_A (res) = a;
2394 DDR_B (res) = b;
2395 DDR_LOOP_NEST (res).create (0);
2396 DDR_SUBSCRIPTS (res).create (0);
2397 DDR_DIR_VECTS (res).create (0);
2398 DDR_DIST_VECTS (res).create (0);
2400 if (a == NULL || b == NULL)
2402 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2403 return res;
2406 /* If the data references do not alias, then they are independent. */
2407 if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL))
2409 DDR_ARE_DEPENDENT (res) = chrec_known;
2410 return res;
2413 unsigned int num_dimensions_a = DR_NUM_DIMENSIONS (a);
2414 unsigned int num_dimensions_b = DR_NUM_DIMENSIONS (b);
2415 if (num_dimensions_a == 0 || num_dimensions_b == 0)
2417 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2418 return res;
2421 /* For unconstrained bases, the root (highest-indexed) subscript
2422 describes a variation in the base of the original DR_REF rather
2423 than a component access. We have no type that accurately describes
2424 the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
2425 applying this subscript) so limit the search to the last real
2426 component access.
2428 E.g. for:
2430 void
2431 f (int a[][8], int b[][8])
2433 for (int i = 0; i < 8; ++i)
2434 a[i * 2][0] = b[i][0];
2437 the a and b accesses have a single ARRAY_REF component reference [0]
2438 but have two subscripts. */
2439 if (DR_UNCONSTRAINED_BASE (a))
2440 num_dimensions_a -= 1;
2441 if (DR_UNCONSTRAINED_BASE (b))
2442 num_dimensions_b -= 1;
2444 /* These structures describe sequences of component references in
2445 DR_REF (A) and DR_REF (B). Each component reference is tied to a
2446 specific access function. */
2447 struct {
2448 /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
2449 DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
2450 indices. In C notation, these are the indices of the rightmost
2451 component references; e.g. for a sequence .b.c.d, the start
2452 index is for .d. */
2453 unsigned int start_a;
2454 unsigned int start_b;
2456 /* The sequence contains LENGTH consecutive access functions from
2457 each DR. */
2458 unsigned int length;
2460 /* The enclosing objects for the A and B sequences respectively,
2461 i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
2462 and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied. */
2463 tree object_a;
2464 tree object_b;
2465 } full_seq = {}, struct_seq = {};
2467 /* Before each iteration of the loop:
2469 - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
2470 - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B). */
2471 unsigned int index_a = 0;
2472 unsigned int index_b = 0;
2473 tree ref_a = DR_REF (a);
2474 tree ref_b = DR_REF (b);
2476 /* Now walk the component references from the final DR_REFs back up to
2477 the enclosing base objects. Each component reference corresponds
2478 to one access function in the DR, with access function 0 being for
2479 the final DR_REF and the highest-indexed access function being the
2480 one that is applied to the base of the DR.
2482 Look for a sequence of component references whose access functions
2483 are comparable (see access_fn_components_comparable_p). If more
2484 than one such sequence exists, pick the one nearest the base
2485 (which is the leftmost sequence in C notation). Store this sequence
2486 in FULL_SEQ.
2488 For example, if we have:
2490 struct foo { struct bar s; ... } (*a)[10], (*b)[10];
2492 A: a[0][i].s.c.d
2493 B: __real b[0][i].s.e[i].f
2495 (where d is the same type as the real component of f) then the access
2496 functions would be:
2498 0 1 2 3
2499 A: .d .c .s [i]
2501 0 1 2 3 4 5
2502 B: __real .f [i] .e .s [i]
2504 The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
2505 and [i] is an ARRAY_REF. However, the A1/B3 column contains two
2506 COMPONENT_REF accesses for struct bar, so is comparable. Likewise
2507 the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
2508 so is comparable. The A3/B5 column contains two ARRAY_REFs that
2509 index foo[10] arrays, so is again comparable. The sequence is
2510 therefore:
2512 A: [1, 3] (i.e. [i].s.c)
2513 B: [3, 5] (i.e. [i].s.e)
2515 Also look for sequences of component references whose access
2516 functions are comparable and whose enclosing objects have the same
2517 RECORD_TYPE. Store this sequence in STRUCT_SEQ. In the above
2518 example, STRUCT_SEQ would be:
2520 A: [1, 2] (i.e. s.c)
2521 B: [3, 4] (i.e. s.e) */
2522 while (index_a < num_dimensions_a && index_b < num_dimensions_b)
2524 /* REF_A and REF_B must be one of the component access types
2525 allowed by dr_analyze_indices. */
2526 gcc_checking_assert (access_fn_component_p (ref_a));
2527 gcc_checking_assert (access_fn_component_p (ref_b));
2529 /* Get the immediately-enclosing objects for REF_A and REF_B,
2530 i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
2531 and DR_ACCESS_FN (B, INDEX_B). */
2532 tree object_a = TREE_OPERAND (ref_a, 0);
2533 tree object_b = TREE_OPERAND (ref_b, 0);
2535 tree type_a = TREE_TYPE (object_a);
2536 tree type_b = TREE_TYPE (object_b);
2537 if (access_fn_components_comparable_p (ref_a, ref_b))
2539 /* This pair of component accesses is comparable for dependence
2540 analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
2541 DR_ACCESS_FN (B, INDEX_B) in the sequence. */
2542 if (full_seq.start_a + full_seq.length != index_a
2543 || full_seq.start_b + full_seq.length != index_b)
2545 /* The accesses don't extend the current sequence,
2546 so start a new one here. */
2547 full_seq.start_a = index_a;
2548 full_seq.start_b = index_b;
2549 full_seq.length = 0;
2552 /* Add this pair of references to the sequence. */
2553 full_seq.length += 1;
2554 full_seq.object_a = object_a;
2555 full_seq.object_b = object_b;
2557 /* If the enclosing objects are structures (and thus have the
2558 same RECORD_TYPE), record the new sequence in STRUCT_SEQ. */
2559 if (TREE_CODE (type_a) == RECORD_TYPE)
2560 struct_seq = full_seq;
2562 /* Move to the next containing reference for both A and B. */
2563 ref_a = object_a;
2564 ref_b = object_b;
2565 index_a += 1;
2566 index_b += 1;
2567 continue;
2570 /* Try to approach equal type sizes. */
2571 if (!COMPLETE_TYPE_P (type_a)
2572 || !COMPLETE_TYPE_P (type_b)
2573 || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
2574 || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
2575 break;
2577 unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
2578 unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
2579 if (size_a <= size_b)
2581 index_a += 1;
2582 ref_a = object_a;
2584 if (size_b <= size_a)
2586 index_b += 1;
2587 ref_b = object_b;
2591 /* See whether FULL_SEQ ends at the base and whether the two bases
2592 are equal. We do not care about TBAA or alignment info so we can
2593 use OEP_ADDRESS_OF to avoid false negatives. */
2594 tree base_a = DR_BASE_OBJECT (a);
2595 tree base_b = DR_BASE_OBJECT (b);
2596 bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
2597 && full_seq.start_b + full_seq.length == num_dimensions_b
2598 && DR_UNCONSTRAINED_BASE (a) == DR_UNCONSTRAINED_BASE (b)
2599 && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
2600 && types_compatible_p (TREE_TYPE (base_a),
2601 TREE_TYPE (base_b))
2602 && (!loop_nest.exists ()
2603 || (object_address_invariant_in_loop_p
2604 (loop_nest[0], base_a))));
2606 /* If the bases are the same, we can include the base variation too.
2607 E.g. the b accesses in:
2609 for (int i = 0; i < n; ++i)
2610 b[i + 4][0] = b[i][0];
2612 have a definite dependence distance of 4, while for:
2614 for (int i = 0; i < n; ++i)
2615 a[i + 4][0] = b[i][0];
2617 the dependence distance depends on the gap between a and b.
2619 If the bases are different then we can only rely on the sequence
2620 rooted at a structure access, since arrays are allowed to overlap
2621 arbitrarily and change shape arbitrarily. E.g. we treat this as
2622 valid code:
2624 int a[256];
2626 ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];
2628 where two lvalues with the same int[4][3] type overlap, and where
2629 both lvalues are distinct from the object's declared type. */
2630 if (same_base_p)
2632 if (DR_UNCONSTRAINED_BASE (a))
2633 full_seq.length += 1;
2635 else
2636 full_seq = struct_seq;
2638 /* Punt if we didn't find a suitable sequence. */
2639 if (full_seq.length == 0)
2641 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2642 return res;
2645 if (!same_base_p)
2647 /* Partial overlap is possible for different bases when strict aliasing
2648 is not in effect. It's also possible if either base involves a union
2649 access; e.g. for:
2651 struct s1 { int a[2]; };
2652 struct s2 { struct s1 b; int c; };
2653 struct s3 { int d; struct s1 e; };
2654 union u { struct s2 f; struct s3 g; } *p, *q;
2656 the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
2657 "p->g.e" (base "p->g") and might partially overlap the s1 at
2658 "q->g.e" (base "q->g"). */
2659 if (!flag_strict_aliasing
2660 || ref_contains_union_access_p (full_seq.object_a)
2661 || ref_contains_union_access_p (full_seq.object_b))
2663 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2664 return res;
2667 DDR_COULD_BE_INDEPENDENT_P (res) = true;
2668 if (!loop_nest.exists ()
2669 || (object_address_invariant_in_loop_p (loop_nest[0],
2670 full_seq.object_a)
2671 && object_address_invariant_in_loop_p (loop_nest[0],
2672 full_seq.object_b)))
2674 DDR_OBJECT_A (res) = full_seq.object_a;
2675 DDR_OBJECT_B (res) = full_seq.object_b;
2679 DDR_AFFINE_P (res) = true;
2680 DDR_ARE_DEPENDENT (res) = NULL_TREE;
2681 DDR_SUBSCRIPTS (res).create (full_seq.length);
2682 DDR_LOOP_NEST (res) = loop_nest;
2683 DDR_SELF_REFERENCE (res) = false;
2685 for (i = 0; i < full_seq.length; ++i)
2687 struct subscript *subscript;
2689 subscript = XNEW (struct subscript);
2690 SUB_ACCESS_FN (subscript, 0) = DR_ACCESS_FN (a, full_seq.start_a + i);
2691 SUB_ACCESS_FN (subscript, 1) = DR_ACCESS_FN (b, full_seq.start_b + i);
2692 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
2693 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
2694 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
2695 SUB_DISTANCE (subscript) = chrec_dont_know;
2696 DDR_SUBSCRIPTS (res).safe_push (subscript);
2699 return res;
2702 /* Frees memory used by the conflict function F. */
2704 static void
2705 free_conflict_function (conflict_function *f)
2707 unsigned i;
2709 if (CF_NONTRIVIAL_P (f))
2711 for (i = 0; i < f->n; i++)
2712 affine_fn_free (f->fns[i]);
2714 free (f);
2717 /* Frees memory used by SUBSCRIPTS. */
2719 static void
2720 free_subscripts (vec<subscript_p> subscripts)
2722 unsigned i;
2723 subscript_p s;
2725 FOR_EACH_VEC_ELT (subscripts, i, s)
2727 free_conflict_function (s->conflicting_iterations_in_a);
2728 free_conflict_function (s->conflicting_iterations_in_b);
2729 free (s);
2731 subscripts.release ();
2734 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
2735 description. */
2737 static inline void
2738 finalize_ddr_dependent (struct data_dependence_relation *ddr,
2739 tree chrec)
2741 DDR_ARE_DEPENDENT (ddr) = chrec;
2742 free_subscripts (DDR_SUBSCRIPTS (ddr));
2743 DDR_SUBSCRIPTS (ddr).create (0);
2746 /* The dependence relation DDR cannot be represented by a distance
2747 vector. */
2749 static inline void
2750 non_affine_dependence_relation (struct data_dependence_relation *ddr)
2752 if (dump_file && (dump_flags & TDF_DETAILS))
2753 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
2755 DDR_AFFINE_P (ddr) = false;
2760 /* This section contains the classic Banerjee tests. */
2762 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
2763 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
2765 static inline bool
2766 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
2768 return (evolution_function_is_constant_p (chrec_a)
2769 && evolution_function_is_constant_p (chrec_b));
2772 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
2773 variable, i.e., if the SIV (Single Index Variable) test is true. */
2775 static bool
2776 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
2778 if ((evolution_function_is_constant_p (chrec_a)
2779 && evolution_function_is_univariate_p (chrec_b))
2780 || (evolution_function_is_constant_p (chrec_b)
2781 && evolution_function_is_univariate_p (chrec_a)))
2782 return true;
2784 if (evolution_function_is_univariate_p (chrec_a)
2785 && evolution_function_is_univariate_p (chrec_b))
2787 switch (TREE_CODE (chrec_a))
2789 case POLYNOMIAL_CHREC:
2790 switch (TREE_CODE (chrec_b))
2792 case POLYNOMIAL_CHREC:
2793 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
2794 return false;
2795 /* FALLTHRU */
2797 default:
2798 return true;
2801 default:
2802 return true;
2806 return false;
2809 /* Creates a conflict function with N dimensions. The affine functions
2810 in each dimension follow. */
2812 static conflict_function *
2813 conflict_fn (unsigned n, ...)
2815 unsigned i;
2816 conflict_function *ret = XCNEW (conflict_function);
2817 va_list ap;
2819 gcc_assert (n > 0 && n <= MAX_DIM);
2820 va_start (ap, n);
2822 ret->n = n;
2823 for (i = 0; i < n; i++)
2824 ret->fns[i] = va_arg (ap, affine_fn);
2825 va_end (ap);
2827 return ret;
2830 /* Returns constant affine function with value CST. */
2832 static affine_fn
2833 affine_fn_cst (tree cst)
2835 affine_fn fn;
2836 fn.create (1);
2837 fn.quick_push (cst);
2838 return fn;
2841 /* Returns affine function with single variable, CST + COEF * x_DIM. */
2843 static affine_fn
2844 affine_fn_univar (tree cst, unsigned dim, tree coef)
2846 affine_fn fn;
2847 fn.create (dim + 1);
2848 unsigned i;
2850 gcc_assert (dim > 0);
2851 fn.quick_push (cst);
2852 for (i = 1; i < dim; i++)
2853 fn.quick_push (integer_zero_node);
2854 fn.quick_push (coef);
2855 return fn;
2858 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
2859 *OVERLAPS_B are initialized to the functions that describe the
2860 relation between the elements accessed twice by CHREC_A and
2861 CHREC_B. For k >= 0, the following property is verified:
2863 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2865 static void
2866 analyze_ziv_subscript (tree chrec_a,
2867 tree chrec_b,
2868 conflict_function **overlaps_a,
2869 conflict_function **overlaps_b,
2870 tree *last_conflicts)
2872 tree type, difference;
2873 dependence_stats.num_ziv++;
2875 if (dump_file && (dump_flags & TDF_DETAILS))
2876 fprintf (dump_file, "(analyze_ziv_subscript \n");
2878 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2879 chrec_a = chrec_convert (type, chrec_a, NULL);
2880 chrec_b = chrec_convert (type, chrec_b, NULL);
2881 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2883 switch (TREE_CODE (difference))
2885 case INTEGER_CST:
2886 if (integer_zerop (difference))
2888 /* The difference is equal to zero: the accessed index
2889 overlaps for each iteration in the loop. */
2890 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2891 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2892 *last_conflicts = chrec_dont_know;
2893 dependence_stats.num_ziv_dependent++;
2895 else
2897 /* The accesses do not overlap. */
2898 *overlaps_a = conflict_fn_no_dependence ();
2899 *overlaps_b = conflict_fn_no_dependence ();
2900 *last_conflicts = integer_zero_node;
2901 dependence_stats.num_ziv_independent++;
2903 break;
2905 default:
2906 /* We're not sure whether the indexes overlap. For the moment,
2907 conservatively answer "don't know". */
2908 if (dump_file && (dump_flags & TDF_DETAILS))
2909 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
2911 *overlaps_a = conflict_fn_not_known ();
2912 *overlaps_b = conflict_fn_not_known ();
2913 *last_conflicts = chrec_dont_know;
2914 dependence_stats.num_ziv_unimplemented++;
2915 break;
2918 if (dump_file && (dump_flags & TDF_DETAILS))
2919 fprintf (dump_file, ")\n");
2922 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
2923 and only if it fits to the int type. If this is not the case, or the
2924 bound on the number of iterations of LOOP could not be derived, returns
2925 chrec_dont_know. */
2927 static tree
2928 max_stmt_executions_tree (class loop *loop)
2930 widest_int nit;
2932 if (!max_stmt_executions (loop, &nit))
2933 return chrec_dont_know;
2935 if (!wi::fits_to_tree_p (nit, unsigned_type_node))
2936 return chrec_dont_know;
2938 return wide_int_to_tree (unsigned_type_node, nit);
2941 /* Determine whether the CHREC is always positive/negative. If the expression
2942 cannot be statically analyzed, return false, otherwise set the answer into
2943 VALUE. */
2945 static bool
2946 chrec_is_positive (tree chrec, bool *value)
2948 bool value0, value1, value2;
2949 tree end_value, nb_iter;
2951 switch (TREE_CODE (chrec))
2953 case POLYNOMIAL_CHREC:
2954 if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
2955 || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
2956 return false;
2958 /* FIXME -- overflows. */
2959 if (value0 == value1)
2961 *value = value0;
2962 return true;
2965 /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
2966 and the proof consists in showing that the sign never
2967 changes during the execution of the loop, from 0 to
2968 loop->nb_iterations. */
2969 if (!evolution_function_is_affine_p (chrec))
2970 return false;
2972 nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
2973 if (chrec_contains_undetermined (nb_iter))
2974 return false;
2976 #if 0
2977 /* TODO -- If the test is after the exit, we may decrease the number of
2978 iterations by one. */
2979 if (after_exit)
2980 nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
2981 #endif
2983 end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
2985 if (!chrec_is_positive (end_value, &value2))
2986 return false;
2988 *value = value0;
2989 return value0 == value1;
2991 case INTEGER_CST:
2992 switch (tree_int_cst_sgn (chrec))
2994 case -1:
2995 *value = false;
2996 break;
2997 case 1:
2998 *value = true;
2999 break;
3000 default:
3001 return false;
3003 return true;
3005 default:
3006 return false;
3011 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
3012 constant, and CHREC_B is an affine function. *OVERLAPS_A and
3013 *OVERLAPS_B are initialized to the functions that describe the
3014 relation between the elements accessed twice by CHREC_A and
3015 CHREC_B. For k >= 0, the following property is verified:
3017 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
3019 static void
3020 analyze_siv_subscript_cst_affine (tree chrec_a,
3021 tree chrec_b,
3022 conflict_function **overlaps_a,
3023 conflict_function **overlaps_b,
3024 tree *last_conflicts)
3026 bool value0, value1, value2;
3027 tree type, difference, tmp;
3029 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3030 chrec_a = chrec_convert (type, chrec_a, NULL);
3031 chrec_b = chrec_convert (type, chrec_b, NULL);
3032 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
3034 /* Special case overlap in the first iteration. */
3035 if (integer_zerop (difference))
3037 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3038 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3039 *last_conflicts = integer_one_node;
3040 return;
3043 if (!chrec_is_positive (initial_condition (difference), &value0))
3045 if (dump_file && (dump_flags & TDF_DETAILS))
3046 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
3048 dependence_stats.num_siv_unimplemented++;
3049 *overlaps_a = conflict_fn_not_known ();
3050 *overlaps_b = conflict_fn_not_known ();
3051 *last_conflicts = chrec_dont_know;
3052 return;
3054 else
3056 if (value0 == false)
3058 if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3059 || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
3061 if (dump_file && (dump_flags & TDF_DETAILS))
3062 fprintf (dump_file, "siv test failed: chrec not positive.\n");
3064 *overlaps_a = conflict_fn_not_known ();
3065 *overlaps_b = conflict_fn_not_known ();
3066 *last_conflicts = chrec_dont_know;
3067 dependence_stats.num_siv_unimplemented++;
3068 return;
3070 else
3072 if (value1 == true)
3074 /* Example:
3075 chrec_a = 12
3076 chrec_b = {10, +, 1}
3079 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3081 HOST_WIDE_INT numiter;
3082 class loop *loop = get_chrec_loop (chrec_b);
3084 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3085 tmp = fold_build2 (EXACT_DIV_EXPR, type,
3086 fold_build1 (ABS_EXPR, type, difference),
3087 CHREC_RIGHT (chrec_b));
3088 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3089 *last_conflicts = integer_one_node;
3092 /* Perform weak-zero siv test to see if overlap is
3093 outside the loop bounds. */
3094 numiter = max_stmt_executions_int (loop);
3096 if (numiter >= 0
3097 && compare_tree_int (tmp, numiter) > 0)
3099 free_conflict_function (*overlaps_a);
3100 free_conflict_function (*overlaps_b);
3101 *overlaps_a = conflict_fn_no_dependence ();
3102 *overlaps_b = conflict_fn_no_dependence ();
3103 *last_conflicts = integer_zero_node;
3104 dependence_stats.num_siv_independent++;
3105 return;
3107 dependence_stats.num_siv_dependent++;
3108 return;
3111 /* When the step does not divide the difference, there are
3112 no overlaps. */
3113 else
3115 *overlaps_a = conflict_fn_no_dependence ();
3116 *overlaps_b = conflict_fn_no_dependence ();
3117 *last_conflicts = integer_zero_node;
3118 dependence_stats.num_siv_independent++;
3119 return;
3123 else
3125 /* Example:
3126 chrec_a = 12
3127 chrec_b = {10, +, -1}
3129 In this case, chrec_a will not overlap with chrec_b. */
3130 *overlaps_a = conflict_fn_no_dependence ();
3131 *overlaps_b = conflict_fn_no_dependence ();
3132 *last_conflicts = integer_zero_node;
3133 dependence_stats.num_siv_independent++;
3134 return;
3138 else
3140 if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3141 || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
3143 if (dump_file && (dump_flags & TDF_DETAILS))
3144 fprintf (dump_file, "siv test failed: chrec not positive.\n");
3146 *overlaps_a = conflict_fn_not_known ();
3147 *overlaps_b = conflict_fn_not_known ();
3148 *last_conflicts = chrec_dont_know;
3149 dependence_stats.num_siv_unimplemented++;
3150 return;
3152 else
3154 if (value2 == false)
3156 /* Example:
3157 chrec_a = 3
3158 chrec_b = {10, +, -1}
3160 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3162 HOST_WIDE_INT numiter;
3163 class loop *loop = get_chrec_loop (chrec_b);
3165 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3166 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
3167 CHREC_RIGHT (chrec_b));
3168 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3169 *last_conflicts = integer_one_node;
3171 /* Perform weak-zero siv test to see if overlap is
3172 outside the loop bounds. */
3173 numiter = max_stmt_executions_int (loop);
3175 if (numiter >= 0
3176 && compare_tree_int (tmp, numiter) > 0)
3178 free_conflict_function (*overlaps_a);
3179 free_conflict_function (*overlaps_b);
3180 *overlaps_a = conflict_fn_no_dependence ();
3181 *overlaps_b = conflict_fn_no_dependence ();
3182 *last_conflicts = integer_zero_node;
3183 dependence_stats.num_siv_independent++;
3184 return;
3186 dependence_stats.num_siv_dependent++;
3187 return;
3190 /* When the step does not divide the difference, there
3191 are no overlaps. */
3192 else
3194 *overlaps_a = conflict_fn_no_dependence ();
3195 *overlaps_b = conflict_fn_no_dependence ();
3196 *last_conflicts = integer_zero_node;
3197 dependence_stats.num_siv_independent++;
3198 return;
3201 else
3203 /* Example:
3204 chrec_a = 3
3205 chrec_b = {4, +, 1}
3207 In this case, chrec_a will not overlap with chrec_b. */
3208 *overlaps_a = conflict_fn_no_dependence ();
3209 *overlaps_b = conflict_fn_no_dependence ();
3210 *last_conflicts = integer_zero_node;
3211 dependence_stats.num_siv_independent++;
3212 return;
/* Helper recursive function for initializing the matrix A.  Returns
   the initial value of CHREC.

   Walks down the chain of CHREC_LEFTs, storing each step (scaled by
   MULT, which is +1 for the first access function and -1 for the
   second) into column 0 of A, starting at row INDEX.  Returns
   chrec_dont_know when a step does not fit in a HOST_WIDE_INT.  */

static tree
initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
{
  gcc_assert (chrec);

  switch (TREE_CODE (chrec))
    {
    case POLYNOMIAL_CHREC:
      /* Steps must be representable as integers for the lambda
	 framework; bail out otherwise.  */
      if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
	return chrec_dont_know;

      A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
      return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);

    case PLUS_EXPR:
    case MULT_EXPR:
    case MINUS_EXPR:
      {
	tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
	tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);

	return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
      }

    CASE_CONVERT:
      {
	tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
	return chrec_convert (chrec_type (chrec), op, NULL);
      }

    case BIT_NOT_EXPR:
      {
	/* Handle ~X as -1 - X.  */
	tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
	return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
			      build_int_cst (TREE_TYPE (chrec), -1), op);
      }

    case INTEGER_CST:
      return chrec;

    default:
      gcc_unreachable ();
      return NULL_TREE;
    }
}
/* Truncating division; for the nonnegative operands used below this
   equals floor division.  */
#define FLOOR_DIV(x,y) ((x) / (y))

/* Solves the special case of the Diophantine equation:
   | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)

   Computes the descriptions OVERLAPS_A and OVERLAPS_B.  NITER is the
   number of iterations that loops X and Y run.  The overlaps will be
   constructed as evolutions in dimension DIM.  */

static void
compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
					 HOST_WIDE_INT step_a,
					 HOST_WIDE_INT step_b,
					 affine_fn *overlaps_a,
					 affine_fn *overlaps_b,
					 tree *last_conflicts, int dim)
{
  /* Conflicts can only repeat when both steps move in the same
     direction; otherwise the only common value is the initial 0.  */
  if (((step_a > 0 && step_b > 0)
       || (step_a < 0 && step_b < 0)))
    {
      HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
      HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;

      /* Successive overlaps are lcm (step_a, step_b) apart, i.e.
	 every step_b / gcd iterations of A and step_a / gcd
	 iterations of B.  */
      gcd_steps_a_b = gcd (step_a, step_b);
      step_overlaps_a = step_b / gcd_steps_a_b;
      step_overlaps_b = step_a / gcd_steps_a_b;

      if (niter > 0)
	{
	  /* Last conflict is bounded by how many overlap steps fit
	     in NITER iterations of either loop.  */
	  tau2 = FLOOR_DIV (niter, step_overlaps_a);
	  tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
	  last_conflict = tau2;
	  *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
	}
      else
	*last_conflicts = chrec_dont_know;

      *overlaps_a = affine_fn_univar (integer_zero_node, dim,
				      build_int_cst (NULL_TREE,
						     step_overlaps_a));
      *overlaps_b = affine_fn_univar (integer_zero_node, dim,
				      build_int_cst (NULL_TREE,
						     step_overlaps_b));
    }

  else
    {
      /* Steps move apart: only the trivial (0, 0) solution, which we
	 report as "no conflict".  */
      *overlaps_a = affine_fn_cst (integer_zero_node);
      *overlaps_b = affine_fn_cst (integer_zero_node);
      *last_conflicts = integer_zero_node;
    }
}
/* Solves the special case of a Diophantine equation where CHREC_A is
   an affine bivariate function, and CHREC_B is an affine univariate
   function.  For example,

   | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z

   has the following overlapping functions:

   | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
   | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
   | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v

   FORNOW: This is a specialized implementation for a case occurring in
   a common benchmark.  Implement the general algorithm.  */

static void
compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
				      conflict_function **overlaps_a,
				      conflict_function **overlaps_b,
				      tree *last_conflicts)
{
  bool xz_p, yz_p, xyz_p;
  HOST_WIDE_INT step_x, step_y, step_z;
  HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
  affine_fn overlaps_a_xz, overlaps_b_xz;
  affine_fn overlaps_a_yz, overlaps_b_yz;
  affine_fn overlaps_a_xyz, overlaps_b_xyz;
  affine_fn ova1, ova2, ovb;
  tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;

  /* x is the outer dimension of the bivariate CHREC_A, y its inner
     dimension, and z the dimension of the univariate CHREC_B.  */
  step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
  step_y = int_cst_value (CHREC_RIGHT (chrec_a));
  step_z = int_cst_value (CHREC_RIGHT (chrec_b));

  niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
  niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
  niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));

  /* All three iteration counts must be known for this analysis.  */
  if (niter_x < 0 || niter_y < 0 || niter_z < 0)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      return;
    }

  /* Solve the three univariate sub-problems: x vs. z, y vs. z, and
     (x + y) vs. z.  */
  niter = MIN (niter_x, niter_z);
  compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
					   &overlaps_a_xz,
					   &overlaps_b_xz,
					   &last_conflicts_xz, 1);
  niter = MIN (niter_y, niter_z);
  compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
					   &overlaps_a_yz,
					   &overlaps_b_yz,
					   &last_conflicts_yz, 2);
  niter = MIN (niter_x, niter_z);
  niter = MIN (niter_y, niter);
  compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
					   &overlaps_a_xyz,
					   &overlaps_b_xyz,
					   &last_conflicts_xyz, 3);

  xz_p = !integer_zerop (last_conflicts_xz);
  yz_p = !integer_zerop (last_conflicts_yz);
  xyz_p = !integer_zerop (last_conflicts_xyz);

  if (xz_p || yz_p || xyz_p)
    {
      /* Sum the contributing sub-solutions; affine_fn_plus allocates
	 a fresh function, so free the previous values afterwards.  */
      ova1 = affine_fn_cst (integer_zero_node);
      ova2 = affine_fn_cst (integer_zero_node);
      ovb = affine_fn_cst (integer_zero_node);
      if (xz_p)
	{
	  affine_fn t0 = ova1;
	  affine_fn t2 = ovb;

	  ova1 = affine_fn_plus (ova1, overlaps_a_xz);
	  ovb = affine_fn_plus (ovb, overlaps_b_xz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  *last_conflicts = last_conflicts_xz;
	}
      if (yz_p)
	{
	  affine_fn t0 = ova2;
	  affine_fn t2 = ovb;

	  ova2 = affine_fn_plus (ova2, overlaps_a_yz);
	  ovb = affine_fn_plus (ovb, overlaps_b_yz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  *last_conflicts = last_conflicts_yz;
	}
      if (xyz_p)
	{
	  affine_fn t0 = ova1;
	  affine_fn t2 = ova2;
	  affine_fn t4 = ovb;

	  ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
	  ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
	  ovb = affine_fn_plus (ovb, overlaps_b_xyz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  affine_fn_free (t4);
	  *last_conflicts = last_conflicts_xyz;
	}
      *overlaps_a = conflict_fn (2, ova1, ova2);
      *overlaps_b = conflict_fn (1, ovb);
    }
  else
    {
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = integer_zero_node;
    }

  /* The sub-solutions were copied by affine_fn_plus / conflict_fn;
     release the temporaries.  */
  affine_fn_free (overlaps_a_xz);
  affine_fn_free (overlaps_b_xz);
  affine_fn_free (overlaps_a_yz);
  affine_fn_free (overlaps_b_yz);
  affine_fn_free (overlaps_a_xyz);
  affine_fn_free (overlaps_b_xyz);
}
3450 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
3452 static void
3453 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
3454 int size)
3456 memcpy (vec2, vec1, size * sizeof (*vec1));
3459 /* Copy the elements of M x N matrix MAT1 to MAT2. */
3461 static void
3462 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
3463 int m, int n)
3465 int i;
3467 for (i = 0; i < m; i++)
3468 lambda_vector_copy (mat1[i], mat2[i], n);
3471 /* Store the N x N identity matrix in MAT. */
3473 static void
3474 lambda_matrix_id (lambda_matrix mat, int size)
3476 int i, j;
3478 for (i = 0; i < size; i++)
3479 for (j = 0; j < size; j++)
3480 mat[i][j] = (i == j) ? 1 : 0;
3483 /* Return the index of the first nonzero element of vector VEC1 between
3484 START and N. We must have START <= N.
3485 Returns N if VEC1 is the zero vector. */
3487 static int
3488 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
3490 int j = start;
3491 while (j < n && vec1[j] == 0)
3492 j++;
3493 return j;
3496 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
3497 R2 = R2 + CONST1 * R1. */
3499 static void
3500 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2,
3501 lambda_int const1)
3503 int i;
3505 if (const1 == 0)
3506 return;
3508 for (i = 0; i < n; i++)
3509 mat[r2][i] += const1 * mat[r1][i];
3512 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
3513 and store the result in VEC2. */
3515 static void
3516 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
3517 int size, lambda_int const1)
3519 int i;
3521 if (const1 == 0)
3522 lambda_vector_clear (vec2, size);
3523 else
3524 for (i = 0; i < size; i++)
3525 vec2[i] = const1 * vec1[i];
3528 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
3530 static void
3531 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
3532 int size)
3534 lambda_vector_mult_const (vec1, vec2, size, -1);
3537 /* Negate row R1 of matrix MAT which has N columns. */
3539 static void
3540 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
3542 lambda_vector_negate (mat[r1], mat[r1], n);
3545 /* Return true if two vectors are equal. */
3547 static bool
3548 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
3550 int i;
3551 for (i = 0; i < size; i++)
3552 if (vec1[i] != vec2[i])
3553 return false;
3554 return true;
/* Given an M x N integer matrix A, this function determines an M x
   M unimodular matrix U, and an M x N echelon matrix S such that
   "U.A = S".  This decomposition is also known as "right Hermite".

   Ref: Algorithm 2.1 page 33 in "Loop Transformations for
   Restructuring Compilers" Utpal Banerjee.  */

static void
lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
			     lambda_matrix S, lambda_matrix U)
{
  int i, j, i0 = 0;

  /* Start from S = A and U = identity; every row operation below is
     applied to both, which maintains the invariant U.A = S.  */
  lambda_matrix_copy (A, S, m, n);
  lambda_matrix_id (U, m);

  for (j = 0; j < n; j++)
    {
      if (lambda_vector_first_nz (S[j], m, i0) < m)
	{
	  ++i0;
	  /* Eliminate entries below the pivot with a Euclidean-style
	     reduction: repeatedly subtract the quotient multiple of
	     the row above, then swap, until the entry becomes 0.  */
	  for (i = m - 1; i >= i0; i--)
	    {
	      while (S[i][j] != 0)
		{
		  lambda_int sigma, factor, a, b;

		  /* SIGMA carries the sign so that the division is
		     done on nonnegative values.  */
		  a = S[i-1][j];
		  b = S[i][j];
		  sigma = (a * b < 0) ? -1: 1;
		  a = abs_hwi (a);
		  b = abs_hwi (b);
		  factor = sigma * (a / b);

		  lambda_matrix_row_add (S, n, i, i-1, -factor);
		  std::swap (S[i], S[i-1]);

		  lambda_matrix_row_add (U, m, i, i-1, -factor);
		  std::swap (U[i], U[i-1]);
		}
	    }
	}
    }
}
/* Determines the overlapping elements due to accesses CHREC_A and
   CHREC_B, that are affine functions.  This function cannot handle
   symbolic evolution functions, ie. when initial conditions are
   parameters, because it uses lambda matrices of integers.  */

static void
analyze_subscript_affine_affine (tree chrec_a,
				 tree chrec_b,
				 conflict_function **overlaps_a,
				 conflict_function **overlaps_b,
				 tree *last_conflicts)
{
  unsigned nb_vars_a, nb_vars_b, dim;
  HOST_WIDE_INT gamma, gcd_alpha_beta;
  lambda_matrix A, U, S;
  struct obstack scratch_obstack;

  if (eq_evolutions_p (chrec_a, chrec_b))
    {
      /* The accessed index overlaps for each iteration in the
	 loop.  */
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = chrec_dont_know;
      return;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_subscript_affine_affine \n");

  /* For determining the initial intersection, we have to solve a
     Diophantine equation.  This is the most time consuming part.

     For answering to the question: "Is there a dependence?" we have
     to prove that there exists a solution to the Diophantine
     equation, and that the solution is in the iteration domain,
     i.e. the solution is positive or zero, and that the solution
     happens before the upper bound loop.nb_iterations.  Otherwise
     there is no dependence.  This function outputs a description of
     the iterations that hold the intersections.  */

  nb_vars_a = nb_vars_in_chrec (chrec_a);
  nb_vars_b = nb_vars_in_chrec (chrec_b);

  /* All lambda matrices live on this obstack; it is freed wholesale
     at end_analyze_subs_aa.  */
  gcc_obstack_init (&scratch_obstack);

  dim = nb_vars_a + nb_vars_b;
  U = lambda_matrix_new (dim, dim, &scratch_obstack);
  A = lambda_matrix_new (dim, 1, &scratch_obstack);
  S = lambda_matrix_new (dim, 1, &scratch_obstack);

  /* Fill A with the steps of both chrecs: +1 scaling for CHREC_A,
     -1 for CHREC_B, so that a row-reduced A solves the equation
     "CHREC_A (x) - CHREC_B (y) = gamma".  */
  tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
  tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
  if (init_a == chrec_dont_know
      || init_b == chrec_dont_know)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "affine-affine test failed: "
		 "representation issue.\n");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }
  gamma = int_cst_value (init_b) - int_cst_value (init_a);

  /* Don't do all the hard work of solving the Diophantine equation
     when we already know the solution: for example,
     | {3, +, 1}_1
     | {3, +, 4}_2
     | gamma = 3 - 3 = 0.
     Then the first overlap occurs during the first iterations:
     | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
  */
  if (gamma == 0)
    {
      if (nb_vars_a == 1 && nb_vars_b == 1)
	{
	  HOST_WIDE_INT step_a, step_b;
	  HOST_WIDE_INT niter, niter_a, niter_b;
	  affine_fn ova, ovb;

	  niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
	  niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
	  niter = MIN (niter_a, niter_b);
	  step_a = int_cst_value (CHREC_RIGHT (chrec_a));
	  step_b = int_cst_value (CHREC_RIGHT (chrec_b));

	  compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
						   &ova, &ovb,
						   last_conflicts, 1);
	  *overlaps_a = conflict_fn (1, ova);
	  *overlaps_b = conflict_fn (1, ovb);
	}

      else if (nb_vars_a == 2 && nb_vars_b == 1)
	compute_overlap_steps_for_affine_1_2
	  (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);

      else if (nb_vars_a == 1 && nb_vars_b == 2)
	compute_overlap_steps_for_affine_1_2
	  (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);

      else
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "affine-affine test failed: too many variables.\n");
	  *overlaps_a = conflict_fn_not_known ();
	  *overlaps_b = conflict_fn_not_known ();
	  *last_conflicts = chrec_dont_know;
	}
      goto end_analyze_subs_aa;
    }

  /* U.A = S */
  lambda_matrix_right_hermite (A, dim, 1, S, U);

  /* Normalize so that the leading coefficient is positive; U is
     adjusted to keep U.A = S.  */
  if (S[0][0] < 0)
    {
      S[0][0] *= -1;
      lambda_matrix_row_negate (U, dim, 0);
    }
  gcd_alpha_beta = S[0][0];

  /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
     but that is a quite strange case.  Instead of ICEing, answer
     don't know.  */
  if (gcd_alpha_beta == 0)
    {
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }

  /* The classic "gcd-test".  */
  if (!int_divides_p (gcd_alpha_beta, gamma))
    {
      /* The "gcd-test" has determined that there is no integer
	 solution, i.e. there is no dependence.  */
      *overlaps_a = conflict_fn_no_dependence ();
      *overlaps_b = conflict_fn_no_dependence ();
      *last_conflicts = integer_zero_node;
    }

  /* Both access functions are univariate.  This includes SIV and MIV cases.  */
  else if (nb_vars_a == 1 && nb_vars_b == 1)
    {
      /* Both functions should have the same evolution sign.  */
      if (((A[0][0] > 0 && -A[1][0] > 0)
	   || (A[0][0] < 0 && -A[1][0] < 0)))
	{
	  /* The solutions are given by:
	     |
	     | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
	     |                           [u21 u22]    [y0]

	     For a given integer t.  Using the following variables,

	     | i0 = u11 * gamma / gcd_alpha_beta
	     | j0 = u12 * gamma / gcd_alpha_beta
	     | i1 = u21
	     | j1 = u22

	     the solutions are:

	     | x0 = i0 + i1 * t,
	     | y0 = j0 + j1 * t.  */
	  HOST_WIDE_INT i0, j0, i1, j1;

	  i0 = U[0][0] * gamma / gcd_alpha_beta;
	  j0 = U[0][1] * gamma / gcd_alpha_beta;
	  i1 = U[1][0];
	  j1 = U[1][1];

	  if ((i1 == 0 && i0 < 0)
	      || (j1 == 0 && j0 < 0))
	    {
	      /* There is no solution.
		 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
		 falls in here, but for the moment we don't look at the
		 upper bound of the iteration domain.  */
	      *overlaps_a = conflict_fn_no_dependence ();
	      *overlaps_b = conflict_fn_no_dependence ();
	      *last_conflicts = integer_zero_node;
	      goto end_analyze_subs_aa;
	    }

	  if (i1 > 0 && j1 > 0)
	    {
	      HOST_WIDE_INT niter_a
		= max_stmt_executions_int (get_chrec_loop (chrec_a));
	      HOST_WIDE_INT niter_b
		= max_stmt_executions_int (get_chrec_loop (chrec_b));
	      HOST_WIDE_INT niter = MIN (niter_a, niter_b);

	      /* (X0, Y0) is a solution of the Diophantine equation:
		 "chrec_a (X0) = chrec_b (Y0)".  */
	      HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
					CEIL (-j0, j1));
	      HOST_WIDE_INT x0 = i1 * tau1 + i0;
	      HOST_WIDE_INT y0 = j1 * tau1 + j0;

	      /* (X1, Y1) is the smallest positive solution of the eq
		 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
		 first conflict occurs.  */
	      HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
	      HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
	      HOST_WIDE_INT y1 = y0 - j1 * min_multiple;

	      if (niter > 0)
		{
		  /* If the overlap occurs outside of the bounds of the
		     loop, there is no dependence.  */
		  if (x1 >= niter_a || y1 >= niter_b)
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      goto end_analyze_subs_aa;
		    }

		  /* max stmt executions can get quite large, avoid
		     overflows by using wide ints here.  */
		  widest_int tau2
		    = wi::smin (wi::sdiv_floor (wi::sub (niter_a, i0), i1),
				wi::sdiv_floor (wi::sub (niter_b, j0), j1));
		  widest_int last_conflict = wi::sub (tau2, (x1 - i0)/i1);
		  if (wi::min_precision (last_conflict, SIGNED)
		      <= TYPE_PRECISION (integer_type_node))
		    *last_conflicts
		       = build_int_cst (integer_type_node,
					last_conflict.to_shwi ());
		  else
		    *last_conflicts = chrec_dont_know;
		}
	      else
		*last_conflicts = chrec_dont_know;

	      *overlaps_a
		= conflict_fn (1,
			       affine_fn_univar (build_int_cst (NULL_TREE, x1),
						 1,
						 build_int_cst (NULL_TREE, i1)));
	      *overlaps_b
		= conflict_fn (1,
			       affine_fn_univar (build_int_cst (NULL_TREE, y1),
						 1,
						 build_int_cst (NULL_TREE, j1)));
	    }
	  else
	    {
	      /* FIXME: For the moment, the upper bound of the
		 iteration domain for i and j is not checked.  */
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	    }
	}
      else
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
	  *overlaps_a = conflict_fn_not_known ();
	  *overlaps_b = conflict_fn_not_known ();
	  *last_conflicts = chrec_dont_know;
	}
    }
  else
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
    }

end_analyze_subs_aa:
  obstack_free (&scratch_obstack, NULL);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  (overlaps_a = ");
      dump_conflict_function (dump_file, *overlaps_a);
      fprintf (dump_file, ")\n  (overlaps_b = ");
      dump_conflict_function (dump_file, *overlaps_b);
      fprintf (dump_file, "))\n");
    }
}
/* Returns true when analyze_subscript_affine_affine can be used for
   determining the dependence relation between chrec_a and chrec_b,
   that contain symbols.  This function modifies chrec_a and chrec_b
   such that the analysis result is the same, and such that they don't
   contain symbols, and then can safely be passed to the analyzer.

   Example: The analysis of the following tuples of evolutions produce
   the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
   vs. {0, +, 1}_1

   {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
   {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
*/

static bool
can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
{
  tree diff, type, left_a, left_b, right_b;

  if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
      || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
    /* FIXME: For the moment not handled.  Might be refined later.  */
    return false;

  /* Only the difference of the symbolic initial conditions matters;
     if it is not a constant, we cannot eliminate the symbols.  */
  type = chrec_type (*chrec_a);
  left_a = CHREC_LEFT (*chrec_a);
  left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
  diff = chrec_fold_minus (type, left_a, left_b);

  if (!evolution_function_is_constant_p (diff))
    return false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");

  /* Rewrite both chrecs relative to CHREC_B's initial condition:
     CHREC_A starts at DIFF, CHREC_B starts at zero.  */
  *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
				     diff, CHREC_RIGHT (*chrec_a));
  right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
  *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
				     build_int_cst (type, 0),
				     right_b);
  return true;
}
/* Analyze a SIV (Single Index Variable) subscript.  *OVERLAPS_A and
   *OVERLAPS_B are initialized to the functions that describe the
   relation between the elements accessed twice by CHREC_A and
   CHREC_B.  For k >= 0, the following property is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_siv_subscript (tree chrec_a,
		       tree chrec_b,
		       conflict_function **overlaps_a,
		       conflict_function **overlaps_b,
		       tree *last_conflicts,
		       int loop_nest_num)
{
  dependence_stats.num_siv++;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_siv_subscript \n");

  /* Dispatch on the shapes of the two access functions: constant
     vs. affine (in either order), or affine vs. affine.  */
  if (evolution_function_is_constant_p (chrec_a)
      && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
    analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
				      overlaps_a, overlaps_b, last_conflicts);

  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
	   && evolution_function_is_constant_p (chrec_b))
    analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
				      overlaps_b, overlaps_a, last_conflicts);

  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
	   && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
    {
      if (!chrec_contains_symbols (chrec_a)
	  && !chrec_contains_symbols (chrec_b))
	{
	  analyze_subscript_affine_affine (chrec_a, chrec_b,
					   overlaps_a, overlaps_b,
					   last_conflicts);

	  if (CF_NOT_KNOWN_P (*overlaps_a)
	      || CF_NOT_KNOWN_P (*overlaps_b))
	    dependence_stats.num_siv_unimplemented++;
	  else if (CF_NO_DEPENDENCE_P (*overlaps_a)
		   || CF_NO_DEPENDENCE_P (*overlaps_b))
	    dependence_stats.num_siv_independent++;
	  else
	    dependence_stats.num_siv_dependent++;
	}
      else if (can_use_analyze_subscript_affine_affine (&chrec_a,
							&chrec_b))
	{
	  /* The symbols were eliminated from CHREC_A and CHREC_B by
	     rewriting them relative to their difference.  */
	  analyze_subscript_affine_affine (chrec_a, chrec_b,
					   overlaps_a, overlaps_b,
					   last_conflicts);

	  if (CF_NOT_KNOWN_P (*overlaps_a)
	      || CF_NOT_KNOWN_P (*overlaps_b))
	    dependence_stats.num_siv_unimplemented++;
	  else if (CF_NO_DEPENDENCE_P (*overlaps_a)
		   || CF_NO_DEPENDENCE_P (*overlaps_b))
	    dependence_stats.num_siv_independent++;
	  else
	    dependence_stats.num_siv_dependent++;
	}
      else
	goto siv_subscript_dontknow;
    }

  else
    {
    siv_subscript_dontknow:;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  siv test failed: unimplemented");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      dependence_stats.num_siv_unimplemented++;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, ")\n");
}
4020 /* Returns false if we can prove that the greatest common divisor of the steps
4021 of CHREC does not divide CST, false otherwise. */
4023 static bool
4024 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
4026 HOST_WIDE_INT cd = 0, val;
4027 tree step;
4029 if (!tree_fits_shwi_p (cst))
4030 return true;
4031 val = tree_to_shwi (cst);
4033 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
4035 step = CHREC_RIGHT (chrec);
4036 if (!tree_fits_shwi_p (step))
4037 return true;
4038 cd = gcd (cd, tree_to_shwi (step));
4039 chrec = CHREC_LEFT (chrec);
4042 return val % cd == 0;
/* Analyze a MIV (Multiple Index Variable) subscript with respect to
   LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
   functions that describe the relation between the elements accessed
   twice by CHREC_A and CHREC_B.  For k >= 0, the following property
   is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_miv_subscript (tree chrec_a,
		       tree chrec_b,
		       conflict_function **overlaps_a,
		       conflict_function **overlaps_b,
		       tree *last_conflicts,
		       class loop *loop_nest)
{
  tree type, difference;

  dependence_stats.num_miv++;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_miv_subscript \n");

  /* Compare both chrecs in a common signed type; the difference of
     the access functions drives the GCD-based independence test.  */
  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
  chrec_a = chrec_convert (type, chrec_a, NULL);
  chrec_b = chrec_convert (type, chrec_b, NULL);
  difference = chrec_fold_minus (type, chrec_a, chrec_b);

  if (eq_evolutions_p (chrec_a, chrec_b))
    {
      /* Access functions are the same: all the elements are accessed
	 in the same order.  */
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
      dependence_stats.num_miv_dependent++;
    }

  else if (evolution_function_is_constant_p (difference)
	   && evolution_function_is_affine_multivariate_p (chrec_a,
							   loop_nest->num)
	   && !gcd_of_steps_may_divide_p (chrec_a, difference))
    {
      /* testsuite/.../ssa-chrec-33.c
	 {{21, +, 2}_1, +, -2}_2  vs.  {{20, +, 2}_1, +, -2}_2

	 The difference is 1, and all the evolution steps are multiples
	 of 2, consequently there are no overlapping elements.  */
      *overlaps_a = conflict_fn_no_dependence ();
      *overlaps_b = conflict_fn_no_dependence ();
      *last_conflicts = integer_zero_node;
      dependence_stats.num_miv_independent++;
    }

  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest->num)
	   && !chrec_contains_symbols (chrec_a, loop_nest)
	   && evolution_function_is_affine_in_loop (chrec_b, loop_nest->num)
	   && !chrec_contains_symbols (chrec_b, loop_nest))
    {
      /* testsuite/.../ssa-chrec-35.c
	 {0, +, 1}_2  vs.  {0, +, 1}_3
	 the overlapping elements are respectively located at iterations:
	 {0, +, 1}_x and {0, +, 1}_x,
	 in other words, we have the equality:
	 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)

	 Other examples:
	 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
	 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)

	 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
	 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
      */
      analyze_subscript_affine_affine (chrec_a, chrec_b,
				       overlaps_a, overlaps_b, last_conflicts);

      if (CF_NOT_KNOWN_P (*overlaps_a)
	  || CF_NOT_KNOWN_P (*overlaps_b))
	dependence_stats.num_miv_unimplemented++;
      else if (CF_NO_DEPENDENCE_P (*overlaps_a)
	       || CF_NO_DEPENDENCE_P (*overlaps_b))
	dependence_stats.num_miv_independent++;
      else
	dependence_stats.num_miv_dependent++;
    }

  else
    {
      /* When the analysis is too difficult, answer "don't know".  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      dependence_stats.num_miv_unimplemented++;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, ")\n");
}
/* Determines the iterations for which CHREC_A is equal to CHREC_B
   with respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
   OVERLAP_ITERATIONS_B are initialized with two functions that
   describe the iterations that contain conflicting elements.

   Remark: For an integer k >= 0, the following equality is true:

   CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
*/

static void
analyze_overlapping_iterations (tree chrec_a,
				tree chrec_b,
				conflict_function **overlap_iterations_a,
				conflict_function **overlap_iterations_b,
				tree *last_conflicts, class loop *loop_nest)
{
  unsigned int lnn = loop_nest->num;

  dependence_stats.num_subscript_tests++;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "(analyze_overlapping_iterations \n");
      fprintf (dump_file, "  (chrec_a = ");
      print_generic_expr (dump_file, chrec_a);
      fprintf (dump_file, ")\n  (chrec_b = ");
      print_generic_expr (dump_file, chrec_b);
      fprintf (dump_file, ")\n");
    }

  if (chrec_a == NULL_TREE
      || chrec_b == NULL_TREE
      || chrec_contains_undetermined (chrec_a)
      || chrec_contains_undetermined (chrec_b))
    {
      dependence_stats.num_subscript_undetermined++;

      *overlap_iterations_a = conflict_fn_not_known ();
      *overlap_iterations_b = conflict_fn_not_known ();
    }

  /* If they are the same chrec, and are affine, they overlap
     on every iteration.  */
  else if (eq_evolutions_p (chrec_a, chrec_b)
	   && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
	       || operand_equal_p (chrec_a, chrec_b, 0)))
    {
      dependence_stats.num_same_subscript_function++;
      *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = chrec_dont_know;
    }

  /* If they aren't the same, and aren't affine, we can't do anything
     yet.  */
  else if ((chrec_contains_symbols (chrec_a)
	    || chrec_contains_symbols (chrec_b))
	   && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
	       || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
    {
      dependence_stats.num_subscript_undetermined++;
      *overlap_iterations_a = conflict_fn_not_known ();
      *overlap_iterations_b = conflict_fn_not_known ();
    }

  /* Dispatch to the ZIV, SIV or MIV analyzer based on how many
     distinct induction variables the two chrecs involve.  */
  else if (ziv_subscript_p (chrec_a, chrec_b))
    analyze_ziv_subscript (chrec_a, chrec_b,
			   overlap_iterations_a, overlap_iterations_b,
			   last_conflicts);

  else if (siv_subscript_p (chrec_a, chrec_b))
    analyze_siv_subscript (chrec_a, chrec_b,
			   overlap_iterations_a, overlap_iterations_b,
			   last_conflicts, lnn);

  else
    analyze_miv_subscript (chrec_a, chrec_b,
			   overlap_iterations_a, overlap_iterations_b,
			   last_conflicts, loop_nest);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  (overlap_iterations_a = ");
      dump_conflict_function (dump_file, *overlap_iterations_a);
      fprintf (dump_file, ")\n  (overlap_iterations_b = ");
      dump_conflict_function (dump_file, *overlap_iterations_b);
      fprintf (dump_file, "))\n");
    }
}
4237 /* Helper function for uniquely inserting distance vectors. */
4239 static void
4240 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
4242 unsigned i;
4243 lambda_vector v;
4245 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, v)
4246 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
4247 return;
4249 DDR_DIST_VECTS (ddr).safe_push (dist_v);
4252 /* Helper function for uniquely inserting direction vectors. */
4254 static void
4255 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
4257 unsigned i;
4258 lambda_vector v;
4260 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), i, v)
4261 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
4262 return;
4264 DDR_DIR_VECTS (ddr).safe_push (dir_v);
4267 /* Add a distance of 1 on all the loops outer than INDEX. If we
4268 haven't yet determined a distance for this outer loop, push a new
4269 distance vector composed of the previous distance, and a distance
4270 of 1 for this outer loop. Example:
4272 | loop_1
4273 | loop_2
4274 | A[10]
4275 | endloop_2
4276 | endloop_1
4278 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
4279 save (0, 1), then we have to save (1, 0). */
4281 static void
4282 add_outer_distances (struct data_dependence_relation *ddr,
4283 lambda_vector dist_v, int index)
4285 /* For each outer loop where init_v is not set, the accesses are
4286 in dependence of distance 1 in the loop. */
4287 while (--index >= 0)
4289 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4290 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
4291 save_v[index] = 1;
4292 save_dist_v (ddr, save_v);
/* Return false when fail to represent the data dependence as a
   distance vector.  A_INDEX is the index of the first reference
   (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
   second reference.  INIT_B is set to true when a component has been
   added to the distance vector DIST_V.  INDEX_CARRY is then set to
   the index in DIST_V that carries the dependence.  */

static bool
build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
			     unsigned int a_index, unsigned int b_index,
			     lambda_vector dist_v, bool *init_b,
			     int *index_carry)
{
  unsigned i;
  /* INIT_V tracks which components of DIST_V have been written, so
     that the subscript coupling test below can detect conflicting
     distances for the same loop.  */
  lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  class loop *loop = DDR_LOOP_NEST (ddr)[0];

  for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
    {
      tree access_fn_a, access_fn_b;
      struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);

      if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
	{
	  non_affine_dependence_relation (ddr);
	  return false;
	}

      access_fn_a = SUB_ACCESS_FN (subscript, a_index);
      access_fn_b = SUB_ACCESS_FN (subscript, b_index);

      if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
	  && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
	{
	  HOST_WIDE_INT dist;
	  int index;
	  int var_a = CHREC_VARIABLE (access_fn_a);
	  int var_b = CHREC_VARIABLE (access_fn_b);

	  /* Both evolutions must be in the same loop for the distance
	     to be meaningful.  */
	  if (var_a != var_b
	      || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
	    {
	      non_affine_dependence_relation (ddr);
	      return false;
	    }

	  /* When data references are collected in a loop while data
	     dependences are analyzed in loop nest nested in the loop, we
	     would have more number of access functions than number of
	     loops.  Skip access functions of loops not in the loop nest.

	     See PR89725 for more information.  */
	  if (flow_loop_nested_p (get_loop (cfun, var_a), loop))
	    continue;

	  dist = int_cst_value (SUB_DISTANCE (subscript));
	  index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
	  *index_carry = MIN (index, *index_carry);

	  /* This is the subscript coupling test.  If we have already
	     recorded a distance for this loop (a distance coming from
	     another subscript), it should be the same.  For example,
	     in the following code, there is no dependence:

	     | loop i = 0, N, 1
	     | T[i+1][i] = ...
	     | ... = T[i][i]
	     | endloop  */
	  if (init_v[index] != 0 && dist_v[index] != dist)
	    {
	      finalize_ddr_dependent (ddr, chrec_known);
	      return false;
	    }

	  dist_v[index] = dist;
	  init_v[index] = 1;
	  *init_b = true;
	}
      else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
	{
	  /* This can be for example an affine vs. constant dependence
	     (T[i] vs. T[3]) that is not an affine dependence and is
	     not representable as a distance vector.  */
	  non_affine_dependence_relation (ddr);
	  return false;
	}
    }

  return true;
}
4388 /* Return true when the DDR contains only constant access functions. */
4390 static bool
4391 constant_access_functions (const struct data_dependence_relation *ddr)
4393 unsigned i;
4394 subscript *sub;
4396 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4397 if (!evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 0))
4398 || !evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 1)))
4399 return false;
4401 return true;
/* Helper function for the case where DDR_A and DDR_B are the same
   multivariate access function with a constant step.  For an example
   see pr34635-1.c.  C_2 is a two-level polynomial chrec
   {{c_0, +, step_1}_x1, +, step_2}_x2.  */

static void
add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
{
  int x_1, x_2;
  tree c_1 = CHREC_LEFT (c_2);
  tree c_0 = CHREC_LEFT (c_1);
  lambda_vector dist_v;
  HOST_WIDE_INT v1, v2, cd;

  /* Polynomials with more than 2 variables are not handled yet.  When
     the evolution steps are parameters, it is not possible to
     represent the dependence using classical distance vectors.  */
  if (TREE_CODE (c_0) != INTEGER_CST
      || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
      || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
    {
      DDR_AFFINE_P (ddr) = false;
      return;
    }

  /* Positions of the two chrec variables in the loop nest.  */
  x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
  x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));

  /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2).  */
  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  v1 = int_cst_value (CHREC_RIGHT (c_1));
  v2 = int_cst_value (CHREC_RIGHT (c_2));
  /* Reduce the pair of steps by their gcd to get the smallest
     integral distance vector.  */
  cd = gcd (v1, v2);
  v1 /= cd;
  v2 /= cd;

  if (v2 < 0)
    {
      /* Negate both components so the vector stays lexicographically
	 positive.  */
      v2 = -v2;
      v1 = -v1;
    }

  dist_v[x_1] = v2;
  dist_v[x_2] = -v1;
  save_dist_v (ddr, dist_v);

  add_outer_distances (ddr, dist_v, x_1);
}
/* Helper function for the case where DDR_A and DDR_B are the same
   access functions.  Records the self-distance vectors implied by the
   loop nest.  */

static void
add_other_self_distances (struct data_dependence_relation *ddr)
{
  lambda_vector dist_v;
  unsigned i;
  /* Innermost loop index known to carry the dependence; starts past
     the end of the nest and is lowered as evolutions are seen.  */
  int index_carry = DDR_NB_LOOPS (ddr);
  subscript *sub;
  class loop *loop = DDR_LOOP_NEST (ddr)[0];

  FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
    {
      tree access_fun = SUB_ACCESS_FN (sub, 0);

      if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
	{
	  if (!evolution_function_is_univariate_p (access_fun, loop->num))
	    {
	      /* Multivariate evolutions are only handled for
		 single-subscript relations.  */
	      if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
		{
		  DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
		  return;
		}

	      access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);

	      if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
		add_multivariate_self_dist (ddr, access_fun);
	      else
		/* The evolution step is not constant: it varies in
		   the outer loop, so this cannot be represented by a
		   distance vector.  For example in pr34635.c the
		   evolution is {0, +, {0, +, 4}_1}_2.  */
		DDR_AFFINE_P (ddr) = false;

	      return;
	    }

	  /* When data references are collected in a loop while data
	     dependences are analyzed in loop nest nested in the loop, we
	     would have more number of access functions than number of
	     loops.  Skip access functions of loops not in the loop nest.

	     See PR89725 for more information.  */
	  if (flow_loop_nested_p (get_loop (cfun, CHREC_VARIABLE (access_fun)),
				  loop))
	    continue;

	  index_carry = MIN (index_carry,
			     index_in_loop_nest (CHREC_VARIABLE (access_fun),
						 DDR_LOOP_NEST (ddr)));
	}
    }

  /* Record unit distances on all loops outer than INDEX_CARRY.  */
  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  add_outer_distances (ddr, dist_v, index_carry);
}
4512 static void
4513 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
4515 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4517 dist_v[0] = 1;
4518 save_dist_v (ddr, dist_v);
4521 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
4522 is the case for example when access functions are the same and
4523 equal to a constant, as in:
4525 | loop_1
4526 | A[3] = ...
4527 | ... = A[3]
4528 | endloop_1
4530 in which case the distance vectors are (0) and (1). */
4532 static void
4533 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
4535 unsigned i, j;
4537 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
4539 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
4540 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
4541 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
4543 for (j = 0; j < ca->n; j++)
4544 if (affine_function_zero_p (ca->fns[j]))
4546 insert_innermost_unit_dist_vector (ddr);
4547 return;
4550 for (j = 0; j < cb->n; j++)
4551 if (affine_function_zero_p (cb->fns[j]))
4553 insert_innermost_unit_dist_vector (ddr);
4554 return;
4559 /* Return true when the DDR contains two data references that have the
4560 same access functions. */
4562 static inline bool
4563 same_access_functions (const struct data_dependence_relation *ddr)
4565 unsigned i;
4566 subscript *sub;
4568 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4569 if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
4570 SUB_ACCESS_FN (sub, 1)))
4571 return false;
4573 return true;
/* Compute the classic per loop distance vector.  DDR is the data
   dependence relation to build a vector from.  Return false when fail
   to represent the data dependence as a distance vector.  */

static bool
build_classic_dist_vector (struct data_dependence_relation *ddr,
			   class loop *loop_nest)
{
  bool init_b = false;
  int index_carry = DDR_NB_LOOPS (ddr);
  lambda_vector dist_v;

  /* Nothing to do if the relation is already finalized (known
     independent or unknown).  */
  if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
    return false;

  if (same_access_functions (ddr))
    {
      /* Save the 0 vector.  */
      dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
      save_dist_v (ddr, dist_v);

      if (constant_access_functions (ddr))
	add_distance_for_zero_overlaps (ddr);

      if (DDR_NB_LOOPS (ddr) > 1)
	add_other_self_distances (ddr);

      return true;
    }

  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
    return false;

  /* Save the distance vector if we initialized one.  */
  if (init_b)
    {
      /* Verify a basic constraint: classic distance vectors should
	 always be lexicographically positive.

	 Data references are collected in the order of execution of
	 the program, thus for the following loop

	 | for (i = 1; i < 100; i++)
	 |   for (j = 1; j < 100; j++)
	 |     {
	 |       t = T[j+1][i-1];  // A
	 |       T[j][i] = t + 2;  // B
	 |     }

	 references are collected following the direction of the wind:
	 A then B.  The data dependence tests are performed also
	 following this order, such that we're looking at the distance
	 separating the elements accessed by A from the elements later
	 accessed by B.  But in this example, the distance returned by
	 test_dep (A, B) is lexicographically negative (-1, 1), that
	 means that the access A occurs later than B with respect to
	 the outer loop, ie. we're actually looking upwind.  In this
	 case we solve test_dep (B, A) looking downwind to the
	 lexicographically positive solution, that returns the
	 distance vector (1, -1).  */
      if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
	{
	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
	  /* Re-run the dependence test with the references swapped.  */
	  if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
	    return false;
	  compute_subscript_distance (ddr);
	  if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
					    &index_carry))
	    return false;
	  save_dist_v (ddr, save_v);
	  DDR_REVERSED_P (ddr) = true;

	  /* In this case there is a dependence forward for all the
	     outer loops:

	     | for (k = 1; k < 100; k++)
	     |  for (i = 1; i < 100; i++)
	     |   for (j = 1; j < 100; j++)
	     |     {
	     |       t = T[j+1][i-1];  // A
	     |       T[j][i] = t + 2;  // B
	     |     }

	     the vectors are:
	     (0,  1, -1)
	     (1,  1, -1)
	     (1, -1,  1)
	  */
	  if (DDR_NB_LOOPS (ddr) > 1)
	    {
	      add_outer_distances (ddr, save_v, index_carry);
	      add_outer_distances (ddr, dist_v, index_carry);
	    }
	}
      else
	{
	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
	  lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));

	  if (DDR_NB_LOOPS (ddr) > 1)
	    {
	      /* Also record the opposite-direction vector obtained by
		 testing (B, A).  */
	      lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));

	      if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
		return false;
	      compute_subscript_distance (ddr);
	      if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
						&index_carry))
		return false;

	      save_dist_v (ddr, save_v);
	      add_outer_distances (ddr, dist_v, index_carry);
	      add_outer_distances (ddr, opposite_v, index_carry);
	    }
	  else
	    save_dist_v (ddr, save_v);
	}
    }
  else
    {
      /* There is a distance of 1 on all the outer loops: Example:
	 there is a dependence of distance 1 on loop_1 for the array A.

	 | loop_1
	 |   A[5] = ...
	 | endloop
      */
      add_outer_distances (ddr, dist_v,
			   lambda_vector_first_nz (dist_v,
						   DDR_NB_LOOPS (ddr), 0));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned i;

      fprintf (dump_file, "(build_classic_dist_vector\n");
      for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
	{
	  fprintf (dump_file, "  dist_vector = (");
	  print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
			       DDR_NB_LOOPS (ddr));
	  fprintf (dump_file, "  )\n");
	}
      fprintf (dump_file, ")\n");
    }

  return true;
}
4727 /* Return the direction for a given distance.
4728 FIXME: Computing dir this way is suboptimal, since dir can catch
4729 cases that dist is unable to represent. */
4731 static inline enum data_dependence_direction
4732 dir_from_dist (int dist)
4734 if (dist > 0)
4735 return dir_positive;
4736 else if (dist < 0)
4737 return dir_negative;
4738 else
4739 return dir_equal;
4742 /* Compute the classic per loop direction vector. DDR is the data
4743 dependence relation to build a vector from. */
4745 static void
4746 build_classic_dir_vector (struct data_dependence_relation *ddr)
4748 unsigned i, j;
4749 lambda_vector dist_v;
4751 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
4753 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4755 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
4756 dir_v[j] = dir_from_dist (dist_v[j]);
4758 save_dir_v (ddr, dir_v);
/* Helper function.  Returns true when there is a dependence between the
   data references.  A_INDEX is the index of the first reference (0 for
   DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference.  */

static bool
subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
			       unsigned int a_index, unsigned int b_index,
			       class loop *loop_nest)
{
  unsigned int i;
  tree last_conflicts;
  struct subscript *subscript;
  /* NULL_TREE: dependence; chrec_known: independence; chrec_dont_know:
     analysis failed for some subscript.  */
  tree res = NULL_TREE;

  for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
    {
      conflict_function *overlaps_a, *overlaps_b;

      analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
				      SUB_ACCESS_FN (subscript, b_index),
				      &overlaps_a, &overlaps_b,
				      &last_conflicts, loop_nest);

      /* Release any conflict functions computed by a previous run
	 before overwriting them.  */
      if (SUB_CONFLICTS_IN_A (subscript))
	free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
      if (SUB_CONFLICTS_IN_B (subscript))
	free_conflict_function (SUB_CONFLICTS_IN_B (subscript));

      SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
      SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
      SUB_LAST_CONFLICT (subscript) = last_conflicts;

      /* If there is any undetermined conflict function we have to
	 give a conservative answer in case we cannot prove that
	 no dependence exists when analyzing another subscript.  */
      if (CF_NOT_KNOWN_P (overlaps_a)
	  || CF_NOT_KNOWN_P (overlaps_b))
	{
	  res = chrec_dont_know;
	  continue;
	}

      /* When there is a subscript with no dependence we can stop.  */
      else if (CF_NO_DEPENDENCE_P (overlaps_a)
	       || CF_NO_DEPENDENCE_P (overlaps_b))
	{
	  res = chrec_known;
	  break;
	}
    }

  if (res == NULL_TREE)
    return true;

  if (res == chrec_known)
    dependence_stats.num_dependence_independent++;
  else
    dependence_stats.num_dependence_undetermined++;
  finalize_ddr_dependent (ddr, res);
  return false;
}
/* Computes the conflicting iterations in LOOP_NEST, and initialize DDR.  */

static void
subscript_dependence_tester (struct data_dependence_relation *ddr,
			     class loop *loop_nest)
{
  /* A true result means a dependence between DDR_A and DDR_B was
     proven (or must be conservatively assumed).  */
  if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
    dependence_stats.num_dependence_dependent++;

  /* Summarize the per-subscript conflicts as classic distance and, if
     that succeeds, direction vectors.  */
  compute_subscript_distance (ddr);
  if (build_classic_dist_vector (ddr, loop_nest))
    build_classic_dir_vector (ddr);
}
4838 /* Returns true when all the access functions of A are affine or
4839 constant with respect to LOOP_NEST. */
4841 static bool
4842 access_functions_are_affine_or_constant_p (const struct data_reference *a,
4843 const class loop *loop_nest)
4845 unsigned int i;
4846 vec<tree> fns = DR_ACCESS_FNS (a);
4847 tree t;
4849 FOR_EACH_VEC_ELT (fns, i, t)
4850 if (!evolution_function_is_invariant_p (t, loop_nest->num)
4851 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
4852 return false;
4854 return true;
/* This computes the affine dependence relation between A and B with
   respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
   independence between two accesses, while CHREC_DONT_KNOW is used
   for representing the unknown relation.

   Note that it is possible to stop the computation of the dependence
   relation the first time we detect a CHREC_KNOWN element for a given
   subscript.  */

void
compute_affine_dependence (struct data_dependence_relation *ddr,
			   class loop *loop_nest)
{
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "(compute_affine_dependence\n");
      fprintf (dump_file, "  stmt_a: ");
      print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
      fprintf (dump_file, "  stmt_b: ");
      print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
    }

  /* Analyze only when the dependence relation is not yet known.  */
  if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      dependence_stats.num_dependence_tests++;

      /* Only run the subscript tester when both references have
	 analyzable access functions.  */
      if (access_functions_are_affine_or_constant_p (dra, loop_nest)
	  && access_functions_are_affine_or_constant_p (drb, loop_nest))
	subscript_dependence_tester (ddr, loop_nest);

      /* As a last case, if the dependence cannot be determined, or if
	 the dependence is considered too difficult to determine, answer
	 "don't know".  */
      else
	{
	  dependence_stats.num_dependence_undetermined++;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Data ref a:\n");
	      dump_data_reference (dump_file, dra);
	      fprintf (dump_file, "Data ref b:\n");
	      dump_data_reference (dump_file, drb);
	      fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
	    }
	  finalize_ddr_dependent (ddr, chrec_dont_know);
	}
    }

  /* Close the dump parenthesis opened above, annotated with the final
     verdict.  */
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
	fprintf (dump_file, ") -> no dependence\n");
      else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	fprintf (dump_file, ") -> dependence analysis failed\n");
      else
	fprintf (dump_file, ")\n");
    }
}
/* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
   the data references in DATAREFS, in the LOOP_NEST.  When
   COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
   relations.  Return true when successful, i.e. data references number
   is small enough to be handled.  */

bool
compute_all_dependences (vec<data_reference_p> datarefs,
			 vec<ddr_p> *dependence_relations,
			 vec<loop_p> loop_nest,
			 bool compute_self_and_rr)
{
  struct data_dependence_relation *ddr;
  struct data_reference *a, *b;
  unsigned int i, j;

  /* Give up if the quadratic number of pairs would exceed the
     parameter-controlled limit.  */
  if ((int) datarefs.length ()
      > param_loop_max_datarefs_for_datadeps)
    {
      struct data_dependence_relation *ddr;

      /* Insert a single relation into dependence_relations:
	 chrec_dont_know.  */
      ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
      dependence_relations->safe_push (ddr);
      return false;
    }

  /* All ordered pairs (a, b) with a before b; read-read pairs only
     when COMPUTE_SELF_AND_RR.  */
  FOR_EACH_VEC_ELT (datarefs, i, a)
    for (j = i + 1; datarefs.iterate (j, &b); j++)
      if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
	{
	  ddr = initialize_data_dependence_relation (a, b, loop_nest);
	  dependence_relations->safe_push (ddr);
	  if (loop_nest.exists ())
	    compute_affine_dependence (ddr, loop_nest[0]);
	}

  /* Self relations (a, a).  */
  if (compute_self_and_rr)
    FOR_EACH_VEC_ELT (datarefs, i, a)
      {
	ddr = initialize_data_dependence_relation (a, a, loop_nest);
	dependence_relations->safe_push (ddr);
	if (loop_nest.exists ())
	  compute_affine_dependence (ddr, loop_nest[0]);
      }

  return true;
}
/* Describes a location of a memory reference.  Used by
   get_references_in_stmt and its callers to carry per-reference
   information before full data_reference analysis.  */

struct data_ref_loc
{
  /* The memory reference.  */
  tree ref;

  /* True if the memory reference is read.  */
  bool is_read;

  /* True if the data reference is conditional within the containing
     statement, i.e. if it might not occur even when the statement
     is executed and runs to completion.  */
  bool is_conditional_in_stmt;
};
/* Stores the locations of memory references in STMT to REFERENCES.  Returns
   true if STMT clobbers memory, false otherwise.  */

static bool
get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
{
  bool clobbers_memory = false;
  data_ref_loc ref;
  tree op0, op1;
  enum gimple_code stmt_code = gimple_code (stmt);

  /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
     As we cannot model data-references to not spelled out
     accesses give up if they may occur.  */
  if (stmt_code == GIMPLE_CALL
      && !(gimple_call_flags (stmt) & ECF_CONST))
    {
      /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
      if (gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_GOMP_SIMD_LANE:
	    {
	      class loop *loop = gimple_bb (stmt)->loop_father;
	      tree uid = gimple_call_arg (stmt, 0);
	      gcc_assert (TREE_CODE (uid) == SSA_NAME);
	      /* Only harmless when the lane belongs to the loop the
		 statement is in.  */
	      if (loop == NULL
		  || loop->simduid != SSA_NAME_VAR (uid))
		clobbers_memory = true;
	      break;
	    }
	  case IFN_MASK_LOAD:
	  case IFN_MASK_STORE:
	    /* Handled as explicit references below.  */
	    break;
	  default:
	    clobbers_memory = true;
	    break;
	  }
      else
	clobbers_memory = true;
    }
  else if (stmt_code == GIMPLE_ASM
	   && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
	       || gimple_vuse (stmt)))
    clobbers_memory = true;

  /* Statements without a virtual use cannot reference memory.  */
  if (!gimple_vuse (stmt))
    return clobbers_memory;

  if (stmt_code == GIMPLE_ASSIGN)
    {
      tree base;
      op0 = gimple_assign_lhs (stmt);
      op1 = gimple_assign_rhs1 (stmt);

      /* Record the RHS as a read when it is a decl or a reference with
	 an analyzable (non-SSA, non-invariant) base.  */
      if (DECL_P (op1)
	  || (REFERENCE_CLASS_P (op1)
	      && (base = get_base_address (op1))
	      && TREE_CODE (base) != SSA_NAME
	      && !is_gimple_min_invariant (base)))
	{
	  ref.ref = op1;
	  ref.is_read = true;
	  ref.is_conditional_in_stmt = false;
	  references->safe_push (ref);
	}
    }
  else if (stmt_code == GIMPLE_CALL)
    {
      unsigned i, n;
      tree ptr, type;
      unsigned int align;

      ref.is_read = false;
      if (gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_MASK_LOAD:
	    if (gimple_call_lhs (stmt) == NULL_TREE)
	      break;
	    ref.is_read = true;
	    /* FALLTHRU */
	  case IFN_MASK_STORE:
	    /* Build a MEM_REF representing the masked access; its type
	       carries the alignment given by the second argument.  */
	    ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
	    align = tree_to_shwi (gimple_call_arg (stmt, 1));
	    if (ref.is_read)
	      type = TREE_TYPE (gimple_call_lhs (stmt));
	    else
	      type = TREE_TYPE (gimple_call_arg (stmt, 3));
	    if (TYPE_ALIGN (type) != align)
	      type = build_aligned_type (type, align);
	    /* Masked accesses may not happen on every execution of the
	       statement.  */
	    ref.is_conditional_in_stmt = true;
	    ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
				   ptr);
	    references->safe_push (ref);
	    return false;
	  default:
	    break;
	  }

      /* Record call arguments that are themselves memory references.  */
      op0 = gimple_call_lhs (stmt);
      n = gimple_call_num_args (stmt);
      for (i = 0; i < n; i++)
	{
	  op1 = gimple_call_arg (stmt, i);

	  if (DECL_P (op1)
	      || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
	    {
	      ref.ref = op1;
	      ref.is_read = true;
	      ref.is_conditional_in_stmt = false;
	      references->safe_push (ref);
	    }
	}
    }
  else
    return clobbers_memory;

  /* Record the LHS (of the assignment or call) as a write.  */
  if (op0
      && (DECL_P (op0)
	  || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
    {
      ref.ref = op0;
      ref.is_read = false;
      ref.is_conditional_in_stmt = false;
      references->safe_push (ref);
    }
  return clobbers_memory;
}
5120 /* Returns true if the loop-nest has any data reference. */
5122 bool
5123 loop_nest_has_data_refs (loop_p loop)
5125 basic_block *bbs = get_loop_body (loop);
5126 auto_vec<data_ref_loc, 3> references;
5128 for (unsigned i = 0; i < loop->num_nodes; i++)
5130 basic_block bb = bbs[i];
5131 gimple_stmt_iterator bsi;
5133 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5135 gimple *stmt = gsi_stmt (bsi);
5136 get_references_in_stmt (stmt, &references);
5137 if (references.length ())
5139 free (bbs);
5140 return true;
5144 free (bbs);
5145 return false;
5148 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
5149 reference, returns false, otherwise returns true. NEST is the outermost
5150 loop of the loop nest in which the references should be analyzed. */
5152 opt_result
5153 find_data_references_in_stmt (class loop *nest, gimple *stmt,
5154 vec<data_reference_p> *datarefs)
5156 unsigned i;
5157 auto_vec<data_ref_loc, 2> references;
5158 data_ref_loc *ref;
5159 data_reference_p dr;
5161 if (get_references_in_stmt (stmt, &references))
5162 return opt_result::failure_at (stmt, "statement clobbers memory: %G",
5163 stmt);
5165 FOR_EACH_VEC_ELT (references, i, ref)
5167 dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
5168 loop_containing_stmt (stmt), ref->ref,
5169 stmt, ref->is_read, ref->is_conditional_in_stmt);
5170 gcc_assert (dr != NULL);
5171 datarefs->safe_push (dr);
5174 return opt_result::success ();
5177 /* Stores the data references in STMT to DATAREFS. If there is an
5178 unanalyzable reference, returns false, otherwise returns true.
5179 NEST is the outermost loop of the loop nest in which the references
5180 should be instantiated, LOOP is the loop in which the references
5181 should be analyzed. */
5183 bool
5184 graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
5185 vec<data_reference_p> *datarefs)
5187 unsigned i;
5188 auto_vec<data_ref_loc, 2> references;
5189 data_ref_loc *ref;
5190 bool ret = true;
5191 data_reference_p dr;
5193 if (get_references_in_stmt (stmt, &references))
5194 return false;
5196 FOR_EACH_VEC_ELT (references, i, ref)
5198 dr = create_data_ref (nest, loop, ref->ref, stmt, ref->is_read,
5199 ref->is_conditional_in_stmt);
5200 gcc_assert (dr != NULL);
5201 datarefs->safe_push (dr);
5204 return ret;
5207 /* Search the data references in LOOP, and record the information into
5208 DATAREFS. Returns chrec_dont_know when failing to analyze a
5209 difficult case, returns NULL_TREE otherwise. */
5211 tree
5212 find_data_references_in_bb (class loop *loop, basic_block bb,
5213 vec<data_reference_p> *datarefs)
5215 gimple_stmt_iterator bsi;
5217 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5219 gimple *stmt = gsi_stmt (bsi);
5221 if (!find_data_references_in_stmt (loop, stmt, datarefs))
5223 struct data_reference *res;
5224 res = XCNEW (struct data_reference);
5225 datarefs->safe_push (res);
5227 return chrec_dont_know;
5231 return NULL_TREE;
5234 /* Search the data references in LOOP, and record the information into
5235 DATAREFS. Returns chrec_dont_know when failing to analyze a
5236 difficult case, returns NULL_TREE otherwise.
5238 TODO: This function should be made smarter so that it can handle address
5239 arithmetic as if they were array accesses, etc. */
5241 tree
5242 find_data_references_in_loop (class loop *loop,
5243 vec<data_reference_p> *datarefs)
5245 basic_block bb, *bbs;
5246 unsigned int i;
5248 bbs = get_loop_body_in_dom_order (loop);
5250 for (i = 0; i < loop->num_nodes; i++)
5252 bb = bbs[i];
5254 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
5256 free (bbs);
5257 return chrec_dont_know;
5260 free (bbs);
5262 return NULL_TREE;
5265 /* Return the alignment in bytes that DRB is guaranteed to have at all
5266 times. */
5268 unsigned int
5269 dr_alignment (innermost_loop_behavior *drb)
5271 /* Get the alignment of BASE_ADDRESS + INIT. */
5272 unsigned int alignment = drb->base_alignment;
5273 unsigned int misalignment = (drb->base_misalignment
5274 + TREE_INT_CST_LOW (drb->init));
5275 if (misalignment != 0)
5276 alignment = MIN (alignment, misalignment & -misalignment);
5278 /* Cap it to the alignment of OFFSET. */
5279 if (!integer_zerop (drb->offset))
5280 alignment = MIN (alignment, drb->offset_alignment);
5282 /* Cap it to the alignment of STEP. */
5283 if (!integer_zerop (drb->step))
5284 alignment = MIN (alignment, drb->step_alignment);
5286 return alignment;
/* If BASE is a pointer-typed SSA name, try to find the object that it
   is based on.  Return this object X on success and store the alignment
   in bytes of BASE - &X in *ALIGNMENT_OUT.  */

static tree
get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
{
  if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL_TREE;

  gimple *def = SSA_NAME_DEF_STMT (base);
  base = analyze_scalar_evolution (loop_containing_stmt (def), base);

  /* Peel chrecs and record the minimum alignment preserved by
     all steps.  */
  unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
  while (TREE_CODE (base) == POLYNOMIAL_CHREC)
    {
      unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
      alignment = MIN (alignment, step_alignment);
      base = CHREC_LEFT (base);
    }

  /* Punt if the expression is too complicated to handle.  */
  if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL_TREE;

  /* The only useful cases are those for which a dereference folds to something
     other than an INDIRECT_REF.  */
  tree ref_type = TREE_TYPE (TREE_TYPE (base));
  tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
  if (!ref)
    return NULL_TREE;

  /* Analyze the base to which the steps we peeled were applied.  */
  poly_int64 bitsize, bitpos, bytepos;
  machine_mode mode;
  int unsignedp, reversep, volatilep;
  tree offset;
  base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
			      &unsignedp, &reversep, &volatilep);
  /* A bit position that is not a whole number of bytes cannot be
     expressed as a byte alignment.  */
  if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
    return NULL_TREE;

  /* Restrict the alignment to that guaranteed by the offsets.  */
  unsigned int bytepos_alignment = known_alignment (bytepos);
  if (bytepos_alignment != 0)
    alignment = MIN (alignment, bytepos_alignment);
  if (offset)
    {
      unsigned int offset_alignment = highest_pow2_factor (offset);
      alignment = MIN (alignment, offset_alignment);
    }

  *alignment_out = alignment;
  return base;
}
5347 /* Return the object whose alignment would need to be changed in order
5348 to increase the alignment of ADDR. Store the maximum achievable
5349 alignment in *MAX_ALIGNMENT. */
5351 tree
5352 get_base_for_alignment (tree addr, unsigned int *max_alignment)
5354 tree base = get_base_for_alignment_1 (addr, max_alignment);
5355 if (base)
5356 return base;
5358 if (TREE_CODE (addr) == ADDR_EXPR)
5359 addr = TREE_OPERAND (addr, 0);
5360 *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
5361 return addr;
5364 /* Recursive helper function. */
5366 static bool
5367 find_loop_nest_1 (class loop *loop, vec<loop_p> *loop_nest)
5369 /* Inner loops of the nest should not contain siblings. Example:
5370 when there are two consecutive loops,
5372 | loop_0
5373 | loop_1
5374 | A[{0, +, 1}_1]
5375 | endloop_1
5376 | loop_2
5377 | A[{0, +, 1}_2]
5378 | endloop_2
5379 | endloop_0
5381 the dependence relation cannot be captured by the distance
5382 abstraction. */
5383 if (loop->next)
5384 return false;
5386 loop_nest->safe_push (loop);
5387 if (loop->inner)
5388 return find_loop_nest_1 (loop->inner, loop_nest);
5389 return true;
5392 /* Return false when the LOOP is not well nested. Otherwise return
5393 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
5394 contain the loops from the outermost to the innermost, as they will
5395 appear in the classic distance vector. */
5397 bool
5398 find_loop_nest (class loop *loop, vec<loop_p> *loop_nest)
5400 loop_nest->safe_push (loop);
5401 if (loop->inner)
5402 return find_loop_nest_1 (loop->inner, loop_nest);
5403 return true;
5406 /* Returns true when the data dependences have been computed, false otherwise.
5407 Given a loop nest LOOP, the following vectors are returned:
5408 DATAREFS is initialized to all the array elements contained in this loop,
5409 DEPENDENCE_RELATIONS contains the relations between the data references.
5410 Compute read-read and self relations if
5411 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
5413 bool
5414 compute_data_dependences_for_loop (class loop *loop,
5415 bool compute_self_and_read_read_dependences,
5416 vec<loop_p> *loop_nest,
5417 vec<data_reference_p> *datarefs,
5418 vec<ddr_p> *dependence_relations)
5420 bool res = true;
5422 memset (&dependence_stats, 0, sizeof (dependence_stats));
5424 /* If the loop nest is not well formed, or one of the data references
5425 is not computable, give up without spending time to compute other
5426 dependences. */
5427 if (!loop
5428 || !find_loop_nest (loop, loop_nest)
5429 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
5430 || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
5431 compute_self_and_read_read_dependences))
5432 res = false;
5434 if (dump_file && (dump_flags & TDF_STATS))
5436 fprintf (dump_file, "Dependence tester statistics:\n");
5438 fprintf (dump_file, "Number of dependence tests: %d\n",
5439 dependence_stats.num_dependence_tests);
5440 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
5441 dependence_stats.num_dependence_dependent);
5442 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
5443 dependence_stats.num_dependence_independent);
5444 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
5445 dependence_stats.num_dependence_undetermined);
5447 fprintf (dump_file, "Number of subscript tests: %d\n",
5448 dependence_stats.num_subscript_tests);
5449 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
5450 dependence_stats.num_subscript_undetermined);
5451 fprintf (dump_file, "Number of same subscript function: %d\n",
5452 dependence_stats.num_same_subscript_function);
5454 fprintf (dump_file, "Number of ziv tests: %d\n",
5455 dependence_stats.num_ziv);
5456 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
5457 dependence_stats.num_ziv_dependent);
5458 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
5459 dependence_stats.num_ziv_independent);
5460 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
5461 dependence_stats.num_ziv_unimplemented);
5463 fprintf (dump_file, "Number of siv tests: %d\n",
5464 dependence_stats.num_siv);
5465 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
5466 dependence_stats.num_siv_dependent);
5467 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
5468 dependence_stats.num_siv_independent);
5469 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
5470 dependence_stats.num_siv_unimplemented);
5472 fprintf (dump_file, "Number of miv tests: %d\n",
5473 dependence_stats.num_miv);
5474 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
5475 dependence_stats.num_miv_dependent);
5476 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
5477 dependence_stats.num_miv_independent);
5478 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
5479 dependence_stats.num_miv_unimplemented);
5482 return res;
5485 /* Free the memory used by a data dependence relation DDR. */
5487 void
5488 free_dependence_relation (struct data_dependence_relation *ddr)
5490 if (ddr == NULL)
5491 return;
5493 if (DDR_SUBSCRIPTS (ddr).exists ())
5494 free_subscripts (DDR_SUBSCRIPTS (ddr));
5495 DDR_DIST_VECTS (ddr).release ();
5496 DDR_DIR_VECTS (ddr).release ();
5498 free (ddr);
5501 /* Free the memory used by the data dependence relations from
5502 DEPENDENCE_RELATIONS. */
5504 void
5505 free_dependence_relations (vec<ddr_p> dependence_relations)
5507 unsigned int i;
5508 struct data_dependence_relation *ddr;
5510 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
5511 if (ddr)
5512 free_dependence_relation (ddr);
5514 dependence_relations.release ();
5517 /* Free the memory used by the data references from DATAREFS. */
5519 void
5520 free_data_refs (vec<data_reference_p> datarefs)
5522 unsigned int i;
5523 struct data_reference *dr;
5525 FOR_EACH_VEC_ELT (datarefs, i, dr)
5526 free_data_ref (dr);
5527 datarefs.release ();
/* Common routine implementing both dr_direction_indicator and
   dr_zero_step_indicator.  Return USEFUL_MIN if the indicator is known
   to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
   Return the step as the indicator otherwise.  */

static tree
dr_step_indicator (struct data_reference *dr, int useful_min)
{
  tree step = DR_STEP (dr);
  if (!step)
    return NULL_TREE;
  STRIP_NOPS (step);
  /* Look for cases where the step is scaled by a positive constant
     integer, which will often be the access size.  If the multiplication
     doesn't change the sign (due to overflow effects) then we can
     test the unscaled value instead.  */
  if (TREE_CODE (step) == MULT_EXPR
      && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
      && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
    {
      tree factor = TREE_OPERAND (step, 1);
      step = TREE_OPERAND (step, 0);

      /* Strip widening and truncating conversions as well as nops.  */
      if (CONVERT_EXPR_P (step)
	  && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
	step = TREE_OPERAND (step, 0);
      tree type = TREE_TYPE (step);

      /* Get the range of step values that would not cause overflow.
	 Any unscaled value in [ssizetype-min / FACTOR,
	 ssizetype-max / FACTOR] keeps its sign after scaling.  */
      widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
			 / wi::to_widest (factor));
      widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
			 / wi::to_widest (factor));

      /* Get the range of values that the unconverted step actually has.
	 Fall back to the full range of its type when no range info is
	 recorded for the SSA name.  */
      wide_int step_min, step_max;
      if (TREE_CODE (step) != SSA_NAME
	  || get_range_info (step, &step_min, &step_max) != VR_RANGE)
	{
	  step_min = wi::to_wide (TYPE_MIN_VALUE (type));
	  step_max = wi::to_wide (TYPE_MAX_VALUE (type));
	}

      /* Check whether the unconverted step has an acceptable range.  */
      signop sgn = TYPE_SIGN (type);
      if (wi::les_p (minv, widest_int::from (step_min, sgn))
	  && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
	{
	  if (wi::ge_p (step_min, useful_min, sgn))
	    return ssize_int (useful_min);
	  else if (wi::lt_p (step_max, 0, sgn))
	    return ssize_int (-1);
	  else
	    return fold_convert (ssizetype, step);
	}
    }
  /* No useful simplification; let the caller analyze the raw step.  */
  return DR_STEP (dr);
}
5590 /* Return a value that is negative iff DR has a negative step. */
5592 tree
5593 dr_direction_indicator (struct data_reference *dr)
5595 return dr_step_indicator (dr, 0);
5598 /* Return a value that is zero iff DR has a zero step. */
5600 tree
5601 dr_zero_step_indicator (struct data_reference *dr)
5603 return dr_step_indicator (dr, 1);
5606 /* Return true if DR is known to have a nonnegative (but possibly zero)
5607 step. */
5609 bool
5610 dr_known_forward_stride_p (struct data_reference *dr)
5612 tree indicator = dr_direction_indicator (dr);
5613 tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
5614 fold_convert (ssizetype, indicator),
5615 ssize_int (0));
5616 return neg_step_val && integer_zerop (neg_step_val);