Add Doxygen comments to <bit> header
[official-gcc.git] / gcc / tree-data-ref.c
blob7f75b7e3afeebd33fda493d942ff7f7160102d39
1 /* Data references and dependences detectors.
2 Copyright (C) 2003-2019 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass walks a given loop structure searching for array
22 references. The information about the array accesses is recorded
23 in DATA_REFERENCE structures.
25 The basic test for determining the dependences is:
26 given two access functions chrec1 and chrec2 to a same array, and
27 x and y two vectors from the iteration domain, the same element of
28 the array is accessed twice at iterations x and y if and only if:
29 | chrec1 (x) == chrec2 (y).
31 The goals of this analysis are:
33 - to determine the independence: the relation between two
34 independent accesses is qualified with the chrec_known (this
35 information allows a loop parallelization),
37 - when two data references access the same data, to qualify the
38 dependence relation with classic dependence representations:
40 - distance vectors
41 - direction vectors
42 - loop carried level dependence
43 - polyhedron dependence
44 or with the chains of recurrences based representation,
46 - to define a knowledge base for storing the data dependence
47 information,
49 - to define an interface to access this data.
52 Definitions:
54 - subscript: given two array accesses a subscript is the tuple
55 composed of the access functions for a given dimension. Example:
56 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57 (f1, g1), (f2, g2), (f3, g3).
59 - Diophantine equation: an equation whose coefficients and
60 solutions are integer constants, for example the equation
61 | 3*x + 2*y = 1
62 has an integer solution x = 1 and y = -1.
64 References:
66 - "Advanced Compilation for High Performance Computing" by Randy
67 Allen and Ken Kennedy.
68 http://citeseer.ist.psu.edu/goff91practical.html
70 - "Loop Transformations for Restructuring Compilers - The Foundations"
71 by Utpal Banerjee.
76 #include "config.h"
77 #include "system.h"
78 #include "coretypes.h"
79 #include "backend.h"
80 #include "rtl.h"
81 #include "tree.h"
82 #include "gimple.h"
83 #include "gimple-pretty-print.h"
84 #include "alias.h"
85 #include "fold-const.h"
86 #include "expr.h"
87 #include "gimple-iterator.h"
88 #include "tree-ssa-loop-niter.h"
89 #include "tree-ssa-loop.h"
90 #include "tree-ssa.h"
91 #include "cfgloop.h"
92 #include "tree-data-ref.h"
93 #include "tree-scalar-evolution.h"
94 #include "dumpfile.h"
95 #include "tree-affine.h"
96 #include "params.h"
97 #include "builtins.h"
98 #include "tree-eh.h"
99 #include "ssa.h"
/* Counters summarizing the behavior of the dependence analyzer; dumped
   when statistics dumping is requested.  Grouped by the kind of test
   (overall dependence tests, per-subscript tests, and the ZIV/SIV/MIV
   classification of subscripts).  */

static struct datadep_stats
{
  int num_dependence_tests;
  int num_dependence_dependent;
  int num_dependence_independent;
  int num_dependence_undetermined;

  int num_subscript_tests;
  int num_subscript_undetermined;
  int num_same_subscript_function;

  int num_ziv;
  int num_ziv_independent;
  int num_ziv_dependent;
  int num_ziv_unimplemented;

  int num_siv;
  int num_siv_independent;
  int num_siv_dependent;
  int num_siv_unimplemented;

  int num_miv;
  int num_miv_independent;
  int num_miv_dependent;
  int num_miv_unimplemented;
} dependence_stats;
/* Forward declaration: defined later in this file.  */
static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
					   unsigned int, unsigned int,
					   class loop *);
131 /* Returns true iff A divides B. */
133 static inline bool
134 tree_fold_divides_p (const_tree a, const_tree b)
136 gcc_assert (TREE_CODE (a) == INTEGER_CST);
137 gcc_assert (TREE_CODE (b) == INTEGER_CST);
138 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
/* Returns true iff A divides B.  A must be nonzero (division by zero
   is undefined behavior).  */

static inline bool
int_divides_p (int a, int b)
{
  return b % a == 0;
}
149 /* Return true if reference REF contains a union access. */
151 static bool
152 ref_contains_union_access_p (tree ref)
154 while (handled_component_p (ref))
156 ref = TREE_OPERAND (ref, 0);
157 if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
158 || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
159 return true;
161 return false;
166 /* Dump into FILE all the data references from DATAREFS. */
168 static void
169 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
171 unsigned int i;
172 struct data_reference *dr;
174 FOR_EACH_VEC_ELT (datarefs, i, dr)
175 dump_data_reference (file, dr);
178 /* Unified dump into FILE all the data references from DATAREFS. */
180 DEBUG_FUNCTION void
181 debug (vec<data_reference_p> &ref)
183 dump_data_references (stderr, ref);
186 DEBUG_FUNCTION void
187 debug (vec<data_reference_p> *ptr)
189 if (ptr)
190 debug (*ptr);
191 else
192 fprintf (stderr, "<nil>\n");
196 /* Dump into STDERR all the data references from DATAREFS. */
198 DEBUG_FUNCTION void
199 debug_data_references (vec<data_reference_p> datarefs)
201 dump_data_references (stderr, datarefs);
204 /* Print to STDERR the data_reference DR. */
206 DEBUG_FUNCTION void
207 debug_data_reference (struct data_reference *dr)
209 dump_data_reference (stderr, dr);
212 /* Dump function for a DATA_REFERENCE structure. */
214 void
215 dump_data_reference (FILE *outf,
216 struct data_reference *dr)
218 unsigned int i;
220 fprintf (outf, "#(Data Ref: \n");
221 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
222 fprintf (outf, "# stmt: ");
223 print_gimple_stmt (outf, DR_STMT (dr), 0);
224 fprintf (outf, "# ref: ");
225 print_generic_stmt (outf, DR_REF (dr));
226 fprintf (outf, "# base_object: ");
227 print_generic_stmt (outf, DR_BASE_OBJECT (dr));
229 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
231 fprintf (outf, "# Access function %d: ", i);
232 print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
234 fprintf (outf, "#)\n");
237 /* Unified dump function for a DATA_REFERENCE structure. */
239 DEBUG_FUNCTION void
240 debug (data_reference &ref)
242 dump_data_reference (stderr, &ref);
245 DEBUG_FUNCTION void
246 debug (data_reference *ptr)
248 if (ptr)
249 debug (*ptr);
250 else
251 fprintf (stderr, "<nil>\n");
255 /* Dumps the affine function described by FN to the file OUTF. */
257 DEBUG_FUNCTION void
258 dump_affine_function (FILE *outf, affine_fn fn)
260 unsigned i;
261 tree coef;
263 print_generic_expr (outf, fn[0], TDF_SLIM);
264 for (i = 1; fn.iterate (i, &coef); i++)
266 fprintf (outf, " + ");
267 print_generic_expr (outf, coef, TDF_SLIM);
268 fprintf (outf, " * x_%u", i);
272 /* Dumps the conflict function CF to the file OUTF. */
274 DEBUG_FUNCTION void
275 dump_conflict_function (FILE *outf, conflict_function *cf)
277 unsigned i;
279 if (cf->n == NO_DEPENDENCE)
280 fprintf (outf, "no dependence");
281 else if (cf->n == NOT_KNOWN)
282 fprintf (outf, "not known");
283 else
285 for (i = 0; i < cf->n; i++)
287 if (i != 0)
288 fprintf (outf, " ");
289 fprintf (outf, "[");
290 dump_affine_function (outf, cf->fns[i]);
291 fprintf (outf, "]");
296 /* Dump function for a SUBSCRIPT structure. */
298 DEBUG_FUNCTION void
299 dump_subscript (FILE *outf, struct subscript *subscript)
301 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
303 fprintf (outf, "\n (subscript \n");
304 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
305 dump_conflict_function (outf, cf);
306 if (CF_NONTRIVIAL_P (cf))
308 tree last_iteration = SUB_LAST_CONFLICT (subscript);
309 fprintf (outf, "\n last_conflict: ");
310 print_generic_expr (outf, last_iteration);
313 cf = SUB_CONFLICTS_IN_B (subscript);
314 fprintf (outf, "\n iterations_that_access_an_element_twice_in_B: ");
315 dump_conflict_function (outf, cf);
316 if (CF_NONTRIVIAL_P (cf))
318 tree last_iteration = SUB_LAST_CONFLICT (subscript);
319 fprintf (outf, "\n last_conflict: ");
320 print_generic_expr (outf, last_iteration);
323 fprintf (outf, "\n (Subscript distance: ");
324 print_generic_expr (outf, SUB_DISTANCE (subscript));
325 fprintf (outf, " ))\n");
328 /* Print the classic direction vector DIRV to OUTF. */
330 DEBUG_FUNCTION void
331 print_direction_vector (FILE *outf,
332 lambda_vector dirv,
333 int length)
335 int eq;
337 for (eq = 0; eq < length; eq++)
339 enum data_dependence_direction dir = ((enum data_dependence_direction)
340 dirv[eq]);
342 switch (dir)
344 case dir_positive:
345 fprintf (outf, " +");
346 break;
347 case dir_negative:
348 fprintf (outf, " -");
349 break;
350 case dir_equal:
351 fprintf (outf, " =");
352 break;
353 case dir_positive_or_equal:
354 fprintf (outf, " +=");
355 break;
356 case dir_positive_or_negative:
357 fprintf (outf, " +-");
358 break;
359 case dir_negative_or_equal:
360 fprintf (outf, " -=");
361 break;
362 case dir_star:
363 fprintf (outf, " *");
364 break;
365 default:
366 fprintf (outf, "indep");
367 break;
370 fprintf (outf, "\n");
373 /* Print a vector of direction vectors. */
375 DEBUG_FUNCTION void
376 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
377 int length)
379 unsigned j;
380 lambda_vector v;
382 FOR_EACH_VEC_ELT (dir_vects, j, v)
383 print_direction_vector (outf, v, length);
386 /* Print out a vector VEC of length N to OUTFILE. */
388 DEBUG_FUNCTION void
389 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
391 int i;
393 for (i = 0; i < n; i++)
394 fprintf (outfile, "%3d ", (int)vector[i]);
395 fprintf (outfile, "\n");
398 /* Print a vector of distance vectors. */
400 DEBUG_FUNCTION void
401 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
402 int length)
404 unsigned j;
405 lambda_vector v;
407 FOR_EACH_VEC_ELT (dist_vects, j, v)
408 print_lambda_vector (outf, v, length);
411 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
413 DEBUG_FUNCTION void
414 dump_data_dependence_relation (FILE *outf,
415 struct data_dependence_relation *ddr)
417 struct data_reference *dra, *drb;
419 fprintf (outf, "(Data Dep: \n");
421 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
423 if (ddr)
425 dra = DDR_A (ddr);
426 drb = DDR_B (ddr);
427 if (dra)
428 dump_data_reference (outf, dra);
429 else
430 fprintf (outf, " (nil)\n");
431 if (drb)
432 dump_data_reference (outf, drb);
433 else
434 fprintf (outf, " (nil)\n");
436 fprintf (outf, " (don't know)\n)\n");
437 return;
440 dra = DDR_A (ddr);
441 drb = DDR_B (ddr);
442 dump_data_reference (outf, dra);
443 dump_data_reference (outf, drb);
445 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
446 fprintf (outf, " (no dependence)\n");
448 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
450 unsigned int i;
451 class loop *loopi;
453 subscript *sub;
454 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
456 fprintf (outf, " access_fn_A: ");
457 print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
458 fprintf (outf, " access_fn_B: ");
459 print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
460 dump_subscript (outf, sub);
463 fprintf (outf, " loop nest: (");
464 FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
465 fprintf (outf, "%d ", loopi->num);
466 fprintf (outf, ")\n");
468 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
470 fprintf (outf, " distance_vector: ");
471 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
472 DDR_NB_LOOPS (ddr));
475 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
477 fprintf (outf, " direction_vector: ");
478 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
479 DDR_NB_LOOPS (ddr));
483 fprintf (outf, ")\n");
486 /* Debug version. */
488 DEBUG_FUNCTION void
489 debug_data_dependence_relation (struct data_dependence_relation *ddr)
491 dump_data_dependence_relation (stderr, ddr);
494 /* Dump into FILE all the dependence relations from DDRS. */
496 DEBUG_FUNCTION void
497 dump_data_dependence_relations (FILE *file,
498 vec<ddr_p> ddrs)
500 unsigned int i;
501 struct data_dependence_relation *ddr;
503 FOR_EACH_VEC_ELT (ddrs, i, ddr)
504 dump_data_dependence_relation (file, ddr);
507 DEBUG_FUNCTION void
508 debug (vec<ddr_p> &ref)
510 dump_data_dependence_relations (stderr, ref);
513 DEBUG_FUNCTION void
514 debug (vec<ddr_p> *ptr)
516 if (ptr)
517 debug (*ptr);
518 else
519 fprintf (stderr, "<nil>\n");
523 /* Dump to STDERR all the dependence relations from DDRS. */
525 DEBUG_FUNCTION void
526 debug_data_dependence_relations (vec<ddr_p> ddrs)
528 dump_data_dependence_relations (stderr, ddrs);
531 /* Dumps the distance and direction vectors in FILE. DDRS contains
532 the dependence relations, and VECT_SIZE is the size of the
533 dependence vectors, or in other words the number of loops in the
534 considered nest. */
536 DEBUG_FUNCTION void
537 dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
539 unsigned int i, j;
540 struct data_dependence_relation *ddr;
541 lambda_vector v;
543 FOR_EACH_VEC_ELT (ddrs, i, ddr)
544 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
546 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), j, v)
548 fprintf (file, "DISTANCE_V (");
549 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
550 fprintf (file, ")\n");
553 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), j, v)
555 fprintf (file, "DIRECTION_V (");
556 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
557 fprintf (file, ")\n");
561 fprintf (file, "\n\n");
564 /* Dumps the data dependence relations DDRS in FILE. */
566 DEBUG_FUNCTION void
567 dump_ddrs (FILE *file, vec<ddr_p> ddrs)
569 unsigned int i;
570 struct data_dependence_relation *ddr;
572 FOR_EACH_VEC_ELT (ddrs, i, ddr)
573 dump_data_dependence_relation (file, ddr);
575 fprintf (file, "\n\n");
578 DEBUG_FUNCTION void
579 debug_ddrs (vec<ddr_p> ddrs)
581 dump_ddrs (stderr, ddrs);
584 static void
585 split_constant_offset (tree exp, tree *var, tree *off,
586 hash_map<tree, std::pair<tree, tree> > &cache,
587 unsigned *limit);
589 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
590 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
591 constant of type ssizetype, and returns true. If we cannot do this
592 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
593 is returned. */
595 static bool
596 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
597 tree *var, tree *off,
598 hash_map<tree, std::pair<tree, tree> > &cache,
599 unsigned *limit)
601 tree var0, var1;
602 tree off0, off1;
603 enum tree_code ocode = code;
605 *var = NULL_TREE;
606 *off = NULL_TREE;
608 switch (code)
610 case INTEGER_CST:
611 *var = build_int_cst (type, 0);
612 *off = fold_convert (ssizetype, op0);
613 return true;
615 case POINTER_PLUS_EXPR:
616 ocode = PLUS_EXPR;
617 /* FALLTHROUGH */
618 case PLUS_EXPR:
619 case MINUS_EXPR:
620 if (TREE_CODE (op1) == INTEGER_CST)
622 split_constant_offset (op0, &var0, &off0, cache, limit);
623 *var = var0;
624 *off = size_binop (ocode, off0, fold_convert (ssizetype, op1));
625 return true;
627 split_constant_offset (op0, &var0, &off0, cache, limit);
628 split_constant_offset (op1, &var1, &off1, cache, limit);
629 *var = fold_build2 (code, type, var0, var1);
630 *off = size_binop (ocode, off0, off1);
631 return true;
633 case MULT_EXPR:
634 if (TREE_CODE (op1) != INTEGER_CST)
635 return false;
637 split_constant_offset (op0, &var0, &off0, cache, limit);
638 *var = fold_build2 (MULT_EXPR, type, var0, op1);
639 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
640 return true;
642 case ADDR_EXPR:
644 tree base, poffset;
645 poly_int64 pbitsize, pbitpos, pbytepos;
646 machine_mode pmode;
647 int punsignedp, preversep, pvolatilep;
649 op0 = TREE_OPERAND (op0, 0);
650 base
651 = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
652 &punsignedp, &preversep, &pvolatilep);
654 if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
655 return false;
656 base = build_fold_addr_expr (base);
657 off0 = ssize_int (pbytepos);
659 if (poffset)
661 split_constant_offset (poffset, &poffset, &off1, cache, limit);
662 off0 = size_binop (PLUS_EXPR, off0, off1);
663 if (POINTER_TYPE_P (TREE_TYPE (base)))
664 base = fold_build_pointer_plus (base, poffset);
665 else
666 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
667 fold_convert (TREE_TYPE (base), poffset));
670 var0 = fold_convert (type, base);
672 /* If variable length types are involved, punt, otherwise casts
673 might be converted into ARRAY_REFs in gimplify_conversion.
674 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
675 possibly no longer appears in current GIMPLE, might resurface.
676 This perhaps could run
677 if (CONVERT_EXPR_P (var0))
679 gimplify_conversion (&var0);
680 // Attempt to fill in any within var0 found ARRAY_REF's
681 // element size from corresponding op embedded ARRAY_REF,
682 // if unsuccessful, just punt.
683 } */
684 while (POINTER_TYPE_P (type))
685 type = TREE_TYPE (type);
686 if (int_size_in_bytes (type) < 0)
687 return false;
689 *var = var0;
690 *off = off0;
691 return true;
694 case SSA_NAME:
696 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
697 return false;
699 gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
700 enum tree_code subcode;
702 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
703 return false;
705 subcode = gimple_assign_rhs_code (def_stmt);
707 /* We are using a cache to avoid un-CSEing large amounts of code. */
708 bool use_cache = false;
709 if (!has_single_use (op0)
710 && (subcode == POINTER_PLUS_EXPR
711 || subcode == PLUS_EXPR
712 || subcode == MINUS_EXPR
713 || subcode == MULT_EXPR
714 || subcode == ADDR_EXPR
715 || CONVERT_EXPR_CODE_P (subcode)))
717 use_cache = true;
718 bool existed;
719 std::pair<tree, tree> &e = cache.get_or_insert (op0, &existed);
720 if (existed)
722 if (integer_zerop (e.second))
723 return false;
724 *var = e.first;
725 *off = e.second;
726 return true;
728 e = std::make_pair (op0, ssize_int (0));
731 if (*limit == 0)
732 return false;
733 --*limit;
735 var0 = gimple_assign_rhs1 (def_stmt);
736 var1 = gimple_assign_rhs2 (def_stmt);
738 bool res = split_constant_offset_1 (type, var0, subcode, var1,
739 var, off, cache, limit);
740 if (res && use_cache)
741 *cache.get (op0) = std::make_pair (*var, *off);
742 return res;
744 CASE_CONVERT:
746 /* We must not introduce undefined overflow, and we must not change
747 the value. Hence we're okay if the inner type doesn't overflow
748 to start with (pointer or signed), the outer type also is an
749 integer or pointer and the outer precision is at least as large
750 as the inner. */
751 tree itype = TREE_TYPE (op0);
752 if ((POINTER_TYPE_P (itype)
753 || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
754 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
755 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
757 if (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_WRAPS (itype))
759 /* Split the unconverted operand and try to prove that
760 wrapping isn't a problem. */
761 tree tmp_var, tmp_off;
762 split_constant_offset (op0, &tmp_var, &tmp_off, cache, limit);
764 /* See whether we have an SSA_NAME whose range is known
765 to be [A, B]. */
766 if (TREE_CODE (tmp_var) != SSA_NAME)
767 return false;
768 wide_int var_min, var_max;
769 value_range_kind vr_type = get_range_info (tmp_var, &var_min,
770 &var_max);
771 wide_int var_nonzero = get_nonzero_bits (tmp_var);
772 signop sgn = TYPE_SIGN (itype);
773 if (intersect_range_with_nonzero_bits (vr_type, &var_min,
774 &var_max, var_nonzero,
775 sgn) != VR_RANGE)
776 return false;
778 /* See whether the range of OP0 (i.e. TMP_VAR + TMP_OFF)
779 is known to be [A + TMP_OFF, B + TMP_OFF], with all
780 operations done in ITYPE. The addition must overflow
781 at both ends of the range or at neither. */
782 wi::overflow_type overflow[2];
783 unsigned int prec = TYPE_PRECISION (itype);
784 wide_int woff = wi::to_wide (tmp_off, prec);
785 wide_int op0_min = wi::add (var_min, woff, sgn, &overflow[0]);
786 wi::add (var_max, woff, sgn, &overflow[1]);
787 if ((overflow[0] != wi::OVF_NONE) != (overflow[1] != wi::OVF_NONE))
788 return false;
790 /* Calculate (ssizetype) OP0 - (ssizetype) TMP_VAR. */
791 widest_int diff = (widest_int::from (op0_min, sgn)
792 - widest_int::from (var_min, sgn));
793 var0 = tmp_var;
794 *off = wide_int_to_tree (ssizetype, diff);
796 else
797 split_constant_offset (op0, &var0, off, cache, limit);
798 *var = fold_convert (type, var0);
799 return true;
801 return false;
804 default:
805 return false;
809 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
810 will be ssizetype. */
812 static void
813 split_constant_offset (tree exp, tree *var, tree *off,
814 hash_map<tree, std::pair<tree, tree> > &cache,
815 unsigned *limit)
817 tree type = TREE_TYPE (exp), op0, op1, e, o;
818 enum tree_code code;
820 *var = exp;
821 *off = ssize_int (0);
823 if (tree_is_chrec (exp)
824 || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
825 return;
827 code = TREE_CODE (exp);
828 extract_ops_from_tree (exp, &code, &op0, &op1);
829 if (split_constant_offset_1 (type, op0, code, op1, &e, &o, cache, limit))
831 *var = e;
832 *off = o;
836 void
837 split_constant_offset (tree exp, tree *var, tree *off)
839 unsigned limit = PARAM_VALUE (PARAM_SSA_NAME_DEF_CHAIN_LIMIT);
840 static hash_map<tree, std::pair<tree, tree> > *cache;
841 if (!cache)
842 cache = new hash_map<tree, std::pair<tree, tree> > (37);
843 split_constant_offset (exp, var, off, *cache, &limit);
844 cache->empty ();
847 /* Returns the address ADDR of an object in a canonical shape (without nop
848 casts, and with type of pointer to the object). */
850 static tree
851 canonicalize_base_object_address (tree addr)
853 tree orig = addr;
855 STRIP_NOPS (addr);
857 /* The base address may be obtained by casting from integer, in that case
858 keep the cast. */
859 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
860 return orig;
862 if (TREE_CODE (addr) != ADDR_EXPR)
863 return addr;
865 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
868 /* Analyze the behavior of memory reference REF within STMT.
869 There are two modes:
871 - BB analysis. In this case we simply split the address into base,
872 init and offset components, without reference to any containing loop.
873 The resulting base and offset are general expressions and they can
874 vary arbitrarily from one iteration of the containing loop to the next.
875 The step is always zero.
877 - loop analysis. In this case we analyze the reference both wrt LOOP
878 and on the basis that the reference occurs (is "used") in LOOP;
879 see the comment above analyze_scalar_evolution_in_loop for more
880 information about this distinction. The base, init, offset and
881 step fields are all invariant in LOOP.
883 Perform BB analysis if LOOP is null, or if LOOP is the function's
884 dummy outermost loop. In other cases perform loop analysis.
886 Return true if the analysis succeeded and store the results in DRB if so.
887 BB analysis can only fail for bitfield or reversed-storage accesses. */
889 opt_result
890 dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
891 class loop *loop, const gimple *stmt)
893 poly_int64 pbitsize, pbitpos;
894 tree base, poffset;
895 machine_mode pmode;
896 int punsignedp, preversep, pvolatilep;
897 affine_iv base_iv, offset_iv;
898 tree init, dinit, step;
899 bool in_loop = (loop && loop->num);
901 if (dump_file && (dump_flags & TDF_DETAILS))
902 fprintf (dump_file, "analyze_innermost: ");
904 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
905 &punsignedp, &preversep, &pvolatilep);
906 gcc_assert (base != NULL_TREE);
908 poly_int64 pbytepos;
909 if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
910 return opt_result::failure_at (stmt,
911 "failed: bit offset alignment.\n");
913 if (preversep)
914 return opt_result::failure_at (stmt,
915 "failed: reverse storage order.\n");
917 /* Calculate the alignment and misalignment for the inner reference. */
918 unsigned int HOST_WIDE_INT bit_base_misalignment;
919 unsigned int bit_base_alignment;
920 get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);
922 /* There are no bitfield references remaining in BASE, so the values
923 we got back must be whole bytes. */
924 gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
925 && bit_base_misalignment % BITS_PER_UNIT == 0);
926 unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
927 poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;
929 if (TREE_CODE (base) == MEM_REF)
931 if (!integer_zerop (TREE_OPERAND (base, 1)))
933 /* Subtract MOFF from the base and add it to POFFSET instead.
934 Adjust the misalignment to reflect the amount we subtracted. */
935 poly_offset_int moff = mem_ref_offset (base);
936 base_misalignment -= moff.force_shwi ();
937 tree mofft = wide_int_to_tree (sizetype, moff);
938 if (!poffset)
939 poffset = mofft;
940 else
941 poffset = size_binop (PLUS_EXPR, poffset, mofft);
943 base = TREE_OPERAND (base, 0);
945 else
946 base = build_fold_addr_expr (base);
948 if (in_loop)
950 if (!simple_iv (loop, loop, base, &base_iv, true))
951 return opt_result::failure_at
952 (stmt, "failed: evolution of base is not affine.\n");
954 else
956 base_iv.base = base;
957 base_iv.step = ssize_int (0);
958 base_iv.no_overflow = true;
961 if (!poffset)
963 offset_iv.base = ssize_int (0);
964 offset_iv.step = ssize_int (0);
966 else
968 if (!in_loop)
970 offset_iv.base = poffset;
971 offset_iv.step = ssize_int (0);
973 else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
974 return opt_result::failure_at
975 (stmt, "failed: evolution of offset is not affine.\n");
978 init = ssize_int (pbytepos);
980 /* Subtract any constant component from the base and add it to INIT instead.
981 Adjust the misalignment to reflect the amount we subtracted. */
982 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
983 init = size_binop (PLUS_EXPR, init, dinit);
984 base_misalignment -= TREE_INT_CST_LOW (dinit);
986 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
987 init = size_binop (PLUS_EXPR, init, dinit);
989 step = size_binop (PLUS_EXPR,
990 fold_convert (ssizetype, base_iv.step),
991 fold_convert (ssizetype, offset_iv.step));
993 base = canonicalize_base_object_address (base_iv.base);
995 /* See if get_pointer_alignment can guarantee a higher alignment than
996 the one we calculated above. */
997 unsigned int HOST_WIDE_INT alt_misalignment;
998 unsigned int alt_alignment;
999 get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);
1001 /* As above, these values must be whole bytes. */
1002 gcc_assert (alt_alignment % BITS_PER_UNIT == 0
1003 && alt_misalignment % BITS_PER_UNIT == 0);
1004 alt_alignment /= BITS_PER_UNIT;
1005 alt_misalignment /= BITS_PER_UNIT;
1007 if (base_alignment < alt_alignment)
1009 base_alignment = alt_alignment;
1010 base_misalignment = alt_misalignment;
1013 drb->base_address = base;
1014 drb->offset = fold_convert (ssizetype, offset_iv.base);
1015 drb->init = init;
1016 drb->step = step;
1017 if (known_misalignment (base_misalignment, base_alignment,
1018 &drb->base_misalignment))
1019 drb->base_alignment = base_alignment;
1020 else
1022 drb->base_alignment = known_alignment (base_misalignment);
1023 drb->base_misalignment = 0;
1025 drb->offset_alignment = highest_pow2_factor (offset_iv.base);
1026 drb->step_alignment = highest_pow2_factor (step);
1028 if (dump_file && (dump_flags & TDF_DETAILS))
1029 fprintf (dump_file, "success.\n");
1031 return opt_result::success ();
1034 /* Return true if OP is a valid component reference for a DR access
1035 function. This accepts a subset of what handled_component_p accepts. */
1037 static bool
1038 access_fn_component_p (tree op)
1040 switch (TREE_CODE (op))
1042 case REALPART_EXPR:
1043 case IMAGPART_EXPR:
1044 case ARRAY_REF:
1045 return true;
1047 case COMPONENT_REF:
1048 return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;
1050 default:
1051 return false;
1055 /* Determines the base object and the list of indices of memory reference
1056 DR, analyzed in LOOP and instantiated before NEST. */
1058 static void
1059 dr_analyze_indices (struct data_reference *dr, edge nest, loop_p loop)
1061 vec<tree> access_fns = vNULL;
1062 tree ref, op;
1063 tree base, off, access_fn;
1065 /* If analyzing a basic-block there are no indices to analyze
1066 and thus no access functions. */
1067 if (!nest)
1069 DR_BASE_OBJECT (dr) = DR_REF (dr);
1070 DR_ACCESS_FNS (dr).create (0);
1071 return;
1074 ref = DR_REF (dr);
1076 /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
1077 into a two element array with a constant index. The base is
1078 then just the immediate underlying object. */
1079 if (TREE_CODE (ref) == REALPART_EXPR)
1081 ref = TREE_OPERAND (ref, 0);
1082 access_fns.safe_push (integer_zero_node);
1084 else if (TREE_CODE (ref) == IMAGPART_EXPR)
1086 ref = TREE_OPERAND (ref, 0);
1087 access_fns.safe_push (integer_one_node);
1090 /* Analyze access functions of dimensions we know to be independent.
1091 The list of component references handled here should be kept in
1092 sync with access_fn_component_p. */
1093 while (handled_component_p (ref))
1095 if (TREE_CODE (ref) == ARRAY_REF)
1097 op = TREE_OPERAND (ref, 1);
1098 access_fn = analyze_scalar_evolution (loop, op);
1099 access_fn = instantiate_scev (nest, loop, access_fn);
1100 access_fns.safe_push (access_fn);
1102 else if (TREE_CODE (ref) == COMPONENT_REF
1103 && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
1105 /* For COMPONENT_REFs of records (but not unions!) use the
1106 FIELD_DECL offset as constant access function so we can
1107 disambiguate a[i].f1 and a[i].f2. */
1108 tree off = component_ref_field_offset (ref);
1109 off = size_binop (PLUS_EXPR,
1110 size_binop (MULT_EXPR,
1111 fold_convert (bitsizetype, off),
1112 bitsize_int (BITS_PER_UNIT)),
1113 DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
1114 access_fns.safe_push (off);
1116 else
1117 /* If we have an unhandled component we could not translate
1118 to an access function stop analyzing. We have determined
1119 our base object in this case. */
1120 break;
1122 ref = TREE_OPERAND (ref, 0);
1125 /* If the address operand of a MEM_REF base has an evolution in the
1126 analyzed nest, add it as an additional independent access-function. */
1127 if (TREE_CODE (ref) == MEM_REF)
1129 op = TREE_OPERAND (ref, 0);
1130 access_fn = analyze_scalar_evolution (loop, op);
1131 access_fn = instantiate_scev (nest, loop, access_fn);
1132 if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
1134 tree orig_type;
1135 tree memoff = TREE_OPERAND (ref, 1);
1136 base = initial_condition (access_fn);
1137 orig_type = TREE_TYPE (base);
1138 STRIP_USELESS_TYPE_CONVERSION (base);
1139 split_constant_offset (base, &base, &off);
1140 STRIP_USELESS_TYPE_CONVERSION (base);
1141 /* Fold the MEM_REF offset into the evolutions initial
1142 value to make more bases comparable. */
1143 if (!integer_zerop (memoff))
1145 off = size_binop (PLUS_EXPR, off,
1146 fold_convert (ssizetype, memoff));
1147 memoff = build_int_cst (TREE_TYPE (memoff), 0);
1149 /* Adjust the offset so it is a multiple of the access type
1150 size and thus we separate bases that can possibly be used
1151 to produce partial overlaps (which the access_fn machinery
1152 cannot handle). */
1153 wide_int rem;
1154 if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
1155 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
1156 && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
1157 rem = wi::mod_trunc
1158 (wi::to_wide (off),
1159 wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
1160 SIGNED);
1161 else
1162 /* If we can't compute the remainder simply force the initial
1163 condition to zero. */
1164 rem = wi::to_wide (off);
1165 off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
1166 memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
1167 /* And finally replace the initial condition. */
1168 access_fn = chrec_replace_initial_condition
1169 (access_fn, fold_convert (orig_type, off));
1170 /* ??? This is still not a suitable base object for
1171 dr_may_alias_p - the base object needs to be an
1172 access that covers the object as whole. With
1173 an evolution in the pointer this cannot be
1174 guaranteed.
1175 As a band-aid, mark the access so we can special-case
1176 it in dr_may_alias_p. */
1177 tree old = ref;
1178 ref = fold_build2_loc (EXPR_LOCATION (ref),
1179 MEM_REF, TREE_TYPE (ref),
1180 base, memoff);
1181 MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
1182 MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
1183 DR_UNCONSTRAINED_BASE (dr) = true;
1184 access_fns.safe_push (access_fn);
1187 else if (DECL_P (ref))
1189 /* Canonicalize DR_BASE_OBJECT to MEM_REF form. */
1190 ref = build2 (MEM_REF, TREE_TYPE (ref),
1191 build_fold_addr_expr (ref),
1192 build_int_cst (reference_alias_ptr_type (ref), 0));
1195 DR_BASE_OBJECT (dr) = ref;
1196 DR_ACCESS_FNS (dr) = access_fns;
1199 /* Extracts the alias analysis information from the memory reference DR. */
1201 static void
1202 dr_analyze_alias (struct data_reference *dr)
1204 tree ref = DR_REF (dr);
1205 tree base = get_base_address (ref), addr;
1207 if (INDIRECT_REF_P (base)
1208 || TREE_CODE (base) == MEM_REF)
1210 addr = TREE_OPERAND (base, 0);
1211 if (TREE_CODE (addr) == SSA_NAME)
1212 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1216 /* Frees data reference DR. */
1218 void
1219 free_data_ref (data_reference_p dr)
1221 DR_ACCESS_FNS (dr).release ();
1222 free (dr);
1225 /* Analyze memory reference MEMREF, which is accessed in STMT.
1226 The reference is a read if IS_READ is true, otherwise it is a write.
1227 IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
1228 within STMT, i.e. that it might not occur even if STMT is executed
1229 and runs to completion.
1231 Return the data_reference description of MEMREF. NEST is the outermost
1232 loop in which the reference should be instantiated, LOOP is the loop
1233 in which the data reference should be analyzed. */
1235 struct data_reference *
1236 create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
1237 bool is_read, bool is_conditional_in_stmt)
1239 struct data_reference *dr;
1241 if (dump_file && (dump_flags & TDF_DETAILS))
1243 fprintf (dump_file, "Creating dr for ");
1244 print_generic_expr (dump_file, memref, TDF_SLIM);
1245 fprintf (dump_file, "\n");
1248 dr = XCNEW (struct data_reference);
1249 DR_STMT (dr) = stmt;
1250 DR_REF (dr) = memref;
1251 DR_IS_READ (dr) = is_read;
1252 DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;
1254 dr_analyze_innermost (&DR_INNERMOST (dr), memref,
1255 nest != NULL ? loop : NULL, stmt);
1256 dr_analyze_indices (dr, nest, loop);
1257 dr_analyze_alias (dr);
1259 if (dump_file && (dump_flags & TDF_DETAILS))
1261 unsigned i;
1262 fprintf (dump_file, "\tbase_address: ");
1263 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1264 fprintf (dump_file, "\n\toffset from base address: ");
1265 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1266 fprintf (dump_file, "\n\tconstant offset from base address: ");
1267 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1268 fprintf (dump_file, "\n\tstep: ");
1269 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1270 fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
1271 fprintf (dump_file, "\n\tbase misalignment: %d",
1272 DR_BASE_MISALIGNMENT (dr));
1273 fprintf (dump_file, "\n\toffset alignment: %d",
1274 DR_OFFSET_ALIGNMENT (dr));
1275 fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
1276 fprintf (dump_file, "\n\tbase_object: ");
1277 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1278 fprintf (dump_file, "\n");
1279 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1281 fprintf (dump_file, "\tAccess function %d: ", i);
1282 print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1286 return dr;
1289 /* A helper function computes order between two tree expressions T1 and T2.
1290 This is used in comparator functions sorting objects based on the order
1291 of tree expressions. The function returns -1, 0, or 1. */
1294 data_ref_compare_tree (tree t1, tree t2)
1296 int i, cmp;
1297 enum tree_code code;
1298 char tclass;
1300 if (t1 == t2)
1301 return 0;
1302 if (t1 == NULL)
1303 return -1;
1304 if (t2 == NULL)
1305 return 1;
1307 STRIP_USELESS_TYPE_CONVERSION (t1);
1308 STRIP_USELESS_TYPE_CONVERSION (t2);
1309 if (t1 == t2)
1310 return 0;
1312 if (TREE_CODE (t1) != TREE_CODE (t2)
1313 && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
1314 return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
1316 code = TREE_CODE (t1);
1317 switch (code)
1319 case INTEGER_CST:
1320 return tree_int_cst_compare (t1, t2);
1322 case STRING_CST:
1323 if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
1324 return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
1325 return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
1326 TREE_STRING_LENGTH (t1));
1328 case SSA_NAME:
1329 if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
1330 return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
1331 break;
1333 default:
1334 if (POLY_INT_CST_P (t1))
1335 return compare_sizes_for_sort (wi::to_poly_widest (t1),
1336 wi::to_poly_widest (t2));
1338 tclass = TREE_CODE_CLASS (code);
1340 /* For decls, compare their UIDs. */
1341 if (tclass == tcc_declaration)
1343 if (DECL_UID (t1) != DECL_UID (t2))
1344 return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
1345 break;
1347 /* For expressions, compare their operands recursively. */
1348 else if (IS_EXPR_CODE_CLASS (tclass))
1350 for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
1352 cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
1353 TREE_OPERAND (t2, i));
1354 if (cmp != 0)
1355 return cmp;
1358 else
1359 gcc_unreachable ();
1362 return 0;
1365 /* Return TRUE it's possible to resolve data dependence DDR by runtime alias
1366 check. */
1368 opt_result
1369 runtime_alias_check_p (ddr_p ddr, class loop *loop, bool speed_p)
1371 if (dump_enabled_p ())
1372 dump_printf (MSG_NOTE,
1373 "consider run-time aliasing test between %T and %T\n",
1374 DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));
1376 if (!speed_p)
1377 return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1378 "runtime alias check not supported when"
1379 " optimizing for size.\n");
1381 /* FORNOW: We don't support versioning with outer-loop in either
1382 vectorization or loop distribution. */
1383 if (loop != NULL && loop->inner != NULL)
1384 return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1385 "runtime alias check not supported for"
1386 " outer loop.\n");
1388 return opt_result::success ();
1391 /* Operator == between two dr_with_seg_len objects.
1393 This equality operator is used to make sure two data refs
1394 are the same one so that we will consider to combine the
1395 aliasing checks of those two pairs of data dependent data
1396 refs. */
1398 static bool
1399 operator == (const dr_with_seg_len& d1,
1400 const dr_with_seg_len& d2)
1402 return (operand_equal_p (DR_BASE_ADDRESS (d1.dr),
1403 DR_BASE_ADDRESS (d2.dr), 0)
1404 && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
1405 && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
1406 && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0
1407 && known_eq (d1.access_size, d2.access_size)
1408 && d1.align == d2.align);
1411 /* Comparison function for sorting objects of dr_with_seg_len_pair_t
1412 so that we can combine aliasing checks in one scan. */
1414 static int
1415 comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
1417 const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
1418 const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
1419 const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
1420 const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;
1422 /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks
1423 if a and c have the same basic address snd step, and b and d have the same
1424 address and step. Therefore, if any a&c or b&d don't have the same address
1425 and step, we don't care the order of those two pairs after sorting. */
1426 int comp_res;
1428 if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
1429 DR_BASE_ADDRESS (b1.dr))) != 0)
1430 return comp_res;
1431 if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
1432 DR_BASE_ADDRESS (b2.dr))) != 0)
1433 return comp_res;
1434 if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
1435 DR_STEP (b1.dr))) != 0)
1436 return comp_res;
1437 if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
1438 DR_STEP (b2.dr))) != 0)
1439 return comp_res;
1440 if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
1441 DR_OFFSET (b1.dr))) != 0)
1442 return comp_res;
1443 if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
1444 DR_INIT (b1.dr))) != 0)
1445 return comp_res;
1446 if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
1447 DR_OFFSET (b2.dr))) != 0)
1448 return comp_res;
1449 if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
1450 DR_INIT (b2.dr))) != 0)
1451 return comp_res;
1453 return 0;
1456 /* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
1457 FACTOR is number of iterations that each data reference is accessed.
1459 Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
1460 we create an expression:
1462 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1463 || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
1465 for aliasing checks. However, in some cases we can decrease the number
1466 of checks by combining two checks into one. For example, suppose we have
1467 another pair of data refs store_ptr_0 & load_ptr_1, and if the following
1468 condition is satisfied:
1470 load_ptr_0 < load_ptr_1 &&
1471 load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
1473 (this condition means, in each iteration of vectorized loop, the accessed
1474 memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
1475 load_ptr_1.)
1477 we then can use only the following expression to finish the alising checks
1478 between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:
1480 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1481 || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))
1483 Note that we only consider that load_ptr_0 and load_ptr_1 have the same
1484 basic address. */
1486 void
1487 prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
1488 poly_uint64)
1490 /* Sort the collected data ref pairs so that we can scan them once to
1491 combine all possible aliasing checks. */
1492 alias_pairs->qsort (comp_dr_with_seg_len_pair);
1494 /* Scan the sorted dr pairs and check if we can combine alias checks
1495 of two neighboring dr pairs. */
1496 for (size_t i = 1; i < alias_pairs->length (); ++i)
1498 /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2). */
1499 dr_with_seg_len *dr_a1 = &(*alias_pairs)[i-1].first,
1500 *dr_b1 = &(*alias_pairs)[i-1].second,
1501 *dr_a2 = &(*alias_pairs)[i].first,
1502 *dr_b2 = &(*alias_pairs)[i].second;
1504 /* Remove duplicate data ref pairs. */
1505 if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
1507 if (dump_enabled_p ())
1508 dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
1509 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1510 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1511 alias_pairs->ordered_remove (i--);
1512 continue;
1515 if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
1517 /* We consider the case that DR_B1 and DR_B2 are same memrefs,
1518 and DR_A1 and DR_A2 are two consecutive memrefs. */
1519 if (*dr_a1 == *dr_a2)
1521 std::swap (dr_a1, dr_b1);
1522 std::swap (dr_a2, dr_b2);
1525 poly_int64 init_a1, init_a2;
1526 /* Only consider cases in which the distance between the initial
1527 DR_A1 and the initial DR_A2 is known at compile time. */
1528 if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
1529 DR_BASE_ADDRESS (dr_a2->dr), 0)
1530 || !operand_equal_p (DR_OFFSET (dr_a1->dr),
1531 DR_OFFSET (dr_a2->dr), 0)
1532 || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
1533 || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
1534 continue;
1536 /* Don't combine if we can't tell which one comes first. */
1537 if (!ordered_p (init_a1, init_a2))
1538 continue;
1540 /* Make sure dr_a1 starts left of dr_a2. */
1541 if (maybe_gt (init_a1, init_a2))
1543 std::swap (*dr_a1, *dr_a2);
1544 std::swap (init_a1, init_a2);
1547 /* Work out what the segment length would be if we did combine
1548 DR_A1 and DR_A2:
1550 - If DR_A1 and DR_A2 have equal lengths, that length is
1551 also the combined length.
1553 - If DR_A1 and DR_A2 both have negative "lengths", the combined
1554 length is the lower bound on those lengths.
1556 - If DR_A1 and DR_A2 both have positive lengths, the combined
1557 length is the upper bound on those lengths.
1559 Other cases are unlikely to give a useful combination.
1561 The lengths both have sizetype, so the sign is taken from
1562 the step instead. */
1563 if (!operand_equal_p (dr_a1->seg_len, dr_a2->seg_len, 0))
1565 poly_uint64 seg_len_a1, seg_len_a2;
1566 if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
1567 || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
1568 continue;
1570 tree indicator_a = dr_direction_indicator (dr_a1->dr);
1571 if (TREE_CODE (indicator_a) != INTEGER_CST)
1572 continue;
1574 tree indicator_b = dr_direction_indicator (dr_a2->dr);
1575 if (TREE_CODE (indicator_b) != INTEGER_CST)
1576 continue;
1578 int sign_a = tree_int_cst_sgn (indicator_a);
1579 int sign_b = tree_int_cst_sgn (indicator_b);
1581 poly_uint64 new_seg_len;
1582 if (sign_a <= 0 && sign_b <= 0)
1583 new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
1584 else if (sign_a >= 0 && sign_b >= 0)
1585 new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
1586 else
1587 continue;
1589 dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
1590 new_seg_len);
1591 dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
1594 /* This is always positive due to the swap above. */
1595 poly_uint64 diff = init_a2 - init_a1;
1597 /* The new check will start at DR_A1. Make sure that its access
1598 size encompasses the initial DR_A2. */
1599 if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
1601 dr_a1->access_size = upper_bound (dr_a1->access_size,
1602 diff + dr_a2->access_size);
1603 unsigned int new_align = known_alignment (dr_a1->access_size);
1604 dr_a1->align = MIN (dr_a1->align, new_align);
1606 if (dump_enabled_p ())
1607 dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
1608 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1609 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1610 alias_pairs->ordered_remove (i);
1611 i--;
1616 /* Given LOOP's two data references and segment lengths described by DR_A
1617 and DR_B, create expression checking if the two addresses ranges intersect
1618 with each other based on index of the two addresses. This can only be
1619 done if DR_A and DR_B referring to the same (array) object and the index
1620 is the only difference. For example:
1622 DR_A DR_B
1623 data-ref arr[i] arr[j]
1624 base_object arr arr
1625 index {i_0, +, 1}_loop {j_0, +, 1}_loop
1627 The addresses and their index are like:
1629 |<- ADDR_A ->| |<- ADDR_B ->|
1630 ------------------------------------------------------->
1631 | | | | | | | | | |
1632 ------------------------------------------------------->
1633 i_0 ... i_0+4 j_0 ... j_0+4
1635 We can create expression based on index rather than address:
1637 (i_0 + 4 < j_0 || j_0 + 4 < i_0)
1639 Note evolution step of index needs to be considered in comparison. */
1641 static bool
1642 create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
1643 const dr_with_seg_len& dr_a,
1644 const dr_with_seg_len& dr_b)
1646 if (integer_zerop (DR_STEP (dr_a.dr))
1647 || integer_zerop (DR_STEP (dr_b.dr))
1648 || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
1649 return false;
1651 poly_uint64 seg_len1, seg_len2;
1652 if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
1653 || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
1654 return false;
1656 if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
1657 return false;
1659 if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
1660 return false;
1662 if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
1663 return false;
1665 gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
1667 bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
1668 unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
1669 if (neg_step)
1671 abs_step = -abs_step;
1672 seg_len1 = -seg_len1;
1673 seg_len2 = -seg_len2;
1675 else
1677 /* Include the access size in the length, so that we only have one
1678 tree addition below. */
1679 seg_len1 += dr_a.access_size;
1680 seg_len2 += dr_b.access_size;
1683 /* Infer the number of iterations with which the memory segment is accessed
1684 by DR. In other words, alias is checked if memory segment accessed by
1685 DR_A in some iterations intersect with memory segment accessed by DR_B
1686 in the same amount iterations.
1687 Note segnment length is a linear function of number of iterations with
1688 DR_STEP as the coefficient. */
1689 poly_uint64 niter_len1, niter_len2;
1690 if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
1691 || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
1692 return false;
1694 poly_uint64 niter_access1 = 0, niter_access2 = 0;
1695 if (neg_step)
1697 /* Divide each access size by the byte step, rounding up. */
1698 if (!can_div_trunc_p (dr_a.access_size - abs_step - 1,
1699 abs_step, &niter_access1)
1700 || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
1701 abs_step, &niter_access2))
1702 return false;
1705 unsigned int i;
1706 for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
1708 tree access1 = DR_ACCESS_FN (dr_a.dr, i);
1709 tree access2 = DR_ACCESS_FN (dr_b.dr, i);
1710 /* Two indices must be the same if they are not scev, or not scev wrto
1711 current loop being vecorized. */
1712 if (TREE_CODE (access1) != POLYNOMIAL_CHREC
1713 || TREE_CODE (access2) != POLYNOMIAL_CHREC
1714 || CHREC_VARIABLE (access1) != (unsigned)loop->num
1715 || CHREC_VARIABLE (access2) != (unsigned)loop->num)
1717 if (operand_equal_p (access1, access2, 0))
1718 continue;
1720 return false;
1722 /* The two indices must have the same step. */
1723 if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
1724 return false;
1726 tree idx_step = CHREC_RIGHT (access1);
1727 /* Index must have const step, otherwise DR_STEP won't be constant. */
1728 gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
1729 /* Index must evaluate in the same direction as DR. */
1730 gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
1732 tree min1 = CHREC_LEFT (access1);
1733 tree min2 = CHREC_LEFT (access2);
1734 if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
1735 return false;
1737 /* Ideally, alias can be checked against loop's control IV, but we
1738 need to prove linear mapping between control IV and reference
1739 index. Although that should be true, we check against (array)
1740 index of data reference. Like segment length, index length is
1741 linear function of the number of iterations with index_step as
1742 the coefficient, i.e, niter_len * idx_step. */
1743 tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
1744 build_int_cst (TREE_TYPE (min1),
1745 niter_len1));
1746 tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
1747 build_int_cst (TREE_TYPE (min2),
1748 niter_len2));
1749 tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1);
1750 tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2);
1751 /* Adjust ranges for negative step. */
1752 if (neg_step)
1754 /* IDX_LEN1 and IDX_LEN2 are negative in this case. */
1755 std::swap (min1, max1);
1756 std::swap (min2, max2);
1758 /* As with the lengths just calculated, we've measured the access
1759 sizes in iterations, so multiply them by the index step. */
1760 tree idx_access1
1761 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
1762 build_int_cst (TREE_TYPE (min1), niter_access1));
1763 tree idx_access2
1764 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
1765 build_int_cst (TREE_TYPE (min2), niter_access2));
1767 /* MINUS_EXPR because the above values are negative. */
1768 max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (max1), max1, idx_access1);
1769 max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (max2), max2, idx_access2);
1771 tree part_cond_expr
1772 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1773 fold_build2 (LE_EXPR, boolean_type_node, max1, min2),
1774 fold_build2 (LE_EXPR, boolean_type_node, max2, min1));
1775 if (*cond_expr)
1776 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1777 *cond_expr, part_cond_expr);
1778 else
1779 *cond_expr = part_cond_expr;
1781 return true;
1784 /* If ALIGN is nonzero, set up *SEQ_MIN_OUT and *SEQ_MAX_OUT so that for
1785 every address ADDR accessed by D:
1787 *SEQ_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEQ_MAX_OUT
1789 In this case, every element accessed by D is aligned to at least
1790 ALIGN bytes.
1792 If ALIGN is zero then instead set *SEG_MAX_OUT so that:
1794 *SEQ_MIN_OUT <= ADDR < *SEQ_MAX_OUT. */
1796 static void
1797 get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
1798 tree *seg_max_out, HOST_WIDE_INT align)
1800 /* Each access has the following pattern:
1802 <- |seg_len| ->
1803 <--- A: -ve step --->
1804 +-----+-------+-----+-------+-----+
1805 | n-1 | ,.... | 0 | ..... | n-1 |
1806 +-----+-------+-----+-------+-----+
1807 <--- B: +ve step --->
1808 <- |seg_len| ->
1810 base address
1812 where "n" is the number of scalar iterations covered by the segment.
1813 (This should be VF for a particular pair if we know that both steps
1814 are the same, otherwise it will be the full number of scalar loop
1815 iterations.)
1817 A is the range of bytes accessed when the step is negative,
1818 B is the range when the step is positive.
1820 If the access size is "access_size" bytes, the lowest addressed byte is:
1822 base + (step < 0 ? seg_len : 0) [LB]
1824 and the highest addressed byte is always below:
1826 base + (step < 0 ? 0 : seg_len) + access_size [UB]
1828 Thus:
1830 LB <= ADDR < UB
1832 If ALIGN is nonzero, all three values are aligned to at least ALIGN
1833 bytes, so:
1835 LB <= ADDR <= UB - ALIGN
1837 where "- ALIGN" folds naturally with the "+ access_size" and often
1838 cancels it out.
1840 We don't try to simplify LB and UB beyond this (e.g. by using
1841 MIN and MAX based on whether seg_len rather than the stride is
1842 negative) because it is possible for the absolute size of the
1843 segment to overflow the range of a ssize_t.
1845 Keeping the pointer_plus outside of the cond_expr should allow
1846 the cond_exprs to be shared with other alias checks. */
1847 tree indicator = dr_direction_indicator (d.dr);
1848 tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
1849 fold_convert (ssizetype, indicator),
1850 ssize_int (0));
1851 tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
1852 DR_OFFSET (d.dr));
1853 addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
1854 tree seg_len
1855 = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));
1857 tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
1858 seg_len, size_zero_node);
1859 tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
1860 size_zero_node, seg_len);
1861 max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
1862 size_int (d.access_size - align));
1864 *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
1865 *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
1868 /* Given two data references and segment lengths described by DR_A and DR_B,
1869 create expression checking if the two addresses ranges intersect with
1870 each other:
1872 ((DR_A_addr_0 + DR_A_segment_length_0) <= DR_B_addr_0)
1873 || (DR_B_addr_0 + DER_B_segment_length_0) <= DR_A_addr_0)) */
1875 static void
1876 create_intersect_range_checks (class loop *loop, tree *cond_expr,
1877 const dr_with_seg_len& dr_a,
1878 const dr_with_seg_len& dr_b)
1880 *cond_expr = NULL_TREE;
1881 if (create_intersect_range_checks_index (loop, cond_expr, dr_a, dr_b))
1882 return;
1884 unsigned HOST_WIDE_INT min_align;
1885 tree_code cmp_code;
1886 if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
1887 && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
1889 /* In this case adding access_size to seg_len is likely to give
1890 a simple X * step, where X is either the number of scalar
1891 iterations or the vectorization factor. We're better off
1892 keeping that, rather than subtracting an alignment from it.
1894 In this case the maximum values are exclusive and so there is
1895 no alias if the maximum of one segment equals the minimum
1896 of another. */
1897 min_align = 0;
1898 cmp_code = LE_EXPR;
1900 else
1902 /* Calculate the minimum alignment shared by all four pointers,
1903 then arrange for this alignment to be subtracted from the
1904 exclusive maximum values to get inclusive maximum values.
1905 This "- min_align" is cumulative with a "+ access_size"
1906 in the calculation of the maximum values. In the best
1907 (and common) case, the two cancel each other out, leaving
1908 us with an inclusive bound based only on seg_len. In the
1909 worst case we're simply adding a smaller number than before.
1911 Because the maximum values are inclusive, there is an alias
1912 if the maximum value of one segment is equal to the minimum
1913 value of the other. */
1914 min_align = MIN (dr_a.align, dr_b.align);
1915 cmp_code = LT_EXPR;
1918 tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
1919 get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
1920 get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);
1922 *cond_expr
1923 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1924 fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
1925 fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
1928 /* Create a conditional expression that represents the run-time checks for
1929 overlapping of address ranges represented by a list of data references
1930 pairs passed in ALIAS_PAIRS. Data references are in LOOP. The returned
1931 COND_EXPR is the conditional expression to be used in the if statement
1932 that controls which version of the loop gets executed at runtime. */
1934 void
1935 create_runtime_alias_checks (class loop *loop,
1936 vec<dr_with_seg_len_pair_t> *alias_pairs,
1937 tree * cond_expr)
1939 tree part_cond_expr;
1941 fold_defer_overflow_warnings ();
1942 for (size_t i = 0, s = alias_pairs->length (); i < s; ++i)
1944 const dr_with_seg_len& dr_a = (*alias_pairs)[i].first;
1945 const dr_with_seg_len& dr_b = (*alias_pairs)[i].second;
1947 if (dump_enabled_p ())
1948 dump_printf (MSG_NOTE,
1949 "create runtime check for data references %T and %T\n",
1950 DR_REF (dr_a.dr), DR_REF (dr_b.dr));
1952 /* Create condition expression for each pair data references. */
1953 create_intersect_range_checks (loop, &part_cond_expr, dr_a, dr_b);
1954 if (*cond_expr)
1955 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1956 *cond_expr, part_cond_expr);
1957 else
1958 *cond_expr = part_cond_expr;
1960 fold_undefer_and_ignore_overflow_warnings ();
1963 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1964 expressions. */
1965 static bool
1966 dr_equal_offsets_p1 (tree offset1, tree offset2)
1968 bool res;
1970 STRIP_NOPS (offset1);
1971 STRIP_NOPS (offset2);
1973 if (offset1 == offset2)
1974 return true;
1976 if (TREE_CODE (offset1) != TREE_CODE (offset2)
1977 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1978 return false;
1980 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1981 TREE_OPERAND (offset2, 0));
1983 if (!res || !BINARY_CLASS_P (offset1))
1984 return res;
1986 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1987 TREE_OPERAND (offset2, 1));
1989 return res;
1992 /* Check if DRA and DRB have equal offsets. */
1993 bool
1994 dr_equal_offsets_p (struct data_reference *dra,
1995 struct data_reference *drb)
1997 tree offset1, offset2;
1999 offset1 = DR_OFFSET (dra);
2000 offset2 = DR_OFFSET (drb);
2002 return dr_equal_offsets_p1 (offset1, offset2);
2005 /* Returns true if FNA == FNB. */
2007 static bool
2008 affine_function_equal_p (affine_fn fna, affine_fn fnb)
2010 unsigned i, n = fna.length ();
2012 if (n != fnb.length ())
2013 return false;
2015 for (i = 0; i < n; i++)
2016 if (!operand_equal_p (fna[i], fnb[i], 0))
2017 return false;
2019 return true;
2022 /* If all the functions in CF are the same, returns one of them,
2023 otherwise returns NULL. */
2025 static affine_fn
2026 common_affine_function (conflict_function *cf)
2028 unsigned i;
2029 affine_fn comm;
2031 if (!CF_NONTRIVIAL_P (cf))
2032 return affine_fn ();
2034 comm = cf->fns[0];
2036 for (i = 1; i < cf->n; i++)
2037 if (!affine_function_equal_p (comm, cf->fns[i]))
2038 return affine_fn ();
2040 return comm;
2043 /* Returns the base of the affine function FN. */
2045 static tree
2046 affine_function_base (affine_fn fn)
2048 return fn[0];
2051 /* Returns true if FN is a constant. */
2053 static bool
2054 affine_function_constant_p (affine_fn fn)
2056 unsigned i;
2057 tree coef;
2059 for (i = 1; fn.iterate (i, &coef); i++)
2060 if (!integer_zerop (coef))
2061 return false;
2063 return true;
2066 /* Returns true if FN is the zero constant function. */
2068 static bool
2069 affine_function_zero_p (affine_fn fn)
2071 return (integer_zerop (affine_function_base (fn))
2072 && affine_function_constant_p (fn));
2075 /* Returns a signed integer type with the largest precision from TA
2076 and TB. */
2078 static tree
2079 signed_type_for_types (tree ta, tree tb)
2081 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
2082 return signed_type_for (ta);
2083 else
2084 return signed_type_for (tb);
2087 /* Applies operation OP on affine functions FNA and FNB, and returns the
2088 result. */
2090 static affine_fn
2091 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
2093 unsigned i, n, m;
2094 affine_fn ret;
2095 tree coef;
2097 if (fnb.length () > fna.length ())
2099 n = fna.length ();
2100 m = fnb.length ();
2102 else
2104 n = fnb.length ();
2105 m = fna.length ();
2108 ret.create (m);
2109 for (i = 0; i < n; i++)
2111 tree type = signed_type_for_types (TREE_TYPE (fna[i]),
2112 TREE_TYPE (fnb[i]));
2113 ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
2116 for (; fna.iterate (i, &coef); i++)
2117 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2118 coef, integer_zero_node));
2119 for (; fnb.iterate (i, &coef); i++)
2120 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2121 integer_zero_node, coef));
2123 return ret;
2126 /* Returns the sum of affine functions FNA and FNB. */
2128 static affine_fn
2129 affine_fn_plus (affine_fn fna, affine_fn fnb)
2131 return affine_fn_op (PLUS_EXPR, fna, fnb);
2134 /* Returns the difference of affine functions FNA and FNB. */
2136 static affine_fn
2137 affine_fn_minus (affine_fn fna, affine_fn fnb)
2139 return affine_fn_op (MINUS_EXPR, fna, fnb);
2142 /* Frees affine function FN. */
2144 static void
2145 affine_fn_free (affine_fn fn)
2147 fn.release ();
/* Determine for each subscript in the data dependence relation DDR
   the distance.  A subscript's distance is the constant difference of
   its conflict functions in A and B when that difference is constant,
   and chrec_dont_know otherwise.  Only relations known to be
   dependent (DDR_ARE_DEPENDENT == NULL_TREE) are processed.  */

static void
compute_subscript_distance (struct data_dependence_relation *ddr)
{
  conflict_function *cf_a, *cf_b;
  affine_fn fn_a, fn_b, diff;

  if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      unsigned int i;

      for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
	{
	  struct subscript *subscript;

	  subscript = DDR_SUBSCRIPT (ddr, i);
	  cf_a = SUB_CONFLICTS_IN_A (subscript);
	  cf_b = SUB_CONFLICTS_IN_B (subscript);

	  fn_a = common_affine_function (cf_a);
	  fn_b = common_affine_function (cf_b);
	  if (!fn_a.exists () || !fn_b.exists ())
	    {
	      /* No common affine form exists for this subscript;
		 give up on the whole relation (remaining subscripts
		 keep their previously stored distances).  */
	      SUB_DISTANCE (subscript) = chrec_dont_know;
	      return;
	    }
	  diff = affine_fn_minus (fn_a, fn_b);

	  if (affine_function_constant_p (diff))
	    SUB_DISTANCE (subscript) = affine_function_base (diff);
	  else
	    SUB_DISTANCE (subscript) = chrec_dont_know;

	  affine_fn_free (diff);
	}
    }
}
2190 /* Returns the conflict function for "unknown". */
2192 static conflict_function *
2193 conflict_fn_not_known (void)
2195 conflict_function *fn = XCNEW (conflict_function);
2196 fn->n = NOT_KNOWN;
2198 return fn;
2201 /* Returns the conflict function for "independent". */
2203 static conflict_function *
2204 conflict_fn_no_dependence (void)
2206 conflict_function *fn = XCNEW (conflict_function);
2207 fn->n = NO_DEPENDENCE;
2209 return fn;
/* Returns true if the address of OBJ is invariant in LOOP.  Walks the
   chain of component references down to the base object, checking that
   no operand contributing to the address is defined inside LOOP.  */

static bool
object_address_invariant_in_loop_p (const class loop *loop, const_tree obj)
{
  while (handled_component_p (obj))
    {
      if (TREE_CODE (obj) == ARRAY_REF)
	{
	  /* Check operands 1..3 of the ARRAY_REF: the index, the
	     lower bound and the element size.  */
	  for (int i = 1; i < 4; ++i)
	    if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i),
							loop->num))
	      return false;
	}
      else if (TREE_CODE (obj) == COMPONENT_REF)
	{
	  /* Operand 2 of a COMPONENT_REF is the (optional) field
	     offset override; it too must be loop invariant.  */
	  if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
						      loop->num))
	    return false;
	}
      obj = TREE_OPERAND (obj, 0);
    }

  /* A plain decl base has an invariant address.  */
  if (!INDIRECT_REF_P (obj)
      && TREE_CODE (obj) != MEM_REF)
    return true;

  /* For a pointer-based base, the pointer itself must not be defined
     inside LOOP.  */
  return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
						  loop->num);
}
/* Returns false if we can prove that data references A and B do not alias,
   true otherwise.  If LOOP_NEST is false no cross-iteration aliases are
   considered.  */

bool
dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
		class loop *loop_nest)
{
  tree addr_a = DR_BASE_OBJECT (a);
  tree addr_b = DR_BASE_OBJECT (b);

  /* If we are not processing a loop nest but scalar code we
     do not need to care about possible cross-iteration dependences
     and thus can process the full original reference.  Do so,
     similar to how loop invariant motion applies extra offset-based
     disambiguation.  */
  if (!loop_nest)
    {
      aff_tree off1, off2;
      poly_widest_int size1, size2;
      get_inner_reference_aff (DR_REF (a), &off1, &size1);
      get_inner_reference_aff (DR_REF (b), &off2, &size2);
      /* Compute off2 - off1 and test whether the two extents can
	 overlap at all.  */
      aff_combination_scale (&off1, -1);
      aff_combination_add (&off2, &off1);
      if (aff_comb_cannot_overlap_p (&off2, size1, size2))
	return false;
    }

  /* Restrict-qualified pointers are tracked via dependence cliques:
     same clique but different bases means provably no alias.  */
  if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
      && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
      /* For cross-iteration dependences the cliques must be valid for the
	 whole loop, not just individual iterations.  */
      && (!loop_nest
	  || MR_DEPENDENCE_CLIQUE (addr_a) == 1
	  || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique)
      && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
      && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
    return false;

  /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
     do not know the size of the base-object.  So we cannot do any
     offset/overlap based analysis but have to rely on points-to
     information only.  */
  if (TREE_CODE (addr_a) == MEM_REF
      && (DR_UNCONSTRAINED_BASE (a)
	  || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
    {
      /* For true dependences we can apply TBAA.  */
      if (flag_strict_aliasing
	  && DR_IS_WRITE (a) && DR_IS_READ (b)
	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
				     get_alias_set (DR_REF (b))))
	return false;
      if (TREE_CODE (addr_b) == MEM_REF)
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       TREE_OPERAND (addr_b, 0));
      else
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       build_fold_addr_expr (addr_b));
    }
  else if (TREE_CODE (addr_b) == MEM_REF
	   && (DR_UNCONSTRAINED_BASE (b)
	       || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
    {
      /* For true dependences we can apply TBAA.  */
      if (flag_strict_aliasing
	  && DR_IS_WRITE (a) && DR_IS_READ (b)
	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
				     get_alias_set (DR_REF (b))))
	return false;
      if (TREE_CODE (addr_a) == MEM_REF)
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       TREE_OPERAND (addr_b, 0));
      else
	return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
				       TREE_OPERAND (addr_b, 0));
    }

  /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
     that is being subsetted in the loop nest.  */
  if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
    return refs_output_dependent_p (addr_a, addr_b);
  else if (DR_IS_READ (a) && DR_IS_WRITE (b))
    return refs_anti_dependent_p (addr_a, addr_b);
  return refs_may_alias_p (addr_a, addr_b);
}
2330 /* REF_A and REF_B both satisfy access_fn_component_p. Return true
2331 if it is meaningful to compare their associated access functions
2332 when checking for dependencies. */
2334 static bool
2335 access_fn_components_comparable_p (tree ref_a, tree ref_b)
2337 /* Allow pairs of component refs from the following sets:
2339 { REALPART_EXPR, IMAGPART_EXPR }
2340 { COMPONENT_REF }
2341 { ARRAY_REF }. */
2342 tree_code code_a = TREE_CODE (ref_a);
2343 tree_code code_b = TREE_CODE (ref_b);
2344 if (code_a == IMAGPART_EXPR)
2345 code_a = REALPART_EXPR;
2346 if (code_b == IMAGPART_EXPR)
2347 code_b = REALPART_EXPR;
2348 if (code_a != code_b)
2349 return false;
2351 if (TREE_CODE (ref_a) == COMPONENT_REF)
2352 /* ??? We cannot simply use the type of operand #0 of the refs here as
2353 the Fortran compiler smuggles type punning into COMPONENT_REFs.
2354 Use the DECL_CONTEXT of the FIELD_DECLs instead. */
2355 return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
2356 == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
2358 return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
2359 TREE_TYPE (TREE_OPERAND (ref_b, 0)));
/* Initialize a data dependence relation between data accesses A and
   B.  NB_LOOPS is the number of loops surrounding the references: the
   size of the classic distance/direction vectors.  Returns a freshly
   XCNEW-allocated relation; ownership passes to the caller.  */

struct data_dependence_relation *
initialize_data_dependence_relation (struct data_reference *a,
				     struct data_reference *b,
				     vec<loop_p> loop_nest)
{
  struct data_dependence_relation *res;
  unsigned int i;

  res = XCNEW (struct data_dependence_relation);
  DDR_A (res) = a;
  DDR_B (res) = b;
  DDR_LOOP_NEST (res).create (0);
  DDR_SUBSCRIPTS (res).create (0);
  DDR_DIR_VECTS (res).create (0);
  DDR_DIST_VECTS (res).create (0);

  if (a == NULL || b == NULL)
    {
      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
      return res;
    }

  /* If the data references do not alias, then they are independent.  */
  if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL))
    {
      DDR_ARE_DEPENDENT (res) = chrec_known;
      return res;
    }

  unsigned int num_dimensions_a = DR_NUM_DIMENSIONS (a);
  unsigned int num_dimensions_b = DR_NUM_DIMENSIONS (b);
  if (num_dimensions_a == 0 || num_dimensions_b == 0)
    {
      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
      return res;
    }

  /* For unconstrained bases, the root (highest-indexed) subscript
     describes a variation in the base of the original DR_REF rather
     than a component access.  We have no type that accurately describes
     the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
     applying this subscript) so limit the search to the last real
     component access.

     E.g. for:

       void
       f (int a[][8], int b[][8])
       {
	 for (int i = 0; i < 8; ++i)
	   a[i * 2][0] = b[i][0];
       }

     the a and b accesses have a single ARRAY_REF component reference [0]
     but have two subscripts.  */
  if (DR_UNCONSTRAINED_BASE (a))
    num_dimensions_a -= 1;
  if (DR_UNCONSTRAINED_BASE (b))
    num_dimensions_b -= 1;

  /* These structures describe sequences of component references in
     DR_REF (A) and DR_REF (B).  Each component reference is tied to a
     specific access function.  */
  struct {
    /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
       DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
       indices.  In C notation, these are the indices of the rightmost
       component references; e.g. for a sequence .b.c.d, the start
       index is for .d.  */
    unsigned int start_a;
    unsigned int start_b;

    /* The sequence contains LENGTH consecutive access functions from
       each DR.  */
    unsigned int length;

    /* The enclosing objects for the A and B sequences respectively,
       i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
       and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied.  */
    tree object_a;
    tree object_b;
  } full_seq = {}, struct_seq = {};

  /* Before each iteration of the loop:

     - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
     - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B).  */
  unsigned int index_a = 0;
  unsigned int index_b = 0;
  tree ref_a = DR_REF (a);
  tree ref_b = DR_REF (b);

  /* Now walk the component references from the final DR_REFs back up to
     the enclosing base objects.  Each component reference corresponds
     to one access function in the DR, with access function 0 being for
     the final DR_REF and the highest-indexed access function being the
     one that is applied to the base of the DR.

     Look for a sequence of component references whose access functions
     are comparable (see access_fn_components_comparable_p).  If more
     than one such sequence exists, pick the one nearest the base
     (which is the leftmost sequence in C notation).  Store this sequence
     in FULL_SEQ.

     For example, if we have:

	struct foo { struct bar s; ... } (*a)[10], (*b)[10];

	A: a[0][i].s.c.d
	B: __real b[0][i].s.e[i].f

     (where d is the same type as the real component of f) then the access
     functions would be:

	 0   1   2   3
     A:  .d  .c  .s [i]

	 0   1   2   3   4   5
     B:  __real  .f  [i]  .e  .s  [i]

     The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
     and [i] is an ARRAY_REF.  However, the A1/B3 column contains two
     COMPONENT_REF accesses for struct bar, so is comparable.  Likewise
     the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
     so is comparable.  The A3/B5 column contains two ARRAY_REFs that
     index foo[10] arrays, so is again comparable.  The sequence is
     therefore:

     A: [1, 3]  (i.e. [i].s.c)
     B: [3, 5]  (i.e. [i].s.e)

     Also look for sequences of component references whose access
     functions are comparable and whose enclosing objects have the same
     RECORD_TYPE.  Store this sequence in STRUCT_SEQ.  In the above
     example, STRUCT_SEQ would be:

     A: [1, 2]  (i.e. s.c)
     B: [3, 4]  (i.e. s.e)  */
  while (index_a < num_dimensions_a && index_b < num_dimensions_b)
    {
      /* REF_A and REF_B must be one of the component access types
	 allowed by dr_analyze_indices.  */
      gcc_checking_assert (access_fn_component_p (ref_a));
      gcc_checking_assert (access_fn_component_p (ref_b));

      /* Get the immediately-enclosing objects for REF_A and REF_B,
	 i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
	 and DR_ACCESS_FN (B, INDEX_B).  */
      tree object_a = TREE_OPERAND (ref_a, 0);
      tree object_b = TREE_OPERAND (ref_b, 0);

      tree type_a = TREE_TYPE (object_a);
      tree type_b = TREE_TYPE (object_b);
      if (access_fn_components_comparable_p (ref_a, ref_b))
	{
	  /* This pair of component accesses is comparable for dependence
	     analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
	     DR_ACCESS_FN (B, INDEX_B) in the sequence.  */
	  if (full_seq.start_a + full_seq.length != index_a
	      || full_seq.start_b + full_seq.length != index_b)
	    {
	      /* The accesses don't extend the current sequence,
		 so start a new one here.  */
	      full_seq.start_a = index_a;
	      full_seq.start_b = index_b;
	      full_seq.length = 0;
	    }

	  /* Add this pair of references to the sequence.  */
	  full_seq.length += 1;
	  full_seq.object_a = object_a;
	  full_seq.object_b = object_b;

	  /* If the enclosing objects are structures (and thus have the
	     same RECORD_TYPE), record the new sequence in STRUCT_SEQ.  */
	  if (TREE_CODE (type_a) == RECORD_TYPE)
	    struct_seq = full_seq;

	  /* Move to the next containing reference for both A and B.  */
	  ref_a = object_a;
	  ref_b = object_b;
	  index_a += 1;
	  index_b += 1;
	  continue;
	}

      /* Try to approach equal type sizes.  */
      if (!COMPLETE_TYPE_P (type_a)
	  || !COMPLETE_TYPE_P (type_b)
	  || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
	  || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
	break;

      /* Skip the access function of whichever side currently has the
	 smaller enclosing type (both when equal), so that the two
	 walks stay roughly aligned.  */
      unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
      unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
      if (size_a <= size_b)
	{
	  index_a += 1;
	  ref_a = object_a;
	}
      if (size_b <= size_a)
	{
	  index_b += 1;
	  ref_b = object_b;
	}
    }

  /* See whether FULL_SEQ ends at the base and whether the two bases
     are equal.  We do not care about TBAA or alignment info so we can
     use OEP_ADDRESS_OF to avoid false negatives.  */
  tree base_a = DR_BASE_OBJECT (a);
  tree base_b = DR_BASE_OBJECT (b);
  bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
		      && full_seq.start_b + full_seq.length == num_dimensions_b
		      && DR_UNCONSTRAINED_BASE (a) == DR_UNCONSTRAINED_BASE (b)
		      && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
		      && types_compatible_p (TREE_TYPE (base_a),
					     TREE_TYPE (base_b))
		      && (!loop_nest.exists ()
			  || (object_address_invariant_in_loop_p
			      (loop_nest[0], base_a))));

  /* If the bases are the same, we can include the base variation too.
     E.g. the b accesses in:

       for (int i = 0; i < n; ++i)
	 b[i + 4][0] = b[i][0];

     have a definite dependence distance of 4, while for:

       for (int i = 0; i < n; ++i)
	 a[i + 4][0] = b[i][0];

     the dependence distance depends on the gap between a and b.

     If the bases are different then we can only rely on the sequence
     rooted at a structure access, since arrays are allowed to overlap
     arbitrarily and change shape arbitrarily.  E.g. we treat this as
     valid code:

       int a[256];
       ...
       ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];

     where two lvalues with the same int[4][3] type overlap, and where
     both lvalues are distinct from the object's declared type.  */
  if (same_base_p)
    {
      if (DR_UNCONSTRAINED_BASE (a))
	full_seq.length += 1;
    }
  else
    full_seq = struct_seq;

  /* Punt if we didn't find a suitable sequence.  */
  if (full_seq.length == 0)
    {
      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
      return res;
    }

  if (!same_base_p)
    {
      /* Partial overlap is possible for different bases when strict aliasing
	 is not in effect.  It's also possible if either base involves a union
	 access; e.g. for:

	   struct s1 { int a[2]; };
	   struct s2 { struct s1 b; int c; };
	   struct s3 { int d; struct s1 e; };
	   union u { struct s2 f; struct s3 g; } *p, *q;

	 the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
	 "p->g.e" (base "p->g") and might partially overlap the s1 at
	 "q->g.e" (base "q->g").  */
      if (!flag_strict_aliasing
	  || ref_contains_union_access_p (full_seq.object_a)
	  || ref_contains_union_access_p (full_seq.object_b))
	{
	  DDR_ARE_DEPENDENT (res) = chrec_dont_know;
	  return res;
	}

      DDR_COULD_BE_INDEPENDENT_P (res) = true;
      if (!loop_nest.exists ()
	  || (object_address_invariant_in_loop_p (loop_nest[0],
						  full_seq.object_a)
	      && object_address_invariant_in_loop_p (loop_nest[0],
						     full_seq.object_b)))
	{
	  DDR_OBJECT_A (res) = full_seq.object_a;
	  DDR_OBJECT_B (res) = full_seq.object_b;
	}
    }

  DDR_AFFINE_P (res) = true;
  DDR_ARE_DEPENDENT (res) = NULL_TREE;
  DDR_SUBSCRIPTS (res).create (full_seq.length);
  DDR_LOOP_NEST (res) = loop_nest;
  DDR_SELF_REFERENCE (res) = false;

  /* Build one subscript per comparable access-function pair; all
     conflict information starts out as "not known".  */
  for (i = 0; i < full_seq.length; ++i)
    {
      struct subscript *subscript;

      subscript = XNEW (struct subscript);
      SUB_ACCESS_FN (subscript, 0) = DR_ACCESS_FN (a, full_seq.start_a + i);
      SUB_ACCESS_FN (subscript, 1) = DR_ACCESS_FN (b, full_seq.start_b + i);
      SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
      SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
      SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
      SUB_DISTANCE (subscript) = chrec_dont_know;
      DDR_SUBSCRIPTS (res).safe_push (subscript);
    }

  return res;
}
2684 /* Frees memory used by the conflict function F. */
2686 static void
2687 free_conflict_function (conflict_function *f)
2689 unsigned i;
2691 if (CF_NONTRIVIAL_P (f))
2693 for (i = 0; i < f->n; i++)
2694 affine_fn_free (f->fns[i]);
2696 free (f);
2699 /* Frees memory used by SUBSCRIPTS. */
2701 static void
2702 free_subscripts (vec<subscript_p> subscripts)
2704 unsigned i;
2705 subscript_p s;
2707 FOR_EACH_VEC_ELT (subscripts, i, s)
2709 free_conflict_function (s->conflicting_iterations_in_a);
2710 free_conflict_function (s->conflicting_iterations_in_b);
2711 free (s);
2713 subscripts.release ();
/* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
   description.  */

static inline void
finalize_ddr_dependent (struct data_dependence_relation *ddr,
			tree chrec)
{
  DDR_ARE_DEPENDENT (ddr) = chrec;
  /* The per-subscript data is no longer needed once the relation is
     summarized by CHREC; release it and leave an empty vector.  */
  free_subscripts (DDR_SUBSCRIPTS (ddr));
  DDR_SUBSCRIPTS (ddr).create (0);
}
/* The dependence relation DDR cannot be represented by a distance
   vector.  */

static inline void
non_affine_dependence_relation (struct data_dependence_relation *ddr)
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");

  /* Later distance/direction-vector computations test this flag.  */
  DDR_AFFINE_P (ddr) = false;
}
2742 /* This section contains the classic Banerjee tests. */
2744 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
2745 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
2747 static inline bool
2748 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
2750 return (evolution_function_is_constant_p (chrec_a)
2751 && evolution_function_is_constant_p (chrec_b));
2754 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
2755 variable, i.e., if the SIV (Single Index Variable) test is true. */
2757 static bool
2758 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
2760 if ((evolution_function_is_constant_p (chrec_a)
2761 && evolution_function_is_univariate_p (chrec_b))
2762 || (evolution_function_is_constant_p (chrec_b)
2763 && evolution_function_is_univariate_p (chrec_a)))
2764 return true;
2766 if (evolution_function_is_univariate_p (chrec_a)
2767 && evolution_function_is_univariate_p (chrec_b))
2769 switch (TREE_CODE (chrec_a))
2771 case POLYNOMIAL_CHREC:
2772 switch (TREE_CODE (chrec_b))
2774 case POLYNOMIAL_CHREC:
2775 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
2776 return false;
2777 /* FALLTHRU */
2779 default:
2780 return true;
2783 default:
2784 return true;
2788 return false;
2791 /* Creates a conflict function with N dimensions. The affine functions
2792 in each dimension follow. */
2794 static conflict_function *
2795 conflict_fn (unsigned n, ...)
2797 unsigned i;
2798 conflict_function *ret = XCNEW (conflict_function);
2799 va_list ap;
2801 gcc_assert (n > 0 && n <= MAX_DIM);
2802 va_start (ap, n);
2804 ret->n = n;
2805 for (i = 0; i < n; i++)
2806 ret->fns[i] = va_arg (ap, affine_fn);
2807 va_end (ap);
2809 return ret;
2812 /* Returns constant affine function with value CST. */
2814 static affine_fn
2815 affine_fn_cst (tree cst)
2817 affine_fn fn;
2818 fn.create (1);
2819 fn.quick_push (cst);
2820 return fn;
2823 /* Returns affine function with single variable, CST + COEF * x_DIM. */
2825 static affine_fn
2826 affine_fn_univar (tree cst, unsigned dim, tree coef)
2828 affine_fn fn;
2829 fn.create (dim + 1);
2830 unsigned i;
2832 gcc_assert (dim > 0);
2833 fn.quick_push (cst);
2834 for (i = 1; i < dim; i++)
2835 fn.quick_push (integer_zero_node);
2836 fn.quick_push (coef);
2837 return fn;
/* Analyze a ZIV (Zero Index Variable) subscript.  *OVERLAPS_A and
   *OVERLAPS_B are initialized to the functions that describe the
   relation between the elements accessed twice by CHREC_A and
   CHREC_B.  For k >= 0, the following property is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_ziv_subscript (tree chrec_a,
		       tree chrec_b,
		       conflict_function **overlaps_a,
		       conflict_function **overlaps_b,
		       tree *last_conflicts)
{
  tree type, difference;
  dependence_stats.num_ziv++;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_ziv_subscript \n");

  /* Fold both chrecs into a common signed type so that the difference
     can be computed and classified.  */
  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
  chrec_a = chrec_convert (type, chrec_a, NULL);
  chrec_b = chrec_convert (type, chrec_b, NULL);
  difference = chrec_fold_minus (type, chrec_a, chrec_b);

  switch (TREE_CODE (difference))
    {
    case INTEGER_CST:
      if (integer_zerop (difference))
	{
	  /* The difference is equal to zero: the accessed index
	     overlaps for each iteration in the loop.  */
	  *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
	  *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
	  *last_conflicts = chrec_dont_know;
	  dependence_stats.num_ziv_dependent++;
	}
      else
	{
	  /* The accesses do not overlap.  */
	  *overlaps_a = conflict_fn_no_dependence ();
	  *overlaps_b = conflict_fn_no_dependence ();
	  *last_conflicts = integer_zero_node;
	  dependence_stats.num_ziv_independent++;
	}
      break;

    default:
      /* We're not sure whether the indexes overlap.  For the moment,
	 conservatively answer "don't know".  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "ziv test failed: difference is non-integer.\n");

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      dependence_stats.num_ziv_unimplemented++;
      break;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, ")\n");
}
2904 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
2905 and only if it fits to the int type. If this is not the case, or the
2906 bound on the number of iterations of LOOP could not be derived, returns
2907 chrec_dont_know. */
2909 static tree
2910 max_stmt_executions_tree (class loop *loop)
2912 widest_int nit;
2914 if (!max_stmt_executions (loop, &nit))
2915 return chrec_dont_know;
2917 if (!wi::fits_to_tree_p (nit, unsigned_type_node))
2918 return chrec_dont_know;
2920 return wide_int_to_tree (unsigned_type_node, nit);
2923 /* Determine whether the CHREC is always positive/negative. If the expression
2924 cannot be statically analyzed, return false, otherwise set the answer into
2925 VALUE. */
2927 static bool
2928 chrec_is_positive (tree chrec, bool *value)
2930 bool value0, value1, value2;
2931 tree end_value, nb_iter;
2933 switch (TREE_CODE (chrec))
2935 case POLYNOMIAL_CHREC:
2936 if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
2937 || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
2938 return false;
2940 /* FIXME -- overflows. */
2941 if (value0 == value1)
2943 *value = value0;
2944 return true;
2947 /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
2948 and the proof consists in showing that the sign never
2949 changes during the execution of the loop, from 0 to
2950 loop->nb_iterations. */
2951 if (!evolution_function_is_affine_p (chrec))
2952 return false;
2954 nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
2955 if (chrec_contains_undetermined (nb_iter))
2956 return false;
2958 #if 0
2959 /* TODO -- If the test is after the exit, we may decrease the number of
2960 iterations by one. */
2961 if (after_exit)
2962 nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
2963 #endif
2965 end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
2967 if (!chrec_is_positive (end_value, &value2))
2968 return false;
2970 *value = value0;
2971 return value0 == value1;
2973 case INTEGER_CST:
2974 switch (tree_int_cst_sgn (chrec))
2976 case -1:
2977 *value = false;
2978 break;
2979 case 1:
2980 *value = true;
2981 break;
2982 default:
2983 return false;
2985 return true;
2987 default:
2988 return false;
/* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
   constant, and CHREC_B is an affine function.  *OVERLAPS_A and
   *OVERLAPS_B are initialized to the functions that describe the
   relation between the elements accessed twice by CHREC_A and
   CHREC_B.  For k >= 0, the following property is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_siv_subscript_cst_affine (tree chrec_a,
				  tree chrec_b,
				  conflict_function **overlaps_a,
				  conflict_function **overlaps_b,
				  tree *last_conflicts)
{
  bool value0, value1, value2;
  tree type, difference, tmp;

  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
  chrec_a = chrec_convert (type, chrec_a, NULL);
  chrec_b = chrec_convert (type, chrec_b, NULL);
  /* DIFFERENCE is the gap between the constant access and the initial
     value of the affine access.  */
  difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);

  /* Special case overlap in the first iteration.  */
  if (integer_zerop (difference))
    {
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = integer_one_node;
      return;
    }

  /* VALUE0 is the sign of the gap; without it we cannot reason about
     whether CHREC_B ever reaches CHREC_A.  */
  if (!chrec_is_positive (initial_condition (difference), &value0))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "siv test failed: chrec is not positive.\n");

      dependence_stats.num_siv_unimplemented++;
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      return;
    }
  else
    {
      if (value0 == false)
	{
	  /* The gap is negative: CHREC_B starts above CHREC_A, so an
	     overlap requires a positive step (VALUE1).  */
	  if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
	      || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "siv test failed: chrec not positive.\n");

	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	      dependence_stats.num_siv_unimplemented++;
	      return;
	    }
	  else
	    {
	      if (value1 == true)
		{
		  /* Example:
		     chrec_a = 12
		     chrec_b = {10, +, 1}
		  */

		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
		    {
		      HOST_WIDE_INT numiter;
		      class loop *loop = get_chrec_loop (chrec_b);

		      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
		      /* TMP is the iteration of CHREC_B at which the
			 overlap occurs: |difference| / step.  */
		      tmp = fold_build2 (EXACT_DIV_EXPR, type,
					 fold_build1 (ABS_EXPR, type, difference),
					 CHREC_RIGHT (chrec_b));
		      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
		      *last_conflicts = integer_one_node;

		      /* Perform weak-zero siv test to see if overlap is
			 outside the loop bounds.  */
		      numiter = max_stmt_executions_int (loop);

		      if (numiter >= 0
			  && compare_tree_int (tmp, numiter) > 0)
			{
			  free_conflict_function (*overlaps_a);
			  free_conflict_function (*overlaps_b);
			  *overlaps_a = conflict_fn_no_dependence ();
			  *overlaps_b = conflict_fn_no_dependence ();
			  *last_conflicts = integer_zero_node;
			  dependence_stats.num_siv_independent++;
			  return;
			}
		      dependence_stats.num_siv_dependent++;
		      return;
		    }

		  /* When the step does not divide the difference, there are
		     no overlaps.  */
		  else
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      dependence_stats.num_siv_independent++;
		      return;
		    }
		}
	      else
		{
		  /* Example:
		     chrec_a = 12
		     chrec_b = {10, +, -1}

		     In this case, chrec_a will not overlap with chrec_b.  */
		  *overlaps_a = conflict_fn_no_dependence ();
		  *overlaps_b = conflict_fn_no_dependence ();
		  *last_conflicts = integer_zero_node;
		  dependence_stats.num_siv_independent++;
		  return;
		}
	    }
	}
      else
	{
	  /* The gap is positive: CHREC_B starts below CHREC_A, so an
	     overlap requires a negative step (VALUE2 false).  */
	  if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
	      || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "siv test failed: chrec not positive.\n");

	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	      dependence_stats.num_siv_unimplemented++;
	      return;
	    }
	  else
	    {
	      if (value2 == false)
		{
		  /* Example:
		     chrec_a = 3
		     chrec_b = {10, +, -1}
		  */
		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
		    {
		      HOST_WIDE_INT numiter;
		      class loop *loop = get_chrec_loop (chrec_b);

		      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
		      tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
					 CHREC_RIGHT (chrec_b));
		      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
		      *last_conflicts = integer_one_node;

		      /* Perform weak-zero siv test to see if overlap is
			 outside the loop bounds.  */
		      numiter = max_stmt_executions_int (loop);

		      if (numiter >= 0
			  && compare_tree_int (tmp, numiter) > 0)
			{
			  free_conflict_function (*overlaps_a);
			  free_conflict_function (*overlaps_b);
			  *overlaps_a = conflict_fn_no_dependence ();
			  *overlaps_b = conflict_fn_no_dependence ();
			  *last_conflicts = integer_zero_node;
			  dependence_stats.num_siv_independent++;
			  return;
			}
		      dependence_stats.num_siv_dependent++;
		      return;
		    }

		  /* When the step does not divide the difference, there
		     are no overlaps.  */
		  else
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      dependence_stats.num_siv_independent++;
		      return;
		    }
		}
	      else
		{
		  /* Example:
		     chrec_a = 3
		     chrec_b = {4, +, 1}

		     In this case, chrec_a will not overlap with chrec_b.  */
		  *overlaps_a = conflict_fn_no_dependence ();
		  *overlaps_b = conflict_fn_no_dependence ();
		  *last_conflicts = integer_zero_node;
		  dependence_stats.num_siv_independent++;
		  return;
		}
	    }
	}
    }
}
/* Helper recursive function for initializing the matrix A.  Returns
   the initial value of CHREC.  Each POLYNOMIAL_CHREC level stores its
   (sign-adjusted by MULT) step into A[INDEX][0] and recurses on the
   initial value one row further up; returns chrec_dont_know when a
   step does not fit in a HOST_WIDE_INT.  */

static tree
initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
{
  gcc_assert (chrec);

  switch (TREE_CODE (chrec))
    {
    case POLYNOMIAL_CHREC:
      if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
	return chrec_dont_know;
      A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
      return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);

    case PLUS_EXPR:
    case MULT_EXPR:
    case MINUS_EXPR:
      {
	tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
	tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);

	return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
      }

    CASE_CONVERT:
      {
	tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
	return chrec_convert (chrec_type (chrec), op, NULL);
      }

    case BIT_NOT_EXPR:
      {
	/* Handle ~X as -1 - X.  */
	tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
	return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
			      build_int_cst (TREE_TYPE (chrec), -1), op);
      }

    case INTEGER_CST:
      return chrec;

    default:
      gcc_unreachable ();
      return NULL_TREE;
    }
}
3250 #define FLOOR_DIV(x,y) ((x) / (y))
/* Solves the special case of the Diophantine equation:
   | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)

   Computes the descriptions OVERLAPS_A and OVERLAPS_B.  NITER is the
   number of iterations that loops X and Y run.  The overlaps will be
   constructed as evolutions in dimension DIM.  */

static void
compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
					 HOST_WIDE_INT step_a,
					 HOST_WIDE_INT step_b,
					 affine_fn *overlaps_a,
					 affine_fn *overlaps_b,
					 tree *last_conflicts, int dim)
{
  /* Overlaps are only possible when both steps move in the same
     direction; otherwise only iteration 0 can conflict.  */
  if (((step_a > 0 && step_b > 0)
       || (step_a < 0 && step_b < 0)))
    {
      HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
      HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;

      /* The conflicting iterations advance by the opposite step
	 divided by gcd (STEP_A, STEP_B).  */
      gcd_steps_a_b = gcd (step_a, step_b);
      step_overlaps_a = step_b / gcd_steps_a_b;
      step_overlaps_b = step_a / gcd_steps_a_b;

      if (niter > 0)
	{
	  /* Last conflict is bounded by how many overlap steps fit in
	     NITER iterations of either loop.  */
	  tau2 = FLOOR_DIV (niter, step_overlaps_a);
	  tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
	  last_conflict = tau2;
	  *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
	}
      else
	*last_conflicts = chrec_dont_know;

      *overlaps_a = affine_fn_univar (integer_zero_node, dim,
				      build_int_cst (NULL_TREE,
						     step_overlaps_a));
      *overlaps_b = affine_fn_univar (integer_zero_node, dim,
				      build_int_cst (NULL_TREE,
						     step_overlaps_b));
    }

  else
    {
      /* Steps have opposite signs (or one is zero): the only common
	 value is at iteration zero of both loops.  */
      *overlaps_a = affine_fn_cst (integer_zero_node);
      *overlaps_b = affine_fn_cst (integer_zero_node);
      *last_conflicts = integer_zero_node;
    }
}
/* Solves the special case of a Diophantine equation where CHREC_A is
   an affine bivariate function, and CHREC_B is an affine univariate
   function.  For example,

   | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z

   has the following overlapping functions:

   | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
   | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
   | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v

   FORNOW: This is a specialized implementation for a case occurring in
   a common benchmark.  Implement the general algorithm.  */

static void
compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
				      conflict_function **overlaps_a,
				      conflict_function **overlaps_b,
				      tree *last_conflicts)
{
  bool xz_p, yz_p, xyz_p;
  HOST_WIDE_INT step_x, step_y, step_z;
  HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
  affine_fn overlaps_a_xz, overlaps_b_xz;
  affine_fn overlaps_a_yz, overlaps_b_yz;
  affine_fn overlaps_a_xyz, overlaps_b_xyz;
  affine_fn ova1, ova2, ovb;
  tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;

  /* X is the outer dimension of CHREC_A, Y its inner dimension, Z the
     single dimension of CHREC_B.  */
  step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
  step_y = int_cst_value (CHREC_RIGHT (chrec_a));
  step_z = int_cst_value (CHREC_RIGHT (chrec_b));

  niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
  niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
  niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));

  /* Without iteration counts for all three loops the analysis below
     cannot bound the conflicts.  */
  if (niter_x < 0 || niter_y < 0 || niter_z < 0)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      return;
    }

  /* Solve the three univariate sub-problems: x vs z, y vs z, and
     (x+y) vs z, each in its own overlap dimension.  */
  niter = MIN (niter_x, niter_z);
  compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
					   &overlaps_a_xz,
					   &overlaps_b_xz,
					   &last_conflicts_xz, 1);
  niter = MIN (niter_y, niter_z);
  compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
					   &overlaps_a_yz,
					   &overlaps_b_yz,
					   &last_conflicts_yz, 2);
  niter = MIN (niter_x, niter_z);
  niter = MIN (niter_y, niter);
  compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
					   &overlaps_a_xyz,
					   &overlaps_b_xyz,
					   &last_conflicts_xyz, 3);

  xz_p = !integer_zerop (last_conflicts_xz);
  yz_p = !integer_zerop (last_conflicts_yz);
  xyz_p = !integer_zerop (last_conflicts_xyz);

  if (xz_p || yz_p || xyz_p)
    {
      /* Combine the contributing sub-solutions; affine_fn_plus
	 allocates, so the old values are freed after each sum.  */
      ova1 = affine_fn_cst (integer_zero_node);
      ova2 = affine_fn_cst (integer_zero_node);
      ovb = affine_fn_cst (integer_zero_node);
      if (xz_p)
	{
	  affine_fn t0 = ova1;
	  affine_fn t2 = ovb;

	  ova1 = affine_fn_plus (ova1, overlaps_a_xz);
	  ovb = affine_fn_plus (ovb, overlaps_b_xz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  *last_conflicts = last_conflicts_xz;
	}
      if (yz_p)
	{
	  affine_fn t0 = ova2;
	  affine_fn t2 = ovb;

	  ova2 = affine_fn_plus (ova2, overlaps_a_yz);
	  ovb = affine_fn_plus (ovb, overlaps_b_yz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  *last_conflicts = last_conflicts_yz;
	}
      if (xyz_p)
	{
	  affine_fn t0 = ova1;
	  affine_fn t2 = ova2;
	  affine_fn t4 = ovb;

	  ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
	  ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
	  ovb = affine_fn_plus (ovb, overlaps_b_xyz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  affine_fn_free (t4);
	  *last_conflicts = last_conflicts_xyz;
	}
      *overlaps_a = conflict_fn (2, ova1, ova2);
      *overlaps_b = conflict_fn (1, ovb);
    }
  else
    {
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = integer_zero_node;
    }

  /* The sub-solutions were copied into the results above; release
     the temporaries.  */
  affine_fn_free (overlaps_a_xz);
  affine_fn_free (overlaps_b_xz);
  affine_fn_free (overlaps_a_yz);
  affine_fn_free (overlaps_b_yz);
  affine_fn_free (overlaps_a_xyz);
  affine_fn_free (overlaps_b_xyz);
}
3432 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
3434 static void
3435 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
3436 int size)
3438 memcpy (vec2, vec1, size * sizeof (*vec1));
3441 /* Copy the elements of M x N matrix MAT1 to MAT2. */
3443 static void
3444 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
3445 int m, int n)
3447 int i;
3449 for (i = 0; i < m; i++)
3450 lambda_vector_copy (mat1[i], mat2[i], n);
3453 /* Store the N x N identity matrix in MAT. */
3455 static void
3456 lambda_matrix_id (lambda_matrix mat, int size)
3458 int i, j;
3460 for (i = 0; i < size; i++)
3461 for (j = 0; j < size; j++)
3462 mat[i][j] = (i == j) ? 1 : 0;
3465 /* Return the index of the first nonzero element of vector VEC1 between
3466 START and N. We must have START <= N.
3467 Returns N if VEC1 is the zero vector. */
3469 static int
3470 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
3472 int j = start;
3473 while (j < n && vec1[j] == 0)
3474 j++;
3475 return j;
3478 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
3479 R2 = R2 + CONST1 * R1. */
3481 static void
3482 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2,
3483 lambda_int const1)
3485 int i;
3487 if (const1 == 0)
3488 return;
3490 for (i = 0; i < n; i++)
3491 mat[r2][i] += const1 * mat[r1][i];
3494 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
3495 and store the result in VEC2. */
3497 static void
3498 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
3499 int size, lambda_int const1)
3501 int i;
3503 if (const1 == 0)
3504 lambda_vector_clear (vec2, size);
3505 else
3506 for (i = 0; i < size; i++)
3507 vec2[i] = const1 * vec1[i];
3510 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
3512 static void
3513 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
3514 int size)
3516 lambda_vector_mult_const (vec1, vec2, size, -1);
3519 /* Negate row R1 of matrix MAT which has N columns. */
3521 static void
3522 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
3524 lambda_vector_negate (mat[r1], mat[r1], n);
3527 /* Return true if two vectors are equal. */
3529 static bool
3530 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
3532 int i;
3533 for (i = 0; i < size; i++)
3534 if (vec1[i] != vec2[i])
3535 return false;
3536 return true;
/* Given an M x N integer matrix A, this function determines an M x
   M unimodular matrix U, and an M x N echelon matrix S such that
   "U.A = S".  This decomposition is also known as "right Hermite".

   Ref: Algorithm 2.1 page 33 in "Loop Transformations for
   Restructuring Compilers" Utpal Banerjee.  */

static void
lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
			     lambda_matrix S, lambda_matrix U)
{
  int i, j, i0 = 0;

  /* Start from S = A and U = identity; every row operation applied to
     S below is mirrored on U so that U.A = S stays invariant.  */
  lambda_matrix_copy (A, S, m, n);
  lambda_matrix_id (U, m);

  for (j = 0; j < n; j++)
    {
      if (lambda_vector_first_nz (S[j], m, i0) < m)
	{
	  ++i0;
	  for (i = m - 1; i >= i0; i--)
	    {
	      /* Euclidean-style elimination: repeatedly subtract the
		 quotient multiple of the row above, then swap, until
		 S[i][j] becomes zero.  */
	      while (S[i][j] != 0)
		{
		  lambda_int sigma, factor, a, b;

		  a = S[i-1][j];
		  b = S[i][j];
		  /* SIGMA carries the sign of the quotient; the
		     division itself is done on absolute values.  */
		  sigma = (a * b < 0) ? -1: 1;
		  a = abs_hwi (a);
		  b = abs_hwi (b);
		  factor = sigma * (a / b);

		  lambda_matrix_row_add (S, n, i, i-1, -factor);
		  std::swap (S[i], S[i-1]);

		  lambda_matrix_row_add (U, m, i, i-1, -factor);
		  std::swap (U[i], U[i-1]);
		}
	    }
	}
    }
}
/* Determines the overlapping elements due to accesses CHREC_A and
   CHREC_B, that are affine functions.  This function cannot handle
   symbolic evolution functions, ie. when initial conditions are
   parameters, because it uses lambda matrices of integers.  */

static void
analyze_subscript_affine_affine (tree chrec_a,
				 tree chrec_b,
				 conflict_function **overlaps_a,
				 conflict_function **overlaps_b,
				 tree *last_conflicts)
{
  unsigned nb_vars_a, nb_vars_b, dim;
  HOST_WIDE_INT gamma, gcd_alpha_beta;
  lambda_matrix A, U, S;
  struct obstack scratch_obstack;

  if (eq_evolutions_p (chrec_a, chrec_b))
    {
      /* The accessed index overlaps for each iteration in the
	 loop.  */
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = chrec_dont_know;
      return;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_subscript_affine_affine \n");

  /* For determining the initial intersection, we have to solve a
     Diophantine equation.  This is the most time consuming part.

     For answering to the question: "Is there a dependence?" we have
     to prove that there exists a solution to the Diophantine
     equation, and that the solution is in the iteration domain,
     i.e. the solution is positive or zero, and that the solution
     happens before the upper bound loop.nb_iterations.  Otherwise
     there is no dependence.  This function outputs a description of
     the iterations that hold the intersections.  */

  nb_vars_a = nb_vars_in_chrec (chrec_a);
  nb_vars_b = nb_vars_in_chrec (chrec_b);

  /* All lambda matrices live on this scratch obstack and are freed
     in one go at end_analyze_subs_aa.  */
  gcc_obstack_init (&scratch_obstack);

  dim = nb_vars_a + nb_vars_b;
  U = lambda_matrix_new (dim, dim, &scratch_obstack);
  A = lambda_matrix_new (dim, 1, &scratch_obstack);
  S = lambda_matrix_new (dim, 1, &scratch_obstack);

  /* Fill A with the steps of both chrecs, CHREC_B with negated sign,
     so that the Diophantine system reads "A . t = gamma".  */
  tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
  tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
  if (init_a == chrec_dont_know
      || init_b == chrec_dont_know)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "affine-affine test failed: "
		 "representation issue.\n");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }
  gamma = int_cst_value (init_b) - int_cst_value (init_a);

  /* Don't do all the hard work of solving the Diophantine equation
     when we already know the solution: for example,
     | {3, +, 1}_1
     | {3, +, 4}_2
     | gamma = 3 - 3 = 0.
     Then the first overlap occurs during the first iterations:
     | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
  */
  if (gamma == 0)
    {
      if (nb_vars_a == 1 && nb_vars_b == 1)
	{
	  HOST_WIDE_INT step_a, step_b;
	  HOST_WIDE_INT niter, niter_a, niter_b;
	  affine_fn ova, ovb;

	  niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
	  niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
	  niter = MIN (niter_a, niter_b);
	  step_a = int_cst_value (CHREC_RIGHT (chrec_a));
	  step_b = int_cst_value (CHREC_RIGHT (chrec_b));

	  compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
						   &ova, &ovb,
						   last_conflicts, 1);
	  *overlaps_a = conflict_fn (1, ova);
	  *overlaps_b = conflict_fn (1, ovb);
	}

      else if (nb_vars_a == 2 && nb_vars_b == 1)
	compute_overlap_steps_for_affine_1_2
	  (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);

      else if (nb_vars_a == 1 && nb_vars_b == 2)
	compute_overlap_steps_for_affine_1_2
	  (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);

      else
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "affine-affine test failed: too many variables.\n");
	  *overlaps_a = conflict_fn_not_known ();
	  *overlaps_b = conflict_fn_not_known ();
	  *last_conflicts = chrec_dont_know;
	}
      goto end_analyze_subs_aa;
    }

  /* U.A = S */
  lambda_matrix_right_hermite (A, dim, 1, S, U);

  /* Normalize so that the leading entry of S is nonnegative.  */
  if (S[0][0] < 0)
    {
      S[0][0] *= -1;
      lambda_matrix_row_negate (U, dim, 0);
    }
  gcd_alpha_beta = S[0][0];

  /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
     but that is a quite strange case.  Instead of ICEing, answer
     don't know.  */
  if (gcd_alpha_beta == 0)
    {
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }

  /* The classic "gcd-test".  */
  if (!int_divides_p (gcd_alpha_beta, gamma))
    {
      /* The "gcd-test" has determined that there is no integer
	 solution, i.e. there is no dependence.  */
      *overlaps_a = conflict_fn_no_dependence ();
      *overlaps_b = conflict_fn_no_dependence ();
      *last_conflicts = integer_zero_node;
    }

  /* Both access functions are univariate.  This includes SIV and MIV cases.  */
  else if (nb_vars_a == 1 && nb_vars_b == 1)
    {
      /* Both functions should have the same evolution sign.  */
      if (((A[0][0] > 0 && -A[1][0] > 0)
	   || (A[0][0] < 0 && -A[1][0] < 0)))
	{
	  /* The solutions are given by:

	     | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
	     |                           [u21 u22]    [y0]

	     For a given integer t.  Using the following variables,

	     | i0 = u11 * gamma / gcd_alpha_beta
	     | j0 = u12 * gamma / gcd_alpha_beta
	     | i1 = u21
	     | j1 = u22

	     the solutions are:

	     | x0 = i0 + i1 * t,
	     | y0 = j0 + j1 * t.  */
	  HOST_WIDE_INT i0, j0, i1, j1;

	  i0 = U[0][0] * gamma / gcd_alpha_beta;
	  j0 = U[0][1] * gamma / gcd_alpha_beta;
	  i1 = U[1][0];
	  j1 = U[1][1];

	  if ((i1 == 0 && i0 < 0)
	      || (j1 == 0 && j0 < 0))
	    {
	      /* There is no solution.
		 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
		 falls in here, but for the moment we don't look at the
		 upper bound of the iteration domain.  */
	      *overlaps_a = conflict_fn_no_dependence ();
	      *overlaps_b = conflict_fn_no_dependence ();
	      *last_conflicts = integer_zero_node;
	      goto end_analyze_subs_aa;
	    }

	  if (i1 > 0 && j1 > 0)
	    {
	      HOST_WIDE_INT niter_a
		= max_stmt_executions_int (get_chrec_loop (chrec_a));
	      HOST_WIDE_INT niter_b
		= max_stmt_executions_int (get_chrec_loop (chrec_b));
	      HOST_WIDE_INT niter = MIN (niter_a, niter_b);

	      /* (X0, Y0) is a solution of the Diophantine equation:
		 "chrec_a (X0) = chrec_b (Y0)".  */
	      HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
					CEIL (-j0, j1));
	      HOST_WIDE_INT x0 = i1 * tau1 + i0;
	      HOST_WIDE_INT y0 = j1 * tau1 + j0;

	      /* (X1, Y1) is the smallest positive solution of the eq
		 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
		 first conflict occurs.  */
	      HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
	      HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
	      HOST_WIDE_INT y1 = y0 - j1 * min_multiple;

	      if (niter > 0)
		{
		  /* If the overlap occurs outside of the bounds of the
		     loop, there is no dependence.  */
		  if (x1 >= niter_a || y1 >= niter_b)
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      goto end_analyze_subs_aa;
		    }

		  /* max stmt executions can get quite large, avoid
		     overflows by using wide ints here.  */
		  widest_int tau2
		    = wi::smin (wi::sdiv_floor (wi::sub (niter_a, i0), i1),
				wi::sdiv_floor (wi::sub (niter_b, j0), j1));
		  widest_int last_conflict = wi::sub (tau2, (x1 - i0)/i1);
		  if (wi::min_precision (last_conflict, SIGNED)
		      <= TYPE_PRECISION (integer_type_node))
		    *last_conflicts
		       = build_int_cst (integer_type_node,
					last_conflict.to_shwi ());
		  else
		    *last_conflicts = chrec_dont_know;
		}
	      else
		*last_conflicts = chrec_dont_know;

	      *overlaps_a
		= conflict_fn (1,
			       affine_fn_univar (build_int_cst (NULL_TREE, x1),
						 1,
						 build_int_cst (NULL_TREE, i1)));
	      *overlaps_b
		= conflict_fn (1,
			       affine_fn_univar (build_int_cst (NULL_TREE, y1),
						 1,
						 build_int_cst (NULL_TREE, j1)));
	    }
	  else
	    {
	      /* FIXME: For the moment, the upper bound of the
		 iteration domain for i and j is not checked.  */
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	    }
	}
      else
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
	  *overlaps_a = conflict_fn_not_known ();
	  *overlaps_b = conflict_fn_not_known ();
	  *last_conflicts = chrec_dont_know;
	}
    }
  else
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
    }

end_analyze_subs_aa:
  obstack_free (&scratch_obstack, NULL);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  (overlaps_a = ");
      dump_conflict_function (dump_file, *overlaps_a);
      fprintf (dump_file, ")\n  (overlaps_b = ");
      dump_conflict_function (dump_file, *overlaps_b);
      fprintf (dump_file, "))\n");
    }
}
/* Returns true when analyze_subscript_affine_affine can be used for
   determining the dependence relation between chrec_a and chrec_b,
   that contain symbols.  This function modifies chrec_a and chrec_b
   such that the analysis result is the same, and such that they don't
   contain symbols, and then can safely be passed to the analyzer.

   Example: The analysis of the following tuples of evolutions produce
   the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
   vs. {0, +, 1}_1

   {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
   {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
*/

static bool
can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
{
  tree diff, type, left_a, left_b, right_b;

  /* Symbolic steps cannot be rebased away; only symbolic initial
     conditions are handled below.  */
  if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
      || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
    /* FIXME: For the moment not handled.  Might be refined later.  */
    return false;

  type = chrec_type (*chrec_a);
  left_a = CHREC_LEFT (*chrec_a);
  left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
  diff = chrec_fold_minus (type, left_a, left_b);

  /* The rebasing is only valid when the difference of the initial
     conditions is a compile-time constant.  */
  if (!evolution_function_is_constant_p (diff))
    return false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");

  /* Rewrite CHREC_A as {diff, +, step_a} and CHREC_B as
     {0, +, step_b}: the symbols cancel, the dependence relation is
     unchanged.  */
  *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
				     diff, CHREC_RIGHT (*chrec_a));
  right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
  *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
				     build_int_cst (type, 0),
				     right_b);
  return true;
}
/* Analyze a SIV (Single Index Variable) subscript.  *OVERLAPS_A and
   *OVERLAPS_B are initialized to the functions that describe the
   relation between the elements accessed twice by CHREC_A and
   CHREC_B.  For k >= 0, the following property is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_siv_subscript (tree chrec_a,
		       tree chrec_b,
		       conflict_function **overlaps_a,
		       conflict_function **overlaps_b,
		       tree *last_conflicts,
		       int loop_nest_num)
{
  dependence_stats.num_siv++;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_siv_subscript \n");

  /* Constant vs. affine: delegate to the cst-affine solver, swapping
     the arguments so the constant always comes first.  */
  if (evolution_function_is_constant_p (chrec_a)
      && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
    analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
				      overlaps_a, overlaps_b, last_conflicts);

  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
	   && evolution_function_is_constant_p (chrec_b))
    analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
				      overlaps_b, overlaps_a, last_conflicts);

  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
	   && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
    {
      if (!chrec_contains_symbols (chrec_a)
	  && !chrec_contains_symbols (chrec_b))
	{
	  analyze_subscript_affine_affine (chrec_a, chrec_b,
					   overlaps_a, overlaps_b,
					   last_conflicts);

	  if (CF_NOT_KNOWN_P (*overlaps_a)
	      || CF_NOT_KNOWN_P (*overlaps_b))
	    dependence_stats.num_siv_unimplemented++;
	  else if (CF_NO_DEPENDENCE_P (*overlaps_a)
		   || CF_NO_DEPENDENCE_P (*overlaps_b))
	    dependence_stats.num_siv_independent++;
	  else
	    dependence_stats.num_siv_dependent++;
	}
      /* Symbolic chrecs may still be analyzable after rebasing; the
	 call below rewrites CHREC_A/CHREC_B in place on success.  */
      else if (can_use_analyze_subscript_affine_affine (&chrec_a,
							&chrec_b))
	{
	  analyze_subscript_affine_affine (chrec_a, chrec_b,
					   overlaps_a, overlaps_b,
					   last_conflicts);

	  if (CF_NOT_KNOWN_P (*overlaps_a)
	      || CF_NOT_KNOWN_P (*overlaps_b))
	    dependence_stats.num_siv_unimplemented++;
	  else if (CF_NO_DEPENDENCE_P (*overlaps_a)
		   || CF_NO_DEPENDENCE_P (*overlaps_b))
	    dependence_stats.num_siv_independent++;
	  else
	    dependence_stats.num_siv_dependent++;
	}
      else
	goto siv_subscript_dontknow;
    }

  else
    {
    siv_subscript_dontknow:;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, " siv test failed: unimplemented");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      dependence_stats.num_siv_unimplemented++;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, ")\n");
}
4002 /* Returns false if we can prove that the greatest common divisor of the steps
4003 of CHREC does not divide CST, false otherwise. */
4005 static bool
4006 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
4008 HOST_WIDE_INT cd = 0, val;
4009 tree step;
4011 if (!tree_fits_shwi_p (cst))
4012 return true;
4013 val = tree_to_shwi (cst);
4015 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
4017 step = CHREC_RIGHT (chrec);
4018 if (!tree_fits_shwi_p (step))
4019 return true;
4020 cd = gcd (cd, tree_to_shwi (step));
4021 chrec = CHREC_LEFT (chrec);
4024 return val % cd == 0;
/* Analyze a MIV (Multiple Index Variable) subscript with respect to
   LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
   functions that describe the relation between the elements accessed
   twice by CHREC_A and CHREC_B.  For k >= 0, the following property
   is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_miv_subscript (tree chrec_a,
		       tree chrec_b,
		       conflict_function **overlaps_a,
		       conflict_function **overlaps_b,
		       tree *last_conflicts,
		       class loop *loop_nest)
{
  tree type, difference;

  dependence_stats.num_miv++;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_miv_subscript \n");

  /* Bring both chrecs to a common signed type before subtracting,
     so the difference is well defined.  */
  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
  chrec_a = chrec_convert (type, chrec_a, NULL);
  chrec_b = chrec_convert (type, chrec_b, NULL);
  difference = chrec_fold_minus (type, chrec_a, chrec_b);

  if (eq_evolutions_p (chrec_a, chrec_b))
    {
      /* Access functions are the same: all the elements are accessed
	 in the same order.  */
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
      dependence_stats.num_miv_dependent++;
    }

  else if (evolution_function_is_constant_p (difference)
	   && evolution_function_is_affine_multivariate_p (chrec_a,
							   loop_nest->num)
	   && !gcd_of_steps_may_divide_p (chrec_a, difference))
    {
      /* testsuite/.../ssa-chrec-33.c
	 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2

	 The difference is 1, and all the evolution steps are multiples
	 of 2, consequently there are no overlapping elements.  */
      *overlaps_a = conflict_fn_no_dependence ();
      *overlaps_b = conflict_fn_no_dependence ();
      *last_conflicts = integer_zero_node;
      dependence_stats.num_miv_independent++;
    }

  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest->num)
	   && !chrec_contains_symbols (chrec_a, loop_nest)
	   && evolution_function_is_affine_in_loop (chrec_b, loop_nest->num)
	   && !chrec_contains_symbols (chrec_b, loop_nest))
    {
      /* testsuite/.../ssa-chrec-35.c
	 {0, +, 1}_2 vs.  {0, +, 1}_3
	 the overlapping elements are respectively located at iterations:
	 {0, +, 1}_x and {0, +, 1}_x,
	 in other words, we have the equality:
	 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)

	 Other examples:
	 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
	 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)

	 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
	 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
      */
      analyze_subscript_affine_affine (chrec_a, chrec_b,
				       overlaps_a, overlaps_b, last_conflicts);

      if (CF_NOT_KNOWN_P (*overlaps_a)
	  || CF_NOT_KNOWN_P (*overlaps_b))
	dependence_stats.num_miv_unimplemented++;
      else if (CF_NO_DEPENDENCE_P (*overlaps_a)
	       || CF_NO_DEPENDENCE_P (*overlaps_b))
	dependence_stats.num_miv_independent++;
      else
	dependence_stats.num_miv_dependent++;
    }

  else
    {
      /* When the analysis is too difficult, answer "don't know".  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      dependence_stats.num_miv_unimplemented++;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, ")\n");
}
/* Determines the iterations for which CHREC_A is equal to CHREC_B in
   with respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
   OVERLAP_ITERATIONS_B are initialized with two functions that
   describe the iterations that contain conflicting elements.

   Remark: For an integer k >= 0, the following equality is true:

   CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
*/

static void
analyze_overlapping_iterations (tree chrec_a,
				tree chrec_b,
				conflict_function **overlap_iterations_a,
				conflict_function **overlap_iterations_b,
				tree *last_conflicts, class loop *loop_nest)
{
  unsigned int lnn = loop_nest->num;

  dependence_stats.num_subscript_tests++;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "(analyze_overlapping_iterations \n");
      fprintf (dump_file, " (chrec_a = ");
      print_generic_expr (dump_file, chrec_a);
      fprintf (dump_file, ")\n (chrec_b = ");
      print_generic_expr (dump_file, chrec_b);
      fprintf (dump_file, ")\n");
    }

  /* Unknown or missing access functions: give up.  */
  if (chrec_a == NULL_TREE
      || chrec_b == NULL_TREE
      || chrec_contains_undetermined (chrec_a)
      || chrec_contains_undetermined (chrec_b))
    {
      dependence_stats.num_subscript_undetermined++;

      *overlap_iterations_a = conflict_fn_not_known ();
      *overlap_iterations_b = conflict_fn_not_known ();
    }

  /* If they are the same chrec, and are affine, they overlap
     on every iteration.  */
  else if (eq_evolutions_p (chrec_a, chrec_b)
	   && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
	       || operand_equal_p (chrec_a, chrec_b, 0)))
    {
      dependence_stats.num_same_subscript_function++;
      *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = chrec_dont_know;
    }

  /* If they aren't the same, and aren't affine, we can't do anything
     yet.  */
  else if ((chrec_contains_symbols (chrec_a)
	    || chrec_contains_symbols (chrec_b))
	   && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
	       || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
    {
      dependence_stats.num_subscript_undetermined++;
      *overlap_iterations_a = conflict_fn_not_known ();
      *overlap_iterations_b = conflict_fn_not_known ();
    }

  /* Dispatch on the number of index variables: ZIV (zero), SIV (one),
     otherwise MIV.  */
  else if (ziv_subscript_p (chrec_a, chrec_b))
    analyze_ziv_subscript (chrec_a, chrec_b,
			   overlap_iterations_a, overlap_iterations_b,
			   last_conflicts);

  else if (siv_subscript_p (chrec_a, chrec_b))
    analyze_siv_subscript (chrec_a, chrec_b,
			   overlap_iterations_a, overlap_iterations_b,
			   last_conflicts, lnn);

  else
    analyze_miv_subscript (chrec_a, chrec_b,
			   overlap_iterations_a, overlap_iterations_b,
			   last_conflicts, loop_nest);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " (overlap_iterations_a = ");
      dump_conflict_function (dump_file, *overlap_iterations_a);
      fprintf (dump_file, ")\n (overlap_iterations_b = ");
      dump_conflict_function (dump_file, *overlap_iterations_b);
      fprintf (dump_file, "))\n");
    }
}
4219 /* Helper function for uniquely inserting distance vectors. */
4221 static void
4222 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
4224 unsigned i;
4225 lambda_vector v;
4227 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, v)
4228 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
4229 return;
4231 DDR_DIST_VECTS (ddr).safe_push (dist_v);
4234 /* Helper function for uniquely inserting direction vectors. */
4236 static void
4237 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
4239 unsigned i;
4240 lambda_vector v;
4242 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), i, v)
4243 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
4244 return;
4246 DDR_DIR_VECTS (ddr).safe_push (dir_v);
4249 /* Add a distance of 1 on all the loops outer than INDEX. If we
4250 haven't yet determined a distance for this outer loop, push a new
4251 distance vector composed of the previous distance, and a distance
4252 of 1 for this outer loop. Example:
4254 | loop_1
4255 | loop_2
4256 | A[10]
4257 | endloop_2
4258 | endloop_1
4260 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
4261 save (0, 1), then we have to save (1, 0). */
4263 static void
4264 add_outer_distances (struct data_dependence_relation *ddr,
4265 lambda_vector dist_v, int index)
4267 /* For each outer loop where init_v is not set, the accesses are
4268 in dependence of distance 1 in the loop. */
4269 while (--index >= 0)
4271 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4272 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
4273 save_v[index] = 1;
4274 save_dist_v (ddr, save_v);
4278 /* Return false when fail to represent the data dependence as a
4279 distance vector. A_INDEX is the index of the first reference
4280 (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
4281 second reference. INIT_B is set to true when a component has been
4282 added to the distance vector DIST_V. INDEX_CARRY is then set to
4283 the index in DIST_V that carries the dependence. */
4285 static bool
4286 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
4287 unsigned int a_index, unsigned int b_index,
4288 lambda_vector dist_v, bool *init_b,
4289 int *index_carry)
4291 unsigned i;
4292 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4293 class loop *loop = DDR_LOOP_NEST (ddr)[0];
4295 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
4297 tree access_fn_a, access_fn_b;
4298 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
4300 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
4302 non_affine_dependence_relation (ddr);
4303 return false;
4306 access_fn_a = SUB_ACCESS_FN (subscript, a_index);
4307 access_fn_b = SUB_ACCESS_FN (subscript, b_index);
/* Case 1: both access functions are polynomial chrecs; record the
   constant SUB_DISTANCE for the loop they evolve in.  */
4309 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
4310 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
4312 HOST_WIDE_INT dist;
4313 int index;
4314 int var_a = CHREC_VARIABLE (access_fn_a);
4315 int var_b = CHREC_VARIABLE (access_fn_b);
4317 if (var_a != var_b
4318 || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
4320 non_affine_dependence_relation (ddr);
4321 return false;
4324 /* When data references are collected in a loop while data
4325 dependences are analyzed in loop nest nested in the loop, we
4326 would have more number of access functions than number of
4327 loops. Skip access functions of loops not in the loop nest.
4329 See PR89725 for more information. */
4330 if (flow_loop_nested_p (get_loop (cfun, var_a), loop))
4331 continue;
4333 dist = int_cst_value (SUB_DISTANCE (subscript));
4334 index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
4335 *index_carry = MIN (index, *index_carry);
4337 /* This is the subscript coupling test. If we have already
4338 recorded a distance for this loop (a distance coming from
4339 another subscript), it should be the same. For example,
4340 in the following code, there is no dependence:
4342 | loop i = 0, N, 1
4343 | T[i+1][i] = ...
4344 | ... = T[i][i]
4345 | endloop
/* Conflicting distances for the same loop: the accesses never
   overlap, so mark the relation as known-independent.  */
4347 if (init_v[index] != 0 && dist_v[index] != dist)
4349 finalize_ddr_dependent (ddr, chrec_known);
4350 return false;
4353 dist_v[index] = dist;
4354 init_v[index] = 1;
4355 *init_b = true;
/* Case 2: access functions differ and at least one is not a chrec.  */
4357 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
4359 /* This can be for example an affine vs. constant dependence
4360 (T[i] vs. T[3]) that is not an affine dependence and is
4361 not representable as a distance vector. */
4362 non_affine_dependence_relation (ddr);
4363 return false;
4367 return true;
4370 /* Return true when the DDR contains only constant access functions. */
4372 static bool
4373 constant_access_functions (const struct data_dependence_relation *ddr)
4375 unsigned i;
4376 subscript *sub;
4378 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4379 if (!evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 0))
4380 || !evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 1)))
4381 return false;
4383 return true;
4386 /* Helper function for the case where DDR_A and DDR_B are the same
4387 multivariate access function with a constant step. For an example
4388 see pr34635-1.c. */
4390 static void
4391 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
4393 int x_1, x_2;
4394 tree c_1 = CHREC_LEFT (c_2);
4395 tree c_0 = CHREC_LEFT (c_1);
4396 lambda_vector dist_v;
4397 HOST_WIDE_INT v1, v2, cd;
4399 /* Polynomials with more than 2 variables are not handled yet. When
4400 the evolution steps are parameters, it is not possible to
4401 represent the dependence using classical distance vectors. */
4402 if (TREE_CODE (c_0) != INTEGER_CST
4403 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
4404 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
4406 DDR_AFFINE_P (ddr) = false;
4407 return;
/* X_1 and X_2 are the positions in the loop nest of the two loops the
   chrec varies in.  */
4410 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
4411 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
4413 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
4414 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4415 v1 = int_cst_value (CHREC_RIGHT (c_1));
4416 v2 = int_cst_value (CHREC_RIGHT (c_2));
/* Divide out the gcd so the recorded distance is the smallest
   integral solution.  */
4417 cd = gcd (v1, v2);
4418 v1 /= cd;
4419 v2 /= cd;
/* Flip both signs when v2 is negative, so the component stored at the
   outer index X_1 is non-negative.  */
4421 if (v2 < 0)
4423 v2 = -v2;
4424 v1 = -v1;
4427 dist_v[x_1] = v2;
4428 dist_v[x_2] = -v1;
4429 save_dist_v (ddr, dist_v);
4431 add_outer_distances (ddr, dist_v, x_1);
4434 /* Helper function for the case where DDR_A and DDR_B are the same
4435 access functions. */
4437 static void
4438 add_other_self_distances (struct data_dependence_relation *ddr)
4440 lambda_vector dist_v;
4441 unsigned i;
4442 int index_carry = DDR_NB_LOOPS (ddr);
4443 subscript *sub;
4444 class loop *loop = DDR_LOOP_NEST (ddr)[0];
4446 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4448 tree access_fun = SUB_ACCESS_FN (sub, 0);
4450 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
/* Multivariate chrecs are only handled for single-subscript
   relations; give up otherwise.  */
4452 if (!evolution_function_is_univariate_p (access_fun, loop->num))
4454 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
4456 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
4457 return;
4460 access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);
4462 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
4463 add_multivariate_self_dist (ddr, access_fun);
4464 else
4465 /* The evolution step is not constant: it varies in
4466 the outer loop, so this cannot be represented by a
4467 distance vector. For example in pr34635.c the
4468 evolution is {0, +, {0, +, 4}_1}_2. */
4469 DDR_AFFINE_P (ddr) = false;
4471 return;
4474 /* When data references are collected in a loop while data
4475 dependences are analyzed in loop nest nested in the loop, we
4476 would have more number of access functions than number of
4477 loops. Skip access functions of loops not in the loop nest.
4479 See PR89725 for more information. */
4480 if (flow_loop_nested_p (get_loop (cfun, CHREC_VARIABLE (access_fun)),
4481 loop))
4482 continue;
/* Track the innermost loop any subscript varies in.  */
4484 index_carry = MIN (index_carry,
4485 index_in_loop_nest (CHREC_VARIABLE (access_fun),
4486 DDR_LOOP_NEST (ddr)));
/* Record the zero vector extended with distance 1 on the loops outer
   than INDEX_CARRY.  */
4490 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4491 add_outer_distances (ddr, dist_v, index_carry);
/* Record in DDR a distance vector with 1 in the innermost loop and 0
   in all outer loops (lambda_vector_new zero-initializes).  */
4494 static void
4495 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
4497 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4499 dist_v[0] = 1;
4500 save_dist_v (ddr, dist_v);
4503 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
4504 is the case for example when access functions are the same and
4505 equal to a constant, as in:
4507 | loop_1
4508 | A[3] = ...
4509 | ... = A[3]
4510 | endloop_1
4512 in which case the distance vectors are (0) and (1). */
4514 static void
4515 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
4517 unsigned i, j;
4519 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
4521 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
/* NOTE(review): the conflict functions are dereferenced without a
   CF_NOT_KNOWN_P check — presumably callers only reach this after the
   subscripts were analyzed; confirm against subscript_dependence_tester.  */
4522 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
4523 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
/* A single zero affine function on either side is enough: insert the
   unit vector once and stop.  */
4525 for (j = 0; j < ca->n; j++)
4526 if (affine_function_zero_p (ca->fns[j]))
4528 insert_innermost_unit_dist_vector (ddr);
4529 return;
4532 for (j = 0; j < cb->n; j++)
4533 if (affine_function_zero_p (cb->fns[j]))
4535 insert_innermost_unit_dist_vector (ddr);
4536 return;
4541 /* Return true when the DDR contains two data references that have the
4542 same access functions. */
4544 static inline bool
4545 same_access_functions (const struct data_dependence_relation *ddr)
4547 unsigned i;
4548 subscript *sub;
4550 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4551 if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
4552 SUB_ACCESS_FN (sub, 1)))
4553 return false;
4555 return true;
4558 /* Compute the classic per loop distance vector. DDR is the data
4559 dependence relation to build a vector from. Return false when fail
4560 to represent the data dependence as a distance vector. */
4562 static bool
4563 build_classic_dist_vector (struct data_dependence_relation *ddr,
4564 class loop *loop_nest)
4566 bool init_b = false;
4567 int index_carry = DDR_NB_LOOPS (ddr);
4568 lambda_vector dist_v;
/* The relation was already finalized (known or dont-know): no
   distance vector can be built.  */
4570 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
4571 return false;
/* Self-dependence case: the zero vector always holds; constant
   accesses and outer loops may add further vectors.  */
4573 if (same_access_functions (ddr))
4575 /* Save the 0 vector. */
4576 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4577 save_dist_v (ddr, dist_v);
4579 if (constant_access_functions (ddr))
4580 add_distance_for_zero_overlaps (ddr);
4582 if (DDR_NB_LOOPS (ddr) > 1)
4583 add_other_self_distances (ddr);
4585 return true;
4588 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4589 if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
4590 return false;
4592 /* Save the distance vector if we initialized one. */
4593 if (init_b)
4595 /* Verify a basic constraint: classic distance vectors should
4596 always be lexicographically positive.
4598 Data references are collected in the order of execution of
4599 the program, thus for the following loop
4601 | for (i = 1; i < 100; i++)
4602 | for (j = 1; j < 100; j++)
4604 | t = T[j+1][i-1]; // A
4605 | T[j][i] = t + 2; // B
4608 references are collected following the direction of the wind:
4609 A then B. The data dependence tests are performed also
4610 following this order, such that we're looking at the distance
4611 separating the elements accessed by A from the elements later
4612 accessed by B. But in this example, the distance returned by
4613 test_dep (A, B) is lexicographically negative (-1, 1), that
4614 means that the access A occurs later than B with respect to
4615 the outer loop, ie. we're actually looking upwind. In this
4616 case we solve test_dep (B, A) looking downwind to the
4617 lexicographically positive solution, that returns the
4618 distance vector (1, -1). */
4619 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
4621 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
/* Re-run the subscript tester with the references swapped (B, A) and
   rebuild the distance from that direction.  */
4622 if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
4623 return false;
4624 compute_subscript_distance (ddr);
4625 if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
4626 &index_carry))
4627 return false;
4628 save_dist_v (ddr, save_v);
4629 DDR_REVERSED_P (ddr) = true;
4631 /* In this case there is a dependence forward for all the
4632 outer loops:
4634 | for (k = 1; k < 100; k++)
4635 | for (i = 1; i < 100; i++)
4636 | for (j = 1; j < 100; j++)
4638 | t = T[j+1][i-1]; // A
4639 | T[j][i] = t + 2; // B
4642 the vectors are:
4643 (0, 1, -1)
4644 (1, 1, -1)
4645 (1, -1, 1)
4647 if (DDR_NB_LOOPS (ddr) > 1)
4649 add_outer_distances (ddr, save_v, index_carry);
4650 add_outer_distances (ddr, dist_v, index_carry);
4653 else
4655 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4656 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
/* With more than one loop, also record the opposite-direction
   distances and the outer-loop extensions.  */
4658 if (DDR_NB_LOOPS (ddr) > 1)
4660 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4662 if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
4663 return false;
4664 compute_subscript_distance (ddr);
4665 if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
4666 &index_carry))
4667 return false;
4669 save_dist_v (ddr, save_v);
4670 add_outer_distances (ddr, dist_v, index_carry);
4671 add_outer_distances (ddr, opposite_v, index_carry);
4673 else
4674 save_dist_v (ddr, save_v);
4677 else
4679 /* There is a distance of 1 on all the outer loops: Example:
4680 there is a dependence of distance 1 on loop_1 for the array A.
4682 | loop_1
4683 | A[5] = ...
4684 | endloop
4686 add_outer_distances (ddr, dist_v,
4687 lambda_vector_first_nz (dist_v,
4688 DDR_NB_LOOPS (ddr), 0));
4691 if (dump_file && (dump_flags & TDF_DETAILS))
4693 unsigned i;
4695 fprintf (dump_file, "(build_classic_dist_vector\n");
4696 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
4698 fprintf (dump_file, " dist_vector = (");
4699 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
4700 DDR_NB_LOOPS (ddr));
4701 fprintf (dump_file, " )\n");
4703 fprintf (dump_file, ")\n");
4706 return true;
4709 /* Return the direction for a given distance.
4710 FIXME: Computing dir this way is suboptimal, since dir can catch
4711 cases that dist is unable to represent. */
4713 static inline enum data_dependence_direction
4714 dir_from_dist (int dist)
4716 if (dist > 0)
4717 return dir_positive;
4718 else if (dist < 0)
4719 return dir_negative;
4720 else
4721 return dir_equal;
4724 /* Compute the classic per loop direction vector. DDR is the data
4725 dependence relation to build a vector from. */
4727 static void
4728 build_classic_dir_vector (struct data_dependence_relation *ddr)
4730 unsigned i, j;
4731 lambda_vector dist_v;
4733 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
4735 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4737 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
4738 dir_v[j] = dir_from_dist (dist_v[j]);
4740 save_dir_v (ddr, dir_v);
4744 /* Helper function. Returns true when there is a dependence between the
4745 data references. A_INDEX is the index of the first reference (0 for
4746 DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference. */
4748 static bool
4749 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
4750 unsigned int a_index, unsigned int b_index,
4751 class loop *loop_nest)
4753 unsigned int i;
4754 tree last_conflicts;
4755 struct subscript *subscript;
/* RES stays NULL_TREE while every subscript shows a known dependence;
   it becomes chrec_dont_know or chrec_known otherwise.  */
4756 tree res = NULL_TREE;
4758 for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
4760 conflict_function *overlaps_a, *overlaps_b;
4762 analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
4763 SUB_ACCESS_FN (subscript, b_index),
4764 &overlaps_a, &overlaps_b,
4765 &last_conflicts, loop_nest);
/* Release any conflict functions left from a previous run before
   storing the fresh ones.  */
4767 if (SUB_CONFLICTS_IN_A (subscript))
4768 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
4769 if (SUB_CONFLICTS_IN_B (subscript))
4770 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
4772 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
4773 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
4774 SUB_LAST_CONFLICT (subscript) = last_conflicts;
4776 /* If there is any undetermined conflict function we have to
4777 give a conservative answer in case we cannot prove that
4778 no dependence exists when analyzing another subscript. */
4779 if (CF_NOT_KNOWN_P (overlaps_a)
4780 || CF_NOT_KNOWN_P (overlaps_b))
4782 res = chrec_dont_know;
4783 continue;
4786 /* When there is a subscript with no dependence we can stop. */
4787 else if (CF_NO_DEPENDENCE_P (overlaps_a)
4788 || CF_NO_DEPENDENCE_P (overlaps_b))
4790 res = chrec_known;
4791 break;
/* All subscripts had a known conflict: the references depend.  */
4795 if (res == NULL_TREE)
4796 return true;
4798 if (res == chrec_known)
4799 dependence_stats.num_dependence_independent++;
4800 else
4801 dependence_stats.num_dependence_undetermined++;
4802 finalize_ddr_dependent (ddr, res);
4803 return false;
4806 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
4808 static void
4809 subscript_dependence_tester (struct data_dependence_relation *ddr,
4810 class loop *loop_nest)
/* The helper returns true when a dependence exists; count it.  */
4812 if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
4813 dependence_stats.num_dependence_dependent++;
/* Derive the classic distance and direction vectors from the
   per-subscript conflicts.  */
4815 compute_subscript_distance (ddr);
4816 if (build_classic_dist_vector (ddr, loop_nest))
4817 build_classic_dir_vector (ddr);
4820 /* Returns true when all the access functions of A are affine or
4821 constant with respect to LOOP_NEST. */
4823 static bool
4824 access_functions_are_affine_or_constant_p (const struct data_reference *a,
4825 const class loop *loop_nest)
4827 unsigned int i;
4828 vec<tree> fns = DR_ACCESS_FNS (a);
4829 tree t;
4831 FOR_EACH_VEC_ELT (fns, i, t)
4832 if (!evolution_function_is_invariant_p (t, loop_nest->num)
4833 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
4834 return false;
4836 return true;
4839 /* This computes the affine dependence relation between A and B with
4840 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
4841 independence between two accesses, while CHREC_DONT_KNOW is used
4842 for representing the unknown relation.
4844 Note that it is possible to stop the computation of the dependence
4845 relation the first time we detect a CHREC_KNOWN element for a given
4846 subscript. */
4848 void
4849 compute_affine_dependence (struct data_dependence_relation *ddr,
4850 class loop *loop_nest)
4852 struct data_reference *dra = DDR_A (ddr);
4853 struct data_reference *drb = DDR_B (ddr);
4855 if (dump_file && (dump_flags & TDF_DETAILS))
4857 fprintf (dump_file, "(compute_affine_dependence\n");
4858 fprintf (dump_file, " stmt_a: ");
4859 print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
4860 fprintf (dump_file, " stmt_b: ");
4861 print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
4864 /* Analyze only when the dependence relation is not yet known. */
4865 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4867 dependence_stats.num_dependence_tests++;
/* The subscript tester only applies when both references have
   affine-or-constant access functions.  */
4869 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
4870 && access_functions_are_affine_or_constant_p (drb, loop_nest))
4871 subscript_dependence_tester (ddr, loop_nest);
4873 /* As a last case, if the dependence cannot be determined, or if
4874 the dependence is considered too difficult to determine, answer
4875 "don't know". */
4876 else
4878 dependence_stats.num_dependence_undetermined++;
4880 if (dump_file && (dump_flags & TDF_DETAILS))
4882 fprintf (dump_file, "Data ref a:\n");
4883 dump_data_reference (dump_file, dra);
4884 fprintf (dump_file, "Data ref b:\n");
4885 dump_data_reference (dump_file, drb);
4886 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4888 finalize_ddr_dependent (ddr, chrec_dont_know);
4892 if (dump_file && (dump_flags & TDF_DETAILS))
4894 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4895 fprintf (dump_file, ") -> no dependence\n");
4896 else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4897 fprintf (dump_file, ") -> dependence analysis failed\n");
4898 else
4899 fprintf (dump_file, ")\n");
4903 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4904 the data references in DATAREFS, in the LOOP_NEST. When
4905 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4906 relations. Return true when successful, i.e. data references number
4907 is small enough to be handled. */
4909 bool
4910 compute_all_dependences (vec<data_reference_p> datarefs,
4911 vec<ddr_p> *dependence_relations,
4912 vec<loop_p> loop_nest,
4913 bool compute_self_and_rr)
4915 struct data_dependence_relation *ddr;
4916 struct data_reference *a, *b;
4917 unsigned int i, j;
/* Bail out on too many data references: the number of relations is
   quadratic in the number of references.  */
4919 if ((int) datarefs.length ()
4920 > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
4922 struct data_dependence_relation *ddr;
4924 /* Insert a single relation into dependence_relations:
4925 chrec_dont_know. */
4926 ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
4927 dependence_relations->safe_push (ddr);
4928 return false;
/* Pairwise relations; read-read pairs only when requested.  */
4931 FOR_EACH_VEC_ELT (datarefs, i, a)
4932 for (j = i + 1; datarefs.iterate (j, &b); j++)
4933 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4935 ddr = initialize_data_dependence_relation (a, b, loop_nest);
4936 dependence_relations->safe_push (ddr);
4937 if (loop_nest.exists ())
4938 compute_affine_dependence (ddr, loop_nest[0]);
/* Optionally also the self relations (A, A).  */
4941 if (compute_self_and_rr)
4942 FOR_EACH_VEC_ELT (datarefs, i, a)
4944 ddr = initialize_data_dependence_relation (a, a, loop_nest);
4945 dependence_relations->safe_push (ddr);
4946 if (loop_nest.exists ())
4947 compute_affine_dependence (ddr, loop_nest[0]);
4950 return true;
/* Describes a location of a memory reference; filled in by
   get_references_in_stmt.  */
4955 struct data_ref_loc
4957 /* The memory reference. */
4958 tree ref;
4960 /* True if the memory reference is read. */
4961 bool is_read;
4963 /* True if the data reference is conditional within the containing
4964 statement, i.e. if it might not occur even when the statement
4965 is executed and runs to completion. */
4966 bool is_conditional_in_stmt;
4970 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4971 true if STMT clobbers memory, false otherwise. */
4973 static bool
4974 get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
4976 bool clobbers_memory = false;
4977 data_ref_loc ref;
4978 tree op0, op1;
4979 enum gimple_code stmt_code = gimple_code (stmt);
4981 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4982 As we cannot model data-references to not spelled out
4983 accesses give up if they may occur. */
4984 if (stmt_code == GIMPLE_CALL
4985 && !(gimple_call_flags (stmt) & ECF_CONST))
4987 /* Allow IFN_GOMP_SIMD_LANE in their own loops. */
4988 if (gimple_call_internal_p (stmt))
4989 switch (gimple_call_internal_fn (stmt))
4991 case IFN_GOMP_SIMD_LANE:
4993 class loop *loop = gimple_bb (stmt)->loop_father;
4994 tree uid = gimple_call_arg (stmt, 0);
4995 gcc_assert (TREE_CODE (uid) == SSA_NAME);
4996 if (loop == NULL
4997 || loop->simduid != SSA_NAME_VAR (uid))
4998 clobbers_memory = true;
4999 break;
/* Masked loads/stores are modeled as data references below.  */
5001 case IFN_MASK_LOAD:
5002 case IFN_MASK_STORE:
5003 break;
5004 default:
5005 clobbers_memory = true;
5006 break;
5008 else
5009 clobbers_memory = true;
5011 else if (stmt_code == GIMPLE_ASM
5012 && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
5013 || gimple_vuse (stmt)))
5014 clobbers_memory = true;
/* A statement without a virtual use neither reads nor writes
   memory: nothing to record.  */
5016 if (!gimple_vuse (stmt))
5017 return clobbers_memory;
5019 if (stmt_code == GIMPLE_ASSIGN)
5021 tree base;
5022 op0 = gimple_assign_lhs (stmt);
5023 op1 = gimple_assign_rhs1 (stmt);
/* Record the RHS as a read when it is a decl or a reference with an
   analyzable (non-SSA, non-invariant) base.  */
5025 if (DECL_P (op1)
5026 || (REFERENCE_CLASS_P (op1)
5027 && (base = get_base_address (op1))
5028 && TREE_CODE (base) != SSA_NAME
5029 && !is_gimple_min_invariant (base)))
5031 ref.ref = op1;
5032 ref.is_read = true;
5033 ref.is_conditional_in_stmt = false;
5034 references->safe_push (ref);
5037 else if (stmt_code == GIMPLE_CALL)
5039 unsigned i, n;
5040 tree ptr, type;
5041 unsigned int align;
5043 ref.is_read = false;
5044 if (gimple_call_internal_p (stmt))
5045 switch (gimple_call_internal_fn (stmt))
5047 case IFN_MASK_LOAD:
5048 if (gimple_call_lhs (stmt) == NULL_TREE)
5049 break;
5050 ref.is_read = true;
5051 /* FALLTHRU */
5052 case IFN_MASK_STORE:
/* Build a MEM_REF describing the masked access; arg 1 carries the
   alignment, arg 0 the address, arg 3 the stored value.  */
5053 ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
5054 align = tree_to_shwi (gimple_call_arg (stmt, 1));
5055 if (ref.is_read)
5056 type = TREE_TYPE (gimple_call_lhs (stmt));
5057 else
5058 type = TREE_TYPE (gimple_call_arg (stmt, 3));
5059 if (TYPE_ALIGN (type) != align)
5060 type = build_aligned_type (type, align);
/* The access is guarded by the mask, hence conditional.  */
5061 ref.is_conditional_in_stmt = true;
5062 ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
5063 ptr);
5064 references->safe_push (ref);
5065 return false;
5066 default:
5067 break;
/* Record each call argument that names memory as a read.  */
5070 op0 = gimple_call_lhs (stmt);
5071 n = gimple_call_num_args (stmt);
5072 for (i = 0; i < n; i++)
5074 op1 = gimple_call_arg (stmt, i);
5076 if (DECL_P (op1)
5077 || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
5079 ref.ref = op1;
5080 ref.is_read = true;
5081 ref.is_conditional_in_stmt = false;
5082 references->safe_push (ref);
5086 else
5087 return clobbers_memory;
/* The LHS, when it names memory, is a write.  */
5089 if (op0
5090 && (DECL_P (op0)
5091 || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
5093 ref.ref = op0;
5094 ref.is_read = false;
5095 ref.is_conditional_in_stmt = false;
5096 references->safe_push (ref);
5098 return clobbers_memory;
5102 /* Returns true if the loop-nest has any data reference. */
5104 bool
5105 loop_nest_has_data_refs (loop_p loop)
5107 basic_block *bbs = get_loop_body (loop);
5108 auto_vec<data_ref_loc, 3> references;
5110 for (unsigned i = 0; i < loop->num_nodes; i++)
5112 basic_block bb = bbs[i];
5113 gimple_stmt_iterator bsi;
5115 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5117 gimple *stmt = gsi_stmt (bsi);
5118 get_references_in_stmt (stmt, &references);
/* Stop at the first statement that yields a reference; free the
   body array before every return.  */
5119 if (references.length ())
5121 free (bbs);
5122 return true;
5126 free (bbs);
5127 return false;
5130 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
5131 reference, returns false, otherwise returns true. NEST is the outermost
5132 loop of the loop nest in which the references should be analyzed. */
5134 opt_result
5135 find_data_references_in_stmt (class loop *nest, gimple *stmt,
5136 vec<data_reference_p> *datarefs)
5138 unsigned i;
5139 auto_vec<data_ref_loc, 2> references;
5140 data_ref_loc *ref;
5141 data_reference_p dr;
/* A memory-clobbering statement cannot be described by data
   references; report the failure with the statement.  */
5143 if (get_references_in_stmt (stmt, &references))
5144 return opt_result::failure_at (stmt, "statement clobbers memory: %G",
5145 stmt);
/* Turn each recorded location into a full data reference.  */
5147 FOR_EACH_VEC_ELT (references, i, ref)
5149 dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
5150 loop_containing_stmt (stmt), ref->ref,
5151 stmt, ref->is_read, ref->is_conditional_in_stmt);
5152 gcc_assert (dr != NULL);
5153 datarefs->safe_push (dr);
5156 return opt_result::success ();
5159 /* Stores the data references in STMT to DATAREFS. If there is an
5160 unanalyzable reference, returns false, otherwise returns true.
5161 NEST is the outermost loop of the loop nest in which the references
5162 should be instantiated, LOOP is the loop in which the references
5163 should be analyzed. */
5165 bool
5166 graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
5167 vec<data_reference_p> *datarefs)
5169 unsigned i;
5170 auto_vec<data_ref_loc, 2> references;
5171 data_ref_loc *ref;
5172 bool ret = true;
5173 data_reference_p dr;
5175 if (get_references_in_stmt (stmt, &references))
5176 return false;
5178 FOR_EACH_VEC_ELT (references, i, ref)
5180 dr = create_data_ref (nest, loop, ref->ref, stmt, ref->is_read,
5181 ref->is_conditional_in_stmt);
5182 gcc_assert (dr != NULL);
5183 datarefs->safe_push (dr);
5186 return ret;
5189 /* Search the data references in LOOP, and record the information into
5190 DATAREFS. Returns chrec_dont_know when failing to analyze a
5191 difficult case, returns NULL_TREE otherwise. */
5193 tree
5194 find_data_references_in_bb (class loop *loop, basic_block bb,
5195 vec<data_reference_p> *datarefs)
5197 gimple_stmt_iterator bsi;
5199 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5201 gimple *stmt = gsi_stmt (bsi);
5203 if (!find_data_references_in_stmt (loop, stmt, datarefs))
/* Push an empty data reference as a marker for the unanalyzable
   statement, then give up.  */
5205 struct data_reference *res;
5206 res = XCNEW (struct data_reference);
5207 datarefs->safe_push (res);
5209 return chrec_dont_know;
5213 return NULL_TREE;
5216 /* Search the data references in LOOP, and record the information into
5217 DATAREFS. Returns chrec_dont_know when failing to analyze a
5218 difficult case, returns NULL_TREE otherwise.
5220 TODO: This function should be made smarter so that it can handle address
5221 arithmetic as if they were array accesses, etc. */
5223 tree
5224 find_data_references_in_loop (class loop *loop,
5225 vec<data_reference_p> *datarefs)
5227 basic_block bb, *bbs;
5228 unsigned int i;
5230 bbs = get_loop_body_in_dom_order (loop);
5232 for (i = 0; i < loop->num_nodes; i++)
5234 bb = bbs[i];
/* Propagate the first failure; free the body array on both paths.  */
5236 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
5238 free (bbs);
5239 return chrec_dont_know;
5242 free (bbs);
5244 return NULL_TREE;
5247 /* Return the alignment in bytes that DRB is guaranteed to have at all
5248 times. */
5250 unsigned int
5251 dr_alignment (innermost_loop_behavior *drb)
5253 /* Get the alignment of BASE_ADDRESS + INIT. */
5254 unsigned int alignment = drb->base_alignment;
5255 unsigned int misalignment = (drb->base_misalignment
5256 + TREE_INT_CST_LOW (drb->init));
5257 if (misalignment != 0)
5258 alignment = MIN (alignment, misalignment & -misalignment);
5260 /* Cap it to the alignment of OFFSET. */
5261 if (!integer_zerop (drb->offset))
5262 alignment = MIN (alignment, drb->offset_alignment);
5264 /* Cap it to the alignment of STEP. */
5265 if (!integer_zerop (drb->step))
5266 alignment = MIN (alignment, drb->step_alignment);
5268 return alignment;
5271 /* If BASE is a pointer-typed SSA name, try to find the object that it
5272 is based on. Return this object X on success and store the alignment
5273 in bytes of BASE - &X in *ALIGNMENT_OUT. */
5275 static tree
5276 get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
5278 if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
5279 return NULL_TREE;
/* Look at the scalar evolution of BASE in its defining loop.  */
5281 gimple *def = SSA_NAME_DEF_STMT (base);
5282 base = analyze_scalar_evolution (loop_containing_stmt (def), base);
5284 /* Peel chrecs and record the minimum alignment preserved by
5285 all steps. */
5286 unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
5287 while (TREE_CODE (base) == POLYNOMIAL_CHREC)
5289 unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
5290 alignment = MIN (alignment, step_alignment);
5291 base = CHREC_LEFT (base);
5294 /* Punt if the expression is too complicated to handle. */
5295 if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
5296 return NULL_TREE;
5298 /* The only useful cases are those for which a dereference folds to something
5299 other than an INDIRECT_REF. */
5300 tree ref_type = TREE_TYPE (TREE_TYPE (base));
5301 tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
5302 if (!ref)
5303 return NULL_TREE;
5305 /* Analyze the base to which the steps we peeled were applied. */
5306 poly_int64 bitsize, bitpos, bytepos;
5307 machine_mode mode;
5308 int unsignedp, reversep, volatilep;
5309 tree offset;
5310 base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
5311 &unsignedp, &reversep, &volatilep);
/* Give up when the bit position is not a whole number of bytes.  */
5312 if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
5313 return NULL_TREE;
5315 /* Restrict the alignment to that guaranteed by the offsets. */
5316 unsigned int bytepos_alignment = known_alignment (bytepos);
5317 if (bytepos_alignment != 0)
5318 alignment = MIN (alignment, bytepos_alignment);
5319 if (offset)
5321 unsigned int offset_alignment = highest_pow2_factor (offset);
5322 alignment = MIN (alignment, offset_alignment);
5325 *alignment_out = alignment;
5326 return base;
5329 /* Return the object whose alignment would need to be changed in order
5330 to increase the alignment of ADDR. Store the maximum achievable
5331 alignment in *MAX_ALIGNMENT. */
5333 tree
5334 get_base_for_alignment (tree addr, unsigned int *max_alignment)
5336 tree base = get_base_for_alignment_1 (addr, max_alignment);
5337 if (base)
5338 return base;
/* Fall back to the address expression itself (stripping one
   ADDR_EXPR) with the maximum file-scope alignment.  */
5340 if (TREE_CODE (addr) == ADDR_EXPR)
5341 addr = TREE_OPERAND (addr, 0);
5342 *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
5343 return addr;
/* Recursive helper function for find_loop_nest: push LOOP and each of
   its single inner loops onto LOOP_NEST; return false when the nest
   is not perfectly single-chained.  */
5348 static bool
5349 find_loop_nest_1 (class loop *loop, vec<loop_p> *loop_nest)
5351 /* Inner loops of the nest should not contain siblings. Example:
5352 when there are two consecutive loops,
5354 | loop_0
5355 | loop_1
5356 | A[{0, +, 1}_1]
5357 | endloop_1
5358 | loop_2
5359 | A[{0, +, 1}_2]
5360 | endloop_2
5361 | endloop_0
5363 the dependence relation cannot be captured by the distance
5364 abstraction. */
5365 if (loop->next)
5366 return false;
5368 loop_nest->safe_push (loop);
5369 if (loop->inner)
5370 return find_loop_nest_1 (loop->inner, loop_nest);
5371 return true;
5374 /* Return false when the LOOP is not well nested. Otherwise return
5375 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
5376 contain the loops from the outermost to the innermost, as they will
5377 appear in the classic distance vector. */
5379 bool
5380 find_loop_nest (class loop *loop, vec<loop_p> *loop_nest)
/* The outermost loop may have siblings; the restriction in
   find_loop_nest_1 applies only to inner loops.  */
5382 loop_nest->safe_push (loop);
5383 if (loop->inner)
5384 return find_loop_nest_1 (loop->inner, loop_nest);
5385 return true;
5388 /* Returns true when the data dependences have been computed, false otherwise.
5389 Given a loop nest LOOP, the following vectors are returned:
5390 DATAREFS is initialized to all the array elements contained in this loop,
5391 DEPENDENCE_RELATIONS contains the relations between the data references.
5392 Compute read-read and self relations if
5393 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
5395 bool
5396 compute_data_dependences_for_loop (class loop *loop,
5397 bool compute_self_and_read_read_dependences,
5398 vec<loop_p> *loop_nest,
5399 vec<data_reference_p> *datarefs,
5400 vec<ddr_p> *dependence_relations)
5402 bool res = true;
5404 memset (&dependence_stats, 0, sizeof (dependence_stats));
5406 /* If the loop nest is not well formed, or one of the data references
5407 is not computable, give up without spending time to compute other
5408 dependences. */
5409 if (!loop
5410 || !find_loop_nest (loop, loop_nest)
5411 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
5412 || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
5413 compute_self_and_read_read_dependences))
5414 res = false;
5416 if (dump_file && (dump_flags & TDF_STATS))
5418 fprintf (dump_file, "Dependence tester statistics:\n");
5420 fprintf (dump_file, "Number of dependence tests: %d\n",
5421 dependence_stats.num_dependence_tests);
5422 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
5423 dependence_stats.num_dependence_dependent);
5424 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
5425 dependence_stats.num_dependence_independent);
5426 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
5427 dependence_stats.num_dependence_undetermined);
5429 fprintf (dump_file, "Number of subscript tests: %d\n",
5430 dependence_stats.num_subscript_tests);
5431 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
5432 dependence_stats.num_subscript_undetermined);
5433 fprintf (dump_file, "Number of same subscript function: %d\n",
5434 dependence_stats.num_same_subscript_function);
5436 fprintf (dump_file, "Number of ziv tests: %d\n",
5437 dependence_stats.num_ziv);
5438 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
5439 dependence_stats.num_ziv_dependent);
5440 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
5441 dependence_stats.num_ziv_independent);
5442 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
5443 dependence_stats.num_ziv_unimplemented);
5445 fprintf (dump_file, "Number of siv tests: %d\n",
5446 dependence_stats.num_siv);
5447 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
5448 dependence_stats.num_siv_dependent);
5449 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
5450 dependence_stats.num_siv_independent);
5451 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
5452 dependence_stats.num_siv_unimplemented);
5454 fprintf (dump_file, "Number of miv tests: %d\n",
5455 dependence_stats.num_miv);
5456 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
5457 dependence_stats.num_miv_dependent);
5458 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
5459 dependence_stats.num_miv_independent);
5460 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
5461 dependence_stats.num_miv_unimplemented);
5464 return res;
5467 /* Free the memory used by a data dependence relation DDR. */
5469 void
5470 free_dependence_relation (struct data_dependence_relation *ddr)
5472 if (ddr == NULL)
5473 return;
5475 if (DDR_SUBSCRIPTS (ddr).exists ())
5476 free_subscripts (DDR_SUBSCRIPTS (ddr));
5477 DDR_DIST_VECTS (ddr).release ();
5478 DDR_DIR_VECTS (ddr).release ();
5480 free (ddr);
5483 /* Free the memory used by the data dependence relations from
5484 DEPENDENCE_RELATIONS. */
5486 void
5487 free_dependence_relations (vec<ddr_p> dependence_relations)
5489 unsigned int i;
5490 struct data_dependence_relation *ddr;
5492 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
5493 if (ddr)
5494 free_dependence_relation (ddr);
5496 dependence_relations.release ();
5499 /* Free the memory used by the data references from DATAREFS. */
5501 void
5502 free_data_refs (vec<data_reference_p> datarefs)
5504 unsigned int i;
5505 struct data_reference *dr;
5507 FOR_EACH_VEC_ELT (datarefs, i, dr)
5508 free_data_ref (dr);
5509 datarefs.release ();
/* Common routine implementing both dr_direction_indicator and
   dr_zero_step_indicator.  Return USEFUL_MIN if the indicator is known
   to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
   Return the step as the indicator otherwise.  */

static tree
dr_step_indicator (struct data_reference *dr, int useful_min)
{
  tree step = DR_STEP (dr);
  if (!step)
    return NULL_TREE;
  STRIP_NOPS (step);
  /* Look for cases where the step is scaled by a positive constant
     integer, which will often be the access size.  If the multiplication
     doesn't change the sign (due to overflow effects) then we can
     test the unscaled value instead.  */
  if (TREE_CODE (step) == MULT_EXPR
      && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
      && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
    {
      tree factor = TREE_OPERAND (step, 1);
      step = TREE_OPERAND (step, 0);

      /* Strip widening and truncating conversions as well as nops.  */
      if (CONVERT_EXPR_P (step)
	  && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
	step = TREE_OPERAND (step, 0);
      tree type = TREE_TYPE (step);

      /* Get the range of step values that would not cause overflow.
	 [MINV, MAXV] is the ssizetype range divided by FACTOR, i.e. the
	 unscaled values whose product with FACTOR stays representable.  */
      widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
			 / wi::to_widest (factor));
      widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
			 / wi::to_widest (factor));

      /* Get the range of values that the unconverted step actually has;
	 fall back to the full range of its type when no SSA range info
	 is available.  */
      wide_int step_min, step_max;
      if (TREE_CODE (step) != SSA_NAME
	  || get_range_info (step, &step_min, &step_max) != VR_RANGE)
	{
	  step_min = wi::to_wide (TYPE_MIN_VALUE (type));
	  step_max = wi::to_wide (TYPE_MAX_VALUE (type));
	}

      /* Check whether the unconverted step has an acceptable range.
	 Only then is testing the unscaled value equivalent to testing
	 the full step.  */
      signop sgn = TYPE_SIGN (type);
      if (wi::les_p (minv, widest_int::from (step_min, sgn))
	  && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
	{
	  /* Known >= USEFUL_MIN: collapse to the requested minimum.  */
	  if (wi::ge_p (step_min, useful_min, sgn))
	    return ssize_int (useful_min);
	  /* Known negative: collapse to -1.  */
	  else if (wi::lt_p (step_max, 0, sgn))
	    return ssize_int (-1);
	  /* Sign unknown: return the unscaled step itself.  */
	  else
	    return fold_convert (ssizetype, step);
	}
    }
  /* No simplification applied; return the original step.  */
  return DR_STEP (dr);
}
/* Return a value that is negative iff DR has a negative step.  */

tree
dr_direction_indicator (struct data_reference *dr)
{
  /* USEFUL_MIN of 0: any known-nonnegative step collapses to 0, so the
     sign of the result mirrors the sign of the step.  */
  return dr_step_indicator (dr, 0);
}
/* Return a value that is zero iff DR has a zero step.  */

tree
dr_zero_step_indicator (struct data_reference *dr)
{
  /* USEFUL_MIN of 1: any step known to be >= 1 collapses to 1, so only
     a genuinely zero step can yield a zero indicator.  */
  return dr_step_indicator (dr, 1);
}
5588 /* Return true if DR is known to have a nonnegative (but possibly zero)
5589 step. */
5591 bool
5592 dr_known_forward_stride_p (struct data_reference *dr)
5594 tree indicator = dr_direction_indicator (dr);
5595 tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
5596 fold_convert (ssizetype, indicator),
5597 ssize_int (0));
5598 return neg_step_val && integer_zerop (neg_step_val);