C++: -Wwrite-strings: use location of string constant
[official-gcc.git] / gcc / tree-data-ref.c
blob a8c6872a2350f86f57079cba2e2e58b92b386ac8
1 /* Data references and dependences detectors.
2 Copyright (C) 2003-2018 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass walks a given loop structure searching for array
22 references. The information about the array accesses is recorded
23 in DATA_REFERENCE structures.
25 The basic test for determining the dependences is:
26 given two access functions chrec1 and chrec2 to a same array, and
27 x and y two vectors from the iteration domain, the same element of
28 the array is accessed twice at iterations x and y if and only if:
29 | chrec1 (x) == chrec2 (y).
31 The goals of this analysis are:
33 - to determine the independence: the relation between two
34 independent accesses is qualified with the chrec_known (this
35 information allows a loop parallelization),
37 - when two data references access the same data, to qualify the
38 dependence relation with classic dependence representations:
40 - distance vectors
41 - direction vectors
42 - loop carried level dependence
43 - polyhedron dependence
44 or with the chains of recurrences based representation,
46 - to define a knowledge base for storing the data dependence
47 information,
49 - to define an interface to access this data.
52 Definitions:
54 - subscript: given two array accesses a subscript is the tuple
55 composed of the access functions for a given dimension. Example:
56 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57 (f1, g1), (f2, g2), (f3, g3).
59 - Diophantine equation: an equation whose coefficients and
60 solutions are integer constants, for example the equation
61 | 3*x + 2*y = 1
62 has an integer solution x = 1 and y = -1.
64 References:
66 - "Advanced Compilation for High Performance Computing" by Randy
67 Allen and Ken Kennedy.
68 http://citeseer.ist.psu.edu/goff91practical.html
70 - "Loop Transformations for Restructuring Compilers - The Foundations"
71 by Utpal Banerjee.
76 #include "config.h"
77 #include "system.h"
78 #include "coretypes.h"
79 #include "backend.h"
80 #include "rtl.h"
81 #include "tree.h"
82 #include "gimple.h"
83 #include "gimple-pretty-print.h"
84 #include "alias.h"
85 #include "fold-const.h"
86 #include "expr.h"
87 #include "gimple-iterator.h"
88 #include "tree-ssa-loop-niter.h"
89 #include "tree-ssa-loop.h"
90 #include "tree-ssa.h"
91 #include "cfgloop.h"
92 #include "tree-data-ref.h"
93 #include "tree-scalar-evolution.h"
94 #include "dumpfile.h"
95 #include "tree-affine.h"
96 #include "params.h"
97 #include "builtins.h"
98 #include "stringpool.h"
99 #include "tree-vrp.h"
100 #include "tree-ssanames.h"
101 #include "tree-eh.h"
/* Statistics gathered while running the dependence tests; dumped when
   detailed dependence statistics are requested.
   NOTE(review): this scraped extract is missing the struct's opening
   brace and blank separator lines; only comments are added here.  */
103 static struct datadep_stats
/* Overall dependence-test outcomes.  */
105 int num_dependence_tests;
106 int num_dependence_dependent;
107 int num_dependence_independent;
108 int num_dependence_undetermined;
/* Per-subscript test outcomes.  */
110 int num_subscript_tests;
111 int num_subscript_undetermined;
112 int num_same_subscript_function;
/* ZIV = zero index variable subscripts.  */
114 int num_ziv;
115 int num_ziv_independent;
116 int num_ziv_dependent;
117 int num_ziv_unimplemented;
/* SIV = single index variable subscripts.  */
119 int num_siv;
120 int num_siv_independent;
121 int num_siv_dependent;
122 int num_siv_unimplemented;
/* MIV = multiple index variable subscripts.  */
124 int num_miv;
125 int num_miv_independent;
126 int num_miv_dependent;
127 int num_miv_unimplemented;
128 } dependence_stats;
130 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
131 unsigned int, unsigned int,
132 struct loop *);
133 /* Returns true iff A divides B. */
135 static inline bool
136 tree_fold_divides_p (const_tree a, const_tree b)
138 gcc_assert (TREE_CODE (a) == INTEGER_CST);
139 gcc_assert (TREE_CODE (b) == INTEGER_CST);
140 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
/* Returns true iff A divides B.  A must be nonzero.  */

static inline bool
int_divides_p (int a, int b)
{
  return b % a == 0;
}
151 /* Return true if reference REF contains a union access. */
153 static bool
154 ref_contains_union_access_p (tree ref)
156 while (handled_component_p (ref))
158 ref = TREE_OPERAND (ref, 0);
159 if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
160 || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
161 return true;
163 return false;
168 /* Dump into FILE all the data references from DATAREFS. */
170 static void
171 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
173 unsigned int i;
174 struct data_reference *dr;
176 FOR_EACH_VEC_ELT (datarefs, i, dr)
177 dump_data_reference (file, dr);
180 /* Unified dump into FILE all the data references from DATAREFS. */
/* NOTE(review): despite the comment above, this overload always writes
   to stderr; it exists so "call debug (x)" works from a debugger.  */
182 DEBUG_FUNCTION void
183 debug (vec<data_reference_p> &ref)
185 dump_data_references (stderr, ref);
188 DEBUG_FUNCTION void
189 debug (vec<data_reference_p> *ptr)
191 if (ptr)
192 debug (*ptr);
193 else
194 fprintf (stderr, "<nil>\n");
198 /* Dump into STDERR all the data references from DATAREFS. */
/* Named debugger entry point; delegates to dump_data_references.  */
200 DEBUG_FUNCTION void
201 debug_data_references (vec<data_reference_p> datarefs)
203 dump_data_references (stderr, datarefs);
206 /* Print to STDERR the data_reference DR. */
/* Named debugger entry point; delegates to dump_data_reference.  */
208 DEBUG_FUNCTION void
209 debug_data_reference (struct data_reference *dr)
211 dump_data_reference (stderr, dr);
214 /* Dump function for a DATA_REFERENCE structure. */
/* Prints DR to OUTF as a "#(Data Ref: ...)" record: containing basic
   block, defining statement, the reference tree, the base object, and
   one access function per analyzed dimension.  */
216 void
217 dump_data_reference (FILE *outf,
218 struct data_reference *dr)
220 unsigned int i;
222 fprintf (outf, "#(Data Ref: \n");
223 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
224 fprintf (outf, "# stmt: ");
225 print_gimple_stmt (outf, DR_STMT (dr), 0);
226 fprintf (outf, "# ref: ");
227 print_generic_stmt (outf, DR_REF (dr));
228 fprintf (outf, "# base_object: ");
229 print_generic_stmt (outf, DR_BASE_OBJECT (dr));
/* One access function per dimension, innermost first.  */
231 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
233 fprintf (outf, "# Access function %d: ", i);
234 print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
236 fprintf (outf, "#)\n");
239 /* Unified dump function for a DATA_REFERENCE structure. */
/* Overload for a reference; always writes to stderr (debugger use).  */
241 DEBUG_FUNCTION void
242 debug (data_reference &ref)
244 dump_data_reference (stderr, &ref);
247 DEBUG_FUNCTION void
248 debug (data_reference *ptr)
250 if (ptr)
251 debug (*ptr);
252 else
253 fprintf (stderr, "<nil>\n");
257 /* Dumps the affine function described by FN to the file OUTF. */
259 DEBUG_FUNCTION void
260 dump_affine_function (FILE *outf, affine_fn fn)
262 unsigned i;
263 tree coef;
265 print_generic_expr (outf, fn[0], TDF_SLIM);
266 for (i = 1; fn.iterate (i, &coef); i++)
268 fprintf (outf, " + ");
269 print_generic_expr (outf, coef, TDF_SLIM);
270 fprintf (outf, " * x_%u", i);
274 /* Dumps the conflict function CF to the file OUTF. */
/* CF->n is either one of the sentinels NO_DEPENDENCE / NOT_KNOWN, or
   the number of affine functions in CF->fns, each printed bracketed.  */
276 DEBUG_FUNCTION void
277 dump_conflict_function (FILE *outf, conflict_function *cf)
279 unsigned i;
281 if (cf->n == NO_DEPENDENCE)
282 fprintf (outf, "no dependence");
283 else if (cf->n == NOT_KNOWN)
284 fprintf (outf, "not known");
285 else
287 for (i = 0; i < cf->n; i++)
/* Space-separate successive "[fn]" entries.  */
289 if (i != 0)
290 fprintf (outf, " ");
291 fprintf (outf, "[");
292 dump_affine_function (outf, cf->fns[i]);
293 fprintf (outf, "]");
298 /* Dump function for a SUBSCRIPT structure. */
/* Prints, for each side (A then B), the conflict function, the last
   conflicting iteration when the conflict function is nontrivial, and
   finally the subscript distance.  */
300 DEBUG_FUNCTION void
301 dump_subscript (FILE *outf, struct subscript *subscript)
303 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
305 fprintf (outf, "\n (subscript \n");
306 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
307 dump_conflict_function (outf, cf);
308 if (CF_NONTRIVIAL_P (cf))
310 tree last_iteration = SUB_LAST_CONFLICT (subscript);
311 fprintf (outf, "\n last_conflict: ");
312 print_generic_expr (outf, last_iteration);
/* Same information for the second reference of the pair.  */
315 cf = SUB_CONFLICTS_IN_B (subscript);
316 fprintf (outf, "\n iterations_that_access_an_element_twice_in_B: ");
317 dump_conflict_function (outf, cf);
318 if (CF_NONTRIVIAL_P (cf))
320 tree last_iteration = SUB_LAST_CONFLICT (subscript);
321 fprintf (outf, "\n last_conflict: ");
322 print_generic_expr (outf, last_iteration);
325 fprintf (outf, "\n (Subscript distance: ");
326 print_generic_expr (outf, SUB_DISTANCE (subscript));
327 fprintf (outf, " ))\n");
330 /* Print the classic direction vector DIRV to OUTF. */
/* Each entry is printed as a symbol: "+" (positive), "-" (negative),
   "=" (equal), "+=", "+-", "-=" (combined), "*" (any direction), and
   "indep" for anything else.  */
332 DEBUG_FUNCTION void
333 print_direction_vector (FILE *outf,
334 lambda_vector dirv,
335 int length)
337 int eq;
339 for (eq = 0; eq < length; eq++)
341 enum data_dependence_direction dir = ((enum data_dependence_direction)
342 dirv[eq]);
344 switch (dir)
346 case dir_positive:
347 fprintf (outf, " +");
348 break;
349 case dir_negative:
350 fprintf (outf, " -");
351 break;
352 case dir_equal:
353 fprintf (outf, " =");
354 break;
355 case dir_positive_or_equal:
356 fprintf (outf, " +=");
357 break;
358 case dir_positive_or_negative:
359 fprintf (outf, " +-");
360 break;
361 case dir_negative_or_equal:
362 fprintf (outf, " -=");
363 break;
364 case dir_star:
365 fprintf (outf, " *");
366 break;
367 default:
368 fprintf (outf, "indep");
369 break;
372 fprintf (outf, "\n");
375 /* Print a vector of direction vectors. */
377 DEBUG_FUNCTION void
378 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
379 int length)
381 unsigned j;
382 lambda_vector v;
384 FOR_EACH_VEC_ELT (dir_vects, j, v)
385 print_direction_vector (outf, v, length);
388 /* Print out a vector VEC of length N to OUTFILE. */
390 DEBUG_FUNCTION void
391 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
393 int i;
395 for (i = 0; i < n; i++)
396 fprintf (outfile, "%3d ", vector[i]);
397 fprintf (outfile, "\n");
400 /* Print a vector of distance vectors. */
402 DEBUG_FUNCTION void
403 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
404 int length)
406 unsigned j;
407 lambda_vector v;
409 FOR_EACH_VEC_ELT (dist_vects, j, v)
410 print_lambda_vector (outf, v, length);
413 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
/* Prints DDR as a "(Data Dep: ...)" record.  Three outcomes:
   chrec_dont_know (or null DDR) -> "(don't know)"; chrec_known ->
   "(no dependence)"; NULL_TREE -> a dependence exists, so dump the
   subscripts, loop nest, and the distance and direction vectors.  */
415 DEBUG_FUNCTION void
416 dump_data_dependence_relation (FILE *outf,
417 struct data_dependence_relation *ddr)
419 struct data_reference *dra, *drb;
421 fprintf (outf, "(Data Dep: \n");
423 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
/* Still dump whatever references are available before giving up.  */
425 if (ddr)
427 dra = DDR_A (ddr);
428 drb = DDR_B (ddr);
429 if (dra)
430 dump_data_reference (outf, dra);
431 else
432 fprintf (outf, " (nil)\n");
433 if (drb)
434 dump_data_reference (outf, drb);
435 else
436 fprintf (outf, " (nil)\n");
438 fprintf (outf, " (don't know)\n)\n");
439 return;
442 dra = DDR_A (ddr);
443 drb = DDR_B (ddr);
444 dump_data_reference (outf, dra);
445 dump_data_reference (outf, drb);
447 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
448 fprintf (outf, " (no dependence)\n");
450 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
452 unsigned int i;
453 struct loop *loopi;
/* Per-dimension subscript pairs with their conflict information.  */
455 subscript *sub;
456 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
458 fprintf (outf, " access_fn_A: ");
459 print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
460 fprintf (outf, " access_fn_B: ");
461 print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
462 dump_subscript (outf, sub);
465 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
466 fprintf (outf, " loop nest: (");
467 FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
468 fprintf (outf, "%d ", loopi->num);
469 fprintf (outf, ")\n");
471 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
473 fprintf (outf, " distance_vector: ");
474 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
475 DDR_NB_LOOPS (ddr));
478 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
480 fprintf (outf, " direction_vector: ");
481 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
482 DDR_NB_LOOPS (ddr));
486 fprintf (outf, ")\n");
489 /* Debug version. */
/* Debugger entry point: dump DDR to stderr.  */
491 DEBUG_FUNCTION void
492 debug_data_dependence_relation (struct data_dependence_relation *ddr)
494 dump_data_dependence_relation (stderr, ddr);
497 /* Dump into FILE all the dependence relations from DDRS. */
499 DEBUG_FUNCTION void
500 dump_data_dependence_relations (FILE *file,
501 vec<ddr_p> ddrs)
503 unsigned int i;
504 struct data_dependence_relation *ddr;
506 FOR_EACH_VEC_ELT (ddrs, i, ddr)
507 dump_data_dependence_relation (file, ddr);
/* Unified dump to stderr of all dependence relations in REF
   (debugger convenience overload).  */
510 DEBUG_FUNCTION void
511 debug (vec<ddr_p> &ref)
513 dump_data_dependence_relations (stderr, ref);
516 DEBUG_FUNCTION void
517 debug (vec<ddr_p> *ptr)
519 if (ptr)
520 debug (*ptr);
521 else
522 fprintf (stderr, "<nil>\n");
526 /* Dump to STDERR all the dependence relations from DDRS. */
/* Named debugger entry point; delegates to
   dump_data_dependence_relations.  */
528 DEBUG_FUNCTION void
529 debug_data_dependence_relations (vec<ddr_p> ddrs)
531 dump_data_dependence_relations (stderr, ddrs);
534 /* Dumps the distance and direction vectors in FILE. DDRS contains
535 the dependence relations, and VECT_SIZE is the size of the
536 dependence vectors, or in other words the number of loops in the
537 considered nest. */
/* NOTE(review): the comment above is stale -- there is no VECT_SIZE
   parameter; the vector length is taken from DDR_NB_LOOPS per
   relation.  Only affine relations with a real dependence
   (DDR_ARE_DEPENDENT == NULL_TREE) are dumped.  */
539 DEBUG_FUNCTION void
540 dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
542 unsigned int i, j;
543 struct data_dependence_relation *ddr;
544 lambda_vector v;
546 FOR_EACH_VEC_ELT (ddrs, i, ddr)
547 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
549 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), j, v)
551 fprintf (file, "DISTANCE_V (");
552 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
553 fprintf (file, ")\n");
556 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), j, v)
558 fprintf (file, "DIRECTION_V (");
559 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
560 fprintf (file, ")\n");
564 fprintf (file, "\n\n");
567 /* Dumps the data dependence relations DDRS in FILE. */
569 DEBUG_FUNCTION void
570 dump_ddrs (FILE *file, vec<ddr_p> ddrs)
572 unsigned int i;
573 struct data_dependence_relation *ddr;
575 FOR_EACH_VEC_ELT (ddrs, i, ddr)
576 dump_data_dependence_relation (file, ddr);
578 fprintf (file, "\n\n");
/* Debugger entry point: dump DDRS to stderr via dump_ddrs.  */
581 DEBUG_FUNCTION void
582 debug_ddrs (vec<ddr_p> ddrs)
584 dump_ddrs (stderr, ddrs);
587 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
588 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
589 constant of type ssizetype, and returns true. If we cannot do this
590 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
591 is returned. */
/* Mutually recursive with split_constant_offset: each operand of a
   decomposable expression is split in turn, and SSA names are chased
   through their defining assignment.  */
593 static bool
594 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
595 tree *var, tree *off)
597 tree var0, var1;
598 tree off0, off1;
599 enum tree_code ocode = code;
601 *var = NULL_TREE;
602 *off = NULL_TREE;
604 switch (code)
/* A bare constant: VAR is zero, OFF the constant itself.  */
606 case INTEGER_CST:
607 *var = build_int_cst (type, 0);
608 *off = fold_convert (ssizetype, op0);
609 return true;
611 case POINTER_PLUS_EXPR:
612 ocode = PLUS_EXPR;
613 /* FALLTHROUGH */
614 case PLUS_EXPR:
615 case MINUS_EXPR:
/* Split both operands and recombine their variable and constant
   parts with the original (or adjusted) operation.  */
616 split_constant_offset (op0, &var0, &off0);
617 split_constant_offset (op1, &var1, &off1);
618 *var = fold_build2 (code, type, var0, var1);
619 *off = size_binop (ocode, off0, off1);
620 return true;
622 case MULT_EXPR:
/* Only a constant multiplier distributes over VAR + OFF.  */
623 if (TREE_CODE (op1) != INTEGER_CST)
624 return false;
626 split_constant_offset (op0, &var0, &off0);
627 *var = fold_build2 (MULT_EXPR, type, var0, op1);
628 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
629 return true;
631 case ADDR_EXPR:
633 tree base, poffset;
634 poly_int64 pbitsize, pbitpos, pbytepos;
635 machine_mode pmode;
636 int punsignedp, preversep, pvolatilep;
638 op0 = TREE_OPERAND (op0, 0);
639 base
640 = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
641 &punsignedp, &preversep, &pvolatilep);
/* The constant part must be a whole number of bytes.  */
643 if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
644 return false;
645 base = build_fold_addr_expr (base);
646 off0 = ssize_int (pbytepos);
648 if (poffset)
650 split_constant_offset (poffset, &poffset, &off1);
651 off0 = size_binop (PLUS_EXPR, off0, off1);
652 if (POINTER_TYPE_P (TREE_TYPE (base)))
653 base = fold_build_pointer_plus (base, poffset);
654 else
655 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
656 fold_convert (TREE_TYPE (base), poffset));
659 var0 = fold_convert (type, base);
661 /* If variable length types are involved, punt, otherwise casts
662 might be converted into ARRAY_REFs in gimplify_conversion.
663 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
664 possibly no longer appears in current GIMPLE, might resurface.
665 This perhaps could run
666 if (CONVERT_EXPR_P (var0))
668 gimplify_conversion (&var0);
669 // Attempt to fill in any within var0 found ARRAY_REF's
670 // element size from corresponding op embedded ARRAY_REF,
671 // if unsuccessful, just punt.
672 } */
673 while (POINTER_TYPE_P (type))
674 type = TREE_TYPE (type);
675 if (int_size_in_bytes (type) < 0)
676 return false;
678 *var = var0;
679 *off = off0;
680 return true;
683 case SSA_NAME:
685 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
686 return false;
688 gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
689 enum tree_code subcode;
691 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
692 return false;
/* Recurse into the defining assignment's right-hand side.  */
694 var0 = gimple_assign_rhs1 (def_stmt);
695 subcode = gimple_assign_rhs_code (def_stmt);
696 var1 = gimple_assign_rhs2 (def_stmt);
698 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
700 CASE_CONVERT:
702 /* We must not introduce undefined overflow, and we must not change the value.
703 Hence we're okay if the inner type doesn't overflow to start with
704 (pointer or signed), the outer type also is an integer or pointer
705 and the outer precision is at least as large as the inner. */
706 tree itype = TREE_TYPE (op0);
707 if ((POINTER_TYPE_P (itype)
708 || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
709 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
710 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
712 if (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_WRAPS (itype))
714 /* Split the unconverted operand and try to prove that
715 wrapping isn't a problem. */
716 tree tmp_var, tmp_off;
717 split_constant_offset (op0, &tmp_var, &tmp_off);
719 /* See whether we have an SSA_NAME whose range is known
720 to be [A, B]. */
721 if (TREE_CODE (tmp_var) != SSA_NAME)
722 return false;
723 wide_int var_min, var_max;
724 value_range_type vr_type = get_range_info (tmp_var, &var_min,
725 &var_max);
726 wide_int var_nonzero = get_nonzero_bits (tmp_var);
727 signop sgn = TYPE_SIGN (itype);
728 if (intersect_range_with_nonzero_bits (vr_type, &var_min,
729 &var_max, var_nonzero,
730 sgn) != VR_RANGE)
731 return false;
733 /* See whether the range of OP0 (i.e. TMP_VAR + TMP_OFF)
734 is known to be [A + TMP_OFF, B + TMP_OFF], with all
735 operations done in ITYPE. The addition must overflow
736 at both ends of the range or at neither. */
737 wi::overflow_type overflow[2];
738 unsigned int prec = TYPE_PRECISION (itype);
739 wide_int woff = wi::to_wide (tmp_off, prec);
740 wide_int op0_min = wi::add (var_min, woff, sgn, &overflow[0]);
741 wi::add (var_max, woff, sgn, &overflow[1]);
742 if ((overflow[0] != wi::OVF_NONE) != (overflow[1] != wi::OVF_NONE))
743 return false;
745 /* Calculate (ssizetype) OP0 - (ssizetype) TMP_VAR. */
746 widest_int diff = (widest_int::from (op0_min, sgn)
747 - widest_int::from (var_min, sgn));
748 var0 = tmp_var;
749 *off = wide_int_to_tree (ssizetype, diff);
751 else
752 split_constant_offset (op0, &var0, off);
753 *var = fold_convert (type, var0);
754 return true;
756 return false;
/* Any other tree code cannot be decomposed.  */
759 default:
760 return false;
764 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
765 will be ssizetype. */
767 void
768 split_constant_offset (tree exp, tree *var, tree *off)
770 tree type = TREE_TYPE (exp), op0, op1, e, o;
771 enum tree_code code;
773 *var = exp;
774 *off = ssize_int (0);
776 if (tree_is_chrec (exp)
777 || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
778 return;
780 code = TREE_CODE (exp);
781 extract_ops_from_tree (exp, &code, &op0, &op1);
782 if (split_constant_offset_1 (type, op0, code, op1, &e, &o))
784 *var = e;
785 *off = o;
789 /* Returns the address ADDR of an object in a canonical shape (without nop
790 casts, and with type of pointer to the object). */
792 static tree
793 canonicalize_base_object_address (tree addr)
795 tree orig = addr;
797 STRIP_NOPS (addr);
799 /* The base address may be obtained by casting from integer, in that case
800 keep the cast. */
801 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
802 return orig;
804 if (TREE_CODE (addr) != ADDR_EXPR)
805 return addr;
807 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
810 /* Analyze the behavior of memory reference REF. There are two modes:
812 - BB analysis. In this case we simply split the address into base,
813 init and offset components, without reference to any containing loop.
814 The resulting base and offset are general expressions and they can
815 vary arbitrarily from one iteration of the containing loop to the next.
816 The step is always zero.
818 - loop analysis. In this case we analyze the reference both wrt LOOP
819 and on the basis that the reference occurs (is "used") in LOOP;
820 see the comment above analyze_scalar_evolution_in_loop for more
821 information about this distinction. The base, init, offset and
822 step fields are all invariant in LOOP.
824 Perform BB analysis if LOOP is null, or if LOOP is the function's
825 dummy outermost loop. In other cases perform loop analysis.
827 Return true if the analysis succeeded and store the results in DRB if so.
828 BB analysis can only fail for bitfield or reversed-storage accesses. */
830 bool
831 dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
832 struct loop *loop)
834 poly_int64 pbitsize, pbitpos;
835 tree base, poffset;
836 machine_mode pmode;
837 int punsignedp, preversep, pvolatilep;
838 affine_iv base_iv, offset_iv;
839 tree init, dinit, step;
/* loop->num == 0 is the dummy outermost loop: treat as BB analysis.  */
840 bool in_loop = (loop && loop->num);
842 if (dump_file && (dump_flags & TDF_DETAILS))
843 fprintf (dump_file, "analyze_innermost: ");
845 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
846 &punsignedp, &preversep, &pvolatilep);
847 gcc_assert (base != NULL_TREE);
/* Failure mode 1: a bitfield access not on a byte boundary.  */
849 poly_int64 pbytepos;
850 if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
852 if (dump_file && (dump_flags & TDF_DETAILS))
853 fprintf (dump_file, "failed: bit offset alignment.\n");
854 return false;
/* Failure mode 2: reverse storage order.  */
857 if (preversep)
859 if (dump_file && (dump_flags & TDF_DETAILS))
860 fprintf (dump_file, "failed: reverse storage order.\n");
861 return false;
864 /* Calculate the alignment and misalignment for the inner reference. */
865 unsigned int HOST_WIDE_INT bit_base_misalignment;
866 unsigned int bit_base_alignment;
867 get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);
869 /* There are no bitfield references remaining in BASE, so the values
870 we got back must be whole bytes. */
871 gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
872 && bit_base_misalignment % BITS_PER_UNIT == 0);
873 unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
874 poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;
876 if (TREE_CODE (base) == MEM_REF)
878 if (!integer_zerop (TREE_OPERAND (base, 1)))
880 /* Subtract MOFF from the base and add it to POFFSET instead.
881 Adjust the misalignment to reflect the amount we subtracted. */
882 poly_offset_int moff = mem_ref_offset (base);
883 base_misalignment -= moff.force_shwi ();
884 tree mofft = wide_int_to_tree (sizetype, moff);
885 if (!poffset)
886 poffset = mofft;
887 else
888 poffset = size_binop (PLUS_EXPR, poffset, mofft);
890 base = TREE_OPERAND (base, 0);
892 else
893 base = build_fold_addr_expr (base);
/* In loop mode the base must evolve as a simple affine IV; in BB mode
   it is treated as a loop-invariant value with zero step.  */
895 if (in_loop)
897 if (!simple_iv (loop, loop, base, &base_iv, true))
899 if (dump_file && (dump_flags & TDF_DETAILS))
900 fprintf (dump_file, "failed: evolution of base is not affine.\n");
901 return false;
904 else
906 base_iv.base = base;
907 base_iv.step = ssize_int (0);
908 base_iv.no_overflow = true;
911 if (!poffset)
913 offset_iv.base = ssize_int (0);
914 offset_iv.step = ssize_int (0);
916 else
918 if (!in_loop)
920 offset_iv.base = poffset;
921 offset_iv.step = ssize_int (0);
923 else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
925 if (dump_file && (dump_flags & TDF_DETAILS))
926 fprintf (dump_file, "failed: evolution of offset is not affine.\n");
927 return false;
931 init = ssize_int (pbytepos);
933 /* Subtract any constant component from the base and add it to INIT instead.
934 Adjust the misalignment to reflect the amount we subtracted. */
935 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
936 init = size_binop (PLUS_EXPR, init, dinit);
937 base_misalignment -= TREE_INT_CST_LOW (dinit);
939 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
940 init = size_binop (PLUS_EXPR, init, dinit);
/* The per-iteration step is the sum of the base and offset steps.  */
942 step = size_binop (PLUS_EXPR,
943 fold_convert (ssizetype, base_iv.step),
944 fold_convert (ssizetype, offset_iv.step));
946 base = canonicalize_base_object_address (base_iv.base);
948 /* See if get_pointer_alignment can guarantee a higher alignment than
949 the one we calculated above. */
950 unsigned int HOST_WIDE_INT alt_misalignment;
951 unsigned int alt_alignment;
952 get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);
954 /* As above, these values must be whole bytes. */
955 gcc_assert (alt_alignment % BITS_PER_UNIT == 0
956 && alt_misalignment % BITS_PER_UNIT == 0);
957 alt_alignment /= BITS_PER_UNIT;
958 alt_misalignment /= BITS_PER_UNIT;
960 if (base_alignment < alt_alignment)
962 base_alignment = alt_alignment;
963 base_misalignment = alt_misalignment;
/* Fill in the behavior record for the caller.  */
966 drb->base_address = base;
967 drb->offset = fold_convert (ssizetype, offset_iv.base);
968 drb->init = init;
969 drb->step = step;
970 if (known_misalignment (base_misalignment, base_alignment,
971 &drb->base_misalignment))
972 drb->base_alignment = base_alignment;
973 else
975 drb->base_alignment = known_alignment (base_misalignment);
976 drb->base_misalignment = 0;
978 drb->offset_alignment = highest_pow2_factor (offset_iv.base);
979 drb->step_alignment = highest_pow2_factor (step);
981 if (dump_file && (dump_flags & TDF_DETAILS))
982 fprintf (dump_file, "success.\n");
984 return true;
987 /* Return true if OP is a valid component reference for a DR access
988 function. This accepts a subset of what handled_component_p accepts. */
990 static bool
991 access_fn_component_p (tree op)
993 switch (TREE_CODE (op))
995 case REALPART_EXPR:
996 case IMAGPART_EXPR:
997 case ARRAY_REF:
998 return true;
1000 case COMPONENT_REF:
1001 return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;
1003 default:
1004 return false;
1008 /* Determines the base object and the list of indices of memory reference
1009 DR, analyzed in LOOP and instantiated before NEST. */
/* On return DR_BASE_OBJECT and DR_ACCESS_FNS of DR are filled in; the
   kinds of components handled must stay in sync with
   access_fn_component_p above.  */
1011 static void
1012 dr_analyze_indices (struct data_reference *dr, edge nest, loop_p loop)
1014 vec<tree> access_fns = vNULL;
1015 tree ref, op;
1016 tree base, off, access_fn;
1018 /* If analyzing a basic-block there are no indices to analyze
1019 and thus no access functions. */
1020 if (!nest)
1022 DR_BASE_OBJECT (dr) = DR_REF (dr);
1023 DR_ACCESS_FNS (dr).create (0);
1024 return;
1027 ref = DR_REF (dr);
1029 /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
1030 into a two element array with a constant index. The base is
1031 then just the immediate underlying object. */
1032 if (TREE_CODE (ref) == REALPART_EXPR)
1034 ref = TREE_OPERAND (ref, 0);
1035 access_fns.safe_push (integer_zero_node);
1037 else if (TREE_CODE (ref) == IMAGPART_EXPR)
1039 ref = TREE_OPERAND (ref, 0);
1040 access_fns.safe_push (integer_one_node);
1043 /* Analyze access functions of dimensions we know to be independent.
1044 The list of component references handled here should be kept in
1045 sync with access_fn_component_p. */
1046 while (handled_component_p (ref))
1048 if (TREE_CODE (ref) == ARRAY_REF)
/* Array index: its scalar evolution is the access function.  */
1050 op = TREE_OPERAND (ref, 1);
1051 access_fn = analyze_scalar_evolution (loop, op);
1052 access_fn = instantiate_scev (nest, loop, access_fn);
1053 access_fns.safe_push (access_fn);
1055 else if (TREE_CODE (ref) == COMPONENT_REF
1056 && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
1058 /* For COMPONENT_REFs of records (but not unions!) use the
1059 FIELD_DECL offset as constant access function so we can
1060 disambiguate a[i].f1 and a[i].f2. */
1061 tree off = component_ref_field_offset (ref);
1062 off = size_binop (PLUS_EXPR,
1063 size_binop (MULT_EXPR,
1064 fold_convert (bitsizetype, off),
1065 bitsize_int (BITS_PER_UNIT)),
1066 DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
1067 access_fns.safe_push (off);
1069 else
1070 /* If we have an unhandled component we could not translate
1071 to an access function stop analyzing. We have determined
1072 our base object in this case. */
1073 break;
1075 ref = TREE_OPERAND (ref, 0);
1078 /* If the address operand of a MEM_REF base has an evolution in the
1079 analyzed nest, add it as an additional independent access-function. */
1080 if (TREE_CODE (ref) == MEM_REF)
1082 op = TREE_OPERAND (ref, 0);
1083 access_fn = analyze_scalar_evolution (loop, op);
1084 access_fn = instantiate_scev (nest, loop, access_fn);
1085 if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
1087 tree orig_type;
1088 tree memoff = TREE_OPERAND (ref, 1);
1089 base = initial_condition (access_fn);
1090 orig_type = TREE_TYPE (base);
1091 STRIP_USELESS_TYPE_CONVERSION (base);
1092 split_constant_offset (base, &base, &off);
1093 STRIP_USELESS_TYPE_CONVERSION (base);
1094 /* Fold the MEM_REF offset into the evolutions initial
1095 value to make more bases comparable. */
1096 if (!integer_zerop (memoff))
1098 off = size_binop (PLUS_EXPR, off,
1099 fold_convert (ssizetype, memoff));
1100 memoff = build_int_cst (TREE_TYPE (memoff), 0);
1102 /* Adjust the offset so it is a multiple of the access type
1103 size and thus we separate bases that can possibly be used
1104 to produce partial overlaps (which the access_fn machinery
1105 cannot handle). */
1106 wide_int rem;
1107 if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
1108 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
1109 && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
1110 rem = wi::mod_trunc
1111 (wi::to_wide (off),
1112 wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
1113 SIGNED);
1114 else
1115 /* If we can't compute the remainder simply force the initial
1116 condition to zero. */
1117 rem = wi::to_wide (off);
1118 off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
1119 memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
1120 /* And finally replace the initial condition. */
1121 access_fn = chrec_replace_initial_condition
1122 (access_fn, fold_convert (orig_type, off));
1123 /* ??? This is still not a suitable base object for
1124 dr_may_alias_p - the base object needs to be an
1125 access that covers the object as whole. With
1126 an evolution in the pointer this cannot be
1127 guaranteed.
1128 As a band-aid, mark the access so we can special-case
1129 it in dr_may_alias_p. */
1130 tree old = ref;
1131 ref = fold_build2_loc (EXPR_LOCATION (ref),
1132 MEM_REF, TREE_TYPE (ref),
1133 base, memoff);
1134 MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
1135 MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
1136 DR_UNCONSTRAINED_BASE (dr) = true;
1137 access_fns.safe_push (access_fn);
1140 else if (DECL_P (ref))
1142 /* Canonicalize DR_BASE_OBJECT to MEM_REF form. */
1143 ref = build2 (MEM_REF, TREE_TYPE (ref),
1144 build_fold_addr_expr (ref),
1145 build_int_cst (reference_alias_ptr_type (ref), 0));
1148 DR_BASE_OBJECT (dr) = ref;
1149 DR_ACCESS_FNS (dr) = access_fns;
1152 /* Extracts the alias analysis information from the memory reference DR.
     Specifically, if DR's base is a dereference of an SSA name pointer,
     copy the points-to information attached to that SSA name into
     DR_PTR_INFO; otherwise DR_PTR_INFO is left as initialized (NULL).  */
1154 static void
1155 dr_analyze_alias (struct data_reference *dr)
1157 tree ref = DR_REF (dr);
1158 tree base = get_base_address (ref), addr;
     /* Points-to info only exists for pointer dereferences, i.e. when the
	base of the reference is an INDIRECT_REF or MEM_REF of an SSA name.  */
1160 if (INDIRECT_REF_P (base)
1161 || TREE_CODE (base) == MEM_REF)
1163 addr = TREE_OPERAND (base, 0);
1164 if (TREE_CODE (addr) == SSA_NAME)
1165 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1169 /* Frees data reference DR: releases the vector of access functions
     owned by DR and then the data_reference structure itself.  */
1171 void
1172 free_data_ref (data_reference_p dr)
1174 DR_ACCESS_FNS (dr).release ();
1175 free (dr);
1178 /* Analyze memory reference MEMREF, which is accessed in STMT.
1179 The reference is a read if IS_READ is true, otherwise it is a write.
1180 IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
1181 within STMT, i.e. that it might not occur even if STMT is executed
1182 and runs to completion.
1184 Return the data_reference description of MEMREF. NEST is the outermost
1185 loop in which the reference should be instantiated, LOOP is the loop
1186 in which the data reference should be analyzed.
     The returned descriptor is heap-allocated; the caller is responsible
     for freeing it with free_data_ref.  */
1188 struct data_reference *
1189 create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
1190 bool is_read, bool is_conditional_in_stmt)
1192 struct data_reference *dr;
1194 if (dump_file && (dump_flags & TDF_DETAILS))
1196 fprintf (dump_file, "Creating dr for ");
1197 print_generic_expr (dump_file, memref, TDF_SLIM);
1198 fprintf (dump_file, "\n");
     /* XCNEW zero-initializes, so fields not set below start out NULL/0.  */
1201 dr = XCNEW (struct data_reference);
1202 DR_STMT (dr) = stmt;
1203 DR_REF (dr) = memref;
1204 DR_IS_READ (dr) = is_read;
1205 DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;
     /* Fill in the innermost-behavior, index and alias information.
	When NEST is NULL the reference is analyzed without a loop context.  */
1207 dr_analyze_innermost (&DR_INNERMOST (dr), memref,
1208 nest != NULL ? loop : NULL);
1209 dr_analyze_indices (dr, nest, loop);
1210 dr_analyze_alias (dr);
1212 if (dump_file && (dump_flags & TDF_DETAILS))
1214 unsigned i;
1215 fprintf (dump_file, "\tbase_address: ");
1216 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1217 fprintf (dump_file, "\n\toffset from base address: ");
1218 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1219 fprintf (dump_file, "\n\tconstant offset from base address: ");
1220 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1221 fprintf (dump_file, "\n\tstep: ");
1222 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1223 fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
1224 fprintf (dump_file, "\n\tbase misalignment: %d",
1225 DR_BASE_MISALIGNMENT (dr));
1226 fprintf (dump_file, "\n\toffset alignment: %d",
1227 DR_OFFSET_ALIGNMENT (dr));
1228 fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
1229 fprintf (dump_file, "\n\tbase_object: ");
1230 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1231 fprintf (dump_file, "\n");
1232 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1234 fprintf (dump_file, "\tAccess function %d: ", i);
1235 print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1239 return dr;
1242 /* A helper function computes order between two tree expressions T1 and T2.
1243 This is used in comparator functions sorting objects based on the order
1244 of tree expressions. The function returns -1, 0, or 1.
     The ordering is arbitrary but total and deterministic within a
     compilation, which is all qsort-style comparators need.  */
1247 data_ref_compare_tree (tree t1, tree t2)
1249 int i, cmp;
1250 enum tree_code code;
1251 char tclass;
     /* NULL sorts before everything; pointer equality means equal.  */
1253 if (t1 == t2)
1254 return 0;
1255 if (t1 == NULL)
1256 return -1;
1257 if (t2 == NULL)
1258 return 1;
1260 STRIP_USELESS_TYPE_CONVERSION (t1);
1261 STRIP_USELESS_TYPE_CONVERSION (t2);
1262 if (t1 == t2)
1263 return 0;
     /* Different tree codes order by code value, except that all
	conversion codes are treated as equivalent.  */
1265 if (TREE_CODE (t1) != TREE_CODE (t2)
1266 && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
1267 return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
1269 code = TREE_CODE (t1);
1270 switch (code)
1272 case INTEGER_CST:
1273 return tree_int_cst_compare (t1, t2);
1275 case STRING_CST:
1276 if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
1277 return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
1278 return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
1279 TREE_STRING_LENGTH (t1));
1281 case SSA_NAME:
1282 if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
1283 return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
1284 break;
1286 default:
1287 if (POLY_INT_CST_P (t1))
1288 return compare_sizes_for_sort (wi::to_poly_widest (t1),
1289 wi::to_poly_widest (t2));
1291 tclass = TREE_CODE_CLASS (code);
1293 /* For decls, compare their UIDs. */
1294 if (tclass == tcc_declaration)
1296 if (DECL_UID (t1) != DECL_UID (t2))
1297 return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
1298 break;
1300 /* For expressions, compare their operands recursively. */
1301 else if (IS_EXPR_CODE_CLASS (tclass))
1303 for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
1305 cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
1306 TREE_OPERAND (t2, i));
1307 if (cmp != 0)
1308 return cmp;
1311 else
1312 gcc_unreachable ();
1315 return 0;
1318 /* Return TRUE if it's possible to resolve data dependence DDR by runtime
1319 alias check.  Returns false when optimizing for size or when LOOP has
     an inner loop (versioning with an outer loop is not supported).  */
1321 bool
1322 runtime_alias_check_p (ddr_p ddr, struct loop *loop, bool speed_p)
1324 if (dump_enabled_p ())
1326 dump_printf (MSG_NOTE, "consider run-time aliasing test between ");
1327 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_A (ddr)));
1328 dump_printf (MSG_NOTE, " and ");
1329 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_B (ddr)));
1330 dump_printf (MSG_NOTE, "\n");
     /* Runtime checks duplicate the loop body; not worth it for -Os.  */
1333 if (!speed_p)
1335 if (dump_enabled_p ())
1336 dump_printf (MSG_MISSED_OPTIMIZATION,
1337 "runtime alias check not supported when optimizing "
1338 "for size.\n");
1339 return false;
1342 /* FORNOW: We don't support versioning with outer-loop in either
1343 vectorization or loop distribution. */
1344 if (loop != NULL && loop->inner != NULL)
1346 if (dump_enabled_p ())
1347 dump_printf (MSG_MISSED_OPTIMIZATION,
1348 "runtime alias check not supported for outer loop.\n");
1349 return false;
1352 return true;
1355 /* Operator == between two dr_with_seg_len objects.
1357 This equality operator is used to make sure two data refs
1358 are the same one so that we will consider to combine the
1359 aliasing checks of those two pairs of data dependent data
1360 refs.
     Two entries are equal when their base address, offset, init,
     segment length, access size and alignment all match.  */
1362 static bool
1363 operator == (const dr_with_seg_len& d1,
1364 const dr_with_seg_len& d2)
1366 return (operand_equal_p (DR_BASE_ADDRESS (d1.dr),
1367 DR_BASE_ADDRESS (d2.dr), 0)
1368 && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
1369 && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
1370 && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0
1371 && known_eq (d1.access_size, d2.access_size)
1372 && d1.align == d2.align);
1375 /* Comparison function for sorting objects of dr_with_seg_len_pair_t
1376 so that we can combine aliasing checks in one scan.
     Compares base address, step, offset and init of both pair members,
     in that order, using data_ref_compare_tree's total order.  */
1378 static int
1379 comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
1381 const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
1382 const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
1383 const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
1384 const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;
1386 /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks
1387 if a and c have the same basic address and step, and b and d have the same
1388 address and step. Therefore, if any a&c or b&d don't have the same address
1389 and step, we don't care about the order of those two pairs after sorting. */
1390 int comp_res;
1392 if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
1393 DR_BASE_ADDRESS (b1.dr))) != 0)
1394 return comp_res;
1395 if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
1396 DR_BASE_ADDRESS (b2.dr))) != 0)
1397 return comp_res;
1398 if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
1399 DR_STEP (b1.dr))) != 0)
1400 return comp_res;
1401 if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
1402 DR_STEP (b2.dr))) != 0)
1403 return comp_res;
1404 if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
1405 DR_OFFSET (b1.dr))) != 0)
1406 return comp_res;
1407 if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
1408 DR_INIT (b1.dr))) != 0)
1409 return comp_res;
1410 if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
1411 DR_OFFSET (b2.dr))) != 0)
1412 return comp_res;
1413 if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
1414 DR_INIT (b2.dr))) != 0)
1415 return comp_res;
1417 return 0;
1420 /* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
1421 FACTOR is number of iterations that each data reference is accessed.
1423 Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
1424 we create an expression:
1426 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1427 || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
1429 for aliasing checks. However, in some cases we can decrease the number
1430 of checks by combining two checks into one. For example, suppose we have
1431 another pair of data refs store_ptr_0 & load_ptr_1, and if the following
1432 condition is satisfied:
1434 load_ptr_0 < load_ptr_1 &&
1435 load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
1437 (this condition means, in each iteration of vectorized loop, the accessed
1438 memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
1439 load_ptr_1.)
1441 we then can use only the following expression to finish the aliasing checks
1442 between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:
1444 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1445 || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))
1447 Note that we only consider that load_ptr_0 and load_ptr_1 have the same
1448 basic address. */
1450 void
1451 prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
1452 poly_uint64)
1454 /* Sort the collected data ref pairs so that we can scan them once to
1455 combine all possible aliasing checks. */
1456 alias_pairs->qsort (comp_dr_with_seg_len_pair);
1458 /* Scan the sorted dr pairs and check if we can combine alias checks
1459 of two neighboring dr pairs. */
1460 for (size_t i = 1; i < alias_pairs->length (); ++i)
1462 /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2). */
1463 dr_with_seg_len *dr_a1 = &(*alias_pairs)[i-1].first,
1464 *dr_b1 = &(*alias_pairs)[i-1].second,
1465 *dr_a2 = &(*alias_pairs)[i].first,
1466 *dr_b2 = &(*alias_pairs)[i].second;
1468 /* Remove duplicate data ref pairs. */
1469 if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
1471 if (dump_enabled_p ())
1473 dump_printf (MSG_NOTE, "found equal ranges ");
1474 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a1->dr));
1475 dump_printf (MSG_NOTE, ", ");
1476 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b1->dr));
1477 dump_printf (MSG_NOTE, " and ");
1478 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a2->dr));
1479 dump_printf (MSG_NOTE, ", ");
1480 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b2->dr));
1481 dump_printf (MSG_NOTE, "\n");
1483 alias_pairs->ordered_remove (i--);
1484 continue;
1487 if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
1489 /* We consider the case that DR_B1 and DR_B2 are same memrefs,
1490 and DR_A1 and DR_A2 are two consecutive memrefs. */
1491 if (*dr_a1 == *dr_a2)
1493 std::swap (dr_a1, dr_b1);
1494 std::swap (dr_a2, dr_b2);
1497 poly_int64 init_a1, init_a2;
1498 /* Only consider cases in which the distance between the initial
1499 DR_A1 and the initial DR_A2 is known at compile time. */
1500 if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
1501 DR_BASE_ADDRESS (dr_a2->dr), 0)
1502 || !operand_equal_p (DR_OFFSET (dr_a1->dr),
1503 DR_OFFSET (dr_a2->dr), 0)
1504 || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
1505 || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
1506 continue;
1508 /* Don't combine if we can't tell which one comes first. */
1509 if (!ordered_p (init_a1, init_a2))
1510 continue;
1512 /* Make sure dr_a1 starts left of dr_a2. */
1513 if (maybe_gt (init_a1, init_a2))
1515 std::swap (*dr_a1, *dr_a2);
1516 std::swap (init_a1, init_a2);
1519 /* Work out what the segment length would be if we did combine
1520 DR_A1 and DR_A2:
1522 - If DR_A1 and DR_A2 have equal lengths, that length is
1523 also the combined length.
1525 - If DR_A1 and DR_A2 both have negative "lengths", the combined
1526 length is the lower bound on those lengths.
1528 - If DR_A1 and DR_A2 both have positive lengths, the combined
1529 length is the upper bound on those lengths.
1531 Other cases are unlikely to give a useful combination.
1533 The lengths both have sizetype, so the sign is taken from
1534 the step instead. */
1535 if (!operand_equal_p (dr_a1->seg_len, dr_a2->seg_len, 0))
1537 poly_uint64 seg_len_a1, seg_len_a2;
1538 if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
1539 || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
1540 continue;
1542 tree indicator_a = dr_direction_indicator (dr_a1->dr);
1543 if (TREE_CODE (indicator_a) != INTEGER_CST)
1544 continue;
1546 tree indicator_b = dr_direction_indicator (dr_a2->dr);
1547 if (TREE_CODE (indicator_b) != INTEGER_CST)
1548 continue;
1550 int sign_a = tree_int_cst_sgn (indicator_a);
1551 int sign_b = tree_int_cst_sgn (indicator_b);
1553 poly_uint64 new_seg_len;
1554 if (sign_a <= 0 && sign_b <= 0)
1555 new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
1556 else if (sign_a >= 0 && sign_b >= 0)
1557 new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
1558 else
1559 continue;
1561 dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
1562 new_seg_len);
1563 dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
1566 /* This is always positive due to the swap above. */
1567 poly_uint64 diff = init_a2 - init_a1;
1569 /* The new check will start at DR_A1. Make sure that its access
1570 size encompasses the initial DR_A2. */
1571 if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
1573 dr_a1->access_size = upper_bound (dr_a1->access_size,
1574 diff + dr_a2->access_size);
1575 unsigned int new_align = known_alignment (dr_a1->access_size);
1576 dr_a1->align = MIN (dr_a1->align, new_align);
1578 if (dump_enabled_p ())
1580 dump_printf (MSG_NOTE, "merging ranges for ");
1581 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a1->dr));
1582 dump_printf (MSG_NOTE, ", ");
1583 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b1->dr));
1584 dump_printf (MSG_NOTE, " and ");
1585 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a2->dr));
1586 dump_printf (MSG_NOTE, ", ");
1587 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b2->dr));
1588 dump_printf (MSG_NOTE, "\n");
1590 alias_pairs->ordered_remove (i);
1591 i--;
1596 /* Given LOOP's two data references and segment lengths described by DR_A
1597 and DR_B, create expression checking if the two addresses ranges intersect
1598 with each other based on index of the two addresses. This can only be
1599 done if DR_A and DR_B referring to the same (array) object and the index
1600 is the only difference. For example:
1602 DR_A DR_B
1603 data-ref arr[i] arr[j]
1604 base_object arr arr
1605 index {i_0, +, 1}_loop {j_0, +, 1}_loop
1607 The addresses and their index are like:
1609 |<- ADDR_A ->| |<- ADDR_B ->|
1610 ------------------------------------------------------->
1611 | | | | | | | | | |
1612 ------------------------------------------------------->
1613 i_0 ... i_0+4 j_0 ... j_0+4
1615 We can create expression based on index rather than address:
1617 (i_0 + 4 < j_0 || j_0 + 4 < i_0)
1619 Note evolution step of index needs to be considered in comparison.
     Returns true and appends the check to *COND_EXPR on success, false if
     the index-based form is not applicable (in which case the caller falls
     back to address-based checks).  */
1621 static bool
1622 create_intersect_range_checks_index (struct loop *loop, tree *cond_expr,
1623 const dr_with_seg_len& dr_a,
1624 const dr_with_seg_len& dr_b)
     /* Invariant references or mismatched dimensionality cannot be
	compared index-wise.  */
1626 if (integer_zerop (DR_STEP (dr_a.dr))
1627 || integer_zerop (DR_STEP (dr_b.dr))
1628 || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
1629 return false;
1631 poly_uint64 seg_len1, seg_len2;
1632 if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
1633 || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
1634 return false;
1636 if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
1637 return false;
     /* Both refs must walk the same object with the same step.  */
1639 if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
1640 return false;
1642 if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
1643 return false;
1645 gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
1647 bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
1648 unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
1649 if (neg_step)
     /* Negative step: negate step and lengths so the divisions below
	operate on positive magnitudes.  */
1651 abs_step = -abs_step;
1652 seg_len1 = -seg_len1;
1653 seg_len2 = -seg_len2;
1655 else
1657 /* Include the access size in the length, so that we only have one
1658 tree addition below. */
1659 seg_len1 += dr_a.access_size;
1660 seg_len2 += dr_b.access_size;
1663 /* Infer the number of iterations with which the memory segment is accessed
1664 by DR. In other words, alias is checked if memory segment accessed by
1665 DR_A in some iterations intersect with memory segment accessed by DR_B
1666 in the same number of iterations.
1667 Note segment length is a linear function of number of iterations with
1668 DR_STEP as the coefficient. */
1669 poly_uint64 niter_len1, niter_len2;
1670 if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
1671 || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
1672 return false;
1674 poly_uint64 niter_access1 = 0, niter_access2 = 0;
1675 if (neg_step)
1677 /* Divide each access size by the byte step, rounding up.
	Note: the first operand previously read "- abs_step - 1", which
	contradicts the round-up intent (and the sibling line below) and
	underflows when access_size <= abs_step; fixed to "+ abs_step - 1".  */
1678 if (!can_div_trunc_p (dr_a.access_size + abs_step - 1,
1679 abs_step, &niter_access1)
1680 || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
1681 abs_step, &niter_access2))
1682 return false;
1685 unsigned int i;
1686 for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
1688 tree access1 = DR_ACCESS_FN (dr_a.dr, i);
1689 tree access2 = DR_ACCESS_FN (dr_b.dr, i);
1690 /* Two indices must be the same if they are not scev, or not scev wrto
1691 current loop being vectorized. */
1692 if (TREE_CODE (access1) != POLYNOMIAL_CHREC
1693 || TREE_CODE (access2) != POLYNOMIAL_CHREC
1694 || CHREC_VARIABLE (access1) != (unsigned)loop->num
1695 || CHREC_VARIABLE (access2) != (unsigned)loop->num)
1697 if (operand_equal_p (access1, access2, 0))
1698 continue;
1700 return false;
1702 /* The two indices must have the same step. */
1703 if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
1704 return false;
1706 tree idx_step = CHREC_RIGHT (access1);
1707 /* Index must have const step, otherwise DR_STEP won't be constant. */
1708 gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
1709 /* Index must evaluate in the same direction as DR. */
1710 gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
1712 tree min1 = CHREC_LEFT (access1);
1713 tree min2 = CHREC_LEFT (access2);
1714 if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
1715 return false;
1717 /* Ideally, alias can be checked against loop's control IV, but we
1718 need to prove linear mapping between control IV and reference
1719 index. Although that should be true, we check against (array)
1720 index of data reference. Like segment length, index length is
1721 linear function of the number of iterations with index_step as
1722 the coefficient, i.e, niter_len * idx_step. */
1723 tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
1724 build_int_cst (TREE_TYPE (min1),
1725 niter_len1));
1726 tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
1727 build_int_cst (TREE_TYPE (min2),
1728 niter_len2));
1729 tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1);
1730 tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2);
1731 /* Adjust ranges for negative step. */
1732 if (neg_step)
1734 /* IDX_LEN1 and IDX_LEN2 are negative in this case. */
1735 std::swap (min1, max1);
1736 std::swap (min2, max2);
1738 /* As with the lengths just calculated, we've measured the access
1739 sizes in iterations, so multiply them by the index step. */
1740 tree idx_access1
1741 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
1742 build_int_cst (TREE_TYPE (min1), niter_access1));
1743 tree idx_access2
1744 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
1745 build_int_cst (TREE_TYPE (min2), niter_access2));
1747 /* MINUS_EXPR because the above values are negative. */
1748 max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (max1), max1, idx_access1);
1749 max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (max2), max2, idx_access2);
     /* No alias when one range ends before the other begins.  */
1751 tree part_cond_expr
1752 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1753 fold_build2 (LE_EXPR, boolean_type_node, max1, min2),
1754 fold_build2 (LE_EXPR, boolean_type_node, max2, min1));
1755 if (*cond_expr)
1756 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1757 *cond_expr, part_cond_expr);
1758 else
1759 *cond_expr = part_cond_expr;
1761 return true;
1764 /* If ALIGN is nonzero, set up *SEQ_MIN_OUT and *SEQ_MAX_OUT so that for
1765 every address ADDR accessed by D:
1767 *SEQ_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEQ_MAX_OUT
1769 In this case, every element accessed by D is aligned to at least
1770 ALIGN bytes.
1772 If ALIGN is zero then instead set *SEG_MAX_OUT so that:
1774 *SEQ_MIN_OUT <= ADDR < *SEQ_MAX_OUT. */
1776 static void
1777 get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
1778 tree *seg_max_out, HOST_WIDE_INT align)
1780 /* Each access has the following pattern:
1782 <- |seg_len| ->
1783 <--- A: -ve step --->
1784 +-----+-------+-----+-------+-----+
1785 | n-1 | ,.... | 0 | ..... | n-1 |
1786 +-----+-------+-----+-------+-----+
1787 <--- B: +ve step --->
1788 <- |seg_len| ->
1790 base address
1792 where "n" is the number of scalar iterations covered by the segment.
1793 (This should be VF for a particular pair if we know that both steps
1794 are the same, otherwise it will be the full number of scalar loop
1795 iterations.)
1797 A is the range of bytes accessed when the step is negative,
1798 B is the range when the step is positive.
1800 If the access size is "access_size" bytes, the lowest addressed byte is:
1802 base + (step < 0 ? seg_len : 0) [LB]
1804 and the highest addressed byte is always below:
1806 base + (step < 0 ? 0 : seg_len) + access_size [UB]
1808 Thus:
1810 LB <= ADDR < UB
1812 If ALIGN is nonzero, all three values are aligned to at least ALIGN
1813 bytes, so:
1815 LB <= ADDR <= UB - ALIGN
1817 where "- ALIGN" folds naturally with the "+ access_size" and often
1818 cancels it out.
1820 We don't try to simplify LB and UB beyond this (e.g. by using
1821 MIN and MAX based on whether seg_len rather than the stride is
1822 negative) because it is possible for the absolute size of the
1823 segment to overflow the range of a ssize_t.
1825 Keeping the pointer_plus outside of the cond_expr should allow
1826 the cond_exprs to be shared with other alias checks. */
1827 tree indicator = dr_direction_indicator (d.dr);
     /* NEG_STEP is a runtime boolean expression: true when D steps
	backwards through memory.  */
1828 tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
1829 fold_convert (ssizetype, indicator),
1830 ssize_int (0));
1831 tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
1832 DR_OFFSET (d.dr));
1833 addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
1834 tree seg_len
1835 = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));
     /* Select LB and UB offsets (see diagram above) based on direction.  */
1837 tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
1838 seg_len, size_zero_node);
1839 tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
1840 size_zero_node, seg_len);
1841 max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
1842 size_int (d.access_size - align));
1844 *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
1845 *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
1848 /* Given two data references and segment lengths described by DR_A and DR_B,
1849 create expression checking if the two addresses ranges intersect with
1850 each other:
1852 ((DR_A_addr_0 + DR_A_segment_length_0) <= DR_B_addr_0)
1853 || (DR_B_addr_0 + DR_B_segment_length_0) <= DR_A_addr_0))
     Tries the cheaper index-based form first and falls back to
     address-based range comparisons.  */
1855 static void
1856 create_intersect_range_checks (struct loop *loop, tree *cond_expr,
1857 const dr_with_seg_len& dr_a,
1858 const dr_with_seg_len& dr_b)
1860 *cond_expr = NULL_TREE;
1861 if (create_intersect_range_checks_index (loop, cond_expr, dr_a, dr_b))
1862 return;
1864 unsigned HOST_WIDE_INT min_align;
1865 tree_code cmp_code;
1866 if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
1867 && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
1869 /* In this case adding access_size to seg_len is likely to give
1870 a simple X * step, where X is either the number of scalar
1871 iterations or the vectorization factor. We're better off
1872 keeping that, rather than subtracting an alignment from it.
1874 In this case the maximum values are exclusive and so there is
1875 no alias if the maximum of one segment equals the minimum
1876 of another. */
1877 min_align = 0;
1878 cmp_code = LE_EXPR;
1880 else
1882 /* Calculate the minimum alignment shared by all four pointers,
1883 then arrange for this alignment to be subtracted from the
1884 exclusive maximum values to get inclusive maximum values.
1885 This "- min_align" is cumulative with a "+ access_size"
1886 in the calculation of the maximum values. In the best
1887 (and common) case, the two cancel each other out, leaving
1888 us with an inclusive bound based only on seg_len. In the
1889 worst case we're simply adding a smaller number than before.
1891 Because the maximum values are inclusive, there is an alias
1892 if the maximum value of one segment is equal to the minimum
1893 value of the other. */
1894 min_align = MIN (dr_a.align, dr_b.align);
1895 cmp_code = LT_EXPR;
1898 tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
1899 get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
1900 get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);
1902 *cond_expr
1903 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1904 fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
1905 fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
1908 /* Create a conditional expression that represents the run-time checks for
1909 overlapping of address ranges represented by a list of data references
1910 pairs passed in ALIAS_PAIRS. Data references are in LOOP. The returned
1911 COND_EXPR is the conditional expression to be used in the if statement
1912 that controls which version of the loop gets executed at runtime.
     *COND_EXPR may already hold a condition; per-pair checks are ANDed
     onto it.  */
1914 void
1915 create_runtime_alias_checks (struct loop *loop,
1916 vec<dr_with_seg_len_pair_t> *alias_pairs,
1917 tree * cond_expr)
1919 tree part_cond_expr;
     /* Folding the checks can legitimately produce "overflowing" arithmetic;
	suppress the overflow warnings while building them.  */
1921 fold_defer_overflow_warnings ();
1922 for (size_t i = 0, s = alias_pairs->length (); i < s; ++i)
1924 const dr_with_seg_len& dr_a = (*alias_pairs)[i].first;
1925 const dr_with_seg_len& dr_b = (*alias_pairs)[i].second;
1927 if (dump_enabled_p ())
1929 dump_printf (MSG_NOTE, "create runtime check for data references ");
1930 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
1931 dump_printf (MSG_NOTE, " and ");
1932 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
1933 dump_printf (MSG_NOTE, "\n");
1936 /* Create condition expression for each pair data references. */
1937 create_intersect_range_checks (loop, &part_cond_expr, dr_a, dr_b);
1938 if (*cond_expr)
1939 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1940 *cond_expr, part_cond_expr);
1941 else
1942 *cond_expr = part_cond_expr;
1944 fold_undefer_and_ignore_overflow_warnings ();
1947 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1948 expressions.  Recursive helper: structural comparison over unary and
     binary expression trees, with NOPs stripped at each level.  */
1949 static bool
1950 dr_equal_offsets_p1 (tree offset1, tree offset2)
1952 bool res;
1954 STRIP_NOPS (offset1);
1955 STRIP_NOPS (offset2);
1957 if (offset1 == offset2)
1958 return true;
     /* Only unary and binary expressions are compared structurally;
	anything else must have matched by pointer equality above.  */
1960 if (TREE_CODE (offset1) != TREE_CODE (offset2)
1961 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1962 return false;
1964 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1965 TREE_OPERAND (offset2, 0));
1967 if (!res || !BINARY_CLASS_P (offset1))
1968 return res;
1970 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1971 TREE_OPERAND (offset2, 1));
1973 return res;
1976 /* Check if DRA and DRB have equal offsets.  Thin public wrapper around
     dr_equal_offsets_p1 operating on the DR_OFFSET fields.  */
1977 bool
1978 dr_equal_offsets_p (struct data_reference *dra,
1979 struct data_reference *drb)
1981 tree offset1, offset2;
1983 offset1 = DR_OFFSET (dra);
1984 offset2 = DR_OFFSET (drb);
1986 return dr_equal_offsets_p1 (offset1, offset2);
1989 /* Returns true if FNA == FNB: same number of coefficients and all
     corresponding coefficients operand_equal_p.  */
1991 static bool
1992 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1994 unsigned i, n = fna.length ();
1996 if (n != fnb.length ())
1997 return false;
1999 for (i = 0; i < n; i++)
2000 if (!operand_equal_p (fna[i], fnb[i], 0))
2001 return false;
2003 return true;
2006 /* If all the functions in CF are the same, returns one of them,
2007 otherwise returns NULL.
     A trivial (unknown/no-dependence) conflict function also yields the
     empty affine_fn.  */
2009 static affine_fn
2010 common_affine_function (conflict_function *cf)
2012 unsigned i;
2013 affine_fn comm;
2015 if (!CF_NONTRIVIAL_P (cf))
2016 return affine_fn ();
2018 comm = cf->fns[0];
2020 for (i = 1; i < cf->n; i++)
2021 if (!affine_function_equal_p (comm, cf->fns[i]))
2022 return affine_fn ();
2024 return comm;
2027 /* Returns the base of the affine function FN, i.e. its constant term
     (coefficient at index 0).  */
2029 static tree
2030 affine_function_base (affine_fn fn)
2032 return fn[0];
2035 /* Returns true if FN is a constant, i.e. every non-base coefficient
     (index >= 1) is zero.  */
2037 static bool
2038 affine_function_constant_p (affine_fn fn)
2040 unsigned i;
2041 tree coef;
2043 for (i = 1; fn.iterate (i, &coef); i++)
2044 if (!integer_zerop (coef))
2045 return false;
2047 return true;
2050 /* Returns true if FN is the zero constant function (base is zero and
     all other coefficients are zero).  */
2052 static bool
2053 affine_function_zero_p (affine_fn fn)
2055 return (integer_zerop (affine_function_base (fn))
2056 && affine_function_constant_p (fn));
2059 /* Returns a signed integer type with the largest precision from TA
2060 and TB.  On a precision tie, TB's signed variant is chosen.  */
2062 static tree
2063 signed_type_for_types (tree ta, tree tb)
2065 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
2066 return signed_type_for (ta);
2067 else
2068 return signed_type_for (tb);
2071 /* Applies operation OP on affine functions FNA and FNB, and returns the
2072 result.  The result has as many coefficients as the longer operand;
     the shorter operand is implicitly padded with zeros.  The returned
     vector is newly allocated and owned by the caller (affine_fn_free).  */
2074 static affine_fn
2075 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
2077 unsigned i, n, m;
2078 affine_fn ret;
2079 tree coef;
     /* N = shorter length, M = longer length.  */
2081 if (fnb.length () > fna.length ())
2083 n = fna.length ();
2084 m = fnb.length ();
2086 else
2088 n = fnb.length ();
2089 m = fna.length ();
2092 ret.create (m);
2093 for (i = 0; i < n; i++)
2095 tree type = signed_type_for_types (TREE_TYPE (fna[i]),
2096 TREE_TYPE (fnb[i]));
2097 ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
     /* Handle the tail of the longer operand: combine with zero.  At most
	one of these two loops executes.  */
2100 for (; fna.iterate (i, &coef); i++)
2101 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2102 coef, integer_zero_node));
2103 for (; fnb.iterate (i, &coef); i++)
2104 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2105 integer_zero_node, coef));
2107 return ret;
2110 /* Returns the sum of affine functions FNA and FNB (new vector;
     caller frees).  */
2112 static affine_fn
2113 affine_fn_plus (affine_fn fna, affine_fn fnb)
2115 return affine_fn_op (PLUS_EXPR, fna, fnb);
2118 /* Returns the difference of affine functions FNA and FNB (new vector;
     caller frees).  */
2120 static affine_fn
2121 affine_fn_minus (affine_fn fna, affine_fn fnb)
2123 return affine_fn_op (MINUS_EXPR, fna, fnb);
2126 /* Frees affine function FN (releases its coefficient vector).  */
2128 static void
2129 affine_fn_free (affine_fn fn)
2131 fn.release ();
2134 /* Determine for each subscript in the data dependence relation DDR
2135 the distance.  SUB_DISTANCE is set to the constant difference of the
     two conflict functions when it exists, otherwise to chrec_dont_know.  */
2137 static void
2138 compute_subscript_distance (struct data_dependence_relation *ddr)
2140 conflict_function *cf_a, *cf_b;
2141 affine_fn fn_a, fn_b, diff;
     /* Only meaningful when the relation has not been decided yet.  */
2143 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
2145 unsigned int i;
2147 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2149 struct subscript *subscript;
2151 subscript = DDR_SUBSCRIPT (ddr, i);
2152 cf_a = SUB_CONFLICTS_IN_A (subscript);
2153 cf_b = SUB_CONFLICTS_IN_B (subscript);
2155 fn_a = common_affine_function (cf_a);
2156 fn_b = common_affine_function (cf_b);
2157 if (!fn_a.exists () || !fn_b.exists ())
2159 SUB_DISTANCE (subscript) = chrec_dont_know;
2160 return;
2162 diff = affine_fn_minus (fn_a, fn_b);
     /* The distance is only defined when the difference is constant.  */
2164 if (affine_function_constant_p (diff))
2165 SUB_DISTANCE (subscript) = affine_function_base (diff);
2166 else
2167 SUB_DISTANCE (subscript) = chrec_dont_know;
2169 affine_fn_free (diff);
2174 /* Returns the conflict function for "unknown". */
2176 static conflict_function *
2177 conflict_fn_not_known (void)
2179 conflict_function *fn = XCNEW (conflict_function);
2180 fn->n = NOT_KNOWN;
2182 return fn;
2185 /* Returns the conflict function for "independent". */
2187 static conflict_function *
2188 conflict_fn_no_dependence (void)
2190 conflict_function *fn = XCNEW (conflict_function);
2191 fn->n = NO_DEPENDENCE;
2193 return fn;
2196 /* Returns true if the address of OBJ is invariant in LOOP. */
2198 static bool
2199 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
2201 while (handled_component_p (obj))
2203 if (TREE_CODE (obj) == ARRAY_REF)
2205 for (int i = 1; i < 4; ++i)
2206 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i),
2207 loop->num))
2208 return false;
2210 else if (TREE_CODE (obj) == COMPONENT_REF)
2212 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2213 loop->num))
2214 return false;
2216 obj = TREE_OPERAND (obj, 0);
2219 if (!INDIRECT_REF_P (obj)
2220 && TREE_CODE (obj) != MEM_REF)
2221 return true;
2223 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
2224 loop->num);
2227 /* Returns false if we can prove that data references A and B do not alias,
2228 true otherwise. If LOOP_NEST is false no cross-iteration aliases are
2229 considered. */
2231 bool
2232 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
2233 bool loop_nest)
2235 tree addr_a = DR_BASE_OBJECT (a);
2236 tree addr_b = DR_BASE_OBJECT (b);
2238 /* If we are not processing a loop nest but scalar code we
2239 do not need to care about possible cross-iteration dependences
2240 and thus can process the full original reference. Do so,
2241 similar to how loop invariant motion applies extra offset-based
2242 disambiguation. */
2243 if (!loop_nest)
2245 aff_tree off1, off2;
2246 poly_widest_int size1, size2;
2247 get_inner_reference_aff (DR_REF (a), &off1, &size1);
2248 get_inner_reference_aff (DR_REF (b), &off2, &size2);
2249 aff_combination_scale (&off1, -1);
2250 aff_combination_add (&off2, &off1);
2251 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
2252 return false;
2255 if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
2256 && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
2257 && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
2258 && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
2259 return false;
2261 /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
2262 do not know the size of the base-object. So we cannot do any
2263 offset/overlap based analysis but have to rely on points-to
2264 information only. */
2265 if (TREE_CODE (addr_a) == MEM_REF
2266 && (DR_UNCONSTRAINED_BASE (a)
2267 || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
2269 /* For true dependences we can apply TBAA. */
2270 if (flag_strict_aliasing
2271 && DR_IS_WRITE (a) && DR_IS_READ (b)
2272 && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
2273 get_alias_set (DR_REF (b))))
2274 return false;
2275 if (TREE_CODE (addr_b) == MEM_REF)
2276 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2277 TREE_OPERAND (addr_b, 0));
2278 else
2279 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2280 build_fold_addr_expr (addr_b));
2282 else if (TREE_CODE (addr_b) == MEM_REF
2283 && (DR_UNCONSTRAINED_BASE (b)
2284 || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
2286 /* For true dependences we can apply TBAA. */
2287 if (flag_strict_aliasing
2288 && DR_IS_WRITE (a) && DR_IS_READ (b)
2289 && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
2290 get_alias_set (DR_REF (b))))
2291 return false;
2292 if (TREE_CODE (addr_a) == MEM_REF)
2293 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2294 TREE_OPERAND (addr_b, 0));
2295 else
2296 return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
2297 TREE_OPERAND (addr_b, 0));
2300 /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
2301 that is being subsetted in the loop nest. */
2302 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
2303 return refs_output_dependent_p (addr_a, addr_b);
2304 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
2305 return refs_anti_dependent_p (addr_a, addr_b);
2306 return refs_may_alias_p (addr_a, addr_b);
2309 /* REF_A and REF_B both satisfy access_fn_component_p. Return true
2310 if it is meaningful to compare their associated access functions
2311 when checking for dependencies. */
2313 static bool
2314 access_fn_components_comparable_p (tree ref_a, tree ref_b)
2316 /* Allow pairs of component refs from the following sets:
2318 { REALPART_EXPR, IMAGPART_EXPR }
2319 { COMPONENT_REF }
2320 { ARRAY_REF }. */
2321 tree_code code_a = TREE_CODE (ref_a);
2322 tree_code code_b = TREE_CODE (ref_b);
2323 if (code_a == IMAGPART_EXPR)
2324 code_a = REALPART_EXPR;
2325 if (code_b == IMAGPART_EXPR)
2326 code_b = REALPART_EXPR;
2327 if (code_a != code_b)
2328 return false;
2330 if (TREE_CODE (ref_a) == COMPONENT_REF)
2331 /* ??? We cannot simply use the type of operand #0 of the refs here as
2332 the Fortran compiler smuggles type punning into COMPONENT_REFs.
2333 Use the DECL_CONTEXT of the FIELD_DECLs instead. */
2334 return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
2335 == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
2337 return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
2338 TREE_TYPE (TREE_OPERAND (ref_b, 0)));
2341 /* Initialize a data dependence relation between data accesses A and
2342 B. NB_LOOPS is the number of loops surrounding the references: the
2343 size of the classic distance/direction vectors. */
2345 struct data_dependence_relation *
2346 initialize_data_dependence_relation (struct data_reference *a,
2347 struct data_reference *b,
2348 vec<loop_p> loop_nest)
2350 struct data_dependence_relation *res;
2351 unsigned int i;
2353 res = XCNEW (struct data_dependence_relation);
2354 DDR_A (res) = a;
2355 DDR_B (res) = b;
2356 DDR_LOOP_NEST (res).create (0);
2357 DDR_SUBSCRIPTS (res).create (0);
2358 DDR_DIR_VECTS (res).create (0);
2359 DDR_DIST_VECTS (res).create (0);
2361 if (a == NULL || b == NULL)
2363 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2364 return res;
2367 /* If the data references do not alias, then they are independent. */
2368 if (!dr_may_alias_p (a, b, loop_nest.exists ()))
2370 DDR_ARE_DEPENDENT (res) = chrec_known;
2371 return res;
2374 unsigned int num_dimensions_a = DR_NUM_DIMENSIONS (a);
2375 unsigned int num_dimensions_b = DR_NUM_DIMENSIONS (b);
2376 if (num_dimensions_a == 0 || num_dimensions_b == 0)
2378 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2379 return res;
2382 /* For unconstrained bases, the root (highest-indexed) subscript
2383 describes a variation in the base of the original DR_REF rather
2384 than a component access. We have no type that accurately describes
2385 the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
2386 applying this subscript) so limit the search to the last real
2387 component access.
2389 E.g. for:
2391 void
2392 f (int a[][8], int b[][8])
2394 for (int i = 0; i < 8; ++i)
2395 a[i * 2][0] = b[i][0];
2398 the a and b accesses have a single ARRAY_REF component reference [0]
2399 but have two subscripts. */
2400 if (DR_UNCONSTRAINED_BASE (a))
2401 num_dimensions_a -= 1;
2402 if (DR_UNCONSTRAINED_BASE (b))
2403 num_dimensions_b -= 1;
2405 /* These structures describe sequences of component references in
2406 DR_REF (A) and DR_REF (B). Each component reference is tied to a
2407 specific access function. */
2408 struct {
2409 /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
2410 DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
2411 indices. In C notation, these are the indices of the rightmost
2412 component references; e.g. for a sequence .b.c.d, the start
2413 index is for .d. */
2414 unsigned int start_a;
2415 unsigned int start_b;
2417 /* The sequence contains LENGTH consecutive access functions from
2418 each DR. */
2419 unsigned int length;
2421 /* The enclosing objects for the A and B sequences respectively,
2422 i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
2423 and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied. */
2424 tree object_a;
2425 tree object_b;
2426 } full_seq = {}, struct_seq = {};
2428 /* Before each iteration of the loop:
2430 - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
2431 - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B). */
2432 unsigned int index_a = 0;
2433 unsigned int index_b = 0;
2434 tree ref_a = DR_REF (a);
2435 tree ref_b = DR_REF (b);
2437 /* Now walk the component references from the final DR_REFs back up to
2438 the enclosing base objects. Each component reference corresponds
2439 to one access function in the DR, with access function 0 being for
2440 the final DR_REF and the highest-indexed access function being the
2441 one that is applied to the base of the DR.
2443 Look for a sequence of component references whose access functions
2444 are comparable (see access_fn_components_comparable_p). If more
2445 than one such sequence exists, pick the one nearest the base
2446 (which is the leftmost sequence in C notation). Store this sequence
2447 in FULL_SEQ.
2449 For example, if we have:
2451 struct foo { struct bar s; ... } (*a)[10], (*b)[10];
2453 A: a[0][i].s.c.d
2454 B: __real b[0][i].s.e[i].f
2456 (where d is the same type as the real component of f) then the access
2457 functions would be:
2459 0 1 2 3
2460 A: .d .c .s [i]
2462 0 1 2 3 4 5
2463 B: __real .f [i] .e .s [i]
2465 The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
2466 and [i] is an ARRAY_REF. However, the A1/B3 column contains two
2467 COMPONENT_REF accesses for struct bar, so is comparable. Likewise
2468 the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
2469 so is comparable. The A3/B5 column contains two ARRAY_REFs that
2470 index foo[10] arrays, so is again comparable. The sequence is
2471 therefore:
2473 A: [1, 3] (i.e. [i].s.c)
2474 B: [3, 5] (i.e. [i].s.e)
2476 Also look for sequences of component references whose access
2477 functions are comparable and whose enclosing objects have the same
2478 RECORD_TYPE. Store this sequence in STRUCT_SEQ. In the above
2479 example, STRUCT_SEQ would be:
2481 A: [1, 2] (i.e. s.c)
2482 B: [3, 4] (i.e. s.e) */
2483 while (index_a < num_dimensions_a && index_b < num_dimensions_b)
2485 /* REF_A and REF_B must be one of the component access types
2486 allowed by dr_analyze_indices. */
2487 gcc_checking_assert (access_fn_component_p (ref_a));
2488 gcc_checking_assert (access_fn_component_p (ref_b));
2490 /* Get the immediately-enclosing objects for REF_A and REF_B,
2491 i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
2492 and DR_ACCESS_FN (B, INDEX_B). */
2493 tree object_a = TREE_OPERAND (ref_a, 0);
2494 tree object_b = TREE_OPERAND (ref_b, 0);
2496 tree type_a = TREE_TYPE (object_a);
2497 tree type_b = TREE_TYPE (object_b);
2498 if (access_fn_components_comparable_p (ref_a, ref_b))
2500 /* This pair of component accesses is comparable for dependence
2501 analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
2502 DR_ACCESS_FN (B, INDEX_B) in the sequence. */
2503 if (full_seq.start_a + full_seq.length != index_a
2504 || full_seq.start_b + full_seq.length != index_b)
2506 /* The accesses don't extend the current sequence,
2507 so start a new one here. */
2508 full_seq.start_a = index_a;
2509 full_seq.start_b = index_b;
2510 full_seq.length = 0;
2513 /* Add this pair of references to the sequence. */
2514 full_seq.length += 1;
2515 full_seq.object_a = object_a;
2516 full_seq.object_b = object_b;
2518 /* If the enclosing objects are structures (and thus have the
2519 same RECORD_TYPE), record the new sequence in STRUCT_SEQ. */
2520 if (TREE_CODE (type_a) == RECORD_TYPE)
2521 struct_seq = full_seq;
2523 /* Move to the next containing reference for both A and B. */
2524 ref_a = object_a;
2525 ref_b = object_b;
2526 index_a += 1;
2527 index_b += 1;
2528 continue;
2531 /* Try to approach equal type sizes. */
2532 if (!COMPLETE_TYPE_P (type_a)
2533 || !COMPLETE_TYPE_P (type_b)
2534 || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
2535 || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
2536 break;
2538 unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
2539 unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
2540 if (size_a <= size_b)
2542 index_a += 1;
2543 ref_a = object_a;
2545 if (size_b <= size_a)
2547 index_b += 1;
2548 ref_b = object_b;
2552 /* See whether FULL_SEQ ends at the base and whether the two bases
2553 are equal. We do not care about TBAA or alignment info so we can
2554 use OEP_ADDRESS_OF to avoid false negatives. */
2555 tree base_a = DR_BASE_OBJECT (a);
2556 tree base_b = DR_BASE_OBJECT (b);
2557 bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
2558 && full_seq.start_b + full_seq.length == num_dimensions_b
2559 && DR_UNCONSTRAINED_BASE (a) == DR_UNCONSTRAINED_BASE (b)
2560 && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
2561 && types_compatible_p (TREE_TYPE (base_a),
2562 TREE_TYPE (base_b))
2563 && (!loop_nest.exists ()
2564 || (object_address_invariant_in_loop_p
2565 (loop_nest[0], base_a))));
2567 /* If the bases are the same, we can include the base variation too.
2568 E.g. the b accesses in:
2570 for (int i = 0; i < n; ++i)
2571 b[i + 4][0] = b[i][0];
2573 have a definite dependence distance of 4, while for:
2575 for (int i = 0; i < n; ++i)
2576 a[i + 4][0] = b[i][0];
2578 the dependence distance depends on the gap between a and b.
2580 If the bases are different then we can only rely on the sequence
2581 rooted at a structure access, since arrays are allowed to overlap
2582 arbitrarily and change shape arbitrarily. E.g. we treat this as
2583 valid code:
2585 int a[256];
2587 ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];
2589 where two lvalues with the same int[4][3] type overlap, and where
2590 both lvalues are distinct from the object's declared type. */
2591 if (same_base_p)
2593 if (DR_UNCONSTRAINED_BASE (a))
2594 full_seq.length += 1;
2596 else
2597 full_seq = struct_seq;
2599 /* Punt if we didn't find a suitable sequence. */
2600 if (full_seq.length == 0)
2602 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2603 return res;
2606 if (!same_base_p)
2608 /* Partial overlap is possible for different bases when strict aliasing
2609 is not in effect. It's also possible if either base involves a union
2610 access; e.g. for:
2612 struct s1 { int a[2]; };
2613 struct s2 { struct s1 b; int c; };
2614 struct s3 { int d; struct s1 e; };
2615 union u { struct s2 f; struct s3 g; } *p, *q;
2617 the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
2618 "p->g.e" (base "p->g") and might partially overlap the s1 at
2619 "q->g.e" (base "q->g"). */
2620 if (!flag_strict_aliasing
2621 || ref_contains_union_access_p (full_seq.object_a)
2622 || ref_contains_union_access_p (full_seq.object_b))
2624 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2625 return res;
2628 DDR_COULD_BE_INDEPENDENT_P (res) = true;
2629 if (!loop_nest.exists ()
2630 || (object_address_invariant_in_loop_p (loop_nest[0],
2631 full_seq.object_a)
2632 && object_address_invariant_in_loop_p (loop_nest[0],
2633 full_seq.object_b)))
2635 DDR_OBJECT_A (res) = full_seq.object_a;
2636 DDR_OBJECT_B (res) = full_seq.object_b;
2640 DDR_AFFINE_P (res) = true;
2641 DDR_ARE_DEPENDENT (res) = NULL_TREE;
2642 DDR_SUBSCRIPTS (res).create (full_seq.length);
2643 DDR_LOOP_NEST (res) = loop_nest;
2644 DDR_INNER_LOOP (res) = 0;
2645 DDR_SELF_REFERENCE (res) = false;
2647 for (i = 0; i < full_seq.length; ++i)
2649 struct subscript *subscript;
2651 subscript = XNEW (struct subscript);
2652 SUB_ACCESS_FN (subscript, 0) = DR_ACCESS_FN (a, full_seq.start_a + i);
2653 SUB_ACCESS_FN (subscript, 1) = DR_ACCESS_FN (b, full_seq.start_b + i);
2654 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
2655 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
2656 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
2657 SUB_DISTANCE (subscript) = chrec_dont_know;
2658 DDR_SUBSCRIPTS (res).safe_push (subscript);
2661 return res;
2664 /* Frees memory used by the conflict function F. */
2666 static void
2667 free_conflict_function (conflict_function *f)
2669 unsigned i;
2671 if (CF_NONTRIVIAL_P (f))
2673 for (i = 0; i < f->n; i++)
2674 affine_fn_free (f->fns[i]);
2676 free (f);
2679 /* Frees memory used by SUBSCRIPTS. */
2681 static void
2682 free_subscripts (vec<subscript_p> subscripts)
2684 unsigned i;
2685 subscript_p s;
2687 FOR_EACH_VEC_ELT (subscripts, i, s)
2689 free_conflict_function (s->conflicting_iterations_in_a);
2690 free_conflict_function (s->conflicting_iterations_in_b);
2691 free (s);
2693 subscripts.release ();
2696 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
2697 description. */
2699 static inline void
2700 finalize_ddr_dependent (struct data_dependence_relation *ddr,
2701 tree chrec)
2703 DDR_ARE_DEPENDENT (ddr) = chrec;
2704 free_subscripts (DDR_SUBSCRIPTS (ddr));
2705 DDR_SUBSCRIPTS (ddr).create (0);
2708 /* The dependence relation DDR cannot be represented by a distance
2709 vector. */
2711 static inline void
2712 non_affine_dependence_relation (struct data_dependence_relation *ddr)
2714 if (dump_file && (dump_flags & TDF_DETAILS))
2715 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
2717 DDR_AFFINE_P (ddr) = false;
2722 /* This section contains the classic Banerjee tests. */
2724 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
2725 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
2727 static inline bool
2728 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
2730 return (evolution_function_is_constant_p (chrec_a)
2731 && evolution_function_is_constant_p (chrec_b));
2734 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
2735 variable, i.e., if the SIV (Single Index Variable) test is true. */
2737 static bool
2738 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
2740 if ((evolution_function_is_constant_p (chrec_a)
2741 && evolution_function_is_univariate_p (chrec_b))
2742 || (evolution_function_is_constant_p (chrec_b)
2743 && evolution_function_is_univariate_p (chrec_a)))
2744 return true;
2746 if (evolution_function_is_univariate_p (chrec_a)
2747 && evolution_function_is_univariate_p (chrec_b))
2749 switch (TREE_CODE (chrec_a))
2751 case POLYNOMIAL_CHREC:
2752 switch (TREE_CODE (chrec_b))
2754 case POLYNOMIAL_CHREC:
2755 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
2756 return false;
2757 /* FALLTHRU */
2759 default:
2760 return true;
2763 default:
2764 return true;
2768 return false;
2771 /* Creates a conflict function with N dimensions. The affine functions
2772 in each dimension follow. */
2774 static conflict_function *
2775 conflict_fn (unsigned n, ...)
2777 unsigned i;
2778 conflict_function *ret = XCNEW (conflict_function);
2779 va_list ap;
2781 gcc_assert (n > 0 && n <= MAX_DIM);
2782 va_start (ap, n);
2784 ret->n = n;
2785 for (i = 0; i < n; i++)
2786 ret->fns[i] = va_arg (ap, affine_fn);
2787 va_end (ap);
2789 return ret;
2792 /* Returns constant affine function with value CST. */
2794 static affine_fn
2795 affine_fn_cst (tree cst)
2797 affine_fn fn;
2798 fn.create (1);
2799 fn.quick_push (cst);
2800 return fn;
2803 /* Returns affine function with single variable, CST + COEF * x_DIM. */
2805 static affine_fn
2806 affine_fn_univar (tree cst, unsigned dim, tree coef)
2808 affine_fn fn;
2809 fn.create (dim + 1);
2810 unsigned i;
2812 gcc_assert (dim > 0);
2813 fn.quick_push (cst);
2814 for (i = 1; i < dim; i++)
2815 fn.quick_push (integer_zero_node);
2816 fn.quick_push (coef);
2817 return fn;
2820 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
2821 *OVERLAPS_B are initialized to the functions that describe the
2822 relation between the elements accessed twice by CHREC_A and
2823 CHREC_B. For k >= 0, the following property is verified:
2825 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2827 static void
2828 analyze_ziv_subscript (tree chrec_a,
2829 tree chrec_b,
2830 conflict_function **overlaps_a,
2831 conflict_function **overlaps_b,
2832 tree *last_conflicts)
2834 tree type, difference;
2835 dependence_stats.num_ziv++;
2837 if (dump_file && (dump_flags & TDF_DETAILS))
2838 fprintf (dump_file, "(analyze_ziv_subscript \n");
2840 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2841 chrec_a = chrec_convert (type, chrec_a, NULL);
2842 chrec_b = chrec_convert (type, chrec_b, NULL);
2843 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2845 switch (TREE_CODE (difference))
2847 case INTEGER_CST:
2848 if (integer_zerop (difference))
2850 /* The difference is equal to zero: the accessed index
2851 overlaps for each iteration in the loop. */
2852 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2853 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2854 *last_conflicts = chrec_dont_know;
2855 dependence_stats.num_ziv_dependent++;
2857 else
2859 /* The accesses do not overlap. */
2860 *overlaps_a = conflict_fn_no_dependence ();
2861 *overlaps_b = conflict_fn_no_dependence ();
2862 *last_conflicts = integer_zero_node;
2863 dependence_stats.num_ziv_independent++;
2865 break;
2867 default:
2868 /* We're not sure whether the indexes overlap. For the moment,
2869 conservatively answer "don't know". */
2870 if (dump_file && (dump_flags & TDF_DETAILS))
2871 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
2873 *overlaps_a = conflict_fn_not_known ();
2874 *overlaps_b = conflict_fn_not_known ();
2875 *last_conflicts = chrec_dont_know;
2876 dependence_stats.num_ziv_unimplemented++;
2877 break;
2880 if (dump_file && (dump_flags & TDF_DETAILS))
2881 fprintf (dump_file, ")\n");
2884 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
2885 and only if it fits to the int type. If this is not the case, or the
2886 bound on the number of iterations of LOOP could not be derived, returns
2887 chrec_dont_know. */
2889 static tree
2890 max_stmt_executions_tree (struct loop *loop)
2892 widest_int nit;
2894 if (!max_stmt_executions (loop, &nit))
2895 return chrec_dont_know;
2897 if (!wi::fits_to_tree_p (nit, unsigned_type_node))
2898 return chrec_dont_know;
2900 return wide_int_to_tree (unsigned_type_node, nit);
2903 /* Determine whether the CHREC is always positive/negative. If the expression
2904 cannot be statically analyzed, return false, otherwise set the answer into
2905 VALUE. */
2907 static bool
2908 chrec_is_positive (tree chrec, bool *value)
2910 bool value0, value1, value2;
2911 tree end_value, nb_iter;
2913 switch (TREE_CODE (chrec))
2915 case POLYNOMIAL_CHREC:
2916 if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
2917 || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
2918 return false;
2920 /* FIXME -- overflows. */
2921 if (value0 == value1)
2923 *value = value0;
2924 return true;
2927 /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
2928 and the proof consists in showing that the sign never
2929 changes during the execution of the loop, from 0 to
2930 loop->nb_iterations. */
2931 if (!evolution_function_is_affine_p (chrec))
2932 return false;
2934 nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
2935 if (chrec_contains_undetermined (nb_iter))
2936 return false;
2938 #if 0
2939 /* TODO -- If the test is after the exit, we may decrease the number of
2940 iterations by one. */
2941 if (after_exit)
2942 nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
2943 #endif
2945 end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
2947 if (!chrec_is_positive (end_value, &value2))
2948 return false;
2950 *value = value0;
2951 return value0 == value1;
2953 case INTEGER_CST:
2954 switch (tree_int_cst_sgn (chrec))
2956 case -1:
2957 *value = false;
2958 break;
2959 case 1:
2960 *value = true;
2961 break;
2962 default:
2963 return false;
2965 return true;
2967 default:
2968 return false;
2973 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
2974 constant, and CHREC_B is an affine function. *OVERLAPS_A and
2975 *OVERLAPS_B are initialized to the functions that describe the
2976 relation between the elements accessed twice by CHREC_A and
2977 CHREC_B. For k >= 0, the following property is verified:
2979 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2981 static void
2982 analyze_siv_subscript_cst_affine (tree chrec_a,
2983 tree chrec_b,
2984 conflict_function **overlaps_a,
2985 conflict_function **overlaps_b,
2986 tree *last_conflicts)
2988 bool value0, value1, value2;
2989 tree type, difference, tmp;
2991 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2992 chrec_a = chrec_convert (type, chrec_a, NULL);
2993 chrec_b = chrec_convert (type, chrec_b, NULL);
2994 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
2996 /* Special case overlap in the first iteration. */
2997 if (integer_zerop (difference))
2999 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3000 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3001 *last_conflicts = integer_one_node;
3002 return;
3005 if (!chrec_is_positive (initial_condition (difference), &value0))
3007 if (dump_file && (dump_flags & TDF_DETAILS))
3008 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
3010 dependence_stats.num_siv_unimplemented++;
3011 *overlaps_a = conflict_fn_not_known ();
3012 *overlaps_b = conflict_fn_not_known ();
3013 *last_conflicts = chrec_dont_know;
3014 return;
3016 else
3018 if (value0 == false)
3020 if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3021 || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
3023 if (dump_file && (dump_flags & TDF_DETAILS))
3024 fprintf (dump_file, "siv test failed: chrec not positive.\n");
3026 *overlaps_a = conflict_fn_not_known ();
3027 *overlaps_b = conflict_fn_not_known ();
3028 *last_conflicts = chrec_dont_know;
3029 dependence_stats.num_siv_unimplemented++;
3030 return;
3032 else
3034 if (value1 == true)
3036 /* Example:
3037 chrec_a = 12
3038 chrec_b = {10, +, 1}
3041 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3043 HOST_WIDE_INT numiter;
3044 struct loop *loop = get_chrec_loop (chrec_b);
3046 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3047 tmp = fold_build2 (EXACT_DIV_EXPR, type,
3048 fold_build1 (ABS_EXPR, type, difference),
3049 CHREC_RIGHT (chrec_b));
3050 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3051 *last_conflicts = integer_one_node;
3054 /* Perform weak-zero siv test to see if overlap is
3055 outside the loop bounds. */
3056 numiter = max_stmt_executions_int (loop);
3058 if (numiter >= 0
3059 && compare_tree_int (tmp, numiter) > 0)
3061 free_conflict_function (*overlaps_a);
3062 free_conflict_function (*overlaps_b);
3063 *overlaps_a = conflict_fn_no_dependence ();
3064 *overlaps_b = conflict_fn_no_dependence ();
3065 *last_conflicts = integer_zero_node;
3066 dependence_stats.num_siv_independent++;
3067 return;
3069 dependence_stats.num_siv_dependent++;
3070 return;
3073 /* When the step does not divide the difference, there are
3074 no overlaps. */
3075 else
3077 *overlaps_a = conflict_fn_no_dependence ();
3078 *overlaps_b = conflict_fn_no_dependence ();
3079 *last_conflicts = integer_zero_node;
3080 dependence_stats.num_siv_independent++;
3081 return;
3085 else
3087 /* Example:
3088 chrec_a = 12
3089 chrec_b = {10, +, -1}
3091 In this case, chrec_a will not overlap with chrec_b. */
3092 *overlaps_a = conflict_fn_no_dependence ();
3093 *overlaps_b = conflict_fn_no_dependence ();
3094 *last_conflicts = integer_zero_node;
3095 dependence_stats.num_siv_independent++;
3096 return;
3100 else
3102 if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3103 || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
3105 if (dump_file && (dump_flags & TDF_DETAILS))
3106 fprintf (dump_file, "siv test failed: chrec not positive.\n");
3108 *overlaps_a = conflict_fn_not_known ();
3109 *overlaps_b = conflict_fn_not_known ();
3110 *last_conflicts = chrec_dont_know;
3111 dependence_stats.num_siv_unimplemented++;
3112 return;
3114 else
3116 if (value2 == false)
3118 /* Example:
3119 chrec_a = 3
3120 chrec_b = {10, +, -1}
3122 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3124 HOST_WIDE_INT numiter;
3125 struct loop *loop = get_chrec_loop (chrec_b);
3127 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3128 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
3129 CHREC_RIGHT (chrec_b));
3130 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3131 *last_conflicts = integer_one_node;
3133 /* Perform weak-zero siv test to see if overlap is
3134 outside the loop bounds. */
3135 numiter = max_stmt_executions_int (loop);
3137 if (numiter >= 0
3138 && compare_tree_int (tmp, numiter) > 0)
3140 free_conflict_function (*overlaps_a);
3141 free_conflict_function (*overlaps_b);
3142 *overlaps_a = conflict_fn_no_dependence ();
3143 *overlaps_b = conflict_fn_no_dependence ();
3144 *last_conflicts = integer_zero_node;
3145 dependence_stats.num_siv_independent++;
3146 return;
3148 dependence_stats.num_siv_dependent++;
3149 return;
3152 /* When the step does not divide the difference, there
3153 are no overlaps. */
3154 else
3156 *overlaps_a = conflict_fn_no_dependence ();
3157 *overlaps_b = conflict_fn_no_dependence ();
3158 *last_conflicts = integer_zero_node;
3159 dependence_stats.num_siv_independent++;
3160 return;
3163 else
3165 /* Example:
3166 chrec_a = 3
3167 chrec_b = {4, +, 1}
3169 In this case, chrec_a will not overlap with chrec_b. */
3170 *overlaps_a = conflict_fn_no_dependence ();
3171 *overlaps_b = conflict_fn_no_dependence ();
3172 *last_conflicts = integer_zero_node;
3173 dependence_stats.num_siv_independent++;
3174 return;
3181 /* Helper recursive function for initializing the matrix A. Returns
3182 the initial value of CHREC. */
3184 static tree
3185 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
3187 gcc_assert (chrec);
3189 switch (TREE_CODE (chrec))
3191 case POLYNOMIAL_CHREC:
3192 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
3193 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
3195 case PLUS_EXPR:
3196 case MULT_EXPR:
3197 case MINUS_EXPR:
3199 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3200 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
3202 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
3205 CASE_CONVERT:
3207 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3208 return chrec_convert (chrec_type (chrec), op, NULL);
3211 case BIT_NOT_EXPR:
3213 /* Handle ~X as -1 - X. */
3214 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3215 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
3216 build_int_cst (TREE_TYPE (chrec), -1), op);
3219 case INTEGER_CST:
3220 return chrec;
3222 default:
3223 gcc_unreachable ();
3224 return NULL_TREE;
3228 #define FLOOR_DIV(x,y) ((x) / (y))
3230 /* Solves the special case of the Diophantine equation:
3231 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
3233 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
3234 number of iterations that loops X and Y run. The overlaps will be
3235 constructed as evolutions in dimension DIM. */
3237 static void
3238 compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
3239 HOST_WIDE_INT step_a,
3240 HOST_WIDE_INT step_b,
3241 affine_fn *overlaps_a,
3242 affine_fn *overlaps_b,
3243 tree *last_conflicts, int dim)
3245 if (((step_a > 0 && step_b > 0)
3246 || (step_a < 0 && step_b < 0)))
3248 HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
3249 HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;
3251 gcd_steps_a_b = gcd (step_a, step_b);
3252 step_overlaps_a = step_b / gcd_steps_a_b;
3253 step_overlaps_b = step_a / gcd_steps_a_b;
3255 if (niter > 0)
3257 tau2 = FLOOR_DIV (niter, step_overlaps_a);
3258 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
3259 last_conflict = tau2;
3260 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
3262 else
3263 *last_conflicts = chrec_dont_know;
3265 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
3266 build_int_cst (NULL_TREE,
3267 step_overlaps_a));
3268 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
3269 build_int_cst (NULL_TREE,
3270 step_overlaps_b));
3273 else
3275 *overlaps_a = affine_fn_cst (integer_zero_node);
3276 *overlaps_b = affine_fn_cst (integer_zero_node);
3277 *last_conflicts = integer_zero_node;
3281 /* Solves the special case of a Diophantine equation where CHREC_A is
3282 an affine bivariate function, and CHREC_B is an affine univariate
3283 function. For example,
3285 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
3287 has the following overlapping functions:
3289 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
3290 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
3291 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
3293 FORNOW: This is a specialized implementation for a case occurring in
3294 a common benchmark. Implement the general algorithm. */
3296 static void
3297 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
3298 conflict_function **overlaps_a,
3299 conflict_function **overlaps_b,
3300 tree *last_conflicts)
3302 bool xz_p, yz_p, xyz_p;
3303 HOST_WIDE_INT step_x, step_y, step_z;
3304 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
3305 affine_fn overlaps_a_xz, overlaps_b_xz;
3306 affine_fn overlaps_a_yz, overlaps_b_yz;
3307 affine_fn overlaps_a_xyz, overlaps_b_xyz;
3308 affine_fn ova1, ova2, ovb;
3309 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
3311 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
3312 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
3313 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
3315 niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
3316 niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
3317 niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
3319 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
3321 if (dump_file && (dump_flags & TDF_DETAILS))
3322 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
3324 *overlaps_a = conflict_fn_not_known ();
3325 *overlaps_b = conflict_fn_not_known ();
3326 *last_conflicts = chrec_dont_know;
3327 return;
3330 niter = MIN (niter_x, niter_z);
3331 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
3332 &overlaps_a_xz,
3333 &overlaps_b_xz,
3334 &last_conflicts_xz, 1);
3335 niter = MIN (niter_y, niter_z);
3336 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
3337 &overlaps_a_yz,
3338 &overlaps_b_yz,
3339 &last_conflicts_yz, 2);
3340 niter = MIN (niter_x, niter_z);
3341 niter = MIN (niter_y, niter);
3342 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
3343 &overlaps_a_xyz,
3344 &overlaps_b_xyz,
3345 &last_conflicts_xyz, 3);
3347 xz_p = !integer_zerop (last_conflicts_xz);
3348 yz_p = !integer_zerop (last_conflicts_yz);
3349 xyz_p = !integer_zerop (last_conflicts_xyz);
3351 if (xz_p || yz_p || xyz_p)
3353 ova1 = affine_fn_cst (integer_zero_node);
3354 ova2 = affine_fn_cst (integer_zero_node);
3355 ovb = affine_fn_cst (integer_zero_node);
3356 if (xz_p)
3358 affine_fn t0 = ova1;
3359 affine_fn t2 = ovb;
3361 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
3362 ovb = affine_fn_plus (ovb, overlaps_b_xz);
3363 affine_fn_free (t0);
3364 affine_fn_free (t2);
3365 *last_conflicts = last_conflicts_xz;
3367 if (yz_p)
3369 affine_fn t0 = ova2;
3370 affine_fn t2 = ovb;
3372 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
3373 ovb = affine_fn_plus (ovb, overlaps_b_yz);
3374 affine_fn_free (t0);
3375 affine_fn_free (t2);
3376 *last_conflicts = last_conflicts_yz;
3378 if (xyz_p)
3380 affine_fn t0 = ova1;
3381 affine_fn t2 = ova2;
3382 affine_fn t4 = ovb;
3384 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
3385 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
3386 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
3387 affine_fn_free (t0);
3388 affine_fn_free (t2);
3389 affine_fn_free (t4);
3390 *last_conflicts = last_conflicts_xyz;
3392 *overlaps_a = conflict_fn (2, ova1, ova2);
3393 *overlaps_b = conflict_fn (1, ovb);
3395 else
3397 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3398 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3399 *last_conflicts = integer_zero_node;
3402 affine_fn_free (overlaps_a_xz);
3403 affine_fn_free (overlaps_b_xz);
3404 affine_fn_free (overlaps_a_yz);
3405 affine_fn_free (overlaps_b_yz);
3406 affine_fn_free (overlaps_a_xyz);
3407 affine_fn_free (overlaps_b_xyz);
3410 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
3412 static void
3413 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
3414 int size)
3416 memcpy (vec2, vec1, size * sizeof (*vec1));
3419 /* Copy the elements of M x N matrix MAT1 to MAT2. */
3421 static void
3422 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
3423 int m, int n)
3425 int i;
3427 for (i = 0; i < m; i++)
3428 lambda_vector_copy (mat1[i], mat2[i], n);
3431 /* Store the N x N identity matrix in MAT. */
3433 static void
3434 lambda_matrix_id (lambda_matrix mat, int size)
3436 int i, j;
3438 for (i = 0; i < size; i++)
3439 for (j = 0; j < size; j++)
3440 mat[i][j] = (i == j) ? 1 : 0;
3443 /* Return the first nonzero element of vector VEC1 between START and N.
3444 We must have START <= N. Returns N if VEC1 is the zero vector. */
3446 static int
3447 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
3449 int j = start;
3450 while (j < n && vec1[j] == 0)
3451 j++;
3452 return j;
3455 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
3456 R2 = R2 + CONST1 * R1. */
3458 static void
3459 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
3461 int i;
3463 if (const1 == 0)
3464 return;
3466 for (i = 0; i < n; i++)
3467 mat[r2][i] += const1 * mat[r1][i];
3470 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
3471 and store the result in VEC2. */
3473 static void
3474 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
3475 int size, int const1)
3477 int i;
3479 if (const1 == 0)
3480 lambda_vector_clear (vec2, size);
3481 else
3482 for (i = 0; i < size; i++)
3483 vec2[i] = const1 * vec1[i];
3486 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
3488 static void
3489 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
3490 int size)
3492 lambda_vector_mult_const (vec1, vec2, size, -1);
3495 /* Negate row R1 of matrix MAT which has N columns. */
3497 static void
3498 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
3500 lambda_vector_negate (mat[r1], mat[r1], n);
3503 /* Return true if two vectors are equal. */
3505 static bool
3506 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
3508 int i;
3509 for (i = 0; i < size; i++)
3510 if (vec1[i] != vec2[i])
3511 return false;
3512 return true;
3515 /* Given an M x N integer matrix A, this function determines an M x
3516 M unimodular matrix U, and an M x N echelon matrix S such that
3517 "U.A = S". This decomposition is also known as "right Hermite".
3519 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
3520 Restructuring Compilers" Utpal Banerjee. */
3522 static void
3523 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
3524 lambda_matrix S, lambda_matrix U)
3526 int i, j, i0 = 0;
3528 lambda_matrix_copy (A, S, m, n);
3529 lambda_matrix_id (U, m);
3531 for (j = 0; j < n; j++)
3533 if (lambda_vector_first_nz (S[j], m, i0) < m)
3535 ++i0;
3536 for (i = m - 1; i >= i0; i--)
3538 while (S[i][j] != 0)
3540 int sigma, factor, a, b;
3542 a = S[i-1][j];
3543 b = S[i][j];
3544 sigma = (a * b < 0) ? -1: 1;
3545 a = abs (a);
3546 b = abs (b);
3547 factor = sigma * (a / b);
3549 lambda_matrix_row_add (S, n, i, i-1, -factor);
3550 std::swap (S[i], S[i-1]);
3552 lambda_matrix_row_add (U, m, i, i-1, -factor);
3553 std::swap (U[i], U[i-1]);
3560 /* Determines the overlapping elements due to accesses CHREC_A and
3561 CHREC_B, that are affine functions. This function cannot handle
3562 symbolic evolution functions, ie. when initial conditions are
3563 parameters, because it uses lambda matrices of integers. */
3565 static void
3566 analyze_subscript_affine_affine (tree chrec_a,
3567 tree chrec_b,
3568 conflict_function **overlaps_a,
3569 conflict_function **overlaps_b,
3570 tree *last_conflicts)
3572 unsigned nb_vars_a, nb_vars_b, dim;
3573 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
3574 lambda_matrix A, U, S;
3575 struct obstack scratch_obstack;
3577 if (eq_evolutions_p (chrec_a, chrec_b))
3579 /* The accessed index overlaps for each iteration in the
3580 loop. */
3581 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3582 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3583 *last_conflicts = chrec_dont_know;
3584 return;
3586 if (dump_file && (dump_flags & TDF_DETAILS))
3587 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
3589 /* For determining the initial intersection, we have to solve a
3590 Diophantine equation. This is the most time consuming part.
3592 For answering to the question: "Is there a dependence?" we have
3593 to prove that there exists a solution to the Diophantine
3594 equation, and that the solution is in the iteration domain,
3595 i.e. the solution is positive or zero, and that the solution
3596 happens before the upper bound loop.nb_iterations. Otherwise
3597 there is no dependence. This function outputs a description of
3598 the iterations that hold the intersections. */
3600 nb_vars_a = nb_vars_in_chrec (chrec_a);
3601 nb_vars_b = nb_vars_in_chrec (chrec_b);
3603 gcc_obstack_init (&scratch_obstack);
3605 dim = nb_vars_a + nb_vars_b;
3606 U = lambda_matrix_new (dim, dim, &scratch_obstack);
3607 A = lambda_matrix_new (dim, 1, &scratch_obstack);
3608 S = lambda_matrix_new (dim, 1, &scratch_obstack);
3610 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
3611 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
3612 gamma = init_b - init_a;
3614 /* Don't do all the hard work of solving the Diophantine equation
3615 when we already know the solution: for example,
3616 | {3, +, 1}_1
3617 | {3, +, 4}_2
3618 | gamma = 3 - 3 = 0.
3619 Then the first overlap occurs during the first iterations:
3620 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
3622 if (gamma == 0)
3624 if (nb_vars_a == 1 && nb_vars_b == 1)
3626 HOST_WIDE_INT step_a, step_b;
3627 HOST_WIDE_INT niter, niter_a, niter_b;
3628 affine_fn ova, ovb;
3630 niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
3631 niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
3632 niter = MIN (niter_a, niter_b);
3633 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
3634 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
3636 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
3637 &ova, &ovb,
3638 last_conflicts, 1);
3639 *overlaps_a = conflict_fn (1, ova);
3640 *overlaps_b = conflict_fn (1, ovb);
3643 else if (nb_vars_a == 2 && nb_vars_b == 1)
3644 compute_overlap_steps_for_affine_1_2
3645 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
3647 else if (nb_vars_a == 1 && nb_vars_b == 2)
3648 compute_overlap_steps_for_affine_1_2
3649 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
3651 else
3653 if (dump_file && (dump_flags & TDF_DETAILS))
3654 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
3655 *overlaps_a = conflict_fn_not_known ();
3656 *overlaps_b = conflict_fn_not_known ();
3657 *last_conflicts = chrec_dont_know;
3659 goto end_analyze_subs_aa;
3662 /* U.A = S */
3663 lambda_matrix_right_hermite (A, dim, 1, S, U);
3665 if (S[0][0] < 0)
3667 S[0][0] *= -1;
3668 lambda_matrix_row_negate (U, dim, 0);
3670 gcd_alpha_beta = S[0][0];
3672 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
3673 but that is a quite strange case. Instead of ICEing, answer
3674 don't know. */
3675 if (gcd_alpha_beta == 0)
3677 *overlaps_a = conflict_fn_not_known ();
3678 *overlaps_b = conflict_fn_not_known ();
3679 *last_conflicts = chrec_dont_know;
3680 goto end_analyze_subs_aa;
3683 /* The classic "gcd-test". */
3684 if (!int_divides_p (gcd_alpha_beta, gamma))
3686 /* The "gcd-test" has determined that there is no integer
3687 solution, i.e. there is no dependence. */
3688 *overlaps_a = conflict_fn_no_dependence ();
3689 *overlaps_b = conflict_fn_no_dependence ();
3690 *last_conflicts = integer_zero_node;
3693 /* Both access functions are univariate. This includes SIV and MIV cases. */
3694 else if (nb_vars_a == 1 && nb_vars_b == 1)
3696 /* Both functions should have the same evolution sign. */
3697 if (((A[0][0] > 0 && -A[1][0] > 0)
3698 || (A[0][0] < 0 && -A[1][0] < 0)))
3700 /* The solutions are given by:
3702 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
3703 | [u21 u22] [y0]
3705 For a given integer t. Using the following variables,
3707 | i0 = u11 * gamma / gcd_alpha_beta
3708 | j0 = u12 * gamma / gcd_alpha_beta
3709 | i1 = u21
3710 | j1 = u22
3712 the solutions are:
3714 | x0 = i0 + i1 * t,
3715 | y0 = j0 + j1 * t. */
3716 HOST_WIDE_INT i0, j0, i1, j1;
3718 i0 = U[0][0] * gamma / gcd_alpha_beta;
3719 j0 = U[0][1] * gamma / gcd_alpha_beta;
3720 i1 = U[1][0];
3721 j1 = U[1][1];
3723 if ((i1 == 0 && i0 < 0)
3724 || (j1 == 0 && j0 < 0))
3726 /* There is no solution.
3727 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
3728 falls in here, but for the moment we don't look at the
3729 upper bound of the iteration domain. */
3730 *overlaps_a = conflict_fn_no_dependence ();
3731 *overlaps_b = conflict_fn_no_dependence ();
3732 *last_conflicts = integer_zero_node;
3733 goto end_analyze_subs_aa;
3736 if (i1 > 0 && j1 > 0)
3738 HOST_WIDE_INT niter_a
3739 = max_stmt_executions_int (get_chrec_loop (chrec_a));
3740 HOST_WIDE_INT niter_b
3741 = max_stmt_executions_int (get_chrec_loop (chrec_b));
3742 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
3744 /* (X0, Y0) is a solution of the Diophantine equation:
3745 "chrec_a (X0) = chrec_b (Y0)". */
3746 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
3747 CEIL (-j0, j1));
3748 HOST_WIDE_INT x0 = i1 * tau1 + i0;
3749 HOST_WIDE_INT y0 = j1 * tau1 + j0;
3751 /* (X1, Y1) is the smallest positive solution of the eq
3752 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
3753 first conflict occurs. */
3754 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
3755 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
3756 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
3758 if (niter > 0)
3760 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter_a - i0, i1),
3761 FLOOR_DIV (niter_b - j0, j1));
3762 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
3764 /* If the overlap occurs outside of the bounds of the
3765 loop, there is no dependence. */
3766 if (x1 >= niter_a || y1 >= niter_b)
3768 *overlaps_a = conflict_fn_no_dependence ();
3769 *overlaps_b = conflict_fn_no_dependence ();
3770 *last_conflicts = integer_zero_node;
3771 goto end_analyze_subs_aa;
3773 else
3774 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
3776 else
3777 *last_conflicts = chrec_dont_know;
3779 *overlaps_a
3780 = conflict_fn (1,
3781 affine_fn_univar (build_int_cst (NULL_TREE, x1),
3783 build_int_cst (NULL_TREE, i1)));
3784 *overlaps_b
3785 = conflict_fn (1,
3786 affine_fn_univar (build_int_cst (NULL_TREE, y1),
3788 build_int_cst (NULL_TREE, j1)));
3790 else
3792 /* FIXME: For the moment, the upper bound of the
3793 iteration domain for i and j is not checked. */
3794 if (dump_file && (dump_flags & TDF_DETAILS))
3795 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
3796 *overlaps_a = conflict_fn_not_known ();
3797 *overlaps_b = conflict_fn_not_known ();
3798 *last_conflicts = chrec_dont_know;
3801 else
3803 if (dump_file && (dump_flags & TDF_DETAILS))
3804 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
3805 *overlaps_a = conflict_fn_not_known ();
3806 *overlaps_b = conflict_fn_not_known ();
3807 *last_conflicts = chrec_dont_know;
3810 else
3812 if (dump_file && (dump_flags & TDF_DETAILS))
3813 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
3814 *overlaps_a = conflict_fn_not_known ();
3815 *overlaps_b = conflict_fn_not_known ();
3816 *last_conflicts = chrec_dont_know;
3819 end_analyze_subs_aa:
3820 obstack_free (&scratch_obstack, NULL);
3821 if (dump_file && (dump_flags & TDF_DETAILS))
3823 fprintf (dump_file, " (overlaps_a = ");
3824 dump_conflict_function (dump_file, *overlaps_a);
3825 fprintf (dump_file, ")\n (overlaps_b = ");
3826 dump_conflict_function (dump_file, *overlaps_b);
3827 fprintf (dump_file, "))\n");
3831 /* Returns true when analyze_subscript_affine_affine can be used for
3832 determining the dependence relation between chrec_a and chrec_b,
3833 that contain symbols. This function modifies chrec_a and chrec_b
3834 such that the analysis result is the same, and such that they don't
3835 contain symbols, and then can safely be passed to the analyzer.
3837 Example: The analysis of the following tuples of evolutions produce
3838 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
3839 vs. {0, +, 1}_1
3841 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
3842 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
3845 static bool
3846 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
3848 tree diff, type, left_a, left_b, right_b;
3850 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
3851 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
3852 /* FIXME: For the moment not handled. Might be refined later. */
3853 return false;
3855 type = chrec_type (*chrec_a);
3856 left_a = CHREC_LEFT (*chrec_a);
3857 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
3858 diff = chrec_fold_minus (type, left_a, left_b);
3860 if (!evolution_function_is_constant_p (diff))
3861 return false;
3863 if (dump_file && (dump_flags & TDF_DETAILS))
3864 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
3866 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
3867 diff, CHREC_RIGHT (*chrec_a));
3868 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
3869 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
3870 build_int_cst (type, 0),
3871 right_b);
3872 return true;
3875 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
3876 *OVERLAPS_B are initialized to the functions that describe the
3877 relation between the elements accessed twice by CHREC_A and
3878 CHREC_B. For k >= 0, the following property is verified:
3880 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
3882 static void
3883 analyze_siv_subscript (tree chrec_a,
3884 tree chrec_b,
3885 conflict_function **overlaps_a,
3886 conflict_function **overlaps_b,
3887 tree *last_conflicts,
3888 int loop_nest_num)
3890 dependence_stats.num_siv++;
3892 if (dump_file && (dump_flags & TDF_DETAILS))
3893 fprintf (dump_file, "(analyze_siv_subscript \n");
3895 if (evolution_function_is_constant_p (chrec_a)
3896 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
3897 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
3898 overlaps_a, overlaps_b, last_conflicts);
3900 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
3901 && evolution_function_is_constant_p (chrec_b))
3902 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
3903 overlaps_b, overlaps_a, last_conflicts);
3905 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
3906 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
3908 if (!chrec_contains_symbols (chrec_a)
3909 && !chrec_contains_symbols (chrec_b))
3911 analyze_subscript_affine_affine (chrec_a, chrec_b,
3912 overlaps_a, overlaps_b,
3913 last_conflicts);
3915 if (CF_NOT_KNOWN_P (*overlaps_a)
3916 || CF_NOT_KNOWN_P (*overlaps_b))
3917 dependence_stats.num_siv_unimplemented++;
3918 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
3919 || CF_NO_DEPENDENCE_P (*overlaps_b))
3920 dependence_stats.num_siv_independent++;
3921 else
3922 dependence_stats.num_siv_dependent++;
3924 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
3925 &chrec_b))
3927 analyze_subscript_affine_affine (chrec_a, chrec_b,
3928 overlaps_a, overlaps_b,
3929 last_conflicts);
3931 if (CF_NOT_KNOWN_P (*overlaps_a)
3932 || CF_NOT_KNOWN_P (*overlaps_b))
3933 dependence_stats.num_siv_unimplemented++;
3934 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
3935 || CF_NO_DEPENDENCE_P (*overlaps_b))
3936 dependence_stats.num_siv_independent++;
3937 else
3938 dependence_stats.num_siv_dependent++;
3940 else
3941 goto siv_subscript_dontknow;
3944 else
3946 siv_subscript_dontknow:;
3947 if (dump_file && (dump_flags & TDF_DETAILS))
3948 fprintf (dump_file, " siv test failed: unimplemented");
3949 *overlaps_a = conflict_fn_not_known ();
3950 *overlaps_b = conflict_fn_not_known ();
3951 *last_conflicts = chrec_dont_know;
3952 dependence_stats.num_siv_unimplemented++;
3955 if (dump_file && (dump_flags & TDF_DETAILS))
3956 fprintf (dump_file, ")\n");
3959 /* Returns false if we can prove that the greatest common divisor of the steps
3960 of CHREC does not divide CST, false otherwise. */
3962 static bool
3963 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
3965 HOST_WIDE_INT cd = 0, val;
3966 tree step;
3968 if (!tree_fits_shwi_p (cst))
3969 return true;
3970 val = tree_to_shwi (cst);
3972 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
3974 step = CHREC_RIGHT (chrec);
3975 if (!tree_fits_shwi_p (step))
3976 return true;
3977 cd = gcd (cd, tree_to_shwi (step));
3978 chrec = CHREC_LEFT (chrec);
3981 return val % cd == 0;
3984 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
3985 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
3986 functions that describe the relation between the elements accessed
3987 twice by CHREC_A and CHREC_B. For k >= 0, the following property
3988 is verified:
3990 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
3992 static void
3993 analyze_miv_subscript (tree chrec_a,
3994 tree chrec_b,
3995 conflict_function **overlaps_a,
3996 conflict_function **overlaps_b,
3997 tree *last_conflicts,
3998 struct loop *loop_nest)
4000 tree type, difference;
4002 dependence_stats.num_miv++;
4003 if (dump_file && (dump_flags & TDF_DETAILS))
4004 fprintf (dump_file, "(analyze_miv_subscript \n");
4006 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
4007 chrec_a = chrec_convert (type, chrec_a, NULL);
4008 chrec_b = chrec_convert (type, chrec_b, NULL);
4009 difference = chrec_fold_minus (type, chrec_a, chrec_b);
4011 if (eq_evolutions_p (chrec_a, chrec_b))
4013 /* Access functions are the same: all the elements are accessed
4014 in the same order. */
4015 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4016 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4017 *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
4018 dependence_stats.num_miv_dependent++;
4021 else if (evolution_function_is_constant_p (difference)
4022 && evolution_function_is_affine_multivariate_p (chrec_a,
4023 loop_nest->num)
4024 && !gcd_of_steps_may_divide_p (chrec_a, difference))
4026 /* testsuite/.../ssa-chrec-33.c
4027 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
4029 The difference is 1, and all the evolution steps are multiples
4030 of 2, consequently there are no overlapping elements. */
4031 *overlaps_a = conflict_fn_no_dependence ();
4032 *overlaps_b = conflict_fn_no_dependence ();
4033 *last_conflicts = integer_zero_node;
4034 dependence_stats.num_miv_independent++;
4037 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
4038 && !chrec_contains_symbols (chrec_a)
4039 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
4040 && !chrec_contains_symbols (chrec_b))
4042 /* testsuite/.../ssa-chrec-35.c
4043 {0, +, 1}_2 vs. {0, +, 1}_3
4044 the overlapping elements are respectively located at iterations:
4045 {0, +, 1}_x and {0, +, 1}_x,
4046 in other words, we have the equality:
4047 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
4049 Other examples:
4050 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
4051 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
4053 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
4054 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
4056 analyze_subscript_affine_affine (chrec_a, chrec_b,
4057 overlaps_a, overlaps_b, last_conflicts);
4059 if (CF_NOT_KNOWN_P (*overlaps_a)
4060 || CF_NOT_KNOWN_P (*overlaps_b))
4061 dependence_stats.num_miv_unimplemented++;
4062 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4063 || CF_NO_DEPENDENCE_P (*overlaps_b))
4064 dependence_stats.num_miv_independent++;
4065 else
4066 dependence_stats.num_miv_dependent++;
4069 else
4071 /* When the analysis is too difficult, answer "don't know". */
4072 if (dump_file && (dump_flags & TDF_DETAILS))
4073 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
4075 *overlaps_a = conflict_fn_not_known ();
4076 *overlaps_b = conflict_fn_not_known ();
4077 *last_conflicts = chrec_dont_know;
4078 dependence_stats.num_miv_unimplemented++;
4081 if (dump_file && (dump_flags & TDF_DETAILS))
4082 fprintf (dump_file, ")\n");
4085 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
4086 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
4087 OVERLAP_ITERATIONS_B are initialized with two functions that
4088 describe the iterations that contain conflicting elements.
4090 Remark: For an integer k >= 0, the following equality is true:
4092 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
4095 static void
4096 analyze_overlapping_iterations (tree chrec_a,
4097 tree chrec_b,
4098 conflict_function **overlap_iterations_a,
4099 conflict_function **overlap_iterations_b,
4100 tree *last_conflicts, struct loop *loop_nest)
4102 unsigned int lnn = loop_nest->num;
4104 dependence_stats.num_subscript_tests++;
4106 if (dump_file && (dump_flags & TDF_DETAILS))
4108 fprintf (dump_file, "(analyze_overlapping_iterations \n");
4109 fprintf (dump_file, " (chrec_a = ");
4110 print_generic_expr (dump_file, chrec_a);
4111 fprintf (dump_file, ")\n (chrec_b = ");
4112 print_generic_expr (dump_file, chrec_b);
4113 fprintf (dump_file, ")\n");
4116 if (chrec_a == NULL_TREE
4117 || chrec_b == NULL_TREE
4118 || chrec_contains_undetermined (chrec_a)
4119 || chrec_contains_undetermined (chrec_b))
4121 dependence_stats.num_subscript_undetermined++;
4123 *overlap_iterations_a = conflict_fn_not_known ();
4124 *overlap_iterations_b = conflict_fn_not_known ();
4127 /* If they are the same chrec, and are affine, they overlap
4128 on every iteration. */
4129 else if (eq_evolutions_p (chrec_a, chrec_b)
4130 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4131 || operand_equal_p (chrec_a, chrec_b, 0)))
4133 dependence_stats.num_same_subscript_function++;
4134 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4135 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4136 *last_conflicts = chrec_dont_know;
4139 /* If they aren't the same, and aren't affine, we can't do anything
4140 yet. */
4141 else if ((chrec_contains_symbols (chrec_a)
4142 || chrec_contains_symbols (chrec_b))
4143 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4144 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
4146 dependence_stats.num_subscript_undetermined++;
4147 *overlap_iterations_a = conflict_fn_not_known ();
4148 *overlap_iterations_b = conflict_fn_not_known ();
4151 else if (ziv_subscript_p (chrec_a, chrec_b))
4152 analyze_ziv_subscript (chrec_a, chrec_b,
4153 overlap_iterations_a, overlap_iterations_b,
4154 last_conflicts);
4156 else if (siv_subscript_p (chrec_a, chrec_b))
4157 analyze_siv_subscript (chrec_a, chrec_b,
4158 overlap_iterations_a, overlap_iterations_b,
4159 last_conflicts, lnn);
4161 else
4162 analyze_miv_subscript (chrec_a, chrec_b,
4163 overlap_iterations_a, overlap_iterations_b,
4164 last_conflicts, loop_nest);
4166 if (dump_file && (dump_flags & TDF_DETAILS))
4168 fprintf (dump_file, " (overlap_iterations_a = ");
4169 dump_conflict_function (dump_file, *overlap_iterations_a);
4170 fprintf (dump_file, ")\n (overlap_iterations_b = ");
4171 dump_conflict_function (dump_file, *overlap_iterations_b);
4172 fprintf (dump_file, "))\n");
4176 /* Helper function for uniquely inserting distance vectors. */
4178 static void
4179 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
4181 unsigned i;
4182 lambda_vector v;
4184 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, v)
4185 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
4186 return;
4188 DDR_DIST_VECTS (ddr).safe_push (dist_v);
4191 /* Helper function for uniquely inserting direction vectors. */
4193 static void
4194 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
4196 unsigned i;
4197 lambda_vector v;
4199 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), i, v)
4200 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
4201 return;
4203 DDR_DIR_VECTS (ddr).safe_push (dir_v);
4206 /* Add a distance of 1 on all the loops outer than INDEX. If we
4207 haven't yet determined a distance for this outer loop, push a new
4208 distance vector composed of the previous distance, and a distance
4209 of 1 for this outer loop. Example:
4211 | loop_1
4212 | loop_2
4213 | A[10]
4214 | endloop_2
4215 | endloop_1
4217 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
4218 save (0, 1), then we have to save (1, 0). */
4220 static void
4221 add_outer_distances (struct data_dependence_relation *ddr,
4222 lambda_vector dist_v, int index)
4224 /* For each outer loop where init_v is not set, the accesses are
4225 in dependence of distance 1 in the loop. */
4226 while (--index >= 0)
4228 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4229 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
4230 save_v[index] = 1;
4231 save_dist_v (ddr, save_v);
4235 /* Return false when fail to represent the data dependence as a
4236 distance vector. A_INDEX is the index of the first reference
4237 (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
4238 second reference. INIT_B is set to true when a component has been
4239 added to the distance vector DIST_V. INDEX_CARRY is then set to
4240 the index in DIST_V that carries the dependence. */
4242 static bool
4243 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
4244 unsigned int a_index, unsigned int b_index,
4245 lambda_vector dist_v, bool *init_b,
4246 int *index_carry)
4248 unsigned i;
4249 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4251 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
4253 tree access_fn_a, access_fn_b;
4254 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
4256 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
4258 non_affine_dependence_relation (ddr);
4259 return false;
4262 access_fn_a = SUB_ACCESS_FN (subscript, a_index);
4263 access_fn_b = SUB_ACCESS_FN (subscript, b_index);
4265 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
4266 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
4268 HOST_WIDE_INT dist;
4269 int index;
4270 int var_a = CHREC_VARIABLE (access_fn_a);
4271 int var_b = CHREC_VARIABLE (access_fn_b);
4273 if (var_a != var_b
4274 || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
4276 non_affine_dependence_relation (ddr);
4277 return false;
4280 dist = int_cst_value (SUB_DISTANCE (subscript));
4281 index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
4282 *index_carry = MIN (index, *index_carry);
4284 /* This is the subscript coupling test. If we have already
4285 recorded a distance for this loop (a distance coming from
4286 another subscript), it should be the same. For example,
4287 in the following code, there is no dependence:
4289 | loop i = 0, N, 1
4290 | T[i+1][i] = ...
4291 | ... = T[i][i]
4292 | endloop
4294 if (init_v[index] != 0 && dist_v[index] != dist)
4296 finalize_ddr_dependent (ddr, chrec_known);
4297 return false;
4300 dist_v[index] = dist;
4301 init_v[index] = 1;
4302 *init_b = true;
4304 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
4306 /* This can be for example an affine vs. constant dependence
4307 (T[i] vs. T[3]) that is not an affine dependence and is
4308 not representable as a distance vector. */
4309 non_affine_dependence_relation (ddr);
4310 return false;
4314 return true;
4317 /* Return true when the DDR contains only constant access functions. */
4319 static bool
4320 constant_access_functions (const struct data_dependence_relation *ddr)
4322 unsigned i;
4323 subscript *sub;
4325 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4326 if (!evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 0))
4327 || !evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 1)))
4328 return false;
4330 return true;
4333 /* Helper function for the case where DDR_A and DDR_B are the same
4334 multivariate access function with a constant step. For an example
4335 see pr34635-1.c. */
4337 static void
4338 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
4340 int x_1, x_2;
4341 tree c_1 = CHREC_LEFT (c_2);
4342 tree c_0 = CHREC_LEFT (c_1);
4343 lambda_vector dist_v;
4344 HOST_WIDE_INT v1, v2, cd;
4346 /* Polynomials with more than 2 variables are not handled yet. When
4347 the evolution steps are parameters, it is not possible to
4348 represent the dependence using classical distance vectors. */
4349 if (TREE_CODE (c_0) != INTEGER_CST
4350 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
4351 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
4353 DDR_AFFINE_P (ddr) = false;
4354 return;
4357 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
4358 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
4360 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
4361 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4362 v1 = int_cst_value (CHREC_RIGHT (c_1));
4363 v2 = int_cst_value (CHREC_RIGHT (c_2));
4364 cd = gcd (v1, v2);
4365 v1 /= cd;
4366 v2 /= cd;
4368 if (v2 < 0)
4370 v2 = -v2;
4371 v1 = -v1;
4374 dist_v[x_1] = v2;
4375 dist_v[x_2] = -v1;
4376 save_dist_v (ddr, dist_v);
4378 add_outer_distances (ddr, dist_v, x_1);
4381 /* Helper function for the case where DDR_A and DDR_B are the same
4382 access functions. */
4384 static void
4385 add_other_self_distances (struct data_dependence_relation *ddr)
4387 lambda_vector dist_v;
4388 unsigned i;
4389 int index_carry = DDR_NB_LOOPS (ddr);
4390 subscript *sub;
4392 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4394 tree access_fun = SUB_ACCESS_FN (sub, 0);
4396 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
4398 if (!evolution_function_is_univariate_p (access_fun))
4400 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
4402 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
4403 return;
4406 access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);
4408 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
4409 add_multivariate_self_dist (ddr, access_fun);
4410 else
4411 /* The evolution step is not constant: it varies in
4412 the outer loop, so this cannot be represented by a
4413 distance vector. For example in pr34635.c the
4414 evolution is {0, +, {0, +, 4}_1}_2. */
4415 DDR_AFFINE_P (ddr) = false;
4417 return;
4420 index_carry = MIN (index_carry,
4421 index_in_loop_nest (CHREC_VARIABLE (access_fun),
4422 DDR_LOOP_NEST (ddr)));
4426 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4427 add_outer_distances (ddr, dist_v, index_carry);
4430 static void
4431 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
4433 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4435 dist_v[DDR_INNER_LOOP (ddr)] = 1;
4436 save_dist_v (ddr, dist_v);
4439 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
4440 is the case for example when access functions are the same and
4441 equal to a constant, as in:
4443 | loop_1
4444 | A[3] = ...
4445 | ... = A[3]
4446 | endloop_1
4448 in which case the distance vectors are (0) and (1). */
4450 static void
4451 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
4453 unsigned i, j;
4455 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
4457 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
4458 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
4459 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
4461 for (j = 0; j < ca->n; j++)
4462 if (affine_function_zero_p (ca->fns[j]))
4464 insert_innermost_unit_dist_vector (ddr);
4465 return;
4468 for (j = 0; j < cb->n; j++)
4469 if (affine_function_zero_p (cb->fns[j]))
4471 insert_innermost_unit_dist_vector (ddr);
4472 return;
4477 /* Return true when the DDR contains two data references that have the
4478 same access functions. */
4480 static inline bool
4481 same_access_functions (const struct data_dependence_relation *ddr)
4483 unsigned i;
4484 subscript *sub;
4486 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4487 if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
4488 SUB_ACCESS_FN (sub, 1)))
4489 return false;
4491 return true;
/* Compute the classic per loop distance vector.  DDR is the data
   dependence relation to build a vector from.  Return false when fail
   to represent the data dependence as a distance vector.  */

static bool
build_classic_dist_vector (struct data_dependence_relation *ddr,
			   struct loop *loop_nest)
{
  bool init_b = false;
  int index_carry = DDR_NB_LOOPS (ddr);
  lambda_vector dist_v;

  /* A known or unanalyzable relation was already recorded: nothing to
     build.  */
  if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
    return false;

  if (same_access_functions (ddr))
    {
      /* Save the 0 vector.  */
      dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
      save_dist_v (ddr, dist_v);

      if (constant_access_functions (ddr))
	add_distance_for_zero_overlaps (ddr);

      if (DDR_NB_LOOPS (ddr) > 1)
	add_other_self_distances (ddr);

      return true;
    }

  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
    return false;

  /* Save the distance vector if we initialized one.  */
  if (init_b)
    {
      /* Verify a basic constraint: classic distance vectors should
	 always be lexicographically positive.

	 Data references are collected in the order of execution of
	 the program, thus for the following loop

	 | for (i = 1; i < 100; i++)
	 |   for (j = 1; j < 100; j++)
	 |     {
	 |       t = T[j+1][i-1];  // A
	 |       T[j][i] = t + 2;  // B
	 |     }

	 references are collected following the direction of the wind:
	 A then B.  The data dependence tests are performed also
	 following this order, such that we're looking at the distance
	 separating the elements accessed by A from the elements later
	 accessed by B.  But in this example, the distance returned by
	 test_dep (A, B) is lexicographically negative (-1, 1), that
	 means that the access A occurs later than B with respect to
	 the outer loop, ie. we're actually looking upwind.  In this
	 case we solve test_dep (B, A) looking downwind to the
	 lexicographically positive solution, that returns the
	 distance vector (1, -1).  */
      if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
	{
	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
	  if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
	    return false;
	  compute_subscript_distance (ddr);
	  if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
					    &index_carry))
	    return false;
	  save_dist_v (ddr, save_v);
	  DDR_REVERSED_P (ddr) = true;

	  /* In this case there is a dependence forward for all the
	     outer loops:

	     | for (k = 1; k < 100; k++)
	     |  for (i = 1; i < 100; i++)
	     |   for (j = 1; j < 100; j++)
	     |     {
	     |       t = T[j+1][i-1];  // A
	     |       T[j][i] = t + 2;  // B
	     |     }

	     the vectors are:
	     (0,  1, -1)
	     (1,  1, -1)
	     (1, -1,  1)
	  */
	  if (DDR_NB_LOOPS (ddr) > 1)
	    {
	      add_outer_distances (ddr, save_v, index_carry);
	      add_outer_distances (ddr, dist_v, index_carry);
	    }
	}
      else
	{
	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
	  lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));

	  if (DDR_NB_LOOPS (ddr) > 1)
	    {
	      lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));

	      /* Also record the dependence computed in the opposite
		 direction (B before A) together with its outer
		 distances.  */
	      if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
		return false;
	      compute_subscript_distance (ddr);
	      if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
						&index_carry))
		return false;

	      save_dist_v (ddr, save_v);
	      add_outer_distances (ddr, dist_v, index_carry);
	      add_outer_distances (ddr, opposite_v, index_carry);
	    }
	  else
	    save_dist_v (ddr, save_v);
	}
    }
  else
    {
      /* There is a distance of 1 on all the outer loops: Example:
	 there is a dependence of distance 1 on loop_1 for the array A.

	 | loop_1
	 |   A[5] = ...
	 | endloop
      */
      add_outer_distances (ddr, dist_v,
			   lambda_vector_first_nz (dist_v,
						   DDR_NB_LOOPS (ddr), 0));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned i;

      fprintf (dump_file, "(build_classic_dist_vector\n");
      for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
	{
	  fprintf (dump_file, "  dist_vector = (");
	  print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
			       DDR_NB_LOOPS (ddr));
	  fprintf (dump_file, "  )\n");
	}
      fprintf (dump_file, ")\n");
    }

  return true;
}
4645 /* Return the direction for a given distance.
4646 FIXME: Computing dir this way is suboptimal, since dir can catch
4647 cases that dist is unable to represent. */
4649 static inline enum data_dependence_direction
4650 dir_from_dist (int dist)
4652 if (dist > 0)
4653 return dir_positive;
4654 else if (dist < 0)
4655 return dir_negative;
4656 else
4657 return dir_equal;
4660 /* Compute the classic per loop direction vector. DDR is the data
4661 dependence relation to build a vector from. */
4663 static void
4664 build_classic_dir_vector (struct data_dependence_relation *ddr)
4666 unsigned i, j;
4667 lambda_vector dist_v;
4669 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
4671 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4673 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
4674 dir_v[j] = dir_from_dist (dist_v[j]);
4676 save_dir_v (ddr, dir_v);
/* Helper function.  Returns true when there is a dependence between the
   data references.  A_INDEX is the index of the first reference (0 for
   DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference.
   On a false return, DDR has been finalized as either independent
   (chrec_known) or undetermined (chrec_dont_know).  */

static bool
subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
			       unsigned int a_index, unsigned int b_index,
			       struct loop *loop_nest)
{
  unsigned int i;
  tree last_conflicts;
  struct subscript *subscript;
  tree res = NULL_TREE;

  for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
    {
      conflict_function *overlaps_a, *overlaps_b;

      analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
				      SUB_ACCESS_FN (subscript, b_index),
				      &overlaps_a, &overlaps_b,
				      &last_conflicts, loop_nest);

      /* Release any conflict functions computed by a previous run
	 before installing the new ones.  */
      if (SUB_CONFLICTS_IN_A (subscript))
	free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
      if (SUB_CONFLICTS_IN_B (subscript))
	free_conflict_function (SUB_CONFLICTS_IN_B (subscript));

      SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
      SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
      SUB_LAST_CONFLICT (subscript) = last_conflicts;

      /* If there is any undetermined conflict function we have to
	 give a conservative answer in case we cannot prove that
	 no dependence exists when analyzing another subscript.  */
      if (CF_NOT_KNOWN_P (overlaps_a)
	  || CF_NOT_KNOWN_P (overlaps_b))
	{
	  res = chrec_dont_know;
	  continue;
	}

      /* When there is a subscript with no dependence we can stop.  */
      else if (CF_NO_DEPENDENCE_P (overlaps_a)
	       || CF_NO_DEPENDENCE_P (overlaps_b))
	{
	  res = chrec_known;
	  break;
	}
    }

  if (res == NULL_TREE)
    return true;

  if (res == chrec_known)
    dependence_stats.num_dependence_independent++;
  else
    dependence_stats.num_dependence_undetermined++;
  finalize_ddr_dependent (ddr, res);
  return false;
}
4742 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
4744 static void
4745 subscript_dependence_tester (struct data_dependence_relation *ddr,
4746 struct loop *loop_nest)
4748 if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
4749 dependence_stats.num_dependence_dependent++;
4751 compute_subscript_distance (ddr);
4752 if (build_classic_dist_vector (ddr, loop_nest))
4753 build_classic_dir_vector (ddr);
4756 /* Returns true when all the access functions of A are affine or
4757 constant with respect to LOOP_NEST. */
4759 static bool
4760 access_functions_are_affine_or_constant_p (const struct data_reference *a,
4761 const struct loop *loop_nest)
4763 unsigned int i;
4764 vec<tree> fns = DR_ACCESS_FNS (a);
4765 tree t;
4767 FOR_EACH_VEC_ELT (fns, i, t)
4768 if (!evolution_function_is_invariant_p (t, loop_nest->num)
4769 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
4770 return false;
4772 return true;
/* This computes the affine dependence relation between A and B with
   respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
   independence between two accesses, while CHREC_DONT_KNOW is used
   for representing the unknown relation.

   Note that it is possible to stop the computation of the dependence
   relation the first time we detect a CHREC_KNOWN element for a given
   subscript.  */

void
compute_affine_dependence (struct data_dependence_relation *ddr,
			   struct loop *loop_nest)
{
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "(compute_affine_dependence\n");
      fprintf (dump_file, "  stmt_a: ");
      print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
      fprintf (dump_file, "  stmt_b: ");
      print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
    }

  /* Analyze only when the dependence relation is not yet known.  */
  if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      dependence_stats.num_dependence_tests++;

      if (access_functions_are_affine_or_constant_p (dra, loop_nest)
	  && access_functions_are_affine_or_constant_p (drb, loop_nest))
	subscript_dependence_tester (ddr, loop_nest);

      /* As a last case, if the dependence cannot be determined, or if
	 the dependence is considered too difficult to determine, answer
	 "don't know".  */
      else
	{
	  dependence_stats.num_dependence_undetermined++;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Data ref a:\n");
	      dump_data_reference (dump_file, dra);
	      fprintf (dump_file, "Data ref b:\n");
	      dump_data_reference (dump_file, drb);
	      fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
	    }
	  finalize_ddr_dependent (ddr, chrec_dont_know);
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
	fprintf (dump_file, ") -> no dependence\n");
      else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	fprintf (dump_file, ") -> dependence analysis failed\n");
      else
	fprintf (dump_file, ")\n");
    }
}
4839 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4840 the data references in DATAREFS, in the LOOP_NEST. When
4841 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4842 relations. Return true when successful, i.e. data references number
4843 is small enough to be handled. */
4845 bool
4846 compute_all_dependences (vec<data_reference_p> datarefs,
4847 vec<ddr_p> *dependence_relations,
4848 vec<loop_p> loop_nest,
4849 bool compute_self_and_rr)
4851 struct data_dependence_relation *ddr;
4852 struct data_reference *a, *b;
4853 unsigned int i, j;
4855 if ((int) datarefs.length ()
4856 > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
4858 struct data_dependence_relation *ddr;
4860 /* Insert a single relation into dependence_relations:
4861 chrec_dont_know. */
4862 ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
4863 dependence_relations->safe_push (ddr);
4864 return false;
4867 FOR_EACH_VEC_ELT (datarefs, i, a)
4868 for (j = i + 1; datarefs.iterate (j, &b); j++)
4869 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4871 ddr = initialize_data_dependence_relation (a, b, loop_nest);
4872 dependence_relations->safe_push (ddr);
4873 if (loop_nest.exists ())
4874 compute_affine_dependence (ddr, loop_nest[0]);
4877 if (compute_self_and_rr)
4878 FOR_EACH_VEC_ELT (datarefs, i, a)
4880 ddr = initialize_data_dependence_relation (a, a, loop_nest);
4881 dependence_relations->safe_push (ddr);
4882 if (loop_nest.exists ())
4883 compute_affine_dependence (ddr, loop_nest[0]);
4886 return true;
/* Describes a location of a memory reference found in a statement.  */

struct data_ref_loc
{
  /* The memory reference.  */
  tree ref;

  /* True if the memory reference is read.  */
  bool is_read;

  /* True if the data reference is conditional within the containing
     statement, i.e. if it might not occur even when the statement
     is executed and runs to completion.  */
  bool is_conditional_in_stmt;
};
/* Stores the locations of memory references in STMT to REFERENCES.  Returns
   true if STMT clobbers memory, false otherwise.  */

static bool
get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
{
  bool clobbers_memory = false;
  data_ref_loc ref;
  tree op0, op1;
  enum gimple_code stmt_code = gimple_code (stmt);

  /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
     As we cannot model data-references to not spelled out
     accesses give up if they may occur.  */
  if (stmt_code == GIMPLE_CALL
      && !(gimple_call_flags (stmt) & ECF_CONST))
    {
      /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
      if (gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_GOMP_SIMD_LANE:
	    {
	      struct loop *loop = gimple_bb (stmt)->loop_father;
	      tree uid = gimple_call_arg (stmt, 0);
	      gcc_assert (TREE_CODE (uid) == SSA_NAME);
	      /* Only the lane call belonging to this loop's simduid
		 is harmless; anything else may clobber.  */
	      if (loop == NULL
		  || loop->simduid != SSA_NAME_VAR (uid))
		clobbers_memory = true;
	      break;
	    }
	  case IFN_MASK_LOAD:
	  case IFN_MASK_STORE:
	    break;
	  default:
	    clobbers_memory = true;
	    break;
	  }
      else
	clobbers_memory = true;
    }
  else if (stmt_code == GIMPLE_ASM
	   && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
	       || gimple_vuse (stmt)))
    clobbers_memory = true;

  /* No virtual use means no memory access to record.  */
  if (!gimple_vuse (stmt))
    return clobbers_memory;

  if (stmt_code == GIMPLE_ASSIGN)
    {
      tree base;
      op0 = gimple_assign_lhs (stmt);
      op1 = gimple_assign_rhs1 (stmt);

      /* Record the RHS as a read when it is a decl or a reference
	 whose base is a real memory object.  */
      if (DECL_P (op1)
	  || (REFERENCE_CLASS_P (op1)
	      && (base = get_base_address (op1))
	      && TREE_CODE (base) != SSA_NAME
	      && !is_gimple_min_invariant (base)))
	{
	  ref.ref = op1;
	  ref.is_read = true;
	  ref.is_conditional_in_stmt = false;
	  references->safe_push (ref);
	}
    }
  else if (stmt_code == GIMPLE_CALL)
    {
      unsigned i, n;
      tree ptr, type;
      unsigned int align;

      ref.is_read = false;
      if (gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_MASK_LOAD:
	    if (gimple_call_lhs (stmt) == NULL_TREE)
	      break;
	    ref.is_read = true;
	    /* FALLTHRU */
	  case IFN_MASK_STORE:
	    /* Build a MEM_REF representing the masked access; its
	       type carries the alignment given by argument 1.  */
	    ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
	    align = tree_to_shwi (gimple_call_arg (stmt, 1));
	    if (ref.is_read)
	      type = TREE_TYPE (gimple_call_lhs (stmt));
	    else
	      type = TREE_TYPE (gimple_call_arg (stmt, 3));
	    if (TYPE_ALIGN (type) != align)
	      type = build_aligned_type (type, align);
	    /* The access is conditional: it happens only for lanes
	       enabled by the mask.  */
	    ref.is_conditional_in_stmt = true;
	    ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
				   ptr);
	    references->safe_push (ref);
	    return false;
	  default:
	    break;
	  }

      /* Record each by-reference argument as a read.  */
      op0 = gimple_call_lhs (stmt);
      n = gimple_call_num_args (stmt);
      for (i = 0; i < n; i++)
	{
	  op1 = gimple_call_arg (stmt, i);

	  if (DECL_P (op1)
	      || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
	    {
	      ref.ref = op1;
	      ref.is_read = true;
	      ref.is_conditional_in_stmt = false;
	      references->safe_push (ref);
	    }
	}
    }
  else
    return clobbers_memory;

  /* Record the LHS (of the assignment or call) as a write.  */
  if (op0
      && (DECL_P (op0)
	  || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
    {
      ref.ref = op0;
      ref.is_read = false;
      ref.is_conditional_in_stmt = false;
      references->safe_push (ref);
    }
  return clobbers_memory;
}
5038 /* Returns true if the loop-nest has any data reference. */
5040 bool
5041 loop_nest_has_data_refs (loop_p loop)
5043 basic_block *bbs = get_loop_body (loop);
5044 auto_vec<data_ref_loc, 3> references;
5046 for (unsigned i = 0; i < loop->num_nodes; i++)
5048 basic_block bb = bbs[i];
5049 gimple_stmt_iterator bsi;
5051 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5053 gimple *stmt = gsi_stmt (bsi);
5054 get_references_in_stmt (stmt, &references);
5055 if (references.length ())
5057 free (bbs);
5058 return true;
5062 free (bbs);
5063 return false;
5066 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
5067 reference, returns false, otherwise returns true. NEST is the outermost
5068 loop of the loop nest in which the references should be analyzed. */
5070 bool
5071 find_data_references_in_stmt (struct loop *nest, gimple *stmt,
5072 vec<data_reference_p> *datarefs)
5074 unsigned i;
5075 auto_vec<data_ref_loc, 2> references;
5076 data_ref_loc *ref;
5077 bool ret = true;
5078 data_reference_p dr;
5080 if (get_references_in_stmt (stmt, &references))
5081 return false;
5083 FOR_EACH_VEC_ELT (references, i, ref)
5085 dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
5086 loop_containing_stmt (stmt), ref->ref,
5087 stmt, ref->is_read, ref->is_conditional_in_stmt);
5088 gcc_assert (dr != NULL);
5089 datarefs->safe_push (dr);
5092 return ret;
5095 /* Stores the data references in STMT to DATAREFS. If there is an
5096 unanalyzable reference, returns false, otherwise returns true.
5097 NEST is the outermost loop of the loop nest in which the references
5098 should be instantiated, LOOP is the loop in which the references
5099 should be analyzed. */
5101 bool
5102 graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
5103 vec<data_reference_p> *datarefs)
5105 unsigned i;
5106 auto_vec<data_ref_loc, 2> references;
5107 data_ref_loc *ref;
5108 bool ret = true;
5109 data_reference_p dr;
5111 if (get_references_in_stmt (stmt, &references))
5112 return false;
5114 FOR_EACH_VEC_ELT (references, i, ref)
5116 dr = create_data_ref (nest, loop, ref->ref, stmt, ref->is_read,
5117 ref->is_conditional_in_stmt);
5118 gcc_assert (dr != NULL);
5119 datarefs->safe_push (dr);
5122 return ret;
5125 /* Search the data references in LOOP, and record the information into
5126 DATAREFS. Returns chrec_dont_know when failing to analyze a
5127 difficult case, returns NULL_TREE otherwise. */
5129 tree
5130 find_data_references_in_bb (struct loop *loop, basic_block bb,
5131 vec<data_reference_p> *datarefs)
5133 gimple_stmt_iterator bsi;
5135 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5137 gimple *stmt = gsi_stmt (bsi);
5139 if (!find_data_references_in_stmt (loop, stmt, datarefs))
5141 struct data_reference *res;
5142 res = XCNEW (struct data_reference);
5143 datarefs->safe_push (res);
5145 return chrec_dont_know;
5149 return NULL_TREE;
5152 /* Search the data references in LOOP, and record the information into
5153 DATAREFS. Returns chrec_dont_know when failing to analyze a
5154 difficult case, returns NULL_TREE otherwise.
5156 TODO: This function should be made smarter so that it can handle address
5157 arithmetic as if they were array accesses, etc. */
5159 tree
5160 find_data_references_in_loop (struct loop *loop,
5161 vec<data_reference_p> *datarefs)
5163 basic_block bb, *bbs;
5164 unsigned int i;
5166 bbs = get_loop_body_in_dom_order (loop);
5168 for (i = 0; i < loop->num_nodes; i++)
5170 bb = bbs[i];
5172 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
5174 free (bbs);
5175 return chrec_dont_know;
5178 free (bbs);
5180 return NULL_TREE;
5183 /* Return the alignment in bytes that DRB is guaranteed to have at all
5184 times. */
5186 unsigned int
5187 dr_alignment (innermost_loop_behavior *drb)
5189 /* Get the alignment of BASE_ADDRESS + INIT. */
5190 unsigned int alignment = drb->base_alignment;
5191 unsigned int misalignment = (drb->base_misalignment
5192 + TREE_INT_CST_LOW (drb->init));
5193 if (misalignment != 0)
5194 alignment = MIN (alignment, misalignment & -misalignment);
5196 /* Cap it to the alignment of OFFSET. */
5197 if (!integer_zerop (drb->offset))
5198 alignment = MIN (alignment, drb->offset_alignment);
5200 /* Cap it to the alignment of STEP. */
5201 if (!integer_zerop (drb->step))
5202 alignment = MIN (alignment, drb->step_alignment);
5204 return alignment;
/* If BASE is a pointer-typed SSA name, try to find the object that it
   is based on.  Return this object X on success and store the alignment
   in bytes of BASE - &X in *ALIGNMENT_OUT.  */

static tree
get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
{
  if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL_TREE;

  gimple *def = SSA_NAME_DEF_STMT (base);
  base = analyze_scalar_evolution (loop_containing_stmt (def), base);

  /* Peel chrecs and record the minimum alignment preserved by
     all steps.  */
  unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
  while (TREE_CODE (base) == POLYNOMIAL_CHREC)
    {
      unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
      alignment = MIN (alignment, step_alignment);
      base = CHREC_LEFT (base);
    }

  /* Punt if the expression is too complicated to handle.  */
  if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL_TREE;

  /* The only useful cases are those for which a dereference folds to something
     other than an INDIRECT_REF.  */
  tree ref_type = TREE_TYPE (TREE_TYPE (base));
  tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
  if (!ref)
    return NULL_TREE;

  /* Analyze the base to which the steps we peeled were applied.  */
  poly_int64 bitsize, bitpos, bytepos;
  machine_mode mode;
  int unsignedp, reversep, volatilep;
  tree offset;
  base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
			      &unsignedp, &reversep, &volatilep);
  if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
    return NULL_TREE;

  /* Restrict the alignment to that guaranteed by the offsets.  */
  unsigned int bytepos_alignment = known_alignment (bytepos);
  if (bytepos_alignment != 0)
    alignment = MIN (alignment, bytepos_alignment);
  if (offset)
    {
      unsigned int offset_alignment = highest_pow2_factor (offset);
      alignment = MIN (alignment, offset_alignment);
    }

  *alignment_out = alignment;
  return base;
}
5265 /* Return the object whose alignment would need to be changed in order
5266 to increase the alignment of ADDR. Store the maximum achievable
5267 alignment in *MAX_ALIGNMENT. */
5269 tree
5270 get_base_for_alignment (tree addr, unsigned int *max_alignment)
5272 tree base = get_base_for_alignment_1 (addr, max_alignment);
5273 if (base)
5274 return base;
5276 if (TREE_CODE (addr) == ADDR_EXPR)
5277 addr = TREE_OPERAND (addr, 0);
5278 *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
5279 return addr;
5282 /* Recursive helper function. */
5284 static bool
5285 find_loop_nest_1 (struct loop *loop, vec<loop_p> *loop_nest)
5287 /* Inner loops of the nest should not contain siblings. Example:
5288 when there are two consecutive loops,
5290 | loop_0
5291 | loop_1
5292 | A[{0, +, 1}_1]
5293 | endloop_1
5294 | loop_2
5295 | A[{0, +, 1}_2]
5296 | endloop_2
5297 | endloop_0
5299 the dependence relation cannot be captured by the distance
5300 abstraction. */
5301 if (loop->next)
5302 return false;
5304 loop_nest->safe_push (loop);
5305 if (loop->inner)
5306 return find_loop_nest_1 (loop->inner, loop_nest);
5307 return true;
5310 /* Return false when the LOOP is not well nested. Otherwise return
5311 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
5312 contain the loops from the outermost to the innermost, as they will
5313 appear in the classic distance vector. */
5315 bool
5316 find_loop_nest (struct loop *loop, vec<loop_p> *loop_nest)
5318 loop_nest->safe_push (loop);
5319 if (loop->inner)
5320 return find_loop_nest_1 (loop->inner, loop_nest);
5321 return true;
/* Returns true when the data dependences have been computed, false otherwise.
   Given a loop nest LOOP, the following vectors are returned:
   DATAREFS is initialized to all the array elements contained in this loop,
   DEPENDENCE_RELATIONS contains the relations between the data references.
   Compute read-read and self relations if
   COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */

bool
compute_data_dependences_for_loop (struct loop *loop,
				   bool compute_self_and_read_read_dependences,
				   vec<loop_p> *loop_nest,
				   vec<data_reference_p> *datarefs,
				   vec<ddr_p> *dependence_relations)
{
  bool res = true;

  memset (&dependence_stats, 0, sizeof (dependence_stats));

  /* If the loop nest is not well formed, or one of the data references
     is not computable, give up without spending time to compute other
     dependences.  */
  if (!loop
      || !find_loop_nest (loop, loop_nest)
      || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
      || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
				   compute_self_and_read_read_dependences))
    res = false;

  if (dump_file && (dump_flags & TDF_STATS))
    {
      fprintf (dump_file, "Dependence tester statistics:\n");

      fprintf (dump_file, "Number of dependence tests: %d\n",
	       dependence_stats.num_dependence_tests);
      fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
	       dependence_stats.num_dependence_dependent);
      fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
	       dependence_stats.num_dependence_independent);
      fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
	       dependence_stats.num_dependence_undetermined);

      fprintf (dump_file, "Number of subscript tests: %d\n",
	       dependence_stats.num_subscript_tests);
      fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
	       dependence_stats.num_subscript_undetermined);
      fprintf (dump_file, "Number of same subscript function: %d\n",
	       dependence_stats.num_same_subscript_function);

      fprintf (dump_file, "Number of ziv tests: %d\n",
	       dependence_stats.num_ziv);
      fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
	       dependence_stats.num_ziv_dependent);
      fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
	       dependence_stats.num_ziv_independent);
      fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
	       dependence_stats.num_ziv_unimplemented);

      fprintf (dump_file, "Number of siv tests: %d\n",
	       dependence_stats.num_siv);
      fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
	       dependence_stats.num_siv_dependent);
      fprintf (dump_file, "Number of siv tests returning independent: %d\n",
	       dependence_stats.num_siv_independent);
      fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
	       dependence_stats.num_siv_unimplemented);

      fprintf (dump_file, "Number of miv tests: %d\n",
	       dependence_stats.num_miv);
      fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
	       dependence_stats.num_miv_dependent);
      fprintf (dump_file, "Number of miv tests returning independent: %d\n",
	       dependence_stats.num_miv_independent);
      fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
	       dependence_stats.num_miv_unimplemented);
    }

  return res;
}
5403 /* Free the memory used by a data dependence relation DDR. */
5405 void
5406 free_dependence_relation (struct data_dependence_relation *ddr)
5408 if (ddr == NULL)
5409 return;
5411 if (DDR_SUBSCRIPTS (ddr).exists ())
5412 free_subscripts (DDR_SUBSCRIPTS (ddr));
5413 DDR_DIST_VECTS (ddr).release ();
5414 DDR_DIR_VECTS (ddr).release ();
5416 free (ddr);
5419 /* Free the memory used by the data dependence relations from
5420 DEPENDENCE_RELATIONS. */
5422 void
5423 free_dependence_relations (vec<ddr_p> dependence_relations)
5425 unsigned int i;
5426 struct data_dependence_relation *ddr;
5428 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
5429 if (ddr)
5430 free_dependence_relation (ddr);
5432 dependence_relations.release ();
5435 /* Free the memory used by the data references from DATAREFS. */
5437 void
5438 free_data_refs (vec<data_reference_p> datarefs)
5440 unsigned int i;
5441 struct data_reference *dr;
5443 FOR_EACH_VEC_ELT (datarefs, i, dr)
5444 free_data_ref (dr);
5445 datarefs.release ();
5448 /* Common routine implementing both dr_direction_indicator and
5449 dr_zero_step_indicator. Return USEFUL_MIN if the indicator is known
5450 to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
5451 Return the step as the indicator otherwise. */
5453 static tree
5454 dr_step_indicator (struct data_reference *dr, int useful_min)
5456 tree step = DR_STEP (dr);
5457 if (!step)
5458 return NULL_TREE;
5459 STRIP_NOPS (step);
5460 /* Look for cases where the step is scaled by a positive constant
5461 integer, which will often be the access size. If the multiplication
5462 doesn't change the sign (due to overflow effects) then we can
5463 test the unscaled value instead. */
5464 if (TREE_CODE (step) == MULT_EXPR
5465 && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
5466 && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
5468 tree factor = TREE_OPERAND (step, 1);
5469 step = TREE_OPERAND (step, 0);
5471 /* Strip widening and truncating conversions as well as nops. */
5472 if (CONVERT_EXPR_P (step)
5473 && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
5474 step = TREE_OPERAND (step, 0);
5475 tree type = TREE_TYPE (step);
5477 /* Get the range of step values that would not cause overflow. */
5478 widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
5479 / wi::to_widest (factor));
5480 widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
5481 / wi::to_widest (factor));
5483 /* Get the range of values that the unconverted step actually has. */
5484 wide_int step_min, step_max;
5485 if (TREE_CODE (step) != SSA_NAME
5486 || get_range_info (step, &step_min, &step_max) != VR_RANGE)
5488 step_min = wi::to_wide (TYPE_MIN_VALUE (type));
5489 step_max = wi::to_wide (TYPE_MAX_VALUE (type));
5492 /* Check whether the unconverted step has an acceptable range. */
5493 signop sgn = TYPE_SIGN (type);
5494 if (wi::les_p (minv, widest_int::from (step_min, sgn))
5495 && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
5497 if (wi::ge_p (step_min, useful_min, sgn))
5498 return ssize_int (useful_min);
5499 else if (wi::lt_p (step_max, 0, sgn))
5500 return ssize_int (-1);
5501 else
5502 return fold_convert (ssizetype, step);
5505 return DR_STEP (dr);
5508 /* Return a value that is negative iff DR has a negative step. */
5510 tree
5511 dr_direction_indicator (struct data_reference *dr)
5513 return dr_step_indicator (dr, 0);
5516 /* Return a value that is zero iff DR has a zero step. */
5518 tree
5519 dr_zero_step_indicator (struct data_reference *dr)
5521 return dr_step_indicator (dr, 1);
5524 /* Return true if DR is known to have a nonnegative (but possibly zero)
5525 step. */
5527 bool
5528 dr_known_forward_stride_p (struct data_reference *dr)
5530 tree indicator = dr_direction_indicator (dr);
5531 tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
5532 fold_convert (ssizetype, indicator),
5533 ssize_int (0));
5534 return neg_step_val && integer_zerop (neg_step_val);