* tree-loop-distribution.c (bb_top_order_index): New.
[official-gcc.git] / gcc / tree-data-ref.c
blobb7f9a570abb8f23c61b1047eb4a4dd77b992e690
1 /* Data references and dependences detectors.
2 Copyright (C) 2003-2017 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass walks a given loop structure searching for array
22 references. The information about the array accesses is recorded
23 in DATA_REFERENCE structures.
25 The basic test for determining the dependences is:
26 given two access functions chrec1 and chrec2 to a same array, and
27 x and y two vectors from the iteration domain, the same element of
28 the array is accessed twice at iterations x and y if and only if:
29 | chrec1 (x) == chrec2 (y).
31 The goals of this analysis are:
33 - to determine the independence: the relation between two
34 independent accesses is qualified with the chrec_known (this
35 information allows a loop parallelization),
37 - when two data references access the same data, to qualify the
38 dependence relation with classic dependence representations:
40 - distance vectors
41 - direction vectors
42 - loop carried level dependence
43 - polyhedron dependence
44 or with the chains of recurrences based representation,
46 - to define a knowledge base for storing the data dependence
47 information,
49 - to define an interface to access this data.
52 Definitions:
54 - subscript: given two array accesses a subscript is the tuple
55 composed of the access functions for a given dimension. Example:
56 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57 (f1, g1), (f2, g2), (f3, g3).
59 - Diophantine equation: an equation whose coefficients and
60 solutions are integer constants, for example the equation
61 | 3*x + 2*y = 1
62 has an integer solution x = 1 and y = -1.
64 References:
66 - "Advanced Compilation for High Performance Computing" by Randy
67 Allen and Ken Kennedy.
68 http://citeseer.ist.psu.edu/goff91practical.html
70 - "Loop Transformations for Restructuring Compilers - The Foundations"
71 by Utpal Banerjee.
76 #include "config.h"
77 #include "system.h"
78 #include "coretypes.h"
79 #include "backend.h"
80 #include "rtl.h"
81 #include "tree.h"
82 #include "gimple.h"
83 #include "gimple-pretty-print.h"
84 #include "alias.h"
85 #include "fold-const.h"
86 #include "expr.h"
87 #include "gimple-iterator.h"
88 #include "tree-ssa-loop-niter.h"
89 #include "tree-ssa-loop.h"
90 #include "tree-ssa.h"
91 #include "cfgloop.h"
92 #include "tree-data-ref.h"
93 #include "tree-scalar-evolution.h"
94 #include "dumpfile.h"
95 #include "tree-affine.h"
96 #include "params.h"
97 #include "builtins.h"
/* Counters gathered while running the dependence tests; printed in the
   dump file when details are requested so the effectiveness of the
   ZIV/SIV/MIV classification can be inspected.  */

static struct datadep_stats
{
  int num_dependence_tests;
  int num_dependence_dependent;
  int num_dependence_independent;
  int num_dependence_undetermined;

  int num_subscript_tests;
  int num_subscript_undetermined;
  int num_same_subscript_function;

  int num_ziv;
  int num_ziv_independent;
  int num_ziv_dependent;
  int num_ziv_unimplemented;

  int num_siv;
  int num_siv_independent;
  int num_siv_dependent;
  int num_siv_unimplemented;

  int num_miv;
  int num_miv_independent;
  int num_miv_dependent;
  int num_miv_unimplemented;
} dependence_stats;
126 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
127 struct data_reference *,
128 struct data_reference *,
129 struct loop *);
130 /* Returns true iff A divides B. */
132 static inline bool
133 tree_fold_divides_p (const_tree a, const_tree b)
135 gcc_assert (TREE_CODE (a) == INTEGER_CST);
136 gcc_assert (TREE_CODE (b) == INTEGER_CST);
137 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
/* Returns true iff A divides B.  A must be nonzero (callers pass loop
   strides/gcds that are never zero).  */

static inline bool
int_divides_p (int a, int b)
{
  return ((b % a) == 0);
}
150 /* Dump into FILE all the data references from DATAREFS. */
152 static void
153 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
155 unsigned int i;
156 struct data_reference *dr;
158 FOR_EACH_VEC_ELT (datarefs, i, dr)
159 dump_data_reference (file, dr);
162 /* Unified dump into FILE all the data references from DATAREFS. */
164 DEBUG_FUNCTION void
165 debug (vec<data_reference_p> &ref)
167 dump_data_references (stderr, ref);
170 DEBUG_FUNCTION void
171 debug (vec<data_reference_p> *ptr)
173 if (ptr)
174 debug (*ptr);
175 else
176 fprintf (stderr, "<nil>\n");
180 /* Dump into STDERR all the data references from DATAREFS. */
182 DEBUG_FUNCTION void
183 debug_data_references (vec<data_reference_p> datarefs)
185 dump_data_references (stderr, datarefs);
188 /* Print to STDERR the data_reference DR. */
190 DEBUG_FUNCTION void
191 debug_data_reference (struct data_reference *dr)
193 dump_data_reference (stderr, dr);
196 /* Dump function for a DATA_REFERENCE structure. */
198 void
199 dump_data_reference (FILE *outf,
200 struct data_reference *dr)
202 unsigned int i;
204 fprintf (outf, "#(Data Ref: \n");
205 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
206 fprintf (outf, "# stmt: ");
207 print_gimple_stmt (outf, DR_STMT (dr), 0);
208 fprintf (outf, "# ref: ");
209 print_generic_stmt (outf, DR_REF (dr));
210 fprintf (outf, "# base_object: ");
211 print_generic_stmt (outf, DR_BASE_OBJECT (dr));
213 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
215 fprintf (outf, "# Access function %d: ", i);
216 print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
218 fprintf (outf, "#)\n");
221 /* Unified dump function for a DATA_REFERENCE structure. */
223 DEBUG_FUNCTION void
224 debug (data_reference &ref)
226 dump_data_reference (stderr, &ref);
229 DEBUG_FUNCTION void
230 debug (data_reference *ptr)
232 if (ptr)
233 debug (*ptr);
234 else
235 fprintf (stderr, "<nil>\n");
239 /* Dumps the affine function described by FN to the file OUTF. */
241 DEBUG_FUNCTION void
242 dump_affine_function (FILE *outf, affine_fn fn)
244 unsigned i;
245 tree coef;
247 print_generic_expr (outf, fn[0], TDF_SLIM);
248 for (i = 1; fn.iterate (i, &coef); i++)
250 fprintf (outf, " + ");
251 print_generic_expr (outf, coef, TDF_SLIM);
252 fprintf (outf, " * x_%u", i);
256 /* Dumps the conflict function CF to the file OUTF. */
258 DEBUG_FUNCTION void
259 dump_conflict_function (FILE *outf, conflict_function *cf)
261 unsigned i;
263 if (cf->n == NO_DEPENDENCE)
264 fprintf (outf, "no dependence");
265 else if (cf->n == NOT_KNOWN)
266 fprintf (outf, "not known");
267 else
269 for (i = 0; i < cf->n; i++)
271 if (i != 0)
272 fprintf (outf, " ");
273 fprintf (outf, "[");
274 dump_affine_function (outf, cf->fns[i]);
275 fprintf (outf, "]");
280 /* Dump function for a SUBSCRIPT structure. */
282 DEBUG_FUNCTION void
283 dump_subscript (FILE *outf, struct subscript *subscript)
285 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
287 fprintf (outf, "\n (subscript \n");
288 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
289 dump_conflict_function (outf, cf);
290 if (CF_NONTRIVIAL_P (cf))
292 tree last_iteration = SUB_LAST_CONFLICT (subscript);
293 fprintf (outf, "\n last_conflict: ");
294 print_generic_expr (outf, last_iteration);
297 cf = SUB_CONFLICTS_IN_B (subscript);
298 fprintf (outf, "\n iterations_that_access_an_element_twice_in_B: ");
299 dump_conflict_function (outf, cf);
300 if (CF_NONTRIVIAL_P (cf))
302 tree last_iteration = SUB_LAST_CONFLICT (subscript);
303 fprintf (outf, "\n last_conflict: ");
304 print_generic_expr (outf, last_iteration);
307 fprintf (outf, "\n (Subscript distance: ");
308 print_generic_expr (outf, SUB_DISTANCE (subscript));
309 fprintf (outf, " ))\n");
312 /* Print the classic direction vector DIRV to OUTF. */
314 DEBUG_FUNCTION void
315 print_direction_vector (FILE *outf,
316 lambda_vector dirv,
317 int length)
319 int eq;
321 for (eq = 0; eq < length; eq++)
323 enum data_dependence_direction dir = ((enum data_dependence_direction)
324 dirv[eq]);
326 switch (dir)
328 case dir_positive:
329 fprintf (outf, " +");
330 break;
331 case dir_negative:
332 fprintf (outf, " -");
333 break;
334 case dir_equal:
335 fprintf (outf, " =");
336 break;
337 case dir_positive_or_equal:
338 fprintf (outf, " +=");
339 break;
340 case dir_positive_or_negative:
341 fprintf (outf, " +-");
342 break;
343 case dir_negative_or_equal:
344 fprintf (outf, " -=");
345 break;
346 case dir_star:
347 fprintf (outf, " *");
348 break;
349 default:
350 fprintf (outf, "indep");
351 break;
354 fprintf (outf, "\n");
357 /* Print a vector of direction vectors. */
359 DEBUG_FUNCTION void
360 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
361 int length)
363 unsigned j;
364 lambda_vector v;
366 FOR_EACH_VEC_ELT (dir_vects, j, v)
367 print_direction_vector (outf, v, length);
370 /* Print out a vector VEC of length N to OUTFILE. */
372 DEBUG_FUNCTION void
373 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
375 int i;
377 for (i = 0; i < n; i++)
378 fprintf (outfile, "%3d ", vector[i]);
379 fprintf (outfile, "\n");
382 /* Print a vector of distance vectors. */
384 DEBUG_FUNCTION void
385 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
386 int length)
388 unsigned j;
389 lambda_vector v;
391 FOR_EACH_VEC_ELT (dist_vects, j, v)
392 print_lambda_vector (outf, v, length);
395 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
397 DEBUG_FUNCTION void
398 dump_data_dependence_relation (FILE *outf,
399 struct data_dependence_relation *ddr)
401 struct data_reference *dra, *drb;
403 fprintf (outf, "(Data Dep: \n");
405 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
407 if (ddr)
409 dra = DDR_A (ddr);
410 drb = DDR_B (ddr);
411 if (dra)
412 dump_data_reference (outf, dra);
413 else
414 fprintf (outf, " (nil)\n");
415 if (drb)
416 dump_data_reference (outf, drb);
417 else
418 fprintf (outf, " (nil)\n");
420 fprintf (outf, " (don't know)\n)\n");
421 return;
424 dra = DDR_A (ddr);
425 drb = DDR_B (ddr);
426 dump_data_reference (outf, dra);
427 dump_data_reference (outf, drb);
429 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
430 fprintf (outf, " (no dependence)\n");
432 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
434 unsigned int i;
435 struct loop *loopi;
437 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
439 fprintf (outf, " access_fn_A: ");
440 print_generic_stmt (outf, DR_ACCESS_FN (dra, i));
441 fprintf (outf, " access_fn_B: ");
442 print_generic_stmt (outf, DR_ACCESS_FN (drb, i));
443 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
446 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
447 fprintf (outf, " loop nest: (");
448 FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
449 fprintf (outf, "%d ", loopi->num);
450 fprintf (outf, ")\n");
452 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
454 fprintf (outf, " distance_vector: ");
455 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
456 DDR_NB_LOOPS (ddr));
459 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
461 fprintf (outf, " direction_vector: ");
462 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
463 DDR_NB_LOOPS (ddr));
467 fprintf (outf, ")\n");
470 /* Debug version. */
472 DEBUG_FUNCTION void
473 debug_data_dependence_relation (struct data_dependence_relation *ddr)
475 dump_data_dependence_relation (stderr, ddr);
478 /* Dump into FILE all the dependence relations from DDRS. */
480 DEBUG_FUNCTION void
481 dump_data_dependence_relations (FILE *file,
482 vec<ddr_p> ddrs)
484 unsigned int i;
485 struct data_dependence_relation *ddr;
487 FOR_EACH_VEC_ELT (ddrs, i, ddr)
488 dump_data_dependence_relation (file, ddr);
491 DEBUG_FUNCTION void
492 debug (vec<ddr_p> &ref)
494 dump_data_dependence_relations (stderr, ref);
497 DEBUG_FUNCTION void
498 debug (vec<ddr_p> *ptr)
500 if (ptr)
501 debug (*ptr);
502 else
503 fprintf (stderr, "<nil>\n");
507 /* Dump to STDERR all the dependence relations from DDRS. */
509 DEBUG_FUNCTION void
510 debug_data_dependence_relations (vec<ddr_p> ddrs)
512 dump_data_dependence_relations (stderr, ddrs);
515 /* Dumps the distance and direction vectors in FILE. DDRS contains
516 the dependence relations, and VECT_SIZE is the size of the
517 dependence vectors, or in other words the number of loops in the
518 considered nest. */
520 DEBUG_FUNCTION void
521 dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
523 unsigned int i, j;
524 struct data_dependence_relation *ddr;
525 lambda_vector v;
527 FOR_EACH_VEC_ELT (ddrs, i, ddr)
528 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
530 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), j, v)
532 fprintf (file, "DISTANCE_V (");
533 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
534 fprintf (file, ")\n");
537 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), j, v)
539 fprintf (file, "DIRECTION_V (");
540 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
541 fprintf (file, ")\n");
545 fprintf (file, "\n\n");
548 /* Dumps the data dependence relations DDRS in FILE. */
550 DEBUG_FUNCTION void
551 dump_ddrs (FILE *file, vec<ddr_p> ddrs)
553 unsigned int i;
554 struct data_dependence_relation *ddr;
556 FOR_EACH_VEC_ELT (ddrs, i, ddr)
557 dump_data_dependence_relation (file, ddr);
559 fprintf (file, "\n\n");
562 DEBUG_FUNCTION void
563 debug_ddrs (vec<ddr_p> ddrs)
565 dump_ddrs (stderr, ddrs);
568 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
569 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
570 constant of type ssizetype, and returns true. If we cannot do this
571 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
572 is returned. */
574 static bool
575 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
576 tree *var, tree *off)
578 tree var0, var1;
579 tree off0, off1;
580 enum tree_code ocode = code;
582 *var = NULL_TREE;
583 *off = NULL_TREE;
585 switch (code)
587 case INTEGER_CST:
588 *var = build_int_cst (type, 0);
589 *off = fold_convert (ssizetype, op0);
590 return true;
592 case POINTER_PLUS_EXPR:
593 ocode = PLUS_EXPR;
594 /* FALLTHROUGH */
595 case PLUS_EXPR:
596 case MINUS_EXPR:
597 split_constant_offset (op0, &var0, &off0);
598 split_constant_offset (op1, &var1, &off1);
599 *var = fold_build2 (code, type, var0, var1);
600 *off = size_binop (ocode, off0, off1);
601 return true;
603 case MULT_EXPR:
604 if (TREE_CODE (op1) != INTEGER_CST)
605 return false;
607 split_constant_offset (op0, &var0, &off0);
608 *var = fold_build2 (MULT_EXPR, type, var0, op1);
609 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
610 return true;
612 case ADDR_EXPR:
614 tree base, poffset;
615 HOST_WIDE_INT pbitsize, pbitpos;
616 machine_mode pmode;
617 int punsignedp, preversep, pvolatilep;
619 op0 = TREE_OPERAND (op0, 0);
620 base
621 = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
622 &punsignedp, &preversep, &pvolatilep);
624 if (pbitpos % BITS_PER_UNIT != 0)
625 return false;
626 base = build_fold_addr_expr (base);
627 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
629 if (poffset)
631 split_constant_offset (poffset, &poffset, &off1);
632 off0 = size_binop (PLUS_EXPR, off0, off1);
633 if (POINTER_TYPE_P (TREE_TYPE (base)))
634 base = fold_build_pointer_plus (base, poffset);
635 else
636 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
637 fold_convert (TREE_TYPE (base), poffset));
640 var0 = fold_convert (type, base);
642 /* If variable length types are involved, punt, otherwise casts
643 might be converted into ARRAY_REFs in gimplify_conversion.
644 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
645 possibly no longer appears in current GIMPLE, might resurface.
646 This perhaps could run
647 if (CONVERT_EXPR_P (var0))
649 gimplify_conversion (&var0);
650 // Attempt to fill in any within var0 found ARRAY_REF's
651 // element size from corresponding op embedded ARRAY_REF,
652 // if unsuccessful, just punt.
653 } */
654 while (POINTER_TYPE_P (type))
655 type = TREE_TYPE (type);
656 if (int_size_in_bytes (type) < 0)
657 return false;
659 *var = var0;
660 *off = off0;
661 return true;
664 case SSA_NAME:
666 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
667 return false;
669 gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
670 enum tree_code subcode;
672 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
673 return false;
675 var0 = gimple_assign_rhs1 (def_stmt);
676 subcode = gimple_assign_rhs_code (def_stmt);
677 var1 = gimple_assign_rhs2 (def_stmt);
679 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
681 CASE_CONVERT:
683 /* We must not introduce undefined overflow, and we must not change the value.
684 Hence we're okay if the inner type doesn't overflow to start with
685 (pointer or signed), the outer type also is an integer or pointer
686 and the outer precision is at least as large as the inner. */
687 tree itype = TREE_TYPE (op0);
688 if ((POINTER_TYPE_P (itype)
689 || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
690 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
691 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
693 split_constant_offset (op0, &var0, off);
694 *var = fold_convert (type, var0);
695 return true;
697 return false;
700 default:
701 return false;
705 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
706 will be ssizetype. */
708 void
709 split_constant_offset (tree exp, tree *var, tree *off)
711 tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
712 enum tree_code code;
714 *var = exp;
715 *off = ssize_int (0);
716 STRIP_NOPS (exp);
718 if (tree_is_chrec (exp)
719 || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
720 return;
722 otype = TREE_TYPE (exp);
723 code = TREE_CODE (exp);
724 extract_ops_from_tree (exp, &code, &op0, &op1);
725 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
727 *var = fold_convert (type, e);
728 *off = o;
732 /* Returns the address ADDR of an object in a canonical shape (without nop
733 casts, and with type of pointer to the object). */
735 static tree
736 canonicalize_base_object_address (tree addr)
738 tree orig = addr;
740 STRIP_NOPS (addr);
742 /* The base address may be obtained by casting from integer, in that case
743 keep the cast. */
744 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
745 return orig;
747 if (TREE_CODE (addr) != ADDR_EXPR)
748 return addr;
750 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
753 /* Analyze the behavior of memory reference REF. There are two modes:
755 - BB analysis. In this case we simply split the address into base,
756 init and offset components, without reference to any containing loop.
757 The resulting base and offset are general expressions and they can
758 vary arbitrarily from one iteration of the containing loop to the next.
759 The step is always zero.
761 - loop analysis. In this case we analyze the reference both wrt LOOP
762 and on the basis that the reference occurs (is "used") in LOOP;
763 see the comment above analyze_scalar_evolution_in_loop for more
764 information about this distinction. The base, init, offset and
765 step fields are all invariant in LOOP.
767 Perform BB analysis if LOOP is null, or if LOOP is the function's
768 dummy outermost loop. In other cases perform loop analysis.
770 Return true if the analysis succeeded and store the results in DRB if so.
771 BB analysis can only fail for bitfield or reversed-storage accesses. */
773 bool
774 dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
775 struct loop *loop)
777 HOST_WIDE_INT pbitsize, pbitpos;
778 tree base, poffset;
779 machine_mode pmode;
780 int punsignedp, preversep, pvolatilep;
781 affine_iv base_iv, offset_iv;
782 tree init, dinit, step;
783 bool in_loop = (loop && loop->num);
785 if (dump_file && (dump_flags & TDF_DETAILS))
786 fprintf (dump_file, "analyze_innermost: ");
788 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
789 &punsignedp, &preversep, &pvolatilep);
790 gcc_assert (base != NULL_TREE);
792 if (pbitpos % BITS_PER_UNIT != 0)
794 if (dump_file && (dump_flags & TDF_DETAILS))
795 fprintf (dump_file, "failed: bit offset alignment.\n");
796 return false;
799 if (preversep)
801 if (dump_file && (dump_flags & TDF_DETAILS))
802 fprintf (dump_file, "failed: reverse storage order.\n");
803 return false;
806 /* Calculate the alignment and misalignment for the inner reference. */
807 unsigned int HOST_WIDE_INT base_misalignment;
808 unsigned int base_alignment;
809 get_object_alignment_1 (base, &base_alignment, &base_misalignment);
811 /* There are no bitfield references remaining in BASE, so the values
812 we got back must be whole bytes. */
813 gcc_assert (base_alignment % BITS_PER_UNIT == 0
814 && base_misalignment % BITS_PER_UNIT == 0);
815 base_alignment /= BITS_PER_UNIT;
816 base_misalignment /= BITS_PER_UNIT;
818 if (TREE_CODE (base) == MEM_REF)
820 if (!integer_zerop (TREE_OPERAND (base, 1)))
822 /* Subtract MOFF from the base and add it to POFFSET instead.
823 Adjust the misalignment to reflect the amount we subtracted. */
824 offset_int moff = mem_ref_offset (base);
825 base_misalignment -= moff.to_short_addr ();
826 tree mofft = wide_int_to_tree (sizetype, moff);
827 if (!poffset)
828 poffset = mofft;
829 else
830 poffset = size_binop (PLUS_EXPR, poffset, mofft);
832 base = TREE_OPERAND (base, 0);
834 else
835 base = build_fold_addr_expr (base);
837 if (in_loop)
839 if (!simple_iv (loop, loop, base, &base_iv, true))
841 if (dump_file && (dump_flags & TDF_DETAILS))
842 fprintf (dump_file, "failed: evolution of base is not affine.\n");
843 return false;
846 else
848 base_iv.base = base;
849 base_iv.step = ssize_int (0);
850 base_iv.no_overflow = true;
853 if (!poffset)
855 offset_iv.base = ssize_int (0);
856 offset_iv.step = ssize_int (0);
858 else
860 if (!in_loop)
862 offset_iv.base = poffset;
863 offset_iv.step = ssize_int (0);
865 else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
867 if (dump_file && (dump_flags & TDF_DETAILS))
868 fprintf (dump_file, "failed: evolution of offset is not affine.\n");
869 return false;
873 init = ssize_int (pbitpos / BITS_PER_UNIT);
875 /* Subtract any constant component from the base and add it to INIT instead.
876 Adjust the misalignment to reflect the amount we subtracted. */
877 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
878 init = size_binop (PLUS_EXPR, init, dinit);
879 base_misalignment -= TREE_INT_CST_LOW (dinit);
881 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
882 init = size_binop (PLUS_EXPR, init, dinit);
884 step = size_binop (PLUS_EXPR,
885 fold_convert (ssizetype, base_iv.step),
886 fold_convert (ssizetype, offset_iv.step));
888 base = canonicalize_base_object_address (base_iv.base);
890 /* See if get_pointer_alignment can guarantee a higher alignment than
891 the one we calculated above. */
892 unsigned int HOST_WIDE_INT alt_misalignment;
893 unsigned int alt_alignment;
894 get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);
896 /* As above, these values must be whole bytes. */
897 gcc_assert (alt_alignment % BITS_PER_UNIT == 0
898 && alt_misalignment % BITS_PER_UNIT == 0);
899 alt_alignment /= BITS_PER_UNIT;
900 alt_misalignment /= BITS_PER_UNIT;
902 if (base_alignment < alt_alignment)
904 base_alignment = alt_alignment;
905 base_misalignment = alt_misalignment;
908 drb->base_address = base;
909 drb->offset = fold_convert (ssizetype, offset_iv.base);
910 drb->init = init;
911 drb->step = step;
912 drb->base_alignment = base_alignment;
913 drb->base_misalignment = base_misalignment & (base_alignment - 1);
914 drb->offset_alignment = highest_pow2_factor (offset_iv.base);
915 drb->step_alignment = highest_pow2_factor (step);
917 if (dump_file && (dump_flags & TDF_DETAILS))
918 fprintf (dump_file, "success.\n");
920 return true;
923 /* Determines the base object and the list of indices of memory reference
924 DR, analyzed in LOOP and instantiated in loop nest NEST. */
926 static void
927 dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
929 vec<tree> access_fns = vNULL;
930 tree ref, op;
931 tree base, off, access_fn;
932 basic_block before_loop;
934 /* If analyzing a basic-block there are no indices to analyze
935 and thus no access functions. */
936 if (!nest)
938 DR_BASE_OBJECT (dr) = DR_REF (dr);
939 DR_ACCESS_FNS (dr).create (0);
940 return;
943 ref = DR_REF (dr);
944 before_loop = block_before_loop (nest);
946 /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
947 into a two element array with a constant index. The base is
948 then just the immediate underlying object. */
949 if (TREE_CODE (ref) == REALPART_EXPR)
951 ref = TREE_OPERAND (ref, 0);
952 access_fns.safe_push (integer_zero_node);
954 else if (TREE_CODE (ref) == IMAGPART_EXPR)
956 ref = TREE_OPERAND (ref, 0);
957 access_fns.safe_push (integer_one_node);
960 /* Analyze access functions of dimensions we know to be independent. */
961 while (handled_component_p (ref))
963 if (TREE_CODE (ref) == ARRAY_REF)
965 op = TREE_OPERAND (ref, 1);
966 access_fn = analyze_scalar_evolution (loop, op);
967 access_fn = instantiate_scev (before_loop, loop, access_fn);
968 access_fns.safe_push (access_fn);
970 else if (TREE_CODE (ref) == COMPONENT_REF
971 && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
973 /* For COMPONENT_REFs of records (but not unions!) use the
974 FIELD_DECL offset as constant access function so we can
975 disambiguate a[i].f1 and a[i].f2. */
976 tree off = component_ref_field_offset (ref);
977 off = size_binop (PLUS_EXPR,
978 size_binop (MULT_EXPR,
979 fold_convert (bitsizetype, off),
980 bitsize_int (BITS_PER_UNIT)),
981 DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
982 access_fns.safe_push (off);
984 else
985 /* If we have an unhandled component we could not translate
986 to an access function stop analyzing. We have determined
987 our base object in this case. */
988 break;
990 ref = TREE_OPERAND (ref, 0);
993 /* If the address operand of a MEM_REF base has an evolution in the
994 analyzed nest, add it as an additional independent access-function. */
995 if (TREE_CODE (ref) == MEM_REF)
997 op = TREE_OPERAND (ref, 0);
998 access_fn = analyze_scalar_evolution (loop, op);
999 access_fn = instantiate_scev (before_loop, loop, access_fn);
1000 if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
1002 tree orig_type;
1003 tree memoff = TREE_OPERAND (ref, 1);
1004 base = initial_condition (access_fn);
1005 orig_type = TREE_TYPE (base);
1006 STRIP_USELESS_TYPE_CONVERSION (base);
1007 split_constant_offset (base, &base, &off);
1008 STRIP_USELESS_TYPE_CONVERSION (base);
1009 /* Fold the MEM_REF offset into the evolutions initial
1010 value to make more bases comparable. */
1011 if (!integer_zerop (memoff))
1013 off = size_binop (PLUS_EXPR, off,
1014 fold_convert (ssizetype, memoff));
1015 memoff = build_int_cst (TREE_TYPE (memoff), 0);
1017 /* Adjust the offset so it is a multiple of the access type
1018 size and thus we separate bases that can possibly be used
1019 to produce partial overlaps (which the access_fn machinery
1020 cannot handle). */
1021 wide_int rem;
1022 if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
1023 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
1024 && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
1025 rem = wi::mod_trunc (off, TYPE_SIZE_UNIT (TREE_TYPE (ref)), SIGNED);
1026 else
1027 /* If we can't compute the remainder simply force the initial
1028 condition to zero. */
1029 rem = off;
1030 off = wide_int_to_tree (ssizetype, wi::sub (off, rem));
1031 memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
1032 /* And finally replace the initial condition. */
1033 access_fn = chrec_replace_initial_condition
1034 (access_fn, fold_convert (orig_type, off));
1035 /* ??? This is still not a suitable base object for
1036 dr_may_alias_p - the base object needs to be an
1037 access that covers the object as whole. With
1038 an evolution in the pointer this cannot be
1039 guaranteed.
1040 As a band-aid, mark the access so we can special-case
1041 it in dr_may_alias_p. */
1042 tree old = ref;
1043 ref = fold_build2_loc (EXPR_LOCATION (ref),
1044 MEM_REF, TREE_TYPE (ref),
1045 base, memoff);
1046 MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
1047 MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
1048 DR_UNCONSTRAINED_BASE (dr) = true;
1049 access_fns.safe_push (access_fn);
1052 else if (DECL_P (ref))
1054 /* Canonicalize DR_BASE_OBJECT to MEM_REF form. */
1055 ref = build2 (MEM_REF, TREE_TYPE (ref),
1056 build_fold_addr_expr (ref),
1057 build_int_cst (reference_alias_ptr_type (ref), 0));
1060 DR_BASE_OBJECT (dr) = ref;
1061 DR_ACCESS_FNS (dr) = access_fns;
1064 /* Extracts the alias analysis information from the memory reference DR. */
1066 static void
1067 dr_analyze_alias (struct data_reference *dr)
1069 tree ref = DR_REF (dr);
1070 tree base = get_base_address (ref), addr;
1072 if (INDIRECT_REF_P (base)
1073 || TREE_CODE (base) == MEM_REF)
1075 addr = TREE_OPERAND (base, 0);
1076 if (TREE_CODE (addr) == SSA_NAME)
1077 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1081 /* Frees data reference DR. */
1083 void
1084 free_data_ref (data_reference_p dr)
1086 DR_ACCESS_FNS (dr).release ();
1087 free (dr);
1090 /* Analyzes memory reference MEMREF accessed in STMT. The reference
1091 is read if IS_READ is true, write otherwise. Returns the
1092 data_reference description of MEMREF. NEST is the outermost loop
1093 in which the reference should be instantiated, LOOP is the loop in
1094 which the data reference should be analyzed. */
1096 struct data_reference *
1097 create_data_ref (loop_p nest, loop_p loop, tree memref, gimple *stmt,
1098 bool is_read)
1100 struct data_reference *dr;
1102 if (dump_file && (dump_flags & TDF_DETAILS))
1104 fprintf (dump_file, "Creating dr for ");
1105 print_generic_expr (dump_file, memref, TDF_SLIM);
1106 fprintf (dump_file, "\n");
1109 dr = XCNEW (struct data_reference);
1110 DR_STMT (dr) = stmt;
1111 DR_REF (dr) = memref;
1112 DR_IS_READ (dr) = is_read;
1114 dr_analyze_innermost (&DR_INNERMOST (dr), memref,
1115 nest != NULL ? loop : NULL);
1116 dr_analyze_indices (dr, nest, loop);
1117 dr_analyze_alias (dr);
1119 if (dump_file && (dump_flags & TDF_DETAILS))
1121 unsigned i;
1122 fprintf (dump_file, "\tbase_address: ");
1123 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1124 fprintf (dump_file, "\n\toffset from base address: ");
1125 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1126 fprintf (dump_file, "\n\tconstant offset from base address: ");
1127 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1128 fprintf (dump_file, "\n\tstep: ");
1129 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1130 fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
1131 fprintf (dump_file, "\n\tbase misalignment: %d",
1132 DR_BASE_MISALIGNMENT (dr));
1133 fprintf (dump_file, "\n\toffset alignment: %d",
1134 DR_OFFSET_ALIGNMENT (dr));
1135 fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
1136 fprintf (dump_file, "\n\tbase_object: ");
1137 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1138 fprintf (dump_file, "\n");
1139 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1141 fprintf (dump_file, "\tAccess function %d: ", i);
1142 print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1146 return dr;
1149 /* A helper function computes order between two tree epxressions T1 and T2.
1150 This is used in comparator functions sorting objects based on the order
1151 of tree expressions. The function returns -1, 0, or 1. */
1154 data_ref_compare_tree (tree t1, tree t2)
1156 int i, cmp;
1157 enum tree_code code;
1158 char tclass;
1160 if (t1 == t2)
1161 return 0;
1162 if (t1 == NULL)
1163 return -1;
1164 if (t2 == NULL)
1165 return 1;
1167 STRIP_NOPS (t1);
1168 STRIP_NOPS (t2);
1170 if (TREE_CODE (t1) != TREE_CODE (t2))
1171 return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
1173 code = TREE_CODE (t1);
1174 switch (code)
1176 /* For const values, we can just use hash values for comparisons. */
1177 case INTEGER_CST:
1178 case REAL_CST:
1179 case FIXED_CST:
1180 case STRING_CST:
1181 case COMPLEX_CST:
1182 case VECTOR_CST:
1184 hashval_t h1 = iterative_hash_expr (t1, 0);
1185 hashval_t h2 = iterative_hash_expr (t2, 0);
1186 if (h1 != h2)
1187 return h1 < h2 ? -1 : 1;
1188 break;
1191 case SSA_NAME:
1192 cmp = data_ref_compare_tree (SSA_NAME_VAR (t1), SSA_NAME_VAR (t2));
1193 if (cmp != 0)
1194 return cmp;
1196 if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
1197 return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
1198 break;
1200 default:
1201 tclass = TREE_CODE_CLASS (code);
1203 /* For var-decl, we could compare their UIDs. */
1204 if (tclass == tcc_declaration)
1206 if (DECL_UID (t1) != DECL_UID (t2))
1207 return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
1208 break;
1211 /* For expressions with operands, compare their operands recursively. */
1212 for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
1214 cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
1215 TREE_OPERAND (t2, i));
1216 if (cmp != 0)
1217 return cmp;
1221 return 0;
1224 /* Return TRUE it's possible to resolve data dependence DDR by runtime alias
1225 check. */
1227 bool
1228 runtime_alias_check_p (ddr_p ddr, struct loop *loop, bool speed_p)
1230 if (dump_enabled_p ())
1232 dump_printf (MSG_NOTE, "consider run-time aliasing test between ");
1233 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_A (ddr)));
1234 dump_printf (MSG_NOTE, " and ");
1235 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_B (ddr)));
1236 dump_printf (MSG_NOTE, "\n");
1239 if (!speed_p)
1241 if (dump_enabled_p ())
1242 dump_printf (MSG_MISSED_OPTIMIZATION,
1243 "runtime alias check not supported when optimizing "
1244 "for size.\n");
1245 return false;
1248 /* FORNOW: We don't support versioning with outer-loop in either
1249 vectorization or loop distribution. */
1250 if (loop != NULL && loop->inner != NULL)
1252 if (dump_enabled_p ())
1253 dump_printf (MSG_MISSED_OPTIMIZATION,
1254 "runtime alias check not supported for outer loop.\n");
1255 return false;
1258 /* FORNOW: We don't support creating runtime alias tests for non-constant
1259 step. */
1260 if (TREE_CODE (DR_STEP (DDR_A (ddr))) != INTEGER_CST
1261 || TREE_CODE (DR_STEP (DDR_B (ddr))) != INTEGER_CST)
1263 if (dump_enabled_p ())
1264 dump_printf (MSG_MISSED_OPTIMIZATION,
1265 "runtime alias check not supported for non-constant "
1266 "step\n");
1267 return false;
1270 return true;
1273 /* Operator == between two dr_with_seg_len objects.
1275 This equality operator is used to make sure two data refs
1276 are the same one so that we will consider to combine the
1277 aliasing checks of those two pairs of data dependent data
1278 refs. */
1280 static bool
1281 operator == (const dr_with_seg_len& d1,
1282 const dr_with_seg_len& d2)
1284 return operand_equal_p (DR_BASE_ADDRESS (d1.dr),
1285 DR_BASE_ADDRESS (d2.dr), 0)
1286 && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
1287 && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
1288 && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0;
1291 /* Comparison function for sorting objects of dr_with_seg_len_pair_t
1292 so that we can combine aliasing checks in one scan. */
1294 static int
1295 comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
1297 const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
1298 const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
1299 const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
1300 const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;
1302 /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks
1303 if a and c have the same basic address snd step, and b and d have the same
1304 address and step. Therefore, if any a&c or b&d don't have the same address
1305 and step, we don't care the order of those two pairs after sorting. */
1306 int comp_res;
1308 if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
1309 DR_BASE_ADDRESS (b1.dr))) != 0)
1310 return comp_res;
1311 if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
1312 DR_BASE_ADDRESS (b2.dr))) != 0)
1313 return comp_res;
1314 if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
1315 DR_STEP (b1.dr))) != 0)
1316 return comp_res;
1317 if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
1318 DR_STEP (b2.dr))) != 0)
1319 return comp_res;
1320 if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
1321 DR_OFFSET (b1.dr))) != 0)
1322 return comp_res;
1323 if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
1324 DR_INIT (b1.dr))) != 0)
1325 return comp_res;
1326 if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
1327 DR_OFFSET (b2.dr))) != 0)
1328 return comp_res;
1329 if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
1330 DR_INIT (b2.dr))) != 0)
1331 return comp_res;
1333 return 0;
1336 /* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
1337 FACTOR is number of iterations that each data reference is accessed.
1339 Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
1340 we create an expression:
1342 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1343 || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
1345 for aliasing checks. However, in some cases we can decrease the number
1346 of checks by combining two checks into one. For example, suppose we have
1347 another pair of data refs store_ptr_0 & load_ptr_1, and if the following
1348 condition is satisfied:
1350 load_ptr_0 < load_ptr_1 &&
1351 load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
1353 (this condition means, in each iteration of vectorized loop, the accessed
1354 memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
1355 load_ptr_1.)
1357 we then can use only the following expression to finish the aliasing checks
1358 between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:
1360 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1361 || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))
1363 Note that we only consider that load_ptr_0 and load_ptr_1 have the same
1364 basic address. */
1366 void
1367 prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
1368 unsigned HOST_WIDE_INT factor)
1370 /* Sort the collected data ref pairs so that we can scan them once to
1371 combine all possible aliasing checks. */
1372 alias_pairs->qsort (comp_dr_with_seg_len_pair);
1374 /* Scan the sorted dr pairs and check if we can combine alias checks
1375 of two neighboring dr pairs. */
1376 for (size_t i = 1; i < alias_pairs->length (); ++i)
1378 /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2). */
1379 dr_with_seg_len *dr_a1 = &(*alias_pairs)[i-1].first,
1380 *dr_b1 = &(*alias_pairs)[i-1].second,
1381 *dr_a2 = &(*alias_pairs)[i].first,
1382 *dr_b2 = &(*alias_pairs)[i].second;
1384 /* Remove duplicate data ref pairs. */
1385 if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
1387 if (dump_enabled_p ())
1389 dump_printf (MSG_NOTE, "found equal ranges ");
1390 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a1->dr));
1391 dump_printf (MSG_NOTE, ", ");
1392 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b1->dr));
1393 dump_printf (MSG_NOTE, " and ");
1394 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a2->dr));
1395 dump_printf (MSG_NOTE, ", ");
1396 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b2->dr));
1397 dump_printf (MSG_NOTE, "\n");
1399 alias_pairs->ordered_remove (i--);
1400 continue;
1403 if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
1405 /* We consider the case that DR_B1 and DR_B2 are same memrefs,
1406 and DR_A1 and DR_A2 are two consecutive memrefs. */
1407 if (*dr_a1 == *dr_a2)
1409 std::swap (dr_a1, dr_b1);
1410 std::swap (dr_a2, dr_b2);
1413 if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
1414 DR_BASE_ADDRESS (dr_a2->dr), 0)
1415 || !operand_equal_p (DR_OFFSET (dr_a1->dr),
1416 DR_OFFSET (dr_a2->dr), 0)
1417 || !tree_fits_shwi_p (DR_INIT (dr_a1->dr))
1418 || !tree_fits_shwi_p (DR_INIT (dr_a2->dr)))
1419 continue;
1421 /* Only merge const step data references. */
1422 if (TREE_CODE (DR_STEP (dr_a1->dr)) != INTEGER_CST
1423 || TREE_CODE (DR_STEP (dr_a2->dr)) != INTEGER_CST)
1424 continue;
1426 /* DR_A1 and DR_A2 must go in the same direction. */
1427 if (tree_int_cst_compare (DR_STEP (dr_a1->dr), size_zero_node)
1428 != tree_int_cst_compare (DR_STEP (dr_a2->dr), size_zero_node))
1429 continue;
1431 bool neg_step
1432 = (tree_int_cst_compare (DR_STEP (dr_a1->dr), size_zero_node) < 0);
1434 /* We need to compute merged segment length at compilation time for
1435 dr_a1 and dr_a2, which is impossible if either one has non-const
1436 segment length. */
1437 if ((!tree_fits_uhwi_p (dr_a1->seg_len)
1438 || !tree_fits_uhwi_p (dr_a2->seg_len))
1439 && tree_int_cst_compare (DR_STEP (dr_a1->dr),
1440 DR_STEP (dr_a2->dr)) != 0)
1441 continue;
1443 /* Make sure dr_a1 starts left of dr_a2. */
1444 if (tree_int_cst_lt (DR_INIT (dr_a2->dr), DR_INIT (dr_a1->dr)))
1445 std::swap (*dr_a1, *dr_a2);
1447 bool do_remove = false;
/* DIFF is the distance in bytes between the start of dr_a1 (the leftmost
   reference after the swap above) and the start of dr_a2.  */
1448 wide_int diff = wi::sub (DR_INIT (dr_a2->dr), DR_INIT (dr_a1->dr));
1449 wide_int min_seg_len_b;
1450 tree new_seg_len;
/* A compile-time lower bound on dr_b's segment length: either the
   constant itself, or FACTOR iterations of its constant step.  */
1452 if (TREE_CODE (dr_b1->seg_len) == INTEGER_CST)
1453 min_seg_len_b = wi::abs (dr_b1->seg_len);
1454 else
1455 min_seg_len_b = wi::mul (factor, wi::abs (DR_STEP (dr_b1->dr)));
1457 /* Now we try to merge alias check dr_a1 & dr_b and dr_a2 & dr_b.
1459 Case A:
1460 check if the following condition is satisfied:
1462 DIFF - SEGMENT_LENGTH_A < SEGMENT_LENGTH_B
1464 where DIFF = DR_A2_INIT - DR_A1_INIT. However,
1465 SEGMENT_LENGTH_A or SEGMENT_LENGTH_B may not be constant so we
1466 have to make a best estimation. We can get the minimum value
1467 of SEGMENT_LENGTH_B as a constant, represented by MIN_SEG_LEN_B,
1468 then either of the following two conditions can guarantee the
1469 one above:
1471 1: DIFF <= MIN_SEG_LEN_B
1472 2: DIFF - SEGMENT_LENGTH_A < MIN_SEG_LEN_B
1473 Because DIFF - SEGMENT_LENGTH_A is done in sizetype, we need
1474 to take care of wrapping behavior in it.
1476 Case B:
1477 If the left segment does not extend beyond the start of the
1478 right segment the new segment length is that of the right
1479 plus the segment distance. The condition is like:
1481 DIFF >= SEGMENT_LENGTH_A ;SEGMENT_LENGTH_A is a constant.
1483 Note 1: Case A.2 and B combined together effectively merges every
1484 dr_a1 & dr_b and dr_a2 & dr_b when SEGMENT_LENGTH_A is const.
1486 Note 2: Above description is based on positive DR_STEP, we need to
1487 take care of negative DR_STEP for wrapping behavior. See PR80815
1488 for more information. */
1489 if (neg_step)
1491 /* Adjust diff according to access size of both references. */
1492 tree size_a1 = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a1->dr)));
1493 tree size_a2 = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a2->dr)));
1494 diff = wi::add (diff, wi::sub (size_a2, size_a1));
1495 /* Case A.1. */
1496 if (wi::leu_p (diff, min_seg_len_b)
1497 /* Case A.2 and B combined. */
1498 || (tree_fits_uhwi_p (dr_a2->seg_len)))
1500 if (tree_fits_uhwi_p (dr_a1->seg_len)
1501 && tree_fits_uhwi_p (dr_a2->seg_len))
1502 new_seg_len
1503 = wide_int_to_tree (sizetype,
1504 wi::umin (wi::sub (dr_a1->seg_len,
1505 diff),
1506 dr_a2->seg_len));
1507 else
1508 new_seg_len
1509 = size_binop (MINUS_EXPR, dr_a2->seg_len,
1510 wide_int_to_tree (sizetype, diff));
1512 dr_a2->seg_len = new_seg_len;
1513 do_remove = true;
1516 else
1518 /* Case A.1. */
1519 if (wi::leu_p (diff, min_seg_len_b)
1520 /* Case A.2 and B combined. */
1521 || (tree_fits_uhwi_p (dr_a1->seg_len)))
1523 if (tree_fits_uhwi_p (dr_a1->seg_len)
1524 && tree_fits_uhwi_p (dr_a2->seg_len))
1525 new_seg_len
1526 = wide_int_to_tree (sizetype,
1527 wi::umax (wi::add (dr_a2->seg_len,
1528 diff),
1529 dr_a1->seg_len));
1530 else
1531 new_seg_len
1532 = size_binop (PLUS_EXPR, dr_a2->seg_len,
1533 wide_int_to_tree (sizetype, diff));
1535 dr_a1->seg_len = new_seg_len;
1536 do_remove = true;
1540 if (do_remove)
1542 if (dump_enabled_p ())
1544 dump_printf (MSG_NOTE, "merging ranges for ");
1545 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a1->dr));
1546 dump_printf (MSG_NOTE, ", ");
1547 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b1->dr));
1548 dump_printf (MSG_NOTE, " and ");
1549 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a2->dr));
1550 dump_printf (MSG_NOTE, ", ");
1551 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b2->dr));
1552 dump_printf (MSG_NOTE, "\n");
/* The merged segment length was stored on dr_a2 in the neg_step case
   (so drop the earlier pair), and on dr_a1 otherwise (drop the later
   pair).  Decrement I so the new neighbor is reconsidered.  */
1554 alias_pairs->ordered_remove (neg_step ? i - 1 : i);
1555 i--;
1561 /* Given LOOP's two data references and segment lengths described by DR_A
1562 and DR_B, create expression checking if the two addresses ranges intersect
1563 with each other based on index of the two addresses. This can only be
1564 done if DR_A and DR_B refer to the same (array) object and the index
1565 is the only difference. For example:
1567 DR_A DR_B
1568 data-ref arr[i] arr[j]
1569 base_object arr arr
1570 index {i_0, +, 1}_loop {j_0, +, 1}_loop
1572 The addresses and their index are like:
1574 |<- ADDR_A ->| |<- ADDR_B ->|
1575 ------------------------------------------------------->
1576 | | | | | | | | | |
1577 ------------------------------------------------------->
1578 i_0 ... i_0+4 j_0 ... j_0+4
1580 We can create expression based on index rather than address:
1582 (i_0 + 4 < j_0 || j_0 + 4 < i_0)
1584 Note evolution step of index needs to be considered in comparison.
   Returns true and appends the check to *COND_EXPR on success; returns
   false (leaving *COND_EXPR untouched) when the index-based form is not
   applicable. */
1586 static bool
1587 create_intersect_range_checks_index (struct loop *loop, tree *cond_expr,
1588 const dr_with_seg_len& dr_a,
1589 const dr_with_seg_len& dr_b)
/* Bail out for invariant references, differing dimensionality, or
   non-constant segment lengths/steps — the index form needs all of
   these to be known. */
1591 if (integer_zerop (DR_STEP (dr_a.dr))
1592 || integer_zerop (DR_STEP (dr_b.dr))
1593 || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
1594 return false;
1596 if (!tree_fits_uhwi_p (dr_a.seg_len) || !tree_fits_uhwi_p (dr_b.seg_len))
1597 return false;
1599 if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
1600 return false;
1602 if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
1603 return false;
1605 if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
1606 return false;
1608 gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
1610 bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
1611 unsigned HOST_WIDE_INT abs_step
1612 = absu_hwi (tree_to_shwi (DR_STEP (dr_a.dr)));
1614 unsigned HOST_WIDE_INT seg_len1 = tree_to_uhwi (dr_a.seg_len);
1615 unsigned HOST_WIDE_INT seg_len2 = tree_to_uhwi (dr_b.seg_len);
1616 /* Infer the number of iterations with which the memory segment is accessed
1617 by DR. In other words, alias is checked if memory segment accessed by
1618 DR_A in some iterations intersect with memory segment accessed by DR_B
1619 in the same number of iterations.
1620 Note segment length is a linear function of number of iterations with
1621 DR_STEP as the coefficient. */
1622 unsigned HOST_WIDE_INT niter_len1 = (seg_len1 + abs_step - 1) / abs_step;
1623 unsigned HOST_WIDE_INT niter_len2 = (seg_len2 + abs_step - 1) / abs_step;
1625 unsigned int i;
1626 for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
1628 tree access1 = DR_ACCESS_FN (dr_a.dr, i);
1629 tree access2 = DR_ACCESS_FN (dr_b.dr, i);
1630 /* Two indices must be the same if they are not scev, or not scev wrto
1631 current loop being vectorized. */
1632 if (TREE_CODE (access1) != POLYNOMIAL_CHREC
1633 || TREE_CODE (access2) != POLYNOMIAL_CHREC
1634 || CHREC_VARIABLE (access1) != (unsigned)loop->num
1635 || CHREC_VARIABLE (access2) != (unsigned)loop->num)
1637 if (operand_equal_p (access1, access2, 0))
1638 continue;
1640 return false;
1642 /* The two indices must have the same step. */
1643 if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
1644 return false;
1646 tree idx_step = CHREC_RIGHT (access1);
1647 /* Index must have const step, otherwise DR_STEP won't be constant. */
1648 gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
1649 /* Index must evaluate in the same direction as DR. */
1650 gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
1652 tree min1 = CHREC_LEFT (access1);
1653 tree min2 = CHREC_LEFT (access2);
1654 if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
1655 return false;
1657 /* Ideally, alias can be checked against loop's control IV, but we
1658 need to prove linear mapping between control IV and reference
1659 index. Although that should be true, we check against (array)
1660 index of data reference. Like segment length, index length is
1661 linear function of the number of iterations with index_step as
1662 the coefficient, i.e, niter_len * idx_step. */
1663 tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
1664 build_int_cst (TREE_TYPE (min1),
1665 niter_len1));
1666 tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
1667 build_int_cst (TREE_TYPE (min2),
1668 niter_len2));
1669 tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1);
1670 tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2);
1671 /* Adjust ranges for negative step.  With a negative idx_step the
   "max" computed above is actually the low end of the range, so the
   bounds are rebuilt around the original CHREC_LEFT values. */
1672 if (neg_step)
1674 min1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1), max1, idx_step);
1675 max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1),
1676 CHREC_LEFT (access1), idx_step);
1677 min2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2), max2, idx_step);
1678 max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2),
1679 CHREC_LEFT (access2), idx_step);
/* The ranges do not intersect iff one ends before the other starts. */
1681 tree part_cond_expr
1682 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1683 fold_build2 (LE_EXPR, boolean_type_node, max1, min2),
1684 fold_build2 (LE_EXPR, boolean_type_node, max2, min1));
1685 if (*cond_expr)
1686 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1687 *cond_expr, part_cond_expr);
1688 else
1689 *cond_expr = part_cond_expr;
1691 return true;
1694 /* Given two data references and segment lengths described by DR_A and DR_B,
1695 create expression checking if the two addresses ranges intersect with
1696 each other:
1698 ((DR_A_addr_0 + DR_A_segment_length_0) <= DR_B_addr_0)
1699 || (DR_B_addr_0 + DER_B_segment_length_0) <= DR_A_addr_0)) */
1701 static void
1702 create_intersect_range_checks (struct loop *loop, tree *cond_expr,
1703 const dr_with_seg_len& dr_a,
1704 const dr_with_seg_len& dr_b)
1706 *cond_expr = NULL_TREE;
1707 if (create_intersect_range_checks_index (loop, cond_expr, dr_a, dr_b))
1708 return;
1710 tree segment_length_a = dr_a.seg_len;
1711 tree segment_length_b = dr_b.seg_len;
1712 tree addr_base_a = DR_BASE_ADDRESS (dr_a.dr);
1713 tree addr_base_b = DR_BASE_ADDRESS (dr_b.dr);
1714 tree offset_a = DR_OFFSET (dr_a.dr), offset_b = DR_OFFSET (dr_b.dr);
1716 offset_a = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_a),
1717 offset_a, DR_INIT (dr_a.dr));
1718 offset_b = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_b),
1719 offset_b, DR_INIT (dr_b.dr));
1720 addr_base_a = fold_build_pointer_plus (addr_base_a, offset_a);
1721 addr_base_b = fold_build_pointer_plus (addr_base_b, offset_b);
1723 tree seg_a_min = addr_base_a;
1724 tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
1725 /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
1726 bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
1727 [a, a+12) */
1728 if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
1730 tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a.dr)));
1731 seg_a_min = fold_build_pointer_plus (seg_a_max, unit_size);
1732 seg_a_max = fold_build_pointer_plus (addr_base_a, unit_size);
1735 tree seg_b_min = addr_base_b;
1736 tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
1737 if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
1739 tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b.dr)));
1740 seg_b_min = fold_build_pointer_plus (seg_b_max, unit_size);
1741 seg_b_max = fold_build_pointer_plus (addr_base_b, unit_size);
1743 *cond_expr
1744 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1745 fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
1746 fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
1749 /* Create a conditional expression that represents the run-time checks for
1750 overlapping of address ranges represented by a list of data references
1751 pairs passed in ALIAS_PAIRS. Data references are in LOOP. The returned
1752 COND_EXPR is the conditional expression to be used in the if statement
1753 that controls which version of the loop gets executed at runtime. */
1755 void
1756 create_runtime_alias_checks (struct loop *loop,
1757 vec<dr_with_seg_len_pair_t> *alias_pairs,
1758 tree * cond_expr)
1760 tree part_cond_expr;
1762 for (size_t i = 0, s = alias_pairs->length (); i < s; ++i)
1764 const dr_with_seg_len& dr_a = (*alias_pairs)[i].first;
1765 const dr_with_seg_len& dr_b = (*alias_pairs)[i].second;
1767 if (dump_enabled_p ())
1769 dump_printf (MSG_NOTE, "create runtime check for data references ");
1770 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
1771 dump_printf (MSG_NOTE, " and ");
1772 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
1773 dump_printf (MSG_NOTE, "\n");
1776 /* Create condition expression for each pair data references. */
1777 create_intersect_range_checks (loop, &part_cond_expr, dr_a, dr_b);
1778 if (*cond_expr)
1779 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1780 *cond_expr, part_cond_expr);
1781 else
1782 *cond_expr = part_cond_expr;
1786 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1787 expressions. */
1788 static bool
1789 dr_equal_offsets_p1 (tree offset1, tree offset2)
1791 bool res;
1793 STRIP_NOPS (offset1);
1794 STRIP_NOPS (offset2);
1796 if (offset1 == offset2)
1797 return true;
1799 if (TREE_CODE (offset1) != TREE_CODE (offset2)
1800 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1801 return false;
1803 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1804 TREE_OPERAND (offset2, 0));
1806 if (!res || !BINARY_CLASS_P (offset1))
1807 return res;
1809 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1810 TREE_OPERAND (offset2, 1));
1812 return res;
1815 /* Check if DRA and DRB have equal offsets. */
1816 bool
1817 dr_equal_offsets_p (struct data_reference *dra,
1818 struct data_reference *drb)
1820 tree offset1, offset2;
1822 offset1 = DR_OFFSET (dra);
1823 offset2 = DR_OFFSET (drb);
1825 return dr_equal_offsets_p1 (offset1, offset2);
1828 /* Returns true if FNA == FNB. */
1830 static bool
1831 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1833 unsigned i, n = fna.length ();
1835 if (n != fnb.length ())
1836 return false;
1838 for (i = 0; i < n; i++)
1839 if (!operand_equal_p (fna[i], fnb[i], 0))
1840 return false;
1842 return true;
1845 /* If all the functions in CF are the same, returns one of them,
1846 otherwise returns NULL. */
1848 static affine_fn
1849 common_affine_function (conflict_function *cf)
1851 unsigned i;
1852 affine_fn comm;
1854 if (!CF_NONTRIVIAL_P (cf))
1855 return affine_fn ();
1857 comm = cf->fns[0];
1859 for (i = 1; i < cf->n; i++)
1860 if (!affine_function_equal_p (comm, cf->fns[i]))
1861 return affine_fn ();
1863 return comm;
1866 /* Returns the base of the affine function FN. */
1868 static tree
1869 affine_function_base (affine_fn fn)
1871 return fn[0];
1874 /* Returns true if FN is a constant. */
1876 static bool
1877 affine_function_constant_p (affine_fn fn)
1879 unsigned i;
1880 tree coef;
1882 for (i = 1; fn.iterate (i, &coef); i++)
1883 if (!integer_zerop (coef))
1884 return false;
1886 return true;
1889 /* Returns true if FN is the zero constant function. */
1891 static bool
1892 affine_function_zero_p (affine_fn fn)
1894 return (integer_zerop (affine_function_base (fn))
1895 && affine_function_constant_p (fn));
1898 /* Returns a signed integer type with the largest precision from TA
1899 and TB. */
1901 static tree
1902 signed_type_for_types (tree ta, tree tb)
1904 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1905 return signed_type_for (ta);
1906 else
1907 return signed_type_for (tb);
1910 /* Applies operation OP on affine functions FNA and FNB, and returns the
1911 result. */
1913 static affine_fn
1914 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1916 unsigned i, n, m;
1917 affine_fn ret;
1918 tree coef;
1920 if (fnb.length () > fna.length ())
1922 n = fna.length ();
1923 m = fnb.length ();
1925 else
1927 n = fnb.length ();
1928 m = fna.length ();
1931 ret.create (m);
1932 for (i = 0; i < n; i++)
1934 tree type = signed_type_for_types (TREE_TYPE (fna[i]),
1935 TREE_TYPE (fnb[i]));
1936 ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
1939 for (; fna.iterate (i, &coef); i++)
1940 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1941 coef, integer_zero_node));
1942 for (; fnb.iterate (i, &coef); i++)
1943 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1944 integer_zero_node, coef));
1946 return ret;
1949 /* Returns the sum of affine functions FNA and FNB. */
1951 static affine_fn
1952 affine_fn_plus (affine_fn fna, affine_fn fnb)
1954 return affine_fn_op (PLUS_EXPR, fna, fnb);
1957 /* Returns the difference of affine functions FNA and FNB. */
1959 static affine_fn
1960 affine_fn_minus (affine_fn fna, affine_fn fnb)
1962 return affine_fn_op (MINUS_EXPR, fna, fnb);
1965 /* Frees affine function FN. */
1967 static void
1968 affine_fn_free (affine_fn fn)
1970 fn.release ();
1973 /* Determine for each subscript in the data dependence relation DDR
1974 the distance. */
1976 static void
1977 compute_subscript_distance (struct data_dependence_relation *ddr)
1979 conflict_function *cf_a, *cf_b;
1980 affine_fn fn_a, fn_b, diff;
1982 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1984 unsigned int i;
1986 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1988 struct subscript *subscript;
1990 subscript = DDR_SUBSCRIPT (ddr, i);
1991 cf_a = SUB_CONFLICTS_IN_A (subscript);
1992 cf_b = SUB_CONFLICTS_IN_B (subscript);
1994 fn_a = common_affine_function (cf_a);
1995 fn_b = common_affine_function (cf_b);
1996 if (!fn_a.exists () || !fn_b.exists ())
1998 SUB_DISTANCE (subscript) = chrec_dont_know;
1999 return;
2001 diff = affine_fn_minus (fn_a, fn_b);
2003 if (affine_function_constant_p (diff))
2004 SUB_DISTANCE (subscript) = affine_function_base (diff);
2005 else
2006 SUB_DISTANCE (subscript) = chrec_dont_know;
2008 affine_fn_free (diff);
2013 /* Returns the conflict function for "unknown". */
2015 static conflict_function *
2016 conflict_fn_not_known (void)
2018 conflict_function *fn = XCNEW (conflict_function);
2019 fn->n = NOT_KNOWN;
2021 return fn;
2024 /* Returns the conflict function for "independent". */
2026 static conflict_function *
2027 conflict_fn_no_dependence (void)
2029 conflict_function *fn = XCNEW (conflict_function);
2030 fn->n = NO_DEPENDENCE;
2032 return fn;
2035 /* Returns true if the address of OBJ is invariant in LOOP. */
2037 static bool
2038 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
2040 while (handled_component_p (obj))
2042 if (TREE_CODE (obj) == ARRAY_REF)
2044 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
2045 need to check the stride and the lower bound of the reference. */
2046 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2047 loop->num)
2048 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
2049 loop->num))
2050 return false;
2052 else if (TREE_CODE (obj) == COMPONENT_REF)
2054 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2055 loop->num))
2056 return false;
2058 obj = TREE_OPERAND (obj, 0);
2061 if (!INDIRECT_REF_P (obj)
2062 && TREE_CODE (obj) != MEM_REF)
2063 return true;
2065 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
2066 loop->num);
2069 /* Returns false if we can prove that data references A and B do not alias,
2070 true otherwise. If LOOP_NEST is false no cross-iteration aliases are
2071 considered. */
2073 bool
2074 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
2075 bool loop_nest)
2077 tree addr_a = DR_BASE_OBJECT (a);
2078 tree addr_b = DR_BASE_OBJECT (b);
2080 /* If we are not processing a loop nest but scalar code we
2081 do not need to care about possible cross-iteration dependences
2082 and thus can process the full original reference. Do so,
2083 similar to how loop invariant motion applies extra offset-based
2084 disambiguation. */
2085 if (!loop_nest)
2087 aff_tree off1, off2;
2088 widest_int size1, size2;
2089 get_inner_reference_aff (DR_REF (a), &off1, &size1);
2090 get_inner_reference_aff (DR_REF (b), &off2, &size2);
2091 aff_combination_scale (&off1, -1);
2092 aff_combination_add (&off2, &off1);
2093 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
2094 return false;
/* Same dependence clique with different bases means the references are
   known not to alias (NOTE(review): presumably restrict-derived info
   encoded in MR_DEPENDENCE_* — confirm against tree-core documentation).  */
2097 if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
2098 && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
2099 && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
2100 && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
2101 return false;
2103 /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
2104 do not know the size of the base-object. So we cannot do any
2105 offset/overlap based analysis but have to rely on points-to
2106 information only. */
2107 if (TREE_CODE (addr_a) == MEM_REF
2108 && (DR_UNCONSTRAINED_BASE (a)
2109 || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
2111 /* For true dependences we can apply TBAA. */
2112 if (flag_strict_aliasing
2113 && DR_IS_WRITE (a) && DR_IS_READ (b)
2114 && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
2115 get_alias_set (DR_REF (b))))
2116 return false;
2117 if (TREE_CODE (addr_b) == MEM_REF)
2118 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2119 TREE_OPERAND (addr_b, 0));
2120 else
2121 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2122 build_fold_addr_expr (addr_b));
/* Symmetric case: B is the pointer-based MEM_REF.  */
2124 else if (TREE_CODE (addr_b) == MEM_REF
2125 && (DR_UNCONSTRAINED_BASE (b)
2126 || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
2128 /* For true dependences we can apply TBAA. */
2129 if (flag_strict_aliasing
2130 && DR_IS_WRITE (a) && DR_IS_READ (b)
2131 && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
2132 get_alias_set (DR_REF (b))))
2133 return false;
2134 if (TREE_CODE (addr_a) == MEM_REF)
2135 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2136 TREE_OPERAND (addr_b, 0));
2137 else
2138 return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
2139 TREE_OPERAND (addr_b, 0));
2142 /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
2143 that is being subsetted in the loop nest.  Use the oracle appropriate
2144 for the kind of dependence (output, anti, or flow/input). */
2144 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
2145 return refs_output_dependent_p (addr_a, addr_b);
2146 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
2147 return refs_anti_dependent_p (addr_a, addr_b);
2148 return refs_may_alias_p (addr_a, addr_b);
/* Initialize a data dependence relation between data accesses A and
   B.  NB_LOOPS is the number of loops surrounding the references: the
   size of the classic distance/direction vectors.

   The returned DDR is always heap-allocated and owned by the caller;
   the quick-exit paths mark it chrec_dont_know (can't tell) or
   chrec_known (proved independent) instead of returning NULL.  */

struct data_dependence_relation *
initialize_data_dependence_relation (struct data_reference *a,
				     struct data_reference *b,
				     vec<loop_p> loop_nest)
{
  struct data_dependence_relation *res;
  unsigned int i;

  res = XNEW (struct data_dependence_relation);
  DDR_A (res) = a;
  DDR_B (res) = b;
  DDR_LOOP_NEST (res).create (0);
  DDR_REVERSED_P (res) = false;
  DDR_SUBSCRIPTS (res).create (0);
  DDR_DIR_VECTS (res).create (0);
  DDR_DIST_VECTS (res).create (0);

  if (a == NULL || b == NULL)
    {
      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
      return res;
    }

  /* If the data references do not alias, then they are independent.  */
  if (!dr_may_alias_p (a, b, loop_nest.exists ()))
    {
      DDR_ARE_DEPENDENT (res) = chrec_known;
      return res;
    }

  /* The case where the references are exactly the same.  */
  if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
    {
      /* Even for identical references we must be able to analyze the
	 subscripts: punt if the base address varies in the nest or the
	 access has no dimensions to compare.  */
      if ((loop_nest.exists ()
	   && !object_address_invariant_in_loop_p (loop_nest[0],
						   DR_BASE_OBJECT (a)))
	  || DR_NUM_DIMENSIONS (a) == 0)
	{
	  DDR_ARE_DEPENDENT (res) = chrec_dont_know;
	  return res;
	}
      DDR_AFFINE_P (res) = true;
      DDR_ARE_DEPENDENT (res) = NULL_TREE;
      DDR_SUBSCRIPTS (res).create (DR_NUM_DIMENSIONS (a));
      DDR_LOOP_NEST (res) = loop_nest;
      DDR_INNER_LOOP (res) = 0;
      DDR_SELF_REFERENCE (res) = true;
      /* Start every subscript in the "unknown conflict" state; the
	 dependence testers refine these later.  */
      for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
	{
	  struct subscript *subscript;

	  subscript = XNEW (struct subscript);
	  SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
	  SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
	  SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
	  SUB_DISTANCE (subscript) = chrec_dont_know;
	  DDR_SUBSCRIPTS (res).safe_push (subscript);
	}
      return res;
    }

  /* If the references do not access the same object, we do not know
     whether they alias or not.  We do not care about TBAA or alignment
     info so we can use OEP_ADDRESS_OF to avoid false negatives.
     But the accesses have to use compatible types as otherwise the
     built indices would not match.  */
  if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), OEP_ADDRESS_OF)
      || !types_compatible_p (TREE_TYPE (DR_BASE_OBJECT (a)),
			      TREE_TYPE (DR_BASE_OBJECT (b))))
    {
      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
      return res;
    }

  /* If the base of the object is not invariant in the loop nest, we cannot
     analyze it.  TODO -- in fact, it would suffice to record that there may
     be arbitrary dependences in the loops where the base object varies.  */
  if ((loop_nest.exists ()
       && !object_address_invariant_in_loop_p (loop_nest[0], DR_BASE_OBJECT (a)))
      || DR_NUM_DIMENSIONS (a) == 0)
    {
      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
      return res;
    }

  /* If the number of dimensions of the access to not agree we can have
     a pointer access to a component of the array element type and an
     array access while the base-objects are still the same.  Punt.  */
  if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
    {
      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
      return res;
    }

  DDR_AFFINE_P (res) = true;
  DDR_ARE_DEPENDENT (res) = NULL_TREE;
  DDR_SUBSCRIPTS (res).create (DR_NUM_DIMENSIONS (a));
  DDR_LOOP_NEST (res) = loop_nest;
  DDR_INNER_LOOP (res) = 0;
  DDR_SELF_REFERENCE (res) = false;

  for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
    {
      struct subscript *subscript;

      subscript = XNEW (struct subscript);
      SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
      SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
      SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
      SUB_DISTANCE (subscript) = chrec_dont_know;
      DDR_SUBSCRIPTS (res).safe_push (subscript);
    }

  return res;
}
2271 /* Frees memory used by the conflict function F. */
2273 static void
2274 free_conflict_function (conflict_function *f)
2276 unsigned i;
2278 if (CF_NONTRIVIAL_P (f))
2280 for (i = 0; i < f->n; i++)
2281 affine_fn_free (f->fns[i]);
2283 free (f);
2286 /* Frees memory used by SUBSCRIPTS. */
2288 static void
2289 free_subscripts (vec<subscript_p> subscripts)
2291 unsigned i;
2292 subscript_p s;
2294 FOR_EACH_VEC_ELT (subscripts, i, s)
2296 free_conflict_function (s->conflicting_iterations_in_a);
2297 free_conflict_function (s->conflicting_iterations_in_b);
2298 free (s);
2300 subscripts.release ();
/* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
   description.  */

static inline void
finalize_ddr_dependent (struct data_dependence_relation *ddr,
			tree chrec)
{
  DDR_ARE_DEPENDENT (ddr) = chrec;
  /* Once the relation is summarized by CHREC the per-subscript data is
     dead; free it and leave an empty (but valid) vector behind.  */
  free_subscripts (DDR_SUBSCRIPTS (ddr));
  DDR_SUBSCRIPTS (ddr).create (0);
}
2315 /* The dependence relation DDR cannot be represented by a distance
2316 vector. */
2318 static inline void
2319 non_affine_dependence_relation (struct data_dependence_relation *ddr)
2321 if (dump_file && (dump_flags & TDF_DETAILS))
2322 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
2324 DDR_AFFINE_P (ddr) = false;
2329 /* This section contains the classic Banerjee tests. */
2331 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
2332 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
2334 static inline bool
2335 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
2337 return (evolution_function_is_constant_p (chrec_a)
2338 && evolution_function_is_constant_p (chrec_b));
2341 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
2342 variable, i.e., if the SIV (Single Index Variable) test is true. */
2344 static bool
2345 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
2347 if ((evolution_function_is_constant_p (chrec_a)
2348 && evolution_function_is_univariate_p (chrec_b))
2349 || (evolution_function_is_constant_p (chrec_b)
2350 && evolution_function_is_univariate_p (chrec_a)))
2351 return true;
2353 if (evolution_function_is_univariate_p (chrec_a)
2354 && evolution_function_is_univariate_p (chrec_b))
2356 switch (TREE_CODE (chrec_a))
2358 case POLYNOMIAL_CHREC:
2359 switch (TREE_CODE (chrec_b))
2361 case POLYNOMIAL_CHREC:
2362 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
2363 return false;
2364 /* FALLTHRU */
2366 default:
2367 return true;
2370 default:
2371 return true;
2375 return false;
/* Creates a conflict function with N dimensions.  The affine functions
   in each dimension follow.

   N must be between 1 and MAX_DIM; each variadic argument is an
   affine_fn that becomes one dimension of the result.  The returned
   object is heap-allocated; free it with free_conflict_function.  */

static conflict_function *
conflict_fn (unsigned n, ...)
{
  unsigned i;
  conflict_function *ret = XCNEW (conflict_function);
  va_list ap;

  gcc_assert (0 < n && n <= MAX_DIM);
  va_start (ap, n);

  ret->n = n;
  for (i = 0; i < n; i++)
    ret->fns[i] = va_arg (ap, affine_fn);
  va_end (ap);

  return ret;
}
2399 /* Returns constant affine function with value CST. */
2401 static affine_fn
2402 affine_fn_cst (tree cst)
2404 affine_fn fn;
2405 fn.create (1);
2406 fn.quick_push (cst);
2407 return fn;
/* Returns affine function with single variable, CST + COEF * x_DIM.

   The function is stored as a coefficient vector: slot 0 holds the
   constant term, slots 1 .. DIM-1 are zero, and slot DIM holds COEF.
   DIM must be positive.  */

static affine_fn
affine_fn_univar (tree cst, unsigned dim, tree coef)
{
  affine_fn fn;
  fn.create (dim + 1);
  unsigned i;

  gcc_assert (dim > 0);
  fn.quick_push (cst);
  for (i = 1; i < dim; i++)
    fn.quick_push (integer_zero_node);
  fn.quick_push (coef);
  return fn;
}
/* Analyze a ZIV (Zero Index Variable) subscript.  *OVERLAPS_A and
   *OVERLAPS_B are initialized to the functions that describe the
   relation between the elements accessed twice by CHREC_A and
   CHREC_B.  For k >= 0, the following property is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_ziv_subscript (tree chrec_a,
		       tree chrec_b,
		       conflict_function **overlaps_a,
		       conflict_function **overlaps_b,
		       tree *last_conflicts)
{
  tree type, difference;
  dependence_stats.num_ziv++;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_ziv_subscript \n");

  /* Compare the two constants in a common signed type so the
     subtraction below is meaningful.  */
  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
  chrec_a = chrec_convert (type, chrec_a, NULL);
  chrec_b = chrec_convert (type, chrec_b, NULL);
  difference = chrec_fold_minus (type, chrec_a, chrec_b);

  switch (TREE_CODE (difference))
    {
    case INTEGER_CST:
      if (integer_zerop (difference))
	{
	  /* The difference is equal to zero: the accessed index
	     overlaps for each iteration in the loop.  */
	  *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
	  *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
	  *last_conflicts = chrec_dont_know;
	  dependence_stats.num_ziv_dependent++;
	}
      else
	{
	  /* The accesses do not overlap.  */
	  *overlaps_a = conflict_fn_no_dependence ();
	  *overlaps_b = conflict_fn_no_dependence ();
	  *last_conflicts = integer_zero_node;
	  dependence_stats.num_ziv_independent++;
	}
      break;

    default:
      /* We're not sure whether the indexes overlap.  For the moment,
	 conservatively answer "don't know".  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "ziv test failed: difference is non-integer.\n");

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      dependence_stats.num_ziv_unimplemented++;
      break;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, ")\n");
}
2491 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
2492 and only if it fits to the int type. If this is not the case, or the
2493 bound on the number of iterations of LOOP could not be derived, returns
2494 chrec_dont_know. */
2496 static tree
2497 max_stmt_executions_tree (struct loop *loop)
2499 widest_int nit;
2501 if (!max_stmt_executions (loop, &nit))
2502 return chrec_dont_know;
2504 if (!wi::fits_to_tree_p (nit, unsigned_type_node))
2505 return chrec_dont_know;
2507 return wide_int_to_tree (unsigned_type_node, nit);
/* Determine whether the CHREC is always positive/negative.  If the expression
   cannot be statically analyzed, return false, otherwise set the answer into
   VALUE.  */

static bool
chrec_is_positive (tree chrec, bool *value)
{
  bool value0, value1, value2;
  tree end_value, nb_iter;

  switch (TREE_CODE (chrec))
    {
    case POLYNOMIAL_CHREC:
      /* Recurse on the initial value and the step of the evolution.  */
      if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
	  || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
	return false;

      /* FIXME -- overflows.  */
      if (value0 == value1)
	{
	  /* Initial value and step have the same sign, so the sign
	     cannot change over the iterations.  */
	  *value = value0;
	  return true;
	}

      /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
	 and the proof consists in showing that the sign never
	 changes during the execution of the loop, from 0 to
	 loop->nb_iterations.  */
      if (!evolution_function_is_affine_p (chrec))
	return false;

      nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
      if (chrec_contains_undetermined (nb_iter))
	return false;

#if 0
      /* TODO -- If the test is after the exit, we may decrease the number of
	 iterations by one.  */
      if (after_exit)
	nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
#endif

      end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);

      if (!chrec_is_positive (end_value, &value2))
	return false;

      /* NOTE(review): VALUE2 (the sign of the end value) is computed
	 above but the test below compares VALUE0 with VALUE1, which are
	 known to differ on this path, so this always returns false.
	 That is conservative (never claims a sign), but it looks like
	 the intent may have been "value0 == value2" -- confirm against
	 upstream before changing.  */
      *value = value0;
      return value0 == value1;

    case INTEGER_CST:
      switch (tree_int_cst_sgn (chrec))
	{
	case -1:
	  *value = false;
	  break;
	case 1:
	  *value = true;
	  break;
	default:
	  /* Zero is neither positive nor negative.  */
	  return false;
	}
      return true;

    default:
      return false;
    }
}
/* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
   constant, and CHREC_B is an affine function.  *OVERLAPS_A and
   *OVERLAPS_B are initialized to the functions that describe the
   relation between the elements accessed twice by CHREC_A and
   CHREC_B.  For k >= 0, the following property is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_siv_subscript_cst_affine (tree chrec_a,
				  tree chrec_b,
				  conflict_function **overlaps_a,
				  conflict_function **overlaps_b,
				  tree *last_conflicts)
{
  bool value0, value1, value2;
  tree type, difference, tmp;

  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
  chrec_a = chrec_convert (type, chrec_a, NULL);
  chrec_b = chrec_convert (type, chrec_b, NULL);
  /* DIFFERENCE = initial value of CHREC_B - CHREC_A; its sign together
     with the sign of CHREC_B's step decides whether CHREC_B can ever
     reach the constant CHREC_A.  */
  difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);

  /* Special case overlap in the first iteration.  */
  if (integer_zerop (difference))
    {
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = integer_one_node;
      return;
    }

  if (!chrec_is_positive (initial_condition (difference), &value0))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "siv test failed: chrec is not positive.\n");

      dependence_stats.num_siv_unimplemented++;
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      return;
    }
  else
    {
      if (value0 == false)
	{
	  /* The constant is above the initial value of CHREC_B: only
	     a positive step can reach it.  */
	  if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "siv test failed: chrec not positive.\n");

	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	      dependence_stats.num_siv_unimplemented++;
	      return;
	    }
	  else
	    {
	      if (value1 == true)
		{
		  /* Example:
		     chrec_a = 12
		     chrec_b = {10, +, 1}
		  */

		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
		    {
		      HOST_WIDE_INT numiter;
		      struct loop *loop = get_chrec_loop (chrec_b);

		      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
		      /* The conflicting iteration of CHREC_B is
			 |difference| / step.  */
		      tmp = fold_build2 (EXACT_DIV_EXPR, type,
					 fold_build1 (ABS_EXPR, type, difference),
					 CHREC_RIGHT (chrec_b));
		      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
		      *last_conflicts = integer_one_node;

		      /* Perform weak-zero siv test to see if overlap is
			 outside the loop bounds.  */
		      numiter = max_stmt_executions_int (loop);

		      if (numiter >= 0
			  && compare_tree_int (tmp, numiter) > 0)
			{
			  free_conflict_function (*overlaps_a);
			  free_conflict_function (*overlaps_b);
			  *overlaps_a = conflict_fn_no_dependence ();
			  *overlaps_b = conflict_fn_no_dependence ();
			  *last_conflicts = integer_zero_node;
			  dependence_stats.num_siv_independent++;
			  return;
			}
		      dependence_stats.num_siv_dependent++;
		      return;
		    }

		  /* When the step does not divide the difference, there are
		     no overlaps.  */
		  else
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      dependence_stats.num_siv_independent++;
		      return;
		    }
		}

	      else
		{
		  /* Example:
		     chrec_a = 12
		     chrec_b = {10, +, -1}

		     In this case, chrec_a will not overlap with chrec_b.  */
		  *overlaps_a = conflict_fn_no_dependence ();
		  *overlaps_b = conflict_fn_no_dependence ();
		  *last_conflicts = integer_zero_node;
		  dependence_stats.num_siv_independent++;
		  return;
		}
	    }
	}
      else
	{
	  /* The constant is below the initial value of CHREC_B: only
	     a negative step can reach it.  */
	  if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "siv test failed: chrec not positive.\n");

	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	      dependence_stats.num_siv_unimplemented++;
	      return;
	    }
	  else
	    {
	      if (value2 == false)
		{
		  /* Example:
		     chrec_a = 3
		     chrec_b = {10, +, -1}
		  */
		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
		    {
		      HOST_WIDE_INT numiter;
		      struct loop *loop = get_chrec_loop (chrec_b);

		      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
		      /* Here DIFFERENCE and the step are both negative,
			 so no ABS is needed for the exact division.  */
		      tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
					 CHREC_RIGHT (chrec_b));
		      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
		      *last_conflicts = integer_one_node;

		      /* Perform weak-zero siv test to see if overlap is
			 outside the loop bounds.  */
		      numiter = max_stmt_executions_int (loop);

		      if (numiter >= 0
			  && compare_tree_int (tmp, numiter) > 0)
			{
			  free_conflict_function (*overlaps_a);
			  free_conflict_function (*overlaps_b);
			  *overlaps_a = conflict_fn_no_dependence ();
			  *overlaps_b = conflict_fn_no_dependence ();
			  *last_conflicts = integer_zero_node;
			  dependence_stats.num_siv_independent++;
			  return;
			}
		      dependence_stats.num_siv_dependent++;
		      return;
		    }

		  /* When the step does not divide the difference, there
		     are no overlaps.  */
		  else
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      dependence_stats.num_siv_independent++;
		      return;
		    }
		}
	      else
		{
		  /* Example:
		     chrec_a = 3
		     chrec_b = {4, +, 1}

		     In this case, chrec_a will not overlap with chrec_b.  */
		  *overlaps_a = conflict_fn_no_dependence ();
		  *overlaps_b = conflict_fn_no_dependence ();
		  *last_conflicts = integer_zero_node;
		  dependence_stats.num_siv_independent++;
		  return;
		}
	    }
	}
    }
}
/* Helper recursive function for initializing the matrix A.  Returns
   the initial value of CHREC.

   Walks CHREC, recording MULT times each evolution step into column 0
   of A starting at row INDEX (one row per nesting level of the chrec),
   and folds everything else down to the constant initial value.  */

static tree
initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
{
  gcc_assert (chrec);

  switch (TREE_CODE (chrec))
    {
    case POLYNOMIAL_CHREC:
      /* Record this level's step coefficient and recurse into the
	 initial value one row further down.  */
      A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
      return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);

    case PLUS_EXPR:
    case MULT_EXPR:
    case MINUS_EXPR:
      {
	tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
	tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);

	return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
      }

    CASE_CONVERT:
      {
	tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
	return chrec_convert (chrec_type (chrec), op, NULL);
      }

    case BIT_NOT_EXPR:
      {
	/* Handle ~X as -1 - X.  */
	tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
	return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
			      build_int_cst (TREE_TYPE (chrec), -1), op);
      }

    case INTEGER_CST:
      return chrec;

    default:
      gcc_unreachable ();
      return NULL_TREE;
    }
}
/* NOTE(review): truncating division -- equal to floor only for
   non-negative operands, which is how it is used below.  */
#define FLOOR_DIV(x,y) ((x) / (y))

/* Solves the special case of the Diophantine equation:
   | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)

   Computes the descriptions OVERLAPS_A and OVERLAPS_B.  NITER is the
   number of iterations that loops X and Y run.  The overlaps will be
   constructed as evolutions in dimension DIM.  */

static void
compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
					 HOST_WIDE_INT step_a,
					 HOST_WIDE_INT step_b,
					 affine_fn *overlaps_a,
					 affine_fn *overlaps_b,
					 tree *last_conflicts, int dim)
{
  if (((step_a > 0 && step_b > 0)
       || (step_a < 0 && step_b < 0)))
    {
      HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
      HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;

      /* Solutions advance in lockstep: x by step_b/gcd, y by
	 step_a/gcd.  */
      gcd_steps_a_b = gcd (step_a, step_b);
      step_overlaps_a = step_b / gcd_steps_a_b;
      step_overlaps_b = step_a / gcd_steps_a_b;

      if (niter > 0)
	{
	  /* The last conflict is bounded by how many lockstep advances
	     fit in NITER iterations of either loop.  */
	  tau2 = FLOOR_DIV (niter, step_overlaps_a);
	  tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
	  last_conflict = tau2;
	  *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
	}
      else
	*last_conflicts = chrec_dont_know;

      *overlaps_a = affine_fn_univar (integer_zero_node, dim,
				      build_int_cst (NULL_TREE,
						     step_overlaps_a));
      *overlaps_b = affine_fn_univar (integer_zero_node, dim,
				      build_int_cst (NULL_TREE,
						     step_overlaps_b));
    }

  else
    {
      /* Steps do not share a strict sign: report the constant-zero
	 overlap functions with no conflicts.  */
      *overlaps_a = affine_fn_cst (integer_zero_node);
      *overlaps_b = affine_fn_cst (integer_zero_node);
      *last_conflicts = integer_zero_node;
    }
}
/* Solves the special case of a Diophantine equation where CHREC_A is
   an affine bivariate function, and CHREC_B is an affine univariate
   function.  For example,

   | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z

   has the following overlapping functions:

   | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
   | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
   | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v

   FORNOW: This is a specialized implementation for a case occurring in
   a common benchmark.  Implement the general algorithm.  */

static void
compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
				      conflict_function **overlaps_a,
				      conflict_function **overlaps_b,
				      tree *last_conflicts)
{
  bool xz_p, yz_p, xyz_p;
  HOST_WIDE_INT step_x, step_y, step_z;
  HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
  affine_fn overlaps_a_xz, overlaps_b_xz;
  affine_fn overlaps_a_yz, overlaps_b_yz;
  affine_fn overlaps_a_xyz, overlaps_b_xyz;
  affine_fn ova1, ova2, ovb;
  tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;

  /* X is the outer evolution of CHREC_A, Y its inner one, Z the
     evolution of CHREC_B.  */
  step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
  step_y = int_cst_value (CHREC_RIGHT (chrec_a));
  step_z = int_cst_value (CHREC_RIGHT (chrec_b));

  niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
  niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
  niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));

  /* max_stmt_executions_int returns a negative value when the bound is
     unknown; without all three bounds we must give up.  */
  if (niter_x < 0 || niter_y < 0 || niter_z < 0)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      return;
    }

  /* Solve the three pairwise sub-problems: x against z, y against z,
     and the combined x+y against z.  */
  niter = MIN (niter_x, niter_z);
  compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
					   &overlaps_a_xz,
					   &overlaps_b_xz,
					   &last_conflicts_xz, 1);
  niter = MIN (niter_y, niter_z);
  compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
					   &overlaps_a_yz,
					   &overlaps_b_yz,
					   &last_conflicts_yz, 2);
  niter = MIN (niter_x, niter_z);
  niter = MIN (niter_y, niter);
  compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
					   &overlaps_a_xyz,
					   &overlaps_b_xyz,
					   &last_conflicts_xyz, 3);

  xz_p = !integer_zerop (last_conflicts_xz);
  yz_p = !integer_zerop (last_conflicts_yz);
  xyz_p = !integer_zerop (last_conflicts_xyz);

  if (xz_p || yz_p || xyz_p)
    {
      /* Combine whichever sub-problems conflicted; the temporaries
	 hold the old functions so they can be freed after the sum.  */
      ova1 = affine_fn_cst (integer_zero_node);
      ova2 = affine_fn_cst (integer_zero_node);
      ovb = affine_fn_cst (integer_zero_node);
      if (xz_p)
	{
	  affine_fn t0 = ova1;
	  affine_fn t2 = ovb;

	  ova1 = affine_fn_plus (ova1, overlaps_a_xz);
	  ovb = affine_fn_plus (ovb, overlaps_b_xz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  *last_conflicts = last_conflicts_xz;
	}
      if (yz_p)
	{
	  affine_fn t0 = ova2;
	  affine_fn t2 = ovb;

	  ova2 = affine_fn_plus (ova2, overlaps_a_yz);
	  ovb = affine_fn_plus (ovb, overlaps_b_yz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  *last_conflicts = last_conflicts_yz;
	}
      if (xyz_p)
	{
	  affine_fn t0 = ova1;
	  affine_fn t2 = ova2;
	  affine_fn t4 = ovb;

	  ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
	  ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
	  ovb = affine_fn_plus (ovb, overlaps_b_xyz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  affine_fn_free (t4);
	  *last_conflicts = last_conflicts_xyz;
	}
      *overlaps_a = conflict_fn (2, ova1, ova2);
      *overlaps_b = conflict_fn (1, ovb);
    }
  else
    {
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = integer_zero_node;
    }

  /* The per-sub-problem functions were copied into the results above;
     release the originals.  */
  affine_fn_free (overlaps_a_xz);
  affine_fn_free (overlaps_b_xz);
  affine_fn_free (overlaps_a_yz);
  affine_fn_free (overlaps_b_yz);
  affine_fn_free (overlaps_a_xyz);
  affine_fn_free (overlaps_b_xyz);
}
3015 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
3017 static void
3018 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
3019 int size)
3021 memcpy (vec2, vec1, size * sizeof (*vec1));
3024 /* Copy the elements of M x N matrix MAT1 to MAT2. */
3026 static void
3027 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
3028 int m, int n)
3030 int i;
3032 for (i = 0; i < m; i++)
3033 lambda_vector_copy (mat1[i], mat2[i], n);
3036 /* Store the N x N identity matrix in MAT. */
3038 static void
3039 lambda_matrix_id (lambda_matrix mat, int size)
3041 int i, j;
3043 for (i = 0; i < size; i++)
3044 for (j = 0; j < size; j++)
3045 mat[i][j] = (i == j) ? 1 : 0;
3048 /* Return the first nonzero element of vector VEC1 between START and N.
3049 We must have START <= N. Returns N if VEC1 is the zero vector. */
3051 static int
3052 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
3054 int j = start;
3055 while (j < n && vec1[j] == 0)
3056 j++;
3057 return j;
3060 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
3061 R2 = R2 + CONST1 * R1. */
3063 static void
3064 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
3066 int i;
3068 if (const1 == 0)
3069 return;
3071 for (i = 0; i < n; i++)
3072 mat[r2][i] += const1 * mat[r1][i];
3075 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
3076 and store the result in VEC2. */
3078 static void
3079 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
3080 int size, int const1)
3082 int i;
3084 if (const1 == 0)
3085 lambda_vector_clear (vec2, size);
3086 else
3087 for (i = 0; i < size; i++)
3088 vec2[i] = const1 * vec1[i];
/* Negate vector VEC1 with length SIZE and store it in VEC2.
   Implemented as multiplication by -1.  */

static void
lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
		      int size)
{
  lambda_vector_mult_const (vec1, vec2, size, -1);
}
/* Negate row R1 of matrix MAT which has N columns, in place.  */

static void
lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
{
  lambda_vector_negate (mat[r1], mat[r1], n);
}
3108 /* Return true if two vectors are equal. */
3110 static bool
3111 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
3113 int i;
3114 for (i = 0; i < size; i++)
3115 if (vec1[i] != vec2[i])
3116 return false;
3117 return true;
/* Given an M x N integer matrix A, this function determines an M x
   M unimodular matrix U, and an M x N echelon matrix S such that
   "U.A = S".  This decomposition is also known as "right Hermite".

   Ref: Algorithm 2.1 page 33 in "Loop Transformations for
   Restructuring Compilers" Utpal Banerjee.  */

static void
lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
			     lambda_matrix S, lambda_matrix U)
{
  int i, j, i0 = 0;

  /* Start from S = A and U = identity; every row operation below is
     applied to both so that U.A = S stays invariant.  */
  lambda_matrix_copy (A, S, m, n);
  lambda_matrix_id (U, m);

  for (j = 0; j < n; j++)
    {
      if (lambda_vector_first_nz (S[j], m, i0) < m)
	{
	  ++i0;
	  /* Eliminate entries below the pivot with a Euclidean-style
	     reduction: repeatedly subtract a multiple of the row above
	     and swap, until the entry becomes zero.  */
	  for (i = m - 1; i >= i0; i--)
	    {
	      while (S[i][j] != 0)
		{
		  int sigma, factor, a, b;

		  a = S[i-1][j];
		  b = S[i][j];
		  /* FACTOR is the signed quotient a/b rounded toward
		     zero.  */
		  sigma = (a * b < 0) ? -1: 1;
		  a = abs (a);
		  b = abs (b);
		  factor = sigma * (a / b);

		  lambda_matrix_row_add (S, n, i, i-1, -factor);
		  std::swap (S[i], S[i-1]);

		  lambda_matrix_row_add (U, m, i, i-1, -factor);
		  std::swap (U[i], U[i-1]);
		}
	    }
	}
    }
}
3165 /* Determines the overlapping elements due to accesses CHREC_A and
3166 CHREC_B, that are affine functions. This function cannot handle
3167 symbolic evolution functions, ie. when initial conditions are
3168 parameters, because it uses lambda matrices of integers. */
3170 static void
3171 analyze_subscript_affine_affine (tree chrec_a,
3172 tree chrec_b,
3173 conflict_function **overlaps_a,
3174 conflict_function **overlaps_b,
3175 tree *last_conflicts)
3177 unsigned nb_vars_a, nb_vars_b, dim;
3178 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
3179 lambda_matrix A, U, S;
3180 struct obstack scratch_obstack;
3182 if (eq_evolutions_p (chrec_a, chrec_b))
3184 /* The accessed index overlaps for each iteration in the
3185 loop. */
3186 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3187 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3188 *last_conflicts = chrec_dont_know;
3189 return;
3191 if (dump_file && (dump_flags & TDF_DETAILS))
3192 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
3194 /* For determining the initial intersection, we have to solve a
3195 Diophantine equation. This is the most time consuming part.
3197 For answering to the question: "Is there a dependence?" we have
3198 to prove that there exists a solution to the Diophantine
3199 equation, and that the solution is in the iteration domain,
3200 i.e. the solution is positive or zero, and that the solution
3201 happens before the upper bound loop.nb_iterations. Otherwise
3202 there is no dependence. This function outputs a description of
3203 the iterations that hold the intersections. */
3205 nb_vars_a = nb_vars_in_chrec (chrec_a);
3206 nb_vars_b = nb_vars_in_chrec (chrec_b);
3208 gcc_obstack_init (&scratch_obstack);
3210 dim = nb_vars_a + nb_vars_b;
3211 U = lambda_matrix_new (dim, dim, &scratch_obstack);
3212 A = lambda_matrix_new (dim, 1, &scratch_obstack);
3213 S = lambda_matrix_new (dim, 1, &scratch_obstack);
3215 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
3216 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
3217 gamma = init_b - init_a;
3219 /* Don't do all the hard work of solving the Diophantine equation
3220 when we already know the solution: for example,
3221 | {3, +, 1}_1
3222 | {3, +, 4}_2
3223 | gamma = 3 - 3 = 0.
3224 Then the first overlap occurs during the first iterations:
3225 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
3227 if (gamma == 0)
3229 if (nb_vars_a == 1 && nb_vars_b == 1)
3231 HOST_WIDE_INT step_a, step_b;
3232 HOST_WIDE_INT niter, niter_a, niter_b;
3233 affine_fn ova, ovb;
3235 niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
3236 niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
3237 niter = MIN (niter_a, niter_b);
3238 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
3239 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
3241 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
3242 &ova, &ovb,
3243 last_conflicts, 1);
3244 *overlaps_a = conflict_fn (1, ova);
3245 *overlaps_b = conflict_fn (1, ovb);
3248 else if (nb_vars_a == 2 && nb_vars_b == 1)
3249 compute_overlap_steps_for_affine_1_2
3250 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
3252 else if (nb_vars_a == 1 && nb_vars_b == 2)
3253 compute_overlap_steps_for_affine_1_2
3254 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
3256 else
3258 if (dump_file && (dump_flags & TDF_DETAILS))
3259 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
3260 *overlaps_a = conflict_fn_not_known ();
3261 *overlaps_b = conflict_fn_not_known ();
3262 *last_conflicts = chrec_dont_know;
3264 goto end_analyze_subs_aa;
3267 /* U.A = S */
3268 lambda_matrix_right_hermite (A, dim, 1, S, U);
3270 if (S[0][0] < 0)
3272 S[0][0] *= -1;
3273 lambda_matrix_row_negate (U, dim, 0);
3275 gcd_alpha_beta = S[0][0];
3277 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
3278 but that is a quite strange case. Instead of ICEing, answer
3279 don't know. */
3280 if (gcd_alpha_beta == 0)
3282 *overlaps_a = conflict_fn_not_known ();
3283 *overlaps_b = conflict_fn_not_known ();
3284 *last_conflicts = chrec_dont_know;
3285 goto end_analyze_subs_aa;
3288 /* The classic "gcd-test". */
3289 if (!int_divides_p (gcd_alpha_beta, gamma))
3291 /* The "gcd-test" has determined that there is no integer
3292 solution, i.e. there is no dependence. */
3293 *overlaps_a = conflict_fn_no_dependence ();
3294 *overlaps_b = conflict_fn_no_dependence ();
3295 *last_conflicts = integer_zero_node;
3298 /* Both access functions are univariate. This includes SIV and MIV cases. */
3299 else if (nb_vars_a == 1 && nb_vars_b == 1)
3301 /* Both functions should have the same evolution sign. */
3302 if (((A[0][0] > 0 && -A[1][0] > 0)
3303 || (A[0][0] < 0 && -A[1][0] < 0)))
3305 /* The solutions are given by:
3307 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
3308 | [u21 u22] [y0]
3310 For a given integer t. Using the following variables,
3312 | i0 = u11 * gamma / gcd_alpha_beta
3313 | j0 = u12 * gamma / gcd_alpha_beta
3314 | i1 = u21
3315 | j1 = u22
3317 the solutions are:
3319 | x0 = i0 + i1 * t,
3320 | y0 = j0 + j1 * t. */
3321 HOST_WIDE_INT i0, j0, i1, j1;
3323 i0 = U[0][0] * gamma / gcd_alpha_beta;
3324 j0 = U[0][1] * gamma / gcd_alpha_beta;
3325 i1 = U[1][0];
3326 j1 = U[1][1];
3328 if ((i1 == 0 && i0 < 0)
3329 || (j1 == 0 && j0 < 0))
3331 /* There is no solution.
3332 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
3333 falls in here, but for the moment we don't look at the
3334 upper bound of the iteration domain. */
3335 *overlaps_a = conflict_fn_no_dependence ();
3336 *overlaps_b = conflict_fn_no_dependence ();
3337 *last_conflicts = integer_zero_node;
3338 goto end_analyze_subs_aa;
3341 if (i1 > 0 && j1 > 0)
3343 HOST_WIDE_INT niter_a
3344 = max_stmt_executions_int (get_chrec_loop (chrec_a));
3345 HOST_WIDE_INT niter_b
3346 = max_stmt_executions_int (get_chrec_loop (chrec_b));
3347 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
3349 /* (X0, Y0) is a solution of the Diophantine equation:
3350 "chrec_a (X0) = chrec_b (Y0)". */
3351 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
3352 CEIL (-j0, j1));
3353 HOST_WIDE_INT x0 = i1 * tau1 + i0;
3354 HOST_WIDE_INT y0 = j1 * tau1 + j0;
3356 /* (X1, Y1) is the smallest positive solution of the eq
3357 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
3358 first conflict occurs. */
3359 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
3360 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
3361 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
3363 if (niter > 0)
3365 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter_a - i0, i1),
3366 FLOOR_DIV (niter_b - j0, j1));
3367 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
3369 /* If the overlap occurs outside of the bounds of the
3370 loop, there is no dependence. */
3371 if (x1 >= niter_a || y1 >= niter_b)
3373 *overlaps_a = conflict_fn_no_dependence ();
3374 *overlaps_b = conflict_fn_no_dependence ();
3375 *last_conflicts = integer_zero_node;
3376 goto end_analyze_subs_aa;
3378 else
3379 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
3381 else
3382 *last_conflicts = chrec_dont_know;
3384 *overlaps_a
3385 = conflict_fn (1,
3386 affine_fn_univar (build_int_cst (NULL_TREE, x1),
3388 build_int_cst (NULL_TREE, i1)));
3389 *overlaps_b
3390 = conflict_fn (1,
3391 affine_fn_univar (build_int_cst (NULL_TREE, y1),
3393 build_int_cst (NULL_TREE, j1)));
3395 else
3397 /* FIXME: For the moment, the upper bound of the
3398 iteration domain for i and j is not checked. */
3399 if (dump_file && (dump_flags & TDF_DETAILS))
3400 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
3401 *overlaps_a = conflict_fn_not_known ();
3402 *overlaps_b = conflict_fn_not_known ();
3403 *last_conflicts = chrec_dont_know;
3406 else
3408 if (dump_file && (dump_flags & TDF_DETAILS))
3409 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
3410 *overlaps_a = conflict_fn_not_known ();
3411 *overlaps_b = conflict_fn_not_known ();
3412 *last_conflicts = chrec_dont_know;
3415 else
3417 if (dump_file && (dump_flags & TDF_DETAILS))
3418 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
3419 *overlaps_a = conflict_fn_not_known ();
3420 *overlaps_b = conflict_fn_not_known ();
3421 *last_conflicts = chrec_dont_know;
3424 end_analyze_subs_aa:
3425 obstack_free (&scratch_obstack, NULL);
3426 if (dump_file && (dump_flags & TDF_DETAILS))
3428 fprintf (dump_file, " (overlaps_a = ");
3429 dump_conflict_function (dump_file, *overlaps_a);
3430 fprintf (dump_file, ")\n (overlaps_b = ");
3431 dump_conflict_function (dump_file, *overlaps_b);
3432 fprintf (dump_file, "))\n");
3436 /* Returns true when analyze_subscript_affine_affine can be used for
3437 determining the dependence relation between chrec_a and chrec_b,
3438 that contain symbols. This function modifies chrec_a and chrec_b
3439 such that the analysis result is the same, and such that they don't
3440 contain symbols, and then can safely be passed to the analyzer.
3442 Example: The analysis of the following tuples of evolutions produce
3443 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
3444 vs. {0, +, 1}_1
3446 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
3447 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
3450 static bool
3451 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
3453 tree diff, type, left_a, left_b, right_b;
3455 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
3456 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
3457 /* FIXME: For the moment not handled. Might be refined later. */
3458 return false;
3460 type = chrec_type (*chrec_a);
3461 left_a = CHREC_LEFT (*chrec_a);
3462 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
3463 diff = chrec_fold_minus (type, left_a, left_b);
3465 if (!evolution_function_is_constant_p (diff))
3466 return false;
3468 if (dump_file && (dump_flags & TDF_DETAILS))
3469 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
3471 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
3472 diff, CHREC_RIGHT (*chrec_a));
3473 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
3474 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
3475 build_int_cst (type, 0),
3476 right_b);
3477 return true;
3480 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
3481 *OVERLAPS_B are initialized to the functions that describe the
3482 relation between the elements accessed twice by CHREC_A and
3483 CHREC_B. For k >= 0, the following property is verified:
3485 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
3487 static void
3488 analyze_siv_subscript (tree chrec_a,
3489 tree chrec_b,
3490 conflict_function **overlaps_a,
3491 conflict_function **overlaps_b,
3492 tree *last_conflicts,
3493 int loop_nest_num)
3495 dependence_stats.num_siv++;
3497 if (dump_file && (dump_flags & TDF_DETAILS))
3498 fprintf (dump_file, "(analyze_siv_subscript \n");
/* Constant vs. affine-in-loop: the classic constant-affine SIV test.  */
3500 if (evolution_function_is_constant_p (chrec_a)
3501 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
3502 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
3503 overlaps_a, overlaps_b, last_conflicts);
/* Symmetric case: swap the roles (and result slots) of A and B.  */
3505 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
3506 && evolution_function_is_constant_p (chrec_b))
3507 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
3508 overlaps_b, overlaps_a, last_conflicts);
/* Both affine in the loop: use the affine-affine solver, directly
   when symbol-free, or after rewriting the chrecs to cancel out
   their symbolic parts.  */
3510 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
3511 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
3513 if (!chrec_contains_symbols (chrec_a)
3514 && !chrec_contains_symbols (chrec_b))
3516 analyze_subscript_affine_affine (chrec_a, chrec_b,
3517 overlaps_a, overlaps_b,
3518 last_conflicts);
/* Classify the solver's answer for the dependence statistics.  */
3520 if (CF_NOT_KNOWN_P (*overlaps_a)
3521 || CF_NOT_KNOWN_P (*overlaps_b))
3522 dependence_stats.num_siv_unimplemented++;
3523 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
3524 || CF_NO_DEPENDENCE_P (*overlaps_b))
3525 dependence_stats.num_siv_independent++;
3526 else
3527 dependence_stats.num_siv_dependent++;
/* can_use_analyze_subscript_affine_affine rewrites CHREC_A and
   CHREC_B in place (they are passed by address) into symbol-free
   equivalents when the symbolic bases cancel.  */
3529 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
3530 &chrec_b))
3532 analyze_subscript_affine_affine (chrec_a, chrec_b,
3533 overlaps_a, overlaps_b,
3534 last_conflicts);
3536 if (CF_NOT_KNOWN_P (*overlaps_a)
3537 || CF_NOT_KNOWN_P (*overlaps_b))
3538 dependence_stats.num_siv_unimplemented++;
3539 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
3540 || CF_NO_DEPENDENCE_P (*overlaps_b))
3541 dependence_stats.num_siv_independent++;
3542 else
3543 dependence_stats.num_siv_dependent++;
3545 else
/* Fall through to the shared "don't know" handling below.  */
3546 goto siv_subscript_dontknow;
3549 else
/* Conservative answer: the conflict functions are unknown.  */
3551 siv_subscript_dontknow:;
3552 if (dump_file && (dump_flags & TDF_DETAILS))
3553 fprintf (dump_file, " siv test failed: unimplemented");
3554 *overlaps_a = conflict_fn_not_known ();
3555 *overlaps_b = conflict_fn_not_known ();
3556 *last_conflicts = chrec_dont_know;
3557 dependence_stats.num_siv_unimplemented++;
3560 if (dump_file && (dump_flags & TDF_DETAILS))
3561 fprintf (dump_file, ")\n");
3564 /* Returns false if we can prove that the greatest common divisor of the steps
3565 of CHREC does not divide CST, true otherwise. */
3567 static bool
3568 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
3570 HOST_WIDE_INT cd = 0, val;
3571 tree step;
3573 if (!tree_fits_shwi_p (cst))
3574 return true;
3575 val = tree_to_shwi (cst);
3577 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
3579 step = CHREC_RIGHT (chrec);
3580 if (!tree_fits_shwi_p (step))
3581 return true;
3582 cd = gcd (cd, tree_to_shwi (step));
3583 chrec = CHREC_LEFT (chrec);
3586 return val % cd == 0;
3589 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
3590 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
3591 functions that describe the relation between the elements accessed
3592 twice by CHREC_A and CHREC_B. For k >= 0, the following property
3593 is verified:
3595 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
3597 static void
3598 analyze_miv_subscript (tree chrec_a,
3599 tree chrec_b,
3600 conflict_function **overlaps_a,
3601 conflict_function **overlaps_b,
3602 tree *last_conflicts,
3603 struct loop *loop_nest)
3605 tree type, difference;
3607 dependence_stats.num_miv++;
3608 if (dump_file && (dump_flags & TDF_DETAILS))
3609 fprintf (dump_file, "(analyze_miv_subscript \n");
/* Convert both chrecs to a common signed type before subtracting.  */
3611 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3612 chrec_a = chrec_convert (type, chrec_a, NULL);
3613 chrec_b = chrec_convert (type, chrec_b, NULL);
3614 difference = chrec_fold_minus (type, chrec_a, chrec_b);
/* Identical evolutions: a conflict at every iteration.  */
3616 if (eq_evolutions_p (chrec_a, chrec_b))
3618 /* Access functions are the same: all the elements are accessed
3619 in the same order. */
3620 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3621 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3622 *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
3623 dependence_stats.num_miv_dependent++;
/* Constant difference that the gcd of the steps provably does not
   divide: the two functions can never reach the same element.  */
3626 else if (evolution_function_is_constant_p (difference)
3627 /* For the moment, the following is verified:
3628 evolution_function_is_affine_multivariate_p (chrec_a,
3629 loop_nest->num) */
3630 && !gcd_of_steps_may_divide_p (chrec_a, difference))
3632 /* testsuite/.../ssa-chrec-33.c
3633 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
3635 The difference is 1, and all the evolution steps are multiples
3636 of 2, consequently there are no overlapping elements. */
3637 *overlaps_a = conflict_fn_no_dependence ();
3638 *overlaps_b = conflict_fn_no_dependence ();
3639 *last_conflicts = integer_zero_node;
3640 dependence_stats.num_miv_independent++;
/* Both multivariate-affine and symbol-free: solve exactly.  */
3643 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
3644 && !chrec_contains_symbols (chrec_a)
3645 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
3646 && !chrec_contains_symbols (chrec_b))
3648 /* testsuite/.../ssa-chrec-35.c
3649 {0, +, 1}_2 vs. {0, +, 1}_3
3650 the overlapping elements are respectively located at iterations:
3651 {0, +, 1}_x and {0, +, 1}_x,
3652 in other words, we have the equality:
3653 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
3655 Other examples:
3656 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
3657 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
3659 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
3660 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
3662 analyze_subscript_affine_affine (chrec_a, chrec_b,
3663 overlaps_a, overlaps_b, last_conflicts);
/* Classify the solver's answer for the dependence statistics.  */
3665 if (CF_NOT_KNOWN_P (*overlaps_a)
3666 || CF_NOT_KNOWN_P (*overlaps_b))
3667 dependence_stats.num_miv_unimplemented++;
3668 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
3669 || CF_NO_DEPENDENCE_P (*overlaps_b))
3670 dependence_stats.num_miv_independent++;
3671 else
3672 dependence_stats.num_miv_dependent++;
3675 else
3677 /* When the analysis is too difficult, answer "don't know". */
3678 if (dump_file && (dump_flags & TDF_DETAILS))
3679 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
3681 *overlaps_a = conflict_fn_not_known ();
3682 *overlaps_b = conflict_fn_not_known ();
3683 *last_conflicts = chrec_dont_know;
3684 dependence_stats.num_miv_unimplemented++;
3687 if (dump_file && (dump_flags & TDF_DETAILS))
3688 fprintf (dump_file, ")\n");
3691 /* Determines the iterations for which CHREC_A is equal to CHREC_B
3692 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
3693 OVERLAP_ITERATIONS_B are initialized with two functions that
3694 describe the iterations that contain conflicting elements.
3696 Remark: For an integer k >= 0, the following equality is true:
3698 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
3701 static void
3702 analyze_overlapping_iterations (tree chrec_a,
3703 tree chrec_b,
3704 conflict_function **overlap_iterations_a,
3705 conflict_function **overlap_iterations_b,
3706 tree *last_conflicts, struct loop *loop_nest)
3708 unsigned int lnn = loop_nest->num;
3710 dependence_stats.num_subscript_tests++;
3712 if (dump_file && (dump_flags & TDF_DETAILS))
3714 fprintf (dump_file, "(analyze_overlapping_iterations \n");
3715 fprintf (dump_file, " (chrec_a = ");
3716 print_generic_expr (dump_file, chrec_a);
3717 fprintf (dump_file, ")\n (chrec_b = ");
3718 print_generic_expr (dump_file, chrec_b);
3719 fprintf (dump_file, ")\n");
/* Missing or undetermined access functions: conservative answer.  */
3722 if (chrec_a == NULL_TREE
3723 || chrec_b == NULL_TREE
3724 || chrec_contains_undetermined (chrec_a)
3725 || chrec_contains_undetermined (chrec_b))
3727 dependence_stats.num_subscript_undetermined++;
3729 *overlap_iterations_a = conflict_fn_not_known ();
3730 *overlap_iterations_b = conflict_fn_not_known ();
3733 /* If they are the same chrec, and are affine, they overlap
3734 on every iteration. */
3735 else if (eq_evolutions_p (chrec_a, chrec_b)
3736 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
3737 || operand_equal_p (chrec_a, chrec_b, 0)))
3739 dependence_stats.num_same_subscript_function++;
3740 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3741 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3742 *last_conflicts = chrec_dont_know;
3745 /* If they aren't the same, and aren't affine, we can't do anything
3746 yet. */
3747 else if ((chrec_contains_symbols (chrec_a)
3748 || chrec_contains_symbols (chrec_b))
3749 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
3750 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
3752 dependence_stats.num_subscript_undetermined++;
3753 *overlap_iterations_a = conflict_fn_not_known ();
3754 *overlap_iterations_b = conflict_fn_not_known ();
/* Dispatch on the number of index variables: ZIV (none), SIV (one),
   MIV (several).  */
3757 else if (ziv_subscript_p (chrec_a, chrec_b))
3758 analyze_ziv_subscript (chrec_a, chrec_b,
3759 overlap_iterations_a, overlap_iterations_b,
3760 last_conflicts);
3762 else if (siv_subscript_p (chrec_a, chrec_b))
3763 analyze_siv_subscript (chrec_a, chrec_b,
3764 overlap_iterations_a, overlap_iterations_b,
3765 last_conflicts, lnn);
3767 else
3768 analyze_miv_subscript (chrec_a, chrec_b,
3769 overlap_iterations_a, overlap_iterations_b,
3770 last_conflicts, loop_nest);
3772 if (dump_file && (dump_flags & TDF_DETAILS))
3774 fprintf (dump_file, " (overlap_iterations_a = ");
3775 dump_conflict_function (dump_file, *overlap_iterations_a);
3776 fprintf (dump_file, ")\n (overlap_iterations_b = ");
3777 dump_conflict_function (dump_file, *overlap_iterations_b);
3778 fprintf (dump_file, "))\n");
3782 /* Helper function for uniquely inserting distance vectors. */
3784 static void
3785 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
3787 unsigned i;
3788 lambda_vector v;
3790 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, v)
3791 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
3792 return;
3794 DDR_DIST_VECTS (ddr).safe_push (dist_v);
3797 /* Helper function for uniquely inserting direction vectors. */
3799 static void
3800 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
3802 unsigned i;
3803 lambda_vector v;
3805 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), i, v)
3806 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
3807 return;
3809 DDR_DIR_VECTS (ddr).safe_push (dir_v);
3812 /* Add a distance of 1 on all the loops outer than INDEX. If we
3813 haven't yet determined a distance for this outer loop, push a new
3814 distance vector composed of the previous distance, and a distance
3815 of 1 for this outer loop. Example:
3817 | loop_1
3818 | loop_2
3819 | A[10]
3820 | endloop_2
3821 | endloop_1
3823 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
3824 save (0, 1), then we have to save (1, 0). */
3826 static void
3827 add_outer_distances (struct data_dependence_relation *ddr,
3828 lambda_vector dist_v, int index)
3830 /* For each outer loop where init_v is not set, the accesses are
3831 in dependence of distance 1 in the loop. */
3832 while (--index >= 0)
3834 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3835 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3836 save_v[index] = 1;
3837 save_dist_v (ddr, save_v);
3841 /* Return false when fail to represent the data dependence as a
3842 distance vector. INIT_B is set to true when a component has been
3843 added to the distance vector DIST_V. INDEX_CARRY is then set to
3844 the index in DIST_V that carries the dependence. */
3846 static bool
3847 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
3848 struct data_reference *ddr_a,
3849 struct data_reference *ddr_b,
3850 lambda_vector dist_v, bool *init_b,
3851 int *index_carry)
3853 unsigned i;
/* INIT_V[k] flags the loops for which a distance is already set in
   DIST_V, so the subscript coupling test below can compare.  */
3854 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3856 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3858 tree access_fn_a, access_fn_b;
3859 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
/* An unknown subscript distance cannot be represented.  */
3861 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
3863 non_affine_dependence_relation (ddr);
3864 return false;
3867 access_fn_a = DR_ACCESS_FN (ddr_a, i);
3868 access_fn_b = DR_ACCESS_FN (ddr_b, i);
3870 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
3871 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
3873 HOST_WIDE_INT dist;
3874 int index;
3875 int var_a = CHREC_VARIABLE (access_fn_a);
3876 int var_b = CHREC_VARIABLE (access_fn_b);
/* Both access functions must evolve in the same loop.  */
3878 if (var_a != var_b
3879 || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
3881 non_affine_dependence_relation (ddr);
3882 return false;
3885 dist = int_cst_value (SUB_DISTANCE (subscript));
3886 index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
3887 *index_carry = MIN (index, *index_carry);
3889 /* This is the subscript coupling test. If we have already
3890 recorded a distance for this loop (a distance coming from
3891 another subscript), it should be the same. For example,
3892 in the following code, there is no dependence:
3894 | loop i = 0, N, 1
3895 | T[i+1][i] = ...
3896 | ... = T[i][i]
3897 | endloop
3899 if (init_v[index] != 0 && dist_v[index] != dist)
3901 finalize_ddr_dependent (ddr, chrec_known);
3902 return false;
3905 dist_v[index] = dist;
3906 init_v[index] = 1;
3907 *init_b = true;
3909 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
3911 /* This can be for example an affine vs. constant dependence
3912 (T[i] vs. T[3]) that is not an affine dependence and is
3913 not representable as a distance vector. */
3914 non_affine_dependence_relation (ddr);
3915 return false;
3919 return true;
3922 /* Return true when the DDR contains only constant access functions. */
3924 static bool
3925 constant_access_functions (const struct data_dependence_relation *ddr)
3927 unsigned i;
3929 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3930 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
3931 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
3932 return false;
3934 return true;
3937 /* Helper function for the case where DDR_A and DDR_B are the same
3938 multivariate access function with a constant step. For an example
3939 see pr34635-1.c. */
3941 static void
3942 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
3944 int x_1, x_2;
3945 tree c_1 = CHREC_LEFT (c_2);
3946 tree c_0 = CHREC_LEFT (c_1);
3947 lambda_vector dist_v;
3948 HOST_WIDE_INT v1, v2, cd;
3950 /* Polynomials with more than 2 variables are not handled yet. When
3951 the evolution steps are parameters, it is not possible to
3952 represent the dependence using classical distance vectors. */
3953 if (TREE_CODE (c_0) != INTEGER_CST
3954 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
3955 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
3957 DDR_AFFINE_P (ddr) = false;
3958 return;
3961 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
3962 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
3964 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
3965 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3966 v1 = int_cst_value (CHREC_RIGHT (c_1));
3967 v2 = int_cst_value (CHREC_RIGHT (c_2));
3968 cd = gcd (v1, v2);
3969 v1 /= cd;
3970 v2 /= cd;
3972 if (v2 < 0)
3974 v2 = -v2;
3975 v1 = -v1;
3978 dist_v[x_1] = v2;
3979 dist_v[x_2] = -v1;
3980 save_dist_v (ddr, dist_v);
3982 add_outer_distances (ddr, dist_v, x_1);
3985 /* Helper function for the case where DDR_A and DDR_B are the same
3986 access functions. */
3988 static void
3989 add_other_self_distances (struct data_dependence_relation *ddr)
3991 lambda_vector dist_v;
3992 unsigned i;
/* Index of the innermost loop carrying the dependence; starts past
   the last loop and is lowered as subscripts are examined.  */
3993 int index_carry = DDR_NB_LOOPS (ddr);
3995 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3997 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
3999 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
4001 if (!evolution_function_is_univariate_p (access_fun))
/* Multivariate case is only handled for single-subscript refs.  */
4003 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
4005 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
4006 return;
4009 access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
4011 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
4012 add_multivariate_self_dist (ddr, access_fun);
4013 else
4014 /* The evolution step is not constant: it varies in
4015 the outer loop, so this cannot be represented by a
4016 distance vector. For example in pr34635.c the
4017 evolution is {0, +, {0, +, 4}_1}_2. */
4018 DDR_AFFINE_P (ddr) = false;
4020 return;
/* Univariate subscript: remember the innermost of the loops in
   which some subscript evolves.  */
4023 index_carry = MIN (index_carry,
4024 index_in_loop_nest (CHREC_VARIABLE (access_fun),
4025 DDR_LOOP_NEST (ddr)));
/* Save the zero vector and a distance of 1 on each outer loop.  */
4029 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4030 add_outer_distances (ddr, dist_v, index_carry);
4033 static void
4034 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
4036 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4038 dist_v[DDR_INNER_LOOP (ddr)] = 1;
4039 save_dist_v (ddr, dist_v);
4042 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
4043 is the case for example when access functions are the same and
4044 equal to a constant, as in:
4046 | loop_1
4047 | A[3] = ...
4048 | ... = A[3]
4049 | endloop_1
4051 in which case the distance vectors are (0) and (1). */
4053 static void
4054 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
4056 unsigned i, j;
4058 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
4060 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
4061 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
4062 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
4064 for (j = 0; j < ca->n; j++)
4065 if (affine_function_zero_p (ca->fns[j]))
4067 insert_innermost_unit_dist_vector (ddr);
4068 return;
4071 for (j = 0; j < cb->n; j++)
4072 if (affine_function_zero_p (cb->fns[j]))
4074 insert_innermost_unit_dist_vector (ddr);
4075 return;
4080 /* Compute the classic per loop distance vector. DDR is the data
4081 dependence relation to build a vector from. Return false when fail
4082 to represent the data dependence as a distance vector. */
4084 static bool
4085 build_classic_dist_vector (struct data_dependence_relation *ddr,
4086 struct loop *loop_nest)
4088 bool init_b = false;
4089 int index_carry = DDR_NB_LOOPS (ddr);
4090 lambda_vector dist_v;
/* Nothing to do when the relation is already finalized (known
   independent or don't-know).  */
4092 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
4093 return false;
/* Self-dependence: identical access functions on both sides.  */
4095 if (same_access_functions (ddr))
4097 /* Save the 0 vector. */
4098 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4099 save_dist_v (ddr, dist_v);
4101 if (constant_access_functions (ddr))
4102 add_distance_for_zero_overlaps (ddr);
4104 if (DDR_NB_LOOPS (ddr) > 1)
4105 add_other_self_distances (ddr);
4107 return true;
/* General case: fill DIST_V from the per-subscript distances.  */
4110 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4111 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
4112 dist_v, &init_b, &index_carry))
4113 return false;
4115 /* Save the distance vector if we initialized one. */
4116 if (init_b)
4118 /* Verify a basic constraint: classic distance vectors should
4119 always be lexicographically positive.
4121 Data references are collected in the order of execution of
4122 the program, thus for the following loop
4124 | for (i = 1; i < 100; i++)
4125 | for (j = 1; j < 100; j++)
4127 | t = T[j+1][i-1]; // A
4128 | T[j][i] = t + 2; // B
4131 references are collected following the direction of the wind:
4132 A then B. The data dependence tests are performed also
4133 following this order, such that we're looking at the distance
4134 separating the elements accessed by A from the elements later
4135 accessed by B. But in this example, the distance returned by
4136 test_dep (A, B) is lexicographically negative (-1, 1), that
4137 means that the access A occurs later than B with respect to
4138 the outer loop, ie. we're actually looking upwind. In this
4139 case we solve test_dep (B, A) looking downwind to the
4140 lexicographically positive solution, that returns the
4141 distance vector (1, -1). */
4142 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
4144 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
/* Re-run the subscript tests with A and B swapped.  */
4145 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
4146 loop_nest))
4147 return false;
4148 compute_subscript_distance (ddr);
4149 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
4150 save_v, &init_b, &index_carry))
4151 return false;
4152 save_dist_v (ddr, save_v);
4153 DDR_REVERSED_P (ddr) = true;
4155 /* In this case there is a dependence forward for all the
4156 outer loops:
4158 | for (k = 1; k < 100; k++)
4159 | for (i = 1; i < 100; i++)
4160 | for (j = 1; j < 100; j++)
4162 | t = T[j+1][i-1]; // A
4163 | T[j][i] = t + 2; // B
4166 the vectors are:
4167 (0, 1, -1)
4168 (1, 1, -1)
4169 (1, -1, 1)
4171 if (DDR_NB_LOOPS (ddr) > 1)
4173 add_outer_distances (ddr, save_v, index_carry);
4174 add_outer_distances (ddr, dist_v, index_carry);
/* Lexicographically positive vector: keep it as-is.  */
4177 else
4179 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4180 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
4182 if (DDR_NB_LOOPS (ddr) > 1)
4184 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
/* Also compute the swapped-direction vector for outer loops.  */
4186 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
4187 DDR_A (ddr), loop_nest))
4188 return false;
4189 compute_subscript_distance (ddr);
4190 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
4191 opposite_v, &init_b,
4192 &index_carry))
4193 return false;
4195 save_dist_v (ddr, save_v);
4196 add_outer_distances (ddr, dist_v, index_carry);
4197 add_outer_distances (ddr, opposite_v, index_carry);
4199 else
4200 save_dist_v (ddr, save_v);
/* No component was initialized: the dependence is carried by the
   outer loops only.  */
4203 else
4205 /* There is a distance of 1 on all the outer loops: Example:
4206 there is a dependence of distance 1 on loop_1 for the array A.
4208 | loop_1
4209 | A[5] = ...
4210 | endloop
4212 add_outer_distances (ddr, dist_v,
4213 lambda_vector_first_nz (dist_v,
4214 DDR_NB_LOOPS (ddr), 0));
4217 if (dump_file && (dump_flags & TDF_DETAILS))
4219 unsigned i;
4221 fprintf (dump_file, "(build_classic_dist_vector\n");
4222 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
4224 fprintf (dump_file, " dist_vector = (");
4225 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
4226 DDR_NB_LOOPS (ddr));
4227 fprintf (dump_file, " )\n");
4229 fprintf (dump_file, ")\n");
4232 return true;
4235 /* Return the direction for a given distance.
4236 FIXME: Computing dir this way is suboptimal, since dir can catch
4237 cases that dist is unable to represent. */
4239 static inline enum data_dependence_direction
4240 dir_from_dist (int dist)
4242 if (dist > 0)
4243 return dir_positive;
4244 else if (dist < 0)
4245 return dir_negative;
4246 else
4247 return dir_equal;
4250 /* Compute the classic per loop direction vector. DDR is the data
4251 dependence relation to build a vector from. */
4253 static void
4254 build_classic_dir_vector (struct data_dependence_relation *ddr)
4256 unsigned i, j;
4257 lambda_vector dist_v;
4259 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
4261 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4263 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
4264 dir_v[j] = dir_from_dist (dist_v[j]);
4266 save_dir_v (ddr, dir_v);
4270 /* Helper function. Returns true when there is a dependence between
4271 data references DRA and DRB. */
4273 static bool
4274 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
4275 struct data_reference *dra,
4276 struct data_reference *drb,
4277 struct loop *loop_nest)
4279 unsigned int i;
4280 tree last_conflicts;
4281 struct subscript *subscript;
/* NULL_TREE: dependent; chrec_known: independent; chrec_dont_know:
   could not be determined.  */
4282 tree res = NULL_TREE;
4284 for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
4286 conflict_function *overlaps_a, *overlaps_b;
4288 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
4289 DR_ACCESS_FN (drb, i),
4290 &overlaps_a, &overlaps_b,
4291 &last_conflicts, loop_nest);
/* Release conflict functions from a previous run on this DDR
   before overwriting them.  */
4293 if (SUB_CONFLICTS_IN_A (subscript))
4294 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
4295 if (SUB_CONFLICTS_IN_B (subscript))
4296 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
4298 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
4299 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
4300 SUB_LAST_CONFLICT (subscript) = last_conflicts;
4302 /* If there is any undetermined conflict function we have to
4303 give a conservative answer in case we cannot prove that
4304 no dependence exists when analyzing another subscript. */
4305 if (CF_NOT_KNOWN_P (overlaps_a)
4306 || CF_NOT_KNOWN_P (overlaps_b))
4308 res = chrec_dont_know;
4309 continue;
4312 /* When there is a subscript with no dependence we can stop. */
4313 else if (CF_NO_DEPENDENCE_P (overlaps_a)
4314 || CF_NO_DEPENDENCE_P (overlaps_b))
4316 res = chrec_known;
4317 break;
/* All subscripts may conflict: the references are dependent.  */
4321 if (res == NULL_TREE)
4322 return true;
/* Otherwise finalize the relation as independent or don't-know.  */
4324 if (res == chrec_known)
4325 dependence_stats.num_dependence_independent++;
4326 else
4327 dependence_stats.num_dependence_undetermined++;
4328 finalize_ddr_dependent (ddr, res);
4329 return false;
4332 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
4334 static void
4335 subscript_dependence_tester (struct data_dependence_relation *ddr,
4336 struct loop *loop_nest)
4338 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
4339 dependence_stats.num_dependence_dependent++;
4341 compute_subscript_distance (ddr);
4342 if (build_classic_dist_vector (ddr, loop_nest))
4343 build_classic_dir_vector (ddr);
4346 /* Returns true when all the access functions of A are affine or
4347 constant with respect to LOOP_NEST. */
4349 static bool
4350 access_functions_are_affine_or_constant_p (const struct data_reference *a,
4351 const struct loop *loop_nest)
4353 unsigned int i;
4354 vec<tree> fns = DR_ACCESS_FNS (a);
4355 tree t;
4357 FOR_EACH_VEC_ELT (fns, i, t)
4358 if (!evolution_function_is_invariant_p (t, loop_nest->num)
4359 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
4360 return false;
4362 return true;
/* This computes the affine dependence relation between A and B with
   respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
   independence between two accesses, while CHREC_DONT_KNOW is used
   for representing the unknown relation.

   Note that it is possible to stop the computation of the dependence
   relation the first time we detect a CHREC_KNOWN element for a given
   subscript.  */

void
compute_affine_dependence (struct data_dependence_relation *ddr,
                           struct loop *loop_nest)
{
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);

  /* Opening dump marker; the closing paren is printed at the end of
     this function once the relation is known.  */
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "(compute_affine_dependence\n");
      fprintf (dump_file, "  stmt_a: ");
      print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
      fprintf (dump_file, "  stmt_b: ");
      print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
    }

  /* Analyze only when the dependence relation is not yet known.  */
  if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      dependence_stats.num_dependence_tests++;

      /* The subscript tester requires affine or constant access
         functions on both sides.  */
      if (access_functions_are_affine_or_constant_p (dra, loop_nest)
          && access_functions_are_affine_or_constant_p (drb, loop_nest))
        subscript_dependence_tester (ddr, loop_nest);

      /* As a last case, if the dependence cannot be determined, or if
         the dependence is considered too difficult to determine, answer
         "don't know".  */
      else
        {
          dependence_stats.num_dependence_undetermined++;

          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Data ref a:\n");
              dump_data_reference (dump_file, dra);
              fprintf (dump_file, "Data ref b:\n");
              dump_data_reference (dump_file, drb);
              fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
            }
          finalize_ddr_dependent (ddr, chrec_dont_know);
        }
    }

  /* Report the final verdict for this pair.  */
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
        fprintf (dump_file, ") -> no dependence\n");
      else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
        fprintf (dump_file, ") -> dependence analysis failed\n");
      else
        fprintf (dump_file, ")\n");
    }
}
4429 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4430 the data references in DATAREFS, in the LOOP_NEST. When
4431 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4432 relations. Return true when successful, i.e. data references number
4433 is small enough to be handled. */
4435 bool
4436 compute_all_dependences (vec<data_reference_p> datarefs,
4437 vec<ddr_p> *dependence_relations,
4438 vec<loop_p> loop_nest,
4439 bool compute_self_and_rr)
4441 struct data_dependence_relation *ddr;
4442 struct data_reference *a, *b;
4443 unsigned int i, j;
4445 if ((int) datarefs.length ()
4446 > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
4448 struct data_dependence_relation *ddr;
4450 /* Insert a single relation into dependence_relations:
4451 chrec_dont_know. */
4452 ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
4453 dependence_relations->safe_push (ddr);
4454 return false;
4457 FOR_EACH_VEC_ELT (datarefs, i, a)
4458 for (j = i + 1; datarefs.iterate (j, &b); j++)
4459 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4461 ddr = initialize_data_dependence_relation (a, b, loop_nest);
4462 dependence_relations->safe_push (ddr);
4463 if (loop_nest.exists ())
4464 compute_affine_dependence (ddr, loop_nest[0]);
4467 if (compute_self_and_rr)
4468 FOR_EACH_VEC_ELT (datarefs, i, a)
4470 ddr = initialize_data_dependence_relation (a, a, loop_nest);
4471 dependence_relations->safe_push (ddr);
4472 if (loop_nest.exists ())
4473 compute_affine_dependence (ddr, loop_nest[0]);
4476 return true;
/* Describes a location of a memory reference.  */

struct data_ref_loc
{
  /* The memory reference.  */
  tree ref;

  /* True if the memory reference is read.  */
  bool is_read;
};
/* Stores the locations of memory references in STMT to REFERENCES.  Returns
   true if STMT clobbers memory, false otherwise.  */

static bool
get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
{
  bool clobbers_memory = false;
  data_ref_loc ref;
  tree op0, op1;
  enum gimple_code stmt_code = gimple_code (stmt);

  /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
     As we cannot model data-references to not spelled out
     accesses give up if they may occur.  */
  if (stmt_code == GIMPLE_CALL
      && !(gimple_call_flags (stmt) & ECF_CONST))
    {
      /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
      if (gimple_call_internal_p (stmt))
        switch (gimple_call_internal_fn (stmt))
          {
          case IFN_GOMP_SIMD_LANE:
            {
              /* The SIMD-lane marker only clobbers when it does not
                 belong to the simduid of its containing loop.  */
              struct loop *loop = gimple_bb (stmt)->loop_father;
              tree uid = gimple_call_arg (stmt, 0);
              gcc_assert (TREE_CODE (uid) == SSA_NAME);
              if (loop == NULL
                  || loop->simduid != SSA_NAME_VAR (uid))
                clobbers_memory = true;
              break;
            }
          case IFN_MASK_LOAD:
          case IFN_MASK_STORE:
            /* Masked loads/stores are modeled as data references below.  */
            break;
          default:
            clobbers_memory = true;
            break;
          }
      else
        clobbers_memory = true;
    }
  else if (stmt_code == GIMPLE_ASM
           && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
               || gimple_vuse (stmt)))
    clobbers_memory = true;

  /* Without a virtual use the statement touches no memory we track.  */
  if (!gimple_vuse (stmt))
    return clobbers_memory;

  if (stmt_code == GIMPLE_ASSIGN)
    {
      tree base;
      op0 = gimple_assign_lhs (stmt);
      op1 = gimple_assign_rhs1 (stmt);

      /* Record the RHS as a read when it is a declaration or a memory
         reference whose base is a real memory object (not an SSA name
         or an invariant address).  */
      if (DECL_P (op1)
          || (REFERENCE_CLASS_P (op1)
              && (base = get_base_address (op1))
              && TREE_CODE (base) != SSA_NAME
              && !is_gimple_min_invariant (base)))
        {
          ref.ref = op1;
          ref.is_read = true;
          references->safe_push (ref);
        }
    }
  else if (stmt_code == GIMPLE_CALL)
    {
      unsigned i, n;
      tree ptr, type;
      unsigned int align;

      ref.is_read = false;
      if (gimple_call_internal_p (stmt))
        switch (gimple_call_internal_fn (stmt))
          {
          case IFN_MASK_LOAD:
            if (gimple_call_lhs (stmt) == NULL_TREE)
              break;
            ref.is_read = true;
            /* FALLTHRU */
          case IFN_MASK_STORE:
            /* Reconstruct a MEM_REF for the masked access: arg 0 is the
               pointer, arg 1 the alignment; the value type comes from
               the lhs (load) or arg 3 (store).  */
            ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
            align = tree_to_shwi (gimple_call_arg (stmt, 1));
            if (ref.is_read)
              type = TREE_TYPE (gimple_call_lhs (stmt));
            else
              type = TREE_TYPE (gimple_call_arg (stmt, 3));
            if (TYPE_ALIGN (type) != align)
              type = build_aligned_type (type, align);
            ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
                                   ptr);
            references->safe_push (ref);
            return false;
          default:
            break;
          }

      /* Aggregate arguments passed by reference are reads.  */
      op0 = gimple_call_lhs (stmt);
      n = gimple_call_num_args (stmt);
      for (i = 0; i < n; i++)
        {
          op1 = gimple_call_arg (stmt, i);

          if (DECL_P (op1)
              || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
            {
              ref.ref = op1;
              ref.is_read = true;
              references->safe_push (ref);
            }
        }
    }
  else
    return clobbers_memory;

  /* A memory lhs (of an assign or a call) is a write.  */
  if (op0
      && (DECL_P (op0)
          || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
    {
      ref.ref = op0;
      ref.is_read = false;
      references->safe_push (ref);
    }
  return clobbers_memory;
}
4619 /* Returns true if the loop-nest has any data reference. */
4621 bool
4622 loop_nest_has_data_refs (loop_p loop)
4624 basic_block *bbs = get_loop_body (loop);
4625 auto_vec<data_ref_loc, 3> references;
4627 for (unsigned i = 0; i < loop->num_nodes; i++)
4629 basic_block bb = bbs[i];
4630 gimple_stmt_iterator bsi;
4632 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4634 gimple *stmt = gsi_stmt (bsi);
4635 get_references_in_stmt (stmt, &references);
4636 if (references.length ())
4638 free (bbs);
4639 return true;
4643 free (bbs);
4645 if (loop->inner)
4647 loop = loop->inner;
4648 while (loop)
4650 if (loop_nest_has_data_refs (loop))
4651 return true;
4652 loop = loop->next;
4655 return false;
4658 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4659 reference, returns false, otherwise returns true. NEST is the outermost
4660 loop of the loop nest in which the references should be analyzed. */
4662 bool
4663 find_data_references_in_stmt (struct loop *nest, gimple *stmt,
4664 vec<data_reference_p> *datarefs)
4666 unsigned i;
4667 auto_vec<data_ref_loc, 2> references;
4668 data_ref_loc *ref;
4669 bool ret = true;
4670 data_reference_p dr;
4672 if (get_references_in_stmt (stmt, &references))
4673 return false;
4675 FOR_EACH_VEC_ELT (references, i, ref)
4677 dr = create_data_ref (nest, loop_containing_stmt (stmt),
4678 ref->ref, stmt, ref->is_read);
4679 gcc_assert (dr != NULL);
4680 datarefs->safe_push (dr);
4683 return ret;
4686 /* Stores the data references in STMT to DATAREFS. If there is an
4687 unanalyzable reference, returns false, otherwise returns true.
4688 NEST is the outermost loop of the loop nest in which the references
4689 should be instantiated, LOOP is the loop in which the references
4690 should be analyzed. */
4692 bool
4693 graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple *stmt,
4694 vec<data_reference_p> *datarefs)
4696 unsigned i;
4697 auto_vec<data_ref_loc, 2> references;
4698 data_ref_loc *ref;
4699 bool ret = true;
4700 data_reference_p dr;
4702 if (get_references_in_stmt (stmt, &references))
4703 return false;
4705 FOR_EACH_VEC_ELT (references, i, ref)
4707 dr = create_data_ref (nest, loop, ref->ref, stmt, ref->is_read);
4708 gcc_assert (dr != NULL);
4709 datarefs->safe_push (dr);
4712 return ret;
4715 /* Search the data references in LOOP, and record the information into
4716 DATAREFS. Returns chrec_dont_know when failing to analyze a
4717 difficult case, returns NULL_TREE otherwise. */
4719 tree
4720 find_data_references_in_bb (struct loop *loop, basic_block bb,
4721 vec<data_reference_p> *datarefs)
4723 gimple_stmt_iterator bsi;
4725 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4727 gimple *stmt = gsi_stmt (bsi);
4729 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4731 struct data_reference *res;
4732 res = XCNEW (struct data_reference);
4733 datarefs->safe_push (res);
4735 return chrec_dont_know;
4739 return NULL_TREE;
4742 /* Search the data references in LOOP, and record the information into
4743 DATAREFS. Returns chrec_dont_know when failing to analyze a
4744 difficult case, returns NULL_TREE otherwise.
4746 TODO: This function should be made smarter so that it can handle address
4747 arithmetic as if they were array accesses, etc. */
4749 tree
4750 find_data_references_in_loop (struct loop *loop,
4751 vec<data_reference_p> *datarefs)
4753 basic_block bb, *bbs;
4754 unsigned int i;
4756 bbs = get_loop_body_in_dom_order (loop);
4758 for (i = 0; i < loop->num_nodes; i++)
4760 bb = bbs[i];
4762 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4764 free (bbs);
4765 return chrec_dont_know;
4768 free (bbs);
4770 return NULL_TREE;
4773 /* Return the alignment in bytes that DRB is guaranteed to have at all
4774 times. */
4776 unsigned int
4777 dr_alignment (innermost_loop_behavior *drb)
4779 /* Get the alignment of BASE_ADDRESS + INIT. */
4780 unsigned int alignment = drb->base_alignment;
4781 unsigned int misalignment = (drb->base_misalignment
4782 + TREE_INT_CST_LOW (drb->init));
4783 if (misalignment != 0)
4784 alignment = MIN (alignment, misalignment & -misalignment);
4786 /* Cap it to the alignment of OFFSET. */
4787 if (!integer_zerop (drb->offset))
4788 alignment = MIN (alignment, drb->offset_alignment);
4790 /* Cap it to the alignment of STEP. */
4791 if (!integer_zerop (drb->step))
4792 alignment = MIN (alignment, drb->step_alignment);
4794 return alignment;
4797 /* Recursive helper function. */
4799 static bool
4800 find_loop_nest_1 (struct loop *loop, vec<loop_p> *loop_nest)
4802 /* Inner loops of the nest should not contain siblings. Example:
4803 when there are two consecutive loops,
4805 | loop_0
4806 | loop_1
4807 | A[{0, +, 1}_1]
4808 | endloop_1
4809 | loop_2
4810 | A[{0, +, 1}_2]
4811 | endloop_2
4812 | endloop_0
4814 the dependence relation cannot be captured by the distance
4815 abstraction. */
4816 if (loop->next)
4817 return false;
4819 loop_nest->safe_push (loop);
4820 if (loop->inner)
4821 return find_loop_nest_1 (loop->inner, loop_nest);
4822 return true;
4825 /* Return false when the LOOP is not well nested. Otherwise return
4826 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4827 contain the loops from the outermost to the innermost, as they will
4828 appear in the classic distance vector. */
4830 bool
4831 find_loop_nest (struct loop *loop, vec<loop_p> *loop_nest)
4833 loop_nest->safe_push (loop);
4834 if (loop->inner)
4835 return find_loop_nest_1 (loop->inner, loop_nest);
4836 return true;
/* Returns true when the data dependences have been computed, false otherwise.
   Given a loop nest LOOP, the following vectors are returned:
   DATAREFS is initialized to all the array elements contained in this loop,
   DEPENDENCE_RELATIONS contains the relations between the data references.
   Compute read-read and self relations if
   COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */

bool
compute_data_dependences_for_loop (struct loop *loop,
                                   bool compute_self_and_read_read_dependences,
                                   vec<loop_p> *loop_nest,
                                   vec<data_reference_p> *datarefs,
                                   vec<ddr_p> *dependence_relations)
{
  bool res = true;

  /* Reset the global statistics counters for this analysis.  */
  memset (&dependence_stats, 0, sizeof (dependence_stats));

  /* If the loop nest is not well formed, or one of the data references
     is not computable, give up without spending time to compute other
     dependences.  */
  if (!loop
      || !find_loop_nest (loop, loop_nest)
      || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
      || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
                                   compute_self_and_read_read_dependences))
    res = false;

  /* Dump the accumulated dependence-tester statistics when requested.  */
  if (dump_file && (dump_flags & TDF_STATS))
    {
      fprintf (dump_file, "Dependence tester statistics:\n");

      fprintf (dump_file, "Number of dependence tests: %d\n",
               dependence_stats.num_dependence_tests);
      fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
               dependence_stats.num_dependence_dependent);
      fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
               dependence_stats.num_dependence_independent);
      fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
               dependence_stats.num_dependence_undetermined);

      fprintf (dump_file, "Number of subscript tests: %d\n",
               dependence_stats.num_subscript_tests);
      fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
               dependence_stats.num_subscript_undetermined);
      fprintf (dump_file, "Number of same subscript function: %d\n",
               dependence_stats.num_same_subscript_function);

      fprintf (dump_file, "Number of ziv tests: %d\n",
               dependence_stats.num_ziv);
      fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
               dependence_stats.num_ziv_dependent);
      fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
               dependence_stats.num_ziv_independent);
      fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
               dependence_stats.num_ziv_unimplemented);

      fprintf (dump_file, "Number of siv tests: %d\n",
               dependence_stats.num_siv);
      fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
               dependence_stats.num_siv_dependent);
      fprintf (dump_file, "Number of siv tests returning independent: %d\n",
               dependence_stats.num_siv_independent);
      fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
               dependence_stats.num_siv_unimplemented);

      fprintf (dump_file, "Number of miv tests: %d\n",
               dependence_stats.num_miv);
      fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
               dependence_stats.num_miv_dependent);
      fprintf (dump_file, "Number of miv tests returning independent: %d\n",
               dependence_stats.num_miv_independent);
      fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
               dependence_stats.num_miv_unimplemented);
    }

  return res;
}
4918 /* Free the memory used by a data dependence relation DDR. */
4920 void
4921 free_dependence_relation (struct data_dependence_relation *ddr)
4923 if (ddr == NULL)
4924 return;
4926 if (DDR_SUBSCRIPTS (ddr).exists ())
4927 free_subscripts (DDR_SUBSCRIPTS (ddr));
4928 DDR_DIST_VECTS (ddr).release ();
4929 DDR_DIR_VECTS (ddr).release ();
4931 free (ddr);
4934 /* Free the memory used by the data dependence relations from
4935 DEPENDENCE_RELATIONS. */
4937 void
4938 free_dependence_relations (vec<ddr_p> dependence_relations)
4940 unsigned int i;
4941 struct data_dependence_relation *ddr;
4943 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
4944 if (ddr)
4945 free_dependence_relation (ddr);
4947 dependence_relations.release ();
4950 /* Free the memory used by the data references from DATAREFS. */
4952 void
4953 free_data_refs (vec<data_reference_p> datarefs)
4955 unsigned int i;
4956 struct data_reference *dr;
4958 FOR_EACH_VEC_ELT (datarefs, i, dr)
4959 free_data_ref (dr);
4960 datarefs.release ();