PR rtl-optimization/82913
[official-gcc.git] / gcc / tree-data-ref.c
blob559a8e4b8457208c4c8200cd686fb02015c6fb1a
1 /* Data references and dependences detectors.
2 Copyright (C) 2003-2017 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass walks a given loop structure searching for array
22 references. The information about the array accesses is recorded
23 in DATA_REFERENCE structures.
25 The basic test for determining the dependences is:
26 given two access functions chrec1 and chrec2 to a same array, and
27 x and y two vectors from the iteration domain, the same element of
28 the array is accessed twice at iterations x and y if and only if:
29 | chrec1 (x) == chrec2 (y).
31 The goals of this analysis are:
33 - to determine the independence: the relation between two
34 independent accesses is qualified with the chrec_known (this
35 information allows a loop parallelization),
37 - when two data references access the same data, to qualify the
38 dependence relation with classic dependence representations:
40 - distance vectors
41 - direction vectors
42 - loop carried level dependence
43 - polyhedron dependence
44 or with the chains of recurrences based representation,
46 - to define a knowledge base for storing the data dependence
47 information,
49 - to define an interface to access this data.
52 Definitions:
54 - subscript: given two array accesses a subscript is the tuple
55 composed of the access functions for a given dimension. Example:
56 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57 (f1, g1), (f2, g2), (f3, g3).
59 - Diophantine equation: an equation whose coefficients and
60 solutions are integer constants, for example the equation
61 | 3*x + 2*y = 1
62 has an integer solution x = 1 and y = -1.
64 References:
66 - "Advanced Compilation for High Performance Computing" by Randy
67 Allen and Ken Kennedy.
68 http://citeseer.ist.psu.edu/goff91practical.html
70 - "Loop Transformations for Restructuring Compilers - The Foundations"
71 by Utpal Banerjee.
76 #include "config.h"
77 #include "system.h"
78 #include "coretypes.h"
79 #include "backend.h"
80 #include "rtl.h"
81 #include "tree.h"
82 #include "gimple.h"
83 #include "gimple-pretty-print.h"
84 #include "alias.h"
85 #include "fold-const.h"
86 #include "expr.h"
87 #include "gimple-iterator.h"
88 #include "tree-ssa-loop-niter.h"
89 #include "tree-ssa-loop.h"
90 #include "tree-ssa.h"
91 #include "cfgloop.h"
92 #include "tree-data-ref.h"
93 #include "tree-scalar-evolution.h"
94 #include "dumpfile.h"
95 #include "tree-affine.h"
96 #include "params.h"
97 #include "builtins.h"
/* Counters for the dependence analyzer; presumably dumped when detailed
   dumping is enabled -- TODO confirm against the dump code elsewhere in
   this file.  */

static struct datadep_stats
{
  int num_dependence_tests;
  int num_dependence_dependent;
  int num_dependence_independent;
  int num_dependence_undetermined;

  int num_subscript_tests;
  int num_subscript_undetermined;
  int num_same_subscript_function;

  int num_ziv;
  int num_ziv_independent;
  int num_ziv_dependent;
  int num_ziv_unimplemented;

  int num_siv;
  int num_siv_independent;
  int num_siv_dependent;
  int num_siv_unimplemented;

  int num_miv;
  int num_miv_independent;
  int num_miv_dependent;
  int num_miv_unimplemented;
} dependence_stats;

static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
					   unsigned int, unsigned int,
					   struct loop *);
129 /* Returns true iff A divides B. */
131 static inline bool
132 tree_fold_divides_p (const_tree a, const_tree b)
134 gcc_assert (TREE_CODE (a) == INTEGER_CST);
135 gcc_assert (TREE_CODE (b) == INTEGER_CST);
136 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
/* Returns true iff A divides B, i.e. B % A == 0.  A must be nonzero;
   division by zero is undefined behavior.  */

static inline bool
int_divides_p (int a, int b)
{
  return ((b % a) == 0);
}
147 /* Return true if reference REF contains a union access. */
149 static bool
150 ref_contains_union_access_p (tree ref)
152 while (handled_component_p (ref))
154 ref = TREE_OPERAND (ref, 0);
155 if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
156 || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
157 return true;
159 return false;
164 /* Dump into FILE all the data references from DATAREFS. */
166 static void
167 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
169 unsigned int i;
170 struct data_reference *dr;
172 FOR_EACH_VEC_ELT (datarefs, i, dr)
173 dump_data_reference (file, dr);
176 /* Unified dump into FILE all the data references from DATAREFS. */
178 DEBUG_FUNCTION void
179 debug (vec<data_reference_p> &ref)
181 dump_data_references (stderr, ref);
184 DEBUG_FUNCTION void
185 debug (vec<data_reference_p> *ptr)
187 if (ptr)
188 debug (*ptr);
189 else
190 fprintf (stderr, "<nil>\n");
194 /* Dump into STDERR all the data references from DATAREFS. */
196 DEBUG_FUNCTION void
197 debug_data_references (vec<data_reference_p> datarefs)
199 dump_data_references (stderr, datarefs);
202 /* Print to STDERR the data_reference DR. */
204 DEBUG_FUNCTION void
205 debug_data_reference (struct data_reference *dr)
207 dump_data_reference (stderr, dr);
210 /* Dump function for a DATA_REFERENCE structure. */
212 void
213 dump_data_reference (FILE *outf,
214 struct data_reference *dr)
216 unsigned int i;
218 fprintf (outf, "#(Data Ref: \n");
219 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
220 fprintf (outf, "# stmt: ");
221 print_gimple_stmt (outf, DR_STMT (dr), 0);
222 fprintf (outf, "# ref: ");
223 print_generic_stmt (outf, DR_REF (dr));
224 fprintf (outf, "# base_object: ");
225 print_generic_stmt (outf, DR_BASE_OBJECT (dr));
227 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
229 fprintf (outf, "# Access function %d: ", i);
230 print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
232 fprintf (outf, "#)\n");
235 /* Unified dump function for a DATA_REFERENCE structure. */
237 DEBUG_FUNCTION void
238 debug (data_reference &ref)
240 dump_data_reference (stderr, &ref);
243 DEBUG_FUNCTION void
244 debug (data_reference *ptr)
246 if (ptr)
247 debug (*ptr);
248 else
249 fprintf (stderr, "<nil>\n");
253 /* Dumps the affine function described by FN to the file OUTF. */
255 DEBUG_FUNCTION void
256 dump_affine_function (FILE *outf, affine_fn fn)
258 unsigned i;
259 tree coef;
261 print_generic_expr (outf, fn[0], TDF_SLIM);
262 for (i = 1; fn.iterate (i, &coef); i++)
264 fprintf (outf, " + ");
265 print_generic_expr (outf, coef, TDF_SLIM);
266 fprintf (outf, " * x_%u", i);
270 /* Dumps the conflict function CF to the file OUTF. */
272 DEBUG_FUNCTION void
273 dump_conflict_function (FILE *outf, conflict_function *cf)
275 unsigned i;
277 if (cf->n == NO_DEPENDENCE)
278 fprintf (outf, "no dependence");
279 else if (cf->n == NOT_KNOWN)
280 fprintf (outf, "not known");
281 else
283 for (i = 0; i < cf->n; i++)
285 if (i != 0)
286 fprintf (outf, " ");
287 fprintf (outf, "[");
288 dump_affine_function (outf, cf->fns[i]);
289 fprintf (outf, "]");
294 /* Dump function for a SUBSCRIPT structure. */
296 DEBUG_FUNCTION void
297 dump_subscript (FILE *outf, struct subscript *subscript)
299 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
301 fprintf (outf, "\n (subscript \n");
302 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
303 dump_conflict_function (outf, cf);
304 if (CF_NONTRIVIAL_P (cf))
306 tree last_iteration = SUB_LAST_CONFLICT (subscript);
307 fprintf (outf, "\n last_conflict: ");
308 print_generic_expr (outf, last_iteration);
311 cf = SUB_CONFLICTS_IN_B (subscript);
312 fprintf (outf, "\n iterations_that_access_an_element_twice_in_B: ");
313 dump_conflict_function (outf, cf);
314 if (CF_NONTRIVIAL_P (cf))
316 tree last_iteration = SUB_LAST_CONFLICT (subscript);
317 fprintf (outf, "\n last_conflict: ");
318 print_generic_expr (outf, last_iteration);
321 fprintf (outf, "\n (Subscript distance: ");
322 print_generic_expr (outf, SUB_DISTANCE (subscript));
323 fprintf (outf, " ))\n");
326 /* Print the classic direction vector DIRV to OUTF. */
328 DEBUG_FUNCTION void
329 print_direction_vector (FILE *outf,
330 lambda_vector dirv,
331 int length)
333 int eq;
335 for (eq = 0; eq < length; eq++)
337 enum data_dependence_direction dir = ((enum data_dependence_direction)
338 dirv[eq]);
340 switch (dir)
342 case dir_positive:
343 fprintf (outf, " +");
344 break;
345 case dir_negative:
346 fprintf (outf, " -");
347 break;
348 case dir_equal:
349 fprintf (outf, " =");
350 break;
351 case dir_positive_or_equal:
352 fprintf (outf, " +=");
353 break;
354 case dir_positive_or_negative:
355 fprintf (outf, " +-");
356 break;
357 case dir_negative_or_equal:
358 fprintf (outf, " -=");
359 break;
360 case dir_star:
361 fprintf (outf, " *");
362 break;
363 default:
364 fprintf (outf, "indep");
365 break;
368 fprintf (outf, "\n");
371 /* Print a vector of direction vectors. */
373 DEBUG_FUNCTION void
374 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
375 int length)
377 unsigned j;
378 lambda_vector v;
380 FOR_EACH_VEC_ELT (dir_vects, j, v)
381 print_direction_vector (outf, v, length);
384 /* Print out a vector VEC of length N to OUTFILE. */
386 DEBUG_FUNCTION void
387 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
389 int i;
391 for (i = 0; i < n; i++)
392 fprintf (outfile, "%3d ", vector[i]);
393 fprintf (outfile, "\n");
396 /* Print a vector of distance vectors. */
398 DEBUG_FUNCTION void
399 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
400 int length)
402 unsigned j;
403 lambda_vector v;
405 FOR_EACH_VEC_ELT (dist_vects, j, v)
406 print_lambda_vector (outf, v, length);
409 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
411 DEBUG_FUNCTION void
412 dump_data_dependence_relation (FILE *outf,
413 struct data_dependence_relation *ddr)
415 struct data_reference *dra, *drb;
417 fprintf (outf, "(Data Dep: \n");
419 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
421 if (ddr)
423 dra = DDR_A (ddr);
424 drb = DDR_B (ddr);
425 if (dra)
426 dump_data_reference (outf, dra);
427 else
428 fprintf (outf, " (nil)\n");
429 if (drb)
430 dump_data_reference (outf, drb);
431 else
432 fprintf (outf, " (nil)\n");
434 fprintf (outf, " (don't know)\n)\n");
435 return;
438 dra = DDR_A (ddr);
439 drb = DDR_B (ddr);
440 dump_data_reference (outf, dra);
441 dump_data_reference (outf, drb);
443 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
444 fprintf (outf, " (no dependence)\n");
446 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
448 unsigned int i;
449 struct loop *loopi;
451 subscript *sub;
452 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
454 fprintf (outf, " access_fn_A: ");
455 print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
456 fprintf (outf, " access_fn_B: ");
457 print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
458 dump_subscript (outf, sub);
461 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
462 fprintf (outf, " loop nest: (");
463 FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
464 fprintf (outf, "%d ", loopi->num);
465 fprintf (outf, ")\n");
467 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
469 fprintf (outf, " distance_vector: ");
470 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
471 DDR_NB_LOOPS (ddr));
474 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
476 fprintf (outf, " direction_vector: ");
477 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
478 DDR_NB_LOOPS (ddr));
482 fprintf (outf, ")\n");
485 /* Debug version. */
487 DEBUG_FUNCTION void
488 debug_data_dependence_relation (struct data_dependence_relation *ddr)
490 dump_data_dependence_relation (stderr, ddr);
493 /* Dump into FILE all the dependence relations from DDRS. */
495 DEBUG_FUNCTION void
496 dump_data_dependence_relations (FILE *file,
497 vec<ddr_p> ddrs)
499 unsigned int i;
500 struct data_dependence_relation *ddr;
502 FOR_EACH_VEC_ELT (ddrs, i, ddr)
503 dump_data_dependence_relation (file, ddr);
506 DEBUG_FUNCTION void
507 debug (vec<ddr_p> &ref)
509 dump_data_dependence_relations (stderr, ref);
512 DEBUG_FUNCTION void
513 debug (vec<ddr_p> *ptr)
515 if (ptr)
516 debug (*ptr);
517 else
518 fprintf (stderr, "<nil>\n");
522 /* Dump to STDERR all the dependence relations from DDRS. */
524 DEBUG_FUNCTION void
525 debug_data_dependence_relations (vec<ddr_p> ddrs)
527 dump_data_dependence_relations (stderr, ddrs);
530 /* Dumps the distance and direction vectors in FILE. DDRS contains
531 the dependence relations, and VECT_SIZE is the size of the
532 dependence vectors, or in other words the number of loops in the
533 considered nest. */
535 DEBUG_FUNCTION void
536 dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
538 unsigned int i, j;
539 struct data_dependence_relation *ddr;
540 lambda_vector v;
542 FOR_EACH_VEC_ELT (ddrs, i, ddr)
543 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
545 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), j, v)
547 fprintf (file, "DISTANCE_V (");
548 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
549 fprintf (file, ")\n");
552 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), j, v)
554 fprintf (file, "DIRECTION_V (");
555 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
556 fprintf (file, ")\n");
560 fprintf (file, "\n\n");
563 /* Dumps the data dependence relations DDRS in FILE. */
565 DEBUG_FUNCTION void
566 dump_ddrs (FILE *file, vec<ddr_p> ddrs)
568 unsigned int i;
569 struct data_dependence_relation *ddr;
571 FOR_EACH_VEC_ELT (ddrs, i, ddr)
572 dump_data_dependence_relation (file, ddr);
574 fprintf (file, "\n\n");
577 DEBUG_FUNCTION void
578 debug_ddrs (vec<ddr_p> ddrs)
580 dump_ddrs (stderr, ddrs);
583 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
584 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
585 constant of type ssizetype, and returns true. If we cannot do this
586 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
587 is returned. */
589 static bool
590 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
591 tree *var, tree *off)
593 tree var0, var1;
594 tree off0, off1;
595 enum tree_code ocode = code;
597 *var = NULL_TREE;
598 *off = NULL_TREE;
600 switch (code)
602 case INTEGER_CST:
603 *var = build_int_cst (type, 0);
604 *off = fold_convert (ssizetype, op0);
605 return true;
607 case POINTER_PLUS_EXPR:
608 ocode = PLUS_EXPR;
609 /* FALLTHROUGH */
610 case PLUS_EXPR:
611 case MINUS_EXPR:
612 split_constant_offset (op0, &var0, &off0);
613 split_constant_offset (op1, &var1, &off1);
614 *var = fold_build2 (code, type, var0, var1);
615 *off = size_binop (ocode, off0, off1);
616 return true;
618 case MULT_EXPR:
619 if (TREE_CODE (op1) != INTEGER_CST)
620 return false;
622 split_constant_offset (op0, &var0, &off0);
623 *var = fold_build2 (MULT_EXPR, type, var0, op1);
624 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
625 return true;
627 case ADDR_EXPR:
629 tree base, poffset;
630 HOST_WIDE_INT pbitsize, pbitpos;
631 machine_mode pmode;
632 int punsignedp, preversep, pvolatilep;
634 op0 = TREE_OPERAND (op0, 0);
635 base
636 = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
637 &punsignedp, &preversep, &pvolatilep);
639 if (pbitpos % BITS_PER_UNIT != 0)
640 return false;
641 base = build_fold_addr_expr (base);
642 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
644 if (poffset)
646 split_constant_offset (poffset, &poffset, &off1);
647 off0 = size_binop (PLUS_EXPR, off0, off1);
648 if (POINTER_TYPE_P (TREE_TYPE (base)))
649 base = fold_build_pointer_plus (base, poffset);
650 else
651 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
652 fold_convert (TREE_TYPE (base), poffset));
655 var0 = fold_convert (type, base);
657 /* If variable length types are involved, punt, otherwise casts
658 might be converted into ARRAY_REFs in gimplify_conversion.
659 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
660 possibly no longer appears in current GIMPLE, might resurface.
661 This perhaps could run
662 if (CONVERT_EXPR_P (var0))
664 gimplify_conversion (&var0);
665 // Attempt to fill in any within var0 found ARRAY_REF's
666 // element size from corresponding op embedded ARRAY_REF,
667 // if unsuccessful, just punt.
668 } */
669 while (POINTER_TYPE_P (type))
670 type = TREE_TYPE (type);
671 if (int_size_in_bytes (type) < 0)
672 return false;
674 *var = var0;
675 *off = off0;
676 return true;
679 case SSA_NAME:
681 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
682 return false;
684 gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
685 enum tree_code subcode;
687 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
688 return false;
690 var0 = gimple_assign_rhs1 (def_stmt);
691 subcode = gimple_assign_rhs_code (def_stmt);
692 var1 = gimple_assign_rhs2 (def_stmt);
694 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
696 CASE_CONVERT:
698 /* We must not introduce undefined overflow, and we must not change the value.
699 Hence we're okay if the inner type doesn't overflow to start with
700 (pointer or signed), the outer type also is an integer or pointer
701 and the outer precision is at least as large as the inner. */
702 tree itype = TREE_TYPE (op0);
703 if ((POINTER_TYPE_P (itype)
704 || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
705 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
706 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
708 split_constant_offset (op0, &var0, off);
709 *var = fold_convert (type, var0);
710 return true;
712 return false;
715 default:
716 return false;
720 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
721 will be ssizetype. */
723 void
724 split_constant_offset (tree exp, tree *var, tree *off)
726 tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
727 enum tree_code code;
729 *var = exp;
730 *off = ssize_int (0);
731 STRIP_NOPS (exp);
733 if (tree_is_chrec (exp)
734 || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
735 return;
737 otype = TREE_TYPE (exp);
738 code = TREE_CODE (exp);
739 extract_ops_from_tree (exp, &code, &op0, &op1);
740 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
742 *var = fold_convert (type, e);
743 *off = o;
747 /* Returns the address ADDR of an object in a canonical shape (without nop
748 casts, and with type of pointer to the object). */
750 static tree
751 canonicalize_base_object_address (tree addr)
753 tree orig = addr;
755 STRIP_NOPS (addr);
757 /* The base address may be obtained by casting from integer, in that case
758 keep the cast. */
759 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
760 return orig;
762 if (TREE_CODE (addr) != ADDR_EXPR)
763 return addr;
765 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
768 /* Analyze the behavior of memory reference REF. There are two modes:
770 - BB analysis. In this case we simply split the address into base,
771 init and offset components, without reference to any containing loop.
772 The resulting base and offset are general expressions and they can
773 vary arbitrarily from one iteration of the containing loop to the next.
774 The step is always zero.
776 - loop analysis. In this case we analyze the reference both wrt LOOP
777 and on the basis that the reference occurs (is "used") in LOOP;
778 see the comment above analyze_scalar_evolution_in_loop for more
779 information about this distinction. The base, init, offset and
780 step fields are all invariant in LOOP.
782 Perform BB analysis if LOOP is null, or if LOOP is the function's
783 dummy outermost loop. In other cases perform loop analysis.
785 Return true if the analysis succeeded and store the results in DRB if so.
786 BB analysis can only fail for bitfield or reversed-storage accesses. */
788 bool
789 dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
790 struct loop *loop)
792 HOST_WIDE_INT pbitsize, pbitpos;
793 tree base, poffset;
794 machine_mode pmode;
795 int punsignedp, preversep, pvolatilep;
796 affine_iv base_iv, offset_iv;
797 tree init, dinit, step;
798 bool in_loop = (loop && loop->num);
800 if (dump_file && (dump_flags & TDF_DETAILS))
801 fprintf (dump_file, "analyze_innermost: ");
803 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
804 &punsignedp, &preversep, &pvolatilep);
805 gcc_assert (base != NULL_TREE);
807 if (pbitpos % BITS_PER_UNIT != 0)
809 if (dump_file && (dump_flags & TDF_DETAILS))
810 fprintf (dump_file, "failed: bit offset alignment.\n");
811 return false;
814 if (preversep)
816 if (dump_file && (dump_flags & TDF_DETAILS))
817 fprintf (dump_file, "failed: reverse storage order.\n");
818 return false;
821 /* Calculate the alignment and misalignment for the inner reference. */
822 unsigned int HOST_WIDE_INT base_misalignment;
823 unsigned int base_alignment;
824 get_object_alignment_1 (base, &base_alignment, &base_misalignment);
826 /* There are no bitfield references remaining in BASE, so the values
827 we got back must be whole bytes. */
828 gcc_assert (base_alignment % BITS_PER_UNIT == 0
829 && base_misalignment % BITS_PER_UNIT == 0);
830 base_alignment /= BITS_PER_UNIT;
831 base_misalignment /= BITS_PER_UNIT;
833 if (TREE_CODE (base) == MEM_REF)
835 if (!integer_zerop (TREE_OPERAND (base, 1)))
837 /* Subtract MOFF from the base and add it to POFFSET instead.
838 Adjust the misalignment to reflect the amount we subtracted. */
839 offset_int moff = mem_ref_offset (base);
840 base_misalignment -= moff.to_short_addr ();
841 tree mofft = wide_int_to_tree (sizetype, moff);
842 if (!poffset)
843 poffset = mofft;
844 else
845 poffset = size_binop (PLUS_EXPR, poffset, mofft);
847 base = TREE_OPERAND (base, 0);
849 else
850 base = build_fold_addr_expr (base);
852 if (in_loop)
854 if (!simple_iv (loop, loop, base, &base_iv, true))
856 if (dump_file && (dump_flags & TDF_DETAILS))
857 fprintf (dump_file, "failed: evolution of base is not affine.\n");
858 return false;
861 else
863 base_iv.base = base;
864 base_iv.step = ssize_int (0);
865 base_iv.no_overflow = true;
868 if (!poffset)
870 offset_iv.base = ssize_int (0);
871 offset_iv.step = ssize_int (0);
873 else
875 if (!in_loop)
877 offset_iv.base = poffset;
878 offset_iv.step = ssize_int (0);
880 else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
882 if (dump_file && (dump_flags & TDF_DETAILS))
883 fprintf (dump_file, "failed: evolution of offset is not affine.\n");
884 return false;
888 init = ssize_int (pbitpos / BITS_PER_UNIT);
890 /* Subtract any constant component from the base and add it to INIT instead.
891 Adjust the misalignment to reflect the amount we subtracted. */
892 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
893 init = size_binop (PLUS_EXPR, init, dinit);
894 base_misalignment -= TREE_INT_CST_LOW (dinit);
896 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
897 init = size_binop (PLUS_EXPR, init, dinit);
899 step = size_binop (PLUS_EXPR,
900 fold_convert (ssizetype, base_iv.step),
901 fold_convert (ssizetype, offset_iv.step));
903 base = canonicalize_base_object_address (base_iv.base);
905 /* See if get_pointer_alignment can guarantee a higher alignment than
906 the one we calculated above. */
907 unsigned int HOST_WIDE_INT alt_misalignment;
908 unsigned int alt_alignment;
909 get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);
911 /* As above, these values must be whole bytes. */
912 gcc_assert (alt_alignment % BITS_PER_UNIT == 0
913 && alt_misalignment % BITS_PER_UNIT == 0);
914 alt_alignment /= BITS_PER_UNIT;
915 alt_misalignment /= BITS_PER_UNIT;
917 if (base_alignment < alt_alignment)
919 base_alignment = alt_alignment;
920 base_misalignment = alt_misalignment;
923 drb->base_address = base;
924 drb->offset = fold_convert (ssizetype, offset_iv.base);
925 drb->init = init;
926 drb->step = step;
927 drb->base_alignment = base_alignment;
928 drb->base_misalignment = base_misalignment & (base_alignment - 1);
929 drb->offset_alignment = highest_pow2_factor (offset_iv.base);
930 drb->step_alignment = highest_pow2_factor (step);
932 if (dump_file && (dump_flags & TDF_DETAILS))
933 fprintf (dump_file, "success.\n");
935 return true;
938 /* Return true if OP is a valid component reference for a DR access
939 function. This accepts a subset of what handled_component_p accepts. */
941 static bool
942 access_fn_component_p (tree op)
944 switch (TREE_CODE (op))
946 case REALPART_EXPR:
947 case IMAGPART_EXPR:
948 case ARRAY_REF:
949 return true;
951 case COMPONENT_REF:
952 return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;
954 default:
955 return false;
959 /* Determines the base object and the list of indices of memory reference
960 DR, analyzed in LOOP and instantiated before NEST. */
962 static void
963 dr_analyze_indices (struct data_reference *dr, edge nest, loop_p loop)
965 vec<tree> access_fns = vNULL;
966 tree ref, op;
967 tree base, off, access_fn;
969 /* If analyzing a basic-block there are no indices to analyze
970 and thus no access functions. */
971 if (!nest)
973 DR_BASE_OBJECT (dr) = DR_REF (dr);
974 DR_ACCESS_FNS (dr).create (0);
975 return;
978 ref = DR_REF (dr);
980 /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
981 into a two element array with a constant index. The base is
982 then just the immediate underlying object. */
983 if (TREE_CODE (ref) == REALPART_EXPR)
985 ref = TREE_OPERAND (ref, 0);
986 access_fns.safe_push (integer_zero_node);
988 else if (TREE_CODE (ref) == IMAGPART_EXPR)
990 ref = TREE_OPERAND (ref, 0);
991 access_fns.safe_push (integer_one_node);
994 /* Analyze access functions of dimensions we know to be independent.
995 The list of component references handled here should be kept in
996 sync with access_fn_component_p. */
997 while (handled_component_p (ref))
999 if (TREE_CODE (ref) == ARRAY_REF)
1001 op = TREE_OPERAND (ref, 1);
1002 access_fn = analyze_scalar_evolution (loop, op);
1003 access_fn = instantiate_scev (nest, loop, access_fn);
1004 access_fns.safe_push (access_fn);
1006 else if (TREE_CODE (ref) == COMPONENT_REF
1007 && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
1009 /* For COMPONENT_REFs of records (but not unions!) use the
1010 FIELD_DECL offset as constant access function so we can
1011 disambiguate a[i].f1 and a[i].f2. */
1012 tree off = component_ref_field_offset (ref);
1013 off = size_binop (PLUS_EXPR,
1014 size_binop (MULT_EXPR,
1015 fold_convert (bitsizetype, off),
1016 bitsize_int (BITS_PER_UNIT)),
1017 DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
1018 access_fns.safe_push (off);
1020 else
1021 /* If we have an unhandled component we could not translate
1022 to an access function stop analyzing. We have determined
1023 our base object in this case. */
1024 break;
1026 ref = TREE_OPERAND (ref, 0);
1029 /* If the address operand of a MEM_REF base has an evolution in the
1030 analyzed nest, add it as an additional independent access-function. */
1031 if (TREE_CODE (ref) == MEM_REF)
1033 op = TREE_OPERAND (ref, 0);
1034 access_fn = analyze_scalar_evolution (loop, op);
1035 access_fn = instantiate_scev (nest, loop, access_fn);
1036 if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
1038 tree orig_type;
1039 tree memoff = TREE_OPERAND (ref, 1);
1040 base = initial_condition (access_fn);
1041 orig_type = TREE_TYPE (base);
1042 STRIP_USELESS_TYPE_CONVERSION (base);
1043 split_constant_offset (base, &base, &off);
1044 STRIP_USELESS_TYPE_CONVERSION (base);
1045 /* Fold the MEM_REF offset into the evolutions initial
1046 value to make more bases comparable. */
1047 if (!integer_zerop (memoff))
1049 off = size_binop (PLUS_EXPR, off,
1050 fold_convert (ssizetype, memoff));
1051 memoff = build_int_cst (TREE_TYPE (memoff), 0);
1053 /* Adjust the offset so it is a multiple of the access type
1054 size and thus we separate bases that can possibly be used
1055 to produce partial overlaps (which the access_fn machinery
1056 cannot handle). */
1057 wide_int rem;
1058 if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
1059 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
1060 && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
1061 rem = wi::mod_trunc
1062 (wi::to_wide (off),
1063 wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
1064 SIGNED);
1065 else
1066 /* If we can't compute the remainder simply force the initial
1067 condition to zero. */
1068 rem = wi::to_wide (off);
1069 off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
1070 memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
1071 /* And finally replace the initial condition. */
1072 access_fn = chrec_replace_initial_condition
1073 (access_fn, fold_convert (orig_type, off));
1074 /* ??? This is still not a suitable base object for
1075 dr_may_alias_p - the base object needs to be an
1076 access that covers the object as whole. With
1077 an evolution in the pointer this cannot be
1078 guaranteed.
1079 As a band-aid, mark the access so we can special-case
1080 it in dr_may_alias_p. */
1081 tree old = ref;
1082 ref = fold_build2_loc (EXPR_LOCATION (ref),
1083 MEM_REF, TREE_TYPE (ref),
1084 base, memoff);
1085 MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
1086 MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
1087 DR_UNCONSTRAINED_BASE (dr) = true;
1088 access_fns.safe_push (access_fn);
1091 else if (DECL_P (ref))
1093 /* Canonicalize DR_BASE_OBJECT to MEM_REF form. */
1094 ref = build2 (MEM_REF, TREE_TYPE (ref),
1095 build_fold_addr_expr (ref),
1096 build_int_cst (reference_alias_ptr_type (ref), 0));
1099 DR_BASE_OBJECT (dr) = ref;
1100 DR_ACCESS_FNS (dr) = access_fns;
1103 /* Extracts the alias analysis information from the memory reference DR. */
1105 static void
1106 dr_analyze_alias (struct data_reference *dr)
1108 tree ref = DR_REF (dr);
1109 tree base = get_base_address (ref), addr;
1111 if (INDIRECT_REF_P (base)
1112 || TREE_CODE (base) == MEM_REF)
1114 addr = TREE_OPERAND (base, 0);
1115 if (TREE_CODE (addr) == SSA_NAME)
1116 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1120 /* Frees data reference DR. */
1122 void
1123 free_data_ref (data_reference_p dr)
1125 DR_ACCESS_FNS (dr).release ();
1126 free (dr);
/* Analyze memory reference MEMREF, which is accessed in STMT.
   The reference is a read if IS_READ is true, otherwise it is a write.
   IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
   within STMT, i.e. that it might not occur even if STMT is executed
   and runs to completion.

   Return the data_reference description of MEMREF.  NEST is the outermost
   loop in which the reference should be instantiated, LOOP is the loop
   in which the data reference should be analyzed.  */

struct data_reference *
create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
		 bool is_read, bool is_conditional_in_stmt)
{
  struct data_reference *dr;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Creating dr for ");
      print_generic_expr (dump_file, memref, TDF_SLIM);
      fprintf (dump_file, "\n");
    }

  /* Zero-initialized so all unanalyzed fields start out null.  */
  dr = XCNEW (struct data_reference);
  DR_STMT (dr) = stmt;
  DR_REF (dr) = memref;
  DR_IS_READ (dr) = is_read;
  DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;

  /* With no loop nest, analyze the reference as loop-invariant scalar
     code (pass a NULL loop).  */
  dr_analyze_innermost (&DR_INNERMOST (dr), memref,
			nest != NULL ? loop : NULL);
  dr_analyze_indices (dr, nest, loop);
  dr_analyze_alias (dr);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned i;
      fprintf (dump_file, "\tbase_address: ");
      print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
      fprintf (dump_file, "\n\toffset from base address: ");
      print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tconstant offset from base address: ");
      print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tstep: ");
      print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tbase misalignment: %d",
	       DR_BASE_MISALIGNMENT (dr));
      fprintf (dump_file, "\n\toffset alignment: %d",
	       DR_OFFSET_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tbase_object: ");
      print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
      fprintf (dump_file, "\n");
      for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
	{
	  fprintf (dump_file, "\tAccess function %d: ", i);
	  print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
	}
    }

  return dr;
}
/* A helper function computes order between two tree expressions T1 and T2.
   This is used in comparator functions sorting objects based on the order
   of tree expressions.  The function returns -1, 0, or 1.  The ordering is
   arbitrary but total and deterministic within a compilation.  */

static int
data_ref_compare_tree (tree t1, tree t2)
{
  int i, cmp;
  enum tree_code code;
  char tclass;

  if (t1 == t2)
    return 0;
  /* NULL sorts before any non-NULL tree.  */
  if (t1 == NULL)
    return -1;
  if (t2 == NULL)
    return 1;

  STRIP_USELESS_TYPE_CONVERSION (t1);
  STRIP_USELESS_TYPE_CONVERSION (t2);
  if (t1 == t2)
    return 0;

  /* Different codes order by code number; conversions are treated as
     equivalent codes so their operands get compared below.  */
  if (TREE_CODE (t1) != TREE_CODE (t2)
      && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
    return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;

  code = TREE_CODE (t1);
  switch (code)
    {
    case INTEGER_CST:
      return tree_int_cst_compare (t1, t2);

    case STRING_CST:
      if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
	return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
      return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
		     TREE_STRING_LENGTH (t1));

    case SSA_NAME:
      if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
	return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
      break;

    default:
      tclass = TREE_CODE_CLASS (code);

      /* For decls, compare their UIDs.  */
      if (tclass == tcc_declaration)
	{
	  if (DECL_UID (t1) != DECL_UID (t2))
	    return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
	  break;
	}
      /* For expressions, compare their operands recursively.  */
      else if (IS_EXPR_CODE_CLASS (tclass))
	{
	  for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
	    {
	      cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
					   TREE_OPERAND (t2, i));
	      if (cmp != 0)
		return cmp;
	    }
	}
      else
	gcc_unreachable ();
    }

  return 0;
}
/* Return TRUE if it's possible to resolve data dependence DDR by runtime
   alias check.  SPEED_P is false when optimizing for size; LOOP may be
   NULL when no loop context applies.  */

bool
runtime_alias_check_p (ddr_p ddr, struct loop *loop, bool speed_p)
{
  if (dump_enabled_p ())
    {
      dump_printf (MSG_NOTE, "consider run-time aliasing test between ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_A (ddr)));
      dump_printf (MSG_NOTE, " and ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_B (ddr)));
      dump_printf (MSG_NOTE, "\n");
    }

  /* Versioning the loop to insert an alias check costs code size.  */
  if (!speed_p)
    {
      if (dump_enabled_p ())
	dump_printf (MSG_MISSED_OPTIMIZATION,
		     "runtime alias check not supported when optimizing "
		     "for size.\n");
      return false;
    }

  /* FORNOW: We don't support versioning with outer-loop in either
     vectorization or loop distribution.  */
  if (loop != NULL && loop->inner != NULL)
    {
      if (dump_enabled_p ())
	dump_printf (MSG_MISSED_OPTIMIZATION,
		     "runtime alias check not supported for outer loop.\n");
      return false;
    }

  /* FORNOW: We don't support creating runtime alias tests for non-constant
     step.  */
  if (TREE_CODE (DR_STEP (DDR_A (ddr))) != INTEGER_CST
      || TREE_CODE (DR_STEP (DDR_B (ddr))) != INTEGER_CST)
    {
      if (dump_enabled_p ())
	dump_printf (MSG_MISSED_OPTIMIZATION,
		     "runtime alias check not supported for non-constant "
		     "step\n");
      return false;
    }

  return true;
}
1314 /* Operator == between two dr_with_seg_len objects.
1316 This equality operator is used to make sure two data refs
1317 are the same one so that we will consider to combine the
1318 aliasing checks of those two pairs of data dependent data
1319 refs. */
1321 static bool
1322 operator == (const dr_with_seg_len& d1,
1323 const dr_with_seg_len& d2)
1325 return operand_equal_p (DR_BASE_ADDRESS (d1.dr),
1326 DR_BASE_ADDRESS (d2.dr), 0)
1327 && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
1328 && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
1329 && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0;
1332 /* Comparison function for sorting objects of dr_with_seg_len_pair_t
1333 so that we can combine aliasing checks in one scan. */
1335 static int
1336 comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
1338 const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
1339 const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
1340 const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
1341 const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;
1343 /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks
1344 if a and c have the same basic address snd step, and b and d have the same
1345 address and step. Therefore, if any a&c or b&d don't have the same address
1346 and step, we don't care the order of those two pairs after sorting. */
1347 int comp_res;
1349 if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
1350 DR_BASE_ADDRESS (b1.dr))) != 0)
1351 return comp_res;
1352 if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
1353 DR_BASE_ADDRESS (b2.dr))) != 0)
1354 return comp_res;
1355 if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
1356 DR_STEP (b1.dr))) != 0)
1357 return comp_res;
1358 if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
1359 DR_STEP (b2.dr))) != 0)
1360 return comp_res;
1361 if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
1362 DR_OFFSET (b1.dr))) != 0)
1363 return comp_res;
1364 if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
1365 DR_INIT (b1.dr))) != 0)
1366 return comp_res;
1367 if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
1368 DR_OFFSET (b2.dr))) != 0)
1369 return comp_res;
1370 if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
1371 DR_INIT (b2.dr))) != 0)
1372 return comp_res;
1374 return 0;
/* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
   FACTOR is number of iterations that each data reference is accessed.

   Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
   we create an expression:

   ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
   || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))

   for aliasing checks.  However, in some cases we can decrease the number
   of checks by combining two checks into one.  For example, suppose we have
   another pair of data refs store_ptr_0 & load_ptr_1, and if the following
   condition is satisfied:

   load_ptr_0 < load_ptr_1  &&
   load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0

   (this condition means, in each iteration of vectorized loop, the accessed
   memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
   load_ptr_1.)

   we then can use only the following expression to finish the aliasing
   checks between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:

   ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
   || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))

   Note that we only consider that load_ptr_0 and load_ptr_1 have the same
   basic address.  */

void
prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
			       unsigned HOST_WIDE_INT factor)
{
  /* Sort the collected data ref pairs so that we can scan them once to
     combine all possible aliasing checks.  */
  alias_pairs->qsort (comp_dr_with_seg_len_pair);

  /* Scan the sorted dr pairs and check if we can combine alias checks
     of two neighboring dr pairs.  */
  for (size_t i = 1; i < alias_pairs->length (); ++i)
    {
      /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2).  */
      dr_with_seg_len *dr_a1 = &(*alias_pairs)[i-1].first,
		      *dr_b1 = &(*alias_pairs)[i-1].second,
		      *dr_a2 = &(*alias_pairs)[i].first,
		      *dr_b2 = &(*alias_pairs)[i].second;

      /* Remove duplicate data ref pairs.  */
      if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf (MSG_NOTE, "found equal ranges ");
	      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a1->dr));
	      dump_printf (MSG_NOTE, ", ");
	      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b1->dr));
	      dump_printf (MSG_NOTE, " and ");
	      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a2->dr));
	      dump_printf (MSG_NOTE, ", ");
	      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b2->dr));
	      dump_printf (MSG_NOTE, "\n");
	    }
	  alias_pairs->ordered_remove (i--);
	  continue;
	}

      if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
	{
	  /* We consider the case that DR_B1 and DR_B2 are same memrefs,
	     and DR_A1 and DR_A2 are two consecutive memrefs.  Canonicalize
	     by swapping so the shared ref is always on the B side.  */
	  if (*dr_a1 == *dr_a2)
	    {
	      std::swap (dr_a1, dr_b1);
	      std::swap (dr_a2, dr_b2);
	    }

	  /* The A refs must differ only in their constant init offset,
	     and that offset must be a known host integer.  */
	  if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
				DR_BASE_ADDRESS (dr_a2->dr), 0)
	      || !operand_equal_p (DR_OFFSET (dr_a1->dr),
				   DR_OFFSET (dr_a2->dr), 0)
	      || !tree_fits_shwi_p (DR_INIT (dr_a1->dr))
	      || !tree_fits_shwi_p (DR_INIT (dr_a2->dr)))
	    continue;

	  /* Only merge const step data references.  */
	  if (TREE_CODE (DR_STEP (dr_a1->dr)) != INTEGER_CST
	      || TREE_CODE (DR_STEP (dr_a2->dr)) != INTEGER_CST)
	    continue;

	  /* DR_A1 and DR_A2 must go in the same direction.  */
	  if (tree_int_cst_compare (DR_STEP (dr_a1->dr), size_zero_node)
	      != tree_int_cst_compare (DR_STEP (dr_a2->dr), size_zero_node))
	    continue;

	  bool neg_step
	    = (tree_int_cst_compare (DR_STEP (dr_a1->dr), size_zero_node) < 0);

	  /* We need to compute merged segment length at compilation time for
	     dr_a1 and dr_a2, which is impossible if either one has non-const
	     segment length.  */
	  if ((!tree_fits_uhwi_p (dr_a1->seg_len)
	       || !tree_fits_uhwi_p (dr_a2->seg_len))
	      && tree_int_cst_compare (DR_STEP (dr_a1->dr),
				       DR_STEP (dr_a2->dr)) != 0)
	    continue;

	  /* Make sure dr_a1 starts left of dr_a2.  */
	  if (tree_int_cst_lt (DR_INIT (dr_a2->dr), DR_INIT (dr_a1->dr)))
	    std::swap (*dr_a1, *dr_a2);

	  bool do_remove = false;
	  wide_int diff = (wi::to_wide (DR_INIT (dr_a2->dr))
			   - wi::to_wide (DR_INIT (dr_a1->dr)));
	  wide_int min_seg_len_b;
	  tree new_seg_len;

	  /* Lower bound on B's segment length: its constant value, or
	     FACTOR iterations of its (constant) step.  */
	  if (TREE_CODE (dr_b1->seg_len) == INTEGER_CST)
	    min_seg_len_b = wi::abs (wi::to_wide (dr_b1->seg_len));
	  else
	    min_seg_len_b
	      = factor * wi::abs (wi::to_wide (DR_STEP (dr_b1->dr)));

	  /* Now we try to merge alias check dr_a1 & dr_b and dr_a2 & dr_b.

	     Case A:
	       check if the following condition is satisfied:

	       DIFF - SEGMENT_LENGTH_A < SEGMENT_LENGTH_B

	       where DIFF = DR_A2_INIT - DR_A1_INIT.  However,
	       SEGMENT_LENGTH_A or SEGMENT_LENGTH_B may not be constant so we
	       have to make a best estimation.  We can get the minimum value
	       of SEGMENT_LENGTH_B as a constant, represented by MIN_SEG_LEN_B,
	       then either of the following two conditions can guarantee the
	       one above:

	       1: DIFF <= MIN_SEG_LEN_B
	       2: DIFF - SEGMENT_LENGTH_A < MIN_SEG_LEN_B
		  Because DIFF - SEGMENT_LENGTH_A is done in sizetype, we need
		  to take care of wrapping behavior in it.

	     Case B:
	       If the left segment does not extend beyond the start of the
	       right segment the new segment length is that of the right
	       plus the segment distance.  The condition is like:

	       DIFF >= SEGMENT_LENGTH_A   ;SEGMENT_LENGTH_A is a constant.

	     Note 1: Case A.2 and B combined together effectively merges every
	     dr_a1 & dr_b and dr_a2 & dr_b when SEGMENT_LENGTH_A is const.

	     Note 2: Above description is based on positive DR_STEP, we need to
	     take care of negative DR_STEP for wrapping behavior.  See PR80815
	     for more information.  */
	  if (neg_step)
	    {
	      /* Adjust diff according to access size of both references.  */
	      tree size_a1 = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a1->dr)));
	      tree size_a2 = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a2->dr)));
	      diff += wi::to_wide (size_a2) - wi::to_wide (size_a1);
	      /* Case A.1.  */
	      if (wi::leu_p (diff, min_seg_len_b)
		  /* Case A.2 and B combined.  */
		  || (tree_fits_uhwi_p (dr_a2->seg_len)))
		{
		  if (tree_fits_uhwi_p (dr_a1->seg_len)
		      && tree_fits_uhwi_p (dr_a2->seg_len))
		    {
		      wide_int min_len
			= wi::umin (wi::to_wide (dr_a1->seg_len) - diff,
				    wi::to_wide (dr_a2->seg_len));
		      new_seg_len = wide_int_to_tree (sizetype, min_len);
		    }
		  else
		    new_seg_len
		      = size_binop (MINUS_EXPR, dr_a2->seg_len,
				    wide_int_to_tree (sizetype, diff));

		  /* For negative step the merged check lives in dr_a2.  */
		  dr_a2->seg_len = new_seg_len;
		  do_remove = true;
		}
	    }
	  else
	    {
	      /* Case A.1.  */
	      if (wi::leu_p (diff, min_seg_len_b)
		  /* Case A.2 and B combined.  */
		  || (tree_fits_uhwi_p (dr_a1->seg_len)))
		{
		  if (tree_fits_uhwi_p (dr_a1->seg_len)
		      && tree_fits_uhwi_p (dr_a2->seg_len))
		    {
		      wide_int max_len
			= wi::umax (wi::to_wide (dr_a2->seg_len) + diff,
				    wi::to_wide (dr_a1->seg_len));
		      new_seg_len = wide_int_to_tree (sizetype, max_len);
		    }
		  else
		    new_seg_len
		      = size_binop (PLUS_EXPR, dr_a2->seg_len,
				    wide_int_to_tree (sizetype, diff));

		  /* For positive step the merged check lives in dr_a1.  */
		  dr_a1->seg_len = new_seg_len;
		  do_remove = true;
		}
	    }

	  if (do_remove)
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf (MSG_NOTE, "merging ranges for ");
		  dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a1->dr));
		  dump_printf (MSG_NOTE, ", ");
		  dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b1->dr));
		  dump_printf (MSG_NOTE, " and ");
		  dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a2->dr));
		  dump_printf (MSG_NOTE, ", ");
		  dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b2->dr));
		  dump_printf (MSG_NOTE, "\n");
		}
	      alias_pairs->ordered_remove (neg_step ? i - 1 : i);
	      i--;
	    }
	}
    }
}
/* Given LOOP's two data references and segment lengths described by DR_A
   and DR_B, create expression checking if the two addresses ranges intersect
   with each other based on index of the two addresses.  This can only be
   done if DR_A and DR_B referring to the same (array) object and the index
   is the only difference.  For example:

                       DR_A                           DR_B
      data-ref         arr[i]                         arr[j]
      base_object      arr                            arr
      index            {i_0, +, 1}_loop               {j_0, +, 1}_loop

   The addresses and their index are like:

        |<- ADDR_A    ->|          |<- ADDR_B    ->|
     ------------------------------------------------------->
        |   |   |   |   |          |   |   |   |   |
     ------------------------------------------------------->
        i_0 ...         i_0+4      j_0 ...         j_0+4

   We can create expression based on index rather than address:

     (i_0 + 4 < j_0 || j_0 + 4 < i_0)

   Note evolution step of index needs to be considered in comparison.

   Returns true (and sets *COND_EXPR) on success, false when the index
   based check cannot be built.  */

static bool
create_intersect_range_checks_index (struct loop *loop, tree *cond_expr,
				     const dr_with_seg_len& dr_a,
				     const dr_with_seg_len& dr_b)
{
  /* Loop-invariant references or mismatched dimensionality can't be
     compared index-wise.  */
  if (integer_zerop (DR_STEP (dr_a.dr))
      || integer_zerop (DR_STEP (dr_b.dr))
      || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
    return false;

  if (!tree_fits_uhwi_p (dr_a.seg_len) || !tree_fits_uhwi_p (dr_b.seg_len))
    return false;

  if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
    return false;

  if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
    return false;

  if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
    return false;

  gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);

  bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
  unsigned HOST_WIDE_INT abs_step
    = absu_hwi (tree_to_shwi (DR_STEP (dr_a.dr)));

  unsigned HOST_WIDE_INT seg_len1 = tree_to_uhwi (dr_a.seg_len);
  unsigned HOST_WIDE_INT seg_len2 = tree_to_uhwi (dr_b.seg_len);
  /* Infer the number of iterations with which the memory segment is accessed
     by DR.  In other words, alias is checked if memory segment accessed by
     DR_A in some iterations intersect with memory segment accessed by DR_B
     in the same amount iterations.
     Note segment length is a linear function of number of iterations with
     DR_STEP as the coefficient.  */
  unsigned HOST_WIDE_INT niter_len1 = (seg_len1 + abs_step - 1) / abs_step;
  unsigned HOST_WIDE_INT niter_len2 = (seg_len2 + abs_step - 1) / abs_step;

  unsigned int i;
  for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
    {
      tree access1 = DR_ACCESS_FN (dr_a.dr, i);
      tree access2 = DR_ACCESS_FN (dr_b.dr, i);
      /* Two indices must be the same if they are not scev, or not scev wrto
	 current loop being vectorized.  */
      if (TREE_CODE (access1) != POLYNOMIAL_CHREC
	  || TREE_CODE (access2) != POLYNOMIAL_CHREC
	  || CHREC_VARIABLE (access1) != (unsigned)loop->num
	  || CHREC_VARIABLE (access2) != (unsigned)loop->num)
	{
	  if (operand_equal_p (access1, access2, 0))
	    continue;

	  return false;
	}
      /* The two indices must have the same step.  */
      if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
	return false;

      tree idx_step = CHREC_RIGHT (access1);
      /* Index must have const step, otherwise DR_STEP won't be constant.  */
      gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
      /* Index must evaluate in the same direction as DR.  */
      gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);

      tree min1 = CHREC_LEFT (access1);
      tree min2 = CHREC_LEFT (access2);
      if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
	return false;

      /* Ideally, alias can be checked against loop's control IV, but we
	 need to prove linear mapping between control IV and reference
	 index.  Although that should be true, we check against (array)
	 index of data reference.  Like segment length, index length is
	 linear function of the number of iterations with index_step as
	 the coefficient, i.e, niter_len * idx_step.  */
      tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
				   build_int_cst (TREE_TYPE (min1),
						  niter_len1));
      tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
				   build_int_cst (TREE_TYPE (min2),
						  niter_len2));
      tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1);
      tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2);
      /* Adjust ranges for negative step.  */
      if (neg_step)
	{
	  /* IDX_LEN1 and IDX_LEN2 are negative in this case.  */
	  min1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1), max1, idx_step);
	  max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1),
			      CHREC_LEFT (access1), idx_step);
	  min2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2), max2, idx_step);
	  max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2),
			      CHREC_LEFT (access2), idx_step);
	}
      tree part_cond_expr
	= fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
		       fold_build2 (LE_EXPR, boolean_type_node, max1, min2),
		       fold_build2 (LE_EXPR, boolean_type_node, max2, min1));
      if (*cond_expr)
	*cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
				  *cond_expr, part_cond_expr);
      else
	*cond_expr = part_cond_expr;
    }
  return true;
}
/* Given two data references and segment lengths described by DR_A and DR_B,
   create expression checking if the two addresses ranges intersect with
   each other:

     ((DR_A_addr_0 + DR_A_segment_length_0) <= DR_B_addr_0)
     || (DR_B_addr_0 + DR_B_segment_length_0) <= DR_A_addr_0))

   Stores the resulting condition in *COND_EXPR.  Tries the cheaper
   index-based check first and falls back to address arithmetic.  */

static void
create_intersect_range_checks (struct loop *loop, tree *cond_expr,
			       const dr_with_seg_len& dr_a,
			       const dr_with_seg_len& dr_b)
{
  *cond_expr = NULL_TREE;
  if (create_intersect_range_checks_index (loop, cond_expr, dr_a, dr_b))
    return;

  tree segment_length_a = dr_a.seg_len;
  tree segment_length_b = dr_b.seg_len;
  tree addr_base_a = DR_BASE_ADDRESS (dr_a.dr);
  tree addr_base_b = DR_BASE_ADDRESS (dr_b.dr);
  tree offset_a = DR_OFFSET (dr_a.dr), offset_b = DR_OFFSET (dr_b.dr);

  /* Fold the variable and constant offsets into the base addresses.  */
  offset_a = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_a),
			  offset_a, DR_INIT (dr_a.dr));
  offset_b = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_b),
			  offset_b, DR_INIT (dr_b.dr));
  addr_base_a = fold_build_pointer_plus (addr_base_a, offset_a);
  addr_base_b = fold_build_pointer_plus (addr_base_b, offset_b);

  tree seg_a_min = addr_base_a;
  tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
  /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
     bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
     [a, a+12) */
  if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
    {
      tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a.dr)));
      seg_a_min = fold_build_pointer_plus (seg_a_max, unit_size);
      seg_a_max = fold_build_pointer_plus (addr_base_a, unit_size);
    }
  tree seg_b_min = addr_base_b;
  tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
  if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
    {
      tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b.dr)));
      seg_b_min = fold_build_pointer_plus (seg_b_max, unit_size);
      seg_b_max = fold_build_pointer_plus (addr_base_b, unit_size);
    }
  *cond_expr
    = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
	fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
	fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
}
/* Create a conditional expression that represents the run-time checks for
   overlapping of address ranges represented by a list of data references
   pairs passed in ALIAS_PAIRS.  Data references are in LOOP.  The returned
   COND_EXPR is the conditional expression to be used in the if statement
   that controls which version of the loop gets executed at runtime.

   NOTE(review): *COND_EXPR is read before first assignment below, so the
   caller is expected to initialize it (presumably to NULL_TREE) — confirm
   at call sites.  */

void
create_runtime_alias_checks (struct loop *loop,
			     vec<dr_with_seg_len_pair_t> *alias_pairs,
			     tree * cond_expr)
{
  tree part_cond_expr;

  for (size_t i = 0, s = alias_pairs->length (); i < s; ++i)
    {
      const dr_with_seg_len& dr_a = (*alias_pairs)[i].first;
      const dr_with_seg_len& dr_b = (*alias_pairs)[i].second;

      if (dump_enabled_p ())
	{
	  dump_printf (MSG_NOTE, "create runtime check for data references ");
	  dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
	  dump_printf (MSG_NOTE, " and ");
	  dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
	  dump_printf (MSG_NOTE, "\n");
	}

      /* Create condition expression for each pair data references.  */
      create_intersect_range_checks (loop, &part_cond_expr, dr_a, dr_b);
      /* AND the per-pair check into the accumulated condition.  */
      if (*cond_expr)
	*cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
				  *cond_expr, part_cond_expr);
      else
	*cond_expr = part_cond_expr;
    }
}
1831 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1832 expressions. */
1833 static bool
1834 dr_equal_offsets_p1 (tree offset1, tree offset2)
1836 bool res;
1838 STRIP_NOPS (offset1);
1839 STRIP_NOPS (offset2);
1841 if (offset1 == offset2)
1842 return true;
1844 if (TREE_CODE (offset1) != TREE_CODE (offset2)
1845 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1846 return false;
1848 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1849 TREE_OPERAND (offset2, 0));
1851 if (!res || !BINARY_CLASS_P (offset1))
1852 return res;
1854 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1855 TREE_OPERAND (offset2, 1));
1857 return res;
1860 /* Check if DRA and DRB have equal offsets. */
1861 bool
1862 dr_equal_offsets_p (struct data_reference *dra,
1863 struct data_reference *drb)
1865 tree offset1, offset2;
1867 offset1 = DR_OFFSET (dra);
1868 offset2 = DR_OFFSET (drb);
1870 return dr_equal_offsets_p1 (offset1, offset2);
1873 /* Returns true if FNA == FNB. */
1875 static bool
1876 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1878 unsigned i, n = fna.length ();
1880 if (n != fnb.length ())
1881 return false;
1883 for (i = 0; i < n; i++)
1884 if (!operand_equal_p (fna[i], fnb[i], 0))
1885 return false;
1887 return true;
1890 /* If all the functions in CF are the same, returns one of them,
1891 otherwise returns NULL. */
1893 static affine_fn
1894 common_affine_function (conflict_function *cf)
1896 unsigned i;
1897 affine_fn comm;
1899 if (!CF_NONTRIVIAL_P (cf))
1900 return affine_fn ();
1902 comm = cf->fns[0];
1904 for (i = 1; i < cf->n; i++)
1905 if (!affine_function_equal_p (comm, cf->fns[i]))
1906 return affine_fn ();
1908 return comm;
/* Returns the base of the affine function FN, i.e. its constant
   (zeroth) coefficient.  */

static tree
affine_function_base (affine_fn fn)
{
  return fn[0];
}
1919 /* Returns true if FN is a constant. */
1921 static bool
1922 affine_function_constant_p (affine_fn fn)
1924 unsigned i;
1925 tree coef;
1927 for (i = 1; fn.iterate (i, &coef); i++)
1928 if (!integer_zerop (coef))
1929 return false;
1931 return true;
1934 /* Returns true if FN is the zero constant function. */
1936 static bool
1937 affine_function_zero_p (affine_fn fn)
1939 return (integer_zerop (affine_function_base (fn))
1940 && affine_function_constant_p (fn));
1943 /* Returns a signed integer type with the largest precision from TA
1944 and TB. */
1946 static tree
1947 signed_type_for_types (tree ta, tree tb)
1949 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1950 return signed_type_for (ta);
1951 else
1952 return signed_type_for (tb);
/* Applies operation OP on affine functions FNA and FNB, and returns the
   result.  The result has as many coefficients as the longer operand;
   the shorter one is implicitly padded with zeros.  */

static affine_fn
affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
{
  unsigned i, n, m;
  affine_fn ret;
  tree coef;

  /* N is the shorter length, M the longer.  */
  if (fnb.length () > fna.length ())
    {
      n = fna.length ();
      m = fnb.length ();
    }
  else
    {
      n = fnb.length ();
      m = fna.length ();
    }

  ret.create (m);
  /* Combine the coefficients both functions have.  */
  for (i = 0; i < n; i++)
    {
      tree type = signed_type_for_types (TREE_TYPE (fna[i]),
					 TREE_TYPE (fnb[i]));
      ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
    }

  /* Exactly one of these loops runs: the tail of whichever operand is
     longer, combined with zero on the other side.  */
  for (; fna.iterate (i, &coef); i++)
    ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
				 coef, integer_zero_node));
  for (; fnb.iterate (i, &coef); i++)
    ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
				 integer_zero_node, coef));

  return ret;
}
/* Returns the sum of affine functions FNA and FNB.  */

static affine_fn
affine_fn_plus (affine_fn fna, affine_fn fnb)
{
  return affine_fn_op (PLUS_EXPR, fna, fnb);
}
/* Returns the difference of affine functions FNA and FNB.  */

static affine_fn
affine_fn_minus (affine_fn fna, affine_fn fnb)
{
  return affine_fn_op (MINUS_EXPR, fna, fnb);
}
/* Frees affine function FN (releases the underlying vector).  */

static void
affine_fn_free (affine_fn fn)
{
  fn.release ();
}
/* Determine for each subscript in the data dependence relation DDR
   the distance.  Sets SUB_DISTANCE of each subscript, or chrec_dont_know
   when no common affine conflict function exists or the difference is
   not constant.  */

static void
compute_subscript_distance (struct data_dependence_relation *ddr)
{
  conflict_function *cf_a, *cf_b;
  affine_fn fn_a, fn_b, diff;

  if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      unsigned int i;

      for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
	{
	  struct subscript *subscript;

	  subscript = DDR_SUBSCRIPT (ddr, i);
	  cf_a = SUB_CONFLICTS_IN_A (subscript);
	  cf_b = SUB_CONFLICTS_IN_B (subscript);

	  fn_a = common_affine_function (cf_a);
	  fn_b = common_affine_function (cf_b);
	  /* Bail out of the whole relation on the first subscript
	     without a single common conflict function.  */
	  if (!fn_a.exists () || !fn_b.exists ())
	    {
	      SUB_DISTANCE (subscript) = chrec_dont_know;
	      return;
	    }
	  diff = affine_fn_minus (fn_a, fn_b);

	  /* Only a constant difference is a usable distance.  */
	  if (affine_function_constant_p (diff))
	    SUB_DISTANCE (subscript) = affine_function_base (diff);
	  else
	    SUB_DISTANCE (subscript) = chrec_dont_know;

	  affine_fn_free (diff);
	}
    }
}
/* Returns the conflict function for "unknown": a freshly allocated
   conflict_function with the NOT_KNOWN marker in its count field.  */

static conflict_function *
conflict_fn_not_known (void)
{
  conflict_function *fn = XCNEW (conflict_function);
  fn->n = NOT_KNOWN;

  return fn;
}
/* Returns the conflict function for "independent": a freshly allocated
   conflict_function with the NO_DEPENDENCE marker in its count field.  */

static conflict_function *
conflict_fn_no_dependence (void)
{
  conflict_function *fn = XCNEW (conflict_function);
  fn->n = NO_DEPENDENCE;

  return fn;
}
/* Returns true if the address of OBJ is invariant in LOOP.  Walks down
   the chain of handled components checking that every variable part of
   the reference is defined outside LOOP.  */

static bool
object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
{
  while (handled_component_p (obj))
    {
      if (TREE_CODE (obj) == ARRAY_REF)
	{
	  /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
	     need to check the stride and the lower bound of the reference.  */
	  if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
						      loop->num)
	      || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
							 loop->num))
	    return false;
	}
      else if (TREE_CODE (obj) == COMPONENT_REF)
	{
	  /* Operand 2 is the (possibly variable) field offset.  */
	  if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
						      loop->num))
	    return false;
	}
      obj = TREE_OPERAND (obj, 0);
    }

  /* A non-pointer base (e.g. a DECL) has an invariant address.  */
  if (!INDIRECT_REF_P (obj)
      && TREE_CODE (obj) != MEM_REF)
    return true;

  /* For a dereference, the address is invariant iff the pointer is
     defined outside LOOP.  */
  return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
						  loop->num);
}
/* Returns false if we can prove that data references A and B do not alias,
   true otherwise.  If LOOP_NEST is false no cross-iteration aliases are
   considered.  */

bool
dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
		bool loop_nest)
{
  tree addr_a = DR_BASE_OBJECT (a);
  tree addr_b = DR_BASE_OBJECT (b);

  /* If we are not processing a loop nest but scalar code we
     do not need to care about possible cross-iteration dependences
     and thus can process the full original reference.  Do so,
     similar to how loop invariant motion applies extra offset-based
     disambiguation.  */
  if (!loop_nest)
    {
      aff_tree off1, off2;
      widest_int size1, size2;
      get_inner_reference_aff (DR_REF (a), &off1, &size1);
      get_inner_reference_aff (DR_REF (b), &off2, &size2);
      /* Compute off2 - off1 and test for guaranteed non-overlap.  */
      aff_combination_scale (&off1, -1);
      aff_combination_add (&off2, &off1);
      if (aff_comb_cannot_overlap_p (&off2, size1, size2))
	return false;
    }

  /* Restrict-qualified pointers in the same clique but with different
     bases are guaranteed not to alias.  */
  if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
      && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
      && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
      && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
    return false;

  /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
     do not know the size of the base-object.  So we cannot do any
     offset/overlap based analysis but have to rely on points-to
     information only.  */
  if (TREE_CODE (addr_a) == MEM_REF
      && (DR_UNCONSTRAINED_BASE (a)
	  || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
    {
      /* For true dependences we can apply TBAA.  */
      if (flag_strict_aliasing
	  && DR_IS_WRITE (a) && DR_IS_READ (b)
	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
				     get_alias_set (DR_REF (b))))
	return false;
      if (TREE_CODE (addr_b) == MEM_REF)
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       TREE_OPERAND (addr_b, 0));
      else
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       build_fold_addr_expr (addr_b));
    }
  else if (TREE_CODE (addr_b) == MEM_REF
	   && (DR_UNCONSTRAINED_BASE (b)
	       || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
    {
      /* For true dependences we can apply TBAA.  */
      if (flag_strict_aliasing
	  && DR_IS_WRITE (a) && DR_IS_READ (b)
	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
				     get_alias_set (DR_REF (b))))
	return false;
      if (TREE_CODE (addr_a) == MEM_REF)
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       TREE_OPERAND (addr_b, 0));
      else
	return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
				       TREE_OPERAND (addr_b, 0));
    }

  /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
     that is being subsetted in the loop nest.  */
  if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
    return refs_output_dependent_p (addr_a, addr_b);
  else if (DR_IS_READ (a) && DR_IS_WRITE (b))
    return refs_anti_dependent_p (addr_a, addr_b);
  return refs_may_alias_p (addr_a, addr_b);
}
2196 /* REF_A and REF_B both satisfy access_fn_component_p. Return true
2197 if it is meaningful to compare their associated access functions
2198 when checking for dependencies. */
2200 static bool
2201 access_fn_components_comparable_p (tree ref_a, tree ref_b)
2203 /* Allow pairs of component refs from the following sets:
2205 { REALPART_EXPR, IMAGPART_EXPR }
2206 { COMPONENT_REF }
2207 { ARRAY_REF }. */
2208 tree_code code_a = TREE_CODE (ref_a);
2209 tree_code code_b = TREE_CODE (ref_b);
2210 if (code_a == IMAGPART_EXPR)
2211 code_a = REALPART_EXPR;
2212 if (code_b == IMAGPART_EXPR)
2213 code_b = REALPART_EXPR;
2214 if (code_a != code_b)
2215 return false;
2217 if (TREE_CODE (ref_a) == COMPONENT_REF)
2218 /* ??? We cannot simply use the type of operand #0 of the refs here as
2219 the Fortran compiler smuggles type punning into COMPONENT_REFs.
2220 Use the DECL_CONTEXT of the FIELD_DECLs instead. */
2221 return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
2222 == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
2224 return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
2225 TREE_TYPE (TREE_OPERAND (ref_b, 0)));
2228 /* Initialize a data dependence relation between data accesses A and
2229 B. LOOP_NEST is the vector of loops surrounding the references; its
2230 length gives the size of the classic distance/direction vectors. */
2232 struct data_dependence_relation *
2233 initialize_data_dependence_relation (struct data_reference *a,
2234 struct data_reference *b,
2235 vec<loop_p> loop_nest)
2237 struct data_dependence_relation *res;
2238 unsigned int i;
2240 res = XCNEW (struct data_dependence_relation);
2241 DDR_A (res) = a;
2242 DDR_B (res) = b;
2243 DDR_LOOP_NEST (res).create (0);
2244 DDR_SUBSCRIPTS (res).create (0);
2245 DDR_DIR_VECTS (res).create (0);
2246 DDR_DIST_VECTS (res).create (0);
/* A missing reference gives no information: conservatively record
   "don't know".  */
2248 if (a == NULL || b == NULL)
2250 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2251 return res;
2254 /* If the data references do not alias, then they are independent. */
2255 if (!dr_may_alias_p (a, b, loop_nest.exists ()))
2257 DDR_ARE_DEPENDENT (res) = chrec_known;
2258 return res;
2261 unsigned int num_dimensions_a = DR_NUM_DIMENSIONS (a);
2262 unsigned int num_dimensions_b = DR_NUM_DIMENSIONS (b);
/* Without any access functions there is nothing to compare
   subscript-by-subscript, so give up.  */
2263 if (num_dimensions_a == 0 || num_dimensions_b == 0)
2265 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2266 return res;
2269 /* For unconstrained bases, the root (highest-indexed) subscript
2270 describes a variation in the base of the original DR_REF rather
2271 than a component access. We have no type that accurately describes
2272 the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
2273 applying this subscript) so limit the search to the last real
2274 component access.
2276 E.g. for:
2278 void
2279 f (int a[][8], int b[][8])
2281 for (int i = 0; i < 8; ++i)
2282 a[i * 2][0] = b[i][0];
2285 the a and b accesses have a single ARRAY_REF component reference [0]
2286 but have two subscripts. */
2287 if (DR_UNCONSTRAINED_BASE (a))
2288 num_dimensions_a -= 1;
2289 if (DR_UNCONSTRAINED_BASE (b))
2290 num_dimensions_b -= 1;
2292 /* These structures describe sequences of component references in
2293 DR_REF (A) and DR_REF (B). Each component reference is tied to a
2294 specific access function. */
2295 struct {
2296 /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
2297 DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
2298 indices. In C notation, these are the indices of the rightmost
2299 component references; e.g. for a sequence .b.c.d, the start
2300 index is for .d. */
2301 unsigned int start_a;
2302 unsigned int start_b;
2304 /* The sequence contains LENGTH consecutive access functions from
2305 each DR. */
2306 unsigned int length;
2308 /* The enclosing objects for the A and B sequences respectively,
2309 i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
2310 and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied. */
2311 tree object_a;
2312 tree object_b;
2313 } full_seq = {}, struct_seq = {};
2315 /* Before each iteration of the loop:
2317 - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
2318 - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B). */
2319 unsigned int index_a = 0;
2320 unsigned int index_b = 0;
2321 tree ref_a = DR_REF (a);
2322 tree ref_b = DR_REF (b);
2324 /* Now walk the component references from the final DR_REFs back up to
2325 the enclosing base objects. Each component reference corresponds
2326 to one access function in the DR, with access function 0 being for
2327 the final DR_REF and the highest-indexed access function being the
2328 one that is applied to the base of the DR.
2330 Look for a sequence of component references whose access functions
2331 are comparable (see access_fn_components_comparable_p). If more
2332 than one such sequence exists, pick the one nearest the base
2333 (which is the leftmost sequence in C notation). Store this sequence
2334 in FULL_SEQ.
2336 For example, if we have:
2338 struct foo { struct bar s; ... } (*a)[10], (*b)[10];
2340 A: a[0][i].s.c.d
2341 B: __real b[0][i].s.e[i].f
2343 (where d is the same type as the real component of f) then the access
2344 functions would be:
2346 0 1 2 3
2347 A: .d .c .s [i]
2349 0 1 2 3 4 5
2350 B: __real .f [i] .e .s [i]
2352 The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
2353 and [i] is an ARRAY_REF. However, the A1/B3 column contains two
2354 COMPONENT_REF accesses for struct bar, so is comparable. Likewise
2355 the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
2356 so is comparable. The A3/B5 column contains two ARRAY_REFs that
2357 index foo[10] arrays, so is again comparable. The sequence is
2358 therefore:
2360 A: [1, 3] (i.e. [i].s.c)
2361 B: [3, 5] (i.e. [i].s.e)
2363 Also look for sequences of component references whose access
2364 functions are comparable and whose enclosing objects have the same
2365 RECORD_TYPE. Store this sequence in STRUCT_SEQ. In the above
2366 example, STRUCT_SEQ would be:
2368 A: [1, 2] (i.e. s.c)
2369 B: [3, 4] (i.e. s.e) */
2370 while (index_a < num_dimensions_a && index_b < num_dimensions_b)
2372 /* REF_A and REF_B must be one of the component access types
2373 allowed by dr_analyze_indices. */
2374 gcc_checking_assert (access_fn_component_p (ref_a));
2375 gcc_checking_assert (access_fn_component_p (ref_b));
2377 /* Get the immediately-enclosing objects for REF_A and REF_B,
2378 i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
2379 and DR_ACCESS_FN (B, INDEX_B). */
2380 tree object_a = TREE_OPERAND (ref_a, 0);
2381 tree object_b = TREE_OPERAND (ref_b, 0);
2383 tree type_a = TREE_TYPE (object_a);
2384 tree type_b = TREE_TYPE (object_b);
2385 if (access_fn_components_comparable_p (ref_a, ref_b))
2387 /* This pair of component accesses is comparable for dependence
2388 analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
2389 DR_ACCESS_FN (B, INDEX_B) in the sequence. */
2390 if (full_seq.start_a + full_seq.length != index_a
2391 || full_seq.start_b + full_seq.length != index_b)
2393 /* The accesses don't extend the current sequence,
2394 so start a new one here. */
2395 full_seq.start_a = index_a;
2396 full_seq.start_b = index_b;
2397 full_seq.length = 0;
2400 /* Add this pair of references to the sequence. */
2401 full_seq.length += 1;
2402 full_seq.object_a = object_a;
2403 full_seq.object_b = object_b;
2405 /* If the enclosing objects are structures (and thus have the
2406 same RECORD_TYPE), record the new sequence in STRUCT_SEQ. */
2407 if (TREE_CODE (type_a) == RECORD_TYPE)
2408 struct_seq = full_seq;
2410 /* Move to the next containing reference for both A and B. */
2411 ref_a = object_a;
2412 ref_b = object_b;
2413 index_a += 1;
2414 index_b += 1;
2415 continue;
2418 /* Try to approach equal type sizes. */
2419 if (!COMPLETE_TYPE_P (type_a)
2420 || !COMPLETE_TYPE_P (type_b)
2421 || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
2422 || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
2423 break;
2425 unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
2426 unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
/* Advance the side whose enclosing type is smaller; when the sizes are
   equal both sides step up together.  */
2427 if (size_a <= size_b)
2429 index_a += 1;
2430 ref_a = object_a;
2432 if (size_b <= size_a)
2434 index_b += 1;
2435 ref_b = object_b;
2439 /* See whether FULL_SEQ ends at the base and whether the two bases
2440 are equal. We do not care about TBAA or alignment info so we can
2441 use OEP_ADDRESS_OF to avoid false negatives. */
2442 tree base_a = DR_BASE_OBJECT (a);
2443 tree base_b = DR_BASE_OBJECT (b);
2444 bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
2445 && full_seq.start_b + full_seq.length == num_dimensions_b
2446 && DR_UNCONSTRAINED_BASE (a) == DR_UNCONSTRAINED_BASE (b)
2447 && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
2448 && types_compatible_p (TREE_TYPE (base_a),
2449 TREE_TYPE (base_b))
2450 && (!loop_nest.exists ()
2451 || (object_address_invariant_in_loop_p
2452 (loop_nest[0], base_a))));
2454 /* If the bases are the same, we can include the base variation too.
2455 E.g. the b accesses in:
2457 for (int i = 0; i < n; ++i)
2458 b[i + 4][0] = b[i][0];
2460 have a definite dependence distance of 4, while for:
2462 for (int i = 0; i < n; ++i)
2463 a[i + 4][0] = b[i][0];
2465 the dependence distance depends on the gap between a and b.
2467 If the bases are different then we can only rely on the sequence
2468 rooted at a structure access, since arrays are allowed to overlap
2469 arbitrarily and change shape arbitrarily. E.g. we treat this as
2470 valid code:
2472 int a[256];
2474 ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];
2476 where two lvalues with the same int[4][3] type overlap, and where
2477 both lvalues are distinct from the object's declared type. */
2478 if (same_base_p)
2480 if (DR_UNCONSTRAINED_BASE (a))
2481 full_seq.length += 1;
2483 else
2484 full_seq = struct_seq;
2486 /* Punt if we didn't find a suitable sequence. */
2487 if (full_seq.length == 0)
2489 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2490 return res;
2493 if (!same_base_p)
2495 /* Partial overlap is possible for different bases when strict aliasing
2496 is not in effect. It's also possible if either base involves a union
2497 access; e.g. for:
2499 struct s1 { int a[2]; };
2500 struct s2 { struct s1 b; int c; };
2501 struct s3 { int d; struct s1 e; };
2502 union u { struct s2 f; struct s3 g; } *p, *q;
2504 the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
2505 "p->g.e" (base "p->g") and might partially overlap the s1 at
2506 "q->g.e" (base "q->g"). */
2507 if (!flag_strict_aliasing
2508 || ref_contains_union_access_p (full_seq.object_a)
2509 || ref_contains_union_access_p (full_seq.object_b))
2511 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2512 return res;
2515 DDR_COULD_BE_INDEPENDENT_P (res) = true;
/* Record the objects the subscript analysis applies to, but only when
   their addresses do not vary inside the outermost loop.  */
2516 if (!loop_nest.exists ()
2517 || (object_address_invariant_in_loop_p (loop_nest[0],
2518 full_seq.object_a)
2519 && object_address_invariant_in_loop_p (loop_nest[0],
2520 full_seq.object_b)))
2522 DDR_OBJECT_A (res) = full_seq.object_a;
2523 DDR_OBJECT_B (res) = full_seq.object_b;
/* We found a comparable sequence: set up one subscript per access-
   function pair; the actual overlap analysis is done later and the
   conflict descriptions start out as "not known".  */
2527 DDR_AFFINE_P (res) = true;
2528 DDR_ARE_DEPENDENT (res) = NULL_TREE;
2529 DDR_SUBSCRIPTS (res).create (full_seq.length);
2530 DDR_LOOP_NEST (res) = loop_nest;
2531 DDR_INNER_LOOP (res) = 0;
2532 DDR_SELF_REFERENCE (res) = false;
2534 for (i = 0; i < full_seq.length; ++i)
2536 struct subscript *subscript;
2538 subscript = XNEW (struct subscript);
2539 SUB_ACCESS_FN (subscript, 0) = DR_ACCESS_FN (a, full_seq.start_a + i);
2540 SUB_ACCESS_FN (subscript, 1) = DR_ACCESS_FN (b, full_seq.start_b + i);
2541 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
2542 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
2543 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
2544 SUB_DISTANCE (subscript) = chrec_dont_know;
2545 DDR_SUBSCRIPTS (res).safe_push (subscript);
2548 return res;
2551 /* Frees memory used by the conflict function F. */
2553 static void
2554 free_conflict_function (conflict_function *f)
2556 unsigned i;
2558 if (CF_NONTRIVIAL_P (f))
2560 for (i = 0; i < f->n; i++)
2561 affine_fn_free (f->fns[i]);
2563 free (f);
2566 /* Frees memory used by SUBSCRIPTS. */
2568 static void
2569 free_subscripts (vec<subscript_p> subscripts)
2571 unsigned i;
2572 subscript_p s;
2574 FOR_EACH_VEC_ELT (subscripts, i, s)
2576 free_conflict_function (s->conflicting_iterations_in_a);
2577 free_conflict_function (s->conflicting_iterations_in_b);
2578 free (s);
2580 subscripts.release ();
2583 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
2584 description. */
2586 static inline void
2587 finalize_ddr_dependent (struct data_dependence_relation *ddr,
2588 tree chrec)
2590 DDR_ARE_DEPENDENT (ddr) = chrec;
2591 free_subscripts (DDR_SUBSCRIPTS (ddr));
2592 DDR_SUBSCRIPTS (ddr).create (0);
2595 /* The dependence relation DDR cannot be represented by a distance
2596 vector. */
2598 static inline void
2599 non_affine_dependence_relation (struct data_dependence_relation *ddr)
2601 if (dump_file && (dump_flags & TDF_DETAILS))
2602 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
2604 DDR_AFFINE_P (ddr) = false;
2609 /* This section contains the classic Banerjee tests. */
2611 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
2612 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
2614 static inline bool
2615 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
2617 return (evolution_function_is_constant_p (chrec_a)
2618 && evolution_function_is_constant_p (chrec_b));
2621 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
2622 variable, i.e., if the SIV (Single Index Variable) test is true. */
2624 static bool
2625 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
2627 if ((evolution_function_is_constant_p (chrec_a)
2628 && evolution_function_is_univariate_p (chrec_b))
2629 || (evolution_function_is_constant_p (chrec_b)
2630 && evolution_function_is_univariate_p (chrec_a)))
2631 return true;
2633 if (evolution_function_is_univariate_p (chrec_a)
2634 && evolution_function_is_univariate_p (chrec_b))
2636 switch (TREE_CODE (chrec_a))
2638 case POLYNOMIAL_CHREC:
2639 switch (TREE_CODE (chrec_b))
2641 case POLYNOMIAL_CHREC:
2642 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
2643 return false;
2644 /* FALLTHRU */
2646 default:
2647 return true;
2650 default:
2651 return true;
2655 return false;
2658 /* Creates a conflict function with N dimensions. The affine functions
2659 in each dimension follow. */
2661 static conflict_function *
2662 conflict_fn (unsigned n, ...)
2664 unsigned i;
2665 conflict_function *ret = XCNEW (conflict_function);
2666 va_list ap;
2668 gcc_assert (0 < n && n <= MAX_DIM);
2669 va_start (ap, n);
2671 ret->n = n;
2672 for (i = 0; i < n; i++)
2673 ret->fns[i] = va_arg (ap, affine_fn);
2674 va_end (ap);
2676 return ret;
2679 /* Returns constant affine function with value CST. */
2681 static affine_fn
2682 affine_fn_cst (tree cst)
2684 affine_fn fn;
2685 fn.create (1);
2686 fn.quick_push (cst);
2687 return fn;
2690 /* Returns affine function with single variable, CST + COEF * x_DIM. */
2692 static affine_fn
2693 affine_fn_univar (tree cst, unsigned dim, tree coef)
2695 affine_fn fn;
2696 fn.create (dim + 1);
2697 unsigned i;
2699 gcc_assert (dim > 0);
2700 fn.quick_push (cst);
2701 for (i = 1; i < dim; i++)
2702 fn.quick_push (integer_zero_node);
2703 fn.quick_push (coef);
2704 return fn;
2707 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
2708 *OVERLAPS_B are initialized to the functions that describe the
2709 relation between the elements accessed twice by CHREC_A and
2710 CHREC_B. For k >= 0, the following property is verified:
2712 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2714 static void
2715 analyze_ziv_subscript (tree chrec_a,
2716 tree chrec_b,
2717 conflict_function **overlaps_a,
2718 conflict_function **overlaps_b,
2719 tree *last_conflicts)
2721 tree type, difference;
2722 dependence_stats.num_ziv++;
2724 if (dump_file && (dump_flags & TDF_DETAILS))
2725 fprintf (dump_file, "(analyze_ziv_subscript \n");
2727 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2728 chrec_a = chrec_convert (type, chrec_a, NULL);
2729 chrec_b = chrec_convert (type, chrec_b, NULL);
2730 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2732 switch (TREE_CODE (difference))
2734 case INTEGER_CST:
2735 if (integer_zerop (difference))
2737 /* The difference is equal to zero: the accessed index
2738 overlaps for each iteration in the loop. */
2739 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2740 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2741 *last_conflicts = chrec_dont_know;
2742 dependence_stats.num_ziv_dependent++;
2744 else
2746 /* The accesses do not overlap. */
2747 *overlaps_a = conflict_fn_no_dependence ();
2748 *overlaps_b = conflict_fn_no_dependence ();
2749 *last_conflicts = integer_zero_node;
2750 dependence_stats.num_ziv_independent++;
2752 break;
2754 default:
2755 /* We're not sure whether the indexes overlap. For the moment,
2756 conservatively answer "don't know". */
2757 if (dump_file && (dump_flags & TDF_DETAILS))
2758 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
2760 *overlaps_a = conflict_fn_not_known ();
2761 *overlaps_b = conflict_fn_not_known ();
2762 *last_conflicts = chrec_dont_know;
2763 dependence_stats.num_ziv_unimplemented++;
2764 break;
2767 if (dump_file && (dump_flags & TDF_DETAILS))
2768 fprintf (dump_file, ")\n");
2771 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
2772 and only if it fits to the int type. If this is not the case, or the
2773 bound on the number of iterations of LOOP could not be derived, returns
2774 chrec_dont_know. */
2776 static tree
2777 max_stmt_executions_tree (struct loop *loop)
2779 widest_int nit;
2781 if (!max_stmt_executions (loop, &nit))
2782 return chrec_dont_know;
2784 if (!wi::fits_to_tree_p (nit, unsigned_type_node))
2785 return chrec_dont_know;
2787 return wide_int_to_tree (unsigned_type_node, nit);
2790 /* Determine whether the CHREC is always positive/negative. If the expression
2791 cannot be statically analyzed, return false, otherwise set the answer into
2792 VALUE. */
2794 static bool
2795 chrec_is_positive (tree chrec, bool *value)
2797 bool value0, value1, value2;
2798 tree end_value, nb_iter;
2800 switch (TREE_CODE (chrec))
2802 case POLYNOMIAL_CHREC:
2803 if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
2804 || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
2805 return false;
2807 /* FIXME -- overflows. */
2808 if (value0 == value1)
2810 *value = value0;
2811 return true;
2814 /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
2815 and the proof consists in showing that the sign never
2816 changes during the execution of the loop, from 0 to
2817 loop->nb_iterations. */
2818 if (!evolution_function_is_affine_p (chrec))
2819 return false;
2821 nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
2822 if (chrec_contains_undetermined (nb_iter))
2823 return false;
2825 #if 0
2826 /* TODO -- If the test is after the exit, we may decrease the number of
2827 iterations by one. */
2828 if (after_exit)
2829 nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
2830 #endif
2832 end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
2834 if (!chrec_is_positive (end_value, &value2))
2835 return false;
2837 *value = value0;
2838 return value0 == value1;
2840 case INTEGER_CST:
2841 switch (tree_int_cst_sgn (chrec))
2843 case -1:
2844 *value = false;
2845 break;
2846 case 1:
2847 *value = true;
2848 break;
2849 default:
2850 return false;
2852 return true;
2854 default:
2855 return false;
2860 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
2861 constant, and CHREC_B is an affine function. *OVERLAPS_A and
2862 *OVERLAPS_B are initialized to the functions that describe the
2863 relation between the elements accessed twice by CHREC_A and
2864 CHREC_B. For k >= 0, the following property is verified:
2866 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2868 static void
2869 analyze_siv_subscript_cst_affine (tree chrec_a,
2870 tree chrec_b,
2871 conflict_function **overlaps_a,
2872 conflict_function **overlaps_b,
2873 tree *last_conflicts)
2875 bool value0, value1, value2;
2876 tree type, difference, tmp;
2878 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2879 chrec_a = chrec_convert (type, chrec_a, NULL);
2880 chrec_b = chrec_convert (type, chrec_b, NULL);
/* DIFFERENCE = initial (CHREC_B) - CHREC_A: the gap the evolution of
   CHREC_B has to cover to reach the constant CHREC_A.  */
2881 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
2883 /* Special case overlap in the first iteration. */
2884 if (integer_zerop (difference))
2886 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2887 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2888 *last_conflicts = integer_one_node;
2889 return;
/* VALUE0 is the sign of the initial difference; if it cannot be
   determined, answer "don't know".  */
2892 if (!chrec_is_positive (initial_condition (difference), &value0))
2894 if (dump_file && (dump_flags & TDF_DETAILS))
2895 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
2897 dependence_stats.num_siv_unimplemented++;
2898 *overlaps_a = conflict_fn_not_known ();
2899 *overlaps_b = conflict_fn_not_known ();
2900 *last_conflicts = chrec_dont_know;
2901 return;
2903 else
/* Negative difference: CHREC_B starts below CHREC_A, so an overlap
   needs a positive step (VALUE1 true).  */
2905 if (value0 == false)
2907 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
2909 if (dump_file && (dump_flags & TDF_DETAILS))
2910 fprintf (dump_file, "siv test failed: chrec not positive.\n");
2912 *overlaps_a = conflict_fn_not_known ();
2913 *overlaps_b = conflict_fn_not_known ();
2914 *last_conflicts = chrec_dont_know;
2915 dependence_stats.num_siv_unimplemented++;
2916 return;
2918 else
2920 if (value1 == true)
2922 /* Example:
2923 chrec_a = 12
2924 chrec_b = {10, +, 1}
/* The overlap iteration is |difference| / step; it exists only when
   the step divides the difference exactly.  */
2927 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
2929 HOST_WIDE_INT numiter;
2930 struct loop *loop = get_chrec_loop (chrec_b);
2932 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2933 tmp = fold_build2 (EXACT_DIV_EXPR, type,
2934 fold_build1 (ABS_EXPR, type, difference),
2935 CHREC_RIGHT (chrec_b));
2936 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
2937 *last_conflicts = integer_one_node;
2940 /* Perform weak-zero siv test to see if overlap is
2941 outside the loop bounds. */
2942 numiter = max_stmt_executions_int (loop);
2944 if (numiter >= 0
2945 && compare_tree_int (tmp, numiter) > 0)
/* The conflicting iteration lies past the last iteration, so replace
   the just-built conflict functions by "no dependence".  */
2947 free_conflict_function (*overlaps_a);
2948 free_conflict_function (*overlaps_b);
2949 *overlaps_a = conflict_fn_no_dependence ();
2950 *overlaps_b = conflict_fn_no_dependence ();
2951 *last_conflicts = integer_zero_node;
2952 dependence_stats.num_siv_independent++;
2953 return;
2955 dependence_stats.num_siv_dependent++;
2956 return;
2959 /* When the step does not divide the difference, there are
2960 no overlaps. */
2961 else
2963 *overlaps_a = conflict_fn_no_dependence ();
2964 *overlaps_b = conflict_fn_no_dependence ();
2965 *last_conflicts = integer_zero_node;
2966 dependence_stats.num_siv_independent++;
2967 return;
2971 else
2973 /* Example:
2974 chrec_a = 12
2975 chrec_b = {10, +, -1}
2977 In this case, chrec_a will not overlap with chrec_b. */
2978 *overlaps_a = conflict_fn_no_dependence ();
2979 *overlaps_b = conflict_fn_no_dependence ();
2980 *last_conflicts = integer_zero_node;
2981 dependence_stats.num_siv_independent++;
2982 return;
/* Positive difference: CHREC_B starts above CHREC_A, so an overlap
   needs a negative step (VALUE2 false); mirror of the case above.  */
2986 else
2988 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
2990 if (dump_file && (dump_flags & TDF_DETAILS))
2991 fprintf (dump_file, "siv test failed: chrec not positive.\n");
2993 *overlaps_a = conflict_fn_not_known ();
2994 *overlaps_b = conflict_fn_not_known ();
2995 *last_conflicts = chrec_dont_know;
2996 dependence_stats.num_siv_unimplemented++;
2997 return;
2999 else
3001 if (value2 == false)
3003 /* Example:
3004 chrec_a = 3
3005 chrec_b = {10, +, -1}
3007 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3009 HOST_WIDE_INT numiter;
3010 struct loop *loop = get_chrec_loop (chrec_b);
3012 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3013 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
3014 CHREC_RIGHT (chrec_b));
3015 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3016 *last_conflicts = integer_one_node;
3018 /* Perform weak-zero siv test to see if overlap is
3019 outside the loop bounds. */
3020 numiter = max_stmt_executions_int (loop);
3022 if (numiter >= 0
3023 && compare_tree_int (tmp, numiter) > 0)
3025 free_conflict_function (*overlaps_a);
3026 free_conflict_function (*overlaps_b);
3027 *overlaps_a = conflict_fn_no_dependence ();
3028 *overlaps_b = conflict_fn_no_dependence ();
3029 *last_conflicts = integer_zero_node;
3030 dependence_stats.num_siv_independent++;
3031 return;
3033 dependence_stats.num_siv_dependent++;
3034 return;
3037 /* When the step does not divide the difference, there
3038 are no overlaps. */
3039 else
3041 *overlaps_a = conflict_fn_no_dependence ();
3042 *overlaps_b = conflict_fn_no_dependence ();
3043 *last_conflicts = integer_zero_node;
3044 dependence_stats.num_siv_independent++;
3045 return;
3048 else
3050 /* Example:
3051 chrec_a = 3
3052 chrec_b = {4, +, 1}
3054 In this case, chrec_a will not overlap with chrec_b. */
3055 *overlaps_a = conflict_fn_no_dependence ();
3056 *overlaps_b = conflict_fn_no_dependence ();
3057 *last_conflicts = integer_zero_node;
3058 dependence_stats.num_siv_independent++;
3059 return;
3066 /* Helper recursive function for initializing the matrix A. Returns
3067 the initial value of CHREC. Each POLYNOMIAL_CHREC level stores
3068 MULT times its step into A[INDEX][0], with INDEX advancing one row
3069 per level of the chrec. */
3069 static tree
3070 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
3072 gcc_assert (chrec);
3074 switch (TREE_CODE (chrec))
3076 case POLYNOMIAL_CHREC:
/* Record the (scaled) step of this loop level, then recurse into the
   initial value for the next row.  */
3077 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
3078 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
3080 case PLUS_EXPR:
3081 case MULT_EXPR:
3082 case MINUS_EXPR:
/* NOTE(review): MULT passes the same multiplier into both operands and
   then folds them with the operation; presumably at most one operand of
   a MULT_EXPR carries an evolution here -- confirm against
   dr_analyze_indices' callers.  */
3084 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3085 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
3087 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
3090 CASE_CONVERT:
3092 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3093 return chrec_convert (chrec_type (chrec), op, NULL);
3096 case BIT_NOT_EXPR:
3098 /* Handle ~X as -1 - X. */
3099 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3100 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
3101 build_int_cst (TREE_TYPE (chrec), -1), op);
3104 case INTEGER_CST:
/* Constants terminate the recursion: they are the initial value.  */
3105 return chrec;
3107 default:
3108 gcc_unreachable ();
3109 return NULL_TREE;
3113 #define FLOOR_DIV(x,y) ((x) / (y))
3115 /* Solves the special case of the Diophantine equation:
3116 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
3118 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
3119 number of iterations that loops X and Y run. The overlaps will be
3120 constructed as evolutions in dimension DIM. */
3122 static void
3123 compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
3124 HOST_WIDE_INT step_a,
3125 HOST_WIDE_INT step_b,
3126 affine_fn *overlaps_a,
3127 affine_fn *overlaps_b,
3128 tree *last_conflicts, int dim)
3130 if (((step_a > 0 && step_b > 0)
3131 || (step_a < 0 && step_b < 0)))
3133 HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
3134 HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;
3136 gcd_steps_a_b = gcd (step_a, step_b);
3137 step_overlaps_a = step_b / gcd_steps_a_b;
3138 step_overlaps_b = step_a / gcd_steps_a_b;
3140 if (niter > 0)
3142 tau2 = FLOOR_DIV (niter, step_overlaps_a);
3143 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
3144 last_conflict = tau2;
3145 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
3147 else
3148 *last_conflicts = chrec_dont_know;
3150 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
3151 build_int_cst (NULL_TREE,
3152 step_overlaps_a));
3153 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
3154 build_int_cst (NULL_TREE,
3155 step_overlaps_b));
3158 else
3160 *overlaps_a = affine_fn_cst (integer_zero_node);
3161 *overlaps_b = affine_fn_cst (integer_zero_node);
3162 *last_conflicts = integer_zero_node;
3166 /* Solves the special case of a Diophantine equation where CHREC_A is
3167 an affine bivariate function, and CHREC_B is an affine univariate
3168 function. For example,
3170 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
3172 has the following overlapping functions:
3174 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
3175 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
3176 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
3178 FORNOW: This is a specialized implementation for a case occurring in
3179 a common benchmark. Implement the general algorithm. */
3181 static void
3182 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
3183 conflict_function **overlaps_a,
3184 conflict_function **overlaps_b,
3185 tree *last_conflicts)
3187 bool xz_p, yz_p, xyz_p;
3188 HOST_WIDE_INT step_x, step_y, step_z;
3189 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
3190 affine_fn overlaps_a_xz, overlaps_b_xz;
3191 affine_fn overlaps_a_yz, overlaps_b_yz;
3192 affine_fn overlaps_a_xyz, overlaps_b_xyz;
3193 affine_fn ova1, ova2, ovb;
3194 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
/* X is the outer loop of CHREC_A, Y its inner loop, Z the loop of
   CHREC_B; extract their steps and iteration bounds.  */
3196 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
3197 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
3198 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
3200 niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
3201 niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
3202 niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
3204 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
3206 if (dump_file && (dump_flags & TDF_DETAILS))
3207 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
3209 *overlaps_a = conflict_fn_not_known ();
3210 *overlaps_b = conflict_fn_not_known ();
3211 *last_conflicts = chrec_dont_know;
3212 return;
/* Solve the three univariate sub-problems: X against Z, Y against Z,
   and the combined X+Y step against Z, each bounded by the smaller of
   the relevant iteration counts.  */
3215 niter = MIN (niter_x, niter_z);
3216 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
3217 &overlaps_a_xz,
3218 &overlaps_b_xz,
3219 &last_conflicts_xz, 1);
3220 niter = MIN (niter_y, niter_z);
3221 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
3222 &overlaps_a_yz,
3223 &overlaps_b_yz,
3224 &last_conflicts_yz, 2);
3225 niter = MIN (niter_x, niter_z);
3226 niter = MIN (niter_y, niter);
3227 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
3228 &overlaps_a_xyz,
3229 &overlaps_b_xyz,
3230 &last_conflicts_xyz, 3);
3232 xz_p = !integer_zerop (last_conflicts_xz);
3233 yz_p = !integer_zerop (last_conflicts_yz);
3234 xyz_p = !integer_zerop (last_conflicts_xyz);
/* Combine whichever sub-problems produced conflicts; affine_fn_plus
   allocates fresh functions, so free the temporaries it replaces.  */
3236 if (xz_p || yz_p || xyz_p)
3238 ova1 = affine_fn_cst (integer_zero_node);
3239 ova2 = affine_fn_cst (integer_zero_node);
3240 ovb = affine_fn_cst (integer_zero_node);
3241 if (xz_p)
3243 affine_fn t0 = ova1;
3244 affine_fn t2 = ovb;
3246 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
3247 ovb = affine_fn_plus (ovb, overlaps_b_xz);
3248 affine_fn_free (t0);
3249 affine_fn_free (t2);
3250 *last_conflicts = last_conflicts_xz;
3252 if (yz_p)
3254 affine_fn t0 = ova2;
3255 affine_fn t2 = ovb;
3257 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
3258 ovb = affine_fn_plus (ovb, overlaps_b_yz);
3259 affine_fn_free (t0);
3260 affine_fn_free (t2);
3261 *last_conflicts = last_conflicts_yz;
3263 if (xyz_p)
3265 affine_fn t0 = ova1;
3266 affine_fn t2 = ova2;
3267 affine_fn t4 = ovb;
3269 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
3270 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
3271 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
3272 affine_fn_free (t0);
3273 affine_fn_free (t2);
3274 affine_fn_free (t4);
3275 *last_conflicts = last_conflicts_xyz;
3277 *overlaps_a = conflict_fn (2, ova1, ova2);
3278 *overlaps_b = conflict_fn (1, ovb);
3280 else
3282 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3283 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3284 *last_conflicts = integer_zero_node;
/* The sub-problem results were copied into the conflict functions
   above; release the intermediates.  */
3287 affine_fn_free (overlaps_a_xz);
3288 affine_fn_free (overlaps_b_xz);
3289 affine_fn_free (overlaps_a_yz);
3290 affine_fn_free (overlaps_b_yz);
3291 affine_fn_free (overlaps_a_xyz);
3292 affine_fn_free (overlaps_b_xyz);
3295 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
3297 static void
3298 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
3299 int size)
3301 memcpy (vec2, vec1, size * sizeof (*vec1));
3304 /* Copy the elements of M x N matrix MAT1 to MAT2. */
3306 static void
3307 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
3308 int m, int n)
3310 int i;
3312 for (i = 0; i < m; i++)
3313 lambda_vector_copy (mat1[i], mat2[i], n);
3316 /* Store the N x N identity matrix in MAT. */
3318 static void
3319 lambda_matrix_id (lambda_matrix mat, int size)
3321 int i, j;
3323 for (i = 0; i < size; i++)
3324 for (j = 0; j < size; j++)
3325 mat[i][j] = (i == j) ? 1 : 0;
3328 /* Return the first nonzero element of vector VEC1 between START and N.
3329 We must have START <= N. Returns N if VEC1 is the zero vector. */
3331 static int
3332 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
3334 int j = start;
3335 while (j < n && vec1[j] == 0)
3336 j++;
3337 return j;
3340 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
3341 R2 = R2 + CONST1 * R1. */
3343 static void
3344 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
3346 int i;
3348 if (const1 == 0)
3349 return;
3351 for (i = 0; i < n; i++)
3352 mat[r2][i] += const1 * mat[r1][i];
3355 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
3356 and store the result in VEC2. */
3358 static void
3359 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
3360 int size, int const1)
3362 int i;
3364 if (const1 == 0)
3365 lambda_vector_clear (vec2, size);
3366 else
3367 for (i = 0; i < size; i++)
3368 vec2[i] = const1 * vec1[i];
3371 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
3373 static void
3374 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
3375 int size)
3377 lambda_vector_mult_const (vec1, vec2, size, -1);
3380 /* Negate row R1 of matrix MAT which has N columns. */
3382 static void
3383 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
3385 lambda_vector_negate (mat[r1], mat[r1], n);
3388 /* Return true if two vectors are equal. */
3390 static bool
3391 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
3393 int i;
3394 for (i = 0; i < size; i++)
3395 if (vec1[i] != vec2[i])
3396 return false;
3397 return true;
3400 /* Given an M x N integer matrix A, this function determines an M x
3401 M unimodular matrix U, and an M x N echelon matrix S such that
3402 "U.A = S". This decomposition is also known as "right Hermite".
3404 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
3405 Restructuring Compilers" Utpal Banerjee. */
3407 static void
3408 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
3409 lambda_matrix S, lambda_matrix U)
3411 int i, j, i0 = 0;
3413 lambda_matrix_copy (A, S, m, n);
3414 lambda_matrix_id (U, m);
3416 for (j = 0; j < n; j++)
3418 if (lambda_vector_first_nz (S[j], m, i0) < m)
3420 ++i0;
3421 for (i = m - 1; i >= i0; i--)
3423 while (S[i][j] != 0)
3425 int sigma, factor, a, b;
3427 a = S[i-1][j];
3428 b = S[i][j];
3429 sigma = (a * b < 0) ? -1: 1;
3430 a = abs (a);
3431 b = abs (b);
3432 factor = sigma * (a / b);
3434 lambda_matrix_row_add (S, n, i, i-1, -factor);
3435 std::swap (S[i], S[i-1]);
3437 lambda_matrix_row_add (U, m, i, i-1, -factor);
3438 std::swap (U[i], U[i-1]);
3445 /* Determines the overlapping elements due to accesses CHREC_A and
3446 CHREC_B, that are affine functions. This function cannot handle
3447 symbolic evolution functions, ie. when initial conditions are
3448 parameters, because it uses lambda matrices of integers. */
3450 static void
3451 analyze_subscript_affine_affine (tree chrec_a,
3452 tree chrec_b,
3453 conflict_function **overlaps_a,
3454 conflict_function **overlaps_b,
3455 tree *last_conflicts)
3457 unsigned nb_vars_a, nb_vars_b, dim;
3458 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
3459 lambda_matrix A, U, S;
3460 struct obstack scratch_obstack;
3462 if (eq_evolutions_p (chrec_a, chrec_b))
3464 /* The accessed index overlaps for each iteration in the
3465 loop. */
3466 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3467 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3468 *last_conflicts = chrec_dont_know;
3469 return;
3471 if (dump_file && (dump_flags & TDF_DETAILS))
3472 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
3474 /* For determining the initial intersection, we have to solve a
3475 Diophantine equation. This is the most time consuming part.
3477 For answering to the question: "Is there a dependence?" we have
3478 to prove that there exists a solution to the Diophantine
3479 equation, and that the solution is in the iteration domain,
3480 i.e. the solution is positive or zero, and that the solution
3481 happens before the upper bound loop.nb_iterations. Otherwise
3482 there is no dependence. This function outputs a description of
3483 the iterations that hold the intersections. */
3485 nb_vars_a = nb_vars_in_chrec (chrec_a);
3486 nb_vars_b = nb_vars_in_chrec (chrec_b);
3488 gcc_obstack_init (&scratch_obstack);
3490 dim = nb_vars_a + nb_vars_b;
3491 U = lambda_matrix_new (dim, dim, &scratch_obstack);
3492 A = lambda_matrix_new (dim, 1, &scratch_obstack);
3493 S = lambda_matrix_new (dim, 1, &scratch_obstack);
3495 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
3496 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
3497 gamma = init_b - init_a;
3499 /* Don't do all the hard work of solving the Diophantine equation
3500 when we already know the solution: for example,
3501 | {3, +, 1}_1
3502 | {3, +, 4}_2
3503 | gamma = 3 - 3 = 0.
3504 Then the first overlap occurs during the first iterations:
3505 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
3507 if (gamma == 0)
3509 if (nb_vars_a == 1 && nb_vars_b == 1)
3511 HOST_WIDE_INT step_a, step_b;
3512 HOST_WIDE_INT niter, niter_a, niter_b;
3513 affine_fn ova, ovb;
3515 niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
3516 niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
3517 niter = MIN (niter_a, niter_b);
3518 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
3519 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
3521 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
3522 &ova, &ovb,
3523 last_conflicts, 1);
3524 *overlaps_a = conflict_fn (1, ova);
3525 *overlaps_b = conflict_fn (1, ovb);
3528 else if (nb_vars_a == 2 && nb_vars_b == 1)
3529 compute_overlap_steps_for_affine_1_2
3530 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
3532 else if (nb_vars_a == 1 && nb_vars_b == 2)
3533 compute_overlap_steps_for_affine_1_2
3534 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
3536 else
3538 if (dump_file && (dump_flags & TDF_DETAILS))
3539 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
3540 *overlaps_a = conflict_fn_not_known ();
3541 *overlaps_b = conflict_fn_not_known ();
3542 *last_conflicts = chrec_dont_know;
3544 goto end_analyze_subs_aa;
3547 /* U.A = S */
3548 lambda_matrix_right_hermite (A, dim, 1, S, U);
3550 if (S[0][0] < 0)
3552 S[0][0] *= -1;
3553 lambda_matrix_row_negate (U, dim, 0);
3555 gcd_alpha_beta = S[0][0];
3557 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
3558 but that is a quite strange case. Instead of ICEing, answer
3559 don't know. */
3560 if (gcd_alpha_beta == 0)
3562 *overlaps_a = conflict_fn_not_known ();
3563 *overlaps_b = conflict_fn_not_known ();
3564 *last_conflicts = chrec_dont_know;
3565 goto end_analyze_subs_aa;
3568 /* The classic "gcd-test". */
3569 if (!int_divides_p (gcd_alpha_beta, gamma))
3571 /* The "gcd-test" has determined that there is no integer
3572 solution, i.e. there is no dependence. */
3573 *overlaps_a = conflict_fn_no_dependence ();
3574 *overlaps_b = conflict_fn_no_dependence ();
3575 *last_conflicts = integer_zero_node;
3578 /* Both access functions are univariate. This includes SIV and MIV cases. */
3579 else if (nb_vars_a == 1 && nb_vars_b == 1)
3581 /* Both functions should have the same evolution sign. */
3582 if (((A[0][0] > 0 && -A[1][0] > 0)
3583 || (A[0][0] < 0 && -A[1][0] < 0)))
3585 /* The solutions are given by:
3587 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
3588 | [u21 u22] [y0]
3590 For a given integer t. Using the following variables,
3592 | i0 = u11 * gamma / gcd_alpha_beta
3593 | j0 = u12 * gamma / gcd_alpha_beta
3594 | i1 = u21
3595 | j1 = u22
3597 the solutions are:
3599 | x0 = i0 + i1 * t,
3600 | y0 = j0 + j1 * t. */
3601 HOST_WIDE_INT i0, j0, i1, j1;
3603 i0 = U[0][0] * gamma / gcd_alpha_beta;
3604 j0 = U[0][1] * gamma / gcd_alpha_beta;
3605 i1 = U[1][0];
3606 j1 = U[1][1];
3608 if ((i1 == 0 && i0 < 0)
3609 || (j1 == 0 && j0 < 0))
3611 /* There is no solution.
3612 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
3613 falls in here, but for the moment we don't look at the
3614 upper bound of the iteration domain. */
3615 *overlaps_a = conflict_fn_no_dependence ();
3616 *overlaps_b = conflict_fn_no_dependence ();
3617 *last_conflicts = integer_zero_node;
3618 goto end_analyze_subs_aa;
3621 if (i1 > 0 && j1 > 0)
3623 HOST_WIDE_INT niter_a
3624 = max_stmt_executions_int (get_chrec_loop (chrec_a));
3625 HOST_WIDE_INT niter_b
3626 = max_stmt_executions_int (get_chrec_loop (chrec_b));
3627 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
3629 /* (X0, Y0) is a solution of the Diophantine equation:
3630 "chrec_a (X0) = chrec_b (Y0)". */
3631 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
3632 CEIL (-j0, j1));
3633 HOST_WIDE_INT x0 = i1 * tau1 + i0;
3634 HOST_WIDE_INT y0 = j1 * tau1 + j0;
3636 /* (X1, Y1) is the smallest positive solution of the eq
3637 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
3638 first conflict occurs. */
3639 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
3640 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
3641 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
3643 if (niter > 0)
3645 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter_a - i0, i1),
3646 FLOOR_DIV (niter_b - j0, j1));
3647 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
3649 /* If the overlap occurs outside of the bounds of the
3650 loop, there is no dependence. */
3651 if (x1 >= niter_a || y1 >= niter_b)
3653 *overlaps_a = conflict_fn_no_dependence ();
3654 *overlaps_b = conflict_fn_no_dependence ();
3655 *last_conflicts = integer_zero_node;
3656 goto end_analyze_subs_aa;
3658 else
3659 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
3661 else
3662 *last_conflicts = chrec_dont_know;
3664 *overlaps_a
3665 = conflict_fn (1,
3666 affine_fn_univar (build_int_cst (NULL_TREE, x1),
3668 build_int_cst (NULL_TREE, i1)));
3669 *overlaps_b
3670 = conflict_fn (1,
3671 affine_fn_univar (build_int_cst (NULL_TREE, y1),
3673 build_int_cst (NULL_TREE, j1)));
3675 else
3677 /* FIXME: For the moment, the upper bound of the
3678 iteration domain for i and j is not checked. */
3679 if (dump_file && (dump_flags & TDF_DETAILS))
3680 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
3681 *overlaps_a = conflict_fn_not_known ();
3682 *overlaps_b = conflict_fn_not_known ();
3683 *last_conflicts = chrec_dont_know;
3686 else
3688 if (dump_file && (dump_flags & TDF_DETAILS))
3689 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
3690 *overlaps_a = conflict_fn_not_known ();
3691 *overlaps_b = conflict_fn_not_known ();
3692 *last_conflicts = chrec_dont_know;
3695 else
3697 if (dump_file && (dump_flags & TDF_DETAILS))
3698 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
3699 *overlaps_a = conflict_fn_not_known ();
3700 *overlaps_b = conflict_fn_not_known ();
3701 *last_conflicts = chrec_dont_know;
3704 end_analyze_subs_aa:
3705 obstack_free (&scratch_obstack, NULL);
3706 if (dump_file && (dump_flags & TDF_DETAILS))
3708 fprintf (dump_file, " (overlaps_a = ");
3709 dump_conflict_function (dump_file, *overlaps_a);
3710 fprintf (dump_file, ")\n (overlaps_b = ");
3711 dump_conflict_function (dump_file, *overlaps_b);
3712 fprintf (dump_file, "))\n");
3716 /* Returns true when analyze_subscript_affine_affine can be used for
3717 determining the dependence relation between chrec_a and chrec_b,
3718 that contain symbols. This function modifies chrec_a and chrec_b
3719 such that the analysis result is the same, and such that they don't
3720 contain symbols, and then can safely be passed to the analyzer.
3722 Example: The analysis of the following tuples of evolutions produce
3723 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
3724 vs. {0, +, 1}_1
3726 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
3727 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
3730 static bool
3731 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
3733 tree diff, type, left_a, left_b, right_b;
3735 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
3736 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
3737 /* FIXME: For the moment not handled. Might be refined later. */
3738 return false;
3740 type = chrec_type (*chrec_a);
3741 left_a = CHREC_LEFT (*chrec_a);
3742 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
3743 diff = chrec_fold_minus (type, left_a, left_b);
3745 if (!evolution_function_is_constant_p (diff))
3746 return false;
3748 if (dump_file && (dump_flags & TDF_DETAILS))
3749 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
3751 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
3752 diff, CHREC_RIGHT (*chrec_a));
3753 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
3754 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
3755 build_int_cst (type, 0),
3756 right_b);
3757 return true;
3760 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
3761 *OVERLAPS_B are initialized to the functions that describe the
3762 relation between the elements accessed twice by CHREC_A and
3763 CHREC_B. For k >= 0, the following property is verified:
3765 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
3767 static void
3768 analyze_siv_subscript (tree chrec_a,
3769 tree chrec_b,
3770 conflict_function **overlaps_a,
3771 conflict_function **overlaps_b,
3772 tree *last_conflicts,
3773 int loop_nest_num)
3775 dependence_stats.num_siv++;
3777 if (dump_file && (dump_flags & TDF_DETAILS))
3778 fprintf (dump_file, "(analyze_siv_subscript \n");
3780 if (evolution_function_is_constant_p (chrec_a)
3781 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
3782 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
3783 overlaps_a, overlaps_b, last_conflicts);
3785 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
3786 && evolution_function_is_constant_p (chrec_b))
3787 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
3788 overlaps_b, overlaps_a, last_conflicts);
3790 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
3791 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
3793 if (!chrec_contains_symbols (chrec_a)
3794 && !chrec_contains_symbols (chrec_b))
3796 analyze_subscript_affine_affine (chrec_a, chrec_b,
3797 overlaps_a, overlaps_b,
3798 last_conflicts);
3800 if (CF_NOT_KNOWN_P (*overlaps_a)
3801 || CF_NOT_KNOWN_P (*overlaps_b))
3802 dependence_stats.num_siv_unimplemented++;
3803 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
3804 || CF_NO_DEPENDENCE_P (*overlaps_b))
3805 dependence_stats.num_siv_independent++;
3806 else
3807 dependence_stats.num_siv_dependent++;
3809 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
3810 &chrec_b))
3812 analyze_subscript_affine_affine (chrec_a, chrec_b,
3813 overlaps_a, overlaps_b,
3814 last_conflicts);
3816 if (CF_NOT_KNOWN_P (*overlaps_a)
3817 || CF_NOT_KNOWN_P (*overlaps_b))
3818 dependence_stats.num_siv_unimplemented++;
3819 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
3820 || CF_NO_DEPENDENCE_P (*overlaps_b))
3821 dependence_stats.num_siv_independent++;
3822 else
3823 dependence_stats.num_siv_dependent++;
3825 else
3826 goto siv_subscript_dontknow;
3829 else
3831 siv_subscript_dontknow:;
3832 if (dump_file && (dump_flags & TDF_DETAILS))
3833 fprintf (dump_file, " siv test failed: unimplemented");
3834 *overlaps_a = conflict_fn_not_known ();
3835 *overlaps_b = conflict_fn_not_known ();
3836 *last_conflicts = chrec_dont_know;
3837 dependence_stats.num_siv_unimplemented++;
3840 if (dump_file && (dump_flags & TDF_DETAILS))
3841 fprintf (dump_file, ")\n");
3844 /* Returns false if we can prove that the greatest common divisor of the steps
3845 of CHREC does not divide CST, false otherwise. */
3847 static bool
3848 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
3850 HOST_WIDE_INT cd = 0, val;
3851 tree step;
3853 if (!tree_fits_shwi_p (cst))
3854 return true;
3855 val = tree_to_shwi (cst);
3857 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
3859 step = CHREC_RIGHT (chrec);
3860 if (!tree_fits_shwi_p (step))
3861 return true;
3862 cd = gcd (cd, tree_to_shwi (step));
3863 chrec = CHREC_LEFT (chrec);
3866 return val % cd == 0;
3869 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
3870 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
3871 functions that describe the relation between the elements accessed
3872 twice by CHREC_A and CHREC_B. For k >= 0, the following property
3873 is verified:
3875 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
3877 static void
3878 analyze_miv_subscript (tree chrec_a,
3879 tree chrec_b,
3880 conflict_function **overlaps_a,
3881 conflict_function **overlaps_b,
3882 tree *last_conflicts,
3883 struct loop *loop_nest)
3885 tree type, difference;
3887 dependence_stats.num_miv++;
3888 if (dump_file && (dump_flags & TDF_DETAILS))
3889 fprintf (dump_file, "(analyze_miv_subscript \n");
3891 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3892 chrec_a = chrec_convert (type, chrec_a, NULL);
3893 chrec_b = chrec_convert (type, chrec_b, NULL);
3894 difference = chrec_fold_minus (type, chrec_a, chrec_b);
3896 if (eq_evolutions_p (chrec_a, chrec_b))
3898 /* Access functions are the same: all the elements are accessed
3899 in the same order. */
3900 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3901 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3902 *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
3903 dependence_stats.num_miv_dependent++;
3906 else if (evolution_function_is_constant_p (difference)
3907 /* For the moment, the following is verified:
3908 evolution_function_is_affine_multivariate_p (chrec_a,
3909 loop_nest->num) */
3910 && !gcd_of_steps_may_divide_p (chrec_a, difference))
3912 /* testsuite/.../ssa-chrec-33.c
3913 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
3915 The difference is 1, and all the evolution steps are multiples
3916 of 2, consequently there are no overlapping elements. */
3917 *overlaps_a = conflict_fn_no_dependence ();
3918 *overlaps_b = conflict_fn_no_dependence ();
3919 *last_conflicts = integer_zero_node;
3920 dependence_stats.num_miv_independent++;
3923 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
3924 && !chrec_contains_symbols (chrec_a)
3925 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
3926 && !chrec_contains_symbols (chrec_b))
3928 /* testsuite/.../ssa-chrec-35.c
3929 {0, +, 1}_2 vs. {0, +, 1}_3
3930 the overlapping elements are respectively located at iterations:
3931 {0, +, 1}_x and {0, +, 1}_x,
3932 in other words, we have the equality:
3933 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
3935 Other examples:
3936 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
3937 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
3939 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
3940 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
3942 analyze_subscript_affine_affine (chrec_a, chrec_b,
3943 overlaps_a, overlaps_b, last_conflicts);
3945 if (CF_NOT_KNOWN_P (*overlaps_a)
3946 || CF_NOT_KNOWN_P (*overlaps_b))
3947 dependence_stats.num_miv_unimplemented++;
3948 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
3949 || CF_NO_DEPENDENCE_P (*overlaps_b))
3950 dependence_stats.num_miv_independent++;
3951 else
3952 dependence_stats.num_miv_dependent++;
3955 else
3957 /* When the analysis is too difficult, answer "don't know". */
3958 if (dump_file && (dump_flags & TDF_DETAILS))
3959 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
3961 *overlaps_a = conflict_fn_not_known ();
3962 *overlaps_b = conflict_fn_not_known ();
3963 *last_conflicts = chrec_dont_know;
3964 dependence_stats.num_miv_unimplemented++;
3967 if (dump_file && (dump_flags & TDF_DETAILS))
3968 fprintf (dump_file, ")\n");
3971 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
3972 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
3973 OVERLAP_ITERATIONS_B are initialized with two functions that
3974 describe the iterations that contain conflicting elements.
3976 Remark: For an integer k >= 0, the following equality is true:
3978 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
3981 static void
3982 analyze_overlapping_iterations (tree chrec_a,
3983 tree chrec_b,
3984 conflict_function **overlap_iterations_a,
3985 conflict_function **overlap_iterations_b,
3986 tree *last_conflicts, struct loop *loop_nest)
3988 unsigned int lnn = loop_nest->num;
3990 dependence_stats.num_subscript_tests++;
3992 if (dump_file && (dump_flags & TDF_DETAILS))
3994 fprintf (dump_file, "(analyze_overlapping_iterations \n");
3995 fprintf (dump_file, " (chrec_a = ");
3996 print_generic_expr (dump_file, chrec_a);
3997 fprintf (dump_file, ")\n (chrec_b = ");
3998 print_generic_expr (dump_file, chrec_b);
3999 fprintf (dump_file, ")\n");
4002 if (chrec_a == NULL_TREE
4003 || chrec_b == NULL_TREE
4004 || chrec_contains_undetermined (chrec_a)
4005 || chrec_contains_undetermined (chrec_b))
4007 dependence_stats.num_subscript_undetermined++;
4009 *overlap_iterations_a = conflict_fn_not_known ();
4010 *overlap_iterations_b = conflict_fn_not_known ();
4013 /* If they are the same chrec, and are affine, they overlap
4014 on every iteration. */
4015 else if (eq_evolutions_p (chrec_a, chrec_b)
4016 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4017 || operand_equal_p (chrec_a, chrec_b, 0)))
4019 dependence_stats.num_same_subscript_function++;
4020 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4021 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4022 *last_conflicts = chrec_dont_know;
4025 /* If they aren't the same, and aren't affine, we can't do anything
4026 yet. */
4027 else if ((chrec_contains_symbols (chrec_a)
4028 || chrec_contains_symbols (chrec_b))
4029 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4030 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
4032 dependence_stats.num_subscript_undetermined++;
4033 *overlap_iterations_a = conflict_fn_not_known ();
4034 *overlap_iterations_b = conflict_fn_not_known ();
4037 else if (ziv_subscript_p (chrec_a, chrec_b))
4038 analyze_ziv_subscript (chrec_a, chrec_b,
4039 overlap_iterations_a, overlap_iterations_b,
4040 last_conflicts);
4042 else if (siv_subscript_p (chrec_a, chrec_b))
4043 analyze_siv_subscript (chrec_a, chrec_b,
4044 overlap_iterations_a, overlap_iterations_b,
4045 last_conflicts, lnn);
4047 else
4048 analyze_miv_subscript (chrec_a, chrec_b,
4049 overlap_iterations_a, overlap_iterations_b,
4050 last_conflicts, loop_nest);
4052 if (dump_file && (dump_flags & TDF_DETAILS))
4054 fprintf (dump_file, " (overlap_iterations_a = ");
4055 dump_conflict_function (dump_file, *overlap_iterations_a);
4056 fprintf (dump_file, ")\n (overlap_iterations_b = ");
4057 dump_conflict_function (dump_file, *overlap_iterations_b);
4058 fprintf (dump_file, "))\n");
4062 /* Helper function for uniquely inserting distance vectors. */
4064 static void
4065 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
4067 unsigned i;
4068 lambda_vector v;
4070 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, v)
4071 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
4072 return;
4074 DDR_DIST_VECTS (ddr).safe_push (dist_v);
4077 /* Helper function for uniquely inserting direction vectors. */
4079 static void
4080 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
4082 unsigned i;
4083 lambda_vector v;
4085 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), i, v)
4086 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
4087 return;
4089 DDR_DIR_VECTS (ddr).safe_push (dir_v);
4092 /* Add a distance of 1 on all the loops outer than INDEX. If we
4093 haven't yet determined a distance for this outer loop, push a new
4094 distance vector composed of the previous distance, and a distance
4095 of 1 for this outer loop. Example:
4097 | loop_1
4098 | loop_2
4099 | A[10]
4100 | endloop_2
4101 | endloop_1
4103 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
4104 save (0, 1), then we have to save (1, 0). */
4106 static void
4107 add_outer_distances (struct data_dependence_relation *ddr,
4108 lambda_vector dist_v, int index)
4110 /* For each outer loop where init_v is not set, the accesses are
4111 in dependence of distance 1 in the loop. */
4112 while (--index >= 0)
4114 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4115 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
4116 save_v[index] = 1;
4117 save_dist_v (ddr, save_v);
4121 /* Return false when fail to represent the data dependence as a
4122 distance vector. A_INDEX is the index of the first reference
4123 (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
4124 second reference. INIT_B is set to true when a component has been
4125 added to the distance vector DIST_V. INDEX_CARRY is then set to
4126 the index in DIST_V that carries the dependence. */
4128 static bool
4129 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
4130 unsigned int a_index, unsigned int b_index,
4131 lambda_vector dist_v, bool *init_b,
4132 int *index_carry)
4134 unsigned i;
4135 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4137 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
4139 tree access_fn_a, access_fn_b;
4140 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
4142 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
4144 non_affine_dependence_relation (ddr);
4145 return false;
4148 access_fn_a = SUB_ACCESS_FN (subscript, a_index);
4149 access_fn_b = SUB_ACCESS_FN (subscript, b_index);
4151 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
4152 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
4154 HOST_WIDE_INT dist;
4155 int index;
4156 int var_a = CHREC_VARIABLE (access_fn_a);
4157 int var_b = CHREC_VARIABLE (access_fn_b);
4159 if (var_a != var_b
4160 || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
4162 non_affine_dependence_relation (ddr);
4163 return false;
4166 dist = int_cst_value (SUB_DISTANCE (subscript));
4167 index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
4168 *index_carry = MIN (index, *index_carry);
4170 /* This is the subscript coupling test. If we have already
4171 recorded a distance for this loop (a distance coming from
4172 another subscript), it should be the same. For example,
4173 in the following code, there is no dependence:
4175 | loop i = 0, N, 1
4176 | T[i+1][i] = ...
4177 | ... = T[i][i]
4178 | endloop
4180 if (init_v[index] != 0 && dist_v[index] != dist)
4182 finalize_ddr_dependent (ddr, chrec_known);
4183 return false;
4186 dist_v[index] = dist;
4187 init_v[index] = 1;
4188 *init_b = true;
4190 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
4192 /* This can be for example an affine vs. constant dependence
4193 (T[i] vs. T[3]) that is not an affine dependence and is
4194 not representable as a distance vector. */
4195 non_affine_dependence_relation (ddr);
4196 return false;
4200 return true;
4203 /* Return true when the DDR contains only constant access functions. */
4205 static bool
4206 constant_access_functions (const struct data_dependence_relation *ddr)
4208 unsigned i;
4209 subscript *sub;
4211 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4212 if (!evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 0))
4213 || !evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 1)))
4214 return false;
4216 return true;
4219 /* Helper function for the case where DDR_A and DDR_B are the same
4220 multivariate access function with a constant step. For an example
4221 see pr34635-1.c. */
4223 static void
4224 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
4226 int x_1, x_2;
4227 tree c_1 = CHREC_LEFT (c_2);
4228 tree c_0 = CHREC_LEFT (c_1);
4229 lambda_vector dist_v;
4230 HOST_WIDE_INT v1, v2, cd;
4232 /* Polynomials with more than 2 variables are not handled yet. When
4233 the evolution steps are parameters, it is not possible to
4234 represent the dependence using classical distance vectors. */
4235 if (TREE_CODE (c_0) != INTEGER_CST
4236 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
4237 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
4239 DDR_AFFINE_P (ddr) = false;
4240 return;
4243 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
4244 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
4246 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
4247 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4248 v1 = int_cst_value (CHREC_RIGHT (c_1));
4249 v2 = int_cst_value (CHREC_RIGHT (c_2));
4250 cd = gcd (v1, v2);
4251 v1 /= cd;
4252 v2 /= cd;
4254 if (v2 < 0)
4256 v2 = -v2;
4257 v1 = -v1;
4260 dist_v[x_1] = v2;
4261 dist_v[x_2] = -v1;
4262 save_dist_v (ddr, dist_v);
4264 add_outer_distances (ddr, dist_v, x_1);
/* Helper function for the case where DDR_A and DDR_B are the same
   access functions, i.e. the relation of a reference with itself.
   Records the self-distance vectors implied by the (identical)
   evolution of each subscript.  */

static void
add_other_self_distances (struct data_dependence_relation *ddr)
{
  lambda_vector dist_v;
  unsigned i;
  /* Index of the outermost loop that carries the dependence; start
     past the last loop and shrink as subscripts are examined.  */
  int index_carry = DDR_NB_LOOPS (ddr);
  subscript *sub;

  FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
    {
      /* Both sides have the same access function, so inspecting
	 side 0 is sufficient.  */
      tree access_fun = SUB_ACCESS_FN (sub, 0);

      if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
	{
	  if (!evolution_function_is_univariate_p (access_fun))
	    {
	      /* Multivariate evolutions are only handled for a single
		 subscript; otherwise give a conservative answer.  */
	      if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
		{
		  DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
		  return;
		}

	      access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);

	      if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
		add_multivariate_self_dist (ddr, access_fun);
	      else
		/* The evolution step is not constant: it varies in
		   the outer loop, so this cannot be represented by a
		   distance vector.  For example in pr34635.c the
		   evolution is {0, +, {0, +, 4}_1}_2.  */
		DDR_AFFINE_P (ddr) = false;

	      return;
	    }

	  /* Univariate chrec: the dependence is carried by its loop.  */
	  index_carry = MIN (index_carry,
			     index_in_loop_nest (CHREC_VARIABLE (access_fun),
						 DDR_LOOP_NEST (ddr)));
	}
    }

  /* All subscripts are invariant or univariate: record a zero vector
     plus the distances implied by the outer loops.  */
  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  add_outer_distances (ddr, dist_v, index_carry);
}
4316 static void
4317 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
4319 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4321 dist_v[DDR_INNER_LOOP (ddr)] = 1;
4322 save_dist_v (ddr, dist_v);
4325 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
4326 is the case for example when access functions are the same and
4327 equal to a constant, as in:
4329 | loop_1
4330 | A[3] = ...
4331 | ... = A[3]
4332 | endloop_1
4334 in which case the distance vectors are (0) and (1). */
4336 static void
4337 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
4339 unsigned i, j;
4341 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
4343 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
4344 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
4345 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
4347 for (j = 0; j < ca->n; j++)
4348 if (affine_function_zero_p (ca->fns[j]))
4350 insert_innermost_unit_dist_vector (ddr);
4351 return;
4354 for (j = 0; j < cb->n; j++)
4355 if (affine_function_zero_p (cb->fns[j]))
4357 insert_innermost_unit_dist_vector (ddr);
4358 return;
4363 /* Return true when the DDR contains two data references that have the
4364 same access functions. */
4366 static inline bool
4367 same_access_functions (const struct data_dependence_relation *ddr)
4369 unsigned i;
4370 subscript *sub;
4372 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4373 if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
4374 SUB_ACCESS_FN (sub, 1)))
4375 return false;
4377 return true;
/* Compute the classic per loop distance vector.  DDR is the data
   dependence relation to build a vector from.  Return false when fail
   to represent the data dependence as a distance vector.  */

static bool
build_classic_dist_vector (struct data_dependence_relation *ddr,
			   struct loop *loop_nest)
{
  bool init_b = false;
  int index_carry = DDR_NB_LOOPS (ddr);
  lambda_vector dist_v;

  /* Nothing to do when the relation is already fully determined.  */
  if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
    return false;

  if (same_access_functions (ddr))
    {
      /* Save the 0 vector.  */
      dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
      save_dist_v (ddr, dist_v);

      if (constant_access_functions (ddr))
	add_distance_for_zero_overlaps (ddr);

      if (DDR_NB_LOOPS (ddr) > 1)
	add_other_self_distances (ddr);

      return true;
    }

  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
    return false;

  /* Save the distance vector if we initialized one.  */
  if (init_b)
    {
      /* Verify a basic constraint: classic distance vectors should
	 always be lexicographically positive.

	 Data references are collected in the order of execution of
	 the program, thus for the following loop

	 | for (i = 1; i < 100; i++)
	 |   for (j = 1; j < 100; j++)
	 |     {
	 |       t = T[j+1][i-1];  // A
	 |       T[j][i] = t + 2;  // B
	 |     }

	 references are collected following the direction of the wind:
	 A then B.  The data dependence tests are performed also
	 following this order, such that we're looking at the distance
	 separating the elements accessed by A from the elements later
	 accessed by B.  But in this example, the distance returned by
	 test_dep (A, B) is lexicographically negative (-1, 1), that
	 means that the access A occurs later than B with respect to
	 the outer loop, ie. we're actually looking upwind.  In this
	 case we solve test_dep (B, A) looking downwind to the
	 lexicographically positive solution, that returns the
	 distance vector (1, -1).  */
      if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
	{
	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
	  /* Re-run the dependence test in the reverse direction
	     (B, A) and build the vector from that.  */
	  if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
	    return false;
	  compute_subscript_distance (ddr);
	  if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
					    &index_carry))
	    return false;
	  save_dist_v (ddr, save_v);
	  DDR_REVERSED_P (ddr) = true;

	  /* In this case there is a dependence forward for all the
	     outer loops:

	     | for (k = 1; k < 100; k++)
	     |  for (i = 1; i < 100; i++)
	     |   for (j = 1; j < 100; j++)
	     |     {
	     |       t = T[j+1][i-1];  // A
	     |       T[j][i] = t + 2;  // B
	     |     }

	     the vectors are:
	     (0,  1, -1)
	     (1,  1, -1)
	     (1, -1,  1)
	  */
	  if (DDR_NB_LOOPS (ddr) > 1)
	    {
	      add_outer_distances (ddr, save_v, index_carry);
	      add_outer_distances (ddr, dist_v, index_carry);
	    }
	}
      else
	{
	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
	  lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));

	  if (DDR_NB_LOOPS (ddr) > 1)
	    {
	      /* Also record the vector of the opposite direction for
		 the outer loops.  */
	      lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));

	      if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
		return false;
	      compute_subscript_distance (ddr);
	      if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
						&index_carry))
		return false;

	      save_dist_v (ddr, save_v);
	      add_outer_distances (ddr, dist_v, index_carry);
	      add_outer_distances (ddr, opposite_v, index_carry);
	    }
	  else
	    save_dist_v (ddr, save_v);
	}
    }
  else
    {
      /* There is a distance of 1 on all the outer loops: Example:
	 there is a dependence of distance 1 on loop_1 for the array A.

	 | loop_1
	 |   A[5] = ...
	 | endloop
      */
      add_outer_distances (ddr, dist_v,
			   lambda_vector_first_nz (dist_v,
						   DDR_NB_LOOPS (ddr), 0));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned i;

      fprintf (dump_file, "(build_classic_dist_vector\n");
      for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
	{
	  fprintf (dump_file, "  dist_vector = (");
	  print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
			       DDR_NB_LOOPS (ddr));
	  fprintf (dump_file, "  )\n");
	}
      fprintf (dump_file, ")\n");
    }

  return true;
}
4531 /* Return the direction for a given distance.
4532 FIXME: Computing dir this way is suboptimal, since dir can catch
4533 cases that dist is unable to represent. */
4535 static inline enum data_dependence_direction
4536 dir_from_dist (int dist)
4538 if (dist > 0)
4539 return dir_positive;
4540 else if (dist < 0)
4541 return dir_negative;
4542 else
4543 return dir_equal;
4546 /* Compute the classic per loop direction vector. DDR is the data
4547 dependence relation to build a vector from. */
4549 static void
4550 build_classic_dir_vector (struct data_dependence_relation *ddr)
4552 unsigned i, j;
4553 lambda_vector dist_v;
4555 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
4557 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4559 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
4560 dir_v[j] = dir_from_dist (dist_v[j]);
4562 save_dir_v (ddr, dir_v);
/* Helper function.  Returns true when there is a dependence between the
   data references.  A_INDEX is the index of the first reference (0 for
   DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference.  */

static bool
subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
			       unsigned int a_index, unsigned int b_index,
			       struct loop *loop_nest)
{
  unsigned int i;
  tree last_conflicts;
  struct subscript *subscript;
  /* NULL_TREE: dependence; chrec_known: independence;
     chrec_dont_know: undetermined.  */
  tree res = NULL_TREE;

  for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
    {
      conflict_function *overlaps_a, *overlaps_b;

      analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
				      SUB_ACCESS_FN (subscript, b_index),
				      &overlaps_a, &overlaps_b,
				      &last_conflicts, loop_nest);

      /* Release any conflict functions from a previous run before
	 storing the new ones; the subscript owns them.  */
      if (SUB_CONFLICTS_IN_A (subscript))
	free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
      if (SUB_CONFLICTS_IN_B (subscript))
	free_conflict_function (SUB_CONFLICTS_IN_B (subscript));

      SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
      SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
      SUB_LAST_CONFLICT (subscript) = last_conflicts;

      /* If there is any undetermined conflict function we have to
	 give a conservative answer in case we cannot prove that
	 no dependence exists when analyzing another subscript.  */
      if (CF_NOT_KNOWN_P (overlaps_a)
	  || CF_NOT_KNOWN_P (overlaps_b))
	{
	  res = chrec_dont_know;
	  continue;
	}

      /* When there is a subscript with no dependence we can stop.  */
      else if (CF_NO_DEPENDENCE_P (overlaps_a)
	       || CF_NO_DEPENDENCE_P (overlaps_b))
	{
	  res = chrec_known;
	  break;
	}
    }

  if (res == NULL_TREE)
    return true;

  if (res == chrec_known)
    dependence_stats.num_dependence_independent++;
  else
    dependence_stats.num_dependence_undetermined++;
  finalize_ddr_dependent (ddr, res);
  return false;
}
4628 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
4630 static void
4631 subscript_dependence_tester (struct data_dependence_relation *ddr,
4632 struct loop *loop_nest)
4634 if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
4635 dependence_stats.num_dependence_dependent++;
4637 compute_subscript_distance (ddr);
4638 if (build_classic_dist_vector (ddr, loop_nest))
4639 build_classic_dir_vector (ddr);
4642 /* Returns true when all the access functions of A are affine or
4643 constant with respect to LOOP_NEST. */
4645 static bool
4646 access_functions_are_affine_or_constant_p (const struct data_reference *a,
4647 const struct loop *loop_nest)
4649 unsigned int i;
4650 vec<tree> fns = DR_ACCESS_FNS (a);
4651 tree t;
4653 FOR_EACH_VEC_ELT (fns, i, t)
4654 if (!evolution_function_is_invariant_p (t, loop_nest->num)
4655 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
4656 return false;
4658 return true;
/* This computes the affine dependence relation between A and B with
   respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
   independence between two accesses, while CHREC_DONT_KNOW is used
   for representing the unknown relation.

   Note that it is possible to stop the computation of the dependence
   relation the first time we detect a CHREC_KNOWN element for a given
   subscript.  */

void
compute_affine_dependence (struct data_dependence_relation *ddr,
			   struct loop *loop_nest)
{
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "(compute_affine_dependence\n");
      fprintf (dump_file, "  stmt_a: ");
      print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
      fprintf (dump_file, "  stmt_b: ");
      print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
    }

  /* Analyze only when the dependence relation is not yet known.  */
  if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      dependence_stats.num_dependence_tests++;

      if (access_functions_are_affine_or_constant_p (dra, loop_nest)
	  && access_functions_are_affine_or_constant_p (drb, loop_nest))
	subscript_dependence_tester (ddr, loop_nest);

      /* As a last case, if the dependence cannot be determined, or if
	 the dependence is considered too difficult to determine, answer
	 "don't know".  */
      else
	{
	  dependence_stats.num_dependence_undetermined++;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Data ref a:\n");
	      dump_data_reference (dump_file, dra);
	      fprintf (dump_file, "Data ref b:\n");
	      dump_data_reference (dump_file, drb);
	      fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
	    }
	  finalize_ddr_dependent (ddr, chrec_dont_know);
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
	fprintf (dump_file, ") -> no dependence\n");
      else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	fprintf (dump_file, ") -> dependence analysis failed\n");
      else
	fprintf (dump_file, ")\n");
    }
}
4725 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4726 the data references in DATAREFS, in the LOOP_NEST. When
4727 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4728 relations. Return true when successful, i.e. data references number
4729 is small enough to be handled. */
4731 bool
4732 compute_all_dependences (vec<data_reference_p> datarefs,
4733 vec<ddr_p> *dependence_relations,
4734 vec<loop_p> loop_nest,
4735 bool compute_self_and_rr)
4737 struct data_dependence_relation *ddr;
4738 struct data_reference *a, *b;
4739 unsigned int i, j;
4741 if ((int) datarefs.length ()
4742 > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
4744 struct data_dependence_relation *ddr;
4746 /* Insert a single relation into dependence_relations:
4747 chrec_dont_know. */
4748 ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
4749 dependence_relations->safe_push (ddr);
4750 return false;
4753 FOR_EACH_VEC_ELT (datarefs, i, a)
4754 for (j = i + 1; datarefs.iterate (j, &b); j++)
4755 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4757 ddr = initialize_data_dependence_relation (a, b, loop_nest);
4758 dependence_relations->safe_push (ddr);
4759 if (loop_nest.exists ())
4760 compute_affine_dependence (ddr, loop_nest[0]);
4763 if (compute_self_and_rr)
4764 FOR_EACH_VEC_ELT (datarefs, i, a)
4766 ddr = initialize_data_dependence_relation (a, a, loop_nest);
4767 dependence_relations->safe_push (ddr);
4768 if (loop_nest.exists ())
4769 compute_affine_dependence (ddr, loop_nest[0]);
4772 return true;
/* Describes a location of a memory reference.  */

struct data_ref_loc
{
  /* The memory reference.  */
  tree ref;

  /* True if the memory reference is read.  */
  bool is_read;

  /* True if the data reference is conditional within the containing
     statement, i.e. if it might not occur even when the statement
     is executed and runs to completion.  */
  bool is_conditional_in_stmt;
};
/* Stores the locations of memory references in STMT to REFERENCES.  Returns
   true if STMT clobbers memory, false otherwise.  */

static bool
get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
{
  bool clobbers_memory = false;
  data_ref_loc ref;
  tree op0, op1;
  enum gimple_code stmt_code = gimple_code (stmt);

  /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
     As we cannot model data-references to not spelled out
     accesses give up if they may occur.  */
  if (stmt_code == GIMPLE_CALL
      && !(gimple_call_flags (stmt) & ECF_CONST))
    {
      /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
      if (gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_GOMP_SIMD_LANE:
	    {
	      struct loop *loop = gimple_bb (stmt)->loop_father;
	      tree uid = gimple_call_arg (stmt, 0);
	      gcc_assert (TREE_CODE (uid) == SSA_NAME);
	      /* Only harmless when the lane call belongs to the loop
		 it was created for.  */
	      if (loop == NULL
		  || loop->simduid != SSA_NAME_VAR (uid))
		clobbers_memory = true;
	      break;
	    }
	  case IFN_MASK_LOAD:
	  case IFN_MASK_STORE:
	    /* Modeled precisely below.  */
	    break;
	  default:
	    clobbers_memory = true;
	    break;
	  }
      else
	clobbers_memory = true;
    }
  else if (stmt_code == GIMPLE_ASM
	   && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
	       || gimple_vuse (stmt)))
    clobbers_memory = true;

  /* No virtual use means no memory reference to record.  */
  if (!gimple_vuse (stmt))
    return clobbers_memory;

  if (stmt_code == GIMPLE_ASSIGN)
    {
      tree base;
      op0 = gimple_assign_lhs (stmt);
      op1 = gimple_assign_rhs1 (stmt);

      /* Record the RHS as a read when it is a decl or a reference
	 whose base is a real memory object (not an SSA name or an
	 invariant address).  */
      if (DECL_P (op1)
	  || (REFERENCE_CLASS_P (op1)
	      && (base = get_base_address (op1))
	      && TREE_CODE (base) != SSA_NAME
	      && !is_gimple_min_invariant (base)))
	{
	  ref.ref = op1;
	  ref.is_read = true;
	  ref.is_conditional_in_stmt = false;
	  references->safe_push (ref);
	}
    }
  else if (stmt_code == GIMPLE_CALL)
    {
      unsigned i, n;
      tree ptr, type;
      unsigned int align;

      ref.is_read = false;
      if (gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_MASK_LOAD:
	    if (gimple_call_lhs (stmt) == NULL_TREE)
	      break;
	    ref.is_read = true;
	    /* FALLTHRU */
	  case IFN_MASK_STORE:
	    /* Build a MEM_REF representing the masked access; the
	       access is conditional on the mask within the stmt.  */
	    ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
	    align = tree_to_shwi (gimple_call_arg (stmt, 1));
	    if (ref.is_read)
	      type = TREE_TYPE (gimple_call_lhs (stmt));
	    else
	      type = TREE_TYPE (gimple_call_arg (stmt, 3));
	    if (TYPE_ALIGN (type) != align)
	      type = build_aligned_type (type, align);
	    ref.is_conditional_in_stmt = true;
	    ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
				   ptr);
	    references->safe_push (ref);
	    return false;
	  default:
	    break;
	  }

      op0 = gimple_call_lhs (stmt);
      n = gimple_call_num_args (stmt);
      for (i = 0; i < n; i++)
	{
	  op1 = gimple_call_arg (stmt, i);

	  if (DECL_P (op1)
	      || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
	    {
	      ref.ref = op1;
	      ref.is_read = true;
	      ref.is_conditional_in_stmt = false;
	      references->safe_push (ref);
	    }
	}
    }
  else
    return clobbers_memory;

  /* Record the LHS (of the assignment or call) as a write.  */
  if (op0
      && (DECL_P (op0)
	  || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
    {
      ref.ref = op0;
      ref.is_read = false;
      ref.is_conditional_in_stmt = false;
      references->safe_push (ref);
    }
  return clobbers_memory;
}
4924 /* Returns true if the loop-nest has any data reference. */
4926 bool
4927 loop_nest_has_data_refs (loop_p loop)
4929 basic_block *bbs = get_loop_body (loop);
4930 auto_vec<data_ref_loc, 3> references;
4932 for (unsigned i = 0; i < loop->num_nodes; i++)
4934 basic_block bb = bbs[i];
4935 gimple_stmt_iterator bsi;
4937 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4939 gimple *stmt = gsi_stmt (bsi);
4940 get_references_in_stmt (stmt, &references);
4941 if (references.length ())
4943 free (bbs);
4944 return true;
4948 free (bbs);
4949 return false;
4952 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4953 reference, returns false, otherwise returns true. NEST is the outermost
4954 loop of the loop nest in which the references should be analyzed. */
4956 bool
4957 find_data_references_in_stmt (struct loop *nest, gimple *stmt,
4958 vec<data_reference_p> *datarefs)
4960 unsigned i;
4961 auto_vec<data_ref_loc, 2> references;
4962 data_ref_loc *ref;
4963 bool ret = true;
4964 data_reference_p dr;
4966 if (get_references_in_stmt (stmt, &references))
4967 return false;
4969 FOR_EACH_VEC_ELT (references, i, ref)
4971 dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
4972 loop_containing_stmt (stmt), ref->ref,
4973 stmt, ref->is_read, ref->is_conditional_in_stmt);
4974 gcc_assert (dr != NULL);
4975 datarefs->safe_push (dr);
4978 return ret;
4981 /* Stores the data references in STMT to DATAREFS. If there is an
4982 unanalyzable reference, returns false, otherwise returns true.
4983 NEST is the outermost loop of the loop nest in which the references
4984 should be instantiated, LOOP is the loop in which the references
4985 should be analyzed. */
4987 bool
4988 graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
4989 vec<data_reference_p> *datarefs)
4991 unsigned i;
4992 auto_vec<data_ref_loc, 2> references;
4993 data_ref_loc *ref;
4994 bool ret = true;
4995 data_reference_p dr;
4997 if (get_references_in_stmt (stmt, &references))
4998 return false;
5000 FOR_EACH_VEC_ELT (references, i, ref)
5002 dr = create_data_ref (nest, loop, ref->ref, stmt, ref->is_read,
5003 ref->is_conditional_in_stmt);
5004 gcc_assert (dr != NULL);
5005 datarefs->safe_push (dr);
5008 return ret;
5011 /* Search the data references in LOOP, and record the information into
5012 DATAREFS. Returns chrec_dont_know when failing to analyze a
5013 difficult case, returns NULL_TREE otherwise. */
5015 tree
5016 find_data_references_in_bb (struct loop *loop, basic_block bb,
5017 vec<data_reference_p> *datarefs)
5019 gimple_stmt_iterator bsi;
5021 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5023 gimple *stmt = gsi_stmt (bsi);
5025 if (!find_data_references_in_stmt (loop, stmt, datarefs))
5027 struct data_reference *res;
5028 res = XCNEW (struct data_reference);
5029 datarefs->safe_push (res);
5031 return chrec_dont_know;
5035 return NULL_TREE;
5038 /* Search the data references in LOOP, and record the information into
5039 DATAREFS. Returns chrec_dont_know when failing to analyze a
5040 difficult case, returns NULL_TREE otherwise.
5042 TODO: This function should be made smarter so that it can handle address
5043 arithmetic as if they were array accesses, etc. */
5045 tree
5046 find_data_references_in_loop (struct loop *loop,
5047 vec<data_reference_p> *datarefs)
5049 basic_block bb, *bbs;
5050 unsigned int i;
5052 bbs = get_loop_body_in_dom_order (loop);
5054 for (i = 0; i < loop->num_nodes; i++)
5056 bb = bbs[i];
5058 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
5060 free (bbs);
5061 return chrec_dont_know;
5064 free (bbs);
5066 return NULL_TREE;
5069 /* Return the alignment in bytes that DRB is guaranteed to have at all
5070 times. */
5072 unsigned int
5073 dr_alignment (innermost_loop_behavior *drb)
5075 /* Get the alignment of BASE_ADDRESS + INIT. */
5076 unsigned int alignment = drb->base_alignment;
5077 unsigned int misalignment = (drb->base_misalignment
5078 + TREE_INT_CST_LOW (drb->init));
5079 if (misalignment != 0)
5080 alignment = MIN (alignment, misalignment & -misalignment);
5082 /* Cap it to the alignment of OFFSET. */
5083 if (!integer_zerop (drb->offset))
5084 alignment = MIN (alignment, drb->offset_alignment);
5086 /* Cap it to the alignment of STEP. */
5087 if (!integer_zerop (drb->step))
5088 alignment = MIN (alignment, drb->step_alignment);
5090 return alignment;
5093 /* Recursive helper function. */
5095 static bool
5096 find_loop_nest_1 (struct loop *loop, vec<loop_p> *loop_nest)
5098 /* Inner loops of the nest should not contain siblings. Example:
5099 when there are two consecutive loops,
5101 | loop_0
5102 | loop_1
5103 | A[{0, +, 1}_1]
5104 | endloop_1
5105 | loop_2
5106 | A[{0, +, 1}_2]
5107 | endloop_2
5108 | endloop_0
5110 the dependence relation cannot be captured by the distance
5111 abstraction. */
5112 if (loop->next)
5113 return false;
5115 loop_nest->safe_push (loop);
5116 if (loop->inner)
5117 return find_loop_nest_1 (loop->inner, loop_nest);
5118 return true;
5121 /* Return false when the LOOP is not well nested. Otherwise return
5122 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
5123 contain the loops from the outermost to the innermost, as they will
5124 appear in the classic distance vector. */
5126 bool
5127 find_loop_nest (struct loop *loop, vec<loop_p> *loop_nest)
5129 loop_nest->safe_push (loop);
5130 if (loop->inner)
5131 return find_loop_nest_1 (loop->inner, loop_nest);
5132 return true;
/* Returns true when the data dependences have been computed, false otherwise.
   Given a loop nest LOOP, the following vectors are returned:
   DATAREFS is initialized to all the array elements contained in this loop,
   DEPENDENCE_RELATIONS contains the relations between the data references.
   Compute read-read and self relations if
   COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */

bool
compute_data_dependences_for_loop (struct loop *loop,
				   bool compute_self_and_read_read_dependences,
				   vec<loop_p> *loop_nest,
				   vec<data_reference_p> *datarefs,
				   vec<ddr_p> *dependence_relations)
{
  bool res = true;

  memset (&dependence_stats, 0, sizeof (dependence_stats));

  /* If the loop nest is not well formed, or one of the data references
     is not computable, give up without spending time to compute other
     dependences.  */
  if (!loop
      || !find_loop_nest (loop, loop_nest)
      || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
      || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
				   compute_self_and_read_read_dependences))
    res = false;

  if (dump_file && (dump_flags & TDF_STATS))
    {
      fprintf (dump_file, "Dependence tester statistics:\n");

      fprintf (dump_file, "Number of dependence tests: %d\n",
	       dependence_stats.num_dependence_tests);
      fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
	       dependence_stats.num_dependence_dependent);
      fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
	       dependence_stats.num_dependence_independent);
      fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
	       dependence_stats.num_dependence_undetermined);

      fprintf (dump_file, "Number of subscript tests: %d\n",
	       dependence_stats.num_subscript_tests);
      fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
	       dependence_stats.num_subscript_undetermined);
      fprintf (dump_file, "Number of same subscript function: %d\n",
	       dependence_stats.num_same_subscript_function);

      fprintf (dump_file, "Number of ziv tests: %d\n",
	       dependence_stats.num_ziv);
      fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
	       dependence_stats.num_ziv_dependent);
      fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
	       dependence_stats.num_ziv_independent);
      fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
	       dependence_stats.num_ziv_unimplemented);

      fprintf (dump_file, "Number of siv tests: %d\n",
	       dependence_stats.num_siv);
      fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
	       dependence_stats.num_siv_dependent);
      fprintf (dump_file, "Number of siv tests returning independent: %d\n",
	       dependence_stats.num_siv_independent);
      fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
	       dependence_stats.num_siv_unimplemented);

      fprintf (dump_file, "Number of miv tests: %d\n",
	       dependence_stats.num_miv);
      fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
	       dependence_stats.num_miv_dependent);
      fprintf (dump_file, "Number of miv tests returning independent: %d\n",
	       dependence_stats.num_miv_independent);
      fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
	       dependence_stats.num_miv_unimplemented);
    }

  return res;
}
5214 /* Free the memory used by a data dependence relation DDR. */
5216 void
5217 free_dependence_relation (struct data_dependence_relation *ddr)
5219 if (ddr == NULL)
5220 return;
5222 if (DDR_SUBSCRIPTS (ddr).exists ())
5223 free_subscripts (DDR_SUBSCRIPTS (ddr));
5224 DDR_DIST_VECTS (ddr).release ();
5225 DDR_DIR_VECTS (ddr).release ();
5227 free (ddr);
5230 /* Free the memory used by the data dependence relations from
5231 DEPENDENCE_RELATIONS. */
5233 void
5234 free_dependence_relations (vec<ddr_p> dependence_relations)
5236 unsigned int i;
5237 struct data_dependence_relation *ddr;
5239 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
5240 if (ddr)
5241 free_dependence_relation (ddr);
5243 dependence_relations.release ();
5246 /* Free the memory used by the data references from DATAREFS. */
5248 void
5249 free_data_refs (vec<data_reference_p> datarefs)
5251 unsigned int i;
5252 struct data_reference *dr;
5254 FOR_EACH_VEC_ELT (datarefs, i, dr)
5255 free_data_ref (dr);
5256 datarefs.release ();