Allow 2 insns from sched group to issue in same cycle, if no stalls needed.
[official-gcc.git] / gcc / tree-data-ref.c
blob19cceb8dfd95012bce2b061c32aed8183e24e955
1 /* Data references and dependences detectors.
2 Copyright (C) 2003-2017 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass walks a given loop structure searching for array
22 references. The information about the array accesses is recorded
23 in DATA_REFERENCE structures.
25 The basic test for determining the dependences is:
26 given two access functions chrec1 and chrec2 to a same array, and
27 x and y two vectors from the iteration domain, the same element of
28 the array is accessed twice at iterations x and y if and only if:
29 | chrec1 (x) == chrec2 (y).
31 The goals of this analysis are:
33 - to determine the independence: the relation between two
34 independent accesses is qualified with the chrec_known (this
35 information allows a loop parallelization),
37 - when two data references access the same data, to qualify the
38 dependence relation with classic dependence representations:
40 - distance vectors
41 - direction vectors
42 - loop carried level dependence
43 - polyhedron dependence
44 or with the chains of recurrences based representation,
46 - to define a knowledge base for storing the data dependence
47 information,
49 - to define an interface to access this data.
52 Definitions:
54 - subscript: given two array accesses a subscript is the tuple
55 composed of the access functions for a given dimension. Example:
56 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57 (f1, g1), (f2, g2), (f3, g3).
59 - Diophantine equation: an equation whose coefficients and
60 solutions are integer constants, for example the equation
61 | 3*x + 2*y = 1
62 has an integer solution x = 1 and y = -1.
64 References:
66 - "Advanced Compilation for High Performance Computing" by Randy
67 Allen and Ken Kennedy.
68 http://citeseer.ist.psu.edu/goff91practical.html
70 - "Loop Transformations for Restructuring Compilers - The Foundations"
71 by Utpal Banerjee.
76 #include "config.h"
77 #include "system.h"
78 #include "coretypes.h"
79 #include "backend.h"
80 #include "rtl.h"
81 #include "tree.h"
82 #include "gimple.h"
83 #include "gimple-pretty-print.h"
84 #include "alias.h"
85 #include "fold-const.h"
86 #include "expr.h"
87 #include "gimple-iterator.h"
88 #include "tree-ssa-loop-niter.h"
89 #include "tree-ssa-loop.h"
90 #include "tree-ssa.h"
91 #include "cfgloop.h"
92 #include "tree-data-ref.h"
93 #include "tree-scalar-evolution.h"
94 #include "dumpfile.h"
95 #include "tree-affine.h"
96 #include "params.h"
97 #include "builtins.h"
/* Statistics counters for the dependence testers, dumped at the end
   of the pass when details dumping is enabled.  */

static struct datadep_stats
{
  int num_dependence_tests;
  int num_dependence_dependent;
  int num_dependence_independent;
  int num_dependence_undetermined;

  int num_subscript_tests;
  int num_subscript_undetermined;
  int num_same_subscript_function;

  int num_ziv;
  int num_ziv_independent;
  int num_ziv_dependent;
  int num_ziv_unimplemented;

  int num_siv;
  int num_siv_independent;
  int num_siv_dependent;
  int num_siv_unimplemented;

  int num_miv;
  int num_miv_independent;
  int num_miv_dependent;
  int num_miv_unimplemented;
} dependence_stats;

static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
					   unsigned int, unsigned int,
					   struct loop *);
129 /* Returns true iff A divides B. */
131 static inline bool
132 tree_fold_divides_p (const_tree a, const_tree b)
134 gcc_assert (TREE_CODE (a) == INTEGER_CST);
135 gcc_assert (TREE_CODE (b) == INTEGER_CST);
136 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
/* Return true iff A evenly divides B.  A is expected to be nonzero;
   division by zero is undefined.  */

static inline bool
int_divides_p (int a, int b)
{
  return b % a == 0;
}
147 /* Return true if reference REF contains a union access. */
149 static bool
150 ref_contains_union_access_p (tree ref)
152 while (handled_component_p (ref))
154 ref = TREE_OPERAND (ref, 0);
155 if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
156 || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
157 return true;
159 return false;
164 /* Dump into FILE all the data references from DATAREFS. */
166 static void
167 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
169 unsigned int i;
170 struct data_reference *dr;
172 FOR_EACH_VEC_ELT (datarefs, i, dr)
173 dump_data_reference (file, dr);
176 /* Unified dump into FILE all the data references from DATAREFS. */
178 DEBUG_FUNCTION void
179 debug (vec<data_reference_p> &ref)
181 dump_data_references (stderr, ref);
184 DEBUG_FUNCTION void
185 debug (vec<data_reference_p> *ptr)
187 if (ptr)
188 debug (*ptr);
189 else
190 fprintf (stderr, "<nil>\n");
194 /* Dump into STDERR all the data references from DATAREFS. */
196 DEBUG_FUNCTION void
197 debug_data_references (vec<data_reference_p> datarefs)
199 dump_data_references (stderr, datarefs);
202 /* Print to STDERR the data_reference DR. */
204 DEBUG_FUNCTION void
205 debug_data_reference (struct data_reference *dr)
207 dump_data_reference (stderr, dr);
210 /* Dump function for a DATA_REFERENCE structure. */
212 void
213 dump_data_reference (FILE *outf,
214 struct data_reference *dr)
216 unsigned int i;
218 fprintf (outf, "#(Data Ref: \n");
219 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
220 fprintf (outf, "# stmt: ");
221 print_gimple_stmt (outf, DR_STMT (dr), 0);
222 fprintf (outf, "# ref: ");
223 print_generic_stmt (outf, DR_REF (dr));
224 fprintf (outf, "# base_object: ");
225 print_generic_stmt (outf, DR_BASE_OBJECT (dr));
227 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
229 fprintf (outf, "# Access function %d: ", i);
230 print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
232 fprintf (outf, "#)\n");
235 /* Unified dump function for a DATA_REFERENCE structure. */
237 DEBUG_FUNCTION void
238 debug (data_reference &ref)
240 dump_data_reference (stderr, &ref);
243 DEBUG_FUNCTION void
244 debug (data_reference *ptr)
246 if (ptr)
247 debug (*ptr);
248 else
249 fprintf (stderr, "<nil>\n");
253 /* Dumps the affine function described by FN to the file OUTF. */
255 DEBUG_FUNCTION void
256 dump_affine_function (FILE *outf, affine_fn fn)
258 unsigned i;
259 tree coef;
261 print_generic_expr (outf, fn[0], TDF_SLIM);
262 for (i = 1; fn.iterate (i, &coef); i++)
264 fprintf (outf, " + ");
265 print_generic_expr (outf, coef, TDF_SLIM);
266 fprintf (outf, " * x_%u", i);
270 /* Dumps the conflict function CF to the file OUTF. */
272 DEBUG_FUNCTION void
273 dump_conflict_function (FILE *outf, conflict_function *cf)
275 unsigned i;
277 if (cf->n == NO_DEPENDENCE)
278 fprintf (outf, "no dependence");
279 else if (cf->n == NOT_KNOWN)
280 fprintf (outf, "not known");
281 else
283 for (i = 0; i < cf->n; i++)
285 if (i != 0)
286 fprintf (outf, " ");
287 fprintf (outf, "[");
288 dump_affine_function (outf, cf->fns[i]);
289 fprintf (outf, "]");
294 /* Dump function for a SUBSCRIPT structure. */
296 DEBUG_FUNCTION void
297 dump_subscript (FILE *outf, struct subscript *subscript)
299 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
301 fprintf (outf, "\n (subscript \n");
302 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
303 dump_conflict_function (outf, cf);
304 if (CF_NONTRIVIAL_P (cf))
306 tree last_iteration = SUB_LAST_CONFLICT (subscript);
307 fprintf (outf, "\n last_conflict: ");
308 print_generic_expr (outf, last_iteration);
311 cf = SUB_CONFLICTS_IN_B (subscript);
312 fprintf (outf, "\n iterations_that_access_an_element_twice_in_B: ");
313 dump_conflict_function (outf, cf);
314 if (CF_NONTRIVIAL_P (cf))
316 tree last_iteration = SUB_LAST_CONFLICT (subscript);
317 fprintf (outf, "\n last_conflict: ");
318 print_generic_expr (outf, last_iteration);
321 fprintf (outf, "\n (Subscript distance: ");
322 print_generic_expr (outf, SUB_DISTANCE (subscript));
323 fprintf (outf, " ))\n");
326 /* Print the classic direction vector DIRV to OUTF. */
328 DEBUG_FUNCTION void
329 print_direction_vector (FILE *outf,
330 lambda_vector dirv,
331 int length)
333 int eq;
335 for (eq = 0; eq < length; eq++)
337 enum data_dependence_direction dir = ((enum data_dependence_direction)
338 dirv[eq]);
340 switch (dir)
342 case dir_positive:
343 fprintf (outf, " +");
344 break;
345 case dir_negative:
346 fprintf (outf, " -");
347 break;
348 case dir_equal:
349 fprintf (outf, " =");
350 break;
351 case dir_positive_or_equal:
352 fprintf (outf, " +=");
353 break;
354 case dir_positive_or_negative:
355 fprintf (outf, " +-");
356 break;
357 case dir_negative_or_equal:
358 fprintf (outf, " -=");
359 break;
360 case dir_star:
361 fprintf (outf, " *");
362 break;
363 default:
364 fprintf (outf, "indep");
365 break;
368 fprintf (outf, "\n");
371 /* Print a vector of direction vectors. */
373 DEBUG_FUNCTION void
374 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
375 int length)
377 unsigned j;
378 lambda_vector v;
380 FOR_EACH_VEC_ELT (dir_vects, j, v)
381 print_direction_vector (outf, v, length);
384 /* Print out a vector VEC of length N to OUTFILE. */
386 DEBUG_FUNCTION void
387 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
389 int i;
391 for (i = 0; i < n; i++)
392 fprintf (outfile, "%3d ", vector[i]);
393 fprintf (outfile, "\n");
396 /* Print a vector of distance vectors. */
398 DEBUG_FUNCTION void
399 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
400 int length)
402 unsigned j;
403 lambda_vector v;
405 FOR_EACH_VEC_ELT (dist_vects, j, v)
406 print_lambda_vector (outf, v, length);
409 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
411 DEBUG_FUNCTION void
412 dump_data_dependence_relation (FILE *outf,
413 struct data_dependence_relation *ddr)
415 struct data_reference *dra, *drb;
417 fprintf (outf, "(Data Dep: \n");
419 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
421 if (ddr)
423 dra = DDR_A (ddr);
424 drb = DDR_B (ddr);
425 if (dra)
426 dump_data_reference (outf, dra);
427 else
428 fprintf (outf, " (nil)\n");
429 if (drb)
430 dump_data_reference (outf, drb);
431 else
432 fprintf (outf, " (nil)\n");
434 fprintf (outf, " (don't know)\n)\n");
435 return;
438 dra = DDR_A (ddr);
439 drb = DDR_B (ddr);
440 dump_data_reference (outf, dra);
441 dump_data_reference (outf, drb);
443 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
444 fprintf (outf, " (no dependence)\n");
446 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
448 unsigned int i;
449 struct loop *loopi;
451 subscript *sub;
452 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
454 fprintf (outf, " access_fn_A: ");
455 print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
456 fprintf (outf, " access_fn_B: ");
457 print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
458 dump_subscript (outf, sub);
461 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
462 fprintf (outf, " loop nest: (");
463 FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
464 fprintf (outf, "%d ", loopi->num);
465 fprintf (outf, ")\n");
467 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
469 fprintf (outf, " distance_vector: ");
470 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
471 DDR_NB_LOOPS (ddr));
474 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
476 fprintf (outf, " direction_vector: ");
477 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
478 DDR_NB_LOOPS (ddr));
482 fprintf (outf, ")\n");
485 /* Debug version. */
487 DEBUG_FUNCTION void
488 debug_data_dependence_relation (struct data_dependence_relation *ddr)
490 dump_data_dependence_relation (stderr, ddr);
493 /* Dump into FILE all the dependence relations from DDRS. */
495 DEBUG_FUNCTION void
496 dump_data_dependence_relations (FILE *file,
497 vec<ddr_p> ddrs)
499 unsigned int i;
500 struct data_dependence_relation *ddr;
502 FOR_EACH_VEC_ELT (ddrs, i, ddr)
503 dump_data_dependence_relation (file, ddr);
506 DEBUG_FUNCTION void
507 debug (vec<ddr_p> &ref)
509 dump_data_dependence_relations (stderr, ref);
512 DEBUG_FUNCTION void
513 debug (vec<ddr_p> *ptr)
515 if (ptr)
516 debug (*ptr);
517 else
518 fprintf (stderr, "<nil>\n");
522 /* Dump to STDERR all the dependence relations from DDRS. */
524 DEBUG_FUNCTION void
525 debug_data_dependence_relations (vec<ddr_p> ddrs)
527 dump_data_dependence_relations (stderr, ddrs);
530 /* Dumps the distance and direction vectors in FILE. DDRS contains
531 the dependence relations, and VECT_SIZE is the size of the
532 dependence vectors, or in other words the number of loops in the
533 considered nest. */
535 DEBUG_FUNCTION void
536 dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
538 unsigned int i, j;
539 struct data_dependence_relation *ddr;
540 lambda_vector v;
542 FOR_EACH_VEC_ELT (ddrs, i, ddr)
543 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
545 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), j, v)
547 fprintf (file, "DISTANCE_V (");
548 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
549 fprintf (file, ")\n");
552 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), j, v)
554 fprintf (file, "DIRECTION_V (");
555 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
556 fprintf (file, ")\n");
560 fprintf (file, "\n\n");
563 /* Dumps the data dependence relations DDRS in FILE. */
565 DEBUG_FUNCTION void
566 dump_ddrs (FILE *file, vec<ddr_p> ddrs)
568 unsigned int i;
569 struct data_dependence_relation *ddr;
571 FOR_EACH_VEC_ELT (ddrs, i, ddr)
572 dump_data_dependence_relation (file, ddr);
574 fprintf (file, "\n\n");
577 DEBUG_FUNCTION void
578 debug_ddrs (vec<ddr_p> ddrs)
580 dump_ddrs (stderr, ddrs);
583 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
584 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
585 constant of type ssizetype, and returns true. If we cannot do this
586 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
587 is returned. */
589 static bool
590 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
591 tree *var, tree *off)
593 tree var0, var1;
594 tree off0, off1;
595 enum tree_code ocode = code;
597 *var = NULL_TREE;
598 *off = NULL_TREE;
600 switch (code)
602 case INTEGER_CST:
603 *var = build_int_cst (type, 0);
604 *off = fold_convert (ssizetype, op0);
605 return true;
607 case POINTER_PLUS_EXPR:
608 ocode = PLUS_EXPR;
609 /* FALLTHROUGH */
610 case PLUS_EXPR:
611 case MINUS_EXPR:
612 split_constant_offset (op0, &var0, &off0);
613 split_constant_offset (op1, &var1, &off1);
614 *var = fold_build2 (code, type, var0, var1);
615 *off = size_binop (ocode, off0, off1);
616 return true;
618 case MULT_EXPR:
619 if (TREE_CODE (op1) != INTEGER_CST)
620 return false;
622 split_constant_offset (op0, &var0, &off0);
623 *var = fold_build2 (MULT_EXPR, type, var0, op1);
624 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
625 return true;
627 case ADDR_EXPR:
629 tree base, poffset;
630 HOST_WIDE_INT pbitsize, pbitpos;
631 machine_mode pmode;
632 int punsignedp, preversep, pvolatilep;
634 op0 = TREE_OPERAND (op0, 0);
635 base
636 = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
637 &punsignedp, &preversep, &pvolatilep);
639 if (pbitpos % BITS_PER_UNIT != 0)
640 return false;
641 base = build_fold_addr_expr (base);
642 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
644 if (poffset)
646 split_constant_offset (poffset, &poffset, &off1);
647 off0 = size_binop (PLUS_EXPR, off0, off1);
648 if (POINTER_TYPE_P (TREE_TYPE (base)))
649 base = fold_build_pointer_plus (base, poffset);
650 else
651 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
652 fold_convert (TREE_TYPE (base), poffset));
655 var0 = fold_convert (type, base);
657 /* If variable length types are involved, punt, otherwise casts
658 might be converted into ARRAY_REFs in gimplify_conversion.
659 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
660 possibly no longer appears in current GIMPLE, might resurface.
661 This perhaps could run
662 if (CONVERT_EXPR_P (var0))
664 gimplify_conversion (&var0);
665 // Attempt to fill in any within var0 found ARRAY_REF's
666 // element size from corresponding op embedded ARRAY_REF,
667 // if unsuccessful, just punt.
668 } */
669 while (POINTER_TYPE_P (type))
670 type = TREE_TYPE (type);
671 if (int_size_in_bytes (type) < 0)
672 return false;
674 *var = var0;
675 *off = off0;
676 return true;
679 case SSA_NAME:
681 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
682 return false;
684 gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
685 enum tree_code subcode;
687 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
688 return false;
690 var0 = gimple_assign_rhs1 (def_stmt);
691 subcode = gimple_assign_rhs_code (def_stmt);
692 var1 = gimple_assign_rhs2 (def_stmt);
694 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
696 CASE_CONVERT:
698 /* We must not introduce undefined overflow, and we must not change the value.
699 Hence we're okay if the inner type doesn't overflow to start with
700 (pointer or signed), the outer type also is an integer or pointer
701 and the outer precision is at least as large as the inner. */
702 tree itype = TREE_TYPE (op0);
703 if ((POINTER_TYPE_P (itype)
704 || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
705 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
706 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
708 split_constant_offset (op0, &var0, off);
709 *var = fold_convert (type, var0);
710 return true;
712 return false;
715 default:
716 return false;
720 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
721 will be ssizetype. */
723 void
724 split_constant_offset (tree exp, tree *var, tree *off)
726 tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
727 enum tree_code code;
729 *var = exp;
730 *off = ssize_int (0);
731 STRIP_NOPS (exp);
733 if (tree_is_chrec (exp)
734 || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
735 return;
737 otype = TREE_TYPE (exp);
738 code = TREE_CODE (exp);
739 extract_ops_from_tree (exp, &code, &op0, &op1);
740 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
742 *var = fold_convert (type, e);
743 *off = o;
747 /* Returns the address ADDR of an object in a canonical shape (without nop
748 casts, and with type of pointer to the object). */
750 static tree
751 canonicalize_base_object_address (tree addr)
753 tree orig = addr;
755 STRIP_NOPS (addr);
757 /* The base address may be obtained by casting from integer, in that case
758 keep the cast. */
759 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
760 return orig;
762 if (TREE_CODE (addr) != ADDR_EXPR)
763 return addr;
765 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
768 /* Analyze the behavior of memory reference REF. There are two modes:
770 - BB analysis. In this case we simply split the address into base,
771 init and offset components, without reference to any containing loop.
772 The resulting base and offset are general expressions and they can
773 vary arbitrarily from one iteration of the containing loop to the next.
774 The step is always zero.
776 - loop analysis. In this case we analyze the reference both wrt LOOP
777 and on the basis that the reference occurs (is "used") in LOOP;
778 see the comment above analyze_scalar_evolution_in_loop for more
779 information about this distinction. The base, init, offset and
780 step fields are all invariant in LOOP.
782 Perform BB analysis if LOOP is null, or if LOOP is the function's
783 dummy outermost loop. In other cases perform loop analysis.
785 Return true if the analysis succeeded and store the results in DRB if so.
786 BB analysis can only fail for bitfield or reversed-storage accesses. */
788 bool
789 dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
790 struct loop *loop)
792 HOST_WIDE_INT pbitsize, pbitpos;
793 tree base, poffset;
794 machine_mode pmode;
795 int punsignedp, preversep, pvolatilep;
796 affine_iv base_iv, offset_iv;
797 tree init, dinit, step;
798 bool in_loop = (loop && loop->num);
800 if (dump_file && (dump_flags & TDF_DETAILS))
801 fprintf (dump_file, "analyze_innermost: ");
803 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
804 &punsignedp, &preversep, &pvolatilep);
805 gcc_assert (base != NULL_TREE);
807 if (pbitpos % BITS_PER_UNIT != 0)
809 if (dump_file && (dump_flags & TDF_DETAILS))
810 fprintf (dump_file, "failed: bit offset alignment.\n");
811 return false;
814 if (preversep)
816 if (dump_file && (dump_flags & TDF_DETAILS))
817 fprintf (dump_file, "failed: reverse storage order.\n");
818 return false;
821 /* Calculate the alignment and misalignment for the inner reference. */
822 unsigned int HOST_WIDE_INT base_misalignment;
823 unsigned int base_alignment;
824 get_object_alignment_1 (base, &base_alignment, &base_misalignment);
826 /* There are no bitfield references remaining in BASE, so the values
827 we got back must be whole bytes. */
828 gcc_assert (base_alignment % BITS_PER_UNIT == 0
829 && base_misalignment % BITS_PER_UNIT == 0);
830 base_alignment /= BITS_PER_UNIT;
831 base_misalignment /= BITS_PER_UNIT;
833 if (TREE_CODE (base) == MEM_REF)
835 if (!integer_zerop (TREE_OPERAND (base, 1)))
837 /* Subtract MOFF from the base and add it to POFFSET instead.
838 Adjust the misalignment to reflect the amount we subtracted. */
839 offset_int moff = mem_ref_offset (base);
840 base_misalignment -= moff.to_short_addr ();
841 tree mofft = wide_int_to_tree (sizetype, moff);
842 if (!poffset)
843 poffset = mofft;
844 else
845 poffset = size_binop (PLUS_EXPR, poffset, mofft);
847 base = TREE_OPERAND (base, 0);
849 else
850 base = build_fold_addr_expr (base);
852 if (in_loop)
854 if (!simple_iv (loop, loop, base, &base_iv, true))
856 if (dump_file && (dump_flags & TDF_DETAILS))
857 fprintf (dump_file, "failed: evolution of base is not affine.\n");
858 return false;
861 else
863 base_iv.base = base;
864 base_iv.step = ssize_int (0);
865 base_iv.no_overflow = true;
868 if (!poffset)
870 offset_iv.base = ssize_int (0);
871 offset_iv.step = ssize_int (0);
873 else
875 if (!in_loop)
877 offset_iv.base = poffset;
878 offset_iv.step = ssize_int (0);
880 else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
882 if (dump_file && (dump_flags & TDF_DETAILS))
883 fprintf (dump_file, "failed: evolution of offset is not affine.\n");
884 return false;
888 init = ssize_int (pbitpos / BITS_PER_UNIT);
890 /* Subtract any constant component from the base and add it to INIT instead.
891 Adjust the misalignment to reflect the amount we subtracted. */
892 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
893 init = size_binop (PLUS_EXPR, init, dinit);
894 base_misalignment -= TREE_INT_CST_LOW (dinit);
896 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
897 init = size_binop (PLUS_EXPR, init, dinit);
899 step = size_binop (PLUS_EXPR,
900 fold_convert (ssizetype, base_iv.step),
901 fold_convert (ssizetype, offset_iv.step));
903 base = canonicalize_base_object_address (base_iv.base);
905 /* See if get_pointer_alignment can guarantee a higher alignment than
906 the one we calculated above. */
907 unsigned int HOST_WIDE_INT alt_misalignment;
908 unsigned int alt_alignment;
909 get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);
911 /* As above, these values must be whole bytes. */
912 gcc_assert (alt_alignment % BITS_PER_UNIT == 0
913 && alt_misalignment % BITS_PER_UNIT == 0);
914 alt_alignment /= BITS_PER_UNIT;
915 alt_misalignment /= BITS_PER_UNIT;
917 if (base_alignment < alt_alignment)
919 base_alignment = alt_alignment;
920 base_misalignment = alt_misalignment;
923 drb->base_address = base;
924 drb->offset = fold_convert (ssizetype, offset_iv.base);
925 drb->init = init;
926 drb->step = step;
927 drb->base_alignment = base_alignment;
928 drb->base_misalignment = base_misalignment & (base_alignment - 1);
929 drb->offset_alignment = highest_pow2_factor (offset_iv.base);
930 drb->step_alignment = highest_pow2_factor (step);
932 if (dump_file && (dump_flags & TDF_DETAILS))
933 fprintf (dump_file, "success.\n");
935 return true;
938 /* Return true if OP is a valid component reference for a DR access
939 function. This accepts a subset of what handled_component_p accepts. */
941 static bool
942 access_fn_component_p (tree op)
944 switch (TREE_CODE (op))
946 case REALPART_EXPR:
947 case IMAGPART_EXPR:
948 case ARRAY_REF:
949 return true;
951 case COMPONENT_REF:
952 return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;
954 default:
955 return false;
959 /* Determines the base object and the list of indices of memory reference
960 DR, analyzed in LOOP and instantiated in loop nest NEST. */
962 static void
963 dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
965 vec<tree> access_fns = vNULL;
966 tree ref, op;
967 tree base, off, access_fn;
968 basic_block before_loop;
970 /* If analyzing a basic-block there are no indices to analyze
971 and thus no access functions. */
972 if (!nest)
974 DR_BASE_OBJECT (dr) = DR_REF (dr);
975 DR_ACCESS_FNS (dr).create (0);
976 return;
979 ref = DR_REF (dr);
980 before_loop = block_before_loop (nest);
982 /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
983 into a two element array with a constant index. The base is
984 then just the immediate underlying object. */
985 if (TREE_CODE (ref) == REALPART_EXPR)
987 ref = TREE_OPERAND (ref, 0);
988 access_fns.safe_push (integer_zero_node);
990 else if (TREE_CODE (ref) == IMAGPART_EXPR)
992 ref = TREE_OPERAND (ref, 0);
993 access_fns.safe_push (integer_one_node);
996 /* Analyze access functions of dimensions we know to be independent.
997 The list of component references handled here should be kept in
998 sync with access_fn_component_p. */
999 while (handled_component_p (ref))
1001 if (TREE_CODE (ref) == ARRAY_REF)
1003 op = TREE_OPERAND (ref, 1);
1004 access_fn = analyze_scalar_evolution (loop, op);
1005 access_fn = instantiate_scev (before_loop, loop, access_fn);
1006 access_fns.safe_push (access_fn);
1008 else if (TREE_CODE (ref) == COMPONENT_REF
1009 && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
1011 /* For COMPONENT_REFs of records (but not unions!) use the
1012 FIELD_DECL offset as constant access function so we can
1013 disambiguate a[i].f1 and a[i].f2. */
1014 tree off = component_ref_field_offset (ref);
1015 off = size_binop (PLUS_EXPR,
1016 size_binop (MULT_EXPR,
1017 fold_convert (bitsizetype, off),
1018 bitsize_int (BITS_PER_UNIT)),
1019 DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
1020 access_fns.safe_push (off);
1022 else
1023 /* If we have an unhandled component we could not translate
1024 to an access function stop analyzing. We have determined
1025 our base object in this case. */
1026 break;
1028 ref = TREE_OPERAND (ref, 0);
1031 /* If the address operand of a MEM_REF base has an evolution in the
1032 analyzed nest, add it as an additional independent access-function. */
1033 if (TREE_CODE (ref) == MEM_REF)
1035 op = TREE_OPERAND (ref, 0);
1036 access_fn = analyze_scalar_evolution (loop, op);
1037 access_fn = instantiate_scev (before_loop, loop, access_fn);
1038 if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
1040 tree orig_type;
1041 tree memoff = TREE_OPERAND (ref, 1);
1042 base = initial_condition (access_fn);
1043 orig_type = TREE_TYPE (base);
1044 STRIP_USELESS_TYPE_CONVERSION (base);
1045 split_constant_offset (base, &base, &off);
1046 STRIP_USELESS_TYPE_CONVERSION (base);
1047 /* Fold the MEM_REF offset into the evolutions initial
1048 value to make more bases comparable. */
1049 if (!integer_zerop (memoff))
1051 off = size_binop (PLUS_EXPR, off,
1052 fold_convert (ssizetype, memoff));
1053 memoff = build_int_cst (TREE_TYPE (memoff), 0);
1055 /* Adjust the offset so it is a multiple of the access type
1056 size and thus we separate bases that can possibly be used
1057 to produce partial overlaps (which the access_fn machinery
1058 cannot handle). */
1059 wide_int rem;
1060 if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
1061 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
1062 && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
1063 rem = wi::mod_trunc
1064 (wi::to_wide (off),
1065 wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
1066 SIGNED);
1067 else
1068 /* If we can't compute the remainder simply force the initial
1069 condition to zero. */
1070 rem = wi::to_wide (off);
1071 off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
1072 memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
1073 /* And finally replace the initial condition. */
1074 access_fn = chrec_replace_initial_condition
1075 (access_fn, fold_convert (orig_type, off));
1076 /* ??? This is still not a suitable base object for
1077 dr_may_alias_p - the base object needs to be an
1078 access that covers the object as whole. With
1079 an evolution in the pointer this cannot be
1080 guaranteed.
1081 As a band-aid, mark the access so we can special-case
1082 it in dr_may_alias_p. */
1083 tree old = ref;
1084 ref = fold_build2_loc (EXPR_LOCATION (ref),
1085 MEM_REF, TREE_TYPE (ref),
1086 base, memoff);
1087 MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
1088 MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
1089 DR_UNCONSTRAINED_BASE (dr) = true;
1090 access_fns.safe_push (access_fn);
1093 else if (DECL_P (ref))
1095 /* Canonicalize DR_BASE_OBJECT to MEM_REF form. */
1096 ref = build2 (MEM_REF, TREE_TYPE (ref),
1097 build_fold_addr_expr (ref),
1098 build_int_cst (reference_alias_ptr_type (ref), 0));
1101 DR_BASE_OBJECT (dr) = ref;
1102 DR_ACCESS_FNS (dr) = access_fns;
1105 /* Extracts the alias analysis information from the memory reference DR. */
1107 static void
1108 dr_analyze_alias (struct data_reference *dr)
1110 tree ref = DR_REF (dr);
1111 tree base = get_base_address (ref), addr;
1113 if (INDIRECT_REF_P (base)
1114 || TREE_CODE (base) == MEM_REF)
1116 addr = TREE_OPERAND (base, 0);
1117 if (TREE_CODE (addr) == SSA_NAME)
1118 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1122 /* Frees data reference DR. */
1124 void
1125 free_data_ref (data_reference_p dr)
1127 DR_ACCESS_FNS (dr).release ();
1128 free (dr);
1131 /* Analyze memory reference MEMREF, which is accessed in STMT.
1132 The reference is a read if IS_READ is true, otherwise it is a write.
1133 IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
1134 within STMT, i.e. that it might not occur even if STMT is executed
1135 and runs to completion.
1137 Return the data_reference description of MEMREF. NEST is the outermost
1138 loop in which the reference should be instantiated, LOOP is the loop
1139 in which the data reference should be analyzed. */
1141 struct data_reference *
1142 create_data_ref (loop_p nest, loop_p loop, tree memref, gimple *stmt,
1143 bool is_read, bool is_conditional_in_stmt)
1145 struct data_reference *dr;
1147 if (dump_file && (dump_flags & TDF_DETAILS))
1149 fprintf (dump_file, "Creating dr for ");
1150 print_generic_expr (dump_file, memref, TDF_SLIM);
1151 fprintf (dump_file, "\n");
1154 dr = XCNEW (struct data_reference);
1155 DR_STMT (dr) = stmt;
1156 DR_REF (dr) = memref;
1157 DR_IS_READ (dr) = is_read;
1158 DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;
1160 dr_analyze_innermost (&DR_INNERMOST (dr), memref,
1161 nest != NULL ? loop : NULL);
1162 dr_analyze_indices (dr, nest, loop);
1163 dr_analyze_alias (dr);
1165 if (dump_file && (dump_flags & TDF_DETAILS))
1167 unsigned i;
1168 fprintf (dump_file, "\tbase_address: ");
1169 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1170 fprintf (dump_file, "\n\toffset from base address: ");
1171 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1172 fprintf (dump_file, "\n\tconstant offset from base address: ");
1173 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1174 fprintf (dump_file, "\n\tstep: ");
1175 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1176 fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
1177 fprintf (dump_file, "\n\tbase misalignment: %d",
1178 DR_BASE_MISALIGNMENT (dr));
1179 fprintf (dump_file, "\n\toffset alignment: %d",
1180 DR_OFFSET_ALIGNMENT (dr));
1181 fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
1182 fprintf (dump_file, "\n\tbase_object: ");
1183 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1184 fprintf (dump_file, "\n");
1185 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1187 fprintf (dump_file, "\tAccess function %d: ", i);
1188 print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1192 return dr;
1195 /* A helper function computes order between two tree epxressions T1 and T2.
1196 This is used in comparator functions sorting objects based on the order
1197 of tree expressions. The function returns -1, 0, or 1. */
1200 data_ref_compare_tree (tree t1, tree t2)
1202 int i, cmp;
1203 enum tree_code code;
1204 char tclass;
1206 if (t1 == t2)
1207 return 0;
1208 if (t1 == NULL)
1209 return -1;
1210 if (t2 == NULL)
1211 return 1;
1213 STRIP_USELESS_TYPE_CONVERSION (t1);
1214 STRIP_USELESS_TYPE_CONVERSION (t2);
1215 if (t1 == t2)
1216 return 0;
1218 if (TREE_CODE (t1) != TREE_CODE (t2)
1219 && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
1220 return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
1222 code = TREE_CODE (t1);
1223 switch (code)
1225 case INTEGER_CST:
1226 return tree_int_cst_compare (t1, t2);
1228 case STRING_CST:
1229 if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
1230 return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
1231 return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
1232 TREE_STRING_LENGTH (t1));
1234 case SSA_NAME:
1235 if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
1236 return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
1237 break;
1239 default:
1240 tclass = TREE_CODE_CLASS (code);
1242 /* For decls, compare their UIDs. */
1243 if (tclass == tcc_declaration)
1245 if (DECL_UID (t1) != DECL_UID (t2))
1246 return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
1247 break;
1249 /* For expressions, compare their operands recursively. */
1250 else if (IS_EXPR_CODE_CLASS (tclass))
1252 for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
1254 cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
1255 TREE_OPERAND (t2, i));
1256 if (cmp != 0)
1257 return cmp;
1260 else
1261 gcc_unreachable ();
1264 return 0;
1267 /* Return TRUE it's possible to resolve data dependence DDR by runtime alias
1268 check. */
1270 bool
1271 runtime_alias_check_p (ddr_p ddr, struct loop *loop, bool speed_p)
1273 if (dump_enabled_p ())
1275 dump_printf (MSG_NOTE, "consider run-time aliasing test between ");
1276 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_A (ddr)));
1277 dump_printf (MSG_NOTE, " and ");
1278 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_B (ddr)));
1279 dump_printf (MSG_NOTE, "\n");
1282 if (!speed_p)
1284 if (dump_enabled_p ())
1285 dump_printf (MSG_MISSED_OPTIMIZATION,
1286 "runtime alias check not supported when optimizing "
1287 "for size.\n");
1288 return false;
1291 /* FORNOW: We don't support versioning with outer-loop in either
1292 vectorization or loop distribution. */
1293 if (loop != NULL && loop->inner != NULL)
1295 if (dump_enabled_p ())
1296 dump_printf (MSG_MISSED_OPTIMIZATION,
1297 "runtime alias check not supported for outer loop.\n");
1298 return false;
1301 /* FORNOW: We don't support creating runtime alias tests for non-constant
1302 step. */
1303 if (TREE_CODE (DR_STEP (DDR_A (ddr))) != INTEGER_CST
1304 || TREE_CODE (DR_STEP (DDR_B (ddr))) != INTEGER_CST)
1306 if (dump_enabled_p ())
1307 dump_printf (MSG_MISSED_OPTIMIZATION,
1308 "runtime alias check not supported for non-constant "
1309 "step\n");
1310 return false;
1313 return true;
1316 /* Operator == between two dr_with_seg_len objects.
1318 This equality operator is used to make sure two data refs
1319 are the same one so that we will consider to combine the
1320 aliasing checks of those two pairs of data dependent data
1321 refs. */
1323 static bool
1324 operator == (const dr_with_seg_len& d1,
1325 const dr_with_seg_len& d2)
1327 return operand_equal_p (DR_BASE_ADDRESS (d1.dr),
1328 DR_BASE_ADDRESS (d2.dr), 0)
1329 && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
1330 && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
1331 && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0;
1334 /* Comparison function for sorting objects of dr_with_seg_len_pair_t
1335 so that we can combine aliasing checks in one scan. */
1337 static int
1338 comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
1340 const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
1341 const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
1342 const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
1343 const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;
1345 /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks
1346 if a and c have the same basic address snd step, and b and d have the same
1347 address and step. Therefore, if any a&c or b&d don't have the same address
1348 and step, we don't care the order of those two pairs after sorting. */
1349 int comp_res;
1351 if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
1352 DR_BASE_ADDRESS (b1.dr))) != 0)
1353 return comp_res;
1354 if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
1355 DR_BASE_ADDRESS (b2.dr))) != 0)
1356 return comp_res;
1357 if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
1358 DR_STEP (b1.dr))) != 0)
1359 return comp_res;
1360 if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
1361 DR_STEP (b2.dr))) != 0)
1362 return comp_res;
1363 if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
1364 DR_OFFSET (b1.dr))) != 0)
1365 return comp_res;
1366 if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
1367 DR_INIT (b1.dr))) != 0)
1368 return comp_res;
1369 if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
1370 DR_OFFSET (b2.dr))) != 0)
1371 return comp_res;
1372 if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
1373 DR_INIT (b2.dr))) != 0)
1374 return comp_res;
1376 return 0;
1379 /* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
1380 FACTOR is number of iterations that each data reference is accessed.
1382 Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
1383 we create an expression:
1385 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1386 || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
1388 for aliasing checks. However, in some cases we can decrease the number
1389 of checks by combining two checks into one. For example, suppose we have
1390 another pair of data refs store_ptr_0 & load_ptr_1, and if the following
1391 condition is satisfied:
1393 load_ptr_0 < load_ptr_1 &&
1394 load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
1396 (this condition means, in each iteration of vectorized loop, the accessed
1397 memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
1398 load_ptr_1.)
1400 we then can use only the following expression to finish the aliasing checks
1401 between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:
1403 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1404 || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))
1406 Note that we only consider that load_ptr_0 and load_ptr_1 have the same
1407 basic address. */
1409 void
1410 prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
1411 unsigned HOST_WIDE_INT factor)
1413 /* Sort the collected data ref pairs so that we can scan them once to
1414 combine all possible aliasing checks. */
1415 alias_pairs->qsort (comp_dr_with_seg_len_pair);
1417 /* Scan the sorted dr pairs and check if we can combine alias checks
1418 of two neighboring dr pairs. */
1419 for (size_t i = 1; i < alias_pairs->length (); ++i)
1421 /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2). */
1422 dr_with_seg_len *dr_a1 = &(*alias_pairs)[i-1].first,
1423 *dr_b1 = &(*alias_pairs)[i-1].second,
1424 *dr_a2 = &(*alias_pairs)[i].first,
1425 *dr_b2 = &(*alias_pairs)[i].second;
1427 /* Remove duplicate data ref pairs. */
1428 if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
1430 if (dump_enabled_p ())
1432 dump_printf (MSG_NOTE, "found equal ranges ");
1433 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a1->dr));
1434 dump_printf (MSG_NOTE, ", ");
1435 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b1->dr));
1436 dump_printf (MSG_NOTE, " and ");
1437 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a2->dr));
1438 dump_printf (MSG_NOTE, ", ");
1439 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b2->dr));
1440 dump_printf (MSG_NOTE, "\n");
1442 alias_pairs->ordered_remove (i--);
1443 continue;
1446 if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
1448 /* We consider the case that DR_B1 and DR_B2 are same memrefs,
1449 and DR_A1 and DR_A2 are two consecutive memrefs. */
1450 if (*dr_a1 == *dr_a2)
1452 std::swap (dr_a1, dr_b1);
1453 std::swap (dr_a2, dr_b2);
1456 if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
1457 DR_BASE_ADDRESS (dr_a2->dr), 0)
1458 || !operand_equal_p (DR_OFFSET (dr_a1->dr),
1459 DR_OFFSET (dr_a2->dr), 0)
1460 || !tree_fits_shwi_p (DR_INIT (dr_a1->dr))
1461 || !tree_fits_shwi_p (DR_INIT (dr_a2->dr)))
1462 continue;
1464 /* Only merge const step data references. */
1465 if (TREE_CODE (DR_STEP (dr_a1->dr)) != INTEGER_CST
1466 || TREE_CODE (DR_STEP (dr_a2->dr)) != INTEGER_CST)
1467 continue;
1469 /* DR_A1 and DR_A2 must go in the same direction. */
1470 if (tree_int_cst_compare (DR_STEP (dr_a1->dr), size_zero_node)
1471 != tree_int_cst_compare (DR_STEP (dr_a2->dr), size_zero_node))
1472 continue;
1474 bool neg_step
1475 = (tree_int_cst_compare (DR_STEP (dr_a1->dr), size_zero_node) < 0);
1477 /* We need to compute merged segment length at compilation time for
1478 dr_a1 and dr_a2, which is impossible if either one has non-const
1479 segment length. */
1480 if ((!tree_fits_uhwi_p (dr_a1->seg_len)
1481 || !tree_fits_uhwi_p (dr_a2->seg_len))
1482 && tree_int_cst_compare (DR_STEP (dr_a1->dr),
1483 DR_STEP (dr_a2->dr)) != 0)
1484 continue;
1486 /* Make sure dr_a1 starts left of dr_a2. */
1487 if (tree_int_cst_lt (DR_INIT (dr_a2->dr), DR_INIT (dr_a1->dr)))
1488 std::swap (*dr_a1, *dr_a2)_SWAP_PLACEHOLDER;
1490 bool do_remove = false;
1491 wide_int diff = (wi::to_wide (DR_INIT (dr_a2->dr))
1492 - wi::to_wide (DR_INIT (dr_a1->dr)));
1493 wide_int min_seg_len_b;
1494 tree new_seg_len;
/* MIN_SEG_LEN_B is a lower bound on DR_B's segment length: the constant
   length itself when known, otherwise FACTOR iterations of its step.  */
1496 if (TREE_CODE (dr_b1->seg_len) == INTEGER_CST)
1497 min_seg_len_b = wi::abs (wi::to_wide (dr_b1->seg_len));
1498 else
1499 min_seg_len_b
1500 = factor * wi::abs (wi::to_wide (DR_STEP (dr_b1->dr)));
1502 /* Now we try to merge alias check dr_a1 & dr_b and dr_a2 & dr_b.
1504 Case A:
1505 check if the following condition is satisfied:
1507 DIFF - SEGMENT_LENGTH_A < SEGMENT_LENGTH_B
1509 where DIFF = DR_A2_INIT - DR_A1_INIT. However,
1510 SEGMENT_LENGTH_A or SEGMENT_LENGTH_B may not be constant so we
1511 have to make a best estimation. We can get the minimum value
1512 of SEGMENT_LENGTH_B as a constant, represented by MIN_SEG_LEN_B,
1513 then either of the following two conditions can guarantee the
1514 one above:
1516 1: DIFF <= MIN_SEG_LEN_B
1517 2: DIFF - SEGMENT_LENGTH_A < MIN_SEG_LEN_B
1518 Because DIFF - SEGMENT_LENGTH_A is done in sizetype, we need
1519 to take care of wrapping behavior in it.
1521 Case B:
1522 If the left segment does not extend beyond the start of the
1523 right segment the new segment length is that of the right
1524 plus the segment distance. The condition is like:
1526 DIFF >= SEGMENT_LENGTH_A ;SEGMENT_LENGTH_A is a constant.
1528 Note 1: Case A.2 and B combined together effectively merges every
1529 dr_a1 & dr_b and dr_a2 & dr_b when SEGMENT_LENGTH_A is const.
1531 Note 2: Above description is based on positive DR_STEP, we need to
1532 take care of negative DR_STEP for wrapping behavior. See PR80815
1533 for more information. */
1534 if (neg_step)
1536 /* Adjust diff according to access size of both references. */
1537 tree size_a1 = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a1->dr)));
1538 tree size_a2 = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a2->dr)));
1539 diff += wi::to_wide (size_a2) - wi::to_wide (size_a1);
1540 /* Case A.1. */
1541 if (wi::leu_p (diff, min_seg_len_b)
1542 /* Case A.2 and B combined. */
1543 || (tree_fits_uhwi_p (dr_a2->seg_len)))
1545 if (tree_fits_uhwi_p (dr_a1->seg_len)
1546 && tree_fits_uhwi_p (dr_a2->seg_len))
1548 wide_int min_len
1549 = wi::umin (wi::to_wide (dr_a1->seg_len) - diff,
1550 wi::to_wide (dr_a2->seg_len));
1551 new_seg_len = wide_int_to_tree (sizetype, min_len);
1553 else
1554 new_seg_len
1555 = size_binop (MINUS_EXPR, dr_a2->seg_len,
1556 wide_int_to_tree (sizetype, diff));
/* For a negative step the surviving reference is DR_A2 (the pair at
   index I); the pair at I - 1 is removed below.  */
1558 dr_a2->seg_len = new_seg_len;
1559 do_remove = true;
1562 else
1564 /* Case A.1. */
1565 if (wi::leu_p (diff, min_seg_len_b)
1566 /* Case A.2 and B combined. */
1567 || (tree_fits_uhwi_p (dr_a1->seg_len)))
1569 if (tree_fits_uhwi_p (dr_a1->seg_len)
1570 && tree_fits_uhwi_p (dr_a2->seg_len))
1572 wide_int max_len
1573 = wi::umax (wi::to_wide (dr_a2->seg_len) + diff,
1574 wi::to_wide (dr_a1->seg_len));
1575 new_seg_len = wide_int_to_tree (sizetype, max_len);
1577 else
1578 new_seg_len
1579 = size_binop (PLUS_EXPR, dr_a2->seg_len,
1580 wide_int_to_tree (sizetype, diff));
/* For a positive step the surviving reference is DR_A1 (the pair at
   index I - 1); the pair at I is removed below.  */
1582 dr_a1->seg_len = new_seg_len;
1583 do_remove = true;
1587 if (do_remove)
1589 if (dump_enabled_p ())
1591 dump_printf (MSG_NOTE, "merging ranges for ");
1592 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a1->dr));
1593 dump_printf (MSG_NOTE, ", ");
1594 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b1->dr));
1595 dump_printf (MSG_NOTE, " and ");
1596 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a2->dr));
1597 dump_printf (MSG_NOTE, ", ");
1598 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b2->dr));
1599 dump_printf (MSG_NOTE, "\n");
/* Drop the pair whose merged information is now redundant; see the
   comments at the seg_len updates above for which one survives.  */
1601 alias_pairs->ordered_remove (neg_step ? i - 1 : i);
1602 i--;
1608 /* Given LOOP's two data references and segment lengths described by DR_A
1609 and DR_B, create expression checking if the two addresses ranges intersect
1610 with each other based on index of the two addresses. This can only be
1611 done if DR_A and DR_B referring to the same (array) object and the index
1612 is the only difference. For example:
1614 DR_A DR_B
1615 data-ref arr[i] arr[j]
1616 base_object arr arr
1617 index {i_0, +, 1}_loop {j_0, +, 1}_loop
1619 The addresses and their index are like:
1621 |<- ADDR_A ->| |<- ADDR_B ->|
1622 ------------------------------------------------------->
1623 | | | | | | | | | |
1624 ------------------------------------------------------->
1625 i_0 ... i_0+4 j_0 ... j_0+4
1627 We can create expression based on index rather than address:
1629 (i_0 + 4 < j_0 || j_0 + 4 < i_0)
1631 Note evolution step of index needs to be considered in comparison. */
1633 static bool
1634 create_intersect_range_checks_index (struct loop *loop, tree *cond_expr,
1635 const dr_with_seg_len& dr_a,
1636 const dr_with_seg_len& dr_b)
/* Bail out when the references are loop-invariant, have different
   dimensionality, non-constant segment lengths or steps, or different
   base objects or steps -- the index-based check needs all of these.  */
1638 if (integer_zerop (DR_STEP (dr_a.dr))
1639 || integer_zerop (DR_STEP (dr_b.dr))
1640 || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
1641 return false;
1643 if (!tree_fits_uhwi_p (dr_a.seg_len) || !tree_fits_uhwi_p (dr_b.seg_len))
1644 return false;
1646 if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
1647 return false;
1649 if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
1650 return false;
1652 if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
1653 return false;
1655 gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
1657 bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
1658 unsigned HOST_WIDE_INT abs_step
1659 = absu_hwi (tree_to_shwi (DR_STEP (dr_a.dr)));
1661 unsigned HOST_WIDE_INT seg_len1 = tree_to_uhwi (dr_a.seg_len);
1662 unsigned HOST_WIDE_INT seg_len2 = tree_to_uhwi (dr_b.seg_len);
1663 /* Infer the number of iterations with which the memory segment is accessed
1664 by DR. In other words, alias is checked if memory segment accessed by
1665 DR_A in some iterations intersect with memory segment accessed by DR_B
1666 in the same number of iterations.
1667 Note segment length is a linear function of number of iterations with
1668 DR_STEP as the coefficient. */
1669 unsigned HOST_WIDE_INT niter_len1 = (seg_len1 + abs_step - 1) / abs_step;
1670 unsigned HOST_WIDE_INT niter_len2 = (seg_len2 + abs_step - 1) / abs_step;
1672 unsigned int i;
1673 for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
1675 tree access1 = DR_ACCESS_FN (dr_a.dr, i);
1676 tree access2 = DR_ACCESS_FN (dr_b.dr, i);
1677 /* Two indices must be the same if they are not scev, or not scev w.r.t.
1678 current loop being vectorized. */
1679 if (TREE_CODE (access1) != POLYNOMIAL_CHREC
1680 || TREE_CODE (access2) != POLYNOMIAL_CHREC
1681 || CHREC_VARIABLE (access1) != (unsigned)loop->num
1682 || CHREC_VARIABLE (access2) != (unsigned)loop->num)
1684 if (operand_equal_p (access1, access2, 0))
1685 continue;
1687 return false;
1689 /* The two indices must have the same step. */
1690 if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
1691 return false;
1693 tree idx_step = CHREC_RIGHT (access1);
1694 /* Index must have const step, otherwise DR_STEP won't be constant. */
1695 gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
1696 /* Index must evaluate in the same direction as DR. */
1697 gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
1699 tree min1 = CHREC_LEFT (access1);
1700 tree min2 = CHREC_LEFT (access2);
1701 if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
1702 return false;
1704 /* Ideally, alias can be checked against loop's control IV, but we
1705 need to prove linear mapping between control IV and reference
1706 index. Although that should be true, we check against (array)
1707 index of data reference. Like segment length, index length is
1708 linear function of the number of iterations with index_step as
1709 the coefficient, i.e, niter_len * idx_step. */
1710 tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
1711 build_int_cst (TREE_TYPE (min1),
1712 niter_len1));
1713 tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
1714 build_int_cst (TREE_TYPE (min2),
1715 niter_len2));
1716 tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1);
1717 tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2);
1718 /* Adjust ranges for negative step. */
1719 if (neg_step)
/* With a negative idx_step, MIN/MAX computed above are swapped and
   off by one step; rebuild the ranges from the chrec left ends.  */
1721 min1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1), max1, idx_step);
1722 max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1),
1723 CHREC_LEFT (access1), idx_step);
1724 min2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2), max2, idx_step);
1725 max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2),
1726 CHREC_LEFT (access2), idx_step);
/* No alias in this dimension when one range ends before the other
   starts; AND the per-dimension conditions together.  */
1728 tree part_cond_expr
1729 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1730 fold_build2 (LE_EXPR, boolean_type_node, max1, min2),
1731 fold_build2 (LE_EXPR, boolean_type_node, max2, min1));
1732 if (*cond_expr)
1733 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1734 *cond_expr, part_cond_expr);
1735 else
1736 *cond_expr = part_cond_expr;
1738 return true;
1741 /* Given two data references and segment lengths described by DR_A and DR_B,
1742 create expression checking if the two addresses ranges intersect with
1743 each other:
1745 ((DR_A_addr_0 + DR_A_segment_length_0) <= DR_B_addr_0)
1746 || (DR_B_addr_0 + DER_B_segment_length_0) <= DR_A_addr_0)) */
1748 static void
1749 create_intersect_range_checks (struct loop *loop, tree *cond_expr,
1750 const dr_with_seg_len& dr_a,
1751 const dr_with_seg_len& dr_b)
1753 *cond_expr = NULL_TREE;
1754 if (create_intersect_range_checks_index (loop, cond_expr, dr_a, dr_b))
1755 return;
1757 tree segment_length_a = dr_a.seg_len;
1758 tree segment_length_b = dr_b.seg_len;
1759 tree addr_base_a = DR_BASE_ADDRESS (dr_a.dr);
1760 tree addr_base_b = DR_BASE_ADDRESS (dr_b.dr);
1761 tree offset_a = DR_OFFSET (dr_a.dr), offset_b = DR_OFFSET (dr_b.dr);
1763 offset_a = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_a),
1764 offset_a, DR_INIT (dr_a.dr));
1765 offset_b = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_b),
1766 offset_b, DR_INIT (dr_b.dr));
1767 addr_base_a = fold_build_pointer_plus (addr_base_a, offset_a);
1768 addr_base_b = fold_build_pointer_plus (addr_base_b, offset_b);
1770 tree seg_a_min = addr_base_a;
1771 tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
1772 /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
1773 bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
1774 [a, a+12) */
1775 if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
1777 tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a.dr)));
1778 seg_a_min = fold_build_pointer_plus (seg_a_max, unit_size);
1779 seg_a_max = fold_build_pointer_plus (addr_base_a, unit_size);
1782 tree seg_b_min = addr_base_b;
1783 tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
1784 if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
1786 tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b.dr)));
1787 seg_b_min = fold_build_pointer_plus (seg_b_max, unit_size);
1788 seg_b_max = fold_build_pointer_plus (addr_base_b, unit_size);
1790 *cond_expr
1791 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1792 fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
1793 fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
1796 /* Create a conditional expression that represents the run-time checks for
1797 overlapping of address ranges represented by a list of data references
1798 pairs passed in ALIAS_PAIRS. Data references are in LOOP. The returned
1799 COND_EXPR is the conditional expression to be used in the if statement
1800 that controls which version of the loop gets executed at runtime. */
1802 void
1803 create_runtime_alias_checks (struct loop *loop,
1804 vec<dr_with_seg_len_pair_t> *alias_pairs,
1805 tree * cond_expr)
1807 tree part_cond_expr;
1809 for (size_t i = 0, s = alias_pairs->length (); i < s; ++i)
1811 const dr_with_seg_len& dr_a = (*alias_pairs)[i].first;
1812 const dr_with_seg_len& dr_b = (*alias_pairs)[i].second;
1814 if (dump_enabled_p ())
1816 dump_printf (MSG_NOTE, "create runtime check for data references ");
1817 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
1818 dump_printf (MSG_NOTE, " and ");
1819 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
1820 dump_printf (MSG_NOTE, "\n");
1823 /* Create condition expression for each pair data references. */
1824 create_intersect_range_checks (loop, &part_cond_expr, dr_a, dr_b);
1825 if (*cond_expr)
1826 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1827 *cond_expr, part_cond_expr);
1828 else
1829 *cond_expr = part_cond_expr;
1833 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1834 expressions. */
1835 static bool
1836 dr_equal_offsets_p1 (tree offset1, tree offset2)
1838 bool res;
1840 STRIP_NOPS (offset1);
1841 STRIP_NOPS (offset2);
1843 if (offset1 == offset2)
1844 return true;
1846 if (TREE_CODE (offset1) != TREE_CODE (offset2)
1847 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1848 return false;
1850 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1851 TREE_OPERAND (offset2, 0));
1853 if (!res || !BINARY_CLASS_P (offset1))
1854 return res;
1856 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1857 TREE_OPERAND (offset2, 1));
1859 return res;
1862 /* Check if DRA and DRB have equal offsets. */
1863 bool
1864 dr_equal_offsets_p (struct data_reference *dra,
1865 struct data_reference *drb)
1867 tree offset1, offset2;
1869 offset1 = DR_OFFSET (dra);
1870 offset2 = DR_OFFSET (drb);
1872 return dr_equal_offsets_p1 (offset1, offset2);
1875 /* Returns true if FNA == FNB. */
1877 static bool
1878 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1880 unsigned i, n = fna.length ();
1882 if (n != fnb.length ())
1883 return false;
1885 for (i = 0; i < n; i++)
1886 if (!operand_equal_p (fna[i], fnb[i], 0))
1887 return false;
1889 return true;
1892 /* If all the functions in CF are the same, returns one of them,
1893 otherwise returns NULL. */
1895 static affine_fn
1896 common_affine_function (conflict_function *cf)
1898 unsigned i;
1899 affine_fn comm;
1901 if (!CF_NONTRIVIAL_P (cf))
1902 return affine_fn ();
1904 comm = cf->fns[0];
1906 for (i = 1; i < cf->n; i++)
1907 if (!affine_function_equal_p (comm, cf->fns[i]))
1908 return affine_fn ();
1910 return comm;
1913 /* Returns the base of the affine function FN. */
1915 static tree
1916 affine_function_base (affine_fn fn)
1918 return fn[0];
1921 /* Returns true if FN is a constant. */
1923 static bool
1924 affine_function_constant_p (affine_fn fn)
1926 unsigned i;
1927 tree coef;
1929 for (i = 1; fn.iterate (i, &coef); i++)
1930 if (!integer_zerop (coef))
1931 return false;
1933 return true;
1936 /* Returns true if FN is the zero constant function. */
1938 static bool
1939 affine_function_zero_p (affine_fn fn)
1941 return (integer_zerop (affine_function_base (fn))
1942 && affine_function_constant_p (fn));
1945 /* Returns a signed integer type with the largest precision from TA
1946 and TB. */
1948 static tree
1949 signed_type_for_types (tree ta, tree tb)
1951 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1952 return signed_type_for (ta);
1953 else
1954 return signed_type_for (tb);
1957 /* Applies operation OP on affine functions FNA and FNB, and returns the
1958 result. */
1960 static affine_fn
1961 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1963 unsigned i, n, m;
1964 affine_fn ret;
1965 tree coef;
1967 if (fnb.length () > fna.length ())
1969 n = fna.length ();
1970 m = fnb.length ();
1972 else
1974 n = fnb.length ();
1975 m = fna.length ();
1978 ret.create (m);
1979 for (i = 0; i < n; i++)
1981 tree type = signed_type_for_types (TREE_TYPE (fna[i]),
1982 TREE_TYPE (fnb[i]));
1983 ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
1986 for (; fna.iterate (i, &coef); i++)
1987 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1988 coef, integer_zero_node));
1989 for (; fnb.iterate (i, &coef); i++)
1990 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1991 integer_zero_node, coef));
1993 return ret;
1996 /* Returns the sum of affine functions FNA and FNB. */
1998 static affine_fn
1999 affine_fn_plus (affine_fn fna, affine_fn fnb)
2001 return affine_fn_op (PLUS_EXPR, fna, fnb);
2004 /* Returns the difference of affine functions FNA and FNB. */
2006 static affine_fn
2007 affine_fn_minus (affine_fn fna, affine_fn fnb)
2009 return affine_fn_op (MINUS_EXPR, fna, fnb);
2012 /* Frees affine function FN. */
2014 static void
2015 affine_fn_free (affine_fn fn)
2017 fn.release ();
2020 /* Determine for each subscript in the data dependence relation DDR
2021 the distance. */
2023 static void
2024 compute_subscript_distance (struct data_dependence_relation *ddr)
2026 conflict_function *cf_a, *cf_b;
2027 affine_fn fn_a, fn_b, diff;
2029 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
2031 unsigned int i;
2033 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2035 struct subscript *subscript;
2037 subscript = DDR_SUBSCRIPT (ddr, i);
2038 cf_a = SUB_CONFLICTS_IN_A (subscript);
2039 cf_b = SUB_CONFLICTS_IN_B (subscript);
2041 fn_a = common_affine_function (cf_a);
2042 fn_b = common_affine_function (cf_b);
2043 if (!fn_a.exists () || !fn_b.exists ())
2045 SUB_DISTANCE (subscript) = chrec_dont_know;
2046 return;
2048 diff = affine_fn_minus (fn_a, fn_b);
2050 if (affine_function_constant_p (diff))
2051 SUB_DISTANCE (subscript) = affine_function_base (diff);
2052 else
2053 SUB_DISTANCE (subscript) = chrec_dont_know;
2055 affine_fn_free (diff);
2060 /* Returns the conflict function for "unknown". */
2062 static conflict_function *
2063 conflict_fn_not_known (void)
2065 conflict_function *fn = XCNEW (conflict_function);
2066 fn->n = NOT_KNOWN;
2068 return fn;
2071 /* Returns the conflict function for "independent". */
2073 static conflict_function *
2074 conflict_fn_no_dependence (void)
2076 conflict_function *fn = XCNEW (conflict_function);
2077 fn->n = NO_DEPENDENCE;
2079 return fn;
2082 /* Returns true if the address of OBJ is invariant in LOOP. */
2084 static bool
2085 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
2087 while (handled_component_p (obj))
2089 if (TREE_CODE (obj) == ARRAY_REF)
2091 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
2092 need to check the stride and the lower bound of the reference. */
2093 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2094 loop->num)
2095 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
2096 loop->num))
2097 return false;
2099 else if (TREE_CODE (obj) == COMPONENT_REF)
2101 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2102 loop->num))
2103 return false;
2105 obj = TREE_OPERAND (obj, 0);
2108 if (!INDIRECT_REF_P (obj)
2109 && TREE_CODE (obj) != MEM_REF)
2110 return true;
2112 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
2113 loop->num);
2116 /* Returns false if we can prove that data references A and B do not alias,
2117 true otherwise. If LOOP_NEST is false no cross-iteration aliases are
2118 considered. */
2120 bool
2121 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
2122 bool loop_nest)
2124 tree addr_a = DR_BASE_OBJECT (a);
2125 tree addr_b = DR_BASE_OBJECT (b);
2127 /* If we are not processing a loop nest but scalar code we
2128 do not need to care about possible cross-iteration dependences
2129 and thus can process the full original reference. Do so,
2130 similar to how loop invariant motion applies extra offset-based
2131 disambiguation. */
2132 if (!loop_nest)
2134 aff_tree off1, off2;
2135 widest_int size1, size2;
2136 get_inner_reference_aff (DR_REF (a), &off1, &size1);
2137 get_inner_reference_aff (DR_REF (b), &off2, &size2);
/* OFF2 - OFF1 decides overlap of the two byte ranges.  */
2138 aff_combination_scale (&off1, -1);
2139 aff_combination_add (&off2, &off1);
2140 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
2141 return false;
/* MEM_REFs in the same dependence clique but with different bases are
   disambiguated by that recorded dependence info alone.  */
2144 if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
2145 && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
2146 && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
2147 && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
2148 return false;
2150 /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
2151 do not know the size of the base-object. So we cannot do any
2152 offset/overlap based analysis but have to rely on points-to
2153 information only. */
2154 if (TREE_CODE (addr_a) == MEM_REF
2155 && (DR_UNCONSTRAINED_BASE (a)
2156 || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
2158 /* For true dependences we can apply TBAA. */
2159 if (flag_strict_aliasing
2160 && DR_IS_WRITE (a) && DR_IS_READ (b)
2161 && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
2162 get_alias_set (DR_REF (b))))
2163 return false;
2164 if (TREE_CODE (addr_b) == MEM_REF)
2165 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2166 TREE_OPERAND (addr_b, 0));
2167 else
2168 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2169 build_fold_addr_expr (addr_b));
2171 else if (TREE_CODE (addr_b) == MEM_REF
2172 && (DR_UNCONSTRAINED_BASE (b)
2173 || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
2175 /* For true dependences we can apply TBAA. */
2176 if (flag_strict_aliasing
2177 && DR_IS_WRITE (a) && DR_IS_READ (b)
2178 && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
2179 get_alias_set (DR_REF (b))))
2180 return false;
2181 if (TREE_CODE (addr_a) == MEM_REF)
2182 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2183 TREE_OPERAND (addr_b, 0));
2184 else
2185 return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
2186 TREE_OPERAND (addr_b, 0));
2189 /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
2190 that is being subsetted in the loop nest. */
2191 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
2192 return refs_output_dependent_p (addr_a, addr_b);
2193 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
2194 return refs_anti_dependent_p (addr_a, addr_b);
2195 return refs_may_alias_p (addr_a, addr_b);
2198 /* REF_A and REF_B both satisfy access_fn_component_p. Return true
2199 if it is meaningful to compare their associated access functions
2200 when checking for dependencies. */
2202 static bool
2203 access_fn_components_comparable_p (tree ref_a, tree ref_b)
2205 /* Allow pairs of component refs from the following sets:
2207 { REALPART_EXPR, IMAGPART_EXPR }
2208 { COMPONENT_REF }
2209 { ARRAY_REF }. */
2210 tree_code code_a = TREE_CODE (ref_a);
2211 tree_code code_b = TREE_CODE (ref_b);
2212 if (code_a == IMAGPART_EXPR)
2213 code_a = REALPART_EXPR;
2214 if (code_b == IMAGPART_EXPR)
2215 code_b = REALPART_EXPR;
2216 if (code_a != code_b)
2217 return false;
2219 if (TREE_CODE (ref_a) == COMPONENT_REF)
2220 /* ??? We cannot simply use the type of operand #0 of the refs here as
2221 the Fortran compiler smuggles type punning into COMPONENT_REFs.
2222 Use the DECL_CONTEXT of the FIELD_DECLs instead. */
2223 return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
2224 == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
2226 return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
2227 TREE_TYPE (TREE_OPERAND (ref_b, 0)));
2230 /* Initialize a data dependence relation between data accesses A and
2231 B. NB_LOOPS is the number of loops surrounding the references: the
2232 size of the classic distance/direction vectors. */
2234 struct data_dependence_relation *
2235 initialize_data_dependence_relation (struct data_reference *a,
2236 struct data_reference *b,
2237 vec<loop_p> loop_nest)
2239 struct data_dependence_relation *res;
2240 unsigned int i;
/* Allocate an initially-empty relation; the analysis below classifies
   it as known-independent, unknown, or affine with subscripts.  */
2242 res = XCNEW (struct data_dependence_relation);
2243 DDR_A (res) = a;
2244 DDR_B (res) = b;
2245 DDR_LOOP_NEST (res).create (0);
2246 DDR_SUBSCRIPTS (res).create (0);
2247 DDR_DIR_VECTS (res).create (0);
2248 DDR_DIST_VECTS (res).create (0);
/* With a missing reference nothing can be concluded.  */
2250 if (a == NULL || b == NULL)
2252 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2253 return res;
2256 /* If the data references do not alias, then they are independent. */
2257 if (!dr_may_alias_p (a, b, loop_nest.exists ()))
2259 DDR_ARE_DEPENDENT (res) = chrec_known;
2260 return res;
2263 unsigned int num_dimensions_a = DR_NUM_DIMENSIONS (a);
2264 unsigned int num_dimensions_b = DR_NUM_DIMENSIONS (b);
/* Without subscripts there is nothing to compare dimension-wise.  */
2265 if (num_dimensions_a == 0 || num_dimensions_b == 0)
2267 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2268 return res;
2271 /* For unconstrained bases, the root (highest-indexed) subscript
2272 describes a variation in the base of the original DR_REF rather
2273 than a component access. We have no type that accurately describes
2274 the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
2275 applying this subscript) so limit the search to the last real
2276 component access.
2278 E.g. for:
2280 void
2281 f (int a[][8], int b[][8])
2283 for (int i = 0; i < 8; ++i)
2284 a[i * 2][0] = b[i][0];
2287 the a and b accesses have a single ARRAY_REF component reference [0]
2288 but have two subscripts. */
2289 if (DR_UNCONSTRAINED_BASE (a))
2290 num_dimensions_a -= 1;
2291 if (DR_UNCONSTRAINED_BASE (b))
2292 num_dimensions_b -= 1;
2294 /* These structures describe sequences of component references in
2295 DR_REF (A) and DR_REF (B). Each component reference is tied to a
2296 specific access function. */
2297 struct {
2298 /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
2299 DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
2300 indices. In C notation, these are the indices of the rightmost
2301 component references; e.g. for a sequence .b.c.d, the start
2302 index is for .d. */
2303 unsigned int start_a;
2304 unsigned int start_b;
2306 /* The sequence contains LENGTH consecutive access functions from
2307 each DR. */
2308 unsigned int length;
2310 /* The enclosing objects for the A and B sequences respectively,
2311 i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
2312 and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied. */
2313 tree object_a;
2314 tree object_b;
2315 } full_seq = {}, struct_seq = {};
2317 /* Before each iteration of the loop:
2319 - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
2320 - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B). */
2321 unsigned int index_a = 0;
2322 unsigned int index_b = 0;
2323 tree ref_a = DR_REF (a);
2324 tree ref_b = DR_REF (b);
2326 /* Now walk the component references from the final DR_REFs back up to
2327 the enclosing base objects. Each component reference corresponds
2328 to one access function in the DR, with access function 0 being for
2329 the final DR_REF and the highest-indexed access function being the
2330 one that is applied to the base of the DR.
2332 Look for a sequence of component references whose access functions
2333 are comparable (see access_fn_components_comparable_p). If more
2334 than one such sequence exists, pick the one nearest the base
2335 (which is the leftmost sequence in C notation). Store this sequence
2336 in FULL_SEQ.
2338 For example, if we have:
2340 struct foo { struct bar s; ... } (*a)[10], (*b)[10];
2342 A: a[0][i].s.c.d
2343 B: __real b[0][i].s.e[i].f
2345 (where d is the same type as the real component of f) then the access
2346 functions would be:
2348 0 1 2 3
2349 A: .d .c .s [i]
2351 0 1 2 3 4 5
2352 B: __real .f [i] .e .s [i]
2354 The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
2355 and [i] is an ARRAY_REF. However, the A1/B3 column contains two
2356 COMPONENT_REF accesses for struct bar, so is comparable. Likewise
2357 the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
2358 so is comparable. The A3/B5 column contains two ARRAY_REFs that
2359 index foo[10] arrays, so is again comparable. The sequence is
2360 therefore:
2362 A: [1, 3] (i.e. [i].s.c)
2363 B: [3, 5] (i.e. [i].s.e)
2365 Also look for sequences of component references whose access
2366 functions are comparable and whose enclosing objects have the same
2367 RECORD_TYPE. Store this sequence in STRUCT_SEQ. In the above
2368 example, STRUCT_SEQ would be:
2370 A: [1, 2] (i.e. s.c)
2371 B: [3, 4] (i.e. s.e) */
2372 while (index_a < num_dimensions_a && index_b < num_dimensions_b)
2374 /* REF_A and REF_B must be one of the component access types
2375 allowed by dr_analyze_indices. */
2376 gcc_checking_assert (access_fn_component_p (ref_a));
2377 gcc_checking_assert (access_fn_component_p (ref_b));
2379 /* Get the immediately-enclosing objects for REF_A and REF_B,
2380 i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
2381 and DR_ACCESS_FN (B, INDEX_B). */
2382 tree object_a = TREE_OPERAND (ref_a, 0);
2383 tree object_b = TREE_OPERAND (ref_b, 0);
2385 tree type_a = TREE_TYPE (object_a);
2386 tree type_b = TREE_TYPE (object_b);
2387 if (access_fn_components_comparable_p (ref_a, ref_b))
2389 /* This pair of component accesses is comparable for dependence
2390 analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
2391 DR_ACCESS_FN (B, INDEX_B) in the sequence. */
2392 if (full_seq.start_a + full_seq.length != index_a
2393 || full_seq.start_b + full_seq.length != index_b)
2395 /* The accesses don't extend the current sequence,
2396 so start a new one here. */
2397 full_seq.start_a = index_a;
2398 full_seq.start_b = index_b;
2399 full_seq.length = 0;
2402 /* Add this pair of references to the sequence. */
2403 full_seq.length += 1;
2404 full_seq.object_a = object_a;
2405 full_seq.object_b = object_b;
2407 /* If the enclosing objects are structures (and thus have the
2408 same RECORD_TYPE), record the new sequence in STRUCT_SEQ. */
2409 if (TREE_CODE (type_a) == RECORD_TYPE)
2410 struct_seq = full_seq;
2412 /* Move to the next containing reference for both A and B. */
2413 ref_a = object_a;
2414 ref_b = object_b;
2415 index_a += 1;
2416 index_b += 1;
2417 continue;
2420 /* Try to approach equal type sizes. */
2421 if (!COMPLETE_TYPE_P (type_a)
2422 || !COMPLETE_TYPE_P (type_b)
2423 || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
2424 || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
2425 break;
/* Advance the walk on the side whose enclosing type is no larger, so
   both walks converge towards objects of similar size (both sides
   advance when the sizes are equal).  */
2427 unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
2428 unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
2429 if (size_a <= size_b)
2431 index_a += 1;
2432 ref_a = object_a;
2434 if (size_b <= size_a)
2436 index_b += 1;
2437 ref_b = object_b;
2441 /* See whether FULL_SEQ ends at the base and whether the two bases
2442 are equal. We do not care about TBAA or alignment info so we can
2443 use OEP_ADDRESS_OF to avoid false negatives. */
2444 tree base_a = DR_BASE_OBJECT (a);
2445 tree base_b = DR_BASE_OBJECT (b);
2446 bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
2447 && full_seq.start_b + full_seq.length == num_dimensions_b
2448 && DR_UNCONSTRAINED_BASE (a) == DR_UNCONSTRAINED_BASE (b)
2449 && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
2450 && types_compatible_p (TREE_TYPE (base_a),
2451 TREE_TYPE (base_b))
2452 && (!loop_nest.exists ()
2453 || (object_address_invariant_in_loop_p
2454 (loop_nest[0], base_a))));
2456 /* If the bases are the same, we can include the base variation too.
2457 E.g. the b accesses in:
2459 for (int i = 0; i < n; ++i)
2460 b[i + 4][0] = b[i][0];
2462 have a definite dependence distance of 4, while for:
2464 for (int i = 0; i < n; ++i)
2465 a[i + 4][0] = b[i][0];
2467 the dependence distance depends on the gap between a and b.
2469 If the bases are different then we can only rely on the sequence
2470 rooted at a structure access, since arrays are allowed to overlap
2471 arbitrarily and change shape arbitrarily. E.g. we treat this as
2472 valid code:
2474 int a[256];
2476 ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];
2478 where two lvalues with the same int[4][3] type overlap, and where
2479 both lvalues are distinct from the object's declared type. */
2480 if (same_base_p)
2482 if (DR_UNCONSTRAINED_BASE (a))
2483 full_seq.length += 1;
2485 else
2486 full_seq = struct_seq;
2488 /* Punt if we didn't find a suitable sequence. */
2489 if (full_seq.length == 0)
2491 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2492 return res;
2495 if (!same_base_p)
2497 /* Partial overlap is possible for different bases when strict aliasing
2498 is not in effect. It's also possible if either base involves a union
2499 access; e.g. for:
2501 struct s1 { int a[2]; };
2502 struct s2 { struct s1 b; int c; };
2503 struct s3 { int d; struct s1 e; };
2504 union u { struct s2 f; struct s3 g; } *p, *q;
2506 the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
2507 "p->g.e" (base "p->g") and might partially overlap the s1 at
2508 "q->g.e" (base "q->g"). */
2509 if (!flag_strict_aliasing
2510 || ref_contains_union_access_p (full_seq.object_a)
2511 || ref_contains_union_access_p (full_seq.object_b))
2513 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2514 return res;
2517 DDR_COULD_BE_INDEPENDENT_P (res) = true;
2518 if (!loop_nest.exists ()
2519 || (object_address_invariant_in_loop_p (loop_nest[0],
2520 full_seq.object_a)
2521 && object_address_invariant_in_loop_p (loop_nest[0],
2522 full_seq.object_b)))
2524 DDR_OBJECT_A (res) = full_seq.object_a;
2525 DDR_OBJECT_B (res) = full_seq.object_b;
/* We found a comparable sequence: record an affine relation with one
   subscript per access-function pair in FULL_SEQ.  */
2529 DDR_AFFINE_P (res) = true;
2530 DDR_ARE_DEPENDENT (res) = NULL_TREE;
2531 DDR_SUBSCRIPTS (res).create (full_seq.length);
2532 DDR_LOOP_NEST (res) = loop_nest;
2533 DDR_INNER_LOOP (res) = 0;
2534 DDR_SELF_REFERENCE (res) = false;
2536 for (i = 0; i < full_seq.length; ++i)
2538 struct subscript *subscript;
2540 subscript = XNEW (struct subscript);
2541 SUB_ACCESS_FN (subscript, 0) = DR_ACCESS_FN (a, full_seq.start_a + i);
2542 SUB_ACCESS_FN (subscript, 1) = DR_ACCESS_FN (b, full_seq.start_b + i);
2543 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
2544 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
2545 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
2546 SUB_DISTANCE (subscript) = chrec_dont_know;
2547 DDR_SUBSCRIPTS (res).safe_push (subscript);
2550 return res;
2553 /* Frees memory used by the conflict function F. */
2555 static void
2556 free_conflict_function (conflict_function *f)
2558 unsigned i;
2560 if (CF_NONTRIVIAL_P (f))
2562 for (i = 0; i < f->n; i++)
2563 affine_fn_free (f->fns[i]);
2565 free (f);
2568 /* Frees memory used by SUBSCRIPTS. */
2570 static void
2571 free_subscripts (vec<subscript_p> subscripts)
2573 unsigned i;
2574 subscript_p s;
2576 FOR_EACH_VEC_ELT (subscripts, i, s)
2578 free_conflict_function (s->conflicting_iterations_in_a);
2579 free_conflict_function (s->conflicting_iterations_in_b);
2580 free (s);
2582 subscripts.release ();
2585 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
2586 description. */
2588 static inline void
2589 finalize_ddr_dependent (struct data_dependence_relation *ddr,
2590 tree chrec)
2592 DDR_ARE_DEPENDENT (ddr) = chrec;
2593 free_subscripts (DDR_SUBSCRIPTS (ddr));
2594 DDR_SUBSCRIPTS (ddr).create (0);
2597 /* The dependence relation DDR cannot be represented by a distance
2598 vector. */
2600 static inline void
2601 non_affine_dependence_relation (struct data_dependence_relation *ddr)
2603 if (dump_file && (dump_flags & TDF_DETAILS))
2604 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
2606 DDR_AFFINE_P (ddr) = false;
2611 /* This section contains the classic Banerjee tests. */
2613 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
2614 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
2616 static inline bool
2617 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
2619 return (evolution_function_is_constant_p (chrec_a)
2620 && evolution_function_is_constant_p (chrec_b));
2623 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
2624 variable, i.e., if the SIV (Single Index Variable) test is true. */
2626 static bool
2627 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
2629 if ((evolution_function_is_constant_p (chrec_a)
2630 && evolution_function_is_univariate_p (chrec_b))
2631 || (evolution_function_is_constant_p (chrec_b)
2632 && evolution_function_is_univariate_p (chrec_a)))
2633 return true;
2635 if (evolution_function_is_univariate_p (chrec_a)
2636 && evolution_function_is_univariate_p (chrec_b))
2638 switch (TREE_CODE (chrec_a))
2640 case POLYNOMIAL_CHREC:
2641 switch (TREE_CODE (chrec_b))
2643 case POLYNOMIAL_CHREC:
2644 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
2645 return false;
2646 /* FALLTHRU */
2648 default:
2649 return true;
2652 default:
2653 return true;
2657 return false;
2660 /* Creates a conflict function with N dimensions. The affine functions
2661 in each dimension follow. */
2663 static conflict_function *
2664 conflict_fn (unsigned n, ...)
2666 unsigned i;
2667 conflict_function *ret = XCNEW (conflict_function);
2668 va_list ap;
2670 gcc_assert (0 < n && n <= MAX_DIM);
2671 va_start (ap, n);
2673 ret->n = n;
2674 for (i = 0; i < n; i++)
2675 ret->fns[i] = va_arg (ap, affine_fn);
2676 va_end (ap);
2678 return ret;
2681 /* Returns constant affine function with value CST. */
2683 static affine_fn
2684 affine_fn_cst (tree cst)
2686 affine_fn fn;
2687 fn.create (1);
2688 fn.quick_push (cst);
2689 return fn;
2692 /* Returns affine function with single variable, CST + COEF * x_DIM. */
2694 static affine_fn
2695 affine_fn_univar (tree cst, unsigned dim, tree coef)
2697 affine_fn fn;
2698 fn.create (dim + 1);
2699 unsigned i;
2701 gcc_assert (dim > 0);
2702 fn.quick_push (cst);
2703 for (i = 1; i < dim; i++)
2704 fn.quick_push (integer_zero_node);
2705 fn.quick_push (coef);
2706 return fn;
2709 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
2710 *OVERLAPS_B are initialized to the functions that describe the
2711 relation between the elements accessed twice by CHREC_A and
2712 CHREC_B. For k >= 0, the following property is verified:
2714 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2716 static void
2717 analyze_ziv_subscript (tree chrec_a,
2718 tree chrec_b,
2719 conflict_function **overlaps_a,
2720 conflict_function **overlaps_b,
2721 tree *last_conflicts)
2723 tree type, difference;
2724 dependence_stats.num_ziv++;
2726 if (dump_file && (dump_flags & TDF_DETAILS))
2727 fprintf (dump_file, "(analyze_ziv_subscript \n");
2729 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2730 chrec_a = chrec_convert (type, chrec_a, NULL);
2731 chrec_b = chrec_convert (type, chrec_b, NULL);
2732 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2734 switch (TREE_CODE (difference))
2736 case INTEGER_CST:
2737 if (integer_zerop (difference))
2739 /* The difference is equal to zero: the accessed index
2740 overlaps for each iteration in the loop. */
2741 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2742 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2743 *last_conflicts = chrec_dont_know;
2744 dependence_stats.num_ziv_dependent++;
2746 else
2748 /* The accesses do not overlap. */
2749 *overlaps_a = conflict_fn_no_dependence ();
2750 *overlaps_b = conflict_fn_no_dependence ();
2751 *last_conflicts = integer_zero_node;
2752 dependence_stats.num_ziv_independent++;
2754 break;
2756 default:
2757 /* We're not sure whether the indexes overlap. For the moment,
2758 conservatively answer "don't know". */
2759 if (dump_file && (dump_flags & TDF_DETAILS))
2760 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
2762 *overlaps_a = conflict_fn_not_known ();
2763 *overlaps_b = conflict_fn_not_known ();
2764 *last_conflicts = chrec_dont_know;
2765 dependence_stats.num_ziv_unimplemented++;
2766 break;
2769 if (dump_file && (dump_flags & TDF_DETAILS))
2770 fprintf (dump_file, ")\n");
2773 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
2774 and only if it fits to the int type. If this is not the case, or the
2775 bound on the number of iterations of LOOP could not be derived, returns
2776 chrec_dont_know. */
2778 static tree
2779 max_stmt_executions_tree (struct loop *loop)
2781 widest_int nit;
2783 if (!max_stmt_executions (loop, &nit))
2784 return chrec_dont_know;
2786 if (!wi::fits_to_tree_p (nit, unsigned_type_node))
2787 return chrec_dont_know;
2789 return wide_int_to_tree (unsigned_type_node, nit);
2792 /* Determine whether the CHREC is always positive/negative. If the expression
2793 cannot be statically analyzed, return false, otherwise set the answer into
2794 VALUE. */
2796 static bool
2797 chrec_is_positive (tree chrec, bool *value)
2799 bool value0, value1, value2;
2800 tree end_value, nb_iter;
2802 switch (TREE_CODE (chrec))
2804 case POLYNOMIAL_CHREC:
2805 if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
2806 || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
2807 return false;
2809 /* FIXME -- overflows. */
2810 if (value0 == value1)
2812 *value = value0;
2813 return true;
2816 /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
2817 and the proof consists in showing that the sign never
2818 changes during the execution of the loop, from 0 to
2819 loop->nb_iterations. */
2820 if (!evolution_function_is_affine_p (chrec))
2821 return false;
2823 nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
2824 if (chrec_contains_undetermined (nb_iter))
2825 return false;
2827 #if 0
2828 /* TODO -- If the test is after the exit, we may decrease the number of
2829 iterations by one. */
2830 if (after_exit)
2831 nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
2832 #endif
2834 end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
2836 if (!chrec_is_positive (end_value, &value2))
2837 return false;
2839 *value = value0;
2840 return value0 == value1;
2842 case INTEGER_CST:
2843 switch (tree_int_cst_sgn (chrec))
2845 case -1:
2846 *value = false;
2847 break;
2848 case 1:
2849 *value = true;
2850 break;
2851 default:
2852 return false;
2854 return true;
2856 default:
2857 return false;
2862 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
2863 constant, and CHREC_B is an affine function. *OVERLAPS_A and
2864 *OVERLAPS_B are initialized to the functions that describe the
2865 relation between the elements accessed twice by CHREC_A and
2866 CHREC_B. For k >= 0, the following property is verified:
2868 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2870 static void
2871 analyze_siv_subscript_cst_affine (tree chrec_a,
2872 tree chrec_b,
2873 conflict_function **overlaps_a,
2874 conflict_function **overlaps_b,
2875 tree *last_conflicts)
2877 bool value0, value1, value2;
2878 tree type, difference, tmp;
/* Work in a common signed type; DIFFERENCE is CHREC_B's initial value
   minus the constant CHREC_A.  */
2880 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2881 chrec_a = chrec_convert (type, chrec_a, NULL);
2882 chrec_b = chrec_convert (type, chrec_b, NULL);
2883 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
2885 /* Special case overlap in the first iteration. */
2886 if (integer_zerop (difference))
2888 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2889 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2890 *last_conflicts = integer_one_node;
2891 return;
/* The rest is a case analysis on the sign of DIFFERENCE (VALUE0) and
   the sign of CHREC_B's step (VALUE1/VALUE2); the two outer branches
   below mirror each other.  A conflict exists only when CHREC_B
   evolves towards CHREC_A and the step divides the difference.  */
2894 if (!chrec_is_positive (initial_condition (difference), &value0))
2896 if (dump_file && (dump_flags & TDF_DETAILS))
2897 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
2899 dependence_stats.num_siv_unimplemented++;
2900 *overlaps_a = conflict_fn_not_known ();
2901 *overlaps_b = conflict_fn_not_known ();
2902 *last_conflicts = chrec_dont_know;
2903 return;
2905 else
2907 if (value0 == false)
2909 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
2911 if (dump_file && (dump_flags & TDF_DETAILS))
2912 fprintf (dump_file, "siv test failed: chrec not positive.\n");
2914 *overlaps_a = conflict_fn_not_known ();
2915 *overlaps_b = conflict_fn_not_known ();
2916 *last_conflicts = chrec_dont_know;
2917 dependence_stats.num_siv_unimplemented++;
2918 return;
2920 else
2922 if (value1 == true)
2924 /* Example:
2925 chrec_a = 12
2926 chrec_b = {10, +, 1}
2929 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
2931 HOST_WIDE_INT numiter;
2932 struct loop *loop = get_chrec_loop (chrec_b);
/* TMP is the iteration of CHREC_B at which the values meet:
   |difference| / step.  */
2934 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2935 tmp = fold_build2 (EXACT_DIV_EXPR, type,
2936 fold_build1 (ABS_EXPR, type, difference),
2937 CHREC_RIGHT (chrec_b));
2938 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
2939 *last_conflicts = integer_one_node;
2942 /* Perform weak-zero siv test to see if overlap is
2943 outside the loop bounds. */
2944 numiter = max_stmt_executions_int (loop);
2946 if (numiter >= 0
2947 && compare_tree_int (tmp, numiter) > 0)
2949 free_conflict_function (*overlaps_a);
2950 free_conflict_function (*overlaps_b);
2951 *overlaps_a = conflict_fn_no_dependence ();
2952 *overlaps_b = conflict_fn_no_dependence ();
2953 *last_conflicts = integer_zero_node;
2954 dependence_stats.num_siv_independent++;
2955 return;
2957 dependence_stats.num_siv_dependent++;
2958 return;
2961 /* When the step does not divide the difference, there are
2962 no overlaps. */
2963 else
2965 *overlaps_a = conflict_fn_no_dependence ();
2966 *overlaps_b = conflict_fn_no_dependence ();
2967 *last_conflicts = integer_zero_node;
2968 dependence_stats.num_siv_independent++;
2969 return;
2973 else
2975 /* Example:
2976 chrec_a = 12
2977 chrec_b = {10, +, -1}
2979 In this case, chrec_a will not overlap with chrec_b. */
2980 *overlaps_a = conflict_fn_no_dependence ();
2981 *overlaps_b = conflict_fn_no_dependence ();
2982 *last_conflicts = integer_zero_node;
2983 dependence_stats.num_siv_independent++;
2984 return;
/* Symmetric case: DIFFERENCE is positive, so a conflict requires a
   negative step in CHREC_B.  */
2988 else
2990 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
2992 if (dump_file && (dump_flags & TDF_DETAILS))
2993 fprintf (dump_file, "siv test failed: chrec not positive.\n");
2995 *overlaps_a = conflict_fn_not_known ();
2996 *overlaps_b = conflict_fn_not_known ();
2997 *last_conflicts = chrec_dont_know;
2998 dependence_stats.num_siv_unimplemented++;
2999 return;
3001 else
3003 if (value2 == false)
3005 /* Example:
3006 chrec_a = 3
3007 chrec_b = {10, +, -1}
3009 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3011 HOST_WIDE_INT numiter;
3012 struct loop *loop = get_chrec_loop (chrec_b);
3014 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3015 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
3016 CHREC_RIGHT (chrec_b));
3017 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3018 *last_conflicts = integer_one_node;
3020 /* Perform weak-zero siv test to see if overlap is
3021 outside the loop bounds. */
3022 numiter = max_stmt_executions_int (loop);
3024 if (numiter >= 0
3025 && compare_tree_int (tmp, numiter) > 0)
3027 free_conflict_function (*overlaps_a);
3028 free_conflict_function (*overlaps_b);
3029 *overlaps_a = conflict_fn_no_dependence ();
3030 *overlaps_b = conflict_fn_no_dependence ();
3031 *last_conflicts = integer_zero_node;
3032 dependence_stats.num_siv_independent++;
3033 return;
3035 dependence_stats.num_siv_dependent++;
3036 return;
3039 /* When the step does not divide the difference, there
3040 are no overlaps. */
3041 else
3043 *overlaps_a = conflict_fn_no_dependence ();
3044 *overlaps_b = conflict_fn_no_dependence ();
3045 *last_conflicts = integer_zero_node;
3046 dependence_stats.num_siv_independent++;
3047 return;
3050 else
3052 /* Example:
3053 chrec_a = 3
3054 chrec_b = {4, +, 1}
3056 In this case, chrec_a will not overlap with chrec_b. */
3057 *overlaps_a = conflict_fn_no_dependence ();
3058 *overlaps_b = conflict_fn_no_dependence ();
3059 *last_conflicts = integer_zero_node;
3060 dependence_stats.num_siv_independent++;
3061 return;
3068 /* Helper recursive function for initializing the matrix A. Returns
3069 the initial value of CHREC. */
3071 static tree
3072 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
3074 gcc_assert (chrec);
3076 switch (TREE_CODE (chrec))
3078 case POLYNOMIAL_CHREC:
3079 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
3080 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
3082 case PLUS_EXPR:
3083 case MULT_EXPR:
3084 case MINUS_EXPR:
3086 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3087 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
3089 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
3092 CASE_CONVERT:
3094 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3095 return chrec_convert (chrec_type (chrec), op, NULL);
3098 case BIT_NOT_EXPR:
3100 /* Handle ~X as -1 - X. */
3101 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3102 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
3103 build_int_cst (TREE_TYPE (chrec), -1), op);
3106 case INTEGER_CST:
3107 return chrec;
3109 default:
3110 gcc_unreachable ();
3111 return NULL_TREE;
3115 #define FLOOR_DIV(x,y) ((x) / (y))
3117 /* Solves the special case of the Diophantine equation:
3118 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
3120 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
3121 number of iterations that loops X and Y run. The overlaps will be
3122 constructed as evolutions in dimension DIM. */
3124 static void
3125 compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
3126 HOST_WIDE_INT step_a,
3127 HOST_WIDE_INT step_b,
3128 affine_fn *overlaps_a,
3129 affine_fn *overlaps_b,
3130 tree *last_conflicts, int dim)
3132 if (((step_a > 0 && step_b > 0)
3133 || (step_a < 0 && step_b < 0)))
3135 HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
3136 HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;
3138 gcd_steps_a_b = gcd (step_a, step_b);
3139 step_overlaps_a = step_b / gcd_steps_a_b;
3140 step_overlaps_b = step_a / gcd_steps_a_b;
3142 if (niter > 0)
3144 tau2 = FLOOR_DIV (niter, step_overlaps_a);
3145 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
3146 last_conflict = tau2;
3147 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
3149 else
3150 *last_conflicts = chrec_dont_know;
3152 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
3153 build_int_cst (NULL_TREE,
3154 step_overlaps_a));
3155 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
3156 build_int_cst (NULL_TREE,
3157 step_overlaps_b));
3160 else
3162 *overlaps_a = affine_fn_cst (integer_zero_node);
3163 *overlaps_b = affine_fn_cst (integer_zero_node);
3164 *last_conflicts = integer_zero_node;
3168 /* Solves the special case of a Diophantine equation where CHREC_A is
3169 an affine bivariate function, and CHREC_B is an affine univariate
3170 function. For example,
3172 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
3174 has the following overlapping functions:
3176 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
3177 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
3178 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
3180 FORNOW: This is a specialized implementation for a case occurring in
3181 a common benchmark. Implement the general algorithm. */
3183 static void
3184 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
3185 conflict_function **overlaps_a,
3186 conflict_function **overlaps_b,
3187 tree *last_conflicts)
3189 bool xz_p, yz_p, xyz_p;
3190 HOST_WIDE_INT step_x, step_y, step_z;
3191 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
3192 affine_fn overlaps_a_xz, overlaps_b_xz;
3193 affine_fn overlaps_a_yz, overlaps_b_yz;
3194 affine_fn overlaps_a_xyz, overlaps_b_xyz;
3195 affine_fn ova1, ova2, ovb;
3196 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
/* CHREC_A is {{_, +, step_x}_x, +, step_y}_y and CHREC_B is
   {_, +, step_z}_z: extract the three steps and the three loop
   iteration bounds.  */
3198 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
3199 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
3200 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
3202 niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
3203 niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
3204 niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
/* All three iteration counts must be known; otherwise punt.  */
3206 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
3208 if (dump_file && (dump_flags & TDF_DETAILS))
3209 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
3211 *overlaps_a = conflict_fn_not_known ();
3212 *overlaps_b = conflict_fn_not_known ();
3213 *last_conflicts = chrec_dont_know;
3214 return;
/* Solve the three univariate sub-problems: x against z, y against z,
   and the combined (x + y) step against z, each in its own overlap
   dimension (1, 2 and 3).  */
3217 niter = MIN (niter_x, niter_z);
3218 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
3219 &overlaps_a_xz,
3220 &overlaps_b_xz,
3221 &last_conflicts_xz, 1);
3222 niter = MIN (niter_y, niter_z);
3223 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
3224 &overlaps_a_yz,
3225 &overlaps_b_yz,
3226 &last_conflicts_yz, 2);
3227 niter = MIN (niter_x, niter_z);
3228 niter = MIN (niter_y, niter);
3229 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
3230 &overlaps_a_xyz,
3231 &overlaps_b_xyz,
3232 &last_conflicts_xyz, 3);
3234 xz_p = !integer_zerop (last_conflicts_xz);
3235 yz_p = !integer_zerop (last_conflicts_yz);
3236 xyz_p = !integer_zerop (last_conflicts_xyz);
/* Sum the contributions of each sub-problem that produced conflicts
   into the final two-dimensional (OVA1, OVA2) / one-dimensional (OVB)
   overlap functions, freeing the intermediate sums as we go.
   NOTE(review): *LAST_CONFLICTS keeps only the bound of the last
   contributing sub-problem, not a combination — presumably a
   conservative simplification; confirm against callers.  */
3238 if (xz_p || yz_p || xyz_p)
3240 ova1 = affine_fn_cst (integer_zero_node);
3241 ova2 = affine_fn_cst (integer_zero_node);
3242 ovb = affine_fn_cst (integer_zero_node);
3243 if (xz_p)
3245 affine_fn t0 = ova1;
3246 affine_fn t2 = ovb;
3248 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
3249 ovb = affine_fn_plus (ovb, overlaps_b_xz);
3250 affine_fn_free (t0);
3251 affine_fn_free (t2);
3252 *last_conflicts = last_conflicts_xz;
3254 if (yz_p)
3256 affine_fn t0 = ova2;
3257 affine_fn t2 = ovb;
3259 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
3260 ovb = affine_fn_plus (ovb, overlaps_b_yz);
3261 affine_fn_free (t0);
3262 affine_fn_free (t2);
3263 *last_conflicts = last_conflicts_yz;
3265 if (xyz_p)
3267 affine_fn t0 = ova1;
3268 affine_fn t2 = ova2;
3269 affine_fn t4 = ovb;
3271 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
3272 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
3273 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
3274 affine_fn_free (t0);
3275 affine_fn_free (t2);
3276 affine_fn_free (t4);
3277 *last_conflicts = last_conflicts_xyz;
3279 *overlaps_a = conflict_fn (2, ova1, ova2);
3280 *overlaps_b = conflict_fn (1, ovb);
3282 else
3284 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3285 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3286 *last_conflicts = integer_zero_node;
/* Release the per-sub-problem temporaries; their contents were copied
   by affine_fn_plus above.  */
3289 affine_fn_free (overlaps_a_xz);
3290 affine_fn_free (overlaps_b_xz);
3291 affine_fn_free (overlaps_a_yz);
3292 affine_fn_free (overlaps_b_yz);
3293 affine_fn_free (overlaps_a_xyz);
3294 affine_fn_free (overlaps_b_xyz);
3297 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
3299 static void
3300 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
3301 int size)
3303 memcpy (vec2, vec1, size * sizeof (*vec1));
3306 /* Copy the elements of M x N matrix MAT1 to MAT2. */
3308 static void
3309 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
3310 int m, int n)
3312 int i;
3314 for (i = 0; i < m; i++)
3315 lambda_vector_copy (mat1[i], mat2[i], n);
3318 /* Store the N x N identity matrix in MAT. */
3320 static void
3321 lambda_matrix_id (lambda_matrix mat, int size)
3323 int i, j;
3325 for (i = 0; i < size; i++)
3326 for (j = 0; j < size; j++)
3327 mat[i][j] = (i == j) ? 1 : 0;
3330 /* Return the first nonzero element of vector VEC1 between START and N.
3331 We must have START <= N. Returns N if VEC1 is the zero vector. */
3333 static int
3334 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
3336 int j = start;
3337 while (j < n && vec1[j] == 0)
3338 j++;
3339 return j;
3342 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
3343 R2 = R2 + CONST1 * R1. */
3345 static void
3346 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
3348 int i;
3350 if (const1 == 0)
3351 return;
3353 for (i = 0; i < n; i++)
3354 mat[r2][i] += const1 * mat[r1][i];
3357 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
3358 and store the result in VEC2. */
3360 static void
3361 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
3362 int size, int const1)
3364 int i;
3366 if (const1 == 0)
3367 lambda_vector_clear (vec2, size);
3368 else
3369 for (i = 0; i < size; i++)
3370 vec2[i] = const1 * vec1[i];
3373 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
3375 static void
3376 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
3377 int size)
3379 lambda_vector_mult_const (vec1, vec2, size, -1);
3382 /* Negate row R1 of matrix MAT which has N columns. */
3384 static void
3385 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
3387 lambda_vector_negate (mat[r1], mat[r1], n);
3390 /* Return true if two vectors are equal. */
3392 static bool
3393 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
3395 int i;
3396 for (i = 0; i < size; i++)
3397 if (vec1[i] != vec2[i])
3398 return false;
3399 return true;
3402 /* Given an M x N integer matrix A, this function determines an M x
3403 M unimodular matrix U, and an M x N echelon matrix S such that
3404 "U.A = S". This decomposition is also known as "right Hermite".
3406 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
3407 Restructuring Compilers" Utpal Banerjee. */
3409 static void
3410 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
3411 lambda_matrix S, lambda_matrix U)
3413 int i, j, i0 = 0;
/* Start from S = A and U = identity; every row operation applied to S
   is mirrored on U so that the invariant U.A = S is maintained.  */
3415 lambda_matrix_copy (A, S, m, n);
3416 lambda_matrix_id (U, m);
3418 for (j = 0; j < n; j++)
3420 if (lambda_vector_first_nz (S[j], m, i0) < m)
3422 ++i0;
3423 for (i = m - 1; i >= i0; i--)
/* Euclid-style reduction: repeatedly subtract the right multiple of
   the row above and swap, until the entry in column J becomes zero.  */
3425 while (S[i][j] != 0)
3427 int sigma, factor, a, b;
3429 a = S[i-1][j];
3430 b = S[i][j];
/* SIGMA carries the sign so FACTOR rounds the quotient toward zero
   even when A and B have opposite signs.  */
3431 sigma = (a * b < 0) ? -1: 1;
3432 a = abs (a);
3433 b = abs (b);
3434 factor = sigma * (a / b);
3436 lambda_matrix_row_add (S, n, i, i-1, -factor);
3437 std::swap (S[i], S[i-1]);
/* Mirror the same row operation and swap on U.  */
3439 lambda_matrix_row_add (U, m, i, i-1, -factor);
3440 std::swap (U[i], U[i-1]);
3447 /* Determines the overlapping elements due to accesses CHREC_A and
3448 CHREC_B, that are affine functions. This function cannot handle
3449 symbolic evolution functions, ie. when initial conditions are
3450 parameters, because it uses lambda matrices of integers. */
3452 static void
3453 analyze_subscript_affine_affine (tree chrec_a,
3454 tree chrec_b,
3455 conflict_function **overlaps_a,
3456 conflict_function **overlaps_b,
3457 tree *last_conflicts)
3459 unsigned nb_vars_a, nb_vars_b, dim;
3460 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
3461 lambda_matrix A, U, S;
3462 struct obstack scratch_obstack;
/* Fast path: identical evolutions conflict at every iteration, so no
   Diophantine machinery is needed.  */
3464 if (eq_evolutions_p (chrec_a, chrec_b))
3466 /* The accessed index overlaps for each iteration in the
3467 loop. */
3468 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3469 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3470 *last_conflicts = chrec_dont_know;
3471 return;
3473 if (dump_file && (dump_flags & TDF_DETAILS))
3474 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
3476 /* For determining the initial intersection, we have to solve a
3477 Diophantine equation. This is the most time consuming part.
3479 For answering to the question: "Is there a dependence?" we have
3480 to prove that there exists a solution to the Diophantine
3481 equation, and that the solution is in the iteration domain,
3482 i.e. the solution is positive or zero, and that the solution
3483 happens before the upper bound loop.nb_iterations. Otherwise
3484 there is no dependence. This function outputs a description of
3485 the iterations that hold the intersections. */
3487 nb_vars_a = nb_vars_in_chrec (chrec_a);
3488 nb_vars_b = nb_vars_in_chrec (chrec_b);
/* All matrices below live on this obstack; it is freed wholesale at
   end_analyze_subs_aa, which is why errors "goto" there.  */
3490 gcc_obstack_init (&scratch_obstack);
3492 dim = nb_vars_a + nb_vars_b;
3493 U = lambda_matrix_new (dim, dim, &scratch_obstack);
3494 A = lambda_matrix_new (dim, 1, &scratch_obstack);
3495 S = lambda_matrix_new (dim, 1, &scratch_obstack);
3497 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
3498 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
/* GAMMA is the constant term of the equation "chrec_a (x) = chrec_b (y)".  */
3499 gamma = init_b - init_a;
3501 /* Don't do all the hard work of solving the Diophantine equation
3502 when we already know the solution: for example,
3503 | {3, +, 1}_1
3504 | {3, +, 4}_2
3505 | gamma = 3 - 3 = 0.
3506 Then the first overlap occurs during the first iterations:
3507 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
3509 if (gamma == 0)
3511 if (nb_vars_a == 1 && nb_vars_b == 1)
3513 HOST_WIDE_INT step_a, step_b;
3514 HOST_WIDE_INT niter, niter_a, niter_b;
3515 affine_fn ova, ovb;
3517 niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
3518 niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
3519 niter = MIN (niter_a, niter_b);
3520 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
3521 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
3523 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
3524 &ova, &ovb,
3525 last_conflicts, 1);
3526 *overlaps_a = conflict_fn (1, ova);
3527 *overlaps_b = conflict_fn (1, ovb);
3530 else if (nb_vars_a == 2 && nb_vars_b == 1)
3531 compute_overlap_steps_for_affine_1_2
3532 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
3534 else if (nb_vars_a == 1 && nb_vars_b == 2)
3535 compute_overlap_steps_for_affine_1_2
3536 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
3538 else
3540 if (dump_file && (dump_flags & TDF_DETAILS))
3541 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
3542 *overlaps_a = conflict_fn_not_known ();
3543 *overlaps_b = conflict_fn_not_known ();
3544 *last_conflicts = chrec_dont_know;
3546 goto end_analyze_subs_aa;
3549 /* U.A = S */
3550 lambda_matrix_right_hermite (A, dim, 1, S, U);
/* Normalize so the pivot of S is positive, mirroring the sign flip
   on U to keep U.A = S.  */
3552 if (S[0][0] < 0)
3554 S[0][0] *= -1;
3555 lambda_matrix_row_negate (U, dim, 0);
3557 gcd_alpha_beta = S[0][0];
3559 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
3560 but that is a quite strange case. Instead of ICEing, answer
3561 don't know. */
3562 if (gcd_alpha_beta == 0)
3564 *overlaps_a = conflict_fn_not_known ();
3565 *overlaps_b = conflict_fn_not_known ();
3566 *last_conflicts = chrec_dont_know;
3567 goto end_analyze_subs_aa;
3570 /* The classic "gcd-test". */
3571 if (!int_divides_p (gcd_alpha_beta, gamma))
3573 /* The "gcd-test" has determined that there is no integer
3574 solution, i.e. there is no dependence. */
3575 *overlaps_a = conflict_fn_no_dependence ();
3576 *overlaps_b = conflict_fn_no_dependence ();
3577 *last_conflicts = integer_zero_node;
3580 /* Both access functions are univariate. This includes SIV and MIV cases. */
3581 else if (nb_vars_a == 1 && nb_vars_b == 1)
3583 /* Both functions should have the same evolution sign. */
3584 if (((A[0][0] > 0 && -A[1][0] > 0)
3585 || (A[0][0] < 0 && -A[1][0] < 0)))
3587 /* The solutions are given by:
3589 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
3590 | [u21 u22] [y0]
3592 For a given integer t. Using the following variables,
3594 | i0 = u11 * gamma / gcd_alpha_beta
3595 | j0 = u12 * gamma / gcd_alpha_beta
3596 | i1 = u21
3597 | j1 = u22
3599 the solutions are:
3601 | x0 = i0 + i1 * t,
3602 | y0 = j0 + j1 * t. */
3603 HOST_WIDE_INT i0, j0, i1, j1;
3605 i0 = U[0][0] * gamma / gcd_alpha_beta;
3606 j0 = U[0][1] * gamma / gcd_alpha_beta;
3607 i1 = U[1][0];
3608 j1 = U[1][1];
3610 if ((i1 == 0 && i0 < 0)
3611 || (j1 == 0 && j0 < 0))
3613 /* There is no solution.
3614 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
3615 falls in here, but for the moment we don't look at the
3616 upper bound of the iteration domain. */
3617 *overlaps_a = conflict_fn_no_dependence ();
3618 *overlaps_b = conflict_fn_no_dependence ();
3619 *last_conflicts = integer_zero_node;
3620 goto end_analyze_subs_aa;
3623 if (i1 > 0 && j1 > 0)
3625 HOST_WIDE_INT niter_a
3626 = max_stmt_executions_int (get_chrec_loop (chrec_a));
3627 HOST_WIDE_INT niter_b
3628 = max_stmt_executions_int (get_chrec_loop (chrec_b));
3629 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
3631 /* (X0, Y0) is a solution of the Diophantine equation:
3632 "chrec_a (X0) = chrec_b (Y0)". */
3633 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
3634 CEIL (-j0, j1));
3635 HOST_WIDE_INT x0 = i1 * tau1 + i0;
3636 HOST_WIDE_INT y0 = j1 * tau1 + j0;
3638 /* (X1, Y1) is the smallest positive solution of the eq
3639 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
3640 first conflict occurs. */
3641 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
3642 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
3643 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
/* NITER < 0 means the loop bound is unknown; then we can describe the
   conflicts but not when they stop.  */
3645 if (niter > 0)
3647 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter_a - i0, i1),
3648 FLOOR_DIV (niter_b - j0, j1));
3649 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
3651 /* If the overlap occurs outside of the bounds of the
3652 loop, there is no dependence. */
3653 if (x1 >= niter_a || y1 >= niter_b)
3655 *overlaps_a = conflict_fn_no_dependence ();
3656 *overlaps_b = conflict_fn_no_dependence ();
3657 *last_conflicts = integer_zero_node;
3658 goto end_analyze_subs_aa;
3660 else
3661 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
3663 else
3664 *last_conflicts = chrec_dont_know;
3666 *overlaps_a
3667 = conflict_fn (1,
3668 affine_fn_univar (build_int_cst (NULL_TREE, x1),
3670 build_int_cst (NULL_TREE, i1)));
3671 *overlaps_b
3672 = conflict_fn (1,
3673 affine_fn_univar (build_int_cst (NULL_TREE, y1),
3675 build_int_cst (NULL_TREE, j1)));
3677 else
3679 /* FIXME: For the moment, the upper bound of the
3680 iteration domain for i and j is not checked. */
3681 if (dump_file && (dump_flags & TDF_DETAILS))
3682 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
3683 *overlaps_a = conflict_fn_not_known ();
3684 *overlaps_b = conflict_fn_not_known ();
3685 *last_conflicts = chrec_dont_know;
3688 else
3690 if (dump_file && (dump_flags & TDF_DETAILS))
3691 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
3692 *overlaps_a = conflict_fn_not_known ();
3693 *overlaps_b = conflict_fn_not_known ();
3694 *last_conflicts = chrec_dont_know;
3697 else
3699 if (dump_file && (dump_flags & TDF_DETAILS))
3700 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
3701 *overlaps_a = conflict_fn_not_known ();
3702 *overlaps_b = conflict_fn_not_known ();
3703 *last_conflicts = chrec_dont_know;
/* Single exit: release the scratch obstack used for U, A and S.  */
3706 end_analyze_subs_aa:
3707 obstack_free (&scratch_obstack, NULL);
3708 if (dump_file && (dump_flags & TDF_DETAILS))
3710 fprintf (dump_file, " (overlaps_a = ");
3711 dump_conflict_function (dump_file, *overlaps_a);
3712 fprintf (dump_file, ")\n (overlaps_b = ");
3713 dump_conflict_function (dump_file, *overlaps_b);
3714 fprintf (dump_file, "))\n");
3718 /* Returns true when analyze_subscript_affine_affine can be used for
3719 determining the dependence relation between chrec_a and chrec_b,
3720 that contain symbols. This function modifies chrec_a and chrec_b
3721 such that the analysis result is the same, and such that they don't
3722 contain symbols, and then can safely be passed to the analyzer.
3724 Example: The analysis of the following tuples of evolutions produce
3725 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
3726 vs. {0, +, 1}_1
3728 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
3729 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
3732 static bool
3733 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
3735 tree diff, type, left_a, left_b, right_b;
3737 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
3738 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
3739 /* FIXME: For the moment not handled. Might be refined later. */
3740 return false;
3742 type = chrec_type (*chrec_a);
3743 left_a = CHREC_LEFT (*chrec_a);
3744 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
3745 diff = chrec_fold_minus (type, left_a, left_b);
3747 if (!evolution_function_is_constant_p (diff))
3748 return false;
3750 if (dump_file && (dump_flags & TDF_DETAILS))
3751 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
3753 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
3754 diff, CHREC_RIGHT (*chrec_a));
3755 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
3756 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
3757 build_int_cst (type, 0),
3758 right_b);
3759 return true;
3762 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
3763 *OVERLAPS_B are initialized to the functions that describe the
3764 relation between the elements accessed twice by CHREC_A and
3765 CHREC_B. For k >= 0, the following property is verified:
3767 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
3769 static void
3770 analyze_siv_subscript (tree chrec_a,
3771 tree chrec_b,
3772 conflict_function **overlaps_a,
3773 conflict_function **overlaps_b,
3774 tree *last_conflicts,
3775 int loop_nest_num)
3777 dependence_stats.num_siv++;
3779 if (dump_file && (dump_flags & TDF_DETAILS))
3780 fprintf (dump_file, "(analyze_siv_subscript \n");
/* Dispatch on the shape of the two access functions: constant vs.
   affine (either order), then affine vs. affine.  */
3782 if (evolution_function_is_constant_p (chrec_a)
3783 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
3784 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
3785 overlaps_a, overlaps_b, last_conflicts);
3787 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
3788 && evolution_function_is_constant_p (chrec_b))
3789 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
3790 overlaps_b, overlaps_a, last_conflicts);
3792 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
3793 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
3795 if (!chrec_contains_symbols (chrec_a)
3796 && !chrec_contains_symbols (chrec_b))
3798 analyze_subscript_affine_affine (chrec_a, chrec_b,
3799 overlaps_a, overlaps_b,
3800 last_conflicts);
3802 if (CF_NOT_KNOWN_P (*overlaps_a)
3803 || CF_NOT_KNOWN_P (*overlaps_b))
3804 dependence_stats.num_siv_unimplemented++;
3805 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
3806 || CF_NO_DEPENDENCE_P (*overlaps_b))
3807 dependence_stats.num_siv_independent++;
3808 else
3809 dependence_stats.num_siv_dependent++;
/* Symbolic chrecs: try to rewrite them into symbol-free form first
   (this mutates the local chrec_a/chrec_b copies).  */
3811 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
3812 &chrec_b))
3814 analyze_subscript_affine_affine (chrec_a, chrec_b,
3815 overlaps_a, overlaps_b,
3816 last_conflicts);
3818 if (CF_NOT_KNOWN_P (*overlaps_a)
3819 || CF_NOT_KNOWN_P (*overlaps_b))
3820 dependence_stats.num_siv_unimplemented++;
3821 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
3822 || CF_NO_DEPENDENCE_P (*overlaps_b))
3823 dependence_stats.num_siv_independent++;
3824 else
3825 dependence_stats.num_siv_dependent++;
3827 else
3828 goto siv_subscript_dontknow;
3831 else
/* Shared "don't know" answer; also the target of the goto above.  */
3833 siv_subscript_dontknow:;
3834 if (dump_file && (dump_flags & TDF_DETAILS))
3835 fprintf (dump_file, " siv test failed: unimplemented");
3836 *overlaps_a = conflict_fn_not_known ();
3837 *overlaps_b = conflict_fn_not_known ();
3838 *last_conflicts = chrec_dont_know;
3839 dependence_stats.num_siv_unimplemented++;
3842 if (dump_file && (dump_flags & TDF_DETAILS))
3843 fprintf (dump_file, ")\n");
3846 /* Returns false if we can prove that the greatest common divisor of the steps
3847 of CHREC does not divide CST, false otherwise. */
3849 static bool
3850 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
3852 HOST_WIDE_INT cd = 0, val;
3853 tree step;
3855 if (!tree_fits_shwi_p (cst))
3856 return true;
3857 val = tree_to_shwi (cst);
3859 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
3861 step = CHREC_RIGHT (chrec);
3862 if (!tree_fits_shwi_p (step))
3863 return true;
3864 cd = gcd (cd, tree_to_shwi (step));
3865 chrec = CHREC_LEFT (chrec);
3868 return val % cd == 0;
3871 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
3872 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
3873 functions that describe the relation between the elements accessed
3874 twice by CHREC_A and CHREC_B. For k >= 0, the following property
3875 is verified:
3877 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
3879 static void
3880 analyze_miv_subscript (tree chrec_a,
3881 tree chrec_b,
3882 conflict_function **overlaps_a,
3883 conflict_function **overlaps_b,
3884 tree *last_conflicts,
3885 struct loop *loop_nest)
3887 tree type, difference;
3889 dependence_stats.num_miv++;
3890 if (dump_file && (dump_flags & TDF_DETAILS))
3891 fprintf (dump_file, "(analyze_miv_subscript \n");
/* Bring both chrecs to a common signed type before subtracting.  */
3893 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3894 chrec_a = chrec_convert (type, chrec_a, NULL);
3895 chrec_b = chrec_convert (type, chrec_b, NULL);
3896 difference = chrec_fold_minus (type, chrec_a, chrec_b);
3898 if (eq_evolutions_p (chrec_a, chrec_b))
3900 /* Access functions are the same: all the elements are accessed
3901 in the same order. */
3902 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3903 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3904 *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
3905 dependence_stats.num_miv_dependent++;
/* gcd test: a constant difference not divisible by the gcd of the
   steps rules out any overlap.  */
3908 else if (evolution_function_is_constant_p (difference)
3909 /* For the moment, the following is verified:
3910 evolution_function_is_affine_multivariate_p (chrec_a,
3911 loop_nest->num) */
3912 && !gcd_of_steps_may_divide_p (chrec_a, difference))
3914 /* testsuite/.../ssa-chrec-33.c
3915 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
3917 The difference is 1, and all the evolution steps are multiples
3918 of 2, consequently there are no overlapping elements. */
3919 *overlaps_a = conflict_fn_no_dependence ();
3920 *overlaps_b = conflict_fn_no_dependence ();
3921 *last_conflicts = integer_zero_node;
3922 dependence_stats.num_miv_independent++;
3925 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
3926 && !chrec_contains_symbols (chrec_a)
3927 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
3928 && !chrec_contains_symbols (chrec_b))
3930 /* testsuite/.../ssa-chrec-35.c
3931 {0, +, 1}_2 vs. {0, +, 1}_3
3932 the overlapping elements are respectively located at iterations:
3933 {0, +, 1}_x and {0, +, 1}_x,
3934 in other words, we have the equality:
3935 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
3937 Other examples:
3938 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
3939 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
3941 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
3942 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
3944 analyze_subscript_affine_affine (chrec_a, chrec_b,
3945 overlaps_a, overlaps_b, last_conflicts);
3947 if (CF_NOT_KNOWN_P (*overlaps_a)
3948 || CF_NOT_KNOWN_P (*overlaps_b))
3949 dependence_stats.num_miv_unimplemented++;
3950 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
3951 || CF_NO_DEPENDENCE_P (*overlaps_b))
3952 dependence_stats.num_miv_independent++;
3953 else
3954 dependence_stats.num_miv_dependent++;
3957 else
3959 /* When the analysis is too difficult, answer "don't know". */
3960 if (dump_file && (dump_flags & TDF_DETAILS))
3961 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
3963 *overlaps_a = conflict_fn_not_known ();
3964 *overlaps_b = conflict_fn_not_known ();
3965 *last_conflicts = chrec_dont_know;
3966 dependence_stats.num_miv_unimplemented++;
3969 if (dump_file && (dump_flags & TDF_DETAILS))
3970 fprintf (dump_file, ")\n");
3973 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
3974 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
3975 OVERLAP_ITERATIONS_B are initialized with two functions that
3976 describe the iterations that contain conflicting elements.
3978 Remark: For an integer k >= 0, the following equality is true:
3980 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
3983 static void
3984 analyze_overlapping_iterations (tree chrec_a,
3985 tree chrec_b,
3986 conflict_function **overlap_iterations_a,
3987 conflict_function **overlap_iterations_b,
3988 tree *last_conflicts, struct loop *loop_nest)
3990 unsigned int lnn = loop_nest->num;
3992 dependence_stats.num_subscript_tests++;
3994 if (dump_file && (dump_flags & TDF_DETAILS))
3996 fprintf (dump_file, "(analyze_overlapping_iterations \n");
3997 fprintf (dump_file, " (chrec_a = ");
3998 print_generic_expr (dump_file, chrec_a);
3999 fprintf (dump_file, ")\n (chrec_b = ");
4000 print_generic_expr (dump_file, chrec_b);
4001 fprintf (dump_file, ")\n");
/* Undetermined evolutions: answer "don't know".  */
4004 if (chrec_a == NULL_TREE
4005 || chrec_b == NULL_TREE
4006 || chrec_contains_undetermined (chrec_a)
4007 || chrec_contains_undetermined (chrec_b))
4009 dependence_stats.num_subscript_undetermined++;
4011 *overlap_iterations_a = conflict_fn_not_known ();
4012 *overlap_iterations_b = conflict_fn_not_known ();
4015 /* If they are the same chrec, and are affine, they overlap
4016 on every iteration. */
4017 else if (eq_evolutions_p (chrec_a, chrec_b)
4018 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4019 || operand_equal_p (chrec_a, chrec_b, 0)))
4021 dependence_stats.num_same_subscript_function++;
4022 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4023 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4024 *last_conflicts = chrec_dont_know;
4027 /* If they aren't the same, and aren't affine, we can't do anything
4028 yet. */
4029 else if ((chrec_contains_symbols (chrec_a)
4030 || chrec_contains_symbols (chrec_b))
4031 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4032 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
4034 dependence_stats.num_subscript_undetermined++;
4035 *overlap_iterations_a = conflict_fn_not_known ();
4036 *overlap_iterations_b = conflict_fn_not_known ();
/* Classify the pair of subscripts as ZIV/SIV/MIV and delegate.  */
4039 else if (ziv_subscript_p (chrec_a, chrec_b))
4040 analyze_ziv_subscript (chrec_a, chrec_b,
4041 overlap_iterations_a, overlap_iterations_b,
4042 last_conflicts);
4044 else if (siv_subscript_p (chrec_a, chrec_b))
4045 analyze_siv_subscript (chrec_a, chrec_b,
4046 overlap_iterations_a, overlap_iterations_b,
4047 last_conflicts, lnn);
4049 else
4050 analyze_miv_subscript (chrec_a, chrec_b,
4051 overlap_iterations_a, overlap_iterations_b,
4052 last_conflicts, loop_nest);
4054 if (dump_file && (dump_flags & TDF_DETAILS))
4056 fprintf (dump_file, " (overlap_iterations_a = ");
4057 dump_conflict_function (dump_file, *overlap_iterations_a);
4058 fprintf (dump_file, ")\n (overlap_iterations_b = ");
4059 dump_conflict_function (dump_file, *overlap_iterations_b);
4060 fprintf (dump_file, "))\n");
4064 /* Helper function for uniquely inserting distance vectors. */
4066 static void
4067 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
4069 unsigned i;
4070 lambda_vector v;
4072 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, v)
4073 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
4074 return;
4076 DDR_DIST_VECTS (ddr).safe_push (dist_v);
4079 /* Helper function for uniquely inserting direction vectors. */
4081 static void
4082 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
4084 unsigned i;
4085 lambda_vector v;
4087 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), i, v)
4088 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
4089 return;
4091 DDR_DIR_VECTS (ddr).safe_push (dir_v);
4094 /* Add a distance of 1 on all the loops outer than INDEX. If we
4095 haven't yet determined a distance for this outer loop, push a new
4096 distance vector composed of the previous distance, and a distance
4097 of 1 for this outer loop. Example:
4099 | loop_1
4100 | loop_2
4101 | A[10]
4102 | endloop_2
4103 | endloop_1
4105 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
4106 save (0, 1), then we have to save (1, 0). */
4108 static void
4109 add_outer_distances (struct data_dependence_relation *ddr,
4110 lambda_vector dist_v, int index)
4112 /* For each outer loop where init_v is not set, the accesses are
4113 in dependence of distance 1 in the loop. */
4114 while (--index >= 0)
4116 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4117 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
4118 save_v[index] = 1;
4119 save_dist_v (ddr, save_v);
4123 /* Return false when fail to represent the data dependence as a
4124 distance vector. A_INDEX is the index of the first reference
4125 (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
4126 second reference. INIT_B is set to true when a component has been
4127 added to the distance vector DIST_V. INDEX_CARRY is then set to
4128 the index in DIST_V that carries the dependence. */
4130 static bool
4131 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
4132 unsigned int a_index, unsigned int b_index,
4133 lambda_vector dist_v, bool *init_b,
4134 int *index_carry)
4136 unsigned i;
/* init_v[k] records whether a distance has been set for loop K,
   so the coupling test below can detect contradictions.  */
4137 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4139 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
4141 tree access_fn_a, access_fn_b;
4142 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
4144 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
4146 non_affine_dependence_relation (ddr);
4147 return false;
4150 access_fn_a = SUB_ACCESS_FN (subscript, a_index);
4151 access_fn_b = SUB_ACCESS_FN (subscript, b_index);
4153 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
4154 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
4156 HOST_WIDE_INT dist;
4157 int index;
4158 int var_a = CHREC_VARIABLE (access_fn_a);
4159 int var_b = CHREC_VARIABLE (access_fn_b);
/* Both chrecs must evolve in the same loop to yield a distance.  */
4161 if (var_a != var_b
4162 || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
4164 non_affine_dependence_relation (ddr);
4165 return false;
4168 dist = int_cst_value (SUB_DISTANCE (subscript));
4169 index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
4170 *index_carry = MIN (index, *index_carry);
4172 /* This is the subscript coupling test. If we have already
4173 recorded a distance for this loop (a distance coming from
4174 another subscript), it should be the same. For example,
4175 in the following code, there is no dependence:
4177 | loop i = 0, N, 1
4178 | T[i+1][i] = ...
4179 | ... = T[i][i]
4180 | endloop
4182 if (init_v[index] != 0 && dist_v[index] != dist)
4184 finalize_ddr_dependent (ddr, chrec_known);
4185 return false;
4188 dist_v[index] = dist;
4189 init_v[index] = 1;
4190 *init_b = true;
4192 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
4194 /* This can be for example an affine vs. constant dependence
4195 (T[i] vs. T[3]) that is not an affine dependence and is
4196 not representable as a distance vector. */
4197 non_affine_dependence_relation (ddr);
4198 return false;
4202 return true;
4205 /* Return true when the DDR contains only constant access functions. */
4207 static bool
4208 constant_access_functions (const struct data_dependence_relation *ddr)
4210 unsigned i;
4211 subscript *sub;
4213 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4214 if (!evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 0))
4215 || !evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 1)))
4216 return false;
4218 return true;
4221 /* Helper function for the case where DDR_A and DDR_B are the same
4222 multivariate access function with a constant step. For an example
4223 see pr34635-1.c. */
4225 static void
4226 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
4228 int x_1, x_2;
/* C_2 is the outer chrec {{c_0, +, step_1}_1, +, step_2}_2; peel off
   its left parts to reach the inner chrec and the base.  */
4229 tree c_1 = CHREC_LEFT (c_2);
4230 tree c_0 = CHREC_LEFT (c_1);
4231 lambda_vector dist_v;
4232 HOST_WIDE_INT v1, v2, cd;
4234 /* Polynomials with more than 2 variables are not handled yet. When
4235 the evolution steps are parameters, it is not possible to
4236 represent the dependence using classical distance vectors. */
4237 if (TREE_CODE (c_0) != INTEGER_CST
4238 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
4239 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
4241 DDR_AFFINE_P (ddr) = false;
4242 return;
4245 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
4246 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
4248 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
4249 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4250 v1 = int_cst_value (CHREC_RIGHT (c_1));
4251 v2 = int_cst_value (CHREC_RIGHT (c_2));
/* Reduce the step pair by their gcd to get the smallest integral
   distance vector.  */
4252 cd = gcd (v1, v2);
4253 v1 /= cd;
4254 v2 /= cd;
/* Canonicalize the sign so that the outer-loop component is positive.  */
4256 if (v2 < 0)
4258 v2 = -v2;
4259 v1 = -v1;
4262 dist_v[x_1] = v2;
4263 dist_v[x_2] = -v1;
4264 save_dist_v (ddr, dist_v);
4266 add_outer_distances (ddr, dist_v, x_1);
4269 /* Helper function for the case where DDR_A and DDR_B are the same
4270 access functions. */
4272 static void
4273 add_other_self_distances (struct data_dependence_relation *ddr)
4275 lambda_vector dist_v;
4276 unsigned i;
4277 int index_carry = DDR_NB_LOOPS (ddr);
4278 subscript *sub;
4280 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4282 tree access_fun = SUB_ACCESS_FN (sub, 0);
4284 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
/* Multivariate chrecs are only handled when they are the single
   subscript of the relation.  */
4286 if (!evolution_function_is_univariate_p (access_fun))
4288 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
4290 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
4291 return;
4294 access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);
4296 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
4297 add_multivariate_self_dist (ddr, access_fun);
4298 else
4299 /* The evolution step is not constant: it varies in
4300 the outer loop, so this cannot be represented by a
4301 distance vector. For example in pr34635.c the
4302 evolution is {0, +, {0, +, 4}_1}_2. */
4303 DDR_AFFINE_P (ddr) = false;
4305 return;
/* Univariate chrec: remember the outermost loop among the
   subscripts' evolution loops.  */
4308 index_carry = MIN (index_carry,
4309 index_in_loop_nest (CHREC_VARIABLE (access_fun),
4310 DDR_LOOP_NEST (ddr)));
/* Self distance is all zeros; add unit distances for the loops outer
   than INDEX_CARRY.  */
4314 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4315 add_outer_distances (ddr, dist_v, index_carry);
4318 static void
4319 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
4321 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4323 dist_v[DDR_INNER_LOOP (ddr)] = 1;
4324 save_dist_v (ddr, dist_v);
4327 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
4328 is the case for example when access functions are the same and
4329 equal to a constant, as in:
4331 | loop_1
4332 | A[3] = ...
4333 | ... = A[3]
4334 | endloop_1
4336 in which case the distance vectors are (0) and (1). */
4338 static void
4339 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
4341 unsigned i, j;
4343 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
4345 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
4346 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
4347 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
4349 for (j = 0; j < ca->n; j++)
4350 if (affine_function_zero_p (ca->fns[j]))
4352 insert_innermost_unit_dist_vector (ddr);
4353 return;
4356 for (j = 0; j < cb->n; j++)
4357 if (affine_function_zero_p (cb->fns[j]))
4359 insert_innermost_unit_dist_vector (ddr);
4360 return;
4365 /* Return true when the DDR contains two data references that have the
4366 same access functions. */
4368 static inline bool
4369 same_access_functions (const struct data_dependence_relation *ddr)
4371 unsigned i;
4372 subscript *sub;
4374 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4375 if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
4376 SUB_ACCESS_FN (sub, 1)))
4377 return false;
4379 return true;
4382 /* Compute the classic per loop distance vector. DDR is the data
4383 dependence relation to build a vector from. Return false when fail
4384 to represent the data dependence as a distance vector. */
4386 static bool
4387 build_classic_dist_vector (struct data_dependence_relation *ddr,
4388 struct loop *loop_nest)
4390 bool init_b = false;
4391 int index_carry = DDR_NB_LOOPS (ddr);
4392 lambda_vector dist_v;
4394 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
4395 return false;
4397 if (same_access_functions (ddr))
4399 /* Save the 0 vector. */
4400 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4401 save_dist_v (ddr, dist_v);
4403 if (constant_access_functions (ddr))
4404 add_distance_for_zero_overlaps (ddr);
4406 if (DDR_NB_LOOPS (ddr) > 1)
4407 add_other_self_distances (ddr);
4409 return true;
4412 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4413 if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
4414 return false;
4416 /* Save the distance vector if we initialized one. */
4417 if (init_b)
4419 /* Verify a basic constraint: classic distance vectors should
4420 always be lexicographically positive.
4422 Data references are collected in the order of execution of
4423 the program, thus for the following loop
4425 | for (i = 1; i < 100; i++)
4426 | for (j = 1; j < 100; j++)
4428 | t = T[j+1][i-1]; // A
4429 | T[j][i] = t + 2; // B
4432 references are collected following the direction of the wind:
4433 A then B. The data dependence tests are performed also
4434 following this order, such that we're looking at the distance
4435 separating the elements accessed by A from the elements later
4436 accessed by B. But in this example, the distance returned by
4437 test_dep (A, B) is lexicographically negative (-1, 1), that
4438 means that the access A occurs later than B with respect to
4439 the outer loop, ie. we're actually looking upwind. In this
4440 case we solve test_dep (B, A) looking downwind to the
4441 lexicographically positive solution, that returns the
4442 distance vector (1, -1). */
4443 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
4445 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4446 if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
4447 return false;
4448 compute_subscript_distance (ddr);
4449 if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
4450 &index_carry))
4451 return false;
4452 save_dist_v (ddr, save_v);
4453 DDR_REVERSED_P (ddr) = true;
4455 /* In this case there is a dependence forward for all the
4456 outer loops:
4458 | for (k = 1; k < 100; k++)
4459 | for (i = 1; i < 100; i++)
4460 | for (j = 1; j < 100; j++)
4462 | t = T[j+1][i-1]; // A
4463 | T[j][i] = t + 2; // B
4466 the vectors are:
4467 (0, 1, -1)
4468 (1, 1, -1)
4469 (1, -1, 1)
4471 if (DDR_NB_LOOPS (ddr) > 1)
4473 add_outer_distances (ddr, save_v, index_carry);
4474 add_outer_distances (ddr, dist_v, index_carry);
4477 else
4479 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4480 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
4482 if (DDR_NB_LOOPS (ddr) > 1)
4484 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4486 if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
4487 return false;
4488 compute_subscript_distance (ddr);
4489 if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
4490 &index_carry))
4491 return false;
4493 save_dist_v (ddr, save_v);
4494 add_outer_distances (ddr, dist_v, index_carry);
4495 add_outer_distances (ddr, opposite_v, index_carry);
4497 else
4498 save_dist_v (ddr, save_v);
4501 else
4503 /* There is a distance of 1 on all the outer loops: Example:
4504 there is a dependence of distance 1 on loop_1 for the array A.
4506 | loop_1
4507 | A[5] = ...
4508 | endloop
4510 add_outer_distances (ddr, dist_v,
4511 lambda_vector_first_nz (dist_v,
4512 DDR_NB_LOOPS (ddr), 0));
4515 if (dump_file && (dump_flags & TDF_DETAILS))
4517 unsigned i;
4519 fprintf (dump_file, "(build_classic_dist_vector\n");
4520 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
4522 fprintf (dump_file, " dist_vector = (");
4523 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
4524 DDR_NB_LOOPS (ddr));
4525 fprintf (dump_file, " )\n");
4527 fprintf (dump_file, ")\n");
4530 return true;
4533 /* Return the direction for a given distance.
4534 FIXME: Computing dir this way is suboptimal, since dir can catch
4535 cases that dist is unable to represent. */
4537 static inline enum data_dependence_direction
4538 dir_from_dist (int dist)
4540 if (dist > 0)
4541 return dir_positive;
4542 else if (dist < 0)
4543 return dir_negative;
4544 else
4545 return dir_equal;
4548 /* Compute the classic per loop direction vector. DDR is the data
4549 dependence relation to build a vector from. */
4551 static void
4552 build_classic_dir_vector (struct data_dependence_relation *ddr)
4554 unsigned i, j;
4555 lambda_vector dist_v;
4557 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
4559 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4561 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
4562 dir_v[j] = dir_from_dist (dist_v[j]);
4564 save_dir_v (ddr, dir_v);
4568 /* Helper function. Returns true when there is a dependence between the
4569 data references. A_INDEX is the index of the first reference (0 for
4570 DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference. */
4572 static bool
4573 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
4574 unsigned int a_index, unsigned int b_index,
4575 struct loop *loop_nest)
4577 unsigned int i;
4578 tree last_conflicts;
4579 struct subscript *subscript;
4580 tree res = NULL_TREE;
4582 for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
4584 conflict_function *overlaps_a, *overlaps_b;
4586 analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
4587 SUB_ACCESS_FN (subscript, b_index),
4588 &overlaps_a, &overlaps_b,
4589 &last_conflicts, loop_nest);
4591 if (SUB_CONFLICTS_IN_A (subscript))
4592 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
4593 if (SUB_CONFLICTS_IN_B (subscript))
4594 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
4596 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
4597 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
4598 SUB_LAST_CONFLICT (subscript) = last_conflicts;
4600 /* If there is any undetermined conflict function we have to
4601 give a conservative answer in case we cannot prove that
4602 no dependence exists when analyzing another subscript. */
4603 if (CF_NOT_KNOWN_P (overlaps_a)
4604 || CF_NOT_KNOWN_P (overlaps_b))
4606 res = chrec_dont_know;
4607 continue;
4610 /* When there is a subscript with no dependence we can stop. */
4611 else if (CF_NO_DEPENDENCE_P (overlaps_a)
4612 || CF_NO_DEPENDENCE_P (overlaps_b))
4614 res = chrec_known;
4615 break;
4619 if (res == NULL_TREE)
4620 return true;
4622 if (res == chrec_known)
4623 dependence_stats.num_dependence_independent++;
4624 else
4625 dependence_stats.num_dependence_undetermined++;
4626 finalize_ddr_dependent (ddr, res);
4627 return false;
4630 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
4632 static void
4633 subscript_dependence_tester (struct data_dependence_relation *ddr,
4634 struct loop *loop_nest)
4636 if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
4637 dependence_stats.num_dependence_dependent++;
4639 compute_subscript_distance (ddr);
4640 if (build_classic_dist_vector (ddr, loop_nest))
4641 build_classic_dir_vector (ddr);
4644 /* Returns true when all the access functions of A are affine or
4645 constant with respect to LOOP_NEST. */
4647 static bool
4648 access_functions_are_affine_or_constant_p (const struct data_reference *a,
4649 const struct loop *loop_nest)
4651 unsigned int i;
4652 vec<tree> fns = DR_ACCESS_FNS (a);
4653 tree t;
4655 FOR_EACH_VEC_ELT (fns, i, t)
4656 if (!evolution_function_is_invariant_p (t, loop_nest->num)
4657 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
4658 return false;
4660 return true;
4663 /* This computes the affine dependence relation between A and B with
4664 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
4665 independence between two accesses, while CHREC_DONT_KNOW is used
4666 for representing the unknown relation.
4668 Note that it is possible to stop the computation of the dependence
4669 relation the first time we detect a CHREC_KNOWN element for a given
4670 subscript. */
4672 void
4673 compute_affine_dependence (struct data_dependence_relation *ddr,
4674 struct loop *loop_nest)
4676 struct data_reference *dra = DDR_A (ddr);
4677 struct data_reference *drb = DDR_B (ddr);
4679 if (dump_file && (dump_flags & TDF_DETAILS))
4681 fprintf (dump_file, "(compute_affine_dependence\n");
4682 fprintf (dump_file, " stmt_a: ");
4683 print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
4684 fprintf (dump_file, " stmt_b: ");
4685 print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
4688 /* Analyze only when the dependence relation is not yet known. */
4689 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4691 dependence_stats.num_dependence_tests++;
4693 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
4694 && access_functions_are_affine_or_constant_p (drb, loop_nest))
4695 subscript_dependence_tester (ddr, loop_nest);
4697 /* As a last case, if the dependence cannot be determined, or if
4698 the dependence is considered too difficult to determine, answer
4699 "don't know". */
4700 else
4702 dependence_stats.num_dependence_undetermined++;
4704 if (dump_file && (dump_flags & TDF_DETAILS))
4706 fprintf (dump_file, "Data ref a:\n");
4707 dump_data_reference (dump_file, dra);
4708 fprintf (dump_file, "Data ref b:\n");
4709 dump_data_reference (dump_file, drb);
4710 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4712 finalize_ddr_dependent (ddr, chrec_dont_know);
4716 if (dump_file && (dump_flags & TDF_DETAILS))
4718 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4719 fprintf (dump_file, ") -> no dependence\n");
4720 else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4721 fprintf (dump_file, ") -> dependence analysis failed\n");
4722 else
4723 fprintf (dump_file, ")\n");
4727 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4728 the data references in DATAREFS, in the LOOP_NEST. When
4729 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4730 relations. Return true when successful, i.e. data references number
4731 is small enough to be handled. */
4733 bool
4734 compute_all_dependences (vec<data_reference_p> datarefs,
4735 vec<ddr_p> *dependence_relations,
4736 vec<loop_p> loop_nest,
4737 bool compute_self_and_rr)
4739 struct data_dependence_relation *ddr;
4740 struct data_reference *a, *b;
4741 unsigned int i, j;
4743 if ((int) datarefs.length ()
4744 > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
4746 struct data_dependence_relation *ddr;
4748 /* Insert a single relation into dependence_relations:
4749 chrec_dont_know. */
4750 ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
4751 dependence_relations->safe_push (ddr);
4752 return false;
4755 FOR_EACH_VEC_ELT (datarefs, i, a)
4756 for (j = i + 1; datarefs.iterate (j, &b); j++)
4757 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4759 ddr = initialize_data_dependence_relation (a, b, loop_nest);
4760 dependence_relations->safe_push (ddr);
4761 if (loop_nest.exists ())
4762 compute_affine_dependence (ddr, loop_nest[0]);
4765 if (compute_self_and_rr)
4766 FOR_EACH_VEC_ELT (datarefs, i, a)
4768 ddr = initialize_data_dependence_relation (a, a, loop_nest);
4769 dependence_relations->safe_push (ddr);
4770 if (loop_nest.exists ())
4771 compute_affine_dependence (ddr, loop_nest[0]);
4774 return true;
4777 /* Describes a location of a memory reference. */
4779 struct data_ref_loc
4781 /* The memory reference. */
4782 tree ref;
4784 /* True if the memory reference is read. */
4785 bool is_read;
4787 /* True if the data reference is conditional within the containing
4788 statement, i.e. if it might not occur even when the statement
4789 is executed and runs to completion. */
4790 bool is_conditional_in_stmt;
4794 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4795 true if STMT clobbers memory, false otherwise. */
4797 static bool
4798 get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
4800 bool clobbers_memory = false;
4801 data_ref_loc ref;
4802 tree op0, op1;
4803 enum gimple_code stmt_code = gimple_code (stmt);
4805 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4806 As we cannot model data-references to not spelled out
4807 accesses give up if they may occur. */
4808 if (stmt_code == GIMPLE_CALL
4809 && !(gimple_call_flags (stmt) & ECF_CONST))
4811 /* Allow IFN_GOMP_SIMD_LANE in their own loops. */
4812 if (gimple_call_internal_p (stmt))
4813 switch (gimple_call_internal_fn (stmt))
4815 case IFN_GOMP_SIMD_LANE:
4817 struct loop *loop = gimple_bb (stmt)->loop_father;
4818 tree uid = gimple_call_arg (stmt, 0);
4819 gcc_assert (TREE_CODE (uid) == SSA_NAME);
4820 if (loop == NULL
4821 || loop->simduid != SSA_NAME_VAR (uid))
4822 clobbers_memory = true;
4823 break;
4825 case IFN_MASK_LOAD:
4826 case IFN_MASK_STORE:
4827 break;
4828 default:
4829 clobbers_memory = true;
4830 break;
4832 else
4833 clobbers_memory = true;
4835 else if (stmt_code == GIMPLE_ASM
4836 && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
4837 || gimple_vuse (stmt)))
4838 clobbers_memory = true;
4840 if (!gimple_vuse (stmt))
4841 return clobbers_memory;
4843 if (stmt_code == GIMPLE_ASSIGN)
4845 tree base;
4846 op0 = gimple_assign_lhs (stmt);
4847 op1 = gimple_assign_rhs1 (stmt);
4849 if (DECL_P (op1)
4850 || (REFERENCE_CLASS_P (op1)
4851 && (base = get_base_address (op1))
4852 && TREE_CODE (base) != SSA_NAME
4853 && !is_gimple_min_invariant (base)))
4855 ref.ref = op1;
4856 ref.is_read = true;
4857 ref.is_conditional_in_stmt = false;
4858 references->safe_push (ref);
4861 else if (stmt_code == GIMPLE_CALL)
4863 unsigned i, n;
4864 tree ptr, type;
4865 unsigned int align;
4867 ref.is_read = false;
4868 if (gimple_call_internal_p (stmt))
4869 switch (gimple_call_internal_fn (stmt))
4871 case IFN_MASK_LOAD:
4872 if (gimple_call_lhs (stmt) == NULL_TREE)
4873 break;
4874 ref.is_read = true;
4875 /* FALLTHRU */
4876 case IFN_MASK_STORE:
4877 ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
4878 align = tree_to_shwi (gimple_call_arg (stmt, 1));
4879 if (ref.is_read)
4880 type = TREE_TYPE (gimple_call_lhs (stmt));
4881 else
4882 type = TREE_TYPE (gimple_call_arg (stmt, 3));
4883 if (TYPE_ALIGN (type) != align)
4884 type = build_aligned_type (type, align);
4885 ref.is_conditional_in_stmt = true;
4886 ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
4887 ptr);
4888 references->safe_push (ref);
4889 return false;
4890 default:
4891 break;
4894 op0 = gimple_call_lhs (stmt);
4895 n = gimple_call_num_args (stmt);
4896 for (i = 0; i < n; i++)
4898 op1 = gimple_call_arg (stmt, i);
4900 if (DECL_P (op1)
4901 || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
4903 ref.ref = op1;
4904 ref.is_read = true;
4905 ref.is_conditional_in_stmt = false;
4906 references->safe_push (ref);
4910 else
4911 return clobbers_memory;
4913 if (op0
4914 && (DECL_P (op0)
4915 || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
4917 ref.ref = op0;
4918 ref.is_read = false;
4919 ref.is_conditional_in_stmt = false;
4920 references->safe_push (ref);
4922 return clobbers_memory;
4926 /* Returns true if the loop-nest has any data reference. */
4928 bool
4929 loop_nest_has_data_refs (loop_p loop)
4931 basic_block *bbs = get_loop_body (loop);
4932 auto_vec<data_ref_loc, 3> references;
4934 for (unsigned i = 0; i < loop->num_nodes; i++)
4936 basic_block bb = bbs[i];
4937 gimple_stmt_iterator bsi;
4939 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4941 gimple *stmt = gsi_stmt (bsi);
4942 get_references_in_stmt (stmt, &references);
4943 if (references.length ())
4945 free (bbs);
4946 return true;
4950 free (bbs);
4951 return false;
4954 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4955 reference, returns false, otherwise returns true. NEST is the outermost
4956 loop of the loop nest in which the references should be analyzed. */
4958 bool
4959 find_data_references_in_stmt (struct loop *nest, gimple *stmt,
4960 vec<data_reference_p> *datarefs)
4962 unsigned i;
4963 auto_vec<data_ref_loc, 2> references;
4964 data_ref_loc *ref;
4965 bool ret = true;
4966 data_reference_p dr;
4968 if (get_references_in_stmt (stmt, &references))
4969 return false;
4971 FOR_EACH_VEC_ELT (references, i, ref)
4973 dr = create_data_ref (nest, loop_containing_stmt (stmt), ref->ref,
4974 stmt, ref->is_read, ref->is_conditional_in_stmt);
4975 gcc_assert (dr != NULL);
4976 datarefs->safe_push (dr);
4979 return ret;
4982 /* Stores the data references in STMT to DATAREFS. If there is an
4983 unanalyzable reference, returns false, otherwise returns true.
4984 NEST is the outermost loop of the loop nest in which the references
4985 should be instantiated, LOOP is the loop in which the references
4986 should be analyzed. */
4988 bool
4989 graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple *stmt,
4990 vec<data_reference_p> *datarefs)
4992 unsigned i;
4993 auto_vec<data_ref_loc, 2> references;
4994 data_ref_loc *ref;
4995 bool ret = true;
4996 data_reference_p dr;
4998 if (get_references_in_stmt (stmt, &references))
4999 return false;
5001 FOR_EACH_VEC_ELT (references, i, ref)
5003 dr = create_data_ref (nest, loop, ref->ref, stmt, ref->is_read,
5004 ref->is_conditional_in_stmt);
5005 gcc_assert (dr != NULL);
5006 datarefs->safe_push (dr);
5009 return ret;
5012 /* Search the data references in LOOP, and record the information into
5013 DATAREFS. Returns chrec_dont_know when failing to analyze a
5014 difficult case, returns NULL_TREE otherwise. */
5016 tree
5017 find_data_references_in_bb (struct loop *loop, basic_block bb,
5018 vec<data_reference_p> *datarefs)
5020 gimple_stmt_iterator bsi;
5022 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5024 gimple *stmt = gsi_stmt (bsi);
5026 if (!find_data_references_in_stmt (loop, stmt, datarefs))
5028 struct data_reference *res;
5029 res = XCNEW (struct data_reference);
5030 datarefs->safe_push (res);
5032 return chrec_dont_know;
5036 return NULL_TREE;
5039 /* Search the data references in LOOP, and record the information into
5040 DATAREFS. Returns chrec_dont_know when failing to analyze a
5041 difficult case, returns NULL_TREE otherwise.
5043 TODO: This function should be made smarter so that it can handle address
5044 arithmetic as if they were array accesses, etc. */
5046 tree
5047 find_data_references_in_loop (struct loop *loop,
5048 vec<data_reference_p> *datarefs)
5050 basic_block bb, *bbs;
5051 unsigned int i;
5053 bbs = get_loop_body_in_dom_order (loop);
5055 for (i = 0; i < loop->num_nodes; i++)
5057 bb = bbs[i];
5059 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
5061 free (bbs);
5062 return chrec_dont_know;
5065 free (bbs);
5067 return NULL_TREE;
5070 /* Return the alignment in bytes that DRB is guaranteed to have at all
5071 times. */
5073 unsigned int
5074 dr_alignment (innermost_loop_behavior *drb)
5076 /* Get the alignment of BASE_ADDRESS + INIT. */
5077 unsigned int alignment = drb->base_alignment;
5078 unsigned int misalignment = (drb->base_misalignment
5079 + TREE_INT_CST_LOW (drb->init));
5080 if (misalignment != 0)
5081 alignment = MIN (alignment, misalignment & -misalignment);
5083 /* Cap it to the alignment of OFFSET. */
5084 if (!integer_zerop (drb->offset))
5085 alignment = MIN (alignment, drb->offset_alignment);
5087 /* Cap it to the alignment of STEP. */
5088 if (!integer_zerop (drb->step))
5089 alignment = MIN (alignment, drb->step_alignment);
5091 return alignment;
5094 /* Recursive helper function. */
5096 static bool
5097 find_loop_nest_1 (struct loop *loop, vec<loop_p> *loop_nest)
5099 /* Inner loops of the nest should not contain siblings. Example:
5100 when there are two consecutive loops,
5102 | loop_0
5103 | loop_1
5104 | A[{0, +, 1}_1]
5105 | endloop_1
5106 | loop_2
5107 | A[{0, +, 1}_2]
5108 | endloop_2
5109 | endloop_0
5111 the dependence relation cannot be captured by the distance
5112 abstraction. */
5113 if (loop->next)
5114 return false;
5116 loop_nest->safe_push (loop);
5117 if (loop->inner)
5118 return find_loop_nest_1 (loop->inner, loop_nest);
5119 return true;
5122 /* Return false when the LOOP is not well nested. Otherwise return
5123 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
5124 contain the loops from the outermost to the innermost, as they will
5125 appear in the classic distance vector. */
5127 bool
5128 find_loop_nest (struct loop *loop, vec<loop_p> *loop_nest)
5130 loop_nest->safe_push (loop);
5131 if (loop->inner)
5132 return find_loop_nest_1 (loop->inner, loop_nest);
5133 return true;
5136 /* Returns true when the data dependences have been computed, false otherwise.
5137 Given a loop nest LOOP, the following vectors are returned:
5138 DATAREFS is initialized to all the array elements contained in this loop,
5139 DEPENDENCE_RELATIONS contains the relations between the data references.
5140 Compute read-read and self relations if
5141 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
5143 bool
5144 compute_data_dependences_for_loop (struct loop *loop,
5145 bool compute_self_and_read_read_dependences,
5146 vec<loop_p> *loop_nest,
5147 vec<data_reference_p> *datarefs,
5148 vec<ddr_p> *dependence_relations)
5150 bool res = true;
5152 memset (&dependence_stats, 0, sizeof (dependence_stats));
5154 /* If the loop nest is not well formed, or one of the data references
5155 is not computable, give up without spending time to compute other
5156 dependences. */
5157 if (!loop
5158 || !find_loop_nest (loop, loop_nest)
5159 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
5160 || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
5161 compute_self_and_read_read_dependences))
5162 res = false;
5164 if (dump_file && (dump_flags & TDF_STATS))
5166 fprintf (dump_file, "Dependence tester statistics:\n");
5168 fprintf (dump_file, "Number of dependence tests: %d\n",
5169 dependence_stats.num_dependence_tests);
5170 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
5171 dependence_stats.num_dependence_dependent);
5172 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
5173 dependence_stats.num_dependence_independent);
5174 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
5175 dependence_stats.num_dependence_undetermined);
5177 fprintf (dump_file, "Number of subscript tests: %d\n",
5178 dependence_stats.num_subscript_tests);
5179 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
5180 dependence_stats.num_subscript_undetermined);
5181 fprintf (dump_file, "Number of same subscript function: %d\n",
5182 dependence_stats.num_same_subscript_function);
5184 fprintf (dump_file, "Number of ziv tests: %d\n",
5185 dependence_stats.num_ziv);
5186 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
5187 dependence_stats.num_ziv_dependent);
5188 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
5189 dependence_stats.num_ziv_independent);
5190 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
5191 dependence_stats.num_ziv_unimplemented);
5193 fprintf (dump_file, "Number of siv tests: %d\n",
5194 dependence_stats.num_siv);
5195 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
5196 dependence_stats.num_siv_dependent);
5197 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
5198 dependence_stats.num_siv_independent);
5199 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
5200 dependence_stats.num_siv_unimplemented);
5202 fprintf (dump_file, "Number of miv tests: %d\n",
5203 dependence_stats.num_miv);
5204 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
5205 dependence_stats.num_miv_dependent);
5206 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
5207 dependence_stats.num_miv_independent);
5208 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
5209 dependence_stats.num_miv_unimplemented);
5212 return res;
5215 /* Free the memory used by a data dependence relation DDR. */
5217 void
5218 free_dependence_relation (struct data_dependence_relation *ddr)
5220 if (ddr == NULL)
5221 return;
5223 if (DDR_SUBSCRIPTS (ddr).exists ())
5224 free_subscripts (DDR_SUBSCRIPTS (ddr));
5225 DDR_DIST_VECTS (ddr).release ();
5226 DDR_DIR_VECTS (ddr).release ();
5228 free (ddr);
5231 /* Free the memory used by the data dependence relations from
5232 DEPENDENCE_RELATIONS. */
5234 void
5235 free_dependence_relations (vec<ddr_p> dependence_relations)
5237 unsigned int i;
5238 struct data_dependence_relation *ddr;
5240 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
5241 if (ddr)
5242 free_dependence_relation (ddr);
5244 dependence_relations.release ();
5247 /* Free the memory used by the data references from DATAREFS. */
5249 void
5250 free_data_refs (vec<data_reference_p> datarefs)
5252 unsigned int i;
5253 struct data_reference *dr;
5255 FOR_EACH_VEC_ELT (datarefs, i, dr)
5256 free_data_ref (dr);
5257 datarefs.release ();