poly_int: decode_addr_const
[official-gcc.git] / gcc / tree-data-ref.c
blob86a587d04c0559eced40438c1649fa52a7bcd20a
1 /* Data references and dependences detectors.
2 Copyright (C) 2003-2017 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass walks a given loop structure searching for array
22 references. The information about the array accesses is recorded
23 in DATA_REFERENCE structures.
25 The basic test for determining the dependences is:
26 given two access functions chrec1 and chrec2 to a same array, and
27 x and y two vectors from the iteration domain, the same element of
28 the array is accessed twice at iterations x and y if and only if:
29 | chrec1 (x) == chrec2 (y).
31 The goals of this analysis are:
33 - to determine the independence: the relation between two
34 independent accesses is qualified with the chrec_known (this
35 information allows a loop parallelization),
37 - when two data references access the same data, to qualify the
38 dependence relation with classic dependence representations:
40 - distance vectors
41 - direction vectors
42 - loop carried level dependence
43 - polyhedron dependence
44 or with the chains of recurrences based representation,
46 - to define a knowledge base for storing the data dependence
47 information,
49 - to define an interface to access this data.
52 Definitions:
54 - subscript: given two array accesses a subscript is the tuple
55 composed of the access functions for a given dimension. Example:
56 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57 (f1, g1), (f2, g2), (f3, g3).
59 - Diophantine equation: an equation whose coefficients and
60 solutions are integer constants, for example the equation
61 | 3*x + 2*y = 1
62 has an integer solution x = 1 and y = -1.
64 References:
66 - "Advanced Compilation for High Performance Computing" by Randy
67 Allen and Ken Kennedy.
68 http://citeseer.ist.psu.edu/goff91practical.html
70 - "Loop Transformations for Restructuring Compilers - The Foundations"
71 by Utpal Banerjee.
76 #include "config.h"
77 #include "system.h"
78 #include "coretypes.h"
79 #include "backend.h"
80 #include "rtl.h"
81 #include "tree.h"
82 #include "gimple.h"
83 #include "gimple-pretty-print.h"
84 #include "alias.h"
85 #include "fold-const.h"
86 #include "expr.h"
87 #include "gimple-iterator.h"
88 #include "tree-ssa-loop-niter.h"
89 #include "tree-ssa-loop.h"
90 #include "tree-ssa.h"
91 #include "cfgloop.h"
92 #include "tree-data-ref.h"
93 #include "tree-scalar-evolution.h"
94 #include "dumpfile.h"
95 #include "tree-affine.h"
96 #include "params.h"
97 #include "builtins.h"
/* Counters summarizing the outcome of every dependence, subscript and
   ZIV/SIV/MIV test run by this pass; dumped for statistics.  */

static struct datadep_stats
{
  int num_dependence_tests;
  int num_dependence_dependent;
  int num_dependence_independent;
  int num_dependence_undetermined;

  int num_subscript_tests;
  int num_subscript_undetermined;
  int num_same_subscript_function;

  int num_ziv;
  int num_ziv_independent;
  int num_ziv_dependent;
  int num_ziv_unimplemented;

  int num_siv;
  int num_siv_independent;
  int num_siv_dependent;
  int num_siv_unimplemented;

  int num_miv;
  int num_miv_independent;
  int num_miv_dependent;
  int num_miv_unimplemented;
} dependence_stats;
126 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
127 unsigned int, unsigned int,
128 struct loop *);
129 /* Returns true iff A divides B. */
131 static inline bool
132 tree_fold_divides_p (const_tree a, const_tree b)
134 gcc_assert (TREE_CODE (a) == INTEGER_CST);
135 gcc_assert (TREE_CODE (b) == INTEGER_CST);
136 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
/* Returns true iff A divides B.  A == 0 divides only B == 0; checking
   this explicitly avoids the undefined behavior of B % 0.  */

static inline bool
int_divides_p (int a, int b)
{
  if (a == 0)
    return b == 0;
  return ((b % a) == 0);
}
147 /* Return true if reference REF contains a union access. */
149 static bool
150 ref_contains_union_access_p (tree ref)
152 while (handled_component_p (ref))
154 ref = TREE_OPERAND (ref, 0);
155 if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
156 || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
157 return true;
159 return false;
164 /* Dump into FILE all the data references from DATAREFS. */
166 static void
167 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
169 unsigned int i;
170 struct data_reference *dr;
172 FOR_EACH_VEC_ELT (datarefs, i, dr)
173 dump_data_reference (file, dr);
176 /* Unified dump into FILE all the data references from DATAREFS. */
178 DEBUG_FUNCTION void
179 debug (vec<data_reference_p> &ref)
181 dump_data_references (stderr, ref);
184 DEBUG_FUNCTION void
185 debug (vec<data_reference_p> *ptr)
187 if (ptr)
188 debug (*ptr);
189 else
190 fprintf (stderr, "<nil>\n");
194 /* Dump into STDERR all the data references from DATAREFS. */
196 DEBUG_FUNCTION void
197 debug_data_references (vec<data_reference_p> datarefs)
199 dump_data_references (stderr, datarefs);
202 /* Print to STDERR the data_reference DR. */
204 DEBUG_FUNCTION void
205 debug_data_reference (struct data_reference *dr)
207 dump_data_reference (stderr, dr);
210 /* Dump function for a DATA_REFERENCE structure. */
212 void
213 dump_data_reference (FILE *outf,
214 struct data_reference *dr)
216 unsigned int i;
218 fprintf (outf, "#(Data Ref: \n");
219 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
220 fprintf (outf, "# stmt: ");
221 print_gimple_stmt (outf, DR_STMT (dr), 0);
222 fprintf (outf, "# ref: ");
223 print_generic_stmt (outf, DR_REF (dr));
224 fprintf (outf, "# base_object: ");
225 print_generic_stmt (outf, DR_BASE_OBJECT (dr));
227 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
229 fprintf (outf, "# Access function %d: ", i);
230 print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
232 fprintf (outf, "#)\n");
235 /* Unified dump function for a DATA_REFERENCE structure. */
237 DEBUG_FUNCTION void
238 debug (data_reference &ref)
240 dump_data_reference (stderr, &ref);
243 DEBUG_FUNCTION void
244 debug (data_reference *ptr)
246 if (ptr)
247 debug (*ptr);
248 else
249 fprintf (stderr, "<nil>\n");
253 /* Dumps the affine function described by FN to the file OUTF. */
255 DEBUG_FUNCTION void
256 dump_affine_function (FILE *outf, affine_fn fn)
258 unsigned i;
259 tree coef;
261 print_generic_expr (outf, fn[0], TDF_SLIM);
262 for (i = 1; fn.iterate (i, &coef); i++)
264 fprintf (outf, " + ");
265 print_generic_expr (outf, coef, TDF_SLIM);
266 fprintf (outf, " * x_%u", i);
270 /* Dumps the conflict function CF to the file OUTF. */
272 DEBUG_FUNCTION void
273 dump_conflict_function (FILE *outf, conflict_function *cf)
275 unsigned i;
277 if (cf->n == NO_DEPENDENCE)
278 fprintf (outf, "no dependence");
279 else if (cf->n == NOT_KNOWN)
280 fprintf (outf, "not known");
281 else
283 for (i = 0; i < cf->n; i++)
285 if (i != 0)
286 fprintf (outf, " ");
287 fprintf (outf, "[");
288 dump_affine_function (outf, cf->fns[i]);
289 fprintf (outf, "]");
294 /* Dump function for a SUBSCRIPT structure. */
296 DEBUG_FUNCTION void
297 dump_subscript (FILE *outf, struct subscript *subscript)
299 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
301 fprintf (outf, "\n (subscript \n");
302 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
303 dump_conflict_function (outf, cf);
304 if (CF_NONTRIVIAL_P (cf))
306 tree last_iteration = SUB_LAST_CONFLICT (subscript);
307 fprintf (outf, "\n last_conflict: ");
308 print_generic_expr (outf, last_iteration);
311 cf = SUB_CONFLICTS_IN_B (subscript);
312 fprintf (outf, "\n iterations_that_access_an_element_twice_in_B: ");
313 dump_conflict_function (outf, cf);
314 if (CF_NONTRIVIAL_P (cf))
316 tree last_iteration = SUB_LAST_CONFLICT (subscript);
317 fprintf (outf, "\n last_conflict: ");
318 print_generic_expr (outf, last_iteration);
321 fprintf (outf, "\n (Subscript distance: ");
322 print_generic_expr (outf, SUB_DISTANCE (subscript));
323 fprintf (outf, " ))\n");
326 /* Print the classic direction vector DIRV to OUTF. */
328 DEBUG_FUNCTION void
329 print_direction_vector (FILE *outf,
330 lambda_vector dirv,
331 int length)
333 int eq;
335 for (eq = 0; eq < length; eq++)
337 enum data_dependence_direction dir = ((enum data_dependence_direction)
338 dirv[eq]);
340 switch (dir)
342 case dir_positive:
343 fprintf (outf, " +");
344 break;
345 case dir_negative:
346 fprintf (outf, " -");
347 break;
348 case dir_equal:
349 fprintf (outf, " =");
350 break;
351 case dir_positive_or_equal:
352 fprintf (outf, " +=");
353 break;
354 case dir_positive_or_negative:
355 fprintf (outf, " +-");
356 break;
357 case dir_negative_or_equal:
358 fprintf (outf, " -=");
359 break;
360 case dir_star:
361 fprintf (outf, " *");
362 break;
363 default:
364 fprintf (outf, "indep");
365 break;
368 fprintf (outf, "\n");
371 /* Print a vector of direction vectors. */
373 DEBUG_FUNCTION void
374 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
375 int length)
377 unsigned j;
378 lambda_vector v;
380 FOR_EACH_VEC_ELT (dir_vects, j, v)
381 print_direction_vector (outf, v, length);
384 /* Print out a vector VEC of length N to OUTFILE. */
386 DEBUG_FUNCTION void
387 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
389 int i;
391 for (i = 0; i < n; i++)
392 fprintf (outfile, "%3d ", vector[i]);
393 fprintf (outfile, "\n");
396 /* Print a vector of distance vectors. */
398 DEBUG_FUNCTION void
399 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
400 int length)
402 unsigned j;
403 lambda_vector v;
405 FOR_EACH_VEC_ELT (dist_vects, j, v)
406 print_lambda_vector (outf, v, length);
409 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
411 DEBUG_FUNCTION void
412 dump_data_dependence_relation (FILE *outf,
413 struct data_dependence_relation *ddr)
415 struct data_reference *dra, *drb;
417 fprintf (outf, "(Data Dep: \n");
419 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
421 if (ddr)
423 dra = DDR_A (ddr);
424 drb = DDR_B (ddr);
425 if (dra)
426 dump_data_reference (outf, dra);
427 else
428 fprintf (outf, " (nil)\n");
429 if (drb)
430 dump_data_reference (outf, drb);
431 else
432 fprintf (outf, " (nil)\n");
434 fprintf (outf, " (don't know)\n)\n");
435 return;
438 dra = DDR_A (ddr);
439 drb = DDR_B (ddr);
440 dump_data_reference (outf, dra);
441 dump_data_reference (outf, drb);
443 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
444 fprintf (outf, " (no dependence)\n");
446 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
448 unsigned int i;
449 struct loop *loopi;
451 subscript *sub;
452 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
454 fprintf (outf, " access_fn_A: ");
455 print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
456 fprintf (outf, " access_fn_B: ");
457 print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
458 dump_subscript (outf, sub);
461 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
462 fprintf (outf, " loop nest: (");
463 FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
464 fprintf (outf, "%d ", loopi->num);
465 fprintf (outf, ")\n");
467 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
469 fprintf (outf, " distance_vector: ");
470 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
471 DDR_NB_LOOPS (ddr));
474 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
476 fprintf (outf, " direction_vector: ");
477 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
478 DDR_NB_LOOPS (ddr));
482 fprintf (outf, ")\n");
485 /* Debug version. */
487 DEBUG_FUNCTION void
488 debug_data_dependence_relation (struct data_dependence_relation *ddr)
490 dump_data_dependence_relation (stderr, ddr);
493 /* Dump into FILE all the dependence relations from DDRS. */
495 DEBUG_FUNCTION void
496 dump_data_dependence_relations (FILE *file,
497 vec<ddr_p> ddrs)
499 unsigned int i;
500 struct data_dependence_relation *ddr;
502 FOR_EACH_VEC_ELT (ddrs, i, ddr)
503 dump_data_dependence_relation (file, ddr);
506 DEBUG_FUNCTION void
507 debug (vec<ddr_p> &ref)
509 dump_data_dependence_relations (stderr, ref);
512 DEBUG_FUNCTION void
513 debug (vec<ddr_p> *ptr)
515 if (ptr)
516 debug (*ptr);
517 else
518 fprintf (stderr, "<nil>\n");
522 /* Dump to STDERR all the dependence relations from DDRS. */
524 DEBUG_FUNCTION void
525 debug_data_dependence_relations (vec<ddr_p> ddrs)
527 dump_data_dependence_relations (stderr, ddrs);
530 /* Dumps the distance and direction vectors in FILE. DDRS contains
531 the dependence relations, and VECT_SIZE is the size of the
532 dependence vectors, or in other words the number of loops in the
533 considered nest. */
535 DEBUG_FUNCTION void
536 dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
538 unsigned int i, j;
539 struct data_dependence_relation *ddr;
540 lambda_vector v;
542 FOR_EACH_VEC_ELT (ddrs, i, ddr)
543 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
545 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), j, v)
547 fprintf (file, "DISTANCE_V (");
548 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
549 fprintf (file, ")\n");
552 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), j, v)
554 fprintf (file, "DIRECTION_V (");
555 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
556 fprintf (file, ")\n");
560 fprintf (file, "\n\n");
563 /* Dumps the data dependence relations DDRS in FILE. */
565 DEBUG_FUNCTION void
566 dump_ddrs (FILE *file, vec<ddr_p> ddrs)
568 unsigned int i;
569 struct data_dependence_relation *ddr;
571 FOR_EACH_VEC_ELT (ddrs, i, ddr)
572 dump_data_dependence_relation (file, ddr);
574 fprintf (file, "\n\n");
577 DEBUG_FUNCTION void
578 debug_ddrs (vec<ddr_p> ddrs)
580 dump_ddrs (stderr, ddrs);
583 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
584 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
585 constant of type ssizetype, and returns true. If we cannot do this
586 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
587 is returned. */
589 static bool
590 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
591 tree *var, tree *off)
593 tree var0, var1;
594 tree off0, off1;
595 enum tree_code ocode = code;
597 *var = NULL_TREE;
598 *off = NULL_TREE;
600 switch (code)
602 case INTEGER_CST:
603 *var = build_int_cst (type, 0);
604 *off = fold_convert (ssizetype, op0);
605 return true;
607 case POINTER_PLUS_EXPR:
608 ocode = PLUS_EXPR;
609 /* FALLTHROUGH */
610 case PLUS_EXPR:
611 case MINUS_EXPR:
612 split_constant_offset (op0, &var0, &off0);
613 split_constant_offset (op1, &var1, &off1);
614 *var = fold_build2 (code, type, var0, var1);
615 *off = size_binop (ocode, off0, off1);
616 return true;
618 case MULT_EXPR:
619 if (TREE_CODE (op1) != INTEGER_CST)
620 return false;
622 split_constant_offset (op0, &var0, &off0);
623 *var = fold_build2 (MULT_EXPR, type, var0, op1);
624 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
625 return true;
627 case ADDR_EXPR:
629 tree base, poffset;
630 poly_int64 pbitsize, pbitpos, pbytepos;
631 machine_mode pmode;
632 int punsignedp, preversep, pvolatilep;
634 op0 = TREE_OPERAND (op0, 0);
635 base
636 = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
637 &punsignedp, &preversep, &pvolatilep);
639 if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
640 return false;
641 base = build_fold_addr_expr (base);
642 off0 = ssize_int (pbytepos);
644 if (poffset)
646 split_constant_offset (poffset, &poffset, &off1);
647 off0 = size_binop (PLUS_EXPR, off0, off1);
648 if (POINTER_TYPE_P (TREE_TYPE (base)))
649 base = fold_build_pointer_plus (base, poffset);
650 else
651 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
652 fold_convert (TREE_TYPE (base), poffset));
655 var0 = fold_convert (type, base);
657 /* If variable length types are involved, punt, otherwise casts
658 might be converted into ARRAY_REFs in gimplify_conversion.
659 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
660 possibly no longer appears in current GIMPLE, might resurface.
661 This perhaps could run
662 if (CONVERT_EXPR_P (var0))
664 gimplify_conversion (&var0);
665 // Attempt to fill in any within var0 found ARRAY_REF's
666 // element size from corresponding op embedded ARRAY_REF,
667 // if unsuccessful, just punt.
668 } */
669 while (POINTER_TYPE_P (type))
670 type = TREE_TYPE (type);
671 if (int_size_in_bytes (type) < 0)
672 return false;
674 *var = var0;
675 *off = off0;
676 return true;
679 case SSA_NAME:
681 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
682 return false;
684 gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
685 enum tree_code subcode;
687 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
688 return false;
690 var0 = gimple_assign_rhs1 (def_stmt);
691 subcode = gimple_assign_rhs_code (def_stmt);
692 var1 = gimple_assign_rhs2 (def_stmt);
694 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
696 CASE_CONVERT:
698 /* We must not introduce undefined overflow, and we must not change the value.
699 Hence we're okay if the inner type doesn't overflow to start with
700 (pointer or signed), the outer type also is an integer or pointer
701 and the outer precision is at least as large as the inner. */
702 tree itype = TREE_TYPE (op0);
703 if ((POINTER_TYPE_P (itype)
704 || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
705 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
706 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
708 split_constant_offset (op0, &var0, off);
709 *var = fold_convert (type, var0);
710 return true;
712 return false;
715 default:
716 return false;
720 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
721 will be ssizetype. */
723 void
724 split_constant_offset (tree exp, tree *var, tree *off)
726 tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
727 enum tree_code code;
729 *var = exp;
730 *off = ssize_int (0);
731 STRIP_NOPS (exp);
733 if (tree_is_chrec (exp)
734 || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
735 return;
737 otype = TREE_TYPE (exp);
738 code = TREE_CODE (exp);
739 extract_ops_from_tree (exp, &code, &op0, &op1);
740 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
742 *var = fold_convert (type, e);
743 *off = o;
747 /* Returns the address ADDR of an object in a canonical shape (without nop
748 casts, and with type of pointer to the object). */
750 static tree
751 canonicalize_base_object_address (tree addr)
753 tree orig = addr;
755 STRIP_NOPS (addr);
757 /* The base address may be obtained by casting from integer, in that case
758 keep the cast. */
759 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
760 return orig;
762 if (TREE_CODE (addr) != ADDR_EXPR)
763 return addr;
765 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
768 /* Analyze the behavior of memory reference REF. There are two modes:
770 - BB analysis. In this case we simply split the address into base,
771 init and offset components, without reference to any containing loop.
772 The resulting base and offset are general expressions and they can
773 vary arbitrarily from one iteration of the containing loop to the next.
774 The step is always zero.
776 - loop analysis. In this case we analyze the reference both wrt LOOP
777 and on the basis that the reference occurs (is "used") in LOOP;
778 see the comment above analyze_scalar_evolution_in_loop for more
779 information about this distinction. The base, init, offset and
780 step fields are all invariant in LOOP.
782 Perform BB analysis if LOOP is null, or if LOOP is the function's
783 dummy outermost loop. In other cases perform loop analysis.
785 Return true if the analysis succeeded and store the results in DRB if so.
786 BB analysis can only fail for bitfield or reversed-storage accesses. */
788 bool
789 dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
790 struct loop *loop)
792 poly_int64 pbitsize, pbitpos;
793 tree base, poffset;
794 machine_mode pmode;
795 int punsignedp, preversep, pvolatilep;
796 affine_iv base_iv, offset_iv;
797 tree init, dinit, step;
798 bool in_loop = (loop && loop->num);
800 if (dump_file && (dump_flags & TDF_DETAILS))
801 fprintf (dump_file, "analyze_innermost: ");
803 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
804 &punsignedp, &preversep, &pvolatilep);
805 gcc_assert (base != NULL_TREE);
807 poly_int64 pbytepos;
808 if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
810 if (dump_file && (dump_flags & TDF_DETAILS))
811 fprintf (dump_file, "failed: bit offset alignment.\n");
812 return false;
815 if (preversep)
817 if (dump_file && (dump_flags & TDF_DETAILS))
818 fprintf (dump_file, "failed: reverse storage order.\n");
819 return false;
822 /* Calculate the alignment and misalignment for the inner reference. */
823 unsigned int HOST_WIDE_INT base_misalignment;
824 unsigned int base_alignment;
825 get_object_alignment_1 (base, &base_alignment, &base_misalignment);
827 /* There are no bitfield references remaining in BASE, so the values
828 we got back must be whole bytes. */
829 gcc_assert (base_alignment % BITS_PER_UNIT == 0
830 && base_misalignment % BITS_PER_UNIT == 0);
831 base_alignment /= BITS_PER_UNIT;
832 base_misalignment /= BITS_PER_UNIT;
834 if (TREE_CODE (base) == MEM_REF)
836 if (!integer_zerop (TREE_OPERAND (base, 1)))
838 /* Subtract MOFF from the base and add it to POFFSET instead.
839 Adjust the misalignment to reflect the amount we subtracted. */
840 offset_int moff = mem_ref_offset (base);
841 base_misalignment -= moff.to_short_addr ();
842 tree mofft = wide_int_to_tree (sizetype, moff);
843 if (!poffset)
844 poffset = mofft;
845 else
846 poffset = size_binop (PLUS_EXPR, poffset, mofft);
848 base = TREE_OPERAND (base, 0);
850 else
851 base = build_fold_addr_expr (base);
853 if (in_loop)
855 if (!simple_iv (loop, loop, base, &base_iv, true))
857 if (dump_file && (dump_flags & TDF_DETAILS))
858 fprintf (dump_file, "failed: evolution of base is not affine.\n");
859 return false;
862 else
864 base_iv.base = base;
865 base_iv.step = ssize_int (0);
866 base_iv.no_overflow = true;
869 if (!poffset)
871 offset_iv.base = ssize_int (0);
872 offset_iv.step = ssize_int (0);
874 else
876 if (!in_loop)
878 offset_iv.base = poffset;
879 offset_iv.step = ssize_int (0);
881 else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
883 if (dump_file && (dump_flags & TDF_DETAILS))
884 fprintf (dump_file, "failed: evolution of offset is not affine.\n");
885 return false;
889 init = ssize_int (pbytepos);
891 /* Subtract any constant component from the base and add it to INIT instead.
892 Adjust the misalignment to reflect the amount we subtracted. */
893 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
894 init = size_binop (PLUS_EXPR, init, dinit);
895 base_misalignment -= TREE_INT_CST_LOW (dinit);
897 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
898 init = size_binop (PLUS_EXPR, init, dinit);
900 step = size_binop (PLUS_EXPR,
901 fold_convert (ssizetype, base_iv.step),
902 fold_convert (ssizetype, offset_iv.step));
904 base = canonicalize_base_object_address (base_iv.base);
906 /* See if get_pointer_alignment can guarantee a higher alignment than
907 the one we calculated above. */
908 unsigned int HOST_WIDE_INT alt_misalignment;
909 unsigned int alt_alignment;
910 get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);
912 /* As above, these values must be whole bytes. */
913 gcc_assert (alt_alignment % BITS_PER_UNIT == 0
914 && alt_misalignment % BITS_PER_UNIT == 0);
915 alt_alignment /= BITS_PER_UNIT;
916 alt_misalignment /= BITS_PER_UNIT;
918 if (base_alignment < alt_alignment)
920 base_alignment = alt_alignment;
921 base_misalignment = alt_misalignment;
924 drb->base_address = base;
925 drb->offset = fold_convert (ssizetype, offset_iv.base);
926 drb->init = init;
927 drb->step = step;
928 drb->base_alignment = base_alignment;
929 drb->base_misalignment = base_misalignment & (base_alignment - 1);
930 drb->offset_alignment = highest_pow2_factor (offset_iv.base);
931 drb->step_alignment = highest_pow2_factor (step);
933 if (dump_file && (dump_flags & TDF_DETAILS))
934 fprintf (dump_file, "success.\n");
936 return true;
939 /* Return true if OP is a valid component reference for a DR access
940 function. This accepts a subset of what handled_component_p accepts. */
942 static bool
943 access_fn_component_p (tree op)
945 switch (TREE_CODE (op))
947 case REALPART_EXPR:
948 case IMAGPART_EXPR:
949 case ARRAY_REF:
950 return true;
952 case COMPONENT_REF:
953 return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;
955 default:
956 return false;
960 /* Determines the base object and the list of indices of memory reference
961 DR, analyzed in LOOP and instantiated before NEST. */
963 static void
964 dr_analyze_indices (struct data_reference *dr, edge nest, loop_p loop)
966 vec<tree> access_fns = vNULL;
967 tree ref, op;
968 tree base, off, access_fn;
970 /* If analyzing a basic-block there are no indices to analyze
971 and thus no access functions. */
972 if (!nest)
974 DR_BASE_OBJECT (dr) = DR_REF (dr);
975 DR_ACCESS_FNS (dr).create (0);
976 return;
979 ref = DR_REF (dr);
981 /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
982 into a two element array with a constant index. The base is
983 then just the immediate underlying object. */
984 if (TREE_CODE (ref) == REALPART_EXPR)
986 ref = TREE_OPERAND (ref, 0);
987 access_fns.safe_push (integer_zero_node);
989 else if (TREE_CODE (ref) == IMAGPART_EXPR)
991 ref = TREE_OPERAND (ref, 0);
992 access_fns.safe_push (integer_one_node);
995 /* Analyze access functions of dimensions we know to be independent.
996 The list of component references handled here should be kept in
997 sync with access_fn_component_p. */
998 while (handled_component_p (ref))
1000 if (TREE_CODE (ref) == ARRAY_REF)
1002 op = TREE_OPERAND (ref, 1);
1003 access_fn = analyze_scalar_evolution (loop, op);
1004 access_fn = instantiate_scev (nest, loop, access_fn);
1005 access_fns.safe_push (access_fn);
1007 else if (TREE_CODE (ref) == COMPONENT_REF
1008 && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
1010 /* For COMPONENT_REFs of records (but not unions!) use the
1011 FIELD_DECL offset as constant access function so we can
1012 disambiguate a[i].f1 and a[i].f2. */
1013 tree off = component_ref_field_offset (ref);
1014 off = size_binop (PLUS_EXPR,
1015 size_binop (MULT_EXPR,
1016 fold_convert (bitsizetype, off),
1017 bitsize_int (BITS_PER_UNIT)),
1018 DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
1019 access_fns.safe_push (off);
1021 else
1022 /* If we have an unhandled component we could not translate
1023 to an access function stop analyzing. We have determined
1024 our base object in this case. */
1025 break;
1027 ref = TREE_OPERAND (ref, 0);
1030 /* If the address operand of a MEM_REF base has an evolution in the
1031 analyzed nest, add it as an additional independent access-function. */
1032 if (TREE_CODE (ref) == MEM_REF)
1034 op = TREE_OPERAND (ref, 0);
1035 access_fn = analyze_scalar_evolution (loop, op);
1036 access_fn = instantiate_scev (nest, loop, access_fn);
1037 if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
1039 tree orig_type;
1040 tree memoff = TREE_OPERAND (ref, 1);
1041 base = initial_condition (access_fn);
1042 orig_type = TREE_TYPE (base);
1043 STRIP_USELESS_TYPE_CONVERSION (base);
1044 split_constant_offset (base, &base, &off);
1045 STRIP_USELESS_TYPE_CONVERSION (base);
1046 /* Fold the MEM_REF offset into the evolutions initial
1047 value to make more bases comparable. */
1048 if (!integer_zerop (memoff))
1050 off = size_binop (PLUS_EXPR, off,
1051 fold_convert (ssizetype, memoff));
1052 memoff = build_int_cst (TREE_TYPE (memoff), 0);
1054 /* Adjust the offset so it is a multiple of the access type
1055 size and thus we separate bases that can possibly be used
1056 to produce partial overlaps (which the access_fn machinery
1057 cannot handle). */
1058 wide_int rem;
1059 if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
1060 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
1061 && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
1062 rem = wi::mod_trunc
1063 (wi::to_wide (off),
1064 wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
1065 SIGNED);
1066 else
1067 /* If we can't compute the remainder simply force the initial
1068 condition to zero. */
1069 rem = wi::to_wide (off);
1070 off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
1071 memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
1072 /* And finally replace the initial condition. */
1073 access_fn = chrec_replace_initial_condition
1074 (access_fn, fold_convert (orig_type, off));
1075 /* ??? This is still not a suitable base object for
1076 dr_may_alias_p - the base object needs to be an
1077 access that covers the object as whole. With
1078 an evolution in the pointer this cannot be
1079 guaranteed.
1080 As a band-aid, mark the access so we can special-case
1081 it in dr_may_alias_p. */
1082 tree old = ref;
1083 ref = fold_build2_loc (EXPR_LOCATION (ref),
1084 MEM_REF, TREE_TYPE (ref),
1085 base, memoff);
1086 MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
1087 MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
1088 DR_UNCONSTRAINED_BASE (dr) = true;
1089 access_fns.safe_push (access_fn);
1092 else if (DECL_P (ref))
1094 /* Canonicalize DR_BASE_OBJECT to MEM_REF form. */
1095 ref = build2 (MEM_REF, TREE_TYPE (ref),
1096 build_fold_addr_expr (ref),
1097 build_int_cst (reference_alias_ptr_type (ref), 0));
1100 DR_BASE_OBJECT (dr) = ref;
1101 DR_ACCESS_FNS (dr) = access_fns;
1104 /* Extracts the alias analysis information from the memory reference DR. */
1106 static void
1107 dr_analyze_alias (struct data_reference *dr)
1109 tree ref = DR_REF (dr);
1110 tree base = get_base_address (ref), addr;
1112 if (INDIRECT_REF_P (base)
1113 || TREE_CODE (base) == MEM_REF)
1115 addr = TREE_OPERAND (base, 0);
1116 if (TREE_CODE (addr) == SSA_NAME)
1117 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1121 /* Frees data reference DR. */
1123 void
1124 free_data_ref (data_reference_p dr)
1126 DR_ACCESS_FNS (dr).release ();
1127 free (dr);
1130 /* Analyze memory reference MEMREF, which is accessed in STMT.
1131 The reference is a read if IS_READ is true, otherwise it is a write.
1132 IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
1133 within STMT, i.e. that it might not occur even if STMT is executed
1134 and runs to completion.
1136 Return the data_reference description of MEMREF. NEST is the outermost
1137 loop in which the reference should be instantiated, LOOP is the loop
1138 in which the data reference should be analyzed. */
1140 struct data_reference *
1141 create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
1142 bool is_read, bool is_conditional_in_stmt)
1144 struct data_reference *dr;
1146 if (dump_file && (dump_flags & TDF_DETAILS))
1148 fprintf (dump_file, "Creating dr for ");
1149 print_generic_expr (dump_file, memref, TDF_SLIM);
1150 fprintf (dump_file, "\n");
1153 dr = XCNEW (struct data_reference);
1154 DR_STMT (dr) = stmt;
1155 DR_REF (dr) = memref;
1156 DR_IS_READ (dr) = is_read;
1157 DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;
1159 dr_analyze_innermost (&DR_INNERMOST (dr), memref,
1160 nest != NULL ? loop : NULL);
1161 dr_analyze_indices (dr, nest, loop);
1162 dr_analyze_alias (dr);
1164 if (dump_file && (dump_flags & TDF_DETAILS))
1166 unsigned i;
1167 fprintf (dump_file, "\tbase_address: ");
1168 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1169 fprintf (dump_file, "\n\toffset from base address: ");
1170 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1171 fprintf (dump_file, "\n\tconstant offset from base address: ");
1172 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1173 fprintf (dump_file, "\n\tstep: ");
1174 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1175 fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
1176 fprintf (dump_file, "\n\tbase misalignment: %d",
1177 DR_BASE_MISALIGNMENT (dr));
1178 fprintf (dump_file, "\n\toffset alignment: %d",
1179 DR_OFFSET_ALIGNMENT (dr));
1180 fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
1181 fprintf (dump_file, "\n\tbase_object: ");
1182 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1183 fprintf (dump_file, "\n");
1184 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1186 fprintf (dump_file, "\tAccess function %d: ", i);
1187 print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1191 return dr;
1194 /* A helper function computes order between two tree epxressions T1 and T2.
1195 This is used in comparator functions sorting objects based on the order
1196 of tree expressions. The function returns -1, 0, or 1. */
1199 data_ref_compare_tree (tree t1, tree t2)
1201 int i, cmp;
1202 enum tree_code code;
1203 char tclass;
1205 if (t1 == t2)
1206 return 0;
1207 if (t1 == NULL)
1208 return -1;
1209 if (t2 == NULL)
1210 return 1;
1212 STRIP_USELESS_TYPE_CONVERSION (t1);
1213 STRIP_USELESS_TYPE_CONVERSION (t2);
1214 if (t1 == t2)
1215 return 0;
1217 if (TREE_CODE (t1) != TREE_CODE (t2)
1218 && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
1219 return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
1221 code = TREE_CODE (t1);
1222 switch (code)
1224 case INTEGER_CST:
1225 return tree_int_cst_compare (t1, t2);
1227 case STRING_CST:
1228 if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
1229 return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
1230 return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
1231 TREE_STRING_LENGTH (t1));
1233 case SSA_NAME:
1234 if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
1235 return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
1236 break;
1238 default:
1239 if (POLY_INT_CST_P (t1))
1240 return compare_sizes_for_sort (wi::to_poly_widest (t1),
1241 wi::to_poly_widest (t2));
1243 tclass = TREE_CODE_CLASS (code);
1245 /* For decls, compare their UIDs. */
1246 if (tclass == tcc_declaration)
1248 if (DECL_UID (t1) != DECL_UID (t2))
1249 return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
1250 break;
1252 /* For expressions, compare their operands recursively. */
1253 else if (IS_EXPR_CODE_CLASS (tclass))
1255 for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
1257 cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
1258 TREE_OPERAND (t2, i));
1259 if (cmp != 0)
1260 return cmp;
1263 else
1264 gcc_unreachable ();
1267 return 0;
1270 /* Return TRUE it's possible to resolve data dependence DDR by runtime alias
1271 check. */
1273 bool
1274 runtime_alias_check_p (ddr_p ddr, struct loop *loop, bool speed_p)
1276 if (dump_enabled_p ())
1278 dump_printf (MSG_NOTE, "consider run-time aliasing test between ");
1279 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_A (ddr)));
1280 dump_printf (MSG_NOTE, " and ");
1281 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_B (ddr)));
1282 dump_printf (MSG_NOTE, "\n");
1285 if (!speed_p)
1287 if (dump_enabled_p ())
1288 dump_printf (MSG_MISSED_OPTIMIZATION,
1289 "runtime alias check not supported when optimizing "
1290 "for size.\n");
1291 return false;
1294 /* FORNOW: We don't support versioning with outer-loop in either
1295 vectorization or loop distribution. */
1296 if (loop != NULL && loop->inner != NULL)
1298 if (dump_enabled_p ())
1299 dump_printf (MSG_MISSED_OPTIMIZATION,
1300 "runtime alias check not supported for outer loop.\n");
1301 return false;
1304 /* FORNOW: We don't support creating runtime alias tests for non-constant
1305 step. */
1306 if (TREE_CODE (DR_STEP (DDR_A (ddr))) != INTEGER_CST
1307 || TREE_CODE (DR_STEP (DDR_B (ddr))) != INTEGER_CST)
1309 if (dump_enabled_p ())
1310 dump_printf (MSG_MISSED_OPTIMIZATION,
1311 "runtime alias check not supported for non-constant "
1312 "step\n");
1313 return false;
1316 return true;
1319 /* Operator == between two dr_with_seg_len objects.
1321 This equality operator is used to make sure two data refs
1322 are the same one so that we will consider to combine the
1323 aliasing checks of those two pairs of data dependent data
1324 refs. */
1326 static bool
1327 operator == (const dr_with_seg_len& d1,
1328 const dr_with_seg_len& d2)
1330 return operand_equal_p (DR_BASE_ADDRESS (d1.dr),
1331 DR_BASE_ADDRESS (d2.dr), 0)
1332 && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
1333 && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
1334 && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0;
1337 /* Comparison function for sorting objects of dr_with_seg_len_pair_t
1338 so that we can combine aliasing checks in one scan. */
1340 static int
1341 comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
1343 const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
1344 const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
1345 const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
1346 const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;
1348 /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks
1349 if a and c have the same basic address snd step, and b and d have the same
1350 address and step. Therefore, if any a&c or b&d don't have the same address
1351 and step, we don't care the order of those two pairs after sorting. */
1352 int comp_res;
1354 if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
1355 DR_BASE_ADDRESS (b1.dr))) != 0)
1356 return comp_res;
1357 if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
1358 DR_BASE_ADDRESS (b2.dr))) != 0)
1359 return comp_res;
1360 if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
1361 DR_STEP (b1.dr))) != 0)
1362 return comp_res;
1363 if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
1364 DR_STEP (b2.dr))) != 0)
1365 return comp_res;
1366 if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
1367 DR_OFFSET (b1.dr))) != 0)
1368 return comp_res;
1369 if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
1370 DR_INIT (b1.dr))) != 0)
1371 return comp_res;
1372 if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
1373 DR_OFFSET (b2.dr))) != 0)
1374 return comp_res;
1375 if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
1376 DR_INIT (b2.dr))) != 0)
1377 return comp_res;
1379 return 0;
1382 /* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
1383 FACTOR is number of iterations that each data reference is accessed.
1385 Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
1386 we create an expression:
1388 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1389 || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
1391 for aliasing checks. However, in some cases we can decrease the number
1392 of checks by combining two checks into one. For example, suppose we have
1393 another pair of data refs store_ptr_0 & load_ptr_1, and if the following
1394 condition is satisfied:
1396 load_ptr_0 < load_ptr_1 &&
1397 load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
1399 (this condition means, in each iteration of vectorized loop, the accessed
1400 memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
1401 load_ptr_1.)
1403 we then can use only the following expression to finish the alising checks
1404 between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:
1406 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1407 || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))
1409 Note that we only consider that load_ptr_0 and load_ptr_1 have the same
1410 basic address. */
1412 void
1413 prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
1414 unsigned HOST_WIDE_INT factor)
1416 /* Sort the collected data ref pairs so that we can scan them once to
1417 combine all possible aliasing checks. */
1418 alias_pairs->qsort (comp_dr_with_seg_len_pair);
1420 /* Scan the sorted dr pairs and check if we can combine alias checks
1421 of two neighboring dr pairs. */
1422 for (size_t i = 1; i < alias_pairs->length (); ++i)
1424 /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2). */
1425 dr_with_seg_len *dr_a1 = &(*alias_pairs)[i-1].first,
1426 *dr_b1 = &(*alias_pairs)[i-1].second,
1427 *dr_a2 = &(*alias_pairs)[i].first,
1428 *dr_b2 = &(*alias_pairs)[i].second;
1430 /* Remove duplicate data ref pairs. */
1431 if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
1433 if (dump_enabled_p ())
1435 dump_printf (MSG_NOTE, "found equal ranges ");
1436 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a1->dr));
1437 dump_printf (MSG_NOTE, ", ");
1438 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b1->dr));
1439 dump_printf (MSG_NOTE, " and ");
1440 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a2->dr));
1441 dump_printf (MSG_NOTE, ", ");
1442 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b2->dr));
1443 dump_printf (MSG_NOTE, "\n");
1445 alias_pairs->ordered_remove (i--);
1446 continue;
1449 if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
1451 /* We consider the case that DR_B1 and DR_B2 are same memrefs,
1452 and DR_A1 and DR_A2 are two consecutive memrefs. */
1453 if (*dr_a1 == *dr_a2)
1455 std::swap (dr_a1, dr_b1);
1456 std::swap (dr_a2, dr_b2);
1459 if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
1460 DR_BASE_ADDRESS (dr_a2->dr), 0)
1461 || !operand_equal_p (DR_OFFSET (dr_a1->dr),
1462 DR_OFFSET (dr_a2->dr), 0)
1463 || !tree_fits_shwi_p (DR_INIT (dr_a1->dr))
1464 || !tree_fits_shwi_p (DR_INIT (dr_a2->dr)))
1465 continue;
1467 /* Only merge const step data references. */
1468 if (TREE_CODE (DR_STEP (dr_a1->dr)) != INTEGER_CST
1469 || TREE_CODE (DR_STEP (dr_a2->dr)) != INTEGER_CST)
1470 continue;
1472 /* DR_A1 and DR_A2 must goes in the same direction. */
1473 if (tree_int_cst_compare (DR_STEP (dr_a1->dr), size_zero_node)
1474 != tree_int_cst_compare (DR_STEP (dr_a2->dr), size_zero_node))
1475 continue;
1477 bool neg_step
1478 = (tree_int_cst_compare (DR_STEP (dr_a1->dr), size_zero_node) < 0);
1480 /* We need to compute merged segment length at compilation time for
1481 dr_a1 and dr_a2, which is impossible if either one has non-const
1482 segment length. */
1483 if ((!tree_fits_uhwi_p (dr_a1->seg_len)
1484 || !tree_fits_uhwi_p (dr_a2->seg_len))
1485 && tree_int_cst_compare (DR_STEP (dr_a1->dr),
1486 DR_STEP (dr_a2->dr)) != 0)
1487 continue;
1489 /* Make sure dr_a1 starts left of dr_a2. */
1490 if (tree_int_cst_lt (DR_INIT (dr_a2->dr), DR_INIT (dr_a1->dr)))
1491 std::swap (*dr_a1, *dr_a2);
1493 bool do_remove = false;
1494 wide_int diff = (wi::to_wide (DR_INIT (dr_a2->dr))
1495 - wi::to_wide (DR_INIT (dr_a1->dr)));
1496 wide_int min_seg_len_b;
1497 tree new_seg_len;
1499 if (TREE_CODE (dr_b1->seg_len) == INTEGER_CST)
1500 min_seg_len_b = wi::abs (wi::to_wide (dr_b1->seg_len));
1501 else
1502 min_seg_len_b
1503 = factor * wi::abs (wi::to_wide (DR_STEP (dr_b1->dr)));
1505 /* Now we try to merge alias check dr_a1 & dr_b and dr_a2 & dr_b.
1507 Case A:
1508 check if the following condition is satisfied:
1510 DIFF - SEGMENT_LENGTH_A < SEGMENT_LENGTH_B
1512 where DIFF = DR_A2_INIT - DR_A1_INIT. However,
1513 SEGMENT_LENGTH_A or SEGMENT_LENGTH_B may not be constant so we
1514 have to make a best estimation. We can get the minimum value
1515 of SEGMENT_LENGTH_B as a constant, represented by MIN_SEG_LEN_B,
1516 then either of the following two conditions can guarantee the
1517 one above:
1519 1: DIFF <= MIN_SEG_LEN_B
1520 2: DIFF - SEGMENT_LENGTH_A < MIN_SEG_LEN_B
1521 Because DIFF - SEGMENT_LENGTH_A is done in sizetype, we need
1522 to take care of wrapping behavior in it.
1524 Case B:
1525 If the left segment does not extend beyond the start of the
1526 right segment the new segment length is that of the right
1527 plus the segment distance. The condition is like:
1529 DIFF >= SEGMENT_LENGTH_A ;SEGMENT_LENGTH_A is a constant.
1531 Note 1: Case A.2 and B combined together effectively merges every
1532 dr_a1 & dr_b and dr_a2 & dr_b when SEGMENT_LENGTH_A is const.
1534 Note 2: Above description is based on positive DR_STEP, we need to
1535 take care of negative DR_STEP for wrapping behavior. See PR80815
1536 for more information. */
1537 if (neg_step)
1539 /* Adjust diff according to access size of both references. */
1540 tree size_a1 = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a1->dr)));
1541 tree size_a2 = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a2->dr)));
1542 diff += wi::to_wide (size_a2) - wi::to_wide (size_a1);
1543 /* Case A.1. */
1544 if (wi::leu_p (diff, min_seg_len_b)
1545 /* Case A.2 and B combined. */
1546 || (tree_fits_uhwi_p (dr_a2->seg_len)))
1548 if (tree_fits_uhwi_p (dr_a1->seg_len)
1549 && tree_fits_uhwi_p (dr_a2->seg_len))
1551 wide_int min_len
1552 = wi::umin (wi::to_wide (dr_a1->seg_len) - diff,
1553 wi::to_wide (dr_a2->seg_len));
1554 new_seg_len = wide_int_to_tree (sizetype, min_len);
1556 else
1557 new_seg_len
1558 = size_binop (MINUS_EXPR, dr_a2->seg_len,
1559 wide_int_to_tree (sizetype, diff));
1561 dr_a2->seg_len = new_seg_len;
1562 do_remove = true;
1565 else
1567 /* Case A.1. */
1568 if (wi::leu_p (diff, min_seg_len_b)
1569 /* Case A.2 and B combined. */
1570 || (tree_fits_uhwi_p (dr_a1->seg_len)))
1572 if (tree_fits_uhwi_p (dr_a1->seg_len)
1573 && tree_fits_uhwi_p (dr_a2->seg_len))
1575 wide_int max_len
1576 = wi::umax (wi::to_wide (dr_a2->seg_len) + diff,
1577 wi::to_wide (dr_a1->seg_len));
1578 new_seg_len = wide_int_to_tree (sizetype, max_len);
1580 else
1581 new_seg_len
1582 = size_binop (PLUS_EXPR, dr_a2->seg_len,
1583 wide_int_to_tree (sizetype, diff));
1585 dr_a1->seg_len = new_seg_len;
1586 do_remove = true;
1590 if (do_remove)
1592 if (dump_enabled_p ())
1594 dump_printf (MSG_NOTE, "merging ranges for ");
1595 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a1->dr));
1596 dump_printf (MSG_NOTE, ", ");
1597 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b1->dr));
1598 dump_printf (MSG_NOTE, " and ");
1599 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a2->dr));
1600 dump_printf (MSG_NOTE, ", ");
1601 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b2->dr));
1602 dump_printf (MSG_NOTE, "\n");
1604 alias_pairs->ordered_remove (neg_step ? i - 1 : i);
1605 i--;
1611 /* Given LOOP's two data references and segment lengths described by DR_A
1612 and DR_B, create expression checking if the two addresses ranges intersect
1613 with each other based on index of the two addresses. This can only be
1614 done if DR_A and DR_B referring to the same (array) object and the index
1615 is the only difference. For example:
1617 DR_A DR_B
1618 data-ref arr[i] arr[j]
1619 base_object arr arr
1620 index {i_0, +, 1}_loop {j_0, +, 1}_loop
1622 The addresses and their index are like:
1624 |<- ADDR_A ->| |<- ADDR_B ->|
1625 ------------------------------------------------------->
1626 | | | | | | | | | |
1627 ------------------------------------------------------->
1628 i_0 ... i_0+4 j_0 ... j_0+4
1630 We can create expression based on index rather than address:
1632 (i_0 + 4 < j_0 || j_0 + 4 < i_0)
1634 Note evolution step of index needs to be considered in comparison. */
1636 static bool
1637 create_intersect_range_checks_index (struct loop *loop, tree *cond_expr,
1638 const dr_with_seg_len& dr_a,
1639 const dr_with_seg_len& dr_b)
1641 if (integer_zerop (DR_STEP (dr_a.dr))
1642 || integer_zerop (DR_STEP (dr_b.dr))
1643 || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
1644 return false;
1646 if (!tree_fits_uhwi_p (dr_a.seg_len) || !tree_fits_uhwi_p (dr_b.seg_len))
1647 return false;
1649 if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
1650 return false;
1652 if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
1653 return false;
1655 if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
1656 return false;
1658 gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
1660 bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
1661 unsigned HOST_WIDE_INT abs_step
1662 = absu_hwi (tree_to_shwi (DR_STEP (dr_a.dr)));
1664 unsigned HOST_WIDE_INT seg_len1 = tree_to_uhwi (dr_a.seg_len);
1665 unsigned HOST_WIDE_INT seg_len2 = tree_to_uhwi (dr_b.seg_len);
1666 /* Infer the number of iterations with which the memory segment is accessed
1667 by DR. In other words, alias is checked if memory segment accessed by
1668 DR_A in some iterations intersect with memory segment accessed by DR_B
1669 in the same amount iterations.
1670 Note segnment length is a linear function of number of iterations with
1671 DR_STEP as the coefficient. */
1672 unsigned HOST_WIDE_INT niter_len1 = (seg_len1 + abs_step - 1) / abs_step;
1673 unsigned HOST_WIDE_INT niter_len2 = (seg_len2 + abs_step - 1) / abs_step;
1675 unsigned int i;
1676 for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
1678 tree access1 = DR_ACCESS_FN (dr_a.dr, i);
1679 tree access2 = DR_ACCESS_FN (dr_b.dr, i);
1680 /* Two indices must be the same if they are not scev, or not scev wrto
1681 current loop being vecorized. */
1682 if (TREE_CODE (access1) != POLYNOMIAL_CHREC
1683 || TREE_CODE (access2) != POLYNOMIAL_CHREC
1684 || CHREC_VARIABLE (access1) != (unsigned)loop->num
1685 || CHREC_VARIABLE (access2) != (unsigned)loop->num)
1687 if (operand_equal_p (access1, access2, 0))
1688 continue;
1690 return false;
1692 /* The two indices must have the same step. */
1693 if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
1694 return false;
1696 tree idx_step = CHREC_RIGHT (access1);
1697 /* Index must have const step, otherwise DR_STEP won't be constant. */
1698 gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
1699 /* Index must evaluate in the same direction as DR. */
1700 gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
1702 tree min1 = CHREC_LEFT (access1);
1703 tree min2 = CHREC_LEFT (access2);
1704 if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
1705 return false;
1707 /* Ideally, alias can be checked against loop's control IV, but we
1708 need to prove linear mapping between control IV and reference
1709 index. Although that should be true, we check against (array)
1710 index of data reference. Like segment length, index length is
1711 linear function of the number of iterations with index_step as
1712 the coefficient, i.e, niter_len * idx_step. */
1713 tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
1714 build_int_cst (TREE_TYPE (min1),
1715 niter_len1));
1716 tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
1717 build_int_cst (TREE_TYPE (min2),
1718 niter_len2));
1719 tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1);
1720 tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2);
1721 /* Adjust ranges for negative step. */
1722 if (neg_step)
1724 min1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1), max1, idx_step);
1725 max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1),
1726 CHREC_LEFT (access1), idx_step);
1727 min2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2), max2, idx_step);
1728 max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2),
1729 CHREC_LEFT (access2), idx_step);
1731 tree part_cond_expr
1732 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1733 fold_build2 (LE_EXPR, boolean_type_node, max1, min2),
1734 fold_build2 (LE_EXPR, boolean_type_node, max2, min1));
1735 if (*cond_expr)
1736 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1737 *cond_expr, part_cond_expr);
1738 else
1739 *cond_expr = part_cond_expr;
1741 return true;
1744 /* Given two data references and segment lengths described by DR_A and DR_B,
1745 create expression checking if the two addresses ranges intersect with
1746 each other:
1748 ((DR_A_addr_0 + DR_A_segment_length_0) <= DR_B_addr_0)
1749 || (DR_B_addr_0 + DER_B_segment_length_0) <= DR_A_addr_0)) */
1751 static void
1752 create_intersect_range_checks (struct loop *loop, tree *cond_expr,
1753 const dr_with_seg_len& dr_a,
1754 const dr_with_seg_len& dr_b)
1756 *cond_expr = NULL_TREE;
1757 if (create_intersect_range_checks_index (loop, cond_expr, dr_a, dr_b))
1758 return;
1760 tree segment_length_a = dr_a.seg_len;
1761 tree segment_length_b = dr_b.seg_len;
1762 tree addr_base_a = DR_BASE_ADDRESS (dr_a.dr);
1763 tree addr_base_b = DR_BASE_ADDRESS (dr_b.dr);
1764 tree offset_a = DR_OFFSET (dr_a.dr), offset_b = DR_OFFSET (dr_b.dr);
1766 offset_a = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_a),
1767 offset_a, DR_INIT (dr_a.dr));
1768 offset_b = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_b),
1769 offset_b, DR_INIT (dr_b.dr));
1770 addr_base_a = fold_build_pointer_plus (addr_base_a, offset_a);
1771 addr_base_b = fold_build_pointer_plus (addr_base_b, offset_b);
1773 tree seg_a_min = addr_base_a;
1774 tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
1775 /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
1776 bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
1777 [a, a+12) */
1778 if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
1780 tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a.dr)));
1781 seg_a_min = fold_build_pointer_plus (seg_a_max, unit_size);
1782 seg_a_max = fold_build_pointer_plus (addr_base_a, unit_size);
1785 tree seg_b_min = addr_base_b;
1786 tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
1787 if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
1789 tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b.dr)));
1790 seg_b_min = fold_build_pointer_plus (seg_b_max, unit_size);
1791 seg_b_max = fold_build_pointer_plus (addr_base_b, unit_size);
1793 *cond_expr
1794 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1795 fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
1796 fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
1799 /* Create a conditional expression that represents the run-time checks for
1800 overlapping of address ranges represented by a list of data references
1801 pairs passed in ALIAS_PAIRS. Data references are in LOOP. The returned
1802 COND_EXPR is the conditional expression to be used in the if statement
1803 that controls which version of the loop gets executed at runtime. */
1805 void
1806 create_runtime_alias_checks (struct loop *loop,
1807 vec<dr_with_seg_len_pair_t> *alias_pairs,
1808 tree * cond_expr)
1810 tree part_cond_expr;
1812 for (size_t i = 0, s = alias_pairs->length (); i < s; ++i)
1814 const dr_with_seg_len& dr_a = (*alias_pairs)[i].first;
1815 const dr_with_seg_len& dr_b = (*alias_pairs)[i].second;
1817 if (dump_enabled_p ())
1819 dump_printf (MSG_NOTE, "create runtime check for data references ");
1820 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
1821 dump_printf (MSG_NOTE, " and ");
1822 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
1823 dump_printf (MSG_NOTE, "\n");
1826 /* Create condition expression for each pair data references. */
1827 create_intersect_range_checks (loop, &part_cond_expr, dr_a, dr_b);
1828 if (*cond_expr)
1829 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1830 *cond_expr, part_cond_expr);
1831 else
1832 *cond_expr = part_cond_expr;
1836 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1837 expressions. */
1838 static bool
1839 dr_equal_offsets_p1 (tree offset1, tree offset2)
1841 bool res;
1843 STRIP_NOPS (offset1);
1844 STRIP_NOPS (offset2);
1846 if (offset1 == offset2)
1847 return true;
1849 if (TREE_CODE (offset1) != TREE_CODE (offset2)
1850 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1851 return false;
1853 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1854 TREE_OPERAND (offset2, 0));
1856 if (!res || !BINARY_CLASS_P (offset1))
1857 return res;
1859 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1860 TREE_OPERAND (offset2, 1));
1862 return res;
1865 /* Check if DRA and DRB have equal offsets. */
1866 bool
1867 dr_equal_offsets_p (struct data_reference *dra,
1868 struct data_reference *drb)
1870 tree offset1, offset2;
1872 offset1 = DR_OFFSET (dra);
1873 offset2 = DR_OFFSET (drb);
1875 return dr_equal_offsets_p1 (offset1, offset2);
1878 /* Returns true if FNA == FNB. */
1880 static bool
1881 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1883 unsigned i, n = fna.length ();
1885 if (n != fnb.length ())
1886 return false;
1888 for (i = 0; i < n; i++)
1889 if (!operand_equal_p (fna[i], fnb[i], 0))
1890 return false;
1892 return true;
1895 /* If all the functions in CF are the same, returns one of them,
1896 otherwise returns NULL. */
1898 static affine_fn
1899 common_affine_function (conflict_function *cf)
1901 unsigned i;
1902 affine_fn comm;
1904 if (!CF_NONTRIVIAL_P (cf))
1905 return affine_fn ();
1907 comm = cf->fns[0];
1909 for (i = 1; i < cf->n; i++)
1910 if (!affine_function_equal_p (comm, cf->fns[i]))
1911 return affine_fn ();
1913 return comm;
1916 /* Returns the base of the affine function FN. */
1918 static tree
1919 affine_function_base (affine_fn fn)
1921 return fn[0];
1924 /* Returns true if FN is a constant. */
1926 static bool
1927 affine_function_constant_p (affine_fn fn)
1929 unsigned i;
1930 tree coef;
1932 for (i = 1; fn.iterate (i, &coef); i++)
1933 if (!integer_zerop (coef))
1934 return false;
1936 return true;
1939 /* Returns true if FN is the zero constant function. */
1941 static bool
1942 affine_function_zero_p (affine_fn fn)
1944 return (integer_zerop (affine_function_base (fn))
1945 && affine_function_constant_p (fn));
1948 /* Returns a signed integer type with the largest precision from TA
1949 and TB. */
1951 static tree
1952 signed_type_for_types (tree ta, tree tb)
1954 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1955 return signed_type_for (ta);
1956 else
1957 return signed_type_for (tb);
1960 /* Applies operation OP on affine functions FNA and FNB, and returns the
1961 result. */
1963 static affine_fn
1964 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1966 unsigned i, n, m;
1967 affine_fn ret;
1968 tree coef;
1970 if (fnb.length () > fna.length ())
1972 n = fna.length ();
1973 m = fnb.length ();
1975 else
1977 n = fnb.length ();
1978 m = fna.length ();
1981 ret.create (m);
1982 for (i = 0; i < n; i++)
1984 tree type = signed_type_for_types (TREE_TYPE (fna[i]),
1985 TREE_TYPE (fnb[i]));
1986 ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
1989 for (; fna.iterate (i, &coef); i++)
1990 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1991 coef, integer_zero_node));
1992 for (; fnb.iterate (i, &coef); i++)
1993 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1994 integer_zero_node, coef));
1996 return ret;
1999 /* Returns the sum of affine functions FNA and FNB. */
2001 static affine_fn
2002 affine_fn_plus (affine_fn fna, affine_fn fnb)
2004 return affine_fn_op (PLUS_EXPR, fna, fnb);
2007 /* Returns the difference of affine functions FNA and FNB. */
2009 static affine_fn
2010 affine_fn_minus (affine_fn fna, affine_fn fnb)
2012 return affine_fn_op (MINUS_EXPR, fna, fnb);
2015 /* Frees affine function FN. */
2017 static void
2018 affine_fn_free (affine_fn fn)
2020 fn.release ();
2023 /* Determine for each subscript in the data dependence relation DDR
2024 the distance. */
2026 static void
2027 compute_subscript_distance (struct data_dependence_relation *ddr)
2029 conflict_function *cf_a, *cf_b;
2030 affine_fn fn_a, fn_b, diff;
2032 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
2034 unsigned int i;
2036 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2038 struct subscript *subscript;
2040 subscript = DDR_SUBSCRIPT (ddr, i);
2041 cf_a = SUB_CONFLICTS_IN_A (subscript);
2042 cf_b = SUB_CONFLICTS_IN_B (subscript);
2044 fn_a = common_affine_function (cf_a);
2045 fn_b = common_affine_function (cf_b);
2046 if (!fn_a.exists () || !fn_b.exists ())
2048 SUB_DISTANCE (subscript) = chrec_dont_know;
2049 return;
2051 diff = affine_fn_minus (fn_a, fn_b);
2053 if (affine_function_constant_p (diff))
2054 SUB_DISTANCE (subscript) = affine_function_base (diff);
2055 else
2056 SUB_DISTANCE (subscript) = chrec_dont_know;
2058 affine_fn_free (diff);
2063 /* Returns the conflict function for "unknown". */
2065 static conflict_function *
2066 conflict_fn_not_known (void)
2068 conflict_function *fn = XCNEW (conflict_function);
2069 fn->n = NOT_KNOWN;
2071 return fn;
2074 /* Returns the conflict function for "independent". */
2076 static conflict_function *
2077 conflict_fn_no_dependence (void)
2079 conflict_function *fn = XCNEW (conflict_function);
2080 fn->n = NO_DEPENDENCE;
2082 return fn;
2085 /* Returns true if the address of OBJ is invariant in LOOP. */
2087 static bool
2088 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
2090 while (handled_component_p (obj))
2092 if (TREE_CODE (obj) == ARRAY_REF)
2094 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
2095 need to check the stride and the lower bound of the reference. */
2096 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2097 loop->num)
2098 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
2099 loop->num))
2100 return false;
2102 else if (TREE_CODE (obj) == COMPONENT_REF)
2104 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2105 loop->num))
2106 return false;
2108 obj = TREE_OPERAND (obj, 0);
2111 if (!INDIRECT_REF_P (obj)
2112 && TREE_CODE (obj) != MEM_REF)
2113 return true;
2115 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
2116 loop->num);
2119 /* Returns false if we can prove that data references A and B do not alias,
2120 true otherwise. If LOOP_NEST is false no cross-iteration aliases are
2121 considered. */
bool
dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
		bool loop_nest)
{
  tree addr_a = DR_BASE_OBJECT (a);
  tree addr_b = DR_BASE_OBJECT (b);

  /* If we are not processing a loop nest but scalar code we
     do not need to care about possible cross-iteration dependences
     and thus can process the full original reference.  Do so,
     similar to how loop invariant motion applies extra offset-based
     disambiguation.  */
  if (!loop_nest)
    {
      aff_tree off1, off2;
      poly_widest_int size1, size2;
      get_inner_reference_aff (DR_REF (a), &off1, &size1);
      get_inner_reference_aff (DR_REF (b), &off2, &size2);
      /* Form OFF2 - OFF1 and check whether, given the two access sizes,
	 the byte ranges can possibly overlap.  */
      aff_combination_scale (&off1, -1);
      aff_combination_add (&off2, &off1);
      if (aff_comb_cannot_overlap_p (&off2, size1, size2))
	return false;
    }

  /* (TARGET_)MEM_REFs within the same dependence clique but with
     different bases are guaranteed not to alias (this is how restrict
     information is represented on memory references).  */
  if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
      && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
      && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
      && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
    return false;

  /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
     do not know the size of the base-object.  So we cannot do any
     offset/overlap based analysis but have to rely on points-to
     information only.  */
  if (TREE_CODE (addr_a) == MEM_REF
      && (DR_UNCONSTRAINED_BASE (a)
	  || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
    {
      /* For true dependences we can apply TBAA.  */
      if (flag_strict_aliasing
	  && DR_IS_WRITE (a) && DR_IS_READ (b)
	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
				     get_alias_set (DR_REF (b))))
	return false;
      if (TREE_CODE (addr_b) == MEM_REF)
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       TREE_OPERAND (addr_b, 0));
      else
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       build_fold_addr_expr (addr_b));
    }
  else if (TREE_CODE (addr_b) == MEM_REF
	   && (DR_UNCONSTRAINED_BASE (b)
	       || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
    {
      /* For true dependences we can apply TBAA.  */
      if (flag_strict_aliasing
	  && DR_IS_WRITE (a) && DR_IS_READ (b)
	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
				     get_alias_set (DR_REF (b))))
	return false;
      if (TREE_CODE (addr_a) == MEM_REF)
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       TREE_OPERAND (addr_b, 0));
      else
	return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
				       TREE_OPERAND (addr_b, 0));
    }

  /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
     that is being subsetted in the loop nest.  Dispatch on the kind of
     dependence so the oracle can apply the appropriate refinements.  */
  if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
    return refs_output_dependent_p (addr_a, addr_b);
  else if (DR_IS_READ (a) && DR_IS_WRITE (b))
    return refs_anti_dependent_p (addr_a, addr_b);
  return refs_may_alias_p (addr_a, addr_b);
}
2201 /* REF_A and REF_B both satisfy access_fn_component_p. Return true
2202 if it is meaningful to compare their associated access functions
2203 when checking for dependencies. */
2205 static bool
2206 access_fn_components_comparable_p (tree ref_a, tree ref_b)
2208 /* Allow pairs of component refs from the following sets:
2210 { REALPART_EXPR, IMAGPART_EXPR }
2211 { COMPONENT_REF }
2212 { ARRAY_REF }. */
2213 tree_code code_a = TREE_CODE (ref_a);
2214 tree_code code_b = TREE_CODE (ref_b);
2215 if (code_a == IMAGPART_EXPR)
2216 code_a = REALPART_EXPR;
2217 if (code_b == IMAGPART_EXPR)
2218 code_b = REALPART_EXPR;
2219 if (code_a != code_b)
2220 return false;
2222 if (TREE_CODE (ref_a) == COMPONENT_REF)
2223 /* ??? We cannot simply use the type of operand #0 of the refs here as
2224 the Fortran compiler smuggles type punning into COMPONENT_REFs.
2225 Use the DECL_CONTEXT of the FIELD_DECLs instead. */
2226 return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
2227 == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
2229 return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
2230 TREE_TYPE (TREE_OPERAND (ref_b, 0)));
/* Initialize a data dependence relation between data accesses A and
   B.  NB_LOOPS is the number of loops surrounding the references: the
   size of the classic distance/direction vectors.  */

struct data_dependence_relation *
initialize_data_dependence_relation (struct data_reference *a,
				     struct data_reference *b,
				     vec<loop_p> loop_nest)
{
  struct data_dependence_relation *res;
  unsigned int i;

  res = XCNEW (struct data_dependence_relation);
  DDR_A (res) = a;
  DDR_B (res) = b;
  DDR_LOOP_NEST (res).create (0);
  DDR_SUBSCRIPTS (res).create (0);
  DDR_DIR_VECTS (res).create (0);
  DDR_DIST_VECTS (res).create (0);

  /* A missing reference means we cannot say anything.  */
  if (a == NULL || b == NULL)
    {
      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
      return res;
    }

  /* If the data references do not alias, then they are independent.  */
  if (!dr_may_alias_p (a, b, loop_nest.exists ()))
    {
      DDR_ARE_DEPENDENT (res) = chrec_known;
      return res;
    }

  unsigned int num_dimensions_a = DR_NUM_DIMENSIONS (a);
  unsigned int num_dimensions_b = DR_NUM_DIMENSIONS (b);
  if (num_dimensions_a == 0 || num_dimensions_b == 0)
    {
      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
      return res;
    }

  /* For unconstrained bases, the root (highest-indexed) subscript
     describes a variation in the base of the original DR_REF rather
     than a component access.  We have no type that accurately describes
     the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
     applying this subscript) so limit the search to the last real
     component access.

     E.g. for:

       void
       f (int a[][8], int b[][8])
       {
	 for (int i = 0; i < 8; ++i)
	   a[i * 2][0] = b[i][0];
       }

     the a and b accesses have a single ARRAY_REF component reference [0]
     but have two subscripts.  */
  if (DR_UNCONSTRAINED_BASE (a))
    num_dimensions_a -= 1;
  if (DR_UNCONSTRAINED_BASE (b))
    num_dimensions_b -= 1;

  /* These structures describe sequences of component references in
     DR_REF (A) and DR_REF (B).  Each component reference is tied to a
     specific access function.  */
  struct {
    /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
       DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
       indices.  In C notation, these are the indices of the rightmost
       component references; e.g. for a sequence .b.c.d, the start
       index is for .d.  */
    unsigned int start_a;
    unsigned int start_b;

    /* The sequence contains LENGTH consecutive access functions from
       each DR.  */
    unsigned int length;

    /* The enclosing objects for the A and B sequences respectively,
       i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
       and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied.  */
    tree object_a;
    tree object_b;
  } full_seq = {}, struct_seq = {};

  /* Before each iteration of the loop:

     - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
     - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B).  */
  unsigned int index_a = 0;
  unsigned int index_b = 0;
  tree ref_a = DR_REF (a);
  tree ref_b = DR_REF (b);

  /* Now walk the component references from the final DR_REFs back up to
     the enclosing base objects.  Each component reference corresponds
     to one access function in the DR, with access function 0 being for
     the final DR_REF and the highest-indexed access function being the
     one that is applied to the base of the DR.

     Look for a sequence of component references whose access functions
     are comparable (see access_fn_components_comparable_p).  If more
     than one such sequence exists, pick the one nearest the base
     (which is the leftmost sequence in C notation).  Store this sequence
     in FULL_SEQ.

     For example, if we have:

	struct foo { struct bar s; ... } (*a)[10], (*b)[10];

	A: a[0][i].s.c.d
	B: __real b[0][i].s.e[i].f

     (where d is the same type as the real component of f) then the access
     functions would be:

	 0   1   2   3
     A:  .d  .c  .s  [i]

	 0   1   2   3   4   5
     B:  __real  .f  [i]  .e  .s  [i]

     The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
     and [i] is an ARRAY_REF.  However, the A1/B3 column contains two
     COMPONENT_REF accesses for struct bar, so is comparable.  Likewise
     the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
     so is comparable.  The A3/B5 column contains two ARRAY_REFs that
     index foo[10] arrays, so is again comparable.  The sequence is
     therefore:

	A: [1, 3]  (i.e. [i].s.c)
	B: [3, 5]  (i.e. [i].s.e)

     Also look for sequences of component references whose access
     functions are comparable and whose enclosing objects have the same
     RECORD_TYPE.  Store this sequence in STRUCT_SEQ.  In the above
     example, STRUCT_SEQ would be:

	A: [1, 2]  (i.e. s.c)
	B: [3, 4]  (i.e. s.e)  */
  while (index_a < num_dimensions_a && index_b < num_dimensions_b)
    {
      /* REF_A and REF_B must be one of the component access types
	 allowed by dr_analyze_indices.  */
      gcc_checking_assert (access_fn_component_p (ref_a));
      gcc_checking_assert (access_fn_component_p (ref_b));

      /* Get the immediately-enclosing objects for REF_A and REF_B,
	 i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
	 and DR_ACCESS_FN (B, INDEX_B).  */
      tree object_a = TREE_OPERAND (ref_a, 0);
      tree object_b = TREE_OPERAND (ref_b, 0);

      tree type_a = TREE_TYPE (object_a);
      tree type_b = TREE_TYPE (object_b);
      if (access_fn_components_comparable_p (ref_a, ref_b))
	{
	  /* This pair of component accesses is comparable for dependence
	     analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
	     DR_ACCESS_FN (B, INDEX_B) in the sequence.  */
	  if (full_seq.start_a + full_seq.length != index_a
	      || full_seq.start_b + full_seq.length != index_b)
	    {
	      /* The accesses don't extend the current sequence,
		 so start a new one here.  */
	      full_seq.start_a = index_a;
	      full_seq.start_b = index_b;
	      full_seq.length = 0;
	    }

	  /* Add this pair of references to the sequence.  */
	  full_seq.length += 1;
	  full_seq.object_a = object_a;
	  full_seq.object_b = object_b;

	  /* If the enclosing objects are structures (and thus have the
	     same RECORD_TYPE), record the new sequence in STRUCT_SEQ.  */
	  if (TREE_CODE (type_a) == RECORD_TYPE)
	    struct_seq = full_seq;

	  /* Move to the next containing reference for both A and B.  */
	  ref_a = object_a;
	  ref_b = object_b;
	  index_a += 1;
	  index_b += 1;
	  continue;
	}

      /* Try to approach equal type sizes.  */
      if (!COMPLETE_TYPE_P (type_a)
	  || !COMPLETE_TYPE_P (type_b)
	  || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
	  || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
	break;

      /* Advance whichever side accesses the smaller enclosing object
	 (both sides when the sizes are equal).  */
      unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
      unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
      if (size_a <= size_b)
	{
	  index_a += 1;
	  ref_a = object_a;
	}
      if (size_b <= size_a)
	{
	  index_b += 1;
	  ref_b = object_b;
	}
    }

  /* See whether FULL_SEQ ends at the base and whether the two bases
     are equal.  We do not care about TBAA or alignment info so we can
     use OEP_ADDRESS_OF to avoid false negatives.  */
  tree base_a = DR_BASE_OBJECT (a);
  tree base_b = DR_BASE_OBJECT (b);
  bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
		      && full_seq.start_b + full_seq.length == num_dimensions_b
		      && DR_UNCONSTRAINED_BASE (a) == DR_UNCONSTRAINED_BASE (b)
		      && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
		      && types_compatible_p (TREE_TYPE (base_a),
					     TREE_TYPE (base_b))
		      && (!loop_nest.exists ()
			  || (object_address_invariant_in_loop_p
			      (loop_nest[0], base_a))));

  /* If the bases are the same, we can include the base variation too.
     E.g. the b accesses in:

       for (int i = 0; i < n; ++i)
	 b[i + 4][0] = b[i][0];

     have a definite dependence distance of 4, while for:

       for (int i = 0; i < n; ++i)
	 a[i + 4][0] = b[i][0];

     the dependence distance depends on the gap between a and b.

     If the bases are different then we can only rely on the sequence
     rooted at a structure access, since arrays are allowed to overlap
     arbitrarily and change shape arbitrarily.  E.g. we treat this as
     valid code:

       int a[256];
       ...
       ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];

     where two lvalues with the same int[4][3] type overlap, and where
     both lvalues are distinct from the object's declared type.  */
  if (same_base_p)
    {
      if (DR_UNCONSTRAINED_BASE (a))
	full_seq.length += 1;
    }
  else
    full_seq = struct_seq;

  /* Punt if we didn't find a suitable sequence.  */
  if (full_seq.length == 0)
    {
      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
      return res;
    }

  if (!same_base_p)
    {
      /* Partial overlap is possible for different bases when strict aliasing
	 is not in effect.  It's also possible if either base involves a union
	 access; e.g. for:

	   struct s1 { int a[2]; };
	   struct s2 { struct s1 b; int c; };
	   struct s3 { int d; struct s1 e; };
	   union u { struct s2 f; struct s3 g; } *p, *q;

	 the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
	 "p->g.e" (base "p->g") and might partially overlap the s1 at
	 "q->g.e" (base "q->g").  */
      if (!flag_strict_aliasing
	  || ref_contains_union_access_p (full_seq.object_a)
	  || ref_contains_union_access_p (full_seq.object_b))
	{
	  DDR_ARE_DEPENDENT (res) = chrec_dont_know;
	  return res;
	}

      DDR_COULD_BE_INDEPENDENT_P (res) = true;
      if (!loop_nest.exists ()
	  || (object_address_invariant_in_loop_p (loop_nest[0],
						  full_seq.object_a)
	      && object_address_invariant_in_loop_p (loop_nest[0],
						     full_seq.object_b)))
	{
	  DDR_OBJECT_A (res) = full_seq.object_a;
	  DDR_OBJECT_B (res) = full_seq.object_b;
	}
    }

  DDR_AFFINE_P (res) = true;
  DDR_ARE_DEPENDENT (res) = NULL_TREE;
  DDR_SUBSCRIPTS (res).create (full_seq.length);
  DDR_LOOP_NEST (res) = loop_nest;
  DDR_INNER_LOOP (res) = 0;
  DDR_SELF_REFERENCE (res) = false;

  /* Pair up the comparable access functions and initialize each
     subscript's overlap information to "unknown".  */
  for (i = 0; i < full_seq.length; ++i)
    {
      struct subscript *subscript;

      subscript = XNEW (struct subscript);
      SUB_ACCESS_FN (subscript, 0) = DR_ACCESS_FN (a, full_seq.start_a + i);
      SUB_ACCESS_FN (subscript, 1) = DR_ACCESS_FN (b, full_seq.start_b + i);
      SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
      SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
      SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
      SUB_DISTANCE (subscript) = chrec_dont_know;
      DDR_SUBSCRIPTS (res).safe_push (subscript);
    }

  return res;
}
2556 /* Frees memory used by the conflict function F. */
2558 static void
2559 free_conflict_function (conflict_function *f)
2561 unsigned i;
2563 if (CF_NONTRIVIAL_P (f))
2565 for (i = 0; i < f->n; i++)
2566 affine_fn_free (f->fns[i]);
2568 free (f);
2571 /* Frees memory used by SUBSCRIPTS. */
2573 static void
2574 free_subscripts (vec<subscript_p> subscripts)
2576 unsigned i;
2577 subscript_p s;
2579 FOR_EACH_VEC_ELT (subscripts, i, s)
2581 free_conflict_function (s->conflicting_iterations_in_a);
2582 free_conflict_function (s->conflicting_iterations_in_b);
2583 free (s);
2585 subscripts.release ();
/* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
   description.  */

static inline void
finalize_ddr_dependent (struct data_dependence_relation *ddr,
			tree chrec)
{
  DDR_ARE_DEPENDENT (ddr) = chrec;
  /* The per-subscript overlap data is no longer meaningful once the
     overall answer is recorded; free it and leave an empty vector.  */
  free_subscripts (DDR_SUBSCRIPTS (ddr));
  DDR_SUBSCRIPTS (ddr).create (0);
}
/* The dependence relation DDR cannot be represented by a distance
   vector.  */

static inline void
non_affine_dependence_relation (struct data_dependence_relation *ddr)
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");

  DDR_AFFINE_P (ddr) = false;
}
2614 /* This section contains the classic Banerjee tests. */
2616 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
2617 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
2619 static inline bool
2620 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
2622 return (evolution_function_is_constant_p (chrec_a)
2623 && evolution_function_is_constant_p (chrec_b));
2626 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
2627 variable, i.e., if the SIV (Single Index Variable) test is true. */
2629 static bool
2630 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
2632 if ((evolution_function_is_constant_p (chrec_a)
2633 && evolution_function_is_univariate_p (chrec_b))
2634 || (evolution_function_is_constant_p (chrec_b)
2635 && evolution_function_is_univariate_p (chrec_a)))
2636 return true;
2638 if (evolution_function_is_univariate_p (chrec_a)
2639 && evolution_function_is_univariate_p (chrec_b))
2641 switch (TREE_CODE (chrec_a))
2643 case POLYNOMIAL_CHREC:
2644 switch (TREE_CODE (chrec_b))
2646 case POLYNOMIAL_CHREC:
2647 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
2648 return false;
2649 /* FALLTHRU */
2651 default:
2652 return true;
2655 default:
2656 return true;
2660 return false;
2663 /* Creates a conflict function with N dimensions. The affine functions
2664 in each dimension follow. */
2666 static conflict_function *
2667 conflict_fn (unsigned n, ...)
2669 unsigned i;
2670 conflict_function *ret = XCNEW (conflict_function);
2671 va_list ap;
2673 gcc_assert (n > 0 && n <= MAX_DIM);
2674 va_start (ap, n);
2676 ret->n = n;
2677 for (i = 0; i < n; i++)
2678 ret->fns[i] = va_arg (ap, affine_fn);
2679 va_end (ap);
2681 return ret;
2684 /* Returns constant affine function with value CST. */
2686 static affine_fn
2687 affine_fn_cst (tree cst)
2689 affine_fn fn;
2690 fn.create (1);
2691 fn.quick_push (cst);
2692 return fn;
2695 /* Returns affine function with single variable, CST + COEF * x_DIM. */
2697 static affine_fn
2698 affine_fn_univar (tree cst, unsigned dim, tree coef)
2700 affine_fn fn;
2701 fn.create (dim + 1);
2702 unsigned i;
2704 gcc_assert (dim > 0);
2705 fn.quick_push (cst);
2706 for (i = 1; i < dim; i++)
2707 fn.quick_push (integer_zero_node);
2708 fn.quick_push (coef);
2709 return fn;
2712 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
2713 *OVERLAPS_B are initialized to the functions that describe the
2714 relation between the elements accessed twice by CHREC_A and
2715 CHREC_B. For k >= 0, the following property is verified:
2717 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2719 static void
2720 analyze_ziv_subscript (tree chrec_a,
2721 tree chrec_b,
2722 conflict_function **overlaps_a,
2723 conflict_function **overlaps_b,
2724 tree *last_conflicts)
2726 tree type, difference;
2727 dependence_stats.num_ziv++;
2729 if (dump_file && (dump_flags & TDF_DETAILS))
2730 fprintf (dump_file, "(analyze_ziv_subscript \n");
2732 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2733 chrec_a = chrec_convert (type, chrec_a, NULL);
2734 chrec_b = chrec_convert (type, chrec_b, NULL);
2735 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2737 switch (TREE_CODE (difference))
2739 case INTEGER_CST:
2740 if (integer_zerop (difference))
2742 /* The difference is equal to zero: the accessed index
2743 overlaps for each iteration in the loop. */
2744 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2745 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2746 *last_conflicts = chrec_dont_know;
2747 dependence_stats.num_ziv_dependent++;
2749 else
2751 /* The accesses do not overlap. */
2752 *overlaps_a = conflict_fn_no_dependence ();
2753 *overlaps_b = conflict_fn_no_dependence ();
2754 *last_conflicts = integer_zero_node;
2755 dependence_stats.num_ziv_independent++;
2757 break;
2759 default:
2760 /* We're not sure whether the indexes overlap. For the moment,
2761 conservatively answer "don't know". */
2762 if (dump_file && (dump_flags & TDF_DETAILS))
2763 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
2765 *overlaps_a = conflict_fn_not_known ();
2766 *overlaps_b = conflict_fn_not_known ();
2767 *last_conflicts = chrec_dont_know;
2768 dependence_stats.num_ziv_unimplemented++;
2769 break;
2772 if (dump_file && (dump_flags & TDF_DETAILS))
2773 fprintf (dump_file, ")\n");
2776 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
2777 and only if it fits to the int type. If this is not the case, or the
2778 bound on the number of iterations of LOOP could not be derived, returns
2779 chrec_dont_know. */
2781 static tree
2782 max_stmt_executions_tree (struct loop *loop)
2784 widest_int nit;
2786 if (!max_stmt_executions (loop, &nit))
2787 return chrec_dont_know;
2789 if (!wi::fits_to_tree_p (nit, unsigned_type_node))
2790 return chrec_dont_know;
2792 return wide_int_to_tree (unsigned_type_node, nit);
2795 /* Determine whether the CHREC is always positive/negative. If the expression
2796 cannot be statically analyzed, return false, otherwise set the answer into
2797 VALUE. */
2799 static bool
2800 chrec_is_positive (tree chrec, bool *value)
2802 bool value0, value1, value2;
2803 tree end_value, nb_iter;
2805 switch (TREE_CODE (chrec))
2807 case POLYNOMIAL_CHREC:
2808 if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
2809 || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
2810 return false;
2812 /* FIXME -- overflows. */
2813 if (value0 == value1)
2815 *value = value0;
2816 return true;
2819 /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
2820 and the proof consists in showing that the sign never
2821 changes during the execution of the loop, from 0 to
2822 loop->nb_iterations. */
2823 if (!evolution_function_is_affine_p (chrec))
2824 return false;
2826 nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
2827 if (chrec_contains_undetermined (nb_iter))
2828 return false;
2830 #if 0
2831 /* TODO -- If the test is after the exit, we may decrease the number of
2832 iterations by one. */
2833 if (after_exit)
2834 nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
2835 #endif
2837 end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
2839 if (!chrec_is_positive (end_value, &value2))
2840 return false;
2842 *value = value0;
2843 return value0 == value1;
2845 case INTEGER_CST:
2846 switch (tree_int_cst_sgn (chrec))
2848 case -1:
2849 *value = false;
2850 break;
2851 case 1:
2852 *value = true;
2853 break;
2854 default:
2855 return false;
2857 return true;
2859 default:
2860 return false;
/* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
   constant, and CHREC_B is an affine function.  *OVERLAPS_A and
   *OVERLAPS_B are initialized to the functions that describe the
   relation between the elements accessed twice by CHREC_A and
   CHREC_B.  For k >= 0, the following property is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_siv_subscript_cst_affine (tree chrec_a,
				  tree chrec_b,
				  conflict_function **overlaps_a,
				  conflict_function **overlaps_b,
				  tree *last_conflicts)
{
  bool value0, value1, value2;
  tree type, difference, tmp;

  /* Compare in a common signed type; DIFFERENCE is the gap between the
     initial value of the affine CHREC_B and the constant CHREC_A.  */
  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
  chrec_a = chrec_convert (type, chrec_a, NULL);
  chrec_b = chrec_convert (type, chrec_b, NULL);
  difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);

  /* Special case overlap in the first iteration.  */
  if (integer_zerop (difference))
    {
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = integer_one_node;
      return;
    }

  if (!chrec_is_positive (initial_condition (difference), &value0))
    {
      /* The sign of the gap is unknown: give up.  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "siv test failed: chrec is not positive.\n");

      dependence_stats.num_siv_unimplemented++;
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      return;
    }
  else
    {
      if (value0 == false)
	{
	  /* CHREC_B starts below CHREC_A; it can only reach CHREC_A if
	     its step is positive.  */
	  if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "siv test failed: chrec not positive.\n");

	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	      dependence_stats.num_siv_unimplemented++;
	      return;
	    }
	  else
	    {
	      if (value1 == true)
		{
		  /* Example:
		     chrec_a = 12
		     chrec_b = {10, +, 1}
		  */

		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
		    {
		      HOST_WIDE_INT numiter;
		      struct loop *loop = get_chrec_loop (chrec_b);

		      /* The overlap happens after |difference| / step
			 iterations of CHREC_B.  */
		      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
		      tmp = fold_build2 (EXACT_DIV_EXPR, type,
					 fold_build1 (ABS_EXPR, type, difference),
					 CHREC_RIGHT (chrec_b));
		      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
		      *last_conflicts = integer_one_node;

		      /* Perform weak-zero siv test to see if overlap is
			 outside the loop bounds.  */
		      numiter = max_stmt_executions_int (loop);

		      if (numiter >= 0
			  && compare_tree_int (tmp, numiter) > 0)
			{
			  free_conflict_function (*overlaps_a);
			  free_conflict_function (*overlaps_b);
			  *overlaps_a = conflict_fn_no_dependence ();
			  *overlaps_b = conflict_fn_no_dependence ();
			  *last_conflicts = integer_zero_node;
			  dependence_stats.num_siv_independent++;
			  return;
			}
		      dependence_stats.num_siv_dependent++;
		      return;
		    }

		  /* When the step does not divide the difference, there are
		     no overlaps.  */
		  else
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      dependence_stats.num_siv_independent++;
		      return;
		    }
		}

	      else
		{
		  /* Example:
		     chrec_a = 12
		     chrec_b = {10, +, -1}

		     In this case, chrec_a will not overlap with chrec_b.  */
		  *overlaps_a = conflict_fn_no_dependence ();
		  *overlaps_b = conflict_fn_no_dependence ();
		  *last_conflicts = integer_zero_node;
		  dependence_stats.num_siv_independent++;
		  return;
		}
	    }
	}
      else
	{
	  /* CHREC_B starts above CHREC_A; it can only come back down to
	     CHREC_A if its step is negative.  */
	  if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "siv test failed: chrec not positive.\n");

	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	      dependence_stats.num_siv_unimplemented++;
	      return;
	    }
	  else
	    {
	      if (value2 == false)
		{
		  /* Example:
		     chrec_a = 3
		     chrec_b = {10, +, -1}
		  */
		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
		    {
		      HOST_WIDE_INT numiter;
		      struct loop *loop = get_chrec_loop (chrec_b);

		      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
		      tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
					 CHREC_RIGHT (chrec_b));
		      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
		      *last_conflicts = integer_one_node;

		      /* Perform weak-zero siv test to see if overlap is
			 outside the loop bounds.  */
		      numiter = max_stmt_executions_int (loop);

		      if (numiter >= 0
			  && compare_tree_int (tmp, numiter) > 0)
			{
			  free_conflict_function (*overlaps_a);
			  free_conflict_function (*overlaps_b);
			  *overlaps_a = conflict_fn_no_dependence ();
			  *overlaps_b = conflict_fn_no_dependence ();
			  *last_conflicts = integer_zero_node;
			  dependence_stats.num_siv_independent++;
			  return;
			}
		      dependence_stats.num_siv_dependent++;
		      return;
		    }

		  /* When the step does not divide the difference, there
		     are no overlaps.  */
		  else
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      dependence_stats.num_siv_independent++;
		      return;
		    }
		}
	      else
		{
		  /* Example:
		     chrec_a = 3
		     chrec_b = {4, +, 1}

		     In this case, chrec_a will not overlap with chrec_b.  */
		  *overlaps_a = conflict_fn_no_dependence ();
		  *overlaps_b = conflict_fn_no_dependence ();
		  *last_conflicts = integer_zero_node;
		  dependence_stats.num_siv_independent++;
		  return;
		}
	    }
	}
    }
}
3071 /* Helper recursive function for initializing the matrix A. Returns
3072 the initial value of CHREC. */
3074 static tree
3075 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
3077 gcc_assert (chrec);
3079 switch (TREE_CODE (chrec))
3081 case POLYNOMIAL_CHREC:
3082 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
3083 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
3085 case PLUS_EXPR:
3086 case MULT_EXPR:
3087 case MINUS_EXPR:
3089 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3090 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
3092 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
3095 CASE_CONVERT:
3097 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3098 return chrec_convert (chrec_type (chrec), op, NULL);
3101 case BIT_NOT_EXPR:
3103 /* Handle ~X as -1 - X. */
3104 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3105 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
3106 build_int_cst (TREE_TYPE (chrec), -1), op);
3109 case INTEGER_CST:
3110 return chrec;
3112 default:
3113 gcc_unreachable ();
3114 return NULL_TREE;
3118 #define FLOOR_DIV(x,y) ((x) / (y))
3120 /* Solves the special case of the Diophantine equation:
3121 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
3123 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
3124 number of iterations that loops X and Y run. The overlaps will be
3125 constructed as evolutions in dimension DIM. */
3127 static void
3128 compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
3129 HOST_WIDE_INT step_a,
3130 HOST_WIDE_INT step_b,
3131 affine_fn *overlaps_a,
3132 affine_fn *overlaps_b,
3133 tree *last_conflicts, int dim)
3135 if (((step_a > 0 && step_b > 0)
3136 || (step_a < 0 && step_b < 0)))
3138 HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
3139 HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;
3141 gcd_steps_a_b = gcd (step_a, step_b);
3142 step_overlaps_a = step_b / gcd_steps_a_b;
3143 step_overlaps_b = step_a / gcd_steps_a_b;
3145 if (niter > 0)
3147 tau2 = FLOOR_DIV (niter, step_overlaps_a);
3148 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
3149 last_conflict = tau2;
3150 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
3152 else
3153 *last_conflicts = chrec_dont_know;
3155 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
3156 build_int_cst (NULL_TREE,
3157 step_overlaps_a));
3158 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
3159 build_int_cst (NULL_TREE,
3160 step_overlaps_b));
3163 else
3165 *overlaps_a = affine_fn_cst (integer_zero_node);
3166 *overlaps_b = affine_fn_cst (integer_zero_node);
3167 *last_conflicts = integer_zero_node;
/* Solves the special case of a Diophantine equation where CHREC_A is
   an affine bivariate function, and CHREC_B is an affine univariate
   function.  For example,

   | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z

   has the following overlapping functions:

   | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
   | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
   | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v

   FORNOW: This is a specialized implementation for a case occurring in
   a common benchmark.  Implement the general algorithm.  */

static void
compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
				      conflict_function **overlaps_a,
				      conflict_function **overlaps_b,
				      tree *last_conflicts)
{
  bool xz_p, yz_p, xyz_p;
  HOST_WIDE_INT step_x, step_y, step_z;
  HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
  affine_fn overlaps_a_xz, overlaps_b_xz;
  affine_fn overlaps_a_yz, overlaps_b_yz;
  affine_fn overlaps_a_xyz, overlaps_b_xyz;
  affine_fn ova1, ova2, ovb;
  tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;

  /* X is the outer evolution of CHREC_A, Y its inner evolution and
     Z the (single) evolution of CHREC_B.  */
  step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
  step_y = int_cst_value (CHREC_RIGHT (chrec_a));
  step_z = int_cst_value (CHREC_RIGHT (chrec_b));

  niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
  niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
  niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));

  /* Without iteration counts for all three loops we cannot bound the
     conflicts: answer "don't know".  */
  if (niter_x < 0 || niter_y < 0 || niter_z < 0)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      return;
    }

  /* Solve the univariate sub-problems for the pairs (X, Z), (Y, Z)
     and for the combined step (X + Y, Z).  */
  niter = MIN (niter_x, niter_z);
  compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
					   &overlaps_a_xz,
					   &overlaps_b_xz,
					   &last_conflicts_xz, 1);
  niter = MIN (niter_y, niter_z);
  compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
					   &overlaps_a_yz,
					   &overlaps_b_yz,
					   &last_conflicts_yz, 2);
  niter = MIN (niter_x, niter_z);
  niter = MIN (niter_y, niter);
  compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
					   &overlaps_a_xyz,
					   &overlaps_b_xyz,
					   &last_conflicts_xyz, 3);

  xz_p = !integer_zerop (last_conflicts_xz);
  yz_p = !integer_zerop (last_conflicts_yz);
  xyz_p = !integer_zerop (last_conflicts_xyz);

  if (xz_p || yz_p || xyz_p)
    {
      /* Combine the contributing sub-solutions by summing their affine
	 functions.  The temporaries hold the previous values so they
	 can be freed after affine_fn_plus allocates the sums.  */
      ova1 = affine_fn_cst (integer_zero_node);
      ova2 = affine_fn_cst (integer_zero_node);
      ovb = affine_fn_cst (integer_zero_node);
      if (xz_p)
	{
	  affine_fn t0 = ova1;
	  affine_fn t2 = ovb;

	  ova1 = affine_fn_plus (ova1, overlaps_a_xz);
	  ovb = affine_fn_plus (ovb, overlaps_b_xz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  *last_conflicts = last_conflicts_xz;
	}
      if (yz_p)
	{
	  affine_fn t0 = ova2;
	  affine_fn t2 = ovb;

	  ova2 = affine_fn_plus (ova2, overlaps_a_yz);
	  ovb = affine_fn_plus (ovb, overlaps_b_yz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  *last_conflicts = last_conflicts_yz;
	}
      if (xyz_p)
	{
	  affine_fn t0 = ova1;
	  affine_fn t2 = ova2;
	  affine_fn t4 = ovb;

	  ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
	  ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
	  ovb = affine_fn_plus (ovb, overlaps_b_xyz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  affine_fn_free (t4);
	  *last_conflicts = last_conflicts_xyz;
	}
      /* NOTE(review): *LAST_CONFLICTS keeps whichever of the xz/yz/xyz
	 bounds was stored last rather than an explicit maximum —
	 presumably conservative enough for this FORNOW special case;
	 verify if generalizing.  */
      *overlaps_a = conflict_fn (2, ova1, ova2);
      *overlaps_b = conflict_fn (1, ovb);
    }
  else
    {
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = integer_zero_node;
    }

  affine_fn_free (overlaps_a_xz);
  affine_fn_free (overlaps_b_xz);
  affine_fn_free (overlaps_a_yz);
  affine_fn_free (overlaps_b_yz);
  affine_fn_free (overlaps_a_xyz);
  affine_fn_free (overlaps_b_xyz);
}
3300 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
3302 static void
3303 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
3304 int size)
3306 memcpy (vec2, vec1, size * sizeof (*vec1));
3309 /* Copy the elements of M x N matrix MAT1 to MAT2. */
3311 static void
3312 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
3313 int m, int n)
3315 int i;
3317 for (i = 0; i < m; i++)
3318 lambda_vector_copy (mat1[i], mat2[i], n);
3321 /* Store the N x N identity matrix in MAT. */
3323 static void
3324 lambda_matrix_id (lambda_matrix mat, int size)
3326 int i, j;
3328 for (i = 0; i < size; i++)
3329 for (j = 0; j < size; j++)
3330 mat[i][j] = (i == j) ? 1 : 0;
3333 /* Return the first nonzero element of vector VEC1 between START and N.
3334 We must have START <= N. Returns N if VEC1 is the zero vector. */
3336 static int
3337 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
3339 int j = start;
3340 while (j < n && vec1[j] == 0)
3341 j++;
3342 return j;
3345 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
3346 R2 = R2 + CONST1 * R1. */
3348 static void
3349 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
3351 int i;
3353 if (const1 == 0)
3354 return;
3356 for (i = 0; i < n; i++)
3357 mat[r2][i] += const1 * mat[r1][i];
3360 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
3361 and store the result in VEC2. */
3363 static void
3364 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
3365 int size, int const1)
3367 int i;
3369 if (const1 == 0)
3370 lambda_vector_clear (vec2, size);
3371 else
3372 for (i = 0; i < size; i++)
3373 vec2[i] = const1 * vec1[i];
3376 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
3378 static void
3379 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
3380 int size)
3382 lambda_vector_mult_const (vec1, vec2, size, -1);
3385 /* Negate row R1 of matrix MAT which has N columns. */
3387 static void
3388 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
3390 lambda_vector_negate (mat[r1], mat[r1], n);
3393 /* Return true if two vectors are equal. */
3395 static bool
3396 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
3398 int i;
3399 for (i = 0; i < size; i++)
3400 if (vec1[i] != vec2[i])
3401 return false;
3402 return true;
3405 /* Given an M x N integer matrix A, this function determines an M x
3406 M unimodular matrix U, and an M x N echelon matrix S such that
3407 "U.A = S". This decomposition is also known as "right Hermite".
3409 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
3410 Restructuring Compilers" Utpal Banerjee. */
3412 static void
3413 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
3414 lambda_matrix S, lambda_matrix U)
3416 int i, j, i0 = 0;
/* Start from S = A and U = identity; every row operation below is
   applied to both so that the invariant U.A = S is preserved.  */
3418 lambda_matrix_copy (A, S, m, n);
3419 lambda_matrix_id (U, m);
3421 for (j = 0; j < n; j++)
3423 if (lambda_vector_first_nz (S[j], m, i0) < m)
3425 ++i0;
/* Euclidean-style elimination: repeatedly subtract the right
   multiple of the row above and swap, until S[i][j] is zero.  */
3426 for (i = m - 1; i >= i0; i--)
3428 while (S[i][j] != 0)
3430 int sigma, factor, a, b;
3432 a = S[i-1][j];
3433 b = S[i][j];
/* FACTOR is the quotient a/b rounded toward zero, with the sign
   of a*b.  */
3434 sigma = (a * b < 0) ? -1: 1;
3435 a = abs (a);
3436 b = abs (b);
3437 factor = sigma * (a / b);
3439 lambda_matrix_row_add (S, n, i, i-1, -factor);
3440 std::swap (S[i], S[i-1]);
/* Mirror the same row operation and swap on U to keep U.A = S.  */
3442 lambda_matrix_row_add (U, m, i, i-1, -factor);
3443 std::swap (U[i], U[i-1]);
3450 /* Determines the overlapping elements due to accesses CHREC_A and
3451 CHREC_B, that are affine functions. This function cannot handle
3452 symbolic evolution functions, ie. when initial conditions are
3453 parameters, because it uses lambda matrices of integers. */
3455 static void
3456 analyze_subscript_affine_affine (tree chrec_a,
3457 tree chrec_b,
3458 conflict_function **overlaps_a,
3459 conflict_function **overlaps_b,
3460 tree *last_conflicts)
3462 unsigned nb_vars_a, nb_vars_b, dim;
3463 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
3464 lambda_matrix A, U, S;
3465 struct obstack scratch_obstack;
3467 if (eq_evolutions_p (chrec_a, chrec_b))
3469 /* The accessed index overlaps for each iteration in the
3470 loop. */
3471 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3472 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3473 *last_conflicts = chrec_dont_know;
3474 return;
3476 if (dump_file && (dump_flags & TDF_DETAILS))
3477 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
3479 /* For determining the initial intersection, we have to solve a
3480 Diophantine equation. This is the most time consuming part.
3482 For answering to the question: "Is there a dependence?" we have
3483 to prove that there exists a solution to the Diophantine
3484 equation, and that the solution is in the iteration domain,
3485 i.e. the solution is positive or zero, and that the solution
3486 happens before the upper bound loop.nb_iterations. Otherwise
3487 there is no dependence. This function outputs a description of
3488 the iterations that hold the intersections. */
3490 nb_vars_a = nb_vars_in_chrec (chrec_a);
3491 nb_vars_b = nb_vars_in_chrec (chrec_b);
3493 gcc_obstack_init (&scratch_obstack);
/* A is the column vector of the steps of both chrecs (B's steps are
   entered negated); U and S receive the right Hermite decomposition
   of A, all allocated on the scratch obstack.  */
3495 dim = nb_vars_a + nb_vars_b;
3496 U = lambda_matrix_new (dim, dim, &scratch_obstack);
3497 A = lambda_matrix_new (dim, 1, &scratch_obstack);
3498 S = lambda_matrix_new (dim, 1, &scratch_obstack);
/* GAMMA is the constant term of the Diophantine equation built from
   the initial values of the two chrecs.  */
3500 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
3501 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
3502 gamma = init_b - init_a;
3504 /* Don't do all the hard work of solving the Diophantine equation
3505 when we already know the solution: for example,
3506 | {3, +, 1}_1
3507 | {3, +, 4}_2
3508 | gamma = 3 - 3 = 0.
3509 Then the first overlap occurs during the first iterations:
3510 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
3512 if (gamma == 0)
3514 if (nb_vars_a == 1 && nb_vars_b == 1)
3516 HOST_WIDE_INT step_a, step_b;
3517 HOST_WIDE_INT niter, niter_a, niter_b;
3518 affine_fn ova, ovb;
3520 niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
3521 niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
3522 niter = MIN (niter_a, niter_b);
3523 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
3524 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
3526 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
3527 &ova, &ovb,
3528 last_conflicts, 1);
3529 *overlaps_a = conflict_fn (1, ova);
3530 *overlaps_b = conflict_fn (1, ovb);
3533 else if (nb_vars_a == 2 && nb_vars_b == 1)
3534 compute_overlap_steps_for_affine_1_2
3535 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
3537 else if (nb_vars_a == 1 && nb_vars_b == 2)
3538 compute_overlap_steps_for_affine_1_2
3539 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
3541 else
3543 if (dump_file && (dump_flags & TDF_DETAILS))
3544 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
3545 *overlaps_a = conflict_fn_not_known ();
3546 *overlaps_b = conflict_fn_not_known ();
3547 *last_conflicts = chrec_dont_know;
3549 goto end_analyze_subs_aa;
3552 /* U.A = S */
3553 lambda_matrix_right_hermite (A, dim, 1, S, U);
/* Normalize so the leading entry of S is nonnegative; negating the
   corresponding row of U keeps U.A = S.  */
3555 if (S[0][0] < 0)
3557 S[0][0] *= -1;
3558 lambda_matrix_row_negate (U, dim, 0);
3560 gcd_alpha_beta = S[0][0];
3562 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
3563 but that is a quite strange case. Instead of ICEing, answer
3564 don't know. */
3565 if (gcd_alpha_beta == 0)
3567 *overlaps_a = conflict_fn_not_known ();
3568 *overlaps_b = conflict_fn_not_known ();
3569 *last_conflicts = chrec_dont_know;
3570 goto end_analyze_subs_aa;
3573 /* The classic "gcd-test". */
3574 if (!int_divides_p (gcd_alpha_beta, gamma))
3576 /* The "gcd-test" has determined that there is no integer
3577 solution, i.e. there is no dependence. */
3578 *overlaps_a = conflict_fn_no_dependence ();
3579 *overlaps_b = conflict_fn_no_dependence ();
3580 *last_conflicts = integer_zero_node;
3583 /* Both access functions are univariate. This includes SIV and MIV cases. */
3584 else if (nb_vars_a == 1 && nb_vars_b == 1)
3586 /* Both functions should have the same evolution sign. */
3587 if (((A[0][0] > 0 && -A[1][0] > 0)
3588 || (A[0][0] < 0 && -A[1][0] < 0)))
3590 /* The solutions are given by:
3592 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
3593 | [u21 u22] [y0]
3595 For a given integer t. Using the following variables,
3597 | i0 = u11 * gamma / gcd_alpha_beta
3598 | j0 = u12 * gamma / gcd_alpha_beta
3599 | i1 = u21
3600 | j1 = u22
3602 the solutions are:
3604 | x0 = i0 + i1 * t,
3605 | y0 = j0 + j1 * t. */
3606 HOST_WIDE_INT i0, j0, i1, j1;
3608 i0 = U[0][0] * gamma / gcd_alpha_beta;
3609 j0 = U[0][1] * gamma / gcd_alpha_beta;
3610 i1 = U[1][0];
3611 j1 = U[1][1];
/* A zero direction coefficient with a negative base means all the
   solutions lie at negative iterations.  */
3613 if ((i1 == 0 && i0 < 0)
3614 || (j1 == 0 && j0 < 0))
3616 /* There is no solution.
3617 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
3618 falls in here, but for the moment we don't look at the
3619 upper bound of the iteration domain. */
3620 *overlaps_a = conflict_fn_no_dependence ();
3621 *overlaps_b = conflict_fn_no_dependence ();
3622 *last_conflicts = integer_zero_node;
3623 goto end_analyze_subs_aa;
3626 if (i1 > 0 && j1 > 0)
3628 HOST_WIDE_INT niter_a
3629 = max_stmt_executions_int (get_chrec_loop (chrec_a));
3630 HOST_WIDE_INT niter_b
3631 = max_stmt_executions_int (get_chrec_loop (chrec_b));
3632 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
3634 /* (X0, Y0) is a solution of the Diophantine equation:
3635 "chrec_a (X0) = chrec_b (Y0)". */
3636 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
3637 CEIL (-j0, j1));
3638 HOST_WIDE_INT x0 = i1 * tau1 + i0;
3639 HOST_WIDE_INT y0 = j1 * tau1 + j0;
3641 /* (X1, Y1) is the smallest positive solution of the eq
3642 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
3643 first conflict occurs. */
3644 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
3645 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
3646 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
3648 if (niter > 0)
3650 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter_a - i0, i1),
3651 FLOOR_DIV (niter_b - j0, j1));
3652 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
3654 /* If the overlap occurs outside of the bounds of the
3655 loop, there is no dependence. */
3656 if (x1 >= niter_a || y1 >= niter_b)
3658 *overlaps_a = conflict_fn_no_dependence ();
3659 *overlaps_b = conflict_fn_no_dependence ();
3660 *last_conflicts = integer_zero_node;
3661 goto end_analyze_subs_aa;
3663 else
3664 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
3666 else
3667 *last_conflicts = chrec_dont_know;
/* Overlapping iterations of A are x1 + i1*k, of B are y1 + j1*k.  */
3669 *overlaps_a
3670 = conflict_fn (1,
3671 affine_fn_univar (build_int_cst (NULL_TREE, x1),
3673 build_int_cst (NULL_TREE, i1)));
3674 *overlaps_b
3675 = conflict_fn (1,
3676 affine_fn_univar (build_int_cst (NULL_TREE, y1),
3678 build_int_cst (NULL_TREE, j1)));
3680 else
3682 /* FIXME: For the moment, the upper bound of the
3683 iteration domain for i and j is not checked. */
3684 if (dump_file && (dump_flags & TDF_DETAILS))
3685 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
3686 *overlaps_a = conflict_fn_not_known ();
3687 *overlaps_b = conflict_fn_not_known ();
3688 *last_conflicts = chrec_dont_know;
3691 else
3693 if (dump_file && (dump_flags & TDF_DETAILS))
3694 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
3695 *overlaps_a = conflict_fn_not_known ();
3696 *overlaps_b = conflict_fn_not_known ();
3697 *last_conflicts = chrec_dont_know;
3700 else
3702 if (dump_file && (dump_flags & TDF_DETAILS))
3703 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
3704 *overlaps_a = conflict_fn_not_known ();
3705 *overlaps_b = conflict_fn_not_known ();
3706 *last_conflicts = chrec_dont_know;
/* Common exit: release the scratch matrices and dump the result.  */
3709 end_analyze_subs_aa:
3710 obstack_free (&scratch_obstack, NULL);
3711 if (dump_file && (dump_flags & TDF_DETAILS))
3713 fprintf (dump_file, " (overlaps_a = ");
3714 dump_conflict_function (dump_file, *overlaps_a);
3715 fprintf (dump_file, ")\n (overlaps_b = ");
3716 dump_conflict_function (dump_file, *overlaps_b);
3717 fprintf (dump_file, "))\n");
3721 /* Returns true when analyze_subscript_affine_affine can be used for
3722 determining the dependence relation between chrec_a and chrec_b,
3723 that contain symbols. This function modifies chrec_a and chrec_b
3724 such that the analysis result is the same, and such that they don't
3725 contain symbols, and then can safely be passed to the analyzer.
3727 Example: The analysis of the following tuples of evolutions produce
3728 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
3729 vs. {0, +, 1}_1
3731 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
3732 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
3735 static bool
3736 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
3738 tree diff, type, left_a, left_b, right_b;
/* Symbolic steps are out of reach; only the initial conditions may
   contain symbols for this rewrite to work.  */
3740 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
3741 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
3742 /* FIXME: For the moment not handled. Might be refined later. */
3743 return false;
/* The difference of the initial conditions must fold to a constant
   for the rebasing below to preserve the analysis result.  */
3745 type = chrec_type (*chrec_a);
3746 left_a = CHREC_LEFT (*chrec_a);
3747 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
3748 diff = chrec_fold_minus (type, left_a, left_b);
3750 if (!evolution_function_is_constant_p (diff))
3751 return false;
3753 if (dump_file && (dump_flags & TDF_DETAILS))
3754 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
/* Rebase both chrecs by the initial condition of B: A now starts at
   DIFF and B at zero, which leaves their relative overlap unchanged
   and removes the symbols.  */
3756 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
3757 diff, CHREC_RIGHT (*chrec_a));
3758 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
3759 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
3760 build_int_cst (type, 0),
3761 right_b);
3762 return true;
3765 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
3766 *OVERLAPS_B are initialized to the functions that describe the
3767 relation between the elements accessed twice by CHREC_A and
3768 CHREC_B. For k >= 0, the following property is verified:
3770 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
3772 static void
3773 analyze_siv_subscript (tree chrec_a,
3774 tree chrec_b,
3775 conflict_function **overlaps_a,
3776 conflict_function **overlaps_b,
3777 tree *last_conflicts,
3778 int loop_nest_num)
3780 dependence_stats.num_siv++;
3782 if (dump_file && (dump_flags & TDF_DETAILS))
3783 fprintf (dump_file, "(analyze_siv_subscript \n");
/* Constant vs. affine: delegate to the cst-affine analyzer, swapping
   the output slots when the constant side is CHREC_B.  */
3785 if (evolution_function_is_constant_p (chrec_a)
3786 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
3787 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
3788 overlaps_a, overlaps_b, last_conflicts);
3790 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
3791 && evolution_function_is_constant_p (chrec_b))
3792 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
3793 overlaps_b, overlaps_a, last_conflicts);
3795 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
3796 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
/* Affine vs. affine: the integer lambda-matrix solver handles only
   symbol-free chrecs, possibly after the rewrite performed by
   can_use_analyze_subscript_affine_affine.  */
3798 if (!chrec_contains_symbols (chrec_a)
3799 && !chrec_contains_symbols (chrec_b))
3801 analyze_subscript_affine_affine (chrec_a, chrec_b,
3802 overlaps_a, overlaps_b,
3803 last_conflicts);
3805 if (CF_NOT_KNOWN_P (*overlaps_a)
3806 || CF_NOT_KNOWN_P (*overlaps_b))
3807 dependence_stats.num_siv_unimplemented++;
3808 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
3809 || CF_NO_DEPENDENCE_P (*overlaps_b))
3810 dependence_stats.num_siv_independent++;
3811 else
3812 dependence_stats.num_siv_dependent++;
3814 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
3815 &chrec_b))
3817 analyze_subscript_affine_affine (chrec_a, chrec_b,
3818 overlaps_a, overlaps_b,
3819 last_conflicts);
3821 if (CF_NOT_KNOWN_P (*overlaps_a)
3822 || CF_NOT_KNOWN_P (*overlaps_b))
3823 dependence_stats.num_siv_unimplemented++;
3824 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
3825 || CF_NO_DEPENDENCE_P (*overlaps_b))
3826 dependence_stats.num_siv_independent++;
3827 else
3828 dependence_stats.num_siv_dependent++;
3830 else
3831 goto siv_subscript_dontknow;
3834 else
/* Any other combination is unimplemented: answer "don't know".  */
3836 siv_subscript_dontknow:;
3837 if (dump_file && (dump_flags & TDF_DETAILS))
3838 fprintf (dump_file, " siv test failed: unimplemented");
3839 *overlaps_a = conflict_fn_not_known ();
3840 *overlaps_b = conflict_fn_not_known ();
3841 *last_conflicts = chrec_dont_know;
3842 dependence_stats.num_siv_unimplemented++;
3845 if (dump_file && (dump_flags & TDF_DETAILS))
3846 fprintf (dump_file, ")\n");
3849 /* Returns false if we can prove that the greatest common divisor of the steps
3850 of CHREC does not divide CST, false otherwise. */
3852 static bool
3853 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
3855 HOST_WIDE_INT cd = 0, val;
3856 tree step;
3858 if (!tree_fits_shwi_p (cst))
3859 return true;
3860 val = tree_to_shwi (cst);
3862 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
3864 step = CHREC_RIGHT (chrec);
3865 if (!tree_fits_shwi_p (step))
3866 return true;
3867 cd = gcd (cd, tree_to_shwi (step));
3868 chrec = CHREC_LEFT (chrec);
3871 return val % cd == 0;
3874 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
3875 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
3876 functions that describe the relation between the elements accessed
3877 twice by CHREC_A and CHREC_B. For k >= 0, the following property
3878 is verified:
3880 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
3882 static void
3883 analyze_miv_subscript (tree chrec_a,
3884 tree chrec_b,
3885 conflict_function **overlaps_a,
3886 conflict_function **overlaps_b,
3887 tree *last_conflicts,
3888 struct loop *loop_nest)
3890 tree type, difference;
3892 dependence_stats.num_miv++;
3893 if (dump_file && (dump_flags & TDF_DETAILS))
3894 fprintf (dump_file, "(analyze_miv_subscript \n");
/* Convert both chrecs to a common signed type so that their
   difference can be folded.  */
3896 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3897 chrec_a = chrec_convert (type, chrec_a, NULL);
3898 chrec_b = chrec_convert (type, chrec_b, NULL);
3899 difference = chrec_fold_minus (type, chrec_a, chrec_b);
3901 if (eq_evolutions_p (chrec_a, chrec_b))
3903 /* Access functions are the same: all the elements are accessed
3904 in the same order. */
3905 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3906 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3907 *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
3908 dependence_stats.num_miv_dependent++;
3911 else if (evolution_function_is_constant_p (difference)
3912 /* For the moment, the following is verified:
3913 evolution_function_is_affine_multivariate_p (chrec_a,
3914 loop_nest->num) */
3915 && !gcd_of_steps_may_divide_p (chrec_a, difference))
3917 /* testsuite/.../ssa-chrec-33.c
3918 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
3920 The difference is 1, and all the evolution steps are multiples
3921 of 2, consequently there are no overlapping elements. */
3922 *overlaps_a = conflict_fn_no_dependence ();
3923 *overlaps_b = conflict_fn_no_dependence ();
3924 *last_conflicts = integer_zero_node;
3925 dependence_stats.num_miv_independent++;
3928 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
3929 && !chrec_contains_symbols (chrec_a)
3930 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
3931 && !chrec_contains_symbols (chrec_b))
3933 /* testsuite/.../ssa-chrec-35.c
3934 {0, +, 1}_2 vs. {0, +, 1}_3
3935 the overlapping elements are respectively located at iterations:
3936 {0, +, 1}_x and {0, +, 1}_x,
3937 in other words, we have the equality:
3938 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
3940 Other examples:
3941 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
3942 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
3944 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
3945 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
3947 analyze_subscript_affine_affine (chrec_a, chrec_b,
3948 overlaps_a, overlaps_b, last_conflicts);
3950 if (CF_NOT_KNOWN_P (*overlaps_a)
3951 || CF_NOT_KNOWN_P (*overlaps_b))
3952 dependence_stats.num_miv_unimplemented++;
3953 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
3954 || CF_NO_DEPENDENCE_P (*overlaps_b))
3955 dependence_stats.num_miv_independent++;
3956 else
3957 dependence_stats.num_miv_dependent++;
3960 else
3962 /* When the analysis is too difficult, answer "don't know". */
3963 if (dump_file && (dump_flags & TDF_DETAILS))
3964 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
3966 *overlaps_a = conflict_fn_not_known ();
3967 *overlaps_b = conflict_fn_not_known ();
3968 *last_conflicts = chrec_dont_know;
3969 dependence_stats.num_miv_unimplemented++;
3972 if (dump_file && (dump_flags & TDF_DETAILS))
3973 fprintf (dump_file, ")\n");
3976 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
3977 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
3978 OVERLAP_ITERATIONS_B are initialized with two functions that
3979 describe the iterations that contain conflicting elements.
3981 Remark: For an integer k >= 0, the following equality is true:
3983 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
3986 static void
3987 analyze_overlapping_iterations (tree chrec_a,
3988 tree chrec_b,
3989 conflict_function **overlap_iterations_a,
3990 conflict_function **overlap_iterations_b,
3991 tree *last_conflicts, struct loop *loop_nest)
3993 unsigned int lnn = loop_nest->num;
3995 dependence_stats.num_subscript_tests++;
3997 if (dump_file && (dump_flags & TDF_DETAILS))
3999 fprintf (dump_file, "(analyze_overlapping_iterations \n");
4000 fprintf (dump_file, " (chrec_a = ");
4001 print_generic_expr (dump_file, chrec_a);
4002 fprintf (dump_file, ")\n (chrec_b = ");
4003 print_generic_expr (dump_file, chrec_b);
4004 fprintf (dump_file, ")\n");
/* Missing or undetermined chrecs: give up immediately.  */
4007 if (chrec_a == NULL_TREE
4008 || chrec_b == NULL_TREE
4009 || chrec_contains_undetermined (chrec_a)
4010 || chrec_contains_undetermined (chrec_b))
4012 dependence_stats.num_subscript_undetermined++;
4014 *overlap_iterations_a = conflict_fn_not_known ();
4015 *overlap_iterations_b = conflict_fn_not_known ();
4018 /* If they are the same chrec, and are affine, they overlap
4019 on every iteration. */
4020 else if (eq_evolutions_p (chrec_a, chrec_b)
4021 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4022 || operand_equal_p (chrec_a, chrec_b, 0)))
4024 dependence_stats.num_same_subscript_function++;
4025 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4026 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4027 *last_conflicts = chrec_dont_know;
4030 /* If they aren't the same, and aren't affine, we can't do anything
4031 yet. */
4032 else if ((chrec_contains_symbols (chrec_a)
4033 || chrec_contains_symbols (chrec_b))
4034 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4035 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
4037 dependence_stats.num_subscript_undetermined++;
4038 *overlap_iterations_a = conflict_fn_not_known ();
4039 *overlap_iterations_b = conflict_fn_not_known ();
/* Dispatch on the number of index variables involved in the
   subscript pair: ZIV, SIV or MIV.  */
4042 else if (ziv_subscript_p (chrec_a, chrec_b))
4043 analyze_ziv_subscript (chrec_a, chrec_b,
4044 overlap_iterations_a, overlap_iterations_b,
4045 last_conflicts);
4047 else if (siv_subscript_p (chrec_a, chrec_b))
4048 analyze_siv_subscript (chrec_a, chrec_b,
4049 overlap_iterations_a, overlap_iterations_b,
4050 last_conflicts, lnn);
4052 else
4053 analyze_miv_subscript (chrec_a, chrec_b,
4054 overlap_iterations_a, overlap_iterations_b,
4055 last_conflicts, loop_nest);
4057 if (dump_file && (dump_flags & TDF_DETAILS))
4059 fprintf (dump_file, " (overlap_iterations_a = ");
4060 dump_conflict_function (dump_file, *overlap_iterations_a);
4061 fprintf (dump_file, ")\n (overlap_iterations_b = ");
4062 dump_conflict_function (dump_file, *overlap_iterations_b);
4063 fprintf (dump_file, "))\n");
4067 /* Helper function for uniquely inserting distance vectors. */
4069 static void
4070 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
4072 unsigned i;
4073 lambda_vector v;
4075 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, v)
4076 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
4077 return;
4079 DDR_DIST_VECTS (ddr).safe_push (dist_v);
4082 /* Helper function for uniquely inserting direction vectors. */
4084 static void
4085 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
4087 unsigned i;
4088 lambda_vector v;
4090 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), i, v)
4091 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
4092 return;
4094 DDR_DIR_VECTS (ddr).safe_push (dir_v);
4097 /* Add a distance of 1 on all the loops outer than INDEX. If we
4098 haven't yet determined a distance for this outer loop, push a new
4099 distance vector composed of the previous distance, and a distance
4100 of 1 for this outer loop. Example:
4102 | loop_1
4103 | loop_2
4104 | A[10]
4105 | endloop_2
4106 | endloop_1
4108 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
4109 save (0, 1), then we have to save (1, 0). */
4111 static void
4112 add_outer_distances (struct data_dependence_relation *ddr,
4113 lambda_vector dist_v, int index)
4115 /* For each outer loop where init_v is not set, the accesses are
4116 in dependence of distance 1 in the loop. */
4117 while (--index >= 0)
4119 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4120 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
4121 save_v[index] = 1;
4122 save_dist_v (ddr, save_v);
4126 /* Return false when fail to represent the data dependence as a
4127 distance vector. A_INDEX is the index of the first reference
4128 (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
4129 second reference. INIT_B is set to true when a component has been
4130 added to the distance vector DIST_V. INDEX_CARRY is then set to
4131 the index in DIST_V that carries the dependence. */
4133 static bool
4134 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
4135 unsigned int a_index, unsigned int b_index,
4136 lambda_vector dist_v, bool *init_b,
4137 int *index_carry)
4139 unsigned i;
/* init_v[k] is set once dist_v[k] has been filled in by some
   subscript; used below to detect contradictory distances.  */
4140 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4142 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
4144 tree access_fn_a, access_fn_b;
4145 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
4147 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
4149 non_affine_dependence_relation (ddr);
4150 return false;
4153 access_fn_a = SUB_ACCESS_FN (subscript, a_index);
4154 access_fn_b = SUB_ACCESS_FN (subscript, b_index);
4156 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
4157 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
4159 HOST_WIDE_INT dist;
4160 int index;
4161 int var_a = CHREC_VARIABLE (access_fn_a);
4162 int var_b = CHREC_VARIABLE (access_fn_b);
/* Both access functions must evolve in the same loop for the
   subscript distance to be meaningful.  */
4164 if (var_a != var_b
4165 || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
4167 non_affine_dependence_relation (ddr);
4168 return false;
4171 dist = int_cst_value (SUB_DISTANCE (subscript));
4172 index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
4173 *index_carry = MIN (index, *index_carry);
4175 /* This is the subscript coupling test. If we have already
4176 recorded a distance for this loop (a distance coming from
4177 another subscript), it should be the same. For example,
4178 in the following code, there is no dependence:
4180 | loop i = 0, N, 1
4181 | T[i+1][i] = ...
4182 | ... = T[i][i]
4183 | endloop
4185 if (init_v[index] != 0 && dist_v[index] != dist)
4187 finalize_ddr_dependent (ddr, chrec_known);
4188 return false;
4191 dist_v[index] = dist;
4192 init_v[index] = 1;
4193 *init_b = true;
4195 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
4197 /* This can be for example an affine vs. constant dependence
4198 (T[i] vs. T[3]) that is not an affine dependence and is
4199 not representable as a distance vector. */
4200 non_affine_dependence_relation (ddr);
4201 return false;
4205 return true;
4208 /* Return true when the DDR contains only constant access functions. */
4210 static bool
4211 constant_access_functions (const struct data_dependence_relation *ddr)
4213 unsigned i;
4214 subscript *sub;
4216 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4217 if (!evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 0))
4218 || !evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 1)))
4219 return false;
4221 return true;
4224 /* Helper function for the case where DDR_A and DDR_B are the same
4225 multivariate access function with a constant step. For an example
4226 see pr34635-1.c. */
4228 static void
4229 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
4231 int x_1, x_2;
/* C_2 is a two-level chrec {{c_0, +, step_1}, +, step_2}: peel off
   its two levels.  */
4232 tree c_1 = CHREC_LEFT (c_2);
4233 tree c_0 = CHREC_LEFT (c_1);
4234 lambda_vector dist_v;
4235 HOST_WIDE_INT v1, v2, cd;
4237 /* Polynomials with more than 2 variables are not handled yet. When
4238 the evolution steps are parameters, it is not possible to
4239 represent the dependence using classical distance vectors. */
4240 if (TREE_CODE (c_0) != INTEGER_CST
4241 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
4242 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
4244 DDR_AFFINE_P (ddr) = false;
4245 return;
4248 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
4249 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
4251 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
4252 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4253 v1 = int_cst_value (CHREC_RIGHT (c_1));
4254 v2 = int_cst_value (CHREC_RIGHT (c_2));
/* Reduce (v1, v2) by their gcd to get the smallest integer distance.  */
4255 cd = gcd (v1, v2);
4256 v1 /= cd;
4257 v2 /= cd;
/* Flip both components if needed so that dist_v[x_1] is nonnegative.  */
4259 if (v2 < 0)
4261 v2 = -v2;
4262 v1 = -v1;
4265 dist_v[x_1] = v2;
4266 dist_v[x_2] = -v1;
4267 save_dist_v (ddr, dist_v);
4269 add_outer_distances (ddr, dist_v, x_1);
4272 /* Helper function for the case where DDR_A and DDR_B are the same
4273 access functions. */
4275 static void
4276 add_other_self_distances (struct data_dependence_relation *ddr)
4278 lambda_vector dist_v;
4279 unsigned i;
4280 int index_carry = DDR_NB_LOOPS (ddr);
4281 subscript *sub;
4283 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4285 tree access_fun = SUB_ACCESS_FN (sub, 0);
4287 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
4289 if (!evolution_function_is_univariate_p (access_fun))
/* A multivariate access function is only handled when it is
   the single subscript of the reference.  */
4291 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
4293 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
4294 return;
4297 access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);
4299 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
4300 add_multivariate_self_dist (ddr, access_fun);
4301 else
4302 /* The evolution step is not constant: it varies in
4303 the outer loop, so this cannot be represented by a
4304 distance vector. For example in pr34635.c the
4305 evolution is {0, +, {0, +, 4}_1}_2. */
4306 DDR_AFFINE_P (ddr) = false;
4308 return;
/* Univariate chrec: track the minimum loop-nest index among the
   subscripts' evolution loops.  */
4311 index_carry = MIN (index_carry,
4312 index_in_loop_nest (CHREC_VARIABLE (access_fun),
4313 DDR_LOOP_NEST (ddr)));
/* Record the zero vector plus distances of 1 on the loops outer
   than INDEX_CARRY.  */
4317 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4318 add_outer_distances (ddr, dist_v, index_carry);
4321 static void
4322 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
4324 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4326 dist_v[DDR_INNER_LOOP (ddr)] = 1;
4327 save_dist_v (ddr, dist_v);
4330 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
4331 is the case for example when access functions are the same and
4332 equal to a constant, as in:
4334 | loop_1
4335 | A[3] = ...
4336 | ... = A[3]
4337 | endloop_1
4339 in which case the distance vectors are (0) and (1). */
4341 static void
4342 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
4344 unsigned i, j;
4346 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
4348 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
4349 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
4350 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
4352 for (j = 0; j < ca->n; j++)
4353 if (affine_function_zero_p (ca->fns[j]))
4355 insert_innermost_unit_dist_vector (ddr);
4356 return;
4359 for (j = 0; j < cb->n; j++)
4360 if (affine_function_zero_p (cb->fns[j]))
4362 insert_innermost_unit_dist_vector (ddr);
4363 return;
4368 /* Return true when the DDR contains two data references that have the
4369 same access functions. */
4371 static inline bool
4372 same_access_functions (const struct data_dependence_relation *ddr)
4374 unsigned i;
4375 subscript *sub;
4377 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4378 if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
4379 SUB_ACCESS_FN (sub, 1)))
4380 return false;
4382 return true;
4385 /* Compute the classic per loop distance vector. DDR is the data
4386 dependence relation to build a vector from. Return false when fail
4387 to represent the data dependence as a distance vector. */
4389 static bool
4390 build_classic_dist_vector (struct data_dependence_relation *ddr,
4391 struct loop *loop_nest)
4393 bool init_b = false;
4394 int index_carry = DDR_NB_LOOPS (ddr);
4395 lambda_vector dist_v;
4397 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
4398 return false;
4400 if (same_access_functions (ddr))
4402 /* Save the 0 vector. */
4403 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4404 save_dist_v (ddr, dist_v);
4406 if (constant_access_functions (ddr))
4407 add_distance_for_zero_overlaps (ddr);
4409 if (DDR_NB_LOOPS (ddr) > 1)
4410 add_other_self_distances (ddr);
4412 return true;
4415 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4416 if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
4417 return false;
4419 /* Save the distance vector if we initialized one. */
4420 if (init_b)
4422 /* Verify a basic constraint: classic distance vectors should
4423 always be lexicographically positive.
4425 Data references are collected in the order of execution of
4426 the program, thus for the following loop
4428 | for (i = 1; i < 100; i++)
4429 | for (j = 1; j < 100; j++)
4431 | t = T[j+1][i-1]; // A
4432 | T[j][i] = t + 2; // B
4435 references are collected following the direction of the wind:
4436 A then B. The data dependence tests are performed also
4437 following this order, such that we're looking at the distance
4438 separating the elements accessed by A from the elements later
4439 accessed by B. But in this example, the distance returned by
4440 test_dep (A, B) is lexicographically negative (-1, 1), that
4441 means that the access A occurs later than B with respect to
4442 the outer loop, ie. we're actually looking upwind. In this
4443 case we solve test_dep (B, A) looking downwind to the
4444 lexicographically positive solution, that returns the
4445 distance vector (1, -1). */
4446 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
4448 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4449 if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
4450 return false;
4451 compute_subscript_distance (ddr);
4452 if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
4453 &index_carry))
4454 return false;
4455 save_dist_v (ddr, save_v);
4456 DDR_REVERSED_P (ddr) = true;
4458 /* In this case there is a dependence forward for all the
4459 outer loops:
4461 | for (k = 1; k < 100; k++)
4462 | for (i = 1; i < 100; i++)
4463 | for (j = 1; j < 100; j++)
4465 | t = T[j+1][i-1]; // A
4466 | T[j][i] = t + 2; // B
4469 the vectors are:
4470 (0, 1, -1)
4471 (1, 1, -1)
4472 (1, -1, 1)
4474 if (DDR_NB_LOOPS (ddr) > 1)
4476 add_outer_distances (ddr, save_v, index_carry);
4477 add_outer_distances (ddr, dist_v, index_carry);
4480 else
4482 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4483 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
4485 if (DDR_NB_LOOPS (ddr) > 1)
4487 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4489 if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
4490 return false;
4491 compute_subscript_distance (ddr);
4492 if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
4493 &index_carry))
4494 return false;
4496 save_dist_v (ddr, save_v);
4497 add_outer_distances (ddr, dist_v, index_carry);
4498 add_outer_distances (ddr, opposite_v, index_carry);
4500 else
4501 save_dist_v (ddr, save_v);
4504 else
4506 /* There is a distance of 1 on all the outer loops: Example:
4507 there is a dependence of distance 1 on loop_1 for the array A.
4509 | loop_1
4510 | A[5] = ...
4511 | endloop
4513 add_outer_distances (ddr, dist_v,
4514 lambda_vector_first_nz (dist_v,
4515 DDR_NB_LOOPS (ddr), 0));
4518 if (dump_file && (dump_flags & TDF_DETAILS))
4520 unsigned i;
4522 fprintf (dump_file, "(build_classic_dist_vector\n");
4523 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
4525 fprintf (dump_file, " dist_vector = (");
4526 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
4527 DDR_NB_LOOPS (ddr));
4528 fprintf (dump_file, " )\n");
4530 fprintf (dump_file, ")\n");
4533 return true;
4536 /* Return the direction for a given distance.
4537 FIXME: Computing dir this way is suboptimal, since dir can catch
4538 cases that dist is unable to represent. */
4540 static inline enum data_dependence_direction
4541 dir_from_dist (int dist)
4543 if (dist > 0)
4544 return dir_positive;
4545 else if (dist < 0)
4546 return dir_negative;
4547 else
4548 return dir_equal;
4551 /* Compute the classic per loop direction vector. DDR is the data
4552 dependence relation to build a vector from. */
4554 static void
4555 build_classic_dir_vector (struct data_dependence_relation *ddr)
4557 unsigned i, j;
4558 lambda_vector dist_v;
4560 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
4562 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4564 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
4565 dir_v[j] = dir_from_dist (dist_v[j]);
4567 save_dir_v (ddr, dir_v);
4571 /* Helper function. Returns true when there is a dependence between the
4572 data references. A_INDEX is the index of the first reference (0 for
4573 DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference. */
4575 static bool
4576 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
4577 unsigned int a_index, unsigned int b_index,
4578 struct loop *loop_nest)
4580 unsigned int i;
4581 tree last_conflicts;
4582 struct subscript *subscript;
4583 tree res = NULL_TREE;
4585 for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
4587 conflict_function *overlaps_a, *overlaps_b;
4589 analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
4590 SUB_ACCESS_FN (subscript, b_index),
4591 &overlaps_a, &overlaps_b,
4592 &last_conflicts, loop_nest);
4594 if (SUB_CONFLICTS_IN_A (subscript))
4595 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
4596 if (SUB_CONFLICTS_IN_B (subscript))
4597 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
4599 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
4600 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
4601 SUB_LAST_CONFLICT (subscript) = last_conflicts;
4603 /* If there is any undetermined conflict function we have to
4604 give a conservative answer in case we cannot prove that
4605 no dependence exists when analyzing another subscript. */
4606 if (CF_NOT_KNOWN_P (overlaps_a)
4607 || CF_NOT_KNOWN_P (overlaps_b))
4609 res = chrec_dont_know;
4610 continue;
4613 /* When there is a subscript with no dependence we can stop. */
4614 else if (CF_NO_DEPENDENCE_P (overlaps_a)
4615 || CF_NO_DEPENDENCE_P (overlaps_b))
4617 res = chrec_known;
4618 break;
4622 if (res == NULL_TREE)
4623 return true;
4625 if (res == chrec_known)
4626 dependence_stats.num_dependence_independent++;
4627 else
4628 dependence_stats.num_dependence_undetermined++;
4629 finalize_ddr_dependent (ddr, res);
4630 return false;
4633 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
4635 static void
4636 subscript_dependence_tester (struct data_dependence_relation *ddr,
4637 struct loop *loop_nest)
4639 if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
4640 dependence_stats.num_dependence_dependent++;
4642 compute_subscript_distance (ddr);
4643 if (build_classic_dist_vector (ddr, loop_nest))
4644 build_classic_dir_vector (ddr);
4647 /* Returns true when all the access functions of A are affine or
4648 constant with respect to LOOP_NEST. */
4650 static bool
4651 access_functions_are_affine_or_constant_p (const struct data_reference *a,
4652 const struct loop *loop_nest)
4654 unsigned int i;
4655 vec<tree> fns = DR_ACCESS_FNS (a);
4656 tree t;
4658 FOR_EACH_VEC_ELT (fns, i, t)
4659 if (!evolution_function_is_invariant_p (t, loop_nest->num)
4660 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
4661 return false;
4663 return true;
4666 /* This computes the affine dependence relation between A and B with
4667 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
4668 independence between two accesses, while CHREC_DONT_KNOW is used
4669 for representing the unknown relation.
4671 Note that it is possible to stop the computation of the dependence
4672 relation the first time we detect a CHREC_KNOWN element for a given
4673 subscript. */
4675 void
4676 compute_affine_dependence (struct data_dependence_relation *ddr,
4677 struct loop *loop_nest)
4679 struct data_reference *dra = DDR_A (ddr);
4680 struct data_reference *drb = DDR_B (ddr);
4682 if (dump_file && (dump_flags & TDF_DETAILS))
4684 fprintf (dump_file, "(compute_affine_dependence\n");
4685 fprintf (dump_file, " stmt_a: ");
4686 print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
4687 fprintf (dump_file, " stmt_b: ");
4688 print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
4691 /* Analyze only when the dependence relation is not yet known. */
4692 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4694 dependence_stats.num_dependence_tests++;
4696 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
4697 && access_functions_are_affine_or_constant_p (drb, loop_nest))
4698 subscript_dependence_tester (ddr, loop_nest);
4700 /* As a last case, if the dependence cannot be determined, or if
4701 the dependence is considered too difficult to determine, answer
4702 "don't know". */
4703 else
4705 dependence_stats.num_dependence_undetermined++;
4707 if (dump_file && (dump_flags & TDF_DETAILS))
4709 fprintf (dump_file, "Data ref a:\n");
4710 dump_data_reference (dump_file, dra);
4711 fprintf (dump_file, "Data ref b:\n");
4712 dump_data_reference (dump_file, drb);
4713 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4715 finalize_ddr_dependent (ddr, chrec_dont_know);
4719 if (dump_file && (dump_flags & TDF_DETAILS))
4721 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4722 fprintf (dump_file, ") -> no dependence\n");
4723 else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4724 fprintf (dump_file, ") -> dependence analysis failed\n");
4725 else
4726 fprintf (dump_file, ")\n");
4730 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4731 the data references in DATAREFS, in the LOOP_NEST. When
4732 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4733 relations. Return true when successful, i.e. data references number
4734 is small enough to be handled. */
4736 bool
4737 compute_all_dependences (vec<data_reference_p> datarefs,
4738 vec<ddr_p> *dependence_relations,
4739 vec<loop_p> loop_nest,
4740 bool compute_self_and_rr)
4742 struct data_dependence_relation *ddr;
4743 struct data_reference *a, *b;
4744 unsigned int i, j;
4746 if ((int) datarefs.length ()
4747 > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
4749 struct data_dependence_relation *ddr;
4751 /* Insert a single relation into dependence_relations:
4752 chrec_dont_know. */
4753 ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
4754 dependence_relations->safe_push (ddr);
4755 return false;
4758 FOR_EACH_VEC_ELT (datarefs, i, a)
4759 for (j = i + 1; datarefs.iterate (j, &b); j++)
4760 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4762 ddr = initialize_data_dependence_relation (a, b, loop_nest);
4763 dependence_relations->safe_push (ddr);
4764 if (loop_nest.exists ())
4765 compute_affine_dependence (ddr, loop_nest[0]);
4768 if (compute_self_and_rr)
4769 FOR_EACH_VEC_ELT (datarefs, i, a)
4771 ddr = initialize_data_dependence_relation (a, a, loop_nest);
4772 dependence_relations->safe_push (ddr);
4773 if (loop_nest.exists ())
4774 compute_affine_dependence (ddr, loop_nest[0]);
4777 return true;
4780 /* Describes a location of a memory reference. */
4782 struct data_ref_loc
4784 /* The memory reference. */
4785 tree ref;
4787 /* True if the memory reference is read. */
4788 bool is_read;
4790 /* True if the data reference is conditional within the containing
4791 statement, i.e. if it might not occur even when the statement
4792 is executed and runs to completion. */
4793 bool is_conditional_in_stmt;
4797 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4798 true if STMT clobbers memory, false otherwise. */
4800 static bool
4801 get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
4803 bool clobbers_memory = false;
4804 data_ref_loc ref;
4805 tree op0, op1;
4806 enum gimple_code stmt_code = gimple_code (stmt);
4808 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4809 As we cannot model data-references to not spelled out
4810 accesses give up if they may occur. */
4811 if (stmt_code == GIMPLE_CALL
4812 && !(gimple_call_flags (stmt) & ECF_CONST))
4814 /* Allow IFN_GOMP_SIMD_LANE in their own loops. */
4815 if (gimple_call_internal_p (stmt))
4816 switch (gimple_call_internal_fn (stmt))
4818 case IFN_GOMP_SIMD_LANE:
4820 struct loop *loop = gimple_bb (stmt)->loop_father;
4821 tree uid = gimple_call_arg (stmt, 0);
4822 gcc_assert (TREE_CODE (uid) == SSA_NAME);
4823 if (loop == NULL
4824 || loop->simduid != SSA_NAME_VAR (uid))
4825 clobbers_memory = true;
4826 break;
4828 case IFN_MASK_LOAD:
4829 case IFN_MASK_STORE:
4830 break;
4831 default:
4832 clobbers_memory = true;
4833 break;
4835 else
4836 clobbers_memory = true;
4838 else if (stmt_code == GIMPLE_ASM
4839 && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
4840 || gimple_vuse (stmt)))
4841 clobbers_memory = true;
4843 if (!gimple_vuse (stmt))
4844 return clobbers_memory;
4846 if (stmt_code == GIMPLE_ASSIGN)
4848 tree base;
4849 op0 = gimple_assign_lhs (stmt);
4850 op1 = gimple_assign_rhs1 (stmt);
4852 if (DECL_P (op1)
4853 || (REFERENCE_CLASS_P (op1)
4854 && (base = get_base_address (op1))
4855 && TREE_CODE (base) != SSA_NAME
4856 && !is_gimple_min_invariant (base)))
4858 ref.ref = op1;
4859 ref.is_read = true;
4860 ref.is_conditional_in_stmt = false;
4861 references->safe_push (ref);
4864 else if (stmt_code == GIMPLE_CALL)
4866 unsigned i, n;
4867 tree ptr, type;
4868 unsigned int align;
4870 ref.is_read = false;
4871 if (gimple_call_internal_p (stmt))
4872 switch (gimple_call_internal_fn (stmt))
4874 case IFN_MASK_LOAD:
4875 if (gimple_call_lhs (stmt) == NULL_TREE)
4876 break;
4877 ref.is_read = true;
4878 /* FALLTHRU */
4879 case IFN_MASK_STORE:
4880 ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
4881 align = tree_to_shwi (gimple_call_arg (stmt, 1));
4882 if (ref.is_read)
4883 type = TREE_TYPE (gimple_call_lhs (stmt));
4884 else
4885 type = TREE_TYPE (gimple_call_arg (stmt, 3));
4886 if (TYPE_ALIGN (type) != align)
4887 type = build_aligned_type (type, align);
4888 ref.is_conditional_in_stmt = true;
4889 ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
4890 ptr);
4891 references->safe_push (ref);
4892 return false;
4893 default:
4894 break;
4897 op0 = gimple_call_lhs (stmt);
4898 n = gimple_call_num_args (stmt);
4899 for (i = 0; i < n; i++)
4901 op1 = gimple_call_arg (stmt, i);
4903 if (DECL_P (op1)
4904 || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
4906 ref.ref = op1;
4907 ref.is_read = true;
4908 ref.is_conditional_in_stmt = false;
4909 references->safe_push (ref);
4913 else
4914 return clobbers_memory;
4916 if (op0
4917 && (DECL_P (op0)
4918 || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
4920 ref.ref = op0;
4921 ref.is_read = false;
4922 ref.is_conditional_in_stmt = false;
4923 references->safe_push (ref);
4925 return clobbers_memory;
4929 /* Returns true if the loop-nest has any data reference. */
4931 bool
4932 loop_nest_has_data_refs (loop_p loop)
4934 basic_block *bbs = get_loop_body (loop);
4935 auto_vec<data_ref_loc, 3> references;
4937 for (unsigned i = 0; i < loop->num_nodes; i++)
4939 basic_block bb = bbs[i];
4940 gimple_stmt_iterator bsi;
4942 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4944 gimple *stmt = gsi_stmt (bsi);
4945 get_references_in_stmt (stmt, &references);
4946 if (references.length ())
4948 free (bbs);
4949 return true;
4953 free (bbs);
4954 return false;
4957 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4958 reference, returns false, otherwise returns true. NEST is the outermost
4959 loop of the loop nest in which the references should be analyzed. */
4961 bool
4962 find_data_references_in_stmt (struct loop *nest, gimple *stmt,
4963 vec<data_reference_p> *datarefs)
4965 unsigned i;
4966 auto_vec<data_ref_loc, 2> references;
4967 data_ref_loc *ref;
4968 bool ret = true;
4969 data_reference_p dr;
4971 if (get_references_in_stmt (stmt, &references))
4972 return false;
4974 FOR_EACH_VEC_ELT (references, i, ref)
4976 dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
4977 loop_containing_stmt (stmt), ref->ref,
4978 stmt, ref->is_read, ref->is_conditional_in_stmt);
4979 gcc_assert (dr != NULL);
4980 datarefs->safe_push (dr);
4983 return ret;
4986 /* Stores the data references in STMT to DATAREFS. If there is an
4987 unanalyzable reference, returns false, otherwise returns true.
4988 NEST is the outermost loop of the loop nest in which the references
4989 should be instantiated, LOOP is the loop in which the references
4990 should be analyzed. */
4992 bool
4993 graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
4994 vec<data_reference_p> *datarefs)
4996 unsigned i;
4997 auto_vec<data_ref_loc, 2> references;
4998 data_ref_loc *ref;
4999 bool ret = true;
5000 data_reference_p dr;
5002 if (get_references_in_stmt (stmt, &references))
5003 return false;
5005 FOR_EACH_VEC_ELT (references, i, ref)
5007 dr = create_data_ref (nest, loop, ref->ref, stmt, ref->is_read,
5008 ref->is_conditional_in_stmt);
5009 gcc_assert (dr != NULL);
5010 datarefs->safe_push (dr);
5013 return ret;
5016 /* Search the data references in LOOP, and record the information into
5017 DATAREFS. Returns chrec_dont_know when failing to analyze a
5018 difficult case, returns NULL_TREE otherwise. */
5020 tree
5021 find_data_references_in_bb (struct loop *loop, basic_block bb,
5022 vec<data_reference_p> *datarefs)
5024 gimple_stmt_iterator bsi;
5026 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5028 gimple *stmt = gsi_stmt (bsi);
5030 if (!find_data_references_in_stmt (loop, stmt, datarefs))
5032 struct data_reference *res;
5033 res = XCNEW (struct data_reference);
5034 datarefs->safe_push (res);
5036 return chrec_dont_know;
5040 return NULL_TREE;
5043 /* Search the data references in LOOP, and record the information into
5044 DATAREFS. Returns chrec_dont_know when failing to analyze a
5045 difficult case, returns NULL_TREE otherwise.
5047 TODO: This function should be made smarter so that it can handle address
5048 arithmetic as if they were array accesses, etc. */
5050 tree
5051 find_data_references_in_loop (struct loop *loop,
5052 vec<data_reference_p> *datarefs)
5054 basic_block bb, *bbs;
5055 unsigned int i;
5057 bbs = get_loop_body_in_dom_order (loop);
5059 for (i = 0; i < loop->num_nodes; i++)
5061 bb = bbs[i];
5063 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
5065 free (bbs);
5066 return chrec_dont_know;
5069 free (bbs);
5071 return NULL_TREE;
5074 /* Return the alignment in bytes that DRB is guaranteed to have at all
5075 times. */
5077 unsigned int
5078 dr_alignment (innermost_loop_behavior *drb)
5080 /* Get the alignment of BASE_ADDRESS + INIT. */
5081 unsigned int alignment = drb->base_alignment;
5082 unsigned int misalignment = (drb->base_misalignment
5083 + TREE_INT_CST_LOW (drb->init));
5084 if (misalignment != 0)
5085 alignment = MIN (alignment, misalignment & -misalignment);
5087 /* Cap it to the alignment of OFFSET. */
5088 if (!integer_zerop (drb->offset))
5089 alignment = MIN (alignment, drb->offset_alignment);
5091 /* Cap it to the alignment of STEP. */
5092 if (!integer_zerop (drb->step))
5093 alignment = MIN (alignment, drb->step_alignment);
5095 return alignment;
5098 /* Recursive helper function. */
5100 static bool
5101 find_loop_nest_1 (struct loop *loop, vec<loop_p> *loop_nest)
5103 /* Inner loops of the nest should not contain siblings. Example:
5104 when there are two consecutive loops,
5106 | loop_0
5107 | loop_1
5108 | A[{0, +, 1}_1]
5109 | endloop_1
5110 | loop_2
5111 | A[{0, +, 1}_2]
5112 | endloop_2
5113 | endloop_0
5115 the dependence relation cannot be captured by the distance
5116 abstraction. */
5117 if (loop->next)
5118 return false;
5120 loop_nest->safe_push (loop);
5121 if (loop->inner)
5122 return find_loop_nest_1 (loop->inner, loop_nest);
5123 return true;
5126 /* Return false when the LOOP is not well nested. Otherwise return
5127 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
5128 contain the loops from the outermost to the innermost, as they will
5129 appear in the classic distance vector. */
5131 bool
5132 find_loop_nest (struct loop *loop, vec<loop_p> *loop_nest)
5134 loop_nest->safe_push (loop);
5135 if (loop->inner)
5136 return find_loop_nest_1 (loop->inner, loop_nest);
5137 return true;
5140 /* Returns true when the data dependences have been computed, false otherwise.
5141 Given a loop nest LOOP, the following vectors are returned:
5142 DATAREFS is initialized to all the array elements contained in this loop,
5143 DEPENDENCE_RELATIONS contains the relations between the data references.
5144 Compute read-read and self relations if
5145 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
5147 bool
5148 compute_data_dependences_for_loop (struct loop *loop,
5149 bool compute_self_and_read_read_dependences,
5150 vec<loop_p> *loop_nest,
5151 vec<data_reference_p> *datarefs,
5152 vec<ddr_p> *dependence_relations)
5154 bool res = true;
5156 memset (&dependence_stats, 0, sizeof (dependence_stats));
5158 /* If the loop nest is not well formed, or one of the data references
5159 is not computable, give up without spending time to compute other
5160 dependences. */
5161 if (!loop
5162 || !find_loop_nest (loop, loop_nest)
5163 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
5164 || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
5165 compute_self_and_read_read_dependences))
5166 res = false;
5168 if (dump_file && (dump_flags & TDF_STATS))
5170 fprintf (dump_file, "Dependence tester statistics:\n");
5172 fprintf (dump_file, "Number of dependence tests: %d\n",
5173 dependence_stats.num_dependence_tests);
5174 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
5175 dependence_stats.num_dependence_dependent);
5176 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
5177 dependence_stats.num_dependence_independent);
5178 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
5179 dependence_stats.num_dependence_undetermined);
5181 fprintf (dump_file, "Number of subscript tests: %d\n",
5182 dependence_stats.num_subscript_tests);
5183 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
5184 dependence_stats.num_subscript_undetermined);
5185 fprintf (dump_file, "Number of same subscript function: %d\n",
5186 dependence_stats.num_same_subscript_function);
5188 fprintf (dump_file, "Number of ziv tests: %d\n",
5189 dependence_stats.num_ziv);
5190 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
5191 dependence_stats.num_ziv_dependent);
5192 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
5193 dependence_stats.num_ziv_independent);
5194 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
5195 dependence_stats.num_ziv_unimplemented);
5197 fprintf (dump_file, "Number of siv tests: %d\n",
5198 dependence_stats.num_siv);
5199 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
5200 dependence_stats.num_siv_dependent);
5201 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
5202 dependence_stats.num_siv_independent);
5203 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
5204 dependence_stats.num_siv_unimplemented);
5206 fprintf (dump_file, "Number of miv tests: %d\n",
5207 dependence_stats.num_miv);
5208 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
5209 dependence_stats.num_miv_dependent);
5210 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
5211 dependence_stats.num_miv_independent);
5212 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
5213 dependence_stats.num_miv_unimplemented);
5216 return res;
5219 /* Free the memory used by a data dependence relation DDR. */
5221 void
5222 free_dependence_relation (struct data_dependence_relation *ddr)
5224 if (ddr == NULL)
5225 return;
5227 if (DDR_SUBSCRIPTS (ddr).exists ())
5228 free_subscripts (DDR_SUBSCRIPTS (ddr));
5229 DDR_DIST_VECTS (ddr).release ();
5230 DDR_DIR_VECTS (ddr).release ();
5232 free (ddr);
5235 /* Free the memory used by the data dependence relations from
5236 DEPENDENCE_RELATIONS. */
5238 void
5239 free_dependence_relations (vec<ddr_p> dependence_relations)
5241 unsigned int i;
5242 struct data_dependence_relation *ddr;
5244 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
5245 if (ddr)
5246 free_dependence_relation (ddr);
5248 dependence_relations.release ();
5251 /* Free the memory used by the data references from DATAREFS. */
5253 void
5254 free_data_refs (vec<data_reference_p> datarefs)
5256 unsigned int i;
5257 struct data_reference *dr;
5259 FOR_EACH_VEC_ELT (datarefs, i, dr)
5260 free_data_ref (dr);
5261 datarefs.release ();