poly_int: expand_vector_ubsan_overflow
[official-gcc.git] / gcc / tree-data-ref.c
blob954f26206178894479e79721094a1e0fb6aa7cfd
1 /* Data references and dependences detectors.
2 Copyright (C) 2003-2017 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass walks a given loop structure searching for array
22 references. The information about the array accesses is recorded
23 in DATA_REFERENCE structures.
25 The basic test for determining the dependences is:
26 given two access functions chrec1 and chrec2 to a same array, and
27 x and y two vectors from the iteration domain, the same element of
28 the array is accessed twice at iterations x and y if and only if:
29 | chrec1 (x) == chrec2 (y).
31 The goals of this analysis are:
33 - to determine the independence: the relation between two
34 independent accesses is qualified with the chrec_known (this
35 information allows a loop parallelization),
37 - when two data references access the same data, to qualify the
38 dependence relation with classic dependence representations:
40 - distance vectors
41 - direction vectors
42 - loop carried level dependence
43 - polyhedron dependence
44 or with the chains of recurrences based representation,
46 - to define a knowledge base for storing the data dependence
47 information,
49 - to define an interface to access this data.
52 Definitions:
54 - subscript: given two array accesses a subscript is the tuple
55 composed of the access functions for a given dimension. Example:
56 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57 (f1, g1), (f2, g2), (f3, g3).
59 - Diophantine equation: an equation whose coefficients and
60 solutions are integer constants, for example the equation
61 | 3*x + 2*y = 1
62 has an integer solution x = 1 and y = -1.
64 References:
66 - "Advanced Compilation for High Performance Computing" by Randy
67 Allen and Ken Kennedy.
68 http://citeseer.ist.psu.edu/goff91practical.html
70 - "Loop Transformations for Restructuring Compilers - The Foundations"
71 by Utpal Banerjee.
76 #include "config.h"
77 #include "system.h"
78 #include "coretypes.h"
79 #include "backend.h"
80 #include "rtl.h"
81 #include "tree.h"
82 #include "gimple.h"
83 #include "gimple-pretty-print.h"
84 #include "alias.h"
85 #include "fold-const.h"
86 #include "expr.h"
87 #include "gimple-iterator.h"
88 #include "tree-ssa-loop-niter.h"
89 #include "tree-ssa-loop.h"
90 #include "tree-ssa.h"
91 #include "cfgloop.h"
92 #include "tree-data-ref.h"
93 #include "tree-scalar-evolution.h"
94 #include "dumpfile.h"
95 #include "tree-affine.h"
96 #include "params.h"
97 #include "builtins.h"
/* Counters for the various dependence tests, used for dump/statistics
   output.  ZIV/SIV/MIV are the standard Zero/Single/Multiple Index
   Variable subscript test categories.  */

static struct datadep_stats
{
  int num_dependence_tests;		/* Total relation tests attempted.  */
  int num_dependence_dependent;
  int num_dependence_independent;
  int num_dependence_undetermined;

  int num_subscript_tests;		/* Per-subscript test counts.  */
  int num_subscript_undetermined;
  int num_same_subscript_function;

  int num_ziv;				/* Zero index variable tests.  */
  int num_ziv_independent;
  int num_ziv_dependent;
  int num_ziv_unimplemented;

  int num_siv;				/* Single index variable tests.  */
  int num_siv_independent;
  int num_siv_dependent;
  int num_siv_unimplemented;

  int num_miv;				/* Multiple index variable tests.  */
  int num_miv_independent;
  int num_miv_dependent;
  int num_miv_unimplemented;
} dependence_stats;
126 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
127 unsigned int, unsigned int,
128 struct loop *);
129 /* Returns true iff A divides B. */
131 static inline bool
132 tree_fold_divides_p (const_tree a, const_tree b)
134 gcc_assert (TREE_CODE (a) == INTEGER_CST);
135 gcc_assert (TREE_CODE (b) == INTEGER_CST);
136 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
139 /* Returns true iff A divides B. */
static inline bool
int_divides_p (int a, int b)
{
  /* Guard the two cases where B % A is undefined behavior:
     - A == 0: division by zero; zero divides only zero.
     - A == -1 with B == INT_MIN: the quotient overflows; -1 (like 1)
       divides every integer.  */
  if (a == 0)
    return b == 0;
  if (a == 1 || a == -1)
    return true;
  return ((b % a) == 0);
}
147 /* Return true if reference REF contains a union access. */
149 static bool
150 ref_contains_union_access_p (tree ref)
152 while (handled_component_p (ref))
154 ref = TREE_OPERAND (ref, 0);
155 if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
156 || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
157 return true;
159 return false;
164 /* Dump into FILE all the data references from DATAREFS. */
166 static void
167 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
169 unsigned int i;
170 struct data_reference *dr;
172 FOR_EACH_VEC_ELT (datarefs, i, dr)
173 dump_data_reference (file, dr);
176 /* Unified dump into FILE all the data references from DATAREFS. */
178 DEBUG_FUNCTION void
179 debug (vec<data_reference_p> &ref)
181 dump_data_references (stderr, ref);
184 DEBUG_FUNCTION void
185 debug (vec<data_reference_p> *ptr)
187 if (ptr)
188 debug (*ptr);
189 else
190 fprintf (stderr, "<nil>\n");
194 /* Dump into STDERR all the data references from DATAREFS. */
196 DEBUG_FUNCTION void
197 debug_data_references (vec<data_reference_p> datarefs)
199 dump_data_references (stderr, datarefs);
202 /* Print to STDERR the data_reference DR. */
204 DEBUG_FUNCTION void
205 debug_data_reference (struct data_reference *dr)
207 dump_data_reference (stderr, dr);
210 /* Dump function for a DATA_REFERENCE structure. */
212 void
213 dump_data_reference (FILE *outf,
214 struct data_reference *dr)
216 unsigned int i;
218 fprintf (outf, "#(Data Ref: \n");
219 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
220 fprintf (outf, "# stmt: ");
221 print_gimple_stmt (outf, DR_STMT (dr), 0);
222 fprintf (outf, "# ref: ");
223 print_generic_stmt (outf, DR_REF (dr));
224 fprintf (outf, "# base_object: ");
225 print_generic_stmt (outf, DR_BASE_OBJECT (dr));
227 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
229 fprintf (outf, "# Access function %d: ", i);
230 print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
232 fprintf (outf, "#)\n");
235 /* Unified dump function for a DATA_REFERENCE structure. */
237 DEBUG_FUNCTION void
238 debug (data_reference &ref)
240 dump_data_reference (stderr, &ref);
243 DEBUG_FUNCTION void
244 debug (data_reference *ptr)
246 if (ptr)
247 debug (*ptr);
248 else
249 fprintf (stderr, "<nil>\n");
253 /* Dumps the affine function described by FN to the file OUTF. */
255 DEBUG_FUNCTION void
256 dump_affine_function (FILE *outf, affine_fn fn)
258 unsigned i;
259 tree coef;
261 print_generic_expr (outf, fn[0], TDF_SLIM);
262 for (i = 1; fn.iterate (i, &coef); i++)
264 fprintf (outf, " + ");
265 print_generic_expr (outf, coef, TDF_SLIM);
266 fprintf (outf, " * x_%u", i);
270 /* Dumps the conflict function CF to the file OUTF. */
272 DEBUG_FUNCTION void
273 dump_conflict_function (FILE *outf, conflict_function *cf)
275 unsigned i;
277 if (cf->n == NO_DEPENDENCE)
278 fprintf (outf, "no dependence");
279 else if (cf->n == NOT_KNOWN)
280 fprintf (outf, "not known");
281 else
283 for (i = 0; i < cf->n; i++)
285 if (i != 0)
286 fprintf (outf, " ");
287 fprintf (outf, "[");
288 dump_affine_function (outf, cf->fns[i]);
289 fprintf (outf, "]");
294 /* Dump function for a SUBSCRIPT structure. */
296 DEBUG_FUNCTION void
297 dump_subscript (FILE *outf, struct subscript *subscript)
299 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
301 fprintf (outf, "\n (subscript \n");
302 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
303 dump_conflict_function (outf, cf);
304 if (CF_NONTRIVIAL_P (cf))
306 tree last_iteration = SUB_LAST_CONFLICT (subscript);
307 fprintf (outf, "\n last_conflict: ");
308 print_generic_expr (outf, last_iteration);
311 cf = SUB_CONFLICTS_IN_B (subscript);
312 fprintf (outf, "\n iterations_that_access_an_element_twice_in_B: ");
313 dump_conflict_function (outf, cf);
314 if (CF_NONTRIVIAL_P (cf))
316 tree last_iteration = SUB_LAST_CONFLICT (subscript);
317 fprintf (outf, "\n last_conflict: ");
318 print_generic_expr (outf, last_iteration);
321 fprintf (outf, "\n (Subscript distance: ");
322 print_generic_expr (outf, SUB_DISTANCE (subscript));
323 fprintf (outf, " ))\n");
326 /* Print the classic direction vector DIRV to OUTF. */
328 DEBUG_FUNCTION void
329 print_direction_vector (FILE *outf,
330 lambda_vector dirv,
331 int length)
333 int eq;
335 for (eq = 0; eq < length; eq++)
337 enum data_dependence_direction dir = ((enum data_dependence_direction)
338 dirv[eq]);
340 switch (dir)
342 case dir_positive:
343 fprintf (outf, " +");
344 break;
345 case dir_negative:
346 fprintf (outf, " -");
347 break;
348 case dir_equal:
349 fprintf (outf, " =");
350 break;
351 case dir_positive_or_equal:
352 fprintf (outf, " +=");
353 break;
354 case dir_positive_or_negative:
355 fprintf (outf, " +-");
356 break;
357 case dir_negative_or_equal:
358 fprintf (outf, " -=");
359 break;
360 case dir_star:
361 fprintf (outf, " *");
362 break;
363 default:
364 fprintf (outf, "indep");
365 break;
368 fprintf (outf, "\n");
371 /* Print a vector of direction vectors. */
373 DEBUG_FUNCTION void
374 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
375 int length)
377 unsigned j;
378 lambda_vector v;
380 FOR_EACH_VEC_ELT (dir_vects, j, v)
381 print_direction_vector (outf, v, length);
384 /* Print out a vector VEC of length N to OUTFILE. */
386 DEBUG_FUNCTION void
387 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
389 int i;
391 for (i = 0; i < n; i++)
392 fprintf (outfile, "%3d ", vector[i]);
393 fprintf (outfile, "\n");
396 /* Print a vector of distance vectors. */
398 DEBUG_FUNCTION void
399 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
400 int length)
402 unsigned j;
403 lambda_vector v;
405 FOR_EACH_VEC_ELT (dist_vects, j, v)
406 print_lambda_vector (outf, v, length);
409 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
DEBUG_FUNCTION void
dump_data_dependence_relation (FILE *outf,
			       struct data_dependence_relation *ddr)
{
  struct data_reference *dra, *drb;

  fprintf (outf, "(Data Dep: \n");

  /* "Don't know" relations (or a missing DDR) still dump whatever
     references are available, then bail out early.  */
  if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (ddr)
	{
	  dra = DDR_A (ddr);
	  drb = DDR_B (ddr);
	  if (dra)
	    dump_data_reference (outf, dra);
	  else
	    fprintf (outf, " (nil)\n");
	  if (drb)
	    dump_data_reference (outf, drb);
	  else
	    fprintf (outf, " (nil)\n");
	}
      fprintf (outf, " (don't know)\n)\n");
      return;
    }

  dra = DDR_A (ddr);
  drb = DDR_B (ddr);
  dump_data_reference (outf, dra);
  dump_data_reference (outf, drb);

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    /* Proven independent.  */
    fprintf (outf, " (no dependence)\n");

  else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      /* A computed dependence: dump subscripts, loop nest and the
	 distance/direction vectors.  */
      unsigned int i;
      struct loop *loopi;

      subscript *sub;
      FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
	{
	  fprintf (outf, " access_fn_A: ");
	  print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
	  fprintf (outf, " access_fn_B: ");
	  print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
	  dump_subscript (outf, sub);
	}

      fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
      fprintf (outf, " loop nest: (");
      FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
	fprintf (outf, "%d ", loopi->num);
      fprintf (outf, ")\n");

      for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
	{
	  fprintf (outf, " distance_vector: ");
	  print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
			       DDR_NB_LOOPS (ddr));
	}

      for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
	{
	  fprintf (outf, " direction_vector: ");
	  print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
				  DDR_NB_LOOPS (ddr));
	}
    }

  fprintf (outf, ")\n");
}
485 /* Debug version. */
487 DEBUG_FUNCTION void
488 debug_data_dependence_relation (struct data_dependence_relation *ddr)
490 dump_data_dependence_relation (stderr, ddr);
493 /* Dump into FILE all the dependence relations from DDRS. */
495 DEBUG_FUNCTION void
496 dump_data_dependence_relations (FILE *file,
497 vec<ddr_p> ddrs)
499 unsigned int i;
500 struct data_dependence_relation *ddr;
502 FOR_EACH_VEC_ELT (ddrs, i, ddr)
503 dump_data_dependence_relation (file, ddr);
506 DEBUG_FUNCTION void
507 debug (vec<ddr_p> &ref)
509 dump_data_dependence_relations (stderr, ref);
512 DEBUG_FUNCTION void
513 debug (vec<ddr_p> *ptr)
515 if (ptr)
516 debug (*ptr);
517 else
518 fprintf (stderr, "<nil>\n");
522 /* Dump to STDERR all the dependence relations from DDRS. */
524 DEBUG_FUNCTION void
525 debug_data_dependence_relations (vec<ddr_p> ddrs)
527 dump_data_dependence_relations (stderr, ddrs);
530 /* Dumps the distance and direction vectors in FILE. DDRS contains
531 the dependence relations, and VECT_SIZE is the size of the
532 dependence vectors, or in other words the number of loops in the
533 considered nest. */
535 DEBUG_FUNCTION void
536 dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
538 unsigned int i, j;
539 struct data_dependence_relation *ddr;
540 lambda_vector v;
542 FOR_EACH_VEC_ELT (ddrs, i, ddr)
543 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
545 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), j, v)
547 fprintf (file, "DISTANCE_V (");
548 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
549 fprintf (file, ")\n");
552 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), j, v)
554 fprintf (file, "DIRECTION_V (");
555 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
556 fprintf (file, ")\n");
560 fprintf (file, "\n\n");
563 /* Dumps the data dependence relations DDRS in FILE. */
565 DEBUG_FUNCTION void
566 dump_ddrs (FILE *file, vec<ddr_p> ddrs)
568 unsigned int i;
569 struct data_dependence_relation *ddr;
571 FOR_EACH_VEC_ELT (ddrs, i, ddr)
572 dump_data_dependence_relation (file, ddr);
574 fprintf (file, "\n\n");
577 DEBUG_FUNCTION void
578 debug_ddrs (vec<ddr_p> ddrs)
580 dump_ddrs (stderr, ddrs);
583 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
584 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
585 constant of type ssizetype, and returns true. If we cannot do this
586 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
587 is returned. */
static bool
split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
			 tree *var, tree *off)
{
  tree var0, var1;
  tree off0, off1;
  enum tree_code ocode = code;

  *var = NULL_TREE;
  *off = NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      /* A pure constant: variable part is zero.  */
      *var = build_int_cst (type, 0);
      *off = fold_convert (ssizetype, op0);
      return true;

    case POINTER_PLUS_EXPR:
      /* The offset arithmetic itself is done as PLUS on ssizetype.  */
      ocode = PLUS_EXPR;
      /* FALLTHROUGH */
    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Split both operands recursively and recombine.  */
      split_constant_offset (op0, &var0, &off0);
      split_constant_offset (op1, &var1, &off1);
      *var = fold_build2 (code, type, var0, var1);
      *off = size_binop (ocode, off0, off1);
      return true;

    case MULT_EXPR:
      /* Only a constant multiplier distributes over the split.  */
      if (TREE_CODE (op1) != INTEGER_CST)
	return false;

      split_constant_offset (op0, &var0, &off0);
      *var = fold_build2 (MULT_EXPR, type, var0, op1);
      *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
      return true;

    case ADDR_EXPR:
      {
	tree base, poffset;
	poly_int64 pbitsize, pbitpos, pbytepos;
	machine_mode pmode;
	int punsignedp, preversep, pvolatilep;

	op0 = TREE_OPERAND (op0, 0);
	base
	  = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
				 &punsignedp, &preversep, &pvolatilep);

	/* The bit position must be a whole number of bytes.  */
	if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
	  return false;
	base = build_fold_addr_expr (base);
	off0 = ssize_int (pbytepos);

	if (poffset)
	  {
	    /* Fold the variable part of the inner offset back into
	       the base and keep its constant part in OFF0.  */
	    split_constant_offset (poffset, &poffset, &off1);
	    off0 = size_binop (PLUS_EXPR, off0, off1);
	    if (POINTER_TYPE_P (TREE_TYPE (base)))
	      base = fold_build_pointer_plus (base, poffset);
	    else
	      base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
				  fold_convert (TREE_TYPE (base), poffset));
	  }

	var0 = fold_convert (type, base);

	/* If variable length types are involved, punt, otherwise casts
	   might be converted into ARRAY_REFs in gimplify_conversion.
	   To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
	   possibly no longer appears in current GIMPLE, might resurface.
	   This perhaps could run
	   if (CONVERT_EXPR_P (var0))
	     {
	       gimplify_conversion (&var0);
	       // Attempt to fill in any within var0 found ARRAY_REF's
	       // element size from corresponding op embedded ARRAY_REF,
	       // if unsuccessful, just punt.
	     } */
	while (POINTER_TYPE_P (type))
	  type = TREE_TYPE (type);
	if (int_size_in_bytes (type) < 0)
	  return false;

	*var = var0;
	*off = off0;
	return true;
      }

    case SSA_NAME:
      {
	if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
	  return false;

	gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
	enum tree_code subcode;

	if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
	  return false;

	/* Look through the defining assignment and split its RHS.  */
	var0 = gimple_assign_rhs1 (def_stmt);
	subcode = gimple_assign_rhs_code (def_stmt);
	var1 = gimple_assign_rhs2 (def_stmt);

	return split_constant_offset_1 (type, var0, subcode, var1, var, off);
      }

    CASE_CONVERT:
      {
	/* We must not introduce undefined overflow, and we must not change
	   the value.  Hence we're okay if the inner type doesn't overflow
	   to start with (pointer or signed), the outer type also is an
	   integer or pointer and the outer precision is at least as large
	   as the inner.  */
	tree itype = TREE_TYPE (op0);
	if ((POINTER_TYPE_P (itype)
	     || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
	    && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
	    && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
	  {
	    split_constant_offset (op0, &var0, off);
	    *var = fold_convert (type, var0);
	    return true;
	  }
	return false;
      }

    default:
      return false;
    }
}
720 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
721 will be ssizetype. */
void
split_constant_offset (tree exp, tree *var, tree *off)
{
  tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
  enum tree_code code;

  /* Default answer: the whole expression is the variable part and the
     constant part is zero; only overwritten on a successful split.  */
  *var = exp;
  *off = ssize_int (0);
  STRIP_NOPS (exp);

  /* Chrecs and ternary RHS forms cannot be decomposed here.  */
  if (tree_is_chrec (exp)
      || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
    return;

  otype = TREE_TYPE (exp);
  code = TREE_CODE (exp);
  extract_ops_from_tree (exp, &code, &op0, &op1);
  if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
    {
      /* Convert the variable part back to EXP's original type.  */
      *var = fold_convert (type, e);
      *off = o;
    }
}
747 /* Returns the address ADDR of an object in a canonical shape (without nop
748 casts, and with type of pointer to the object). */
750 static tree
751 canonicalize_base_object_address (tree addr)
753 tree orig = addr;
755 STRIP_NOPS (addr);
757 /* The base address may be obtained by casting from integer, in that case
758 keep the cast. */
759 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
760 return orig;
762 if (TREE_CODE (addr) != ADDR_EXPR)
763 return addr;
765 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
768 /* Analyze the behavior of memory reference REF. There are two modes:
770 - BB analysis. In this case we simply split the address into base,
771 init and offset components, without reference to any containing loop.
772 The resulting base and offset are general expressions and they can
773 vary arbitrarily from one iteration of the containing loop to the next.
774 The step is always zero.
776 - loop analysis. In this case we analyze the reference both wrt LOOP
777 and on the basis that the reference occurs (is "used") in LOOP;
778 see the comment above analyze_scalar_evolution_in_loop for more
779 information about this distinction. The base, init, offset and
780 step fields are all invariant in LOOP.
782 Perform BB analysis if LOOP is null, or if LOOP is the function's
783 dummy outermost loop. In other cases perform loop analysis.
785 Return true if the analysis succeeded and store the results in DRB if so.
786 BB analysis can only fail for bitfield or reversed-storage accesses. */
788 bool
789 dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
790 struct loop *loop)
792 poly_int64 pbitsize, pbitpos;
793 tree base, poffset;
794 machine_mode pmode;
795 int punsignedp, preversep, pvolatilep;
796 affine_iv base_iv, offset_iv;
797 tree init, dinit, step;
798 bool in_loop = (loop && loop->num);
800 if (dump_file && (dump_flags & TDF_DETAILS))
801 fprintf (dump_file, "analyze_innermost: ");
803 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
804 &punsignedp, &preversep, &pvolatilep);
805 gcc_assert (base != NULL_TREE);
807 poly_int64 pbytepos;
808 if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
810 if (dump_file && (dump_flags & TDF_DETAILS))
811 fprintf (dump_file, "failed: bit offset alignment.\n");
812 return false;
815 if (preversep)
817 if (dump_file && (dump_flags & TDF_DETAILS))
818 fprintf (dump_file, "failed: reverse storage order.\n");
819 return false;
822 /* Calculate the alignment and misalignment for the inner reference. */
823 unsigned int HOST_WIDE_INT bit_base_misalignment;
824 unsigned int bit_base_alignment;
825 get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);
827 /* There are no bitfield references remaining in BASE, so the values
828 we got back must be whole bytes. */
829 gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
830 && bit_base_misalignment % BITS_PER_UNIT == 0);
831 unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
832 poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;
834 if (TREE_CODE (base) == MEM_REF)
836 if (!integer_zerop (TREE_OPERAND (base, 1)))
838 /* Subtract MOFF from the base and add it to POFFSET instead.
839 Adjust the misalignment to reflect the amount we subtracted. */
840 poly_offset_int moff = mem_ref_offset (base);
841 base_misalignment -= moff.force_shwi ();
842 tree mofft = wide_int_to_tree (sizetype, moff);
843 if (!poffset)
844 poffset = mofft;
845 else
846 poffset = size_binop (PLUS_EXPR, poffset, mofft);
848 base = TREE_OPERAND (base, 0);
850 else
851 base = build_fold_addr_expr (base);
853 if (in_loop)
855 if (!simple_iv (loop, loop, base, &base_iv, true))
857 if (dump_file && (dump_flags & TDF_DETAILS))
858 fprintf (dump_file, "failed: evolution of base is not affine.\n");
859 return false;
862 else
864 base_iv.base = base;
865 base_iv.step = ssize_int (0);
866 base_iv.no_overflow = true;
869 if (!poffset)
871 offset_iv.base = ssize_int (0);
872 offset_iv.step = ssize_int (0);
874 else
876 if (!in_loop)
878 offset_iv.base = poffset;
879 offset_iv.step = ssize_int (0);
881 else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
883 if (dump_file && (dump_flags & TDF_DETAILS))
884 fprintf (dump_file, "failed: evolution of offset is not affine.\n");
885 return false;
889 init = ssize_int (pbytepos);
891 /* Subtract any constant component from the base and add it to INIT instead.
892 Adjust the misalignment to reflect the amount we subtracted. */
893 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
894 init = size_binop (PLUS_EXPR, init, dinit);
895 base_misalignment -= TREE_INT_CST_LOW (dinit);
897 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
898 init = size_binop (PLUS_EXPR, init, dinit);
900 step = size_binop (PLUS_EXPR,
901 fold_convert (ssizetype, base_iv.step),
902 fold_convert (ssizetype, offset_iv.step));
904 base = canonicalize_base_object_address (base_iv.base);
906 /* See if get_pointer_alignment can guarantee a higher alignment than
907 the one we calculated above. */
908 unsigned int HOST_WIDE_INT alt_misalignment;
909 unsigned int alt_alignment;
910 get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);
912 /* As above, these values must be whole bytes. */
913 gcc_assert (alt_alignment % BITS_PER_UNIT == 0
914 && alt_misalignment % BITS_PER_UNIT == 0);
915 alt_alignment /= BITS_PER_UNIT;
916 alt_misalignment /= BITS_PER_UNIT;
918 if (base_alignment < alt_alignment)
920 base_alignment = alt_alignment;
921 base_misalignment = alt_misalignment;
924 drb->base_address = base;
925 drb->offset = fold_convert (ssizetype, offset_iv.base);
926 drb->init = init;
927 drb->step = step;
928 if (known_misalignment (base_misalignment, base_alignment,
929 &drb->base_misalignment))
930 drb->base_alignment = base_alignment;
931 else
933 drb->base_alignment = known_alignment (base_misalignment);
934 drb->base_misalignment = 0;
936 drb->offset_alignment = highest_pow2_factor (offset_iv.base);
937 drb->step_alignment = highest_pow2_factor (step);
939 if (dump_file && (dump_flags & TDF_DETAILS))
940 fprintf (dump_file, "success.\n");
942 return true;
945 /* Return true if OP is a valid component reference for a DR access
946 function. This accepts a subset of what handled_component_p accepts. */
948 static bool
949 access_fn_component_p (tree op)
951 switch (TREE_CODE (op))
953 case REALPART_EXPR:
954 case IMAGPART_EXPR:
955 case ARRAY_REF:
956 return true;
958 case COMPONENT_REF:
959 return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;
961 default:
962 return false;
966 /* Determines the base object and the list of indices of memory reference
967 DR, analyzed in LOOP and instantiated before NEST. */
static void
dr_analyze_indices (struct data_reference *dr, edge nest, loop_p loop)
{
  vec<tree> access_fns = vNULL;
  tree ref, op;
  tree base, off, access_fn;

  /* If analyzing a basic-block there are no indices to analyze
     and thus no access functions.  */
  if (!nest)
    {
      DR_BASE_OBJECT (dr) = DR_REF (dr);
      DR_ACCESS_FNS (dr).create (0);
      return;
    }

  ref = DR_REF (dr);

  /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
     into a two element array with a constant index.  The base is
     then just the immediate underlying object.  */
  if (TREE_CODE (ref) == REALPART_EXPR)
    {
      ref = TREE_OPERAND (ref, 0);
      access_fns.safe_push (integer_zero_node);
    }
  else if (TREE_CODE (ref) == IMAGPART_EXPR)
    {
      ref = TREE_OPERAND (ref, 0);
      access_fns.safe_push (integer_one_node);
    }

  /* Analyze access functions of dimensions we know to be independent.
     The list of component references handled here should be kept in
     sync with access_fn_component_p.  */
  while (handled_component_p (ref))
    {
      if (TREE_CODE (ref) == ARRAY_REF)
	{
	  /* The index of each dimension becomes one access function,
	     instantiated in the analyzed NEST.  */
	  op = TREE_OPERAND (ref, 1);
	  access_fn = analyze_scalar_evolution (loop, op);
	  access_fn = instantiate_scev (nest, loop, access_fn);
	  access_fns.safe_push (access_fn);
	}
      else if (TREE_CODE (ref) == COMPONENT_REF
	       && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
	{
	  /* For COMPONENT_REFs of records (but not unions!) use the
	     FIELD_DECL offset as constant access function so we can
	     disambiguate a[i].f1 and a[i].f2.  */
	  tree off = component_ref_field_offset (ref);
	  off = size_binop (PLUS_EXPR,
			    size_binop (MULT_EXPR,
					fold_convert (bitsizetype, off),
					bitsize_int (BITS_PER_UNIT)),
			    DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
	  access_fns.safe_push (off);
	}
      else
	/* If we have an unhandled component we could not translate
	   to an access function stop analyzing.  We have determined
	   our base object in this case.  */
	break;

      ref = TREE_OPERAND (ref, 0);
    }

  /* If the address operand of a MEM_REF base has an evolution in the
     analyzed nest, add it as an additional independent access-function.  */
  if (TREE_CODE (ref) == MEM_REF)
    {
      op = TREE_OPERAND (ref, 0);
      access_fn = analyze_scalar_evolution (loop, op);
      access_fn = instantiate_scev (nest, loop, access_fn);
      if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
	{
	  tree orig_type;
	  tree memoff = TREE_OPERAND (ref, 1);
	  base = initial_condition (access_fn);
	  orig_type = TREE_TYPE (base);
	  STRIP_USELESS_TYPE_CONVERSION (base);
	  split_constant_offset (base, &base, &off);
	  STRIP_USELESS_TYPE_CONVERSION (base);
	  /* Fold the MEM_REF offset into the evolutions initial
	     value to make more bases comparable.  */
	  if (!integer_zerop (memoff))
	    {
	      off = size_binop (PLUS_EXPR, off,
				fold_convert (ssizetype, memoff));
	      memoff = build_int_cst (TREE_TYPE (memoff), 0);
	    }
	  /* Adjust the offset so it is a multiple of the access type
	     size and thus we separate bases that can possibly be used
	     to produce partial overlaps (which the access_fn machinery
	     cannot handle).  */
	  wide_int rem;
	  if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
	      && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
	      && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
	    rem = wi::mod_trunc
	      (wi::to_wide (off),
	       wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
	       SIGNED);
	  else
	    /* If we can't compute the remainder simply force the initial
	       condition to zero.  */
	    rem = wi::to_wide (off);
	  off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
	  memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
	  /* And finally replace the initial condition.  */
	  access_fn = chrec_replace_initial_condition
	    (access_fn, fold_convert (orig_type, off));
	  /* ??? This is still not a suitable base object for
	     dr_may_alias_p - the base object needs to be an
	     access that covers the object as whole.  With
	     an evolution in the pointer this cannot be
	     guaranteed.
	     As a band-aid, mark the access so we can special-case
	     it in dr_may_alias_p.  */
	  tree old = ref;
	  ref = fold_build2_loc (EXPR_LOCATION (ref),
				 MEM_REF, TREE_TYPE (ref),
				 base, memoff);
	  MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
	  MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
	  DR_UNCONSTRAINED_BASE (dr) = true;
	  access_fns.safe_push (access_fn);
	}
    }
  else if (DECL_P (ref))
    {
      /* Canonicalize DR_BASE_OBJECT to MEM_REF form.  */
      ref = build2 (MEM_REF, TREE_TYPE (ref),
		    build_fold_addr_expr (ref),
		    build_int_cst (reference_alias_ptr_type (ref), 0));
    }

  DR_BASE_OBJECT (dr) = ref;
  DR_ACCESS_FNS (dr) = access_fns;
}
1110 /* Extracts the alias analysis information from the memory reference DR. */
1112 static void
1113 dr_analyze_alias (struct data_reference *dr)
1115 tree ref = DR_REF (dr);
1116 tree base = get_base_address (ref), addr;
1118 if (INDIRECT_REF_P (base)
1119 || TREE_CODE (base) == MEM_REF)
1121 addr = TREE_OPERAND (base, 0);
1122 if (TREE_CODE (addr) == SSA_NAME)
1123 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1127 /* Frees data reference DR. */
void
free_data_ref (data_reference_p dr)
{
  /* Release the heap-allocated access-function vector before freeing
     the descriptor itself.  */
  DR_ACCESS_FNS (dr).release ();
  free (dr);
}
1136 /* Analyze memory reference MEMREF, which is accessed in STMT.
1137 The reference is a read if IS_READ is true, otherwise it is a write.
1138 IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
1139 within STMT, i.e. that it might not occur even if STMT is executed
1140 and runs to completion.
1142 Return the data_reference description of MEMREF. NEST is the outermost
1143 loop in which the reference should be instantiated, LOOP is the loop
1144 in which the data reference should be analyzed. */
struct data_reference *
create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
		 bool is_read, bool is_conditional_in_stmt)
{
  struct data_reference *dr;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Creating dr for ");
      print_generic_expr (dump_file, memref, TDF_SLIM);
      fprintf (dump_file, "\n");
    }

  /* Zero-initialized allocation; ownership passes to the caller, who
     releases it with free_data_ref.  */
  dr = XCNEW (struct data_reference);
  DR_STMT (dr) = stmt;
  DR_REF (dr) = memref;
  DR_IS_READ (dr) = is_read;
  DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;

  /* Fill in the innermost behavior, the access functions and the
     alias info.  Innermost analysis runs in BB mode when NEST is
     null.  */
  dr_analyze_innermost (&DR_INNERMOST (dr), memref,
			nest != NULL ? loop : NULL);
  dr_analyze_indices (dr, nest, loop);
  dr_analyze_alias (dr);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned i;
      fprintf (dump_file, "\tbase_address: ");
      print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
      fprintf (dump_file, "\n\toffset from base address: ");
      print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tconstant offset from base address: ");
      print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tstep: ");
      print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tbase misalignment: %d",
	       DR_BASE_MISALIGNMENT (dr));
      fprintf (dump_file, "\n\toffset alignment: %d",
	       DR_OFFSET_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tbase_object: ");
      print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
      fprintf (dump_file, "\n");
      for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
	{
	  fprintf (dump_file, "\tAccess function %d: ", i);
	  print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
	}
    }

  return dr;
}
1200 /* A helper function computes order between two tree expressions T1 and T2.
1201 This is used in comparator functions sorting objects based on the order
1202 of tree expressions. The function returns -1, 0, or 1. */
1205 data_ref_compare_tree (tree t1, tree t2)
1207 int i, cmp;
1208 enum tree_code code;
1209 char tclass;
/* Fast paths: pointer equality, then NULL ordering (NULL sorts first).  */
1211 if (t1 == t2)
1212 return 0;
1213 if (t1 == NULL)
1214 return -1;
1215 if (t2 == NULL)
1216 return 1;
1218 STRIP_USELESS_TYPE_CONVERSION (t1);
1219 STRIP_USELESS_TYPE_CONVERSION (t2);
1220 if (t1 == t2)
1221 return 0;
/* Trees with different codes order by code value, except that all
   conversion codes are treated as one equivalence class.  */
1223 if (TREE_CODE (t1) != TREE_CODE (t2)
1224 && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
1225 return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
1227 code = TREE_CODE (t1);
1228 switch (code)
1230 case INTEGER_CST:
1231 return tree_int_cst_compare (t1, t2);
1233 case STRING_CST:
/* Order first by length, then lexicographically over the bytes.  */
1234 if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
1235 return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
1236 return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
1237 TREE_STRING_LENGTH (t1));
1239 case SSA_NAME:
1240 if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
1241 return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
1242 break;
1244 default:
1245 if (POLY_INT_CST_P (t1))
1246 return compare_sizes_for_sort (wi::to_poly_widest (t1),
1247 wi::to_poly_widest (t2));
1249 tclass = TREE_CODE_CLASS (code);
1251 /* For decls, compare their UIDs. */
1252 if (tclass == tcc_declaration)
1254 if (DECL_UID (t1) != DECL_UID (t2))
1255 return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
1256 break;
1258 /* For expressions, compare their operands recursively. */
1259 else if (IS_EXPR_CODE_CLASS (tclass))
1261 for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
1263 cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
1264 TREE_OPERAND (t2, i));
1265 if (cmp != 0)
1266 return cmp;
1269 else
1270 gcc_unreachable ();
/* Equal under every criterion above.  */
1273 return 0;
1276 /* Return TRUE if it's possible to resolve data dependence DDR by runtime alias
1277 check. */
1279 bool
1280 runtime_alias_check_p (ddr_p ddr, struct loop *loop, bool speed_p)
1282 if (dump_enabled_p ())
1284 dump_printf (MSG_NOTE, "consider run-time aliasing test between ");
1285 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_A (ddr)));
1286 dump_printf (MSG_NOTE, " and ");
1287 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_B (ddr)));
1288 dump_printf (MSG_NOTE, "\n");
/* Versioning a loop for an alias check duplicates code, so only do it
   when optimizing for speed.  */
1291 if (!speed_p)
1293 if (dump_enabled_p ())
1294 dump_printf (MSG_MISSED_OPTIMIZATION,
1295 "runtime alias check not supported when optimizing "
1296 "for size.\n");
1297 return false;
1300 /* FORNOW: We don't support versioning with outer-loop in either
1301 vectorization or loop distribution. */
1302 if (loop != NULL && loop->inner != NULL)
1304 if (dump_enabled_p ())
1305 dump_printf (MSG_MISSED_OPTIMIZATION,
1306 "runtime alias check not supported for outer loop.\n");
1307 return false;
1310 /* FORNOW: We don't support creating runtime alias tests for non-constant
1311 step. */
1312 if (TREE_CODE (DR_STEP (DDR_A (ddr))) != INTEGER_CST
1313 || TREE_CODE (DR_STEP (DDR_B (ddr))) != INTEGER_CST)
1315 if (dump_enabled_p ())
1316 dump_printf (MSG_MISSED_OPTIMIZATION,
1317 "runtime alias check not supported for non-constant "
1318 "step\n");
1319 return false;
1322 return true;
1325 /* Operator == between two dr_with_seg_len objects.
1327 This equality operator is used to make sure two data refs
1328 are the same one so that we will consider to combine the
1329 aliasing checks of those two pairs of data dependent data
1330 refs. */
1332 static bool
1333 operator == (const dr_with_seg_len& d1,
1334 const dr_with_seg_len& d2)
/* Two entries are considered equal when base address, offset, init
   and segment length all compare equal.  */
1336 return operand_equal_p (DR_BASE_ADDRESS (d1.dr),
1337 DR_BASE_ADDRESS (d2.dr), 0)
1338 && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
1339 && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
1340 && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0;
1343 /* Comparison function for sorting objects of dr_with_seg_len_pair_t
1344 so that we can combine aliasing checks in one scan. */
1346 static int
1347 comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
1349 const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
1350 const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
1351 const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
1352 const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;
1354 /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks
1355 if a and c have the same basic address and step, and b and d have the same
1356 address and step. Therefore, if any a&c or b&d don't have the same address
1357 and step, we don't care the order of those two pairs after sorting. */
1358 int comp_res;
/* Compare in decreasing order of significance: base addresses, steps,
   then offset/init of each element of the pair.  The first non-zero
   comparison decides.  */
1360 if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
1361 DR_BASE_ADDRESS (b1.dr))) != 0)
1362 return comp_res;
1363 if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
1364 DR_BASE_ADDRESS (b2.dr))) != 0)
1365 return comp_res;
1366 if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
1367 DR_STEP (b1.dr))) != 0)
1368 return comp_res;
1369 if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
1370 DR_STEP (b2.dr))) != 0)
1371 return comp_res;
1372 if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
1373 DR_OFFSET (b1.dr))) != 0)
1374 return comp_res;
1375 if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
1376 DR_INIT (b1.dr))) != 0)
1377 return comp_res;
1378 if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
1379 DR_OFFSET (b2.dr))) != 0)
1380 return comp_res;
1381 if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
1382 DR_INIT (b2.dr))) != 0)
1383 return comp_res;
1385 return 0;
1388 /* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
1389 FACTOR is number of iterations that each data reference is accessed.
1391 Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
1392 we create an expression:
1394 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1395 || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
1397 for aliasing checks. However, in some cases we can decrease the number
1398 of checks by combining two checks into one. For example, suppose we have
1399 another pair of data refs store_ptr_0 & load_ptr_1, and if the following
1400 condition is satisfied:
1402 load_ptr_0 < load_ptr_1 &&
1403 load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
1405 (this condition means, in each iteration of vectorized loop, the accessed
1406 memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
1407 load_ptr_1.)
1409 we then can use only the following expression to finish the aliasing checks
1410 between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:
1412 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1413 || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))
1415 Note that we only consider that load_ptr_0 and load_ptr_1 have the same
1416 basic address. */
1418 void
1419 prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
1420 poly_uint64 factor)
1422 /* Sort the collected data ref pairs so that we can scan them once to
1423 combine all possible aliasing checks. */
1424 alias_pairs->qsort (comp_dr_with_seg_len_pair)
1426 /* Scan the sorted dr pairs and check if we can combine alias checks
1427 of two neighboring dr pairs. */
1428 for (size_t i = 1; i < alias_pairs->length (); ++i)
1430 /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2). */
1431 dr_with_seg_len *dr_a1 = &(*alias_pairs)[i-1].first,
1432 *dr_b1 = &(*alias_pairs)[i-1].second,
1433 *dr_a2 = &(*alias_pairs)[i].first,
1434 *dr_b2 = &(*alias_pairs)[i].second;
1436 /* Remove duplicate data ref pairs. */
1437 if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
1439 if (dump_enabled_p ())
1441 dump_printf (MSG_NOTE, "found equal ranges ");
1442 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a1->dr));
1443 dump_printf (MSG_NOTE, ", ");
1444 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b1->dr));
1445 dump_printf (MSG_NOTE, " and ");
1446 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a2->dr));
1447 dump_printf (MSG_NOTE, ", ");
1448 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b2->dr));
1449 dump_printf (MSG_NOTE, "\n");
1451 alias_pairs->ordered_remove (i--);
1452 continue;
1455 if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
1457 /* We consider the case that DR_B1 and DR_B2 are same memrefs,
1458 and DR_A1 and DR_A2 are two consecutive memrefs. */
1459 if (*dr_a1 == *dr_a2)
1461 std::swap (dr_a1, dr_b1);
1462 std::swap (dr_a2, dr_b2);
/* Merging requires the A refs to share base address and offset, and
   both inits to be (poly-)constant so their distance is computable.  */
1465 poly_int64 init_a1, init_a2;
1466 if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
1467 DR_BASE_ADDRESS (dr_a2->dr), 0)
1468 || !operand_equal_p (DR_OFFSET (dr_a1->dr),
1469 DR_OFFSET (dr_a2->dr), 0)
1470 || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
1471 || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
1472 continue;
1474 /* Don't combine if we can't tell which one comes first. */
1475 if (!ordered_p (init_a1, init_a2))
1476 continue;
1478 /* Make sure dr_a1 starts left of dr_a2. */
1479 if (maybe_gt (init_a1, init_a2))
1481 std::swap (*dr_a1, *dr_a2);
1482 std::swap (init_a1, init_a2);
1485 /* Only merge const step data references. */
1486 poly_int64 step_a1, step_a2;
1487 if (!poly_int_tree_p (DR_STEP (dr_a1->dr), &step_a1)
1488 || !poly_int_tree_p (DR_STEP (dr_a2->dr), &step_a2))
1489 continue;
1491 bool neg_step = maybe_lt (step_a1, 0) || maybe_lt (step_a2, 0);
1493 /* DR_A1 and DR_A2 must go in the same direction. */
1494 if (neg_step && (maybe_gt (step_a1, 0) || maybe_gt (step_a2, 0)))
1495 continue;
1497 poly_uint64 seg_len_a1 = 0, seg_len_a2 = 0;
1498 bool const_seg_len_a1 = poly_int_tree_p (dr_a1->seg_len,
1499 &seg_len_a1);
1500 bool const_seg_len_a2 = poly_int_tree_p (dr_a2->seg_len,
1501 &seg_len_a2);
1503 /* We need to compute merged segment length at compilation time for
1504 dr_a1 and dr_a2, which is impossible if either one has non-const
1505 segment length. */
1506 if ((!const_seg_len_a1 || !const_seg_len_a2)
1507 && maybe_ne (step_a1, step_a2))
1508 continue;
1510 bool do_remove = false;
1511 poly_uint64 diff = init_a2 - init_a1;
1512 poly_uint64 min_seg_len_b;
1513 tree new_seg_len;
/* If dr_b1's segment length is not compile-time constant, fall back
   to FACTOR * |step| as a lower bound for it.  */
1515 if (!poly_int_tree_p (dr_b1->seg_len, &min_seg_len_b))
1517 tree step_b = DR_STEP (dr_b1->dr);
1518 if (!tree_fits_shwi_p (step_b))
1519 continue;
1520 min_seg_len_b = factor * abs_hwi (tree_to_shwi (step_b));
1523 /* Now we try to merge alias check dr_a1 & dr_b and dr_a2 & dr_b.
1525 Case A:
1526 check if the following condition is satisfied:
1528 DIFF - SEGMENT_LENGTH_A < SEGMENT_LENGTH_B
1530 where DIFF = DR_A2_INIT - DR_A1_INIT. However,
1531 SEGMENT_LENGTH_A or SEGMENT_LENGTH_B may not be constant so we
1532 have to make a best estimation. We can get the minimum value
1533 of SEGMENT_LENGTH_B as a constant, represented by MIN_SEG_LEN_B,
1534 then either of the following two conditions can guarantee the
1535 one above:
1537 1: DIFF <= MIN_SEG_LEN_B
1538 2: DIFF - SEGMENT_LENGTH_A < MIN_SEG_LEN_B
1539 Because DIFF - SEGMENT_LENGTH_A is done in sizetype, we need
1540 to take care of wrapping behavior in it.
1542 Case B:
1543 If the left segment does not extend beyond the start of the
1544 right segment the new segment length is that of the right
1545 plus the segment distance. The condition is like:
1547 DIFF >= SEGMENT_LENGTH_A ;SEGMENT_LENGTH_A is a constant.
1549 Note 1: Case A.2 and B combined together effectively merges every
1550 dr_a1 & dr_b and dr_a2 & dr_b when SEGMENT_LENGTH_A is const.
1552 Note 2: Above description is based on positive DR_STEP, we need to
1553 take care of negative DR_STEP for wrapping behavior. See PR80815
1554 for more information. */
1555 if (neg_step)
1557 /* Adjust diff according to access size of both references. */
1558 diff += tree_to_poly_uint64
1559 (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a2->dr))));
1560 diff -= tree_to_poly_uint64
1561 (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a1->dr))));
1562 /* Case A.1. */
1563 if (known_le (diff, min_seg_len_b)
1564 /* Case A.2 and B combined. */
1565 || const_seg_len_a2)
1567 if (const_seg_len_a1 || const_seg_len_a2)
1568 new_seg_len
1569 = build_int_cstu (sizetype,
1570 lower_bound (seg_len_a1 - diff,
1571 seg_len_a2));
1572 else
1573 new_seg_len
1574 = size_binop (MINUS_EXPR, dr_a2->seg_len,
1575 build_int_cstu (sizetype, diff));
1577 dr_a2->seg_len = new_seg_len;
1578 do_remove = true;
1581 else
1583 /* Case A.1. */
1584 if (known_le (diff, min_seg_len_b)
1585 /* Case A.2 and B combined. */
1586 || const_seg_len_a1)
1588 if (const_seg_len_a1 && const_seg_len_a2)
1589 new_seg_len
1590 = build_int_cstu (sizetype,
1591 upper_bound (seg_len_a2 + diff,
1592 seg_len_a1));
1593 else
1594 new_seg_len
1595 = size_binop (PLUS_EXPR, dr_a2->seg_len,
1596 build_int_cstu (sizetype, diff));
1598 dr_a1->seg_len = new_seg_len;
1599 do_remove = true;
1603 if (do_remove)
1605 if (dump_enabled_p ())
1607 dump_printf (MSG_NOTE, "merging ranges for ");
1608 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a1->dr));
1609 dump_printf (MSG_NOTE, ", ");
1610 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b1->dr));
1611 dump_printf (MSG_NOTE, " and ");
1612 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a2->dr));
1613 dump_printf (MSG_NOTE, ", ");
1614 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b2->dr));
1615 dump_printf (MSG_NOTE, "\n");
1617 alias_pairs->ordered_remove (neg_step ? i - 1 : i);
1618 i--;
1624 /* Given LOOP's two data references and segment lengths described by DR_A
1625 and DR_B, create expression checking if the two addresses ranges intersect
1626 with each other based on index of the two addresses. This can only be
1627 done if DR_A and DR_B referring to the same (array) object and the index
1628 is the only difference. For example:
1630 DR_A DR_B
1631 data-ref arr[i] arr[j]
1632 base_object arr arr
1633 index {i_0, +, 1}_loop {j_0, +, 1}_loop
1635 The addresses and their index are like:
1637 |<- ADDR_A ->| |<- ADDR_B ->|
1638 ------------------------------------------------------->
1639 | | | | | | | | | |
1640 ------------------------------------------------------->
1641 i_0 ... i_0+4 j_0 ... j_0+4
1643 We can create expression based on index rather than address:
1645 (i_0 + 4 < j_0 || j_0 + 4 < i_0)
1647 Note evolution step of index needs to be considered in comparison. */
1649 static bool
1650 create_intersect_range_checks_index (struct loop *loop, tree *cond_expr,
1651 const dr_with_seg_len& dr_a,
1652 const dr_with_seg_len& dr_b)
/* Bail out for invariant references, mismatched dimensionality,
   non-constant segment lengths or steps, different base objects or
   different steps -- the index-based form requires all of these.  */
1654 if (integer_zerop (DR_STEP (dr_a.dr))
1655 || integer_zerop (DR_STEP (dr_b.dr))
1656 || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
1657 return false;
1659 if (!tree_fits_uhwi_p (dr_a.seg_len) || !tree_fits_uhwi_p (dr_b.seg_len))
1660 return false;
1662 if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
1663 return false;
1665 if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
1666 return false;
1668 if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
1669 return false;
1671 gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
1673 bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
1674 unsigned HOST_WIDE_INT abs_step
1675 = absu_hwi (tree_to_shwi (DR_STEP (dr_a.dr)));
1677 unsigned HOST_WIDE_INT seg_len1 = tree_to_uhwi (dr_a.seg_len);
1678 unsigned HOST_WIDE_INT seg_len2 = tree_to_uhwi (dr_b.seg_len);
1679 /* Infer the number of iterations with which the memory segment is accessed
1680 by DR. In other words, alias is checked if memory segment accessed by
1681 DR_A in some iterations intersect with memory segment accessed by DR_B
1682 in the same amount iterations.
1683 Note segment length is a linear function of number of iterations with
1684 DR_STEP as the coefficient. */
1685 unsigned HOST_WIDE_INT niter_len1 = (seg_len1 + abs_step - 1) / abs_step;
1686 unsigned HOST_WIDE_INT niter_len2 = (seg_len2 + abs_step - 1) / abs_step;
1688 unsigned int i;
1689 for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
1691 tree access1 = DR_ACCESS_FN (dr_a.dr, i);
1692 tree access2 = DR_ACCESS_FN (dr_b.dr, i);
1693 /* Two indices must be the same if they are not scev, or not scev wrt
1694 current loop being vectorized. */
1695 if (TREE_CODE (access1) != POLYNOMIAL_CHREC
1696 || TREE_CODE (access2) != POLYNOMIAL_CHREC
1697 || CHREC_VARIABLE (access1) != (unsigned)loop->num
1698 || CHREC_VARIABLE (access2) != (unsigned)loop->num)
1700 if (operand_equal_p (access1, access2, 0))
1701 continue;
1703 return false;
1705 /* The two indices must have the same step. */
1706 if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
1707 return false;
1709 tree idx_step = CHREC_RIGHT (access1);
1710 /* Index must have const step, otherwise DR_STEP won't be constant. */
1711 gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
1712 /* Index must evaluate in the same direction as DR. */
1713 gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
1715 tree min1 = CHREC_LEFT (access1);
1716 tree min2 = CHREC_LEFT (access2);
1717 if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
1718 return false;
1720 /* Ideally, alias can be checked against loop's control IV, but we
1721 need to prove linear mapping between control IV and reference
1722 index. Although that should be true, we check against (array)
1723 index of data reference. Like segment length, index length is
1724 linear function of the number of iterations with index_step as
1725 the coefficient, i.e, niter_len * idx_step. */
1726 tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
1727 build_int_cst (TREE_TYPE (min1),
1728 niter_len1));
1729 tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
1730 build_int_cst (TREE_TYPE (min2),
1731 niter_len2));
1732 tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1);
1733 tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2);
1734 /* Adjust ranges for negative step. */
1735 if (neg_step)
1737 min1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1), max1, idx_step);
1738 max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1),
1739 CHREC_LEFT (access1), idx_step);
1740 min2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2), max2, idx_step);
1741 max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2),
1742 CHREC_LEFT (access2), idx_step);
/* Ranges do not intersect iff one ends before the other begins.  */
1744 tree part_cond_expr
1745 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1746 fold_build2 (LE_EXPR, boolean_type_node, max1, min2),
1747 fold_build2 (LE_EXPR, boolean_type_node, max2, min1));
1748 if (*cond_expr)
1749 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1750 *cond_expr, part_cond_expr);
1751 else
1752 *cond_expr = part_cond_expr;
1754 return true;
1757 /* Given two data references and segment lengths described by DR_A and DR_B,
1758 create expression checking if the two addresses ranges intersect with
1759 each other:
1761 ((DR_A_addr_0 + DR_A_segment_length_0) <= DR_B_addr_0)
1762 || (DR_B_addr_0 + DR_B_segment_length_0) <= DR_A_addr_0)) */
1764 static void
1765 create_intersect_range_checks (struct loop *loop, tree *cond_expr,
1766 const dr_with_seg_len& dr_a,
1767 const dr_with_seg_len& dr_b)
/* Prefer the cheaper index-based check; fall back to comparing
   address ranges when it does not apply.  */
1769 *cond_expr = NULL_TREE;
1770 if (create_intersect_range_checks_index (loop, cond_expr, dr_a, dr_b))
1771 return;
1773 tree segment_length_a = dr_a.seg_len;
1774 tree segment_length_b = dr_b.seg_len;
1775 tree addr_base_a = DR_BASE_ADDRESS (dr_a.dr);
1776 tree addr_base_b = DR_BASE_ADDRESS (dr_b.dr);
1777 tree offset_a = DR_OFFSET (dr_a.dr), offset_b = DR_OFFSET (dr_b.dr);
/* Fold the variable offset and the constant init into the base
   address of each reference.  */
1779 offset_a = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_a),
1780 offset_a, DR_INIT (dr_a.dr));
1781 offset_b = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_b),
1782 offset_b, DR_INIT (dr_b.dr));
1783 addr_base_a = fold_build_pointer_plus (addr_base_a, offset_a);
1784 addr_base_b = fold_build_pointer_plus (addr_base_b, offset_b);
1786 tree seg_a_min = addr_base_a;
1787 tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
1788 /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
1789 bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
1790 [a, a+12) */
1791 if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
1793 tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a.dr)));
1794 seg_a_min = fold_build_pointer_plus (seg_a_max, unit_size);
1795 seg_a_max = fold_build_pointer_plus (addr_base_a, unit_size);
1798 tree seg_b_min = addr_base_b;
1799 tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
1800 if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
1802 tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b.dr)));
1803 seg_b_min = fold_build_pointer_plus (seg_b_max, unit_size);
1804 seg_b_max = fold_build_pointer_plus (addr_base_b, unit_size);
1806 *cond_expr
1807 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1808 fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
1809 fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
1812 /* Create a conditional expression that represents the run-time checks for
1813 overlapping of address ranges represented by a list of data references
1814 pairs passed in ALIAS_PAIRS. Data references are in LOOP. The returned
1815 COND_EXPR is the conditional expression to be used in the if statement
1816 that controls which version of the loop gets executed at runtime. */
1818 void
1819 create_runtime_alias_checks (struct loop *loop,
1820 vec<dr_with_seg_len_pair_t> *alias_pairs,
1821 tree * cond_expr)
1823 tree part_cond_expr;
/* AND together one no-overlap condition per pair; COND_EXPR may come in
   non-NULL, in which case the new conditions are appended to it.  */
1825 for (size_t i = 0, s = alias_pairs->length (); i < s; ++i)
1827 const dr_with_seg_len& dr_a = (*alias_pairs)[i].first;
1828 const dr_with_seg_len& dr_b = (*alias_pairs)[i].second;
1830 if (dump_enabled_p ())
1832 dump_printf (MSG_NOTE, "create runtime check for data references ");
1833 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
1834 dump_printf (MSG_NOTE, " and ");
1835 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
1836 dump_printf (MSG_NOTE, "\n");
1839 /* Create condition expression for each pair data references. */
1840 create_intersect_range_checks (loop, &part_cond_expr, dr_a, dr_b);
1841 if (*cond_expr)
1842 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1843 *cond_expr, part_cond_expr);
1844 else
1845 *cond_expr = part_cond_expr;
1849 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1850 expressions. */
1851 static bool
1852 dr_equal_offsets_p1 (tree offset1, tree offset2)
1854 bool res;
/* Ignore no-op conversions before comparing.  */
1856 STRIP_NOPS (offset1);
1857 STRIP_NOPS (offset2);
1859 if (offset1 == offset2)
1860 return true;
/* Only unary and binary expressions with matching codes are compared
   structurally; anything else must be pointer-identical above.  */
1862 if (TREE_CODE (offset1) != TREE_CODE (offset2)
1863 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1864 return false;
1866 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1867 TREE_OPERAND (offset2, 0));
1869 if (!res || !BINARY_CLASS_P (offset1))
1870 return res;
1872 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1873 TREE_OPERAND (offset2, 1));
1875 return res;
1878 /* Check if DRA and DRB have equal offsets.  Thin public wrapper around
1879 the recursive worker dr_equal_offsets_p1. */
1880 bool
1881 dr_equal_offsets_p (struct data_reference *dra,
1882 struct data_reference *drb)
1883 tree offset1, offset2;
1885 offset1 = DR_OFFSET (dra);
1886 offset2 = DR_OFFSET (drb);
1888 return dr_equal_offsets_p1 (offset1, offset2);
1891 /* Returns true if FNA == FNB, i.e. both affine functions have the same
1892 number of coefficients and all corresponding coefficients are equal. */
1893 static bool
1894 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1896 unsigned i, n = fna.length ();
1898 if (n != fnb.length ())
1899 return false;
1901 for (i = 0; i < n; i++)
1902 if (!operand_equal_p (fna[i], fnb[i], 0))
1903 return false;
1905 return true;
1908 /* If all the functions in CF are the same, returns one of them,
1909 otherwise returns NULL (an empty affine_fn). */
1911 static affine_fn
1912 common_affine_function (conflict_function *cf)
1914 unsigned i;
1915 affine_fn comm;
/* A trivial (unknown / no-dependence) conflict function has no common
   affine function.  */
1917 if (!CF_NONTRIVIAL_P (cf))
1918 return affine_fn ();
1920 comm = cf->fns[0];
1922 for (i = 1; i < cf->n; i++)
1923 if (!affine_function_equal_p (comm, cf->fns[i]))
1924 return affine_fn ();
1926 return comm;
1929 /* Returns the base of the affine function FN.  The constant term is
1930 stored at index 0; the remaining entries are coefficients. */
1931 static tree
1932 affine_function_base (affine_fn fn)
1934 return fn[0];
1937 /* Returns true if FN is a constant, i.e. every coefficient past the
1938 base term (index 0) is zero. */
1939 static bool
1940 affine_function_constant_p (affine_fn fn)
1942 unsigned i;
1943 tree coef;
1945 for (i = 1; fn.iterate (i, &coef); i++)
1946 if (!integer_zerop (coef))
1947 return false;
1949 return true;
1952 /* Returns true if FN is the zero constant function: constant with a
1953 zero base term. */
1954 static bool
1955 affine_function_zero_p (affine_fn fn)
1957 return (integer_zerop (affine_function_base (fn))
1958 && affine_function_constant_p (fn));
1961 /* Returns a signed integer type with the largest precision from TA
1962 and TB. */
1964 static tree
1965 signed_type_for_types (tree ta, tree tb)
/* Ties (equal precision) resolve to TB's signed variant.  */
1967 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1968 return signed_type_for (ta);
1969 else
1970 return signed_type_for (tb);
1973 /* Applies operation OP on affine functions FNA and FNB, and returns the
1974 result.  The shorter operand is implicitly extended with zero
1975 coefficients so the result has the length of the longer one. */
1976 static affine_fn
1977 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1979 unsigned i, n, m;
1980 affine_fn ret;
1981 tree coef;
/* N is the shorter length, M the longer; the result has M entries.  */
1983 if (fnb.length () > fna.length ())
1985 n = fna.length ();
1986 m = fnb.length ();
1988 else
1990 n = fnb.length ();
1991 m = fna.length ();
1994 ret.create (m);
1995 for (i = 0; i < n; i++)
1997 tree type = signed_type_for_types (TREE_TYPE (fna[i]),
1998 TREE_TYPE (fnb[i]));
1999 ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
/* Exactly one of the following two loops runs: the tail of the longer
   operand is combined with an implicit zero.  */
2002 for (; fna.iterate (i, &coef); i++)
2003 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2004 coef, integer_zero_node));
2005 for (; fnb.iterate (i, &coef); i++)
2006 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2007 integer_zero_node, coef));
2009 return ret;
2012 /* Returns the sum of affine functions FNA and FNB (freshly allocated;
2013 caller releases it with affine_fn_free). */
2014 static affine_fn
2015 affine_fn_plus (affine_fn fna, affine_fn fnb)
2017 return affine_fn_op (PLUS_EXPR, fna, fnb);
2020 /* Returns the difference of affine functions FNA and FNB (freshly
2021 allocated; caller releases it with affine_fn_free). */
2022 static affine_fn
2023 affine_fn_minus (affine_fn fna, affine_fn fnb)
2025 return affine_fn_op (MINUS_EXPR, fna, fnb);
2028 /* Frees affine function FN (releases the underlying vector). */
2030 static void
2031 affine_fn_free (affine_fn fn)
2033 fn.release ();
2036 /* Determine for each subscript in the data dependence relation DDR
2037 the distance. */
2039 static void
2040 compute_subscript_distance (struct data_dependence_relation *ddr)
2042 conflict_function *cf_a, *cf_b;
2043 affine_fn fn_a, fn_b, diff;
/* Distances only make sense when a dependence exists
   (DDR_ARE_DEPENDENT == NULL_TREE, i.e. not chrec_known/dont_know).  */
2045 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
2047 unsigned int i;
2049 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2051 struct subscript *subscript;
2053 subscript = DDR_SUBSCRIPT (ddr, i);
2054 cf_a = SUB_CONFLICTS_IN_A (subscript);
2055 cf_b = SUB_CONFLICTS_IN_B (subscript);
2057 fn_a = common_affine_function (cf_a);
2058 fn_b = common_affine_function (cf_b);
/* If either side has no single common affine function, the distance
   is unknown for the whole relation -- give up immediately.  */
2059 if (!fn_a.exists () || !fn_b.exists ())
2061 SUB_DISTANCE (subscript) = chrec_dont_know;
2062 return;
2064 diff = affine_fn_minus (fn_a, fn_b);
/* A constant difference is the subscript distance; a non-constant one
   cannot be represented.  */
2066 if (affine_function_constant_p (diff))
2067 SUB_DISTANCE (subscript) = affine_function_base (diff);
2068 else
2069 SUB_DISTANCE (subscript) = chrec_dont_know;
2071 affine_fn_free (diff);
2076 /* Returns the conflict function for "unknown": a freshly allocated
2077 conflict_function whose count field is the NOT_KNOWN sentinel. */
2078 static conflict_function *
2079 conflict_fn_not_known (void)
2081 conflict_function *fn = XCNEW (conflict_function);
2082 fn->n = NOT_KNOWN;
2084 return fn;
2087 /* Returns the conflict function for "independent": a freshly allocated
2088 conflict_function whose count field is the NO_DEPENDENCE sentinel. */
2089 static conflict_function *
2090 conflict_fn_no_dependence (void)
2092 conflict_function *fn = XCNEW (conflict_function);
2093 fn->n = NO_DEPENDENCE;
2095 return fn;
2098 /* Returns true if the address of OBJ is invariant in LOOP. */
2100 static bool
2101 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
/* Walk down the handled-component chain; any component whose operands
   vary in LOOP makes the address variant.  */
2103 while (handled_component_p (obj))
2105 if (TREE_CODE (obj) == ARRAY_REF)
2107 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
2108 need to check the stride and the lower bound of the reference. */
2109 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2110 loop->num)
2111 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
2112 loop->num))
2113 return false;
2115 else if (TREE_CODE (obj) == COMPONENT_REF)
2117 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2118 loop->num))
2119 return false;
2121 obj = TREE_OPERAND (obj, 0);
/* A decl base has a fixed address; for pointer-based bases check that
   the pointer itself is not defined inside LOOP.  */
2124 if (!INDIRECT_REF_P (obj)
2125 && TREE_CODE (obj) != MEM_REF)
2126 return true;
2128 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
2129 loop->num);
2132 /* Returns false if we can prove that data references A and B do not alias,
2133 true otherwise. If LOOP_NEST is false no cross-iteration aliases are
2134 considered. */
2136 bool
2137 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
2138 bool loop_nest)
2140 tree addr_a = DR_BASE_OBJECT (a);
2141 tree addr_b = DR_BASE_OBJECT (b);
2143 /* If we are not processing a loop nest but scalar code we
2144 do not need to care about possible cross-iteration dependences
2145 and thus can process the full original reference. Do so,
2146 similar to how loop invariant motion applies extra offset-based
2147 disambiguation. */
2148 if (!loop_nest)
2150 aff_tree off1, off2;
2151 poly_widest_int size1, size2;
2152 get_inner_reference_aff (DR_REF (a), &off1, &size1);
2153 get_inner_reference_aff (DR_REF (b), &off2, &size2);
/* Compute off2 - off1 and test whether the two byte ranges can
   possibly overlap.  */
2154 aff_combination_scale (&off1, -1);
2155 aff_combination_add (&off2, &off1);
2156 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
2157 return false;
/* References in the same dependence clique but with different bases
   were proved independent when the clique was formed.  */
2160 if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
2161 && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
2162 && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
2163 && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
2164 return false;
2166 /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
2167 do not know the size of the base-object. So we cannot do any
2168 offset/overlap based analysis but have to rely on points-to
2169 information only. */
2170 if (TREE_CODE (addr_a) == MEM_REF
2171 && (DR_UNCONSTRAINED_BASE (a)
2172 || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
2174 /* For true dependences we can apply TBAA. */
2175 if (flag_strict_aliasing
2176 && DR_IS_WRITE (a) && DR_IS_READ (b)
2177 && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
2178 get_alias_set (DR_REF (b))))
2179 return false;
2180 if (TREE_CODE (addr_b) == MEM_REF)
2181 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2182 TREE_OPERAND (addr_b, 0));
2183 else
2184 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2185 build_fold_addr_expr (addr_b));
2187 else if (TREE_CODE (addr_b) == MEM_REF
2188 && (DR_UNCONSTRAINED_BASE (b)
2189 || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
2191 /* For true dependences we can apply TBAA. */
2192 if (flag_strict_aliasing
2193 && DR_IS_WRITE (a) && DR_IS_READ (b)
2194 && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
2195 get_alias_set (DR_REF (b))))
2196 return false;
2197 if (TREE_CODE (addr_a) == MEM_REF)
2198 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2199 TREE_OPERAND (addr_b, 0));
2200 else
2201 return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
2202 TREE_OPERAND (addr_b, 0));
2205 /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
2206 that is being subsetted in the loop nest. */
2207 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
2208 return refs_output_dependent_p (addr_a, addr_b);
2209 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
2210 return refs_anti_dependent_p (addr_a, addr_b);
2211 return refs_may_alias_p (addr_a, addr_b);
2214 /* REF_A and REF_B both satisfy access_fn_component_p. Return true
2215 if it is meaningful to compare their associated access functions
2216 when checking for dependencies. */
2218 static bool
2219 access_fn_components_comparable_p (tree ref_a, tree ref_b)
2221 /* Allow pairs of component refs from the following sets:
2223 { REALPART_EXPR, IMAGPART_EXPR }
2224 { COMPONENT_REF }
2225 { ARRAY_REF }. */
2226 tree_code code_a = TREE_CODE (ref_a);
2227 tree_code code_b = TREE_CODE (ref_b);
2228 if (code_a == IMAGPART_EXPR)
2229 code_a = REALPART_EXPR;
2230 if (code_b == IMAGPART_EXPR)
2231 code_b = REALPART_EXPR;
2232 if (code_a != code_b)
2233 return false;
2235 if (TREE_CODE (ref_a) == COMPONENT_REF)
2236 /* ??? We cannot simply use the type of operand #0 of the refs here as
2237 the Fortran compiler smuggles type punning into COMPONENT_REFs.
2238 Use the DECL_CONTEXT of the FIELD_DECLs instead. */
2239 return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
2240 == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
2242 return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
2243 TREE_TYPE (TREE_OPERAND (ref_b, 0)));
/* Initialize a data dependence relation between data accesses A and
   B.  NB_LOOPS is the number of loops surrounding the references: the
   size of the classic distance/direction vectors.  */

struct data_dependence_relation *
initialize_data_dependence_relation (struct data_reference *a,
				     struct data_reference *b,
				     vec<loop_p> loop_nest)
{
  struct data_dependence_relation *res;
  unsigned int i;

  res = XCNEW (struct data_dependence_relation);
  DDR_A (res) = a;
  DDR_B (res) = b;
  DDR_LOOP_NEST (res).create (0);
  DDR_SUBSCRIPTS (res).create (0);
  DDR_DIR_VECTS (res).create (0);
  DDR_DIST_VECTS (res).create (0);

  /* A missing reference means we cannot say anything: be conservative.  */
  if (a == NULL || b == NULL)
    {
      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
      return res;
    }

  /* If the data references do not alias, then they are independent.  */
  if (!dr_may_alias_p (a, b, loop_nest.exists ()))
    {
      DDR_ARE_DEPENDENT (res) = chrec_known;
      return res;
    }

  unsigned int num_dimensions_a = DR_NUM_DIMENSIONS (a);
  unsigned int num_dimensions_b = DR_NUM_DIMENSIONS (b);
  if (num_dimensions_a == 0 || num_dimensions_b == 0)
    {
      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
      return res;
    }

  /* For unconstrained bases, the root (highest-indexed) subscript
     describes a variation in the base of the original DR_REF rather
     than a component access.  We have no type that accurately describes
     the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
     applying this subscript) so limit the search to the last real
     component access.

     E.g. for:

       void
       f (int a[][8], int b[][8])
       {
	 for (int i = 0; i < 8; ++i)
	   a[i * 2][0] = b[i][0];
       }

     the a and b accesses have a single ARRAY_REF component reference [0]
     but have two subscripts.  */
  if (DR_UNCONSTRAINED_BASE (a))
    num_dimensions_a -= 1;
  if (DR_UNCONSTRAINED_BASE (b))
    num_dimensions_b -= 1;

  /* These structures describe sequences of component references in
     DR_REF (A) and DR_REF (B).  Each component reference is tied to a
     specific access function.  */
  struct {
    /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
       DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
       indices.  In C notation, these are the indices of the rightmost
       component references; e.g. for a sequence .b.c.d, the start
       index is for .d.  */
    unsigned int start_a;
    unsigned int start_b;

    /* The sequence contains LENGTH consecutive access functions from
       each DR.  */
    unsigned int length;

    /* The enclosing objects for the A and B sequences respectively,
       i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
       and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied.  */
    tree object_a;
    tree object_b;
  } full_seq = {}, struct_seq = {};

  /* Before each iteration of the loop:

     - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
     - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B).  */
  unsigned int index_a = 0;
  unsigned int index_b = 0;
  tree ref_a = DR_REF (a);
  tree ref_b = DR_REF (b);

  /* Now walk the component references from the final DR_REFs back up to
     the enclosing base objects.  Each component reference corresponds
     to one access function in the DR, with access function 0 being for
     the final DR_REF and the highest-indexed access function being the
     one that is applied to the base of the DR.

     Look for a sequence of component references whose access functions
     are comparable (see access_fn_components_comparable_p).  If more
     than one such sequence exists, pick the one nearest the base
     (which is the leftmost sequence in C notation).  Store this sequence
     in FULL_SEQ.

     For example, if we have:

	struct foo { struct bar s; ... } (*a)[10], (*b)[10];

	A: a[0][i].s.c.d
	B: __real b[0][i].s.e[i].f

     (where d is the same type as the real component of f) then the access
     functions would be:

	 0   1   2   3
     A:  .d  .c  .s  [i]

	 0   1   2   3   4   5
     B:  __real  .f  [i]  .e  .s  [i]

     The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
     and [i] is an ARRAY_REF.  However, the A1/B3 column contains two
     COMPONENT_REF accesses for struct bar, so is comparable.  Likewise
     the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
     so is comparable.  The A3/B5 column contains two ARRAY_REFs that
     index foo[10] arrays, so is again comparable.  The sequence is
     therefore:

	A: [1, 3]  (i.e. [i].s.c)
	B: [3, 5]  (i.e. [i].s.e)

     Also look for sequences of component references whose access
     functions are comparable and whose enclosing objects have the same
     RECORD_TYPE.  Store this sequence in STRUCT_SEQ.  In the above
     example, STRUCT_SEQ would be:

	A: [1, 2]  (i.e. s.c)
	B: [3, 4]  (i.e. s.e)  */
  while (index_a < num_dimensions_a && index_b < num_dimensions_b)
    {
      /* REF_A and REF_B must be one of the component access types
	 allowed by dr_analyze_indices.  */
      gcc_checking_assert (access_fn_component_p (ref_a));
      gcc_checking_assert (access_fn_component_p (ref_b));

      /* Get the immediately-enclosing objects for REF_A and REF_B,
	 i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
	 and DR_ACCESS_FN (B, INDEX_B).  */
      tree object_a = TREE_OPERAND (ref_a, 0);
      tree object_b = TREE_OPERAND (ref_b, 0);

      tree type_a = TREE_TYPE (object_a);
      tree type_b = TREE_TYPE (object_b);
      if (access_fn_components_comparable_p (ref_a, ref_b))
	{
	  /* This pair of component accesses is comparable for dependence
	     analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
	     DR_ACCESS_FN (B, INDEX_B) in the sequence.  */
	  if (full_seq.start_a + full_seq.length != index_a
	      || full_seq.start_b + full_seq.length != index_b)
	    {
	      /* The accesses don't extend the current sequence,
		 so start a new one here.  */
	      full_seq.start_a = index_a;
	      full_seq.start_b = index_b;
	      full_seq.length = 0;
	    }

	  /* Add this pair of references to the sequence.  */
	  full_seq.length += 1;
	  full_seq.object_a = object_a;
	  full_seq.object_b = object_b;

	  /* If the enclosing objects are structures (and thus have the
	     same RECORD_TYPE), record the new sequence in STRUCT_SEQ.  */
	  if (TREE_CODE (type_a) == RECORD_TYPE)
	    struct_seq = full_seq;

	  /* Move to the next containing reference for both A and B.  */
	  ref_a = object_a;
	  ref_b = object_b;
	  index_a += 1;
	  index_b += 1;
	  continue;
	}

      /* Try to approach equal type sizes.  */
      if (!COMPLETE_TYPE_P (type_a)
	  || !COMPLETE_TYPE_P (type_b)
	  || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
	  || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
	break;

      /* Advance past the smaller enclosing object so that the two walks
	 stay roughly aligned on type size; equal sizes advance both.  */
      unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
      unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
      if (size_a <= size_b)
	{
	  index_a += 1;
	  ref_a = object_a;
	}
      if (size_b <= size_a)
	{
	  index_b += 1;
	  ref_b = object_b;
	}
    }

  /* See whether FULL_SEQ ends at the base and whether the two bases
     are equal.  We do not care about TBAA or alignment info so we can
     use OEP_ADDRESS_OF to avoid false negatives.  */
  tree base_a = DR_BASE_OBJECT (a);
  tree base_b = DR_BASE_OBJECT (b);
  bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
		      && full_seq.start_b + full_seq.length == num_dimensions_b
		      && DR_UNCONSTRAINED_BASE (a) == DR_UNCONSTRAINED_BASE (b)
		      && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
		      && types_compatible_p (TREE_TYPE (base_a),
					     TREE_TYPE (base_b))
		      && (!loop_nest.exists ()
			  || (object_address_invariant_in_loop_p
			      (loop_nest[0], base_a))));

  /* If the bases are the same, we can include the base variation too.
     E.g. the b accesses in:

       for (int i = 0; i < n; ++i)
	 b[i + 4][0] = b[i][0];

     have a definite dependence distance of 4, while for:

       for (int i = 0; i < n; ++i)
	 a[i + 4][0] = b[i][0];

     the dependence distance depends on the gap between a and b.

     If the bases are different then we can only rely on the sequence
     rooted at a structure access, since arrays are allowed to overlap
     arbitrarily and change shape arbitrarily.  E.g. we treat this as
     valid code:

       int a[256];
       ...
       ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];

     where two lvalues with the same int[4][3] type overlap, and where
     both lvalues are distinct from the object's declared type.  */
  if (same_base_p)
    {
      if (DR_UNCONSTRAINED_BASE (a))
	full_seq.length += 1;
    }
  else
    full_seq = struct_seq;

  /* Punt if we didn't find a suitable sequence.  */
  if (full_seq.length == 0)
    {
      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
      return res;
    }

  if (!same_base_p)
    {
      /* Partial overlap is possible for different bases when strict aliasing
	 is not in effect.  It's also possible if either base involves a union
	 access; e.g. for:

	 struct s1 { int a[2]; };
	 struct s2 { struct s1 b; int c; };
	 struct s3 { int d; struct s1 e; };
	 union u { struct s2 f; struct s3 g; } *p, *q;

	 the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
	 "p->g.e" (base "p->g") and might partially overlap the s1 at
	 "q->g.e" (base "q->g").  */
      if (!flag_strict_aliasing
	  || ref_contains_union_access_p (full_seq.object_a)
	  || ref_contains_union_access_p (full_seq.object_b))
	{
	  DDR_ARE_DEPENDENT (res) = chrec_dont_know;
	  return res;
	}

      DDR_COULD_BE_INDEPENDENT_P (res) = true;
      if (!loop_nest.exists ()
	  || (object_address_invariant_in_loop_p (loop_nest[0],
						  full_seq.object_a)
	      && object_address_invariant_in_loop_p (loop_nest[0],
						     full_seq.object_b)))
	{
	  DDR_OBJECT_A (res) = full_seq.object_a;
	  DDR_OBJECT_B (res) = full_seq.object_b;
	}
    }

  DDR_AFFINE_P (res) = true;
  DDR_ARE_DEPENDENT (res) = NULL_TREE;
  DDR_SUBSCRIPTS (res).create (full_seq.length);
  DDR_LOOP_NEST (res) = loop_nest;
  DDR_INNER_LOOP (res) = 0;
  DDR_SELF_REFERENCE (res) = false;

  /* Pair up the access functions of the chosen sequence; the conflict
     information starts out fully conservative and is refined later.  */
  for (i = 0; i < full_seq.length; ++i)
    {
      struct subscript *subscript;

      subscript = XNEW (struct subscript);
      SUB_ACCESS_FN (subscript, 0) = DR_ACCESS_FN (a, full_seq.start_a + i);
      SUB_ACCESS_FN (subscript, 1) = DR_ACCESS_FN (b, full_seq.start_b + i);
      SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
      SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
      SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
      SUB_DISTANCE (subscript) = chrec_dont_know;
      DDR_SUBSCRIPTS (res).safe_push (subscript);
    }

  return res;
}
2569 /* Frees memory used by the conflict function F. */
2571 static void
2572 free_conflict_function (conflict_function *f)
2574 unsigned i;
2576 if (CF_NONTRIVIAL_P (f))
2578 for (i = 0; i < f->n; i++)
2579 affine_fn_free (f->fns[i]);
2581 free (f);
2584 /* Frees memory used by SUBSCRIPTS. */
2586 static void
2587 free_subscripts (vec<subscript_p> subscripts)
2589 unsigned i;
2590 subscript_p s;
2592 FOR_EACH_VEC_ELT (subscripts, i, s)
2594 free_conflict_function (s->conflicting_iterations_in_a);
2595 free_conflict_function (s->conflicting_iterations_in_b);
2596 free (s);
2598 subscripts.release ();
2601 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
2602 description. */
2604 static inline void
2605 finalize_ddr_dependent (struct data_dependence_relation *ddr,
2606 tree chrec)
2608 DDR_ARE_DEPENDENT (ddr) = chrec;
2609 free_subscripts (DDR_SUBSCRIPTS (ddr));
2610 DDR_SUBSCRIPTS (ddr).create (0);
2613 /* The dependence relation DDR cannot be represented by a distance
2614 vector. */
2616 static inline void
2617 non_affine_dependence_relation (struct data_dependence_relation *ddr)
2619 if (dump_file && (dump_flags & TDF_DETAILS))
2620 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
2622 DDR_AFFINE_P (ddr) = false;
2627 /* This section contains the classic Banerjee tests. */
2629 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
2630 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
2632 static inline bool
2633 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
2635 return (evolution_function_is_constant_p (chrec_a)
2636 && evolution_function_is_constant_p (chrec_b));
2639 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
2640 variable, i.e., if the SIV (Single Index Variable) test is true. */
2642 static bool
2643 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
2645 if ((evolution_function_is_constant_p (chrec_a)
2646 && evolution_function_is_univariate_p (chrec_b))
2647 || (evolution_function_is_constant_p (chrec_b)
2648 && evolution_function_is_univariate_p (chrec_a)))
2649 return true;
2651 if (evolution_function_is_univariate_p (chrec_a)
2652 && evolution_function_is_univariate_p (chrec_b))
2654 switch (TREE_CODE (chrec_a))
2656 case POLYNOMIAL_CHREC:
2657 switch (TREE_CODE (chrec_b))
2659 case POLYNOMIAL_CHREC:
2660 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
2661 return false;
2662 /* FALLTHRU */
2664 default:
2665 return true;
2668 default:
2669 return true;
2673 return false;
2676 /* Creates a conflict function with N dimensions. The affine functions
2677 in each dimension follow. */
2679 static conflict_function *
2680 conflict_fn (unsigned n, ...)
2682 unsigned i;
2683 conflict_function *ret = XCNEW (conflict_function);
2684 va_list ap;
2686 gcc_assert (n > 0 && n <= MAX_DIM);
2687 va_start (ap, n);
2689 ret->n = n;
2690 for (i = 0; i < n; i++)
2691 ret->fns[i] = va_arg (ap, affine_fn);
2692 va_end (ap);
2694 return ret;
2697 /* Returns constant affine function with value CST. */
2699 static affine_fn
2700 affine_fn_cst (tree cst)
2702 affine_fn fn;
2703 fn.create (1);
2704 fn.quick_push (cst);
2705 return fn;
2708 /* Returns affine function with single variable, CST + COEF * x_DIM. */
2710 static affine_fn
2711 affine_fn_univar (tree cst, unsigned dim, tree coef)
2713 affine_fn fn;
2714 fn.create (dim + 1);
2715 unsigned i;
2717 gcc_assert (dim > 0);
2718 fn.quick_push (cst);
2719 for (i = 1; i < dim; i++)
2720 fn.quick_push (integer_zero_node);
2721 fn.quick_push (coef);
2722 return fn;
2725 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
2726 *OVERLAPS_B are initialized to the functions that describe the
2727 relation between the elements accessed twice by CHREC_A and
2728 CHREC_B. For k >= 0, the following property is verified:
2730 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2732 static void
2733 analyze_ziv_subscript (tree chrec_a,
2734 tree chrec_b,
2735 conflict_function **overlaps_a,
2736 conflict_function **overlaps_b,
2737 tree *last_conflicts)
2739 tree type, difference;
2740 dependence_stats.num_ziv++;
2742 if (dump_file && (dump_flags & TDF_DETAILS))
2743 fprintf (dump_file, "(analyze_ziv_subscript \n");
2745 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2746 chrec_a = chrec_convert (type, chrec_a, NULL);
2747 chrec_b = chrec_convert (type, chrec_b, NULL);
2748 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2750 switch (TREE_CODE (difference))
2752 case INTEGER_CST:
2753 if (integer_zerop (difference))
2755 /* The difference is equal to zero: the accessed index
2756 overlaps for each iteration in the loop. */
2757 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2758 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2759 *last_conflicts = chrec_dont_know;
2760 dependence_stats.num_ziv_dependent++;
2762 else
2764 /* The accesses do not overlap. */
2765 *overlaps_a = conflict_fn_no_dependence ();
2766 *overlaps_b = conflict_fn_no_dependence ();
2767 *last_conflicts = integer_zero_node;
2768 dependence_stats.num_ziv_independent++;
2770 break;
2772 default:
2773 /* We're not sure whether the indexes overlap. For the moment,
2774 conservatively answer "don't know". */
2775 if (dump_file && (dump_flags & TDF_DETAILS))
2776 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
2778 *overlaps_a = conflict_fn_not_known ();
2779 *overlaps_b = conflict_fn_not_known ();
2780 *last_conflicts = chrec_dont_know;
2781 dependence_stats.num_ziv_unimplemented++;
2782 break;
2785 if (dump_file && (dump_flags & TDF_DETAILS))
2786 fprintf (dump_file, ")\n");
2789 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
2790 and only if it fits to the int type. If this is not the case, or the
2791 bound on the number of iterations of LOOP could not be derived, returns
2792 chrec_dont_know. */
2794 static tree
2795 max_stmt_executions_tree (struct loop *loop)
2797 widest_int nit;
2799 if (!max_stmt_executions (loop, &nit))
2800 return chrec_dont_know;
2802 if (!wi::fits_to_tree_p (nit, unsigned_type_node))
2803 return chrec_dont_know;
2805 return wide_int_to_tree (unsigned_type_node, nit);
/* Determine whether the CHREC is always positive/negative.  If the expression
   cannot be statically analyzed, return false, otherwise set the answer into
   VALUE.  */

static bool
chrec_is_positive (tree chrec, bool *value)
{
  bool value0, value1, value2;
  tree end_value, nb_iter;

  switch (TREE_CODE (chrec))
    {
    case POLYNOMIAL_CHREC:
      /* Both the initial value and the step must have a known sign.  */
      if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
	  || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
	return false;

      /* FIXME -- overflows.  */
      if (value0 == value1)
	{
	  *value = value0;
	  return true;
	}

      /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
	 and the proof consists in showing that the sign never
	 changes during the execution of the loop, from 0 to
	 loop->nb_iterations.  */
      if (!evolution_function_is_affine_p (chrec))
	return false;

      nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
      if (chrec_contains_undetermined (nb_iter))
	return false;

#if 0
      /* TODO -- If the test is after the exit, we may decrease the number of
	 iterations by one.  */
      if (after_exit)
	nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
#endif

      end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);

      if (!chrec_is_positive (end_value, &value2))
	return false;

      /* NOTE(review): control only reaches here when VALUE0 != VALUE1
	 (the equal case returned above), so this always answers false;
	 VALUE2 is only used to require that the end value be analyzable.
	 Looks deliberately conservative -- confirm before changing.  */
      *value = value0;
      return value0 == value1;

    case INTEGER_CST:
      /* Zero has no sign, so it is reported as "cannot analyze".  */
      switch (tree_int_cst_sgn (chrec))
	{
	case -1:
	  *value = false;
	  break;
	case 1:
	  *value = true;
	  break;
	default:
	  return false;
	}
      return true;

    default:
      return false;
    }
}
/* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
   constant, and CHREC_B is an affine function.  *OVERLAPS_A and
   *OVERLAPS_B are initialized to the functions that describe the
   relation between the elements accessed twice by CHREC_A and
   CHREC_B.  For k >= 0, the following property is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_siv_subscript_cst_affine (tree chrec_a,
				  tree chrec_b,
				  conflict_function **overlaps_a,
				  conflict_function **overlaps_b,
				  tree *last_conflicts)
{
  bool value0, value1, value2;
  tree type, difference, tmp;

  /* Work in a signed type common to both access functions, since the
     test below reasons about the sign of DIFFERENCE.  */
  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
  chrec_a = chrec_convert (type, chrec_a, NULL);
  chrec_b = chrec_convert (type, chrec_b, NULL);
  difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);

  /* Special case overlap in the first iteration.  */
  if (integer_zerop (difference))
    {
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = integer_one_node;
      return;
    }

  if (!chrec_is_positive (initial_condition (difference), &value0))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "siv test failed: chrec is not positive.\n");

      dependence_stats.num_siv_unimplemented++;
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      return;
    }
  else
    {
      if (value0 == false)
	{
	  /* CHREC_A is at or above the initial value of CHREC_B, so an
	     overlap needs CHREC_B to step upwards.  */
	  if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "siv test failed: chrec not positive.\n");

	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	      dependence_stats.num_siv_unimplemented++;
	      return;
	    }
	  else
	    {
	      if (value1 == true)
		{
		  /* Example:
		     chrec_a = 12
		     chrec_b = {10, +, 1}
		  */

		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
		    {
		      HOST_WIDE_INT numiter;
		      struct loop *loop = get_chrec_loop (chrec_b);

		      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
		      /* The conflicting iteration of CHREC_B is
			 |difference| / step.  */
		      tmp = fold_build2 (EXACT_DIV_EXPR, type,
					 fold_build1 (ABS_EXPR, type, difference),
					 CHREC_RIGHT (chrec_b));
		      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
		      *last_conflicts = integer_one_node;

		      /* Perform weak-zero siv test to see if overlap is
			 outside the loop bounds.  */
		      numiter = max_stmt_executions_int (loop);

		      if (numiter >= 0
			  && compare_tree_int (tmp, numiter) > 0)
			{
			  free_conflict_function (*overlaps_a);
			  free_conflict_function (*overlaps_b);
			  *overlaps_a = conflict_fn_no_dependence ();
			  *overlaps_b = conflict_fn_no_dependence ();
			  *last_conflicts = integer_zero_node;
			  dependence_stats.num_siv_independent++;
			  return;
			}
		      dependence_stats.num_siv_dependent++;
		      return;
		    }

		  /* When the step does not divide the difference, there are
		     no overlaps.  */
		  else
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      dependence_stats.num_siv_independent++;
		      return;
		    }
		}

	      else
		{
		  /* Example:
		     chrec_a = 12
		     chrec_b = {10, +, -1}

		     In this case, chrec_a will not overlap with chrec_b.  */
		  *overlaps_a = conflict_fn_no_dependence ();
		  *overlaps_b = conflict_fn_no_dependence ();
		  *last_conflicts = integer_zero_node;
		  dependence_stats.num_siv_independent++;
		  return;
		}
	    }
	}
      else
	{
	  /* CHREC_A is below the initial value of CHREC_B, so an
	     overlap needs CHREC_B to step downwards.  */
	  if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "siv test failed: chrec not positive.\n");

	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	      dependence_stats.num_siv_unimplemented++;
	      return;
	    }
	  else
	    {
	      if (value2 == false)
		{
		  /* Example:
		     chrec_a = 3
		     chrec_b = {10, +, -1}
		  */
		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
		    {
		      HOST_WIDE_INT numiter;
		      struct loop *loop = get_chrec_loop (chrec_b);

		      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
		      tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
					 CHREC_RIGHT (chrec_b));
		      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
		      *last_conflicts = integer_one_node;

		      /* Perform weak-zero siv test to see if overlap is
			 outside the loop bounds.  */
		      numiter = max_stmt_executions_int (loop);

		      if (numiter >= 0
			  && compare_tree_int (tmp, numiter) > 0)
			{
			  free_conflict_function (*overlaps_a);
			  free_conflict_function (*overlaps_b);
			  *overlaps_a = conflict_fn_no_dependence ();
			  *overlaps_b = conflict_fn_no_dependence ();
			  *last_conflicts = integer_zero_node;
			  dependence_stats.num_siv_independent++;
			  return;
			}
		      dependence_stats.num_siv_dependent++;
		      return;
		    }

		  /* When the step does not divide the difference, there
		     are no overlaps.  */
		  else
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      dependence_stats.num_siv_independent++;
		      return;
		    }
		}
	      else
		{
		  /* Example:
		     chrec_a = 3
		     chrec_b = {4, +, 1}

		     In this case, chrec_a will not overlap with chrec_b.  */
		  *overlaps_a = conflict_fn_no_dependence ();
		  *overlaps_b = conflict_fn_no_dependence ();
		  *last_conflicts = integer_zero_node;
		  dependence_stats.num_siv_independent++;
		  return;
		}
	    }
	}
    }
}
/* Helper recursive function for initializing the matrix A.  Returns
   the initial value of CHREC.  */

static tree
initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
{
  gcc_assert (chrec);

  switch (TREE_CODE (chrec))
    {
    case POLYNOMIAL_CHREC:
      /* Record MULT times the step in row INDEX and recurse into the
	 initial value, which belongs to the next row.  */
      A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
      return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);

    case PLUS_EXPR:
    case MULT_EXPR:
    case MINUS_EXPR:
      {
	/* Fold the two recursively-computed initial values with the
	   same operation.  */
	tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
	tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);

	return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
      }

    CASE_CONVERT:
      {
	tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
	return chrec_convert (chrec_type (chrec), op, NULL);
      }

    case BIT_NOT_EXPR:
      {
	/* Handle ~X as -1 - X.  */
	tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
	return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
			      build_int_cst (TREE_TYPE (chrec), -1), op);
      }

    case INTEGER_CST:
      return chrec;

    default:
      gcc_unreachable ();
      return NULL_TREE;
    }
}
3131 #define FLOOR_DIV(x,y) ((x) / (y))
3133 /* Solves the special case of the Diophantine equation:
3134 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
3136 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
3137 number of iterations that loops X and Y run. The overlaps will be
3138 constructed as evolutions in dimension DIM. */
3140 static void
3141 compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
3142 HOST_WIDE_INT step_a,
3143 HOST_WIDE_INT step_b,
3144 affine_fn *overlaps_a,
3145 affine_fn *overlaps_b,
3146 tree *last_conflicts, int dim)
3148 if (((step_a > 0 && step_b > 0)
3149 || (step_a < 0 && step_b < 0)))
3151 HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
3152 HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;
3154 gcd_steps_a_b = gcd (step_a, step_b);
3155 step_overlaps_a = step_b / gcd_steps_a_b;
3156 step_overlaps_b = step_a / gcd_steps_a_b;
3158 if (niter > 0)
3160 tau2 = FLOOR_DIV (niter, step_overlaps_a);
3161 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
3162 last_conflict = tau2;
3163 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
3165 else
3166 *last_conflicts = chrec_dont_know;
3168 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
3169 build_int_cst (NULL_TREE,
3170 step_overlaps_a));
3171 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
3172 build_int_cst (NULL_TREE,
3173 step_overlaps_b));
3176 else
3178 *overlaps_a = affine_fn_cst (integer_zero_node);
3179 *overlaps_b = affine_fn_cst (integer_zero_node);
3180 *last_conflicts = integer_zero_node;
/* Solves the special case of a Diophantine equation where CHREC_A is
   an affine bivariate function, and CHREC_B is an affine univariate
   function.  For example,

   | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z

   has the following overlapping functions:

   | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
   | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
   | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v

   FORNOW: This is a specialized implementation for a case occurring in
   a common benchmark.  Implement the general algorithm.  */

static void
compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
				      conflict_function **overlaps_a,
				      conflict_function **overlaps_b,
				      tree *last_conflicts)
{
  bool xz_p, yz_p, xyz_p;
  HOST_WIDE_INT step_x, step_y, step_z;
  HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
  affine_fn overlaps_a_xz, overlaps_b_xz;
  affine_fn overlaps_a_yz, overlaps_b_yz;
  affine_fn overlaps_a_xyz, overlaps_b_xyz;
  affine_fn ova1, ova2, ovb;
  tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;

  /* Steps of the outer (x), inner (y) and rhs (z) evolutions.  */
  step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
  step_y = int_cst_value (CHREC_RIGHT (chrec_a));
  step_z = int_cst_value (CHREC_RIGHT (chrec_b));

  niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
  niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
  niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));

  /* Without iteration-count bounds we cannot limit the conflicts.  */
  if (niter_x < 0 || niter_y < 0 || niter_z < 0)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      return;
    }

  /* Solve the three univariate sub-problems x/z, y/z and (x+y)/z in
     dimensions 1, 2 and 3 respectively.  */
  niter = MIN (niter_x, niter_z);
  compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
					   &overlaps_a_xz,
					   &overlaps_b_xz,
					   &last_conflicts_xz, 1);
  niter = MIN (niter_y, niter_z);
  compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
					   &overlaps_a_yz,
					   &overlaps_b_yz,
					   &last_conflicts_yz, 2);
  niter = MIN (niter_x, niter_z);
  niter = MIN (niter_y, niter);
  compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
					   &overlaps_a_xyz,
					   &overlaps_b_xyz,
					   &last_conflicts_xyz, 3);

  xz_p = !integer_zerop (last_conflicts_xz);
  yz_p = !integer_zerop (last_conflicts_yz);
  xyz_p = !integer_zerop (last_conflicts_xyz);

  if (xz_p || yz_p || xyz_p)
    {
      /* Combine the contributing sub-solutions; the freed temporaries
	 are the values replaced by each affine_fn_plus.  Note that
	 *LAST_CONFLICTS keeps the value from the last contributing
	 case in xz -> yz -> xyz order.  */
      ova1 = affine_fn_cst (integer_zero_node);
      ova2 = affine_fn_cst (integer_zero_node);
      ovb = affine_fn_cst (integer_zero_node);
      if (xz_p)
	{
	  affine_fn t0 = ova1;
	  affine_fn t2 = ovb;

	  ova1 = affine_fn_plus (ova1, overlaps_a_xz);
	  ovb = affine_fn_plus (ovb, overlaps_b_xz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  *last_conflicts = last_conflicts_xz;
	}
      if (yz_p)
	{
	  affine_fn t0 = ova2;
	  affine_fn t2 = ovb;

	  ova2 = affine_fn_plus (ova2, overlaps_a_yz);
	  ovb = affine_fn_plus (ovb, overlaps_b_yz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  *last_conflicts = last_conflicts_yz;
	}
      if (xyz_p)
	{
	  affine_fn t0 = ova1;
	  affine_fn t2 = ova2;
	  affine_fn t4 = ovb;

	  ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
	  ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
	  ovb = affine_fn_plus (ovb, overlaps_b_xyz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  affine_fn_free (t4);
	  *last_conflicts = last_conflicts_xyz;
	}
      *overlaps_a = conflict_fn (2, ova1, ova2);
      *overlaps_b = conflict_fn (1, ovb);
    }
  else
    {
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = integer_zero_node;
    }

  /* The sub-solutions were copied into the results above, so their
     storage can be released.  */
  affine_fn_free (overlaps_a_xz);
  affine_fn_free (overlaps_b_xz);
  affine_fn_free (overlaps_a_yz);
  affine_fn_free (overlaps_b_yz);
  affine_fn_free (overlaps_a_xyz);
  affine_fn_free (overlaps_b_xyz);
}
3313 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
3315 static void
3316 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
3317 int size)
3319 memcpy (vec2, vec1, size * sizeof (*vec1));
3322 /* Copy the elements of M x N matrix MAT1 to MAT2. */
3324 static void
3325 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
3326 int m, int n)
3328 int i;
3330 for (i = 0; i < m; i++)
3331 lambda_vector_copy (mat1[i], mat2[i], n);
3334 /* Store the N x N identity matrix in MAT. */
3336 static void
3337 lambda_matrix_id (lambda_matrix mat, int size)
3339 int i, j;
3341 for (i = 0; i < size; i++)
3342 for (j = 0; j < size; j++)
3343 mat[i][j] = (i == j) ? 1 : 0;
3346 /* Return the first nonzero element of vector VEC1 between START and N.
3347 We must have START <= N. Returns N if VEC1 is the zero vector. */
3349 static int
3350 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
3352 int j = start;
3353 while (j < n && vec1[j] == 0)
3354 j++;
3355 return j;
3358 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
3359 R2 = R2 + CONST1 * R1. */
3361 static void
3362 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
3364 int i;
3366 if (const1 == 0)
3367 return;
3369 for (i = 0; i < n; i++)
3370 mat[r2][i] += const1 * mat[r1][i];
3373 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
3374 and store the result in VEC2. */
3376 static void
3377 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
3378 int size, int const1)
3380 int i;
3382 if (const1 == 0)
3383 lambda_vector_clear (vec2, size);
3384 else
3385 for (i = 0; i < size; i++)
3386 vec2[i] = const1 * vec1[i];
3389 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
3391 static void
3392 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
3393 int size)
3395 lambda_vector_mult_const (vec1, vec2, size, -1);
3398 /* Negate row R1 of matrix MAT which has N columns. */
3400 static void
3401 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
3403 lambda_vector_negate (mat[r1], mat[r1], n);
3406 /* Return true if two vectors are equal. */
3408 static bool
3409 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
3411 int i;
3412 for (i = 0; i < size; i++)
3413 if (vec1[i] != vec2[i])
3414 return false;
3415 return true;
3418 /* Given an M x N integer matrix A, this function determines an M x
3419 M unimodular matrix U, and an M x N echelon matrix S such that
3420 "U.A = S". This decomposition is also known as "right Hermite".
3422 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
3423 Restructuring Compilers" Utpal Banerjee. */
3425 static void
3426 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
3427 lambda_matrix S, lambda_matrix U)
3429 int i, j, i0 = 0;
/* Start from S = A and U = identity; every row operation below is
   applied to both so that the invariant U.A = S is preserved.  */
3431 lambda_matrix_copy (A, S, m, n);
3432 lambda_matrix_id (U, m);
3434 for (j = 0; j < n; j++)
3436 if (lambda_vector_first_nz (S[j], m, i0) < m)
3438 ++i0;
3439 for (i = m - 1; i >= i0; i--)
/* Euclidean-style elimination: reduce S[i][j] against S[i-1][j]
   until it becomes zero.  */
3441 while (S[i][j] != 0)
3443 int sigma, factor, a, b;
3445 a = S[i-1][j];
3446 b = S[i][j];
/* SIGMA carries the sign so that FACTOR = sigma * floor(|a| / |b|).  */
3447 sigma = (a * b < 0) ? -1: 1;
3448 a = abs (a);
3449 b = abs (b);
3450 factor = sigma * (a / b);
/* Same row combination and swap on S and on U.  */
3452 lambda_matrix_row_add (S, n, i, i-1, -factor);
3453 std::swap (S[i], S[i-1]);
3455 lambda_matrix_row_add (U, m, i, i-1, -factor);
3456 std::swap (U[i], U[i-1]);
3463 /* Determines the overlapping elements due to accesses CHREC_A and
3464 CHREC_B, that are affine functions. This function cannot handle
3465 symbolic evolution functions, ie. when initial conditions are
3466 parameters, because it uses lambda matrices of integers. */
3468 static void
3469 analyze_subscript_affine_affine (tree chrec_a,
3470 tree chrec_b,
3471 conflict_function **overlaps_a,
3472 conflict_function **overlaps_b,
3473 tree *last_conflicts)
3475 unsigned nb_vars_a, nb_vars_b, dim;
3476 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
3477 lambda_matrix A, U, S;
3478 struct obstack scratch_obstack;
3480 if (eq_evolutions_p (chrec_a, chrec_b))
3482 /* The accessed index overlaps for each iteration in the
3483 loop. */
3484 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3485 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3486 *last_conflicts = chrec_dont_know;
3487 return;
3489 if (dump_file && (dump_flags & TDF_DETAILS))
3490 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
3492 /* For determining the initial intersection, we have to solve a
3493 Diophantine equation. This is the most time consuming part.
3495 For answering to the question: "Is there a dependence?" we have
3496 to prove that there exists a solution to the Diophantine
3497 equation, and that the solution is in the iteration domain,
3498 i.e. the solution is positive or zero, and that the solution
3499 happens before the upper bound loop.nb_iterations. Otherwise
3500 there is no dependence. This function outputs a description of
3501 the iterations that hold the intersections. */
3503 nb_vars_a = nb_vars_in_chrec (chrec_a);
3504 nb_vars_b = nb_vars_in_chrec (chrec_b);
/* All matrices below live on this obstack and are freed in one shot
   at end_analyze_subs_aa.  */
3506 gcc_obstack_init (&scratch_obstack);
3508 dim = nb_vars_a + nb_vars_b;
3509 U = lambda_matrix_new (dim, dim, &scratch_obstack);
3510 A = lambda_matrix_new (dim, 1, &scratch_obstack);
3511 S = lambda_matrix_new (dim, 1, &scratch_obstack);
/* A's column holds the steps of CHREC_A (coefficient 1) stacked on the
   negated steps of CHREC_B (coefficient -1); GAMMA is the difference
   of the initial values.  */
3513 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
3514 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
3515 gamma = init_b - init_a;
3517 /* Don't do all the hard work of solving the Diophantine equation
3518 when we already know the solution: for example,
3519 | {3, +, 1}_1
3520 | {3, +, 4}_2
3521 | gamma = 3 - 3 = 0.
3522 Then the first overlap occurs during the first iterations:
3523 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
3525 if (gamma == 0)
3527 if (nb_vars_a == 1 && nb_vars_b == 1)
3529 HOST_WIDE_INT step_a, step_b;
3530 HOST_WIDE_INT niter, niter_a, niter_b;
3531 affine_fn ova, ovb;
3533 niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
3534 niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
3535 niter = MIN (niter_a, niter_b);
3536 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
3537 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
3539 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
3540 &ova, &ovb,
3541 last_conflicts, 1);
3542 *overlaps_a = conflict_fn (1, ova);
3543 *overlaps_b = conflict_fn (1, ovb);
/* 2-variable vs 1-variable cases are handled by a specialized helper;
   note the swapped argument order in the symmetric case.  */
3546 else if (nb_vars_a == 2 && nb_vars_b == 1)
3547 compute_overlap_steps_for_affine_1_2
3548 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
3550 else if (nb_vars_a == 1 && nb_vars_b == 2)
3551 compute_overlap_steps_for_affine_1_2
3552 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
3554 else
3556 if (dump_file && (dump_flags & TDF_DETAILS))
3557 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
3558 *overlaps_a = conflict_fn_not_known ();
3559 *overlaps_b = conflict_fn_not_known ();
3560 *last_conflicts = chrec_dont_know;
3562 goto end_analyze_subs_aa;
3565 /* U.A = S */
3566 lambda_matrix_right_hermite (A, dim, 1, S, U);
/* Normalize so that the gcd S[0][0] is nonnegative, negating the
   corresponding row of U to keep U.A = S.  */
3568 if (S[0][0] < 0)
3570 S[0][0] *= -1;
3571 lambda_matrix_row_negate (U, dim, 0);
3573 gcd_alpha_beta = S[0][0];
3575 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
3576 but that is a quite strange case. Instead of ICEing, answer
3577 don't know. */
3578 if (gcd_alpha_beta == 0)
3580 *overlaps_a = conflict_fn_not_known ();
3581 *overlaps_b = conflict_fn_not_known ();
3582 *last_conflicts = chrec_dont_know;
3583 goto end_analyze_subs_aa;
3586 /* The classic "gcd-test". */
3587 if (!int_divides_p (gcd_alpha_beta, gamma))
3589 /* The "gcd-test" has determined that there is no integer
3590 solution, i.e. there is no dependence. */
3591 *overlaps_a = conflict_fn_no_dependence ();
3592 *overlaps_b = conflict_fn_no_dependence ();
3593 *last_conflicts = integer_zero_node;
3596 /* Both access functions are univariate. This includes SIV and MIV cases. */
3597 else if (nb_vars_a == 1 && nb_vars_b == 1)
3599 /* Both functions should have the same evolution sign. */
3600 if (((A[0][0] > 0 && -A[1][0] > 0)
3601 || (A[0][0] < 0 && -A[1][0] < 0)))
3603 /* The solutions are given by:
3605 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
3606 | [u21 u22] [y0]
3608 For a given integer t. Using the following variables,
3610 | i0 = u11 * gamma / gcd_alpha_beta
3611 | j0 = u12 * gamma / gcd_alpha_beta
3612 | i1 = u21
3613 | j1 = u22
3615 the solutions are:
3617 | x0 = i0 + i1 * t,
3618 | y0 = j0 + j1 * t. */
3619 HOST_WIDE_INT i0, j0, i1, j1;
3621 i0 = U[0][0] * gamma / gcd_alpha_beta;
3622 j0 = U[0][1] * gamma / gcd_alpha_beta;
3623 i1 = U[1][0];
3624 j1 = U[1][1];
3626 if ((i1 == 0 && i0 < 0)
3627 || (j1 == 0 && j0 < 0))
3629 /* There is no solution.
3630 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
3631 falls in here, but for the moment we don't look at the
3632 upper bound of the iteration domain. */
3633 *overlaps_a = conflict_fn_no_dependence ();
3634 *overlaps_b = conflict_fn_no_dependence ();
3635 *last_conflicts = integer_zero_node;
3636 goto end_analyze_subs_aa;
3639 if (i1 > 0 && j1 > 0)
3641 HOST_WIDE_INT niter_a
3642 = max_stmt_executions_int (get_chrec_loop (chrec_a));
3643 HOST_WIDE_INT niter_b
3644 = max_stmt_executions_int (get_chrec_loop (chrec_b));
3645 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
3647 /* (X0, Y0) is a solution of the Diophantine equation:
3648 "chrec_a (X0) = chrec_b (Y0)". */
3649 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
3650 CEIL (-j0, j1));
3651 HOST_WIDE_INT x0 = i1 * tau1 + i0;
3652 HOST_WIDE_INT y0 = j1 * tau1 + j0;
3654 /* (X1, Y1) is the smallest positive solution of the eq
3655 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
3656 first conflict occurs. */
3657 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
3658 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
3659 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
3661 if (niter > 0)
3663 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter_a - i0, i1),
3664 FLOOR_DIV (niter_b - j0, j1));
3665 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
3667 /* If the overlap occurs outside of the bounds of the
3668 loop, there is no dependence. */
3669 if (x1 >= niter_a || y1 >= niter_b)
3671 *overlaps_a = conflict_fn_no_dependence ();
3672 *overlaps_b = conflict_fn_no_dependence ();
3673 *last_conflicts = integer_zero_node;
3674 goto end_analyze_subs_aa;
3676 else
3677 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
3679 else
3680 *last_conflicts = chrec_dont_know;
/* The conflicting iterations are x1 + i1*k and y1 + j1*k.  */
3682 *overlaps_a
3683 = conflict_fn (1,
3684 affine_fn_univar (build_int_cst (NULL_TREE, x1),
3686 build_int_cst (NULL_TREE, i1)));
3687 *overlaps_b
3688 = conflict_fn (1,
3689 affine_fn_univar (build_int_cst (NULL_TREE, y1),
3691 build_int_cst (NULL_TREE, j1)));
3693 else
3695 /* FIXME: For the moment, the upper bound of the
3696 iteration domain for i and j is not checked. */
3697 if (dump_file && (dump_flags & TDF_DETAILS))
3698 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
3699 *overlaps_a = conflict_fn_not_known ();
3700 *overlaps_b = conflict_fn_not_known ();
3701 *last_conflicts = chrec_dont_know;
3704 else
3706 if (dump_file && (dump_flags & TDF_DETAILS))
3707 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
3708 *overlaps_a = conflict_fn_not_known ();
3709 *overlaps_b = conflict_fn_not_known ();
3710 *last_conflicts = chrec_dont_know;
3713 else
3715 if (dump_file && (dump_flags & TDF_DETAILS))
3716 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
3717 *overlaps_a = conflict_fn_not_known ();
3718 *overlaps_b = conflict_fn_not_known ();
3719 *last_conflicts = chrec_dont_know;
/* Single exit: release the obstack holding A, U and S.  */
3722 end_analyze_subs_aa:
3723 obstack_free (&scratch_obstack, NULL);
3724 if (dump_file && (dump_flags & TDF_DETAILS))
3726 fprintf (dump_file, " (overlaps_a = ");
3727 dump_conflict_function (dump_file, *overlaps_a);
3728 fprintf (dump_file, ")\n (overlaps_b = ");
3729 dump_conflict_function (dump_file, *overlaps_b);
3730 fprintf (dump_file, "))\n");
3734 /* Returns true when analyze_subscript_affine_affine can be used for
3735 determining the dependence relation between chrec_a and chrec_b,
3736 that contain symbols. This function modifies chrec_a and chrec_b
3737 such that the analysis result is the same, and such that they don't
3738 contain symbols, and then can safely be passed to the analyzer.
3740 Example: The analysis of the following tuples of evolutions produce
3741 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
3742 vs. {0, +, 1}_1
3744 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
3745 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
3748 static bool
3749 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
3751 tree diff, type, left_a, left_b, right_b;
/* Symbolic steps cannot be normalized away; give up.  */
3753 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
3754 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
3755 /* FIXME: For the moment not handled. Might be refined later. */
3756 return false;
/* The rewrite is valid only when the difference of the initial
   conditions is a constant.  */
3758 type = chrec_type (*chrec_a);
3759 left_a = CHREC_LEFT (*chrec_a);
3760 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
3761 diff = chrec_fold_minus (type, left_a, left_b);
3763 if (!evolution_function_is_constant_p (diff))
3764 return false;
3766 if (dump_file && (dump_flags & TDF_DETAILS))
3767 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
/* Rewrite in place: CHREC_A starts at the constant difference,
   CHREC_B starts at zero; the steps are unchanged.  */
3769 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
3770 diff, CHREC_RIGHT (*chrec_a));
3771 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
3772 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
3773 build_int_cst (type, 0),
3774 right_b);
3775 return true;
3778 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
3779 *OVERLAPS_B are initialized to the functions that describe the
3780 relation between the elements accessed twice by CHREC_A and
3781 CHREC_B. For k >= 0, the following property is verified:
3783 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
3785 static void
3786 analyze_siv_subscript (tree chrec_a,
3787 tree chrec_b,
3788 conflict_function **overlaps_a,
3789 conflict_function **overlaps_b,
3790 tree *last_conflicts,
3791 int loop_nest_num)
3793 dependence_stats.num_siv++;
3795 if (dump_file && (dump_flags & TDF_DETAILS))
3796 fprintf (dump_file, "(analyze_siv_subscript \n");
/* Constant vs affine: delegate to the cst/affine analyzer, swapping
   the outputs when the roles of A and B are reversed.  */
3798 if (evolution_function_is_constant_p (chrec_a)
3799 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
3800 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
3801 overlaps_a, overlaps_b, last_conflicts);
3803 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
3804 && evolution_function_is_constant_p (chrec_b))
3805 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
3806 overlaps_b, overlaps_a, last_conflicts);
/* Affine vs affine: analyzable directly when symbol-free, or after
   the normalizing rewrite done by
   can_use_analyze_subscript_affine_affine.  */
3808 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
3809 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
3811 if (!chrec_contains_symbols (chrec_a)
3812 && !chrec_contains_symbols (chrec_b))
3814 analyze_subscript_affine_affine (chrec_a, chrec_b,
3815 overlaps_a, overlaps_b,
3816 last_conflicts);
3818 if (CF_NOT_KNOWN_P (*overlaps_a)
3819 || CF_NOT_KNOWN_P (*overlaps_b))
3820 dependence_stats.num_siv_unimplemented++;
3821 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
3822 || CF_NO_DEPENDENCE_P (*overlaps_b))
3823 dependence_stats.num_siv_independent++;
3824 else
3825 dependence_stats.num_siv_dependent++;
3827 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
3828 &chrec_b))
3830 analyze_subscript_affine_affine (chrec_a, chrec_b,
3831 overlaps_a, overlaps_b,
3832 last_conflicts);
3834 if (CF_NOT_KNOWN_P (*overlaps_a)
3835 || CF_NOT_KNOWN_P (*overlaps_b))
3836 dependence_stats.num_siv_unimplemented++;
3837 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
3838 || CF_NO_DEPENDENCE_P (*overlaps_b))
3839 dependence_stats.num_siv_independent++;
3840 else
3841 dependence_stats.num_siv_dependent++;
/* Symbolic and not normalizable: fall through to "don't know".  */
3843 else
3844 goto siv_subscript_dontknow;
3847 else
3849 siv_subscript_dontknow:;
3850 if (dump_file && (dump_flags & TDF_DETAILS))
3851 fprintf (dump_file, " siv test failed: unimplemented");
3852 *overlaps_a = conflict_fn_not_known ();
3853 *overlaps_b = conflict_fn_not_known ();
3854 *last_conflicts = chrec_dont_know;
3855 dependence_stats.num_siv_unimplemented++;
3858 if (dump_file && (dump_flags & TDF_DETAILS))
3859 fprintf (dump_file, ")\n");
3862 /* Returns false if we can prove that the greatest common divisor of the steps
3863 of CHREC does not divide CST, false otherwise. */
3865 static bool
3866 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
3868 HOST_WIDE_INT cd = 0, val;
3869 tree step;
3871 if (!tree_fits_shwi_p (cst))
3872 return true;
3873 val = tree_to_shwi (cst);
3875 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
3877 step = CHREC_RIGHT (chrec);
3878 if (!tree_fits_shwi_p (step))
3879 return true;
3880 cd = gcd (cd, tree_to_shwi (step));
3881 chrec = CHREC_LEFT (chrec);
3884 return val % cd == 0;
3887 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
3888 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
3889 functions that describe the relation between the elements accessed
3890 twice by CHREC_A and CHREC_B. For k >= 0, the following property
3891 is verified:
3893 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
3895 static void
3896 analyze_miv_subscript (tree chrec_a,
3897 tree chrec_b,
3898 conflict_function **overlaps_a,
3899 conflict_function **overlaps_b,
3900 tree *last_conflicts,
3901 struct loop *loop_nest)
3903 tree type, difference;
3905 dependence_stats.num_miv++;
3906 if (dump_file && (dump_flags & TDF_DETAILS))
3907 fprintf (dump_file, "(analyze_miv_subscript \n");
/* Compare the two chrecs in a common signed type so the difference
   below is meaningful.  */
3909 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3910 chrec_a = chrec_convert (type, chrec_a, NULL);
3911 chrec_b = chrec_convert (type, chrec_b, NULL);
3912 difference = chrec_fold_minus (type, chrec_a, chrec_b);
3914 if (eq_evolutions_p (chrec_a, chrec_b))
3916 /* Access functions are the same: all the elements are accessed
3917 in the same order. */
3918 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3919 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3920 *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
3921 dependence_stats.num_miv_dependent++;
3924 else if (evolution_function_is_constant_p (difference)
3925 /* For the moment, the following is verified:
3926 evolution_function_is_affine_multivariate_p (chrec_a,
3927 loop_nest->num) */
3928 && !gcd_of_steps_may_divide_p (chrec_a, difference))
3930 /* testsuite/.../ssa-chrec-33.c
3931 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
3933 The difference is 1, and all the evolution steps are multiples
3934 of 2, consequently there are no overlapping elements. */
3935 *overlaps_a = conflict_fn_no_dependence ();
3936 *overlaps_b = conflict_fn_no_dependence ();
3937 *last_conflicts = integer_zero_node;
3938 dependence_stats.num_miv_independent++;
/* Affine multivariate and symbol-free on both sides: the full
   affine-affine analyzer applies.  */
3941 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
3942 && !chrec_contains_symbols (chrec_a)
3943 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
3944 && !chrec_contains_symbols (chrec_b))
3946 /* testsuite/.../ssa-chrec-35.c
3947 {0, +, 1}_2 vs. {0, +, 1}_3
3948 the overlapping elements are respectively located at iterations:
3949 {0, +, 1}_x and {0, +, 1}_x,
3950 in other words, we have the equality:
3951 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
3953 Other examples:
3954 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
3955 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
3957 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
3958 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
3960 analyze_subscript_affine_affine (chrec_a, chrec_b,
3961 overlaps_a, overlaps_b, last_conflicts);
3963 if (CF_NOT_KNOWN_P (*overlaps_a)
3964 || CF_NOT_KNOWN_P (*overlaps_b))
3965 dependence_stats.num_miv_unimplemented++;
3966 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
3967 || CF_NO_DEPENDENCE_P (*overlaps_b))
3968 dependence_stats.num_miv_independent++;
3969 else
3970 dependence_stats.num_miv_dependent++;
3973 else
3975 /* When the analysis is too difficult, answer "don't know". */
3976 if (dump_file && (dump_flags & TDF_DETAILS))
3977 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
3979 *overlaps_a = conflict_fn_not_known ();
3980 *overlaps_b = conflict_fn_not_known ();
3981 *last_conflicts = chrec_dont_know;
3982 dependence_stats.num_miv_unimplemented++;
3985 if (dump_file && (dump_flags & TDF_DETAILS))
3986 fprintf (dump_file, ")\n");
3989 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
3990 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
3991 OVERLAP_ITERATIONS_B are initialized with two functions that
3992 describe the iterations that contain conflicting elements.
3994 Remark: For an integer k >= 0, the following equality is true:
3996 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
3999 static void
4000 analyze_overlapping_iterations (tree chrec_a,
4001 tree chrec_b,
4002 conflict_function **overlap_iterations_a,
4003 conflict_function **overlap_iterations_b,
4004 tree *last_conflicts, struct loop *loop_nest)
4006 unsigned int lnn = loop_nest->num;
4008 dependence_stats.num_subscript_tests++;
4010 if (dump_file && (dump_flags & TDF_DETAILS))
4012 fprintf (dump_file, "(analyze_overlapping_iterations \n");
4013 fprintf (dump_file, " (chrec_a = ");
4014 print_generic_expr (dump_file, chrec_a);
4015 fprintf (dump_file, ")\n (chrec_b = ");
4016 print_generic_expr (dump_file, chrec_b);
4017 fprintf (dump_file, ")\n");
/* Missing or undetermined access functions: answer "don't know".
   Note *last_conflicts is left untouched on this path.  */
4020 if (chrec_a == NULL_TREE
4021 || chrec_b == NULL_TREE
4022 || chrec_contains_undetermined (chrec_a)
4023 || chrec_contains_undetermined (chrec_b))
4025 dependence_stats.num_subscript_undetermined++;
4027 *overlap_iterations_a = conflict_fn_not_known ();
4028 *overlap_iterations_b = conflict_fn_not_known ();
4031 /* If they are the same chrec, and are affine, they overlap
4032 on every iteration. */
4033 else if (eq_evolutions_p (chrec_a, chrec_b)
4034 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4035 || operand_equal_p (chrec_a, chrec_b, 0)))
4037 dependence_stats.num_same_subscript_function++;
4038 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4039 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4040 *last_conflicts = chrec_dont_know;
4043 /* If they aren't the same, and aren't affine, we can't do anything
4044 yet. */
4045 else if ((chrec_contains_symbols (chrec_a)
4046 || chrec_contains_symbols (chrec_b))
4047 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4048 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
4050 dependence_stats.num_subscript_undetermined++;
4051 *overlap_iterations_a = conflict_fn_not_known ();
4052 *overlap_iterations_b = conflict_fn_not_known ();
/* Dispatch on the subscript class: ZIV (zero index variables),
   SIV (single), otherwise MIV (multiple).  */
4055 else if (ziv_subscript_p (chrec_a, chrec_b))
4056 analyze_ziv_subscript (chrec_a, chrec_b,
4057 overlap_iterations_a, overlap_iterations_b,
4058 last_conflicts);
4060 else if (siv_subscript_p (chrec_a, chrec_b))
4061 analyze_siv_subscript (chrec_a, chrec_b,
4062 overlap_iterations_a, overlap_iterations_b,
4063 last_conflicts, lnn);
4065 else
4066 analyze_miv_subscript (chrec_a, chrec_b,
4067 overlap_iterations_a, overlap_iterations_b,
4068 last_conflicts, loop_nest);
4070 if (dump_file && (dump_flags & TDF_DETAILS))
4072 fprintf (dump_file, " (overlap_iterations_a = ");
4073 dump_conflict_function (dump_file, *overlap_iterations_a);
4074 fprintf (dump_file, ")\n (overlap_iterations_b = ");
4075 dump_conflict_function (dump_file, *overlap_iterations_b);
4076 fprintf (dump_file, "))\n");
4080 /* Helper function for uniquely inserting distance vectors. */
4082 static void
4083 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
4085 unsigned i;
4086 lambda_vector v;
4088 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, v)
4089 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
4090 return;
4092 DDR_DIST_VECTS (ddr).safe_push (dist_v);
4095 /* Helper function for uniquely inserting direction vectors. */
4097 static void
4098 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
4100 unsigned i;
4101 lambda_vector v;
4103 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), i, v)
4104 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
4105 return;
4107 DDR_DIR_VECTS (ddr).safe_push (dir_v);
4110 /* Add a distance of 1 on all the loops outer than INDEX. If we
4111 haven't yet determined a distance for this outer loop, push a new
4112 distance vector composed of the previous distance, and a distance
4113 of 1 for this outer loop. Example:
4115 | loop_1
4116 | loop_2
4117 | A[10]
4118 | endloop_2
4119 | endloop_1
4121 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
4122 save (0, 1), then we have to save (1, 0). */
4124 static void
4125 add_outer_distances (struct data_dependence_relation *ddr,
4126 lambda_vector dist_v, int index)
4128 /* For each outer loop where init_v is not set, the accesses are
4129 in dependence of distance 1 in the loop. */
4130 while (--index >= 0)
4132 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4133 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
4134 save_v[index] = 1;
4135 save_dist_v (ddr, save_v);
4139 /* Return false when fail to represent the data dependence as a
4140 distance vector. A_INDEX is the index of the first reference
4141 (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
4142 second reference. INIT_B is set to true when a component has been
4143 added to the distance vector DIST_V. INDEX_CARRY is then set to
4144 the index in DIST_V that carries the dependence. */
4146 static bool
4147 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
4148 unsigned int a_index, unsigned int b_index,
4149 lambda_vector dist_v, bool *init_b,
4150 int *index_carry)
4152 unsigned i;
/* init_v[k] records whether a distance has already been set for loop
   level k, for the subscript coupling test below.  */
4153 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4155 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
4157 tree access_fn_a, access_fn_b;
4158 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
4160 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
4162 non_affine_dependence_relation (ddr);
4163 return false;
4166 access_fn_a = SUB_ACCESS_FN (subscript, a_index);
4167 access_fn_b = SUB_ACCESS_FN (subscript, b_index);
4169 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
4170 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
4172 HOST_WIDE_INT dist;
4173 int index;
4174 int var_a = CHREC_VARIABLE (access_fn_a);
4175 int var_b = CHREC_VARIABLE (access_fn_b);
/* The two access functions must evolve in the same loop for the
   distance to be well-defined.  */
4177 if (var_a != var_b
4178 || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
4180 non_affine_dependence_relation (ddr);
4181 return false;
4184 dist = int_cst_value (SUB_DISTANCE (subscript));
4185 index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
4186 *index_carry = MIN (index, *index_carry);
4188 /* This is the subscript coupling test. If we have already
4189 recorded a distance for this loop (a distance coming from
4190 another subscript), it should be the same. For example,
4191 in the following code, there is no dependence:
4193 | loop i = 0, N, 1
4194 | T[i+1][i] = ...
4195 | ... = T[i][i]
4196 | endloop
4198 if (init_v[index] != 0 && dist_v[index] != dist)
4200 finalize_ddr_dependent (ddr, chrec_known);
4201 return false;
4204 dist_v[index] = dist;
4205 init_v[index] = 1;
4206 *init_b = true;
4208 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
4210 /* This can be for example an affine vs. constant dependence
4211 (T[i] vs. T[3]) that is not an affine dependence and is
4212 not representable as a distance vector. */
4213 non_affine_dependence_relation (ddr);
4214 return false;
4218 return true;
4221 /* Return true when the DDR contains only constant access functions. */
4223 static bool
4224 constant_access_functions (const struct data_dependence_relation *ddr)
4226 unsigned i;
4227 subscript *sub;
4229 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4230 if (!evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 0))
4231 || !evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 1)))
4232 return false;
4234 return true;
4237 /* Helper function for the case where DDR_A and DDR_B are the same
4238 multivariate access function with a constant step. For an example
4239 see pr34635-1.c. */
4241 static void
4242 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
/* C_2 is the outer chrec {{c_0, +, step_1}_1, +, step_2}_2; peel off
   its two levels.  */
4244 int x_1, x_2;
4245 tree c_1 = CHREC_LEFT (c_2);
4246 tree c_0 = CHREC_LEFT (c_1);
4247 lambda_vector dist_v;
4248 HOST_WIDE_INT v1, v2, cd;
4250 /* Polynomials with more than 2 variables are not handled yet. When
4251 the evolution steps are parameters, it is not possible to
4252 represent the dependence using classical distance vectors. */
4253 if (TREE_CODE (c_0) != INTEGER_CST
4254 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
4255 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
4257 DDR_AFFINE_P (ddr) = false;
4258 return;
/* Positions of the two loops in the analyzed loop nest.  */
4261 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
4262 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
4264 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
4265 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4266 v1 = int_cst_value (CHREC_RIGHT (c_1));
4267 v2 = int_cst_value (CHREC_RIGHT (c_2));
/* Reduce the step pair by their gcd, then normalize the sign so that
   the inner component v2 is nonnegative.  */
4268 cd = gcd (v1, v2);
4269 v1 /= cd;
4270 v2 /= cd;
4272 if (v2 < 0)
4274 v2 = -v2;
4275 v1 = -v1;
4278 dist_v[x_1] = v2;
4279 dist_v[x_2] = -v1;
4280 save_dist_v (ddr, dist_v);
4282 add_outer_distances (ddr, dist_v, x_1);
4285 /* Helper function for the case where DDR_A and DDR_B are the same
4286 access functions. */
4288 static void
4289 add_other_self_distances (struct data_dependence_relation *ddr)
4291 lambda_vector dist_v;
4292 unsigned i;
/* INDEX_CARRY tracks the outermost loop level that carries the
   self-dependence; DDR_NB_LOOPS is the "none found yet" sentinel.  */
4293 int index_carry = DDR_NB_LOOPS (ddr);
4294 subscript *sub;
4296 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4298 tree access_fun = SUB_ACCESS_FN (sub, 0);
4300 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
4302 if (!evolution_function_is_univariate_p (access_fun))
/* Multivariate self-dependences are handled only for single
   subscripts; otherwise give up on this DDR.  */
4304 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
4306 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
4307 return;
4310 access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);
4312 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
4313 add_multivariate_self_dist (ddr, access_fun);
4314 else
4315 /* The evolution step is not constant: it varies in
4316 the outer loop, so this cannot be represented by a
4317 distance vector. For example in pr34635.c the
4318 evolution is {0, +, {0, +, 4}_1}_2. */
4319 DDR_AFFINE_P (ddr) = false;
4321 return;
4324 index_carry = MIN (index_carry,
4325 index_in_loop_nest (CHREC_VARIABLE (access_fun),
4326 DDR_LOOP_NEST (ddr)));
/* Record unit distances for all loops outer than the carrying one.  */
4330 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4331 add_outer_distances (ddr, dist_v, index_carry);
4334 static void
4335 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
4337 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4339 dist_v[DDR_INNER_LOOP (ddr)] = 1;
4340 save_dist_v (ddr, dist_v);
4343 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
4344 is the case for example when access functions are the same and
4345 equal to a constant, as in:
4347 | loop_1
4348 | A[3] = ...
4349 | ... = A[3]
4350 | endloop_1
4352 in which case the distance vectors are (0) and (1). */
4354 static void
4355 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
4357 unsigned i, j;
4359 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
4361 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
4362 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
4363 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
4365 for (j = 0; j < ca->n; j++)
4366 if (affine_function_zero_p (ca->fns[j]))
4368 insert_innermost_unit_dist_vector (ddr);
4369 return;
4372 for (j = 0; j < cb->n; j++)
4373 if (affine_function_zero_p (cb->fns[j]))
4375 insert_innermost_unit_dist_vector (ddr);
4376 return;
4381 /* Return true when the DDR contains two data references that have the
4382 same access functions. */
4384 static inline bool
4385 same_access_functions (const struct data_dependence_relation *ddr)
4387 unsigned i;
4388 subscript *sub;
4390 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4391 if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
4392 SUB_ACCESS_FN (sub, 1)))
4393 return false;
4395 return true;
/* Compute the classic per loop distance vector.  DDR is the data
   dependence relation to build a vector from.  Return false when fail
   to represent the data dependence as a distance vector.  */

static bool
build_classic_dist_vector (struct data_dependence_relation *ddr,
			   struct loop *loop_nest)
{
  bool init_b = false;
  int index_carry = DDR_NB_LOOPS (ddr);
  lambda_vector dist_v;

  /* Nothing to do when the relation is already finalized (known
     independent or unknown).  */
  if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
    return false;

  if (same_access_functions (ddr))
    {
      /* Save the 0 vector.  */
      dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
      save_dist_v (ddr, dist_v);

      if (constant_access_functions (ddr))
	add_distance_for_zero_overlaps (ddr);

      if (DDR_NB_LOOPS (ddr) > 1)
	add_other_self_distances (ddr);

      return true;
    }

  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
    return false;

  /* Save the distance vector if we initialized one.  */
  if (init_b)
    {
      /* Verify a basic constraint: classic distance vectors should
	 always be lexicographically positive.

	 Data references are collected in the order of execution of
	 the program, thus for the following loop

	 | for (i = 1; i < 100; i++)
	 |   for (j = 1; j < 100; j++)
	 |     {
	 |       t = T[j+1][i-1];  // A
	 |       T[j][i] = t + 2;  // B
	 |     }

	 references are collected following the direction of the wind:
	 A then B.  The data dependence tests are performed also
	 following this order, such that we're looking at the distance
	 separating the elements accessed by A from the elements later
	 accessed by B.  But in this example, the distance returned by
	 test_dep (A, B) is lexicographically negative (-1, 1), that
	 means that the access A occurs later than B with respect to
	 the outer loop, ie. we're actually looking upwind.  In this
	 case we solve test_dep (B, A) looking downwind to the
	 lexicographically positive solution, that returns the
	 distance vector (1, -1).  */
      if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
	{
	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
	  /* Re-run the subscript test with the references swapped.  */
	  if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
	    return false;
	  compute_subscript_distance (ddr);
	  if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
					    &index_carry))
	    return false;
	  save_dist_v (ddr, save_v);
	  DDR_REVERSED_P (ddr) = true;

	  /* In this case there is a dependence forward for all the
	     outer loops:

	     | for (k = 1; k < 100; k++)
	     |  for (i = 1; i < 100; i++)
	     |   for (j = 1; j < 100; j++)
	     |     {
	     |       t = T[j+1][i-1];  // A
	     |       T[j][i] = t + 2;  // B
	     |     }

	     the vectors are:
	     (0,  1, -1)
	     (1,  1, -1)
	     (1, -1,  1)
	  */
	  if (DDR_NB_LOOPS (ddr) > 1)
	    {
	      add_outer_distances (ddr, save_v, index_carry);
	      add_outer_distances (ddr, dist_v, index_carry);
	    }
	}
      else
	{
	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
	  lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));

	  if (DDR_NB_LOOPS (ddr) > 1)
	    {
	      lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));

	      /* Also compute the distance in the opposite direction to
		 cover the outer-loop-carried dependences.  */
	      if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
		return false;
	      compute_subscript_distance (ddr);
	      if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
						&index_carry))
		return false;

	      save_dist_v (ddr, save_v);
	      add_outer_distances (ddr, dist_v, index_carry);
	      add_outer_distances (ddr, opposite_v, index_carry);
	    }
	  else
	    save_dist_v (ddr, save_v);
	}
    }
  else
    {
      /* There is a distance of 1 on all the outer loops: Example:
	 there is a dependence of distance 1 on loop_1 for the array A.

	 | loop_1
	 |   A[5] = ...
	 | endloop
      */
      add_outer_distances (ddr, dist_v,
			   lambda_vector_first_nz (dist_v,
						   DDR_NB_LOOPS (ddr), 0));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned i;

      fprintf (dump_file, "(build_classic_dist_vector\n");
      for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
	{
	  fprintf (dump_file, "  dist_vector = (");
	  print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
			       DDR_NB_LOOPS (ddr));
	  fprintf (dump_file, "  )\n");
	}
      fprintf (dump_file, ")\n");
    }

  return true;
}
4549 /* Return the direction for a given distance.
4550 FIXME: Computing dir this way is suboptimal, since dir can catch
4551 cases that dist is unable to represent. */
4553 static inline enum data_dependence_direction
4554 dir_from_dist (int dist)
4556 if (dist > 0)
4557 return dir_positive;
4558 else if (dist < 0)
4559 return dir_negative;
4560 else
4561 return dir_equal;
4564 /* Compute the classic per loop direction vector. DDR is the data
4565 dependence relation to build a vector from. */
4567 static void
4568 build_classic_dir_vector (struct data_dependence_relation *ddr)
4570 unsigned i, j;
4571 lambda_vector dist_v;
4573 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
4575 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4577 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
4578 dir_v[j] = dir_from_dist (dist_v[j]);
4580 save_dir_v (ddr, dir_v);
/* Helper function.  Returns true when there is a dependence between the
   data references.  A_INDEX is the index of the first reference (0 for
   DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference.
   On a false return, DDR has been finalized as either independent
   (chrec_known) or undetermined (chrec_dont_know).  */

static bool
subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
			       unsigned int a_index, unsigned int b_index,
			       struct loop *loop_nest)
{
  unsigned int i;
  tree last_conflicts;
  struct subscript *subscript;
  /* NULL_TREE means "dependent" so far; updated below when a subscript
     proves independence or is undetermined.  */
  tree res = NULL_TREE;

  for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
    {
      conflict_function *overlaps_a, *overlaps_b;

      analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
				      SUB_ACCESS_FN (subscript, b_index),
				      &overlaps_a, &overlaps_b,
				      &last_conflicts, loop_nest);

      /* Free any conflict functions left over from a previous run
	 before installing the new ones.  */
      if (SUB_CONFLICTS_IN_A (subscript))
	free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
      if (SUB_CONFLICTS_IN_B (subscript))
	free_conflict_function (SUB_CONFLICTS_IN_B (subscript));

      SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
      SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
      SUB_LAST_CONFLICT (subscript) = last_conflicts;

      /* If there is any undetermined conflict function we have to
	 give a conservative answer in case we cannot prove that
	 no dependence exists when analyzing another subscript.  */
      if (CF_NOT_KNOWN_P (overlaps_a)
	  || CF_NOT_KNOWN_P (overlaps_b))
	{
	  res = chrec_dont_know;
	  continue;
	}

      /* When there is a subscript with no dependence we can stop.  */
      else if (CF_NO_DEPENDENCE_P (overlaps_a)
	       || CF_NO_DEPENDENCE_P (overlaps_b))
	{
	  res = chrec_known;
	  break;
	}
    }

  if (res == NULL_TREE)
    return true;

  if (res == chrec_known)
    dependence_stats.num_dependence_independent++;
  else
    dependence_stats.num_dependence_undetermined++;
  finalize_ddr_dependent (ddr, res);
  return false;
}
/* Computes the conflicting iterations in LOOP_NEST, and initialize DDR.
   When a dependence is found, also build the classic distance and
   direction vectors for it.  */

static void
subscript_dependence_tester (struct data_dependence_relation *ddr,
			     struct loop *loop_nest)
{
  /* A true result means DDR_A and DDR_B may be dependent.  */
  if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
    dependence_stats.num_dependence_dependent++;

  compute_subscript_distance (ddr);
  if (build_classic_dist_vector (ddr, loop_nest))
    build_classic_dir_vector (ddr);
}
4660 /* Returns true when all the access functions of A are affine or
4661 constant with respect to LOOP_NEST. */
4663 static bool
4664 access_functions_are_affine_or_constant_p (const struct data_reference *a,
4665 const struct loop *loop_nest)
4667 unsigned int i;
4668 vec<tree> fns = DR_ACCESS_FNS (a);
4669 tree t;
4671 FOR_EACH_VEC_ELT (fns, i, t)
4672 if (!evolution_function_is_invariant_p (t, loop_nest->num)
4673 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
4674 return false;
4676 return true;
/* This computes the affine dependence relation between A and B with
   respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
   independence between two accesses, while CHREC_DONT_KNOW is used
   for representing the unknown relation.

   Note that it is possible to stop the computation of the dependence
   relation the first time we detect a CHREC_KNOWN element for a given
   subscript.  */

void
compute_affine_dependence (struct data_dependence_relation *ddr,
			   struct loop *loop_nest)
{
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "(compute_affine_dependence\n");
      fprintf (dump_file, "  stmt_a: ");
      print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
      fprintf (dump_file, "  stmt_b: ");
      print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
    }

  /* Analyze only when the dependence relation is not yet known.  */
  if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      dependence_stats.num_dependence_tests++;

      if (access_functions_are_affine_or_constant_p (dra, loop_nest)
	  && access_functions_are_affine_or_constant_p (drb, loop_nest))
	subscript_dependence_tester (ddr, loop_nest);

      /* As a last case, if the dependence cannot be determined, or if
	 the dependence is considered too difficult to determine, answer
	 "don't know".  */
      else
	{
	  dependence_stats.num_dependence_undetermined++;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Data ref a:\n");
	      dump_data_reference (dump_file, dra);
	      fprintf (dump_file, "Data ref b:\n");
	      dump_data_reference (dump_file, drb);
	      fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
	    }
	  finalize_ddr_dependent (ddr, chrec_dont_know);
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
	fprintf (dump_file, ") -> no dependence\n");
      else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	fprintf (dump_file, ") -> dependence analysis failed\n");
      else
	fprintf (dump_file, ")\n");
    }
}
4743 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4744 the data references in DATAREFS, in the LOOP_NEST. When
4745 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4746 relations. Return true when successful, i.e. data references number
4747 is small enough to be handled. */
4749 bool
4750 compute_all_dependences (vec<data_reference_p> datarefs,
4751 vec<ddr_p> *dependence_relations,
4752 vec<loop_p> loop_nest,
4753 bool compute_self_and_rr)
4755 struct data_dependence_relation *ddr;
4756 struct data_reference *a, *b;
4757 unsigned int i, j;
4759 if ((int) datarefs.length ()
4760 > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
4762 struct data_dependence_relation *ddr;
4764 /* Insert a single relation into dependence_relations:
4765 chrec_dont_know. */
4766 ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
4767 dependence_relations->safe_push (ddr);
4768 return false;
4771 FOR_EACH_VEC_ELT (datarefs, i, a)
4772 for (j = i + 1; datarefs.iterate (j, &b); j++)
4773 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4775 ddr = initialize_data_dependence_relation (a, b, loop_nest);
4776 dependence_relations->safe_push (ddr);
4777 if (loop_nest.exists ())
4778 compute_affine_dependence (ddr, loop_nest[0]);
4781 if (compute_self_and_rr)
4782 FOR_EACH_VEC_ELT (datarefs, i, a)
4784 ddr = initialize_data_dependence_relation (a, a, loop_nest);
4785 dependence_relations->safe_push (ddr);
4786 if (loop_nest.exists ())
4787 compute_affine_dependence (ddr, loop_nest[0]);
4790 return true;
/* Describes a location of a memory reference within a statement, as
   collected by get_references_in_stmt.  */

struct data_ref_loc
{
  /* The memory reference.  */
  tree ref;

  /* True if the memory reference is read.  */
  bool is_read;

  /* True if the data reference is conditional within the containing
     statement, i.e. if it might not occur even when the statement
     is executed and runs to completion.  */
  bool is_conditional_in_stmt;
};
/* Stores the locations of memory references in STMT to REFERENCES.  Returns
   true if STMT clobbers memory, false otherwise.  */

static bool
get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
{
  bool clobbers_memory = false;
  data_ref_loc ref;
  tree op0, op1;
  enum gimple_code stmt_code = gimple_code (stmt);

  /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
     As we cannot model data-references to not spelled out
     accesses give up if they may occur.  */
  if (stmt_code == GIMPLE_CALL
      && !(gimple_call_flags (stmt) & ECF_CONST))
    {
      /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
      if (gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_GOMP_SIMD_LANE:
	    {
	      struct loop *loop = gimple_bb (stmt)->loop_father;
	      tree uid = gimple_call_arg (stmt, 0);
	      gcc_assert (TREE_CODE (uid) == SSA_NAME);
	      /* Only harmless inside the loop it was created for.  */
	      if (loop == NULL
		  || loop->simduid != SSA_NAME_VAR (uid))
		clobbers_memory = true;
	      break;
	    }
	  case IFN_MASK_LOAD:
	  case IFN_MASK_STORE:
	    /* Modeled explicitly below as data references.  */
	    break;
	  default:
	    clobbers_memory = true;
	    break;
	  }
      else
	clobbers_memory = true;
    }
  else if (stmt_code == GIMPLE_ASM
	   && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
	       || gimple_vuse (stmt)))
    clobbers_memory = true;

  /* Statements without a virtual use do not touch memory.  */
  if (!gimple_vuse (stmt))
    return clobbers_memory;

  if (stmt_code == GIMPLE_ASSIGN)
    {
      tree base;
      op0 = gimple_assign_lhs (stmt);
      op1 = gimple_assign_rhs1 (stmt);

      /* Record the RHS as a read when it is a declared object or a
	 memory reference with a non-SSA, non-invariant base.  */
      if (DECL_P (op1)
	  || (REFERENCE_CLASS_P (op1)
	      && (base = get_base_address (op1))
	      && TREE_CODE (base) != SSA_NAME
	      && !is_gimple_min_invariant (base)))
	{
	  ref.ref = op1;
	  ref.is_read = true;
	  ref.is_conditional_in_stmt = false;
	  references->safe_push (ref);
	}
    }
  else if (stmt_code == GIMPLE_CALL)
    {
      unsigned i, n;
      tree ptr, type;
      unsigned int align;

      ref.is_read = false;
      if (gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_MASK_LOAD:
	    if (gimple_call_lhs (stmt) == NULL_TREE)
	      break;
	    ref.is_read = true;
	    /* FALLTHRU */
	  case IFN_MASK_STORE:
	    /* Build a MEM_REF describing the masked access; arg 0 is
	       the address and arg 1 the alignment.  */
	    ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
	    align = tree_to_shwi (gimple_call_arg (stmt, 1));
	    if (ref.is_read)
	      type = TREE_TYPE (gimple_call_lhs (stmt));
	    else
	      type = TREE_TYPE (gimple_call_arg (stmt, 3));
	    if (TYPE_ALIGN (type) != align)
	      type = build_aligned_type (type, align);
	    /* Masked accesses are conditional: the mask may disable
	       them even when the statement executes.  */
	    ref.is_conditional_in_stmt = true;
	    ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
				   ptr);
	    references->safe_push (ref);
	    return false;
	  default:
	    break;
	  }

      op0 = gimple_call_lhs (stmt);
      n = gimple_call_num_args (stmt);
      for (i = 0; i < n; i++)
	{
	  op1 = gimple_call_arg (stmt, i);

	  if (DECL_P (op1)
	      || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
	    {
	      ref.ref = op1;
	      ref.is_read = true;
	      ref.is_conditional_in_stmt = false;
	      references->safe_push (ref);
	    }
	}
    }
  else
    return clobbers_memory;

  /* Record the LHS (assignment or call) as a write.  */
  if (op0
      && (DECL_P (op0)
	  || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
    {
      ref.ref = op0;
      ref.is_read = false;
      ref.is_conditional_in_stmt = false;
      references->safe_push (ref);
    }
  return clobbers_memory;
}
4942 /* Returns true if the loop-nest has any data reference. */
4944 bool
4945 loop_nest_has_data_refs (loop_p loop)
4947 basic_block *bbs = get_loop_body (loop);
4948 auto_vec<data_ref_loc, 3> references;
4950 for (unsigned i = 0; i < loop->num_nodes; i++)
4952 basic_block bb = bbs[i];
4953 gimple_stmt_iterator bsi;
4955 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4957 gimple *stmt = gsi_stmt (bsi);
4958 get_references_in_stmt (stmt, &references);
4959 if (references.length ())
4961 free (bbs);
4962 return true;
4966 free (bbs);
4967 return false;
4970 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4971 reference, returns false, otherwise returns true. NEST is the outermost
4972 loop of the loop nest in which the references should be analyzed. */
4974 bool
4975 find_data_references_in_stmt (struct loop *nest, gimple *stmt,
4976 vec<data_reference_p> *datarefs)
4978 unsigned i;
4979 auto_vec<data_ref_loc, 2> references;
4980 data_ref_loc *ref;
4981 bool ret = true;
4982 data_reference_p dr;
4984 if (get_references_in_stmt (stmt, &references))
4985 return false;
4987 FOR_EACH_VEC_ELT (references, i, ref)
4989 dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
4990 loop_containing_stmt (stmt), ref->ref,
4991 stmt, ref->is_read, ref->is_conditional_in_stmt);
4992 gcc_assert (dr != NULL);
4993 datarefs->safe_push (dr);
4996 return ret;
4999 /* Stores the data references in STMT to DATAREFS. If there is an
5000 unanalyzable reference, returns false, otherwise returns true.
5001 NEST is the outermost loop of the loop nest in which the references
5002 should be instantiated, LOOP is the loop in which the references
5003 should be analyzed. */
5005 bool
5006 graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
5007 vec<data_reference_p> *datarefs)
5009 unsigned i;
5010 auto_vec<data_ref_loc, 2> references;
5011 data_ref_loc *ref;
5012 bool ret = true;
5013 data_reference_p dr;
5015 if (get_references_in_stmt (stmt, &references))
5016 return false;
5018 FOR_EACH_VEC_ELT (references, i, ref)
5020 dr = create_data_ref (nest, loop, ref->ref, stmt, ref->is_read,
5021 ref->is_conditional_in_stmt);
5022 gcc_assert (dr != NULL);
5023 datarefs->safe_push (dr);
5026 return ret;
5029 /* Search the data references in LOOP, and record the information into
5030 DATAREFS. Returns chrec_dont_know when failing to analyze a
5031 difficult case, returns NULL_TREE otherwise. */
5033 tree
5034 find_data_references_in_bb (struct loop *loop, basic_block bb,
5035 vec<data_reference_p> *datarefs)
5037 gimple_stmt_iterator bsi;
5039 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5041 gimple *stmt = gsi_stmt (bsi);
5043 if (!find_data_references_in_stmt (loop, stmt, datarefs))
5045 struct data_reference *res;
5046 res = XCNEW (struct data_reference);
5047 datarefs->safe_push (res);
5049 return chrec_dont_know;
5053 return NULL_TREE;
5056 /* Search the data references in LOOP, and record the information into
5057 DATAREFS. Returns chrec_dont_know when failing to analyze a
5058 difficult case, returns NULL_TREE otherwise.
5060 TODO: This function should be made smarter so that it can handle address
5061 arithmetic as if they were array accesses, etc. */
5063 tree
5064 find_data_references_in_loop (struct loop *loop,
5065 vec<data_reference_p> *datarefs)
5067 basic_block bb, *bbs;
5068 unsigned int i;
5070 bbs = get_loop_body_in_dom_order (loop);
5072 for (i = 0; i < loop->num_nodes; i++)
5074 bb = bbs[i];
5076 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
5078 free (bbs);
5079 return chrec_dont_know;
5082 free (bbs);
5084 return NULL_TREE;
/* Return the alignment in bytes that DRB is guaranteed to have at all
   times.  */

unsigned int
dr_alignment (innermost_loop_behavior *drb)
{
  /* Get the alignment of BASE_ADDRESS + INIT.  */
  unsigned int alignment = drb->base_alignment;
  unsigned int misalignment = (drb->base_misalignment
			       + TREE_INT_CST_LOW (drb->init));
  if (misalignment != 0)
    /* MISALIGNMENT & -MISALIGNMENT isolates the lowest set bit, i.e.
       the largest power of two that divides the misalignment; the
       guaranteed alignment cannot exceed that.  */
    alignment = MIN (alignment, misalignment & -misalignment);

  /* Cap it to the alignment of OFFSET.  */
  if (!integer_zerop (drb->offset))
    alignment = MIN (alignment, drb->offset_alignment);

  /* Cap it to the alignment of STEP.  */
  if (!integer_zerop (drb->step))
    alignment = MIN (alignment, drb->step_alignment);

  return alignment;
}
5111 /* Recursive helper function. */
5113 static bool
5114 find_loop_nest_1 (struct loop *loop, vec<loop_p> *loop_nest)
5116 /* Inner loops of the nest should not contain siblings. Example:
5117 when there are two consecutive loops,
5119 | loop_0
5120 | loop_1
5121 | A[{0, +, 1}_1]
5122 | endloop_1
5123 | loop_2
5124 | A[{0, +, 1}_2]
5125 | endloop_2
5126 | endloop_0
5128 the dependence relation cannot be captured by the distance
5129 abstraction. */
5130 if (loop->next)
5131 return false;
5133 loop_nest->safe_push (loop);
5134 if (loop->inner)
5135 return find_loop_nest_1 (loop->inner, loop_nest);
5136 return true;
5139 /* Return false when the LOOP is not well nested. Otherwise return
5140 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
5141 contain the loops from the outermost to the innermost, as they will
5142 appear in the classic distance vector. */
5144 bool
5145 find_loop_nest (struct loop *loop, vec<loop_p> *loop_nest)
5147 loop_nest->safe_push (loop);
5148 if (loop->inner)
5149 return find_loop_nest_1 (loop->inner, loop_nest);
5150 return true;
/* Returns true when the data dependences have been computed, false otherwise.
   Given a loop nest LOOP, the following vectors are returned:
   DATAREFS is initialized to all the array elements contained in this loop,
   DEPENDENCE_RELATIONS contains the relations between the data references.
   Compute read-read and self relations if
   COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */

bool
compute_data_dependences_for_loop (struct loop *loop,
				   bool compute_self_and_read_read_dependences,
				   vec<loop_p> *loop_nest,
				   vec<data_reference_p> *datarefs,
				   vec<ddr_p> *dependence_relations)
{
  bool res = true;

  memset (&dependence_stats, 0, sizeof (dependence_stats));

  /* If the loop nest is not well formed, or one of the data references
     is not computable, give up without spending time to compute other
     dependences.  */
  if (!loop
      || !find_loop_nest (loop, loop_nest)
      || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
      || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
				   compute_self_and_read_read_dependences))
    res = false;

  if (dump_file && (dump_flags & TDF_STATS))
    {
      fprintf (dump_file, "Dependence tester statistics:\n");

      fprintf (dump_file, "Number of dependence tests: %d\n",
	       dependence_stats.num_dependence_tests);
      fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
	       dependence_stats.num_dependence_dependent);
      fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
	       dependence_stats.num_dependence_independent);
      fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
	       dependence_stats.num_dependence_undetermined);

      fprintf (dump_file, "Number of subscript tests: %d\n",
	       dependence_stats.num_subscript_tests);
      fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
	       dependence_stats.num_subscript_undetermined);
      fprintf (dump_file, "Number of same subscript function: %d\n",
	       dependence_stats.num_same_subscript_function);

      fprintf (dump_file, "Number of ziv tests: %d\n",
	       dependence_stats.num_ziv);
      fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
	       dependence_stats.num_ziv_dependent);
      fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
	       dependence_stats.num_ziv_independent);
      fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
	       dependence_stats.num_ziv_unimplemented);

      fprintf (dump_file, "Number of siv tests: %d\n",
	       dependence_stats.num_siv);
      fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
	       dependence_stats.num_siv_dependent);
      fprintf (dump_file, "Number of siv tests returning independent: %d\n",
	       dependence_stats.num_siv_independent);
      fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
	       dependence_stats.num_siv_unimplemented);

      fprintf (dump_file, "Number of miv tests: %d\n",
	       dependence_stats.num_miv);
      fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
	       dependence_stats.num_miv_dependent);
      fprintf (dump_file, "Number of miv tests returning independent: %d\n",
	       dependence_stats.num_miv_independent);
      fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
	       dependence_stats.num_miv_unimplemented);
    }

  return res;
}
5232 /* Free the memory used by a data dependence relation DDR. */
5234 void
5235 free_dependence_relation (struct data_dependence_relation *ddr)
5237 if (ddr == NULL)
5238 return;
5240 if (DDR_SUBSCRIPTS (ddr).exists ())
5241 free_subscripts (DDR_SUBSCRIPTS (ddr));
5242 DDR_DIST_VECTS (ddr).release ();
5243 DDR_DIR_VECTS (ddr).release ();
5245 free (ddr);
5248 /* Free the memory used by the data dependence relations from
5249 DEPENDENCE_RELATIONS. */
5251 void
5252 free_dependence_relations (vec<ddr_p> dependence_relations)
5254 unsigned int i;
5255 struct data_dependence_relation *ddr;
5257 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
5258 if (ddr)
5259 free_dependence_relation (ddr);
5261 dependence_relations.release ();
5264 /* Free the memory used by the data references from DATAREFS. */
5266 void
5267 free_data_refs (vec<data_reference_p> datarefs)
5269 unsigned int i;
5270 struct data_reference *dr;
5272 FOR_EACH_VEC_ELT (datarefs, i, dr)
5273 free_data_ref (dr);
5274 datarefs.release ();