quad-float128.h (IBM128_TYPE): Explicitly use __ibm128, instead of trying to use...
[official-gcc.git] / gcc / tree-data-ref.c
blob3b15b5dc2c1e6199b11cd59735252284a3c337ff
1 /* Data references and dependences detectors.
2 Copyright (C) 2003-2018 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass walks a given loop structure searching for array
22 references. The information about the array accesses is recorded
23 in DATA_REFERENCE structures.
25 The basic test for determining the dependences is:
26 given two access functions chrec1 and chrec2 to a same array, and
27 x and y two vectors from the iteration domain, the same element of
28 the array is accessed twice at iterations x and y if and only if:
29 | chrec1 (x) == chrec2 (y).
31 The goals of this analysis are:
33 - to determine the independence: the relation between two
34 independent accesses is qualified with the chrec_known (this
35 information allows a loop parallelization),
37 - when two data references access the same data, to qualify the
38 dependence relation with classic dependence representations:
40 - distance vectors
41 - direction vectors
42 - loop carried level dependence
43 - polyhedron dependence
44 or with the chains of recurrences based representation,
46 - to define a knowledge base for storing the data dependence
47 information,
49 - to define an interface to access this data.
52 Definitions:
54 - subscript: given two array accesses a subscript is the tuple
55 composed of the access functions for a given dimension. Example:
56 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57 (f1, g1), (f2, g2), (f3, g3).
59 - Diophantine equation: an equation whose coefficients and
60 solutions are integer constants, for example the equation
61 | 3*x + 2*y = 1
62 has an integer solution x = 1 and y = -1.
64 References:
66 - "Advanced Compilation for High Performance Computing" by Randy
67 Allen and Ken Kennedy.
68 http://citeseer.ist.psu.edu/goff91practical.html
70 - "Loop Transformations for Restructuring Compilers - The Foundations"
71 by Utpal Banerjee.
76 #include "config.h"
77 #include "system.h"
78 #include "coretypes.h"
79 #include "backend.h"
80 #include "rtl.h"
81 #include "tree.h"
82 #include "gimple.h"
83 #include "gimple-pretty-print.h"
84 #include "alias.h"
85 #include "fold-const.h"
86 #include "expr.h"
87 #include "gimple-iterator.h"
88 #include "tree-ssa-loop-niter.h"
89 #include "tree-ssa-loop.h"
90 #include "tree-ssa.h"
91 #include "cfgloop.h"
92 #include "tree-data-ref.h"
93 #include "tree-scalar-evolution.h"
94 #include "dumpfile.h"
95 #include "tree-affine.h"
96 #include "params.h"
97 #include "builtins.h"
99 static struct datadep_stats
101 int num_dependence_tests;
102 int num_dependence_dependent;
103 int num_dependence_independent;
104 int num_dependence_undetermined;
106 int num_subscript_tests;
107 int num_subscript_undetermined;
108 int num_same_subscript_function;
110 int num_ziv;
111 int num_ziv_independent;
112 int num_ziv_dependent;
113 int num_ziv_unimplemented;
115 int num_siv;
116 int num_siv_independent;
117 int num_siv_dependent;
118 int num_siv_unimplemented;
120 int num_miv;
121 int num_miv_independent;
122 int num_miv_dependent;
123 int num_miv_unimplemented;
124 } dependence_stats;
126 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
127 unsigned int, unsigned int,
128 struct loop *);
129 /* Returns true iff A divides B. */
131 static inline bool
132 tree_fold_divides_p (const_tree a, const_tree b)
134 gcc_assert (TREE_CODE (a) == INTEGER_CST);
135 gcc_assert (TREE_CODE (b) == INTEGER_CST);
136 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
139 /* Returns true iff A divides B. */
141 static inline bool
142 int_divides_p (int a, int b)
144 return ((b % a) == 0);
147 /* Return true if reference REF contains a union access. */
149 static bool
150 ref_contains_union_access_p (tree ref)
152 while (handled_component_p (ref))
154 ref = TREE_OPERAND (ref, 0);
155 if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
156 || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
157 return true;
159 return false;
164 /* Dump into FILE all the data references from DATAREFS. */
166 static void
167 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
169 unsigned int i;
170 struct data_reference *dr;
172 FOR_EACH_VEC_ELT (datarefs, i, dr)
173 dump_data_reference (file, dr);
176 /* Unified dump into FILE all the data references from DATAREFS. */
178 DEBUG_FUNCTION void
179 debug (vec<data_reference_p> &ref)
181 dump_data_references (stderr, ref);
184 DEBUG_FUNCTION void
185 debug (vec<data_reference_p> *ptr)
187 if (ptr)
188 debug (*ptr);
189 else
190 fprintf (stderr, "<nil>\n");
194 /* Dump into STDERR all the data references from DATAREFS. */
196 DEBUG_FUNCTION void
197 debug_data_references (vec<data_reference_p> datarefs)
199 dump_data_references (stderr, datarefs);
202 /* Print to STDERR the data_reference DR. */
204 DEBUG_FUNCTION void
205 debug_data_reference (struct data_reference *dr)
207 dump_data_reference (stderr, dr);
210 /* Dump function for a DATA_REFERENCE structure. */
212 void
213 dump_data_reference (FILE *outf,
214 struct data_reference *dr)
216 unsigned int i;
218 fprintf (outf, "#(Data Ref: \n");
219 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
220 fprintf (outf, "# stmt: ");
221 print_gimple_stmt (outf, DR_STMT (dr), 0);
222 fprintf (outf, "# ref: ");
223 print_generic_stmt (outf, DR_REF (dr));
224 fprintf (outf, "# base_object: ");
225 print_generic_stmt (outf, DR_BASE_OBJECT (dr));
227 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
229 fprintf (outf, "# Access function %d: ", i);
230 print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
232 fprintf (outf, "#)\n");
235 /* Unified dump function for a DATA_REFERENCE structure. */
237 DEBUG_FUNCTION void
238 debug (data_reference &ref)
240 dump_data_reference (stderr, &ref);
243 DEBUG_FUNCTION void
244 debug (data_reference *ptr)
246 if (ptr)
247 debug (*ptr);
248 else
249 fprintf (stderr, "<nil>\n");
253 /* Dumps the affine function described by FN to the file OUTF. */
255 DEBUG_FUNCTION void
256 dump_affine_function (FILE *outf, affine_fn fn)
258 unsigned i;
259 tree coef;
261 print_generic_expr (outf, fn[0], TDF_SLIM);
262 for (i = 1; fn.iterate (i, &coef); i++)
264 fprintf (outf, " + ");
265 print_generic_expr (outf, coef, TDF_SLIM);
266 fprintf (outf, " * x_%u", i);
270 /* Dumps the conflict function CF to the file OUTF. */
272 DEBUG_FUNCTION void
273 dump_conflict_function (FILE *outf, conflict_function *cf)
275 unsigned i;
277 if (cf->n == NO_DEPENDENCE)
278 fprintf (outf, "no dependence");
279 else if (cf->n == NOT_KNOWN)
280 fprintf (outf, "not known");
281 else
283 for (i = 0; i < cf->n; i++)
285 if (i != 0)
286 fprintf (outf, " ");
287 fprintf (outf, "[");
288 dump_affine_function (outf, cf->fns[i]);
289 fprintf (outf, "]");
294 /* Dump function for a SUBSCRIPT structure. */
296 DEBUG_FUNCTION void
297 dump_subscript (FILE *outf, struct subscript *subscript)
299 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
301 fprintf (outf, "\n (subscript \n");
302 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
303 dump_conflict_function (outf, cf);
304 if (CF_NONTRIVIAL_P (cf))
306 tree last_iteration = SUB_LAST_CONFLICT (subscript);
307 fprintf (outf, "\n last_conflict: ");
308 print_generic_expr (outf, last_iteration);
311 cf = SUB_CONFLICTS_IN_B (subscript);
312 fprintf (outf, "\n iterations_that_access_an_element_twice_in_B: ");
313 dump_conflict_function (outf, cf);
314 if (CF_NONTRIVIAL_P (cf))
316 tree last_iteration = SUB_LAST_CONFLICT (subscript);
317 fprintf (outf, "\n last_conflict: ");
318 print_generic_expr (outf, last_iteration);
321 fprintf (outf, "\n (Subscript distance: ");
322 print_generic_expr (outf, SUB_DISTANCE (subscript));
323 fprintf (outf, " ))\n");
326 /* Print the classic direction vector DIRV to OUTF. */
328 DEBUG_FUNCTION void
329 print_direction_vector (FILE *outf,
330 lambda_vector dirv,
331 int length)
333 int eq;
335 for (eq = 0; eq < length; eq++)
337 enum data_dependence_direction dir = ((enum data_dependence_direction)
338 dirv[eq]);
340 switch (dir)
342 case dir_positive:
343 fprintf (outf, " +");
344 break;
345 case dir_negative:
346 fprintf (outf, " -");
347 break;
348 case dir_equal:
349 fprintf (outf, " =");
350 break;
351 case dir_positive_or_equal:
352 fprintf (outf, " +=");
353 break;
354 case dir_positive_or_negative:
355 fprintf (outf, " +-");
356 break;
357 case dir_negative_or_equal:
358 fprintf (outf, " -=");
359 break;
360 case dir_star:
361 fprintf (outf, " *");
362 break;
363 default:
364 fprintf (outf, "indep");
365 break;
368 fprintf (outf, "\n");
371 /* Print a vector of direction vectors. */
373 DEBUG_FUNCTION void
374 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
375 int length)
377 unsigned j;
378 lambda_vector v;
380 FOR_EACH_VEC_ELT (dir_vects, j, v)
381 print_direction_vector (outf, v, length);
384 /* Print out a vector VEC of length N to OUTFILE. */
386 DEBUG_FUNCTION void
387 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
389 int i;
391 for (i = 0; i < n; i++)
392 fprintf (outfile, "%3d ", vector[i]);
393 fprintf (outfile, "\n");
396 /* Print a vector of distance vectors. */
398 DEBUG_FUNCTION void
399 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
400 int length)
402 unsigned j;
403 lambda_vector v;
405 FOR_EACH_VEC_ELT (dist_vects, j, v)
406 print_lambda_vector (outf, v, length);
409 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
411 DEBUG_FUNCTION void
412 dump_data_dependence_relation (FILE *outf,
413 struct data_dependence_relation *ddr)
415 struct data_reference *dra, *drb;
417 fprintf (outf, "(Data Dep: \n");
419 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
421 if (ddr)
423 dra = DDR_A (ddr);
424 drb = DDR_B (ddr);
425 if (dra)
426 dump_data_reference (outf, dra);
427 else
428 fprintf (outf, " (nil)\n");
429 if (drb)
430 dump_data_reference (outf, drb);
431 else
432 fprintf (outf, " (nil)\n");
434 fprintf (outf, " (don't know)\n)\n");
435 return;
438 dra = DDR_A (ddr);
439 drb = DDR_B (ddr);
440 dump_data_reference (outf, dra);
441 dump_data_reference (outf, drb);
443 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
444 fprintf (outf, " (no dependence)\n");
446 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
448 unsigned int i;
449 struct loop *loopi;
451 subscript *sub;
452 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
454 fprintf (outf, " access_fn_A: ");
455 print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
456 fprintf (outf, " access_fn_B: ");
457 print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
458 dump_subscript (outf, sub);
461 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
462 fprintf (outf, " loop nest: (");
463 FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
464 fprintf (outf, "%d ", loopi->num);
465 fprintf (outf, ")\n");
467 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
469 fprintf (outf, " distance_vector: ");
470 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
471 DDR_NB_LOOPS (ddr));
474 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
476 fprintf (outf, " direction_vector: ");
477 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
478 DDR_NB_LOOPS (ddr));
482 fprintf (outf, ")\n");
485 /* Debug version. */
487 DEBUG_FUNCTION void
488 debug_data_dependence_relation (struct data_dependence_relation *ddr)
490 dump_data_dependence_relation (stderr, ddr);
493 /* Dump into FILE all the dependence relations from DDRS. */
495 DEBUG_FUNCTION void
496 dump_data_dependence_relations (FILE *file,
497 vec<ddr_p> ddrs)
499 unsigned int i;
500 struct data_dependence_relation *ddr;
502 FOR_EACH_VEC_ELT (ddrs, i, ddr)
503 dump_data_dependence_relation (file, ddr);
506 DEBUG_FUNCTION void
507 debug (vec<ddr_p> &ref)
509 dump_data_dependence_relations (stderr, ref);
512 DEBUG_FUNCTION void
513 debug (vec<ddr_p> *ptr)
515 if (ptr)
516 debug (*ptr);
517 else
518 fprintf (stderr, "<nil>\n");
522 /* Dump to STDERR all the dependence relations from DDRS. */
524 DEBUG_FUNCTION void
525 debug_data_dependence_relations (vec<ddr_p> ddrs)
527 dump_data_dependence_relations (stderr, ddrs);
530 /* Dumps the distance and direction vectors in FILE. DDRS contains
531 the dependence relations, and VECT_SIZE is the size of the
532 dependence vectors, or in other words the number of loops in the
533 considered nest. */
535 DEBUG_FUNCTION void
536 dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
538 unsigned int i, j;
539 struct data_dependence_relation *ddr;
540 lambda_vector v;
542 FOR_EACH_VEC_ELT (ddrs, i, ddr)
543 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
545 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), j, v)
547 fprintf (file, "DISTANCE_V (");
548 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
549 fprintf (file, ")\n");
552 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), j, v)
554 fprintf (file, "DIRECTION_V (");
555 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
556 fprintf (file, ")\n");
560 fprintf (file, "\n\n");
563 /* Dumps the data dependence relations DDRS in FILE. */
565 DEBUG_FUNCTION void
566 dump_ddrs (FILE *file, vec<ddr_p> ddrs)
568 unsigned int i;
569 struct data_dependence_relation *ddr;
571 FOR_EACH_VEC_ELT (ddrs, i, ddr)
572 dump_data_dependence_relation (file, ddr);
574 fprintf (file, "\n\n");
577 DEBUG_FUNCTION void
578 debug_ddrs (vec<ddr_p> ddrs)
580 dump_ddrs (stderr, ddrs);
583 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
584 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
585 constant of type ssizetype, and returns true. If we cannot do this
586 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
587 is returned. */
589 static bool
590 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
591 tree *var, tree *off)
593 tree var0, var1;
594 tree off0, off1;
595 enum tree_code ocode = code;
597 *var = NULL_TREE;
598 *off = NULL_TREE;
600 switch (code)
602 case INTEGER_CST:
603 *var = build_int_cst (type, 0);
604 *off = fold_convert (ssizetype, op0);
605 return true;
607 case POINTER_PLUS_EXPR:
608 ocode = PLUS_EXPR;
609 /* FALLTHROUGH */
610 case PLUS_EXPR:
611 case MINUS_EXPR:
612 split_constant_offset (op0, &var0, &off0);
613 split_constant_offset (op1, &var1, &off1);
614 *var = fold_build2 (code, type, var0, var1);
615 *off = size_binop (ocode, off0, off1);
616 return true;
618 case MULT_EXPR:
619 if (TREE_CODE (op1) != INTEGER_CST)
620 return false;
622 split_constant_offset (op0, &var0, &off0);
623 *var = fold_build2 (MULT_EXPR, type, var0, op1);
624 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
625 return true;
627 case ADDR_EXPR:
629 tree base, poffset;
630 poly_int64 pbitsize, pbitpos, pbytepos;
631 machine_mode pmode;
632 int punsignedp, preversep, pvolatilep;
634 op0 = TREE_OPERAND (op0, 0);
635 base
636 = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
637 &punsignedp, &preversep, &pvolatilep);
639 if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
640 return false;
641 base = build_fold_addr_expr (base);
642 off0 = ssize_int (pbytepos);
644 if (poffset)
646 split_constant_offset (poffset, &poffset, &off1);
647 off0 = size_binop (PLUS_EXPR, off0, off1);
648 if (POINTER_TYPE_P (TREE_TYPE (base)))
649 base = fold_build_pointer_plus (base, poffset);
650 else
651 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
652 fold_convert (TREE_TYPE (base), poffset));
655 var0 = fold_convert (type, base);
657 /* If variable length types are involved, punt, otherwise casts
658 might be converted into ARRAY_REFs in gimplify_conversion.
659 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
660 possibly no longer appears in current GIMPLE, might resurface.
661 This perhaps could run
662 if (CONVERT_EXPR_P (var0))
664 gimplify_conversion (&var0);
665 // Attempt to fill in any within var0 found ARRAY_REF's
666 // element size from corresponding op embedded ARRAY_REF,
667 // if unsuccessful, just punt.
668 } */
669 while (POINTER_TYPE_P (type))
670 type = TREE_TYPE (type);
671 if (int_size_in_bytes (type) < 0)
672 return false;
674 *var = var0;
675 *off = off0;
676 return true;
679 case SSA_NAME:
681 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
682 return false;
684 gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
685 enum tree_code subcode;
687 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
688 return false;
690 var0 = gimple_assign_rhs1 (def_stmt);
691 subcode = gimple_assign_rhs_code (def_stmt);
692 var1 = gimple_assign_rhs2 (def_stmt);
694 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
696 CASE_CONVERT:
698 /* We must not introduce undefined overflow, and we must not change the value.
699 Hence we're okay if the inner type doesn't overflow to start with
700 (pointer or signed), the outer type also is an integer or pointer
701 and the outer precision is at least as large as the inner. */
702 tree itype = TREE_TYPE (op0);
703 if ((POINTER_TYPE_P (itype)
704 || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
705 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
706 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
708 split_constant_offset (op0, &var0, off);
709 *var = fold_convert (type, var0);
710 return true;
712 return false;
715 default:
716 return false;
720 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
721 will be ssizetype. */
723 void
724 split_constant_offset (tree exp, tree *var, tree *off)
726 tree type = TREE_TYPE (exp), op0, op1, e, o;
727 enum tree_code code;
729 *var = exp;
730 *off = ssize_int (0);
732 if (tree_is_chrec (exp)
733 || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
734 return;
736 code = TREE_CODE (exp);
737 extract_ops_from_tree (exp, &code, &op0, &op1);
738 if (split_constant_offset_1 (type, op0, code, op1, &e, &o))
740 *var = e;
741 *off = o;
745 /* Returns the address ADDR of an object in a canonical shape (without nop
746 casts, and with type of pointer to the object). */
748 static tree
749 canonicalize_base_object_address (tree addr)
751 tree orig = addr;
753 STRIP_NOPS (addr);
755 /* The base address may be obtained by casting from integer, in that case
756 keep the cast. */
757 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
758 return orig;
760 if (TREE_CODE (addr) != ADDR_EXPR)
761 return addr;
763 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
766 /* Analyze the behavior of memory reference REF. There are two modes:
768 - BB analysis. In this case we simply split the address into base,
769 init and offset components, without reference to any containing loop.
770 The resulting base and offset are general expressions and they can
771 vary arbitrarily from one iteration of the containing loop to the next.
772 The step is always zero.
774 - loop analysis. In this case we analyze the reference both wrt LOOP
775 and on the basis that the reference occurs (is "used") in LOOP;
776 see the comment above analyze_scalar_evolution_in_loop for more
777 information about this distinction. The base, init, offset and
778 step fields are all invariant in LOOP.
780 Perform BB analysis if LOOP is null, or if LOOP is the function's
781 dummy outermost loop. In other cases perform loop analysis.
783 Return true if the analysis succeeded and store the results in DRB if so.
784 BB analysis can only fail for bitfield or reversed-storage accesses. */
786 bool
787 dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
788 struct loop *loop)
790 poly_int64 pbitsize, pbitpos;
791 tree base, poffset;
792 machine_mode pmode;
793 int punsignedp, preversep, pvolatilep;
794 affine_iv base_iv, offset_iv;
795 tree init, dinit, step;
796 bool in_loop = (loop && loop->num);
798 if (dump_file && (dump_flags & TDF_DETAILS))
799 fprintf (dump_file, "analyze_innermost: ");
801 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
802 &punsignedp, &preversep, &pvolatilep);
803 gcc_assert (base != NULL_TREE);
805 poly_int64 pbytepos;
806 if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
808 if (dump_file && (dump_flags & TDF_DETAILS))
809 fprintf (dump_file, "failed: bit offset alignment.\n");
810 return false;
813 if (preversep)
815 if (dump_file && (dump_flags & TDF_DETAILS))
816 fprintf (dump_file, "failed: reverse storage order.\n");
817 return false;
820 /* Calculate the alignment and misalignment for the inner reference. */
821 unsigned int HOST_WIDE_INT bit_base_misalignment;
822 unsigned int bit_base_alignment;
823 get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);
825 /* There are no bitfield references remaining in BASE, so the values
826 we got back must be whole bytes. */
827 gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
828 && bit_base_misalignment % BITS_PER_UNIT == 0);
829 unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
830 poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;
832 if (TREE_CODE (base) == MEM_REF)
834 if (!integer_zerop (TREE_OPERAND (base, 1)))
836 /* Subtract MOFF from the base and add it to POFFSET instead.
837 Adjust the misalignment to reflect the amount we subtracted. */
838 poly_offset_int moff = mem_ref_offset (base);
839 base_misalignment -= moff.force_shwi ();
840 tree mofft = wide_int_to_tree (sizetype, moff);
841 if (!poffset)
842 poffset = mofft;
843 else
844 poffset = size_binop (PLUS_EXPR, poffset, mofft);
846 base = TREE_OPERAND (base, 0);
848 else
849 base = build_fold_addr_expr (base);
851 if (in_loop)
853 if (!simple_iv (loop, loop, base, &base_iv, true))
855 if (dump_file && (dump_flags & TDF_DETAILS))
856 fprintf (dump_file, "failed: evolution of base is not affine.\n");
857 return false;
860 else
862 base_iv.base = base;
863 base_iv.step = ssize_int (0);
864 base_iv.no_overflow = true;
867 if (!poffset)
869 offset_iv.base = ssize_int (0);
870 offset_iv.step = ssize_int (0);
872 else
874 if (!in_loop)
876 offset_iv.base = poffset;
877 offset_iv.step = ssize_int (0);
879 else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
881 if (dump_file && (dump_flags & TDF_DETAILS))
882 fprintf (dump_file, "failed: evolution of offset is not affine.\n");
883 return false;
887 init = ssize_int (pbytepos);
889 /* Subtract any constant component from the base and add it to INIT instead.
890 Adjust the misalignment to reflect the amount we subtracted. */
891 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
892 init = size_binop (PLUS_EXPR, init, dinit);
893 base_misalignment -= TREE_INT_CST_LOW (dinit);
895 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
896 init = size_binop (PLUS_EXPR, init, dinit);
898 step = size_binop (PLUS_EXPR,
899 fold_convert (ssizetype, base_iv.step),
900 fold_convert (ssizetype, offset_iv.step));
902 base = canonicalize_base_object_address (base_iv.base);
904 /* See if get_pointer_alignment can guarantee a higher alignment than
905 the one we calculated above. */
906 unsigned int HOST_WIDE_INT alt_misalignment;
907 unsigned int alt_alignment;
908 get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);
910 /* As above, these values must be whole bytes. */
911 gcc_assert (alt_alignment % BITS_PER_UNIT == 0
912 && alt_misalignment % BITS_PER_UNIT == 0);
913 alt_alignment /= BITS_PER_UNIT;
914 alt_misalignment /= BITS_PER_UNIT;
916 if (base_alignment < alt_alignment)
918 base_alignment = alt_alignment;
919 base_misalignment = alt_misalignment;
922 drb->base_address = base;
923 drb->offset = fold_convert (ssizetype, offset_iv.base);
924 drb->init = init;
925 drb->step = step;
926 if (known_misalignment (base_misalignment, base_alignment,
927 &drb->base_misalignment))
928 drb->base_alignment = base_alignment;
929 else
931 drb->base_alignment = known_alignment (base_misalignment);
932 drb->base_misalignment = 0;
934 drb->offset_alignment = highest_pow2_factor (offset_iv.base);
935 drb->step_alignment = highest_pow2_factor (step);
937 if (dump_file && (dump_flags & TDF_DETAILS))
938 fprintf (dump_file, "success.\n");
940 return true;
943 /* Return true if OP is a valid component reference for a DR access
944 function. This accepts a subset of what handled_component_p accepts. */
946 static bool
947 access_fn_component_p (tree op)
949 switch (TREE_CODE (op))
951 case REALPART_EXPR:
952 case IMAGPART_EXPR:
953 case ARRAY_REF:
954 return true;
956 case COMPONENT_REF:
957 return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;
959 default:
960 return false;
964 /* Determines the base object and the list of indices of memory reference
965 DR, analyzed in LOOP and instantiated before NEST. */
967 static void
968 dr_analyze_indices (struct data_reference *dr, edge nest, loop_p loop)
970 vec<tree> access_fns = vNULL;
971 tree ref, op;
972 tree base, off, access_fn;
974 /* If analyzing a basic-block there are no indices to analyze
975 and thus no access functions. */
976 if (!nest)
978 DR_BASE_OBJECT (dr) = DR_REF (dr);
979 DR_ACCESS_FNS (dr).create (0);
980 return;
983 ref = DR_REF (dr);
985 /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
986 into a two element array with a constant index. The base is
987 then just the immediate underlying object. */
988 if (TREE_CODE (ref) == REALPART_EXPR)
990 ref = TREE_OPERAND (ref, 0);
991 access_fns.safe_push (integer_zero_node);
993 else if (TREE_CODE (ref) == IMAGPART_EXPR)
995 ref = TREE_OPERAND (ref, 0);
996 access_fns.safe_push (integer_one_node);
999 /* Analyze access functions of dimensions we know to be independent.
1000 The list of component references handled here should be kept in
1001 sync with access_fn_component_p. */
1002 while (handled_component_p (ref))
1004 if (TREE_CODE (ref) == ARRAY_REF)
1006 op = TREE_OPERAND (ref, 1);
1007 access_fn = analyze_scalar_evolution (loop, op);
1008 access_fn = instantiate_scev (nest, loop, access_fn);
1009 access_fns.safe_push (access_fn);
1011 else if (TREE_CODE (ref) == COMPONENT_REF
1012 && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
1014 /* For COMPONENT_REFs of records (but not unions!) use the
1015 FIELD_DECL offset as constant access function so we can
1016 disambiguate a[i].f1 and a[i].f2. */
1017 tree off = component_ref_field_offset (ref);
1018 off = size_binop (PLUS_EXPR,
1019 size_binop (MULT_EXPR,
1020 fold_convert (bitsizetype, off),
1021 bitsize_int (BITS_PER_UNIT)),
1022 DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
1023 access_fns.safe_push (off);
1025 else
1026 /* If we have an unhandled component we could not translate
1027 to an access function stop analyzing. We have determined
1028 our base object in this case. */
1029 break;
1031 ref = TREE_OPERAND (ref, 0);
1034 /* If the address operand of a MEM_REF base has an evolution in the
1035 analyzed nest, add it as an additional independent access-function. */
1036 if (TREE_CODE (ref) == MEM_REF)
1038 op = TREE_OPERAND (ref, 0);
1039 access_fn = analyze_scalar_evolution (loop, op);
1040 access_fn = instantiate_scev (nest, loop, access_fn);
1041 if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
1043 tree orig_type;
1044 tree memoff = TREE_OPERAND (ref, 1);
1045 base = initial_condition (access_fn);
1046 orig_type = TREE_TYPE (base);
1047 STRIP_USELESS_TYPE_CONVERSION (base);
1048 split_constant_offset (base, &base, &off);
1049 STRIP_USELESS_TYPE_CONVERSION (base);
1050 /* Fold the MEM_REF offset into the evolutions initial
1051 value to make more bases comparable. */
1052 if (!integer_zerop (memoff))
1054 off = size_binop (PLUS_EXPR, off,
1055 fold_convert (ssizetype, memoff));
1056 memoff = build_int_cst (TREE_TYPE (memoff), 0);
1058 /* Adjust the offset so it is a multiple of the access type
1059 size and thus we separate bases that can possibly be used
1060 to produce partial overlaps (which the access_fn machinery
1061 cannot handle). */
1062 wide_int rem;
1063 if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
1064 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
1065 && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
1066 rem = wi::mod_trunc
1067 (wi::to_wide (off),
1068 wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
1069 SIGNED);
1070 else
1071 /* If we can't compute the remainder simply force the initial
1072 condition to zero. */
1073 rem = wi::to_wide (off);
1074 off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
1075 memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
1076 /* And finally replace the initial condition. */
1077 access_fn = chrec_replace_initial_condition
1078 (access_fn, fold_convert (orig_type, off));
1079 /* ??? This is still not a suitable base object for
1080 dr_may_alias_p - the base object needs to be an
1081 access that covers the object as whole. With
1082 an evolution in the pointer this cannot be
1083 guaranteed.
1084 As a band-aid, mark the access so we can special-case
1085 it in dr_may_alias_p. */
1086 tree old = ref;
1087 ref = fold_build2_loc (EXPR_LOCATION (ref),
1088 MEM_REF, TREE_TYPE (ref),
1089 base, memoff);
1090 MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
1091 MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
1092 DR_UNCONSTRAINED_BASE (dr) = true;
1093 access_fns.safe_push (access_fn);
1096 else if (DECL_P (ref))
1098 /* Canonicalize DR_BASE_OBJECT to MEM_REF form. */
1099 ref = build2 (MEM_REF, TREE_TYPE (ref),
1100 build_fold_addr_expr (ref),
1101 build_int_cst (reference_alias_ptr_type (ref), 0));
1104 DR_BASE_OBJECT (dr) = ref;
1105 DR_ACCESS_FNS (dr) = access_fns;
1108 /* Extracts the alias analysis information from the memory reference DR. */
1110 static void
1111 dr_analyze_alias (struct data_reference *dr)
1113 tree ref = DR_REF (dr);
1114 tree base = get_base_address (ref), addr;
1116 if (INDIRECT_REF_P (base)
1117 || TREE_CODE (base) == MEM_REF)
1119 addr = TREE_OPERAND (base, 0);
1120 if (TREE_CODE (addr) == SSA_NAME)
1121 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1125 /* Frees data reference DR. */
1127 void
1128 free_data_ref (data_reference_p dr)
1130 DR_ACCESS_FNS (dr).release ();
1131 free (dr);
1134 /* Analyze memory reference MEMREF, which is accessed in STMT.
1135 The reference is a read if IS_READ is true, otherwise it is a write.
1136 IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
1137 within STMT, i.e. that it might not occur even if STMT is executed
1138 and runs to completion.
1140 Return the data_reference description of MEMREF. NEST is the outermost
1141 loop in which the reference should be instantiated, LOOP is the loop
1142 in which the data reference should be analyzed. */
1144 struct data_reference *
1145 create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
1146 bool is_read, bool is_conditional_in_stmt)
1148 struct data_reference *dr;
1150 if (dump_file && (dump_flags & TDF_DETAILS))
1152 fprintf (dump_file, "Creating dr for ");
1153 print_generic_expr (dump_file, memref, TDF_SLIM);
1154 fprintf (dump_file, "\n");
1157 dr = XCNEW (struct data_reference);
1158 DR_STMT (dr) = stmt;
1159 DR_REF (dr) = memref;
1160 DR_IS_READ (dr) = is_read;
1161 DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;
1163 dr_analyze_innermost (&DR_INNERMOST (dr), memref,
1164 nest != NULL ? loop : NULL);
1165 dr_analyze_indices (dr, nest, loop);
1166 dr_analyze_alias (dr);
1168 if (dump_file && (dump_flags & TDF_DETAILS))
1170 unsigned i;
1171 fprintf (dump_file, "\tbase_address: ");
1172 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1173 fprintf (dump_file, "\n\toffset from base address: ");
1174 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1175 fprintf (dump_file, "\n\tconstant offset from base address: ");
1176 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1177 fprintf (dump_file, "\n\tstep: ");
1178 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1179 fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
1180 fprintf (dump_file, "\n\tbase misalignment: %d",
1181 DR_BASE_MISALIGNMENT (dr));
1182 fprintf (dump_file, "\n\toffset alignment: %d",
1183 DR_OFFSET_ALIGNMENT (dr));
1184 fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
1185 fprintf (dump_file, "\n\tbase_object: ");
1186 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1187 fprintf (dump_file, "\n");
1188 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1190 fprintf (dump_file, "\tAccess function %d: ", i);
1191 print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1195 return dr;
1198 /* A helper function computes order between two tree epxressions T1 and T2.
1199 This is used in comparator functions sorting objects based on the order
1200 of tree expressions. The function returns -1, 0, or 1. */
1203 data_ref_compare_tree (tree t1, tree t2)
1205 int i, cmp;
1206 enum tree_code code;
1207 char tclass;
1209 if (t1 == t2)
1210 return 0;
1211 if (t1 == NULL)
1212 return -1;
1213 if (t2 == NULL)
1214 return 1;
1216 STRIP_USELESS_TYPE_CONVERSION (t1);
1217 STRIP_USELESS_TYPE_CONVERSION (t2);
1218 if (t1 == t2)
1219 return 0;
1221 if (TREE_CODE (t1) != TREE_CODE (t2)
1222 && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
1223 return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
1225 code = TREE_CODE (t1);
1226 switch (code)
1228 case INTEGER_CST:
1229 return tree_int_cst_compare (t1, t2);
1231 case STRING_CST:
1232 if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
1233 return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
1234 return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
1235 TREE_STRING_LENGTH (t1));
1237 case SSA_NAME:
1238 if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
1239 return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
1240 break;
1242 default:
1243 if (POLY_INT_CST_P (t1))
1244 return compare_sizes_for_sort (wi::to_poly_widest (t1),
1245 wi::to_poly_widest (t2));
1247 tclass = TREE_CODE_CLASS (code);
1249 /* For decls, compare their UIDs. */
1250 if (tclass == tcc_declaration)
1252 if (DECL_UID (t1) != DECL_UID (t2))
1253 return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
1254 break;
1256 /* For expressions, compare their operands recursively. */
1257 else if (IS_EXPR_CODE_CLASS (tclass))
1259 for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
1261 cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
1262 TREE_OPERAND (t2, i));
1263 if (cmp != 0)
1264 return cmp;
1267 else
1268 gcc_unreachable ();
1271 return 0;
1274 /* Return TRUE it's possible to resolve data dependence DDR by runtime alias
1275 check. */
1277 bool
1278 runtime_alias_check_p (ddr_p ddr, struct loop *loop, bool speed_p)
1280 if (dump_enabled_p ())
1282 dump_printf (MSG_NOTE, "consider run-time aliasing test between ");
1283 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_A (ddr)));
1284 dump_printf (MSG_NOTE, " and ");
1285 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_B (ddr)));
1286 dump_printf (MSG_NOTE, "\n");
1289 if (!speed_p)
1291 if (dump_enabled_p ())
1292 dump_printf (MSG_MISSED_OPTIMIZATION,
1293 "runtime alias check not supported when optimizing "
1294 "for size.\n");
1295 return false;
1298 /* FORNOW: We don't support versioning with outer-loop in either
1299 vectorization or loop distribution. */
1300 if (loop != NULL && loop->inner != NULL)
1302 if (dump_enabled_p ())
1303 dump_printf (MSG_MISSED_OPTIMIZATION,
1304 "runtime alias check not supported for outer loop.\n");
1305 return false;
1308 /* FORNOW: We don't support creating runtime alias tests for non-constant
1309 step. */
1310 if (TREE_CODE (DR_STEP (DDR_A (ddr))) != INTEGER_CST
1311 || TREE_CODE (DR_STEP (DDR_B (ddr))) != INTEGER_CST)
1313 if (dump_enabled_p ())
1314 dump_printf (MSG_MISSED_OPTIMIZATION,
1315 "runtime alias check not supported for non-constant "
1316 "step\n");
1317 return false;
1320 return true;
1323 /* Operator == between two dr_with_seg_len objects.
1325 This equality operator is used to make sure two data refs
1326 are the same one so that we will consider to combine the
1327 aliasing checks of those two pairs of data dependent data
1328 refs. */
1330 static bool
1331 operator == (const dr_with_seg_len& d1,
1332 const dr_with_seg_len& d2)
1334 return operand_equal_p (DR_BASE_ADDRESS (d1.dr),
1335 DR_BASE_ADDRESS (d2.dr), 0)
1336 && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
1337 && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
1338 && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0;
1341 /* Comparison function for sorting objects of dr_with_seg_len_pair_t
1342 so that we can combine aliasing checks in one scan. */
1344 static int
1345 comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
1347 const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
1348 const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
1349 const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
1350 const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;
1352 /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks
1353 if a and c have the same basic address snd step, and b and d have the same
1354 address and step. Therefore, if any a&c or b&d don't have the same address
1355 and step, we don't care the order of those two pairs after sorting. */
1356 int comp_res;
1358 if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
1359 DR_BASE_ADDRESS (b1.dr))) != 0)
1360 return comp_res;
1361 if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
1362 DR_BASE_ADDRESS (b2.dr))) != 0)
1363 return comp_res;
1364 if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
1365 DR_STEP (b1.dr))) != 0)
1366 return comp_res;
1367 if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
1368 DR_STEP (b2.dr))) != 0)
1369 return comp_res;
1370 if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
1371 DR_OFFSET (b1.dr))) != 0)
1372 return comp_res;
1373 if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
1374 DR_INIT (b1.dr))) != 0)
1375 return comp_res;
1376 if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
1377 DR_OFFSET (b2.dr))) != 0)
1378 return comp_res;
1379 if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
1380 DR_INIT (b2.dr))) != 0)
1381 return comp_res;
1383 return 0;
1386 /* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
1387 FACTOR is number of iterations that each data reference is accessed.
1389 Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
1390 we create an expression:
1392 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1393 || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
1395 for aliasing checks. However, in some cases we can decrease the number
1396 of checks by combining two checks into one. For example, suppose we have
1397 another pair of data refs store_ptr_0 & load_ptr_1, and if the following
1398 condition is satisfied:
1400 load_ptr_0 < load_ptr_1 &&
1401 load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
1403 (this condition means, in each iteration of vectorized loop, the accessed
1404 memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
1405 load_ptr_1.)
1407 we then can use only the following expression to finish the alising checks
1408 between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:
1410 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1411 || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))
1413 Note that we only consider that load_ptr_0 and load_ptr_1 have the same
1414 basic address. */
1416 void
1417 prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
1418 poly_uint64 factor)
1420 /* Sort the collected data ref pairs so that we can scan them once to
1421 combine all possible aliasing checks. */
1422 alias_pairs->qsort (comp_dr_with_seg_len_pair);
1424 /* Scan the sorted dr pairs and check if we can combine alias checks
1425 of two neighboring dr pairs. */
1426 for (size_t i = 1; i < alias_pairs->length (); ++i)
1428 /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2). */
1429 dr_with_seg_len *dr_a1 = &(*alias_pairs)[i-1].first,
1430 *dr_b1 = &(*alias_pairs)[i-1].second,
1431 *dr_a2 = &(*alias_pairs)[i].first,
1432 *dr_b2 = &(*alias_pairs)[i].second;
1434 /* Remove duplicate data ref pairs. */
1435 if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
1437 if (dump_enabled_p ())
1439 dump_printf (MSG_NOTE, "found equal ranges ");
1440 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a1->dr));
1441 dump_printf (MSG_NOTE, ", ");
1442 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b1->dr));
1443 dump_printf (MSG_NOTE, " and ");
1444 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a2->dr));
1445 dump_printf (MSG_NOTE, ", ");
1446 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b2->dr));
1447 dump_printf (MSG_NOTE, "\n");
1449 alias_pairs->ordered_remove (i--);
1450 continue;
1453 if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
1455 /* We consider the case that DR_B1 and DR_B2 are same memrefs,
1456 and DR_A1 and DR_A2 are two consecutive memrefs. */
1457 if (*dr_a1 == *dr_a2)
1459 std::swap (dr_a1, dr_b1);
1460 std::swap (dr_a2, dr_b2);
1463 poly_int64 init_a1, init_a2;
1464 if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
1465 DR_BASE_ADDRESS (dr_a2->dr), 0)
1466 || !operand_equal_p (DR_OFFSET (dr_a1->dr),
1467 DR_OFFSET (dr_a2->dr), 0)
1468 || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
1469 || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
1470 continue;
1472 /* Don't combine if we can't tell which one comes first. */
1473 if (!ordered_p (init_a1, init_a2))
1474 continue;
1476 /* Make sure dr_a1 starts left of dr_a2. */
1477 if (maybe_gt (init_a1, init_a2))
1479 std::swap (*dr_a1, *dr_a2);
1480 std::swap (init_a1, init_a2);
1483 /* Only merge const step data references. */
1484 poly_int64 step_a1, step_a2;
1485 if (!poly_int_tree_p (DR_STEP (dr_a1->dr), &step_a1)
1486 || !poly_int_tree_p (DR_STEP (dr_a2->dr), &step_a2))
1487 continue;
1489 bool neg_step = maybe_lt (step_a1, 0) || maybe_lt (step_a2, 0);
1491 /* DR_A1 and DR_A2 must go in the same direction. */
1492 if (neg_step && (maybe_gt (step_a1, 0) || maybe_gt (step_a2, 0)))
1493 continue;
1495 poly_uint64 seg_len_a1 = 0, seg_len_a2 = 0;
1496 bool const_seg_len_a1 = poly_int_tree_p (dr_a1->seg_len,
1497 &seg_len_a1);
1498 bool const_seg_len_a2 = poly_int_tree_p (dr_a2->seg_len,
1499 &seg_len_a2);
1501 /* We need to compute merged segment length at compilation time for
1502 dr_a1 and dr_a2, which is impossible if either one has non-const
1503 segment length. */
1504 if ((!const_seg_len_a1 || !const_seg_len_a2)
1505 && maybe_ne (step_a1, step_a2))
1506 continue;
1508 bool do_remove = false;
1509 poly_uint64 diff = init_a2 - init_a1;
1510 poly_uint64 min_seg_len_b;
1511 tree new_seg_len;
1513 if (!poly_int_tree_p (dr_b1->seg_len, &min_seg_len_b))
1515 tree step_b = DR_STEP (dr_b1->dr);
1516 if (!tree_fits_shwi_p (step_b))
1517 continue;
1518 min_seg_len_b = factor * abs_hwi (tree_to_shwi (step_b));
1521 /* Now we try to merge alias check dr_a1 & dr_b and dr_a2 & dr_b.
1523 Case A:
1524 check if the following condition is satisfied:
1526 DIFF - SEGMENT_LENGTH_A < SEGMENT_LENGTH_B
1528 where DIFF = DR_A2_INIT - DR_A1_INIT. However,
1529 SEGMENT_LENGTH_A or SEGMENT_LENGTH_B may not be constant so we
1530 have to make a best estimation. We can get the minimum value
1531 of SEGMENT_LENGTH_B as a constant, represented by MIN_SEG_LEN_B,
1532 then either of the following two conditions can guarantee the
1533 one above:
1535 1: DIFF <= MIN_SEG_LEN_B
1536 2: DIFF - SEGMENT_LENGTH_A < MIN_SEG_LEN_B
1537 Because DIFF - SEGMENT_LENGTH_A is done in sizetype, we need
1538 to take care of wrapping behavior in it.
1540 Case B:
1541 If the left segment does not extend beyond the start of the
1542 right segment the new segment length is that of the right
1543 plus the segment distance. The condition is like:
1545 DIFF >= SEGMENT_LENGTH_A ;SEGMENT_LENGTH_A is a constant.
1547 Note 1: Case A.2 and B combined together effectively merges every
1548 dr_a1 & dr_b and dr_a2 & dr_b when SEGMENT_LENGTH_A is const.
1550 Note 2: Above description is based on positive DR_STEP, we need to
1551 take care of negative DR_STEP for wrapping behavior. See PR80815
1552 for more information. */
1553 if (neg_step)
1555 /* Adjust diff according to access size of both references. */
1556 diff += tree_to_poly_uint64
1557 (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a2->dr))));
1558 diff -= tree_to_poly_uint64
1559 (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a1->dr))));
1560 /* Case A.1. */
1561 if (known_le (diff, min_seg_len_b)
1562 /* Case A.2 and B combined. */
1563 || const_seg_len_a2)
1565 if (const_seg_len_a1 || const_seg_len_a2)
1566 new_seg_len
1567 = build_int_cstu (sizetype,
1568 lower_bound (seg_len_a1 - diff,
1569 seg_len_a2));
1570 else
1571 new_seg_len
1572 = size_binop (MINUS_EXPR, dr_a2->seg_len,
1573 build_int_cstu (sizetype, diff));
1575 dr_a2->seg_len = new_seg_len;
1576 do_remove = true;
1579 else
1581 /* Case A.1. */
1582 if (known_le (diff, min_seg_len_b)
1583 /* Case A.2 and B combined. */
1584 || const_seg_len_a1)
1586 if (const_seg_len_a1 && const_seg_len_a2)
1587 new_seg_len
1588 = build_int_cstu (sizetype,
1589 upper_bound (seg_len_a2 + diff,
1590 seg_len_a1));
1591 else
1592 new_seg_len
1593 = size_binop (PLUS_EXPR, dr_a2->seg_len,
1594 build_int_cstu (sizetype, diff));
1596 dr_a1->seg_len = new_seg_len;
1597 do_remove = true;
1601 if (do_remove)
1603 if (dump_enabled_p ())
1605 dump_printf (MSG_NOTE, "merging ranges for ");
1606 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a1->dr));
1607 dump_printf (MSG_NOTE, ", ");
1608 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b1->dr));
1609 dump_printf (MSG_NOTE, " and ");
1610 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a2->dr));
1611 dump_printf (MSG_NOTE, ", ");
1612 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b2->dr));
1613 dump_printf (MSG_NOTE, "\n");
1615 alias_pairs->ordered_remove (neg_step ? i - 1 : i);
1616 i--;
1622 /* Given LOOP's two data references and segment lengths described by DR_A
1623 and DR_B, create expression checking if the two addresses ranges intersect
1624 with each other based on index of the two addresses. This can only be
1625 done if DR_A and DR_B referring to the same (array) object and the index
1626 is the only difference. For example:
1628 DR_A DR_B
1629 data-ref arr[i] arr[j]
1630 base_object arr arr
1631 index {i_0, +, 1}_loop {j_0, +, 1}_loop
1633 The addresses and their index are like:
1635 |<- ADDR_A ->| |<- ADDR_B ->|
1636 ------------------------------------------------------->
1637 | | | | | | | | | |
1638 ------------------------------------------------------->
1639 i_0 ... i_0+4 j_0 ... j_0+4
1641 We can create expression based on index rather than address:
1643 (i_0 + 4 < j_0 || j_0 + 4 < i_0)
1645 Note evolution step of index needs to be considered in comparison. */
1647 static bool
1648 create_intersect_range_checks_index (struct loop *loop, tree *cond_expr,
1649 const dr_with_seg_len& dr_a,
1650 const dr_with_seg_len& dr_b)
1652 if (integer_zerop (DR_STEP (dr_a.dr))
1653 || integer_zerop (DR_STEP (dr_b.dr))
1654 || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
1655 return false;
1657 if (!tree_fits_uhwi_p (dr_a.seg_len) || !tree_fits_uhwi_p (dr_b.seg_len))
1658 return false;
1660 if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
1661 return false;
1663 if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
1664 return false;
1666 if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
1667 return false;
1669 gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
1671 bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
1672 unsigned HOST_WIDE_INT abs_step
1673 = absu_hwi (tree_to_shwi (DR_STEP (dr_a.dr)));
1675 unsigned HOST_WIDE_INT seg_len1 = tree_to_uhwi (dr_a.seg_len);
1676 unsigned HOST_WIDE_INT seg_len2 = tree_to_uhwi (dr_b.seg_len);
1677 /* Infer the number of iterations with which the memory segment is accessed
1678 by DR. In other words, alias is checked if memory segment accessed by
1679 DR_A in some iterations intersect with memory segment accessed by DR_B
1680 in the same amount iterations.
1681 Note segnment length is a linear function of number of iterations with
1682 DR_STEP as the coefficient. */
1683 unsigned HOST_WIDE_INT niter_len1 = (seg_len1 + abs_step - 1) / abs_step;
1684 unsigned HOST_WIDE_INT niter_len2 = (seg_len2 + abs_step - 1) / abs_step;
1686 unsigned int i;
1687 for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
1689 tree access1 = DR_ACCESS_FN (dr_a.dr, i);
1690 tree access2 = DR_ACCESS_FN (dr_b.dr, i);
1691 /* Two indices must be the same if they are not scev, or not scev wrto
1692 current loop being vecorized. */
1693 if (TREE_CODE (access1) != POLYNOMIAL_CHREC
1694 || TREE_CODE (access2) != POLYNOMIAL_CHREC
1695 || CHREC_VARIABLE (access1) != (unsigned)loop->num
1696 || CHREC_VARIABLE (access2) != (unsigned)loop->num)
1698 if (operand_equal_p (access1, access2, 0))
1699 continue;
1701 return false;
1703 /* The two indices must have the same step. */
1704 if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
1705 return false;
1707 tree idx_step = CHREC_RIGHT (access1);
1708 /* Index must have const step, otherwise DR_STEP won't be constant. */
1709 gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
1710 /* Index must evaluate in the same direction as DR. */
1711 gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
1713 tree min1 = CHREC_LEFT (access1);
1714 tree min2 = CHREC_LEFT (access2);
1715 if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
1716 return false;
1718 /* Ideally, alias can be checked against loop's control IV, but we
1719 need to prove linear mapping between control IV and reference
1720 index. Although that should be true, we check against (array)
1721 index of data reference. Like segment length, index length is
1722 linear function of the number of iterations with index_step as
1723 the coefficient, i.e, niter_len * idx_step. */
1724 tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
1725 build_int_cst (TREE_TYPE (min1),
1726 niter_len1));
1727 tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
1728 build_int_cst (TREE_TYPE (min2),
1729 niter_len2));
1730 tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1);
1731 tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2);
1732 /* Adjust ranges for negative step. */
1733 if (neg_step)
1735 min1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1), max1, idx_step);
1736 max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1),
1737 CHREC_LEFT (access1), idx_step);
1738 min2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2), max2, idx_step);
1739 max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2),
1740 CHREC_LEFT (access2), idx_step);
1742 tree part_cond_expr
1743 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1744 fold_build2 (LE_EXPR, boolean_type_node, max1, min2),
1745 fold_build2 (LE_EXPR, boolean_type_node, max2, min1));
1746 if (*cond_expr)
1747 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1748 *cond_expr, part_cond_expr);
1749 else
1750 *cond_expr = part_cond_expr;
1752 return true;
1755 /* Given two data references and segment lengths described by DR_A and DR_B,
1756 create expression checking if the two addresses ranges intersect with
1757 each other:
1759 ((DR_A_addr_0 + DR_A_segment_length_0) <= DR_B_addr_0)
1760 || (DR_B_addr_0 + DER_B_segment_length_0) <= DR_A_addr_0)) */
1762 static void
1763 create_intersect_range_checks (struct loop *loop, tree *cond_expr,
1764 const dr_with_seg_len& dr_a,
1765 const dr_with_seg_len& dr_b)
1767 *cond_expr = NULL_TREE;
1768 if (create_intersect_range_checks_index (loop, cond_expr, dr_a, dr_b))
1769 return;
1771 tree segment_length_a = dr_a.seg_len;
1772 tree segment_length_b = dr_b.seg_len;
1773 tree addr_base_a = DR_BASE_ADDRESS (dr_a.dr);
1774 tree addr_base_b = DR_BASE_ADDRESS (dr_b.dr);
1775 tree offset_a = DR_OFFSET (dr_a.dr), offset_b = DR_OFFSET (dr_b.dr);
1777 offset_a = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_a),
1778 offset_a, DR_INIT (dr_a.dr));
1779 offset_b = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_b),
1780 offset_b, DR_INIT (dr_b.dr));
1781 addr_base_a = fold_build_pointer_plus (addr_base_a, offset_a);
1782 addr_base_b = fold_build_pointer_plus (addr_base_b, offset_b);
1784 tree seg_a_min = addr_base_a;
1785 tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
1786 /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
1787 bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
1788 [a, a+12) */
1789 if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
1791 tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a.dr)));
1792 seg_a_min = fold_build_pointer_plus (seg_a_max, unit_size);
1793 seg_a_max = fold_build_pointer_plus (addr_base_a, unit_size);
1796 tree seg_b_min = addr_base_b;
1797 tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
1798 if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
1800 tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b.dr)));
1801 seg_b_min = fold_build_pointer_plus (seg_b_max, unit_size);
1802 seg_b_max = fold_build_pointer_plus (addr_base_b, unit_size);
1804 *cond_expr
1805 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1806 fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
1807 fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
1810 /* Create a conditional expression that represents the run-time checks for
1811 overlapping of address ranges represented by a list of data references
1812 pairs passed in ALIAS_PAIRS. Data references are in LOOP. The returned
1813 COND_EXPR is the conditional expression to be used in the if statement
1814 that controls which version of the loop gets executed at runtime. */
1816 void
1817 create_runtime_alias_checks (struct loop *loop,
1818 vec<dr_with_seg_len_pair_t> *alias_pairs,
1819 tree * cond_expr)
1821 tree part_cond_expr;
1823 for (size_t i = 0, s = alias_pairs->length (); i < s; ++i)
1825 const dr_with_seg_len& dr_a = (*alias_pairs)[i].first;
1826 const dr_with_seg_len& dr_b = (*alias_pairs)[i].second;
1828 if (dump_enabled_p ())
1830 dump_printf (MSG_NOTE, "create runtime check for data references ");
1831 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
1832 dump_printf (MSG_NOTE, " and ");
1833 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
1834 dump_printf (MSG_NOTE, "\n");
1837 /* Create condition expression for each pair data references. */
1838 create_intersect_range_checks (loop, &part_cond_expr, dr_a, dr_b);
1839 if (*cond_expr)
1840 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1841 *cond_expr, part_cond_expr);
1842 else
1843 *cond_expr = part_cond_expr;
1847 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1848 expressions. */
1849 static bool
1850 dr_equal_offsets_p1 (tree offset1, tree offset2)
1852 bool res;
1854 STRIP_NOPS (offset1);
1855 STRIP_NOPS (offset2);
1857 if (offset1 == offset2)
1858 return true;
1860 if (TREE_CODE (offset1) != TREE_CODE (offset2)
1861 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1862 return false;
1864 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1865 TREE_OPERAND (offset2, 0));
1867 if (!res || !BINARY_CLASS_P (offset1))
1868 return res;
1870 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1871 TREE_OPERAND (offset2, 1));
1873 return res;
1876 /* Check if DRA and DRB have equal offsets. */
1877 bool
1878 dr_equal_offsets_p (struct data_reference *dra,
1879 struct data_reference *drb)
1881 tree offset1, offset2;
1883 offset1 = DR_OFFSET (dra);
1884 offset2 = DR_OFFSET (drb);
1886 return dr_equal_offsets_p1 (offset1, offset2);
1889 /* Returns true if FNA == FNB. */
1891 static bool
1892 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1894 unsigned i, n = fna.length ();
1896 if (n != fnb.length ())
1897 return false;
1899 for (i = 0; i < n; i++)
1900 if (!operand_equal_p (fna[i], fnb[i], 0))
1901 return false;
1903 return true;
1906 /* If all the functions in CF are the same, returns one of them,
1907 otherwise returns NULL. */
1909 static affine_fn
1910 common_affine_function (conflict_function *cf)
1912 unsigned i;
1913 affine_fn comm;
1915 if (!CF_NONTRIVIAL_P (cf))
1916 return affine_fn ();
1918 comm = cf->fns[0];
1920 for (i = 1; i < cf->n; i++)
1921 if (!affine_function_equal_p (comm, cf->fns[i]))
1922 return affine_fn ();
1924 return comm;
1927 /* Returns the base of the affine function FN. */
1929 static tree
1930 affine_function_base (affine_fn fn)
1932 return fn[0];
1935 /* Returns true if FN is a constant. */
1937 static bool
1938 affine_function_constant_p (affine_fn fn)
1940 unsigned i;
1941 tree coef;
1943 for (i = 1; fn.iterate (i, &coef); i++)
1944 if (!integer_zerop (coef))
1945 return false;
1947 return true;
1950 /* Returns true if FN is the zero constant function. */
1952 static bool
1953 affine_function_zero_p (affine_fn fn)
1955 return (integer_zerop (affine_function_base (fn))
1956 && affine_function_constant_p (fn));
1959 /* Returns a signed integer type with the largest precision from TA
1960 and TB. */
1962 static tree
1963 signed_type_for_types (tree ta, tree tb)
1965 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1966 return signed_type_for (ta);
1967 else
1968 return signed_type_for (tb);
1971 /* Applies operation OP on affine functions FNA and FNB, and returns the
1972 result. */
1974 static affine_fn
1975 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1977 unsigned i, n, m;
1978 affine_fn ret;
1979 tree coef;
1981 if (fnb.length () > fna.length ())
1983 n = fna.length ();
1984 m = fnb.length ();
1986 else
1988 n = fnb.length ();
1989 m = fna.length ();
1992 ret.create (m);
1993 for (i = 0; i < n; i++)
1995 tree type = signed_type_for_types (TREE_TYPE (fna[i]),
1996 TREE_TYPE (fnb[i]));
1997 ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
2000 for (; fna.iterate (i, &coef); i++)
2001 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2002 coef, integer_zero_node));
2003 for (; fnb.iterate (i, &coef); i++)
2004 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2005 integer_zero_node, coef));
2007 return ret;
2010 /* Returns the sum of affine functions FNA and FNB. */
2012 static affine_fn
2013 affine_fn_plus (affine_fn fna, affine_fn fnb)
2015 return affine_fn_op (PLUS_EXPR, fna, fnb);
2018 /* Returns the difference of affine functions FNA and FNB. */
2020 static affine_fn
2021 affine_fn_minus (affine_fn fna, affine_fn fnb)
2023 return affine_fn_op (MINUS_EXPR, fna, fnb);
2026 /* Frees affine function FN. */
2028 static void
2029 affine_fn_free (affine_fn fn)
2031 fn.release ();
2034 /* Determine for each subscript in the data dependence relation DDR
2035 the distance. */
2037 static void
2038 compute_subscript_distance (struct data_dependence_relation *ddr)
2040 conflict_function *cf_a, *cf_b;
2041 affine_fn fn_a, fn_b, diff;
2043 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
2045 unsigned int i;
2047 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2049 struct subscript *subscript;
2051 subscript = DDR_SUBSCRIPT (ddr, i);
2052 cf_a = SUB_CONFLICTS_IN_A (subscript);
2053 cf_b = SUB_CONFLICTS_IN_B (subscript);
2055 fn_a = common_affine_function (cf_a);
2056 fn_b = common_affine_function (cf_b);
2057 if (!fn_a.exists () || !fn_b.exists ())
2059 SUB_DISTANCE (subscript) = chrec_dont_know;
2060 return;
2062 diff = affine_fn_minus (fn_a, fn_b);
2064 if (affine_function_constant_p (diff))
2065 SUB_DISTANCE (subscript) = affine_function_base (diff);
2066 else
2067 SUB_DISTANCE (subscript) = chrec_dont_know;
2069 affine_fn_free (diff);
2074 /* Returns the conflict function for "unknown". */
2076 static conflict_function *
2077 conflict_fn_not_known (void)
2079 conflict_function *fn = XCNEW (conflict_function);
2080 fn->n = NOT_KNOWN;
2082 return fn;
2085 /* Returns the conflict function for "independent". */
2087 static conflict_function *
2088 conflict_fn_no_dependence (void)
2090 conflict_function *fn = XCNEW (conflict_function);
2091 fn->n = NO_DEPENDENCE;
2093 return fn;
2096 /* Returns true if the address of OBJ is invariant in LOOP. */
2098 static bool
2099 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
2101 while (handled_component_p (obj))
2103 if (TREE_CODE (obj) == ARRAY_REF)
2105 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
2106 need to check the stride and the lower bound of the reference. */
2107 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2108 loop->num)
2109 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
2110 loop->num))
2111 return false;
2113 else if (TREE_CODE (obj) == COMPONENT_REF)
2115 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2116 loop->num))
2117 return false;
2119 obj = TREE_OPERAND (obj, 0);
2122 if (!INDIRECT_REF_P (obj)
2123 && TREE_CODE (obj) != MEM_REF)
2124 return true;
2126 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
2127 loop->num);
2130 /* Returns false if we can prove that data references A and B do not alias,
2131 true otherwise. If LOOP_NEST is false no cross-iteration aliases are
2132 considered. */
2134 bool
2135 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
2136 bool loop_nest)
2138 tree addr_a = DR_BASE_OBJECT (a);
2139 tree addr_b = DR_BASE_OBJECT (b);
2141 /* If we are not processing a loop nest but scalar code we
2142 do not need to care about possible cross-iteration dependences
2143 and thus can process the full original reference. Do so,
2144 similar to how loop invariant motion applies extra offset-based
2145 disambiguation. */
2146 if (!loop_nest)
2148 aff_tree off1, off2;
2149 poly_widest_int size1, size2;
2150 get_inner_reference_aff (DR_REF (a), &off1, &size1);
2151 get_inner_reference_aff (DR_REF (b), &off2, &size2);
2152 aff_combination_scale (&off1, -1);
2153 aff_combination_add (&off2, &off1);
2154 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
2155 return false;
2158 if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
2159 && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
2160 && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
2161 && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
2162 return false;
2164 /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
2165 do not know the size of the base-object. So we cannot do any
2166 offset/overlap based analysis but have to rely on points-to
2167 information only. */
2168 if (TREE_CODE (addr_a) == MEM_REF
2169 && (DR_UNCONSTRAINED_BASE (a)
2170 || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
2172 /* For true dependences we can apply TBAA. */
2173 if (flag_strict_aliasing
2174 && DR_IS_WRITE (a) && DR_IS_READ (b)
2175 && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
2176 get_alias_set (DR_REF (b))))
2177 return false;
2178 if (TREE_CODE (addr_b) == MEM_REF)
2179 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2180 TREE_OPERAND (addr_b, 0));
2181 else
2182 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2183 build_fold_addr_expr (addr_b));
2185 else if (TREE_CODE (addr_b) == MEM_REF
2186 && (DR_UNCONSTRAINED_BASE (b)
2187 || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
2189 /* For true dependences we can apply TBAA. */
2190 if (flag_strict_aliasing
2191 && DR_IS_WRITE (a) && DR_IS_READ (b)
2192 && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
2193 get_alias_set (DR_REF (b))))
2194 return false;
2195 if (TREE_CODE (addr_a) == MEM_REF)
2196 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2197 TREE_OPERAND (addr_b, 0));
2198 else
2199 return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
2200 TREE_OPERAND (addr_b, 0));
2203 /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
2204 that is being subsetted in the loop nest. */
2205 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
2206 return refs_output_dependent_p (addr_a, addr_b);
2207 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
2208 return refs_anti_dependent_p (addr_a, addr_b);
2209 return refs_may_alias_p (addr_a, addr_b);
2212 /* REF_A and REF_B both satisfy access_fn_component_p. Return true
2213 if it is meaningful to compare their associated access functions
2214 when checking for dependencies. */
2216 static bool
2217 access_fn_components_comparable_p (tree ref_a, tree ref_b)
2219 /* Allow pairs of component refs from the following sets:
2221 { REALPART_EXPR, IMAGPART_EXPR }
2222 { COMPONENT_REF }
2223 { ARRAY_REF }. */
2224 tree_code code_a = TREE_CODE (ref_a);
2225 tree_code code_b = TREE_CODE (ref_b);
2226 if (code_a == IMAGPART_EXPR)
2227 code_a = REALPART_EXPR;
2228 if (code_b == IMAGPART_EXPR)
2229 code_b = REALPART_EXPR;
2230 if (code_a != code_b)
2231 return false;
2233 if (TREE_CODE (ref_a) == COMPONENT_REF)
2234 /* ??? We cannot simply use the type of operand #0 of the refs here as
2235 the Fortran compiler smuggles type punning into COMPONENT_REFs.
2236 Use the DECL_CONTEXT of the FIELD_DECLs instead. */
2237 return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
2238 == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
2240 return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
2241 TREE_TYPE (TREE_OPERAND (ref_b, 0)));
2244 /* Initialize a data dependence relation between data accesses A and
2245 B. NB_LOOPS is the number of loops surrounding the references: the
2246 size of the classic distance/direction vectors. */
2248 struct data_dependence_relation *
2249 initialize_data_dependence_relation (struct data_reference *a,
2250 struct data_reference *b,
2251 vec<loop_p> loop_nest)
2253 struct data_dependence_relation *res;
2254 unsigned int i;
2256 res = XCNEW (struct data_dependence_relation);
2257 DDR_A (res) = a;
2258 DDR_B (res) = b;
2259 DDR_LOOP_NEST (res).create (0);
2260 DDR_SUBSCRIPTS (res).create (0);
2261 DDR_DIR_VECTS (res).create (0);
2262 DDR_DIST_VECTS (res).create (0);
2264 if (a == NULL || b == NULL)
2266 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2267 return res;
2270 /* If the data references do not alias, then they are independent. */
2271 if (!dr_may_alias_p (a, b, loop_nest.exists ()))
2273 DDR_ARE_DEPENDENT (res) = chrec_known;
2274 return res;
2277 unsigned int num_dimensions_a = DR_NUM_DIMENSIONS (a);
2278 unsigned int num_dimensions_b = DR_NUM_DIMENSIONS (b);
2279 if (num_dimensions_a == 0 || num_dimensions_b == 0)
2281 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2282 return res;
2285 /* For unconstrained bases, the root (highest-indexed) subscript
2286 describes a variation in the base of the original DR_REF rather
2287 than a component access. We have no type that accurately describes
2288 the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
2289 applying this subscript) so limit the search to the last real
2290 component access.
2292 E.g. for:
2294 void
2295 f (int a[][8], int b[][8])
2297 for (int i = 0; i < 8; ++i)
2298 a[i * 2][0] = b[i][0];
2301 the a and b accesses have a single ARRAY_REF component reference [0]
2302 but have two subscripts. */
2303 if (DR_UNCONSTRAINED_BASE (a))
2304 num_dimensions_a -= 1;
2305 if (DR_UNCONSTRAINED_BASE (b))
2306 num_dimensions_b -= 1;
2308 /* These structures describe sequences of component references in
2309 DR_REF (A) and DR_REF (B). Each component reference is tied to a
2310 specific access function. */
2311 struct {
2312 /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
2313 DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
2314 indices. In C notation, these are the indices of the rightmost
2315 component references; e.g. for a sequence .b.c.d, the start
2316 index is for .d. */
2317 unsigned int start_a;
2318 unsigned int start_b;
2320 /* The sequence contains LENGTH consecutive access functions from
2321 each DR. */
2322 unsigned int length;
2324 /* The enclosing objects for the A and B sequences respectively,
2325 i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
2326 and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied. */
2327 tree object_a;
2328 tree object_b;
2329 } full_seq = {}, struct_seq = {};
2331 /* Before each iteration of the loop:
2333 - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
2334 - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B). */
2335 unsigned int index_a = 0;
2336 unsigned int index_b = 0;
2337 tree ref_a = DR_REF (a);
2338 tree ref_b = DR_REF (b);
2340 /* Now walk the component references from the final DR_REFs back up to
2341 the enclosing base objects. Each component reference corresponds
2342 to one access function in the DR, with access function 0 being for
2343 the final DR_REF and the highest-indexed access function being the
2344 one that is applied to the base of the DR.
2346 Look for a sequence of component references whose access functions
2347 are comparable (see access_fn_components_comparable_p). If more
2348 than one such sequence exists, pick the one nearest the base
2349 (which is the leftmost sequence in C notation). Store this sequence
2350 in FULL_SEQ.
2352 For example, if we have:
2354 struct foo { struct bar s; ... } (*a)[10], (*b)[10];
2356 A: a[0][i].s.c.d
2357 B: __real b[0][i].s.e[i].f
2359 (where d is the same type as the real component of f) then the access
2360 functions would be:
2362 0 1 2 3
2363 A: .d .c .s [i]
2365 0 1 2 3 4 5
2366 B: __real .f [i] .e .s [i]
2368 The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
2369 and [i] is an ARRAY_REF. However, the A1/B3 column contains two
2370 COMPONENT_REF accesses for struct bar, so is comparable. Likewise
2371 the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
2372 so is comparable. The A3/B5 column contains two ARRAY_REFs that
2373 index foo[10] arrays, so is again comparable. The sequence is
2374 therefore:
2376 A: [1, 3] (i.e. [i].s.c)
2377 B: [3, 5] (i.e. [i].s.e)
2379 Also look for sequences of component references whose access
2380 functions are comparable and whose enclosing objects have the same
2381 RECORD_TYPE. Store this sequence in STRUCT_SEQ. In the above
2382 example, STRUCT_SEQ would be:
2384 A: [1, 2] (i.e. s.c)
2385 B: [3, 4] (i.e. s.e) */
2386 while (index_a < num_dimensions_a && index_b < num_dimensions_b)
2388 /* REF_A and REF_B must be one of the component access types
2389 allowed by dr_analyze_indices. */
2390 gcc_checking_assert (access_fn_component_p (ref_a));
2391 gcc_checking_assert (access_fn_component_p (ref_b));
2393 /* Get the immediately-enclosing objects for REF_A and REF_B,
2394 i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
2395 and DR_ACCESS_FN (B, INDEX_B). */
2396 tree object_a = TREE_OPERAND (ref_a, 0);
2397 tree object_b = TREE_OPERAND (ref_b, 0);
2399 tree type_a = TREE_TYPE (object_a);
2400 tree type_b = TREE_TYPE (object_b);
2401 if (access_fn_components_comparable_p (ref_a, ref_b))
2403 /* This pair of component accesses is comparable for dependence
2404 analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
2405 DR_ACCESS_FN (B, INDEX_B) in the sequence. */
2406 if (full_seq.start_a + full_seq.length != index_a
2407 || full_seq.start_b + full_seq.length != index_b)
2409 /* The accesses don't extend the current sequence,
2410 so start a new one here. */
2411 full_seq.start_a = index_a;
2412 full_seq.start_b = index_b;
2413 full_seq.length = 0;
2416 /* Add this pair of references to the sequence. */
2417 full_seq.length += 1;
2418 full_seq.object_a = object_a;
2419 full_seq.object_b = object_b;
2421 /* If the enclosing objects are structures (and thus have the
2422 same RECORD_TYPE), record the new sequence in STRUCT_SEQ. */
2423 if (TREE_CODE (type_a) == RECORD_TYPE)
2424 struct_seq = full_seq;
2426 /* Move to the next containing reference for both A and B. */
2427 ref_a = object_a;
2428 ref_b = object_b;
2429 index_a += 1;
2430 index_b += 1;
2431 continue;
2434 /* Try to approach equal type sizes. */
2435 if (!COMPLETE_TYPE_P (type_a)
2436 || !COMPLETE_TYPE_P (type_b)
2437 || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
2438 || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
2439 break;
2441 unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
2442 unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
2443 if (size_a <= size_b)
2445 index_a += 1;
2446 ref_a = object_a;
2448 if (size_b <= size_a)
2450 index_b += 1;
2451 ref_b = object_b;
2455 /* See whether FULL_SEQ ends at the base and whether the two bases
2456 are equal. We do not care about TBAA or alignment info so we can
2457 use OEP_ADDRESS_OF to avoid false negatives. */
2458 tree base_a = DR_BASE_OBJECT (a);
2459 tree base_b = DR_BASE_OBJECT (b);
2460 bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
2461 && full_seq.start_b + full_seq.length == num_dimensions_b
2462 && DR_UNCONSTRAINED_BASE (a) == DR_UNCONSTRAINED_BASE (b)
2463 && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
2464 && types_compatible_p (TREE_TYPE (base_a),
2465 TREE_TYPE (base_b))
2466 && (!loop_nest.exists ()
2467 || (object_address_invariant_in_loop_p
2468 (loop_nest[0], base_a))));
2470 /* If the bases are the same, we can include the base variation too.
2471 E.g. the b accesses in:
2473 for (int i = 0; i < n; ++i)
2474 b[i + 4][0] = b[i][0];
2476 have a definite dependence distance of 4, while for:
2478 for (int i = 0; i < n; ++i)
2479 a[i + 4][0] = b[i][0];
2481 the dependence distance depends on the gap between a and b.
2483 If the bases are different then we can only rely on the sequence
2484 rooted at a structure access, since arrays are allowed to overlap
2485 arbitrarily and change shape arbitrarily. E.g. we treat this as
2486 valid code:
2488 int a[256];
2490 ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];
2492 where two lvalues with the same int[4][3] type overlap, and where
2493 both lvalues are distinct from the object's declared type. */
2494 if (same_base_p)
2496 if (DR_UNCONSTRAINED_BASE (a))
2497 full_seq.length += 1;
2499 else
2500 full_seq = struct_seq;
2502 /* Punt if we didn't find a suitable sequence. */
2503 if (full_seq.length == 0)
2505 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2506 return res;
2509 if (!same_base_p)
2511 /* Partial overlap is possible for different bases when strict aliasing
2512 is not in effect. It's also possible if either base involves a union
2513 access; e.g. for:
2515 struct s1 { int a[2]; };
2516 struct s2 { struct s1 b; int c; };
2517 struct s3 { int d; struct s1 e; };
2518 union u { struct s2 f; struct s3 g; } *p, *q;
2520 the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
2521 "p->g.e" (base "p->g") and might partially overlap the s1 at
2522 "q->g.e" (base "q->g"). */
2523 if (!flag_strict_aliasing
2524 || ref_contains_union_access_p (full_seq.object_a)
2525 || ref_contains_union_access_p (full_seq.object_b))
2527 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2528 return res;
2531 DDR_COULD_BE_INDEPENDENT_P (res) = true;
2532 if (!loop_nest.exists ()
2533 || (object_address_invariant_in_loop_p (loop_nest[0],
2534 full_seq.object_a)
2535 && object_address_invariant_in_loop_p (loop_nest[0],
2536 full_seq.object_b)))
2538 DDR_OBJECT_A (res) = full_seq.object_a;
2539 DDR_OBJECT_B (res) = full_seq.object_b;
2543 DDR_AFFINE_P (res) = true;
2544 DDR_ARE_DEPENDENT (res) = NULL_TREE;
2545 DDR_SUBSCRIPTS (res).create (full_seq.length);
2546 DDR_LOOP_NEST (res) = loop_nest;
2547 DDR_INNER_LOOP (res) = 0;
2548 DDR_SELF_REFERENCE (res) = false;
2550 for (i = 0; i < full_seq.length; ++i)
2552 struct subscript *subscript;
2554 subscript = XNEW (struct subscript);
2555 SUB_ACCESS_FN (subscript, 0) = DR_ACCESS_FN (a, full_seq.start_a + i);
2556 SUB_ACCESS_FN (subscript, 1) = DR_ACCESS_FN (b, full_seq.start_b + i);
2557 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
2558 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
2559 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
2560 SUB_DISTANCE (subscript) = chrec_dont_know;
2561 DDR_SUBSCRIPTS (res).safe_push (subscript);
2564 return res;
2567 /* Frees memory used by the conflict function F. */
2569 static void
2570 free_conflict_function (conflict_function *f)
2572 unsigned i;
2574 if (CF_NONTRIVIAL_P (f))
2576 for (i = 0; i < f->n; i++)
2577 affine_fn_free (f->fns[i]);
2579 free (f);
2582 /* Frees memory used by SUBSCRIPTS. */
2584 static void
2585 free_subscripts (vec<subscript_p> subscripts)
2587 unsigned i;
2588 subscript_p s;
2590 FOR_EACH_VEC_ELT (subscripts, i, s)
2592 free_conflict_function (s->conflicting_iterations_in_a);
2593 free_conflict_function (s->conflicting_iterations_in_b);
2594 free (s);
2596 subscripts.release ();
2599 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
2600 description. */
2602 static inline void
2603 finalize_ddr_dependent (struct data_dependence_relation *ddr,
2604 tree chrec)
2606 DDR_ARE_DEPENDENT (ddr) = chrec;
2607 free_subscripts (DDR_SUBSCRIPTS (ddr));
2608 DDR_SUBSCRIPTS (ddr).create (0);
2611 /* The dependence relation DDR cannot be represented by a distance
2612 vector. */
2614 static inline void
2615 non_affine_dependence_relation (struct data_dependence_relation *ddr)
2617 if (dump_file && (dump_flags & TDF_DETAILS))
2618 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
2620 DDR_AFFINE_P (ddr) = false;
2625 /* This section contains the classic Banerjee tests. */
2627 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
2628 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
2630 static inline bool
2631 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
2633 return (evolution_function_is_constant_p (chrec_a)
2634 && evolution_function_is_constant_p (chrec_b));
2637 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
2638 variable, i.e., if the SIV (Single Index Variable) test is true. */
2640 static bool
2641 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
2643 if ((evolution_function_is_constant_p (chrec_a)
2644 && evolution_function_is_univariate_p (chrec_b))
2645 || (evolution_function_is_constant_p (chrec_b)
2646 && evolution_function_is_univariate_p (chrec_a)))
2647 return true;
2649 if (evolution_function_is_univariate_p (chrec_a)
2650 && evolution_function_is_univariate_p (chrec_b))
2652 switch (TREE_CODE (chrec_a))
2654 case POLYNOMIAL_CHREC:
2655 switch (TREE_CODE (chrec_b))
2657 case POLYNOMIAL_CHREC:
2658 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
2659 return false;
2660 /* FALLTHRU */
2662 default:
2663 return true;
2666 default:
2667 return true;
2671 return false;
2674 /* Creates a conflict function with N dimensions. The affine functions
2675 in each dimension follow. */
2677 static conflict_function *
2678 conflict_fn (unsigned n, ...)
2680 unsigned i;
2681 conflict_function *ret = XCNEW (conflict_function);
2682 va_list ap;
2684 gcc_assert (n > 0 && n <= MAX_DIM);
2685 va_start (ap, n);
2687 ret->n = n;
2688 for (i = 0; i < n; i++)
2689 ret->fns[i] = va_arg (ap, affine_fn);
2690 va_end (ap);
2692 return ret;
2695 /* Returns constant affine function with value CST. */
2697 static affine_fn
2698 affine_fn_cst (tree cst)
2700 affine_fn fn;
2701 fn.create (1);
2702 fn.quick_push (cst);
2703 return fn;
2706 /* Returns affine function with single variable, CST + COEF * x_DIM. */
2708 static affine_fn
2709 affine_fn_univar (tree cst, unsigned dim, tree coef)
2711 affine_fn fn;
2712 fn.create (dim + 1);
2713 unsigned i;
2715 gcc_assert (dim > 0);
2716 fn.quick_push (cst);
2717 for (i = 1; i < dim; i++)
2718 fn.quick_push (integer_zero_node);
2719 fn.quick_push (coef);
2720 return fn;
2723 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
2724 *OVERLAPS_B are initialized to the functions that describe the
2725 relation between the elements accessed twice by CHREC_A and
2726 CHREC_B. For k >= 0, the following property is verified:
2728 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2730 static void
2731 analyze_ziv_subscript (tree chrec_a,
2732 tree chrec_b,
2733 conflict_function **overlaps_a,
2734 conflict_function **overlaps_b,
2735 tree *last_conflicts)
2737 tree type, difference;
2738 dependence_stats.num_ziv++;
2740 if (dump_file && (dump_flags & TDF_DETAILS))
2741 fprintf (dump_file, "(analyze_ziv_subscript \n");
2743 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2744 chrec_a = chrec_convert (type, chrec_a, NULL);
2745 chrec_b = chrec_convert (type, chrec_b, NULL);
2746 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2748 switch (TREE_CODE (difference))
2750 case INTEGER_CST:
2751 if (integer_zerop (difference))
2753 /* The difference is equal to zero: the accessed index
2754 overlaps for each iteration in the loop. */
2755 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2756 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2757 *last_conflicts = chrec_dont_know;
2758 dependence_stats.num_ziv_dependent++;
2760 else
2762 /* The accesses do not overlap. */
2763 *overlaps_a = conflict_fn_no_dependence ();
2764 *overlaps_b = conflict_fn_no_dependence ();
2765 *last_conflicts = integer_zero_node;
2766 dependence_stats.num_ziv_independent++;
2768 break;
2770 default:
2771 /* We're not sure whether the indexes overlap. For the moment,
2772 conservatively answer "don't know". */
2773 if (dump_file && (dump_flags & TDF_DETAILS))
2774 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
2776 *overlaps_a = conflict_fn_not_known ();
2777 *overlaps_b = conflict_fn_not_known ();
2778 *last_conflicts = chrec_dont_know;
2779 dependence_stats.num_ziv_unimplemented++;
2780 break;
2783 if (dump_file && (dump_flags & TDF_DETAILS))
2784 fprintf (dump_file, ")\n");
2787 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
2788 and only if it fits to the int type. If this is not the case, or the
2789 bound on the number of iterations of LOOP could not be derived, returns
2790 chrec_dont_know. */
2792 static tree
2793 max_stmt_executions_tree (struct loop *loop)
2795 widest_int nit;
2797 if (!max_stmt_executions (loop, &nit))
2798 return chrec_dont_know;
2800 if (!wi::fits_to_tree_p (nit, unsigned_type_node))
2801 return chrec_dont_know;
2803 return wide_int_to_tree (unsigned_type_node, nit);
2806 /* Determine whether the CHREC is always positive/negative. If the expression
2807 cannot be statically analyzed, return false, otherwise set the answer into
2808 VALUE. */
2810 static bool
2811 chrec_is_positive (tree chrec, bool *value)
2813 bool value0, value1, value2;
2814 tree end_value, nb_iter;
2816 switch (TREE_CODE (chrec))
2818 case POLYNOMIAL_CHREC:
2819 if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
2820 || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
2821 return false;
2823 /* FIXME -- overflows. */
2824 if (value0 == value1)
2826 *value = value0;
2827 return true;
2830 /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
2831 and the proof consists in showing that the sign never
2832 changes during the execution of the loop, from 0 to
2833 loop->nb_iterations. */
2834 if (!evolution_function_is_affine_p (chrec))
2835 return false;
2837 nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
2838 if (chrec_contains_undetermined (nb_iter))
2839 return false;
2841 #if 0
2842 /* TODO -- If the test is after the exit, we may decrease the number of
2843 iterations by one. */
2844 if (after_exit)
2845 nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
2846 #endif
2848 end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
2850 if (!chrec_is_positive (end_value, &value2))
2851 return false;
2853 *value = value0;
2854 return value0 == value1;
2856 case INTEGER_CST:
2857 switch (tree_int_cst_sgn (chrec))
2859 case -1:
2860 *value = false;
2861 break;
2862 case 1:
2863 *value = true;
2864 break;
2865 default:
2866 return false;
2868 return true;
2870 default:
2871 return false;
2876 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
2877 constant, and CHREC_B is an affine function. *OVERLAPS_A and
2878 *OVERLAPS_B are initialized to the functions that describe the
2879 relation between the elements accessed twice by CHREC_A and
2880 CHREC_B. For k >= 0, the following property is verified:
2882 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2884 static void
2885 analyze_siv_subscript_cst_affine (tree chrec_a,
2886 tree chrec_b,
2887 conflict_function **overlaps_a,
2888 conflict_function **overlaps_b,
2889 tree *last_conflicts)
2891 bool value0, value1, value2;
2892 tree type, difference, tmp;
2894 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2895 chrec_a = chrec_convert (type, chrec_a, NULL);
2896 chrec_b = chrec_convert (type, chrec_b, NULL);
2897 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
2899 /* Special case overlap in the first iteration. */
2900 if (integer_zerop (difference))
2902 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2903 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2904 *last_conflicts = integer_one_node;
2905 return;
2908 if (!chrec_is_positive (initial_condition (difference), &value0))
2910 if (dump_file && (dump_flags & TDF_DETAILS))
2911 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
2913 dependence_stats.num_siv_unimplemented++;
2914 *overlaps_a = conflict_fn_not_known ();
2915 *overlaps_b = conflict_fn_not_known ();
2916 *last_conflicts = chrec_dont_know;
2917 return;
2919 else
2921 if (value0 == false)
2923 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
2925 if (dump_file && (dump_flags & TDF_DETAILS))
2926 fprintf (dump_file, "siv test failed: chrec not positive.\n");
2928 *overlaps_a = conflict_fn_not_known ();
2929 *overlaps_b = conflict_fn_not_known ();
2930 *last_conflicts = chrec_dont_know;
2931 dependence_stats.num_siv_unimplemented++;
2932 return;
2934 else
2936 if (value1 == true)
2938 /* Example:
2939 chrec_a = 12
2940 chrec_b = {10, +, 1}
2943 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
2945 HOST_WIDE_INT numiter;
2946 struct loop *loop = get_chrec_loop (chrec_b);
2948 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2949 tmp = fold_build2 (EXACT_DIV_EXPR, type,
2950 fold_build1 (ABS_EXPR, type, difference),
2951 CHREC_RIGHT (chrec_b));
2952 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
2953 *last_conflicts = integer_one_node;
2956 /* Perform weak-zero siv test to see if overlap is
2957 outside the loop bounds. */
2958 numiter = max_stmt_executions_int (loop);
2960 if (numiter >= 0
2961 && compare_tree_int (tmp, numiter) > 0)
2963 free_conflict_function (*overlaps_a);
2964 free_conflict_function (*overlaps_b);
2965 *overlaps_a = conflict_fn_no_dependence ();
2966 *overlaps_b = conflict_fn_no_dependence ();
2967 *last_conflicts = integer_zero_node;
2968 dependence_stats.num_siv_independent++;
2969 return;
2971 dependence_stats.num_siv_dependent++;
2972 return;
2975 /* When the step does not divide the difference, there are
2976 no overlaps. */
2977 else
2979 *overlaps_a = conflict_fn_no_dependence ();
2980 *overlaps_b = conflict_fn_no_dependence ();
2981 *last_conflicts = integer_zero_node;
2982 dependence_stats.num_siv_independent++;
2983 return;
2987 else
2989 /* Example:
2990 chrec_a = 12
2991 chrec_b = {10, +, -1}
2993 In this case, chrec_a will not overlap with chrec_b. */
2994 *overlaps_a = conflict_fn_no_dependence ();
2995 *overlaps_b = conflict_fn_no_dependence ();
2996 *last_conflicts = integer_zero_node;
2997 dependence_stats.num_siv_independent++;
2998 return;
3002 else
3004 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
3006 if (dump_file && (dump_flags & TDF_DETAILS))
3007 fprintf (dump_file, "siv test failed: chrec not positive.\n");
3009 *overlaps_a = conflict_fn_not_known ();
3010 *overlaps_b = conflict_fn_not_known ();
3011 *last_conflicts = chrec_dont_know;
3012 dependence_stats.num_siv_unimplemented++;
3013 return;
3015 else
3017 if (value2 == false)
3019 /* Example:
3020 chrec_a = 3
3021 chrec_b = {10, +, -1}
3023 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3025 HOST_WIDE_INT numiter;
3026 struct loop *loop = get_chrec_loop (chrec_b);
3028 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3029 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
3030 CHREC_RIGHT (chrec_b));
3031 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3032 *last_conflicts = integer_one_node;
3034 /* Perform weak-zero siv test to see if overlap is
3035 outside the loop bounds. */
3036 numiter = max_stmt_executions_int (loop);
3038 if (numiter >= 0
3039 && compare_tree_int (tmp, numiter) > 0)
3041 free_conflict_function (*overlaps_a);
3042 free_conflict_function (*overlaps_b);
3043 *overlaps_a = conflict_fn_no_dependence ();
3044 *overlaps_b = conflict_fn_no_dependence ();
3045 *last_conflicts = integer_zero_node;
3046 dependence_stats.num_siv_independent++;
3047 return;
3049 dependence_stats.num_siv_dependent++;
3050 return;
3053 /* When the step does not divide the difference, there
3054 are no overlaps. */
3055 else
3057 *overlaps_a = conflict_fn_no_dependence ();
3058 *overlaps_b = conflict_fn_no_dependence ();
3059 *last_conflicts = integer_zero_node;
3060 dependence_stats.num_siv_independent++;
3061 return;
3064 else
3066 /* Example:
3067 chrec_a = 3
3068 chrec_b = {4, +, 1}
3070 In this case, chrec_a will not overlap with chrec_b. */
3071 *overlaps_a = conflict_fn_no_dependence ();
3072 *overlaps_b = conflict_fn_no_dependence ();
3073 *last_conflicts = integer_zero_node;
3074 dependence_stats.num_siv_independent++;
3075 return;
3082 /* Helper recursive function for initializing the matrix A. Returns
3083 the initial value of CHREC. */
3085 static tree
3086 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
3088 gcc_assert (chrec);
3090 switch (TREE_CODE (chrec))
3092 case POLYNOMIAL_CHREC:
3093 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
3094 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
3096 case PLUS_EXPR:
3097 case MULT_EXPR:
3098 case MINUS_EXPR:
3100 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3101 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
3103 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
3106 CASE_CONVERT:
3108 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3109 return chrec_convert (chrec_type (chrec), op, NULL);
3112 case BIT_NOT_EXPR:
3114 /* Handle ~X as -1 - X. */
3115 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3116 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
3117 build_int_cst (TREE_TYPE (chrec), -1), op);
3120 case INTEGER_CST:
3121 return chrec;
3123 default:
3124 gcc_unreachable ();
3125 return NULL_TREE;
3129 #define FLOOR_DIV(x,y) ((x) / (y))
3131 /* Solves the special case of the Diophantine equation:
3132 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
3134 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
3135 number of iterations that loops X and Y run. The overlaps will be
3136 constructed as evolutions in dimension DIM. */
3138 static void
3139 compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
3140 HOST_WIDE_INT step_a,
3141 HOST_WIDE_INT step_b,
3142 affine_fn *overlaps_a,
3143 affine_fn *overlaps_b,
3144 tree *last_conflicts, int dim)
3146 if (((step_a > 0 && step_b > 0)
3147 || (step_a < 0 && step_b < 0)))
3149 HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
3150 HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;
3152 gcd_steps_a_b = gcd (step_a, step_b);
3153 step_overlaps_a = step_b / gcd_steps_a_b;
3154 step_overlaps_b = step_a / gcd_steps_a_b;
3156 if (niter > 0)
3158 tau2 = FLOOR_DIV (niter, step_overlaps_a);
3159 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
3160 last_conflict = tau2;
3161 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
3163 else
3164 *last_conflicts = chrec_dont_know;
3166 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
3167 build_int_cst (NULL_TREE,
3168 step_overlaps_a));
3169 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
3170 build_int_cst (NULL_TREE,
3171 step_overlaps_b));
3174 else
3176 *overlaps_a = affine_fn_cst (integer_zero_node);
3177 *overlaps_b = affine_fn_cst (integer_zero_node);
3178 *last_conflicts = integer_zero_node;
3182 /* Solves the special case of a Diophantine equation where CHREC_A is
3183 an affine bivariate function, and CHREC_B is an affine univariate
3184 function. For example,
3186 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
3188 has the following overlapping functions:
3190 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
3191 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
3192 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
3194 FORNOW: This is a specialized implementation for a case occurring in
3195 a common benchmark. Implement the general algorithm. */
3197 static void
3198 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
3199 conflict_function **overlaps_a,
3200 conflict_function **overlaps_b,
3201 tree *last_conflicts)
3203 bool xz_p, yz_p, xyz_p;
3204 HOST_WIDE_INT step_x, step_y, step_z;
3205 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
3206 affine_fn overlaps_a_xz, overlaps_b_xz;
3207 affine_fn overlaps_a_yz, overlaps_b_yz;
3208 affine_fn overlaps_a_xyz, overlaps_b_xyz;
3209 affine_fn ova1, ova2, ovb;
3210 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
3212 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
3213 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
3214 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
3216 niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
3217 niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
3218 niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
3220 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
3222 if (dump_file && (dump_flags & TDF_DETAILS))
3223 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
3225 *overlaps_a = conflict_fn_not_known ();
3226 *overlaps_b = conflict_fn_not_known ();
3227 *last_conflicts = chrec_dont_know;
3228 return;
3231 niter = MIN (niter_x, niter_z);
3232 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
3233 &overlaps_a_xz,
3234 &overlaps_b_xz,
3235 &last_conflicts_xz, 1);
3236 niter = MIN (niter_y, niter_z);
3237 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
3238 &overlaps_a_yz,
3239 &overlaps_b_yz,
3240 &last_conflicts_yz, 2);
3241 niter = MIN (niter_x, niter_z);
3242 niter = MIN (niter_y, niter);
3243 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
3244 &overlaps_a_xyz,
3245 &overlaps_b_xyz,
3246 &last_conflicts_xyz, 3);
3248 xz_p = !integer_zerop (last_conflicts_xz);
3249 yz_p = !integer_zerop (last_conflicts_yz);
3250 xyz_p = !integer_zerop (last_conflicts_xyz);
3252 if (xz_p || yz_p || xyz_p)
3254 ova1 = affine_fn_cst (integer_zero_node);
3255 ova2 = affine_fn_cst (integer_zero_node);
3256 ovb = affine_fn_cst (integer_zero_node);
3257 if (xz_p)
3259 affine_fn t0 = ova1;
3260 affine_fn t2 = ovb;
3262 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
3263 ovb = affine_fn_plus (ovb, overlaps_b_xz);
3264 affine_fn_free (t0);
3265 affine_fn_free (t2);
3266 *last_conflicts = last_conflicts_xz;
3268 if (yz_p)
3270 affine_fn t0 = ova2;
3271 affine_fn t2 = ovb;
3273 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
3274 ovb = affine_fn_plus (ovb, overlaps_b_yz);
3275 affine_fn_free (t0);
3276 affine_fn_free (t2);
3277 *last_conflicts = last_conflicts_yz;
3279 if (xyz_p)
3281 affine_fn t0 = ova1;
3282 affine_fn t2 = ova2;
3283 affine_fn t4 = ovb;
3285 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
3286 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
3287 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
3288 affine_fn_free (t0);
3289 affine_fn_free (t2);
3290 affine_fn_free (t4);
3291 *last_conflicts = last_conflicts_xyz;
3293 *overlaps_a = conflict_fn (2, ova1, ova2);
3294 *overlaps_b = conflict_fn (1, ovb);
3296 else
3298 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3299 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3300 *last_conflicts = integer_zero_node;
3303 affine_fn_free (overlaps_a_xz);
3304 affine_fn_free (overlaps_b_xz);
3305 affine_fn_free (overlaps_a_yz);
3306 affine_fn_free (overlaps_b_yz);
3307 affine_fn_free (overlaps_a_xyz);
3308 affine_fn_free (overlaps_b_xyz);
3311 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
3313 static void
3314 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
3315 int size)
3317 memcpy (vec2, vec1, size * sizeof (*vec1));
3320 /* Copy the elements of M x N matrix MAT1 to MAT2. */
3322 static void
3323 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
3324 int m, int n)
3326 int i;
3328 for (i = 0; i < m; i++)
3329 lambda_vector_copy (mat1[i], mat2[i], n);
3332 /* Store the N x N identity matrix in MAT. */
3334 static void
3335 lambda_matrix_id (lambda_matrix mat, int size)
3337 int i, j;
3339 for (i = 0; i < size; i++)
3340 for (j = 0; j < size; j++)
3341 mat[i][j] = (i == j) ? 1 : 0;
3344 /* Return the first nonzero element of vector VEC1 between START and N.
3345 We must have START <= N. Returns N if VEC1 is the zero vector. */
3347 static int
3348 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
3350 int j = start;
3351 while (j < n && vec1[j] == 0)
3352 j++;
3353 return j;
3356 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
3357 R2 = R2 + CONST1 * R1. */
3359 static void
3360 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
3362 int i;
3364 if (const1 == 0)
3365 return;
3367 for (i = 0; i < n; i++)
3368 mat[r2][i] += const1 * mat[r1][i];
3371 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
3372 and store the result in VEC2. */
3374 static void
3375 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
3376 int size, int const1)
3378 int i;
3380 if (const1 == 0)
3381 lambda_vector_clear (vec2, size);
3382 else
3383 for (i = 0; i < size; i++)
3384 vec2[i] = const1 * vec1[i];
3387 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
3389 static void
3390 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
3391 int size)
3393 lambda_vector_mult_const (vec1, vec2, size, -1);
3396 /* Negate row R1 of matrix MAT which has N columns. */
3398 static void
3399 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
3401 lambda_vector_negate (mat[r1], mat[r1], n);
3404 /* Return true if two vectors are equal. */
3406 static bool
3407 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
3409 int i;
3410 for (i = 0; i < size; i++)
3411 if (vec1[i] != vec2[i])
3412 return false;
3413 return true;
3416 /* Given an M x N integer matrix A, this function determines an M x
3417 M unimodular matrix U, and an M x N echelon matrix S such that
3418 "U.A = S". This decomposition is also known as "right Hermite".
3420 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
3421 Restructuring Compilers" Utpal Banerjee. */
3423 static void
3424 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
3425 lambda_matrix S, lambda_matrix U)
3427 int i, j, i0 = 0;
3429 lambda_matrix_copy (A, S, m, n);
3430 lambda_matrix_id (U, m);
3432 for (j = 0; j < n; j++)
3434 if (lambda_vector_first_nz (S[j], m, i0) < m)
3436 ++i0;
3437 for (i = m - 1; i >= i0; i--)
3439 while (S[i][j] != 0)
3441 int sigma, factor, a, b;
3443 a = S[i-1][j];
3444 b = S[i][j];
3445 sigma = (a * b < 0) ? -1: 1;
3446 a = abs (a);
3447 b = abs (b);
3448 factor = sigma * (a / b);
3450 lambda_matrix_row_add (S, n, i, i-1, -factor);
3451 std::swap (S[i], S[i-1]);
3453 lambda_matrix_row_add (U, m, i, i-1, -factor);
3454 std::swap (U[i], U[i-1]);
3461 /* Determines the overlapping elements due to accesses CHREC_A and
3462 CHREC_B, that are affine functions. This function cannot handle
3463 symbolic evolution functions, ie. when initial conditions are
3464 parameters, because it uses lambda matrices of integers. */
3466 static void
3467 analyze_subscript_affine_affine (tree chrec_a,
3468 tree chrec_b,
3469 conflict_function **overlaps_a,
3470 conflict_function **overlaps_b,
3471 tree *last_conflicts)
3473 unsigned nb_vars_a, nb_vars_b, dim;
3474 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
3475 lambda_matrix A, U, S;
3476 struct obstack scratch_obstack;
3478 if (eq_evolutions_p (chrec_a, chrec_b))
3480 /* The accessed index overlaps for each iteration in the
3481 loop. */
3482 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3483 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3484 *last_conflicts = chrec_dont_know;
3485 return;
3487 if (dump_file && (dump_flags & TDF_DETAILS))
3488 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
3490 /* For determining the initial intersection, we have to solve a
3491 Diophantine equation. This is the most time consuming part.
3493 For answering to the question: "Is there a dependence?" we have
3494 to prove that there exists a solution to the Diophantine
3495 equation, and that the solution is in the iteration domain,
3496 i.e. the solution is positive or zero, and that the solution
3497 happens before the upper bound loop.nb_iterations. Otherwise
3498 there is no dependence. This function outputs a description of
3499 the iterations that hold the intersections. */
3501 nb_vars_a = nb_vars_in_chrec (chrec_a);
3502 nb_vars_b = nb_vars_in_chrec (chrec_b);
3504 gcc_obstack_init (&scratch_obstack);
3506 dim = nb_vars_a + nb_vars_b;
3507 U = lambda_matrix_new (dim, dim, &scratch_obstack);
3508 A = lambda_matrix_new (dim, 1, &scratch_obstack);
3509 S = lambda_matrix_new (dim, 1, &scratch_obstack);
3511 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
3512 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
3513 gamma = init_b - init_a;
3515 /* Don't do all the hard work of solving the Diophantine equation
3516 when we already know the solution: for example,
3517 | {3, +, 1}_1
3518 | {3, +, 4}_2
3519 | gamma = 3 - 3 = 0.
3520 Then the first overlap occurs during the first iterations:
3521 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
3523 if (gamma == 0)
3525 if (nb_vars_a == 1 && nb_vars_b == 1)
3527 HOST_WIDE_INT step_a, step_b;
3528 HOST_WIDE_INT niter, niter_a, niter_b;
3529 affine_fn ova, ovb;
3531 niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
3532 niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
3533 niter = MIN (niter_a, niter_b);
3534 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
3535 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
3537 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
3538 &ova, &ovb,
3539 last_conflicts, 1);
3540 *overlaps_a = conflict_fn (1, ova);
3541 *overlaps_b = conflict_fn (1, ovb);
3544 else if (nb_vars_a == 2 && nb_vars_b == 1)
3545 compute_overlap_steps_for_affine_1_2
3546 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
3548 else if (nb_vars_a == 1 && nb_vars_b == 2)
3549 compute_overlap_steps_for_affine_1_2
3550 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
3552 else
3554 if (dump_file && (dump_flags & TDF_DETAILS))
3555 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
3556 *overlaps_a = conflict_fn_not_known ();
3557 *overlaps_b = conflict_fn_not_known ();
3558 *last_conflicts = chrec_dont_know;
3560 goto end_analyze_subs_aa;
3563 /* U.A = S */
3564 lambda_matrix_right_hermite (A, dim, 1, S, U);
3566 if (S[0][0] < 0)
3568 S[0][0] *= -1;
3569 lambda_matrix_row_negate (U, dim, 0);
3571 gcd_alpha_beta = S[0][0];
3573 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
3574 but that is a quite strange case. Instead of ICEing, answer
3575 don't know. */
3576 if (gcd_alpha_beta == 0)
3578 *overlaps_a = conflict_fn_not_known ();
3579 *overlaps_b = conflict_fn_not_known ();
3580 *last_conflicts = chrec_dont_know;
3581 goto end_analyze_subs_aa;
3584 /* The classic "gcd-test". */
3585 if (!int_divides_p (gcd_alpha_beta, gamma))
3587 /* The "gcd-test" has determined that there is no integer
3588 solution, i.e. there is no dependence. */
3589 *overlaps_a = conflict_fn_no_dependence ();
3590 *overlaps_b = conflict_fn_no_dependence ();
3591 *last_conflicts = integer_zero_node;
3594 /* Both access functions are univariate. This includes SIV and MIV cases. */
3595 else if (nb_vars_a == 1 && nb_vars_b == 1)
3597 /* Both functions should have the same evolution sign. */
3598 if (((A[0][0] > 0 && -A[1][0] > 0)
3599 || (A[0][0] < 0 && -A[1][0] < 0)))
3601 /* The solutions are given by:
3603 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
3604 | [u21 u22] [y0]
3606 For a given integer t. Using the following variables,
3608 | i0 = u11 * gamma / gcd_alpha_beta
3609 | j0 = u12 * gamma / gcd_alpha_beta
3610 | i1 = u21
3611 | j1 = u22
3613 the solutions are:
3615 | x0 = i0 + i1 * t,
3616 | y0 = j0 + j1 * t. */
3617 HOST_WIDE_INT i0, j0, i1, j1;
3619 i0 = U[0][0] * gamma / gcd_alpha_beta;
3620 j0 = U[0][1] * gamma / gcd_alpha_beta;
3621 i1 = U[1][0];
3622 j1 = U[1][1];
3624 if ((i1 == 0 && i0 < 0)
3625 || (j1 == 0 && j0 < 0))
3627 /* There is no solution.
3628 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
3629 falls in here, but for the moment we don't look at the
3630 upper bound of the iteration domain. */
3631 *overlaps_a = conflict_fn_no_dependence ();
3632 *overlaps_b = conflict_fn_no_dependence ();
3633 *last_conflicts = integer_zero_node;
3634 goto end_analyze_subs_aa;
3637 if (i1 > 0 && j1 > 0)
3639 HOST_WIDE_INT niter_a
3640 = max_stmt_executions_int (get_chrec_loop (chrec_a));
3641 HOST_WIDE_INT niter_b
3642 = max_stmt_executions_int (get_chrec_loop (chrec_b));
3643 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
3645 /* (X0, Y0) is a solution of the Diophantine equation:
3646 "chrec_a (X0) = chrec_b (Y0)". */
3647 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
3648 CEIL (-j0, j1));
3649 HOST_WIDE_INT x0 = i1 * tau1 + i0;
3650 HOST_WIDE_INT y0 = j1 * tau1 + j0;
3652 /* (X1, Y1) is the smallest positive solution of the eq
3653 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
3654 first conflict occurs. */
3655 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
3656 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
3657 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
3659 if (niter > 0)
3661 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter_a - i0, i1),
3662 FLOOR_DIV (niter_b - j0, j1));
3663 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
3665 /* If the overlap occurs outside of the bounds of the
3666 loop, there is no dependence. */
3667 if (x1 >= niter_a || y1 >= niter_b)
3669 *overlaps_a = conflict_fn_no_dependence ();
3670 *overlaps_b = conflict_fn_no_dependence ();
3671 *last_conflicts = integer_zero_node;
3672 goto end_analyze_subs_aa;
3674 else
3675 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
3677 else
3678 *last_conflicts = chrec_dont_know;
3680 *overlaps_a
3681 = conflict_fn (1,
3682 affine_fn_univar (build_int_cst (NULL_TREE, x1),
3684 build_int_cst (NULL_TREE, i1)));
3685 *overlaps_b
3686 = conflict_fn (1,
3687 affine_fn_univar (build_int_cst (NULL_TREE, y1),
3689 build_int_cst (NULL_TREE, j1)));
3691 else
3693 /* FIXME: For the moment, the upper bound of the
3694 iteration domain for i and j is not checked. */
3695 if (dump_file && (dump_flags & TDF_DETAILS))
3696 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
3697 *overlaps_a = conflict_fn_not_known ();
3698 *overlaps_b = conflict_fn_not_known ();
3699 *last_conflicts = chrec_dont_know;
3702 else
3704 if (dump_file && (dump_flags & TDF_DETAILS))
3705 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
3706 *overlaps_a = conflict_fn_not_known ();
3707 *overlaps_b = conflict_fn_not_known ();
3708 *last_conflicts = chrec_dont_know;
3711 else
3713 if (dump_file && (dump_flags & TDF_DETAILS))
3714 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
3715 *overlaps_a = conflict_fn_not_known ();
3716 *overlaps_b = conflict_fn_not_known ();
3717 *last_conflicts = chrec_dont_know;
3720 end_analyze_subs_aa:
3721 obstack_free (&scratch_obstack, NULL);
3722 if (dump_file && (dump_flags & TDF_DETAILS))
3724 fprintf (dump_file, " (overlaps_a = ");
3725 dump_conflict_function (dump_file, *overlaps_a);
3726 fprintf (dump_file, ")\n (overlaps_b = ");
3727 dump_conflict_function (dump_file, *overlaps_b);
3728 fprintf (dump_file, "))\n");
3732 /* Returns true when analyze_subscript_affine_affine can be used for
3733 determining the dependence relation between chrec_a and chrec_b,
3734 that contain symbols. This function modifies chrec_a and chrec_b
3735 such that the analysis result is the same, and such that they don't
3736 contain symbols, and then can safely be passed to the analyzer.
3738 Example: The analysis of the following tuples of evolutions produce
3739 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
3740 vs. {0, +, 1}_1
3742 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
3743 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
3746 static bool
3747 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
3749 tree diff, type, left_a, left_b, right_b;
3751 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
3752 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
3753 /* FIXME: For the moment not handled. Might be refined later. */
3754 return false;
3756 type = chrec_type (*chrec_a);
3757 left_a = CHREC_LEFT (*chrec_a);
3758 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
3759 diff = chrec_fold_minus (type, left_a, left_b);
3761 if (!evolution_function_is_constant_p (diff))
3762 return false;
3764 if (dump_file && (dump_flags & TDF_DETAILS))
3765 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
3767 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
3768 diff, CHREC_RIGHT (*chrec_a));
3769 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
3770 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
3771 build_int_cst (type, 0),
3772 right_b);
3773 return true;
3776 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
3777 *OVERLAPS_B are initialized to the functions that describe the
3778 relation between the elements accessed twice by CHREC_A and
3779 CHREC_B. For k >= 0, the following property is verified:
3781 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
3783 static void
3784 analyze_siv_subscript (tree chrec_a,
3785 tree chrec_b,
3786 conflict_function **overlaps_a,
3787 conflict_function **overlaps_b,
3788 tree *last_conflicts,
3789 int loop_nest_num)
3791 dependence_stats.num_siv++;
3793 if (dump_file && (dump_flags & TDF_DETAILS))
3794 fprintf (dump_file, "(analyze_siv_subscript \n");
3796 if (evolution_function_is_constant_p (chrec_a)
3797 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
3798 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
3799 overlaps_a, overlaps_b, last_conflicts);
3801 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
3802 && evolution_function_is_constant_p (chrec_b))
3803 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
3804 overlaps_b, overlaps_a, last_conflicts);
3806 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
3807 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
3809 if (!chrec_contains_symbols (chrec_a)
3810 && !chrec_contains_symbols (chrec_b))
3812 analyze_subscript_affine_affine (chrec_a, chrec_b,
3813 overlaps_a, overlaps_b,
3814 last_conflicts);
3816 if (CF_NOT_KNOWN_P (*overlaps_a)
3817 || CF_NOT_KNOWN_P (*overlaps_b))
3818 dependence_stats.num_siv_unimplemented++;
3819 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
3820 || CF_NO_DEPENDENCE_P (*overlaps_b))
3821 dependence_stats.num_siv_independent++;
3822 else
3823 dependence_stats.num_siv_dependent++;
3825 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
3826 &chrec_b))
3828 analyze_subscript_affine_affine (chrec_a, chrec_b,
3829 overlaps_a, overlaps_b,
3830 last_conflicts);
3832 if (CF_NOT_KNOWN_P (*overlaps_a)
3833 || CF_NOT_KNOWN_P (*overlaps_b))
3834 dependence_stats.num_siv_unimplemented++;
3835 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
3836 || CF_NO_DEPENDENCE_P (*overlaps_b))
3837 dependence_stats.num_siv_independent++;
3838 else
3839 dependence_stats.num_siv_dependent++;
3841 else
3842 goto siv_subscript_dontknow;
3845 else
3847 siv_subscript_dontknow:;
3848 if (dump_file && (dump_flags & TDF_DETAILS))
3849 fprintf (dump_file, " siv test failed: unimplemented");
3850 *overlaps_a = conflict_fn_not_known ();
3851 *overlaps_b = conflict_fn_not_known ();
3852 *last_conflicts = chrec_dont_know;
3853 dependence_stats.num_siv_unimplemented++;
3856 if (dump_file && (dump_flags & TDF_DETAILS))
3857 fprintf (dump_file, ")\n");
3860 /* Returns false if we can prove that the greatest common divisor of the steps
3861 of CHREC does not divide CST, false otherwise. */
3863 static bool
3864 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
3866 HOST_WIDE_INT cd = 0, val;
3867 tree step;
3869 if (!tree_fits_shwi_p (cst))
3870 return true;
3871 val = tree_to_shwi (cst);
3873 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
3875 step = CHREC_RIGHT (chrec);
3876 if (!tree_fits_shwi_p (step))
3877 return true;
3878 cd = gcd (cd, tree_to_shwi (step));
3879 chrec = CHREC_LEFT (chrec);
3882 return val % cd == 0;
3885 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
3886 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
3887 functions that describe the relation between the elements accessed
3888 twice by CHREC_A and CHREC_B. For k >= 0, the following property
3889 is verified:
3891 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
3893 static void
3894 analyze_miv_subscript (tree chrec_a,
3895 tree chrec_b,
3896 conflict_function **overlaps_a,
3897 conflict_function **overlaps_b,
3898 tree *last_conflicts,
3899 struct loop *loop_nest)
3901 tree type, difference;
3903 dependence_stats.num_miv++;
3904 if (dump_file && (dump_flags & TDF_DETAILS))
3905 fprintf (dump_file, "(analyze_miv_subscript \n");
3907 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3908 chrec_a = chrec_convert (type, chrec_a, NULL);
3909 chrec_b = chrec_convert (type, chrec_b, NULL);
3910 difference = chrec_fold_minus (type, chrec_a, chrec_b);
3912 if (eq_evolutions_p (chrec_a, chrec_b))
3914 /* Access functions are the same: all the elements are accessed
3915 in the same order. */
3916 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3917 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3918 *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
3919 dependence_stats.num_miv_dependent++;
3922 else if (evolution_function_is_constant_p (difference)
3923 /* For the moment, the following is verified:
3924 evolution_function_is_affine_multivariate_p (chrec_a,
3925 loop_nest->num) */
3926 && !gcd_of_steps_may_divide_p (chrec_a, difference))
3928 /* testsuite/.../ssa-chrec-33.c
3929 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
3931 The difference is 1, and all the evolution steps are multiples
3932 of 2, consequently there are no overlapping elements. */
3933 *overlaps_a = conflict_fn_no_dependence ();
3934 *overlaps_b = conflict_fn_no_dependence ();
3935 *last_conflicts = integer_zero_node;
3936 dependence_stats.num_miv_independent++;
3939 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
3940 && !chrec_contains_symbols (chrec_a)
3941 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
3942 && !chrec_contains_symbols (chrec_b))
3944 /* testsuite/.../ssa-chrec-35.c
3945 {0, +, 1}_2 vs. {0, +, 1}_3
3946 the overlapping elements are respectively located at iterations:
3947 {0, +, 1}_x and {0, +, 1}_x,
3948 in other words, we have the equality:
3949 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
3951 Other examples:
3952 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
3953 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
3955 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
3956 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
3958 analyze_subscript_affine_affine (chrec_a, chrec_b,
3959 overlaps_a, overlaps_b, last_conflicts);
3961 if (CF_NOT_KNOWN_P (*overlaps_a)
3962 || CF_NOT_KNOWN_P (*overlaps_b))
3963 dependence_stats.num_miv_unimplemented++;
3964 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
3965 || CF_NO_DEPENDENCE_P (*overlaps_b))
3966 dependence_stats.num_miv_independent++;
3967 else
3968 dependence_stats.num_miv_dependent++;
3971 else
3973 /* When the analysis is too difficult, answer "don't know". */
3974 if (dump_file && (dump_flags & TDF_DETAILS))
3975 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
3977 *overlaps_a = conflict_fn_not_known ();
3978 *overlaps_b = conflict_fn_not_known ();
3979 *last_conflicts = chrec_dont_know;
3980 dependence_stats.num_miv_unimplemented++;
3983 if (dump_file && (dump_flags & TDF_DETAILS))
3984 fprintf (dump_file, ")\n");
3987 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
3988 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
3989 OVERLAP_ITERATIONS_B are initialized with two functions that
3990 describe the iterations that contain conflicting elements.
3992 Remark: For an integer k >= 0, the following equality is true:
3994 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
3997 static void
3998 analyze_overlapping_iterations (tree chrec_a,
3999 tree chrec_b,
4000 conflict_function **overlap_iterations_a,
4001 conflict_function **overlap_iterations_b,
4002 tree *last_conflicts, struct loop *loop_nest)
4004 unsigned int lnn = loop_nest->num;
4006 dependence_stats.num_subscript_tests++;
4008 if (dump_file && (dump_flags & TDF_DETAILS))
4010 fprintf (dump_file, "(analyze_overlapping_iterations \n");
4011 fprintf (dump_file, " (chrec_a = ");
4012 print_generic_expr (dump_file, chrec_a);
4013 fprintf (dump_file, ")\n (chrec_b = ");
4014 print_generic_expr (dump_file, chrec_b);
4015 fprintf (dump_file, ")\n");
4018 if (chrec_a == NULL_TREE
4019 || chrec_b == NULL_TREE
4020 || chrec_contains_undetermined (chrec_a)
4021 || chrec_contains_undetermined (chrec_b))
4023 dependence_stats.num_subscript_undetermined++;
4025 *overlap_iterations_a = conflict_fn_not_known ();
4026 *overlap_iterations_b = conflict_fn_not_known ();
4029 /* If they are the same chrec, and are affine, they overlap
4030 on every iteration. */
4031 else if (eq_evolutions_p (chrec_a, chrec_b)
4032 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4033 || operand_equal_p (chrec_a, chrec_b, 0)))
4035 dependence_stats.num_same_subscript_function++;
4036 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4037 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4038 *last_conflicts = chrec_dont_know;
4041 /* If they aren't the same, and aren't affine, we can't do anything
4042 yet. */
4043 else if ((chrec_contains_symbols (chrec_a)
4044 || chrec_contains_symbols (chrec_b))
4045 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4046 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
4048 dependence_stats.num_subscript_undetermined++;
4049 *overlap_iterations_a = conflict_fn_not_known ();
4050 *overlap_iterations_b = conflict_fn_not_known ();
4053 else if (ziv_subscript_p (chrec_a, chrec_b))
4054 analyze_ziv_subscript (chrec_a, chrec_b,
4055 overlap_iterations_a, overlap_iterations_b,
4056 last_conflicts);
4058 else if (siv_subscript_p (chrec_a, chrec_b))
4059 analyze_siv_subscript (chrec_a, chrec_b,
4060 overlap_iterations_a, overlap_iterations_b,
4061 last_conflicts, lnn);
4063 else
4064 analyze_miv_subscript (chrec_a, chrec_b,
4065 overlap_iterations_a, overlap_iterations_b,
4066 last_conflicts, loop_nest);
4068 if (dump_file && (dump_flags & TDF_DETAILS))
4070 fprintf (dump_file, " (overlap_iterations_a = ");
4071 dump_conflict_function (dump_file, *overlap_iterations_a);
4072 fprintf (dump_file, ")\n (overlap_iterations_b = ");
4073 dump_conflict_function (dump_file, *overlap_iterations_b);
4074 fprintf (dump_file, "))\n");
4078 /* Helper function for uniquely inserting distance vectors. */
4080 static void
4081 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
4083 unsigned i;
4084 lambda_vector v;
4086 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, v)
4087 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
4088 return;
4090 DDR_DIST_VECTS (ddr).safe_push (dist_v);
4093 /* Helper function for uniquely inserting direction vectors. */
4095 static void
4096 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
4098 unsigned i;
4099 lambda_vector v;
4101 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), i, v)
4102 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
4103 return;
4105 DDR_DIR_VECTS (ddr).safe_push (dir_v);
4108 /* Add a distance of 1 on all the loops outer than INDEX. If we
4109 haven't yet determined a distance for this outer loop, push a new
4110 distance vector composed of the previous distance, and a distance
4111 of 1 for this outer loop. Example:
4113 | loop_1
4114 | loop_2
4115 | A[10]
4116 | endloop_2
4117 | endloop_1
4119 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
4120 save (0, 1), then we have to save (1, 0). */
4122 static void
4123 add_outer_distances (struct data_dependence_relation *ddr,
4124 lambda_vector dist_v, int index)
4126 /* For each outer loop where init_v is not set, the accesses are
4127 in dependence of distance 1 in the loop. */
4128 while (--index >= 0)
4130 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4131 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
4132 save_v[index] = 1;
4133 save_dist_v (ddr, save_v);
4137 /* Return false when fail to represent the data dependence as a
4138 distance vector. A_INDEX is the index of the first reference
4139 (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
4140 second reference. INIT_B is set to true when a component has been
4141 added to the distance vector DIST_V. INDEX_CARRY is then set to
4142 the index in DIST_V that carries the dependence. */
4144 static bool
4145 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
4146 unsigned int a_index, unsigned int b_index,
4147 lambda_vector dist_v, bool *init_b,
4148 int *index_carry)
4150 unsigned i;
4151 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4153 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
4155 tree access_fn_a, access_fn_b;
4156 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
4158 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
4160 non_affine_dependence_relation (ddr);
4161 return false;
4164 access_fn_a = SUB_ACCESS_FN (subscript, a_index);
4165 access_fn_b = SUB_ACCESS_FN (subscript, b_index);
4167 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
4168 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
4170 HOST_WIDE_INT dist;
4171 int index;
4172 int var_a = CHREC_VARIABLE (access_fn_a);
4173 int var_b = CHREC_VARIABLE (access_fn_b);
4175 if (var_a != var_b
4176 || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
4178 non_affine_dependence_relation (ddr);
4179 return false;
4182 dist = int_cst_value (SUB_DISTANCE (subscript));
4183 index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
4184 *index_carry = MIN (index, *index_carry);
4186 /* This is the subscript coupling test. If we have already
4187 recorded a distance for this loop (a distance coming from
4188 another subscript), it should be the same. For example,
4189 in the following code, there is no dependence:
4191 | loop i = 0, N, 1
4192 | T[i+1][i] = ...
4193 | ... = T[i][i]
4194 | endloop
4196 if (init_v[index] != 0 && dist_v[index] != dist)
4198 finalize_ddr_dependent (ddr, chrec_known);
4199 return false;
4202 dist_v[index] = dist;
4203 init_v[index] = 1;
4204 *init_b = true;
4206 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
4208 /* This can be for example an affine vs. constant dependence
4209 (T[i] vs. T[3]) that is not an affine dependence and is
4210 not representable as a distance vector. */
4211 non_affine_dependence_relation (ddr);
4212 return false;
4216 return true;
4219 /* Return true when the DDR contains only constant access functions. */
4221 static bool
4222 constant_access_functions (const struct data_dependence_relation *ddr)
4224 unsigned i;
4225 subscript *sub;
4227 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4228 if (!evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 0))
4229 || !evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 1)))
4230 return false;
4232 return true;
4235 /* Helper function for the case where DDR_A and DDR_B are the same
4236 multivariate access function with a constant step. For an example
4237 see pr34635-1.c. */
4239 static void
4240 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
4242 int x_1, x_2;
4243 tree c_1 = CHREC_LEFT (c_2);
4244 tree c_0 = CHREC_LEFT (c_1);
4245 lambda_vector dist_v;
4246 HOST_WIDE_INT v1, v2, cd;
4248 /* Polynomials with more than 2 variables are not handled yet. When
4249 the evolution steps are parameters, it is not possible to
4250 represent the dependence using classical distance vectors. */
4251 if (TREE_CODE (c_0) != INTEGER_CST
4252 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
4253 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
4255 DDR_AFFINE_P (ddr) = false;
4256 return;
4259 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
4260 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
4262 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
4263 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4264 v1 = int_cst_value (CHREC_RIGHT (c_1));
4265 v2 = int_cst_value (CHREC_RIGHT (c_2));
4266 cd = gcd (v1, v2);
4267 v1 /= cd;
4268 v2 /= cd;
4270 if (v2 < 0)
4272 v2 = -v2;
4273 v1 = -v1;
4276 dist_v[x_1] = v2;
4277 dist_v[x_2] = -v1;
4278 save_dist_v (ddr, dist_v);
4280 add_outer_distances (ddr, dist_v, x_1);
4283 /* Helper function for the case where DDR_A and DDR_B are the same
4284 access functions. */
4286 static void
4287 add_other_self_distances (struct data_dependence_relation *ddr)
4289 lambda_vector dist_v;
4290 unsigned i;
4291 int index_carry = DDR_NB_LOOPS (ddr);
4292 subscript *sub;
4294 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4296 tree access_fun = SUB_ACCESS_FN (sub, 0);
4298 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
4300 if (!evolution_function_is_univariate_p (access_fun))
4302 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
4304 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
4305 return;
4308 access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);
4310 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
4311 add_multivariate_self_dist (ddr, access_fun);
4312 else
4313 /* The evolution step is not constant: it varies in
4314 the outer loop, so this cannot be represented by a
4315 distance vector. For example in pr34635.c the
4316 evolution is {0, +, {0, +, 4}_1}_2. */
4317 DDR_AFFINE_P (ddr) = false;
4319 return;
4322 index_carry = MIN (index_carry,
4323 index_in_loop_nest (CHREC_VARIABLE (access_fun),
4324 DDR_LOOP_NEST (ddr)));
4328 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4329 add_outer_distances (ddr, dist_v, index_carry);
4332 static void
4333 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
4335 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4337 dist_v[DDR_INNER_LOOP (ddr)] = 1;
4338 save_dist_v (ddr, dist_v);
4341 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
4342 is the case for example when access functions are the same and
4343 equal to a constant, as in:
4345 | loop_1
4346 | A[3] = ...
4347 | ... = A[3]
4348 | endloop_1
4350 in which case the distance vectors are (0) and (1). */
4352 static void
4353 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
4355 unsigned i, j;
4357 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
4359 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
4360 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
4361 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
4363 for (j = 0; j < ca->n; j++)
4364 if (affine_function_zero_p (ca->fns[j]))
4366 insert_innermost_unit_dist_vector (ddr);
4367 return;
4370 for (j = 0; j < cb->n; j++)
4371 if (affine_function_zero_p (cb->fns[j]))
4373 insert_innermost_unit_dist_vector (ddr);
4374 return;
4379 /* Return true when the DDR contains two data references that have the
4380 same access functions. */
4382 static inline bool
4383 same_access_functions (const struct data_dependence_relation *ddr)
4385 unsigned i;
4386 subscript *sub;
4388 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4389 if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
4390 SUB_ACCESS_FN (sub, 1)))
4391 return false;
4393 return true;
4396 /* Compute the classic per loop distance vector. DDR is the data
4397 dependence relation to build a vector from. Return false when fail
4398 to represent the data dependence as a distance vector. */
4400 static bool
4401 build_classic_dist_vector (struct data_dependence_relation *ddr,
4402 struct loop *loop_nest)
4404 bool init_b = false;
4405 int index_carry = DDR_NB_LOOPS (ddr);
4406 lambda_vector dist_v;
4408 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
4409 return false;
4411 if (same_access_functions (ddr))
4413 /* Save the 0 vector. */
4414 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4415 save_dist_v (ddr, dist_v);
4417 if (constant_access_functions (ddr))
4418 add_distance_for_zero_overlaps (ddr);
4420 if (DDR_NB_LOOPS (ddr) > 1)
4421 add_other_self_distances (ddr);
4423 return true;
4426 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4427 if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
4428 return false;
4430 /* Save the distance vector if we initialized one. */
4431 if (init_b)
4433 /* Verify a basic constraint: classic distance vectors should
4434 always be lexicographically positive.
4436 Data references are collected in the order of execution of
4437 the program, thus for the following loop
4439 | for (i = 1; i < 100; i++)
4440 | for (j = 1; j < 100; j++)
4442 | t = T[j+1][i-1]; // A
4443 | T[j][i] = t + 2; // B
4446 references are collected following the direction of the wind:
4447 A then B. The data dependence tests are performed also
4448 following this order, such that we're looking at the distance
4449 separating the elements accessed by A from the elements later
4450 accessed by B. But in this example, the distance returned by
4451 test_dep (A, B) is lexicographically negative (-1, 1), that
4452 means that the access A occurs later than B with respect to
4453 the outer loop, ie. we're actually looking upwind. In this
4454 case we solve test_dep (B, A) looking downwind to the
4455 lexicographically positive solution, that returns the
4456 distance vector (1, -1). */
4457 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
4459 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4460 if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
4461 return false;
4462 compute_subscript_distance (ddr);
4463 if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
4464 &index_carry))
4465 return false;
4466 save_dist_v (ddr, save_v);
4467 DDR_REVERSED_P (ddr) = true;
4469 /* In this case there is a dependence forward for all the
4470 outer loops:
4472 | for (k = 1; k < 100; k++)
4473 | for (i = 1; i < 100; i++)
4474 | for (j = 1; j < 100; j++)
4476 | t = T[j+1][i-1]; // A
4477 | T[j][i] = t + 2; // B
4480 the vectors are:
4481 (0, 1, -1)
4482 (1, 1, -1)
4483 (1, -1, 1)
4485 if (DDR_NB_LOOPS (ddr) > 1)
4487 add_outer_distances (ddr, save_v, index_carry);
4488 add_outer_distances (ddr, dist_v, index_carry);
4491 else
4493 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4494 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
4496 if (DDR_NB_LOOPS (ddr) > 1)
4498 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4500 if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
4501 return false;
4502 compute_subscript_distance (ddr);
4503 if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
4504 &index_carry))
4505 return false;
4507 save_dist_v (ddr, save_v);
4508 add_outer_distances (ddr, dist_v, index_carry);
4509 add_outer_distances (ddr, opposite_v, index_carry);
4511 else
4512 save_dist_v (ddr, save_v);
4515 else
4517 /* There is a distance of 1 on all the outer loops: Example:
4518 there is a dependence of distance 1 on loop_1 for the array A.
4520 | loop_1
4521 | A[5] = ...
4522 | endloop
4524 add_outer_distances (ddr, dist_v,
4525 lambda_vector_first_nz (dist_v,
4526 DDR_NB_LOOPS (ddr), 0));
4529 if (dump_file && (dump_flags & TDF_DETAILS))
4531 unsigned i;
4533 fprintf (dump_file, "(build_classic_dist_vector\n");
4534 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
4536 fprintf (dump_file, " dist_vector = (");
4537 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
4538 DDR_NB_LOOPS (ddr));
4539 fprintf (dump_file, " )\n");
4541 fprintf (dump_file, ")\n");
4544 return true;
4547 /* Return the direction for a given distance.
4548 FIXME: Computing dir this way is suboptimal, since dir can catch
4549 cases that dist is unable to represent. */
4551 static inline enum data_dependence_direction
4552 dir_from_dist (int dist)
4554 if (dist > 0)
4555 return dir_positive;
4556 else if (dist < 0)
4557 return dir_negative;
4558 else
4559 return dir_equal;
4562 /* Compute the classic per loop direction vector. DDR is the data
4563 dependence relation to build a vector from. */
4565 static void
4566 build_classic_dir_vector (struct data_dependence_relation *ddr)
4568 unsigned i, j;
4569 lambda_vector dist_v;
4571 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
4573 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4575 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
4576 dir_v[j] = dir_from_dist (dist_v[j]);
4578 save_dir_v (ddr, dir_v);
4582 /* Helper function. Returns true when there is a dependence between the
4583 data references. A_INDEX is the index of the first reference (0 for
4584 DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference. */
4586 static bool
4587 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
4588 unsigned int a_index, unsigned int b_index,
4589 struct loop *loop_nest)
4591 unsigned int i;
4592 tree last_conflicts;
4593 struct subscript *subscript;
4594 tree res = NULL_TREE;
4596 for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
4598 conflict_function *overlaps_a, *overlaps_b;
4600 analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
4601 SUB_ACCESS_FN (subscript, b_index),
4602 &overlaps_a, &overlaps_b,
4603 &last_conflicts, loop_nest);
4605 if (SUB_CONFLICTS_IN_A (subscript))
4606 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
4607 if (SUB_CONFLICTS_IN_B (subscript))
4608 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
4610 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
4611 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
4612 SUB_LAST_CONFLICT (subscript) = last_conflicts;
4614 /* If there is any undetermined conflict function we have to
4615 give a conservative answer in case we cannot prove that
4616 no dependence exists when analyzing another subscript. */
4617 if (CF_NOT_KNOWN_P (overlaps_a)
4618 || CF_NOT_KNOWN_P (overlaps_b))
4620 res = chrec_dont_know;
4621 continue;
4624 /* When there is a subscript with no dependence we can stop. */
4625 else if (CF_NO_DEPENDENCE_P (overlaps_a)
4626 || CF_NO_DEPENDENCE_P (overlaps_b))
4628 res = chrec_known;
4629 break;
4633 if (res == NULL_TREE)
4634 return true;
4636 if (res == chrec_known)
4637 dependence_stats.num_dependence_independent++;
4638 else
4639 dependence_stats.num_dependence_undetermined++;
4640 finalize_ddr_dependent (ddr, res);
4641 return false;
4644 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
4646 static void
4647 subscript_dependence_tester (struct data_dependence_relation *ddr,
4648 struct loop *loop_nest)
4650 if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
4651 dependence_stats.num_dependence_dependent++;
4653 compute_subscript_distance (ddr);
4654 if (build_classic_dist_vector (ddr, loop_nest))
4655 build_classic_dir_vector (ddr);
4658 /* Returns true when all the access functions of A are affine or
4659 constant with respect to LOOP_NEST. */
4661 static bool
4662 access_functions_are_affine_or_constant_p (const struct data_reference *a,
4663 const struct loop *loop_nest)
4665 unsigned int i;
4666 vec<tree> fns = DR_ACCESS_FNS (a);
4667 tree t;
4669 FOR_EACH_VEC_ELT (fns, i, t)
4670 if (!evolution_function_is_invariant_p (t, loop_nest->num)
4671 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
4672 return false;
4674 return true;
4677 /* This computes the affine dependence relation between A and B with
4678 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
4679 independence between two accesses, while CHREC_DONT_KNOW is used
4680 for representing the unknown relation.
4682 Note that it is possible to stop the computation of the dependence
4683 relation the first time we detect a CHREC_KNOWN element for a given
4684 subscript. */
4686 void
4687 compute_affine_dependence (struct data_dependence_relation *ddr,
4688 struct loop *loop_nest)
4690 struct data_reference *dra = DDR_A (ddr);
4691 struct data_reference *drb = DDR_B (ddr);
4693 if (dump_file && (dump_flags & TDF_DETAILS))
4695 fprintf (dump_file, "(compute_affine_dependence\n");
4696 fprintf (dump_file, " stmt_a: ");
4697 print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
4698 fprintf (dump_file, " stmt_b: ");
4699 print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
4702 /* Analyze only when the dependence relation is not yet known. */
4703 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4705 dependence_stats.num_dependence_tests++;
4707 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
4708 && access_functions_are_affine_or_constant_p (drb, loop_nest))
4709 subscript_dependence_tester (ddr, loop_nest);
4711 /* As a last case, if the dependence cannot be determined, or if
4712 the dependence is considered too difficult to determine, answer
4713 "don't know". */
4714 else
4716 dependence_stats.num_dependence_undetermined++;
4718 if (dump_file && (dump_flags & TDF_DETAILS))
4720 fprintf (dump_file, "Data ref a:\n");
4721 dump_data_reference (dump_file, dra);
4722 fprintf (dump_file, "Data ref b:\n");
4723 dump_data_reference (dump_file, drb);
4724 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4726 finalize_ddr_dependent (ddr, chrec_dont_know);
4730 if (dump_file && (dump_flags & TDF_DETAILS))
4732 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4733 fprintf (dump_file, ") -> no dependence\n");
4734 else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4735 fprintf (dump_file, ") -> dependence analysis failed\n");
4736 else
4737 fprintf (dump_file, ")\n");
4741 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4742 the data references in DATAREFS, in the LOOP_NEST. When
4743 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4744 relations. Return true when successful, i.e. data references number
4745 is small enough to be handled. */
4747 bool
4748 compute_all_dependences (vec<data_reference_p> datarefs,
4749 vec<ddr_p> *dependence_relations,
4750 vec<loop_p> loop_nest,
4751 bool compute_self_and_rr)
4753 struct data_dependence_relation *ddr;
4754 struct data_reference *a, *b;
4755 unsigned int i, j;
4757 if ((int) datarefs.length ()
4758 > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
4760 struct data_dependence_relation *ddr;
4762 /* Insert a single relation into dependence_relations:
4763 chrec_dont_know. */
4764 ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
4765 dependence_relations->safe_push (ddr);
4766 return false;
4769 FOR_EACH_VEC_ELT (datarefs, i, a)
4770 for (j = i + 1; datarefs.iterate (j, &b); j++)
4771 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4773 ddr = initialize_data_dependence_relation (a, b, loop_nest);
4774 dependence_relations->safe_push (ddr);
4775 if (loop_nest.exists ())
4776 compute_affine_dependence (ddr, loop_nest[0]);
4779 if (compute_self_and_rr)
4780 FOR_EACH_VEC_ELT (datarefs, i, a)
4782 ddr = initialize_data_dependence_relation (a, a, loop_nest);
4783 dependence_relations->safe_push (ddr);
4784 if (loop_nest.exists ())
4785 compute_affine_dependence (ddr, loop_nest[0]);
4788 return true;
4791 /* Describes a location of a memory reference. */
4793 struct data_ref_loc
4795 /* The memory reference. */
4796 tree ref;
4798 /* True if the memory reference is read. */
4799 bool is_read;
4801 /* True if the data reference is conditional within the containing
4802 statement, i.e. if it might not occur even when the statement
4803 is executed and runs to completion. */
4804 bool is_conditional_in_stmt;
4808 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4809 true if STMT clobbers memory, false otherwise. */
4811 static bool
4812 get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
4814 bool clobbers_memory = false;
4815 data_ref_loc ref;
4816 tree op0, op1;
4817 enum gimple_code stmt_code = gimple_code (stmt);
4819 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4820 As we cannot model data-references to not spelled out
4821 accesses give up if they may occur. */
4822 if (stmt_code == GIMPLE_CALL
4823 && !(gimple_call_flags (stmt) & ECF_CONST))
4825 /* Allow IFN_GOMP_SIMD_LANE in their own loops. */
4826 if (gimple_call_internal_p (stmt))
4827 switch (gimple_call_internal_fn (stmt))
4829 case IFN_GOMP_SIMD_LANE:
4831 struct loop *loop = gimple_bb (stmt)->loop_father;
4832 tree uid = gimple_call_arg (stmt, 0);
4833 gcc_assert (TREE_CODE (uid) == SSA_NAME);
4834 if (loop == NULL
4835 || loop->simduid != SSA_NAME_VAR (uid))
4836 clobbers_memory = true;
4837 break;
4839 case IFN_MASK_LOAD:
4840 case IFN_MASK_STORE:
4841 break;
4842 default:
4843 clobbers_memory = true;
4844 break;
4846 else
4847 clobbers_memory = true;
4849 else if (stmt_code == GIMPLE_ASM
4850 && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
4851 || gimple_vuse (stmt)))
4852 clobbers_memory = true;
4854 if (!gimple_vuse (stmt))
4855 return clobbers_memory;
4857 if (stmt_code == GIMPLE_ASSIGN)
4859 tree base;
4860 op0 = gimple_assign_lhs (stmt);
4861 op1 = gimple_assign_rhs1 (stmt);
4863 if (DECL_P (op1)
4864 || (REFERENCE_CLASS_P (op1)
4865 && (base = get_base_address (op1))
4866 && TREE_CODE (base) != SSA_NAME
4867 && !is_gimple_min_invariant (base)))
4869 ref.ref = op1;
4870 ref.is_read = true;
4871 ref.is_conditional_in_stmt = false;
4872 references->safe_push (ref);
4875 else if (stmt_code == GIMPLE_CALL)
4877 unsigned i, n;
4878 tree ptr, type;
4879 unsigned int align;
4881 ref.is_read = false;
4882 if (gimple_call_internal_p (stmt))
4883 switch (gimple_call_internal_fn (stmt))
4885 case IFN_MASK_LOAD:
4886 if (gimple_call_lhs (stmt) == NULL_TREE)
4887 break;
4888 ref.is_read = true;
4889 /* FALLTHRU */
4890 case IFN_MASK_STORE:
4891 ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
4892 align = tree_to_shwi (gimple_call_arg (stmt, 1));
4893 if (ref.is_read)
4894 type = TREE_TYPE (gimple_call_lhs (stmt));
4895 else
4896 type = TREE_TYPE (gimple_call_arg (stmt, 3));
4897 if (TYPE_ALIGN (type) != align)
4898 type = build_aligned_type (type, align);
4899 ref.is_conditional_in_stmt = true;
4900 ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
4901 ptr);
4902 references->safe_push (ref);
4903 return false;
4904 default:
4905 break;
4908 op0 = gimple_call_lhs (stmt);
4909 n = gimple_call_num_args (stmt);
4910 for (i = 0; i < n; i++)
4912 op1 = gimple_call_arg (stmt, i);
4914 if (DECL_P (op1)
4915 || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
4917 ref.ref = op1;
4918 ref.is_read = true;
4919 ref.is_conditional_in_stmt = false;
4920 references->safe_push (ref);
4924 else
4925 return clobbers_memory;
4927 if (op0
4928 && (DECL_P (op0)
4929 || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
4931 ref.ref = op0;
4932 ref.is_read = false;
4933 ref.is_conditional_in_stmt = false;
4934 references->safe_push (ref);
4936 return clobbers_memory;
4940 /* Returns true if the loop-nest has any data reference. */
4942 bool
4943 loop_nest_has_data_refs (loop_p loop)
4945 basic_block *bbs = get_loop_body (loop);
4946 auto_vec<data_ref_loc, 3> references;
4948 for (unsigned i = 0; i < loop->num_nodes; i++)
4950 basic_block bb = bbs[i];
4951 gimple_stmt_iterator bsi;
4953 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4955 gimple *stmt = gsi_stmt (bsi);
4956 get_references_in_stmt (stmt, &references);
4957 if (references.length ())
4959 free (bbs);
4960 return true;
4964 free (bbs);
4965 return false;
4968 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4969 reference, returns false, otherwise returns true. NEST is the outermost
4970 loop of the loop nest in which the references should be analyzed. */
4972 bool
4973 find_data_references_in_stmt (struct loop *nest, gimple *stmt,
4974 vec<data_reference_p> *datarefs)
4976 unsigned i;
4977 auto_vec<data_ref_loc, 2> references;
4978 data_ref_loc *ref;
4979 bool ret = true;
4980 data_reference_p dr;
4982 if (get_references_in_stmt (stmt, &references))
4983 return false;
4985 FOR_EACH_VEC_ELT (references, i, ref)
4987 dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
4988 loop_containing_stmt (stmt), ref->ref,
4989 stmt, ref->is_read, ref->is_conditional_in_stmt);
4990 gcc_assert (dr != NULL);
4991 datarefs->safe_push (dr);
4994 return ret;
4997 /* Stores the data references in STMT to DATAREFS. If there is an
4998 unanalyzable reference, returns false, otherwise returns true.
4999 NEST is the outermost loop of the loop nest in which the references
5000 should be instantiated, LOOP is the loop in which the references
5001 should be analyzed. */
5003 bool
5004 graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
5005 vec<data_reference_p> *datarefs)
5007 unsigned i;
5008 auto_vec<data_ref_loc, 2> references;
5009 data_ref_loc *ref;
5010 bool ret = true;
5011 data_reference_p dr;
5013 if (get_references_in_stmt (stmt, &references))
5014 return false;
5016 FOR_EACH_VEC_ELT (references, i, ref)
5018 dr = create_data_ref (nest, loop, ref->ref, stmt, ref->is_read,
5019 ref->is_conditional_in_stmt);
5020 gcc_assert (dr != NULL);
5021 datarefs->safe_push (dr);
5024 return ret;
5027 /* Search the data references in LOOP, and record the information into
5028 DATAREFS. Returns chrec_dont_know when failing to analyze a
5029 difficult case, returns NULL_TREE otherwise. */
5031 tree
5032 find_data_references_in_bb (struct loop *loop, basic_block bb,
5033 vec<data_reference_p> *datarefs)
5035 gimple_stmt_iterator bsi;
5037 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5039 gimple *stmt = gsi_stmt (bsi);
5041 if (!find_data_references_in_stmt (loop, stmt, datarefs))
5043 struct data_reference *res;
5044 res = XCNEW (struct data_reference);
5045 datarefs->safe_push (res);
5047 return chrec_dont_know;
5051 return NULL_TREE;
5054 /* Search the data references in LOOP, and record the information into
5055 DATAREFS. Returns chrec_dont_know when failing to analyze a
5056 difficult case, returns NULL_TREE otherwise.
5058 TODO: This function should be made smarter so that it can handle address
5059 arithmetic as if they were array accesses, etc. */
5061 tree
5062 find_data_references_in_loop (struct loop *loop,
5063 vec<data_reference_p> *datarefs)
5065 basic_block bb, *bbs;
5066 unsigned int i;
5068 bbs = get_loop_body_in_dom_order (loop);
5070 for (i = 0; i < loop->num_nodes; i++)
5072 bb = bbs[i];
5074 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
5076 free (bbs);
5077 return chrec_dont_know;
5080 free (bbs);
5082 return NULL_TREE;
5085 /* Return the alignment in bytes that DRB is guaranteed to have at all
5086 times. */
5088 unsigned int
5089 dr_alignment (innermost_loop_behavior *drb)
5091 /* Get the alignment of BASE_ADDRESS + INIT. */
5092 unsigned int alignment = drb->base_alignment;
5093 unsigned int misalignment = (drb->base_misalignment
5094 + TREE_INT_CST_LOW (drb->init));
5095 if (misalignment != 0)
5096 alignment = MIN (alignment, misalignment & -misalignment);
5098 /* Cap it to the alignment of OFFSET. */
5099 if (!integer_zerop (drb->offset))
5100 alignment = MIN (alignment, drb->offset_alignment);
5102 /* Cap it to the alignment of STEP. */
5103 if (!integer_zerop (drb->step))
5104 alignment = MIN (alignment, drb->step_alignment);
5106 return alignment;
5109 /* Recursive helper function. */
5111 static bool
5112 find_loop_nest_1 (struct loop *loop, vec<loop_p> *loop_nest)
5114 /* Inner loops of the nest should not contain siblings. Example:
5115 when there are two consecutive loops,
5117 | loop_0
5118 | loop_1
5119 | A[{0, +, 1}_1]
5120 | endloop_1
5121 | loop_2
5122 | A[{0, +, 1}_2]
5123 | endloop_2
5124 | endloop_0
5126 the dependence relation cannot be captured by the distance
5127 abstraction. */
5128 if (loop->next)
5129 return false;
5131 loop_nest->safe_push (loop);
5132 if (loop->inner)
5133 return find_loop_nest_1 (loop->inner, loop_nest);
5134 return true;
5137 /* Return false when the LOOP is not well nested. Otherwise return
5138 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
5139 contain the loops from the outermost to the innermost, as they will
5140 appear in the classic distance vector. */
5142 bool
5143 find_loop_nest (struct loop *loop, vec<loop_p> *loop_nest)
5145 loop_nest->safe_push (loop);
5146 if (loop->inner)
5147 return find_loop_nest_1 (loop->inner, loop_nest);
5148 return true;
5151 /* Returns true when the data dependences have been computed, false otherwise.
5152 Given a loop nest LOOP, the following vectors are returned:
5153 DATAREFS is initialized to all the array elements contained in this loop,
5154 DEPENDENCE_RELATIONS contains the relations between the data references.
5155 Compute read-read and self relations if
5156 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
5158 bool
5159 compute_data_dependences_for_loop (struct loop *loop,
5160 bool compute_self_and_read_read_dependences,
5161 vec<loop_p> *loop_nest,
5162 vec<data_reference_p> *datarefs,
5163 vec<ddr_p> *dependence_relations)
5165 bool res = true;
5167 memset (&dependence_stats, 0, sizeof (dependence_stats));
5169 /* If the loop nest is not well formed, or one of the data references
5170 is not computable, give up without spending time to compute other
5171 dependences. */
5172 if (!loop
5173 || !find_loop_nest (loop, loop_nest)
5174 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
5175 || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
5176 compute_self_and_read_read_dependences))
5177 res = false;
5179 if (dump_file && (dump_flags & TDF_STATS))
5181 fprintf (dump_file, "Dependence tester statistics:\n");
5183 fprintf (dump_file, "Number of dependence tests: %d\n",
5184 dependence_stats.num_dependence_tests);
5185 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
5186 dependence_stats.num_dependence_dependent);
5187 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
5188 dependence_stats.num_dependence_independent);
5189 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
5190 dependence_stats.num_dependence_undetermined);
5192 fprintf (dump_file, "Number of subscript tests: %d\n",
5193 dependence_stats.num_subscript_tests);
5194 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
5195 dependence_stats.num_subscript_undetermined);
5196 fprintf (dump_file, "Number of same subscript function: %d\n",
5197 dependence_stats.num_same_subscript_function);
5199 fprintf (dump_file, "Number of ziv tests: %d\n",
5200 dependence_stats.num_ziv);
5201 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
5202 dependence_stats.num_ziv_dependent);
5203 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
5204 dependence_stats.num_ziv_independent);
5205 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
5206 dependence_stats.num_ziv_unimplemented);
5208 fprintf (dump_file, "Number of siv tests: %d\n",
5209 dependence_stats.num_siv);
5210 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
5211 dependence_stats.num_siv_dependent);
5212 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
5213 dependence_stats.num_siv_independent);
5214 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
5215 dependence_stats.num_siv_unimplemented);
5217 fprintf (dump_file, "Number of miv tests: %d\n",
5218 dependence_stats.num_miv);
5219 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
5220 dependence_stats.num_miv_dependent);
5221 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
5222 dependence_stats.num_miv_independent);
5223 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
5224 dependence_stats.num_miv_unimplemented);
5227 return res;
5230 /* Free the memory used by a data dependence relation DDR. */
5232 void
5233 free_dependence_relation (struct data_dependence_relation *ddr)
5235 if (ddr == NULL)
5236 return;
5238 if (DDR_SUBSCRIPTS (ddr).exists ())
5239 free_subscripts (DDR_SUBSCRIPTS (ddr));
5240 DDR_DIST_VECTS (ddr).release ();
5241 DDR_DIR_VECTS (ddr).release ();
5243 free (ddr);
5246 /* Free the memory used by the data dependence relations from
5247 DEPENDENCE_RELATIONS. */
5249 void
5250 free_dependence_relations (vec<ddr_p> dependence_relations)
5252 unsigned int i;
5253 struct data_dependence_relation *ddr;
5255 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
5256 if (ddr)
5257 free_dependence_relation (ddr);
5259 dependence_relations.release ();
5262 /* Free the memory used by the data references from DATAREFS. */
5264 void
5265 free_data_refs (vec<data_reference_p> datarefs)
5267 unsigned int i;
5268 struct data_reference *dr;
5270 FOR_EACH_VEC_ELT (datarefs, i, dr)
5271 free_data_ref (dr);
5272 datarefs.release ();