2013-11-19 Richard Biener <rguenther@suse.de>
[official-gcc.git] / gcc / tree-data-ref.c
blob217295834314041cbee3a1f291aa46f00cebed3f
1 /* Data references and dependences detectors.
2 Copyright (C) 2003-2013 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass walks a given loop structure searching for array
22 references. The information about the array accesses is recorded
23 in DATA_REFERENCE structures.
25 The basic test for determining the dependences is:
26 given two access functions chrec1 and chrec2 to a same array, and
27 x and y two vectors from the iteration domain, the same element of
28 the array is accessed twice at iterations x and y if and only if:
29 | chrec1 (x) == chrec2 (y).
31 The goals of this analysis are:
33 - to determine the independence: the relation between two
34 independent accesses is qualified with the chrec_known (this
35 information allows a loop parallelization),
37 - when two data references access the same data, to qualify the
38 dependence relation with classic dependence representations:
40 - distance vectors
41 - direction vectors
42 - loop carried level dependence
43 - polyhedron dependence
44 or with the chains of recurrences based representation,
46 - to define a knowledge base for storing the data dependence
47 information,
49 - to define an interface to access this data.
52 Definitions:
54 - subscript: given two array accesses a subscript is the tuple
55 composed of the access functions for a given dimension. Example:
56 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57 (f1, g1), (f2, g2), (f3, g3).
59 - Diophantine equation: an equation whose coefficients and
60 solutions are integer constants, for example the equation
61 | 3*x + 2*y = 1
62 has an integer solution x = 1 and y = -1.
64 References:
66 - "Advanced Compilation for High Performance Computing" by Randy
67 Allen and Ken Kennedy.
68 http://citeseer.ist.psu.edu/goff91practical.html
70 - "Loop Transformations for Restructuring Compilers - The Foundations"
71 by Utpal Banerjee.
76 #include "config.h"
77 #include "system.h"
78 #include "coretypes.h"
79 #include "tree.h"
80 #include "expr.h"
81 #include "gimple-pretty-print.h"
82 #include "gimple.h"
83 #include "gimple-iterator.h"
84 #include "tree-ssa-loop-niter.h"
85 #include "tree-ssa-loop.h"
86 #include "tree-ssa.h"
87 #include "cfgloop.h"
88 #include "tree-data-ref.h"
89 #include "tree-scalar-evolution.h"
90 #include "dumpfile.h"
91 #include "langhooks.h"
92 #include "tree-affine.h"
93 #include "params.h"
95 static struct datadep_stats
97 int num_dependence_tests;
98 int num_dependence_dependent;
99 int num_dependence_independent;
100 int num_dependence_undetermined;
102 int num_subscript_tests;
103 int num_subscript_undetermined;
104 int num_same_subscript_function;
106 int num_ziv;
107 int num_ziv_independent;
108 int num_ziv_dependent;
109 int num_ziv_unimplemented;
111 int num_siv;
112 int num_siv_independent;
113 int num_siv_dependent;
114 int num_siv_unimplemented;
116 int num_miv;
117 int num_miv_independent;
118 int num_miv_dependent;
119 int num_miv_unimplemented;
120 } dependence_stats;
122 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
123 struct data_reference *,
124 struct data_reference *,
125 struct loop *);
126 /* Returns true iff A divides B. */
128 static inline bool
129 tree_fold_divides_p (const_tree a, const_tree b)
131 gcc_assert (TREE_CODE (a) == INTEGER_CST);
132 gcc_assert (TREE_CODE (b) == INTEGER_CST);
133 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
136 /* Returns true iff A divides B. */
138 static inline bool
139 int_divides_p (int a, int b)
141 return ((b % a) == 0);
146 /* Dump into FILE all the data references from DATAREFS. */
148 static void
149 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
151 unsigned int i;
152 struct data_reference *dr;
154 FOR_EACH_VEC_ELT (datarefs, i, dr)
155 dump_data_reference (file, dr);
158 /* Unified dump into FILE all the data references from DATAREFS. */
160 DEBUG_FUNCTION void
161 debug (vec<data_reference_p> &ref)
163 dump_data_references (stderr, ref);
166 DEBUG_FUNCTION void
167 debug (vec<data_reference_p> *ptr)
169 if (ptr)
170 debug (*ptr);
171 else
172 fprintf (stderr, "<nil>\n");
176 /* Dump into STDERR all the data references from DATAREFS. */
178 DEBUG_FUNCTION void
179 debug_data_references (vec<data_reference_p> datarefs)
181 dump_data_references (stderr, datarefs);
184 /* Print to STDERR the data_reference DR. */
186 DEBUG_FUNCTION void
187 debug_data_reference (struct data_reference *dr)
189 dump_data_reference (stderr, dr);
192 /* Dump function for a DATA_REFERENCE structure. */
194 void
195 dump_data_reference (FILE *outf,
196 struct data_reference *dr)
198 unsigned int i;
200 fprintf (outf, "#(Data Ref: \n");
201 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
202 fprintf (outf, "# stmt: ");
203 print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
204 fprintf (outf, "# ref: ");
205 print_generic_stmt (outf, DR_REF (dr), 0);
206 fprintf (outf, "# base_object: ");
207 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
209 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
211 fprintf (outf, "# Access function %d: ", i);
212 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
214 fprintf (outf, "#)\n");
217 /* Unified dump function for a DATA_REFERENCE structure. */
219 DEBUG_FUNCTION void
220 debug (data_reference &ref)
222 dump_data_reference (stderr, &ref);
225 DEBUG_FUNCTION void
226 debug (data_reference *ptr)
228 if (ptr)
229 debug (*ptr);
230 else
231 fprintf (stderr, "<nil>\n");
235 /* Dumps the affine function described by FN to the file OUTF. */
237 static void
238 dump_affine_function (FILE *outf, affine_fn fn)
240 unsigned i;
241 tree coef;
243 print_generic_expr (outf, fn[0], TDF_SLIM);
244 for (i = 1; fn.iterate (i, &coef); i++)
246 fprintf (outf, " + ");
247 print_generic_expr (outf, coef, TDF_SLIM);
248 fprintf (outf, " * x_%u", i);
252 /* Dumps the conflict function CF to the file OUTF. */
254 static void
255 dump_conflict_function (FILE *outf, conflict_function *cf)
257 unsigned i;
259 if (cf->n == NO_DEPENDENCE)
260 fprintf (outf, "no dependence");
261 else if (cf->n == NOT_KNOWN)
262 fprintf (outf, "not known");
263 else
265 for (i = 0; i < cf->n; i++)
267 if (i != 0)
268 fprintf (outf, " ");
269 fprintf (outf, "[");
270 dump_affine_function (outf, cf->fns[i]);
271 fprintf (outf, "]");
276 /* Dump function for a SUBSCRIPT structure. */
278 static void
279 dump_subscript (FILE *outf, struct subscript *subscript)
281 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
283 fprintf (outf, "\n (subscript \n");
284 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
285 dump_conflict_function (outf, cf);
286 if (CF_NONTRIVIAL_P (cf))
288 tree last_iteration = SUB_LAST_CONFLICT (subscript);
289 fprintf (outf, "\n last_conflict: ");
290 print_generic_expr (outf, last_iteration, 0);
293 cf = SUB_CONFLICTS_IN_B (subscript);
294 fprintf (outf, "\n iterations_that_access_an_element_twice_in_B: ");
295 dump_conflict_function (outf, cf);
296 if (CF_NONTRIVIAL_P (cf))
298 tree last_iteration = SUB_LAST_CONFLICT (subscript);
299 fprintf (outf, "\n last_conflict: ");
300 print_generic_expr (outf, last_iteration, 0);
303 fprintf (outf, "\n (Subscript distance: ");
304 print_generic_expr (outf, SUB_DISTANCE (subscript), 0);
305 fprintf (outf, " ))\n");
308 /* Print the classic direction vector DIRV to OUTF. */
310 static void
311 print_direction_vector (FILE *outf,
312 lambda_vector dirv,
313 int length)
315 int eq;
317 for (eq = 0; eq < length; eq++)
319 enum data_dependence_direction dir = ((enum data_dependence_direction)
320 dirv[eq]);
322 switch (dir)
324 case dir_positive:
325 fprintf (outf, " +");
326 break;
327 case dir_negative:
328 fprintf (outf, " -");
329 break;
330 case dir_equal:
331 fprintf (outf, " =");
332 break;
333 case dir_positive_or_equal:
334 fprintf (outf, " +=");
335 break;
336 case dir_positive_or_negative:
337 fprintf (outf, " +-");
338 break;
339 case dir_negative_or_equal:
340 fprintf (outf, " -=");
341 break;
342 case dir_star:
343 fprintf (outf, " *");
344 break;
345 default:
346 fprintf (outf, "indep");
347 break;
350 fprintf (outf, "\n");
353 /* Print a vector of direction vectors. */
355 static void
356 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
357 int length)
359 unsigned j;
360 lambda_vector v;
362 FOR_EACH_VEC_ELT (dir_vects, j, v)
363 print_direction_vector (outf, v, length);
366 /* Print out a vector VEC of length N to OUTFILE. */
368 static inline void
369 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
371 int i;
373 for (i = 0; i < n; i++)
374 fprintf (outfile, "%3d ", vector[i]);
375 fprintf (outfile, "\n");
378 /* Print a vector of distance vectors. */
380 static void
381 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
382 int length)
384 unsigned j;
385 lambda_vector v;
387 FOR_EACH_VEC_ELT (dist_vects, j, v)
388 print_lambda_vector (outf, v, length);
391 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
393 static void
394 dump_data_dependence_relation (FILE *outf,
395 struct data_dependence_relation *ddr)
397 struct data_reference *dra, *drb;
399 fprintf (outf, "(Data Dep: \n");
401 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
403 if (ddr)
405 dra = DDR_A (ddr);
406 drb = DDR_B (ddr);
407 if (dra)
408 dump_data_reference (outf, dra);
409 else
410 fprintf (outf, " (nil)\n");
411 if (drb)
412 dump_data_reference (outf, drb);
413 else
414 fprintf (outf, " (nil)\n");
416 fprintf (outf, " (don't know)\n)\n");
417 return;
420 dra = DDR_A (ddr);
421 drb = DDR_B (ddr);
422 dump_data_reference (outf, dra);
423 dump_data_reference (outf, drb);
425 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
426 fprintf (outf, " (no dependence)\n");
428 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
430 unsigned int i;
431 struct loop *loopi;
433 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
435 fprintf (outf, " access_fn_A: ");
436 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
437 fprintf (outf, " access_fn_B: ");
438 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
439 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
442 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
443 fprintf (outf, " loop nest: (");
444 FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
445 fprintf (outf, "%d ", loopi->num);
446 fprintf (outf, ")\n");
448 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
450 fprintf (outf, " distance_vector: ");
451 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
452 DDR_NB_LOOPS (ddr));
455 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
457 fprintf (outf, " direction_vector: ");
458 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
459 DDR_NB_LOOPS (ddr));
463 fprintf (outf, ")\n");
466 /* Debug version. */
468 DEBUG_FUNCTION void
469 debug_data_dependence_relation (struct data_dependence_relation *ddr)
471 dump_data_dependence_relation (stderr, ddr);
474 /* Dump into FILE all the dependence relations from DDRS. */
476 void
477 dump_data_dependence_relations (FILE *file,
478 vec<ddr_p> ddrs)
480 unsigned int i;
481 struct data_dependence_relation *ddr;
483 FOR_EACH_VEC_ELT (ddrs, i, ddr)
484 dump_data_dependence_relation (file, ddr);
487 DEBUG_FUNCTION void
488 debug (vec<ddr_p> &ref)
490 dump_data_dependence_relations (stderr, ref);
493 DEBUG_FUNCTION void
494 debug (vec<ddr_p> *ptr)
496 if (ptr)
497 debug (*ptr);
498 else
499 fprintf (stderr, "<nil>\n");
503 /* Dump to STDERR all the dependence relations from DDRS. */
505 DEBUG_FUNCTION void
506 debug_data_dependence_relations (vec<ddr_p> ddrs)
508 dump_data_dependence_relations (stderr, ddrs);
511 /* Dumps the distance and direction vectors in FILE. DDRS contains
512 the dependence relations, and VECT_SIZE is the size of the
513 dependence vectors, or in other words the number of loops in the
514 considered nest. */
516 static void
517 dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
519 unsigned int i, j;
520 struct data_dependence_relation *ddr;
521 lambda_vector v;
523 FOR_EACH_VEC_ELT (ddrs, i, ddr)
524 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
526 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), j, v)
528 fprintf (file, "DISTANCE_V (");
529 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
530 fprintf (file, ")\n");
533 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), j, v)
535 fprintf (file, "DIRECTION_V (");
536 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
537 fprintf (file, ")\n");
541 fprintf (file, "\n\n");
544 /* Dumps the data dependence relations DDRS in FILE. */
546 static void
547 dump_ddrs (FILE *file, vec<ddr_p> ddrs)
549 unsigned int i;
550 struct data_dependence_relation *ddr;
552 FOR_EACH_VEC_ELT (ddrs, i, ddr)
553 dump_data_dependence_relation (file, ddr);
555 fprintf (file, "\n\n");
558 DEBUG_FUNCTION void
559 debug_ddrs (vec<ddr_p> ddrs)
561 dump_ddrs (stderr, ddrs);
564 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
565 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
566 constant of type ssizetype, and returns true. If we cannot do this
567 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
568 is returned. */
570 static bool
571 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
572 tree *var, tree *off)
574 tree var0, var1;
575 tree off0, off1;
576 enum tree_code ocode = code;
578 *var = NULL_TREE;
579 *off = NULL_TREE;
581 switch (code)
583 case INTEGER_CST:
584 *var = build_int_cst (type, 0);
585 *off = fold_convert (ssizetype, op0);
586 return true;
588 case POINTER_PLUS_EXPR:
589 ocode = PLUS_EXPR;
590 /* FALLTHROUGH */
591 case PLUS_EXPR:
592 case MINUS_EXPR:
593 split_constant_offset (op0, &var0, &off0);
594 split_constant_offset (op1, &var1, &off1);
595 *var = fold_build2 (code, type, var0, var1);
596 *off = size_binop (ocode, off0, off1);
597 return true;
599 case MULT_EXPR:
600 if (TREE_CODE (op1) != INTEGER_CST)
601 return false;
603 split_constant_offset (op0, &var0, &off0);
604 *var = fold_build2 (MULT_EXPR, type, var0, op1);
605 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
606 return true;
608 case ADDR_EXPR:
610 tree base, poffset;
611 HOST_WIDE_INT pbitsize, pbitpos;
612 enum machine_mode pmode;
613 int punsignedp, pvolatilep;
615 op0 = TREE_OPERAND (op0, 0);
616 base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
617 &pmode, &punsignedp, &pvolatilep, false);
619 if (pbitpos % BITS_PER_UNIT != 0)
620 return false;
621 base = build_fold_addr_expr (base);
622 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
624 if (poffset)
626 split_constant_offset (poffset, &poffset, &off1);
627 off0 = size_binop (PLUS_EXPR, off0, off1);
628 if (POINTER_TYPE_P (TREE_TYPE (base)))
629 base = fold_build_pointer_plus (base, poffset);
630 else
631 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
632 fold_convert (TREE_TYPE (base), poffset));
635 var0 = fold_convert (type, base);
637 /* If variable length types are involved, punt, otherwise casts
638 might be converted into ARRAY_REFs in gimplify_conversion.
639 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
640 possibly no longer appears in current GIMPLE, might resurface.
641 This perhaps could run
642 if (CONVERT_EXPR_P (var0))
644 gimplify_conversion (&var0);
645 // Attempt to fill in any within var0 found ARRAY_REF's
646 // element size from corresponding op embedded ARRAY_REF,
647 // if unsuccessful, just punt.
648 } */
649 while (POINTER_TYPE_P (type))
650 type = TREE_TYPE (type);
651 if (int_size_in_bytes (type) < 0)
652 return false;
654 *var = var0;
655 *off = off0;
656 return true;
659 case SSA_NAME:
661 gimple def_stmt = SSA_NAME_DEF_STMT (op0);
662 enum tree_code subcode;
664 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
665 return false;
667 var0 = gimple_assign_rhs1 (def_stmt);
668 subcode = gimple_assign_rhs_code (def_stmt);
669 var1 = gimple_assign_rhs2 (def_stmt);
671 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
673 CASE_CONVERT:
675 /* We must not introduce undefined overflow, and we must not change the value.
676 Hence we're okay if the inner type doesn't overflow to start with
677 (pointer or signed), the outer type also is an integer or pointer
678 and the outer precision is at least as large as the inner. */
679 tree itype = TREE_TYPE (op0);
680 if ((POINTER_TYPE_P (itype)
681 || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
682 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
683 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
685 split_constant_offset (op0, &var0, off);
686 *var = fold_convert (type, var0);
687 return true;
689 return false;
692 default:
693 return false;
697 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
698 will be ssizetype. */
700 void
701 split_constant_offset (tree exp, tree *var, tree *off)
703 tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
704 enum tree_code code;
706 *var = exp;
707 *off = ssize_int (0);
708 STRIP_NOPS (exp);
710 if (tree_is_chrec (exp)
711 || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
712 return;
714 otype = TREE_TYPE (exp);
715 code = TREE_CODE (exp);
716 extract_ops_from_tree (exp, &code, &op0, &op1);
717 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
719 *var = fold_convert (type, e);
720 *off = o;
724 /* Returns the address ADDR of an object in a canonical shape (without nop
725 casts, and with type of pointer to the object). */
727 static tree
728 canonicalize_base_object_address (tree addr)
730 tree orig = addr;
732 STRIP_NOPS (addr);
734 /* The base address may be obtained by casting from integer, in that case
735 keep the cast. */
736 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
737 return orig;
739 if (TREE_CODE (addr) != ADDR_EXPR)
740 return addr;
742 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
745 /* Analyzes the behavior of the memory reference DR in the innermost loop or
746 basic block that contains it. Returns true if analysis succeed or false
747 otherwise. */
749 bool
750 dr_analyze_innermost (struct data_reference *dr, struct loop *nest)
752 gimple stmt = DR_STMT (dr);
753 struct loop *loop = loop_containing_stmt (stmt);
754 tree ref = DR_REF (dr);
755 HOST_WIDE_INT pbitsize, pbitpos;
756 tree base, poffset;
757 enum machine_mode pmode;
758 int punsignedp, pvolatilep;
759 affine_iv base_iv, offset_iv;
760 tree init, dinit, step;
761 bool in_loop = (loop && loop->num);
763 if (dump_file && (dump_flags & TDF_DETAILS))
764 fprintf (dump_file, "analyze_innermost: ");
766 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
767 &pmode, &punsignedp, &pvolatilep, false);
768 gcc_assert (base != NULL_TREE);
770 if (pbitpos % BITS_PER_UNIT != 0)
772 if (dump_file && (dump_flags & TDF_DETAILS))
773 fprintf (dump_file, "failed: bit offset alignment.\n");
774 return false;
777 if (TREE_CODE (base) == MEM_REF)
779 if (!integer_zerop (TREE_OPERAND (base, 1)))
781 double_int moff = mem_ref_offset (base);
782 tree mofft = double_int_to_tree (sizetype, moff);
783 if (!poffset)
784 poffset = mofft;
785 else
786 poffset = size_binop (PLUS_EXPR, poffset, mofft);
788 base = TREE_OPERAND (base, 0);
790 else
791 base = build_fold_addr_expr (base);
793 if (in_loop)
795 if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
796 nest ? true : false))
798 if (nest)
800 if (dump_file && (dump_flags & TDF_DETAILS))
801 fprintf (dump_file, "failed: evolution of base is not"
802 " affine.\n");
803 return false;
805 else
807 base_iv.base = base;
808 base_iv.step = ssize_int (0);
809 base_iv.no_overflow = true;
813 else
815 base_iv.base = base;
816 base_iv.step = ssize_int (0);
817 base_iv.no_overflow = true;
820 if (!poffset)
822 offset_iv.base = ssize_int (0);
823 offset_iv.step = ssize_int (0);
825 else
827 if (!in_loop)
829 offset_iv.base = poffset;
830 offset_iv.step = ssize_int (0);
832 else if (!simple_iv (loop, loop_containing_stmt (stmt),
833 poffset, &offset_iv,
834 nest ? true : false))
836 if (nest)
838 if (dump_file && (dump_flags & TDF_DETAILS))
839 fprintf (dump_file, "failed: evolution of offset is not"
840 " affine.\n");
841 return false;
843 else
845 offset_iv.base = poffset;
846 offset_iv.step = ssize_int (0);
851 init = ssize_int (pbitpos / BITS_PER_UNIT);
852 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
853 init = size_binop (PLUS_EXPR, init, dinit);
854 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
855 init = size_binop (PLUS_EXPR, init, dinit);
857 step = size_binop (PLUS_EXPR,
858 fold_convert (ssizetype, base_iv.step),
859 fold_convert (ssizetype, offset_iv.step));
861 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
863 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
864 DR_INIT (dr) = init;
865 DR_STEP (dr) = step;
867 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
869 if (dump_file && (dump_flags & TDF_DETAILS))
870 fprintf (dump_file, "success.\n");
872 return true;
875 /* Determines the base object and the list of indices of memory reference
876 DR, analyzed in LOOP and instantiated in loop nest NEST. */
878 static void
879 dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
881 vec<tree> access_fns = vNULL;
882 tree ref, op;
883 tree base, off, access_fn;
884 basic_block before_loop;
886 /* If analyzing a basic-block there are no indices to analyze
887 and thus no access functions. */
888 if (!nest)
890 DR_BASE_OBJECT (dr) = DR_REF (dr);
891 DR_ACCESS_FNS (dr).create (0);
892 return;
895 ref = DR_REF (dr);
896 before_loop = block_before_loop (nest);
898 /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
899 into a two element array with a constant index. The base is
900 then just the immediate underlying object. */
901 if (TREE_CODE (ref) == REALPART_EXPR)
903 ref = TREE_OPERAND (ref, 0);
904 access_fns.safe_push (integer_zero_node);
906 else if (TREE_CODE (ref) == IMAGPART_EXPR)
908 ref = TREE_OPERAND (ref, 0);
909 access_fns.safe_push (integer_one_node);
912 /* Analyze access functions of dimensions we know to be independent. */
913 while (handled_component_p (ref))
915 if (TREE_CODE (ref) == ARRAY_REF)
917 op = TREE_OPERAND (ref, 1);
918 access_fn = analyze_scalar_evolution (loop, op);
919 access_fn = instantiate_scev (before_loop, loop, access_fn);
920 access_fns.safe_push (access_fn);
922 else if (TREE_CODE (ref) == COMPONENT_REF
923 && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
925 /* For COMPONENT_REFs of records (but not unions!) use the
926 FIELD_DECL offset as constant access function so we can
927 disambiguate a[i].f1 and a[i].f2. */
928 tree off = component_ref_field_offset (ref);
929 off = size_binop (PLUS_EXPR,
930 size_binop (MULT_EXPR,
931 fold_convert (bitsizetype, off),
932 bitsize_int (BITS_PER_UNIT)),
933 DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
934 access_fns.safe_push (off);
936 else
937 /* If we have an unhandled component we could not translate
938 to an access function stop analyzing. We have determined
939 our base object in this case. */
940 break;
942 ref = TREE_OPERAND (ref, 0);
945 /* If the address operand of a MEM_REF base has an evolution in the
946 analyzed nest, add it as an additional independent access-function. */
947 if (TREE_CODE (ref) == MEM_REF)
949 op = TREE_OPERAND (ref, 0);
950 access_fn = analyze_scalar_evolution (loop, op);
951 access_fn = instantiate_scev (before_loop, loop, access_fn);
952 if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
954 tree orig_type;
955 tree memoff = TREE_OPERAND (ref, 1);
956 base = initial_condition (access_fn);
957 orig_type = TREE_TYPE (base);
958 STRIP_USELESS_TYPE_CONVERSION (base);
959 split_constant_offset (base, &base, &off);
960 /* Fold the MEM_REF offset into the evolutions initial
961 value to make more bases comparable. */
962 if (!integer_zerop (memoff))
964 off = size_binop (PLUS_EXPR, off,
965 fold_convert (ssizetype, memoff));
966 memoff = build_int_cst (TREE_TYPE (memoff), 0);
968 access_fn = chrec_replace_initial_condition
969 (access_fn, fold_convert (orig_type, off));
970 /* ??? This is still not a suitable base object for
971 dr_may_alias_p - the base object needs to be an
972 access that covers the object as whole. With
973 an evolution in the pointer this cannot be
974 guaranteed.
975 As a band-aid, mark the access so we can special-case
976 it in dr_may_alias_p. */
977 ref = fold_build2_loc (EXPR_LOCATION (ref),
978 MEM_REF, TREE_TYPE (ref),
979 base, memoff);
980 DR_UNCONSTRAINED_BASE (dr) = true;
981 access_fns.safe_push (access_fn);
984 else if (DECL_P (ref))
986 /* Canonicalize DR_BASE_OBJECT to MEM_REF form. */
987 ref = build2 (MEM_REF, TREE_TYPE (ref),
988 build_fold_addr_expr (ref),
989 build_int_cst (reference_alias_ptr_type (ref), 0));
992 DR_BASE_OBJECT (dr) = ref;
993 DR_ACCESS_FNS (dr) = access_fns;
996 /* Extracts the alias analysis information from the memory reference DR. */
998 static void
999 dr_analyze_alias (struct data_reference *dr)
1001 tree ref = DR_REF (dr);
1002 tree base = get_base_address (ref), addr;
1004 if (INDIRECT_REF_P (base)
1005 || TREE_CODE (base) == MEM_REF)
1007 addr = TREE_OPERAND (base, 0);
1008 if (TREE_CODE (addr) == SSA_NAME)
1009 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1013 /* Frees data reference DR. */
1015 void
1016 free_data_ref (data_reference_p dr)
1018 DR_ACCESS_FNS (dr).release ();
1019 free (dr);
1022 /* Analyzes memory reference MEMREF accessed in STMT. The reference
1023 is read if IS_READ is true, write otherwise. Returns the
1024 data_reference description of MEMREF. NEST is the outermost loop
1025 in which the reference should be instantiated, LOOP is the loop in
1026 which the data reference should be analyzed. */
1028 struct data_reference *
1029 create_data_ref (loop_p nest, loop_p loop, tree memref, gimple stmt,
1030 bool is_read)
1032 struct data_reference *dr;
1034 if (dump_file && (dump_flags & TDF_DETAILS))
1036 fprintf (dump_file, "Creating dr for ");
1037 print_generic_expr (dump_file, memref, TDF_SLIM);
1038 fprintf (dump_file, "\n");
1041 dr = XCNEW (struct data_reference);
1042 DR_STMT (dr) = stmt;
1043 DR_REF (dr) = memref;
1044 DR_IS_READ (dr) = is_read;
1046 dr_analyze_innermost (dr, nest);
1047 dr_analyze_indices (dr, nest, loop);
1048 dr_analyze_alias (dr);
1050 if (dump_file && (dump_flags & TDF_DETAILS))
1052 unsigned i;
1053 fprintf (dump_file, "\tbase_address: ");
1054 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1055 fprintf (dump_file, "\n\toffset from base address: ");
1056 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1057 fprintf (dump_file, "\n\tconstant offset from base address: ");
1058 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1059 fprintf (dump_file, "\n\tstep: ");
1060 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1061 fprintf (dump_file, "\n\taligned to: ");
1062 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
1063 fprintf (dump_file, "\n\tbase_object: ");
1064 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1065 fprintf (dump_file, "\n");
1066 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1068 fprintf (dump_file, "\tAccess function %d: ", i);
1069 print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1073 return dr;
1076 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1077 expressions. */
1078 static bool
1079 dr_equal_offsets_p1 (tree offset1, tree offset2)
1081 bool res;
1083 STRIP_NOPS (offset1);
1084 STRIP_NOPS (offset2);
1086 if (offset1 == offset2)
1087 return true;
1089 if (TREE_CODE (offset1) != TREE_CODE (offset2)
1090 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1091 return false;
1093 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1094 TREE_OPERAND (offset2, 0));
1096 if (!res || !BINARY_CLASS_P (offset1))
1097 return res;
1099 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1100 TREE_OPERAND (offset2, 1));
1102 return res;
1105 /* Check if DRA and DRB have equal offsets. */
1106 bool
1107 dr_equal_offsets_p (struct data_reference *dra,
1108 struct data_reference *drb)
1110 tree offset1, offset2;
1112 offset1 = DR_OFFSET (dra);
1113 offset2 = DR_OFFSET (drb);
1115 return dr_equal_offsets_p1 (offset1, offset2);
1118 /* Returns true if FNA == FNB. */
1120 static bool
1121 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1123 unsigned i, n = fna.length ();
1125 if (n != fnb.length ())
1126 return false;
1128 for (i = 0; i < n; i++)
1129 if (!operand_equal_p (fna[i], fnb[i], 0))
1130 return false;
1132 return true;
1135 /* If all the functions in CF are the same, returns one of them,
1136 otherwise returns NULL. */
1138 static affine_fn
1139 common_affine_function (conflict_function *cf)
1141 unsigned i;
1142 affine_fn comm;
1144 if (!CF_NONTRIVIAL_P (cf))
1145 return affine_fn ();
1147 comm = cf->fns[0];
1149 for (i = 1; i < cf->n; i++)
1150 if (!affine_function_equal_p (comm, cf->fns[i]))
1151 return affine_fn ();
1153 return comm;
1156 /* Returns the base of the affine function FN. */
1158 static tree
1159 affine_function_base (affine_fn fn)
1161 return fn[0];
1164 /* Returns true if FN is a constant. */
1166 static bool
1167 affine_function_constant_p (affine_fn fn)
1169 unsigned i;
1170 tree coef;
1172 for (i = 1; fn.iterate (i, &coef); i++)
1173 if (!integer_zerop (coef))
1174 return false;
1176 return true;
1179 /* Returns true if FN is the zero constant function. */
1181 static bool
1182 affine_function_zero_p (affine_fn fn)
1184 return (integer_zerop (affine_function_base (fn))
1185 && affine_function_constant_p (fn));
1188 /* Returns a signed integer type with the largest precision from TA
1189 and TB. */
1191 static tree
1192 signed_type_for_types (tree ta, tree tb)
1194 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1195 return signed_type_for (ta);
1196 else
1197 return signed_type_for (tb);
1200 /* Applies operation OP on affine functions FNA and FNB, and returns the
1201 result. */
1203 static affine_fn
1204 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1206 unsigned i, n, m;
1207 affine_fn ret;
1208 tree coef;
1210 if (fnb.length () > fna.length ())
1212 n = fna.length ();
1213 m = fnb.length ();
1215 else
1217 n = fnb.length ();
1218 m = fna.length ();
1221 ret.create (m);
1222 for (i = 0; i < n; i++)
1224 tree type = signed_type_for_types (TREE_TYPE (fna[i]),
1225 TREE_TYPE (fnb[i]));
1226 ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
1229 for (; fna.iterate (i, &coef); i++)
1230 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1231 coef, integer_zero_node));
1232 for (; fnb.iterate (i, &coef); i++)
1233 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1234 integer_zero_node, coef));
1236 return ret;
1239 /* Returns the sum of affine functions FNA and FNB. */
1241 static affine_fn
1242 affine_fn_plus (affine_fn fna, affine_fn fnb)
1244 return affine_fn_op (PLUS_EXPR, fna, fnb);
1247 /* Returns the difference of affine functions FNA and FNB. */
1249 static affine_fn
1250 affine_fn_minus (affine_fn fna, affine_fn fnb)
1252 return affine_fn_op (MINUS_EXPR, fna, fnb);
1255 /* Frees affine function FN. */
1257 static void
1258 affine_fn_free (affine_fn fn)
1260 fn.release ();
1263 /* Determine for each subscript in the data dependence relation DDR
1264 the distance. */
1266 static void
1267 compute_subscript_distance (struct data_dependence_relation *ddr)
1269 conflict_function *cf_a, *cf_b;
1270 affine_fn fn_a, fn_b, diff;
1272 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1274 unsigned int i;
1276 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1278 struct subscript *subscript;
1280 subscript = DDR_SUBSCRIPT (ddr, i);
1281 cf_a = SUB_CONFLICTS_IN_A (subscript);
1282 cf_b = SUB_CONFLICTS_IN_B (subscript);
1284 fn_a = common_affine_function (cf_a);
1285 fn_b = common_affine_function (cf_b);
1286 if (!fn_a.exists () || !fn_b.exists ())
1288 SUB_DISTANCE (subscript) = chrec_dont_know;
1289 return;
1291 diff = affine_fn_minus (fn_a, fn_b);
1293 if (affine_function_constant_p (diff))
1294 SUB_DISTANCE (subscript) = affine_function_base (diff);
1295 else
1296 SUB_DISTANCE (subscript) = chrec_dont_know;
1298 affine_fn_free (diff);
1303 /* Returns the conflict function for "unknown". */
1305 static conflict_function *
1306 conflict_fn_not_known (void)
1308 conflict_function *fn = XCNEW (conflict_function);
1309 fn->n = NOT_KNOWN;
1311 return fn;
1314 /* Returns the conflict function for "independent". */
1316 static conflict_function *
1317 conflict_fn_no_dependence (void)
1319 conflict_function *fn = XCNEW (conflict_function);
1320 fn->n = NO_DEPENDENCE;
1322 return fn;
1325 /* Returns true if the address of OBJ is invariant in LOOP. */
1327 static bool
1328 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1330 while (handled_component_p (obj))
1332 if (TREE_CODE (obj) == ARRAY_REF)
1334 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1335 need to check the stride and the lower bound of the reference. */
1336 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1337 loop->num)
1338 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1339 loop->num))
1340 return false;
1342 else if (TREE_CODE (obj) == COMPONENT_REF)
1344 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1345 loop->num))
1346 return false;
1348 obj = TREE_OPERAND (obj, 0);
1351 if (!INDIRECT_REF_P (obj)
1352 && TREE_CODE (obj) != MEM_REF)
1353 return true;
1355 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1356 loop->num);
1359 /* Returns false if we can prove that data references A and B do not alias,
1360 true otherwise. If LOOP_NEST is false no cross-iteration aliases are
1361 considered. */
1363 bool
1364 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
1365 bool loop_nest)
1367 tree addr_a = DR_BASE_OBJECT (a);
1368 tree addr_b = DR_BASE_OBJECT (b);
1370 /* If we are not processing a loop nest but scalar code we
1371 do not need to care about possible cross-iteration dependences
1372 and thus can process the full original reference. Do so,
1373 similar to how loop invariant motion applies extra offset-based
1374 disambiguation. */
1375 if (!loop_nest)
1377 aff_tree off1, off2;
1378 double_int size1, size2;
1379 get_inner_reference_aff (DR_REF (a), &off1, &size1);
1380 get_inner_reference_aff (DR_REF (b), &off2, &size2);
1381 aff_combination_scale (&off1, double_int_minus_one);
1382 aff_combination_add (&off2, &off1);
1383 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1384 return false;
1387 /* If we had an evolution in a MEM_REF BASE_OBJECT we do not know
1388 the size of the base-object. So we cannot do any offset/overlap
1389 based analysis but have to rely on points-to information only. */
1390 if (TREE_CODE (addr_a) == MEM_REF
1391 && DR_UNCONSTRAINED_BASE (a))
1393 if (TREE_CODE (addr_b) == MEM_REF
1394 && DR_UNCONSTRAINED_BASE (b))
1395 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
1396 TREE_OPERAND (addr_b, 0));
1397 else
1398 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
1399 build_fold_addr_expr (addr_b));
1401 else if (TREE_CODE (addr_b) == MEM_REF
1402 && DR_UNCONSTRAINED_BASE (b))
1403 return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
1404 TREE_OPERAND (addr_b, 0));
1406 /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
1407 that is being subsetted in the loop nest. */
1408 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
1409 return refs_output_dependent_p (addr_a, addr_b);
1410 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
1411 return refs_anti_dependent_p (addr_a, addr_b);
1412 return refs_may_alias_p (addr_a, addr_b);
1415 /* Initialize a data dependence relation between data accesses A and
1416 B. NB_LOOPS is the number of loops surrounding the references: the
1417 size of the classic distance/direction vectors. */
1419 struct data_dependence_relation *
1420 initialize_data_dependence_relation (struct data_reference *a,
1421 struct data_reference *b,
1422 vec<loop_p> loop_nest)
1424 struct data_dependence_relation *res;
1425 unsigned int i;
1427 res = XNEW (struct data_dependence_relation);
1428 DDR_A (res) = a;
1429 DDR_B (res) = b;
1430 DDR_LOOP_NEST (res).create (0);
1431 DDR_REVERSED_P (res) = false;
1432 DDR_SUBSCRIPTS (res).create (0);
1433 DDR_DIR_VECTS (res).create (0);
1434 DDR_DIST_VECTS (res).create (0);
1436 if (a == NULL || b == NULL)
1438 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1439 return res;
1442 /* If the data references do not alias, then they are independent. */
1443 if (!dr_may_alias_p (a, b, loop_nest.exists ()))
1445 DDR_ARE_DEPENDENT (res) = chrec_known;
1446 return res;
1449 /* The case where the references are exactly the same. */
1450 if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1452 if (loop_nest.exists ()
1453 && !object_address_invariant_in_loop_p (loop_nest[0],
1454 DR_BASE_OBJECT (a)))
1456 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1457 return res;
1459 DDR_AFFINE_P (res) = true;
1460 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1461 DDR_SUBSCRIPTS (res).create (DR_NUM_DIMENSIONS (a));
1462 DDR_LOOP_NEST (res) = loop_nest;
1463 DDR_INNER_LOOP (res) = 0;
1464 DDR_SELF_REFERENCE (res) = true;
1465 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1467 struct subscript *subscript;
1469 subscript = XNEW (struct subscript);
1470 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1471 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1472 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1473 SUB_DISTANCE (subscript) = chrec_dont_know;
1474 DDR_SUBSCRIPTS (res).safe_push (subscript);
1476 return res;
1479 /* If the references do not access the same object, we do not know
1480 whether they alias or not. */
1481 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1483 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1484 return res;
1487 /* If the base of the object is not invariant in the loop nest, we cannot
1488 analyze it. TODO -- in fact, it would suffice to record that there may
1489 be arbitrary dependences in the loops where the base object varies. */
1490 if (loop_nest.exists ()
1491 && !object_address_invariant_in_loop_p (loop_nest[0],
1492 DR_BASE_OBJECT (a)))
1494 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1495 return res;
1498 /* If the number of dimensions of the access to not agree we can have
1499 a pointer access to a component of the array element type and an
1500 array access while the base-objects are still the same. Punt. */
1501 if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
1503 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1504 return res;
1507 DDR_AFFINE_P (res) = true;
1508 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1509 DDR_SUBSCRIPTS (res).create (DR_NUM_DIMENSIONS (a));
1510 DDR_LOOP_NEST (res) = loop_nest;
1511 DDR_INNER_LOOP (res) = 0;
1512 DDR_SELF_REFERENCE (res) = false;
1514 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1516 struct subscript *subscript;
1518 subscript = XNEW (struct subscript);
1519 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1520 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1521 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1522 SUB_DISTANCE (subscript) = chrec_dont_know;
1523 DDR_SUBSCRIPTS (res).safe_push (subscript);
1526 return res;
1529 /* Frees memory used by the conflict function F. */
1531 static void
1532 free_conflict_function (conflict_function *f)
1534 unsigned i;
1536 if (CF_NONTRIVIAL_P (f))
1538 for (i = 0; i < f->n; i++)
1539 affine_fn_free (f->fns[i]);
1541 free (f);
1544 /* Frees memory used by SUBSCRIPTS. */
1546 static void
1547 free_subscripts (vec<subscript_p> subscripts)
1549 unsigned i;
1550 subscript_p s;
1552 FOR_EACH_VEC_ELT (subscripts, i, s)
1554 free_conflict_function (s->conflicting_iterations_in_a);
1555 free_conflict_function (s->conflicting_iterations_in_b);
1556 free (s);
1558 subscripts.release ();
1561 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1562 description. */
1564 static inline void
1565 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1566 tree chrec)
1568 DDR_ARE_DEPENDENT (ddr) = chrec;
1569 free_subscripts (DDR_SUBSCRIPTS (ddr));
1570 DDR_SUBSCRIPTS (ddr).create (0);
1573 /* The dependence relation DDR cannot be represented by a distance
1574 vector. */
1576 static inline void
1577 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1579 if (dump_file && (dump_flags & TDF_DETAILS))
1580 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1582 DDR_AFFINE_P (ddr) = false;
1587 /* This section contains the classic Banerjee tests. */
1589 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1590 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1592 static inline bool
1593 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1595 return (evolution_function_is_constant_p (chrec_a)
1596 && evolution_function_is_constant_p (chrec_b));
1599 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1600 variable, i.e., if the SIV (Single Index Variable) test is true. */
1602 static bool
1603 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1605 if ((evolution_function_is_constant_p (chrec_a)
1606 && evolution_function_is_univariate_p (chrec_b))
1607 || (evolution_function_is_constant_p (chrec_b)
1608 && evolution_function_is_univariate_p (chrec_a)))
1609 return true;
1611 if (evolution_function_is_univariate_p (chrec_a)
1612 && evolution_function_is_univariate_p (chrec_b))
1614 switch (TREE_CODE (chrec_a))
1616 case POLYNOMIAL_CHREC:
1617 switch (TREE_CODE (chrec_b))
1619 case POLYNOMIAL_CHREC:
1620 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1621 return false;
1623 default:
1624 return true;
1627 default:
1628 return true;
1632 return false;
1635 /* Creates a conflict function with N dimensions. The affine functions
1636 in each dimension follow. */
1638 static conflict_function *
1639 conflict_fn (unsigned n, ...)
1641 unsigned i;
1642 conflict_function *ret = XCNEW (conflict_function);
1643 va_list ap;
1645 gcc_assert (0 < n && n <= MAX_DIM);
1646 va_start (ap, n);
1648 ret->n = n;
1649 for (i = 0; i < n; i++)
1650 ret->fns[i] = va_arg (ap, affine_fn);
1651 va_end (ap);
1653 return ret;
1656 /* Returns constant affine function with value CST. */
1658 static affine_fn
1659 affine_fn_cst (tree cst)
1661 affine_fn fn;
1662 fn.create (1);
1663 fn.quick_push (cst);
1664 return fn;
1667 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1669 static affine_fn
1670 affine_fn_univar (tree cst, unsigned dim, tree coef)
1672 affine_fn fn;
1673 fn.create (dim + 1);
1674 unsigned i;
1676 gcc_assert (dim > 0);
1677 fn.quick_push (cst);
1678 for (i = 1; i < dim; i++)
1679 fn.quick_push (integer_zero_node);
1680 fn.quick_push (coef);
1681 return fn;
1684 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1685 *OVERLAPS_B are initialized to the functions that describe the
1686 relation between the elements accessed twice by CHREC_A and
1687 CHREC_B. For k >= 0, the following property is verified:
1689 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1691 static void
1692 analyze_ziv_subscript (tree chrec_a,
1693 tree chrec_b,
1694 conflict_function **overlaps_a,
1695 conflict_function **overlaps_b,
1696 tree *last_conflicts)
1698 tree type, difference;
1699 dependence_stats.num_ziv++;
1701 if (dump_file && (dump_flags & TDF_DETAILS))
1702 fprintf (dump_file, "(analyze_ziv_subscript \n");
1704 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1705 chrec_a = chrec_convert (type, chrec_a, NULL);
1706 chrec_b = chrec_convert (type, chrec_b, NULL);
1707 difference = chrec_fold_minus (type, chrec_a, chrec_b);
1709 switch (TREE_CODE (difference))
1711 case INTEGER_CST:
1712 if (integer_zerop (difference))
1714 /* The difference is equal to zero: the accessed index
1715 overlaps for each iteration in the loop. */
1716 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1717 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1718 *last_conflicts = chrec_dont_know;
1719 dependence_stats.num_ziv_dependent++;
1721 else
1723 /* The accesses do not overlap. */
1724 *overlaps_a = conflict_fn_no_dependence ();
1725 *overlaps_b = conflict_fn_no_dependence ();
1726 *last_conflicts = integer_zero_node;
1727 dependence_stats.num_ziv_independent++;
1729 break;
1731 default:
1732 /* We're not sure whether the indexes overlap. For the moment,
1733 conservatively answer "don't know". */
1734 if (dump_file && (dump_flags & TDF_DETAILS))
1735 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1737 *overlaps_a = conflict_fn_not_known ();
1738 *overlaps_b = conflict_fn_not_known ();
1739 *last_conflicts = chrec_dont_know;
1740 dependence_stats.num_ziv_unimplemented++;
1741 break;
1744 if (dump_file && (dump_flags & TDF_DETAILS))
1745 fprintf (dump_file, ")\n");
1748 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
1749 and only if it fits to the int type. If this is not the case, or the
1750 bound on the number of iterations of LOOP could not be derived, returns
1751 chrec_dont_know. */
1753 static tree
1754 max_stmt_executions_tree (struct loop *loop)
1756 double_int nit;
1758 if (!max_stmt_executions (loop, &nit))
1759 return chrec_dont_know;
1761 if (!double_int_fits_to_tree_p (unsigned_type_node, nit))
1762 return chrec_dont_know;
1764 return double_int_to_tree (unsigned_type_node, nit);
1767 /* Determine whether the CHREC is always positive/negative. If the expression
1768 cannot be statically analyzed, return false, otherwise set the answer into
1769 VALUE. */
1771 static bool
1772 chrec_is_positive (tree chrec, bool *value)
1774 bool value0, value1, value2;
1775 tree end_value, nb_iter;
1777 switch (TREE_CODE (chrec))
1779 case POLYNOMIAL_CHREC:
1780 if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
1781 || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
1782 return false;
1784 /* FIXME -- overflows. */
1785 if (value0 == value1)
1787 *value = value0;
1788 return true;
1791 /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
1792 and the proof consists in showing that the sign never
1793 changes during the execution of the loop, from 0 to
1794 loop->nb_iterations. */
1795 if (!evolution_function_is_affine_p (chrec))
1796 return false;
1798 nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
1799 if (chrec_contains_undetermined (nb_iter))
1800 return false;
1802 #if 0
1803 /* TODO -- If the test is after the exit, we may decrease the number of
1804 iterations by one. */
1805 if (after_exit)
1806 nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
1807 #endif
1809 end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
1811 if (!chrec_is_positive (end_value, &value2))
1812 return false;
1814 *value = value0;
1815 return value0 == value1;
1817 case INTEGER_CST:
1818 switch (tree_int_cst_sgn (chrec))
1820 case -1:
1821 *value = false;
1822 break;
1823 case 1:
1824 *value = true;
1825 break;
1826 default:
1827 return false;
1829 return true;
1831 default:
1832 return false;
1837 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1838 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1839 *OVERLAPS_B are initialized to the functions that describe the
1840 relation between the elements accessed twice by CHREC_A and
1841 CHREC_B. For k >= 0, the following property is verified:
1843 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1845 static void
1846 analyze_siv_subscript_cst_affine (tree chrec_a,
1847 tree chrec_b,
1848 conflict_function **overlaps_a,
1849 conflict_function **overlaps_b,
1850 tree *last_conflicts)
1852 bool value0, value1, value2;
1853 tree type, difference, tmp;
1855 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1856 chrec_a = chrec_convert (type, chrec_a, NULL);
1857 chrec_b = chrec_convert (type, chrec_b, NULL);
1858 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1860 /* Special case overlap in the first iteration. */
1861 if (integer_zerop (difference))
1863 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1864 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1865 *last_conflicts = integer_one_node;
1866 return;
1869 if (!chrec_is_positive (initial_condition (difference), &value0))
1871 if (dump_file && (dump_flags & TDF_DETAILS))
1872 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1874 dependence_stats.num_siv_unimplemented++;
1875 *overlaps_a = conflict_fn_not_known ();
1876 *overlaps_b = conflict_fn_not_known ();
1877 *last_conflicts = chrec_dont_know;
1878 return;
1880 else
1882 if (value0 == false)
1884 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1886 if (dump_file && (dump_flags & TDF_DETAILS))
1887 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1889 *overlaps_a = conflict_fn_not_known ();
1890 *overlaps_b = conflict_fn_not_known ();
1891 *last_conflicts = chrec_dont_know;
1892 dependence_stats.num_siv_unimplemented++;
1893 return;
1895 else
1897 if (value1 == true)
1899 /* Example:
1900 chrec_a = 12
1901 chrec_b = {10, +, 1}
1904 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1906 HOST_WIDE_INT numiter;
1907 struct loop *loop = get_chrec_loop (chrec_b);
1909 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1910 tmp = fold_build2 (EXACT_DIV_EXPR, type,
1911 fold_build1 (ABS_EXPR, type, difference),
1912 CHREC_RIGHT (chrec_b));
1913 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1914 *last_conflicts = integer_one_node;
1917 /* Perform weak-zero siv test to see if overlap is
1918 outside the loop bounds. */
1919 numiter = max_stmt_executions_int (loop);
1921 if (numiter >= 0
1922 && compare_tree_int (tmp, numiter) > 0)
1924 free_conflict_function (*overlaps_a);
1925 free_conflict_function (*overlaps_b);
1926 *overlaps_a = conflict_fn_no_dependence ();
1927 *overlaps_b = conflict_fn_no_dependence ();
1928 *last_conflicts = integer_zero_node;
1929 dependence_stats.num_siv_independent++;
1930 return;
1932 dependence_stats.num_siv_dependent++;
1933 return;
1936 /* When the step does not divide the difference, there are
1937 no overlaps. */
1938 else
1940 *overlaps_a = conflict_fn_no_dependence ();
1941 *overlaps_b = conflict_fn_no_dependence ();
1942 *last_conflicts = integer_zero_node;
1943 dependence_stats.num_siv_independent++;
1944 return;
1948 else
1950 /* Example:
1951 chrec_a = 12
1952 chrec_b = {10, +, -1}
1954 In this case, chrec_a will not overlap with chrec_b. */
1955 *overlaps_a = conflict_fn_no_dependence ();
1956 *overlaps_b = conflict_fn_no_dependence ();
1957 *last_conflicts = integer_zero_node;
1958 dependence_stats.num_siv_independent++;
1959 return;
1963 else
1965 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1967 if (dump_file && (dump_flags & TDF_DETAILS))
1968 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1970 *overlaps_a = conflict_fn_not_known ();
1971 *overlaps_b = conflict_fn_not_known ();
1972 *last_conflicts = chrec_dont_know;
1973 dependence_stats.num_siv_unimplemented++;
1974 return;
1976 else
1978 if (value2 == false)
1980 /* Example:
1981 chrec_a = 3
1982 chrec_b = {10, +, -1}
1984 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1986 HOST_WIDE_INT numiter;
1987 struct loop *loop = get_chrec_loop (chrec_b);
1989 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1990 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
1991 CHREC_RIGHT (chrec_b));
1992 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1993 *last_conflicts = integer_one_node;
1995 /* Perform weak-zero siv test to see if overlap is
1996 outside the loop bounds. */
1997 numiter = max_stmt_executions_int (loop);
1999 if (numiter >= 0
2000 && compare_tree_int (tmp, numiter) > 0)
2002 free_conflict_function (*overlaps_a);
2003 free_conflict_function (*overlaps_b);
2004 *overlaps_a = conflict_fn_no_dependence ();
2005 *overlaps_b = conflict_fn_no_dependence ();
2006 *last_conflicts = integer_zero_node;
2007 dependence_stats.num_siv_independent++;
2008 return;
2010 dependence_stats.num_siv_dependent++;
2011 return;
2014 /* When the step does not divide the difference, there
2015 are no overlaps. */
2016 else
2018 *overlaps_a = conflict_fn_no_dependence ();
2019 *overlaps_b = conflict_fn_no_dependence ();
2020 *last_conflicts = integer_zero_node;
2021 dependence_stats.num_siv_independent++;
2022 return;
2025 else
2027 /* Example:
2028 chrec_a = 3
2029 chrec_b = {4, +, 1}
2031 In this case, chrec_a will not overlap with chrec_b. */
2032 *overlaps_a = conflict_fn_no_dependence ();
2033 *overlaps_b = conflict_fn_no_dependence ();
2034 *last_conflicts = integer_zero_node;
2035 dependence_stats.num_siv_independent++;
2036 return;
2043 /* Helper recursive function for initializing the matrix A. Returns
2044 the initial value of CHREC. */
2046 static tree
2047 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
2049 gcc_assert (chrec);
2051 switch (TREE_CODE (chrec))
2053 case POLYNOMIAL_CHREC:
2054 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
2056 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
2057 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
2059 case PLUS_EXPR:
2060 case MULT_EXPR:
2061 case MINUS_EXPR:
2063 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2064 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
2066 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
2069 case NOP_EXPR:
2071 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2072 return chrec_convert (chrec_type (chrec), op, NULL);
2075 case BIT_NOT_EXPR:
2077 /* Handle ~X as -1 - X. */
2078 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2079 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
2080 build_int_cst (TREE_TYPE (chrec), -1), op);
2083 case INTEGER_CST:
2084 return chrec;
2086 default:
2087 gcc_unreachable ();
2088 return NULL_TREE;
2092 #define FLOOR_DIV(x,y) ((x) / (y))
2094 /* Solves the special case of the Diophantine equation:
2095 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
2097 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
2098 number of iterations that loops X and Y run. The overlaps will be
2099 constructed as evolutions in dimension DIM. */
2101 static void
2102 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
2103 affine_fn *overlaps_a,
2104 affine_fn *overlaps_b,
2105 tree *last_conflicts, int dim)
2107 if (((step_a > 0 && step_b > 0)
2108 || (step_a < 0 && step_b < 0)))
2110 int step_overlaps_a, step_overlaps_b;
2111 int gcd_steps_a_b, last_conflict, tau2;
2113 gcd_steps_a_b = gcd (step_a, step_b);
2114 step_overlaps_a = step_b / gcd_steps_a_b;
2115 step_overlaps_b = step_a / gcd_steps_a_b;
2117 if (niter > 0)
2119 tau2 = FLOOR_DIV (niter, step_overlaps_a);
2120 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
2121 last_conflict = tau2;
2122 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2124 else
2125 *last_conflicts = chrec_dont_know;
2127 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
2128 build_int_cst (NULL_TREE,
2129 step_overlaps_a));
2130 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
2131 build_int_cst (NULL_TREE,
2132 step_overlaps_b));
2135 else
2137 *overlaps_a = affine_fn_cst (integer_zero_node);
2138 *overlaps_b = affine_fn_cst (integer_zero_node);
2139 *last_conflicts = integer_zero_node;
2143 /* Solves the special case of a Diophantine equation where CHREC_A is
2144 an affine bivariate function, and CHREC_B is an affine univariate
2145 function. For example,
2147 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
2149 has the following overlapping functions:
2151 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
2152 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
2153 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
2155 FORNOW: This is a specialized implementation for a case occurring in
2156 a common benchmark. Implement the general algorithm. */
2158 static void
2159 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
2160 conflict_function **overlaps_a,
2161 conflict_function **overlaps_b,
2162 tree *last_conflicts)
2164 bool xz_p, yz_p, xyz_p;
2165 int step_x, step_y, step_z;
2166 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
2167 affine_fn overlaps_a_xz, overlaps_b_xz;
2168 affine_fn overlaps_a_yz, overlaps_b_yz;
2169 affine_fn overlaps_a_xyz, overlaps_b_xyz;
2170 affine_fn ova1, ova2, ovb;
2171 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
2173 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
2174 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
2175 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
2177 niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
2178 niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
2179 niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
2181 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
2183 if (dump_file && (dump_flags & TDF_DETAILS))
2184 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
2186 *overlaps_a = conflict_fn_not_known ();
2187 *overlaps_b = conflict_fn_not_known ();
2188 *last_conflicts = chrec_dont_know;
2189 return;
2192 niter = MIN (niter_x, niter_z);
2193 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
2194 &overlaps_a_xz,
2195 &overlaps_b_xz,
2196 &last_conflicts_xz, 1);
2197 niter = MIN (niter_y, niter_z);
2198 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
2199 &overlaps_a_yz,
2200 &overlaps_b_yz,
2201 &last_conflicts_yz, 2);
2202 niter = MIN (niter_x, niter_z);
2203 niter = MIN (niter_y, niter);
2204 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
2205 &overlaps_a_xyz,
2206 &overlaps_b_xyz,
2207 &last_conflicts_xyz, 3);
2209 xz_p = !integer_zerop (last_conflicts_xz);
2210 yz_p = !integer_zerop (last_conflicts_yz);
2211 xyz_p = !integer_zerop (last_conflicts_xyz);
2213 if (xz_p || yz_p || xyz_p)
2215 ova1 = affine_fn_cst (integer_zero_node);
2216 ova2 = affine_fn_cst (integer_zero_node);
2217 ovb = affine_fn_cst (integer_zero_node);
2218 if (xz_p)
2220 affine_fn t0 = ova1;
2221 affine_fn t2 = ovb;
2223 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2224 ovb = affine_fn_plus (ovb, overlaps_b_xz);
2225 affine_fn_free (t0);
2226 affine_fn_free (t2);
2227 *last_conflicts = last_conflicts_xz;
2229 if (yz_p)
2231 affine_fn t0 = ova2;
2232 affine_fn t2 = ovb;
2234 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2235 ovb = affine_fn_plus (ovb, overlaps_b_yz);
2236 affine_fn_free (t0);
2237 affine_fn_free (t2);
2238 *last_conflicts = last_conflicts_yz;
2240 if (xyz_p)
2242 affine_fn t0 = ova1;
2243 affine_fn t2 = ova2;
2244 affine_fn t4 = ovb;
2246 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2247 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2248 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2249 affine_fn_free (t0);
2250 affine_fn_free (t2);
2251 affine_fn_free (t4);
2252 *last_conflicts = last_conflicts_xyz;
2254 *overlaps_a = conflict_fn (2, ova1, ova2);
2255 *overlaps_b = conflict_fn (1, ovb);
2257 else
2259 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2260 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2261 *last_conflicts = integer_zero_node;
2264 affine_fn_free (overlaps_a_xz);
2265 affine_fn_free (overlaps_b_xz);
2266 affine_fn_free (overlaps_a_yz);
2267 affine_fn_free (overlaps_b_yz);
2268 affine_fn_free (overlaps_a_xyz);
2269 affine_fn_free (overlaps_b_xyz);
2272 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
2274 static void
2275 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
2276 int size)
2278 memcpy (vec2, vec1, size * sizeof (*vec1));
2281 /* Copy the elements of M x N matrix MAT1 to MAT2. */
2283 static void
2284 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
2285 int m, int n)
2287 int i;
2289 for (i = 0; i < m; i++)
2290 lambda_vector_copy (mat1[i], mat2[i], n);
2293 /* Store the N x N identity matrix in MAT. */
2295 static void
2296 lambda_matrix_id (lambda_matrix mat, int size)
2298 int i, j;
2300 for (i = 0; i < size; i++)
2301 for (j = 0; j < size; j++)
2302 mat[i][j] = (i == j) ? 1 : 0;
2305 /* Return the first nonzero element of vector VEC1 between START and N.
2306 We must have START <= N. Returns N if VEC1 is the zero vector. */
2308 static int
2309 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
2311 int j = start;
2312 while (j < n && vec1[j] == 0)
2313 j++;
2314 return j;
2317 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2318 R2 = R2 + CONST1 * R1. */
2320 static void
2321 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
2323 int i;
2325 if (const1 == 0)
2326 return;
2328 for (i = 0; i < n; i++)
2329 mat[r2][i] += const1 * mat[r1][i];
2332 /* Swap rows R1 and R2 in matrix MAT. */
2334 static void
2335 lambda_matrix_row_exchange (lambda_matrix mat, int r1, int r2)
2337 lambda_vector row;
2339 row = mat[r1];
2340 mat[r1] = mat[r2];
2341 mat[r2] = row;
2344 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
2345 and store the result in VEC2. */
2347 static void
2348 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
2349 int size, int const1)
2351 int i;
2353 if (const1 == 0)
2354 lambda_vector_clear (vec2, size);
2355 else
2356 for (i = 0; i < size; i++)
2357 vec2[i] = const1 * vec1[i];
2360 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
2362 static void
2363 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
2364 int size)
2366 lambda_vector_mult_const (vec1, vec2, size, -1);
2369 /* Negate row R1 of matrix MAT which has N columns. */
2371 static void
2372 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
2374 lambda_vector_negate (mat[r1], mat[r1], n);
2377 /* Return true if two vectors are equal. */
2379 static bool
2380 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
2382 int i;
2383 for (i = 0; i < size; i++)
2384 if (vec1[i] != vec2[i])
2385 return false;
2386 return true;
2389 /* Given an M x N integer matrix A, this function determines an M x
2390 M unimodular matrix U, and an M x N echelon matrix S such that
2391 "U.A = S". This decomposition is also known as "right Hermite".
2393 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2394 Restructuring Compilers" Utpal Banerjee. */
2396 static void
2397 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
2398 lambda_matrix S, lambda_matrix U)
2400 int i, j, i0 = 0;
2402 lambda_matrix_copy (A, S, m, n);
2403 lambda_matrix_id (U, m);
2405 for (j = 0; j < n; j++)
2407 if (lambda_vector_first_nz (S[j], m, i0) < m)
2409 ++i0;
2410 for (i = m - 1; i >= i0; i--)
2412 while (S[i][j] != 0)
2414 int sigma, factor, a, b;
2416 a = S[i-1][j];
2417 b = S[i][j];
2418 sigma = (a * b < 0) ? -1: 1;
2419 a = abs (a);
2420 b = abs (b);
2421 factor = sigma * (a / b);
2423 lambda_matrix_row_add (S, n, i, i-1, -factor);
2424 lambda_matrix_row_exchange (S, i, i-1);
2426 lambda_matrix_row_add (U, m, i, i-1, -factor);
2427 lambda_matrix_row_exchange (U, i, i-1);
2434 /* Determines the overlapping elements due to accesses CHREC_A and
2435 CHREC_B, that are affine functions. This function cannot handle
2436 symbolic evolution functions, ie. when initial conditions are
2437 parameters, because it uses lambda matrices of integers. */
2439 static void
2440 analyze_subscript_affine_affine (tree chrec_a,
2441 tree chrec_b,
2442 conflict_function **overlaps_a,
2443 conflict_function **overlaps_b,
2444 tree *last_conflicts)
2446 unsigned nb_vars_a, nb_vars_b, dim;
2447 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2448 lambda_matrix A, U, S;
2449 struct obstack scratch_obstack;
2451 if (eq_evolutions_p (chrec_a, chrec_b))
2453 /* The accessed index overlaps for each iteration in the
2454 loop. */
2455 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2456 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2457 *last_conflicts = chrec_dont_know;
2458 return;
2460 if (dump_file && (dump_flags & TDF_DETAILS))
2461 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2463 /* For determining the initial intersection, we have to solve a
2464 Diophantine equation. This is the most time consuming part.
2466 For answering to the question: "Is there a dependence?" we have
2467 to prove that there exists a solution to the Diophantine
2468 equation, and that the solution is in the iteration domain,
2469 i.e. the solution is positive or zero, and that the solution
2470 happens before the upper bound loop.nb_iterations. Otherwise
2471 there is no dependence. This function outputs a description of
2472 the iterations that hold the intersections. */
2474 nb_vars_a = nb_vars_in_chrec (chrec_a);
2475 nb_vars_b = nb_vars_in_chrec (chrec_b);
2477 gcc_obstack_init (&scratch_obstack);
2479 dim = nb_vars_a + nb_vars_b;
2480 U = lambda_matrix_new (dim, dim, &scratch_obstack);
2481 A = lambda_matrix_new (dim, 1, &scratch_obstack);
2482 S = lambda_matrix_new (dim, 1, &scratch_obstack);
2484 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2485 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
2486 gamma = init_b - init_a;
2488 /* Don't do all the hard work of solving the Diophantine equation
2489 when we already know the solution: for example,
2490 | {3, +, 1}_1
2491 | {3, +, 4}_2
2492 | gamma = 3 - 3 = 0.
2493 Then the first overlap occurs during the first iterations:
2494 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2496 if (gamma == 0)
2498 if (nb_vars_a == 1 && nb_vars_b == 1)
2500 HOST_WIDE_INT step_a, step_b;
2501 HOST_WIDE_INT niter, niter_a, niter_b;
2502 affine_fn ova, ovb;
2504 niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
2505 niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
2506 niter = MIN (niter_a, niter_b);
2507 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2508 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2510 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2511 &ova, &ovb,
2512 last_conflicts, 1);
2513 *overlaps_a = conflict_fn (1, ova);
2514 *overlaps_b = conflict_fn (1, ovb);
2517 else if (nb_vars_a == 2 && nb_vars_b == 1)
2518 compute_overlap_steps_for_affine_1_2
2519 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2521 else if (nb_vars_a == 1 && nb_vars_b == 2)
2522 compute_overlap_steps_for_affine_1_2
2523 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2525 else
2527 if (dump_file && (dump_flags & TDF_DETAILS))
2528 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2529 *overlaps_a = conflict_fn_not_known ();
2530 *overlaps_b = conflict_fn_not_known ();
2531 *last_conflicts = chrec_dont_know;
2533 goto end_analyze_subs_aa;
2536 /* U.A = S */
2537 lambda_matrix_right_hermite (A, dim, 1, S, U);
2539 if (S[0][0] < 0)
2541 S[0][0] *= -1;
2542 lambda_matrix_row_negate (U, dim, 0);
2544 gcd_alpha_beta = S[0][0];
2546 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2547 but that is a quite strange case. Instead of ICEing, answer
2548 don't know. */
2549 if (gcd_alpha_beta == 0)
2551 *overlaps_a = conflict_fn_not_known ();
2552 *overlaps_b = conflict_fn_not_known ();
2553 *last_conflicts = chrec_dont_know;
2554 goto end_analyze_subs_aa;
2557 /* The classic "gcd-test". */
2558 if (!int_divides_p (gcd_alpha_beta, gamma))
2560 /* The "gcd-test" has determined that there is no integer
2561 solution, i.e. there is no dependence. */
2562 *overlaps_a = conflict_fn_no_dependence ();
2563 *overlaps_b = conflict_fn_no_dependence ();
2564 *last_conflicts = integer_zero_node;
2567 /* Both access functions are univariate. This includes SIV and MIV cases. */
2568 else if (nb_vars_a == 1 && nb_vars_b == 1)
2570 /* Both functions should have the same evolution sign. */
2571 if (((A[0][0] > 0 && -A[1][0] > 0)
2572 || (A[0][0] < 0 && -A[1][0] < 0)))
2574 /* The solutions are given by:
2576 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2577 | [u21 u22] [y0]
2579 For a given integer t. Using the following variables,
2581 | i0 = u11 * gamma / gcd_alpha_beta
2582 | j0 = u12 * gamma / gcd_alpha_beta
2583 | i1 = u21
2584 | j1 = u22
2586 the solutions are:
2588 | x0 = i0 + i1 * t,
2589 | y0 = j0 + j1 * t. */
2590 HOST_WIDE_INT i0, j0, i1, j1;
2592 i0 = U[0][0] * gamma / gcd_alpha_beta;
2593 j0 = U[0][1] * gamma / gcd_alpha_beta;
2594 i1 = U[1][0];
2595 j1 = U[1][1];
2597 if ((i1 == 0 && i0 < 0)
2598 || (j1 == 0 && j0 < 0))
2600 /* There is no solution.
2601 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2602 falls in here, but for the moment we don't look at the
2603 upper bound of the iteration domain. */
2604 *overlaps_a = conflict_fn_no_dependence ();
2605 *overlaps_b = conflict_fn_no_dependence ();
2606 *last_conflicts = integer_zero_node;
2607 goto end_analyze_subs_aa;
2610 if (i1 > 0 && j1 > 0)
2612 HOST_WIDE_INT niter_a
2613 = max_stmt_executions_int (get_chrec_loop (chrec_a));
2614 HOST_WIDE_INT niter_b
2615 = max_stmt_executions_int (get_chrec_loop (chrec_b));
2616 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2618 /* (X0, Y0) is a solution of the Diophantine equation:
2619 "chrec_a (X0) = chrec_b (Y0)". */
2620 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2621 CEIL (-j0, j1));
2622 HOST_WIDE_INT x0 = i1 * tau1 + i0;
2623 HOST_WIDE_INT y0 = j1 * tau1 + j0;
2625 /* (X1, Y1) is the smallest positive solution of the eq
2626 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2627 first conflict occurs. */
2628 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2629 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2630 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2632 if (niter > 0)
2634 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2635 FLOOR_DIV (niter - j0, j1));
2636 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2638 /* If the overlap occurs outside of the bounds of the
2639 loop, there is no dependence. */
2640 if (x1 >= niter || y1 >= niter)
2642 *overlaps_a = conflict_fn_no_dependence ();
2643 *overlaps_b = conflict_fn_no_dependence ();
2644 *last_conflicts = integer_zero_node;
2645 goto end_analyze_subs_aa;
2647 else
2648 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2650 else
2651 *last_conflicts = chrec_dont_know;
2653 *overlaps_a
2654 = conflict_fn (1,
2655 affine_fn_univar (build_int_cst (NULL_TREE, x1),
2657 build_int_cst (NULL_TREE, i1)));
2658 *overlaps_b
2659 = conflict_fn (1,
2660 affine_fn_univar (build_int_cst (NULL_TREE, y1),
2662 build_int_cst (NULL_TREE, j1)));
2664 else
2666 /* FIXME: For the moment, the upper bound of the
2667 iteration domain for i and j is not checked. */
2668 if (dump_file && (dump_flags & TDF_DETAILS))
2669 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2670 *overlaps_a = conflict_fn_not_known ();
2671 *overlaps_b = conflict_fn_not_known ();
2672 *last_conflicts = chrec_dont_know;
2675 else
2677 if (dump_file && (dump_flags & TDF_DETAILS))
2678 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2679 *overlaps_a = conflict_fn_not_known ();
2680 *overlaps_b = conflict_fn_not_known ();
2681 *last_conflicts = chrec_dont_know;
2684 else
2686 if (dump_file && (dump_flags & TDF_DETAILS))
2687 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2688 *overlaps_a = conflict_fn_not_known ();
2689 *overlaps_b = conflict_fn_not_known ();
2690 *last_conflicts = chrec_dont_know;
2693 end_analyze_subs_aa:
2694 obstack_free (&scratch_obstack, NULL);
2695 if (dump_file && (dump_flags & TDF_DETAILS))
2697 fprintf (dump_file, " (overlaps_a = ");
2698 dump_conflict_function (dump_file, *overlaps_a);
2699 fprintf (dump_file, ")\n (overlaps_b = ");
2700 dump_conflict_function (dump_file, *overlaps_b);
2701 fprintf (dump_file, "))\n");
2705 /* Returns true when analyze_subscript_affine_affine can be used for
2706 determining the dependence relation between chrec_a and chrec_b,
2707 that contain symbols. This function modifies chrec_a and chrec_b
2708 such that the analysis result is the same, and such that they don't
2709 contain symbols, and then can safely be passed to the analyzer.
2711 Example: The analysis of the following tuples of evolutions produce
2712 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2713 vs. {0, +, 1}_1
2715 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2716 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2719 static bool
2720 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2722 tree diff, type, left_a, left_b, right_b;
2724 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2725 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2726 /* FIXME: For the moment not handled. Might be refined later. */
2727 return false;
2729 type = chrec_type (*chrec_a);
2730 left_a = CHREC_LEFT (*chrec_a);
2731 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
2732 diff = chrec_fold_minus (type, left_a, left_b);
2734 if (!evolution_function_is_constant_p (diff))
2735 return false;
2737 if (dump_file && (dump_flags & TDF_DETAILS))
2738 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2740 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2741 diff, CHREC_RIGHT (*chrec_a));
2742 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
2743 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2744 build_int_cst (type, 0),
2745 right_b);
2746 return true;
2749 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2750 *OVERLAPS_B are initialized to the functions that describe the
2751 relation between the elements accessed twice by CHREC_A and
2752 CHREC_B. For k >= 0, the following property is verified:
2754 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2756 static void
2757 analyze_siv_subscript (tree chrec_a,
2758 tree chrec_b,
2759 conflict_function **overlaps_a,
2760 conflict_function **overlaps_b,
2761 tree *last_conflicts,
2762 int loop_nest_num)
2764 dependence_stats.num_siv++;
2766 if (dump_file && (dump_flags & TDF_DETAILS))
2767 fprintf (dump_file, "(analyze_siv_subscript \n");
2769 if (evolution_function_is_constant_p (chrec_a)
2770 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2771 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2772 overlaps_a, overlaps_b, last_conflicts);
2774 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2775 && evolution_function_is_constant_p (chrec_b))
2776 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2777 overlaps_b, overlaps_a, last_conflicts);
2779 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2780 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2782 if (!chrec_contains_symbols (chrec_a)
2783 && !chrec_contains_symbols (chrec_b))
2785 analyze_subscript_affine_affine (chrec_a, chrec_b,
2786 overlaps_a, overlaps_b,
2787 last_conflicts);
2789 if (CF_NOT_KNOWN_P (*overlaps_a)
2790 || CF_NOT_KNOWN_P (*overlaps_b))
2791 dependence_stats.num_siv_unimplemented++;
2792 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2793 || CF_NO_DEPENDENCE_P (*overlaps_b))
2794 dependence_stats.num_siv_independent++;
2795 else
2796 dependence_stats.num_siv_dependent++;
2798 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2799 &chrec_b))
2801 analyze_subscript_affine_affine (chrec_a, chrec_b,
2802 overlaps_a, overlaps_b,
2803 last_conflicts);
2805 if (CF_NOT_KNOWN_P (*overlaps_a)
2806 || CF_NOT_KNOWN_P (*overlaps_b))
2807 dependence_stats.num_siv_unimplemented++;
2808 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2809 || CF_NO_DEPENDENCE_P (*overlaps_b))
2810 dependence_stats.num_siv_independent++;
2811 else
2812 dependence_stats.num_siv_dependent++;
2814 else
2815 goto siv_subscript_dontknow;
2818 else
2820 siv_subscript_dontknow:;
2821 if (dump_file && (dump_flags & TDF_DETAILS))
2822 fprintf (dump_file, " siv test failed: unimplemented");
2823 *overlaps_a = conflict_fn_not_known ();
2824 *overlaps_b = conflict_fn_not_known ();
2825 *last_conflicts = chrec_dont_know;
2826 dependence_stats.num_siv_unimplemented++;
2829 if (dump_file && (dump_flags & TDF_DETAILS))
2830 fprintf (dump_file, ")\n");
2833 /* Returns false if we can prove that the greatest common divisor of the steps
2834 of CHREC does not divide CST, false otherwise. */
2836 static bool
2837 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2839 HOST_WIDE_INT cd = 0, val;
2840 tree step;
2842 if (!tree_fits_shwi_p (cst))
2843 return true;
2844 val = tree_to_shwi (cst);
2846 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2848 step = CHREC_RIGHT (chrec);
2849 if (!tree_fits_shwi_p (step))
2850 return true;
2851 cd = gcd (cd, tree_to_shwi (step));
2852 chrec = CHREC_LEFT (chrec);
2855 return val % cd == 0;
2858 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2859 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2860 functions that describe the relation between the elements accessed
2861 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2862 is verified:
2864 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2866 static void
2867 analyze_miv_subscript (tree chrec_a,
2868 tree chrec_b,
2869 conflict_function **overlaps_a,
2870 conflict_function **overlaps_b,
2871 tree *last_conflicts,
2872 struct loop *loop_nest)
2874 tree type, difference;
2876 dependence_stats.num_miv++;
2877 if (dump_file && (dump_flags & TDF_DETAILS))
2878 fprintf (dump_file, "(analyze_miv_subscript \n");
2880 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2881 chrec_a = chrec_convert (type, chrec_a, NULL);
2882 chrec_b = chrec_convert (type, chrec_b, NULL);
2883 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2885 if (eq_evolutions_p (chrec_a, chrec_b))
2887 /* Access functions are the same: all the elements are accessed
2888 in the same order. */
2889 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2890 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2891 *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
2892 dependence_stats.num_miv_dependent++;
2895 else if (evolution_function_is_constant_p (difference)
2896 /* For the moment, the following is verified:
2897 evolution_function_is_affine_multivariate_p (chrec_a,
2898 loop_nest->num) */
2899 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2901 /* testsuite/.../ssa-chrec-33.c
2902 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2904 The difference is 1, and all the evolution steps are multiples
2905 of 2, consequently there are no overlapping elements. */
2906 *overlaps_a = conflict_fn_no_dependence ();
2907 *overlaps_b = conflict_fn_no_dependence ();
2908 *last_conflicts = integer_zero_node;
2909 dependence_stats.num_miv_independent++;
2912 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2913 && !chrec_contains_symbols (chrec_a)
2914 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2915 && !chrec_contains_symbols (chrec_b))
2917 /* testsuite/.../ssa-chrec-35.c
2918 {0, +, 1}_2 vs. {0, +, 1}_3
2919 the overlapping elements are respectively located at iterations:
2920 {0, +, 1}_x and {0, +, 1}_x,
2921 in other words, we have the equality:
2922 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2924 Other examples:
2925 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2926 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2928 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2929 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2931 analyze_subscript_affine_affine (chrec_a, chrec_b,
2932 overlaps_a, overlaps_b, last_conflicts);
2934 if (CF_NOT_KNOWN_P (*overlaps_a)
2935 || CF_NOT_KNOWN_P (*overlaps_b))
2936 dependence_stats.num_miv_unimplemented++;
2937 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2938 || CF_NO_DEPENDENCE_P (*overlaps_b))
2939 dependence_stats.num_miv_independent++;
2940 else
2941 dependence_stats.num_miv_dependent++;
2944 else
2946 /* When the analysis is too difficult, answer "don't know". */
2947 if (dump_file && (dump_flags & TDF_DETAILS))
2948 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2950 *overlaps_a = conflict_fn_not_known ();
2951 *overlaps_b = conflict_fn_not_known ();
2952 *last_conflicts = chrec_dont_know;
2953 dependence_stats.num_miv_unimplemented++;
2956 if (dump_file && (dump_flags & TDF_DETAILS))
2957 fprintf (dump_file, ")\n");
2960 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2961 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2962 OVERLAP_ITERATIONS_B are initialized with two functions that
2963 describe the iterations that contain conflicting elements.
2965 Remark: For an integer k >= 0, the following equality is true:
2967 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2970 static void
2971 analyze_overlapping_iterations (tree chrec_a,
2972 tree chrec_b,
2973 conflict_function **overlap_iterations_a,
2974 conflict_function **overlap_iterations_b,
2975 tree *last_conflicts, struct loop *loop_nest)
2977 unsigned int lnn = loop_nest->num;
2979 dependence_stats.num_subscript_tests++;
2981 if (dump_file && (dump_flags & TDF_DETAILS))
2983 fprintf (dump_file, "(analyze_overlapping_iterations \n");
2984 fprintf (dump_file, " (chrec_a = ");
2985 print_generic_expr (dump_file, chrec_a, 0);
2986 fprintf (dump_file, ")\n (chrec_b = ");
2987 print_generic_expr (dump_file, chrec_b, 0);
2988 fprintf (dump_file, ")\n");
2991 if (chrec_a == NULL_TREE
2992 || chrec_b == NULL_TREE
2993 || chrec_contains_undetermined (chrec_a)
2994 || chrec_contains_undetermined (chrec_b))
2996 dependence_stats.num_subscript_undetermined++;
2998 *overlap_iterations_a = conflict_fn_not_known ();
2999 *overlap_iterations_b = conflict_fn_not_known ();
3002 /* If they are the same chrec, and are affine, they overlap
3003 on every iteration. */
3004 else if (eq_evolutions_p (chrec_a, chrec_b)
3005 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
3006 || operand_equal_p (chrec_a, chrec_b, 0)))
3008 dependence_stats.num_same_subscript_function++;
3009 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3010 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3011 *last_conflicts = chrec_dont_know;
3014 /* If they aren't the same, and aren't affine, we can't do anything
3015 yet. */
3016 else if ((chrec_contains_symbols (chrec_a)
3017 || chrec_contains_symbols (chrec_b))
3018 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
3019 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
3021 dependence_stats.num_subscript_undetermined++;
3022 *overlap_iterations_a = conflict_fn_not_known ();
3023 *overlap_iterations_b = conflict_fn_not_known ();
3026 else if (ziv_subscript_p (chrec_a, chrec_b))
3027 analyze_ziv_subscript (chrec_a, chrec_b,
3028 overlap_iterations_a, overlap_iterations_b,
3029 last_conflicts);
3031 else if (siv_subscript_p (chrec_a, chrec_b))
3032 analyze_siv_subscript (chrec_a, chrec_b,
3033 overlap_iterations_a, overlap_iterations_b,
3034 last_conflicts, lnn);
3036 else
3037 analyze_miv_subscript (chrec_a, chrec_b,
3038 overlap_iterations_a, overlap_iterations_b,
3039 last_conflicts, loop_nest);
3041 if (dump_file && (dump_flags & TDF_DETAILS))
3043 fprintf (dump_file, " (overlap_iterations_a = ");
3044 dump_conflict_function (dump_file, *overlap_iterations_a);
3045 fprintf (dump_file, ")\n (overlap_iterations_b = ");
3046 dump_conflict_function (dump_file, *overlap_iterations_b);
3047 fprintf (dump_file, "))\n");
3051 /* Helper function for uniquely inserting distance vectors. */
3053 static void
3054 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
3056 unsigned i;
3057 lambda_vector v;
3059 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, v)
3060 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
3061 return;
3063 DDR_DIST_VECTS (ddr).safe_push (dist_v);
3066 /* Helper function for uniquely inserting direction vectors. */
3068 static void
3069 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
3071 unsigned i;
3072 lambda_vector v;
3074 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), i, v)
3075 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
3076 return;
3078 DDR_DIR_VECTS (ddr).safe_push (dir_v);
3081 /* Add a distance of 1 on all the loops outer than INDEX. If we
3082 haven't yet determined a distance for this outer loop, push a new
3083 distance vector composed of the previous distance, and a distance
3084 of 1 for this outer loop. Example:
3086 | loop_1
3087 | loop_2
3088 | A[10]
3089 | endloop_2
3090 | endloop_1
3092 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
3093 save (0, 1), then we have to save (1, 0). */
3095 static void
3096 add_outer_distances (struct data_dependence_relation *ddr,
3097 lambda_vector dist_v, int index)
3099 /* For each outer loop where init_v is not set, the accesses are
3100 in dependence of distance 1 in the loop. */
3101 while (--index >= 0)
3103 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3104 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3105 save_v[index] = 1;
3106 save_dist_v (ddr, save_v);
3110 /* Return false when fail to represent the data dependence as a
3111 distance vector. INIT_B is set to true when a component has been
3112 added to the distance vector DIST_V. INDEX_CARRY is then set to
3113 the index in DIST_V that carries the dependence. */
3115 static bool
3116 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
3117 struct data_reference *ddr_a,
3118 struct data_reference *ddr_b,
3119 lambda_vector dist_v, bool *init_b,
3120 int *index_carry)
3122 unsigned i;
3123 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3125 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3127 tree access_fn_a, access_fn_b;
3128 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
3130 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
3132 non_affine_dependence_relation (ddr);
3133 return false;
3136 access_fn_a = DR_ACCESS_FN (ddr_a, i);
3137 access_fn_b = DR_ACCESS_FN (ddr_b, i);
3139 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
3140 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
3142 int dist, index;
3143 int var_a = CHREC_VARIABLE (access_fn_a);
3144 int var_b = CHREC_VARIABLE (access_fn_b);
3146 if (var_a != var_b
3147 || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
3149 non_affine_dependence_relation (ddr);
3150 return false;
3153 dist = int_cst_value (SUB_DISTANCE (subscript));
3154 index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
3155 *index_carry = MIN (index, *index_carry);
3157 /* This is the subscript coupling test. If we have already
3158 recorded a distance for this loop (a distance coming from
3159 another subscript), it should be the same. For example,
3160 in the following code, there is no dependence:
3162 | loop i = 0, N, 1
3163 | T[i+1][i] = ...
3164 | ... = T[i][i]
3165 | endloop
3167 if (init_v[index] != 0 && dist_v[index] != dist)
3169 finalize_ddr_dependent (ddr, chrec_known);
3170 return false;
3173 dist_v[index] = dist;
3174 init_v[index] = 1;
3175 *init_b = true;
3177 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
3179 /* This can be for example an affine vs. constant dependence
3180 (T[i] vs. T[3]) that is not an affine dependence and is
3181 not representable as a distance vector. */
3182 non_affine_dependence_relation (ddr);
3183 return false;
3187 return true;
3190 /* Return true when the DDR contains only constant access functions. */
3192 static bool
3193 constant_access_functions (const struct data_dependence_relation *ddr)
3195 unsigned i;
3197 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3198 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
3199 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
3200 return false;
3202 return true;
3205 /* Helper function for the case where DDR_A and DDR_B are the same
3206 multivariate access function with a constant step. For an example
3207 see pr34635-1.c. */
3209 static void
3210 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
3212 int x_1, x_2;
3213 tree c_1 = CHREC_LEFT (c_2);
3214 tree c_0 = CHREC_LEFT (c_1);
3215 lambda_vector dist_v;
3216 int v1, v2, cd;
3218 /* Polynomials with more than 2 variables are not handled yet. When
3219 the evolution steps are parameters, it is not possible to
3220 represent the dependence using classical distance vectors. */
3221 if (TREE_CODE (c_0) != INTEGER_CST
3222 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
3223 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
3225 DDR_AFFINE_P (ddr) = false;
3226 return;
3229 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
3230 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
3232 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
3233 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3234 v1 = int_cst_value (CHREC_RIGHT (c_1));
3235 v2 = int_cst_value (CHREC_RIGHT (c_2));
3236 cd = gcd (v1, v2);
3237 v1 /= cd;
3238 v2 /= cd;
3240 if (v2 < 0)
3242 v2 = -v2;
3243 v1 = -v1;
3246 dist_v[x_1] = v2;
3247 dist_v[x_2] = -v1;
3248 save_dist_v (ddr, dist_v);
3250 add_outer_distances (ddr, dist_v, x_1);
3253 /* Helper function for the case where DDR_A and DDR_B are the same
3254 access functions. */
3256 static void
3257 add_other_self_distances (struct data_dependence_relation *ddr)
3259 lambda_vector dist_v;
3260 unsigned i;
3261 int index_carry = DDR_NB_LOOPS (ddr);
3263 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3265 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
3267 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
3269 if (!evolution_function_is_univariate_p (access_fun))
3271 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
3273 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
3274 return;
3277 access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
3279 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
3280 add_multivariate_self_dist (ddr, access_fun);
3281 else
3282 /* The evolution step is not constant: it varies in
3283 the outer loop, so this cannot be represented by a
3284 distance vector. For example in pr34635.c the
3285 evolution is {0, +, {0, +, 4}_1}_2. */
3286 DDR_AFFINE_P (ddr) = false;
3288 return;
3291 index_carry = MIN (index_carry,
3292 index_in_loop_nest (CHREC_VARIABLE (access_fun),
3293 DDR_LOOP_NEST (ddr)));
3297 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3298 add_outer_distances (ddr, dist_v, index_carry);
3301 static void
3302 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
3304 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3306 dist_v[DDR_INNER_LOOP (ddr)] = 1;
3307 save_dist_v (ddr, dist_v);
3310 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
3311 is the case for example when access functions are the same and
3312 equal to a constant, as in:
3314 | loop_1
3315 | A[3] = ...
3316 | ... = A[3]
3317 | endloop_1
3319 in which case the distance vectors are (0) and (1). */
3321 static void
3322 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
3324 unsigned i, j;
3326 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3328 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
3329 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
3330 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
3332 for (j = 0; j < ca->n; j++)
3333 if (affine_function_zero_p (ca->fns[j]))
3335 insert_innermost_unit_dist_vector (ddr);
3336 return;
3339 for (j = 0; j < cb->n; j++)
3340 if (affine_function_zero_p (cb->fns[j]))
3342 insert_innermost_unit_dist_vector (ddr);
3343 return;
3348 /* Compute the classic per loop distance vector. DDR is the data
3349 dependence relation to build a vector from. Return false when fail
3350 to represent the data dependence as a distance vector. */
3352 static bool
3353 build_classic_dist_vector (struct data_dependence_relation *ddr,
3354 struct loop *loop_nest)
3356 bool init_b = false;
3357 int index_carry = DDR_NB_LOOPS (ddr);
3358 lambda_vector dist_v;
3360 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3361 return false;
3363 if (same_access_functions (ddr))
3365 /* Save the 0 vector. */
3366 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3367 save_dist_v (ddr, dist_v);
3369 if (constant_access_functions (ddr))
3370 add_distance_for_zero_overlaps (ddr);
3372 if (DDR_NB_LOOPS (ddr) > 1)
3373 add_other_self_distances (ddr);
3375 return true;
3378 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3379 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3380 dist_v, &init_b, &index_carry))
3381 return false;
3383 /* Save the distance vector if we initialized one. */
3384 if (init_b)
3386 /* Verify a basic constraint: classic distance vectors should
3387 always be lexicographically positive.
3389 Data references are collected in the order of execution of
3390 the program, thus for the following loop
3392 | for (i = 1; i < 100; i++)
3393 | for (j = 1; j < 100; j++)
3395 | t = T[j+1][i-1]; // A
3396 | T[j][i] = t + 2; // B
3399 references are collected following the direction of the wind:
3400 A then B. The data dependence tests are performed also
3401 following this order, such that we're looking at the distance
3402 separating the elements accessed by A from the elements later
3403 accessed by B. But in this example, the distance returned by
3404 test_dep (A, B) is lexicographically negative (-1, 1), that
3405 means that the access A occurs later than B with respect to
3406 the outer loop, ie. we're actually looking upwind. In this
3407 case we solve test_dep (B, A) looking downwind to the
3408 lexicographically positive solution, that returns the
3409 distance vector (1, -1). */
3410 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3412 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3413 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3414 loop_nest))
3415 return false;
3416 compute_subscript_distance (ddr);
3417 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3418 save_v, &init_b, &index_carry))
3419 return false;
3420 save_dist_v (ddr, save_v);
3421 DDR_REVERSED_P (ddr) = true;
3423 /* In this case there is a dependence forward for all the
3424 outer loops:
3426 | for (k = 1; k < 100; k++)
3427 | for (i = 1; i < 100; i++)
3428 | for (j = 1; j < 100; j++)
3430 | t = T[j+1][i-1]; // A
3431 | T[j][i] = t + 2; // B
3434 the vectors are:
3435 (0, 1, -1)
3436 (1, 1, -1)
3437 (1, -1, 1)
3439 if (DDR_NB_LOOPS (ddr) > 1)
3441 add_outer_distances (ddr, save_v, index_carry);
3442 add_outer_distances (ddr, dist_v, index_carry);
3445 else
3447 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3448 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3450 if (DDR_NB_LOOPS (ddr) > 1)
3452 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3454 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3455 DDR_A (ddr), loop_nest))
3456 return false;
3457 compute_subscript_distance (ddr);
3458 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3459 opposite_v, &init_b,
3460 &index_carry))
3461 return false;
3463 save_dist_v (ddr, save_v);
3464 add_outer_distances (ddr, dist_v, index_carry);
3465 add_outer_distances (ddr, opposite_v, index_carry);
3467 else
3468 save_dist_v (ddr, save_v);
3471 else
3473 /* There is a distance of 1 on all the outer loops: Example:
3474 there is a dependence of distance 1 on loop_1 for the array A.
3476 | loop_1
3477 | A[5] = ...
3478 | endloop
3480 add_outer_distances (ddr, dist_v,
3481 lambda_vector_first_nz (dist_v,
3482 DDR_NB_LOOPS (ddr), 0));
3485 if (dump_file && (dump_flags & TDF_DETAILS))
3487 unsigned i;
3489 fprintf (dump_file, "(build_classic_dist_vector\n");
3490 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3492 fprintf (dump_file, " dist_vector = (");
3493 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3494 DDR_NB_LOOPS (ddr));
3495 fprintf (dump_file, " )\n");
3497 fprintf (dump_file, ")\n");
3500 return true;
3503 /* Return the direction for a given distance.
3504 FIXME: Computing dir this way is suboptimal, since dir can catch
3505 cases that dist is unable to represent. */
3507 static inline enum data_dependence_direction
3508 dir_from_dist (int dist)
3510 if (dist > 0)
3511 return dir_positive;
3512 else if (dist < 0)
3513 return dir_negative;
3514 else
3515 return dir_equal;
3518 /* Compute the classic per loop direction vector. DDR is the data
3519 dependence relation to build a vector from. */
3521 static void
3522 build_classic_dir_vector (struct data_dependence_relation *ddr)
3524 unsigned i, j;
3525 lambda_vector dist_v;
3527 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
3529 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3531 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3532 dir_v[j] = dir_from_dist (dist_v[j]);
3534 save_dir_v (ddr, dir_v);
3538 /* Helper function. Returns true when there is a dependence between
3539 data references DRA and DRB. */
3541 static bool
3542 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3543 struct data_reference *dra,
3544 struct data_reference *drb,
3545 struct loop *loop_nest)
3547 unsigned int i;
3548 tree last_conflicts;
3549 struct subscript *subscript;
3550 tree res = NULL_TREE;
3552 for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
3554 conflict_function *overlaps_a, *overlaps_b;
3556 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3557 DR_ACCESS_FN (drb, i),
3558 &overlaps_a, &overlaps_b,
3559 &last_conflicts, loop_nest);
3561 if (SUB_CONFLICTS_IN_A (subscript))
3562 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3563 if (SUB_CONFLICTS_IN_B (subscript))
3564 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3566 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3567 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3568 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3570 /* If there is any undetermined conflict function we have to
3571 give a conservative answer in case we cannot prove that
3572 no dependence exists when analyzing another subscript. */
3573 if (CF_NOT_KNOWN_P (overlaps_a)
3574 || CF_NOT_KNOWN_P (overlaps_b))
3576 res = chrec_dont_know;
3577 continue;
3580 /* When there is a subscript with no dependence we can stop. */
3581 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3582 || CF_NO_DEPENDENCE_P (overlaps_b))
3584 res = chrec_known;
3585 break;
3589 if (res == NULL_TREE)
3590 return true;
3592 if (res == chrec_known)
3593 dependence_stats.num_dependence_independent++;
3594 else
3595 dependence_stats.num_dependence_undetermined++;
3596 finalize_ddr_dependent (ddr, res);
3597 return false;
3600 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3602 static void
3603 subscript_dependence_tester (struct data_dependence_relation *ddr,
3604 struct loop *loop_nest)
3606 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3607 dependence_stats.num_dependence_dependent++;
3609 compute_subscript_distance (ddr);
3610 if (build_classic_dist_vector (ddr, loop_nest))
3611 build_classic_dir_vector (ddr);
3614 /* Returns true when all the access functions of A are affine or
3615 constant with respect to LOOP_NEST. */
3617 static bool
3618 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3619 const struct loop *loop_nest)
3621 unsigned int i;
3622 vec<tree> fns = DR_ACCESS_FNS (a);
3623 tree t;
3625 FOR_EACH_VEC_ELT (fns, i, t)
3626 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3627 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3628 return false;
3630 return true;
3633 /* Initializes an equation for an OMEGA problem using the information
3634 contained in the ACCESS_FUN. Returns true when the operation
3635 succeeded.
3637 PB is the omega constraint system.
3638 EQ is the number of the equation to be initialized.
3639 OFFSET is used for shifting the variables names in the constraints:
3640 a constrain is composed of 2 * the number of variables surrounding
3641 dependence accesses. OFFSET is set either to 0 for the first n variables,
3642 then it is set to n.
3643 ACCESS_FUN is expected to be an affine chrec. */
3645 static bool
3646 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3647 unsigned int offset, tree access_fun,
3648 struct data_dependence_relation *ddr)
3650 switch (TREE_CODE (access_fun))
3652 case POLYNOMIAL_CHREC:
3654 tree left = CHREC_LEFT (access_fun);
3655 tree right = CHREC_RIGHT (access_fun);
3656 int var = CHREC_VARIABLE (access_fun);
3657 unsigned var_idx;
3659 if (TREE_CODE (right) != INTEGER_CST)
3660 return false;
3662 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3663 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3665 /* Compute the innermost loop index. */
3666 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3668 if (offset == 0)
3669 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3670 += int_cst_value (right);
3672 switch (TREE_CODE (left))
3674 case POLYNOMIAL_CHREC:
3675 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3677 case INTEGER_CST:
3678 pb->eqs[eq].coef[0] += int_cst_value (left);
3679 return true;
3681 default:
3682 return false;
3686 case INTEGER_CST:
3687 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3688 return true;
3690 default:
3691 return false;
3695 /* As explained in the comments preceding init_omega_for_ddr, we have
3696 to set up a system for each loop level, setting outer loops
3697 variation to zero, and current loop variation to positive or zero.
3698 Save each lexico positive distance vector. */
3700 static void
3701 omega_extract_distance_vectors (omega_pb pb,
3702 struct data_dependence_relation *ddr)
3704 int eq, geq;
3705 unsigned i, j;
3706 struct loop *loopi, *loopj;
3707 enum omega_result res;
3709 /* Set a new problem for each loop in the nest. The basis is the
3710 problem that we have initialized until now. On top of this we
3711 add new constraints. */
3712 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3713 && DDR_LOOP_NEST (ddr).iterate (i, &loopi); i++)
3715 int dist = 0;
3716 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3717 DDR_NB_LOOPS (ddr));
3719 omega_copy_problem (copy, pb);
3721 /* For all the outer loops "loop_j", add "dj = 0". */
3722 for (j = 0; j < i && DDR_LOOP_NEST (ddr).iterate (j, &loopj); j++)
3724 eq = omega_add_zero_eq (copy, omega_black);
3725 copy->eqs[eq].coef[j + 1] = 1;
3728 /* For "loop_i", add "0 <= di". */
3729 geq = omega_add_zero_geq (copy, omega_black);
3730 copy->geqs[geq].coef[i + 1] = 1;
3732 /* Reduce the constraint system, and test that the current
3733 problem is feasible. */
3734 res = omega_simplify_problem (copy);
3735 if (res == omega_false
3736 || res == omega_unknown
3737 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3738 goto next_problem;
3740 for (eq = 0; eq < copy->num_subs; eq++)
3741 if (copy->subs[eq].key == (int) i + 1)
3743 dist = copy->subs[eq].coef[0];
3744 goto found_dist;
3747 if (dist == 0)
3749 /* Reinitialize problem... */
3750 omega_copy_problem (copy, pb);
3751 for (j = 0; j < i && DDR_LOOP_NEST (ddr).iterate (j, &loopj); j++)
3753 eq = omega_add_zero_eq (copy, omega_black);
3754 copy->eqs[eq].coef[j + 1] = 1;
3757 /* ..., but this time "di = 1". */
3758 eq = omega_add_zero_eq (copy, omega_black);
3759 copy->eqs[eq].coef[i + 1] = 1;
3760 copy->eqs[eq].coef[0] = -1;
3762 res = omega_simplify_problem (copy);
3763 if (res == omega_false
3764 || res == omega_unknown
3765 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3766 goto next_problem;
3768 for (eq = 0; eq < copy->num_subs; eq++)
3769 if (copy->subs[eq].key == (int) i + 1)
3771 dist = copy->subs[eq].coef[0];
3772 goto found_dist;
3776 found_dist:;
3777 /* Save the lexicographically positive distance vector. */
3778 if (dist >= 0)
3780 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3781 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3783 dist_v[i] = dist;
3785 for (eq = 0; eq < copy->num_subs; eq++)
3786 if (copy->subs[eq].key > 0)
3788 dist = copy->subs[eq].coef[0];
3789 dist_v[copy->subs[eq].key - 1] = dist;
3792 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3793 dir_v[j] = dir_from_dist (dist_v[j]);
3795 save_dist_v (ddr, dist_v);
3796 save_dir_v (ddr, dir_v);
3799 next_problem:;
3800 omega_free_problem (copy);
3804 /* This is called for each subscript of a tuple of data references:
3805 insert an equality for representing the conflicts. */
3807 static bool
3808 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3809 struct data_dependence_relation *ddr,
3810 omega_pb pb, bool *maybe_dependent)
3812 int eq;
3813 tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3814 TREE_TYPE (access_fun_b));
3815 tree fun_a = chrec_convert (type, access_fun_a, NULL);
3816 tree fun_b = chrec_convert (type, access_fun_b, NULL);
3817 tree difference = chrec_fold_minus (type, fun_a, fun_b);
3818 tree minus_one;
3820 /* When the fun_a - fun_b is not constant, the dependence is not
3821 captured by the classic distance vector representation. */
3822 if (TREE_CODE (difference) != INTEGER_CST)
3823 return false;
3825 /* ZIV test. */
3826 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3828 /* There is no dependence. */
3829 *maybe_dependent = false;
3830 return true;
3833 minus_one = build_int_cst (type, -1);
3834 fun_b = chrec_fold_multiply (type, fun_b, minus_one);
3836 eq = omega_add_zero_eq (pb, omega_black);
3837 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3838 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3839 /* There is probably a dependence, but the system of
3840 constraints cannot be built: answer "don't know". */
3841 return false;
3843 /* GCD test. */
3844 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3845 && !int_divides_p (lambda_vector_gcd
3846 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3847 2 * DDR_NB_LOOPS (ddr)),
3848 pb->eqs[eq].coef[0]))
3850 /* There is no dependence. */
3851 *maybe_dependent = false;
3852 return true;
3855 return true;
3858 /* Helper function, same as init_omega_for_ddr but specialized for
3859 data references A and B. */
3861 static bool
3862 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3863 struct data_dependence_relation *ddr,
3864 omega_pb pb, bool *maybe_dependent)
3866 unsigned i;
3867 int ineq;
3868 struct loop *loopi;
3869 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3871 /* Insert an equality per subscript. */
3872 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3874 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3875 ddr, pb, maybe_dependent))
3876 return false;
3877 else if (*maybe_dependent == false)
3879 /* There is no dependence. */
3880 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3881 return true;
3885 /* Insert inequalities: constraints corresponding to the iteration
3886 domain, i.e. the loops surrounding the references "loop_x" and
3887 the distance variables "dx". The layout of the OMEGA
3888 representation is as follows:
3889 - coef[0] is the constant
3890 - coef[1..nb_loops] are the protected variables that will not be
3891 removed by the solver: the "dx"
3892 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3894 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3895 && DDR_LOOP_NEST (ddr).iterate (i, &loopi); i++)
3897 HOST_WIDE_INT nbi = max_stmt_executions_int (loopi);
3899 /* 0 <= loop_x */
3900 ineq = omega_add_zero_geq (pb, omega_black);
3901 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3903 /* 0 <= loop_x + dx */
3904 ineq = omega_add_zero_geq (pb, omega_black);
3905 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3906 pb->geqs[ineq].coef[i + 1] = 1;
3908 if (nbi != -1)
3910 /* loop_x <= nb_iters */
3911 ineq = omega_add_zero_geq (pb, omega_black);
3912 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3913 pb->geqs[ineq].coef[0] = nbi;
3915 /* loop_x + dx <= nb_iters */
3916 ineq = omega_add_zero_geq (pb, omega_black);
3917 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3918 pb->geqs[ineq].coef[i + 1] = -1;
3919 pb->geqs[ineq].coef[0] = nbi;
3921 /* A step "dx" bigger than nb_iters is not feasible, so
3922 add "0 <= nb_iters + dx", */
3923 ineq = omega_add_zero_geq (pb, omega_black);
3924 pb->geqs[ineq].coef[i + 1] = 1;
3925 pb->geqs[ineq].coef[0] = nbi;
3926 /* and "dx <= nb_iters". */
3927 ineq = omega_add_zero_geq (pb, omega_black);
3928 pb->geqs[ineq].coef[i + 1] = -1;
3929 pb->geqs[ineq].coef[0] = nbi;
3933 omega_extract_distance_vectors (pb, ddr);
3935 return true;
3938 /* Sets up the Omega dependence problem for the data dependence
3939 relation DDR. Returns false when the constraint system cannot be
3940 built, ie. when the test answers "don't know". Returns true
3941 otherwise, and when independence has been proved (using one of the
3942 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3943 set MAYBE_DEPENDENT to true.
3945 Example: for setting up the dependence system corresponding to the
3946 conflicting accesses
3948 | loop_i
3949 | loop_j
3950 | A[i, i+1] = ...
3951 | ... A[2*j, 2*(i + j)]
3952 | endloop_j
3953 | endloop_i
3955 the following constraints come from the iteration domain:
3957 0 <= i <= Ni
3958 0 <= i + di <= Ni
3959 0 <= j <= Nj
3960 0 <= j + dj <= Nj
3962 where di, dj are the distance variables. The constraints
3963 representing the conflicting elements are:
3965 i = 2 * (j + dj)
3966 i + 1 = 2 * (i + di + j + dj)
3968 For asking that the resulting distance vector (di, dj) be
3969 lexicographically positive, we insert the constraint "di >= 0". If
3970 "di = 0" in the solution, we fix that component to zero, and we
3971 look at the inner loops: we set a new problem where all the outer
3972 loop distances are zero, and fix this inner component to be
3973 positive. When one of the components is positive, we save that
3974 distance, and set a new problem where the distance on this loop is
3975 zero, searching for other distances in the inner loops. Here is
3976 the classic example that illustrates that we have to set for each
3977 inner loop a new problem:
3979 | loop_1
3980 | loop_2
3981 | A[10]
3982 | endloop_2
3983 | endloop_1
3985 we have to save two distances (1, 0) and (0, 1).
3987 Given two array references, refA and refB, we have to set the
3988 dependence problem twice, refA vs. refB and refB vs. refA, and we
3989 cannot do a single test, as refB might occur before refA in the
3990 inner loops, and the contrary when considering outer loops: ex.
3992 | loop_0
3993 | loop_1
3994 | loop_2
3995 | T[{1,+,1}_2][{1,+,1}_1] // refA
3996 | T[{2,+,1}_2][{0,+,1}_1] // refB
3997 | endloop_2
3998 | endloop_1
3999 | endloop_0
4001 refB touches the elements in T before refA, and thus for the same
4002 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
4003 but for successive loop_0 iterations, we have (1, -1, 1)
4005 The Omega solver expects the distance variables ("di" in the
4006 previous example) to come first in the constraint system (as
4007 variables to be protected, or "safe" variables), the constraint
4008 system is built using the following layout:
4010 "cst | distance vars | index vars".
4013 static bool
4014 init_omega_for_ddr (struct data_dependence_relation *ddr,
4015 bool *maybe_dependent)
4017 omega_pb pb;
4018 bool res = false;
4020 *maybe_dependent = true;
4022 if (same_access_functions (ddr))
4024 unsigned j;
4025 lambda_vector dir_v;
4027 /* Save the 0 vector. */
4028 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4029 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4030 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
4031 dir_v[j] = dir_equal;
4032 save_dir_v (ddr, dir_v);
4034 /* Save the dependences carried by outer loops. */
4035 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
4036 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
4037 maybe_dependent);
4038 omega_free_problem (pb);
4039 return res;
4042 /* Omega expects the protected variables (those that have to be kept
4043 after elimination) to appear first in the constraint system.
4044 These variables are the distance variables. In the following
4045 initialization we declare NB_LOOPS safe variables, and the total
4046 number of variables for the constraint system is 2*NB_LOOPS. */
4047 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
4048 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
4049 maybe_dependent);
4050 omega_free_problem (pb);
4052 /* Stop computation if not decidable, or no dependence. */
4053 if (res == false || *maybe_dependent == false)
4054 return res;
4056 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
4057 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
4058 maybe_dependent);
4059 omega_free_problem (pb);
4061 return res;
4064 /* Return true when DDR contains the same information as that stored
4065 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
4067 static bool
4068 ddr_consistent_p (FILE *file,
4069 struct data_dependence_relation *ddr,
4070 vec<lambda_vector> dist_vects,
4071 vec<lambda_vector> dir_vects)
4073 unsigned int i, j;
4075 /* If dump_file is set, output there. */
4076 if (dump_file && (dump_flags & TDF_DETAILS))
4077 file = dump_file;
4079 if (dist_vects.length () != DDR_NUM_DIST_VECTS (ddr))
4081 lambda_vector b_dist_v;
4082 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
4083 dist_vects.length (),
4084 DDR_NUM_DIST_VECTS (ddr));
4086 fprintf (file, "Banerjee dist vectors:\n");
4087 FOR_EACH_VEC_ELT (dist_vects, i, b_dist_v)
4088 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
4090 fprintf (file, "Omega dist vectors:\n");
4091 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
4092 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
4094 fprintf (file, "data dependence relation:\n");
4095 dump_data_dependence_relation (file, ddr);
4097 fprintf (file, ")\n");
4098 return false;
4101 if (dir_vects.length () != DDR_NUM_DIR_VECTS (ddr))
4103 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
4104 dir_vects.length (),
4105 DDR_NUM_DIR_VECTS (ddr));
4106 return false;
4109 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
4111 lambda_vector a_dist_v;
4112 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
4114 /* Distance vectors are not ordered in the same way in the DDR
4115 and in the DIST_VECTS: search for a matching vector. */
4116 FOR_EACH_VEC_ELT (dist_vects, j, a_dist_v)
4117 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
4118 break;
4120 if (j == dist_vects.length ())
4122 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
4123 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
4124 fprintf (file, "not found in Omega dist vectors:\n");
4125 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
4126 fprintf (file, "data dependence relation:\n");
4127 dump_data_dependence_relation (file, ddr);
4128 fprintf (file, ")\n");
4132 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
4134 lambda_vector a_dir_v;
4135 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
4137 /* Direction vectors are not ordered in the same way in the DDR
4138 and in the DIR_VECTS: search for a matching vector. */
4139 FOR_EACH_VEC_ELT (dir_vects, j, a_dir_v)
4140 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
4141 break;
4143 if (j == dist_vects.length ())
4145 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
4146 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
4147 fprintf (file, "not found in Omega dir vectors:\n");
4148 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
4149 fprintf (file, "data dependence relation:\n");
4150 dump_data_dependence_relation (file, ddr);
4151 fprintf (file, ")\n");
4155 return true;
4158 /* This computes the affine dependence relation between A and B with
4159 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
4160 independence between two accesses, while CHREC_DONT_KNOW is used
4161 for representing the unknown relation.
4163 Note that it is possible to stop the computation of the dependence
4164 relation the first time we detect a CHREC_KNOWN element for a given
4165 subscript. */
4167 void
4168 compute_affine_dependence (struct data_dependence_relation *ddr,
4169 struct loop *loop_nest)
4171 struct data_reference *dra = DDR_A (ddr);
4172 struct data_reference *drb = DDR_B (ddr);
4174 if (dump_file && (dump_flags & TDF_DETAILS))
4176 fprintf (dump_file, "(compute_affine_dependence\n");
4177 fprintf (dump_file, " stmt_a: ");
4178 print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
4179 fprintf (dump_file, " stmt_b: ");
4180 print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
4183 /* Analyze only when the dependence relation is not yet known. */
4184 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4186 dependence_stats.num_dependence_tests++;
4188 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
4189 && access_functions_are_affine_or_constant_p (drb, loop_nest))
4191 subscript_dependence_tester (ddr, loop_nest);
4193 if (flag_check_data_deps)
4195 /* Dump the dependences from the first algorithm. */
4196 if (dump_file && (dump_flags & TDF_DETAILS))
4198 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
4199 dump_data_dependence_relation (dump_file, ddr);
4202 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4204 bool maybe_dependent;
4205 vec<lambda_vector> dir_vects, dist_vects;
4207 /* Save the result of the first DD analyzer. */
4208 dist_vects = DDR_DIST_VECTS (ddr);
4209 dir_vects = DDR_DIR_VECTS (ddr);
4211 /* Reset the information. */
4212 DDR_DIST_VECTS (ddr).create (0);
4213 DDR_DIR_VECTS (ddr).create (0);
4215 /* Compute the same information using Omega. */
4216 if (!init_omega_for_ddr (ddr, &maybe_dependent))
4217 goto csys_dont_know;
4219 if (dump_file && (dump_flags & TDF_DETAILS))
4221 fprintf (dump_file, "Omega Analyzer\n");
4222 dump_data_dependence_relation (dump_file, ddr);
4225 /* Check that we get the same information. */
4226 if (maybe_dependent)
4227 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
4228 dir_vects));
4233 /* As a last case, if the dependence cannot be determined, or if
4234 the dependence is considered too difficult to determine, answer
4235 "don't know". */
4236 else
4238 csys_dont_know:;
4239 dependence_stats.num_dependence_undetermined++;
4241 if (dump_file && (dump_flags & TDF_DETAILS))
4243 fprintf (dump_file, "Data ref a:\n");
4244 dump_data_reference (dump_file, dra);
4245 fprintf (dump_file, "Data ref b:\n");
4246 dump_data_reference (dump_file, drb);
4247 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4249 finalize_ddr_dependent (ddr, chrec_dont_know);
4253 if (dump_file && (dump_flags & TDF_DETAILS))
4255 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4256 fprintf (dump_file, ") -> no dependence\n");
4257 else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4258 fprintf (dump_file, ") -> dependence analysis failed\n");
4259 else
4260 fprintf (dump_file, ")\n");
4264 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4265 the data references in DATAREFS, in the LOOP_NEST. When
4266 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4267 relations. Return true when successful, i.e. data references number
4268 is small enough to be handled. */
4270 bool
4271 compute_all_dependences (vec<data_reference_p> datarefs,
4272 vec<ddr_p> *dependence_relations,
4273 vec<loop_p> loop_nest,
4274 bool compute_self_and_rr)
4276 struct data_dependence_relation *ddr;
4277 struct data_reference *a, *b;
4278 unsigned int i, j;
4280 if ((int) datarefs.length ()
4281 > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
4283 struct data_dependence_relation *ddr;
4285 /* Insert a single relation into dependence_relations:
4286 chrec_dont_know. */
4287 ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
4288 dependence_relations->safe_push (ddr);
4289 return false;
4292 FOR_EACH_VEC_ELT (datarefs, i, a)
4293 for (j = i + 1; datarefs.iterate (j, &b); j++)
4294 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4296 ddr = initialize_data_dependence_relation (a, b, loop_nest);
4297 dependence_relations->safe_push (ddr);
4298 if (loop_nest.exists ())
4299 compute_affine_dependence (ddr, loop_nest[0]);
4302 if (compute_self_and_rr)
4303 FOR_EACH_VEC_ELT (datarefs, i, a)
4305 ddr = initialize_data_dependence_relation (a, a, loop_nest);
4306 dependence_relations->safe_push (ddr);
4307 if (loop_nest.exists ())
4308 compute_affine_dependence (ddr, loop_nest[0]);
4311 return true;
4314 /* Describes a location of a memory reference. */
4316 typedef struct data_ref_loc_d
4318 /* Position of the memory reference. */
4319 tree *pos;
4321 /* True if the memory reference is read. */
4322 bool is_read;
4323 } data_ref_loc;
4326 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4327 true if STMT clobbers memory, false otherwise. */
4329 static bool
4330 get_references_in_stmt (gimple stmt, vec<data_ref_loc, va_heap> *references)
4332 bool clobbers_memory = false;
4333 data_ref_loc ref;
4334 tree *op0, *op1;
4335 enum gimple_code stmt_code = gimple_code (stmt);
4337 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4338 As we cannot model data-references to not spelled out
4339 accesses give up if they may occur. */
4340 if (stmt_code == GIMPLE_CALL
4341 && !(gimple_call_flags (stmt) & ECF_CONST))
4343 /* Allow IFN_GOMP_SIMD_LANE in their own loops. */
4344 if (gimple_call_internal_p (stmt)
4345 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
4347 struct loop *loop = gimple_bb (stmt)->loop_father;
4348 tree uid = gimple_call_arg (stmt, 0);
4349 gcc_assert (TREE_CODE (uid) == SSA_NAME);
4350 if (loop == NULL
4351 || loop->simduid != SSA_NAME_VAR (uid))
4352 clobbers_memory = true;
4354 else
4355 clobbers_memory = true;
4357 else if (stmt_code == GIMPLE_ASM
4358 && (gimple_asm_volatile_p (stmt) || gimple_vuse (stmt)))
4359 clobbers_memory = true;
4361 if (!gimple_vuse (stmt))
4362 return clobbers_memory;
4364 if (stmt_code == GIMPLE_ASSIGN)
4366 tree base;
4367 op0 = gimple_assign_lhs_ptr (stmt);
4368 op1 = gimple_assign_rhs1_ptr (stmt);
4370 if (DECL_P (*op1)
4371 || (REFERENCE_CLASS_P (*op1)
4372 && (base = get_base_address (*op1))
4373 && TREE_CODE (base) != SSA_NAME))
4375 ref.pos = op1;
4376 ref.is_read = true;
4377 references->safe_push (ref);
4380 else if (stmt_code == GIMPLE_CALL)
4382 unsigned i, n;
4384 op0 = gimple_call_lhs_ptr (stmt);
4385 n = gimple_call_num_args (stmt);
4386 for (i = 0; i < n; i++)
4388 op1 = gimple_call_arg_ptr (stmt, i);
4390 if (DECL_P (*op1)
4391 || (REFERENCE_CLASS_P (*op1) && get_base_address (*op1)))
4393 ref.pos = op1;
4394 ref.is_read = true;
4395 references->safe_push (ref);
4399 else
4400 return clobbers_memory;
4402 if (*op0
4403 && (DECL_P (*op0)
4404 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0))))
4406 ref.pos = op0;
4407 ref.is_read = false;
4408 references->safe_push (ref);
4410 return clobbers_memory;
4413 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4414 reference, returns false, otherwise returns true. NEST is the outermost
4415 loop of the loop nest in which the references should be analyzed. */
4417 bool
4418 find_data_references_in_stmt (struct loop *nest, gimple stmt,
4419 vec<data_reference_p> *datarefs)
4421 unsigned i;
4422 stack_vec<data_ref_loc, 2> references;
4423 data_ref_loc *ref;
4424 bool ret = true;
4425 data_reference_p dr;
4427 if (get_references_in_stmt (stmt, &references))
4428 return false;
4430 FOR_EACH_VEC_ELT (references, i, ref)
4432 dr = create_data_ref (nest, loop_containing_stmt (stmt),
4433 *ref->pos, stmt, ref->is_read);
4434 gcc_assert (dr != NULL);
4435 datarefs->safe_push (dr);
4437 references.release ();
4438 return ret;
4441 /* Stores the data references in STMT to DATAREFS. If there is an
4442 unanalyzable reference, returns false, otherwise returns true.
4443 NEST is the outermost loop of the loop nest in which the references
4444 should be instantiated, LOOP is the loop in which the references
4445 should be analyzed. */
4447 bool
4448 graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple stmt,
4449 vec<data_reference_p> *datarefs)
4451 unsigned i;
4452 stack_vec<data_ref_loc, 2> references;
4453 data_ref_loc *ref;
4454 bool ret = true;
4455 data_reference_p dr;
4457 if (get_references_in_stmt (stmt, &references))
4458 return false;
4460 FOR_EACH_VEC_ELT (references, i, ref)
4462 dr = create_data_ref (nest, loop, *ref->pos, stmt, ref->is_read);
4463 gcc_assert (dr != NULL);
4464 datarefs->safe_push (dr);
4467 references.release ();
4468 return ret;
4471 /* Search the data references in LOOP, and record the information into
4472 DATAREFS. Returns chrec_dont_know when failing to analyze a
4473 difficult case, returns NULL_TREE otherwise. */
4475 tree
4476 find_data_references_in_bb (struct loop *loop, basic_block bb,
4477 vec<data_reference_p> *datarefs)
4479 gimple_stmt_iterator bsi;
4481 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4483 gimple stmt = gsi_stmt (bsi);
4485 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4487 struct data_reference *res;
4488 res = XCNEW (struct data_reference);
4489 datarefs->safe_push (res);
4491 return chrec_dont_know;
4495 return NULL_TREE;
4498 /* Search the data references in LOOP, and record the information into
4499 DATAREFS. Returns chrec_dont_know when failing to analyze a
4500 difficult case, returns NULL_TREE otherwise.
4502 TODO: This function should be made smarter so that it can handle address
4503 arithmetic as if they were array accesses, etc. */
4505 tree
4506 find_data_references_in_loop (struct loop *loop,
4507 vec<data_reference_p> *datarefs)
4509 basic_block bb, *bbs;
4510 unsigned int i;
4512 bbs = get_loop_body_in_dom_order (loop);
4514 for (i = 0; i < loop->num_nodes; i++)
4516 bb = bbs[i];
4518 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4520 free (bbs);
4521 return chrec_dont_know;
4524 free (bbs);
4526 return NULL_TREE;
4529 /* Recursive helper function. */
4531 static bool
4532 find_loop_nest_1 (struct loop *loop, vec<loop_p> *loop_nest)
4534 /* Inner loops of the nest should not contain siblings. Example:
4535 when there are two consecutive loops,
4537 | loop_0
4538 | loop_1
4539 | A[{0, +, 1}_1]
4540 | endloop_1
4541 | loop_2
4542 | A[{0, +, 1}_2]
4543 | endloop_2
4544 | endloop_0
4546 the dependence relation cannot be captured by the distance
4547 abstraction. */
4548 if (loop->next)
4549 return false;
4551 loop_nest->safe_push (loop);
4552 if (loop->inner)
4553 return find_loop_nest_1 (loop->inner, loop_nest);
4554 return true;
4557 /* Return false when the LOOP is not well nested. Otherwise return
4558 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4559 contain the loops from the outermost to the innermost, as they will
4560 appear in the classic distance vector. */
4562 bool
4563 find_loop_nest (struct loop *loop, vec<loop_p> *loop_nest)
4565 loop_nest->safe_push (loop);
4566 if (loop->inner)
4567 return find_loop_nest_1 (loop->inner, loop_nest);
4568 return true;
4571 /* Returns true when the data dependences have been computed, false otherwise.
4572 Given a loop nest LOOP, the following vectors are returned:
4573 DATAREFS is initialized to all the array elements contained in this loop,
4574 DEPENDENCE_RELATIONS contains the relations between the data references.
4575 Compute read-read and self relations if
4576 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4578 bool
4579 compute_data_dependences_for_loop (struct loop *loop,
4580 bool compute_self_and_read_read_dependences,
4581 vec<loop_p> *loop_nest,
4582 vec<data_reference_p> *datarefs,
4583 vec<ddr_p> *dependence_relations)
4585 bool res = true;
4587 memset (&dependence_stats, 0, sizeof (dependence_stats));
4589 /* If the loop nest is not well formed, or one of the data references
4590 is not computable, give up without spending time to compute other
4591 dependences. */
4592 if (!loop
4593 || !find_loop_nest (loop, loop_nest)
4594 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
4595 || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
4596 compute_self_and_read_read_dependences))
4597 res = false;
4599 if (dump_file && (dump_flags & TDF_STATS))
4601 fprintf (dump_file, "Dependence tester statistics:\n");
4603 fprintf (dump_file, "Number of dependence tests: %d\n",
4604 dependence_stats.num_dependence_tests);
4605 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4606 dependence_stats.num_dependence_dependent);
4607 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4608 dependence_stats.num_dependence_independent);
4609 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4610 dependence_stats.num_dependence_undetermined);
4612 fprintf (dump_file, "Number of subscript tests: %d\n",
4613 dependence_stats.num_subscript_tests);
4614 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4615 dependence_stats.num_subscript_undetermined);
4616 fprintf (dump_file, "Number of same subscript function: %d\n",
4617 dependence_stats.num_same_subscript_function);
4619 fprintf (dump_file, "Number of ziv tests: %d\n",
4620 dependence_stats.num_ziv);
4621 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4622 dependence_stats.num_ziv_dependent);
4623 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4624 dependence_stats.num_ziv_independent);
4625 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4626 dependence_stats.num_ziv_unimplemented);
4628 fprintf (dump_file, "Number of siv tests: %d\n",
4629 dependence_stats.num_siv);
4630 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4631 dependence_stats.num_siv_dependent);
4632 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4633 dependence_stats.num_siv_independent);
4634 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4635 dependence_stats.num_siv_unimplemented);
4637 fprintf (dump_file, "Number of miv tests: %d\n",
4638 dependence_stats.num_miv);
4639 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4640 dependence_stats.num_miv_dependent);
4641 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4642 dependence_stats.num_miv_independent);
4643 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4644 dependence_stats.num_miv_unimplemented);
4647 return res;
4650 /* Returns true when the data dependences for the basic block BB have been
4651 computed, false otherwise.
4652 DATAREFS is initialized to all the array elements contained in this basic
4653 block, DEPENDENCE_RELATIONS contains the relations between the data
4654 references. Compute read-read and self relations if
4655 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4656 bool
4657 compute_data_dependences_for_bb (basic_block bb,
4658 bool compute_self_and_read_read_dependences,
4659 vec<data_reference_p> *datarefs,
4660 vec<ddr_p> *dependence_relations)
4662 if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know)
4663 return false;
4665 return compute_all_dependences (*datarefs, dependence_relations, vNULL,
4666 compute_self_and_read_read_dependences);
4669 /* Entry point (for testing only). Analyze all the data references
4670 and the dependence relations in LOOP.
4672 The data references are computed first.
4674 A relation on these nodes is represented by a complete graph. Some
4675 of the relations could be of no interest, thus the relations can be
4676 computed on demand.
4678 In the following function we compute all the relations. This is
4679 just a first implementation that is here for:
4680 - for showing how to ask for the dependence relations,
4681 - for the debugging the whole dependence graph,
4682 - for the dejagnu testcases and maintenance.
4684 It is possible to ask only for a part of the graph, avoiding to
4685 compute the whole dependence graph. The computed dependences are
4686 stored in a knowledge base (KB) such that later queries don't
4687 recompute the same information. The implementation of this KB is
4688 transparent to the optimizer, and thus the KB can be changed with a
4689 more efficient implementation, or the KB could be disabled. */
4690 static void
4691 analyze_all_data_dependences (struct loop *loop)
4693 unsigned int i;
4694 int nb_data_refs = 10;
4695 vec<data_reference_p> datarefs;
4696 datarefs.create (nb_data_refs);
4697 vec<ddr_p> dependence_relations;
4698 dependence_relations.create (nb_data_refs * nb_data_refs);
4699 vec<loop_p> loop_nest;
4700 loop_nest.create (3);
4702 /* Compute DDs on the whole function. */
4703 compute_data_dependences_for_loop (loop, false, &loop_nest, &datarefs,
4704 &dependence_relations);
4706 if (dump_file)
4708 dump_data_dependence_relations (dump_file, dependence_relations);
4709 fprintf (dump_file, "\n\n");
4711 if (dump_flags & TDF_DETAILS)
4712 dump_dist_dir_vectors (dump_file, dependence_relations);
4714 if (dump_flags & TDF_STATS)
4716 unsigned nb_top_relations = 0;
4717 unsigned nb_bot_relations = 0;
4718 unsigned nb_chrec_relations = 0;
4719 struct data_dependence_relation *ddr;
4721 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
4723 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4724 nb_top_relations++;
4726 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4727 nb_bot_relations++;
4729 else
4730 nb_chrec_relations++;
4733 gather_stats_on_scev_database ();
4737 loop_nest.release ();
4738 free_dependence_relations (dependence_relations);
4739 free_data_refs (datarefs);
4742 /* Computes all the data dependences and check that the results of
4743 several analyzers are the same. */
4745 void
4746 tree_check_data_deps (void)
4748 struct loop *loop_nest;
4750 FOR_EACH_LOOP (loop_nest, 0)
4751 analyze_all_data_dependences (loop_nest);
4754 /* Free the memory used by a data dependence relation DDR. */
4756 void
4757 free_dependence_relation (struct data_dependence_relation *ddr)
4759 if (ddr == NULL)
4760 return;
4762 if (DDR_SUBSCRIPTS (ddr).exists ())
4763 free_subscripts (DDR_SUBSCRIPTS (ddr));
4764 DDR_DIST_VECTS (ddr).release ();
4765 DDR_DIR_VECTS (ddr).release ();
4767 free (ddr);
4770 /* Free the memory used by the data dependence relations from
4771 DEPENDENCE_RELATIONS. */
4773 void
4774 free_dependence_relations (vec<ddr_p> dependence_relations)
4776 unsigned int i;
4777 struct data_dependence_relation *ddr;
4779 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
4780 if (ddr)
4781 free_dependence_relation (ddr);
4783 dependence_relations.release ();
4786 /* Free the memory used by the data references from DATAREFS. */
4788 void
4789 free_data_refs (vec<data_reference_p> datarefs)
4791 unsigned int i;
4792 struct data_reference *dr;
4794 FOR_EACH_VEC_ELT (datarefs, i, dr)
4795 free_data_ref (dr);
4796 datarefs.release ();