31370.cc: Skip this test on powerpc64-*-freebsd*.
[official-gcc.git] / gcc / tree-data-ref.c
blob9b3a10df3c73c3f2da1473c4ab3b06e92f570b4d
1 /* Data references and dependences detectors.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3 Free Software Foundation, Inc.
4 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* This pass walks a given loop structure searching for array
23 references. The information about the array accesses is recorded
24 in DATA_REFERENCE structures.
26 The basic test for determining the dependences is:
27 given two access functions chrec1 and chrec2 to a same array, and
28 x and y two vectors from the iteration domain, the same element of
29 the array is accessed twice at iterations x and y if and only if:
30 | chrec1 (x) == chrec2 (y).
32 The goals of this analysis are:
34 - to determine the independence: the relation between two
35 independent accesses is qualified with the chrec_known (this
36 information allows a loop parallelization),
38 - when two data references access the same data, to qualify the
39 dependence relation with classic dependence representations:
41 - distance vectors
42 - direction vectors
43 - loop carried level dependence
44 - polyhedron dependence
45 or with the chains of recurrences based representation,
47 - to define a knowledge base for storing the data dependence
48 information,
50 - to define an interface to access this data.
53 Definitions:
55 - subscript: given two array accesses a subscript is the tuple
56 composed of the access functions for a given dimension. Example:
57 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
58 (f1, g1), (f2, g2), (f3, g3).
60 - Diophantine equation: an equation whose coefficients and
61 solutions are integer constants, for example the equation
62 | 3*x + 2*y = 1
63 has an integer solution x = 1 and y = -1.
65 References:
67 - "Advanced Compilation for High Performance Computing" by Randy
68 Allen and Ken Kennedy.
69 http://citeseer.ist.psu.edu/goff91practical.html
71 - "Loop Transformations for Restructuring Compilers - The Foundations"
72 by Utpal Banerjee.
77 #include "config.h"
78 #include "system.h"
79 #include "coretypes.h"
80 #include "gimple-pretty-print.h"
81 #include "tree-flow.h"
82 #include "cfgloop.h"
83 #include "tree-data-ref.h"
84 #include "tree-scalar-evolution.h"
85 #include "tree-pass.h"
86 #include "langhooks.h"
87 #include "tree-affine.h"
88 #include "params.h"
90 static struct datadep_stats
92 int num_dependence_tests;
93 int num_dependence_dependent;
94 int num_dependence_independent;
95 int num_dependence_undetermined;
97 int num_subscript_tests;
98 int num_subscript_undetermined;
99 int num_same_subscript_function;
101 int num_ziv;
102 int num_ziv_independent;
103 int num_ziv_dependent;
104 int num_ziv_unimplemented;
106 int num_siv;
107 int num_siv_independent;
108 int num_siv_dependent;
109 int num_siv_unimplemented;
111 int num_miv;
112 int num_miv_independent;
113 int num_miv_dependent;
114 int num_miv_unimplemented;
115 } dependence_stats;
117 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
118 struct data_reference *,
119 struct data_reference *,
120 struct loop *);
121 /* Returns true iff A divides B. */
123 static inline bool
124 tree_fold_divides_p (const_tree a, const_tree b)
126 gcc_assert (TREE_CODE (a) == INTEGER_CST);
127 gcc_assert (TREE_CODE (b) == INTEGER_CST);
128 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
131 /* Returns true iff A divides B. */
133 static inline bool
134 int_divides_p (int a, int b)
136 return ((b % a) == 0);
141 /* Dump into FILE all the data references from DATAREFS. */
143 void
144 dump_data_references (FILE *file, VEC (data_reference_p, heap) *datarefs)
146 unsigned int i;
147 struct data_reference *dr;
149 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
150 dump_data_reference (file, dr);
153 /* Dump into STDERR all the data references from DATAREFS. */
155 DEBUG_FUNCTION void
156 debug_data_references (VEC (data_reference_p, heap) *datarefs)
158 dump_data_references (stderr, datarefs);
161 /* Dump to STDERR all the dependence relations from DDRS. */
163 DEBUG_FUNCTION void
164 debug_data_dependence_relations (VEC (ddr_p, heap) *ddrs)
166 dump_data_dependence_relations (stderr, ddrs);
169 /* Dump into FILE all the dependence relations from DDRS. */
171 void
172 dump_data_dependence_relations (FILE *file,
173 VEC (ddr_p, heap) *ddrs)
175 unsigned int i;
176 struct data_dependence_relation *ddr;
178 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
179 dump_data_dependence_relation (file, ddr);
182 /* Print to STDERR the data_reference DR. */
184 DEBUG_FUNCTION void
185 debug_data_reference (struct data_reference *dr)
187 dump_data_reference (stderr, dr);
190 /* Dump function for a DATA_REFERENCE structure. */
192 void
193 dump_data_reference (FILE *outf,
194 struct data_reference *dr)
196 unsigned int i;
198 fprintf (outf, "#(Data Ref: \n");
199 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
200 fprintf (outf, "# stmt: ");
201 print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
202 fprintf (outf, "# ref: ");
203 print_generic_stmt (outf, DR_REF (dr), 0);
204 fprintf (outf, "# base_object: ");
205 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
207 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
209 fprintf (outf, "# Access function %d: ", i);
210 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
212 fprintf (outf, "#)\n");
215 /* Dumps the affine function described by FN to the file OUTF. */
217 static void
218 dump_affine_function (FILE *outf, affine_fn fn)
220 unsigned i;
221 tree coef;
223 print_generic_expr (outf, VEC_index (tree, fn, 0), TDF_SLIM);
224 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
226 fprintf (outf, " + ");
227 print_generic_expr (outf, coef, TDF_SLIM);
228 fprintf (outf, " * x_%u", i);
232 /* Dumps the conflict function CF to the file OUTF. */
234 static void
235 dump_conflict_function (FILE *outf, conflict_function *cf)
237 unsigned i;
239 if (cf->n == NO_DEPENDENCE)
240 fprintf (outf, "no dependence\n");
241 else if (cf->n == NOT_KNOWN)
242 fprintf (outf, "not known\n");
243 else
245 for (i = 0; i < cf->n; i++)
247 fprintf (outf, "[");
248 dump_affine_function (outf, cf->fns[i]);
249 fprintf (outf, "]\n");
254 /* Dump function for a SUBSCRIPT structure. */
256 void
257 dump_subscript (FILE *outf, struct subscript *subscript)
259 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
261 fprintf (outf, "\n (subscript \n");
262 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
263 dump_conflict_function (outf, cf);
264 if (CF_NONTRIVIAL_P (cf))
266 tree last_iteration = SUB_LAST_CONFLICT (subscript);
267 fprintf (outf, " last_conflict: ");
268 print_generic_stmt (outf, last_iteration, 0);
271 cf = SUB_CONFLICTS_IN_B (subscript);
272 fprintf (outf, " iterations_that_access_an_element_twice_in_B: ");
273 dump_conflict_function (outf, cf);
274 if (CF_NONTRIVIAL_P (cf))
276 tree last_iteration = SUB_LAST_CONFLICT (subscript);
277 fprintf (outf, " last_conflict: ");
278 print_generic_stmt (outf, last_iteration, 0);
281 fprintf (outf, " (Subscript distance: ");
282 print_generic_stmt (outf, SUB_DISTANCE (subscript), 0);
283 fprintf (outf, " )\n");
284 fprintf (outf, " )\n");
287 /* Print the classic direction vector DIRV to OUTF. */
289 void
290 print_direction_vector (FILE *outf,
291 lambda_vector dirv,
292 int length)
294 int eq;
296 for (eq = 0; eq < length; eq++)
298 enum data_dependence_direction dir = ((enum data_dependence_direction)
299 dirv[eq]);
301 switch (dir)
303 case dir_positive:
304 fprintf (outf, " +");
305 break;
306 case dir_negative:
307 fprintf (outf, " -");
308 break;
309 case dir_equal:
310 fprintf (outf, " =");
311 break;
312 case dir_positive_or_equal:
313 fprintf (outf, " +=");
314 break;
315 case dir_positive_or_negative:
316 fprintf (outf, " +-");
317 break;
318 case dir_negative_or_equal:
319 fprintf (outf, " -=");
320 break;
321 case dir_star:
322 fprintf (outf, " *");
323 break;
324 default:
325 fprintf (outf, "indep");
326 break;
329 fprintf (outf, "\n");
332 /* Print a vector of direction vectors. */
334 void
335 print_dir_vectors (FILE *outf, VEC (lambda_vector, heap) *dir_vects,
336 int length)
338 unsigned j;
339 lambda_vector v;
341 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, v)
342 print_direction_vector (outf, v, length);
345 /* Print out a vector VEC of length N to OUTFILE. */
347 static inline void
348 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
350 int i;
352 for (i = 0; i < n; i++)
353 fprintf (outfile, "%3d ", vector[i]);
354 fprintf (outfile, "\n");
357 /* Print a vector of distance vectors. */
359 void
360 print_dist_vectors (FILE *outf, VEC (lambda_vector, heap) *dist_vects,
361 int length)
363 unsigned j;
364 lambda_vector v;
366 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, v)
367 print_lambda_vector (outf, v, length);
370 /* Debug version. */
372 DEBUG_FUNCTION void
373 debug_data_dependence_relation (struct data_dependence_relation *ddr)
375 dump_data_dependence_relation (stderr, ddr);
378 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
380 void
381 dump_data_dependence_relation (FILE *outf,
382 struct data_dependence_relation *ddr)
384 struct data_reference *dra, *drb;
386 fprintf (outf, "(Data Dep: \n");
388 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
390 if (ddr)
392 dra = DDR_A (ddr);
393 drb = DDR_B (ddr);
394 if (dra)
395 dump_data_reference (outf, dra);
396 else
397 fprintf (outf, " (nil)\n");
398 if (drb)
399 dump_data_reference (outf, drb);
400 else
401 fprintf (outf, " (nil)\n");
403 fprintf (outf, " (don't know)\n)\n");
404 return;
407 dra = DDR_A (ddr);
408 drb = DDR_B (ddr);
409 dump_data_reference (outf, dra);
410 dump_data_reference (outf, drb);
412 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
413 fprintf (outf, " (no dependence)\n");
415 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
417 unsigned int i;
418 struct loop *loopi;
420 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
422 fprintf (outf, " access_fn_A: ");
423 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
424 fprintf (outf, " access_fn_B: ");
425 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
426 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
429 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
430 fprintf (outf, " loop nest: (");
431 FOR_EACH_VEC_ELT (loop_p, DDR_LOOP_NEST (ddr), i, loopi)
432 fprintf (outf, "%d ", loopi->num);
433 fprintf (outf, ")\n");
435 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
437 fprintf (outf, " distance_vector: ");
438 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
439 DDR_NB_LOOPS (ddr));
442 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
444 fprintf (outf, " direction_vector: ");
445 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
446 DDR_NB_LOOPS (ddr));
450 fprintf (outf, ")\n");
453 /* Dump function for a DATA_DEPENDENCE_DIRECTION structure. */
455 void
456 dump_data_dependence_direction (FILE *file,
457 enum data_dependence_direction dir)
459 switch (dir)
461 case dir_positive:
462 fprintf (file, "+");
463 break;
465 case dir_negative:
466 fprintf (file, "-");
467 break;
469 case dir_equal:
470 fprintf (file, "=");
471 break;
473 case dir_positive_or_negative:
474 fprintf (file, "+-");
475 break;
477 case dir_positive_or_equal:
478 fprintf (file, "+=");
479 break;
481 case dir_negative_or_equal:
482 fprintf (file, "-=");
483 break;
485 case dir_star:
486 fprintf (file, "*");
487 break;
489 default:
490 break;
494 /* Dumps the distance and direction vectors in FILE. DDRS contains
495 the dependence relations, and VECT_SIZE is the size of the
496 dependence vectors, or in other words the number of loops in the
497 considered nest. */
499 void
500 dump_dist_dir_vectors (FILE *file, VEC (ddr_p, heap) *ddrs)
502 unsigned int i, j;
503 struct data_dependence_relation *ddr;
504 lambda_vector v;
506 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
507 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
509 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), j, v)
511 fprintf (file, "DISTANCE_V (");
512 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
513 fprintf (file, ")\n");
516 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), j, v)
518 fprintf (file, "DIRECTION_V (");
519 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
520 fprintf (file, ")\n");
524 fprintf (file, "\n\n");
527 /* Dumps the data dependence relations DDRS in FILE. */
529 void
530 dump_ddrs (FILE *file, VEC (ddr_p, heap) *ddrs)
532 unsigned int i;
533 struct data_dependence_relation *ddr;
535 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
536 dump_data_dependence_relation (file, ddr);
538 fprintf (file, "\n\n");
541 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
542 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
543 constant of type ssizetype, and returns true. If we cannot do this
544 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
545 is returned. */
547 static bool
548 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
549 tree *var, tree *off)
551 tree var0, var1;
552 tree off0, off1;
553 enum tree_code ocode = code;
555 *var = NULL_TREE;
556 *off = NULL_TREE;
558 switch (code)
560 case INTEGER_CST:
561 *var = build_int_cst (type, 0);
562 *off = fold_convert (ssizetype, op0);
563 return true;
565 case POINTER_PLUS_EXPR:
566 ocode = PLUS_EXPR;
567 /* FALLTHROUGH */
568 case PLUS_EXPR:
569 case MINUS_EXPR:
570 split_constant_offset (op0, &var0, &off0);
571 split_constant_offset (op1, &var1, &off1);
572 *var = fold_build2 (code, type, var0, var1);
573 *off = size_binop (ocode, off0, off1);
574 return true;
576 case MULT_EXPR:
577 if (TREE_CODE (op1) != INTEGER_CST)
578 return false;
580 split_constant_offset (op0, &var0, &off0);
581 *var = fold_build2 (MULT_EXPR, type, var0, op1);
582 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
583 return true;
585 case ADDR_EXPR:
587 tree base, poffset;
588 HOST_WIDE_INT pbitsize, pbitpos;
589 enum machine_mode pmode;
590 int punsignedp, pvolatilep;
592 op0 = TREE_OPERAND (op0, 0);
593 base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
594 &pmode, &punsignedp, &pvolatilep, false);
596 if (pbitpos % BITS_PER_UNIT != 0)
597 return false;
598 base = build_fold_addr_expr (base);
599 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
601 if (poffset)
603 split_constant_offset (poffset, &poffset, &off1);
604 off0 = size_binop (PLUS_EXPR, off0, off1);
605 if (POINTER_TYPE_P (TREE_TYPE (base)))
606 base = fold_build_pointer_plus (base, poffset);
607 else
608 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
609 fold_convert (TREE_TYPE (base), poffset));
612 var0 = fold_convert (type, base);
614 /* If variable length types are involved, punt, otherwise casts
615 might be converted into ARRAY_REFs in gimplify_conversion.
616 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
617 possibly no longer appears in current GIMPLE, might resurface.
618 This perhaps could run
619 if (CONVERT_EXPR_P (var0))
621 gimplify_conversion (&var0);
622 // Attempt to fill in any within var0 found ARRAY_REF's
623 // element size from corresponding op embedded ARRAY_REF,
624 // if unsuccessful, just punt.
625 } */
626 while (POINTER_TYPE_P (type))
627 type = TREE_TYPE (type);
628 if (int_size_in_bytes (type) < 0)
629 return false;
631 *var = var0;
632 *off = off0;
633 return true;
636 case SSA_NAME:
638 gimple def_stmt = SSA_NAME_DEF_STMT (op0);
639 enum tree_code subcode;
641 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
642 return false;
644 var0 = gimple_assign_rhs1 (def_stmt);
645 subcode = gimple_assign_rhs_code (def_stmt);
646 var1 = gimple_assign_rhs2 (def_stmt);
648 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
650 CASE_CONVERT:
652 /* We must not introduce undefined overflow, and we must not change the value.
653 Hence we're okay if the inner type doesn't overflow to start with
654 (pointer or signed), the outer type also is an integer or pointer
655 and the outer precision is at least as large as the inner. */
656 tree itype = TREE_TYPE (op0);
657 if ((POINTER_TYPE_P (itype)
658 || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
659 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
660 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
662 split_constant_offset (op0, &var0, off);
663 *var = fold_convert (type, var0);
664 return true;
666 return false;
669 default:
670 return false;
674 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
675 will be ssizetype. */
677 void
678 split_constant_offset (tree exp, tree *var, tree *off)
680 tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
681 enum tree_code code;
683 *var = exp;
684 *off = ssize_int (0);
685 STRIP_NOPS (exp);
687 if (tree_is_chrec (exp)
688 || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
689 return;
691 otype = TREE_TYPE (exp);
692 code = TREE_CODE (exp);
693 extract_ops_from_tree (exp, &code, &op0, &op1);
694 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
696 *var = fold_convert (type, e);
697 *off = o;
701 /* Returns the address ADDR of an object in a canonical shape (without nop
702 casts, and with type of pointer to the object). */
704 static tree
705 canonicalize_base_object_address (tree addr)
707 tree orig = addr;
709 STRIP_NOPS (addr);
711 /* The base address may be obtained by casting from integer, in that case
712 keep the cast. */
713 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
714 return orig;
716 if (TREE_CODE (addr) != ADDR_EXPR)
717 return addr;
719 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
722 /* Analyzes the behavior of the memory reference DR in the innermost loop or
723 basic block that contains it. Returns true if analysis succeed or false
724 otherwise. */
726 bool
727 dr_analyze_innermost (struct data_reference *dr, struct loop *nest)
729 gimple stmt = DR_STMT (dr);
730 struct loop *loop = loop_containing_stmt (stmt);
731 tree ref = DR_REF (dr);
732 HOST_WIDE_INT pbitsize, pbitpos;
733 tree base, poffset;
734 enum machine_mode pmode;
735 int punsignedp, pvolatilep;
736 affine_iv base_iv, offset_iv;
737 tree init, dinit, step;
738 bool in_loop = (loop && loop->num);
740 if (dump_file && (dump_flags & TDF_DETAILS))
741 fprintf (dump_file, "analyze_innermost: ");
743 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
744 &pmode, &punsignedp, &pvolatilep, false);
745 gcc_assert (base != NULL_TREE);
747 if (pbitpos % BITS_PER_UNIT != 0)
749 if (dump_file && (dump_flags & TDF_DETAILS))
750 fprintf (dump_file, "failed: bit offset alignment.\n");
751 return false;
754 if (TREE_CODE (base) == MEM_REF)
756 if (!integer_zerop (TREE_OPERAND (base, 1)))
758 if (!poffset)
760 double_int moff = mem_ref_offset (base);
761 poffset = double_int_to_tree (sizetype, moff);
763 else
764 poffset = size_binop (PLUS_EXPR, poffset, TREE_OPERAND (base, 1));
766 base = TREE_OPERAND (base, 0);
768 else
769 base = build_fold_addr_expr (base);
771 if (in_loop)
773 if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
774 false))
776 if (nest)
778 if (dump_file && (dump_flags & TDF_DETAILS))
779 fprintf (dump_file, "failed: evolution of base is not"
780 " affine.\n");
781 return false;
783 else
785 base_iv.base = base;
786 base_iv.step = ssize_int (0);
787 base_iv.no_overflow = true;
791 else
793 base_iv.base = base;
794 base_iv.step = ssize_int (0);
795 base_iv.no_overflow = true;
798 if (!poffset)
800 offset_iv.base = ssize_int (0);
801 offset_iv.step = ssize_int (0);
803 else
805 if (!in_loop)
807 offset_iv.base = poffset;
808 offset_iv.step = ssize_int (0);
810 else if (!simple_iv (loop, loop_containing_stmt (stmt),
811 poffset, &offset_iv, false))
813 if (nest)
815 if (dump_file && (dump_flags & TDF_DETAILS))
816 fprintf (dump_file, "failed: evolution of offset is not"
817 " affine.\n");
818 return false;
820 else
822 offset_iv.base = poffset;
823 offset_iv.step = ssize_int (0);
828 init = ssize_int (pbitpos / BITS_PER_UNIT);
829 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
830 init = size_binop (PLUS_EXPR, init, dinit);
831 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
832 init = size_binop (PLUS_EXPR, init, dinit);
834 step = size_binop (PLUS_EXPR,
835 fold_convert (ssizetype, base_iv.step),
836 fold_convert (ssizetype, offset_iv.step));
838 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
840 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
841 DR_INIT (dr) = init;
842 DR_STEP (dr) = step;
844 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
846 if (dump_file && (dump_flags & TDF_DETAILS))
847 fprintf (dump_file, "success.\n");
849 return true;
852 /* Determines the base object and the list of indices of memory reference
853 DR, analyzed in LOOP and instantiated in loop nest NEST. */
855 static void
856 dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
858 VEC (tree, heap) *access_fns = NULL;
859 tree ref, op;
860 tree base, off, access_fn;
861 basic_block before_loop;
863 /* If analyzing a basic-block there are no indices to analyze
864 and thus no access functions. */
865 if (!nest)
867 DR_BASE_OBJECT (dr) = DR_REF (dr);
868 DR_ACCESS_FNS (dr) = NULL;
869 return;
872 ref = DR_REF (dr);
873 before_loop = block_before_loop (nest);
875 /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
876 into a two element array with a constant index. The base is
877 then just the immediate underlying object. */
878 if (TREE_CODE (ref) == REALPART_EXPR)
880 ref = TREE_OPERAND (ref, 0);
881 VEC_safe_push (tree, heap, access_fns, integer_zero_node);
883 else if (TREE_CODE (ref) == IMAGPART_EXPR)
885 ref = TREE_OPERAND (ref, 0);
886 VEC_safe_push (tree, heap, access_fns, integer_one_node);
889 /* Analyze access functions of dimensions we know to be independent. */
890 while (handled_component_p (ref))
892 if (TREE_CODE (ref) == ARRAY_REF)
894 op = TREE_OPERAND (ref, 1);
895 access_fn = analyze_scalar_evolution (loop, op);
896 access_fn = instantiate_scev (before_loop, loop, access_fn);
897 VEC_safe_push (tree, heap, access_fns, access_fn);
899 else if (TREE_CODE (ref) == COMPONENT_REF
900 && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
902 /* For COMPONENT_REFs of records (but not unions!) use the
903 FIELD_DECL offset as constant access function so we can
904 disambiguate a[i].f1 and a[i].f2. */
905 tree off = component_ref_field_offset (ref);
906 off = size_binop (PLUS_EXPR,
907 size_binop (MULT_EXPR,
908 fold_convert (bitsizetype, off),
909 bitsize_int (BITS_PER_UNIT)),
910 DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
911 VEC_safe_push (tree, heap, access_fns, off);
913 else
914 /* If we have an unhandled component we could not translate
915 to an access function stop analyzing. We have determined
916 our base object in this case. */
917 break;
919 ref = TREE_OPERAND (ref, 0);
922 /* If the address operand of a MEM_REF base has an evolution in the
923 analyzed nest, add it as an additional independent access-function. */
924 if (TREE_CODE (ref) == MEM_REF)
926 op = TREE_OPERAND (ref, 0);
927 access_fn = analyze_scalar_evolution (loop, op);
928 access_fn = instantiate_scev (before_loop, loop, access_fn);
929 if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
931 tree orig_type;
932 tree memoff = TREE_OPERAND (ref, 1);
933 base = initial_condition (access_fn);
934 orig_type = TREE_TYPE (base);
935 STRIP_USELESS_TYPE_CONVERSION (base);
936 split_constant_offset (base, &base, &off);
937 /* Fold the MEM_REF offset into the evolutions initial
938 value to make more bases comparable. */
939 if (!integer_zerop (memoff))
941 off = size_binop (PLUS_EXPR, off,
942 fold_convert (ssizetype, memoff));
943 memoff = build_int_cst (TREE_TYPE (memoff), 0);
945 access_fn = chrec_replace_initial_condition
946 (access_fn, fold_convert (orig_type, off));
947 /* ??? This is still not a suitable base object for
948 dr_may_alias_p - the base object needs to be an
949 access that covers the object as whole. With
950 an evolution in the pointer this cannot be
951 guaranteed.
952 As a band-aid, mark the access so we can special-case
953 it in dr_may_alias_p. */
954 ref = fold_build2_loc (EXPR_LOCATION (ref),
955 MEM_REF, TREE_TYPE (ref),
956 base, memoff);
957 DR_UNCONSTRAINED_BASE (dr) = true;
958 VEC_safe_push (tree, heap, access_fns, access_fn);
961 else if (DECL_P (ref))
963 /* Canonicalize DR_BASE_OBJECT to MEM_REF form. */
964 ref = build2 (MEM_REF, TREE_TYPE (ref),
965 build_fold_addr_expr (ref),
966 build_int_cst (reference_alias_ptr_type (ref), 0));
969 DR_BASE_OBJECT (dr) = ref;
970 DR_ACCESS_FNS (dr) = access_fns;
973 /* Extracts the alias analysis information from the memory reference DR. */
975 static void
976 dr_analyze_alias (struct data_reference *dr)
978 tree ref = DR_REF (dr);
979 tree base = get_base_address (ref), addr;
981 if (INDIRECT_REF_P (base)
982 || TREE_CODE (base) == MEM_REF)
984 addr = TREE_OPERAND (base, 0);
985 if (TREE_CODE (addr) == SSA_NAME)
986 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
990 /* Frees data reference DR. */
992 void
993 free_data_ref (data_reference_p dr)
995 VEC_free (tree, heap, DR_ACCESS_FNS (dr));
996 free (dr);
999 /* Analyzes memory reference MEMREF accessed in STMT. The reference
1000 is read if IS_READ is true, write otherwise. Returns the
1001 data_reference description of MEMREF. NEST is the outermost loop
1002 in which the reference should be instantiated, LOOP is the loop in
1003 which the data reference should be analyzed. */
1005 struct data_reference *
1006 create_data_ref (loop_p nest, loop_p loop, tree memref, gimple stmt,
1007 bool is_read)
1009 struct data_reference *dr;
1011 if (dump_file && (dump_flags & TDF_DETAILS))
1013 fprintf (dump_file, "Creating dr for ");
1014 print_generic_expr (dump_file, memref, TDF_SLIM);
1015 fprintf (dump_file, "\n");
1018 dr = XCNEW (struct data_reference);
1019 DR_STMT (dr) = stmt;
1020 DR_REF (dr) = memref;
1021 DR_IS_READ (dr) = is_read;
1023 dr_analyze_innermost (dr, nest);
1024 dr_analyze_indices (dr, nest, loop);
1025 dr_analyze_alias (dr);
1027 if (dump_file && (dump_flags & TDF_DETAILS))
1029 unsigned i;
1030 fprintf (dump_file, "\tbase_address: ");
1031 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1032 fprintf (dump_file, "\n\toffset from base address: ");
1033 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1034 fprintf (dump_file, "\n\tconstant offset from base address: ");
1035 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1036 fprintf (dump_file, "\n\tstep: ");
1037 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1038 fprintf (dump_file, "\n\taligned to: ");
1039 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
1040 fprintf (dump_file, "\n\tbase_object: ");
1041 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1042 fprintf (dump_file, "\n");
1043 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1045 fprintf (dump_file, "\tAccess function %d: ", i);
1046 print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1050 return dr;
1053 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1054 expressions. */
1055 static bool
1056 dr_equal_offsets_p1 (tree offset1, tree offset2)
1058 bool res;
1060 STRIP_NOPS (offset1);
1061 STRIP_NOPS (offset2);
1063 if (offset1 == offset2)
1064 return true;
1066 if (TREE_CODE (offset1) != TREE_CODE (offset2)
1067 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1068 return false;
1070 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1071 TREE_OPERAND (offset2, 0));
1073 if (!res || !BINARY_CLASS_P (offset1))
1074 return res;
1076 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1077 TREE_OPERAND (offset2, 1));
1079 return res;
1082 /* Check if DRA and DRB have equal offsets. */
1083 bool
1084 dr_equal_offsets_p (struct data_reference *dra,
1085 struct data_reference *drb)
1087 tree offset1, offset2;
1089 offset1 = DR_OFFSET (dra);
1090 offset2 = DR_OFFSET (drb);
1092 return dr_equal_offsets_p1 (offset1, offset2);
1095 /* Returns true if FNA == FNB. */
1097 static bool
1098 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1100 unsigned i, n = VEC_length (tree, fna);
1102 if (n != VEC_length (tree, fnb))
1103 return false;
1105 for (i = 0; i < n; i++)
1106 if (!operand_equal_p (VEC_index (tree, fna, i),
1107 VEC_index (tree, fnb, i), 0))
1108 return false;
1110 return true;
1113 /* If all the functions in CF are the same, returns one of them,
1114 otherwise returns NULL. */
1116 static affine_fn
1117 common_affine_function (conflict_function *cf)
1119 unsigned i;
1120 affine_fn comm;
1122 if (!CF_NONTRIVIAL_P (cf))
1123 return NULL;
1125 comm = cf->fns[0];
1127 for (i = 1; i < cf->n; i++)
1128 if (!affine_function_equal_p (comm, cf->fns[i]))
1129 return NULL;
1131 return comm;
1134 /* Returns the base of the affine function FN. */
1136 static tree
1137 affine_function_base (affine_fn fn)
1139 return VEC_index (tree, fn, 0);
1142 /* Returns true if FN is a constant. */
1144 static bool
1145 affine_function_constant_p (affine_fn fn)
1147 unsigned i;
1148 tree coef;
1150 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
1151 if (!integer_zerop (coef))
1152 return false;
1154 return true;
1157 /* Returns true if FN is the zero constant function. */
1159 static bool
1160 affine_function_zero_p (affine_fn fn)
1162 return (integer_zerop (affine_function_base (fn))
1163 && affine_function_constant_p (fn));
1166 /* Returns a signed integer type with the largest precision from TA
1167 and TB. */
1169 static tree
1170 signed_type_for_types (tree ta, tree tb)
1172 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1173 return signed_type_for (ta);
1174 else
1175 return signed_type_for (tb);
1178 /* Applies operation OP on affine functions FNA and FNB, and returns the
1179 result. */
1181 static affine_fn
1182 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1184 unsigned i, n, m;
1185 affine_fn ret;
1186 tree coef;
1188 if (VEC_length (tree, fnb) > VEC_length (tree, fna))
1190 n = VEC_length (tree, fna);
1191 m = VEC_length (tree, fnb);
1193 else
1195 n = VEC_length (tree, fnb);
1196 m = VEC_length (tree, fna);
1199 ret = VEC_alloc (tree, heap, m);
1200 for (i = 0; i < n; i++)
1202 tree type = signed_type_for_types (TREE_TYPE (VEC_index (tree, fna, i)),
1203 TREE_TYPE (VEC_index (tree, fnb, i)));
1205 VEC_quick_push (tree, ret,
1206 fold_build2 (op, type,
1207 VEC_index (tree, fna, i),
1208 VEC_index (tree, fnb, i)));
1211 for (; VEC_iterate (tree, fna, i, coef); i++)
1212 VEC_quick_push (tree, ret,
1213 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1214 coef, integer_zero_node));
1215 for (; VEC_iterate (tree, fnb, i, coef); i++)
1216 VEC_quick_push (tree, ret,
1217 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1218 integer_zero_node, coef));
1220 return ret;
1223 /* Returns the sum of affine functions FNA and FNB. */
1225 static affine_fn
1226 affine_fn_plus (affine_fn fna, affine_fn fnb)
1228 return affine_fn_op (PLUS_EXPR, fna, fnb);
1231 /* Returns the difference of affine functions FNA and FNB. */
1233 static affine_fn
1234 affine_fn_minus (affine_fn fna, affine_fn fnb)
1236 return affine_fn_op (MINUS_EXPR, fna, fnb);
1239 /* Frees affine function FN. */
1241 static void
1242 affine_fn_free (affine_fn fn)
1244 VEC_free (tree, heap, fn);
1247 /* Determine for each subscript in the data dependence relation DDR
1248 the distance. */
1250 static void
1251 compute_subscript_distance (struct data_dependence_relation *ddr)
1253 conflict_function *cf_a, *cf_b;
1254 affine_fn fn_a, fn_b, diff;
1256 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1258 unsigned int i;
1260 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1262 struct subscript *subscript;
1264 subscript = DDR_SUBSCRIPT (ddr, i);
1265 cf_a = SUB_CONFLICTS_IN_A (subscript);
1266 cf_b = SUB_CONFLICTS_IN_B (subscript);
1268 fn_a = common_affine_function (cf_a);
1269 fn_b = common_affine_function (cf_b);
1270 if (!fn_a || !fn_b)
1272 SUB_DISTANCE (subscript) = chrec_dont_know;
1273 return;
1275 diff = affine_fn_minus (fn_a, fn_b);
1277 if (affine_function_constant_p (diff))
1278 SUB_DISTANCE (subscript) = affine_function_base (diff);
1279 else
1280 SUB_DISTANCE (subscript) = chrec_dont_know;
1282 affine_fn_free (diff);
1287 /* Returns the conflict function for "unknown". */
1289 static conflict_function *
1290 conflict_fn_not_known (void)
1292 conflict_function *fn = XCNEW (conflict_function);
1293 fn->n = NOT_KNOWN;
1295 return fn;
1298 /* Returns the conflict function for "independent". */
1300 static conflict_function *
1301 conflict_fn_no_dependence (void)
1303 conflict_function *fn = XCNEW (conflict_function);
1304 fn->n = NO_DEPENDENCE;
1306 return fn;
1309 /* Returns true if the address of OBJ is invariant in LOOP. */
1311 static bool
1312 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1314 while (handled_component_p (obj))
1316 if (TREE_CODE (obj) == ARRAY_REF)
1318 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1319 need to check the stride and the lower bound of the reference. */
1320 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1321 loop->num)
1322 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1323 loop->num))
1324 return false;
1326 else if (TREE_CODE (obj) == COMPONENT_REF)
1328 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1329 loop->num))
1330 return false;
1332 obj = TREE_OPERAND (obj, 0);
1335 if (!INDIRECT_REF_P (obj)
1336 && TREE_CODE (obj) != MEM_REF)
1337 return true;
1339 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1340 loop->num);
1343 /* Returns false if we can prove that data references A and B do not alias,
1344 true otherwise. If LOOP_NEST is false no cross-iteration aliases are
1345 considered. */
1347 bool
1348 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
1349 bool loop_nest)
1351 tree addr_a = DR_BASE_OBJECT (a);
1352 tree addr_b = DR_BASE_OBJECT (b);
1354 /* If we are not processing a loop nest but scalar code we
1355 do not need to care about possible cross-iteration dependences
1356 and thus can process the full original reference. Do so,
1357 similar to how loop invariant motion applies extra offset-based
1358 disambiguation. */
1359 if (!loop_nest)
1361 aff_tree off1, off2;
1362 double_int size1, size2;
1363 get_inner_reference_aff (DR_REF (a), &off1, &size1);
1364 get_inner_reference_aff (DR_REF (b), &off2, &size2);
1365 aff_combination_scale (&off1, double_int_minus_one);
1366 aff_combination_add (&off2, &off1);
1367 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1368 return false;
1371 /* If we had an evolution in a MEM_REF BASE_OBJECT we do not know
1372 the size of the base-object. So we cannot do any offset/overlap
1373 based analysis but have to rely on points-to information only. */
1374 if (TREE_CODE (addr_a) == MEM_REF
1375 && DR_UNCONSTRAINED_BASE (a))
1377 if (TREE_CODE (addr_b) == MEM_REF
1378 && DR_UNCONSTRAINED_BASE (b))
1379 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
1380 TREE_OPERAND (addr_b, 0));
1381 else
1382 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
1383 build_fold_addr_expr (addr_b));
1385 else if (TREE_CODE (addr_b) == MEM_REF
1386 && DR_UNCONSTRAINED_BASE (b))
1387 return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
1388 TREE_OPERAND (addr_b, 0));
1390 /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
1391 that is being subsetted in the loop nest. */
1392 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
1393 return refs_output_dependent_p (addr_a, addr_b);
1394 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
1395 return refs_anti_dependent_p (addr_a, addr_b);
1396 return refs_may_alias_p (addr_a, addr_b);
1399 /* Initialize a data dependence relation between data accesses A and
1400 B. NB_LOOPS is the number of loops surrounding the references: the
1401 size of the classic distance/direction vectors. */
1403 struct data_dependence_relation *
1404 initialize_data_dependence_relation (struct data_reference *a,
1405 struct data_reference *b,
1406 VEC (loop_p, heap) *loop_nest)
1408 struct data_dependence_relation *res;
1409 unsigned int i;
1411 res = XNEW (struct data_dependence_relation);
1412 DDR_A (res) = a;
1413 DDR_B (res) = b;
1414 DDR_LOOP_NEST (res) = NULL;
1415 DDR_REVERSED_P (res) = false;
1416 DDR_SUBSCRIPTS (res) = NULL;
1417 DDR_DIR_VECTS (res) = NULL;
1418 DDR_DIST_VECTS (res) = NULL;
1420 if (a == NULL || b == NULL)
1422 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1423 return res;
1426 /* If the data references do not alias, then they are independent. */
1427 if (!dr_may_alias_p (a, b, loop_nest != NULL))
1429 DDR_ARE_DEPENDENT (res) = chrec_known;
1430 return res;
1433 /* The case where the references are exactly the same. */
1434 if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1436 if (loop_nest
1437 && !object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1438 DR_BASE_OBJECT (a)))
1440 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1441 return res;
1443 DDR_AFFINE_P (res) = true;
1444 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1445 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1446 DDR_LOOP_NEST (res) = loop_nest;
1447 DDR_INNER_LOOP (res) = 0;
1448 DDR_SELF_REFERENCE (res) = true;
1449 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1451 struct subscript *subscript;
1453 subscript = XNEW (struct subscript);
1454 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1455 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1456 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1457 SUB_DISTANCE (subscript) = chrec_dont_know;
1458 VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1460 return res;
1463 /* If the references do not access the same object, we do not know
1464 whether they alias or not. */
1465 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1467 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1468 return res;
1471 /* If the base of the object is not invariant in the loop nest, we cannot
1472 analyze it. TODO -- in fact, it would suffice to record that there may
1473 be arbitrary dependences in the loops where the base object varies. */
1474 if (loop_nest
1475 && !object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1476 DR_BASE_OBJECT (a)))
1478 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1479 return res;
1482 /* If the number of dimensions of the access to not agree we can have
1483 a pointer access to a component of the array element type and an
1484 array access while the base-objects are still the same. Punt. */
1485 if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
1487 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1488 return res;
1491 DDR_AFFINE_P (res) = true;
1492 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1493 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1494 DDR_LOOP_NEST (res) = loop_nest;
1495 DDR_INNER_LOOP (res) = 0;
1496 DDR_SELF_REFERENCE (res) = false;
1498 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1500 struct subscript *subscript;
1502 subscript = XNEW (struct subscript);
1503 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1504 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1505 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1506 SUB_DISTANCE (subscript) = chrec_dont_know;
1507 VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1510 return res;
1513 /* Frees memory used by the conflict function F. */
1515 static void
1516 free_conflict_function (conflict_function *f)
1518 unsigned i;
1520 if (CF_NONTRIVIAL_P (f))
1522 for (i = 0; i < f->n; i++)
1523 affine_fn_free (f->fns[i]);
1525 free (f);
1528 /* Frees memory used by SUBSCRIPTS. */
1530 static void
1531 free_subscripts (VEC (subscript_p, heap) *subscripts)
1533 unsigned i;
1534 subscript_p s;
1536 FOR_EACH_VEC_ELT (subscript_p, subscripts, i, s)
1538 free_conflict_function (s->conflicting_iterations_in_a);
1539 free_conflict_function (s->conflicting_iterations_in_b);
1540 free (s);
1542 VEC_free (subscript_p, heap, subscripts);
1545 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1546 description. */
1548 static inline void
1549 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1550 tree chrec)
1552 if (dump_file && (dump_flags & TDF_DETAILS))
1554 fprintf (dump_file, "(dependence classified: ");
1555 print_generic_expr (dump_file, chrec, 0);
1556 fprintf (dump_file, ")\n");
1559 DDR_ARE_DEPENDENT (ddr) = chrec;
1560 free_subscripts (DDR_SUBSCRIPTS (ddr));
1561 DDR_SUBSCRIPTS (ddr) = NULL;
1564 /* The dependence relation DDR cannot be represented by a distance
1565 vector. */
1567 static inline void
1568 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1570 if (dump_file && (dump_flags & TDF_DETAILS))
1571 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1573 DDR_AFFINE_P (ddr) = false;
1578 /* This section contains the classic Banerjee tests. */
1580 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1581 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1583 static inline bool
1584 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1586 return (evolution_function_is_constant_p (chrec_a)
1587 && evolution_function_is_constant_p (chrec_b));
1590 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1591 variable, i.e., if the SIV (Single Index Variable) test is true. */
1593 static bool
1594 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1596 if ((evolution_function_is_constant_p (chrec_a)
1597 && evolution_function_is_univariate_p (chrec_b))
1598 || (evolution_function_is_constant_p (chrec_b)
1599 && evolution_function_is_univariate_p (chrec_a)))
1600 return true;
1602 if (evolution_function_is_univariate_p (chrec_a)
1603 && evolution_function_is_univariate_p (chrec_b))
1605 switch (TREE_CODE (chrec_a))
1607 case POLYNOMIAL_CHREC:
1608 switch (TREE_CODE (chrec_b))
1610 case POLYNOMIAL_CHREC:
1611 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1612 return false;
1614 default:
1615 return true;
1618 default:
1619 return true;
1623 return false;
1626 /* Creates a conflict function with N dimensions. The affine functions
1627 in each dimension follow. */
1629 static conflict_function *
1630 conflict_fn (unsigned n, ...)
1632 unsigned i;
1633 conflict_function *ret = XCNEW (conflict_function);
1634 va_list ap;
1636 gcc_assert (0 < n && n <= MAX_DIM);
1637 va_start(ap, n);
1639 ret->n = n;
1640 for (i = 0; i < n; i++)
1641 ret->fns[i] = va_arg (ap, affine_fn);
1642 va_end(ap);
1644 return ret;
1647 /* Returns constant affine function with value CST. */
1649 static affine_fn
1650 affine_fn_cst (tree cst)
1652 affine_fn fn = VEC_alloc (tree, heap, 1);
1653 VEC_quick_push (tree, fn, cst);
1654 return fn;
1657 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1659 static affine_fn
1660 affine_fn_univar (tree cst, unsigned dim, tree coef)
1662 affine_fn fn = VEC_alloc (tree, heap, dim + 1);
1663 unsigned i;
1665 gcc_assert (dim > 0);
1666 VEC_quick_push (tree, fn, cst);
1667 for (i = 1; i < dim; i++)
1668 VEC_quick_push (tree, fn, integer_zero_node);
1669 VEC_quick_push (tree, fn, coef);
1670 return fn;
1673 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1674 *OVERLAPS_B are initialized to the functions that describe the
1675 relation between the elements accessed twice by CHREC_A and
1676 CHREC_B. For k >= 0, the following property is verified:
1678 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1680 static void
1681 analyze_ziv_subscript (tree chrec_a,
1682 tree chrec_b,
1683 conflict_function **overlaps_a,
1684 conflict_function **overlaps_b,
1685 tree *last_conflicts)
1687 tree type, difference;
1688 dependence_stats.num_ziv++;
1690 if (dump_file && (dump_flags & TDF_DETAILS))
1691 fprintf (dump_file, "(analyze_ziv_subscript \n");
1693 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1694 chrec_a = chrec_convert (type, chrec_a, NULL);
1695 chrec_b = chrec_convert (type, chrec_b, NULL);
1696 difference = chrec_fold_minus (type, chrec_a, chrec_b);
1698 switch (TREE_CODE (difference))
1700 case INTEGER_CST:
1701 if (integer_zerop (difference))
1703 /* The difference is equal to zero: the accessed index
1704 overlaps for each iteration in the loop. */
1705 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1706 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1707 *last_conflicts = chrec_dont_know;
1708 dependence_stats.num_ziv_dependent++;
1710 else
1712 /* The accesses do not overlap. */
1713 *overlaps_a = conflict_fn_no_dependence ();
1714 *overlaps_b = conflict_fn_no_dependence ();
1715 *last_conflicts = integer_zero_node;
1716 dependence_stats.num_ziv_independent++;
1718 break;
1720 default:
1721 /* We're not sure whether the indexes overlap. For the moment,
1722 conservatively answer "don't know". */
1723 if (dump_file && (dump_flags & TDF_DETAILS))
1724 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1726 *overlaps_a = conflict_fn_not_known ();
1727 *overlaps_b = conflict_fn_not_known ();
1728 *last_conflicts = chrec_dont_know;
1729 dependence_stats.num_ziv_unimplemented++;
1730 break;
1733 if (dump_file && (dump_flags & TDF_DETAILS))
1734 fprintf (dump_file, ")\n");
1737 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
1738 and only if it fits to the int type. If this is not the case, or the
1739 bound on the number of iterations of LOOP could not be derived, returns
1740 chrec_dont_know. */
1742 static tree
1743 max_stmt_executions_tree (struct loop *loop)
1745 double_int nit;
1747 if (!max_stmt_executions (loop, true, &nit))
1748 return chrec_dont_know;
1750 if (!double_int_fits_to_tree_p (unsigned_type_node, nit))
1751 return chrec_dont_know;
1753 return double_int_to_tree (unsigned_type_node, nit);
1756 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1757 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1758 *OVERLAPS_B are initialized to the functions that describe the
1759 relation between the elements accessed twice by CHREC_A and
1760 CHREC_B. For k >= 0, the following property is verified:
1762 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1764 static void
1765 analyze_siv_subscript_cst_affine (tree chrec_a,
1766 tree chrec_b,
1767 conflict_function **overlaps_a,
1768 conflict_function **overlaps_b,
1769 tree *last_conflicts)
1771 bool value0, value1, value2;
1772 tree type, difference, tmp;
1774 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1775 chrec_a = chrec_convert (type, chrec_a, NULL);
1776 chrec_b = chrec_convert (type, chrec_b, NULL);
1777 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1779 if (!chrec_is_positive (initial_condition (difference), &value0))
1781 if (dump_file && (dump_flags & TDF_DETAILS))
1782 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1784 dependence_stats.num_siv_unimplemented++;
1785 *overlaps_a = conflict_fn_not_known ();
1786 *overlaps_b = conflict_fn_not_known ();
1787 *last_conflicts = chrec_dont_know;
1788 return;
1790 else
1792 if (value0 == false)
1794 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1796 if (dump_file && (dump_flags & TDF_DETAILS))
1797 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1799 *overlaps_a = conflict_fn_not_known ();
1800 *overlaps_b = conflict_fn_not_known ();
1801 *last_conflicts = chrec_dont_know;
1802 dependence_stats.num_siv_unimplemented++;
1803 return;
1805 else
1807 if (value1 == true)
1809 /* Example:
1810 chrec_a = 12
1811 chrec_b = {10, +, 1}
1814 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1816 HOST_WIDE_INT numiter;
1817 struct loop *loop = get_chrec_loop (chrec_b);
1819 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1820 tmp = fold_build2 (EXACT_DIV_EXPR, type,
1821 fold_build1 (ABS_EXPR, type, difference),
1822 CHREC_RIGHT (chrec_b));
1823 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1824 *last_conflicts = integer_one_node;
1827 /* Perform weak-zero siv test to see if overlap is
1828 outside the loop bounds. */
1829 numiter = max_stmt_executions_int (loop, true);
1831 if (numiter >= 0
1832 && compare_tree_int (tmp, numiter) > 0)
1834 free_conflict_function (*overlaps_a);
1835 free_conflict_function (*overlaps_b);
1836 *overlaps_a = conflict_fn_no_dependence ();
1837 *overlaps_b = conflict_fn_no_dependence ();
1838 *last_conflicts = integer_zero_node;
1839 dependence_stats.num_siv_independent++;
1840 return;
1842 dependence_stats.num_siv_dependent++;
1843 return;
1846 /* When the step does not divide the difference, there are
1847 no overlaps. */
1848 else
1850 *overlaps_a = conflict_fn_no_dependence ();
1851 *overlaps_b = conflict_fn_no_dependence ();
1852 *last_conflicts = integer_zero_node;
1853 dependence_stats.num_siv_independent++;
1854 return;
1858 else
1860 /* Example:
1861 chrec_a = 12
1862 chrec_b = {10, +, -1}
1864 In this case, chrec_a will not overlap with chrec_b. */
1865 *overlaps_a = conflict_fn_no_dependence ();
1866 *overlaps_b = conflict_fn_no_dependence ();
1867 *last_conflicts = integer_zero_node;
1868 dependence_stats.num_siv_independent++;
1869 return;
1873 else
1875 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1877 if (dump_file && (dump_flags & TDF_DETAILS))
1878 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1880 *overlaps_a = conflict_fn_not_known ();
1881 *overlaps_b = conflict_fn_not_known ();
1882 *last_conflicts = chrec_dont_know;
1883 dependence_stats.num_siv_unimplemented++;
1884 return;
1886 else
1888 if (value2 == false)
1890 /* Example:
1891 chrec_a = 3
1892 chrec_b = {10, +, -1}
1894 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1896 HOST_WIDE_INT numiter;
1897 struct loop *loop = get_chrec_loop (chrec_b);
1899 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1900 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
1901 CHREC_RIGHT (chrec_b));
1902 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1903 *last_conflicts = integer_one_node;
1905 /* Perform weak-zero siv test to see if overlap is
1906 outside the loop bounds. */
1907 numiter = max_stmt_executions_int (loop, true);
1909 if (numiter >= 0
1910 && compare_tree_int (tmp, numiter) > 0)
1912 free_conflict_function (*overlaps_a);
1913 free_conflict_function (*overlaps_b);
1914 *overlaps_a = conflict_fn_no_dependence ();
1915 *overlaps_b = conflict_fn_no_dependence ();
1916 *last_conflicts = integer_zero_node;
1917 dependence_stats.num_siv_independent++;
1918 return;
1920 dependence_stats.num_siv_dependent++;
1921 return;
1924 /* When the step does not divide the difference, there
1925 are no overlaps. */
1926 else
1928 *overlaps_a = conflict_fn_no_dependence ();
1929 *overlaps_b = conflict_fn_no_dependence ();
1930 *last_conflicts = integer_zero_node;
1931 dependence_stats.num_siv_independent++;
1932 return;
1935 else
1937 /* Example:
1938 chrec_a = 3
1939 chrec_b = {4, +, 1}
1941 In this case, chrec_a will not overlap with chrec_b. */
1942 *overlaps_a = conflict_fn_no_dependence ();
1943 *overlaps_b = conflict_fn_no_dependence ();
1944 *last_conflicts = integer_zero_node;
1945 dependence_stats.num_siv_independent++;
1946 return;
1953 /* Helper recursive function for initializing the matrix A. Returns
1954 the initial value of CHREC. */
1956 static tree
1957 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
1959 gcc_assert (chrec);
1961 switch (TREE_CODE (chrec))
1963 case POLYNOMIAL_CHREC:
1964 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
1966 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
1967 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
1969 case PLUS_EXPR:
1970 case MULT_EXPR:
1971 case MINUS_EXPR:
1973 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1974 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
1976 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
1979 case NOP_EXPR:
1981 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1982 return chrec_convert (chrec_type (chrec), op, NULL);
1985 case BIT_NOT_EXPR:
1987 /* Handle ~X as -1 - X. */
1988 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1989 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
1990 build_int_cst (TREE_TYPE (chrec), -1), op);
1993 case INTEGER_CST:
1994 return chrec;
1996 default:
1997 gcc_unreachable ();
1998 return NULL_TREE;
2002 #define FLOOR_DIV(x,y) ((x) / (y))
2004 /* Solves the special case of the Diophantine equation:
2005 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
2007 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
2008 number of iterations that loops X and Y run. The overlaps will be
2009 constructed as evolutions in dimension DIM. */
2011 static void
2012 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
2013 affine_fn *overlaps_a,
2014 affine_fn *overlaps_b,
2015 tree *last_conflicts, int dim)
2017 if (((step_a > 0 && step_b > 0)
2018 || (step_a < 0 && step_b < 0)))
2020 int step_overlaps_a, step_overlaps_b;
2021 int gcd_steps_a_b, last_conflict, tau2;
2023 gcd_steps_a_b = gcd (step_a, step_b);
2024 step_overlaps_a = step_b / gcd_steps_a_b;
2025 step_overlaps_b = step_a / gcd_steps_a_b;
2027 if (niter > 0)
2029 tau2 = FLOOR_DIV (niter, step_overlaps_a);
2030 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
2031 last_conflict = tau2;
2032 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2034 else
2035 *last_conflicts = chrec_dont_know;
2037 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
2038 build_int_cst (NULL_TREE,
2039 step_overlaps_a));
2040 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
2041 build_int_cst (NULL_TREE,
2042 step_overlaps_b));
2045 else
2047 *overlaps_a = affine_fn_cst (integer_zero_node);
2048 *overlaps_b = affine_fn_cst (integer_zero_node);
2049 *last_conflicts = integer_zero_node;
2053 /* Solves the special case of a Diophantine equation where CHREC_A is
2054 an affine bivariate function, and CHREC_B is an affine univariate
2055 function. For example,
2057 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
2059 has the following overlapping functions:
2061 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
2062 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
2063 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
2065 FORNOW: This is a specialized implementation for a case occurring in
2066 a common benchmark. Implement the general algorithm. */
2068 static void
2069 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
2070 conflict_function **overlaps_a,
2071 conflict_function **overlaps_b,
2072 tree *last_conflicts)
2074 bool xz_p, yz_p, xyz_p;
2075 int step_x, step_y, step_z;
2076 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
2077 affine_fn overlaps_a_xz, overlaps_b_xz;
2078 affine_fn overlaps_a_yz, overlaps_b_yz;
2079 affine_fn overlaps_a_xyz, overlaps_b_xyz;
2080 affine_fn ova1, ova2, ovb;
2081 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
2083 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
2084 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
2085 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
2087 niter_x =
2088 max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)), true);
2089 niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a), true);
2090 niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b), true);
2092 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
2094 if (dump_file && (dump_flags & TDF_DETAILS))
2095 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
2097 *overlaps_a = conflict_fn_not_known ();
2098 *overlaps_b = conflict_fn_not_known ();
2099 *last_conflicts = chrec_dont_know;
2100 return;
2103 niter = MIN (niter_x, niter_z);
2104 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
2105 &overlaps_a_xz,
2106 &overlaps_b_xz,
2107 &last_conflicts_xz, 1);
2108 niter = MIN (niter_y, niter_z);
2109 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
2110 &overlaps_a_yz,
2111 &overlaps_b_yz,
2112 &last_conflicts_yz, 2);
2113 niter = MIN (niter_x, niter_z);
2114 niter = MIN (niter_y, niter);
2115 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
2116 &overlaps_a_xyz,
2117 &overlaps_b_xyz,
2118 &last_conflicts_xyz, 3);
2120 xz_p = !integer_zerop (last_conflicts_xz);
2121 yz_p = !integer_zerop (last_conflicts_yz);
2122 xyz_p = !integer_zerop (last_conflicts_xyz);
2124 if (xz_p || yz_p || xyz_p)
2126 ova1 = affine_fn_cst (integer_zero_node);
2127 ova2 = affine_fn_cst (integer_zero_node);
2128 ovb = affine_fn_cst (integer_zero_node);
2129 if (xz_p)
2131 affine_fn t0 = ova1;
2132 affine_fn t2 = ovb;
2134 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2135 ovb = affine_fn_plus (ovb, overlaps_b_xz);
2136 affine_fn_free (t0);
2137 affine_fn_free (t2);
2138 *last_conflicts = last_conflicts_xz;
2140 if (yz_p)
2142 affine_fn t0 = ova2;
2143 affine_fn t2 = ovb;
2145 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2146 ovb = affine_fn_plus (ovb, overlaps_b_yz);
2147 affine_fn_free (t0);
2148 affine_fn_free (t2);
2149 *last_conflicts = last_conflicts_yz;
2151 if (xyz_p)
2153 affine_fn t0 = ova1;
2154 affine_fn t2 = ova2;
2155 affine_fn t4 = ovb;
2157 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2158 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2159 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2160 affine_fn_free (t0);
2161 affine_fn_free (t2);
2162 affine_fn_free (t4);
2163 *last_conflicts = last_conflicts_xyz;
2165 *overlaps_a = conflict_fn (2, ova1, ova2);
2166 *overlaps_b = conflict_fn (1, ovb);
2168 else
2170 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2171 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2172 *last_conflicts = integer_zero_node;
2175 affine_fn_free (overlaps_a_xz);
2176 affine_fn_free (overlaps_b_xz);
2177 affine_fn_free (overlaps_a_yz);
2178 affine_fn_free (overlaps_b_yz);
2179 affine_fn_free (overlaps_a_xyz);
2180 affine_fn_free (overlaps_b_xyz);
2183 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
2185 static void
2186 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
2187 int size)
2189 memcpy (vec2, vec1, size * sizeof (*vec1));
2192 /* Copy the elements of M x N matrix MAT1 to MAT2. */
2194 static void
2195 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
2196 int m, int n)
2198 int i;
2200 for (i = 0; i < m; i++)
2201 lambda_vector_copy (mat1[i], mat2[i], n);
2204 /* Store the N x N identity matrix in MAT. */
2206 static void
2207 lambda_matrix_id (lambda_matrix mat, int size)
2209 int i, j;
2211 for (i = 0; i < size; i++)
2212 for (j = 0; j < size; j++)
2213 mat[i][j] = (i == j) ? 1 : 0;
2216 /* Return the first nonzero element of vector VEC1 between START and N.
2217 We must have START <= N. Returns N if VEC1 is the zero vector. */
2219 static int
2220 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
2222 int j = start;
2223 while (j < n && vec1[j] == 0)
2224 j++;
2225 return j;
2228 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2229 R2 = R2 + CONST1 * R1. */
2231 static void
2232 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
2234 int i;
2236 if (const1 == 0)
2237 return;
2239 for (i = 0; i < n; i++)
2240 mat[r2][i] += const1 * mat[r1][i];
2243 /* Swap rows R1 and R2 in matrix MAT. */
2245 static void
2246 lambda_matrix_row_exchange (lambda_matrix mat, int r1, int r2)
2248 lambda_vector row;
2250 row = mat[r1];
2251 mat[r1] = mat[r2];
2252 mat[r2] = row;
2255 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
2256 and store the result in VEC2. */
2258 static void
2259 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
2260 int size, int const1)
2262 int i;
2264 if (const1 == 0)
2265 lambda_vector_clear (vec2, size);
2266 else
2267 for (i = 0; i < size; i++)
2268 vec2[i] = const1 * vec1[i];
2271 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
2273 static void
2274 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
2275 int size)
2277 lambda_vector_mult_const (vec1, vec2, size, -1);
2280 /* Negate row R1 of matrix MAT which has N columns. */
2282 static void
2283 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
2285 lambda_vector_negate (mat[r1], mat[r1], n);
2288 /* Return true if two vectors are equal. */
2290 static bool
2291 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
2293 int i;
2294 for (i = 0; i < size; i++)
2295 if (vec1[i] != vec2[i])
2296 return false;
2297 return true;
2300 /* Given an M x N integer matrix A, this function determines an M x
2301 M unimodular matrix U, and an M x N echelon matrix S such that
2302 "U.A = S". This decomposition is also known as "right Hermite".
2304 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2305 Restructuring Compilers" Utpal Banerjee. */
2307 static void
2308 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
2309 lambda_matrix S, lambda_matrix U)
2311 int i, j, i0 = 0;
2313 lambda_matrix_copy (A, S, m, n);
2314 lambda_matrix_id (U, m);
2316 for (j = 0; j < n; j++)
2318 if (lambda_vector_first_nz (S[j], m, i0) < m)
2320 ++i0;
2321 for (i = m - 1; i >= i0; i--)
2323 while (S[i][j] != 0)
2325 int sigma, factor, a, b;
2327 a = S[i-1][j];
2328 b = S[i][j];
2329 sigma = (a * b < 0) ? -1: 1;
2330 a = abs (a);
2331 b = abs (b);
2332 factor = sigma * (a / b);
2334 lambda_matrix_row_add (S, n, i, i-1, -factor);
2335 lambda_matrix_row_exchange (S, i, i-1);
2337 lambda_matrix_row_add (U, m, i, i-1, -factor);
2338 lambda_matrix_row_exchange (U, i, i-1);
2345 /* Determines the overlapping elements due to accesses CHREC_A and
2346 CHREC_B, that are affine functions. This function cannot handle
2347 symbolic evolution functions, ie. when initial conditions are
2348 parameters, because it uses lambda matrices of integers. */
2350 static void
2351 analyze_subscript_affine_affine (tree chrec_a,
2352 tree chrec_b,
2353 conflict_function **overlaps_a,
2354 conflict_function **overlaps_b,
2355 tree *last_conflicts)
2357 unsigned nb_vars_a, nb_vars_b, dim;
2358 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2359 lambda_matrix A, U, S;
2360 struct obstack scratch_obstack;
2362 if (eq_evolutions_p (chrec_a, chrec_b))
2364 /* The accessed index overlaps for each iteration in the
2365 loop. */
2366 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2367 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2368 *last_conflicts = chrec_dont_know;
2369 return;
2371 if (dump_file && (dump_flags & TDF_DETAILS))
2372 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2374 /* For determining the initial intersection, we have to solve a
2375 Diophantine equation. This is the most time consuming part.
2377 For answering to the question: "Is there a dependence?" we have
2378 to prove that there exists a solution to the Diophantine
2379 equation, and that the solution is in the iteration domain,
2380 i.e. the solution is positive or zero, and that the solution
2381 happens before the upper bound loop.nb_iterations. Otherwise
2382 there is no dependence. This function outputs a description of
2383 the iterations that hold the intersections. */
2385 nb_vars_a = nb_vars_in_chrec (chrec_a);
2386 nb_vars_b = nb_vars_in_chrec (chrec_b);
2388 gcc_obstack_init (&scratch_obstack);
2390 dim = nb_vars_a + nb_vars_b;
2391 U = lambda_matrix_new (dim, dim, &scratch_obstack);
2392 A = lambda_matrix_new (dim, 1, &scratch_obstack);
2393 S = lambda_matrix_new (dim, 1, &scratch_obstack);
2395 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2396 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
2397 gamma = init_b - init_a;
2399 /* Don't do all the hard work of solving the Diophantine equation
2400 when we already know the solution: for example,
2401 | {3, +, 1}_1
2402 | {3, +, 4}_2
2403 | gamma = 3 - 3 = 0.
2404 Then the first overlap occurs during the first iterations:
2405 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2407 if (gamma == 0)
2409 if (nb_vars_a == 1 && nb_vars_b == 1)
2411 HOST_WIDE_INT step_a, step_b;
2412 HOST_WIDE_INT niter, niter_a, niter_b;
2413 affine_fn ova, ovb;
2415 niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a), true);
2416 niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b), true);
2417 niter = MIN (niter_a, niter_b);
2418 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2419 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2421 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2422 &ova, &ovb,
2423 last_conflicts, 1);
2424 *overlaps_a = conflict_fn (1, ova);
2425 *overlaps_b = conflict_fn (1, ovb);
2428 else if (nb_vars_a == 2 && nb_vars_b == 1)
2429 compute_overlap_steps_for_affine_1_2
2430 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2432 else if (nb_vars_a == 1 && nb_vars_b == 2)
2433 compute_overlap_steps_for_affine_1_2
2434 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2436 else
2438 if (dump_file && (dump_flags & TDF_DETAILS))
2439 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2440 *overlaps_a = conflict_fn_not_known ();
2441 *overlaps_b = conflict_fn_not_known ();
2442 *last_conflicts = chrec_dont_know;
2444 goto end_analyze_subs_aa;
2447 /* U.A = S */
2448 lambda_matrix_right_hermite (A, dim, 1, S, U);
2450 if (S[0][0] < 0)
2452 S[0][0] *= -1;
2453 lambda_matrix_row_negate (U, dim, 0);
2455 gcd_alpha_beta = S[0][0];
2457 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2458 but that is a quite strange case. Instead of ICEing, answer
2459 don't know. */
2460 if (gcd_alpha_beta == 0)
2462 *overlaps_a = conflict_fn_not_known ();
2463 *overlaps_b = conflict_fn_not_known ();
2464 *last_conflicts = chrec_dont_know;
2465 goto end_analyze_subs_aa;
2468 /* The classic "gcd-test". */
2469 if (!int_divides_p (gcd_alpha_beta, gamma))
2471 /* The "gcd-test" has determined that there is no integer
2472 solution, i.e. there is no dependence. */
2473 *overlaps_a = conflict_fn_no_dependence ();
2474 *overlaps_b = conflict_fn_no_dependence ();
2475 *last_conflicts = integer_zero_node;
2478 /* Both access functions are univariate. This includes SIV and MIV cases. */
2479 else if (nb_vars_a == 1 && nb_vars_b == 1)
2481 /* Both functions should have the same evolution sign. */
2482 if (((A[0][0] > 0 && -A[1][0] > 0)
2483 || (A[0][0] < 0 && -A[1][0] < 0)))
2485 /* The solutions are given by:
2487 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2488 | [u21 u22] [y0]
2490 For a given integer t. Using the following variables,
2492 | i0 = u11 * gamma / gcd_alpha_beta
2493 | j0 = u12 * gamma / gcd_alpha_beta
2494 | i1 = u21
2495 | j1 = u22
2497 the solutions are:
2499 | x0 = i0 + i1 * t,
2500 | y0 = j0 + j1 * t. */
2501 HOST_WIDE_INT i0, j0, i1, j1;
2503 i0 = U[0][0] * gamma / gcd_alpha_beta;
2504 j0 = U[0][1] * gamma / gcd_alpha_beta;
2505 i1 = U[1][0];
2506 j1 = U[1][1];
2508 if ((i1 == 0 && i0 < 0)
2509 || (j1 == 0 && j0 < 0))
2511 /* There is no solution.
2512 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2513 falls in here, but for the moment we don't look at the
2514 upper bound of the iteration domain. */
2515 *overlaps_a = conflict_fn_no_dependence ();
2516 *overlaps_b = conflict_fn_no_dependence ();
2517 *last_conflicts = integer_zero_node;
2518 goto end_analyze_subs_aa;
2521 if (i1 > 0 && j1 > 0)
2523 HOST_WIDE_INT niter_a = max_stmt_executions_int
2524 (get_chrec_loop (chrec_a), true);
2525 HOST_WIDE_INT niter_b = max_stmt_executions_int
2526 (get_chrec_loop (chrec_b), true);
2527 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2529 /* (X0, Y0) is a solution of the Diophantine equation:
2530 "chrec_a (X0) = chrec_b (Y0)". */
2531 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2532 CEIL (-j0, j1));
2533 HOST_WIDE_INT x0 = i1 * tau1 + i0;
2534 HOST_WIDE_INT y0 = j1 * tau1 + j0;
2536 /* (X1, Y1) is the smallest positive solution of the eq
2537 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2538 first conflict occurs. */
2539 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2540 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2541 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2543 if (niter > 0)
2545 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2546 FLOOR_DIV (niter - j0, j1));
2547 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2549 /* If the overlap occurs outside of the bounds of the
2550 loop, there is no dependence. */
2551 if (x1 >= niter || y1 >= niter)
2553 *overlaps_a = conflict_fn_no_dependence ();
2554 *overlaps_b = conflict_fn_no_dependence ();
2555 *last_conflicts = integer_zero_node;
2556 goto end_analyze_subs_aa;
2558 else
2559 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2561 else
2562 *last_conflicts = chrec_dont_know;
2564 *overlaps_a
2565 = conflict_fn (1,
2566 affine_fn_univar (build_int_cst (NULL_TREE, x1),
2568 build_int_cst (NULL_TREE, i1)));
2569 *overlaps_b
2570 = conflict_fn (1,
2571 affine_fn_univar (build_int_cst (NULL_TREE, y1),
2573 build_int_cst (NULL_TREE, j1)));
2575 else
2577 /* FIXME: For the moment, the upper bound of the
2578 iteration domain for i and j is not checked. */
2579 if (dump_file && (dump_flags & TDF_DETAILS))
2580 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2581 *overlaps_a = conflict_fn_not_known ();
2582 *overlaps_b = conflict_fn_not_known ();
2583 *last_conflicts = chrec_dont_know;
2586 else
2588 if (dump_file && (dump_flags & TDF_DETAILS))
2589 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2590 *overlaps_a = conflict_fn_not_known ();
2591 *overlaps_b = conflict_fn_not_known ();
2592 *last_conflicts = chrec_dont_know;
2595 else
2597 if (dump_file && (dump_flags & TDF_DETAILS))
2598 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2599 *overlaps_a = conflict_fn_not_known ();
2600 *overlaps_b = conflict_fn_not_known ();
2601 *last_conflicts = chrec_dont_know;
2604 end_analyze_subs_aa:
2605 obstack_free (&scratch_obstack, NULL);
2606 if (dump_file && (dump_flags & TDF_DETAILS))
2608 fprintf (dump_file, " (overlaps_a = ");
2609 dump_conflict_function (dump_file, *overlaps_a);
2610 fprintf (dump_file, ")\n (overlaps_b = ");
2611 dump_conflict_function (dump_file, *overlaps_b);
2612 fprintf (dump_file, ")\n");
2613 fprintf (dump_file, ")\n");
2617 /* Returns true when analyze_subscript_affine_affine can be used for
2618 determining the dependence relation between chrec_a and chrec_b,
2619 that contain symbols. This function modifies chrec_a and chrec_b
2620 such that the analysis result is the same, and such that they don't
2621 contain symbols, and then can safely be passed to the analyzer.
2623 Example: The analysis of the following tuples of evolutions produce
2624 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2625 vs. {0, +, 1}_1
2627 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2628 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2631 static bool
2632 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2634 tree diff, type, left_a, left_b, right_b;
2636 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2637 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2638 /* FIXME: For the moment not handled. Might be refined later. */
2639 return false;
2641 type = chrec_type (*chrec_a);
2642 left_a = CHREC_LEFT (*chrec_a);
2643 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
2644 diff = chrec_fold_minus (type, left_a, left_b);
2646 if (!evolution_function_is_constant_p (diff))
2647 return false;
2649 if (dump_file && (dump_flags & TDF_DETAILS))
2650 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2652 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2653 diff, CHREC_RIGHT (*chrec_a));
2654 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
2655 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2656 build_int_cst (type, 0),
2657 right_b);
2658 return true;
2661 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2662 *OVERLAPS_B are initialized to the functions that describe the
2663 relation between the elements accessed twice by CHREC_A and
2664 CHREC_B. For k >= 0, the following property is verified:
2666 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2668 static void
2669 analyze_siv_subscript (tree chrec_a,
2670 tree chrec_b,
2671 conflict_function **overlaps_a,
2672 conflict_function **overlaps_b,
2673 tree *last_conflicts,
2674 int loop_nest_num)
2676 dependence_stats.num_siv++;
2678 if (dump_file && (dump_flags & TDF_DETAILS))
2679 fprintf (dump_file, "(analyze_siv_subscript \n");
2681 if (evolution_function_is_constant_p (chrec_a)
2682 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2683 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2684 overlaps_a, overlaps_b, last_conflicts);
2686 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2687 && evolution_function_is_constant_p (chrec_b))
2688 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2689 overlaps_b, overlaps_a, last_conflicts);
2691 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2692 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2694 if (!chrec_contains_symbols (chrec_a)
2695 && !chrec_contains_symbols (chrec_b))
2697 analyze_subscript_affine_affine (chrec_a, chrec_b,
2698 overlaps_a, overlaps_b,
2699 last_conflicts);
2701 if (CF_NOT_KNOWN_P (*overlaps_a)
2702 || CF_NOT_KNOWN_P (*overlaps_b))
2703 dependence_stats.num_siv_unimplemented++;
2704 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2705 || CF_NO_DEPENDENCE_P (*overlaps_b))
2706 dependence_stats.num_siv_independent++;
2707 else
2708 dependence_stats.num_siv_dependent++;
2710 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2711 &chrec_b))
2713 analyze_subscript_affine_affine (chrec_a, chrec_b,
2714 overlaps_a, overlaps_b,
2715 last_conflicts);
2717 if (CF_NOT_KNOWN_P (*overlaps_a)
2718 || CF_NOT_KNOWN_P (*overlaps_b))
2719 dependence_stats.num_siv_unimplemented++;
2720 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2721 || CF_NO_DEPENDENCE_P (*overlaps_b))
2722 dependence_stats.num_siv_independent++;
2723 else
2724 dependence_stats.num_siv_dependent++;
2726 else
2727 goto siv_subscript_dontknow;
2730 else
2732 siv_subscript_dontknow:;
2733 if (dump_file && (dump_flags & TDF_DETAILS))
2734 fprintf (dump_file, "siv test failed: unimplemented.\n");
2735 *overlaps_a = conflict_fn_not_known ();
2736 *overlaps_b = conflict_fn_not_known ();
2737 *last_conflicts = chrec_dont_know;
2738 dependence_stats.num_siv_unimplemented++;
2741 if (dump_file && (dump_flags & TDF_DETAILS))
2742 fprintf (dump_file, ")\n");
2745 /* Returns false if we can prove that the greatest common divisor of the steps
2746 of CHREC does not divide CST, false otherwise. */
2748 static bool
2749 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2751 HOST_WIDE_INT cd = 0, val;
2752 tree step;
2754 if (!host_integerp (cst, 0))
2755 return true;
2756 val = tree_low_cst (cst, 0);
2758 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2760 step = CHREC_RIGHT (chrec);
2761 if (!host_integerp (step, 0))
2762 return true;
2763 cd = gcd (cd, tree_low_cst (step, 0));
2764 chrec = CHREC_LEFT (chrec);
2767 return val % cd == 0;
2770 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2771 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2772 functions that describe the relation between the elements accessed
2773 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2774 is verified:
2776 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2778 static void
2779 analyze_miv_subscript (tree chrec_a,
2780 tree chrec_b,
2781 conflict_function **overlaps_a,
2782 conflict_function **overlaps_b,
2783 tree *last_conflicts,
2784 struct loop *loop_nest)
2786 tree type, difference;
2788 dependence_stats.num_miv++;
2789 if (dump_file && (dump_flags & TDF_DETAILS))
2790 fprintf (dump_file, "(analyze_miv_subscript \n");
2792 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2793 chrec_a = chrec_convert (type, chrec_a, NULL);
2794 chrec_b = chrec_convert (type, chrec_b, NULL);
2795 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2797 if (eq_evolutions_p (chrec_a, chrec_b))
2799 /* Access functions are the same: all the elements are accessed
2800 in the same order. */
2801 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2802 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2803 *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
2804 dependence_stats.num_miv_dependent++;
2807 else if (evolution_function_is_constant_p (difference)
2808 /* For the moment, the following is verified:
2809 evolution_function_is_affine_multivariate_p (chrec_a,
2810 loop_nest->num) */
2811 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2813 /* testsuite/.../ssa-chrec-33.c
2814 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2816 The difference is 1, and all the evolution steps are multiples
2817 of 2, consequently there are no overlapping elements. */
2818 *overlaps_a = conflict_fn_no_dependence ();
2819 *overlaps_b = conflict_fn_no_dependence ();
2820 *last_conflicts = integer_zero_node;
2821 dependence_stats.num_miv_independent++;
2824 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2825 && !chrec_contains_symbols (chrec_a)
2826 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2827 && !chrec_contains_symbols (chrec_b))
2829 /* testsuite/.../ssa-chrec-35.c
2830 {0, +, 1}_2 vs. {0, +, 1}_3
2831 the overlapping elements are respectively located at iterations:
2832 {0, +, 1}_x and {0, +, 1}_x,
2833 in other words, we have the equality:
2834 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2836 Other examples:
2837 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2838 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2840 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2841 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2843 analyze_subscript_affine_affine (chrec_a, chrec_b,
2844 overlaps_a, overlaps_b, last_conflicts);
2846 if (CF_NOT_KNOWN_P (*overlaps_a)
2847 || CF_NOT_KNOWN_P (*overlaps_b))
2848 dependence_stats.num_miv_unimplemented++;
2849 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2850 || CF_NO_DEPENDENCE_P (*overlaps_b))
2851 dependence_stats.num_miv_independent++;
2852 else
2853 dependence_stats.num_miv_dependent++;
2856 else
2858 /* When the analysis is too difficult, answer "don't know". */
2859 if (dump_file && (dump_flags & TDF_DETAILS))
2860 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2862 *overlaps_a = conflict_fn_not_known ();
2863 *overlaps_b = conflict_fn_not_known ();
2864 *last_conflicts = chrec_dont_know;
2865 dependence_stats.num_miv_unimplemented++;
2868 if (dump_file && (dump_flags & TDF_DETAILS))
2869 fprintf (dump_file, ")\n");
2872 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2873 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2874 OVERLAP_ITERATIONS_B are initialized with two functions that
2875 describe the iterations that contain conflicting elements.
2877 Remark: For an integer k >= 0, the following equality is true:
2879 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2882 static void
2883 analyze_overlapping_iterations (tree chrec_a,
2884 tree chrec_b,
2885 conflict_function **overlap_iterations_a,
2886 conflict_function **overlap_iterations_b,
2887 tree *last_conflicts, struct loop *loop_nest)
2889 unsigned int lnn = loop_nest->num;
2891 dependence_stats.num_subscript_tests++;
2893 if (dump_file && (dump_flags & TDF_DETAILS))
2895 fprintf (dump_file, "(analyze_overlapping_iterations \n");
2896 fprintf (dump_file, " (chrec_a = ");
2897 print_generic_expr (dump_file, chrec_a, 0);
2898 fprintf (dump_file, ")\n (chrec_b = ");
2899 print_generic_expr (dump_file, chrec_b, 0);
2900 fprintf (dump_file, ")\n");
2903 if (chrec_a == NULL_TREE
2904 || chrec_b == NULL_TREE
2905 || chrec_contains_undetermined (chrec_a)
2906 || chrec_contains_undetermined (chrec_b))
2908 dependence_stats.num_subscript_undetermined++;
2910 *overlap_iterations_a = conflict_fn_not_known ();
2911 *overlap_iterations_b = conflict_fn_not_known ();
2914 /* If they are the same chrec, and are affine, they overlap
2915 on every iteration. */
2916 else if (eq_evolutions_p (chrec_a, chrec_b)
2917 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2918 || operand_equal_p (chrec_a, chrec_b, 0)))
2920 dependence_stats.num_same_subscript_function++;
2921 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2922 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2923 *last_conflicts = chrec_dont_know;
2926 /* If they aren't the same, and aren't affine, we can't do anything
2927 yet. */
2928 else if ((chrec_contains_symbols (chrec_a)
2929 || chrec_contains_symbols (chrec_b))
2930 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2931 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
2933 dependence_stats.num_subscript_undetermined++;
2934 *overlap_iterations_a = conflict_fn_not_known ();
2935 *overlap_iterations_b = conflict_fn_not_known ();
2938 else if (ziv_subscript_p (chrec_a, chrec_b))
2939 analyze_ziv_subscript (chrec_a, chrec_b,
2940 overlap_iterations_a, overlap_iterations_b,
2941 last_conflicts);
2943 else if (siv_subscript_p (chrec_a, chrec_b))
2944 analyze_siv_subscript (chrec_a, chrec_b,
2945 overlap_iterations_a, overlap_iterations_b,
2946 last_conflicts, lnn);
2948 else
2949 analyze_miv_subscript (chrec_a, chrec_b,
2950 overlap_iterations_a, overlap_iterations_b,
2951 last_conflicts, loop_nest);
2953 if (dump_file && (dump_flags & TDF_DETAILS))
2955 fprintf (dump_file, " (overlap_iterations_a = ");
2956 dump_conflict_function (dump_file, *overlap_iterations_a);
2957 fprintf (dump_file, ")\n (overlap_iterations_b = ");
2958 dump_conflict_function (dump_file, *overlap_iterations_b);
2959 fprintf (dump_file, ")\n");
2960 fprintf (dump_file, ")\n");
2964 /* Helper function for uniquely inserting distance vectors. */
2966 static void
2967 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
2969 unsigned i;
2970 lambda_vector v;
2972 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, v)
2973 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
2974 return;
2976 VEC_safe_push (lambda_vector, heap, DDR_DIST_VECTS (ddr), dist_v);
2979 /* Helper function for uniquely inserting direction vectors. */
2981 static void
2982 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
2984 unsigned i;
2985 lambda_vector v;
2987 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), i, v)
2988 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
2989 return;
2991 VEC_safe_push (lambda_vector, heap, DDR_DIR_VECTS (ddr), dir_v);
2994 /* Add a distance of 1 on all the loops outer than INDEX. If we
2995 haven't yet determined a distance for this outer loop, push a new
2996 distance vector composed of the previous distance, and a distance
2997 of 1 for this outer loop. Example:
2999 | loop_1
3000 | loop_2
3001 | A[10]
3002 | endloop_2
3003 | endloop_1
3005 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
3006 save (0, 1), then we have to save (1, 0). */
3008 static void
3009 add_outer_distances (struct data_dependence_relation *ddr,
3010 lambda_vector dist_v, int index)
3012 /* For each outer loop where init_v is not set, the accesses are
3013 in dependence of distance 1 in the loop. */
3014 while (--index >= 0)
3016 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3017 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3018 save_v[index] = 1;
3019 save_dist_v (ddr, save_v);
3023 /* Return false when fail to represent the data dependence as a
3024 distance vector. INIT_B is set to true when a component has been
3025 added to the distance vector DIST_V. INDEX_CARRY is then set to
3026 the index in DIST_V that carries the dependence. */
3028 static bool
3029 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
3030 struct data_reference *ddr_a,
3031 struct data_reference *ddr_b,
3032 lambda_vector dist_v, bool *init_b,
3033 int *index_carry)
3035 unsigned i;
3036 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3038 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3040 tree access_fn_a, access_fn_b;
3041 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
3043 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
3045 non_affine_dependence_relation (ddr);
3046 return false;
3049 access_fn_a = DR_ACCESS_FN (ddr_a, i);
3050 access_fn_b = DR_ACCESS_FN (ddr_b, i);
3052 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
3053 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
3055 int dist, index;
3056 int var_a = CHREC_VARIABLE (access_fn_a);
3057 int var_b = CHREC_VARIABLE (access_fn_b);
3059 if (var_a != var_b
3060 || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
3062 non_affine_dependence_relation (ddr);
3063 return false;
3066 dist = int_cst_value (SUB_DISTANCE (subscript));
3067 index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
3068 *index_carry = MIN (index, *index_carry);
3070 /* This is the subscript coupling test. If we have already
3071 recorded a distance for this loop (a distance coming from
3072 another subscript), it should be the same. For example,
3073 in the following code, there is no dependence:
3075 | loop i = 0, N, 1
3076 | T[i+1][i] = ...
3077 | ... = T[i][i]
3078 | endloop
3080 if (init_v[index] != 0 && dist_v[index] != dist)
3082 finalize_ddr_dependent (ddr, chrec_known);
3083 return false;
3086 dist_v[index] = dist;
3087 init_v[index] = 1;
3088 *init_b = true;
3090 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
3092 /* This can be for example an affine vs. constant dependence
3093 (T[i] vs. T[3]) that is not an affine dependence and is
3094 not representable as a distance vector. */
3095 non_affine_dependence_relation (ddr);
3096 return false;
3100 return true;
3103 /* Return true when the DDR contains only constant access functions. */
3105 static bool
3106 constant_access_functions (const struct data_dependence_relation *ddr)
3108 unsigned i;
3110 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3111 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
3112 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
3113 return false;
3115 return true;
3118 /* Helper function for the case where DDR_A and DDR_B are the same
3119 multivariate access function with a constant step. For an example
3120 see pr34635-1.c. */
3122 static void
3123 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
3125 int x_1, x_2;
3126 tree c_1 = CHREC_LEFT (c_2);
3127 tree c_0 = CHREC_LEFT (c_1);
3128 lambda_vector dist_v;
3129 int v1, v2, cd;
3131 /* Polynomials with more than 2 variables are not handled yet. When
3132 the evolution steps are parameters, it is not possible to
3133 represent the dependence using classical distance vectors. */
3134 if (TREE_CODE (c_0) != INTEGER_CST
3135 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
3136 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
3138 DDR_AFFINE_P (ddr) = false;
3139 return;
3142 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
3143 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
3145 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
3146 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3147 v1 = int_cst_value (CHREC_RIGHT (c_1));
3148 v2 = int_cst_value (CHREC_RIGHT (c_2));
3149 cd = gcd (v1, v2);
3150 v1 /= cd;
3151 v2 /= cd;
3153 if (v2 < 0)
3155 v2 = -v2;
3156 v1 = -v1;
3159 dist_v[x_1] = v2;
3160 dist_v[x_2] = -v1;
3161 save_dist_v (ddr, dist_v);
3163 add_outer_distances (ddr, dist_v, x_1);
3166 /* Helper function for the case where DDR_A and DDR_B are the same
3167 access functions. */
3169 static void
3170 add_other_self_distances (struct data_dependence_relation *ddr)
3172 lambda_vector dist_v;
3173 unsigned i;
3174 int index_carry = DDR_NB_LOOPS (ddr);
3176 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3178 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
3180 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
3182 if (!evolution_function_is_univariate_p (access_fun))
3184 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
3186 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
3187 return;
3190 access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
3192 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
3193 add_multivariate_self_dist (ddr, access_fun);
3194 else
3195 /* The evolution step is not constant: it varies in
3196 the outer loop, so this cannot be represented by a
3197 distance vector. For example in pr34635.c the
3198 evolution is {0, +, {0, +, 4}_1}_2. */
3199 DDR_AFFINE_P (ddr) = false;
3201 return;
3204 index_carry = MIN (index_carry,
3205 index_in_loop_nest (CHREC_VARIABLE (access_fun),
3206 DDR_LOOP_NEST (ddr)));
3210 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3211 add_outer_distances (ddr, dist_v, index_carry);
3214 static void
3215 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
3217 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3219 dist_v[DDR_INNER_LOOP (ddr)] = 1;
3220 save_dist_v (ddr, dist_v);
3223 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
3224 is the case for example when access functions are the same and
3225 equal to a constant, as in:
3227 | loop_1
3228 | A[3] = ...
3229 | ... = A[3]
3230 | endloop_1
3232 in which case the distance vectors are (0) and (1). */
3234 static void
3235 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
3237 unsigned i, j;
3239 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3241 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
3242 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
3243 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
3245 for (j = 0; j < ca->n; j++)
3246 if (affine_function_zero_p (ca->fns[j]))
3248 insert_innermost_unit_dist_vector (ddr);
3249 return;
3252 for (j = 0; j < cb->n; j++)
3253 if (affine_function_zero_p (cb->fns[j]))
3255 insert_innermost_unit_dist_vector (ddr);
3256 return;
3261 /* Compute the classic per loop distance vector. DDR is the data
3262 dependence relation to build a vector from. Return false when fail
3263 to represent the data dependence as a distance vector. */
3265 static bool
3266 build_classic_dist_vector (struct data_dependence_relation *ddr,
3267 struct loop *loop_nest)
3269 bool init_b = false;
3270 int index_carry = DDR_NB_LOOPS (ddr);
3271 lambda_vector dist_v;
3273 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3274 return false;
3276 if (same_access_functions (ddr))
3278 /* Save the 0 vector. */
3279 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3280 save_dist_v (ddr, dist_v);
3282 if (constant_access_functions (ddr))
3283 add_distance_for_zero_overlaps (ddr);
3285 if (DDR_NB_LOOPS (ddr) > 1)
3286 add_other_self_distances (ddr);
3288 return true;
3291 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3292 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3293 dist_v, &init_b, &index_carry))
3294 return false;
3296 /* Save the distance vector if we initialized one. */
3297 if (init_b)
3299 /* Verify a basic constraint: classic distance vectors should
3300 always be lexicographically positive.
3302 Data references are collected in the order of execution of
3303 the program, thus for the following loop
3305 | for (i = 1; i < 100; i++)
3306 | for (j = 1; j < 100; j++)
3308 | t = T[j+1][i-1]; // A
3309 | T[j][i] = t + 2; // B
3312 references are collected following the direction of the wind:
3313 A then B. The data dependence tests are performed also
3314 following this order, such that we're looking at the distance
3315 separating the elements accessed by A from the elements later
3316 accessed by B. But in this example, the distance returned by
3317 test_dep (A, B) is lexicographically negative (-1, 1), that
3318 means that the access A occurs later than B with respect to
3319 the outer loop, ie. we're actually looking upwind. In this
3320 case we solve test_dep (B, A) looking downwind to the
3321 lexicographically positive solution, that returns the
3322 distance vector (1, -1). */
3323 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3325 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3326 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3327 loop_nest))
3328 return false;
3329 compute_subscript_distance (ddr);
3330 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3331 save_v, &init_b, &index_carry))
3332 return false;
3333 save_dist_v (ddr, save_v);
3334 DDR_REVERSED_P (ddr) = true;
3336 /* In this case there is a dependence forward for all the
3337 outer loops:
3339 | for (k = 1; k < 100; k++)
3340 | for (i = 1; i < 100; i++)
3341 | for (j = 1; j < 100; j++)
3343 | t = T[j+1][i-1]; // A
3344 | T[j][i] = t + 2; // B
3347 the vectors are:
3348 (0, 1, -1)
3349 (1, 1, -1)
3350 (1, -1, 1)
3352 if (DDR_NB_LOOPS (ddr) > 1)
3354 add_outer_distances (ddr, save_v, index_carry);
3355 add_outer_distances (ddr, dist_v, index_carry);
3358 else
3360 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3361 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3363 if (DDR_NB_LOOPS (ddr) > 1)
3365 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3367 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3368 DDR_A (ddr), loop_nest))
3369 return false;
3370 compute_subscript_distance (ddr);
3371 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3372 opposite_v, &init_b,
3373 &index_carry))
3374 return false;
3376 save_dist_v (ddr, save_v);
3377 add_outer_distances (ddr, dist_v, index_carry);
3378 add_outer_distances (ddr, opposite_v, index_carry);
3380 else
3381 save_dist_v (ddr, save_v);
3384 else
3386 /* There is a distance of 1 on all the outer loops: Example:
3387 there is a dependence of distance 1 on loop_1 for the array A.
3389 | loop_1
3390 | A[5] = ...
3391 | endloop
3393 add_outer_distances (ddr, dist_v,
3394 lambda_vector_first_nz (dist_v,
3395 DDR_NB_LOOPS (ddr), 0));
3398 if (dump_file && (dump_flags & TDF_DETAILS))
3400 unsigned i;
3402 fprintf (dump_file, "(build_classic_dist_vector\n");
3403 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3405 fprintf (dump_file, " dist_vector = (");
3406 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3407 DDR_NB_LOOPS (ddr));
3408 fprintf (dump_file, " )\n");
3410 fprintf (dump_file, ")\n");
3413 return true;
3416 /* Return the direction for a given distance.
3417 FIXME: Computing dir this way is suboptimal, since dir can catch
3418 cases that dist is unable to represent. */
3420 static inline enum data_dependence_direction
3421 dir_from_dist (int dist)
3423 if (dist > 0)
3424 return dir_positive;
3425 else if (dist < 0)
3426 return dir_negative;
3427 else
3428 return dir_equal;
3431 /* Compute the classic per loop direction vector. DDR is the data
3432 dependence relation to build a vector from. */
3434 static void
3435 build_classic_dir_vector (struct data_dependence_relation *ddr)
3437 unsigned i, j;
3438 lambda_vector dist_v;
3440 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
3442 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3444 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3445 dir_v[j] = dir_from_dist (dist_v[j]);
3447 save_dir_v (ddr, dir_v);
3451 /* Helper function. Returns true when there is a dependence between
3452 data references DRA and DRB. */
3454 static bool
3455 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3456 struct data_reference *dra,
3457 struct data_reference *drb,
3458 struct loop *loop_nest)
3460 unsigned int i;
3461 tree last_conflicts;
3462 struct subscript *subscript;
3463 tree res = NULL_TREE;
3465 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3466 i++)
3468 conflict_function *overlaps_a, *overlaps_b;
3470 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3471 DR_ACCESS_FN (drb, i),
3472 &overlaps_a, &overlaps_b,
3473 &last_conflicts, loop_nest);
3475 if (SUB_CONFLICTS_IN_A (subscript))
3476 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3477 if (SUB_CONFLICTS_IN_B (subscript))
3478 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3480 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3481 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3482 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3484 /* If there is any undetermined conflict function we have to
3485 give a conservative answer in case we cannot prove that
3486 no dependence exists when analyzing another subscript. */
3487 if (CF_NOT_KNOWN_P (overlaps_a)
3488 || CF_NOT_KNOWN_P (overlaps_b))
3490 res = chrec_dont_know;
3491 continue;
3494 /* When there is a subscript with no dependence we can stop. */
3495 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3496 || CF_NO_DEPENDENCE_P (overlaps_b))
3498 res = chrec_known;
3499 break;
3503 if (res == NULL_TREE)
3504 return true;
3506 if (res == chrec_known)
3507 dependence_stats.num_dependence_independent++;
3508 else
3509 dependence_stats.num_dependence_undetermined++;
3510 finalize_ddr_dependent (ddr, res);
3511 return false;
3514 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3516 static void
3517 subscript_dependence_tester (struct data_dependence_relation *ddr,
3518 struct loop *loop_nest)
3521 if (dump_file && (dump_flags & TDF_DETAILS))
3522 fprintf (dump_file, "(subscript_dependence_tester \n");
3524 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3525 dependence_stats.num_dependence_dependent++;
3527 compute_subscript_distance (ddr);
3528 if (build_classic_dist_vector (ddr, loop_nest))
3529 build_classic_dir_vector (ddr);
3531 if (dump_file && (dump_flags & TDF_DETAILS))
3532 fprintf (dump_file, ")\n");
3535 /* Returns true when all the access functions of A are affine or
3536 constant with respect to LOOP_NEST. */
3538 static bool
3539 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3540 const struct loop *loop_nest)
3542 unsigned int i;
3543 VEC(tree,heap) *fns = DR_ACCESS_FNS (a);
3544 tree t;
3546 FOR_EACH_VEC_ELT (tree, fns, i, t)
3547 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3548 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3549 return false;
3551 return true;
3554 /* Initializes an equation for an OMEGA problem using the information
3555 contained in the ACCESS_FUN. Returns true when the operation
3556 succeeded.
3558 PB is the omega constraint system.
3559 EQ is the number of the equation to be initialized.
3560 OFFSET is used for shifting the variables names in the constraints:
3561 a constrain is composed of 2 * the number of variables surrounding
3562 dependence accesses. OFFSET is set either to 0 for the first n variables,
3563 then it is set to n.
3564 ACCESS_FUN is expected to be an affine chrec. */
3566 static bool
3567 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3568 unsigned int offset, tree access_fun,
3569 struct data_dependence_relation *ddr)
3571 switch (TREE_CODE (access_fun))
3573 case POLYNOMIAL_CHREC:
3575 tree left = CHREC_LEFT (access_fun);
3576 tree right = CHREC_RIGHT (access_fun);
3577 int var = CHREC_VARIABLE (access_fun);
3578 unsigned var_idx;
3580 if (TREE_CODE (right) != INTEGER_CST)
3581 return false;
3583 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3584 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3586 /* Compute the innermost loop index. */
3587 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3589 if (offset == 0)
3590 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3591 += int_cst_value (right);
3593 switch (TREE_CODE (left))
3595 case POLYNOMIAL_CHREC:
3596 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3598 case INTEGER_CST:
3599 pb->eqs[eq].coef[0] += int_cst_value (left);
3600 return true;
3602 default:
3603 return false;
3607 case INTEGER_CST:
3608 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3609 return true;
3611 default:
3612 return false;
3616 /* As explained in the comments preceding init_omega_for_ddr, we have
3617 to set up a system for each loop level, setting outer loops
3618 variation to zero, and current loop variation to positive or zero.
3619 Save each lexico positive distance vector. */
3621 static void
3622 omega_extract_distance_vectors (omega_pb pb,
3623 struct data_dependence_relation *ddr)
3625 int eq, geq;
3626 unsigned i, j;
3627 struct loop *loopi, *loopj;
3628 enum omega_result res;
3630 /* Set a new problem for each loop in the nest. The basis is the
3631 problem that we have initialized until now. On top of this we
3632 add new constraints. */
3633 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3634 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3636 int dist = 0;
3637 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3638 DDR_NB_LOOPS (ddr));
3640 omega_copy_problem (copy, pb);
3642 /* For all the outer loops "loop_j", add "dj = 0". */
3643 for (j = 0;
3644 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3646 eq = omega_add_zero_eq (copy, omega_black);
3647 copy->eqs[eq].coef[j + 1] = 1;
3650 /* For "loop_i", add "0 <= di". */
3651 geq = omega_add_zero_geq (copy, omega_black);
3652 copy->geqs[geq].coef[i + 1] = 1;
3654 /* Reduce the constraint system, and test that the current
3655 problem is feasible. */
3656 res = omega_simplify_problem (copy);
3657 if (res == omega_false
3658 || res == omega_unknown
3659 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3660 goto next_problem;
3662 for (eq = 0; eq < copy->num_subs; eq++)
3663 if (copy->subs[eq].key == (int) i + 1)
3665 dist = copy->subs[eq].coef[0];
3666 goto found_dist;
3669 if (dist == 0)
3671 /* Reinitialize problem... */
3672 omega_copy_problem (copy, pb);
3673 for (j = 0;
3674 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3676 eq = omega_add_zero_eq (copy, omega_black);
3677 copy->eqs[eq].coef[j + 1] = 1;
3680 /* ..., but this time "di = 1". */
3681 eq = omega_add_zero_eq (copy, omega_black);
3682 copy->eqs[eq].coef[i + 1] = 1;
3683 copy->eqs[eq].coef[0] = -1;
3685 res = omega_simplify_problem (copy);
3686 if (res == omega_false
3687 || res == omega_unknown
3688 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3689 goto next_problem;
3691 for (eq = 0; eq < copy->num_subs; eq++)
3692 if (copy->subs[eq].key == (int) i + 1)
3694 dist = copy->subs[eq].coef[0];
3695 goto found_dist;
3699 found_dist:;
3700 /* Save the lexicographically positive distance vector. */
3701 if (dist >= 0)
3703 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3704 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3706 dist_v[i] = dist;
3708 for (eq = 0; eq < copy->num_subs; eq++)
3709 if (copy->subs[eq].key > 0)
3711 dist = copy->subs[eq].coef[0];
3712 dist_v[copy->subs[eq].key - 1] = dist;
3715 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3716 dir_v[j] = dir_from_dist (dist_v[j]);
3718 save_dist_v (ddr, dist_v);
3719 save_dir_v (ddr, dir_v);
3722 next_problem:;
3723 omega_free_problem (copy);
3727 /* This is called for each subscript of a tuple of data references:
3728 insert an equality for representing the conflicts. */
3730 static bool
3731 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3732 struct data_dependence_relation *ddr,
3733 omega_pb pb, bool *maybe_dependent)
3735 int eq;
3736 tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3737 TREE_TYPE (access_fun_b));
3738 tree fun_a = chrec_convert (type, access_fun_a, NULL);
3739 tree fun_b = chrec_convert (type, access_fun_b, NULL);
3740 tree difference = chrec_fold_minus (type, fun_a, fun_b);
3741 tree minus_one;
3743 /* When the fun_a - fun_b is not constant, the dependence is not
3744 captured by the classic distance vector representation. */
3745 if (TREE_CODE (difference) != INTEGER_CST)
3746 return false;
3748 /* ZIV test. */
3749 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3751 /* There is no dependence. */
3752 *maybe_dependent = false;
3753 return true;
3756 minus_one = build_int_cst (type, -1);
3757 fun_b = chrec_fold_multiply (type, fun_b, minus_one);
3759 eq = omega_add_zero_eq (pb, omega_black);
3760 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3761 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3762 /* There is probably a dependence, but the system of
3763 constraints cannot be built: answer "don't know". */
3764 return false;
3766 /* GCD test. */
3767 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3768 && !int_divides_p (lambda_vector_gcd
3769 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3770 2 * DDR_NB_LOOPS (ddr)),
3771 pb->eqs[eq].coef[0]))
3773 /* There is no dependence. */
3774 *maybe_dependent = false;
3775 return true;
3778 return true;
3781 /* Helper function, same as init_omega_for_ddr but specialized for
3782 data references A and B. */
3784 static bool
3785 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3786 struct data_dependence_relation *ddr,
3787 omega_pb pb, bool *maybe_dependent)
3789 unsigned i;
3790 int ineq;
3791 struct loop *loopi;
3792 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3794 /* Insert an equality per subscript. */
3795 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3797 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3798 ddr, pb, maybe_dependent))
3799 return false;
3800 else if (*maybe_dependent == false)
3802 /* There is no dependence. */
3803 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3804 return true;
3808 /* Insert inequalities: constraints corresponding to the iteration
3809 domain, i.e. the loops surrounding the references "loop_x" and
3810 the distance variables "dx". The layout of the OMEGA
3811 representation is as follows:
3812 - coef[0] is the constant
3813 - coef[1..nb_loops] are the protected variables that will not be
3814 removed by the solver: the "dx"
3815 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3817 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3818 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3820 HOST_WIDE_INT nbi = max_stmt_executions_int (loopi, true);
3822 /* 0 <= loop_x */
3823 ineq = omega_add_zero_geq (pb, omega_black);
3824 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3826 /* 0 <= loop_x + dx */
3827 ineq = omega_add_zero_geq (pb, omega_black);
3828 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3829 pb->geqs[ineq].coef[i + 1] = 1;
3831 if (nbi != -1)
3833 /* loop_x <= nb_iters */
3834 ineq = omega_add_zero_geq (pb, omega_black);
3835 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3836 pb->geqs[ineq].coef[0] = nbi;
3838 /* loop_x + dx <= nb_iters */
3839 ineq = omega_add_zero_geq (pb, omega_black);
3840 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3841 pb->geqs[ineq].coef[i + 1] = -1;
3842 pb->geqs[ineq].coef[0] = nbi;
3844 /* A step "dx" bigger than nb_iters is not feasible, so
3845 add "0 <= nb_iters + dx", */
3846 ineq = omega_add_zero_geq (pb, omega_black);
3847 pb->geqs[ineq].coef[i + 1] = 1;
3848 pb->geqs[ineq].coef[0] = nbi;
3849 /* and "dx <= nb_iters". */
3850 ineq = omega_add_zero_geq (pb, omega_black);
3851 pb->geqs[ineq].coef[i + 1] = -1;
3852 pb->geqs[ineq].coef[0] = nbi;
3856 omega_extract_distance_vectors (pb, ddr);
3858 return true;
3861 /* Sets up the Omega dependence problem for the data dependence
3862 relation DDR. Returns false when the constraint system cannot be
3863 built, ie. when the test answers "don't know". Returns true
3864 otherwise, and when independence has been proved (using one of the
3865 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3866 set MAYBE_DEPENDENT to true.
3868 Example: for setting up the dependence system corresponding to the
3869 conflicting accesses
3871 | loop_i
3872 | loop_j
3873 | A[i, i+1] = ...
3874 | ... A[2*j, 2*(i + j)]
3875 | endloop_j
3876 | endloop_i
3878 the following constraints come from the iteration domain:
3880 0 <= i <= Ni
3881 0 <= i + di <= Ni
3882 0 <= j <= Nj
3883 0 <= j + dj <= Nj
3885 where di, dj are the distance variables. The constraints
3886 representing the conflicting elements are:
3888 i = 2 * (j + dj)
3889 i + 1 = 2 * (i + di + j + dj)
3891 For asking that the resulting distance vector (di, dj) be
3892 lexicographically positive, we insert the constraint "di >= 0". If
3893 "di = 0" in the solution, we fix that component to zero, and we
3894 look at the inner loops: we set a new problem where all the outer
3895 loop distances are zero, and fix this inner component to be
3896 positive. When one of the components is positive, we save that
3897 distance, and set a new problem where the distance on this loop is
3898 zero, searching for other distances in the inner loops. Here is
3899 the classic example that illustrates that we have to set for each
3900 inner loop a new problem:
3902 | loop_1
3903 | loop_2
3904 | A[10]
3905 | endloop_2
3906 | endloop_1
3908 we have to save two distances (1, 0) and (0, 1).
3910 Given two array references, refA and refB, we have to set the
3911 dependence problem twice, refA vs. refB and refB vs. refA, and we
3912 cannot do a single test, as refB might occur before refA in the
3913 inner loops, and the contrary when considering outer loops: ex.
3915 | loop_0
3916 | loop_1
3917 | loop_2
3918 | T[{1,+,1}_2][{1,+,1}_1] // refA
3919 | T[{2,+,1}_2][{0,+,1}_1] // refB
3920 | endloop_2
3921 | endloop_1
3922 | endloop_0
3924 refB touches the elements in T before refA, and thus for the same
3925 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3926 but for successive loop_0 iterations, we have (1, -1, 1)
3928 The Omega solver expects the distance variables ("di" in the
3929 previous example) to come first in the constraint system (as
3930 variables to be protected, or "safe" variables), the constraint
3931 system is built using the following layout:
3933 "cst | distance vars | index vars".
3936 static bool
3937 init_omega_for_ddr (struct data_dependence_relation *ddr,
3938 bool *maybe_dependent)
3940 omega_pb pb;
3941 bool res = false;
3943 *maybe_dependent = true;
3945 if (same_access_functions (ddr))
3947 unsigned j;
3948 lambda_vector dir_v;
3950 /* Save the 0 vector. */
3951 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3952 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3953 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3954 dir_v[j] = dir_equal;
3955 save_dir_v (ddr, dir_v);
3957 /* Save the dependences carried by outer loops. */
3958 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3959 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3960 maybe_dependent);
3961 omega_free_problem (pb);
3962 return res;
3965 /* Omega expects the protected variables (those that have to be kept
3966 after elimination) to appear first in the constraint system.
3967 These variables are the distance variables. In the following
3968 initialization we declare NB_LOOPS safe variables, and the total
3969 number of variables for the constraint system is 2*NB_LOOPS. */
3970 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3971 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3972 maybe_dependent);
3973 omega_free_problem (pb);
3975 /* Stop computation if not decidable, or no dependence. */
3976 if (res == false || *maybe_dependent == false)
3977 return res;
3979 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3980 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
3981 maybe_dependent);
3982 omega_free_problem (pb);
3984 return res;
3987 /* Return true when DDR contains the same information as that stored
3988 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
3990 static bool
3991 ddr_consistent_p (FILE *file,
3992 struct data_dependence_relation *ddr,
3993 VEC (lambda_vector, heap) *dist_vects,
3994 VEC (lambda_vector, heap) *dir_vects)
3996 unsigned int i, j;
3998 /* If dump_file is set, output there. */
3999 if (dump_file && (dump_flags & TDF_DETAILS))
4000 file = dump_file;
4002 if (VEC_length (lambda_vector, dist_vects) != DDR_NUM_DIST_VECTS (ddr))
4004 lambda_vector b_dist_v;
4005 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
4006 VEC_length (lambda_vector, dist_vects),
4007 DDR_NUM_DIST_VECTS (ddr));
4009 fprintf (file, "Banerjee dist vectors:\n");
4010 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, i, b_dist_v)
4011 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
4013 fprintf (file, "Omega dist vectors:\n");
4014 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
4015 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
4017 fprintf (file, "data dependence relation:\n");
4018 dump_data_dependence_relation (file, ddr);
4020 fprintf (file, ")\n");
4021 return false;
4024 if (VEC_length (lambda_vector, dir_vects) != DDR_NUM_DIR_VECTS (ddr))
4026 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
4027 VEC_length (lambda_vector, dir_vects),
4028 DDR_NUM_DIR_VECTS (ddr));
4029 return false;
4032 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
4034 lambda_vector a_dist_v;
4035 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
4037 /* Distance vectors are not ordered in the same way in the DDR
4038 and in the DIST_VECTS: search for a matching vector. */
4039 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, a_dist_v)
4040 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
4041 break;
4043 if (j == VEC_length (lambda_vector, dist_vects))
4045 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
4046 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
4047 fprintf (file, "not found in Omega dist vectors:\n");
4048 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
4049 fprintf (file, "data dependence relation:\n");
4050 dump_data_dependence_relation (file, ddr);
4051 fprintf (file, ")\n");
4055 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
4057 lambda_vector a_dir_v;
4058 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
4060 /* Direction vectors are not ordered in the same way in the DDR
4061 and in the DIR_VECTS: search for a matching vector. */
4062 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, a_dir_v)
4063 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
4064 break;
4066 if (j == VEC_length (lambda_vector, dist_vects))
4068 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
4069 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
4070 fprintf (file, "not found in Omega dir vectors:\n");
4071 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
4072 fprintf (file, "data dependence relation:\n");
4073 dump_data_dependence_relation (file, ddr);
4074 fprintf (file, ")\n");
4078 return true;
4081 /* This computes the affine dependence relation between A and B with
4082 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
4083 independence between two accesses, while CHREC_DONT_KNOW is used
4084 for representing the unknown relation.
4086 Note that it is possible to stop the computation of the dependence
4087 relation the first time we detect a CHREC_KNOWN element for a given
4088 subscript. */
4090 static void
4091 compute_affine_dependence (struct data_dependence_relation *ddr,
4092 struct loop *loop_nest)
4094 struct data_reference *dra = DDR_A (ddr);
4095 struct data_reference *drb = DDR_B (ddr);
4097 if (dump_file && (dump_flags & TDF_DETAILS))
4099 fprintf (dump_file, "(compute_affine_dependence\n");
4100 fprintf (dump_file, " stmt_a: ");
4101 print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
4102 fprintf (dump_file, " stmt_b: ");
4103 print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
4106 /* Analyze only when the dependence relation is not yet known. */
4107 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4109 dependence_stats.num_dependence_tests++;
4111 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
4112 && access_functions_are_affine_or_constant_p (drb, loop_nest))
4114 if (flag_check_data_deps)
4116 /* Compute the dependences using the first algorithm. */
4117 subscript_dependence_tester (ddr, loop_nest);
4119 if (dump_file && (dump_flags & TDF_DETAILS))
4121 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
4122 dump_data_dependence_relation (dump_file, ddr);
4125 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4127 bool maybe_dependent;
4128 VEC (lambda_vector, heap) *dir_vects, *dist_vects;
4130 /* Save the result of the first DD analyzer. */
4131 dist_vects = DDR_DIST_VECTS (ddr);
4132 dir_vects = DDR_DIR_VECTS (ddr);
4134 /* Reset the information. */
4135 DDR_DIST_VECTS (ddr) = NULL;
4136 DDR_DIR_VECTS (ddr) = NULL;
4138 /* Compute the same information using Omega. */
4139 if (!init_omega_for_ddr (ddr, &maybe_dependent))
4140 goto csys_dont_know;
4142 if (dump_file && (dump_flags & TDF_DETAILS))
4144 fprintf (dump_file, "Omega Analyzer\n");
4145 dump_data_dependence_relation (dump_file, ddr);
4148 /* Check that we get the same information. */
4149 if (maybe_dependent)
4150 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
4151 dir_vects));
4154 else
4155 subscript_dependence_tester (ddr, loop_nest);
4158 /* As a last case, if the dependence cannot be determined, or if
4159 the dependence is considered too difficult to determine, answer
4160 "don't know". */
4161 else
4163 csys_dont_know:;
4164 dependence_stats.num_dependence_undetermined++;
4166 if (dump_file && (dump_flags & TDF_DETAILS))
4168 fprintf (dump_file, "Data ref a:\n");
4169 dump_data_reference (dump_file, dra);
4170 fprintf (dump_file, "Data ref b:\n");
4171 dump_data_reference (dump_file, drb);
4172 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4174 finalize_ddr_dependent (ddr, chrec_dont_know);
4178 if (dump_file && (dump_flags & TDF_DETAILS))
4180 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4181 fprintf (dump_file, ") -> no dependence\n");
4182 else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4183 fprintf (dump_file, ") -> dependence analysis failed\n");
4184 else
4185 fprintf (dump_file, ")\n");
4189 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4190 the data references in DATAREFS, in the LOOP_NEST. When
4191 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4192 relations. Return true when successful, i.e. data references number
4193 is small enough to be handled. */
4195 bool
4196 compute_all_dependences (VEC (data_reference_p, heap) *datarefs,
4197 VEC (ddr_p, heap) **dependence_relations,
4198 VEC (loop_p, heap) *loop_nest,
4199 bool compute_self_and_rr)
4201 struct data_dependence_relation *ddr;
4202 struct data_reference *a, *b;
4203 unsigned int i, j;
4205 if ((int) VEC_length (data_reference_p, datarefs)
4206 > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
4208 struct data_dependence_relation *ddr;
4210 /* Insert a single relation into dependence_relations:
4211 chrec_dont_know. */
4212 ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
4213 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4214 return false;
4217 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4218 for (j = i + 1; VEC_iterate (data_reference_p, datarefs, j, b); j++)
4219 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4221 ddr = initialize_data_dependence_relation (a, b, loop_nest);
4222 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4223 if (loop_nest)
4224 compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
4227 if (compute_self_and_rr)
4228 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4230 ddr = initialize_data_dependence_relation (a, a, loop_nest);
4231 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4232 if (loop_nest)
4233 compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
4236 return true;
4239 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4240 true if STMT clobbers memory, false otherwise. */
4242 bool
4243 get_references_in_stmt (gimple stmt, VEC (data_ref_loc, heap) **references)
4245 bool clobbers_memory = false;
4246 data_ref_loc *ref;
4247 tree *op0, *op1;
4248 enum gimple_code stmt_code = gimple_code (stmt);
4250 *references = NULL;
4252 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4253 Calls have side-effects, except those to const or pure
4254 functions. */
4255 if ((stmt_code == GIMPLE_CALL
4256 && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)))
4257 || (stmt_code == GIMPLE_ASM
4258 && (gimple_asm_volatile_p (stmt) || gimple_vuse (stmt))))
4259 clobbers_memory = true;
4261 if (!gimple_vuse (stmt))
4262 return clobbers_memory;
4264 if (stmt_code == GIMPLE_ASSIGN)
4266 tree base;
4267 op0 = gimple_assign_lhs_ptr (stmt);
4268 op1 = gimple_assign_rhs1_ptr (stmt);
4270 if (DECL_P (*op1)
4271 || (REFERENCE_CLASS_P (*op1)
4272 && (base = get_base_address (*op1))
4273 && TREE_CODE (base) != SSA_NAME))
4275 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4276 ref->pos = op1;
4277 ref->is_read = true;
4280 else if (stmt_code == GIMPLE_CALL)
4282 unsigned i, n;
4284 op0 = gimple_call_lhs_ptr (stmt);
4285 n = gimple_call_num_args (stmt);
4286 for (i = 0; i < n; i++)
4288 op1 = gimple_call_arg_ptr (stmt, i);
4290 if (DECL_P (*op1)
4291 || (REFERENCE_CLASS_P (*op1) && get_base_address (*op1)))
4293 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4294 ref->pos = op1;
4295 ref->is_read = true;
4299 else
4300 return clobbers_memory;
4302 if (*op0
4303 && (DECL_P (*op0)
4304 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0))))
4306 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4307 ref->pos = op0;
4308 ref->is_read = false;
4310 return clobbers_memory;
4313 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4314 reference, returns false, otherwise returns true. NEST is the outermost
4315 loop of the loop nest in which the references should be analyzed. */
4317 bool
4318 find_data_references_in_stmt (struct loop *nest, gimple stmt,
4319 VEC (data_reference_p, heap) **datarefs)
4321 unsigned i;
4322 VEC (data_ref_loc, heap) *references;
4323 data_ref_loc *ref;
4324 bool ret = true;
4325 data_reference_p dr;
4327 if (get_references_in_stmt (stmt, &references))
4329 VEC_free (data_ref_loc, heap, references);
4330 return false;
4333 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4335 dr = create_data_ref (nest, loop_containing_stmt (stmt),
4336 *ref->pos, stmt, ref->is_read);
4337 gcc_assert (dr != NULL);
4338 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4340 VEC_free (data_ref_loc, heap, references);
4341 return ret;
4344 /* Stores the data references in STMT to DATAREFS. If there is an
4345 unanalyzable reference, returns false, otherwise returns true.
4346 NEST is the outermost loop of the loop nest in which the references
4347 should be instantiated, LOOP is the loop in which the references
4348 should be analyzed. */
4350 bool
4351 graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple stmt,
4352 VEC (data_reference_p, heap) **datarefs)
4354 unsigned i;
4355 VEC (data_ref_loc, heap) *references;
4356 data_ref_loc *ref;
4357 bool ret = true;
4358 data_reference_p dr;
4360 if (get_references_in_stmt (stmt, &references))
4362 VEC_free (data_ref_loc, heap, references);
4363 return false;
4366 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4368 dr = create_data_ref (nest, loop, *ref->pos, stmt, ref->is_read);
4369 gcc_assert (dr != NULL);
4370 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4373 VEC_free (data_ref_loc, heap, references);
4374 return ret;
4377 /* Search the data references in LOOP, and record the information into
4378 DATAREFS. Returns chrec_dont_know when failing to analyze a
4379 difficult case, returns NULL_TREE otherwise. */
4381 tree
4382 find_data_references_in_bb (struct loop *loop, basic_block bb,
4383 VEC (data_reference_p, heap) **datarefs)
4385 gimple_stmt_iterator bsi;
4387 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4389 gimple stmt = gsi_stmt (bsi);
4391 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4393 struct data_reference *res;
4394 res = XCNEW (struct data_reference);
4395 VEC_safe_push (data_reference_p, heap, *datarefs, res);
4397 return chrec_dont_know;
4401 return NULL_TREE;
4404 /* Search the data references in LOOP, and record the information into
4405 DATAREFS. Returns chrec_dont_know when failing to analyze a
4406 difficult case, returns NULL_TREE otherwise.
4408 TODO: This function should be made smarter so that it can handle address
4409 arithmetic as if they were array accesses, etc. */
4411 static tree
4412 find_data_references_in_loop (struct loop *loop,
4413 VEC (data_reference_p, heap) **datarefs)
4415 basic_block bb, *bbs;
4416 unsigned int i;
4418 bbs = get_loop_body_in_dom_order (loop);
4420 for (i = 0; i < loop->num_nodes; i++)
4422 bb = bbs[i];
4424 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4426 free (bbs);
4427 return chrec_dont_know;
4430 free (bbs);
4432 return NULL_TREE;
4435 /* Recursive helper function. */
4437 static bool
4438 find_loop_nest_1 (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4440 /* Inner loops of the nest should not contain siblings. Example:
4441 when there are two consecutive loops,
4443 | loop_0
4444 | loop_1
4445 | A[{0, +, 1}_1]
4446 | endloop_1
4447 | loop_2
4448 | A[{0, +, 1}_2]
4449 | endloop_2
4450 | endloop_0
4452 the dependence relation cannot be captured by the distance
4453 abstraction. */
4454 if (loop->next)
4455 return false;
4457 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4458 if (loop->inner)
4459 return find_loop_nest_1 (loop->inner, loop_nest);
4460 return true;
4463 /* Return false when the LOOP is not well nested. Otherwise return
4464 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4465 contain the loops from the outermost to the innermost, as they will
4466 appear in the classic distance vector. */
4468 bool
4469 find_loop_nest (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4471 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4472 if (loop->inner)
4473 return find_loop_nest_1 (loop->inner, loop_nest);
4474 return true;
4477 /* Returns true when the data dependences have been computed, false otherwise.
4478 Given a loop nest LOOP, the following vectors are returned:
4479 DATAREFS is initialized to all the array elements contained in this loop,
4480 DEPENDENCE_RELATIONS contains the relations between the data references.
4481 Compute read-read and self relations if
4482 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4484 bool
4485 compute_data_dependences_for_loop (struct loop *loop,
4486 bool compute_self_and_read_read_dependences,
4487 VEC (loop_p, heap) **loop_nest,
4488 VEC (data_reference_p, heap) **datarefs,
4489 VEC (ddr_p, heap) **dependence_relations)
4491 bool res = true;
4493 memset (&dependence_stats, 0, sizeof (dependence_stats));
4495 /* If the loop nest is not well formed, or one of the data references
4496 is not computable, give up without spending time to compute other
4497 dependences. */
4498 if (!loop
4499 || !find_loop_nest (loop, loop_nest)
4500 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
4501 || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
4502 compute_self_and_read_read_dependences))
4503 res = false;
4505 if (dump_file && (dump_flags & TDF_STATS))
4507 fprintf (dump_file, "Dependence tester statistics:\n");
4509 fprintf (dump_file, "Number of dependence tests: %d\n",
4510 dependence_stats.num_dependence_tests);
4511 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4512 dependence_stats.num_dependence_dependent);
4513 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4514 dependence_stats.num_dependence_independent);
4515 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4516 dependence_stats.num_dependence_undetermined);
4518 fprintf (dump_file, "Number of subscript tests: %d\n",
4519 dependence_stats.num_subscript_tests);
4520 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4521 dependence_stats.num_subscript_undetermined);
4522 fprintf (dump_file, "Number of same subscript function: %d\n",
4523 dependence_stats.num_same_subscript_function);
4525 fprintf (dump_file, "Number of ziv tests: %d\n",
4526 dependence_stats.num_ziv);
4527 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4528 dependence_stats.num_ziv_dependent);
4529 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4530 dependence_stats.num_ziv_independent);
4531 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4532 dependence_stats.num_ziv_unimplemented);
4534 fprintf (dump_file, "Number of siv tests: %d\n",
4535 dependence_stats.num_siv);
4536 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4537 dependence_stats.num_siv_dependent);
4538 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4539 dependence_stats.num_siv_independent);
4540 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4541 dependence_stats.num_siv_unimplemented);
4543 fprintf (dump_file, "Number of miv tests: %d\n",
4544 dependence_stats.num_miv);
4545 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4546 dependence_stats.num_miv_dependent);
4547 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4548 dependence_stats.num_miv_independent);
4549 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4550 dependence_stats.num_miv_unimplemented);
4553 return res;
4556 /* Returns true when the data dependences for the basic block BB have been
4557 computed, false otherwise.
4558 DATAREFS is initialized to all the array elements contained in this basic
4559 block, DEPENDENCE_RELATIONS contains the relations between the data
4560 references. Compute read-read and self relations if
4561 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4562 bool
4563 compute_data_dependences_for_bb (basic_block bb,
4564 bool compute_self_and_read_read_dependences,
4565 VEC (data_reference_p, heap) **datarefs,
4566 VEC (ddr_p, heap) **dependence_relations)
4568 if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know)
4569 return false;
4571 return compute_all_dependences (*datarefs, dependence_relations, NULL,
4572 compute_self_and_read_read_dependences);
4575 /* Entry point (for testing only). Analyze all the data references
4576 and the dependence relations in LOOP.
4578 The data references are computed first.
4580 A relation on these nodes is represented by a complete graph. Some
4581 of the relations could be of no interest, thus the relations can be
4582 computed on demand.
4584 In the following function we compute all the relations. This is
4585 just a first implementation that is here for:
4586 - for showing how to ask for the dependence relations,
4587 - for the debugging the whole dependence graph,
4588 - for the dejagnu testcases and maintenance.
4590 It is possible to ask only for a part of the graph, avoiding to
4591 compute the whole dependence graph. The computed dependences are
4592 stored in a knowledge base (KB) such that later queries don't
4593 recompute the same information. The implementation of this KB is
4594 transparent to the optimizer, and thus the KB can be changed with a
4595 more efficient implementation, or the KB could be disabled. */
4596 static void
4597 analyze_all_data_dependences (struct loop *loop)
4599 unsigned int i;
4600 int nb_data_refs = 10;
4601 VEC (data_reference_p, heap) *datarefs =
4602 VEC_alloc (data_reference_p, heap, nb_data_refs);
4603 VEC (ddr_p, heap) *dependence_relations =
4604 VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs);
4605 VEC (loop_p, heap) *loop_nest = VEC_alloc (loop_p, heap, 3);
4607 /* Compute DDs on the whole function. */
4608 compute_data_dependences_for_loop (loop, false, &loop_nest, &datarefs,
4609 &dependence_relations);
4611 if (dump_file)
4613 dump_data_dependence_relations (dump_file, dependence_relations);
4614 fprintf (dump_file, "\n\n");
4616 if (dump_flags & TDF_DETAILS)
4617 dump_dist_dir_vectors (dump_file, dependence_relations);
4619 if (dump_flags & TDF_STATS)
4621 unsigned nb_top_relations = 0;
4622 unsigned nb_bot_relations = 0;
4623 unsigned nb_chrec_relations = 0;
4624 struct data_dependence_relation *ddr;
4626 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4628 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4629 nb_top_relations++;
4631 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4632 nb_bot_relations++;
4634 else
4635 nb_chrec_relations++;
4638 gather_stats_on_scev_database ();
4642 VEC_free (loop_p, heap, loop_nest);
4643 free_dependence_relations (dependence_relations);
4644 free_data_refs (datarefs);
4647 /* Computes all the data dependences and check that the results of
4648 several analyzers are the same. */
4650 void
4651 tree_check_data_deps (void)
4653 loop_iterator li;
4654 struct loop *loop_nest;
4656 FOR_EACH_LOOP (li, loop_nest, 0)
4657 analyze_all_data_dependences (loop_nest);
4660 /* Free the memory used by a data dependence relation DDR. */
4662 void
4663 free_dependence_relation (struct data_dependence_relation *ddr)
4665 if (ddr == NULL)
4666 return;
4668 if (DDR_SUBSCRIPTS (ddr))
4669 free_subscripts (DDR_SUBSCRIPTS (ddr));
4670 if (DDR_DIST_VECTS (ddr))
4671 VEC_free (lambda_vector, heap, DDR_DIST_VECTS (ddr));
4672 if (DDR_DIR_VECTS (ddr))
4673 VEC_free (lambda_vector, heap, DDR_DIR_VECTS (ddr));
4675 free (ddr);
4678 /* Free the memory used by the data dependence relations from
4679 DEPENDENCE_RELATIONS. */
4681 void
4682 free_dependence_relations (VEC (ddr_p, heap) *dependence_relations)
4684 unsigned int i;
4685 struct data_dependence_relation *ddr;
4687 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4688 if (ddr)
4689 free_dependence_relation (ddr);
4691 VEC_free (ddr_p, heap, dependence_relations);
4694 /* Free the memory used by the data references from DATAREFS. */
4696 void
4697 free_data_refs (VEC (data_reference_p, heap) *datarefs)
4699 unsigned int i;
4700 struct data_reference *dr;
4702 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
4703 free_data_ref (dr);
4704 VEC_free (data_reference_p, heap, datarefs);
4709 /* Dump vertex I in RDG to FILE. */
4711 void
4712 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
4714 struct vertex *v = &(rdg->vertices[i]);
4715 struct graph_edge *e;
4717 fprintf (file, "(vertex %d: (%s%s) (in:", i,
4718 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
4719 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
4721 if (v->pred)
4722 for (e = v->pred; e; e = e->pred_next)
4723 fprintf (file, " %d", e->src);
4725 fprintf (file, ") (out:");
4727 if (v->succ)
4728 for (e = v->succ; e; e = e->succ_next)
4729 fprintf (file, " %d", e->dest);
4731 fprintf (file, ")\n");
4732 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
4733 fprintf (file, ")\n");
4736 /* Call dump_rdg_vertex on stderr. */
4738 DEBUG_FUNCTION void
4739 debug_rdg_vertex (struct graph *rdg, int i)
4741 dump_rdg_vertex (stderr, rdg, i);
4744 /* Dump component C of RDG to FILE. If DUMPED is non-null, set the
4745 dumped vertices to that bitmap. */
4747 void dump_rdg_component (FILE *file, struct graph *rdg, int c, bitmap dumped)
4749 int i;
4751 fprintf (file, "(%d\n", c);
4753 for (i = 0; i < rdg->n_vertices; i++)
4754 if (rdg->vertices[i].component == c)
4756 if (dumped)
4757 bitmap_set_bit (dumped, i);
4759 dump_rdg_vertex (file, rdg, i);
4762 fprintf (file, ")\n");
4765 /* Call dump_rdg_vertex on stderr. */
4767 DEBUG_FUNCTION void
4768 debug_rdg_component (struct graph *rdg, int c)
4770 dump_rdg_component (stderr, rdg, c, NULL);
4773 /* Dump the reduced dependence graph RDG to FILE. */
4775 void
4776 dump_rdg (FILE *file, struct graph *rdg)
4778 int i;
4779 bitmap dumped = BITMAP_ALLOC (NULL);
4781 fprintf (file, "(rdg\n");
4783 for (i = 0; i < rdg->n_vertices; i++)
4784 if (!bitmap_bit_p (dumped, i))
4785 dump_rdg_component (file, rdg, rdg->vertices[i].component, dumped);
4787 fprintf (file, ")\n");
4788 BITMAP_FREE (dumped);
4791 /* Call dump_rdg on stderr. */
4793 DEBUG_FUNCTION void
4794 debug_rdg (struct graph *rdg)
4796 dump_rdg (stderr, rdg);
4799 static void
4800 dot_rdg_1 (FILE *file, struct graph *rdg)
4802 int i;
4804 fprintf (file, "digraph RDG {\n");
4806 for (i = 0; i < rdg->n_vertices; i++)
4808 struct vertex *v = &(rdg->vertices[i]);
4809 struct graph_edge *e;
4811 /* Highlight reads from memory. */
4812 if (RDG_MEM_READS_STMT (rdg, i))
4813 fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
4815 /* Highlight stores to memory. */
4816 if (RDG_MEM_WRITE_STMT (rdg, i))
4817 fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
4819 if (v->succ)
4820 for (e = v->succ; e; e = e->succ_next)
4821 switch (RDGE_TYPE (e))
4823 case input_dd:
4824 fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
4825 break;
4827 case output_dd:
4828 fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
4829 break;
4831 case flow_dd:
4832 /* These are the most common dependences: don't print these. */
4833 fprintf (file, "%d -> %d \n", i, e->dest);
4834 break;
4836 case anti_dd:
4837 fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
4838 break;
4840 default:
4841 gcc_unreachable ();
4845 fprintf (file, "}\n\n");
4848 /* Display the Reduced Dependence Graph using dotty. */
4849 extern void dot_rdg (struct graph *);
4851 DEBUG_FUNCTION void
4852 dot_rdg (struct graph *rdg)
4854 /* When debugging, enable the following code. This cannot be used
4855 in production compilers because it calls "system". */
4856 #if 0
4857 FILE *file = fopen ("/tmp/rdg.dot", "w");
4858 gcc_assert (file != NULL);
4860 dot_rdg_1 (file, rdg);
4861 fclose (file);
4863 system ("dotty /tmp/rdg.dot &");
4864 #else
4865 dot_rdg_1 (stderr, rdg);
4866 #endif
4869 /* This structure is used for recording the mapping statement index in
4870 the RDG. */
4872 struct GTY(()) rdg_vertex_info
4874 gimple stmt;
4875 int index;
4878 /* Returns the index of STMT in RDG. */
4881 rdg_vertex_for_stmt (struct graph *rdg, gimple stmt)
4883 struct rdg_vertex_info rvi, *slot;
4885 rvi.stmt = stmt;
4886 slot = (struct rdg_vertex_info *) htab_find (rdg->indices, &rvi);
4888 if (!slot)
4889 return -1;
4891 return slot->index;
4894 /* Creates an edge in RDG for each distance vector from DDR. The
4895 order that we keep track of in the RDG is the order in which
4896 statements have to be executed. */
4898 static void
4899 create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4901 struct graph_edge *e;
4902 int va, vb;
4903 data_reference_p dra = DDR_A (ddr);
4904 data_reference_p drb = DDR_B (ddr);
4905 unsigned level = ddr_dependence_level (ddr);
4907 /* For non scalar dependences, when the dependence is REVERSED,
4908 statement B has to be executed before statement A. */
4909 if (level > 0
4910 && !DDR_REVERSED_P (ddr))
4912 data_reference_p tmp = dra;
4913 dra = drb;
4914 drb = tmp;
4917 va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
4918 vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
4920 if (va < 0 || vb < 0)
4921 return;
4923 e = add_edge (rdg, va, vb);
4924 e->data = XNEW (struct rdg_edge);
4926 RDGE_LEVEL (e) = level;
4927 RDGE_RELATION (e) = ddr;
4929 /* Determines the type of the data dependence. */
4930 if (DR_IS_READ (dra) && DR_IS_READ (drb))
4931 RDGE_TYPE (e) = input_dd;
4932 else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
4933 RDGE_TYPE (e) = output_dd;
4934 else if (DR_IS_WRITE (dra) && DR_IS_READ (drb))
4935 RDGE_TYPE (e) = flow_dd;
4936 else if (DR_IS_READ (dra) && DR_IS_WRITE (drb))
4937 RDGE_TYPE (e) = anti_dd;
4940 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
4941 the index of DEF in RDG. */
4943 static void
4944 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
4946 use_operand_p imm_use_p;
4947 imm_use_iterator iterator;
4949 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
4951 struct graph_edge *e;
4952 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
4954 if (use < 0)
4955 continue;
4957 e = add_edge (rdg, idef, use);
4958 e->data = XNEW (struct rdg_edge);
4959 RDGE_TYPE (e) = flow_dd;
4960 RDGE_RELATION (e) = NULL;
4964 /* Creates the edges of the reduced dependence graph RDG. */
4966 static void
4967 create_rdg_edges (struct graph *rdg, VEC (ddr_p, heap) *ddrs)
4969 int i;
4970 struct data_dependence_relation *ddr;
4971 def_operand_p def_p;
4972 ssa_op_iter iter;
4974 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
4975 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4976 create_rdg_edge_for_ddr (rdg, ddr);
4978 for (i = 0; i < rdg->n_vertices; i++)
4979 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
4980 iter, SSA_OP_DEF)
4981 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
4984 /* Build the vertices of the reduced dependence graph RDG. */
4986 void
4987 create_rdg_vertices (struct graph *rdg, VEC (gimple, heap) *stmts)
4989 int i, j;
4990 gimple stmt;
4992 FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
4994 VEC (data_ref_loc, heap) *references;
4995 data_ref_loc *ref;
4996 struct vertex *v = &(rdg->vertices[i]);
4997 struct rdg_vertex_info *rvi = XNEW (struct rdg_vertex_info);
4998 struct rdg_vertex_info **slot;
5000 rvi->stmt = stmt;
5001 rvi->index = i;
5002 slot = (struct rdg_vertex_info **) htab_find_slot (rdg->indices, rvi, INSERT);
5004 if (!*slot)
5005 *slot = rvi;
5006 else
5007 free (rvi);
5009 v->data = XNEW (struct rdg_vertex);
5010 RDG_STMT (rdg, i) = stmt;
5012 RDG_MEM_WRITE_STMT (rdg, i) = false;
5013 RDG_MEM_READS_STMT (rdg, i) = false;
5014 if (gimple_code (stmt) == GIMPLE_PHI)
5015 continue;
5017 get_references_in_stmt (stmt, &references);
5018 FOR_EACH_VEC_ELT (data_ref_loc, references, j, ref)
5019 if (!ref->is_read)
5020 RDG_MEM_WRITE_STMT (rdg, i) = true;
5021 else
5022 RDG_MEM_READS_STMT (rdg, i) = true;
5024 VEC_free (data_ref_loc, heap, references);
5028 /* Initialize STMTS with all the statements of LOOP. When
5029 INCLUDE_PHIS is true, include also the PHI nodes. The order in
5030 which we discover statements is important as
5031 generate_loops_for_partition is using the same traversal for
5032 identifying statements. */
5034 static void
5035 stmts_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5037 unsigned int i;
5038 basic_block *bbs = get_loop_body_in_dom_order (loop);
5040 for (i = 0; i < loop->num_nodes; i++)
5042 basic_block bb = bbs[i];
5043 gimple_stmt_iterator bsi;
5044 gimple stmt;
5046 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5047 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
5049 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5051 stmt = gsi_stmt (bsi);
5052 if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
5053 VEC_safe_push (gimple, heap, *stmts, stmt);
5057 free (bbs);
5060 /* Returns true when all the dependences are computable. */
5062 static bool
5063 known_dependences_p (VEC (ddr_p, heap) *dependence_relations)
5065 ddr_p ddr;
5066 unsigned int i;
5068 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
5069 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
5070 return false;
5072 return true;
5075 /* Computes a hash function for element ELT. */
5077 static hashval_t
5078 hash_stmt_vertex_info (const void *elt)
5080 const struct rdg_vertex_info *const rvi =
5081 (const struct rdg_vertex_info *) elt;
5082 gimple stmt = rvi->stmt;
5084 return htab_hash_pointer (stmt);
5087 /* Compares database elements E1 and E2. */
5089 static int
5090 eq_stmt_vertex_info (const void *e1, const void *e2)
5092 const struct rdg_vertex_info *elt1 = (const struct rdg_vertex_info *) e1;
5093 const struct rdg_vertex_info *elt2 = (const struct rdg_vertex_info *) e2;
5095 return elt1->stmt == elt2->stmt;
5098 /* Free the element E. */
5100 static void
5101 hash_stmt_vertex_del (void *e)
5103 free (e);
5106 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5107 statement of the loop nest, and one edge per data dependence or
5108 scalar dependence. */
5110 struct graph *
5111 build_empty_rdg (int n_stmts)
5113 int nb_data_refs = 10;
5114 struct graph *rdg = new_graph (n_stmts);
5116 rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
5117 eq_stmt_vertex_info, hash_stmt_vertex_del);
5118 return rdg;
5121 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5122 statement of the loop nest, and one edge per data dependence or
5123 scalar dependence. */
5125 struct graph *
5126 build_rdg (struct loop *loop,
5127 VEC (loop_p, heap) **loop_nest,
5128 VEC (ddr_p, heap) **dependence_relations,
5129 VEC (data_reference_p, heap) **datarefs)
5131 struct graph *rdg = NULL;
5132 VEC (gimple, heap) *stmts = VEC_alloc (gimple, heap, 10);
5134 compute_data_dependences_for_loop (loop, false, loop_nest, datarefs,
5135 dependence_relations);
5137 if (known_dependences_p (*dependence_relations))
5139 stmts_from_loop (loop, &stmts);
5140 rdg = build_empty_rdg (VEC_length (gimple, stmts));
5141 create_rdg_vertices (rdg, stmts);
5142 create_rdg_edges (rdg, *dependence_relations);
5145 VEC_free (gimple, heap, stmts);
5146 return rdg;
5149 /* Free the reduced dependence graph RDG. */
5151 void
5152 free_rdg (struct graph *rdg)
5154 int i;
5156 for (i = 0; i < rdg->n_vertices; i++)
5158 struct vertex *v = &(rdg->vertices[i]);
5159 struct graph_edge *e;
5161 for (e = v->succ; e; e = e->succ_next)
5162 free (e->data);
5164 free (v->data);
5167 htab_delete (rdg->indices);
5168 free_graph (rdg);
5171 /* Initialize STMTS with all the statements of LOOP that contain a
5172 store to memory. */
5174 void
5175 stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5177 unsigned int i;
5178 basic_block *bbs = get_loop_body_in_dom_order (loop);
5180 for (i = 0; i < loop->num_nodes; i++)
5182 basic_block bb = bbs[i];
5183 gimple_stmt_iterator bsi;
5185 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5186 if (gimple_vdef (gsi_stmt (bsi)))
5187 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
5190 free (bbs);
5193 /* Returns true when the statement at STMT is of the form "A[i] = 0"
5194 that contains a data reference on its LHS with a stride of the same
5195 size as its unit type. */
5197 bool
5198 stmt_with_adjacent_zero_store_dr_p (gimple stmt)
5200 tree op0, op1;
5201 bool res;
5202 struct data_reference *dr;
5204 if (!stmt
5205 || !gimple_vdef (stmt)
5206 || !is_gimple_assign (stmt)
5207 || !gimple_assign_single_p (stmt)
5208 || !(op1 = gimple_assign_rhs1 (stmt))
5209 || !(integer_zerop (op1) || real_zerop (op1)))
5210 return false;
5212 dr = XCNEW (struct data_reference);
5213 op0 = gimple_assign_lhs (stmt);
5215 DR_STMT (dr) = stmt;
5216 DR_REF (dr) = op0;
5218 res = dr_analyze_innermost (dr, loop_containing_stmt (stmt))
5219 && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0));
5221 free_data_ref (dr);
5222 return res;
5225 /* Initialize STMTS with all the statements of LOOP that contain a
5226 store to memory of the form "A[i] = 0". */
5228 void
5229 stores_zero_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5231 unsigned int i;
5232 basic_block bb;
5233 gimple_stmt_iterator si;
5234 gimple stmt;
5235 basic_block *bbs = get_loop_body_in_dom_order (loop);
5237 for (i = 0; i < loop->num_nodes; i++)
5238 for (bb = bbs[i], si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5239 if ((stmt = gsi_stmt (si))
5240 && stmt_with_adjacent_zero_store_dr_p (stmt))
5241 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (si));
5243 free (bbs);
5246 /* For a data reference REF, return the declaration of its base
5247 address or NULL_TREE if the base is not determined. */
5249 static inline tree
5250 ref_base_address (gimple stmt, data_ref_loc *ref)
5252 tree base = NULL_TREE;
5253 tree base_address;
5254 struct data_reference *dr = XCNEW (struct data_reference);
5256 DR_STMT (dr) = stmt;
5257 DR_REF (dr) = *ref->pos;
5258 dr_analyze_innermost (dr, loop_containing_stmt (stmt));
5259 base_address = DR_BASE_ADDRESS (dr);
5261 if (!base_address)
5262 goto end;
5264 switch (TREE_CODE (base_address))
5266 case ADDR_EXPR:
5267 base = TREE_OPERAND (base_address, 0);
5268 break;
5270 default:
5271 base = base_address;
5272 break;
5275 end:
5276 free_data_ref (dr);
5277 return base;
5280 /* Determines whether the statement from vertex V of the RDG has a
5281 definition used outside the loop that contains this statement. */
5283 bool
5284 rdg_defs_used_in_other_loops_p (struct graph *rdg, int v)
5286 gimple stmt = RDG_STMT (rdg, v);
5287 struct loop *loop = loop_containing_stmt (stmt);
5288 use_operand_p imm_use_p;
5289 imm_use_iterator iterator;
5290 ssa_op_iter it;
5291 def_operand_p def_p;
5293 if (!loop)
5294 return true;
5296 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, it, SSA_OP_DEF)
5298 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, DEF_FROM_PTR (def_p))
5300 if (loop_containing_stmt (USE_STMT (imm_use_p)) != loop)
5301 return true;
5305 return false;
5308 /* Determines whether statements S1 and S2 access to similar memory
5309 locations. Two memory accesses are considered similar when they
5310 have the same base address declaration, i.e. when their
5311 ref_base_address is the same. */
5313 bool
5314 have_similar_memory_accesses (gimple s1, gimple s2)
5316 bool res = false;
5317 unsigned i, j;
5318 VEC (data_ref_loc, heap) *refs1, *refs2;
5319 data_ref_loc *ref1, *ref2;
5321 get_references_in_stmt (s1, &refs1);
5322 get_references_in_stmt (s2, &refs2);
5324 FOR_EACH_VEC_ELT (data_ref_loc, refs1, i, ref1)
5326 tree base1 = ref_base_address (s1, ref1);
5328 if (base1)
5329 FOR_EACH_VEC_ELT (data_ref_loc, refs2, j, ref2)
5330 if (base1 == ref_base_address (s2, ref2))
5332 res = true;
5333 goto end;
5337 end:
5338 VEC_free (data_ref_loc, heap, refs1);
5339 VEC_free (data_ref_loc, heap, refs2);
5340 return res;
5343 /* Helper function for the hashtab. */
5345 static int
5346 have_similar_memory_accesses_1 (const void *s1, const void *s2)
5348 return have_similar_memory_accesses (CONST_CAST_GIMPLE ((const_gimple) s1),
5349 CONST_CAST_GIMPLE ((const_gimple) s2));
5352 /* Helper function for the hashtab. */
5354 static hashval_t
5355 ref_base_address_1 (const void *s)
5357 gimple stmt = CONST_CAST_GIMPLE ((const_gimple) s);
5358 unsigned i;
5359 VEC (data_ref_loc, heap) *refs;
5360 data_ref_loc *ref;
5361 hashval_t res = 0;
5363 get_references_in_stmt (stmt, &refs);
5365 FOR_EACH_VEC_ELT (data_ref_loc, refs, i, ref)
5366 if (!ref->is_read)
5368 res = htab_hash_pointer (ref_base_address (stmt, ref));
5369 break;
5372 VEC_free (data_ref_loc, heap, refs);
5373 return res;
5376 /* Try to remove duplicated write data references from STMTS. */
5378 void
5379 remove_similar_memory_refs (VEC (gimple, heap) **stmts)
5381 unsigned i;
5382 gimple stmt;
5383 htab_t seen = htab_create (VEC_length (gimple, *stmts), ref_base_address_1,
5384 have_similar_memory_accesses_1, NULL);
5386 for (i = 0; VEC_iterate (gimple, *stmts, i, stmt); )
5388 void **slot;
5390 slot = htab_find_slot (seen, stmt, INSERT);
5392 if (*slot)
5393 VEC_ordered_remove (gimple, *stmts, i);
5394 else
5396 *slot = (void *) stmt;
5397 i++;
5401 htab_delete (seen);
5404 /* Returns the index of PARAMETER in the parameters vector of the
5405 ACCESS_MATRIX. If PARAMETER does not exist return -1. */
5408 access_matrix_get_index_for_parameter (tree parameter,
5409 struct access_matrix *access_matrix)
5411 int i;
5412 VEC (tree,heap) *lambda_parameters = AM_PARAMETERS (access_matrix);
5413 tree lambda_parameter;
5415 FOR_EACH_VEC_ELT (tree, lambda_parameters, i, lambda_parameter)
5416 if (lambda_parameter == parameter)
5417 return i + AM_NB_INDUCTION_VARS (access_matrix);
5419 return -1;