Merge from mainline (168000:168310).
[official-gcc/graphite-test-results.git] / gcc / tree-data-ref.c
blob3de3234c22b093baa5d7ceaeac378017b3296b88
1 /* Data references and dependences detectors.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* This pass walks a given loop structure searching for array
23 references. The information about the array accesses is recorded
24 in DATA_REFERENCE structures.
26 The basic test for determining the dependences is:
27 given two access functions chrec1 and chrec2 to a same array, and
28 x and y two vectors from the iteration domain, the same element of
29 the array is accessed twice at iterations x and y if and only if:
30 | chrec1 (x) == chrec2 (y).
32 The goals of this analysis are:
34 - to determine the independence: the relation between two
35 independent accesses is qualified with the chrec_known (this
36 information allows a loop parallelization),
38 - when two data references access the same data, to qualify the
39 dependence relation with classic dependence representations:
41 - distance vectors
42 - direction vectors
43 - loop carried level dependence
44 - polyhedron dependence
45 or with the chains of recurrences based representation,
47 - to define a knowledge base for storing the data dependence
48 information,
50 - to define an interface to access this data.
53 Definitions:
55 - subscript: given two array accesses a subscript is the tuple
56 composed of the access functions for a given dimension. Example:
57 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
58 (f1, g1), (f2, g2), (f3, g3).
60 - Diophantine equation: an equation whose coefficients and
61 solutions are integer constants, for example the equation
62 | 3*x + 2*y = 1
63 has an integer solution x = 1 and y = -1.
65 References:
67 - "Advanced Compilation for High Performance Computing" by Randy
68 Allen and Ken Kennedy.
69 http://citeseer.ist.psu.edu/goff91practical.html
71 - "Loop Transformations for Restructuring Compilers - The Foundations"
72 by Utpal Banerjee.
77 #include "config.h"
78 #include "system.h"
79 #include "coretypes.h"
80 #include "gimple-pretty-print.h"
81 #include "tree-flow.h"
82 #include "cfgloop.h"
83 #include "tree-data-ref.h"
84 #include "tree-scalar-evolution.h"
85 #include "tree-pass.h"
86 #include "langhooks.h"
88 static struct datadep_stats
90 int num_dependence_tests;
91 int num_dependence_dependent;
92 int num_dependence_independent;
93 int num_dependence_undetermined;
95 int num_subscript_tests;
96 int num_subscript_undetermined;
97 int num_same_subscript_function;
99 int num_ziv;
100 int num_ziv_independent;
101 int num_ziv_dependent;
102 int num_ziv_unimplemented;
104 int num_siv;
105 int num_siv_independent;
106 int num_siv_dependent;
107 int num_siv_unimplemented;
109 int num_miv;
110 int num_miv_independent;
111 int num_miv_dependent;
112 int num_miv_unimplemented;
113 } dependence_stats;
115 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
116 struct data_reference *,
117 struct data_reference *,
118 struct loop *);
119 /* Returns true iff A divides B. */
121 static inline bool
122 tree_fold_divides_p (const_tree a, const_tree b)
124 gcc_assert (TREE_CODE (a) == INTEGER_CST);
125 gcc_assert (TREE_CODE (b) == INTEGER_CST);
126 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a, 0));
129 /* Returns true iff A divides B. */
131 static inline bool
132 int_divides_p (int a, int b)
134 return ((b % a) == 0);
139 /* Dump into FILE all the data references from DATAREFS. */
141 void
142 dump_data_references (FILE *file, VEC (data_reference_p, heap) *datarefs)
144 unsigned int i;
145 struct data_reference *dr;
147 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
148 dump_data_reference (file, dr);
151 /* Dump into STDERR all the data references from DATAREFS. */
153 DEBUG_FUNCTION void
154 debug_data_references (VEC (data_reference_p, heap) *datarefs)
156 dump_data_references (stderr, datarefs);
159 /* Dump to STDERR all the dependence relations from DDRS. */
161 DEBUG_FUNCTION void
162 debug_data_dependence_relations (VEC (ddr_p, heap) *ddrs)
164 dump_data_dependence_relations (stderr, ddrs);
167 /* Dump into FILE all the dependence relations from DDRS. */
169 void
170 dump_data_dependence_relations (FILE *file,
171 VEC (ddr_p, heap) *ddrs)
173 unsigned int i;
174 struct data_dependence_relation *ddr;
176 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
177 dump_data_dependence_relation (file, ddr);
180 /* Print to STDERR the data_reference DR. */
182 DEBUG_FUNCTION void
183 debug_data_reference (struct data_reference *dr)
185 dump_data_reference (stderr, dr);
188 /* Dump function for a DATA_REFERENCE structure. */
190 void
191 dump_data_reference (FILE *outf,
192 struct data_reference *dr)
194 unsigned int i;
196 fprintf (outf, "#(Data Ref: \n# stmt: ");
197 print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
198 fprintf (outf, "# ref: ");
199 print_generic_stmt (outf, DR_REF (dr), 0);
200 fprintf (outf, "# base_object: ");
201 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
203 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
205 fprintf (outf, "# Access function %d: ", i);
206 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
208 fprintf (outf, "#)\n");
211 /* Dumps the affine function described by FN to the file OUTF. */
213 static void
214 dump_affine_function (FILE *outf, affine_fn fn)
216 unsigned i;
217 tree coef;
219 print_generic_expr (outf, VEC_index (tree, fn, 0), TDF_SLIM);
220 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
222 fprintf (outf, " + ");
223 print_generic_expr (outf, coef, TDF_SLIM);
224 fprintf (outf, " * x_%u", i);
228 /* Dumps the conflict function CF to the file OUTF. */
230 static void
231 dump_conflict_function (FILE *outf, conflict_function *cf)
233 unsigned i;
235 if (cf->n == NO_DEPENDENCE)
236 fprintf (outf, "no dependence\n");
237 else if (cf->n == NOT_KNOWN)
238 fprintf (outf, "not known\n");
239 else
241 for (i = 0; i < cf->n; i++)
243 fprintf (outf, "[");
244 dump_affine_function (outf, cf->fns[i]);
245 fprintf (outf, "]\n");
250 /* Dump function for a SUBSCRIPT structure. */
252 void
253 dump_subscript (FILE *outf, struct subscript *subscript)
255 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
257 fprintf (outf, "\n (subscript \n");
258 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
259 dump_conflict_function (outf, cf);
260 if (CF_NONTRIVIAL_P (cf))
262 tree last_iteration = SUB_LAST_CONFLICT (subscript);
263 fprintf (outf, " last_conflict: ");
264 print_generic_stmt (outf, last_iteration, 0);
267 cf = SUB_CONFLICTS_IN_B (subscript);
268 fprintf (outf, " iterations_that_access_an_element_twice_in_B: ");
269 dump_conflict_function (outf, cf);
270 if (CF_NONTRIVIAL_P (cf))
272 tree last_iteration = SUB_LAST_CONFLICT (subscript);
273 fprintf (outf, " last_conflict: ");
274 print_generic_stmt (outf, last_iteration, 0);
277 fprintf (outf, " (Subscript distance: ");
278 print_generic_stmt (outf, SUB_DISTANCE (subscript), 0);
279 fprintf (outf, " )\n");
280 fprintf (outf, " )\n");
283 /* Print the classic direction vector DIRV to OUTF. */
285 void
286 print_direction_vector (FILE *outf,
287 lambda_vector dirv,
288 int length)
290 int eq;
292 for (eq = 0; eq < length; eq++)
294 enum data_dependence_direction dir = ((enum data_dependence_direction)
295 dirv[eq]);
297 switch (dir)
299 case dir_positive:
300 fprintf (outf, " +");
301 break;
302 case dir_negative:
303 fprintf (outf, " -");
304 break;
305 case dir_equal:
306 fprintf (outf, " =");
307 break;
308 case dir_positive_or_equal:
309 fprintf (outf, " +=");
310 break;
311 case dir_positive_or_negative:
312 fprintf (outf, " +-");
313 break;
314 case dir_negative_or_equal:
315 fprintf (outf, " -=");
316 break;
317 case dir_star:
318 fprintf (outf, " *");
319 break;
320 default:
321 fprintf (outf, "indep");
322 break;
325 fprintf (outf, "\n");
328 /* Print a vector of direction vectors. */
330 void
331 print_dir_vectors (FILE *outf, VEC (lambda_vector, heap) *dir_vects,
332 int length)
334 unsigned j;
335 lambda_vector v;
337 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, v)
338 print_direction_vector (outf, v, length);
341 /* Print a vector of distance vectors. */
343 void
344 print_dist_vectors (FILE *outf, VEC (lambda_vector, heap) *dist_vects,
345 int length)
347 unsigned j;
348 lambda_vector v;
350 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, v)
351 print_lambda_vector (outf, v, length);
354 /* Debug version. */
356 DEBUG_FUNCTION void
357 debug_data_dependence_relation (struct data_dependence_relation *ddr)
359 dump_data_dependence_relation (stderr, ddr);
362 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
364 void
365 dump_data_dependence_relation (FILE *outf,
366 struct data_dependence_relation *ddr)
368 struct data_reference *dra, *drb;
370 fprintf (outf, "(Data Dep: \n");
372 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
374 if (ddr)
376 dra = DDR_A (ddr);
377 drb = DDR_B (ddr);
378 if (dra)
379 dump_data_reference (outf, dra);
380 else
381 fprintf (outf, " (nil)\n");
382 if (drb)
383 dump_data_reference (outf, drb);
384 else
385 fprintf (outf, " (nil)\n");
387 fprintf (outf, " (don't know)\n)\n");
388 return;
391 dra = DDR_A (ddr);
392 drb = DDR_B (ddr);
393 dump_data_reference (outf, dra);
394 dump_data_reference (outf, drb);
396 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
397 fprintf (outf, " (no dependence)\n");
399 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
401 unsigned int i;
402 struct loop *loopi;
404 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
406 fprintf (outf, " access_fn_A: ");
407 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
408 fprintf (outf, " access_fn_B: ");
409 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
410 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
413 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
414 fprintf (outf, " loop nest: (");
415 FOR_EACH_VEC_ELT (loop_p, DDR_LOOP_NEST (ddr), i, loopi)
416 fprintf (outf, "%d ", loopi->num);
417 fprintf (outf, ")\n");
419 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
421 fprintf (outf, " distance_vector: ");
422 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
423 DDR_NB_LOOPS (ddr));
426 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
428 fprintf (outf, " direction_vector: ");
429 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
430 DDR_NB_LOOPS (ddr));
434 fprintf (outf, ")\n");
437 /* Dump function for a DATA_DEPENDENCE_DIRECTION structure. */
439 void
440 dump_data_dependence_direction (FILE *file,
441 enum data_dependence_direction dir)
443 switch (dir)
445 case dir_positive:
446 fprintf (file, "+");
447 break;
449 case dir_negative:
450 fprintf (file, "-");
451 break;
453 case dir_equal:
454 fprintf (file, "=");
455 break;
457 case dir_positive_or_negative:
458 fprintf (file, "+-");
459 break;
461 case dir_positive_or_equal:
462 fprintf (file, "+=");
463 break;
465 case dir_negative_or_equal:
466 fprintf (file, "-=");
467 break;
469 case dir_star:
470 fprintf (file, "*");
471 break;
473 default:
474 break;
478 /* Dumps the distance and direction vectors in FILE. DDRS contains
479 the dependence relations, and VECT_SIZE is the size of the
480 dependence vectors, or in other words the number of loops in the
481 considered nest. */
483 void
484 dump_dist_dir_vectors (FILE *file, VEC (ddr_p, heap) *ddrs)
486 unsigned int i, j;
487 struct data_dependence_relation *ddr;
488 lambda_vector v;
490 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
491 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
493 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), j, v)
495 fprintf (file, "DISTANCE_V (");
496 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
497 fprintf (file, ")\n");
500 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), j, v)
502 fprintf (file, "DIRECTION_V (");
503 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
504 fprintf (file, ")\n");
508 fprintf (file, "\n\n");
511 /* Dumps the data dependence relations DDRS in FILE. */
513 void
514 dump_ddrs (FILE *file, VEC (ddr_p, heap) *ddrs)
516 unsigned int i;
517 struct data_dependence_relation *ddr;
519 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
520 dump_data_dependence_relation (file, ddr);
522 fprintf (file, "\n\n");
525 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
526 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
527 constant of type ssizetype, and returns true. If we cannot do this
528 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
529 is returned. */
531 static bool
532 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
533 tree *var, tree *off)
535 tree var0, var1;
536 tree off0, off1;
537 enum tree_code ocode = code;
539 *var = NULL_TREE;
540 *off = NULL_TREE;
542 switch (code)
544 case INTEGER_CST:
545 *var = build_int_cst (type, 0);
546 *off = fold_convert (ssizetype, op0);
547 return true;
549 case POINTER_PLUS_EXPR:
550 ocode = PLUS_EXPR;
551 /* FALLTHROUGH */
552 case PLUS_EXPR:
553 case MINUS_EXPR:
554 split_constant_offset (op0, &var0, &off0);
555 split_constant_offset (op1, &var1, &off1);
556 *var = fold_build2 (code, type, var0, var1);
557 *off = size_binop (ocode, off0, off1);
558 return true;
560 case MULT_EXPR:
561 if (TREE_CODE (op1) != INTEGER_CST)
562 return false;
564 split_constant_offset (op0, &var0, &off0);
565 *var = fold_build2 (MULT_EXPR, type, var0, op1);
566 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
567 return true;
569 case ADDR_EXPR:
571 tree base, poffset;
572 HOST_WIDE_INT pbitsize, pbitpos;
573 enum machine_mode pmode;
574 int punsignedp, pvolatilep;
576 op0 = TREE_OPERAND (op0, 0);
577 if (!handled_component_p (op0))
578 return false;
580 base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
581 &pmode, &punsignedp, &pvolatilep, false);
583 if (pbitpos % BITS_PER_UNIT != 0)
584 return false;
585 base = build_fold_addr_expr (base);
586 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
588 if (poffset)
590 split_constant_offset (poffset, &poffset, &off1);
591 off0 = size_binop (PLUS_EXPR, off0, off1);
592 if (POINTER_TYPE_P (TREE_TYPE (base)))
593 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base),
594 base, fold_convert (sizetype, poffset));
595 else
596 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
597 fold_convert (TREE_TYPE (base), poffset));
600 var0 = fold_convert (type, base);
602 /* If variable length types are involved, punt, otherwise casts
603 might be converted into ARRAY_REFs in gimplify_conversion.
604 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
605 possibly no longer appears in current GIMPLE, might resurface.
606 This perhaps could run
607 if (CONVERT_EXPR_P (var0))
609 gimplify_conversion (&var0);
610 // Attempt to fill in any within var0 found ARRAY_REF's
611 // element size from corresponding op embedded ARRAY_REF,
612 // if unsuccessful, just punt.
613 } */
614 while (POINTER_TYPE_P (type))
615 type = TREE_TYPE (type);
616 if (int_size_in_bytes (type) < 0)
617 return false;
619 *var = var0;
620 *off = off0;
621 return true;
624 case SSA_NAME:
626 gimple def_stmt = SSA_NAME_DEF_STMT (op0);
627 enum tree_code subcode;
629 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
630 return false;
632 var0 = gimple_assign_rhs1 (def_stmt);
633 subcode = gimple_assign_rhs_code (def_stmt);
634 var1 = gimple_assign_rhs2 (def_stmt);
636 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
638 CASE_CONVERT:
640 /* We must not introduce undefined overflow, and we must not change the value.
641 Hence we're okay if the inner type doesn't overflow to start with
642 (pointer or signed), the outer type also is an integer or pointer
643 and the outer precision is at least as large as the inner. */
644 tree itype = TREE_TYPE (op0);
645 if ((POINTER_TYPE_P (itype)
646 || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
647 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
648 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
650 split_constant_offset (op0, &var0, off);
651 *var = fold_convert (type, var0);
652 return true;
654 return false;
657 default:
658 return false;
662 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
663 will be ssizetype. */
665 void
666 split_constant_offset (tree exp, tree *var, tree *off)
668 tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
669 enum tree_code code;
671 *var = exp;
672 *off = ssize_int (0);
673 STRIP_NOPS (exp);
675 if (automatically_generated_chrec_p (exp))
676 return;
678 otype = TREE_TYPE (exp);
679 code = TREE_CODE (exp);
680 extract_ops_from_tree (exp, &code, &op0, &op1);
681 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
683 *var = fold_convert (type, e);
684 *off = o;
688 /* Returns the address ADDR of an object in a canonical shape (without nop
689 casts, and with type of pointer to the object). */
691 static tree
692 canonicalize_base_object_address (tree addr)
694 tree orig = addr;
696 STRIP_NOPS (addr);
698 /* The base address may be obtained by casting from integer, in that case
699 keep the cast. */
700 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
701 return orig;
703 if (TREE_CODE (addr) != ADDR_EXPR)
704 return addr;
706 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
709 /* Analyzes the behavior of the memory reference DR in the innermost loop or
710 basic block that contains it. Returns true if analysis succeed or false
711 otherwise. */
713 bool
714 dr_analyze_innermost (struct data_reference *dr)
716 gimple stmt = DR_STMT (dr);
717 struct loop *loop = loop_containing_stmt (stmt);
718 tree ref = DR_REF (dr);
719 HOST_WIDE_INT pbitsize, pbitpos;
720 tree base, poffset;
721 enum machine_mode pmode;
722 int punsignedp, pvolatilep;
723 affine_iv base_iv, offset_iv;
724 tree init, dinit, step;
725 bool in_loop = (loop && loop->num);
727 if (dump_file && (dump_flags & TDF_DETAILS))
728 fprintf (dump_file, "analyze_innermost: ");
730 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
731 &pmode, &punsignedp, &pvolatilep, false);
732 gcc_assert (base != NULL_TREE);
734 if (pbitpos % BITS_PER_UNIT != 0)
736 if (dump_file && (dump_flags & TDF_DETAILS))
737 fprintf (dump_file, "failed: bit offset alignment.\n");
738 return false;
741 if (TREE_CODE (base) == MEM_REF)
743 if (!integer_zerop (TREE_OPERAND (base, 1)))
745 if (!poffset)
747 double_int moff = mem_ref_offset (base);
748 poffset = double_int_to_tree (sizetype, moff);
750 else
751 poffset = size_binop (PLUS_EXPR, poffset, TREE_OPERAND (base, 1));
753 base = TREE_OPERAND (base, 0);
755 else
756 base = build_fold_addr_expr (base);
757 if (in_loop)
759 if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
760 false))
762 if (dump_file && (dump_flags & TDF_DETAILS))
763 fprintf (dump_file, "failed: evolution of base is not affine.\n");
764 return false;
767 else
769 base_iv.base = base;
770 base_iv.step = ssize_int (0);
771 base_iv.no_overflow = true;
774 if (!poffset)
776 offset_iv.base = ssize_int (0);
777 offset_iv.step = ssize_int (0);
779 else
781 if (!in_loop)
783 offset_iv.base = poffset;
784 offset_iv.step = ssize_int (0);
786 else if (!simple_iv (loop, loop_containing_stmt (stmt),
787 poffset, &offset_iv, false))
789 if (dump_file && (dump_flags & TDF_DETAILS))
790 fprintf (dump_file, "failed: evolution of offset is not"
791 " affine.\n");
792 return false;
796 init = ssize_int (pbitpos / BITS_PER_UNIT);
797 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
798 init = size_binop (PLUS_EXPR, init, dinit);
799 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
800 init = size_binop (PLUS_EXPR, init, dinit);
802 step = size_binop (PLUS_EXPR,
803 fold_convert (ssizetype, base_iv.step),
804 fold_convert (ssizetype, offset_iv.step));
806 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
808 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
809 DR_INIT (dr) = init;
810 DR_STEP (dr) = step;
812 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
814 if (dump_file && (dump_flags & TDF_DETAILS))
815 fprintf (dump_file, "success.\n");
817 return true;
820 /* Determines the base object and the list of indices of memory reference
821 DR, analyzed in loop nest NEST. */
823 static void
824 dr_analyze_indices (struct data_reference *dr, struct loop *nest)
826 gimple stmt = DR_STMT (dr);
827 struct loop *loop = loop_containing_stmt (stmt);
828 VEC (tree, heap) *access_fns = NULL;
829 tree ref = unshare_expr (DR_REF (dr)), aref = ref, op;
830 tree base, off, access_fn = NULL_TREE;
831 basic_block before_loop = NULL;
833 if (nest)
834 before_loop = block_before_loop (nest);
836 while (handled_component_p (aref))
838 if (TREE_CODE (aref) == ARRAY_REF)
840 op = TREE_OPERAND (aref, 1);
841 if (nest)
843 access_fn = analyze_scalar_evolution (loop, op);
844 access_fn = instantiate_scev (before_loop, loop, access_fn);
845 VEC_safe_push (tree, heap, access_fns, access_fn);
848 TREE_OPERAND (aref, 1) = build_int_cst (TREE_TYPE (op), 0);
851 aref = TREE_OPERAND (aref, 0);
854 if (nest
855 && (INDIRECT_REF_P (aref)
856 || TREE_CODE (aref) == MEM_REF))
858 op = TREE_OPERAND (aref, 0);
859 access_fn = analyze_scalar_evolution (loop, op);
860 access_fn = instantiate_scev (before_loop, loop, access_fn);
861 base = initial_condition (access_fn);
862 split_constant_offset (base, &base, &off);
863 if (TREE_CODE (aref) == MEM_REF)
864 off = size_binop (PLUS_EXPR, off,
865 fold_convert (ssizetype, TREE_OPERAND (aref, 1)));
866 access_fn = chrec_replace_initial_condition (access_fn,
867 fold_convert (TREE_TYPE (base), off));
869 TREE_OPERAND (aref, 0) = base;
870 VEC_safe_push (tree, heap, access_fns, access_fn);
873 if (TREE_CODE (aref) == MEM_REF)
874 TREE_OPERAND (aref, 1)
875 = build_int_cst (TREE_TYPE (TREE_OPERAND (aref, 1)), 0);
877 if (TREE_CODE (ref) == MEM_REF
878 && TREE_CODE (TREE_OPERAND (ref, 0)) == ADDR_EXPR
879 && integer_zerop (TREE_OPERAND (ref, 1)))
880 ref = TREE_OPERAND (TREE_OPERAND (ref, 0), 0);
882 /* For canonicalization purposes we'd like to strip all outermost
883 zero-offset component-refs.
884 ??? For now simply handle zero-index array-refs. */
885 while (TREE_CODE (ref) == ARRAY_REF
886 && integer_zerop (TREE_OPERAND (ref, 1)))
887 ref = TREE_OPERAND (ref, 0);
889 DR_BASE_OBJECT (dr) = ref;
890 DR_ACCESS_FNS (dr) = access_fns;
893 /* Extracts the alias analysis information from the memory reference DR. */
895 static void
896 dr_analyze_alias (struct data_reference *dr)
898 tree ref = DR_REF (dr);
899 tree base = get_base_address (ref), addr;
901 if (INDIRECT_REF_P (base)
902 || TREE_CODE (base) == MEM_REF)
904 addr = TREE_OPERAND (base, 0);
905 if (TREE_CODE (addr) == SSA_NAME)
906 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
910 /* Returns true if the address of DR is invariant. */
912 static bool
913 dr_address_invariant_p (struct data_reference *dr)
915 unsigned i;
916 tree idx;
918 FOR_EACH_VEC_ELT (tree, DR_ACCESS_FNS (dr), i, idx)
919 if (tree_contains_chrecs (idx, NULL))
920 return false;
922 return true;
925 /* Frees data reference DR. */
927 void
928 free_data_ref (data_reference_p dr)
930 VEC_free (tree, heap, DR_ACCESS_FNS (dr));
931 free (dr);
934 /* Analyzes memory reference MEMREF accessed in STMT. The reference
935 is read if IS_READ is true, write otherwise. Returns the
936 data_reference description of MEMREF. NEST is the outermost loop of the
937 loop nest in that the reference should be analyzed. */
939 struct data_reference *
940 create_data_ref (struct loop *nest, tree memref, gimple stmt, bool is_read)
942 struct data_reference *dr;
944 if (dump_file && (dump_flags & TDF_DETAILS))
946 fprintf (dump_file, "Creating dr for ");
947 print_generic_expr (dump_file, memref, TDF_SLIM);
948 fprintf (dump_file, "\n");
951 dr = XCNEW (struct data_reference);
952 DR_STMT (dr) = stmt;
953 DR_REF (dr) = memref;
954 DR_IS_READ (dr) = is_read;
956 dr_analyze_innermost (dr);
957 dr_analyze_indices (dr, nest);
958 dr_analyze_alias (dr);
960 if (dump_file && (dump_flags & TDF_DETAILS))
962 fprintf (dump_file, "\tbase_address: ");
963 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
964 fprintf (dump_file, "\n\toffset from base address: ");
965 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
966 fprintf (dump_file, "\n\tconstant offset from base address: ");
967 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
968 fprintf (dump_file, "\n\tstep: ");
969 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
970 fprintf (dump_file, "\n\taligned to: ");
971 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
972 fprintf (dump_file, "\n\tbase_object: ");
973 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
974 fprintf (dump_file, "\n");
977 return dr;
980 /* Returns true if FNA == FNB. */
982 static bool
983 affine_function_equal_p (affine_fn fna, affine_fn fnb)
985 unsigned i, n = VEC_length (tree, fna);
987 if (n != VEC_length (tree, fnb))
988 return false;
990 for (i = 0; i < n; i++)
991 if (!operand_equal_p (VEC_index (tree, fna, i),
992 VEC_index (tree, fnb, i), 0))
993 return false;
995 return true;
998 /* If all the functions in CF are the same, returns one of them,
999 otherwise returns NULL. */
1001 static affine_fn
1002 common_affine_function (conflict_function *cf)
1004 unsigned i;
1005 affine_fn comm;
1007 if (!CF_NONTRIVIAL_P (cf))
1008 return NULL;
1010 comm = cf->fns[0];
1012 for (i = 1; i < cf->n; i++)
1013 if (!affine_function_equal_p (comm, cf->fns[i]))
1014 return NULL;
1016 return comm;
1019 /* Returns the base of the affine function FN. */
1021 static tree
1022 affine_function_base (affine_fn fn)
1024 return VEC_index (tree, fn, 0);
1027 /* Returns true if FN is a constant. */
1029 static bool
1030 affine_function_constant_p (affine_fn fn)
1032 unsigned i;
1033 tree coef;
1035 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
1036 if (!integer_zerop (coef))
1037 return false;
1039 return true;
1042 /* Returns true if FN is the zero constant function. */
1044 static bool
1045 affine_function_zero_p (affine_fn fn)
1047 return (integer_zerop (affine_function_base (fn))
1048 && affine_function_constant_p (fn));
1051 /* Returns a signed integer type with the largest precision from TA
1052 and TB. */
1054 static tree
1055 signed_type_for_types (tree ta, tree tb)
1057 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1058 return signed_type_for (ta);
1059 else
1060 return signed_type_for (tb);
1063 /* Applies operation OP on affine functions FNA and FNB, and returns the
1064 result. */
1066 static affine_fn
1067 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1069 unsigned i, n, m;
1070 affine_fn ret;
1071 tree coef;
1073 if (VEC_length (tree, fnb) > VEC_length (tree, fna))
1075 n = VEC_length (tree, fna);
1076 m = VEC_length (tree, fnb);
1078 else
1080 n = VEC_length (tree, fnb);
1081 m = VEC_length (tree, fna);
1084 ret = VEC_alloc (tree, heap, m);
1085 for (i = 0; i < n; i++)
1087 tree type = signed_type_for_types (TREE_TYPE (VEC_index (tree, fna, i)),
1088 TREE_TYPE (VEC_index (tree, fnb, i)));
1090 VEC_quick_push (tree, ret,
1091 fold_build2 (op, type,
1092 VEC_index (tree, fna, i),
1093 VEC_index (tree, fnb, i)));
1096 for (; VEC_iterate (tree, fna, i, coef); i++)
1097 VEC_quick_push (tree, ret,
1098 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1099 coef, integer_zero_node));
1100 for (; VEC_iterate (tree, fnb, i, coef); i++)
1101 VEC_quick_push (tree, ret,
1102 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1103 integer_zero_node, coef));
1105 return ret;
1108 /* Returns the sum of affine functions FNA and FNB. */
1110 static affine_fn
1111 affine_fn_plus (affine_fn fna, affine_fn fnb)
1113 return affine_fn_op (PLUS_EXPR, fna, fnb);
1116 /* Returns the difference of affine functions FNA and FNB. */
1118 static affine_fn
1119 affine_fn_minus (affine_fn fna, affine_fn fnb)
1121 return affine_fn_op (MINUS_EXPR, fna, fnb);
1124 /* Frees affine function FN. */
1126 static void
1127 affine_fn_free (affine_fn fn)
1129 VEC_free (tree, heap, fn);
1132 /* Determine for each subscript in the data dependence relation DDR
1133 the distance. */
1135 static void
1136 compute_subscript_distance (struct data_dependence_relation *ddr)
1138 conflict_function *cf_a, *cf_b;
1139 affine_fn fn_a, fn_b, diff;
1141 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1143 unsigned int i;
1145 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1147 struct subscript *subscript;
1149 subscript = DDR_SUBSCRIPT (ddr, i);
1150 cf_a = SUB_CONFLICTS_IN_A (subscript);
1151 cf_b = SUB_CONFLICTS_IN_B (subscript);
1153 fn_a = common_affine_function (cf_a);
1154 fn_b = common_affine_function (cf_b);
1155 if (!fn_a || !fn_b)
1157 SUB_DISTANCE (subscript) = chrec_dont_know;
1158 return;
1160 diff = affine_fn_minus (fn_a, fn_b);
1162 if (affine_function_constant_p (diff))
1163 SUB_DISTANCE (subscript) = affine_function_base (diff);
1164 else
1165 SUB_DISTANCE (subscript) = chrec_dont_know;
1167 affine_fn_free (diff);
1172 /* Returns the conflict function for "unknown". */
1174 static conflict_function *
1175 conflict_fn_not_known (void)
1177 conflict_function *fn = XCNEW (conflict_function);
1178 fn->n = NOT_KNOWN;
1180 return fn;
1183 /* Returns the conflict function for "independent". */
1185 static conflict_function *
1186 conflict_fn_no_dependence (void)
1188 conflict_function *fn = XCNEW (conflict_function);
1189 fn->n = NO_DEPENDENCE;
1191 return fn;
1194 /* Returns true if the address of OBJ is invariant in LOOP. */
1196 static bool
1197 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1199 while (handled_component_p (obj))
1201 if (TREE_CODE (obj) == ARRAY_REF)
1203 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1204 need to check the stride and the lower bound of the reference. */
1205 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1206 loop->num)
1207 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1208 loop->num))
1209 return false;
1211 else if (TREE_CODE (obj) == COMPONENT_REF)
1213 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1214 loop->num))
1215 return false;
1217 obj = TREE_OPERAND (obj, 0);
1220 if (!INDIRECT_REF_P (obj)
1221 && TREE_CODE (obj) != MEM_REF)
1222 return true;
1224 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1225 loop->num);
1228 /* Returns false if we can prove that data references A and B do not alias,
1229 true otherwise. */
1231 bool
1232 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b)
1234 tree addr_a = DR_BASE_OBJECT (a);
1235 tree addr_b = DR_BASE_OBJECT (b);
1237 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
1238 return refs_output_dependent_p (addr_a, addr_b);
1239 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
1240 return refs_anti_dependent_p (addr_a, addr_b);
1241 return refs_may_alias_p (addr_a, addr_b);
1244 static void compute_self_dependence (struct data_dependence_relation *);
1246 /* Initialize a data dependence relation between data accesses A and
1247 B. NB_LOOPS is the number of loops surrounding the references: the
1248 size of the classic distance/direction vectors. */
1250 static struct data_dependence_relation *
1251 initialize_data_dependence_relation (struct data_reference *a,
1252 struct data_reference *b,
1253 VEC (loop_p, heap) *loop_nest)
1255 struct data_dependence_relation *res;
1256 unsigned int i;
1258 res = XNEW (struct data_dependence_relation);
1259 DDR_A (res) = a;
1260 DDR_B (res) = b;
1261 DDR_LOOP_NEST (res) = NULL;
1262 DDR_REVERSED_P (res) = false;
1263 DDR_SUBSCRIPTS (res) = NULL;
1264 DDR_DIR_VECTS (res) = NULL;
1265 DDR_DIST_VECTS (res) = NULL;
1267 if (a == NULL || b == NULL)
1269 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1270 return res;
1273 /* If the data references do not alias, then they are independent. */
1274 if (!dr_may_alias_p (a, b))
1276 DDR_ARE_DEPENDENT (res) = chrec_known;
1277 return res;
1280 /* When the references are exactly the same, don't spend time doing
1281 the data dependence tests, just initialize the ddr and return. */
1282 if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1284 DDR_AFFINE_P (res) = true;
1285 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1286 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1287 DDR_LOOP_NEST (res) = loop_nest;
1288 DDR_INNER_LOOP (res) = 0;
1289 DDR_SELF_REFERENCE (res) = true;
1290 compute_self_dependence (res);
1291 return res;
1294 /* If the references do not access the same object, we do not know
1295 whether they alias or not. */
1296 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1298 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1299 return res;
1302 /* If the base of the object is not invariant in the loop nest, we cannot
1303 analyze it. TODO -- in fact, it would suffice to record that there may
1304 be arbitrary dependences in the loops where the base object varies. */
1305 if (loop_nest
1306 && !object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1307 DR_BASE_OBJECT (a)))
1309 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1310 return res;
1313 /* If the number of dimensions of the access to not agree we can have
1314 a pointer access to a component of the array element type and an
1315 array access while the base-objects are still the same. Punt. */
1316 if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
1318 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1319 return res;
1322 DDR_AFFINE_P (res) = true;
1323 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1324 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1325 DDR_LOOP_NEST (res) = loop_nest;
1326 DDR_INNER_LOOP (res) = 0;
1327 DDR_SELF_REFERENCE (res) = false;
1329 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1331 struct subscript *subscript;
1333 subscript = XNEW (struct subscript);
1334 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1335 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1336 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1337 SUB_DISTANCE (subscript) = chrec_dont_know;
1338 VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1341 return res;
1344 /* Frees memory used by the conflict function F. */
1346 static void
1347 free_conflict_function (conflict_function *f)
1349 unsigned i;
1351 if (CF_NONTRIVIAL_P (f))
1353 for (i = 0; i < f->n; i++)
1354 affine_fn_free (f->fns[i]);
1356 free (f);
1359 /* Frees memory used by SUBSCRIPTS. */
1361 static void
1362 free_subscripts (VEC (subscript_p, heap) *subscripts)
1364 unsigned i;
1365 subscript_p s;
1367 FOR_EACH_VEC_ELT (subscript_p, subscripts, i, s)
1369 free_conflict_function (s->conflicting_iterations_in_a);
1370 free_conflict_function (s->conflicting_iterations_in_b);
1371 free (s);
1373 VEC_free (subscript_p, heap, subscripts);
1376 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1377 description. */
1379 static inline void
1380 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1381 tree chrec)
1383 if (dump_file && (dump_flags & TDF_DETAILS))
1385 fprintf (dump_file, "(dependence classified: ");
1386 print_generic_expr (dump_file, chrec, 0);
1387 fprintf (dump_file, ")\n");
1390 DDR_ARE_DEPENDENT (ddr) = chrec;
1391 free_subscripts (DDR_SUBSCRIPTS (ddr));
1392 DDR_SUBSCRIPTS (ddr) = NULL;
1395 /* The dependence relation DDR cannot be represented by a distance
1396 vector. */
1398 static inline void
1399 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1401 if (dump_file && (dump_flags & TDF_DETAILS))
1402 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1404 DDR_AFFINE_P (ddr) = false;
1409 /* This section contains the classic Banerjee tests. */
1411 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1412 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1414 static inline bool
1415 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1417 return (evolution_function_is_constant_p (chrec_a)
1418 && evolution_function_is_constant_p (chrec_b));
1421 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1422 variable, i.e., if the SIV (Single Index Variable) test is true. */
1424 static bool
1425 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1427 if ((evolution_function_is_constant_p (chrec_a)
1428 && evolution_function_is_univariate_p (chrec_b))
1429 || (evolution_function_is_constant_p (chrec_b)
1430 && evolution_function_is_univariate_p (chrec_a)))
1431 return true;
1433 if (evolution_function_is_univariate_p (chrec_a)
1434 && evolution_function_is_univariate_p (chrec_b))
1436 switch (TREE_CODE (chrec_a))
1438 case POLYNOMIAL_CHREC:
1439 switch (TREE_CODE (chrec_b))
1441 case POLYNOMIAL_CHREC:
1442 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1443 return false;
1445 default:
1446 return true;
1449 default:
1450 return true;
1454 return false;
1457 /* Creates a conflict function with N dimensions. The affine functions
1458 in each dimension follow. */
1460 static conflict_function *
1461 conflict_fn (unsigned n, ...)
1463 unsigned i;
1464 conflict_function *ret = XCNEW (conflict_function);
1465 va_list ap;
1467 gcc_assert (0 < n && n <= MAX_DIM);
1468 va_start(ap, n);
1470 ret->n = n;
1471 for (i = 0; i < n; i++)
1472 ret->fns[i] = va_arg (ap, affine_fn);
1473 va_end(ap);
1475 return ret;
1478 /* Returns constant affine function with value CST. */
1480 static affine_fn
1481 affine_fn_cst (tree cst)
1483 affine_fn fn = VEC_alloc (tree, heap, 1);
1484 VEC_quick_push (tree, fn, cst);
1485 return fn;
1488 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1490 static affine_fn
1491 affine_fn_univar (tree cst, unsigned dim, tree coef)
1493 affine_fn fn = VEC_alloc (tree, heap, dim + 1);
1494 unsigned i;
1496 gcc_assert (dim > 0);
1497 VEC_quick_push (tree, fn, cst);
1498 for (i = 1; i < dim; i++)
1499 VEC_quick_push (tree, fn, integer_zero_node);
1500 VEC_quick_push (tree, fn, coef);
1501 return fn;
1504 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1505 *OVERLAPS_B are initialized to the functions that describe the
1506 relation between the elements accessed twice by CHREC_A and
1507 CHREC_B. For k >= 0, the following property is verified:
1509 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1511 static void
1512 analyze_ziv_subscript (tree chrec_a,
1513 tree chrec_b,
1514 conflict_function **overlaps_a,
1515 conflict_function **overlaps_b,
1516 tree *last_conflicts)
1518 tree type, difference;
1519 dependence_stats.num_ziv++;
1521 if (dump_file && (dump_flags & TDF_DETAILS))
1522 fprintf (dump_file, "(analyze_ziv_subscript \n");
1524 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1525 chrec_a = chrec_convert (type, chrec_a, NULL);
1526 chrec_b = chrec_convert (type, chrec_b, NULL);
1527 difference = chrec_fold_minus (type, chrec_a, chrec_b);
1529 switch (TREE_CODE (difference))
1531 case INTEGER_CST:
1532 if (integer_zerop (difference))
1534 /* The difference is equal to zero: the accessed index
1535 overlaps for each iteration in the loop. */
1536 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1537 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1538 *last_conflicts = chrec_dont_know;
1539 dependence_stats.num_ziv_dependent++;
1541 else
1543 /* The accesses do not overlap. */
1544 *overlaps_a = conflict_fn_no_dependence ();
1545 *overlaps_b = conflict_fn_no_dependence ();
1546 *last_conflicts = integer_zero_node;
1547 dependence_stats.num_ziv_independent++;
1549 break;
1551 default:
1552 /* We're not sure whether the indexes overlap. For the moment,
1553 conservatively answer "don't know". */
1554 if (dump_file && (dump_flags & TDF_DETAILS))
1555 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1557 *overlaps_a = conflict_fn_not_known ();
1558 *overlaps_b = conflict_fn_not_known ();
1559 *last_conflicts = chrec_dont_know;
1560 dependence_stats.num_ziv_unimplemented++;
1561 break;
1564 if (dump_file && (dump_flags & TDF_DETAILS))
1565 fprintf (dump_file, ")\n");
1568 /* Sets NIT to the estimated number of executions of the statements in
1569 LOOP. If CONSERVATIVE is true, we must be sure that NIT is at least as
1570 large as the number of iterations. If we have no reliable estimate,
1571 the function returns false, otherwise returns true. */
1573 bool
1574 estimated_loop_iterations (struct loop *loop, bool conservative,
1575 double_int *nit)
1577 estimate_numbers_of_iterations_loop (loop, true);
1578 if (conservative)
1580 if (!loop->any_upper_bound)
1581 return false;
1583 *nit = loop->nb_iterations_upper_bound;
1585 else
1587 if (!loop->any_estimate)
1588 return false;
1590 *nit = loop->nb_iterations_estimate;
1593 return true;
1596 /* Similar to estimated_loop_iterations, but returns the estimate only
1597 if it fits to HOST_WIDE_INT. If this is not the case, or the estimate
1598 on the number of iterations of LOOP could not be derived, returns -1. */
1600 HOST_WIDE_INT
1601 estimated_loop_iterations_int (struct loop *loop, bool conservative)
1603 double_int nit;
1604 HOST_WIDE_INT hwi_nit;
1606 if (!estimated_loop_iterations (loop, conservative, &nit))
1607 return -1;
1609 if (!double_int_fits_in_shwi_p (nit))
1610 return -1;
1611 hwi_nit = double_int_to_shwi (nit);
1613 return hwi_nit < 0 ? -1 : hwi_nit;
1616 /* Similar to estimated_loop_iterations, but returns the estimate as a tree,
1617 and only if it fits to the int type. If this is not the case, or the
1618 estimate on the number of iterations of LOOP could not be derived, returns
1619 chrec_dont_know. */
1621 static tree
1622 estimated_loop_iterations_tree (struct loop *loop, bool conservative)
1624 double_int nit;
1625 tree type;
1627 if (!estimated_loop_iterations (loop, conservative, &nit))
1628 return chrec_dont_know;
1630 type = lang_hooks.types.type_for_size (INT_TYPE_SIZE, true);
1631 if (!double_int_fits_to_tree_p (type, nit))
1632 return chrec_dont_know;
1634 return double_int_to_tree (type, nit);
1637 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1638 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1639 *OVERLAPS_B are initialized to the functions that describe the
1640 relation between the elements accessed twice by CHREC_A and
1641 CHREC_B. For k >= 0, the following property is verified:
1643 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1645 static void
1646 analyze_siv_subscript_cst_affine (tree chrec_a,
1647 tree chrec_b,
1648 conflict_function **overlaps_a,
1649 conflict_function **overlaps_b,
1650 tree *last_conflicts)
1652 bool value0, value1, value2;
1653 tree type, difference, tmp;
1655 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1656 chrec_a = chrec_convert (type, chrec_a, NULL);
1657 chrec_b = chrec_convert (type, chrec_b, NULL);
1658 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1660 if (!chrec_is_positive (initial_condition (difference), &value0))
1662 if (dump_file && (dump_flags & TDF_DETAILS))
1663 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1665 dependence_stats.num_siv_unimplemented++;
1666 *overlaps_a = conflict_fn_not_known ();
1667 *overlaps_b = conflict_fn_not_known ();
1668 *last_conflicts = chrec_dont_know;
1669 return;
1671 else
1673 if (value0 == false)
1675 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1677 if (dump_file && (dump_flags & TDF_DETAILS))
1678 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1680 *overlaps_a = conflict_fn_not_known ();
1681 *overlaps_b = conflict_fn_not_known ();
1682 *last_conflicts = chrec_dont_know;
1683 dependence_stats.num_siv_unimplemented++;
1684 return;
1686 else
1688 if (value1 == true)
1690 /* Example:
1691 chrec_a = 12
1692 chrec_b = {10, +, 1}
1695 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1697 HOST_WIDE_INT numiter;
1698 struct loop *loop = get_chrec_loop (chrec_b);
1700 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1701 tmp = fold_build2 (EXACT_DIV_EXPR, type,
1702 fold_build1 (ABS_EXPR, type, difference),
1703 CHREC_RIGHT (chrec_b));
1704 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1705 *last_conflicts = integer_one_node;
1708 /* Perform weak-zero siv test to see if overlap is
1709 outside the loop bounds. */
1710 numiter = estimated_loop_iterations_int (loop, false);
1712 if (numiter >= 0
1713 && compare_tree_int (tmp, numiter) > 0)
1715 free_conflict_function (*overlaps_a);
1716 free_conflict_function (*overlaps_b);
1717 *overlaps_a = conflict_fn_no_dependence ();
1718 *overlaps_b = conflict_fn_no_dependence ();
1719 *last_conflicts = integer_zero_node;
1720 dependence_stats.num_siv_independent++;
1721 return;
1723 dependence_stats.num_siv_dependent++;
1724 return;
1727 /* When the step does not divide the difference, there are
1728 no overlaps. */
1729 else
1731 *overlaps_a = conflict_fn_no_dependence ();
1732 *overlaps_b = conflict_fn_no_dependence ();
1733 *last_conflicts = integer_zero_node;
1734 dependence_stats.num_siv_independent++;
1735 return;
1739 else
1741 /* Example:
1742 chrec_a = 12
1743 chrec_b = {10, +, -1}
1745 In this case, chrec_a will not overlap with chrec_b. */
1746 *overlaps_a = conflict_fn_no_dependence ();
1747 *overlaps_b = conflict_fn_no_dependence ();
1748 *last_conflicts = integer_zero_node;
1749 dependence_stats.num_siv_independent++;
1750 return;
1754 else
1756 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1758 if (dump_file && (dump_flags & TDF_DETAILS))
1759 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1761 *overlaps_a = conflict_fn_not_known ();
1762 *overlaps_b = conflict_fn_not_known ();
1763 *last_conflicts = chrec_dont_know;
1764 dependence_stats.num_siv_unimplemented++;
1765 return;
1767 else
1769 if (value2 == false)
1771 /* Example:
1772 chrec_a = 3
1773 chrec_b = {10, +, -1}
1775 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1777 HOST_WIDE_INT numiter;
1778 struct loop *loop = get_chrec_loop (chrec_b);
1780 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1781 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
1782 CHREC_RIGHT (chrec_b));
1783 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1784 *last_conflicts = integer_one_node;
1786 /* Perform weak-zero siv test to see if overlap is
1787 outside the loop bounds. */
1788 numiter = estimated_loop_iterations_int (loop, false);
1790 if (numiter >= 0
1791 && compare_tree_int (tmp, numiter) > 0)
1793 free_conflict_function (*overlaps_a);
1794 free_conflict_function (*overlaps_b);
1795 *overlaps_a = conflict_fn_no_dependence ();
1796 *overlaps_b = conflict_fn_no_dependence ();
1797 *last_conflicts = integer_zero_node;
1798 dependence_stats.num_siv_independent++;
1799 return;
1801 dependence_stats.num_siv_dependent++;
1802 return;
1805 /* When the step does not divide the difference, there
1806 are no overlaps. */
1807 else
1809 *overlaps_a = conflict_fn_no_dependence ();
1810 *overlaps_b = conflict_fn_no_dependence ();
1811 *last_conflicts = integer_zero_node;
1812 dependence_stats.num_siv_independent++;
1813 return;
1816 else
1818 /* Example:
1819 chrec_a = 3
1820 chrec_b = {4, +, 1}
1822 In this case, chrec_a will not overlap with chrec_b. */
1823 *overlaps_a = conflict_fn_no_dependence ();
1824 *overlaps_b = conflict_fn_no_dependence ();
1825 *last_conflicts = integer_zero_node;
1826 dependence_stats.num_siv_independent++;
1827 return;
1834 /* Helper recursive function for initializing the matrix A. Returns
1835 the initial value of CHREC. */
1837 static tree
1838 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
1840 gcc_assert (chrec);
1842 switch (TREE_CODE (chrec))
1844 case POLYNOMIAL_CHREC:
1845 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
1847 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
1848 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
1850 case PLUS_EXPR:
1851 case MULT_EXPR:
1852 case MINUS_EXPR:
1854 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1855 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
1857 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
1860 case NOP_EXPR:
1862 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1863 return chrec_convert (chrec_type (chrec), op, NULL);
1866 case BIT_NOT_EXPR:
1868 /* Handle ~X as -1 - X. */
1869 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1870 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
1871 build_int_cst (TREE_TYPE (chrec), -1), op);
1874 case INTEGER_CST:
1875 return chrec;
1877 default:
1878 gcc_unreachable ();
1879 return NULL_TREE;
1883 #define FLOOR_DIV(x,y) ((x) / (y))
1885 /* Solves the special case of the Diophantine equation:
1886 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
1888 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
1889 number of iterations that loops X and Y run. The overlaps will be
1890 constructed as evolutions in dimension DIM. */
1892 static void
1893 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
1894 affine_fn *overlaps_a,
1895 affine_fn *overlaps_b,
1896 tree *last_conflicts, int dim)
1898 if (((step_a > 0 && step_b > 0)
1899 || (step_a < 0 && step_b < 0)))
1901 int step_overlaps_a, step_overlaps_b;
1902 int gcd_steps_a_b, last_conflict, tau2;
1904 gcd_steps_a_b = gcd (step_a, step_b);
1905 step_overlaps_a = step_b / gcd_steps_a_b;
1906 step_overlaps_b = step_a / gcd_steps_a_b;
1908 if (niter > 0)
1910 tau2 = FLOOR_DIV (niter, step_overlaps_a);
1911 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
1912 last_conflict = tau2;
1913 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
1915 else
1916 *last_conflicts = chrec_dont_know;
1918 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
1919 build_int_cst (NULL_TREE,
1920 step_overlaps_a));
1921 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
1922 build_int_cst (NULL_TREE,
1923 step_overlaps_b));
1926 else
1928 *overlaps_a = affine_fn_cst (integer_zero_node);
1929 *overlaps_b = affine_fn_cst (integer_zero_node);
1930 *last_conflicts = integer_zero_node;
1934 /* Solves the special case of a Diophantine equation where CHREC_A is
1935 an affine bivariate function, and CHREC_B is an affine univariate
1936 function. For example,
1938 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
1940 has the following overlapping functions:
1942 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
1943 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
1944 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
1946 FORNOW: This is a specialized implementation for a case occurring in
1947 a common benchmark. Implement the general algorithm. */
1949 static void
1950 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
1951 conflict_function **overlaps_a,
1952 conflict_function **overlaps_b,
1953 tree *last_conflicts)
1955 bool xz_p, yz_p, xyz_p;
1956 int step_x, step_y, step_z;
1957 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
1958 affine_fn overlaps_a_xz, overlaps_b_xz;
1959 affine_fn overlaps_a_yz, overlaps_b_yz;
1960 affine_fn overlaps_a_xyz, overlaps_b_xyz;
1961 affine_fn ova1, ova2, ovb;
1962 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
1964 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
1965 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
1966 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
1968 niter_x =
1969 estimated_loop_iterations_int (get_chrec_loop (CHREC_LEFT (chrec_a)),
1970 false);
1971 niter_y = estimated_loop_iterations_int (get_chrec_loop (chrec_a), false);
1972 niter_z = estimated_loop_iterations_int (get_chrec_loop (chrec_b), false);
1974 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
1976 if (dump_file && (dump_flags & TDF_DETAILS))
1977 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
1979 *overlaps_a = conflict_fn_not_known ();
1980 *overlaps_b = conflict_fn_not_known ();
1981 *last_conflicts = chrec_dont_know;
1982 return;
1985 niter = MIN (niter_x, niter_z);
1986 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
1987 &overlaps_a_xz,
1988 &overlaps_b_xz,
1989 &last_conflicts_xz, 1);
1990 niter = MIN (niter_y, niter_z);
1991 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
1992 &overlaps_a_yz,
1993 &overlaps_b_yz,
1994 &last_conflicts_yz, 2);
1995 niter = MIN (niter_x, niter_z);
1996 niter = MIN (niter_y, niter);
1997 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
1998 &overlaps_a_xyz,
1999 &overlaps_b_xyz,
2000 &last_conflicts_xyz, 3);
2002 xz_p = !integer_zerop (last_conflicts_xz);
2003 yz_p = !integer_zerop (last_conflicts_yz);
2004 xyz_p = !integer_zerop (last_conflicts_xyz);
2006 if (xz_p || yz_p || xyz_p)
2008 ova1 = affine_fn_cst (integer_zero_node);
2009 ova2 = affine_fn_cst (integer_zero_node);
2010 ovb = affine_fn_cst (integer_zero_node);
2011 if (xz_p)
2013 affine_fn t0 = ova1;
2014 affine_fn t2 = ovb;
2016 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2017 ovb = affine_fn_plus (ovb, overlaps_b_xz);
2018 affine_fn_free (t0);
2019 affine_fn_free (t2);
2020 *last_conflicts = last_conflicts_xz;
2022 if (yz_p)
2024 affine_fn t0 = ova2;
2025 affine_fn t2 = ovb;
2027 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2028 ovb = affine_fn_plus (ovb, overlaps_b_yz);
2029 affine_fn_free (t0);
2030 affine_fn_free (t2);
2031 *last_conflicts = last_conflicts_yz;
2033 if (xyz_p)
2035 affine_fn t0 = ova1;
2036 affine_fn t2 = ova2;
2037 affine_fn t4 = ovb;
2039 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2040 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2041 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2042 affine_fn_free (t0);
2043 affine_fn_free (t2);
2044 affine_fn_free (t4);
2045 *last_conflicts = last_conflicts_xyz;
2047 *overlaps_a = conflict_fn (2, ova1, ova2);
2048 *overlaps_b = conflict_fn (1, ovb);
2050 else
2052 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2053 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2054 *last_conflicts = integer_zero_node;
2057 affine_fn_free (overlaps_a_xz);
2058 affine_fn_free (overlaps_b_xz);
2059 affine_fn_free (overlaps_a_yz);
2060 affine_fn_free (overlaps_b_yz);
2061 affine_fn_free (overlaps_a_xyz);
2062 affine_fn_free (overlaps_b_xyz);
2065 /* Determines the overlapping elements due to accesses CHREC_A and
2066 CHREC_B, that are affine functions. This function cannot handle
2067 symbolic evolution functions, ie. when initial conditions are
2068 parameters, because it uses lambda matrices of integers. */
2070 static void
2071 analyze_subscript_affine_affine (tree chrec_a,
2072 tree chrec_b,
2073 conflict_function **overlaps_a,
2074 conflict_function **overlaps_b,
2075 tree *last_conflicts)
2077 unsigned nb_vars_a, nb_vars_b, dim;
2078 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2079 lambda_matrix A, U, S;
2080 struct obstack scratch_obstack;
2082 if (eq_evolutions_p (chrec_a, chrec_b))
2084 /* The accessed index overlaps for each iteration in the
2085 loop. */
2086 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2087 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2088 *last_conflicts = chrec_dont_know;
2089 return;
2091 if (dump_file && (dump_flags & TDF_DETAILS))
2092 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2094 /* For determining the initial intersection, we have to solve a
2095 Diophantine equation. This is the most time consuming part.
2097 For answering to the question: "Is there a dependence?" we have
2098 to prove that there exists a solution to the Diophantine
2099 equation, and that the solution is in the iteration domain,
2100 i.e. the solution is positive or zero, and that the solution
2101 happens before the upper bound loop.nb_iterations. Otherwise
2102 there is no dependence. This function outputs a description of
2103 the iterations that hold the intersections. */
2105 nb_vars_a = nb_vars_in_chrec (chrec_a);
2106 nb_vars_b = nb_vars_in_chrec (chrec_b);
2108 gcc_obstack_init (&scratch_obstack);
2110 dim = nb_vars_a + nb_vars_b;
2111 U = lambda_matrix_new (dim, dim, &scratch_obstack);
2112 A = lambda_matrix_new (dim, 1, &scratch_obstack);
2113 S = lambda_matrix_new (dim, 1, &scratch_obstack);
2115 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2116 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
2117 gamma = init_b - init_a;
2119 /* Don't do all the hard work of solving the Diophantine equation
2120 when we already know the solution: for example,
2121 | {3, +, 1}_1
2122 | {3, +, 4}_2
2123 | gamma = 3 - 3 = 0.
2124 Then the first overlap occurs during the first iterations:
2125 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2127 if (gamma == 0)
2129 if (nb_vars_a == 1 && nb_vars_b == 1)
2131 HOST_WIDE_INT step_a, step_b;
2132 HOST_WIDE_INT niter, niter_a, niter_b;
2133 affine_fn ova, ovb;
2135 niter_a = estimated_loop_iterations_int (get_chrec_loop (chrec_a),
2136 false);
2137 niter_b = estimated_loop_iterations_int (get_chrec_loop (chrec_b),
2138 false);
2139 niter = MIN (niter_a, niter_b);
2140 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2141 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2143 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2144 &ova, &ovb,
2145 last_conflicts, 1);
2146 *overlaps_a = conflict_fn (1, ova);
2147 *overlaps_b = conflict_fn (1, ovb);
2150 else if (nb_vars_a == 2 && nb_vars_b == 1)
2151 compute_overlap_steps_for_affine_1_2
2152 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2154 else if (nb_vars_a == 1 && nb_vars_b == 2)
2155 compute_overlap_steps_for_affine_1_2
2156 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2158 else
2160 if (dump_file && (dump_flags & TDF_DETAILS))
2161 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2162 *overlaps_a = conflict_fn_not_known ();
2163 *overlaps_b = conflict_fn_not_known ();
2164 *last_conflicts = chrec_dont_know;
2166 goto end_analyze_subs_aa;
2169 /* U.A = S */
2170 lambda_matrix_right_hermite (A, dim, 1, S, U);
2172 if (S[0][0] < 0)
2174 S[0][0] *= -1;
2175 lambda_matrix_row_negate (U, dim, 0);
2177 gcd_alpha_beta = S[0][0];
2179 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2180 but that is a quite strange case. Instead of ICEing, answer
2181 don't know. */
2182 if (gcd_alpha_beta == 0)
2184 *overlaps_a = conflict_fn_not_known ();
2185 *overlaps_b = conflict_fn_not_known ();
2186 *last_conflicts = chrec_dont_know;
2187 goto end_analyze_subs_aa;
2190 /* The classic "gcd-test". */
2191 if (!int_divides_p (gcd_alpha_beta, gamma))
2193 /* The "gcd-test" has determined that there is no integer
2194 solution, i.e. there is no dependence. */
2195 *overlaps_a = conflict_fn_no_dependence ();
2196 *overlaps_b = conflict_fn_no_dependence ();
2197 *last_conflicts = integer_zero_node;
2200 /* Both access functions are univariate. This includes SIV and MIV cases. */
2201 else if (nb_vars_a == 1 && nb_vars_b == 1)
2203 /* Both functions should have the same evolution sign. */
2204 if (((A[0][0] > 0 && -A[1][0] > 0)
2205 || (A[0][0] < 0 && -A[1][0] < 0)))
2207 /* The solutions are given by:
2209 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2210 | [u21 u22] [y0]
2212 For a given integer t. Using the following variables,
2214 | i0 = u11 * gamma / gcd_alpha_beta
2215 | j0 = u12 * gamma / gcd_alpha_beta
2216 | i1 = u21
2217 | j1 = u22
2219 the solutions are:
2221 | x0 = i0 + i1 * t,
2222 | y0 = j0 + j1 * t. */
2223 HOST_WIDE_INT i0, j0, i1, j1;
2225 i0 = U[0][0] * gamma / gcd_alpha_beta;
2226 j0 = U[0][1] * gamma / gcd_alpha_beta;
2227 i1 = U[1][0];
2228 j1 = U[1][1];
2230 if ((i1 == 0 && i0 < 0)
2231 || (j1 == 0 && j0 < 0))
2233 /* There is no solution.
2234 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2235 falls in here, but for the moment we don't look at the
2236 upper bound of the iteration domain. */
2237 *overlaps_a = conflict_fn_no_dependence ();
2238 *overlaps_b = conflict_fn_no_dependence ();
2239 *last_conflicts = integer_zero_node;
2240 goto end_analyze_subs_aa;
2243 if (i1 > 0 && j1 > 0)
2245 HOST_WIDE_INT niter_a = estimated_loop_iterations_int
2246 (get_chrec_loop (chrec_a), false);
2247 HOST_WIDE_INT niter_b = estimated_loop_iterations_int
2248 (get_chrec_loop (chrec_b), false);
2249 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2251 /* (X0, Y0) is a solution of the Diophantine equation:
2252 "chrec_a (X0) = chrec_b (Y0)". */
2253 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2254 CEIL (-j0, j1));
2255 HOST_WIDE_INT x0 = i1 * tau1 + i0;
2256 HOST_WIDE_INT y0 = j1 * tau1 + j0;
2258 /* (X1, Y1) is the smallest positive solution of the eq
2259 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2260 first conflict occurs. */
2261 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2262 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2263 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2265 if (niter > 0)
2267 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2268 FLOOR_DIV (niter - j0, j1));
2269 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2271 /* If the overlap occurs outside of the bounds of the
2272 loop, there is no dependence. */
2273 if (x1 >= niter || y1 >= niter)
2275 *overlaps_a = conflict_fn_no_dependence ();
2276 *overlaps_b = conflict_fn_no_dependence ();
2277 *last_conflicts = integer_zero_node;
2278 goto end_analyze_subs_aa;
2280 else
2281 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2283 else
2284 *last_conflicts = chrec_dont_know;
2286 *overlaps_a
2287 = conflict_fn (1,
2288 affine_fn_univar (build_int_cst (NULL_TREE, x1),
2290 build_int_cst (NULL_TREE, i1)));
2291 *overlaps_b
2292 = conflict_fn (1,
2293 affine_fn_univar (build_int_cst (NULL_TREE, y1),
2295 build_int_cst (NULL_TREE, j1)));
2297 else
2299 /* FIXME: For the moment, the upper bound of the
2300 iteration domain for i and j is not checked. */
2301 if (dump_file && (dump_flags & TDF_DETAILS))
2302 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2303 *overlaps_a = conflict_fn_not_known ();
2304 *overlaps_b = conflict_fn_not_known ();
2305 *last_conflicts = chrec_dont_know;
2308 else
2310 if (dump_file && (dump_flags & TDF_DETAILS))
2311 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2312 *overlaps_a = conflict_fn_not_known ();
2313 *overlaps_b = conflict_fn_not_known ();
2314 *last_conflicts = chrec_dont_know;
2317 else
2319 if (dump_file && (dump_flags & TDF_DETAILS))
2320 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2321 *overlaps_a = conflict_fn_not_known ();
2322 *overlaps_b = conflict_fn_not_known ();
2323 *last_conflicts = chrec_dont_know;
2326 end_analyze_subs_aa:
2327 obstack_free (&scratch_obstack, NULL);
2328 if (dump_file && (dump_flags & TDF_DETAILS))
2330 fprintf (dump_file, " (overlaps_a = ");
2331 dump_conflict_function (dump_file, *overlaps_a);
2332 fprintf (dump_file, ")\n (overlaps_b = ");
2333 dump_conflict_function (dump_file, *overlaps_b);
2334 fprintf (dump_file, ")\n");
2335 fprintf (dump_file, ")\n");
2339 /* Returns true when analyze_subscript_affine_affine can be used for
2340 determining the dependence relation between chrec_a and chrec_b,
2341 that contain symbols. This function modifies chrec_a and chrec_b
2342 such that the analysis result is the same, and such that they don't
2343 contain symbols, and then can safely be passed to the analyzer.
2345 Example: The analysis of the following tuples of evolutions produce
2346 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2347 vs. {0, +, 1}_1
2349 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2350 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2353 static bool
2354 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2356 tree diff, type, left_a, left_b, right_b;
2358 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2359 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2360 /* FIXME: For the moment not handled. Might be refined later. */
2361 return false;
2363 type = chrec_type (*chrec_a);
2364 left_a = CHREC_LEFT (*chrec_a);
2365 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
2366 diff = chrec_fold_minus (type, left_a, left_b);
2368 if (!evolution_function_is_constant_p (diff))
2369 return false;
2371 if (dump_file && (dump_flags & TDF_DETAILS))
2372 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2374 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2375 diff, CHREC_RIGHT (*chrec_a));
2376 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
2377 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2378 build_int_cst (type, 0),
2379 right_b);
2380 return true;
2383 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2384 *OVERLAPS_B are initialized to the functions that describe the
2385 relation between the elements accessed twice by CHREC_A and
2386 CHREC_B. For k >= 0, the following property is verified:
2388 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2390 static void
2391 analyze_siv_subscript (tree chrec_a,
2392 tree chrec_b,
2393 conflict_function **overlaps_a,
2394 conflict_function **overlaps_b,
2395 tree *last_conflicts,
2396 int loop_nest_num)
2398 dependence_stats.num_siv++;
2400 if (dump_file && (dump_flags & TDF_DETAILS))
2401 fprintf (dump_file, "(analyze_siv_subscript \n");
2403 if (evolution_function_is_constant_p (chrec_a)
2404 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2405 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2406 overlaps_a, overlaps_b, last_conflicts);
2408 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2409 && evolution_function_is_constant_p (chrec_b))
2410 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2411 overlaps_b, overlaps_a, last_conflicts);
2413 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2414 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2416 if (!chrec_contains_symbols (chrec_a)
2417 && !chrec_contains_symbols (chrec_b))
2419 analyze_subscript_affine_affine (chrec_a, chrec_b,
2420 overlaps_a, overlaps_b,
2421 last_conflicts);
2423 if (CF_NOT_KNOWN_P (*overlaps_a)
2424 || CF_NOT_KNOWN_P (*overlaps_b))
2425 dependence_stats.num_siv_unimplemented++;
2426 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2427 || CF_NO_DEPENDENCE_P (*overlaps_b))
2428 dependence_stats.num_siv_independent++;
2429 else
2430 dependence_stats.num_siv_dependent++;
2432 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2433 &chrec_b))
2435 analyze_subscript_affine_affine (chrec_a, chrec_b,
2436 overlaps_a, overlaps_b,
2437 last_conflicts);
2439 if (CF_NOT_KNOWN_P (*overlaps_a)
2440 || CF_NOT_KNOWN_P (*overlaps_b))
2441 dependence_stats.num_siv_unimplemented++;
2442 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2443 || CF_NO_DEPENDENCE_P (*overlaps_b))
2444 dependence_stats.num_siv_independent++;
2445 else
2446 dependence_stats.num_siv_dependent++;
2448 else
2449 goto siv_subscript_dontknow;
2452 else
2454 siv_subscript_dontknow:;
2455 if (dump_file && (dump_flags & TDF_DETAILS))
2456 fprintf (dump_file, "siv test failed: unimplemented.\n");
2457 *overlaps_a = conflict_fn_not_known ();
2458 *overlaps_b = conflict_fn_not_known ();
2459 *last_conflicts = chrec_dont_know;
2460 dependence_stats.num_siv_unimplemented++;
2463 if (dump_file && (dump_flags & TDF_DETAILS))
2464 fprintf (dump_file, ")\n");
2467 /* Returns false if we can prove that the greatest common divisor of the steps
2468 of CHREC does not divide CST, false otherwise. */
2470 static bool
2471 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2473 HOST_WIDE_INT cd = 0, val;
2474 tree step;
2476 if (!host_integerp (cst, 0))
2477 return true;
2478 val = tree_low_cst (cst, 0);
2480 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2482 step = CHREC_RIGHT (chrec);
2483 if (!host_integerp (step, 0))
2484 return true;
2485 cd = gcd (cd, tree_low_cst (step, 0));
2486 chrec = CHREC_LEFT (chrec);
2489 return val % cd == 0;
2492 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2493 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2494 functions that describe the relation between the elements accessed
2495 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2496 is verified:
2498 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2500 static void
2501 analyze_miv_subscript (tree chrec_a,
2502 tree chrec_b,
2503 conflict_function **overlaps_a,
2504 conflict_function **overlaps_b,
2505 tree *last_conflicts,
2506 struct loop *loop_nest)
2508 /* FIXME: This is a MIV subscript, not yet handled.
2509 Example: (A[{1, +, 1}_1] vs. A[{1, +, 1}_2]) that comes from
2510 (A[i] vs. A[j]).
2512 In the SIV test we had to solve a Diophantine equation with two
2513 variables. In the MIV case we have to solve a Diophantine
2514 equation with 2*n variables (if the subscript uses n IVs).
2516 tree type, difference;
2518 dependence_stats.num_miv++;
2519 if (dump_file && (dump_flags & TDF_DETAILS))
2520 fprintf (dump_file, "(analyze_miv_subscript \n");
2522 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2523 chrec_a = chrec_convert (type, chrec_a, NULL);
2524 chrec_b = chrec_convert (type, chrec_b, NULL);
2525 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2527 if (eq_evolutions_p (chrec_a, chrec_b))
2529 /* Access functions are the same: all the elements are accessed
2530 in the same order. */
2531 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2532 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2533 *last_conflicts = estimated_loop_iterations_tree
2534 (get_chrec_loop (chrec_a), true);
2535 dependence_stats.num_miv_dependent++;
2538 else if (evolution_function_is_constant_p (difference)
2539 /* For the moment, the following is verified:
2540 evolution_function_is_affine_multivariate_p (chrec_a,
2541 loop_nest->num) */
2542 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2544 /* testsuite/.../ssa-chrec-33.c
2545 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2547 The difference is 1, and all the evolution steps are multiples
2548 of 2, consequently there are no overlapping elements. */
2549 *overlaps_a = conflict_fn_no_dependence ();
2550 *overlaps_b = conflict_fn_no_dependence ();
2551 *last_conflicts = integer_zero_node;
2552 dependence_stats.num_miv_independent++;
2555 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2556 && !chrec_contains_symbols (chrec_a)
2557 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2558 && !chrec_contains_symbols (chrec_b))
2560 /* testsuite/.../ssa-chrec-35.c
2561 {0, +, 1}_2 vs. {0, +, 1}_3
2562 the overlapping elements are respectively located at iterations:
2563 {0, +, 1}_x and {0, +, 1}_x,
2564 in other words, we have the equality:
2565 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2567 Other examples:
2568 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2569 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2571 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2572 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2574 analyze_subscript_affine_affine (chrec_a, chrec_b,
2575 overlaps_a, overlaps_b, last_conflicts);
2577 if (CF_NOT_KNOWN_P (*overlaps_a)
2578 || CF_NOT_KNOWN_P (*overlaps_b))
2579 dependence_stats.num_miv_unimplemented++;
2580 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2581 || CF_NO_DEPENDENCE_P (*overlaps_b))
2582 dependence_stats.num_miv_independent++;
2583 else
2584 dependence_stats.num_miv_dependent++;
2587 else
2589 /* When the analysis is too difficult, answer "don't know". */
2590 if (dump_file && (dump_flags & TDF_DETAILS))
2591 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2593 *overlaps_a = conflict_fn_not_known ();
2594 *overlaps_b = conflict_fn_not_known ();
2595 *last_conflicts = chrec_dont_know;
2596 dependence_stats.num_miv_unimplemented++;
2599 if (dump_file && (dump_flags & TDF_DETAILS))
2600 fprintf (dump_file, ")\n");
2603 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2604 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2605 OVERLAP_ITERATIONS_B are initialized with two functions that
2606 describe the iterations that contain conflicting elements.
2608 Remark: For an integer k >= 0, the following equality is true:
2610 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2613 static void
2614 analyze_overlapping_iterations (tree chrec_a,
2615 tree chrec_b,
2616 conflict_function **overlap_iterations_a,
2617 conflict_function **overlap_iterations_b,
2618 tree *last_conflicts, struct loop *loop_nest)
2620 unsigned int lnn = loop_nest->num;
2622 dependence_stats.num_subscript_tests++;
2624 if (dump_file && (dump_flags & TDF_DETAILS))
2626 fprintf (dump_file, "(analyze_overlapping_iterations \n");
2627 fprintf (dump_file, " (chrec_a = ");
2628 print_generic_expr (dump_file, chrec_a, 0);
2629 fprintf (dump_file, ")\n (chrec_b = ");
2630 print_generic_expr (dump_file, chrec_b, 0);
2631 fprintf (dump_file, ")\n");
2634 if (chrec_a == NULL_TREE
2635 || chrec_b == NULL_TREE
2636 || chrec_contains_undetermined (chrec_a)
2637 || chrec_contains_undetermined (chrec_b))
2639 dependence_stats.num_subscript_undetermined++;
2641 *overlap_iterations_a = conflict_fn_not_known ();
2642 *overlap_iterations_b = conflict_fn_not_known ();
2645 /* If they are the same chrec, and are affine, they overlap
2646 on every iteration. */
2647 else if (eq_evolutions_p (chrec_a, chrec_b)
2648 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2649 || operand_equal_p (chrec_a, chrec_b, 0)))
2651 dependence_stats.num_same_subscript_function++;
2652 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2653 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2654 *last_conflicts = chrec_dont_know;
2657 /* If they aren't the same, and aren't affine, we can't do anything
2658 yet. */
2659 else if ((chrec_contains_symbols (chrec_a)
2660 || chrec_contains_symbols (chrec_b))
2661 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2662 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
2664 dependence_stats.num_subscript_undetermined++;
2665 *overlap_iterations_a = conflict_fn_not_known ();
2666 *overlap_iterations_b = conflict_fn_not_known ();
2669 else if (ziv_subscript_p (chrec_a, chrec_b))
2670 analyze_ziv_subscript (chrec_a, chrec_b,
2671 overlap_iterations_a, overlap_iterations_b,
2672 last_conflicts);
2674 else if (siv_subscript_p (chrec_a, chrec_b))
2675 analyze_siv_subscript (chrec_a, chrec_b,
2676 overlap_iterations_a, overlap_iterations_b,
2677 last_conflicts, lnn);
2679 else
2680 analyze_miv_subscript (chrec_a, chrec_b,
2681 overlap_iterations_a, overlap_iterations_b,
2682 last_conflicts, loop_nest);
2684 if (dump_file && (dump_flags & TDF_DETAILS))
2686 fprintf (dump_file, " (overlap_iterations_a = ");
2687 dump_conflict_function (dump_file, *overlap_iterations_a);
2688 fprintf (dump_file, ")\n (overlap_iterations_b = ");
2689 dump_conflict_function (dump_file, *overlap_iterations_b);
2690 fprintf (dump_file, ")\n");
2691 fprintf (dump_file, ")\n");
2695 /* Helper function for uniquely inserting distance vectors. */
2697 static void
2698 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
2700 unsigned i;
2701 lambda_vector v;
2703 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, v)
2704 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
2705 return;
2707 VEC_safe_push (lambda_vector, heap, DDR_DIST_VECTS (ddr), dist_v);
2710 /* Helper function for uniquely inserting direction vectors. */
2712 static void
2713 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
2715 unsigned i;
2716 lambda_vector v;
2718 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), i, v)
2719 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
2720 return;
2722 VEC_safe_push (lambda_vector, heap, DDR_DIR_VECTS (ddr), dir_v);
2725 /* Add a distance of 1 on all the loops outer than INDEX. If we
2726 haven't yet determined a distance for this outer loop, push a new
2727 distance vector composed of the previous distance, and a distance
2728 of 1 for this outer loop. Example:
2730 | loop_1
2731 | loop_2
2732 | A[10]
2733 | endloop_2
2734 | endloop_1
2736 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
2737 save (0, 1), then we have to save (1, 0). */
2739 static void
2740 add_outer_distances (struct data_dependence_relation *ddr,
2741 lambda_vector dist_v, int index)
2743 /* For each outer loop where init_v is not set, the accesses are
2744 in dependence of distance 1 in the loop. */
2745 while (--index >= 0)
2747 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2748 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
2749 save_v[index] = 1;
2750 save_dist_v (ddr, save_v);
2754 /* Return false when fail to represent the data dependence as a
2755 distance vector. INIT_B is set to true when a component has been
2756 added to the distance vector DIST_V. INDEX_CARRY is then set to
2757 the index in DIST_V that carries the dependence. */
2759 static bool
2760 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
2761 struct data_reference *ddr_a,
2762 struct data_reference *ddr_b,
2763 lambda_vector dist_v, bool *init_b,
2764 int *index_carry)
2766 unsigned i;
2767 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2769 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2771 tree access_fn_a, access_fn_b;
2772 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
2774 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2776 non_affine_dependence_relation (ddr);
2777 return false;
2780 access_fn_a = DR_ACCESS_FN (ddr_a, i);
2781 access_fn_b = DR_ACCESS_FN (ddr_b, i);
2783 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
2784 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
2786 int dist, index;
2787 int index_a = index_in_loop_nest (CHREC_VARIABLE (access_fn_a),
2788 DDR_LOOP_NEST (ddr));
2789 int index_b = index_in_loop_nest (CHREC_VARIABLE (access_fn_b),
2790 DDR_LOOP_NEST (ddr));
2792 /* The dependence is carried by the outermost loop. Example:
2793 | loop_1
2794 | A[{4, +, 1}_1]
2795 | loop_2
2796 | A[{5, +, 1}_2]
2797 | endloop_2
2798 | endloop_1
2799 In this case, the dependence is carried by loop_1. */
2800 index = index_a < index_b ? index_a : index_b;
2801 *index_carry = MIN (index, *index_carry);
2803 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2805 non_affine_dependence_relation (ddr);
2806 return false;
2809 dist = int_cst_value (SUB_DISTANCE (subscript));
2811 /* This is the subscript coupling test. If we have already
2812 recorded a distance for this loop (a distance coming from
2813 another subscript), it should be the same. For example,
2814 in the following code, there is no dependence:
2816 | loop i = 0, N, 1
2817 | T[i+1][i] = ...
2818 | ... = T[i][i]
2819 | endloop
2821 if (init_v[index] != 0 && dist_v[index] != dist)
2823 finalize_ddr_dependent (ddr, chrec_known);
2824 return false;
2827 dist_v[index] = dist;
2828 init_v[index] = 1;
2829 *init_b = true;
2831 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
2833 /* This can be for example an affine vs. constant dependence
2834 (T[i] vs. T[3]) that is not an affine dependence and is
2835 not representable as a distance vector. */
2836 non_affine_dependence_relation (ddr);
2837 return false;
2841 return true;
2844 /* Return true when the DDR contains only constant access functions. */
2846 static bool
2847 constant_access_functions (const struct data_dependence_relation *ddr)
2849 unsigned i;
2851 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2852 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
2853 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
2854 return false;
2856 return true;
2859 /* Helper function for the case where DDR_A and DDR_B are the same
2860 multivariate access function with a constant step. For an example
2861 see pr34635-1.c. */
2863 static void
2864 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
2866 int x_1, x_2;
2867 tree c_1 = CHREC_LEFT (c_2);
2868 tree c_0 = CHREC_LEFT (c_1);
2869 lambda_vector dist_v;
2870 int v1, v2, cd;
2872 /* Polynomials with more than 2 variables are not handled yet. When
2873 the evolution steps are parameters, it is not possible to
2874 represent the dependence using classical distance vectors. */
2875 if (TREE_CODE (c_0) != INTEGER_CST
2876 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
2877 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
2879 DDR_AFFINE_P (ddr) = false;
2880 return;
2883 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
2884 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
2886 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
2887 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2888 v1 = int_cst_value (CHREC_RIGHT (c_1));
2889 v2 = int_cst_value (CHREC_RIGHT (c_2));
2890 cd = gcd (v1, v2);
2891 v1 /= cd;
2892 v2 /= cd;
2894 if (v2 < 0)
2896 v2 = -v2;
2897 v1 = -v1;
2900 dist_v[x_1] = v2;
2901 dist_v[x_2] = -v1;
2902 save_dist_v (ddr, dist_v);
2904 add_outer_distances (ddr, dist_v, x_1);
2907 /* Helper function for the case where DDR_A and DDR_B are the same
2908 access functions. */
2910 static void
2911 add_other_self_distances (struct data_dependence_relation *ddr)
2913 lambda_vector dist_v;
2914 unsigned i;
2915 int index_carry = DDR_NB_LOOPS (ddr);
2917 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2919 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
2921 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
2923 if (!evolution_function_is_univariate_p (access_fun))
2925 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
2927 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
2928 return;
2931 access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
2933 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
2934 add_multivariate_self_dist (ddr, access_fun);
2935 else
2936 /* The evolution step is not constant: it varies in
2937 the outer loop, so this cannot be represented by a
2938 distance vector. For example in pr34635.c the
2939 evolution is {0, +, {0, +, 4}_1}_2. */
2940 DDR_AFFINE_P (ddr) = false;
2942 return;
2945 index_carry = MIN (index_carry,
2946 index_in_loop_nest (CHREC_VARIABLE (access_fun),
2947 DDR_LOOP_NEST (ddr)));
2951 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2952 add_outer_distances (ddr, dist_v, index_carry);
2955 static void
2956 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
2958 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2960 dist_v[DDR_INNER_LOOP (ddr)] = 1;
2961 save_dist_v (ddr, dist_v);
2964 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
2965 is the case for example when access functions are the same and
2966 equal to a constant, as in:
2968 | loop_1
2969 | A[3] = ...
2970 | ... = A[3]
2971 | endloop_1
2973 in which case the distance vectors are (0) and (1). */
2975 static void
2976 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
2978 unsigned i, j;
2980 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2982 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
2983 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
2984 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
2986 for (j = 0; j < ca->n; j++)
2987 if (affine_function_zero_p (ca->fns[j]))
2989 insert_innermost_unit_dist_vector (ddr);
2990 return;
2993 for (j = 0; j < cb->n; j++)
2994 if (affine_function_zero_p (cb->fns[j]))
2996 insert_innermost_unit_dist_vector (ddr);
2997 return;
3002 /* Compute the classic per loop distance vector. DDR is the data
3003 dependence relation to build a vector from. Return false when fail
3004 to represent the data dependence as a distance vector. */
3006 static bool
3007 build_classic_dist_vector (struct data_dependence_relation *ddr,
3008 struct loop *loop_nest)
3010 bool init_b = false;
3011 int index_carry = DDR_NB_LOOPS (ddr);
3012 lambda_vector dist_v;
3014 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3015 return false;
3017 if (same_access_functions (ddr))
3019 /* Save the 0 vector. */
3020 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3021 save_dist_v (ddr, dist_v);
3023 if (constant_access_functions (ddr))
3024 add_distance_for_zero_overlaps (ddr);
3026 if (DDR_NB_LOOPS (ddr) > 1)
3027 add_other_self_distances (ddr);
3029 return true;
3032 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3033 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3034 dist_v, &init_b, &index_carry))
3035 return false;
3037 /* Save the distance vector if we initialized one. */
3038 if (init_b)
3040 /* Verify a basic constraint: classic distance vectors should
3041 always be lexicographically positive.
3043 Data references are collected in the order of execution of
3044 the program, thus for the following loop
3046 | for (i = 1; i < 100; i++)
3047 | for (j = 1; j < 100; j++)
3049 | t = T[j+1][i-1]; // A
3050 | T[j][i] = t + 2; // B
3053 references are collected following the direction of the wind:
3054 A then B. The data dependence tests are performed also
3055 following this order, such that we're looking at the distance
3056 separating the elements accessed by A from the elements later
3057 accessed by B. But in this example, the distance returned by
3058 test_dep (A, B) is lexicographically negative (-1, 1), that
3059 means that the access A occurs later than B with respect to
3060 the outer loop, ie. we're actually looking upwind. In this
3061 case we solve test_dep (B, A) looking downwind to the
3062 lexicographically positive solution, that returns the
3063 distance vector (1, -1). */
3064 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3066 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3067 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3068 loop_nest))
3069 return false;
3070 compute_subscript_distance (ddr);
3071 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3072 save_v, &init_b, &index_carry))
3073 return false;
3074 save_dist_v (ddr, save_v);
3075 DDR_REVERSED_P (ddr) = true;
3077 /* In this case there is a dependence forward for all the
3078 outer loops:
3080 | for (k = 1; k < 100; k++)
3081 | for (i = 1; i < 100; i++)
3082 | for (j = 1; j < 100; j++)
3084 | t = T[j+1][i-1]; // A
3085 | T[j][i] = t + 2; // B
3088 the vectors are:
3089 (0, 1, -1)
3090 (1, 1, -1)
3091 (1, -1, 1)
3093 if (DDR_NB_LOOPS (ddr) > 1)
3095 add_outer_distances (ddr, save_v, index_carry);
3096 add_outer_distances (ddr, dist_v, index_carry);
3099 else
3101 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3102 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3104 if (DDR_NB_LOOPS (ddr) > 1)
3106 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3108 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3109 DDR_A (ddr), loop_nest))
3110 return false;
3111 compute_subscript_distance (ddr);
3112 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3113 opposite_v, &init_b,
3114 &index_carry))
3115 return false;
3117 save_dist_v (ddr, save_v);
3118 add_outer_distances (ddr, dist_v, index_carry);
3119 add_outer_distances (ddr, opposite_v, index_carry);
3121 else
3122 save_dist_v (ddr, save_v);
3125 else
3127 /* There is a distance of 1 on all the outer loops: Example:
3128 there is a dependence of distance 1 on loop_1 for the array A.
3130 | loop_1
3131 | A[5] = ...
3132 | endloop
3134 add_outer_distances (ddr, dist_v,
3135 lambda_vector_first_nz (dist_v,
3136 DDR_NB_LOOPS (ddr), 0));
3139 if (dump_file && (dump_flags & TDF_DETAILS))
3141 unsigned i;
3143 fprintf (dump_file, "(build_classic_dist_vector\n");
3144 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3146 fprintf (dump_file, " dist_vector = (");
3147 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3148 DDR_NB_LOOPS (ddr));
3149 fprintf (dump_file, " )\n");
3151 fprintf (dump_file, ")\n");
3154 return true;
3157 /* Return the direction for a given distance.
3158 FIXME: Computing dir this way is suboptimal, since dir can catch
3159 cases that dist is unable to represent. */
3161 static inline enum data_dependence_direction
3162 dir_from_dist (int dist)
3164 if (dist > 0)
3165 return dir_positive;
3166 else if (dist < 0)
3167 return dir_negative;
3168 else
3169 return dir_equal;
3172 /* Compute the classic per loop direction vector. DDR is the data
3173 dependence relation to build a vector from. */
3175 static void
3176 build_classic_dir_vector (struct data_dependence_relation *ddr)
3178 unsigned i, j;
3179 lambda_vector dist_v;
3181 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
3183 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3185 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3186 dir_v[j] = dir_from_dist (dist_v[j]);
3188 save_dir_v (ddr, dir_v);
3192 /* Helper function. Returns true when there is a dependence between
3193 data references DRA and DRB. */
3195 static bool
3196 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3197 struct data_reference *dra,
3198 struct data_reference *drb,
3199 struct loop *loop_nest)
3201 unsigned int i;
3202 tree last_conflicts;
3203 struct subscript *subscript;
3205 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3206 i++)
3208 conflict_function *overlaps_a, *overlaps_b;
3210 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3211 DR_ACCESS_FN (drb, i),
3212 &overlaps_a, &overlaps_b,
3213 &last_conflicts, loop_nest);
3215 if (CF_NOT_KNOWN_P (overlaps_a)
3216 || CF_NOT_KNOWN_P (overlaps_b))
3218 finalize_ddr_dependent (ddr, chrec_dont_know);
3219 dependence_stats.num_dependence_undetermined++;
3220 free_conflict_function (overlaps_a);
3221 free_conflict_function (overlaps_b);
3222 return false;
3225 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3226 || CF_NO_DEPENDENCE_P (overlaps_b))
3228 finalize_ddr_dependent (ddr, chrec_known);
3229 dependence_stats.num_dependence_independent++;
3230 free_conflict_function (overlaps_a);
3231 free_conflict_function (overlaps_b);
3232 return false;
3235 else
3237 if (SUB_CONFLICTS_IN_A (subscript))
3238 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3239 if (SUB_CONFLICTS_IN_B (subscript))
3240 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3242 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3243 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3244 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3248 return true;
3251 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3253 static void
3254 subscript_dependence_tester (struct data_dependence_relation *ddr,
3255 struct loop *loop_nest)
3258 if (dump_file && (dump_flags & TDF_DETAILS))
3259 fprintf (dump_file, "(subscript_dependence_tester \n");
3261 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3262 dependence_stats.num_dependence_dependent++;
3264 compute_subscript_distance (ddr);
3265 if (build_classic_dist_vector (ddr, loop_nest))
3266 build_classic_dir_vector (ddr);
3268 if (dump_file && (dump_flags & TDF_DETAILS))
3269 fprintf (dump_file, ")\n");
3272 /* Returns true when all the access functions of A are affine or
3273 constant with respect to LOOP_NEST. */
3275 static bool
3276 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3277 const struct loop *loop_nest)
3279 unsigned int i;
3280 VEC(tree,heap) *fns = DR_ACCESS_FNS (a);
3281 tree t;
3283 FOR_EACH_VEC_ELT (tree, fns, i, t)
3284 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3285 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3286 return false;
3288 return true;
3291 /* Initializes an equation for an OMEGA problem using the information
3292 contained in the ACCESS_FUN. Returns true when the operation
3293 succeeded.
3295 PB is the omega constraint system.
3296 EQ is the number of the equation to be initialized.
3297 OFFSET is used for shifting the variables names in the constraints:
3298 a constrain is composed of 2 * the number of variables surrounding
3299 dependence accesses. OFFSET is set either to 0 for the first n variables,
3300 then it is set to n.
3301 ACCESS_FUN is expected to be an affine chrec. */
3303 static bool
3304 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3305 unsigned int offset, tree access_fun,
3306 struct data_dependence_relation *ddr)
3308 switch (TREE_CODE (access_fun))
3310 case POLYNOMIAL_CHREC:
3312 tree left = CHREC_LEFT (access_fun);
3313 tree right = CHREC_RIGHT (access_fun);
3314 int var = CHREC_VARIABLE (access_fun);
3315 unsigned var_idx;
3317 if (TREE_CODE (right) != INTEGER_CST)
3318 return false;
3320 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3321 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3323 /* Compute the innermost loop index. */
3324 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3326 if (offset == 0)
3327 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3328 += int_cst_value (right);
3330 switch (TREE_CODE (left))
3332 case POLYNOMIAL_CHREC:
3333 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3335 case INTEGER_CST:
3336 pb->eqs[eq].coef[0] += int_cst_value (left);
3337 return true;
3339 default:
3340 return false;
3344 case INTEGER_CST:
3345 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3346 return true;
3348 default:
3349 return false;
3353 /* As explained in the comments preceding init_omega_for_ddr, we have
3354 to set up a system for each loop level, setting outer loops
3355 variation to zero, and current loop variation to positive or zero.
3356 Save each lexico positive distance vector. */
3358 static void
3359 omega_extract_distance_vectors (omega_pb pb,
3360 struct data_dependence_relation *ddr)
3362 int eq, geq;
3363 unsigned i, j;
3364 struct loop *loopi, *loopj;
3365 enum omega_result res;
3367 /* Set a new problem for each loop in the nest. The basis is the
3368 problem that we have initialized until now. On top of this we
3369 add new constraints. */
3370 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3371 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3373 int dist = 0;
3374 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3375 DDR_NB_LOOPS (ddr));
3377 omega_copy_problem (copy, pb);
3379 /* For all the outer loops "loop_j", add "dj = 0". */
3380 for (j = 0;
3381 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3383 eq = omega_add_zero_eq (copy, omega_black);
3384 copy->eqs[eq].coef[j + 1] = 1;
3387 /* For "loop_i", add "0 <= di". */
3388 geq = omega_add_zero_geq (copy, omega_black);
3389 copy->geqs[geq].coef[i + 1] = 1;
3391 /* Reduce the constraint system, and test that the current
3392 problem is feasible. */
3393 res = omega_simplify_problem (copy);
3394 if (res == omega_false
3395 || res == omega_unknown
3396 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3397 goto next_problem;
3399 for (eq = 0; eq < copy->num_subs; eq++)
3400 if (copy->subs[eq].key == (int) i + 1)
3402 dist = copy->subs[eq].coef[0];
3403 goto found_dist;
3406 if (dist == 0)
3408 /* Reinitialize problem... */
3409 omega_copy_problem (copy, pb);
3410 for (j = 0;
3411 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3413 eq = omega_add_zero_eq (copy, omega_black);
3414 copy->eqs[eq].coef[j + 1] = 1;
3417 /* ..., but this time "di = 1". */
3418 eq = omega_add_zero_eq (copy, omega_black);
3419 copy->eqs[eq].coef[i + 1] = 1;
3420 copy->eqs[eq].coef[0] = -1;
3422 res = omega_simplify_problem (copy);
3423 if (res == omega_false
3424 || res == omega_unknown
3425 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3426 goto next_problem;
3428 for (eq = 0; eq < copy->num_subs; eq++)
3429 if (copy->subs[eq].key == (int) i + 1)
3431 dist = copy->subs[eq].coef[0];
3432 goto found_dist;
3436 found_dist:;
3437 /* Save the lexicographically positive distance vector. */
3438 if (dist >= 0)
3440 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3441 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3443 dist_v[i] = dist;
3445 for (eq = 0; eq < copy->num_subs; eq++)
3446 if (copy->subs[eq].key > 0)
3448 dist = copy->subs[eq].coef[0];
3449 dist_v[copy->subs[eq].key - 1] = dist;
3452 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3453 dir_v[j] = dir_from_dist (dist_v[j]);
3455 save_dist_v (ddr, dist_v);
3456 save_dir_v (ddr, dir_v);
3459 next_problem:;
3460 omega_free_problem (copy);
3464 /* This is called for each subscript of a tuple of data references:
3465 insert an equality for representing the conflicts. */
3467 static bool
3468 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3469 struct data_dependence_relation *ddr,
3470 omega_pb pb, bool *maybe_dependent)
3472 int eq;
3473 tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3474 TREE_TYPE (access_fun_b));
3475 tree fun_a = chrec_convert (type, access_fun_a, NULL);
3476 tree fun_b = chrec_convert (type, access_fun_b, NULL);
3477 tree difference = chrec_fold_minus (type, fun_a, fun_b);
3478 tree minus_one;
3480 /* When the fun_a - fun_b is not constant, the dependence is not
3481 captured by the classic distance vector representation. */
3482 if (TREE_CODE (difference) != INTEGER_CST)
3483 return false;
3485 /* ZIV test. */
3486 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3488 /* There is no dependence. */
3489 *maybe_dependent = false;
3490 return true;
3493 minus_one = build_int_cst (type, -1);
3494 fun_b = chrec_fold_multiply (type, fun_b, minus_one);
3496 eq = omega_add_zero_eq (pb, omega_black);
3497 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3498 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3499 /* There is probably a dependence, but the system of
3500 constraints cannot be built: answer "don't know". */
3501 return false;
3503 /* GCD test. */
3504 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3505 && !int_divides_p (lambda_vector_gcd
3506 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3507 2 * DDR_NB_LOOPS (ddr)),
3508 pb->eqs[eq].coef[0]))
3510 /* There is no dependence. */
3511 *maybe_dependent = false;
3512 return true;
3515 return true;
3518 /* Helper function, same as init_omega_for_ddr but specialized for
3519 data references A and B. */
3521 static bool
3522 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3523 struct data_dependence_relation *ddr,
3524 omega_pb pb, bool *maybe_dependent)
3526 unsigned i;
3527 int ineq;
3528 struct loop *loopi;
3529 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3531 /* Insert an equality per subscript. */
3532 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3534 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3535 ddr, pb, maybe_dependent))
3536 return false;
3537 else if (*maybe_dependent == false)
3539 /* There is no dependence. */
3540 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3541 return true;
3545 /* Insert inequalities: constraints corresponding to the iteration
3546 domain, i.e. the loops surrounding the references "loop_x" and
3547 the distance variables "dx". The layout of the OMEGA
3548 representation is as follows:
3549 - coef[0] is the constant
3550 - coef[1..nb_loops] are the protected variables that will not be
3551 removed by the solver: the "dx"
3552 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3554 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3555 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3557 HOST_WIDE_INT nbi = estimated_loop_iterations_int (loopi, false);
3559 /* 0 <= loop_x */
3560 ineq = omega_add_zero_geq (pb, omega_black);
3561 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3563 /* 0 <= loop_x + dx */
3564 ineq = omega_add_zero_geq (pb, omega_black);
3565 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3566 pb->geqs[ineq].coef[i + 1] = 1;
3568 if (nbi != -1)
3570 /* loop_x <= nb_iters */
3571 ineq = omega_add_zero_geq (pb, omega_black);
3572 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3573 pb->geqs[ineq].coef[0] = nbi;
3575 /* loop_x + dx <= nb_iters */
3576 ineq = omega_add_zero_geq (pb, omega_black);
3577 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3578 pb->geqs[ineq].coef[i + 1] = -1;
3579 pb->geqs[ineq].coef[0] = nbi;
3581 /* A step "dx" bigger than nb_iters is not feasible, so
3582 add "0 <= nb_iters + dx", */
3583 ineq = omega_add_zero_geq (pb, omega_black);
3584 pb->geqs[ineq].coef[i + 1] = 1;
3585 pb->geqs[ineq].coef[0] = nbi;
3586 /* and "dx <= nb_iters". */
3587 ineq = omega_add_zero_geq (pb, omega_black);
3588 pb->geqs[ineq].coef[i + 1] = -1;
3589 pb->geqs[ineq].coef[0] = nbi;
3593 omega_extract_distance_vectors (pb, ddr);
3595 return true;
3598 /* Sets up the Omega dependence problem for the data dependence
3599 relation DDR. Returns false when the constraint system cannot be
3600 built, ie. when the test answers "don't know". Returns true
3601 otherwise, and when independence has been proved (using one of the
3602 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3603 set MAYBE_DEPENDENT to true.
3605 Example: for setting up the dependence system corresponding to the
3606 conflicting accesses
3608 | loop_i
3609 | loop_j
3610 | A[i, i+1] = ...
3611 | ... A[2*j, 2*(i + j)]
3612 | endloop_j
3613 | endloop_i
3615 the following constraints come from the iteration domain:
3617 0 <= i <= Ni
3618 0 <= i + di <= Ni
3619 0 <= j <= Nj
3620 0 <= j + dj <= Nj
3622 where di, dj are the distance variables. The constraints
3623 representing the conflicting elements are:
3625 i = 2 * (j + dj)
3626 i + 1 = 2 * (i + di + j + dj)
3628 For asking that the resulting distance vector (di, dj) be
3629 lexicographically positive, we insert the constraint "di >= 0". If
3630 "di = 0" in the solution, we fix that component to zero, and we
3631 look at the inner loops: we set a new problem where all the outer
3632 loop distances are zero, and fix this inner component to be
3633 positive. When one of the components is positive, we save that
3634 distance, and set a new problem where the distance on this loop is
3635 zero, searching for other distances in the inner loops. Here is
3636 the classic example that illustrates that we have to set for each
3637 inner loop a new problem:
3639 | loop_1
3640 | loop_2
3641 | A[10]
3642 | endloop_2
3643 | endloop_1
3645 we have to save two distances (1, 0) and (0, 1).
3647 Given two array references, refA and refB, we have to set the
3648 dependence problem twice, refA vs. refB and refB vs. refA, and we
3649 cannot do a single test, as refB might occur before refA in the
3650 inner loops, and the contrary when considering outer loops: ex.
3652 | loop_0
3653 | loop_1
3654 | loop_2
3655 | T[{1,+,1}_2][{1,+,1}_1] // refA
3656 | T[{2,+,1}_2][{0,+,1}_1] // refB
3657 | endloop_2
3658 | endloop_1
3659 | endloop_0
3661 refB touches the elements in T before refA, and thus for the same
3662 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3663 but for successive loop_0 iterations, we have (1, -1, 1)
3665 The Omega solver expects the distance variables ("di" in the
3666 previous example) to come first in the constraint system (as
3667 variables to be protected, or "safe" variables), the constraint
3668 system is built using the following layout:
3670 "cst | distance vars | index vars".
3673 static bool
3674 init_omega_for_ddr (struct data_dependence_relation *ddr,
3675 bool *maybe_dependent)
3677 omega_pb pb;
3678 bool res = false;
3680 *maybe_dependent = true;
3682 if (same_access_functions (ddr))
3684 unsigned j;
3685 lambda_vector dir_v;
3687 /* Save the 0 vector. */
3688 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3689 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3690 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3691 dir_v[j] = dir_equal;
3692 save_dir_v (ddr, dir_v);
3694 /* Save the dependences carried by outer loops. */
3695 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3696 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3697 maybe_dependent);
3698 omega_free_problem (pb);
3699 return res;
3702 /* Omega expects the protected variables (those that have to be kept
3703 after elimination) to appear first in the constraint system.
3704 These variables are the distance variables. In the following
3705 initialization we declare NB_LOOPS safe variables, and the total
3706 number of variables for the constraint system is 2*NB_LOOPS. */
3707 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3708 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3709 maybe_dependent);
3710 omega_free_problem (pb);
3712 /* Stop computation if not decidable, or no dependence. */
3713 if (res == false || *maybe_dependent == false)
3714 return res;
3716 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3717 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
3718 maybe_dependent);
3719 omega_free_problem (pb);
3721 return res;
3724 /* Return true when DDR contains the same information as that stored
3725 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
3727 static bool
3728 ddr_consistent_p (FILE *file,
3729 struct data_dependence_relation *ddr,
3730 VEC (lambda_vector, heap) *dist_vects,
3731 VEC (lambda_vector, heap) *dir_vects)
3733 unsigned int i, j;
3735 /* If dump_file is set, output there. */
3736 if (dump_file && (dump_flags & TDF_DETAILS))
3737 file = dump_file;
3739 if (VEC_length (lambda_vector, dist_vects) != DDR_NUM_DIST_VECTS (ddr))
3741 lambda_vector b_dist_v;
3742 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
3743 VEC_length (lambda_vector, dist_vects),
3744 DDR_NUM_DIST_VECTS (ddr));
3746 fprintf (file, "Banerjee dist vectors:\n");
3747 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, i, b_dist_v)
3748 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
3750 fprintf (file, "Omega dist vectors:\n");
3751 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3752 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
3754 fprintf (file, "data dependence relation:\n");
3755 dump_data_dependence_relation (file, ddr);
3757 fprintf (file, ")\n");
3758 return false;
3761 if (VEC_length (lambda_vector, dir_vects) != DDR_NUM_DIR_VECTS (ddr))
3763 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
3764 VEC_length (lambda_vector, dir_vects),
3765 DDR_NUM_DIR_VECTS (ddr));
3766 return false;
3769 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3771 lambda_vector a_dist_v;
3772 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
3774 /* Distance vectors are not ordered in the same way in the DDR
3775 and in the DIST_VECTS: search for a matching vector. */
3776 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, a_dist_v)
3777 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
3778 break;
3780 if (j == VEC_length (lambda_vector, dist_vects))
3782 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
3783 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
3784 fprintf (file, "not found in Omega dist vectors:\n");
3785 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
3786 fprintf (file, "data dependence relation:\n");
3787 dump_data_dependence_relation (file, ddr);
3788 fprintf (file, ")\n");
3792 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
3794 lambda_vector a_dir_v;
3795 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
3797 /* Direction vectors are not ordered in the same way in the DDR
3798 and in the DIR_VECTS: search for a matching vector. */
3799 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, a_dir_v)
3800 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
3801 break;
3803 if (j == VEC_length (lambda_vector, dist_vects))
3805 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
3806 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
3807 fprintf (file, "not found in Omega dir vectors:\n");
3808 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
3809 fprintf (file, "data dependence relation:\n");
3810 dump_data_dependence_relation (file, ddr);
3811 fprintf (file, ")\n");
3815 return true;
3818 /* This computes the affine dependence relation between A and B with
3819 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
3820 independence between two accesses, while CHREC_DONT_KNOW is used
3821 for representing the unknown relation.
3823 Note that it is possible to stop the computation of the dependence
3824 relation the first time we detect a CHREC_KNOWN element for a given
3825 subscript. */
3827 static void
3828 compute_affine_dependence (struct data_dependence_relation *ddr,
3829 struct loop *loop_nest)
3831 struct data_reference *dra = DDR_A (ddr);
3832 struct data_reference *drb = DDR_B (ddr);
3834 if (dump_file && (dump_flags & TDF_DETAILS))
3836 fprintf (dump_file, "(compute_affine_dependence\n");
3837 fprintf (dump_file, " (stmt_a = \n");
3838 print_gimple_stmt (dump_file, DR_STMT (dra), 0, 0);
3839 fprintf (dump_file, ")\n (stmt_b = \n");
3840 print_gimple_stmt (dump_file, DR_STMT (drb), 0, 0);
3841 fprintf (dump_file, ")\n");
3844 /* Analyze only when the dependence relation is not yet known. */
3845 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE
3846 && !DDR_SELF_REFERENCE (ddr))
3848 dependence_stats.num_dependence_tests++;
3850 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
3851 && access_functions_are_affine_or_constant_p (drb, loop_nest))
3853 if (flag_check_data_deps)
3855 /* Compute the dependences using the first algorithm. */
3856 subscript_dependence_tester (ddr, loop_nest);
3858 if (dump_file && (dump_flags & TDF_DETAILS))
3860 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
3861 dump_data_dependence_relation (dump_file, ddr);
3864 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
3866 bool maybe_dependent;
3867 VEC (lambda_vector, heap) *dir_vects, *dist_vects;
3869 /* Save the result of the first DD analyzer. */
3870 dist_vects = DDR_DIST_VECTS (ddr);
3871 dir_vects = DDR_DIR_VECTS (ddr);
3873 /* Reset the information. */
3874 DDR_DIST_VECTS (ddr) = NULL;
3875 DDR_DIR_VECTS (ddr) = NULL;
3877 /* Compute the same information using Omega. */
3878 if (!init_omega_for_ddr (ddr, &maybe_dependent))
3879 goto csys_dont_know;
3881 if (dump_file && (dump_flags & TDF_DETAILS))
3883 fprintf (dump_file, "Omega Analyzer\n");
3884 dump_data_dependence_relation (dump_file, ddr);
3887 /* Check that we get the same information. */
3888 if (maybe_dependent)
3889 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
3890 dir_vects));
3893 else
3894 subscript_dependence_tester (ddr, loop_nest);
3897 /* As a last case, if the dependence cannot be determined, or if
3898 the dependence is considered too difficult to determine, answer
3899 "don't know". */
3900 else
3902 csys_dont_know:;
3903 dependence_stats.num_dependence_undetermined++;
3905 if (dump_file && (dump_flags & TDF_DETAILS))
3907 fprintf (dump_file, "Data ref a:\n");
3908 dump_data_reference (dump_file, dra);
3909 fprintf (dump_file, "Data ref b:\n");
3910 dump_data_reference (dump_file, drb);
3911 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
3913 finalize_ddr_dependent (ddr, chrec_dont_know);
3917 if (dump_file && (dump_flags & TDF_DETAILS))
3918 fprintf (dump_file, ")\n");
3921 /* This computes the dependence relation for the same data
3922 reference into DDR. */
3924 static void
3925 compute_self_dependence (struct data_dependence_relation *ddr)
3927 unsigned int i;
3928 struct subscript *subscript;
3930 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3931 return;
3933 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3934 i++)
3936 if (SUB_CONFLICTS_IN_A (subscript))
3937 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3938 if (SUB_CONFLICTS_IN_B (subscript))
3939 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3941 /* The accessed index overlaps for each iteration. */
3942 SUB_CONFLICTS_IN_A (subscript)
3943 = conflict_fn (1, affine_fn_cst (integer_zero_node));
3944 SUB_CONFLICTS_IN_B (subscript)
3945 = conflict_fn (1, affine_fn_cst (integer_zero_node));
3946 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
3949 /* The distance vector is the zero vector. */
3950 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3951 save_dir_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3954 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
3955 the data references in DATAREFS, in the LOOP_NEST. When
3956 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
3957 relations. */
3959 void
3960 compute_all_dependences (VEC (data_reference_p, heap) *datarefs,
3961 VEC (ddr_p, heap) **dependence_relations,
3962 VEC (loop_p, heap) *loop_nest,
3963 bool compute_self_and_rr)
3965 struct data_dependence_relation *ddr;
3966 struct data_reference *a, *b;
3967 unsigned int i, j;
3969 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
3970 for (j = i + 1; VEC_iterate (data_reference_p, datarefs, j, b); j++)
3971 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
3973 ddr = initialize_data_dependence_relation (a, b, loop_nest);
3974 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
3975 if (loop_nest)
3976 compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
3979 if (compute_self_and_rr)
3980 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
3982 ddr = initialize_data_dependence_relation (a, a, loop_nest);
3983 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
3984 compute_self_dependence (ddr);
3988 /* Stores the locations of memory references in STMT to REFERENCES. Returns
3989 true if STMT clobbers memory, false otherwise. */
3991 bool
3992 get_references_in_stmt (gimple stmt, VEC (data_ref_loc, heap) **references)
3994 bool clobbers_memory = false;
3995 data_ref_loc *ref;
3996 tree *op0, *op1;
3997 enum gimple_code stmt_code = gimple_code (stmt);
3999 *references = NULL;
4001 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4002 Calls have side-effects, except those to const or pure
4003 functions. */
4004 if ((stmt_code == GIMPLE_CALL
4005 && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)))
4006 || (stmt_code == GIMPLE_ASM
4007 && gimple_asm_volatile_p (stmt)))
4008 clobbers_memory = true;
4010 if (!gimple_vuse (stmt))
4011 return clobbers_memory;
4013 if (stmt_code == GIMPLE_ASSIGN)
4015 tree base;
4016 op0 = gimple_assign_lhs_ptr (stmt);
4017 op1 = gimple_assign_rhs1_ptr (stmt);
4019 if (DECL_P (*op1)
4020 || (REFERENCE_CLASS_P (*op1)
4021 && (base = get_base_address (*op1))
4022 && TREE_CODE (base) != SSA_NAME))
4024 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4025 ref->pos = op1;
4026 ref->is_read = true;
4029 if (DECL_P (*op0)
4030 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0)))
4032 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4033 ref->pos = op0;
4034 ref->is_read = false;
4037 else if (stmt_code == GIMPLE_CALL)
4039 unsigned i, n = gimple_call_num_args (stmt);
4041 for (i = 0; i < n; i++)
4043 op0 = gimple_call_arg_ptr (stmt, i);
4045 if (DECL_P (*op0)
4046 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0)))
4048 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4049 ref->pos = op0;
4050 ref->is_read = true;
4055 return clobbers_memory;
4058 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4059 reference, returns false, otherwise returns true. NEST is the outermost
4060 loop of the loop nest in which the references should be analyzed. */
4062 bool
4063 find_data_references_in_stmt (struct loop *nest, gimple stmt,
4064 VEC (data_reference_p, heap) **datarefs)
4066 unsigned i;
4067 VEC (data_ref_loc, heap) *references;
4068 data_ref_loc *ref;
4069 bool ret = true;
4070 data_reference_p dr;
4072 if (get_references_in_stmt (stmt, &references))
4074 VEC_free (data_ref_loc, heap, references);
4075 return false;
4078 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4080 dr = create_data_ref (nest, *ref->pos, stmt, ref->is_read);
4081 gcc_assert (dr != NULL);
4083 /* FIXME -- data dependence analysis does not work correctly for objects
4084 with invariant addresses in loop nests. Let us fail here until the
4085 problem is fixed. */
4086 if (dr_address_invariant_p (dr) && nest)
4088 free_data_ref (dr);
4089 if (dump_file && (dump_flags & TDF_DETAILS))
4090 fprintf (dump_file, "\tFAILED as dr address is invariant\n");
4091 ret = false;
4092 break;
4095 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4097 VEC_free (data_ref_loc, heap, references);
4098 return ret;
4101 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4102 reference, returns false, otherwise returns true. NEST is the outermost
4103 loop of the loop nest in which the references should be analyzed. */
4105 bool
4106 graphite_find_data_references_in_stmt (struct loop *nest, gimple stmt,
4107 VEC (data_reference_p, heap) **datarefs)
4109 unsigned i;
4110 VEC (data_ref_loc, heap) *references;
4111 data_ref_loc *ref;
4112 bool ret = true;
4113 data_reference_p dr;
4115 if (get_references_in_stmt (stmt, &references))
4117 VEC_free (data_ref_loc, heap, references);
4118 return false;
4121 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4123 dr = create_data_ref (nest, *ref->pos, stmt, ref->is_read);
4124 gcc_assert (dr != NULL);
4125 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4128 VEC_free (data_ref_loc, heap, references);
4129 return ret;
4132 /* Search the data references in LOOP, and record the information into
4133 DATAREFS. Returns chrec_dont_know when failing to analyze a
4134 difficult case, returns NULL_TREE otherwise. */
4136 static tree
4137 find_data_references_in_bb (struct loop *loop, basic_block bb,
4138 VEC (data_reference_p, heap) **datarefs)
4140 gimple_stmt_iterator bsi;
4142 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4144 gimple stmt = gsi_stmt (bsi);
4146 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4148 struct data_reference *res;
4149 res = XCNEW (struct data_reference);
4150 VEC_safe_push (data_reference_p, heap, *datarefs, res);
4152 return chrec_dont_know;
4156 return NULL_TREE;
4159 /* Search the data references in LOOP, and record the information into
4160 DATAREFS. Returns chrec_dont_know when failing to analyze a
4161 difficult case, returns NULL_TREE otherwise.
4163 TODO: This function should be made smarter so that it can handle address
4164 arithmetic as if they were array accesses, etc. */
4166 tree
4167 find_data_references_in_loop (struct loop *loop,
4168 VEC (data_reference_p, heap) **datarefs)
4170 basic_block bb, *bbs;
4171 unsigned int i;
4173 bbs = get_loop_body_in_dom_order (loop);
4175 for (i = 0; i < loop->num_nodes; i++)
4177 bb = bbs[i];
4179 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4181 free (bbs);
4182 return chrec_dont_know;
4185 free (bbs);
4187 return NULL_TREE;
4190 /* Recursive helper function. */
4192 static bool
4193 find_loop_nest_1 (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4195 /* Inner loops of the nest should not contain siblings. Example:
4196 when there are two consecutive loops,
4198 | loop_0
4199 | loop_1
4200 | A[{0, +, 1}_1]
4201 | endloop_1
4202 | loop_2
4203 | A[{0, +, 1}_2]
4204 | endloop_2
4205 | endloop_0
4207 the dependence relation cannot be captured by the distance
4208 abstraction. */
4209 if (loop->next)
4210 return false;
4212 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4213 if (loop->inner)
4214 return find_loop_nest_1 (loop->inner, loop_nest);
4215 return true;
4218 /* Return false when the LOOP is not well nested. Otherwise return
4219 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4220 contain the loops from the outermost to the innermost, as they will
4221 appear in the classic distance vector. */
4223 bool
4224 find_loop_nest (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4226 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4227 if (loop->inner)
4228 return find_loop_nest_1 (loop->inner, loop_nest);
4229 return true;
4232 /* Returns true when the data dependences have been computed, false otherwise.
4233 Given a loop nest LOOP, the following vectors are returned:
4234 DATAREFS is initialized to all the array elements contained in this loop,
4235 DEPENDENCE_RELATIONS contains the relations between the data references.
4236 Compute read-read and self relations if
4237 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4239 bool
4240 compute_data_dependences_for_loop (struct loop *loop,
4241 bool compute_self_and_read_read_dependences,
4242 VEC (loop_p, heap) **loop_nest,
4243 VEC (data_reference_p, heap) **datarefs,
4244 VEC (ddr_p, heap) **dependence_relations)
4246 bool res = true;
4248 memset (&dependence_stats, 0, sizeof (dependence_stats));
4250 /* If the loop nest is not well formed, or one of the data references
4251 is not computable, give up without spending time to compute other
4252 dependences. */
4253 if (!loop
4254 || !find_loop_nest (loop, loop_nest)
4255 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know)
4257 struct data_dependence_relation *ddr;
4259 /* Insert a single relation into dependence_relations:
4260 chrec_dont_know. */
4261 ddr = initialize_data_dependence_relation (NULL, NULL, *loop_nest);
4262 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4263 res = false;
4265 else
4266 compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
4267 compute_self_and_read_read_dependences);
4269 if (dump_file && (dump_flags & TDF_STATS))
4271 fprintf (dump_file, "Dependence tester statistics:\n");
4273 fprintf (dump_file, "Number of dependence tests: %d\n",
4274 dependence_stats.num_dependence_tests);
4275 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4276 dependence_stats.num_dependence_dependent);
4277 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4278 dependence_stats.num_dependence_independent);
4279 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4280 dependence_stats.num_dependence_undetermined);
4282 fprintf (dump_file, "Number of subscript tests: %d\n",
4283 dependence_stats.num_subscript_tests);
4284 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4285 dependence_stats.num_subscript_undetermined);
4286 fprintf (dump_file, "Number of same subscript function: %d\n",
4287 dependence_stats.num_same_subscript_function);
4289 fprintf (dump_file, "Number of ziv tests: %d\n",
4290 dependence_stats.num_ziv);
4291 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4292 dependence_stats.num_ziv_dependent);
4293 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4294 dependence_stats.num_ziv_independent);
4295 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4296 dependence_stats.num_ziv_unimplemented);
4298 fprintf (dump_file, "Number of siv tests: %d\n",
4299 dependence_stats.num_siv);
4300 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4301 dependence_stats.num_siv_dependent);
4302 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4303 dependence_stats.num_siv_independent);
4304 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4305 dependence_stats.num_siv_unimplemented);
4307 fprintf (dump_file, "Number of miv tests: %d\n",
4308 dependence_stats.num_miv);
4309 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4310 dependence_stats.num_miv_dependent);
4311 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4312 dependence_stats.num_miv_independent);
4313 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4314 dependence_stats.num_miv_unimplemented);
4317 return res;
4320 /* Returns true when the data dependences for the basic block BB have been
4321 computed, false otherwise.
4322 DATAREFS is initialized to all the array elements contained in this basic
4323 block, DEPENDENCE_RELATIONS contains the relations between the data
4324 references. Compute read-read and self relations if
4325 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4326 bool
4327 compute_data_dependences_for_bb (basic_block bb,
4328 bool compute_self_and_read_read_dependences,
4329 VEC (data_reference_p, heap) **datarefs,
4330 VEC (ddr_p, heap) **dependence_relations)
4332 if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know)
4333 return false;
4335 compute_all_dependences (*datarefs, dependence_relations, NULL,
4336 compute_self_and_read_read_dependences);
4337 return true;
4340 /* Entry point (for testing only). Analyze all the data references
4341 and the dependence relations in LOOP.
4343 The data references are computed first.
4345 A relation on these nodes is represented by a complete graph. Some
4346 of the relations could be of no interest, thus the relations can be
4347 computed on demand.
4349 In the following function we compute all the relations. This is
4350 just a first implementation that is here for:
4351 - for showing how to ask for the dependence relations,
4352 - for the debugging the whole dependence graph,
4353 - for the dejagnu testcases and maintenance.
4355 It is possible to ask only for a part of the graph, avoiding to
4356 compute the whole dependence graph. The computed dependences are
4357 stored in a knowledge base (KB) such that later queries don't
4358 recompute the same information. The implementation of this KB is
4359 transparent to the optimizer, and thus the KB can be changed with a
4360 more efficient implementation, or the KB could be disabled. */
4361 static void
4362 analyze_all_data_dependences (struct loop *loop)
4364 unsigned int i;
4365 int nb_data_refs = 10;
4366 VEC (data_reference_p, heap) *datarefs =
4367 VEC_alloc (data_reference_p, heap, nb_data_refs);
4368 VEC (ddr_p, heap) *dependence_relations =
4369 VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs);
4370 VEC (loop_p, heap) *loop_nest = VEC_alloc (loop_p, heap, 3);
4372 /* Compute DDs on the whole function. */
4373 compute_data_dependences_for_loop (loop, false, &loop_nest, &datarefs,
4374 &dependence_relations);
4376 if (dump_file)
4378 dump_data_dependence_relations (dump_file, dependence_relations);
4379 fprintf (dump_file, "\n\n");
4381 if (dump_flags & TDF_DETAILS)
4382 dump_dist_dir_vectors (dump_file, dependence_relations);
4384 if (dump_flags & TDF_STATS)
4386 unsigned nb_top_relations = 0;
4387 unsigned nb_bot_relations = 0;
4388 unsigned nb_chrec_relations = 0;
4389 struct data_dependence_relation *ddr;
4391 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4393 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4394 nb_top_relations++;
4396 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4397 nb_bot_relations++;
4399 else
4400 nb_chrec_relations++;
4403 gather_stats_on_scev_database ();
4407 VEC_free (loop_p, heap, loop_nest);
4408 free_dependence_relations (dependence_relations);
4409 free_data_refs (datarefs);
4412 /* Computes all the data dependences and check that the results of
4413 several analyzers are the same. */
4415 void
4416 tree_check_data_deps (void)
4418 loop_iterator li;
4419 struct loop *loop_nest;
4421 FOR_EACH_LOOP (li, loop_nest, 0)
4422 analyze_all_data_dependences (loop_nest);
4425 /* Free the memory used by a data dependence relation DDR. */
4427 void
4428 free_dependence_relation (struct data_dependence_relation *ddr)
4430 if (ddr == NULL)
4431 return;
4433 if (DDR_SUBSCRIPTS (ddr))
4434 free_subscripts (DDR_SUBSCRIPTS (ddr));
4435 if (DDR_DIST_VECTS (ddr))
4436 VEC_free (lambda_vector, heap, DDR_DIST_VECTS (ddr));
4437 if (DDR_DIR_VECTS (ddr))
4438 VEC_free (lambda_vector, heap, DDR_DIR_VECTS (ddr));
4440 free (ddr);
4443 /* Free the memory used by the data dependence relations from
4444 DEPENDENCE_RELATIONS. */
4446 void
4447 free_dependence_relations (VEC (ddr_p, heap) *dependence_relations)
4449 unsigned int i;
4450 struct data_dependence_relation *ddr;
4452 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4453 if (ddr)
4454 free_dependence_relation (ddr);
4456 VEC_free (ddr_p, heap, dependence_relations);
4459 /* Free the memory used by the data references from DATAREFS. */
4461 void
4462 free_data_refs (VEC (data_reference_p, heap) *datarefs)
4464 unsigned int i;
4465 struct data_reference *dr;
4467 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
4468 free_data_ref (dr);
4469 VEC_free (data_reference_p, heap, datarefs);
4474 /* Dump vertex I in RDG to FILE. */
4476 void
4477 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
4479 struct vertex *v = &(rdg->vertices[i]);
4480 struct graph_edge *e;
4482 fprintf (file, "(vertex %d: (%s%s) (in:", i,
4483 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
4484 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
4486 if (v->pred)
4487 for (e = v->pred; e; e = e->pred_next)
4488 fprintf (file, " %d", e->src);
4490 fprintf (file, ") (out:");
4492 if (v->succ)
4493 for (e = v->succ; e; e = e->succ_next)
4494 fprintf (file, " %d", e->dest);
4496 fprintf (file, ")\n");
4497 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
4498 fprintf (file, ")\n");
4501 /* Call dump_rdg_vertex on stderr. */
4503 DEBUG_FUNCTION void
4504 debug_rdg_vertex (struct graph *rdg, int i)
4506 dump_rdg_vertex (stderr, rdg, i);
4509 /* Dump component C of RDG to FILE. If DUMPED is non-null, set the
4510 dumped vertices to that bitmap. */
4512 void dump_rdg_component (FILE *file, struct graph *rdg, int c, bitmap dumped)
4514 int i;
4516 fprintf (file, "(%d\n", c);
4518 for (i = 0; i < rdg->n_vertices; i++)
4519 if (rdg->vertices[i].component == c)
4521 if (dumped)
4522 bitmap_set_bit (dumped, i);
4524 dump_rdg_vertex (file, rdg, i);
4527 fprintf (file, ")\n");
4530 /* Call dump_rdg_vertex on stderr. */
4532 DEBUG_FUNCTION void
4533 debug_rdg_component (struct graph *rdg, int c)
4535 dump_rdg_component (stderr, rdg, c, NULL);
4538 /* Dump the reduced dependence graph RDG to FILE. */
4540 void
4541 dump_rdg (FILE *file, struct graph *rdg)
4543 int i;
4544 bitmap dumped = BITMAP_ALLOC (NULL);
4546 fprintf (file, "(rdg\n");
4548 for (i = 0; i < rdg->n_vertices; i++)
4549 if (!bitmap_bit_p (dumped, i))
4550 dump_rdg_component (file, rdg, rdg->vertices[i].component, dumped);
4552 fprintf (file, ")\n");
4553 BITMAP_FREE (dumped);
4556 /* Call dump_rdg on stderr. */
4558 DEBUG_FUNCTION void
4559 debug_rdg (struct graph *rdg)
4561 dump_rdg (stderr, rdg);
4564 static void
4565 dot_rdg_1 (FILE *file, struct graph *rdg)
4567 int i;
4569 fprintf (file, "digraph RDG {\n");
4571 for (i = 0; i < rdg->n_vertices; i++)
4573 struct vertex *v = &(rdg->vertices[i]);
4574 struct graph_edge *e;
4576 /* Highlight reads from memory. */
4577 if (RDG_MEM_READS_STMT (rdg, i))
4578 fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
4580 /* Highlight stores to memory. */
4581 if (RDG_MEM_WRITE_STMT (rdg, i))
4582 fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
4584 if (v->succ)
4585 for (e = v->succ; e; e = e->succ_next)
4586 switch (RDGE_TYPE (e))
4588 case input_dd:
4589 fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
4590 break;
4592 case output_dd:
4593 fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
4594 break;
4596 case flow_dd:
4597 /* These are the most common dependences: don't print these. */
4598 fprintf (file, "%d -> %d \n", i, e->dest);
4599 break;
4601 case anti_dd:
4602 fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
4603 break;
4605 default:
4606 gcc_unreachable ();
4610 fprintf (file, "}\n\n");
4613 /* Display the Reduced Dependence Graph using dotty. */
4614 extern void dot_rdg (struct graph *);
4616 DEBUG_FUNCTION void
4617 dot_rdg (struct graph *rdg)
4619 /* When debugging, enable the following code. This cannot be used
4620 in production compilers because it calls "system". */
4621 #if 0
4622 FILE *file = fopen ("/tmp/rdg.dot", "w");
4623 gcc_assert (file != NULL);
4625 dot_rdg_1 (file, rdg);
4626 fclose (file);
4628 system ("dotty /tmp/rdg.dot &");
4629 #else
4630 dot_rdg_1 (stderr, rdg);
4631 #endif
4634 /* This structure is used for recording the mapping statement index in
4635 the RDG. */
4637 struct GTY(()) rdg_vertex_info
4639 gimple stmt;
4640 int index;
4643 /* Returns the index of STMT in RDG. */
4646 rdg_vertex_for_stmt (struct graph *rdg, gimple stmt)
4648 struct rdg_vertex_info rvi, *slot;
4650 rvi.stmt = stmt;
4651 slot = (struct rdg_vertex_info *) htab_find (rdg->indices, &rvi);
4653 if (!slot)
4654 return -1;
4656 return slot->index;
4659 /* Creates an edge in RDG for each distance vector from DDR. The
4660 order that we keep track of in the RDG is the order in which
4661 statements have to be executed. */
4663 static void
4664 create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4666 struct graph_edge *e;
4667 int va, vb;
4668 data_reference_p dra = DDR_A (ddr);
4669 data_reference_p drb = DDR_B (ddr);
4670 unsigned level = ddr_dependence_level (ddr);
4672 /* For non scalar dependences, when the dependence is REVERSED,
4673 statement B has to be executed before statement A. */
4674 if (level > 0
4675 && !DDR_REVERSED_P (ddr))
4677 data_reference_p tmp = dra;
4678 dra = drb;
4679 drb = tmp;
4682 va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
4683 vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
4685 if (va < 0 || vb < 0)
4686 return;
4688 e = add_edge (rdg, va, vb);
4689 e->data = XNEW (struct rdg_edge);
4691 RDGE_LEVEL (e) = level;
4692 RDGE_RELATION (e) = ddr;
4694 /* Determines the type of the data dependence. */
4695 if (DR_IS_READ (dra) && DR_IS_READ (drb))
4696 RDGE_TYPE (e) = input_dd;
4697 else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
4698 RDGE_TYPE (e) = output_dd;
4699 else if (DR_IS_WRITE (dra) && DR_IS_READ (drb))
4700 RDGE_TYPE (e) = flow_dd;
4701 else if (DR_IS_READ (dra) && DR_IS_WRITE (drb))
4702 RDGE_TYPE (e) = anti_dd;
4705 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
4706 the index of DEF in RDG. */
4708 static void
4709 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
4711 use_operand_p imm_use_p;
4712 imm_use_iterator iterator;
4714 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
4716 struct graph_edge *e;
4717 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
4719 if (use < 0)
4720 continue;
4722 e = add_edge (rdg, idef, use);
4723 e->data = XNEW (struct rdg_edge);
4724 RDGE_TYPE (e) = flow_dd;
4725 RDGE_RELATION (e) = NULL;
4729 /* Creates the edges of the reduced dependence graph RDG. */
4731 static void
4732 create_rdg_edges (struct graph *rdg, VEC (ddr_p, heap) *ddrs)
4734 int i;
4735 struct data_dependence_relation *ddr;
4736 def_operand_p def_p;
4737 ssa_op_iter iter;
4739 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
4740 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4741 create_rdg_edge_for_ddr (rdg, ddr);
4743 for (i = 0; i < rdg->n_vertices; i++)
4744 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
4745 iter, SSA_OP_DEF)
4746 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
4749 /* Build the vertices of the reduced dependence graph RDG. */
4751 void
4752 create_rdg_vertices (struct graph *rdg, VEC (gimple, heap) *stmts)
4754 int i, j;
4755 gimple stmt;
4757 FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
4759 VEC (data_ref_loc, heap) *references;
4760 data_ref_loc *ref;
4761 struct vertex *v = &(rdg->vertices[i]);
4762 struct rdg_vertex_info *rvi = XNEW (struct rdg_vertex_info);
4763 struct rdg_vertex_info **slot;
4765 rvi->stmt = stmt;
4766 rvi->index = i;
4767 slot = (struct rdg_vertex_info **) htab_find_slot (rdg->indices, rvi, INSERT);
4769 if (!*slot)
4770 *slot = rvi;
4771 else
4772 free (rvi);
4774 v->data = XNEW (struct rdg_vertex);
4775 RDG_STMT (rdg, i) = stmt;
4777 RDG_MEM_WRITE_STMT (rdg, i) = false;
4778 RDG_MEM_READS_STMT (rdg, i) = false;
4779 if (gimple_code (stmt) == GIMPLE_PHI)
4780 continue;
4782 get_references_in_stmt (stmt, &references);
4783 FOR_EACH_VEC_ELT (data_ref_loc, references, j, ref)
4784 if (!ref->is_read)
4785 RDG_MEM_WRITE_STMT (rdg, i) = true;
4786 else
4787 RDG_MEM_READS_STMT (rdg, i) = true;
4789 VEC_free (data_ref_loc, heap, references);
4793 /* Initialize STMTS with all the statements of LOOP. When
4794 INCLUDE_PHIS is true, include also the PHI nodes. The order in
4795 which we discover statements is important as
4796 generate_loops_for_partition is using the same traversal for
4797 identifying statements. */
4799 static void
4800 stmts_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
4802 unsigned int i;
4803 basic_block *bbs = get_loop_body_in_dom_order (loop);
4805 for (i = 0; i < loop->num_nodes; i++)
4807 basic_block bb = bbs[i];
4808 gimple_stmt_iterator bsi;
4809 gimple stmt;
4811 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4812 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
4814 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4816 stmt = gsi_stmt (bsi);
4817 if (gimple_code (stmt) != GIMPLE_LABEL)
4818 VEC_safe_push (gimple, heap, *stmts, stmt);
4822 free (bbs);
4825 /* Returns true when all the dependences are computable. */
4827 static bool
4828 known_dependences_p (VEC (ddr_p, heap) *dependence_relations)
4830 ddr_p ddr;
4831 unsigned int i;
4833 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4834 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4835 return false;
4837 return true;
4840 /* Computes a hash function for element ELT. */
4842 static hashval_t
4843 hash_stmt_vertex_info (const void *elt)
4845 const struct rdg_vertex_info *const rvi =
4846 (const struct rdg_vertex_info *) elt;
4847 gimple stmt = rvi->stmt;
4849 return htab_hash_pointer (stmt);
4852 /* Compares database elements E1 and E2. */
4854 static int
4855 eq_stmt_vertex_info (const void *e1, const void *e2)
4857 const struct rdg_vertex_info *elt1 = (const struct rdg_vertex_info *) e1;
4858 const struct rdg_vertex_info *elt2 = (const struct rdg_vertex_info *) e2;
4860 return elt1->stmt == elt2->stmt;
4863 /* Free the element E. */
4865 static void
4866 hash_stmt_vertex_del (void *e)
4868 free (e);
4871 /* Build the Reduced Dependence Graph (RDG) with one vertex per
4872 statement of the loop nest, and one edge per data dependence or
4873 scalar dependence. */
4875 struct graph *
4876 build_empty_rdg (int n_stmts)
4878 int nb_data_refs = 10;
4879 struct graph *rdg = new_graph (n_stmts);
4881 rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
4882 eq_stmt_vertex_info, hash_stmt_vertex_del);
4883 return rdg;
4886 /* Build the Reduced Dependence Graph (RDG) with one vertex per
4887 statement of the loop nest, and one edge per data dependence or
4888 scalar dependence. */
4890 struct graph *
4891 build_rdg (struct loop *loop,
4892 VEC (loop_p, heap) **loop_nest,
4893 VEC (ddr_p, heap) **dependence_relations,
4894 VEC (data_reference_p, heap) **datarefs)
4896 struct graph *rdg = NULL;
4897 VEC (gimple, heap) *stmts = VEC_alloc (gimple, heap, 10);
4899 compute_data_dependences_for_loop (loop, false, loop_nest, datarefs,
4900 dependence_relations);
4902 if (known_dependences_p (*dependence_relations))
4904 stmts_from_loop (loop, &stmts);
4905 rdg = build_empty_rdg (VEC_length (gimple, stmts));
4906 create_rdg_vertices (rdg, stmts);
4907 create_rdg_edges (rdg, *dependence_relations);
4910 VEC_free (gimple, heap, stmts);
4911 return rdg;
4914 /* Free the reduced dependence graph RDG. */
4916 void
4917 free_rdg (struct graph *rdg)
4919 int i;
4921 for (i = 0; i < rdg->n_vertices; i++)
4923 struct vertex *v = &(rdg->vertices[i]);
4924 struct graph_edge *e;
4926 for (e = v->succ; e; e = e->succ_next)
4927 if (e->data)
4928 free (e->data);
4930 if (v->data)
4931 free (v->data);
4934 htab_delete (rdg->indices);
4935 free_graph (rdg);
4938 /* Initialize STMTS with all the statements of LOOP that contain a
4939 store to memory. */
4941 void
4942 stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
4944 unsigned int i;
4945 basic_block *bbs = get_loop_body_in_dom_order (loop);
4947 for (i = 0; i < loop->num_nodes; i++)
4949 basic_block bb = bbs[i];
4950 gimple_stmt_iterator bsi;
4952 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4953 if (gimple_vdef (gsi_stmt (bsi)))
4954 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
4957 free (bbs);
4960 /* Returns true when the statement at STMT is of the form "A[i] = 0"
4961 that contains a data reference on its LHS with a stride of the same
4962 size as its unit type. */
4964 bool
4965 stmt_with_adjacent_zero_store_dr_p (gimple stmt)
4967 tree op0, op1;
4968 bool res;
4969 struct data_reference *dr;
4971 if (!stmt
4972 || !gimple_vdef (stmt)
4973 || !is_gimple_assign (stmt)
4974 || !gimple_assign_single_p (stmt)
4975 || !(op1 = gimple_assign_rhs1 (stmt))
4976 || !(integer_zerop (op1) || real_zerop (op1)))
4977 return false;
4979 dr = XCNEW (struct data_reference);
4980 op0 = gimple_assign_lhs (stmt);
4982 DR_STMT (dr) = stmt;
4983 DR_REF (dr) = op0;
4985 res = dr_analyze_innermost (dr)
4986 && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0));
4988 free_data_ref (dr);
4989 return res;
4992 /* Initialize STMTS with all the statements of LOOP that contain a
4993 store to memory of the form "A[i] = 0". */
4995 void
4996 stores_zero_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
4998 unsigned int i;
4999 basic_block bb;
5000 gimple_stmt_iterator si;
5001 gimple stmt;
5002 basic_block *bbs = get_loop_body_in_dom_order (loop);
5004 for (i = 0; i < loop->num_nodes; i++)
5005 for (bb = bbs[i], si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5006 if ((stmt = gsi_stmt (si))
5007 && stmt_with_adjacent_zero_store_dr_p (stmt))
5008 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (si));
5010 free (bbs);
5013 /* For a data reference REF, return the declaration of its base
5014 address or NULL_TREE if the base is not determined. */
5016 static inline tree
5017 ref_base_address (gimple stmt, data_ref_loc *ref)
5019 tree base = NULL_TREE;
5020 tree base_address;
5021 struct data_reference *dr = XCNEW (struct data_reference);
5023 DR_STMT (dr) = stmt;
5024 DR_REF (dr) = *ref->pos;
5025 dr_analyze_innermost (dr);
5026 base_address = DR_BASE_ADDRESS (dr);
5028 if (!base_address)
5029 goto end;
5031 switch (TREE_CODE (base_address))
5033 case ADDR_EXPR:
5034 base = TREE_OPERAND (base_address, 0);
5035 break;
5037 default:
5038 base = base_address;
5039 break;
5042 end:
5043 free_data_ref (dr);
5044 return base;
5047 /* Determines whether the statement from vertex V of the RDG has a
5048 definition used outside the loop that contains this statement. */
5050 bool
5051 rdg_defs_used_in_other_loops_p (struct graph *rdg, int v)
5053 gimple stmt = RDG_STMT (rdg, v);
5054 struct loop *loop = loop_containing_stmt (stmt);
5055 use_operand_p imm_use_p;
5056 imm_use_iterator iterator;
5057 ssa_op_iter it;
5058 def_operand_p def_p;
5060 if (!loop)
5061 return true;
5063 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, it, SSA_OP_DEF)
5065 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, DEF_FROM_PTR (def_p))
5067 if (loop_containing_stmt (USE_STMT (imm_use_p)) != loop)
5068 return true;
5072 return false;
5075 /* Determines whether statements S1 and S2 access to similar memory
5076 locations. Two memory accesses are considered similar when they
5077 have the same base address declaration, i.e. when their
5078 ref_base_address is the same. */
5080 bool
5081 have_similar_memory_accesses (gimple s1, gimple s2)
5083 bool res = false;
5084 unsigned i, j;
5085 VEC (data_ref_loc, heap) *refs1, *refs2;
5086 data_ref_loc *ref1, *ref2;
5088 get_references_in_stmt (s1, &refs1);
5089 get_references_in_stmt (s2, &refs2);
5091 FOR_EACH_VEC_ELT (data_ref_loc, refs1, i, ref1)
5093 tree base1 = ref_base_address (s1, ref1);
5095 if (base1)
5096 FOR_EACH_VEC_ELT (data_ref_loc, refs2, j, ref2)
5097 if (base1 == ref_base_address (s2, ref2))
5099 res = true;
5100 goto end;
5104 end:
5105 VEC_free (data_ref_loc, heap, refs1);
5106 VEC_free (data_ref_loc, heap, refs2);
5107 return res;
5110 /* Helper function for the hashtab. */
5112 static int
5113 have_similar_memory_accesses_1 (const void *s1, const void *s2)
5115 return have_similar_memory_accesses (CONST_CAST_GIMPLE ((const_gimple) s1),
5116 CONST_CAST_GIMPLE ((const_gimple) s2));
5119 /* Helper function for the hashtab. */
5121 static hashval_t
5122 ref_base_address_1 (const void *s)
5124 gimple stmt = CONST_CAST_GIMPLE ((const_gimple) s);
5125 unsigned i;
5126 VEC (data_ref_loc, heap) *refs;
5127 data_ref_loc *ref;
5128 hashval_t res = 0;
5130 get_references_in_stmt (stmt, &refs);
5132 FOR_EACH_VEC_ELT (data_ref_loc, refs, i, ref)
5133 if (!ref->is_read)
5135 res = htab_hash_pointer (ref_base_address (stmt, ref));
5136 break;
5139 VEC_free (data_ref_loc, heap, refs);
5140 return res;
5143 /* Try to remove duplicated write data references from STMTS. */
5145 void
5146 remove_similar_memory_refs (VEC (gimple, heap) **stmts)
5148 unsigned i;
5149 gimple stmt;
5150 htab_t seen = htab_create (VEC_length (gimple, *stmts), ref_base_address_1,
5151 have_similar_memory_accesses_1, NULL);
5153 for (i = 0; VEC_iterate (gimple, *stmts, i, stmt); )
5155 void **slot;
5157 slot = htab_find_slot (seen, stmt, INSERT);
5159 if (*slot)
5160 VEC_ordered_remove (gimple, *stmts, i);
5161 else
5163 *slot = (void *) stmt;
5164 i++;
5168 htab_delete (seen);
5171 /* Returns the index of PARAMETER in the parameters vector of the
5172 ACCESS_MATRIX. If PARAMETER does not exist return -1. */
5175 access_matrix_get_index_for_parameter (tree parameter,
5176 struct access_matrix *access_matrix)
5178 int i;
5179 VEC (tree,heap) *lambda_parameters = AM_PARAMETERS (access_matrix);
5180 tree lambda_parameter;
5182 FOR_EACH_VEC_ELT (tree, lambda_parameters, i, lambda_parameter)
5183 if (lambda_parameter == parameter)
5184 return i + AM_NB_INDUCTION_VARS (access_matrix);
5186 return -1;