2011-11-06 François Dumont <fdumont@gcc.gnu.org>
[official-gcc.git] / gcc / tree-data-ref.c
blob4e0de052c15e086133b9bd23ed5157233f3d6414
1 /* Data references and dependences detectors.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* This pass walks a given loop structure searching for array
23 references. The information about the array accesses is recorded
24 in DATA_REFERENCE structures.
26 The basic test for determining the dependences is:
27 given two access functions chrec1 and chrec2 to a same array, and
28 x and y two vectors from the iteration domain, the same element of
29 the array is accessed twice at iterations x and y if and only if:
30 | chrec1 (x) == chrec2 (y).
32 The goals of this analysis are:
34 - to determine the independence: the relation between two
35 independent accesses is qualified with the chrec_known (this
36 information allows a loop parallelization),
38 - when two data references access the same data, to qualify the
39 dependence relation with classic dependence representations:
41 - distance vectors
42 - direction vectors
43 - loop carried level dependence
44 - polyhedron dependence
45 or with the chains of recurrences based representation,
47 - to define a knowledge base for storing the data dependence
48 information,
50 - to define an interface to access this data.
53 Definitions:
55 - subscript: given two array accesses a subscript is the tuple
56 composed of the access functions for a given dimension. Example:
57 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
58 (f1, g1), (f2, g2), (f3, g3).
60 - Diophantine equation: an equation whose coefficients and
61 solutions are integer constants, for example the equation
62 | 3*x + 2*y = 1
63 has an integer solution x = 1 and y = -1.
65 References:
67 - "Advanced Compilation for High Performance Computing" by Randy
68 Allen and Ken Kennedy.
69 http://citeseer.ist.psu.edu/goff91practical.html
71 - "Loop Transformations for Restructuring Compilers - The Foundations"
72 by Utpal Banerjee.
77 #include "config.h"
78 #include "system.h"
79 #include "coretypes.h"
80 #include "gimple-pretty-print.h"
81 #include "tree-flow.h"
82 #include "cfgloop.h"
83 #include "tree-data-ref.h"
84 #include "tree-scalar-evolution.h"
85 #include "tree-pass.h"
86 #include "langhooks.h"
87 #include "tree-affine.h"
89 static struct datadep_stats
91 int num_dependence_tests;
92 int num_dependence_dependent;
93 int num_dependence_independent;
94 int num_dependence_undetermined;
96 int num_subscript_tests;
97 int num_subscript_undetermined;
98 int num_same_subscript_function;
100 int num_ziv;
101 int num_ziv_independent;
102 int num_ziv_dependent;
103 int num_ziv_unimplemented;
105 int num_siv;
106 int num_siv_independent;
107 int num_siv_dependent;
108 int num_siv_unimplemented;
110 int num_miv;
111 int num_miv_independent;
112 int num_miv_dependent;
113 int num_miv_unimplemented;
114 } dependence_stats;
116 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
117 struct data_reference *,
118 struct data_reference *,
119 struct loop *);
120 /* Returns true iff A divides B. */
122 static inline bool
123 tree_fold_divides_p (const_tree a, const_tree b)
125 gcc_assert (TREE_CODE (a) == INTEGER_CST);
126 gcc_assert (TREE_CODE (b) == INTEGER_CST);
127 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
130 /* Returns true iff A divides B. */
132 static inline bool
133 int_divides_p (int a, int b)
135 return ((b % a) == 0);
140 /* Dump into FILE all the data references from DATAREFS. */
142 void
143 dump_data_references (FILE *file, VEC (data_reference_p, heap) *datarefs)
145 unsigned int i;
146 struct data_reference *dr;
148 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
149 dump_data_reference (file, dr);
152 /* Dump into STDERR all the data references from DATAREFS. */
154 DEBUG_FUNCTION void
155 debug_data_references (VEC (data_reference_p, heap) *datarefs)
157 dump_data_references (stderr, datarefs);
160 /* Dump to STDERR all the dependence relations from DDRS. */
162 DEBUG_FUNCTION void
163 debug_data_dependence_relations (VEC (ddr_p, heap) *ddrs)
165 dump_data_dependence_relations (stderr, ddrs);
168 /* Dump into FILE all the dependence relations from DDRS. */
170 void
171 dump_data_dependence_relations (FILE *file,
172 VEC (ddr_p, heap) *ddrs)
174 unsigned int i;
175 struct data_dependence_relation *ddr;
177 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
178 dump_data_dependence_relation (file, ddr);
181 /* Print to STDERR the data_reference DR. */
183 DEBUG_FUNCTION void
184 debug_data_reference (struct data_reference *dr)
186 dump_data_reference (stderr, dr);
189 /* Dump function for a DATA_REFERENCE structure. */
191 void
192 dump_data_reference (FILE *outf,
193 struct data_reference *dr)
195 unsigned int i;
197 fprintf (outf, "#(Data Ref: \n");
198 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
199 fprintf (outf, "# stmt: ");
200 print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
201 fprintf (outf, "# ref: ");
202 print_generic_stmt (outf, DR_REF (dr), 0);
203 fprintf (outf, "# base_object: ");
204 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
206 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
208 fprintf (outf, "# Access function %d: ", i);
209 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
211 fprintf (outf, "#)\n");
214 /* Dumps the affine function described by FN to the file OUTF. */
216 static void
217 dump_affine_function (FILE *outf, affine_fn fn)
219 unsigned i;
220 tree coef;
222 print_generic_expr (outf, VEC_index (tree, fn, 0), TDF_SLIM);
223 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
225 fprintf (outf, " + ");
226 print_generic_expr (outf, coef, TDF_SLIM);
227 fprintf (outf, " * x_%u", i);
231 /* Dumps the conflict function CF to the file OUTF. */
233 static void
234 dump_conflict_function (FILE *outf, conflict_function *cf)
236 unsigned i;
238 if (cf->n == NO_DEPENDENCE)
239 fprintf (outf, "no dependence\n");
240 else if (cf->n == NOT_KNOWN)
241 fprintf (outf, "not known\n");
242 else
244 for (i = 0; i < cf->n; i++)
246 fprintf (outf, "[");
247 dump_affine_function (outf, cf->fns[i]);
248 fprintf (outf, "]\n");
253 /* Dump function for a SUBSCRIPT structure. */
255 void
256 dump_subscript (FILE *outf, struct subscript *subscript)
258 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
260 fprintf (outf, "\n (subscript \n");
261 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
262 dump_conflict_function (outf, cf);
263 if (CF_NONTRIVIAL_P (cf))
265 tree last_iteration = SUB_LAST_CONFLICT (subscript);
266 fprintf (outf, " last_conflict: ");
267 print_generic_stmt (outf, last_iteration, 0);
270 cf = SUB_CONFLICTS_IN_B (subscript);
271 fprintf (outf, " iterations_that_access_an_element_twice_in_B: ");
272 dump_conflict_function (outf, cf);
273 if (CF_NONTRIVIAL_P (cf))
275 tree last_iteration = SUB_LAST_CONFLICT (subscript);
276 fprintf (outf, " last_conflict: ");
277 print_generic_stmt (outf, last_iteration, 0);
280 fprintf (outf, " (Subscript distance: ");
281 print_generic_stmt (outf, SUB_DISTANCE (subscript), 0);
282 fprintf (outf, " )\n");
283 fprintf (outf, " )\n");
286 /* Print the classic direction vector DIRV to OUTF. */
288 void
289 print_direction_vector (FILE *outf,
290 lambda_vector dirv,
291 int length)
293 int eq;
295 for (eq = 0; eq < length; eq++)
297 enum data_dependence_direction dir = ((enum data_dependence_direction)
298 dirv[eq]);
300 switch (dir)
302 case dir_positive:
303 fprintf (outf, " +");
304 break;
305 case dir_negative:
306 fprintf (outf, " -");
307 break;
308 case dir_equal:
309 fprintf (outf, " =");
310 break;
311 case dir_positive_or_equal:
312 fprintf (outf, " +=");
313 break;
314 case dir_positive_or_negative:
315 fprintf (outf, " +-");
316 break;
317 case dir_negative_or_equal:
318 fprintf (outf, " -=");
319 break;
320 case dir_star:
321 fprintf (outf, " *");
322 break;
323 default:
324 fprintf (outf, "indep");
325 break;
328 fprintf (outf, "\n");
331 /* Print a vector of direction vectors. */
333 void
334 print_dir_vectors (FILE *outf, VEC (lambda_vector, heap) *dir_vects,
335 int length)
337 unsigned j;
338 lambda_vector v;
340 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, v)
341 print_direction_vector (outf, v, length);
344 /* Print out a vector VEC of length N to OUTFILE. */
346 static inline void
347 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
349 int i;
351 for (i = 0; i < n; i++)
352 fprintf (outfile, "%3d ", vector[i]);
353 fprintf (outfile, "\n");
356 /* Print a vector of distance vectors. */
358 void
359 print_dist_vectors (FILE *outf, VEC (lambda_vector, heap) *dist_vects,
360 int length)
362 unsigned j;
363 lambda_vector v;
365 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, v)
366 print_lambda_vector (outf, v, length);
369 /* Debug version. */
371 DEBUG_FUNCTION void
372 debug_data_dependence_relation (struct data_dependence_relation *ddr)
374 dump_data_dependence_relation (stderr, ddr);
377 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
379 void
380 dump_data_dependence_relation (FILE *outf,
381 struct data_dependence_relation *ddr)
383 struct data_reference *dra, *drb;
385 fprintf (outf, "(Data Dep: \n");
387 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
389 if (ddr)
391 dra = DDR_A (ddr);
392 drb = DDR_B (ddr);
393 if (dra)
394 dump_data_reference (outf, dra);
395 else
396 fprintf (outf, " (nil)\n");
397 if (drb)
398 dump_data_reference (outf, drb);
399 else
400 fprintf (outf, " (nil)\n");
402 fprintf (outf, " (don't know)\n)\n");
403 return;
406 dra = DDR_A (ddr);
407 drb = DDR_B (ddr);
408 dump_data_reference (outf, dra);
409 dump_data_reference (outf, drb);
411 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
412 fprintf (outf, " (no dependence)\n");
414 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
416 unsigned int i;
417 struct loop *loopi;
419 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
421 fprintf (outf, " access_fn_A: ");
422 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
423 fprintf (outf, " access_fn_B: ");
424 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
425 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
428 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
429 fprintf (outf, " loop nest: (");
430 FOR_EACH_VEC_ELT (loop_p, DDR_LOOP_NEST (ddr), i, loopi)
431 fprintf (outf, "%d ", loopi->num);
432 fprintf (outf, ")\n");
434 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
436 fprintf (outf, " distance_vector: ");
437 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
438 DDR_NB_LOOPS (ddr));
441 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
443 fprintf (outf, " direction_vector: ");
444 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
445 DDR_NB_LOOPS (ddr));
449 fprintf (outf, ")\n");
452 /* Dump function for a DATA_DEPENDENCE_DIRECTION structure. */
454 void
455 dump_data_dependence_direction (FILE *file,
456 enum data_dependence_direction dir)
458 switch (dir)
460 case dir_positive:
461 fprintf (file, "+");
462 break;
464 case dir_negative:
465 fprintf (file, "-");
466 break;
468 case dir_equal:
469 fprintf (file, "=");
470 break;
472 case dir_positive_or_negative:
473 fprintf (file, "+-");
474 break;
476 case dir_positive_or_equal:
477 fprintf (file, "+=");
478 break;
480 case dir_negative_or_equal:
481 fprintf (file, "-=");
482 break;
484 case dir_star:
485 fprintf (file, "*");
486 break;
488 default:
489 break;
493 /* Dumps the distance and direction vectors in FILE. DDRS contains
494 the dependence relations, and VECT_SIZE is the size of the
495 dependence vectors, or in other words the number of loops in the
496 considered nest. */
498 void
499 dump_dist_dir_vectors (FILE *file, VEC (ddr_p, heap) *ddrs)
501 unsigned int i, j;
502 struct data_dependence_relation *ddr;
503 lambda_vector v;
505 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
506 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
508 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), j, v)
510 fprintf (file, "DISTANCE_V (");
511 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
512 fprintf (file, ")\n");
515 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), j, v)
517 fprintf (file, "DIRECTION_V (");
518 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
519 fprintf (file, ")\n");
523 fprintf (file, "\n\n");
526 /* Dumps the data dependence relations DDRS in FILE. */
528 void
529 dump_ddrs (FILE *file, VEC (ddr_p, heap) *ddrs)
531 unsigned int i;
532 struct data_dependence_relation *ddr;
534 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
535 dump_data_dependence_relation (file, ddr);
537 fprintf (file, "\n\n");
540 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
541 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
542 constant of type ssizetype, and returns true. If we cannot do this
543 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
544 is returned. */
546 static bool
547 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
548 tree *var, tree *off)
550 tree var0, var1;
551 tree off0, off1;
552 enum tree_code ocode = code;
554 *var = NULL_TREE;
555 *off = NULL_TREE;
557 switch (code)
559 case INTEGER_CST:
560 *var = build_int_cst (type, 0);
561 *off = fold_convert (ssizetype, op0);
562 return true;
564 case POINTER_PLUS_EXPR:
565 ocode = PLUS_EXPR;
566 /* FALLTHROUGH */
567 case PLUS_EXPR:
568 case MINUS_EXPR:
569 split_constant_offset (op0, &var0, &off0);
570 split_constant_offset (op1, &var1, &off1);
571 *var = fold_build2 (code, type, var0, var1);
572 *off = size_binop (ocode, off0, off1);
573 return true;
575 case MULT_EXPR:
576 if (TREE_CODE (op1) != INTEGER_CST)
577 return false;
579 split_constant_offset (op0, &var0, &off0);
580 *var = fold_build2 (MULT_EXPR, type, var0, op1);
581 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
582 return true;
584 case ADDR_EXPR:
586 tree base, poffset;
587 HOST_WIDE_INT pbitsize, pbitpos;
588 enum machine_mode pmode;
589 int punsignedp, pvolatilep;
591 op0 = TREE_OPERAND (op0, 0);
592 base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
593 &pmode, &punsignedp, &pvolatilep, false);
595 if (pbitpos % BITS_PER_UNIT != 0)
596 return false;
597 base = build_fold_addr_expr (base);
598 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
600 if (poffset)
602 split_constant_offset (poffset, &poffset, &off1);
603 off0 = size_binop (PLUS_EXPR, off0, off1);
604 if (POINTER_TYPE_P (TREE_TYPE (base)))
605 base = fold_build_pointer_plus (base, poffset);
606 else
607 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
608 fold_convert (TREE_TYPE (base), poffset));
611 var0 = fold_convert (type, base);
613 /* If variable length types are involved, punt, otherwise casts
614 might be converted into ARRAY_REFs in gimplify_conversion.
615 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
616 possibly no longer appears in current GIMPLE, might resurface.
617 This perhaps could run
618 if (CONVERT_EXPR_P (var0))
620 gimplify_conversion (&var0);
621 // Attempt to fill in any within var0 found ARRAY_REF's
622 // element size from corresponding op embedded ARRAY_REF,
623 // if unsuccessful, just punt.
624 } */
625 while (POINTER_TYPE_P (type))
626 type = TREE_TYPE (type);
627 if (int_size_in_bytes (type) < 0)
628 return false;
630 *var = var0;
631 *off = off0;
632 return true;
635 case SSA_NAME:
637 gimple def_stmt = SSA_NAME_DEF_STMT (op0);
638 enum tree_code subcode;
640 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
641 return false;
643 var0 = gimple_assign_rhs1 (def_stmt);
644 subcode = gimple_assign_rhs_code (def_stmt);
645 var1 = gimple_assign_rhs2 (def_stmt);
647 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
649 CASE_CONVERT:
651 /* We must not introduce undefined overflow, and we must not change the value.
652 Hence we're okay if the inner type doesn't overflow to start with
653 (pointer or signed), the outer type also is an integer or pointer
654 and the outer precision is at least as large as the inner. */
655 tree itype = TREE_TYPE (op0);
656 if ((POINTER_TYPE_P (itype)
657 || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
658 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
659 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
661 split_constant_offset (op0, &var0, off);
662 *var = fold_convert (type, var0);
663 return true;
665 return false;
668 default:
669 return false;
673 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
674 will be ssizetype. */
676 void
677 split_constant_offset (tree exp, tree *var, tree *off)
679 tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
680 enum tree_code code;
682 *var = exp;
683 *off = ssize_int (0);
684 STRIP_NOPS (exp);
686 if (tree_is_chrec (exp)
687 || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
688 return;
690 otype = TREE_TYPE (exp);
691 code = TREE_CODE (exp);
692 extract_ops_from_tree (exp, &code, &op0, &op1);
693 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
695 *var = fold_convert (type, e);
696 *off = o;
700 /* Returns the address ADDR of an object in a canonical shape (without nop
701 casts, and with type of pointer to the object). */
703 static tree
704 canonicalize_base_object_address (tree addr)
706 tree orig = addr;
708 STRIP_NOPS (addr);
710 /* The base address may be obtained by casting from integer, in that case
711 keep the cast. */
712 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
713 return orig;
715 if (TREE_CODE (addr) != ADDR_EXPR)
716 return addr;
718 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
721 /* Analyzes the behavior of the memory reference DR in the innermost loop or
722 basic block that contains it. Returns true if analysis succeed or false
723 otherwise. */
725 bool
726 dr_analyze_innermost (struct data_reference *dr, struct loop *nest)
728 gimple stmt = DR_STMT (dr);
729 struct loop *loop = loop_containing_stmt (stmt);
730 tree ref = DR_REF (dr);
731 HOST_WIDE_INT pbitsize, pbitpos;
732 tree base, poffset;
733 enum machine_mode pmode;
734 int punsignedp, pvolatilep;
735 affine_iv base_iv, offset_iv;
736 tree init, dinit, step;
737 bool in_loop = (loop && loop->num);
739 if (dump_file && (dump_flags & TDF_DETAILS))
740 fprintf (dump_file, "analyze_innermost: ");
742 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
743 &pmode, &punsignedp, &pvolatilep, false);
744 gcc_assert (base != NULL_TREE);
746 if (pbitpos % BITS_PER_UNIT != 0)
748 if (dump_file && (dump_flags & TDF_DETAILS))
749 fprintf (dump_file, "failed: bit offset alignment.\n");
750 return false;
753 if (TREE_CODE (base) == MEM_REF)
755 if (!integer_zerop (TREE_OPERAND (base, 1)))
757 if (!poffset)
759 double_int moff = mem_ref_offset (base);
760 poffset = double_int_to_tree (sizetype, moff);
762 else
763 poffset = size_binop (PLUS_EXPR, poffset, TREE_OPERAND (base, 1));
765 base = TREE_OPERAND (base, 0);
767 else
768 base = build_fold_addr_expr (base);
770 if (in_loop)
772 if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
773 false))
775 if (nest)
777 if (dump_file && (dump_flags & TDF_DETAILS))
778 fprintf (dump_file, "failed: evolution of base is not"
779 " affine.\n");
780 return false;
782 else
784 base_iv.base = base;
785 base_iv.step = ssize_int (0);
786 base_iv.no_overflow = true;
790 else
792 base_iv.base = base;
793 base_iv.step = ssize_int (0);
794 base_iv.no_overflow = true;
797 if (!poffset)
799 offset_iv.base = ssize_int (0);
800 offset_iv.step = ssize_int (0);
802 else
804 if (!in_loop)
806 offset_iv.base = poffset;
807 offset_iv.step = ssize_int (0);
809 else if (!simple_iv (loop, loop_containing_stmt (stmt),
810 poffset, &offset_iv, false))
812 if (nest)
814 if (dump_file && (dump_flags & TDF_DETAILS))
815 fprintf (dump_file, "failed: evolution of offset is not"
816 " affine.\n");
817 return false;
819 else
821 offset_iv.base = poffset;
822 offset_iv.step = ssize_int (0);
827 init = ssize_int (pbitpos / BITS_PER_UNIT);
828 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
829 init = size_binop (PLUS_EXPR, init, dinit);
830 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
831 init = size_binop (PLUS_EXPR, init, dinit);
833 step = size_binop (PLUS_EXPR,
834 fold_convert (ssizetype, base_iv.step),
835 fold_convert (ssizetype, offset_iv.step));
837 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
839 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
840 DR_INIT (dr) = init;
841 DR_STEP (dr) = step;
843 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
845 if (dump_file && (dump_flags & TDF_DETAILS))
846 fprintf (dump_file, "success.\n");
848 return true;
851 /* Determines the base object and the list of indices of memory reference
852 DR, analyzed in LOOP and instantiated in loop nest NEST. */
854 static void
855 dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
857 VEC (tree, heap) *access_fns = NULL;
858 tree ref, *aref, op;
859 tree base, off, access_fn;
860 basic_block before_loop;
862 /* If analyzing a basic-block there are no indices to analyze
863 and thus no access functions. */
864 if (!nest)
866 DR_BASE_OBJECT (dr) = DR_REF (dr);
867 DR_ACCESS_FNS (dr) = NULL;
868 return;
871 ref = unshare_expr (DR_REF (dr));
872 before_loop = block_before_loop (nest);
874 /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
875 into a two element array with a constant index. The base is
876 then just the immediate underlying object. */
877 if (TREE_CODE (ref) == REALPART_EXPR)
879 ref = TREE_OPERAND (ref, 0);
880 VEC_safe_push (tree, heap, access_fns, integer_zero_node);
882 else if (TREE_CODE (ref) == IMAGPART_EXPR)
884 ref = TREE_OPERAND (ref, 0);
885 VEC_safe_push (tree, heap, access_fns, integer_one_node);
888 /* Analyze access functions of dimensions we know to be independent. */
889 aref = &ref;
890 while (handled_component_p (*aref))
892 if (TREE_CODE (*aref) == ARRAY_REF)
894 op = TREE_OPERAND (*aref, 1);
895 access_fn = analyze_scalar_evolution (loop, op);
896 access_fn = instantiate_scev (before_loop, loop, access_fn);
897 VEC_safe_push (tree, heap, access_fns, access_fn);
898 /* For ARRAY_REFs the base is the reference with the index replaced
899 by zero if we can not strip it as the outermost component. */
900 if (*aref == ref)
902 *aref = TREE_OPERAND (*aref, 0);
903 continue;
905 else
906 TREE_OPERAND (*aref, 1) = build_int_cst (TREE_TYPE (op), 0);
909 aref = &TREE_OPERAND (*aref, 0);
912 /* If the address operand of a MEM_REF base has an evolution in the
913 analyzed nest, add it as an additional independent access-function. */
914 if (TREE_CODE (*aref) == MEM_REF)
916 op = TREE_OPERAND (*aref, 0);
917 access_fn = analyze_scalar_evolution (loop, op);
918 access_fn = instantiate_scev (before_loop, loop, access_fn);
919 if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
921 tree orig_type;
922 base = initial_condition (access_fn);
923 orig_type = TREE_TYPE (base);
924 STRIP_USELESS_TYPE_CONVERSION (base);
925 split_constant_offset (base, &base, &off);
926 /* Fold the MEM_REF offset into the evolutions initial
927 value to make more bases comparable. */
928 if (!integer_zerop (TREE_OPERAND (*aref, 1)))
930 off = size_binop (PLUS_EXPR, off,
931 fold_convert (ssizetype,
932 TREE_OPERAND (*aref, 1)));
933 TREE_OPERAND (*aref, 1)
934 = build_int_cst (TREE_TYPE (TREE_OPERAND (*aref, 1)), 0);
936 access_fn = chrec_replace_initial_condition
937 (access_fn, fold_convert (orig_type, off));
938 *aref = fold_build2_loc (EXPR_LOCATION (*aref),
939 MEM_REF, TREE_TYPE (*aref),
940 base, TREE_OPERAND (*aref, 1));
941 VEC_safe_push (tree, heap, access_fns, access_fn);
945 DR_BASE_OBJECT (dr) = ref;
946 DR_ACCESS_FNS (dr) = access_fns;
949 /* Extracts the alias analysis information from the memory reference DR. */
951 static void
952 dr_analyze_alias (struct data_reference *dr)
954 tree ref = DR_REF (dr);
955 tree base = get_base_address (ref), addr;
957 if (INDIRECT_REF_P (base)
958 || TREE_CODE (base) == MEM_REF)
960 addr = TREE_OPERAND (base, 0);
961 if (TREE_CODE (addr) == SSA_NAME)
962 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
966 /* Frees data reference DR. */
968 void
969 free_data_ref (data_reference_p dr)
971 VEC_free (tree, heap, DR_ACCESS_FNS (dr));
972 free (dr);
975 /* Analyzes memory reference MEMREF accessed in STMT. The reference
976 is read if IS_READ is true, write otherwise. Returns the
977 data_reference description of MEMREF. NEST is the outermost loop
978 in which the reference should be instantiated, LOOP is the loop in
979 which the data reference should be analyzed. */
981 struct data_reference *
982 create_data_ref (loop_p nest, loop_p loop, tree memref, gimple stmt,
983 bool is_read)
985 struct data_reference *dr;
987 if (dump_file && (dump_flags & TDF_DETAILS))
989 fprintf (dump_file, "Creating dr for ");
990 print_generic_expr (dump_file, memref, TDF_SLIM);
991 fprintf (dump_file, "\n");
994 dr = XCNEW (struct data_reference);
995 DR_STMT (dr) = stmt;
996 DR_REF (dr) = memref;
997 DR_IS_READ (dr) = is_read;
999 dr_analyze_innermost (dr, nest);
1000 dr_analyze_indices (dr, nest, loop);
1001 dr_analyze_alias (dr);
1003 if (dump_file && (dump_flags & TDF_DETAILS))
1005 unsigned i;
1006 fprintf (dump_file, "\tbase_address: ");
1007 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1008 fprintf (dump_file, "\n\toffset from base address: ");
1009 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1010 fprintf (dump_file, "\n\tconstant offset from base address: ");
1011 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1012 fprintf (dump_file, "\n\tstep: ");
1013 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1014 fprintf (dump_file, "\n\taligned to: ");
1015 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
1016 fprintf (dump_file, "\n\tbase_object: ");
1017 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1018 fprintf (dump_file, "\n");
1019 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1021 fprintf (dump_file, "\tAccess function %d: ", i);
1022 print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1026 return dr;
1029 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1030 expressions. */
1031 static bool
1032 dr_equal_offsets_p1 (tree offset1, tree offset2)
1034 bool res;
1036 STRIP_NOPS (offset1);
1037 STRIP_NOPS (offset2);
1039 if (offset1 == offset2)
1040 return true;
1042 if (TREE_CODE (offset1) != TREE_CODE (offset2)
1043 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1044 return false;
1046 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1047 TREE_OPERAND (offset2, 0));
1049 if (!res || !BINARY_CLASS_P (offset1))
1050 return res;
1052 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1053 TREE_OPERAND (offset2, 1));
1055 return res;
1058 /* Check if DRA and DRB have equal offsets. */
1059 bool
1060 dr_equal_offsets_p (struct data_reference *dra,
1061 struct data_reference *drb)
1063 tree offset1, offset2;
1065 offset1 = DR_OFFSET (dra);
1066 offset2 = DR_OFFSET (drb);
1068 return dr_equal_offsets_p1 (offset1, offset2);
1071 /* Returns true if FNA == FNB. */
1073 static bool
1074 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1076 unsigned i, n = VEC_length (tree, fna);
1078 if (n != VEC_length (tree, fnb))
1079 return false;
1081 for (i = 0; i < n; i++)
1082 if (!operand_equal_p (VEC_index (tree, fna, i),
1083 VEC_index (tree, fnb, i), 0))
1084 return false;
1086 return true;
1089 /* If all the functions in CF are the same, returns one of them,
1090 otherwise returns NULL. */
1092 static affine_fn
1093 common_affine_function (conflict_function *cf)
1095 unsigned i;
1096 affine_fn comm;
1098 if (!CF_NONTRIVIAL_P (cf))
1099 return NULL;
1101 comm = cf->fns[0];
1103 for (i = 1; i < cf->n; i++)
1104 if (!affine_function_equal_p (comm, cf->fns[i]))
1105 return NULL;
1107 return comm;
1110 /* Returns the base of the affine function FN. */
1112 static tree
1113 affine_function_base (affine_fn fn)
1115 return VEC_index (tree, fn, 0);
1118 /* Returns true if FN is a constant. */
1120 static bool
1121 affine_function_constant_p (affine_fn fn)
1123 unsigned i;
1124 tree coef;
1126 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
1127 if (!integer_zerop (coef))
1128 return false;
1130 return true;
1133 /* Returns true if FN is the zero constant function. */
1135 static bool
1136 affine_function_zero_p (affine_fn fn)
1138 return (integer_zerop (affine_function_base (fn))
1139 && affine_function_constant_p (fn));
1142 /* Returns a signed integer type with the largest precision from TA
1143 and TB. */
1145 static tree
1146 signed_type_for_types (tree ta, tree tb)
1148 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1149 return signed_type_for (ta);
1150 else
1151 return signed_type_for (tb);
1154 /* Applies operation OP on affine functions FNA and FNB, and returns the
1155 result. */
1157 static affine_fn
1158 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1160 unsigned i, n, m;
1161 affine_fn ret;
1162 tree coef;
1164 if (VEC_length (tree, fnb) > VEC_length (tree, fna))
1166 n = VEC_length (tree, fna);
1167 m = VEC_length (tree, fnb);
1169 else
1171 n = VEC_length (tree, fnb);
1172 m = VEC_length (tree, fna);
1175 ret = VEC_alloc (tree, heap, m);
1176 for (i = 0; i < n; i++)
1178 tree type = signed_type_for_types (TREE_TYPE (VEC_index (tree, fna, i)),
1179 TREE_TYPE (VEC_index (tree, fnb, i)));
1181 VEC_quick_push (tree, ret,
1182 fold_build2 (op, type,
1183 VEC_index (tree, fna, i),
1184 VEC_index (tree, fnb, i)));
1187 for (; VEC_iterate (tree, fna, i, coef); i++)
1188 VEC_quick_push (tree, ret,
1189 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1190 coef, integer_zero_node));
1191 for (; VEC_iterate (tree, fnb, i, coef); i++)
1192 VEC_quick_push (tree, ret,
1193 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1194 integer_zero_node, coef));
1196 return ret;
1199 /* Returns the sum of affine functions FNA and FNB. */
1201 static affine_fn
1202 affine_fn_plus (affine_fn fna, affine_fn fnb)
1204 return affine_fn_op (PLUS_EXPR, fna, fnb);
1207 /* Returns the difference of affine functions FNA and FNB. */
1209 static affine_fn
1210 affine_fn_minus (affine_fn fna, affine_fn fnb)
1212 return affine_fn_op (MINUS_EXPR, fna, fnb);
1215 /* Frees affine function FN. */
1217 static void
1218 affine_fn_free (affine_fn fn)
1220 VEC_free (tree, heap, fn);
1223 /* Determine for each subscript in the data dependence relation DDR
1224 the distance. */
1226 static void
1227 compute_subscript_distance (struct data_dependence_relation *ddr)
1229 conflict_function *cf_a, *cf_b;
1230 affine_fn fn_a, fn_b, diff;
1232 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1234 unsigned int i;
1236 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1238 struct subscript *subscript;
1240 subscript = DDR_SUBSCRIPT (ddr, i);
1241 cf_a = SUB_CONFLICTS_IN_A (subscript);
1242 cf_b = SUB_CONFLICTS_IN_B (subscript);
1244 fn_a = common_affine_function (cf_a);
1245 fn_b = common_affine_function (cf_b);
1246 if (!fn_a || !fn_b)
1248 SUB_DISTANCE (subscript) = chrec_dont_know;
1249 return;
1251 diff = affine_fn_minus (fn_a, fn_b);
1253 if (affine_function_constant_p (diff))
1254 SUB_DISTANCE (subscript) = affine_function_base (diff);
1255 else
1256 SUB_DISTANCE (subscript) = chrec_dont_know;
1258 affine_fn_free (diff);
1263 /* Returns the conflict function for "unknown". */
1265 static conflict_function *
1266 conflict_fn_not_known (void)
1268 conflict_function *fn = XCNEW (conflict_function);
1269 fn->n = NOT_KNOWN;
1271 return fn;
1274 /* Returns the conflict function for "independent". */
1276 static conflict_function *
1277 conflict_fn_no_dependence (void)
1279 conflict_function *fn = XCNEW (conflict_function);
1280 fn->n = NO_DEPENDENCE;
1282 return fn;
1285 /* Returns true if the address of OBJ is invariant in LOOP. */
1287 static bool
1288 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1290 while (handled_component_p (obj))
1292 if (TREE_CODE (obj) == ARRAY_REF)
1294 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1295 need to check the stride and the lower bound of the reference. */
1296 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1297 loop->num)
1298 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1299 loop->num))
1300 return false;
1302 else if (TREE_CODE (obj) == COMPONENT_REF)
1304 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1305 loop->num))
1306 return false;
1308 obj = TREE_OPERAND (obj, 0);
1311 if (!INDIRECT_REF_P (obj)
1312 && TREE_CODE (obj) != MEM_REF)
1313 return true;
1315 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1316 loop->num);
1319 /* Returns false if we can prove that data references A and B do not alias,
1320 true otherwise. If LOOP_NEST is false no cross-iteration aliases are
1321 considered. */
1323 bool
1324 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
1325 bool loop_nest)
1327 tree addr_a = DR_BASE_OBJECT (a);
1328 tree addr_b = DR_BASE_OBJECT (b);
1330 /* If we are not processing a loop nest but scalar code we
1331 do not need to care about possible cross-iteration dependences
1332 and thus can process the full original reference. Do so,
1333 similar to how loop invariant motion applies extra offset-based
1334 disambiguation. */
1335 if (!loop_nest)
1337 aff_tree off1, off2;
1338 double_int size1, size2;
1339 get_inner_reference_aff (DR_REF (a), &off1, &size1);
1340 get_inner_reference_aff (DR_REF (b), &off2, &size2);
1341 aff_combination_scale (&off1, double_int_minus_one);
1342 aff_combination_add (&off2, &off1);
1343 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1344 return false;
1347 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
1348 return refs_output_dependent_p (addr_a, addr_b);
1349 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
1350 return refs_anti_dependent_p (addr_a, addr_b);
1351 return refs_may_alias_p (addr_a, addr_b);
1354 static void compute_self_dependence (struct data_dependence_relation *);
1356 /* Initialize a data dependence relation between data accesses A and
1357 B. NB_LOOPS is the number of loops surrounding the references: the
1358 size of the classic distance/direction vectors. */
1360 static struct data_dependence_relation *
1361 initialize_data_dependence_relation (struct data_reference *a,
1362 struct data_reference *b,
1363 VEC (loop_p, heap) *loop_nest)
1365 struct data_dependence_relation *res;
1366 unsigned int i;
1368 res = XNEW (struct data_dependence_relation);
1369 DDR_A (res) = a;
1370 DDR_B (res) = b;
1371 DDR_LOOP_NEST (res) = NULL;
1372 DDR_REVERSED_P (res) = false;
1373 DDR_SUBSCRIPTS (res) = NULL;
1374 DDR_DIR_VECTS (res) = NULL;
1375 DDR_DIST_VECTS (res) = NULL;
1377 if (a == NULL || b == NULL)
1379 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1380 return res;
1383 /* If the data references do not alias, then they are independent. */
1384 if (!dr_may_alias_p (a, b, loop_nest != NULL))
1386 DDR_ARE_DEPENDENT (res) = chrec_known;
1387 return res;
1390 /* When the references are exactly the same, don't spend time doing
1391 the data dependence tests, just initialize the ddr and return. */
1392 if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1394 DDR_AFFINE_P (res) = true;
1395 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1396 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1397 DDR_LOOP_NEST (res) = loop_nest;
1398 DDR_INNER_LOOP (res) = 0;
1399 DDR_SELF_REFERENCE (res) = true;
1400 compute_self_dependence (res);
1401 return res;
1404 /* If the references do not access the same object, we do not know
1405 whether they alias or not. */
1406 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1408 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1409 return res;
1412 /* If the base of the object is not invariant in the loop nest, we cannot
1413 analyze it. TODO -- in fact, it would suffice to record that there may
1414 be arbitrary dependences in the loops where the base object varies. */
1415 if (loop_nest
1416 && !object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1417 DR_BASE_OBJECT (a)))
1419 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1420 return res;
1423 /* If the number of dimensions of the access to not agree we can have
1424 a pointer access to a component of the array element type and an
1425 array access while the base-objects are still the same. Punt. */
1426 if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
1428 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1429 return res;
1432 DDR_AFFINE_P (res) = true;
1433 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1434 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1435 DDR_LOOP_NEST (res) = loop_nest;
1436 DDR_INNER_LOOP (res) = 0;
1437 DDR_SELF_REFERENCE (res) = false;
1439 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1441 struct subscript *subscript;
1443 subscript = XNEW (struct subscript);
1444 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1445 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1446 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1447 SUB_DISTANCE (subscript) = chrec_dont_know;
1448 VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1451 return res;
1454 /* Frees memory used by the conflict function F. */
1456 static void
1457 free_conflict_function (conflict_function *f)
1459 unsigned i;
1461 if (CF_NONTRIVIAL_P (f))
1463 for (i = 0; i < f->n; i++)
1464 affine_fn_free (f->fns[i]);
1466 free (f);
1469 /* Frees memory used by SUBSCRIPTS. */
1471 static void
1472 free_subscripts (VEC (subscript_p, heap) *subscripts)
1474 unsigned i;
1475 subscript_p s;
1477 FOR_EACH_VEC_ELT (subscript_p, subscripts, i, s)
1479 free_conflict_function (s->conflicting_iterations_in_a);
1480 free_conflict_function (s->conflicting_iterations_in_b);
1481 free (s);
1483 VEC_free (subscript_p, heap, subscripts);
1486 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1487 description. */
1489 static inline void
1490 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1491 tree chrec)
1493 if (dump_file && (dump_flags & TDF_DETAILS))
1495 fprintf (dump_file, "(dependence classified: ");
1496 print_generic_expr (dump_file, chrec, 0);
1497 fprintf (dump_file, ")\n");
1500 DDR_ARE_DEPENDENT (ddr) = chrec;
1501 free_subscripts (DDR_SUBSCRIPTS (ddr));
1502 DDR_SUBSCRIPTS (ddr) = NULL;
1505 /* The dependence relation DDR cannot be represented by a distance
1506 vector. */
1508 static inline void
1509 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1511 if (dump_file && (dump_flags & TDF_DETAILS))
1512 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1514 DDR_AFFINE_P (ddr) = false;
1519 /* This section contains the classic Banerjee tests. */
1521 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1522 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1524 static inline bool
1525 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1527 return (evolution_function_is_constant_p (chrec_a)
1528 && evolution_function_is_constant_p (chrec_b));
1531 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1532 variable, i.e., if the SIV (Single Index Variable) test is true. */
1534 static bool
1535 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1537 if ((evolution_function_is_constant_p (chrec_a)
1538 && evolution_function_is_univariate_p (chrec_b))
1539 || (evolution_function_is_constant_p (chrec_b)
1540 && evolution_function_is_univariate_p (chrec_a)))
1541 return true;
1543 if (evolution_function_is_univariate_p (chrec_a)
1544 && evolution_function_is_univariate_p (chrec_b))
1546 switch (TREE_CODE (chrec_a))
1548 case POLYNOMIAL_CHREC:
1549 switch (TREE_CODE (chrec_b))
1551 case POLYNOMIAL_CHREC:
1552 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1553 return false;
1555 default:
1556 return true;
1559 default:
1560 return true;
1564 return false;
1567 /* Creates a conflict function with N dimensions. The affine functions
1568 in each dimension follow. */
1570 static conflict_function *
1571 conflict_fn (unsigned n, ...)
1573 unsigned i;
1574 conflict_function *ret = XCNEW (conflict_function);
1575 va_list ap;
1577 gcc_assert (0 < n && n <= MAX_DIM);
1578 va_start(ap, n);
1580 ret->n = n;
1581 for (i = 0; i < n; i++)
1582 ret->fns[i] = va_arg (ap, affine_fn);
1583 va_end(ap);
1585 return ret;
1588 /* Returns constant affine function with value CST. */
1590 static affine_fn
1591 affine_fn_cst (tree cst)
1593 affine_fn fn = VEC_alloc (tree, heap, 1);
1594 VEC_quick_push (tree, fn, cst);
1595 return fn;
1598 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1600 static affine_fn
1601 affine_fn_univar (tree cst, unsigned dim, tree coef)
1603 affine_fn fn = VEC_alloc (tree, heap, dim + 1);
1604 unsigned i;
1606 gcc_assert (dim > 0);
1607 VEC_quick_push (tree, fn, cst);
1608 for (i = 1; i < dim; i++)
1609 VEC_quick_push (tree, fn, integer_zero_node);
1610 VEC_quick_push (tree, fn, coef);
1611 return fn;
1614 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1615 *OVERLAPS_B are initialized to the functions that describe the
1616 relation between the elements accessed twice by CHREC_A and
1617 CHREC_B. For k >= 0, the following property is verified:
1619 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1621 static void
1622 analyze_ziv_subscript (tree chrec_a,
1623 tree chrec_b,
1624 conflict_function **overlaps_a,
1625 conflict_function **overlaps_b,
1626 tree *last_conflicts)
1628 tree type, difference;
1629 dependence_stats.num_ziv++;
1631 if (dump_file && (dump_flags & TDF_DETAILS))
1632 fprintf (dump_file, "(analyze_ziv_subscript \n");
1634 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1635 chrec_a = chrec_convert (type, chrec_a, NULL);
1636 chrec_b = chrec_convert (type, chrec_b, NULL);
1637 difference = chrec_fold_minus (type, chrec_a, chrec_b);
1639 switch (TREE_CODE (difference))
1641 case INTEGER_CST:
1642 if (integer_zerop (difference))
1644 /* The difference is equal to zero: the accessed index
1645 overlaps for each iteration in the loop. */
1646 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1647 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1648 *last_conflicts = chrec_dont_know;
1649 dependence_stats.num_ziv_dependent++;
1651 else
1653 /* The accesses do not overlap. */
1654 *overlaps_a = conflict_fn_no_dependence ();
1655 *overlaps_b = conflict_fn_no_dependence ();
1656 *last_conflicts = integer_zero_node;
1657 dependence_stats.num_ziv_independent++;
1659 break;
1661 default:
1662 /* We're not sure whether the indexes overlap. For the moment,
1663 conservatively answer "don't know". */
1664 if (dump_file && (dump_flags & TDF_DETAILS))
1665 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1667 *overlaps_a = conflict_fn_not_known ();
1668 *overlaps_b = conflict_fn_not_known ();
1669 *last_conflicts = chrec_dont_know;
1670 dependence_stats.num_ziv_unimplemented++;
1671 break;
1674 if (dump_file && (dump_flags & TDF_DETAILS))
1675 fprintf (dump_file, ")\n");
1678 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
1679 and only if it fits to the int type. If this is not the case, or the
1680 bound on the number of iterations of LOOP could not be derived, returns
1681 chrec_dont_know. */
1683 static tree
1684 max_stmt_executions_tree (struct loop *loop)
1686 double_int nit;
1688 if (!max_stmt_executions (loop, true, &nit))
1689 return chrec_dont_know;
1691 if (!double_int_fits_to_tree_p (unsigned_type_node, nit))
1692 return chrec_dont_know;
1694 return double_int_to_tree (unsigned_type_node, nit);
1697 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1698 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1699 *OVERLAPS_B are initialized to the functions that describe the
1700 relation between the elements accessed twice by CHREC_A and
1701 CHREC_B. For k >= 0, the following property is verified:
1703 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1705 static void
1706 analyze_siv_subscript_cst_affine (tree chrec_a,
1707 tree chrec_b,
1708 conflict_function **overlaps_a,
1709 conflict_function **overlaps_b,
1710 tree *last_conflicts)
1712 bool value0, value1, value2;
1713 tree type, difference, tmp;
1715 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1716 chrec_a = chrec_convert (type, chrec_a, NULL);
1717 chrec_b = chrec_convert (type, chrec_b, NULL);
1718 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1720 if (!chrec_is_positive (initial_condition (difference), &value0))
1722 if (dump_file && (dump_flags & TDF_DETAILS))
1723 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1725 dependence_stats.num_siv_unimplemented++;
1726 *overlaps_a = conflict_fn_not_known ();
1727 *overlaps_b = conflict_fn_not_known ();
1728 *last_conflicts = chrec_dont_know;
1729 return;
1731 else
1733 if (value0 == false)
1735 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1737 if (dump_file && (dump_flags & TDF_DETAILS))
1738 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1740 *overlaps_a = conflict_fn_not_known ();
1741 *overlaps_b = conflict_fn_not_known ();
1742 *last_conflicts = chrec_dont_know;
1743 dependence_stats.num_siv_unimplemented++;
1744 return;
1746 else
1748 if (value1 == true)
1750 /* Example:
1751 chrec_a = 12
1752 chrec_b = {10, +, 1}
1755 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1757 HOST_WIDE_INT numiter;
1758 struct loop *loop = get_chrec_loop (chrec_b);
1760 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1761 tmp = fold_build2 (EXACT_DIV_EXPR, type,
1762 fold_build1 (ABS_EXPR, type, difference),
1763 CHREC_RIGHT (chrec_b));
1764 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1765 *last_conflicts = integer_one_node;
1768 /* Perform weak-zero siv test to see if overlap is
1769 outside the loop bounds. */
1770 numiter = max_stmt_executions_int (loop, true);
1772 if (numiter >= 0
1773 && compare_tree_int (tmp, numiter) > 0)
1775 free_conflict_function (*overlaps_a);
1776 free_conflict_function (*overlaps_b);
1777 *overlaps_a = conflict_fn_no_dependence ();
1778 *overlaps_b = conflict_fn_no_dependence ();
1779 *last_conflicts = integer_zero_node;
1780 dependence_stats.num_siv_independent++;
1781 return;
1783 dependence_stats.num_siv_dependent++;
1784 return;
1787 /* When the step does not divide the difference, there are
1788 no overlaps. */
1789 else
1791 *overlaps_a = conflict_fn_no_dependence ();
1792 *overlaps_b = conflict_fn_no_dependence ();
1793 *last_conflicts = integer_zero_node;
1794 dependence_stats.num_siv_independent++;
1795 return;
1799 else
1801 /* Example:
1802 chrec_a = 12
1803 chrec_b = {10, +, -1}
1805 In this case, chrec_a will not overlap with chrec_b. */
1806 *overlaps_a = conflict_fn_no_dependence ();
1807 *overlaps_b = conflict_fn_no_dependence ();
1808 *last_conflicts = integer_zero_node;
1809 dependence_stats.num_siv_independent++;
1810 return;
1814 else
1816 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1818 if (dump_file && (dump_flags & TDF_DETAILS))
1819 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1821 *overlaps_a = conflict_fn_not_known ();
1822 *overlaps_b = conflict_fn_not_known ();
1823 *last_conflicts = chrec_dont_know;
1824 dependence_stats.num_siv_unimplemented++;
1825 return;
1827 else
1829 if (value2 == false)
1831 /* Example:
1832 chrec_a = 3
1833 chrec_b = {10, +, -1}
1835 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1837 HOST_WIDE_INT numiter;
1838 struct loop *loop = get_chrec_loop (chrec_b);
1840 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1841 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
1842 CHREC_RIGHT (chrec_b));
1843 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1844 *last_conflicts = integer_one_node;
1846 /* Perform weak-zero siv test to see if overlap is
1847 outside the loop bounds. */
1848 numiter = max_stmt_executions_int (loop, true);
1850 if (numiter >= 0
1851 && compare_tree_int (tmp, numiter) > 0)
1853 free_conflict_function (*overlaps_a);
1854 free_conflict_function (*overlaps_b);
1855 *overlaps_a = conflict_fn_no_dependence ();
1856 *overlaps_b = conflict_fn_no_dependence ();
1857 *last_conflicts = integer_zero_node;
1858 dependence_stats.num_siv_independent++;
1859 return;
1861 dependence_stats.num_siv_dependent++;
1862 return;
1865 /* When the step does not divide the difference, there
1866 are no overlaps. */
1867 else
1869 *overlaps_a = conflict_fn_no_dependence ();
1870 *overlaps_b = conflict_fn_no_dependence ();
1871 *last_conflicts = integer_zero_node;
1872 dependence_stats.num_siv_independent++;
1873 return;
1876 else
1878 /* Example:
1879 chrec_a = 3
1880 chrec_b = {4, +, 1}
1882 In this case, chrec_a will not overlap with chrec_b. */
1883 *overlaps_a = conflict_fn_no_dependence ();
1884 *overlaps_b = conflict_fn_no_dependence ();
1885 *last_conflicts = integer_zero_node;
1886 dependence_stats.num_siv_independent++;
1887 return;
1894 /* Helper recursive function for initializing the matrix A. Returns
1895 the initial value of CHREC. */
1897 static tree
1898 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
1900 gcc_assert (chrec);
1902 switch (TREE_CODE (chrec))
1904 case POLYNOMIAL_CHREC:
1905 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
1907 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
1908 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
1910 case PLUS_EXPR:
1911 case MULT_EXPR:
1912 case MINUS_EXPR:
1914 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1915 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
1917 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
1920 case NOP_EXPR:
1922 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1923 return chrec_convert (chrec_type (chrec), op, NULL);
1926 case BIT_NOT_EXPR:
1928 /* Handle ~X as -1 - X. */
1929 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1930 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
1931 build_int_cst (TREE_TYPE (chrec), -1), op);
1934 case INTEGER_CST:
1935 return chrec;
1937 default:
1938 gcc_unreachable ();
1939 return NULL_TREE;
1943 #define FLOOR_DIV(x,y) ((x) / (y))
1945 /* Solves the special case of the Diophantine equation:
1946 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
1948 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
1949 number of iterations that loops X and Y run. The overlaps will be
1950 constructed as evolutions in dimension DIM. */
1952 static void
1953 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
1954 affine_fn *overlaps_a,
1955 affine_fn *overlaps_b,
1956 tree *last_conflicts, int dim)
1958 if (((step_a > 0 && step_b > 0)
1959 || (step_a < 0 && step_b < 0)))
1961 int step_overlaps_a, step_overlaps_b;
1962 int gcd_steps_a_b, last_conflict, tau2;
1964 gcd_steps_a_b = gcd (step_a, step_b);
1965 step_overlaps_a = step_b / gcd_steps_a_b;
1966 step_overlaps_b = step_a / gcd_steps_a_b;
1968 if (niter > 0)
1970 tau2 = FLOOR_DIV (niter, step_overlaps_a);
1971 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
1972 last_conflict = tau2;
1973 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
1975 else
1976 *last_conflicts = chrec_dont_know;
1978 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
1979 build_int_cst (NULL_TREE,
1980 step_overlaps_a));
1981 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
1982 build_int_cst (NULL_TREE,
1983 step_overlaps_b));
1986 else
1988 *overlaps_a = affine_fn_cst (integer_zero_node);
1989 *overlaps_b = affine_fn_cst (integer_zero_node);
1990 *last_conflicts = integer_zero_node;
1994 /* Solves the special case of a Diophantine equation where CHREC_A is
1995 an affine bivariate function, and CHREC_B is an affine univariate
1996 function. For example,
1998 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
2000 has the following overlapping functions:
2002 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
2003 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
2004 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
2006 FORNOW: This is a specialized implementation for a case occurring in
2007 a common benchmark. Implement the general algorithm. */
2009 static void
2010 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
2011 conflict_function **overlaps_a,
2012 conflict_function **overlaps_b,
2013 tree *last_conflicts)
2015 bool xz_p, yz_p, xyz_p;
2016 int step_x, step_y, step_z;
2017 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
2018 affine_fn overlaps_a_xz, overlaps_b_xz;
2019 affine_fn overlaps_a_yz, overlaps_b_yz;
2020 affine_fn overlaps_a_xyz, overlaps_b_xyz;
2021 affine_fn ova1, ova2, ovb;
2022 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
2024 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
2025 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
2026 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
2028 niter_x =
2029 max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)), true);
2030 niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a), true);
2031 niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b), true);
2033 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
2035 if (dump_file && (dump_flags & TDF_DETAILS))
2036 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
2038 *overlaps_a = conflict_fn_not_known ();
2039 *overlaps_b = conflict_fn_not_known ();
2040 *last_conflicts = chrec_dont_know;
2041 return;
2044 niter = MIN (niter_x, niter_z);
2045 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
2046 &overlaps_a_xz,
2047 &overlaps_b_xz,
2048 &last_conflicts_xz, 1);
2049 niter = MIN (niter_y, niter_z);
2050 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
2051 &overlaps_a_yz,
2052 &overlaps_b_yz,
2053 &last_conflicts_yz, 2);
2054 niter = MIN (niter_x, niter_z);
2055 niter = MIN (niter_y, niter);
2056 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
2057 &overlaps_a_xyz,
2058 &overlaps_b_xyz,
2059 &last_conflicts_xyz, 3);
2061 xz_p = !integer_zerop (last_conflicts_xz);
2062 yz_p = !integer_zerop (last_conflicts_yz);
2063 xyz_p = !integer_zerop (last_conflicts_xyz);
2065 if (xz_p || yz_p || xyz_p)
2067 ova1 = affine_fn_cst (integer_zero_node);
2068 ova2 = affine_fn_cst (integer_zero_node);
2069 ovb = affine_fn_cst (integer_zero_node);
2070 if (xz_p)
2072 affine_fn t0 = ova1;
2073 affine_fn t2 = ovb;
2075 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2076 ovb = affine_fn_plus (ovb, overlaps_b_xz);
2077 affine_fn_free (t0);
2078 affine_fn_free (t2);
2079 *last_conflicts = last_conflicts_xz;
2081 if (yz_p)
2083 affine_fn t0 = ova2;
2084 affine_fn t2 = ovb;
2086 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2087 ovb = affine_fn_plus (ovb, overlaps_b_yz);
2088 affine_fn_free (t0);
2089 affine_fn_free (t2);
2090 *last_conflicts = last_conflicts_yz;
2092 if (xyz_p)
2094 affine_fn t0 = ova1;
2095 affine_fn t2 = ova2;
2096 affine_fn t4 = ovb;
2098 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2099 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2100 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2101 affine_fn_free (t0);
2102 affine_fn_free (t2);
2103 affine_fn_free (t4);
2104 *last_conflicts = last_conflicts_xyz;
2106 *overlaps_a = conflict_fn (2, ova1, ova2);
2107 *overlaps_b = conflict_fn (1, ovb);
2109 else
2111 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2112 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2113 *last_conflicts = integer_zero_node;
2116 affine_fn_free (overlaps_a_xz);
2117 affine_fn_free (overlaps_b_xz);
2118 affine_fn_free (overlaps_a_yz);
2119 affine_fn_free (overlaps_b_yz);
2120 affine_fn_free (overlaps_a_xyz);
2121 affine_fn_free (overlaps_b_xyz);
2124 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
2126 static void
2127 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
2128 int size)
2130 memcpy (vec2, vec1, size * sizeof (*vec1));
2133 /* Copy the elements of M x N matrix MAT1 to MAT2. */
2135 static void
2136 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
2137 int m, int n)
2139 int i;
2141 for (i = 0; i < m; i++)
2142 lambda_vector_copy (mat1[i], mat2[i], n);
2145 /* Store the N x N identity matrix in MAT. */
2147 static void
2148 lambda_matrix_id (lambda_matrix mat, int size)
2150 int i, j;
2152 for (i = 0; i < size; i++)
2153 for (j = 0; j < size; j++)
2154 mat[i][j] = (i == j) ? 1 : 0;
2157 /* Return the first nonzero element of vector VEC1 between START and N.
2158 We must have START <= N. Returns N if VEC1 is the zero vector. */
2160 static int
2161 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
2163 int j = start;
2164 while (j < n && vec1[j] == 0)
2165 j++;
2166 return j;
2169 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2170 R2 = R2 + CONST1 * R1. */
2172 static void
2173 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
2175 int i;
2177 if (const1 == 0)
2178 return;
2180 for (i = 0; i < n; i++)
2181 mat[r2][i] += const1 * mat[r1][i];
2184 /* Swap rows R1 and R2 in matrix MAT. */
2186 static void
2187 lambda_matrix_row_exchange (lambda_matrix mat, int r1, int r2)
2189 lambda_vector row;
2191 row = mat[r1];
2192 mat[r1] = mat[r2];
2193 mat[r2] = row;
2196 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
2197 and store the result in VEC2. */
2199 static void
2200 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
2201 int size, int const1)
2203 int i;
2205 if (const1 == 0)
2206 lambda_vector_clear (vec2, size);
2207 else
2208 for (i = 0; i < size; i++)
2209 vec2[i] = const1 * vec1[i];
2212 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
2214 static void
2215 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
2216 int size)
2218 lambda_vector_mult_const (vec1, vec2, size, -1);
2221 /* Negate row R1 of matrix MAT which has N columns. */
2223 static void
2224 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
2226 lambda_vector_negate (mat[r1], mat[r1], n);
2229 /* Return true if two vectors are equal. */
2231 static bool
2232 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
2234 int i;
2235 for (i = 0; i < size; i++)
2236 if (vec1[i] != vec2[i])
2237 return false;
2238 return true;
2241 /* Given an M x N integer matrix A, this function determines an M x
2242 M unimodular matrix U, and an M x N echelon matrix S such that
2243 "U.A = S". This decomposition is also known as "right Hermite".
2245 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2246 Restructuring Compilers" Utpal Banerjee. */
2248 static void
2249 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
2250 lambda_matrix S, lambda_matrix U)
2252 int i, j, i0 = 0;
2254 lambda_matrix_copy (A, S, m, n);
2255 lambda_matrix_id (U, m);
2257 for (j = 0; j < n; j++)
2259 if (lambda_vector_first_nz (S[j], m, i0) < m)
2261 ++i0;
2262 for (i = m - 1; i >= i0; i--)
2264 while (S[i][j] != 0)
2266 int sigma, factor, a, b;
2268 a = S[i-1][j];
2269 b = S[i][j];
2270 sigma = (a * b < 0) ? -1: 1;
2271 a = abs (a);
2272 b = abs (b);
2273 factor = sigma * (a / b);
2275 lambda_matrix_row_add (S, n, i, i-1, -factor);
2276 lambda_matrix_row_exchange (S, i, i-1);
2278 lambda_matrix_row_add (U, m, i, i-1, -factor);
2279 lambda_matrix_row_exchange (U, i, i-1);
2286 /* Determines the overlapping elements due to accesses CHREC_A and
2287 CHREC_B, that are affine functions. This function cannot handle
2288 symbolic evolution functions, ie. when initial conditions are
2289 parameters, because it uses lambda matrices of integers. */
2291 static void
2292 analyze_subscript_affine_affine (tree chrec_a,
2293 tree chrec_b,
2294 conflict_function **overlaps_a,
2295 conflict_function **overlaps_b,
2296 tree *last_conflicts)
2298 unsigned nb_vars_a, nb_vars_b, dim;
2299 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2300 lambda_matrix A, U, S;
2301 struct obstack scratch_obstack;
2303 if (eq_evolutions_p (chrec_a, chrec_b))
2305 /* The accessed index overlaps for each iteration in the
2306 loop. */
2307 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2308 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2309 *last_conflicts = chrec_dont_know;
2310 return;
2312 if (dump_file && (dump_flags & TDF_DETAILS))
2313 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2315 /* For determining the initial intersection, we have to solve a
2316 Diophantine equation. This is the most time consuming part.
2318 For answering to the question: "Is there a dependence?" we have
2319 to prove that there exists a solution to the Diophantine
2320 equation, and that the solution is in the iteration domain,
2321 i.e. the solution is positive or zero, and that the solution
2322 happens before the upper bound loop.nb_iterations. Otherwise
2323 there is no dependence. This function outputs a description of
2324 the iterations that hold the intersections. */
2326 nb_vars_a = nb_vars_in_chrec (chrec_a);
2327 nb_vars_b = nb_vars_in_chrec (chrec_b);
2329 gcc_obstack_init (&scratch_obstack);
2331 dim = nb_vars_a + nb_vars_b;
2332 U = lambda_matrix_new (dim, dim, &scratch_obstack);
2333 A = lambda_matrix_new (dim, 1, &scratch_obstack);
2334 S = lambda_matrix_new (dim, 1, &scratch_obstack);
2336 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2337 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
2338 gamma = init_b - init_a;
2340 /* Don't do all the hard work of solving the Diophantine equation
2341 when we already know the solution: for example,
2342 | {3, +, 1}_1
2343 | {3, +, 4}_2
2344 | gamma = 3 - 3 = 0.
2345 Then the first overlap occurs during the first iterations:
2346 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2348 if (gamma == 0)
2350 if (nb_vars_a == 1 && nb_vars_b == 1)
2352 HOST_WIDE_INT step_a, step_b;
2353 HOST_WIDE_INT niter, niter_a, niter_b;
2354 affine_fn ova, ovb;
2356 niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a), true);
2357 niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b), true);
2358 niter = MIN (niter_a, niter_b);
2359 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2360 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2362 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2363 &ova, &ovb,
2364 last_conflicts, 1);
2365 *overlaps_a = conflict_fn (1, ova);
2366 *overlaps_b = conflict_fn (1, ovb);
2369 else if (nb_vars_a == 2 && nb_vars_b == 1)
2370 compute_overlap_steps_for_affine_1_2
2371 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2373 else if (nb_vars_a == 1 && nb_vars_b == 2)
2374 compute_overlap_steps_for_affine_1_2
2375 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2377 else
2379 if (dump_file && (dump_flags & TDF_DETAILS))
2380 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2381 *overlaps_a = conflict_fn_not_known ();
2382 *overlaps_b = conflict_fn_not_known ();
2383 *last_conflicts = chrec_dont_know;
2385 goto end_analyze_subs_aa;
2388 /* U.A = S */
2389 lambda_matrix_right_hermite (A, dim, 1, S, U);
2391 if (S[0][0] < 0)
2393 S[0][0] *= -1;
2394 lambda_matrix_row_negate (U, dim, 0);
2396 gcd_alpha_beta = S[0][0];
2398 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2399 but that is a quite strange case. Instead of ICEing, answer
2400 don't know. */
2401 if (gcd_alpha_beta == 0)
2403 *overlaps_a = conflict_fn_not_known ();
2404 *overlaps_b = conflict_fn_not_known ();
2405 *last_conflicts = chrec_dont_know;
2406 goto end_analyze_subs_aa;
2409 /* The classic "gcd-test". */
2410 if (!int_divides_p (gcd_alpha_beta, gamma))
2412 /* The "gcd-test" has determined that there is no integer
2413 solution, i.e. there is no dependence. */
2414 *overlaps_a = conflict_fn_no_dependence ();
2415 *overlaps_b = conflict_fn_no_dependence ();
2416 *last_conflicts = integer_zero_node;
2419 /* Both access functions are univariate. This includes SIV and MIV cases. */
2420 else if (nb_vars_a == 1 && nb_vars_b == 1)
2422 /* Both functions should have the same evolution sign. */
2423 if (((A[0][0] > 0 && -A[1][0] > 0)
2424 || (A[0][0] < 0 && -A[1][0] < 0)))
2426 /* The solutions are given by:
2428 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2429 | [u21 u22] [y0]
2431 For a given integer t. Using the following variables,
2433 | i0 = u11 * gamma / gcd_alpha_beta
2434 | j0 = u12 * gamma / gcd_alpha_beta
2435 | i1 = u21
2436 | j1 = u22
2438 the solutions are:
2440 | x0 = i0 + i1 * t,
2441 | y0 = j0 + j1 * t. */
2442 HOST_WIDE_INT i0, j0, i1, j1;
2444 i0 = U[0][0] * gamma / gcd_alpha_beta;
2445 j0 = U[0][1] * gamma / gcd_alpha_beta;
2446 i1 = U[1][0];
2447 j1 = U[1][1];
2449 if ((i1 == 0 && i0 < 0)
2450 || (j1 == 0 && j0 < 0))
2452 /* There is no solution.
2453 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2454 falls in here, but for the moment we don't look at the
2455 upper bound of the iteration domain. */
2456 *overlaps_a = conflict_fn_no_dependence ();
2457 *overlaps_b = conflict_fn_no_dependence ();
2458 *last_conflicts = integer_zero_node;
2459 goto end_analyze_subs_aa;
2462 if (i1 > 0 && j1 > 0)
2464 HOST_WIDE_INT niter_a = max_stmt_executions_int
2465 (get_chrec_loop (chrec_a), true);
2466 HOST_WIDE_INT niter_b = max_stmt_executions_int
2467 (get_chrec_loop (chrec_b), true);
2468 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2470 /* (X0, Y0) is a solution of the Diophantine equation:
2471 "chrec_a (X0) = chrec_b (Y0)". */
2472 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2473 CEIL (-j0, j1));
2474 HOST_WIDE_INT x0 = i1 * tau1 + i0;
2475 HOST_WIDE_INT y0 = j1 * tau1 + j0;
2477 /* (X1, Y1) is the smallest positive solution of the eq
2478 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2479 first conflict occurs. */
2480 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2481 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2482 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2484 if (niter > 0)
2486 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2487 FLOOR_DIV (niter - j0, j1));
2488 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2490 /* If the overlap occurs outside of the bounds of the
2491 loop, there is no dependence. */
2492 if (x1 >= niter || y1 >= niter)
2494 *overlaps_a = conflict_fn_no_dependence ();
2495 *overlaps_b = conflict_fn_no_dependence ();
2496 *last_conflicts = integer_zero_node;
2497 goto end_analyze_subs_aa;
2499 else
2500 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2502 else
2503 *last_conflicts = chrec_dont_know;
2505 *overlaps_a
2506 = conflict_fn (1,
2507 affine_fn_univar (build_int_cst (NULL_TREE, x1),
2509 build_int_cst (NULL_TREE, i1)));
2510 *overlaps_b
2511 = conflict_fn (1,
2512 affine_fn_univar (build_int_cst (NULL_TREE, y1),
2514 build_int_cst (NULL_TREE, j1)));
2516 else
2518 /* FIXME: For the moment, the upper bound of the
2519 iteration domain for i and j is not checked. */
2520 if (dump_file && (dump_flags & TDF_DETAILS))
2521 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2522 *overlaps_a = conflict_fn_not_known ();
2523 *overlaps_b = conflict_fn_not_known ();
2524 *last_conflicts = chrec_dont_know;
2527 else
2529 if (dump_file && (dump_flags & TDF_DETAILS))
2530 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2531 *overlaps_a = conflict_fn_not_known ();
2532 *overlaps_b = conflict_fn_not_known ();
2533 *last_conflicts = chrec_dont_know;
2536 else
2538 if (dump_file && (dump_flags & TDF_DETAILS))
2539 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2540 *overlaps_a = conflict_fn_not_known ();
2541 *overlaps_b = conflict_fn_not_known ();
2542 *last_conflicts = chrec_dont_know;
2545 end_analyze_subs_aa:
2546 obstack_free (&scratch_obstack, NULL);
2547 if (dump_file && (dump_flags & TDF_DETAILS))
2549 fprintf (dump_file, " (overlaps_a = ");
2550 dump_conflict_function (dump_file, *overlaps_a);
2551 fprintf (dump_file, ")\n (overlaps_b = ");
2552 dump_conflict_function (dump_file, *overlaps_b);
2553 fprintf (dump_file, ")\n");
2554 fprintf (dump_file, ")\n");
2558 /* Returns true when analyze_subscript_affine_affine can be used for
2559 determining the dependence relation between chrec_a and chrec_b,
2560 that contain symbols. This function modifies chrec_a and chrec_b
2561 such that the analysis result is the same, and such that they don't
2562 contain symbols, and then can safely be passed to the analyzer.
2564 Example: The analysis of the following tuples of evolutions produce
2565 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2566 vs. {0, +, 1}_1
2568 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2569 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2572 static bool
2573 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2575 tree diff, type, left_a, left_b, right_b;
2577 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2578 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2579 /* FIXME: For the moment not handled. Might be refined later. */
2580 return false;
2582 type = chrec_type (*chrec_a);
2583 left_a = CHREC_LEFT (*chrec_a);
2584 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
2585 diff = chrec_fold_minus (type, left_a, left_b);
2587 if (!evolution_function_is_constant_p (diff))
2588 return false;
2590 if (dump_file && (dump_flags & TDF_DETAILS))
2591 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2593 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2594 diff, CHREC_RIGHT (*chrec_a));
2595 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
2596 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2597 build_int_cst (type, 0),
2598 right_b);
2599 return true;
2602 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2603 *OVERLAPS_B are initialized to the functions that describe the
2604 relation between the elements accessed twice by CHREC_A and
2605 CHREC_B. For k >= 0, the following property is verified:
2607 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2609 static void
2610 analyze_siv_subscript (tree chrec_a,
2611 tree chrec_b,
2612 conflict_function **overlaps_a,
2613 conflict_function **overlaps_b,
2614 tree *last_conflicts,
2615 int loop_nest_num)
2617 dependence_stats.num_siv++;
2619 if (dump_file && (dump_flags & TDF_DETAILS))
2620 fprintf (dump_file, "(analyze_siv_subscript \n");
2622 if (evolution_function_is_constant_p (chrec_a)
2623 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2624 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2625 overlaps_a, overlaps_b, last_conflicts);
2627 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2628 && evolution_function_is_constant_p (chrec_b))
2629 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2630 overlaps_b, overlaps_a, last_conflicts);
2632 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2633 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2635 if (!chrec_contains_symbols (chrec_a)
2636 && !chrec_contains_symbols (chrec_b))
2638 analyze_subscript_affine_affine (chrec_a, chrec_b,
2639 overlaps_a, overlaps_b,
2640 last_conflicts);
2642 if (CF_NOT_KNOWN_P (*overlaps_a)
2643 || CF_NOT_KNOWN_P (*overlaps_b))
2644 dependence_stats.num_siv_unimplemented++;
2645 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2646 || CF_NO_DEPENDENCE_P (*overlaps_b))
2647 dependence_stats.num_siv_independent++;
2648 else
2649 dependence_stats.num_siv_dependent++;
2651 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2652 &chrec_b))
2654 analyze_subscript_affine_affine (chrec_a, chrec_b,
2655 overlaps_a, overlaps_b,
2656 last_conflicts);
2658 if (CF_NOT_KNOWN_P (*overlaps_a)
2659 || CF_NOT_KNOWN_P (*overlaps_b))
2660 dependence_stats.num_siv_unimplemented++;
2661 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2662 || CF_NO_DEPENDENCE_P (*overlaps_b))
2663 dependence_stats.num_siv_independent++;
2664 else
2665 dependence_stats.num_siv_dependent++;
2667 else
2668 goto siv_subscript_dontknow;
2671 else
2673 siv_subscript_dontknow:;
2674 if (dump_file && (dump_flags & TDF_DETAILS))
2675 fprintf (dump_file, "siv test failed: unimplemented.\n");
2676 *overlaps_a = conflict_fn_not_known ();
2677 *overlaps_b = conflict_fn_not_known ();
2678 *last_conflicts = chrec_dont_know;
2679 dependence_stats.num_siv_unimplemented++;
2682 if (dump_file && (dump_flags & TDF_DETAILS))
2683 fprintf (dump_file, ")\n");
2686 /* Returns false if we can prove that the greatest common divisor of the steps
2687 of CHREC does not divide CST, false otherwise. */
2689 static bool
2690 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2692 HOST_WIDE_INT cd = 0, val;
2693 tree step;
2695 if (!host_integerp (cst, 0))
2696 return true;
2697 val = tree_low_cst (cst, 0);
2699 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2701 step = CHREC_RIGHT (chrec);
2702 if (!host_integerp (step, 0))
2703 return true;
2704 cd = gcd (cd, tree_low_cst (step, 0));
2705 chrec = CHREC_LEFT (chrec);
2708 return val % cd == 0;
2711 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2712 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2713 functions that describe the relation between the elements accessed
2714 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2715 is verified:
2717 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2719 static void
2720 analyze_miv_subscript (tree chrec_a,
2721 tree chrec_b,
2722 conflict_function **overlaps_a,
2723 conflict_function **overlaps_b,
2724 tree *last_conflicts,
2725 struct loop *loop_nest)
2727 tree type, difference;
2729 dependence_stats.num_miv++;
2730 if (dump_file && (dump_flags & TDF_DETAILS))
2731 fprintf (dump_file, "(analyze_miv_subscript \n");
2733 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2734 chrec_a = chrec_convert (type, chrec_a, NULL);
2735 chrec_b = chrec_convert (type, chrec_b, NULL);
2736 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2738 if (eq_evolutions_p (chrec_a, chrec_b))
2740 /* Access functions are the same: all the elements are accessed
2741 in the same order. */
2742 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2743 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2744 *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
2745 dependence_stats.num_miv_dependent++;
2748 else if (evolution_function_is_constant_p (difference)
2749 /* For the moment, the following is verified:
2750 evolution_function_is_affine_multivariate_p (chrec_a,
2751 loop_nest->num) */
2752 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2754 /* testsuite/.../ssa-chrec-33.c
2755 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2757 The difference is 1, and all the evolution steps are multiples
2758 of 2, consequently there are no overlapping elements. */
2759 *overlaps_a = conflict_fn_no_dependence ();
2760 *overlaps_b = conflict_fn_no_dependence ();
2761 *last_conflicts = integer_zero_node;
2762 dependence_stats.num_miv_independent++;
2765 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2766 && !chrec_contains_symbols (chrec_a)
2767 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2768 && !chrec_contains_symbols (chrec_b))
2770 /* testsuite/.../ssa-chrec-35.c
2771 {0, +, 1}_2 vs. {0, +, 1}_3
2772 the overlapping elements are respectively located at iterations:
2773 {0, +, 1}_x and {0, +, 1}_x,
2774 in other words, we have the equality:
2775 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2777 Other examples:
2778 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2779 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2781 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2782 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2784 analyze_subscript_affine_affine (chrec_a, chrec_b,
2785 overlaps_a, overlaps_b, last_conflicts);
2787 if (CF_NOT_KNOWN_P (*overlaps_a)
2788 || CF_NOT_KNOWN_P (*overlaps_b))
2789 dependence_stats.num_miv_unimplemented++;
2790 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2791 || CF_NO_DEPENDENCE_P (*overlaps_b))
2792 dependence_stats.num_miv_independent++;
2793 else
2794 dependence_stats.num_miv_dependent++;
2797 else
2799 /* When the analysis is too difficult, answer "don't know". */
2800 if (dump_file && (dump_flags & TDF_DETAILS))
2801 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2803 *overlaps_a = conflict_fn_not_known ();
2804 *overlaps_b = conflict_fn_not_known ();
2805 *last_conflicts = chrec_dont_know;
2806 dependence_stats.num_miv_unimplemented++;
2809 if (dump_file && (dump_flags & TDF_DETAILS))
2810 fprintf (dump_file, ")\n");
2813 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2814 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2815 OVERLAP_ITERATIONS_B are initialized with two functions that
2816 describe the iterations that contain conflicting elements.
2818 Remark: For an integer k >= 0, the following equality is true:
2820 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2823 static void
2824 analyze_overlapping_iterations (tree chrec_a,
2825 tree chrec_b,
2826 conflict_function **overlap_iterations_a,
2827 conflict_function **overlap_iterations_b,
2828 tree *last_conflicts, struct loop *loop_nest)
2830 unsigned int lnn = loop_nest->num;
2832 dependence_stats.num_subscript_tests++;
2834 if (dump_file && (dump_flags & TDF_DETAILS))
2836 fprintf (dump_file, "(analyze_overlapping_iterations \n");
2837 fprintf (dump_file, " (chrec_a = ");
2838 print_generic_expr (dump_file, chrec_a, 0);
2839 fprintf (dump_file, ")\n (chrec_b = ");
2840 print_generic_expr (dump_file, chrec_b, 0);
2841 fprintf (dump_file, ")\n");
2844 if (chrec_a == NULL_TREE
2845 || chrec_b == NULL_TREE
2846 || chrec_contains_undetermined (chrec_a)
2847 || chrec_contains_undetermined (chrec_b))
2849 dependence_stats.num_subscript_undetermined++;
2851 *overlap_iterations_a = conflict_fn_not_known ();
2852 *overlap_iterations_b = conflict_fn_not_known ();
2855 /* If they are the same chrec, and are affine, they overlap
2856 on every iteration. */
2857 else if (eq_evolutions_p (chrec_a, chrec_b)
2858 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2859 || operand_equal_p (chrec_a, chrec_b, 0)))
2861 dependence_stats.num_same_subscript_function++;
2862 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2863 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2864 *last_conflicts = chrec_dont_know;
2867 /* If they aren't the same, and aren't affine, we can't do anything
2868 yet. */
2869 else if ((chrec_contains_symbols (chrec_a)
2870 || chrec_contains_symbols (chrec_b))
2871 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2872 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
2874 dependence_stats.num_subscript_undetermined++;
2875 *overlap_iterations_a = conflict_fn_not_known ();
2876 *overlap_iterations_b = conflict_fn_not_known ();
2879 else if (ziv_subscript_p (chrec_a, chrec_b))
2880 analyze_ziv_subscript (chrec_a, chrec_b,
2881 overlap_iterations_a, overlap_iterations_b,
2882 last_conflicts);
2884 else if (siv_subscript_p (chrec_a, chrec_b))
2885 analyze_siv_subscript (chrec_a, chrec_b,
2886 overlap_iterations_a, overlap_iterations_b,
2887 last_conflicts, lnn);
2889 else
2890 analyze_miv_subscript (chrec_a, chrec_b,
2891 overlap_iterations_a, overlap_iterations_b,
2892 last_conflicts, loop_nest);
2894 if (dump_file && (dump_flags & TDF_DETAILS))
2896 fprintf (dump_file, " (overlap_iterations_a = ");
2897 dump_conflict_function (dump_file, *overlap_iterations_a);
2898 fprintf (dump_file, ")\n (overlap_iterations_b = ");
2899 dump_conflict_function (dump_file, *overlap_iterations_b);
2900 fprintf (dump_file, ")\n");
2901 fprintf (dump_file, ")\n");
2905 /* Helper function for uniquely inserting distance vectors. */
2907 static void
2908 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
2910 unsigned i;
2911 lambda_vector v;
2913 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, v)
2914 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
2915 return;
2917 VEC_safe_push (lambda_vector, heap, DDR_DIST_VECTS (ddr), dist_v);
2920 /* Helper function for uniquely inserting direction vectors. */
2922 static void
2923 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
2925 unsigned i;
2926 lambda_vector v;
2928 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), i, v)
2929 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
2930 return;
2932 VEC_safe_push (lambda_vector, heap, DDR_DIR_VECTS (ddr), dir_v);
2935 /* Add a distance of 1 on all the loops outer than INDEX. If we
2936 haven't yet determined a distance for this outer loop, push a new
2937 distance vector composed of the previous distance, and a distance
2938 of 1 for this outer loop. Example:
2940 | loop_1
2941 | loop_2
2942 | A[10]
2943 | endloop_2
2944 | endloop_1
2946 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
2947 save (0, 1), then we have to save (1, 0). */
2949 static void
2950 add_outer_distances (struct data_dependence_relation *ddr,
2951 lambda_vector dist_v, int index)
2953 /* For each outer loop where init_v is not set, the accesses are
2954 in dependence of distance 1 in the loop. */
2955 while (--index >= 0)
2957 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2958 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
2959 save_v[index] = 1;
2960 save_dist_v (ddr, save_v);
2964 /* Return false when fail to represent the data dependence as a
2965 distance vector. INIT_B is set to true when a component has been
2966 added to the distance vector DIST_V. INDEX_CARRY is then set to
2967 the index in DIST_V that carries the dependence. */
2969 static bool
2970 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
2971 struct data_reference *ddr_a,
2972 struct data_reference *ddr_b,
2973 lambda_vector dist_v, bool *init_b,
2974 int *index_carry)
2976 unsigned i;
2977 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2979 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2981 tree access_fn_a, access_fn_b;
2982 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
2984 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2986 non_affine_dependence_relation (ddr);
2987 return false;
2990 access_fn_a = DR_ACCESS_FN (ddr_a, i);
2991 access_fn_b = DR_ACCESS_FN (ddr_b, i);
2993 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
2994 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
2996 int dist, index;
2997 int var_a = CHREC_VARIABLE (access_fn_a);
2998 int var_b = CHREC_VARIABLE (access_fn_b);
3000 if (var_a != var_b
3001 || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
3003 non_affine_dependence_relation (ddr);
3004 return false;
3007 dist = int_cst_value (SUB_DISTANCE (subscript));
3008 index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
3009 *index_carry = MIN (index, *index_carry);
3011 /* This is the subscript coupling test. If we have already
3012 recorded a distance for this loop (a distance coming from
3013 another subscript), it should be the same. For example,
3014 in the following code, there is no dependence:
3016 | loop i = 0, N, 1
3017 | T[i+1][i] = ...
3018 | ... = T[i][i]
3019 | endloop
3021 if (init_v[index] != 0 && dist_v[index] != dist)
3023 finalize_ddr_dependent (ddr, chrec_known);
3024 return false;
3027 dist_v[index] = dist;
3028 init_v[index] = 1;
3029 *init_b = true;
3031 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
3033 /* This can be for example an affine vs. constant dependence
3034 (T[i] vs. T[3]) that is not an affine dependence and is
3035 not representable as a distance vector. */
3036 non_affine_dependence_relation (ddr);
3037 return false;
3041 return true;
3044 /* Return true when the DDR contains only constant access functions. */
3046 static bool
3047 constant_access_functions (const struct data_dependence_relation *ddr)
3049 unsigned i;
3051 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3052 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
3053 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
3054 return false;
3056 return true;
3059 /* Helper function for the case where DDR_A and DDR_B are the same
3060 multivariate access function with a constant step. For an example
3061 see pr34635-1.c. */
3063 static void
3064 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
3066 int x_1, x_2;
3067 tree c_1 = CHREC_LEFT (c_2);
3068 tree c_0 = CHREC_LEFT (c_1);
3069 lambda_vector dist_v;
3070 int v1, v2, cd;
3072 /* Polynomials with more than 2 variables are not handled yet. When
3073 the evolution steps are parameters, it is not possible to
3074 represent the dependence using classical distance vectors. */
3075 if (TREE_CODE (c_0) != INTEGER_CST
3076 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
3077 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
3079 DDR_AFFINE_P (ddr) = false;
3080 return;
3083 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
3084 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
3086 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
3087 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3088 v1 = int_cst_value (CHREC_RIGHT (c_1));
3089 v2 = int_cst_value (CHREC_RIGHT (c_2));
3090 cd = gcd (v1, v2);
3091 v1 /= cd;
3092 v2 /= cd;
3094 if (v2 < 0)
3096 v2 = -v2;
3097 v1 = -v1;
3100 dist_v[x_1] = v2;
3101 dist_v[x_2] = -v1;
3102 save_dist_v (ddr, dist_v);
3104 add_outer_distances (ddr, dist_v, x_1);
3107 /* Helper function for the case where DDR_A and DDR_B are the same
3108 access functions. */
3110 static void
3111 add_other_self_distances (struct data_dependence_relation *ddr)
3113 lambda_vector dist_v;
3114 unsigned i;
3115 int index_carry = DDR_NB_LOOPS (ddr);
3117 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3119 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
3121 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
3123 if (!evolution_function_is_univariate_p (access_fun))
3125 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
3127 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
3128 return;
3131 access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
3133 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
3134 add_multivariate_self_dist (ddr, access_fun);
3135 else
3136 /* The evolution step is not constant: it varies in
3137 the outer loop, so this cannot be represented by a
3138 distance vector. For example in pr34635.c the
3139 evolution is {0, +, {0, +, 4}_1}_2. */
3140 DDR_AFFINE_P (ddr) = false;
3142 return;
3145 index_carry = MIN (index_carry,
3146 index_in_loop_nest (CHREC_VARIABLE (access_fun),
3147 DDR_LOOP_NEST (ddr)));
3151 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3152 add_outer_distances (ddr, dist_v, index_carry);
3155 static void
3156 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
3158 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3160 dist_v[DDR_INNER_LOOP (ddr)] = 1;
3161 save_dist_v (ddr, dist_v);
3164 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
3165 is the case for example when access functions are the same and
3166 equal to a constant, as in:
3168 | loop_1
3169 | A[3] = ...
3170 | ... = A[3]
3171 | endloop_1
3173 in which case the distance vectors are (0) and (1). */
3175 static void
3176 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
3178 unsigned i, j;
3180 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3182 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
3183 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
3184 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
3186 for (j = 0; j < ca->n; j++)
3187 if (affine_function_zero_p (ca->fns[j]))
3189 insert_innermost_unit_dist_vector (ddr);
3190 return;
3193 for (j = 0; j < cb->n; j++)
3194 if (affine_function_zero_p (cb->fns[j]))
3196 insert_innermost_unit_dist_vector (ddr);
3197 return;
3202 /* Compute the classic per loop distance vector. DDR is the data
3203 dependence relation to build a vector from. Return false when fail
3204 to represent the data dependence as a distance vector. */
3206 static bool
3207 build_classic_dist_vector (struct data_dependence_relation *ddr,
3208 struct loop *loop_nest)
3210 bool init_b = false;
3211 int index_carry = DDR_NB_LOOPS (ddr);
3212 lambda_vector dist_v;
3214 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3215 return false;
3217 if (same_access_functions (ddr))
3219 /* Save the 0 vector. */
3220 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3221 save_dist_v (ddr, dist_v);
3223 if (constant_access_functions (ddr))
3224 add_distance_for_zero_overlaps (ddr);
3226 if (DDR_NB_LOOPS (ddr) > 1)
3227 add_other_self_distances (ddr);
3229 return true;
3232 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3233 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3234 dist_v, &init_b, &index_carry))
3235 return false;
3237 /* Save the distance vector if we initialized one. */
3238 if (init_b)
3240 /* Verify a basic constraint: classic distance vectors should
3241 always be lexicographically positive.
3243 Data references are collected in the order of execution of
3244 the program, thus for the following loop
3246 | for (i = 1; i < 100; i++)
3247 | for (j = 1; j < 100; j++)
3249 | t = T[j+1][i-1]; // A
3250 | T[j][i] = t + 2; // B
3253 references are collected following the direction of the wind:
3254 A then B. The data dependence tests are performed also
3255 following this order, such that we're looking at the distance
3256 separating the elements accessed by A from the elements later
3257 accessed by B. But in this example, the distance returned by
3258 test_dep (A, B) is lexicographically negative (-1, 1), that
3259 means that the access A occurs later than B with respect to
3260 the outer loop, ie. we're actually looking upwind. In this
3261 case we solve test_dep (B, A) looking downwind to the
3262 lexicographically positive solution, that returns the
3263 distance vector (1, -1). */
3264 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3266 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3267 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3268 loop_nest))
3269 return false;
3270 compute_subscript_distance (ddr);
3271 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3272 save_v, &init_b, &index_carry))
3273 return false;
3274 save_dist_v (ddr, save_v);
3275 DDR_REVERSED_P (ddr) = true;
3277 /* In this case there is a dependence forward for all the
3278 outer loops:
3280 | for (k = 1; k < 100; k++)
3281 | for (i = 1; i < 100; i++)
3282 | for (j = 1; j < 100; j++)
3284 | t = T[j+1][i-1]; // A
3285 | T[j][i] = t + 2; // B
3288 the vectors are:
3289 (0, 1, -1)
3290 (1, 1, -1)
3291 (1, -1, 1)
3293 if (DDR_NB_LOOPS (ddr) > 1)
3295 add_outer_distances (ddr, save_v, index_carry);
3296 add_outer_distances (ddr, dist_v, index_carry);
3299 else
3301 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3302 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3304 if (DDR_NB_LOOPS (ddr) > 1)
3306 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3308 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3309 DDR_A (ddr), loop_nest))
3310 return false;
3311 compute_subscript_distance (ddr);
3312 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3313 opposite_v, &init_b,
3314 &index_carry))
3315 return false;
3317 save_dist_v (ddr, save_v);
3318 add_outer_distances (ddr, dist_v, index_carry);
3319 add_outer_distances (ddr, opposite_v, index_carry);
3321 else
3322 save_dist_v (ddr, save_v);
3325 else
3327 /* There is a distance of 1 on all the outer loops: Example:
3328 there is a dependence of distance 1 on loop_1 for the array A.
3330 | loop_1
3331 | A[5] = ...
3332 | endloop
3334 add_outer_distances (ddr, dist_v,
3335 lambda_vector_first_nz (dist_v,
3336 DDR_NB_LOOPS (ddr), 0));
3339 if (dump_file && (dump_flags & TDF_DETAILS))
3341 unsigned i;
3343 fprintf (dump_file, "(build_classic_dist_vector\n");
3344 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3346 fprintf (dump_file, " dist_vector = (");
3347 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3348 DDR_NB_LOOPS (ddr));
3349 fprintf (dump_file, " )\n");
3351 fprintf (dump_file, ")\n");
3354 return true;
3357 /* Return the direction for a given distance.
3358 FIXME: Computing dir this way is suboptimal, since dir can catch
3359 cases that dist is unable to represent. */
3361 static inline enum data_dependence_direction
3362 dir_from_dist (int dist)
3364 if (dist > 0)
3365 return dir_positive;
3366 else if (dist < 0)
3367 return dir_negative;
3368 else
3369 return dir_equal;
3372 /* Compute the classic per loop direction vector. DDR is the data
3373 dependence relation to build a vector from. */
3375 static void
3376 build_classic_dir_vector (struct data_dependence_relation *ddr)
3378 unsigned i, j;
3379 lambda_vector dist_v;
3381 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
3383 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3385 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3386 dir_v[j] = dir_from_dist (dist_v[j]);
3388 save_dir_v (ddr, dir_v);
3392 /* Helper function. Returns true when there is a dependence between
3393 data references DRA and DRB. */
3395 static bool
3396 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3397 struct data_reference *dra,
3398 struct data_reference *drb,
3399 struct loop *loop_nest)
3401 unsigned int i;
3402 tree last_conflicts;
3403 struct subscript *subscript;
3405 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3406 i++)
3408 conflict_function *overlaps_a, *overlaps_b;
3410 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3411 DR_ACCESS_FN (drb, i),
3412 &overlaps_a, &overlaps_b,
3413 &last_conflicts, loop_nest);
3415 if (CF_NOT_KNOWN_P (overlaps_a)
3416 || CF_NOT_KNOWN_P (overlaps_b))
3418 finalize_ddr_dependent (ddr, chrec_dont_know);
3419 dependence_stats.num_dependence_undetermined++;
3420 free_conflict_function (overlaps_a);
3421 free_conflict_function (overlaps_b);
3422 return false;
3425 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3426 || CF_NO_DEPENDENCE_P (overlaps_b))
3428 finalize_ddr_dependent (ddr, chrec_known);
3429 dependence_stats.num_dependence_independent++;
3430 free_conflict_function (overlaps_a);
3431 free_conflict_function (overlaps_b);
3432 return false;
3435 else
3437 if (SUB_CONFLICTS_IN_A (subscript))
3438 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3439 if (SUB_CONFLICTS_IN_B (subscript))
3440 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3442 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3443 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3444 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3448 return true;
3451 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3453 static void
3454 subscript_dependence_tester (struct data_dependence_relation *ddr,
3455 struct loop *loop_nest)
3458 if (dump_file && (dump_flags & TDF_DETAILS))
3459 fprintf (dump_file, "(subscript_dependence_tester \n");
3461 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3462 dependence_stats.num_dependence_dependent++;
3464 compute_subscript_distance (ddr);
3465 if (build_classic_dist_vector (ddr, loop_nest))
3466 build_classic_dir_vector (ddr);
3468 if (dump_file && (dump_flags & TDF_DETAILS))
3469 fprintf (dump_file, ")\n");
3472 /* Returns true when all the access functions of A are affine or
3473 constant with respect to LOOP_NEST. */
3475 static bool
3476 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3477 const struct loop *loop_nest)
3479 unsigned int i;
3480 VEC(tree,heap) *fns = DR_ACCESS_FNS (a);
3481 tree t;
3483 FOR_EACH_VEC_ELT (tree, fns, i, t)
3484 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3485 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3486 return false;
3488 return true;
3491 /* Initializes an equation for an OMEGA problem using the information
3492 contained in the ACCESS_FUN. Returns true when the operation
3493 succeeded.
3495 PB is the omega constraint system.
3496 EQ is the number of the equation to be initialized.
3497 OFFSET is used for shifting the variables names in the constraints:
3498 a constrain is composed of 2 * the number of variables surrounding
3499 dependence accesses. OFFSET is set either to 0 for the first n variables,
3500 then it is set to n.
3501 ACCESS_FUN is expected to be an affine chrec. */
3503 static bool
3504 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3505 unsigned int offset, tree access_fun,
3506 struct data_dependence_relation *ddr)
3508 switch (TREE_CODE (access_fun))
3510 case POLYNOMIAL_CHREC:
3512 tree left = CHREC_LEFT (access_fun);
3513 tree right = CHREC_RIGHT (access_fun);
3514 int var = CHREC_VARIABLE (access_fun);
3515 unsigned var_idx;
3517 if (TREE_CODE (right) != INTEGER_CST)
3518 return false;
3520 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3521 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3523 /* Compute the innermost loop index. */
3524 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3526 if (offset == 0)
3527 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3528 += int_cst_value (right);
3530 switch (TREE_CODE (left))
3532 case POLYNOMIAL_CHREC:
3533 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3535 case INTEGER_CST:
3536 pb->eqs[eq].coef[0] += int_cst_value (left);
3537 return true;
3539 default:
3540 return false;
3544 case INTEGER_CST:
3545 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3546 return true;
3548 default:
3549 return false;
3553 /* As explained in the comments preceding init_omega_for_ddr, we have
3554 to set up a system for each loop level, setting outer loops
3555 variation to zero, and current loop variation to positive or zero.
3556 Save each lexico positive distance vector. */
3558 static void
3559 omega_extract_distance_vectors (omega_pb pb,
3560 struct data_dependence_relation *ddr)
3562 int eq, geq;
3563 unsigned i, j;
3564 struct loop *loopi, *loopj;
3565 enum omega_result res;
3567 /* Set a new problem for each loop in the nest. The basis is the
3568 problem that we have initialized until now. On top of this we
3569 add new constraints. */
3570 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3571 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3573 int dist = 0;
3574 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3575 DDR_NB_LOOPS (ddr));
3577 omega_copy_problem (copy, pb);
3579 /* For all the outer loops "loop_j", add "dj = 0". */
3580 for (j = 0;
3581 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3583 eq = omega_add_zero_eq (copy, omega_black);
3584 copy->eqs[eq].coef[j + 1] = 1;
3587 /* For "loop_i", add "0 <= di". */
3588 geq = omega_add_zero_geq (copy, omega_black);
3589 copy->geqs[geq].coef[i + 1] = 1;
3591 /* Reduce the constraint system, and test that the current
3592 problem is feasible. */
3593 res = omega_simplify_problem (copy);
3594 if (res == omega_false
3595 || res == omega_unknown
3596 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3597 goto next_problem;
3599 for (eq = 0; eq < copy->num_subs; eq++)
3600 if (copy->subs[eq].key == (int) i + 1)
3602 dist = copy->subs[eq].coef[0];
3603 goto found_dist;
3606 if (dist == 0)
3608 /* Reinitialize problem... */
3609 omega_copy_problem (copy, pb);
3610 for (j = 0;
3611 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3613 eq = omega_add_zero_eq (copy, omega_black);
3614 copy->eqs[eq].coef[j + 1] = 1;
3617 /* ..., but this time "di = 1". */
3618 eq = omega_add_zero_eq (copy, omega_black);
3619 copy->eqs[eq].coef[i + 1] = 1;
3620 copy->eqs[eq].coef[0] = -1;
3622 res = omega_simplify_problem (copy);
3623 if (res == omega_false
3624 || res == omega_unknown
3625 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3626 goto next_problem;
3628 for (eq = 0; eq < copy->num_subs; eq++)
3629 if (copy->subs[eq].key == (int) i + 1)
3631 dist = copy->subs[eq].coef[0];
3632 goto found_dist;
3636 found_dist:;
3637 /* Save the lexicographically positive distance vector. */
3638 if (dist >= 0)
3640 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3641 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3643 dist_v[i] = dist;
3645 for (eq = 0; eq < copy->num_subs; eq++)
3646 if (copy->subs[eq].key > 0)
3648 dist = copy->subs[eq].coef[0];
3649 dist_v[copy->subs[eq].key - 1] = dist;
3652 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3653 dir_v[j] = dir_from_dist (dist_v[j]);
3655 save_dist_v (ddr, dist_v);
3656 save_dir_v (ddr, dir_v);
3659 next_problem:;
3660 omega_free_problem (copy);
3664 /* This is called for each subscript of a tuple of data references:
3665 insert an equality for representing the conflicts. */
3667 static bool
3668 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3669 struct data_dependence_relation *ddr,
3670 omega_pb pb, bool *maybe_dependent)
3672 int eq;
3673 tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3674 TREE_TYPE (access_fun_b));
3675 tree fun_a = chrec_convert (type, access_fun_a, NULL);
3676 tree fun_b = chrec_convert (type, access_fun_b, NULL);
3677 tree difference = chrec_fold_minus (type, fun_a, fun_b);
3678 tree minus_one;
3680 /* When the fun_a - fun_b is not constant, the dependence is not
3681 captured by the classic distance vector representation. */
3682 if (TREE_CODE (difference) != INTEGER_CST)
3683 return false;
3685 /* ZIV test. */
3686 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3688 /* There is no dependence. */
3689 *maybe_dependent = false;
3690 return true;
3693 minus_one = build_int_cst (type, -1);
3694 fun_b = chrec_fold_multiply (type, fun_b, minus_one);
3696 eq = omega_add_zero_eq (pb, omega_black);
3697 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3698 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3699 /* There is probably a dependence, but the system of
3700 constraints cannot be built: answer "don't know". */
3701 return false;
3703 /* GCD test. */
3704 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3705 && !int_divides_p (lambda_vector_gcd
3706 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3707 2 * DDR_NB_LOOPS (ddr)),
3708 pb->eqs[eq].coef[0]))
3710 /* There is no dependence. */
3711 *maybe_dependent = false;
3712 return true;
3715 return true;
3718 /* Helper function, same as init_omega_for_ddr but specialized for
3719 data references A and B. */
3721 static bool
3722 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3723 struct data_dependence_relation *ddr,
3724 omega_pb pb, bool *maybe_dependent)
3726 unsigned i;
3727 int ineq;
3728 struct loop *loopi;
3729 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3731 /* Insert an equality per subscript. */
3732 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3734 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3735 ddr, pb, maybe_dependent))
3736 return false;
3737 else if (*maybe_dependent == false)
3739 /* There is no dependence. */
3740 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3741 return true;
3745 /* Insert inequalities: constraints corresponding to the iteration
3746 domain, i.e. the loops surrounding the references "loop_x" and
3747 the distance variables "dx". The layout of the OMEGA
3748 representation is as follows:
3749 - coef[0] is the constant
3750 - coef[1..nb_loops] are the protected variables that will not be
3751 removed by the solver: the "dx"
3752 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3754 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3755 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3757 HOST_WIDE_INT nbi = max_stmt_executions_int (loopi, true);
3759 /* 0 <= loop_x */
3760 ineq = omega_add_zero_geq (pb, omega_black);
3761 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3763 /* 0 <= loop_x + dx */
3764 ineq = omega_add_zero_geq (pb, omega_black);
3765 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3766 pb->geqs[ineq].coef[i + 1] = 1;
3768 if (nbi != -1)
3770 /* loop_x <= nb_iters */
3771 ineq = omega_add_zero_geq (pb, omega_black);
3772 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3773 pb->geqs[ineq].coef[0] = nbi;
3775 /* loop_x + dx <= nb_iters */
3776 ineq = omega_add_zero_geq (pb, omega_black);
3777 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3778 pb->geqs[ineq].coef[i + 1] = -1;
3779 pb->geqs[ineq].coef[0] = nbi;
3781 /* A step "dx" bigger than nb_iters is not feasible, so
3782 add "0 <= nb_iters + dx", */
3783 ineq = omega_add_zero_geq (pb, omega_black);
3784 pb->geqs[ineq].coef[i + 1] = 1;
3785 pb->geqs[ineq].coef[0] = nbi;
3786 /* and "dx <= nb_iters". */
3787 ineq = omega_add_zero_geq (pb, omega_black);
3788 pb->geqs[ineq].coef[i + 1] = -1;
3789 pb->geqs[ineq].coef[0] = nbi;
3793 omega_extract_distance_vectors (pb, ddr);
3795 return true;
3798 /* Sets up the Omega dependence problem for the data dependence
3799 relation DDR. Returns false when the constraint system cannot be
3800 built, ie. when the test answers "don't know". Returns true
3801 otherwise, and when independence has been proved (using one of the
3802 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3803 set MAYBE_DEPENDENT to true.
3805 Example: for setting up the dependence system corresponding to the
3806 conflicting accesses
3808 | loop_i
3809 | loop_j
3810 | A[i, i+1] = ...
3811 | ... A[2*j, 2*(i + j)]
3812 | endloop_j
3813 | endloop_i
3815 the following constraints come from the iteration domain:
3817 0 <= i <= Ni
3818 0 <= i + di <= Ni
3819 0 <= j <= Nj
3820 0 <= j + dj <= Nj
3822 where di, dj are the distance variables. The constraints
3823 representing the conflicting elements are:
3825 i = 2 * (j + dj)
3826 i + 1 = 2 * (i + di + j + dj)
3828 For asking that the resulting distance vector (di, dj) be
3829 lexicographically positive, we insert the constraint "di >= 0". If
3830 "di = 0" in the solution, we fix that component to zero, and we
3831 look at the inner loops: we set a new problem where all the outer
3832 loop distances are zero, and fix this inner component to be
3833 positive. When one of the components is positive, we save that
3834 distance, and set a new problem where the distance on this loop is
3835 zero, searching for other distances in the inner loops. Here is
3836 the classic example that illustrates that we have to set for each
3837 inner loop a new problem:
3839 | loop_1
3840 | loop_2
3841 | A[10]
3842 | endloop_2
3843 | endloop_1
3845 we have to save two distances (1, 0) and (0, 1).
3847 Given two array references, refA and refB, we have to set the
3848 dependence problem twice, refA vs. refB and refB vs. refA, and we
3849 cannot do a single test, as refB might occur before refA in the
3850 inner loops, and the contrary when considering outer loops: ex.
3852 | loop_0
3853 | loop_1
3854 | loop_2
3855 | T[{1,+,1}_2][{1,+,1}_1] // refA
3856 | T[{2,+,1}_2][{0,+,1}_1] // refB
3857 | endloop_2
3858 | endloop_1
3859 | endloop_0
3861 refB touches the elements in T before refA, and thus for the same
3862 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3863 but for successive loop_0 iterations, we have (1, -1, 1)
3865 The Omega solver expects the distance variables ("di" in the
3866 previous example) to come first in the constraint system (as
3867 variables to be protected, or "safe" variables), the constraint
3868 system is built using the following layout:
3870 "cst | distance vars | index vars".
3873 static bool
3874 init_omega_for_ddr (struct data_dependence_relation *ddr,
3875 bool *maybe_dependent)
3877 omega_pb pb;
3878 bool res = false;
3880 *maybe_dependent = true;
3882 if (same_access_functions (ddr))
3884 unsigned j;
3885 lambda_vector dir_v;
3887 /* Save the 0 vector. */
3888 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3889 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3890 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3891 dir_v[j] = dir_equal;
3892 save_dir_v (ddr, dir_v);
3894 /* Save the dependences carried by outer loops. */
3895 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3896 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3897 maybe_dependent);
3898 omega_free_problem (pb);
3899 return res;
3902 /* Omega expects the protected variables (those that have to be kept
3903 after elimination) to appear first in the constraint system.
3904 These variables are the distance variables. In the following
3905 initialization we declare NB_LOOPS safe variables, and the total
3906 number of variables for the constraint system is 2*NB_LOOPS. */
3907 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3908 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3909 maybe_dependent);
3910 omega_free_problem (pb);
3912 /* Stop computation if not decidable, or no dependence. */
3913 if (res == false || *maybe_dependent == false)
3914 return res;
3916 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3917 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
3918 maybe_dependent);
3919 omega_free_problem (pb);
3921 return res;
3924 /* Return true when DDR contains the same information as that stored
3925 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
3927 static bool
3928 ddr_consistent_p (FILE *file,
3929 struct data_dependence_relation *ddr,
3930 VEC (lambda_vector, heap) *dist_vects,
3931 VEC (lambda_vector, heap) *dir_vects)
3933 unsigned int i, j;
3935 /* If dump_file is set, output there. */
3936 if (dump_file && (dump_flags & TDF_DETAILS))
3937 file = dump_file;
3939 if (VEC_length (lambda_vector, dist_vects) != DDR_NUM_DIST_VECTS (ddr))
3941 lambda_vector b_dist_v;
3942 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
3943 VEC_length (lambda_vector, dist_vects),
3944 DDR_NUM_DIST_VECTS (ddr));
3946 fprintf (file, "Banerjee dist vectors:\n");
3947 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, i, b_dist_v)
3948 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
3950 fprintf (file, "Omega dist vectors:\n");
3951 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3952 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
3954 fprintf (file, "data dependence relation:\n");
3955 dump_data_dependence_relation (file, ddr);
3957 fprintf (file, ")\n");
3958 return false;
3961 if (VEC_length (lambda_vector, dir_vects) != DDR_NUM_DIR_VECTS (ddr))
3963 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
3964 VEC_length (lambda_vector, dir_vects),
3965 DDR_NUM_DIR_VECTS (ddr));
3966 return false;
3969 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3971 lambda_vector a_dist_v;
3972 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
3974 /* Distance vectors are not ordered in the same way in the DDR
3975 and in the DIST_VECTS: search for a matching vector. */
3976 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, a_dist_v)
3977 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
3978 break;
3980 if (j == VEC_length (lambda_vector, dist_vects))
3982 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
3983 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
3984 fprintf (file, "not found in Omega dist vectors:\n");
3985 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
3986 fprintf (file, "data dependence relation:\n");
3987 dump_data_dependence_relation (file, ddr);
3988 fprintf (file, ")\n");
3992 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
3994 lambda_vector a_dir_v;
3995 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
3997 /* Direction vectors are not ordered in the same way in the DDR
3998 and in the DIR_VECTS: search for a matching vector. */
3999 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, a_dir_v)
4000 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
4001 break;
4003 if (j == VEC_length (lambda_vector, dist_vects))
4005 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
4006 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
4007 fprintf (file, "not found in Omega dir vectors:\n");
4008 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
4009 fprintf (file, "data dependence relation:\n");
4010 dump_data_dependence_relation (file, ddr);
4011 fprintf (file, ")\n");
4015 return true;
4018 /* This computes the affine dependence relation between A and B with
4019 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
4020 independence between two accesses, while CHREC_DONT_KNOW is used
4021 for representing the unknown relation.
4023 Note that it is possible to stop the computation of the dependence
4024 relation the first time we detect a CHREC_KNOWN element for a given
4025 subscript. */
4027 static void
4028 compute_affine_dependence (struct data_dependence_relation *ddr,
4029 struct loop *loop_nest)
4031 struct data_reference *dra = DDR_A (ddr);
4032 struct data_reference *drb = DDR_B (ddr);
4034 if (dump_file && (dump_flags & TDF_DETAILS))
4036 fprintf (dump_file, "(compute_affine_dependence\n");
4037 fprintf (dump_file, " (stmt_a = \n");
4038 print_gimple_stmt (dump_file, DR_STMT (dra), 0, 0);
4039 fprintf (dump_file, ")\n (stmt_b = \n");
4040 print_gimple_stmt (dump_file, DR_STMT (drb), 0, 0);
4041 fprintf (dump_file, ")\n");
4044 /* Analyze only when the dependence relation is not yet known. */
4045 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE
4046 && !DDR_SELF_REFERENCE (ddr))
4048 dependence_stats.num_dependence_tests++;
4050 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
4051 && access_functions_are_affine_or_constant_p (drb, loop_nest))
4053 if (flag_check_data_deps)
4055 /* Compute the dependences using the first algorithm. */
4056 subscript_dependence_tester (ddr, loop_nest);
4058 if (dump_file && (dump_flags & TDF_DETAILS))
4060 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
4061 dump_data_dependence_relation (dump_file, ddr);
4064 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4066 bool maybe_dependent;
4067 VEC (lambda_vector, heap) *dir_vects, *dist_vects;
4069 /* Save the result of the first DD analyzer. */
4070 dist_vects = DDR_DIST_VECTS (ddr);
4071 dir_vects = DDR_DIR_VECTS (ddr);
4073 /* Reset the information. */
4074 DDR_DIST_VECTS (ddr) = NULL;
4075 DDR_DIR_VECTS (ddr) = NULL;
4077 /* Compute the same information using Omega. */
4078 if (!init_omega_for_ddr (ddr, &maybe_dependent))
4079 goto csys_dont_know;
4081 if (dump_file && (dump_flags & TDF_DETAILS))
4083 fprintf (dump_file, "Omega Analyzer\n");
4084 dump_data_dependence_relation (dump_file, ddr);
4087 /* Check that we get the same information. */
4088 if (maybe_dependent)
4089 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
4090 dir_vects));
4093 else
4094 subscript_dependence_tester (ddr, loop_nest);
4097 /* As a last case, if the dependence cannot be determined, or if
4098 the dependence is considered too difficult to determine, answer
4099 "don't know". */
4100 else
4102 csys_dont_know:;
4103 dependence_stats.num_dependence_undetermined++;
4105 if (dump_file && (dump_flags & TDF_DETAILS))
4107 fprintf (dump_file, "Data ref a:\n");
4108 dump_data_reference (dump_file, dra);
4109 fprintf (dump_file, "Data ref b:\n");
4110 dump_data_reference (dump_file, drb);
4111 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4113 finalize_ddr_dependent (ddr, chrec_dont_know);
4117 if (dump_file && (dump_flags & TDF_DETAILS))
4118 fprintf (dump_file, ")\n");
4121 /* This computes the dependence relation for the same data
4122 reference into DDR. */
4124 static void
4125 compute_self_dependence (struct data_dependence_relation *ddr)
4127 unsigned int i;
4128 struct subscript *subscript;
4130 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
4131 return;
4133 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
4134 i++)
4136 if (SUB_CONFLICTS_IN_A (subscript))
4137 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
4138 if (SUB_CONFLICTS_IN_B (subscript))
4139 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
4141 /* The accessed index overlaps for each iteration. */
4142 SUB_CONFLICTS_IN_A (subscript)
4143 = conflict_fn (1, affine_fn_cst (integer_zero_node));
4144 SUB_CONFLICTS_IN_B (subscript)
4145 = conflict_fn (1, affine_fn_cst (integer_zero_node));
4146 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
4149 /* The distance vector is the zero vector. */
4150 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4151 save_dir_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4154 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4155 the data references in DATAREFS, in the LOOP_NEST. When
4156 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4157 relations. */
4159 void
4160 compute_all_dependences (VEC (data_reference_p, heap) *datarefs,
4161 VEC (ddr_p, heap) **dependence_relations,
4162 VEC (loop_p, heap) *loop_nest,
4163 bool compute_self_and_rr)
4165 struct data_dependence_relation *ddr;
4166 struct data_reference *a, *b;
4167 unsigned int i, j;
4169 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4170 for (j = i + 1; VEC_iterate (data_reference_p, datarefs, j, b); j++)
4171 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4173 ddr = initialize_data_dependence_relation (a, b, loop_nest);
4174 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4175 if (loop_nest)
4176 compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
4179 if (compute_self_and_rr)
4180 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4182 ddr = initialize_data_dependence_relation (a, a, loop_nest);
4183 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4184 compute_self_dependence (ddr);
4188 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4189 true if STMT clobbers memory, false otherwise. */
4191 bool
4192 get_references_in_stmt (gimple stmt, VEC (data_ref_loc, heap) **references)
4194 bool clobbers_memory = false;
4195 data_ref_loc *ref;
4196 tree *op0, *op1;
4197 enum gimple_code stmt_code = gimple_code (stmt);
4199 *references = NULL;
4201 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4202 Calls have side-effects, except those to const or pure
4203 functions. */
4204 if ((stmt_code == GIMPLE_CALL
4205 && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)))
4206 || (stmt_code == GIMPLE_ASM
4207 && gimple_asm_volatile_p (stmt)))
4208 clobbers_memory = true;
4210 if (!gimple_vuse (stmt))
4211 return clobbers_memory;
4213 if (stmt_code == GIMPLE_ASSIGN)
4215 tree base;
4216 op0 = gimple_assign_lhs_ptr (stmt);
4217 op1 = gimple_assign_rhs1_ptr (stmt);
4219 if (DECL_P (*op1)
4220 || (REFERENCE_CLASS_P (*op1)
4221 && (base = get_base_address (*op1))
4222 && TREE_CODE (base) != SSA_NAME))
4224 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4225 ref->pos = op1;
4226 ref->is_read = true;
4229 else if (stmt_code == GIMPLE_CALL)
4231 unsigned i, n;
4233 op0 = gimple_call_lhs_ptr (stmt);
4234 n = gimple_call_num_args (stmt);
4235 for (i = 0; i < n; i++)
4237 op1 = gimple_call_arg_ptr (stmt, i);
4239 if (DECL_P (*op1)
4240 || (REFERENCE_CLASS_P (*op1) && get_base_address (*op1)))
4242 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4243 ref->pos = op1;
4244 ref->is_read = true;
4248 else
4249 return clobbers_memory;
4251 if (*op0
4252 && (DECL_P (*op0)
4253 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0))))
4255 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4256 ref->pos = op0;
4257 ref->is_read = false;
4259 return clobbers_memory;
4262 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4263 reference, returns false, otherwise returns true. NEST is the outermost
4264 loop of the loop nest in which the references should be analyzed. */
4266 bool
4267 find_data_references_in_stmt (struct loop *nest, gimple stmt,
4268 VEC (data_reference_p, heap) **datarefs)
4270 unsigned i;
4271 VEC (data_ref_loc, heap) *references;
4272 data_ref_loc *ref;
4273 bool ret = true;
4274 data_reference_p dr;
4276 if (get_references_in_stmt (stmt, &references))
4278 VEC_free (data_ref_loc, heap, references);
4279 return false;
4282 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4284 dr = create_data_ref (nest, loop_containing_stmt (stmt),
4285 *ref->pos, stmt, ref->is_read);
4286 gcc_assert (dr != NULL);
4287 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4289 VEC_free (data_ref_loc, heap, references);
4290 return ret;
4293 /* Stores the data references in STMT to DATAREFS. If there is an
4294 unanalyzable reference, returns false, otherwise returns true.
4295 NEST is the outermost loop of the loop nest in which the references
4296 should be instantiated, LOOP is the loop in which the references
4297 should be analyzed. */
4299 bool
4300 graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple stmt,
4301 VEC (data_reference_p, heap) **datarefs)
4303 unsigned i;
4304 VEC (data_ref_loc, heap) *references;
4305 data_ref_loc *ref;
4306 bool ret = true;
4307 data_reference_p dr;
4309 if (get_references_in_stmt (stmt, &references))
4311 VEC_free (data_ref_loc, heap, references);
4312 return false;
4315 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4317 dr = create_data_ref (nest, loop, *ref->pos, stmt, ref->is_read);
4318 gcc_assert (dr != NULL);
4319 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4322 VEC_free (data_ref_loc, heap, references);
4323 return ret;
4326 /* Search the data references in LOOP, and record the information into
4327 DATAREFS. Returns chrec_dont_know when failing to analyze a
4328 difficult case, returns NULL_TREE otherwise. */
4330 tree
4331 find_data_references_in_bb (struct loop *loop, basic_block bb,
4332 VEC (data_reference_p, heap) **datarefs)
4334 gimple_stmt_iterator bsi;
4336 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4338 gimple stmt = gsi_stmt (bsi);
4340 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4342 struct data_reference *res;
4343 res = XCNEW (struct data_reference);
4344 VEC_safe_push (data_reference_p, heap, *datarefs, res);
4346 return chrec_dont_know;
4350 return NULL_TREE;
4353 /* Search the data references in LOOP, and record the information into
4354 DATAREFS. Returns chrec_dont_know when failing to analyze a
4355 difficult case, returns NULL_TREE otherwise.
4357 TODO: This function should be made smarter so that it can handle address
4358 arithmetic as if they were array accesses, etc. */
4360 tree
4361 find_data_references_in_loop (struct loop *loop,
4362 VEC (data_reference_p, heap) **datarefs)
4364 basic_block bb, *bbs;
4365 unsigned int i;
4367 bbs = get_loop_body_in_dom_order (loop);
4369 for (i = 0; i < loop->num_nodes; i++)
4371 bb = bbs[i];
4373 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4375 free (bbs);
4376 return chrec_dont_know;
4379 free (bbs);
4381 return NULL_TREE;
4384 /* Recursive helper function. */
4386 static bool
4387 find_loop_nest_1 (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4389 /* Inner loops of the nest should not contain siblings. Example:
4390 when there are two consecutive loops,
4392 | loop_0
4393 | loop_1
4394 | A[{0, +, 1}_1]
4395 | endloop_1
4396 | loop_2
4397 | A[{0, +, 1}_2]
4398 | endloop_2
4399 | endloop_0
4401 the dependence relation cannot be captured by the distance
4402 abstraction. */
4403 if (loop->next)
4404 return false;
4406 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4407 if (loop->inner)
4408 return find_loop_nest_1 (loop->inner, loop_nest);
4409 return true;
4412 /* Return false when the LOOP is not well nested. Otherwise return
4413 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4414 contain the loops from the outermost to the innermost, as they will
4415 appear in the classic distance vector. */
4417 bool
4418 find_loop_nest (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4420 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4421 if (loop->inner)
4422 return find_loop_nest_1 (loop->inner, loop_nest);
4423 return true;
4426 /* Returns true when the data dependences have been computed, false otherwise.
4427 Given a loop nest LOOP, the following vectors are returned:
4428 DATAREFS is initialized to all the array elements contained in this loop,
4429 DEPENDENCE_RELATIONS contains the relations between the data references.
4430 Compute read-read and self relations if
4431 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4433 bool
4434 compute_data_dependences_for_loop (struct loop *loop,
4435 bool compute_self_and_read_read_dependences,
4436 VEC (loop_p, heap) **loop_nest,
4437 VEC (data_reference_p, heap) **datarefs,
4438 VEC (ddr_p, heap) **dependence_relations)
4440 bool res = true;
4442 memset (&dependence_stats, 0, sizeof (dependence_stats));
4444 /* If the loop nest is not well formed, or one of the data references
4445 is not computable, give up without spending time to compute other
4446 dependences. */
4447 if (!loop
4448 || !find_loop_nest (loop, loop_nest)
4449 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know)
4451 struct data_dependence_relation *ddr;
4453 /* Insert a single relation into dependence_relations:
4454 chrec_dont_know. */
4455 ddr = initialize_data_dependence_relation (NULL, NULL, *loop_nest);
4456 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4457 res = false;
4459 else
4460 compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
4461 compute_self_and_read_read_dependences);
4463 if (dump_file && (dump_flags & TDF_STATS))
4465 fprintf (dump_file, "Dependence tester statistics:\n");
4467 fprintf (dump_file, "Number of dependence tests: %d\n",
4468 dependence_stats.num_dependence_tests);
4469 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4470 dependence_stats.num_dependence_dependent);
4471 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4472 dependence_stats.num_dependence_independent);
4473 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4474 dependence_stats.num_dependence_undetermined);
4476 fprintf (dump_file, "Number of subscript tests: %d\n",
4477 dependence_stats.num_subscript_tests);
4478 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4479 dependence_stats.num_subscript_undetermined);
4480 fprintf (dump_file, "Number of same subscript function: %d\n",
4481 dependence_stats.num_same_subscript_function);
4483 fprintf (dump_file, "Number of ziv tests: %d\n",
4484 dependence_stats.num_ziv);
4485 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4486 dependence_stats.num_ziv_dependent);
4487 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4488 dependence_stats.num_ziv_independent);
4489 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4490 dependence_stats.num_ziv_unimplemented);
4492 fprintf (dump_file, "Number of siv tests: %d\n",
4493 dependence_stats.num_siv);
4494 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4495 dependence_stats.num_siv_dependent);
4496 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4497 dependence_stats.num_siv_independent);
4498 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4499 dependence_stats.num_siv_unimplemented);
4501 fprintf (dump_file, "Number of miv tests: %d\n",
4502 dependence_stats.num_miv);
4503 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4504 dependence_stats.num_miv_dependent);
4505 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4506 dependence_stats.num_miv_independent);
4507 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4508 dependence_stats.num_miv_unimplemented);
4511 return res;
4514 /* Returns true when the data dependences for the basic block BB have been
4515 computed, false otherwise.
4516 DATAREFS is initialized to all the array elements contained in this basic
4517 block, DEPENDENCE_RELATIONS contains the relations between the data
4518 references. Compute read-read and self relations if
4519 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4520 bool
4521 compute_data_dependences_for_bb (basic_block bb,
4522 bool compute_self_and_read_read_dependences,
4523 VEC (data_reference_p, heap) **datarefs,
4524 VEC (ddr_p, heap) **dependence_relations)
4526 if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know)
4527 return false;
4529 compute_all_dependences (*datarefs, dependence_relations, NULL,
4530 compute_self_and_read_read_dependences);
4531 return true;
4534 /* Entry point (for testing only). Analyze all the data references
4535 and the dependence relations in LOOP.
4537 The data references are computed first.
4539 A relation on these nodes is represented by a complete graph. Some
4540 of the relations could be of no interest, thus the relations can be
4541 computed on demand.
4543 In the following function we compute all the relations. This is
4544 just a first implementation that is here for:
4545 - for showing how to ask for the dependence relations,
4546 - for the debugging the whole dependence graph,
4547 - for the dejagnu testcases and maintenance.
4549 It is possible to ask only for a part of the graph, avoiding to
4550 compute the whole dependence graph. The computed dependences are
4551 stored in a knowledge base (KB) such that later queries don't
4552 recompute the same information. The implementation of this KB is
4553 transparent to the optimizer, and thus the KB can be changed with a
4554 more efficient implementation, or the KB could be disabled. */
4555 static void
4556 analyze_all_data_dependences (struct loop *loop)
4558 unsigned int i;
4559 int nb_data_refs = 10;
4560 VEC (data_reference_p, heap) *datarefs =
4561 VEC_alloc (data_reference_p, heap, nb_data_refs);
4562 VEC (ddr_p, heap) *dependence_relations =
4563 VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs);
4564 VEC (loop_p, heap) *loop_nest = VEC_alloc (loop_p, heap, 3);
4566 /* Compute DDs on the whole function. */
4567 compute_data_dependences_for_loop (loop, false, &loop_nest, &datarefs,
4568 &dependence_relations);
4570 if (dump_file)
4572 dump_data_dependence_relations (dump_file, dependence_relations);
4573 fprintf (dump_file, "\n\n");
4575 if (dump_flags & TDF_DETAILS)
4576 dump_dist_dir_vectors (dump_file, dependence_relations);
4578 if (dump_flags & TDF_STATS)
4580 unsigned nb_top_relations = 0;
4581 unsigned nb_bot_relations = 0;
4582 unsigned nb_chrec_relations = 0;
4583 struct data_dependence_relation *ddr;
4585 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4587 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4588 nb_top_relations++;
4590 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4591 nb_bot_relations++;
4593 else
4594 nb_chrec_relations++;
4597 gather_stats_on_scev_database ();
4601 VEC_free (loop_p, heap, loop_nest);
4602 free_dependence_relations (dependence_relations);
4603 free_data_refs (datarefs);
4606 /* Computes all the data dependences and check that the results of
4607 several analyzers are the same. */
4609 void
4610 tree_check_data_deps (void)
4612 loop_iterator li;
4613 struct loop *loop_nest;
4615 FOR_EACH_LOOP (li, loop_nest, 0)
4616 analyze_all_data_dependences (loop_nest);
4619 /* Free the memory used by a data dependence relation DDR. */
4621 void
4622 free_dependence_relation (struct data_dependence_relation *ddr)
4624 if (ddr == NULL)
4625 return;
4627 if (DDR_SUBSCRIPTS (ddr))
4628 free_subscripts (DDR_SUBSCRIPTS (ddr));
4629 if (DDR_DIST_VECTS (ddr))
4630 VEC_free (lambda_vector, heap, DDR_DIST_VECTS (ddr));
4631 if (DDR_DIR_VECTS (ddr))
4632 VEC_free (lambda_vector, heap, DDR_DIR_VECTS (ddr));
4634 free (ddr);
4637 /* Free the memory used by the data dependence relations from
4638 DEPENDENCE_RELATIONS. */
4640 void
4641 free_dependence_relations (VEC (ddr_p, heap) *dependence_relations)
4643 unsigned int i;
4644 struct data_dependence_relation *ddr;
4646 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4647 if (ddr)
4648 free_dependence_relation (ddr);
4650 VEC_free (ddr_p, heap, dependence_relations);
4653 /* Free the memory used by the data references from DATAREFS. */
4655 void
4656 free_data_refs (VEC (data_reference_p, heap) *datarefs)
4658 unsigned int i;
4659 struct data_reference *dr;
4661 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
4662 free_data_ref (dr);
4663 VEC_free (data_reference_p, heap, datarefs);
4668 /* Dump vertex I in RDG to FILE. */
4670 void
4671 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
4673 struct vertex *v = &(rdg->vertices[i]);
4674 struct graph_edge *e;
4676 fprintf (file, "(vertex %d: (%s%s) (in:", i,
4677 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
4678 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
4680 if (v->pred)
4681 for (e = v->pred; e; e = e->pred_next)
4682 fprintf (file, " %d", e->src);
4684 fprintf (file, ") (out:");
4686 if (v->succ)
4687 for (e = v->succ; e; e = e->succ_next)
4688 fprintf (file, " %d", e->dest);
4690 fprintf (file, ")\n");
4691 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
4692 fprintf (file, ")\n");
4695 /* Call dump_rdg_vertex on stderr. */
4697 DEBUG_FUNCTION void
4698 debug_rdg_vertex (struct graph *rdg, int i)
4700 dump_rdg_vertex (stderr, rdg, i);
4703 /* Dump component C of RDG to FILE. If DUMPED is non-null, set the
4704 dumped vertices to that bitmap. */
4706 void dump_rdg_component (FILE *file, struct graph *rdg, int c, bitmap dumped)
4708 int i;
4710 fprintf (file, "(%d\n", c);
4712 for (i = 0; i < rdg->n_vertices; i++)
4713 if (rdg->vertices[i].component == c)
4715 if (dumped)
4716 bitmap_set_bit (dumped, i);
4718 dump_rdg_vertex (file, rdg, i);
4721 fprintf (file, ")\n");
4724 /* Call dump_rdg_vertex on stderr. */
4726 DEBUG_FUNCTION void
4727 debug_rdg_component (struct graph *rdg, int c)
4729 dump_rdg_component (stderr, rdg, c, NULL);
4732 /* Dump the reduced dependence graph RDG to FILE. */
4734 void
4735 dump_rdg (FILE *file, struct graph *rdg)
4737 int i;
4738 bitmap dumped = BITMAP_ALLOC (NULL);
4740 fprintf (file, "(rdg\n");
4742 for (i = 0; i < rdg->n_vertices; i++)
4743 if (!bitmap_bit_p (dumped, i))
4744 dump_rdg_component (file, rdg, rdg->vertices[i].component, dumped);
4746 fprintf (file, ")\n");
4747 BITMAP_FREE (dumped);
4750 /* Call dump_rdg on stderr. */
4752 DEBUG_FUNCTION void
4753 debug_rdg (struct graph *rdg)
4755 dump_rdg (stderr, rdg);
4758 static void
4759 dot_rdg_1 (FILE *file, struct graph *rdg)
4761 int i;
4763 fprintf (file, "digraph RDG {\n");
4765 for (i = 0; i < rdg->n_vertices; i++)
4767 struct vertex *v = &(rdg->vertices[i]);
4768 struct graph_edge *e;
4770 /* Highlight reads from memory. */
4771 if (RDG_MEM_READS_STMT (rdg, i))
4772 fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
4774 /* Highlight stores to memory. */
4775 if (RDG_MEM_WRITE_STMT (rdg, i))
4776 fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
4778 if (v->succ)
4779 for (e = v->succ; e; e = e->succ_next)
4780 switch (RDGE_TYPE (e))
4782 case input_dd:
4783 fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
4784 break;
4786 case output_dd:
4787 fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
4788 break;
4790 case flow_dd:
4791 /* These are the most common dependences: don't print these. */
4792 fprintf (file, "%d -> %d \n", i, e->dest);
4793 break;
4795 case anti_dd:
4796 fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
4797 break;
4799 default:
4800 gcc_unreachable ();
4804 fprintf (file, "}\n\n");
4807 /* Display the Reduced Dependence Graph using dotty. */
4808 extern void dot_rdg (struct graph *);
4810 DEBUG_FUNCTION void
4811 dot_rdg (struct graph *rdg)
4813 /* When debugging, enable the following code. This cannot be used
4814 in production compilers because it calls "system". */
4815 #if 0
4816 FILE *file = fopen ("/tmp/rdg.dot", "w");
4817 gcc_assert (file != NULL);
4819 dot_rdg_1 (file, rdg);
4820 fclose (file);
4822 system ("dotty /tmp/rdg.dot &");
4823 #else
4824 dot_rdg_1 (stderr, rdg);
4825 #endif
4828 /* This structure is used for recording the mapping statement index in
4829 the RDG. */
4831 struct GTY(()) rdg_vertex_info
4833 gimple stmt;
4834 int index;
4837 /* Returns the index of STMT in RDG. */
4840 rdg_vertex_for_stmt (struct graph *rdg, gimple stmt)
4842 struct rdg_vertex_info rvi, *slot;
4844 rvi.stmt = stmt;
4845 slot = (struct rdg_vertex_info *) htab_find (rdg->indices, &rvi);
4847 if (!slot)
4848 return -1;
4850 return slot->index;
4853 /* Creates an edge in RDG for each distance vector from DDR. The
4854 order that we keep track of in the RDG is the order in which
4855 statements have to be executed. */
4857 static void
4858 create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4860 struct graph_edge *e;
4861 int va, vb;
4862 data_reference_p dra = DDR_A (ddr);
4863 data_reference_p drb = DDR_B (ddr);
4864 unsigned level = ddr_dependence_level (ddr);
4866 /* For non scalar dependences, when the dependence is REVERSED,
4867 statement B has to be executed before statement A. */
4868 if (level > 0
4869 && !DDR_REVERSED_P (ddr))
4871 data_reference_p tmp = dra;
4872 dra = drb;
4873 drb = tmp;
4876 va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
4877 vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
4879 if (va < 0 || vb < 0)
4880 return;
4882 e = add_edge (rdg, va, vb);
4883 e->data = XNEW (struct rdg_edge);
4885 RDGE_LEVEL (e) = level;
4886 RDGE_RELATION (e) = ddr;
4888 /* Determines the type of the data dependence. */
4889 if (DR_IS_READ (dra) && DR_IS_READ (drb))
4890 RDGE_TYPE (e) = input_dd;
4891 else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
4892 RDGE_TYPE (e) = output_dd;
4893 else if (DR_IS_WRITE (dra) && DR_IS_READ (drb))
4894 RDGE_TYPE (e) = flow_dd;
4895 else if (DR_IS_READ (dra) && DR_IS_WRITE (drb))
4896 RDGE_TYPE (e) = anti_dd;
4899 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
4900 the index of DEF in RDG. */
4902 static void
4903 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
4905 use_operand_p imm_use_p;
4906 imm_use_iterator iterator;
4908 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
4910 struct graph_edge *e;
4911 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
4913 if (use < 0)
4914 continue;
4916 e = add_edge (rdg, idef, use);
4917 e->data = XNEW (struct rdg_edge);
4918 RDGE_TYPE (e) = flow_dd;
4919 RDGE_RELATION (e) = NULL;
4923 /* Creates the edges of the reduced dependence graph RDG. */
4925 static void
4926 create_rdg_edges (struct graph *rdg, VEC (ddr_p, heap) *ddrs)
4928 int i;
4929 struct data_dependence_relation *ddr;
4930 def_operand_p def_p;
4931 ssa_op_iter iter;
4933 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
4934 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4935 create_rdg_edge_for_ddr (rdg, ddr);
4937 for (i = 0; i < rdg->n_vertices; i++)
4938 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
4939 iter, SSA_OP_DEF)
4940 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
4943 /* Build the vertices of the reduced dependence graph RDG. */
4945 void
4946 create_rdg_vertices (struct graph *rdg, VEC (gimple, heap) *stmts)
4948 int i, j;
4949 gimple stmt;
4951 FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
4953 VEC (data_ref_loc, heap) *references;
4954 data_ref_loc *ref;
4955 struct vertex *v = &(rdg->vertices[i]);
4956 struct rdg_vertex_info *rvi = XNEW (struct rdg_vertex_info);
4957 struct rdg_vertex_info **slot;
4959 rvi->stmt = stmt;
4960 rvi->index = i;
4961 slot = (struct rdg_vertex_info **) htab_find_slot (rdg->indices, rvi, INSERT);
4963 if (!*slot)
4964 *slot = rvi;
4965 else
4966 free (rvi);
4968 v->data = XNEW (struct rdg_vertex);
4969 RDG_STMT (rdg, i) = stmt;
4971 RDG_MEM_WRITE_STMT (rdg, i) = false;
4972 RDG_MEM_READS_STMT (rdg, i) = false;
4973 if (gimple_code (stmt) == GIMPLE_PHI)
4974 continue;
4976 get_references_in_stmt (stmt, &references);
4977 FOR_EACH_VEC_ELT (data_ref_loc, references, j, ref)
4978 if (!ref->is_read)
4979 RDG_MEM_WRITE_STMT (rdg, i) = true;
4980 else
4981 RDG_MEM_READS_STMT (rdg, i) = true;
4983 VEC_free (data_ref_loc, heap, references);
4987 /* Initialize STMTS with all the statements of LOOP. When
4988 INCLUDE_PHIS is true, include also the PHI nodes. The order in
4989 which we discover statements is important as
4990 generate_loops_for_partition is using the same traversal for
4991 identifying statements. */
4993 static void
4994 stmts_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
4996 unsigned int i;
4997 basic_block *bbs = get_loop_body_in_dom_order (loop);
4999 for (i = 0; i < loop->num_nodes; i++)
5001 basic_block bb = bbs[i];
5002 gimple_stmt_iterator bsi;
5003 gimple stmt;
5005 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5006 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
5008 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5010 stmt = gsi_stmt (bsi);
5011 if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
5012 VEC_safe_push (gimple, heap, *stmts, stmt);
5016 free (bbs);
5019 /* Returns true when all the dependences are computable. */
5021 static bool
5022 known_dependences_p (VEC (ddr_p, heap) *dependence_relations)
5024 ddr_p ddr;
5025 unsigned int i;
5027 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
5028 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
5029 return false;
5031 return true;
5034 /* Computes a hash function for element ELT. */
5036 static hashval_t
5037 hash_stmt_vertex_info (const void *elt)
5039 const struct rdg_vertex_info *const rvi =
5040 (const struct rdg_vertex_info *) elt;
5041 gimple stmt = rvi->stmt;
5043 return htab_hash_pointer (stmt);
5046 /* Compares database elements E1 and E2. */
5048 static int
5049 eq_stmt_vertex_info (const void *e1, const void *e2)
5051 const struct rdg_vertex_info *elt1 = (const struct rdg_vertex_info *) e1;
5052 const struct rdg_vertex_info *elt2 = (const struct rdg_vertex_info *) e2;
5054 return elt1->stmt == elt2->stmt;
5057 /* Free the element E. */
5059 static void
5060 hash_stmt_vertex_del (void *e)
5062 free (e);
5065 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5066 statement of the loop nest, and one edge per data dependence or
5067 scalar dependence. */
5069 struct graph *
5070 build_empty_rdg (int n_stmts)
5072 int nb_data_refs = 10;
5073 struct graph *rdg = new_graph (n_stmts);
5075 rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
5076 eq_stmt_vertex_info, hash_stmt_vertex_del);
5077 return rdg;
5080 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5081 statement of the loop nest, and one edge per data dependence or
5082 scalar dependence. */
5084 struct graph *
5085 build_rdg (struct loop *loop,
5086 VEC (loop_p, heap) **loop_nest,
5087 VEC (ddr_p, heap) **dependence_relations,
5088 VEC (data_reference_p, heap) **datarefs)
5090 struct graph *rdg = NULL;
5091 VEC (gimple, heap) *stmts = VEC_alloc (gimple, heap, 10);
5093 compute_data_dependences_for_loop (loop, false, loop_nest, datarefs,
5094 dependence_relations);
5096 if (known_dependences_p (*dependence_relations))
5098 stmts_from_loop (loop, &stmts);
5099 rdg = build_empty_rdg (VEC_length (gimple, stmts));
5100 create_rdg_vertices (rdg, stmts);
5101 create_rdg_edges (rdg, *dependence_relations);
5104 VEC_free (gimple, heap, stmts);
5105 return rdg;
5108 /* Free the reduced dependence graph RDG. */
5110 void
5111 free_rdg (struct graph *rdg)
5113 int i;
5115 for (i = 0; i < rdg->n_vertices; i++)
5117 struct vertex *v = &(rdg->vertices[i]);
5118 struct graph_edge *e;
5120 for (e = v->succ; e; e = e->succ_next)
5121 free (e->data);
5123 free (v->data);
5126 htab_delete (rdg->indices);
5127 free_graph (rdg);
5130 /* Initialize STMTS with all the statements of LOOP that contain a
5131 store to memory. */
5133 void
5134 stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5136 unsigned int i;
5137 basic_block *bbs = get_loop_body_in_dom_order (loop);
5139 for (i = 0; i < loop->num_nodes; i++)
5141 basic_block bb = bbs[i];
5142 gimple_stmt_iterator bsi;
5144 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5145 if (gimple_vdef (gsi_stmt (bsi)))
5146 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
5149 free (bbs);
5152 /* Returns true when the statement at STMT is of the form "A[i] = 0"
5153 that contains a data reference on its LHS with a stride of the same
5154 size as its unit type. */
5156 bool
5157 stmt_with_adjacent_zero_store_dr_p (gimple stmt)
5159 tree op0, op1;
5160 bool res;
5161 struct data_reference *dr;
5163 if (!stmt
5164 || !gimple_vdef (stmt)
5165 || !is_gimple_assign (stmt)
5166 || !gimple_assign_single_p (stmt)
5167 || !(op1 = gimple_assign_rhs1 (stmt))
5168 || !(integer_zerop (op1) || real_zerop (op1)))
5169 return false;
5171 dr = XCNEW (struct data_reference);
5172 op0 = gimple_assign_lhs (stmt);
5174 DR_STMT (dr) = stmt;
5175 DR_REF (dr) = op0;
5177 res = dr_analyze_innermost (dr, loop_containing_stmt (stmt))
5178 && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0));
5180 free_data_ref (dr);
5181 return res;
5184 /* Initialize STMTS with all the statements of LOOP that contain a
5185 store to memory of the form "A[i] = 0". */
5187 void
5188 stores_zero_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5190 unsigned int i;
5191 basic_block bb;
5192 gimple_stmt_iterator si;
5193 gimple stmt;
5194 basic_block *bbs = get_loop_body_in_dom_order (loop);
5196 for (i = 0; i < loop->num_nodes; i++)
5197 for (bb = bbs[i], si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5198 if ((stmt = gsi_stmt (si))
5199 && stmt_with_adjacent_zero_store_dr_p (stmt))
5200 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (si));
5202 free (bbs);
5205 /* For a data reference REF, return the declaration of its base
5206 address or NULL_TREE if the base is not determined. */
5208 static inline tree
5209 ref_base_address (gimple stmt, data_ref_loc *ref)
5211 tree base = NULL_TREE;
5212 tree base_address;
5213 struct data_reference *dr = XCNEW (struct data_reference);
5215 DR_STMT (dr) = stmt;
5216 DR_REF (dr) = *ref->pos;
5217 dr_analyze_innermost (dr, loop_containing_stmt (stmt));
5218 base_address = DR_BASE_ADDRESS (dr);
5220 if (!base_address)
5221 goto end;
5223 switch (TREE_CODE (base_address))
5225 case ADDR_EXPR:
5226 base = TREE_OPERAND (base_address, 0);
5227 break;
5229 default:
5230 base = base_address;
5231 break;
5234 end:
5235 free_data_ref (dr);
5236 return base;
5239 /* Determines whether the statement from vertex V of the RDG has a
5240 definition used outside the loop that contains this statement. */
5242 bool
5243 rdg_defs_used_in_other_loops_p (struct graph *rdg, int v)
5245 gimple stmt = RDG_STMT (rdg, v);
5246 struct loop *loop = loop_containing_stmt (stmt);
5247 use_operand_p imm_use_p;
5248 imm_use_iterator iterator;
5249 ssa_op_iter it;
5250 def_operand_p def_p;
5252 if (!loop)
5253 return true;
5255 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, it, SSA_OP_DEF)
5257 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, DEF_FROM_PTR (def_p))
5259 if (loop_containing_stmt (USE_STMT (imm_use_p)) != loop)
5260 return true;
5264 return false;
5267 /* Determines whether statements S1 and S2 access to similar memory
5268 locations. Two memory accesses are considered similar when they
5269 have the same base address declaration, i.e. when their
5270 ref_base_address is the same. */
5272 bool
5273 have_similar_memory_accesses (gimple s1, gimple s2)
5275 bool res = false;
5276 unsigned i, j;
5277 VEC (data_ref_loc, heap) *refs1, *refs2;
5278 data_ref_loc *ref1, *ref2;
5280 get_references_in_stmt (s1, &refs1);
5281 get_references_in_stmt (s2, &refs2);
5283 FOR_EACH_VEC_ELT (data_ref_loc, refs1, i, ref1)
5285 tree base1 = ref_base_address (s1, ref1);
5287 if (base1)
5288 FOR_EACH_VEC_ELT (data_ref_loc, refs2, j, ref2)
5289 if (base1 == ref_base_address (s2, ref2))
5291 res = true;
5292 goto end;
5296 end:
5297 VEC_free (data_ref_loc, heap, refs1);
5298 VEC_free (data_ref_loc, heap, refs2);
5299 return res;
5302 /* Helper function for the hashtab. */
5304 static int
5305 have_similar_memory_accesses_1 (const void *s1, const void *s2)
5307 return have_similar_memory_accesses (CONST_CAST_GIMPLE ((const_gimple) s1),
5308 CONST_CAST_GIMPLE ((const_gimple) s2));
5311 /* Helper function for the hashtab. */
5313 static hashval_t
5314 ref_base_address_1 (const void *s)
5316 gimple stmt = CONST_CAST_GIMPLE ((const_gimple) s);
5317 unsigned i;
5318 VEC (data_ref_loc, heap) *refs;
5319 data_ref_loc *ref;
5320 hashval_t res = 0;
5322 get_references_in_stmt (stmt, &refs);
5324 FOR_EACH_VEC_ELT (data_ref_loc, refs, i, ref)
5325 if (!ref->is_read)
5327 res = htab_hash_pointer (ref_base_address (stmt, ref));
5328 break;
5331 VEC_free (data_ref_loc, heap, refs);
5332 return res;
5335 /* Try to remove duplicated write data references from STMTS. */
5337 void
5338 remove_similar_memory_refs (VEC (gimple, heap) **stmts)
5340 unsigned i;
5341 gimple stmt;
5342 htab_t seen = htab_create (VEC_length (gimple, *stmts), ref_base_address_1,
5343 have_similar_memory_accesses_1, NULL);
5345 for (i = 0; VEC_iterate (gimple, *stmts, i, stmt); )
5347 void **slot;
5349 slot = htab_find_slot (seen, stmt, INSERT);
5351 if (*slot)
5352 VEC_ordered_remove (gimple, *stmts, i);
5353 else
5355 *slot = (void *) stmt;
5356 i++;
5360 htab_delete (seen);
5363 /* Returns the index of PARAMETER in the parameters vector of the
5364 ACCESS_MATRIX. If PARAMETER does not exist return -1. */
5367 access_matrix_get_index_for_parameter (tree parameter,
5368 struct access_matrix *access_matrix)
5370 int i;
5371 VEC (tree,heap) *lambda_parameters = AM_PARAMETERS (access_matrix);
5372 tree lambda_parameter;
5374 FOR_EACH_VEC_ELT (tree, lambda_parameters, i, lambda_parameter)
5375 if (lambda_parameter == parameter)
5376 return i + AM_NB_INDUCTION_VARS (access_matrix);
5378 return -1;