2012-11-28 Oleg Raikhman <oleg@adapteva.com>
[official-gcc.git] / gcc / tree-data-ref.c
blob7e95ad710b5535c9b12145888b7f08eed4327711
1 /* Data references and dependences detectors.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3 Free Software Foundation, Inc.
4 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* This pass walks a given loop structure searching for array
23 references. The information about the array accesses is recorded
24 in DATA_REFERENCE structures.
26 The basic test for determining the dependences is:
27 given two access functions chrec1 and chrec2 to a same array, and
28 x and y two vectors from the iteration domain, the same element of
29 the array is accessed twice at iterations x and y if and only if:
30 | chrec1 (x) == chrec2 (y).
32 The goals of this analysis are:
34 - to determine the independence: the relation between two
35 independent accesses is qualified with the chrec_known (this
36 information allows a loop parallelization),
38 - when two data references access the same data, to qualify the
39 dependence relation with classic dependence representations:
41 - distance vectors
42 - direction vectors
43 - loop carried level dependence
44 - polyhedron dependence
45 or with the chains of recurrences based representation,
47 - to define a knowledge base for storing the data dependence
48 information,
50 - to define an interface to access this data.
53 Definitions:
55 - subscript: given two array accesses a subscript is the tuple
56 composed of the access functions for a given dimension. Example:
57 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
58 (f1, g1), (f2, g2), (f3, g3).
60 - Diophantine equation: an equation whose coefficients and
61 solutions are integer constants, for example the equation
62 | 3*x + 2*y = 1
63 has an integer solution x = 1 and y = -1.
65 References:
67 - "Advanced Compilation for High Performance Computing" by Randy
68 Allen and Ken Kennedy.
69 http://citeseer.ist.psu.edu/goff91practical.html
71 - "Loop Transformations for Restructuring Compilers - The Foundations"
72 by Utpal Banerjee.
77 #include "config.h"
78 #include "system.h"
79 #include "coretypes.h"
80 #include "gimple-pretty-print.h"
81 #include "tree-flow.h"
82 #include "cfgloop.h"
83 #include "tree-data-ref.h"
84 #include "tree-scalar-evolution.h"
85 #include "dumpfile.h"
86 #include "langhooks.h"
87 #include "tree-affine.h"
88 #include "params.h"
90 static struct datadep_stats
92 int num_dependence_tests;
93 int num_dependence_dependent;
94 int num_dependence_independent;
95 int num_dependence_undetermined;
97 int num_subscript_tests;
98 int num_subscript_undetermined;
99 int num_same_subscript_function;
101 int num_ziv;
102 int num_ziv_independent;
103 int num_ziv_dependent;
104 int num_ziv_unimplemented;
106 int num_siv;
107 int num_siv_independent;
108 int num_siv_dependent;
109 int num_siv_unimplemented;
111 int num_miv;
112 int num_miv_independent;
113 int num_miv_dependent;
114 int num_miv_unimplemented;
115 } dependence_stats;
117 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
118 struct data_reference *,
119 struct data_reference *,
120 struct loop *);
121 /* Returns true iff A divides B. */
123 static inline bool
124 tree_fold_divides_p (const_tree a, const_tree b)
126 gcc_assert (TREE_CODE (a) == INTEGER_CST);
127 gcc_assert (TREE_CODE (b) == INTEGER_CST);
128 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
131 /* Returns true iff A divides B. */
133 static inline bool
134 int_divides_p (int a, int b)
136 return ((b % a) == 0);
141 /* Dump into FILE all the data references from DATAREFS. */
143 static void
144 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
146 unsigned int i;
147 struct data_reference *dr;
149 FOR_EACH_VEC_ELT (datarefs, i, dr)
150 dump_data_reference (file, dr);
153 /* Dump into STDERR all the data references from DATAREFS. */
155 DEBUG_FUNCTION void
156 debug_data_references (vec<data_reference_p> datarefs)
158 dump_data_references (stderr, datarefs);
161 /* Print to STDERR the data_reference DR. */
163 DEBUG_FUNCTION void
164 debug_data_reference (struct data_reference *dr)
166 dump_data_reference (stderr, dr);
169 /* Dump function for a DATA_REFERENCE structure. */
171 void
172 dump_data_reference (FILE *outf,
173 struct data_reference *dr)
175 unsigned int i;
177 fprintf (outf, "#(Data Ref: \n");
178 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
179 fprintf (outf, "# stmt: ");
180 print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
181 fprintf (outf, "# ref: ");
182 print_generic_stmt (outf, DR_REF (dr), 0);
183 fprintf (outf, "# base_object: ");
184 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
186 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
188 fprintf (outf, "# Access function %d: ", i);
189 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
191 fprintf (outf, "#)\n");
194 /* Dumps the affine function described by FN to the file OUTF. */
196 static void
197 dump_affine_function (FILE *outf, affine_fn fn)
199 unsigned i;
200 tree coef;
202 print_generic_expr (outf, fn[0], TDF_SLIM);
203 for (i = 1; fn.iterate (i, &coef); i++)
205 fprintf (outf, " + ");
206 print_generic_expr (outf, coef, TDF_SLIM);
207 fprintf (outf, " * x_%u", i);
211 /* Dumps the conflict function CF to the file OUTF. */
213 static void
214 dump_conflict_function (FILE *outf, conflict_function *cf)
216 unsigned i;
218 if (cf->n == NO_DEPENDENCE)
219 fprintf (outf, "no dependence\n");
220 else if (cf->n == NOT_KNOWN)
221 fprintf (outf, "not known\n");
222 else
224 for (i = 0; i < cf->n; i++)
226 fprintf (outf, "[");
227 dump_affine_function (outf, cf->fns[i]);
228 fprintf (outf, "]\n");
233 /* Dump function for a SUBSCRIPT structure. */
235 static void
236 dump_subscript (FILE *outf, struct subscript *subscript)
238 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
240 fprintf (outf, "\n (subscript \n");
241 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
242 dump_conflict_function (outf, cf);
243 if (CF_NONTRIVIAL_P (cf))
245 tree last_iteration = SUB_LAST_CONFLICT (subscript);
246 fprintf (outf, " last_conflict: ");
247 print_generic_stmt (outf, last_iteration, 0);
250 cf = SUB_CONFLICTS_IN_B (subscript);
251 fprintf (outf, " iterations_that_access_an_element_twice_in_B: ");
252 dump_conflict_function (outf, cf);
253 if (CF_NONTRIVIAL_P (cf))
255 tree last_iteration = SUB_LAST_CONFLICT (subscript);
256 fprintf (outf, " last_conflict: ");
257 print_generic_stmt (outf, last_iteration, 0);
260 fprintf (outf, " (Subscript distance: ");
261 print_generic_stmt (outf, SUB_DISTANCE (subscript), 0);
262 fprintf (outf, " )\n");
263 fprintf (outf, " )\n");
266 /* Print the classic direction vector DIRV to OUTF. */
268 static void
269 print_direction_vector (FILE *outf,
270 lambda_vector dirv,
271 int length)
273 int eq;
275 for (eq = 0; eq < length; eq++)
277 enum data_dependence_direction dir = ((enum data_dependence_direction)
278 dirv[eq]);
280 switch (dir)
282 case dir_positive:
283 fprintf (outf, " +");
284 break;
285 case dir_negative:
286 fprintf (outf, " -");
287 break;
288 case dir_equal:
289 fprintf (outf, " =");
290 break;
291 case dir_positive_or_equal:
292 fprintf (outf, " +=");
293 break;
294 case dir_positive_or_negative:
295 fprintf (outf, " +-");
296 break;
297 case dir_negative_or_equal:
298 fprintf (outf, " -=");
299 break;
300 case dir_star:
301 fprintf (outf, " *");
302 break;
303 default:
304 fprintf (outf, "indep");
305 break;
308 fprintf (outf, "\n");
311 /* Print a vector of direction vectors. */
313 static void
314 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
315 int length)
317 unsigned j;
318 lambda_vector v;
320 FOR_EACH_VEC_ELT (dir_vects, j, v)
321 print_direction_vector (outf, v, length);
324 /* Print out a vector VEC of length N to OUTFILE. */
326 static inline void
327 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
329 int i;
331 for (i = 0; i < n; i++)
332 fprintf (outfile, "%3d ", vector[i]);
333 fprintf (outfile, "\n");
336 /* Print a vector of distance vectors. */
338 static void
339 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
340 int length)
342 unsigned j;
343 lambda_vector v;
345 FOR_EACH_VEC_ELT (dist_vects, j, v)
346 print_lambda_vector (outf, v, length);
349 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
351 static void
352 dump_data_dependence_relation (FILE *outf,
353 struct data_dependence_relation *ddr)
355 struct data_reference *dra, *drb;
357 fprintf (outf, "(Data Dep: \n");
359 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
361 if (ddr)
363 dra = DDR_A (ddr);
364 drb = DDR_B (ddr);
365 if (dra)
366 dump_data_reference (outf, dra);
367 else
368 fprintf (outf, " (nil)\n");
369 if (drb)
370 dump_data_reference (outf, drb);
371 else
372 fprintf (outf, " (nil)\n");
374 fprintf (outf, " (don't know)\n)\n");
375 return;
378 dra = DDR_A (ddr);
379 drb = DDR_B (ddr);
380 dump_data_reference (outf, dra);
381 dump_data_reference (outf, drb);
383 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
384 fprintf (outf, " (no dependence)\n");
386 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
388 unsigned int i;
389 struct loop *loopi;
391 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
393 fprintf (outf, " access_fn_A: ");
394 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
395 fprintf (outf, " access_fn_B: ");
396 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
397 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
400 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
401 fprintf (outf, " loop nest: (");
402 FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
403 fprintf (outf, "%d ", loopi->num);
404 fprintf (outf, ")\n");
406 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
408 fprintf (outf, " distance_vector: ");
409 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
410 DDR_NB_LOOPS (ddr));
413 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
415 fprintf (outf, " direction_vector: ");
416 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
417 DDR_NB_LOOPS (ddr));
421 fprintf (outf, ")\n");
424 /* Debug version. */
426 DEBUG_FUNCTION void
427 debug_data_dependence_relation (struct data_dependence_relation *ddr)
429 dump_data_dependence_relation (stderr, ddr);
432 /* Dump into FILE all the dependence relations from DDRS. */
434 void
435 dump_data_dependence_relations (FILE *file,
436 vec<ddr_p> ddrs)
438 unsigned int i;
439 struct data_dependence_relation *ddr;
441 FOR_EACH_VEC_ELT (ddrs, i, ddr)
442 dump_data_dependence_relation (file, ddr);
445 /* Dump to STDERR all the dependence relations from DDRS. */
447 DEBUG_FUNCTION void
448 debug_data_dependence_relations (vec<ddr_p> ddrs)
450 dump_data_dependence_relations (stderr, ddrs);
453 /* Dumps the distance and direction vectors in FILE. DDRS contains
454 the dependence relations, and VECT_SIZE is the size of the
455 dependence vectors, or in other words the number of loops in the
456 considered nest. */
458 static void
459 dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
461 unsigned int i, j;
462 struct data_dependence_relation *ddr;
463 lambda_vector v;
465 FOR_EACH_VEC_ELT (ddrs, i, ddr)
466 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
468 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), j, v)
470 fprintf (file, "DISTANCE_V (");
471 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
472 fprintf (file, ")\n");
475 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), j, v)
477 fprintf (file, "DIRECTION_V (");
478 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
479 fprintf (file, ")\n");
483 fprintf (file, "\n\n");
486 /* Dumps the data dependence relations DDRS in FILE. */
488 static void
489 dump_ddrs (FILE *file, vec<ddr_p> ddrs)
491 unsigned int i;
492 struct data_dependence_relation *ddr;
494 FOR_EACH_VEC_ELT (ddrs, i, ddr)
495 dump_data_dependence_relation (file, ddr);
497 fprintf (file, "\n\n");
500 DEBUG_FUNCTION void
501 debug_ddrs (vec<ddr_p> ddrs)
503 dump_ddrs (stderr, ddrs);
506 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
507 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
508 constant of type ssizetype, and returns true. If we cannot do this
509 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
510 is returned. */
512 static bool
513 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
514 tree *var, tree *off)
516 tree var0, var1;
517 tree off0, off1;
518 enum tree_code ocode = code;
520 *var = NULL_TREE;
521 *off = NULL_TREE;
523 switch (code)
525 case INTEGER_CST:
526 *var = build_int_cst (type, 0);
527 *off = fold_convert (ssizetype, op0);
528 return true;
530 case POINTER_PLUS_EXPR:
531 ocode = PLUS_EXPR;
532 /* FALLTHROUGH */
533 case PLUS_EXPR:
534 case MINUS_EXPR:
535 split_constant_offset (op0, &var0, &off0);
536 split_constant_offset (op1, &var1, &off1);
537 *var = fold_build2 (code, type, var0, var1);
538 *off = size_binop (ocode, off0, off1);
539 return true;
541 case MULT_EXPR:
542 if (TREE_CODE (op1) != INTEGER_CST)
543 return false;
545 split_constant_offset (op0, &var0, &off0);
546 *var = fold_build2 (MULT_EXPR, type, var0, op1);
547 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
548 return true;
550 case ADDR_EXPR:
552 tree base, poffset;
553 HOST_WIDE_INT pbitsize, pbitpos;
554 enum machine_mode pmode;
555 int punsignedp, pvolatilep;
557 op0 = TREE_OPERAND (op0, 0);
558 base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
559 &pmode, &punsignedp, &pvolatilep, false);
561 if (pbitpos % BITS_PER_UNIT != 0)
562 return false;
563 base = build_fold_addr_expr (base);
564 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
566 if (poffset)
568 split_constant_offset (poffset, &poffset, &off1);
569 off0 = size_binop (PLUS_EXPR, off0, off1);
570 if (POINTER_TYPE_P (TREE_TYPE (base)))
571 base = fold_build_pointer_plus (base, poffset);
572 else
573 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
574 fold_convert (TREE_TYPE (base), poffset));
577 var0 = fold_convert (type, base);
579 /* If variable length types are involved, punt, otherwise casts
580 might be converted into ARRAY_REFs in gimplify_conversion.
581 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
582 possibly no longer appears in current GIMPLE, might resurface.
583 This perhaps could run
584 if (CONVERT_EXPR_P (var0))
586 gimplify_conversion (&var0);
587 // Attempt to fill in any within var0 found ARRAY_REF's
588 // element size from corresponding op embedded ARRAY_REF,
589 // if unsuccessful, just punt.
590 } */
591 while (POINTER_TYPE_P (type))
592 type = TREE_TYPE (type);
593 if (int_size_in_bytes (type) < 0)
594 return false;
596 *var = var0;
597 *off = off0;
598 return true;
601 case SSA_NAME:
603 gimple def_stmt = SSA_NAME_DEF_STMT (op0);
604 enum tree_code subcode;
606 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
607 return false;
609 var0 = gimple_assign_rhs1 (def_stmt);
610 subcode = gimple_assign_rhs_code (def_stmt);
611 var1 = gimple_assign_rhs2 (def_stmt);
613 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
615 CASE_CONVERT:
617 /* We must not introduce undefined overflow, and we must not change the value.
618 Hence we're okay if the inner type doesn't overflow to start with
619 (pointer or signed), the outer type also is an integer or pointer
620 and the outer precision is at least as large as the inner. */
621 tree itype = TREE_TYPE (op0);
622 if ((POINTER_TYPE_P (itype)
623 || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
624 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
625 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
627 split_constant_offset (op0, &var0, off);
628 *var = fold_convert (type, var0);
629 return true;
631 return false;
634 default:
635 return false;
639 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
640 will be ssizetype. */
642 void
643 split_constant_offset (tree exp, tree *var, tree *off)
645 tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
646 enum tree_code code;
648 *var = exp;
649 *off = ssize_int (0);
650 STRIP_NOPS (exp);
652 if (tree_is_chrec (exp)
653 || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
654 return;
656 otype = TREE_TYPE (exp);
657 code = TREE_CODE (exp);
658 extract_ops_from_tree (exp, &code, &op0, &op1);
659 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
661 *var = fold_convert (type, e);
662 *off = o;
666 /* Returns the address ADDR of an object in a canonical shape (without nop
667 casts, and with type of pointer to the object). */
669 static tree
670 canonicalize_base_object_address (tree addr)
672 tree orig = addr;
674 STRIP_NOPS (addr);
676 /* The base address may be obtained by casting from integer, in that case
677 keep the cast. */
678 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
679 return orig;
681 if (TREE_CODE (addr) != ADDR_EXPR)
682 return addr;
684 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
687 /* Analyzes the behavior of the memory reference DR in the innermost loop or
688 basic block that contains it. Returns true if analysis succeed or false
689 otherwise. */
691 bool
692 dr_analyze_innermost (struct data_reference *dr, struct loop *nest)
694 gimple stmt = DR_STMT (dr);
695 struct loop *loop = loop_containing_stmt (stmt);
696 tree ref = DR_REF (dr);
697 HOST_WIDE_INT pbitsize, pbitpos;
698 tree base, poffset;
699 enum machine_mode pmode;
700 int punsignedp, pvolatilep;
701 affine_iv base_iv, offset_iv;
702 tree init, dinit, step;
703 bool in_loop = (loop && loop->num);
705 if (dump_file && (dump_flags & TDF_DETAILS))
706 fprintf (dump_file, "analyze_innermost: ");
708 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
709 &pmode, &punsignedp, &pvolatilep, false);
710 gcc_assert (base != NULL_TREE);
712 if (pbitpos % BITS_PER_UNIT != 0)
714 if (dump_file && (dump_flags & TDF_DETAILS))
715 fprintf (dump_file, "failed: bit offset alignment.\n");
716 return false;
719 if (TREE_CODE (base) == MEM_REF)
721 if (!integer_zerop (TREE_OPERAND (base, 1)))
723 double_int moff = mem_ref_offset (base);
724 tree mofft = double_int_to_tree (sizetype, moff);
725 if (!poffset)
726 poffset = mofft;
727 else
728 poffset = size_binop (PLUS_EXPR, poffset, mofft);
730 base = TREE_OPERAND (base, 0);
732 else
733 base = build_fold_addr_expr (base);
735 if (in_loop)
737 if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
738 nest ? true : false))
740 if (nest)
742 if (dump_file && (dump_flags & TDF_DETAILS))
743 fprintf (dump_file, "failed: evolution of base is not"
744 " affine.\n");
745 return false;
747 else
749 base_iv.base = base;
750 base_iv.step = ssize_int (0);
751 base_iv.no_overflow = true;
755 else
757 base_iv.base = base;
758 base_iv.step = ssize_int (0);
759 base_iv.no_overflow = true;
762 if (!poffset)
764 offset_iv.base = ssize_int (0);
765 offset_iv.step = ssize_int (0);
767 else
769 if (!in_loop)
771 offset_iv.base = poffset;
772 offset_iv.step = ssize_int (0);
774 else if (!simple_iv (loop, loop_containing_stmt (stmt),
775 poffset, &offset_iv,
776 nest ? true : false))
778 if (nest)
780 if (dump_file && (dump_flags & TDF_DETAILS))
781 fprintf (dump_file, "failed: evolution of offset is not"
782 " affine.\n");
783 return false;
785 else
787 offset_iv.base = poffset;
788 offset_iv.step = ssize_int (0);
793 init = ssize_int (pbitpos / BITS_PER_UNIT);
794 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
795 init = size_binop (PLUS_EXPR, init, dinit);
796 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
797 init = size_binop (PLUS_EXPR, init, dinit);
799 step = size_binop (PLUS_EXPR,
800 fold_convert (ssizetype, base_iv.step),
801 fold_convert (ssizetype, offset_iv.step));
803 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
805 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
806 DR_INIT (dr) = init;
807 DR_STEP (dr) = step;
809 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
811 if (dump_file && (dump_flags & TDF_DETAILS))
812 fprintf (dump_file, "success.\n");
814 return true;
817 /* Determines the base object and the list of indices of memory reference
818 DR, analyzed in LOOP and instantiated in loop nest NEST. */
820 static void
821 dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
823 vec<tree> access_fns = vNULL;
824 tree ref, op;
825 tree base, off, access_fn;
826 basic_block before_loop;
828 /* If analyzing a basic-block there are no indices to analyze
829 and thus no access functions. */
830 if (!nest)
832 DR_BASE_OBJECT (dr) = DR_REF (dr);
833 DR_ACCESS_FNS (dr).create (0);
834 return;
837 ref = DR_REF (dr);
838 before_loop = block_before_loop (nest);
840 /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
841 into a two element array with a constant index. The base is
842 then just the immediate underlying object. */
843 if (TREE_CODE (ref) == REALPART_EXPR)
845 ref = TREE_OPERAND (ref, 0);
846 access_fns.safe_push (integer_zero_node);
848 else if (TREE_CODE (ref) == IMAGPART_EXPR)
850 ref = TREE_OPERAND (ref, 0);
851 access_fns.safe_push (integer_one_node);
854 /* Analyze access functions of dimensions we know to be independent. */
855 while (handled_component_p (ref))
857 if (TREE_CODE (ref) == ARRAY_REF)
859 op = TREE_OPERAND (ref, 1);
860 access_fn = analyze_scalar_evolution (loop, op);
861 access_fn = instantiate_scev (before_loop, loop, access_fn);
862 access_fns.safe_push (access_fn);
864 else if (TREE_CODE (ref) == COMPONENT_REF
865 && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
867 /* For COMPONENT_REFs of records (but not unions!) use the
868 FIELD_DECL offset as constant access function so we can
869 disambiguate a[i].f1 and a[i].f2. */
870 tree off = component_ref_field_offset (ref);
871 off = size_binop (PLUS_EXPR,
872 size_binop (MULT_EXPR,
873 fold_convert (bitsizetype, off),
874 bitsize_int (BITS_PER_UNIT)),
875 DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
876 access_fns.safe_push (off);
878 else
879 /* If we have an unhandled component we could not translate
880 to an access function stop analyzing. We have determined
881 our base object in this case. */
882 break;
884 ref = TREE_OPERAND (ref, 0);
887 /* If the address operand of a MEM_REF base has an evolution in the
888 analyzed nest, add it as an additional independent access-function. */
889 if (TREE_CODE (ref) == MEM_REF)
891 op = TREE_OPERAND (ref, 0);
892 access_fn = analyze_scalar_evolution (loop, op);
893 access_fn = instantiate_scev (before_loop, loop, access_fn);
894 if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
896 tree orig_type;
897 tree memoff = TREE_OPERAND (ref, 1);
898 base = initial_condition (access_fn);
899 orig_type = TREE_TYPE (base);
900 STRIP_USELESS_TYPE_CONVERSION (base);
901 split_constant_offset (base, &base, &off);
902 /* Fold the MEM_REF offset into the evolutions initial
903 value to make more bases comparable. */
904 if (!integer_zerop (memoff))
906 off = size_binop (PLUS_EXPR, off,
907 fold_convert (ssizetype, memoff));
908 memoff = build_int_cst (TREE_TYPE (memoff), 0);
910 access_fn = chrec_replace_initial_condition
911 (access_fn, fold_convert (orig_type, off));
912 /* ??? This is still not a suitable base object for
913 dr_may_alias_p - the base object needs to be an
914 access that covers the object as whole. With
915 an evolution in the pointer this cannot be
916 guaranteed.
917 As a band-aid, mark the access so we can special-case
918 it in dr_may_alias_p. */
919 ref = fold_build2_loc (EXPR_LOCATION (ref),
920 MEM_REF, TREE_TYPE (ref),
921 base, memoff);
922 DR_UNCONSTRAINED_BASE (dr) = true;
923 access_fns.safe_push (access_fn);
926 else if (DECL_P (ref))
928 /* Canonicalize DR_BASE_OBJECT to MEM_REF form. */
929 ref = build2 (MEM_REF, TREE_TYPE (ref),
930 build_fold_addr_expr (ref),
931 build_int_cst (reference_alias_ptr_type (ref), 0));
934 DR_BASE_OBJECT (dr) = ref;
935 DR_ACCESS_FNS (dr) = access_fns;
938 /* Extracts the alias analysis information from the memory reference DR. */
940 static void
941 dr_analyze_alias (struct data_reference *dr)
943 tree ref = DR_REF (dr);
944 tree base = get_base_address (ref), addr;
946 if (INDIRECT_REF_P (base)
947 || TREE_CODE (base) == MEM_REF)
949 addr = TREE_OPERAND (base, 0);
950 if (TREE_CODE (addr) == SSA_NAME)
951 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
955 /* Frees data reference DR. */
957 void
958 free_data_ref (data_reference_p dr)
960 DR_ACCESS_FNS (dr).release ();
961 free (dr);
964 /* Analyzes memory reference MEMREF accessed in STMT. The reference
965 is read if IS_READ is true, write otherwise. Returns the
966 data_reference description of MEMREF. NEST is the outermost loop
967 in which the reference should be instantiated, LOOP is the loop in
968 which the data reference should be analyzed. */
970 struct data_reference *
971 create_data_ref (loop_p nest, loop_p loop, tree memref, gimple stmt,
972 bool is_read)
974 struct data_reference *dr;
976 if (dump_file && (dump_flags & TDF_DETAILS))
978 fprintf (dump_file, "Creating dr for ");
979 print_generic_expr (dump_file, memref, TDF_SLIM);
980 fprintf (dump_file, "\n");
983 dr = XCNEW (struct data_reference);
984 DR_STMT (dr) = stmt;
985 DR_REF (dr) = memref;
986 DR_IS_READ (dr) = is_read;
988 dr_analyze_innermost (dr, nest);
989 dr_analyze_indices (dr, nest, loop);
990 dr_analyze_alias (dr);
992 if (dump_file && (dump_flags & TDF_DETAILS))
994 unsigned i;
995 fprintf (dump_file, "\tbase_address: ");
996 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
997 fprintf (dump_file, "\n\toffset from base address: ");
998 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
999 fprintf (dump_file, "\n\tconstant offset from base address: ");
1000 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1001 fprintf (dump_file, "\n\tstep: ");
1002 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1003 fprintf (dump_file, "\n\taligned to: ");
1004 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
1005 fprintf (dump_file, "\n\tbase_object: ");
1006 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1007 fprintf (dump_file, "\n");
1008 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1010 fprintf (dump_file, "\tAccess function %d: ", i);
1011 print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1015 return dr;
1018 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1019 expressions. */
1020 static bool
1021 dr_equal_offsets_p1 (tree offset1, tree offset2)
1023 bool res;
1025 STRIP_NOPS (offset1);
1026 STRIP_NOPS (offset2);
1028 if (offset1 == offset2)
1029 return true;
1031 if (TREE_CODE (offset1) != TREE_CODE (offset2)
1032 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1033 return false;
1035 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1036 TREE_OPERAND (offset2, 0));
1038 if (!res || !BINARY_CLASS_P (offset1))
1039 return res;
1041 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1042 TREE_OPERAND (offset2, 1));
1044 return res;
1047 /* Check if DRA and DRB have equal offsets. */
1048 bool
1049 dr_equal_offsets_p (struct data_reference *dra,
1050 struct data_reference *drb)
1052 tree offset1, offset2;
1054 offset1 = DR_OFFSET (dra);
1055 offset2 = DR_OFFSET (drb);
1057 return dr_equal_offsets_p1 (offset1, offset2);
1060 /* Returns true if FNA == FNB. */
1062 static bool
1063 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1065 unsigned i, n = fna.length ();
1067 if (n != fnb.length ())
1068 return false;
1070 for (i = 0; i < n; i++)
1071 if (!operand_equal_p (fna[i], fnb[i], 0))
1072 return false;
1074 return true;
1077 /* If all the functions in CF are the same, returns one of them,
1078 otherwise returns NULL. */
1080 static affine_fn
1081 common_affine_function (conflict_function *cf)
1083 unsigned i;
1084 affine_fn comm;
1086 if (!CF_NONTRIVIAL_P (cf))
1087 return affine_fn();
1089 comm = cf->fns[0];
1091 for (i = 1; i < cf->n; i++)
1092 if (!affine_function_equal_p (comm, cf->fns[i]))
1093 return affine_fn();
1095 return comm;
1098 /* Returns the base of the affine function FN. */
1100 static tree
1101 affine_function_base (affine_fn fn)
1103 return fn[0];
1106 /* Returns true if FN is a constant. */
1108 static bool
1109 affine_function_constant_p (affine_fn fn)
1111 unsigned i;
1112 tree coef;
1114 for (i = 1; fn.iterate (i, &coef); i++)
1115 if (!integer_zerop (coef))
1116 return false;
1118 return true;
1121 /* Returns true if FN is the zero constant function. */
1123 static bool
1124 affine_function_zero_p (affine_fn fn)
1126 return (integer_zerop (affine_function_base (fn))
1127 && affine_function_constant_p (fn));
1130 /* Returns a signed integer type with the largest precision from TA
1131 and TB. */
1133 static tree
1134 signed_type_for_types (tree ta, tree tb)
1136 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1137 return signed_type_for (ta);
1138 else
1139 return signed_type_for (tb);
1142 /* Applies operation OP on affine functions FNA and FNB, and returns the
1143 result. */
1145 static affine_fn
1146 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1148 unsigned i, n, m;
1149 affine_fn ret;
1150 tree coef;
1152 if (fnb.length () > fna.length ())
1154 n = fna.length ();
1155 m = fnb.length ();
1157 else
1159 n = fnb.length ();
1160 m = fna.length ();
1163 ret.create (m);
1164 for (i = 0; i < n; i++)
1166 tree type = signed_type_for_types (TREE_TYPE (fna[i]),
1167 TREE_TYPE (fnb[i]));
1168 ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
1171 for (; fna.iterate (i, &coef); i++)
1172 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1173 coef, integer_zero_node));
1174 for (; fnb.iterate (i, &coef); i++)
1175 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1176 integer_zero_node, coef));
1178 return ret;
1181 /* Returns the sum of affine functions FNA and FNB. */
1183 static affine_fn
1184 affine_fn_plus (affine_fn fna, affine_fn fnb)
1186 return affine_fn_op (PLUS_EXPR, fna, fnb);
1189 /* Returns the difference of affine functions FNA and FNB. */
1191 static affine_fn
1192 affine_fn_minus (affine_fn fna, affine_fn fnb)
1194 return affine_fn_op (MINUS_EXPR, fna, fnb);
1197 /* Frees affine function FN. */
1199 static void
1200 affine_fn_free (affine_fn fn)
1202 fn.release ();
1205 /* Determine for each subscript in the data dependence relation DDR
1206 the distance. */
1208 static void
1209 compute_subscript_distance (struct data_dependence_relation *ddr)
1211 conflict_function *cf_a, *cf_b;
1212 affine_fn fn_a, fn_b, diff;
1214 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1216 unsigned int i;
1218 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1220 struct subscript *subscript;
1222 subscript = DDR_SUBSCRIPT (ddr, i);
1223 cf_a = SUB_CONFLICTS_IN_A (subscript);
1224 cf_b = SUB_CONFLICTS_IN_B (subscript);
1226 fn_a = common_affine_function (cf_a);
1227 fn_b = common_affine_function (cf_b);
1228 if (!fn_a.exists () || !fn_b.exists ())
1230 SUB_DISTANCE (subscript) = chrec_dont_know;
1231 return;
1233 diff = affine_fn_minus (fn_a, fn_b);
1235 if (affine_function_constant_p (diff))
1236 SUB_DISTANCE (subscript) = affine_function_base (diff);
1237 else
1238 SUB_DISTANCE (subscript) = chrec_dont_know;
1240 affine_fn_free (diff);
1245 /* Returns the conflict function for "unknown". */
1247 static conflict_function *
1248 conflict_fn_not_known (void)
1250 conflict_function *fn = XCNEW (conflict_function);
1251 fn->n = NOT_KNOWN;
1253 return fn;
1256 /* Returns the conflict function for "independent". */
1258 static conflict_function *
1259 conflict_fn_no_dependence (void)
1261 conflict_function *fn = XCNEW (conflict_function);
1262 fn->n = NO_DEPENDENCE;
1264 return fn;
1267 /* Returns true if the address of OBJ is invariant in LOOP. */
1269 static bool
1270 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1272 while (handled_component_p (obj))
1274 if (TREE_CODE (obj) == ARRAY_REF)
1276 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1277 need to check the stride and the lower bound of the reference. */
1278 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1279 loop->num)
1280 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1281 loop->num))
1282 return false;
1284 else if (TREE_CODE (obj) == COMPONENT_REF)
1286 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1287 loop->num))
1288 return false;
1290 obj = TREE_OPERAND (obj, 0);
1293 if (!INDIRECT_REF_P (obj)
1294 && TREE_CODE (obj) != MEM_REF)
1295 return true;
1297 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1298 loop->num);
1301 /* Returns false if we can prove that data references A and B do not alias,
1302 true otherwise. If LOOP_NEST is false no cross-iteration aliases are
1303 considered. */
1305 bool
1306 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
1307 bool loop_nest)
1309 tree addr_a = DR_BASE_OBJECT (a);
1310 tree addr_b = DR_BASE_OBJECT (b);
1312 /* If we are not processing a loop nest but scalar code we
1313 do not need to care about possible cross-iteration dependences
1314 and thus can process the full original reference. Do so,
1315 similar to how loop invariant motion applies extra offset-based
1316 disambiguation. */
1317 if (!loop_nest)
1319 aff_tree off1, off2;
1320 double_int size1, size2;
1321 get_inner_reference_aff (DR_REF (a), &off1, &size1);
1322 get_inner_reference_aff (DR_REF (b), &off2, &size2);
1323 aff_combination_scale (&off1, double_int_minus_one);
1324 aff_combination_add (&off2, &off1);
1325 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1326 return false;
1329 /* If we had an evolution in a MEM_REF BASE_OBJECT we do not know
1330 the size of the base-object. So we cannot do any offset/overlap
1331 based analysis but have to rely on points-to information only. */
1332 if (TREE_CODE (addr_a) == MEM_REF
1333 && DR_UNCONSTRAINED_BASE (a))
1335 if (TREE_CODE (addr_b) == MEM_REF
1336 && DR_UNCONSTRAINED_BASE (b))
1337 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
1338 TREE_OPERAND (addr_b, 0));
1339 else
1340 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
1341 build_fold_addr_expr (addr_b));
1343 else if (TREE_CODE (addr_b) == MEM_REF
1344 && DR_UNCONSTRAINED_BASE (b))
1345 return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
1346 TREE_OPERAND (addr_b, 0));
1348 /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
1349 that is being subsetted in the loop nest. */
1350 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
1351 return refs_output_dependent_p (addr_a, addr_b);
1352 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
1353 return refs_anti_dependent_p (addr_a, addr_b);
1354 return refs_may_alias_p (addr_a, addr_b);
1357 /* Initialize a data dependence relation between data accesses A and
1358 B. NB_LOOPS is the number of loops surrounding the references: the
1359 size of the classic distance/direction vectors. */
1361 struct data_dependence_relation *
1362 initialize_data_dependence_relation (struct data_reference *a,
1363 struct data_reference *b,
1364 vec<loop_p> loop_nest)
1366 struct data_dependence_relation *res;
1367 unsigned int i;
1369 res = XNEW (struct data_dependence_relation);
1370 DDR_A (res) = a;
1371 DDR_B (res) = b;
1372 DDR_LOOP_NEST (res).create (0);
1373 DDR_REVERSED_P (res) = false;
1374 DDR_SUBSCRIPTS (res).create (0);
1375 DDR_DIR_VECTS (res).create (0);
1376 DDR_DIST_VECTS (res).create (0);
1378 if (a == NULL || b == NULL)
1380 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1381 return res;
1384 /* If the data references do not alias, then they are independent. */
1385 if (!dr_may_alias_p (a, b, loop_nest.exists ()))
1387 DDR_ARE_DEPENDENT (res) = chrec_known;
1388 return res;
1391 /* The case where the references are exactly the same. */
1392 if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1394 if (loop_nest.exists ()
1395 && !object_address_invariant_in_loop_p (loop_nest[0],
1396 DR_BASE_OBJECT (a)))
1398 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1399 return res;
1401 DDR_AFFINE_P (res) = true;
1402 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1403 DDR_SUBSCRIPTS (res).create (DR_NUM_DIMENSIONS (a));
1404 DDR_LOOP_NEST (res) = loop_nest;
1405 DDR_INNER_LOOP (res) = 0;
1406 DDR_SELF_REFERENCE (res) = true;
1407 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1409 struct subscript *subscript;
1411 subscript = XNEW (struct subscript);
1412 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1413 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1414 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1415 SUB_DISTANCE (subscript) = chrec_dont_know;
1416 DDR_SUBSCRIPTS (res).safe_push (subscript);
1418 return res;
1421 /* If the references do not access the same object, we do not know
1422 whether they alias or not. */
1423 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1425 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1426 return res;
1429 /* If the base of the object is not invariant in the loop nest, we cannot
1430 analyze it. TODO -- in fact, it would suffice to record that there may
1431 be arbitrary dependences in the loops where the base object varies. */
1432 if (loop_nest.exists ()
1433 && !object_address_invariant_in_loop_p (loop_nest[0],
1434 DR_BASE_OBJECT (a)))
1436 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1437 return res;
1440 /* If the number of dimensions of the access to not agree we can have
1441 a pointer access to a component of the array element type and an
1442 array access while the base-objects are still the same. Punt. */
1443 if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
1445 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1446 return res;
1449 DDR_AFFINE_P (res) = true;
1450 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1451 DDR_SUBSCRIPTS (res).create (DR_NUM_DIMENSIONS (a));
1452 DDR_LOOP_NEST (res) = loop_nest;
1453 DDR_INNER_LOOP (res) = 0;
1454 DDR_SELF_REFERENCE (res) = false;
1456 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1458 struct subscript *subscript;
1460 subscript = XNEW (struct subscript);
1461 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1462 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1463 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1464 SUB_DISTANCE (subscript) = chrec_dont_know;
1465 DDR_SUBSCRIPTS (res).safe_push (subscript);
1468 return res;
1471 /* Frees memory used by the conflict function F. */
1473 static void
1474 free_conflict_function (conflict_function *f)
1476 unsigned i;
1478 if (CF_NONTRIVIAL_P (f))
1480 for (i = 0; i < f->n; i++)
1481 affine_fn_free (f->fns[i]);
1483 free (f);
1486 /* Frees memory used by SUBSCRIPTS. */
1488 static void
1489 free_subscripts (vec<subscript_p> subscripts)
1491 unsigned i;
1492 subscript_p s;
1494 FOR_EACH_VEC_ELT (subscripts, i, s)
1496 free_conflict_function (s->conflicting_iterations_in_a);
1497 free_conflict_function (s->conflicting_iterations_in_b);
1498 free (s);
1500 subscripts.release ();
1503 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1504 description. */
1506 static inline void
1507 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1508 tree chrec)
1510 if (dump_file && (dump_flags & TDF_DETAILS))
1512 fprintf (dump_file, "(dependence classified: ");
1513 print_generic_expr (dump_file, chrec, 0);
1514 fprintf (dump_file, ")\n");
1517 DDR_ARE_DEPENDENT (ddr) = chrec;
1518 free_subscripts (DDR_SUBSCRIPTS (ddr));
1519 DDR_SUBSCRIPTS (ddr).create (0);
1522 /* The dependence relation DDR cannot be represented by a distance
1523 vector. */
1525 static inline void
1526 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1528 if (dump_file && (dump_flags & TDF_DETAILS))
1529 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1531 DDR_AFFINE_P (ddr) = false;
1536 /* This section contains the classic Banerjee tests. */
1538 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1539 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1541 static inline bool
1542 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1544 return (evolution_function_is_constant_p (chrec_a)
1545 && evolution_function_is_constant_p (chrec_b));
1548 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1549 variable, i.e., if the SIV (Single Index Variable) test is true. */
1551 static bool
1552 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1554 if ((evolution_function_is_constant_p (chrec_a)
1555 && evolution_function_is_univariate_p (chrec_b))
1556 || (evolution_function_is_constant_p (chrec_b)
1557 && evolution_function_is_univariate_p (chrec_a)))
1558 return true;
1560 if (evolution_function_is_univariate_p (chrec_a)
1561 && evolution_function_is_univariate_p (chrec_b))
1563 switch (TREE_CODE (chrec_a))
1565 case POLYNOMIAL_CHREC:
1566 switch (TREE_CODE (chrec_b))
1568 case POLYNOMIAL_CHREC:
1569 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1570 return false;
1572 default:
1573 return true;
1576 default:
1577 return true;
1581 return false;
1584 /* Creates a conflict function with N dimensions. The affine functions
1585 in each dimension follow. */
1587 static conflict_function *
1588 conflict_fn (unsigned n, ...)
1590 unsigned i;
1591 conflict_function *ret = XCNEW (conflict_function);
1592 va_list ap;
1594 gcc_assert (0 < n && n <= MAX_DIM);
1595 va_start(ap, n);
1597 ret->n = n;
1598 for (i = 0; i < n; i++)
1599 ret->fns[i] = va_arg (ap, affine_fn);
1600 va_end(ap);
1602 return ret;
1605 /* Returns constant affine function with value CST. */
1607 static affine_fn
1608 affine_fn_cst (tree cst)
1610 affine_fn fn;
1611 fn.create (1);
1612 fn.quick_push (cst);
1613 return fn;
1616 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1618 static affine_fn
1619 affine_fn_univar (tree cst, unsigned dim, tree coef)
1621 affine_fn fn;
1622 fn.create (dim + 1);
1623 unsigned i;
1625 gcc_assert (dim > 0);
1626 fn.quick_push (cst);
1627 for (i = 1; i < dim; i++)
1628 fn.quick_push (integer_zero_node);
1629 fn.quick_push (coef);
1630 return fn;
1633 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1634 *OVERLAPS_B are initialized to the functions that describe the
1635 relation between the elements accessed twice by CHREC_A and
1636 CHREC_B. For k >= 0, the following property is verified:
1638 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1640 static void
1641 analyze_ziv_subscript (tree chrec_a,
1642 tree chrec_b,
1643 conflict_function **overlaps_a,
1644 conflict_function **overlaps_b,
1645 tree *last_conflicts)
1647 tree type, difference;
1648 dependence_stats.num_ziv++;
1650 if (dump_file && (dump_flags & TDF_DETAILS))
1651 fprintf (dump_file, "(analyze_ziv_subscript \n");
1653 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1654 chrec_a = chrec_convert (type, chrec_a, NULL);
1655 chrec_b = chrec_convert (type, chrec_b, NULL);
1656 difference = chrec_fold_minus (type, chrec_a, chrec_b);
1658 switch (TREE_CODE (difference))
1660 case INTEGER_CST:
1661 if (integer_zerop (difference))
1663 /* The difference is equal to zero: the accessed index
1664 overlaps for each iteration in the loop. */
1665 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1666 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1667 *last_conflicts = chrec_dont_know;
1668 dependence_stats.num_ziv_dependent++;
1670 else
1672 /* The accesses do not overlap. */
1673 *overlaps_a = conflict_fn_no_dependence ();
1674 *overlaps_b = conflict_fn_no_dependence ();
1675 *last_conflicts = integer_zero_node;
1676 dependence_stats.num_ziv_independent++;
1678 break;
1680 default:
1681 /* We're not sure whether the indexes overlap. For the moment,
1682 conservatively answer "don't know". */
1683 if (dump_file && (dump_flags & TDF_DETAILS))
1684 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1686 *overlaps_a = conflict_fn_not_known ();
1687 *overlaps_b = conflict_fn_not_known ();
1688 *last_conflicts = chrec_dont_know;
1689 dependence_stats.num_ziv_unimplemented++;
1690 break;
1693 if (dump_file && (dump_flags & TDF_DETAILS))
1694 fprintf (dump_file, ")\n");
1697 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
1698 and only if it fits to the int type. If this is not the case, or the
1699 bound on the number of iterations of LOOP could not be derived, returns
1700 chrec_dont_know. */
1702 static tree
1703 max_stmt_executions_tree (struct loop *loop)
1705 double_int nit;
1707 if (!max_stmt_executions (loop, &nit))
1708 return chrec_dont_know;
1710 if (!double_int_fits_to_tree_p (unsigned_type_node, nit))
1711 return chrec_dont_know;
1713 return double_int_to_tree (unsigned_type_node, nit);
1716 /* Determine whether the CHREC is always positive/negative. If the expression
1717 cannot be statically analyzed, return false, otherwise set the answer into
1718 VALUE. */
1720 static bool
1721 chrec_is_positive (tree chrec, bool *value)
1723 bool value0, value1, value2;
1724 tree end_value, nb_iter;
1726 switch (TREE_CODE (chrec))
1728 case POLYNOMIAL_CHREC:
1729 if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
1730 || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
1731 return false;
1733 /* FIXME -- overflows. */
1734 if (value0 == value1)
1736 *value = value0;
1737 return true;
1740 /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
1741 and the proof consists in showing that the sign never
1742 changes during the execution of the loop, from 0 to
1743 loop->nb_iterations. */
1744 if (!evolution_function_is_affine_p (chrec))
1745 return false;
1747 nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
1748 if (chrec_contains_undetermined (nb_iter))
1749 return false;
1751 #if 0
1752 /* TODO -- If the test is after the exit, we may decrease the number of
1753 iterations by one. */
1754 if (after_exit)
1755 nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
1756 #endif
1758 end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
1760 if (!chrec_is_positive (end_value, &value2))
1761 return false;
1763 *value = value0;
1764 return value0 == value1;
1766 case INTEGER_CST:
1767 switch (tree_int_cst_sgn (chrec))
1769 case -1:
1770 *value = false;
1771 break;
1772 case 1:
1773 *value = true;
1774 break;
1775 default:
1776 return false;
1778 return true;
1780 default:
1781 return false;
1786 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1787 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1788 *OVERLAPS_B are initialized to the functions that describe the
1789 relation between the elements accessed twice by CHREC_A and
1790 CHREC_B. For k >= 0, the following property is verified:
1792 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1794 static void
1795 analyze_siv_subscript_cst_affine (tree chrec_a,
1796 tree chrec_b,
1797 conflict_function **overlaps_a,
1798 conflict_function **overlaps_b,
1799 tree *last_conflicts)
1801 bool value0, value1, value2;
1802 tree type, difference, tmp;
1804 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1805 chrec_a = chrec_convert (type, chrec_a, NULL);
1806 chrec_b = chrec_convert (type, chrec_b, NULL);
1807 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1809 /* Special case overlap in the first iteration. */
1810 if (integer_zerop (difference))
1812 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1813 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1814 *last_conflicts = integer_one_node;
1815 return;
1818 if (!chrec_is_positive (initial_condition (difference), &value0))
1820 if (dump_file && (dump_flags & TDF_DETAILS))
1821 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1823 dependence_stats.num_siv_unimplemented++;
1824 *overlaps_a = conflict_fn_not_known ();
1825 *overlaps_b = conflict_fn_not_known ();
1826 *last_conflicts = chrec_dont_know;
1827 return;
1829 else
1831 if (value0 == false)
1833 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1835 if (dump_file && (dump_flags & TDF_DETAILS))
1836 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1838 *overlaps_a = conflict_fn_not_known ();
1839 *overlaps_b = conflict_fn_not_known ();
1840 *last_conflicts = chrec_dont_know;
1841 dependence_stats.num_siv_unimplemented++;
1842 return;
1844 else
1846 if (value1 == true)
1848 /* Example:
1849 chrec_a = 12
1850 chrec_b = {10, +, 1}
1853 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1855 HOST_WIDE_INT numiter;
1856 struct loop *loop = get_chrec_loop (chrec_b);
1858 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1859 tmp = fold_build2 (EXACT_DIV_EXPR, type,
1860 fold_build1 (ABS_EXPR, type, difference),
1861 CHREC_RIGHT (chrec_b));
1862 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1863 *last_conflicts = integer_one_node;
1866 /* Perform weak-zero siv test to see if overlap is
1867 outside the loop bounds. */
1868 numiter = max_stmt_executions_int (loop);
1870 if (numiter >= 0
1871 && compare_tree_int (tmp, numiter) > 0)
1873 free_conflict_function (*overlaps_a);
1874 free_conflict_function (*overlaps_b);
1875 *overlaps_a = conflict_fn_no_dependence ();
1876 *overlaps_b = conflict_fn_no_dependence ();
1877 *last_conflicts = integer_zero_node;
1878 dependence_stats.num_siv_independent++;
1879 return;
1881 dependence_stats.num_siv_dependent++;
1882 return;
1885 /* When the step does not divide the difference, there are
1886 no overlaps. */
1887 else
1889 *overlaps_a = conflict_fn_no_dependence ();
1890 *overlaps_b = conflict_fn_no_dependence ();
1891 *last_conflicts = integer_zero_node;
1892 dependence_stats.num_siv_independent++;
1893 return;
1897 else
1899 /* Example:
1900 chrec_a = 12
1901 chrec_b = {10, +, -1}
1903 In this case, chrec_a will not overlap with chrec_b. */
1904 *overlaps_a = conflict_fn_no_dependence ();
1905 *overlaps_b = conflict_fn_no_dependence ();
1906 *last_conflicts = integer_zero_node;
1907 dependence_stats.num_siv_independent++;
1908 return;
1912 else
1914 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1916 if (dump_file && (dump_flags & TDF_DETAILS))
1917 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1919 *overlaps_a = conflict_fn_not_known ();
1920 *overlaps_b = conflict_fn_not_known ();
1921 *last_conflicts = chrec_dont_know;
1922 dependence_stats.num_siv_unimplemented++;
1923 return;
1925 else
1927 if (value2 == false)
1929 /* Example:
1930 chrec_a = 3
1931 chrec_b = {10, +, -1}
1933 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1935 HOST_WIDE_INT numiter;
1936 struct loop *loop = get_chrec_loop (chrec_b);
1938 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1939 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
1940 CHREC_RIGHT (chrec_b));
1941 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1942 *last_conflicts = integer_one_node;
1944 /* Perform weak-zero siv test to see if overlap is
1945 outside the loop bounds. */
1946 numiter = max_stmt_executions_int (loop);
1948 if (numiter >= 0
1949 && compare_tree_int (tmp, numiter) > 0)
1951 free_conflict_function (*overlaps_a);
1952 free_conflict_function (*overlaps_b);
1953 *overlaps_a = conflict_fn_no_dependence ();
1954 *overlaps_b = conflict_fn_no_dependence ();
1955 *last_conflicts = integer_zero_node;
1956 dependence_stats.num_siv_independent++;
1957 return;
1959 dependence_stats.num_siv_dependent++;
1960 return;
1963 /* When the step does not divide the difference, there
1964 are no overlaps. */
1965 else
1967 *overlaps_a = conflict_fn_no_dependence ();
1968 *overlaps_b = conflict_fn_no_dependence ();
1969 *last_conflicts = integer_zero_node;
1970 dependence_stats.num_siv_independent++;
1971 return;
1974 else
1976 /* Example:
1977 chrec_a = 3
1978 chrec_b = {4, +, 1}
1980 In this case, chrec_a will not overlap with chrec_b. */
1981 *overlaps_a = conflict_fn_no_dependence ();
1982 *overlaps_b = conflict_fn_no_dependence ();
1983 *last_conflicts = integer_zero_node;
1984 dependence_stats.num_siv_independent++;
1985 return;
1992 /* Helper recursive function for initializing the matrix A. Returns
1993 the initial value of CHREC. */
1995 static tree
1996 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
1998 gcc_assert (chrec);
2000 switch (TREE_CODE (chrec))
2002 case POLYNOMIAL_CHREC:
2003 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
2005 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
2006 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
2008 case PLUS_EXPR:
2009 case MULT_EXPR:
2010 case MINUS_EXPR:
2012 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2013 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
2015 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
2018 case NOP_EXPR:
2020 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2021 return chrec_convert (chrec_type (chrec), op, NULL);
2024 case BIT_NOT_EXPR:
2026 /* Handle ~X as -1 - X. */
2027 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2028 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
2029 build_int_cst (TREE_TYPE (chrec), -1), op);
2032 case INTEGER_CST:
2033 return chrec;
2035 default:
2036 gcc_unreachable ();
2037 return NULL_TREE;
2041 #define FLOOR_DIV(x,y) ((x) / (y))
2043 /* Solves the special case of the Diophantine equation:
2044 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
2046 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
2047 number of iterations that loops X and Y run. The overlaps will be
2048 constructed as evolutions in dimension DIM. */
2050 static void
2051 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
2052 affine_fn *overlaps_a,
2053 affine_fn *overlaps_b,
2054 tree *last_conflicts, int dim)
2056 if (((step_a > 0 && step_b > 0)
2057 || (step_a < 0 && step_b < 0)))
2059 int step_overlaps_a, step_overlaps_b;
2060 int gcd_steps_a_b, last_conflict, tau2;
2062 gcd_steps_a_b = gcd (step_a, step_b);
2063 step_overlaps_a = step_b / gcd_steps_a_b;
2064 step_overlaps_b = step_a / gcd_steps_a_b;
2066 if (niter > 0)
2068 tau2 = FLOOR_DIV (niter, step_overlaps_a);
2069 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
2070 last_conflict = tau2;
2071 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2073 else
2074 *last_conflicts = chrec_dont_know;
2076 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
2077 build_int_cst (NULL_TREE,
2078 step_overlaps_a));
2079 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
2080 build_int_cst (NULL_TREE,
2081 step_overlaps_b));
2084 else
2086 *overlaps_a = affine_fn_cst (integer_zero_node);
2087 *overlaps_b = affine_fn_cst (integer_zero_node);
2088 *last_conflicts = integer_zero_node;
2092 /* Solves the special case of a Diophantine equation where CHREC_A is
2093 an affine bivariate function, and CHREC_B is an affine univariate
2094 function. For example,
2096 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
2098 has the following overlapping functions:
2100 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
2101 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
2102 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
2104 FORNOW: This is a specialized implementation for a case occurring in
2105 a common benchmark. Implement the general algorithm. */
2107 static void
2108 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
2109 conflict_function **overlaps_a,
2110 conflict_function **overlaps_b,
2111 tree *last_conflicts)
2113 bool xz_p, yz_p, xyz_p;
2114 int step_x, step_y, step_z;
2115 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
2116 affine_fn overlaps_a_xz, overlaps_b_xz;
2117 affine_fn overlaps_a_yz, overlaps_b_yz;
2118 affine_fn overlaps_a_xyz, overlaps_b_xyz;
2119 affine_fn ova1, ova2, ovb;
2120 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
2122 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
2123 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
2124 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
2126 niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
2127 niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
2128 niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
2130 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
2132 if (dump_file && (dump_flags & TDF_DETAILS))
2133 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
2135 *overlaps_a = conflict_fn_not_known ();
2136 *overlaps_b = conflict_fn_not_known ();
2137 *last_conflicts = chrec_dont_know;
2138 return;
2141 niter = MIN (niter_x, niter_z);
2142 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
2143 &overlaps_a_xz,
2144 &overlaps_b_xz,
2145 &last_conflicts_xz, 1);
2146 niter = MIN (niter_y, niter_z);
2147 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
2148 &overlaps_a_yz,
2149 &overlaps_b_yz,
2150 &last_conflicts_yz, 2);
2151 niter = MIN (niter_x, niter_z);
2152 niter = MIN (niter_y, niter);
2153 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
2154 &overlaps_a_xyz,
2155 &overlaps_b_xyz,
2156 &last_conflicts_xyz, 3);
2158 xz_p = !integer_zerop (last_conflicts_xz);
2159 yz_p = !integer_zerop (last_conflicts_yz);
2160 xyz_p = !integer_zerop (last_conflicts_xyz);
2162 if (xz_p || yz_p || xyz_p)
2164 ova1 = affine_fn_cst (integer_zero_node);
2165 ova2 = affine_fn_cst (integer_zero_node);
2166 ovb = affine_fn_cst (integer_zero_node);
2167 if (xz_p)
2169 affine_fn t0 = ova1;
2170 affine_fn t2 = ovb;
2172 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2173 ovb = affine_fn_plus (ovb, overlaps_b_xz);
2174 affine_fn_free (t0);
2175 affine_fn_free (t2);
2176 *last_conflicts = last_conflicts_xz;
2178 if (yz_p)
2180 affine_fn t0 = ova2;
2181 affine_fn t2 = ovb;
2183 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2184 ovb = affine_fn_plus (ovb, overlaps_b_yz);
2185 affine_fn_free (t0);
2186 affine_fn_free (t2);
2187 *last_conflicts = last_conflicts_yz;
2189 if (xyz_p)
2191 affine_fn t0 = ova1;
2192 affine_fn t2 = ova2;
2193 affine_fn t4 = ovb;
2195 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2196 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2197 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2198 affine_fn_free (t0);
2199 affine_fn_free (t2);
2200 affine_fn_free (t4);
2201 *last_conflicts = last_conflicts_xyz;
2203 *overlaps_a = conflict_fn (2, ova1, ova2);
2204 *overlaps_b = conflict_fn (1, ovb);
2206 else
2208 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2209 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2210 *last_conflicts = integer_zero_node;
2213 affine_fn_free (overlaps_a_xz);
2214 affine_fn_free (overlaps_b_xz);
2215 affine_fn_free (overlaps_a_yz);
2216 affine_fn_free (overlaps_b_yz);
2217 affine_fn_free (overlaps_a_xyz);
2218 affine_fn_free (overlaps_b_xyz);
2221 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
2223 static void
2224 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
2225 int size)
2227 memcpy (vec2, vec1, size * sizeof (*vec1));
2230 /* Copy the elements of M x N matrix MAT1 to MAT2. */
2232 static void
2233 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
2234 int m, int n)
2236 int i;
2238 for (i = 0; i < m; i++)
2239 lambda_vector_copy (mat1[i], mat2[i], n);
2242 /* Store the N x N identity matrix in MAT. */
2244 static void
2245 lambda_matrix_id (lambda_matrix mat, int size)
2247 int i, j;
2249 for (i = 0; i < size; i++)
2250 for (j = 0; j < size; j++)
2251 mat[i][j] = (i == j) ? 1 : 0;
2254 /* Return the first nonzero element of vector VEC1 between START and N.
2255 We must have START <= N. Returns N if VEC1 is the zero vector. */
2257 static int
2258 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
2260 int j = start;
2261 while (j < n && vec1[j] == 0)
2262 j++;
2263 return j;
2266 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2267 R2 = R2 + CONST1 * R1. */
2269 static void
2270 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
2272 int i;
2274 if (const1 == 0)
2275 return;
2277 for (i = 0; i < n; i++)
2278 mat[r2][i] += const1 * mat[r1][i];
2281 /* Swap rows R1 and R2 in matrix MAT. */
2283 static void
2284 lambda_matrix_row_exchange (lambda_matrix mat, int r1, int r2)
2286 lambda_vector row;
2288 row = mat[r1];
2289 mat[r1] = mat[r2];
2290 mat[r2] = row;
2293 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
2294 and store the result in VEC2. */
2296 static void
2297 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
2298 int size, int const1)
2300 int i;
2302 if (const1 == 0)
2303 lambda_vector_clear (vec2, size);
2304 else
2305 for (i = 0; i < size; i++)
2306 vec2[i] = const1 * vec1[i];
2309 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
2311 static void
2312 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
2313 int size)
2315 lambda_vector_mult_const (vec1, vec2, size, -1);
2318 /* Negate row R1 of matrix MAT which has N columns. */
2320 static void
2321 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
2323 lambda_vector_negate (mat[r1], mat[r1], n);
2326 /* Return true if two vectors are equal. */
2328 static bool
2329 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
2331 int i;
2332 for (i = 0; i < size; i++)
2333 if (vec1[i] != vec2[i])
2334 return false;
2335 return true;
2338 /* Given an M x N integer matrix A, this function determines an M x
2339 M unimodular matrix U, and an M x N echelon matrix S such that
2340 "U.A = S". This decomposition is also known as "right Hermite".
2342 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2343 Restructuring Compilers" Utpal Banerjee. */
2345 static void
2346 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
2347 lambda_matrix S, lambda_matrix U)
2349 int i, j, i0 = 0;
2351 lambda_matrix_copy (A, S, m, n);
2352 lambda_matrix_id (U, m);
2354 for (j = 0; j < n; j++)
2356 if (lambda_vector_first_nz (S[j], m, i0) < m)
2358 ++i0;
2359 for (i = m - 1; i >= i0; i--)
2361 while (S[i][j] != 0)
2363 int sigma, factor, a, b;
2365 a = S[i-1][j];
2366 b = S[i][j];
2367 sigma = (a * b < 0) ? -1: 1;
2368 a = abs (a);
2369 b = abs (b);
2370 factor = sigma * (a / b);
2372 lambda_matrix_row_add (S, n, i, i-1, -factor);
2373 lambda_matrix_row_exchange (S, i, i-1);
2375 lambda_matrix_row_add (U, m, i, i-1, -factor);
2376 lambda_matrix_row_exchange (U, i, i-1);
2383 /* Determines the overlapping elements due to accesses CHREC_A and
2384 CHREC_B, that are affine functions. This function cannot handle
2385 symbolic evolution functions, ie. when initial conditions are
2386 parameters, because it uses lambda matrices of integers. */
2388 static void
2389 analyze_subscript_affine_affine (tree chrec_a,
2390 tree chrec_b,
2391 conflict_function **overlaps_a,
2392 conflict_function **overlaps_b,
2393 tree *last_conflicts)
2395 unsigned nb_vars_a, nb_vars_b, dim;
2396 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2397 lambda_matrix A, U, S;
2398 struct obstack scratch_obstack;
2400 if (eq_evolutions_p (chrec_a, chrec_b))
2402 /* The accessed index overlaps for each iteration in the
2403 loop. */
2404 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2405 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2406 *last_conflicts = chrec_dont_know;
2407 return;
2409 if (dump_file && (dump_flags & TDF_DETAILS))
2410 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2412 /* For determining the initial intersection, we have to solve a
2413 Diophantine equation. This is the most time consuming part.
2415 For answering to the question: "Is there a dependence?" we have
2416 to prove that there exists a solution to the Diophantine
2417 equation, and that the solution is in the iteration domain,
2418 i.e. the solution is positive or zero, and that the solution
2419 happens before the upper bound loop.nb_iterations. Otherwise
2420 there is no dependence. This function outputs a description of
2421 the iterations that hold the intersections. */
2423 nb_vars_a = nb_vars_in_chrec (chrec_a);
2424 nb_vars_b = nb_vars_in_chrec (chrec_b);
2426 gcc_obstack_init (&scratch_obstack);
2428 dim = nb_vars_a + nb_vars_b;
2429 U = lambda_matrix_new (dim, dim, &scratch_obstack);
2430 A = lambda_matrix_new (dim, 1, &scratch_obstack);
2431 S = lambda_matrix_new (dim, 1, &scratch_obstack);
2433 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2434 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
2435 gamma = init_b - init_a;
2437 /* Don't do all the hard work of solving the Diophantine equation
2438 when we already know the solution: for example,
2439 | {3, +, 1}_1
2440 | {3, +, 4}_2
2441 | gamma = 3 - 3 = 0.
2442 Then the first overlap occurs during the first iterations:
2443 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2445 if (gamma == 0)
2447 if (nb_vars_a == 1 && nb_vars_b == 1)
2449 HOST_WIDE_INT step_a, step_b;
2450 HOST_WIDE_INT niter, niter_a, niter_b;
2451 affine_fn ova, ovb;
2453 niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
2454 niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
2455 niter = MIN (niter_a, niter_b);
2456 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2457 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2459 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2460 &ova, &ovb,
2461 last_conflicts, 1);
2462 *overlaps_a = conflict_fn (1, ova);
2463 *overlaps_b = conflict_fn (1, ovb);
2466 else if (nb_vars_a == 2 && nb_vars_b == 1)
2467 compute_overlap_steps_for_affine_1_2
2468 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2470 else if (nb_vars_a == 1 && nb_vars_b == 2)
2471 compute_overlap_steps_for_affine_1_2
2472 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2474 else
2476 if (dump_file && (dump_flags & TDF_DETAILS))
2477 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2478 *overlaps_a = conflict_fn_not_known ();
2479 *overlaps_b = conflict_fn_not_known ();
2480 *last_conflicts = chrec_dont_know;
2482 goto end_analyze_subs_aa;
2485 /* U.A = S */
2486 lambda_matrix_right_hermite (A, dim, 1, S, U);
2488 if (S[0][0] < 0)
2490 S[0][0] *= -1;
2491 lambda_matrix_row_negate (U, dim, 0);
2493 gcd_alpha_beta = S[0][0];
2495 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2496 but that is a quite strange case. Instead of ICEing, answer
2497 don't know. */
2498 if (gcd_alpha_beta == 0)
2500 *overlaps_a = conflict_fn_not_known ();
2501 *overlaps_b = conflict_fn_not_known ();
2502 *last_conflicts = chrec_dont_know;
2503 goto end_analyze_subs_aa;
2506 /* The classic "gcd-test". */
2507 if (!int_divides_p (gcd_alpha_beta, gamma))
2509 /* The "gcd-test" has determined that there is no integer
2510 solution, i.e. there is no dependence. */
2511 *overlaps_a = conflict_fn_no_dependence ();
2512 *overlaps_b = conflict_fn_no_dependence ();
2513 *last_conflicts = integer_zero_node;
2516 /* Both access functions are univariate. This includes SIV and MIV cases. */
2517 else if (nb_vars_a == 1 && nb_vars_b == 1)
2519 /* Both functions should have the same evolution sign. */
2520 if (((A[0][0] > 0 && -A[1][0] > 0)
2521 || (A[0][0] < 0 && -A[1][0] < 0)))
2523 /* The solutions are given by:
2525 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2526 | [u21 u22] [y0]
2528 For a given integer t. Using the following variables,
2530 | i0 = u11 * gamma / gcd_alpha_beta
2531 | j0 = u12 * gamma / gcd_alpha_beta
2532 | i1 = u21
2533 | j1 = u22
2535 the solutions are:
2537 | x0 = i0 + i1 * t,
2538 | y0 = j0 + j1 * t. */
2539 HOST_WIDE_INT i0, j0, i1, j1;
2541 i0 = U[0][0] * gamma / gcd_alpha_beta;
2542 j0 = U[0][1] * gamma / gcd_alpha_beta;
2543 i1 = U[1][0];
2544 j1 = U[1][1];
2546 if ((i1 == 0 && i0 < 0)
2547 || (j1 == 0 && j0 < 0))
2549 /* There is no solution.
2550 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2551 falls in here, but for the moment we don't look at the
2552 upper bound of the iteration domain. */
2553 *overlaps_a = conflict_fn_no_dependence ();
2554 *overlaps_b = conflict_fn_no_dependence ();
2555 *last_conflicts = integer_zero_node;
2556 goto end_analyze_subs_aa;
2559 if (i1 > 0 && j1 > 0)
2561 HOST_WIDE_INT niter_a
2562 = max_stmt_executions_int (get_chrec_loop (chrec_a));
2563 HOST_WIDE_INT niter_b
2564 = max_stmt_executions_int (get_chrec_loop (chrec_b));
2565 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2567 /* (X0, Y0) is a solution of the Diophantine equation:
2568 "chrec_a (X0) = chrec_b (Y0)". */
2569 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2570 CEIL (-j0, j1));
2571 HOST_WIDE_INT x0 = i1 * tau1 + i0;
2572 HOST_WIDE_INT y0 = j1 * tau1 + j0;
2574 /* (X1, Y1) is the smallest positive solution of the eq
2575 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2576 first conflict occurs. */
2577 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2578 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2579 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2581 if (niter > 0)
2583 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2584 FLOOR_DIV (niter - j0, j1));
2585 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2587 /* If the overlap occurs outside of the bounds of the
2588 loop, there is no dependence. */
2589 if (x1 >= niter || y1 >= niter)
2591 *overlaps_a = conflict_fn_no_dependence ();
2592 *overlaps_b = conflict_fn_no_dependence ();
2593 *last_conflicts = integer_zero_node;
2594 goto end_analyze_subs_aa;
2596 else
2597 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2599 else
2600 *last_conflicts = chrec_dont_know;
2602 *overlaps_a
2603 = conflict_fn (1,
2604 affine_fn_univar (build_int_cst (NULL_TREE, x1),
2606 build_int_cst (NULL_TREE, i1)));
2607 *overlaps_b
2608 = conflict_fn (1,
2609 affine_fn_univar (build_int_cst (NULL_TREE, y1),
2611 build_int_cst (NULL_TREE, j1)));
2613 else
2615 /* FIXME: For the moment, the upper bound of the
2616 iteration domain for i and j is not checked. */
2617 if (dump_file && (dump_flags & TDF_DETAILS))
2618 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2619 *overlaps_a = conflict_fn_not_known ();
2620 *overlaps_b = conflict_fn_not_known ();
2621 *last_conflicts = chrec_dont_know;
2624 else
2626 if (dump_file && (dump_flags & TDF_DETAILS))
2627 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2628 *overlaps_a = conflict_fn_not_known ();
2629 *overlaps_b = conflict_fn_not_known ();
2630 *last_conflicts = chrec_dont_know;
2633 else
2635 if (dump_file && (dump_flags & TDF_DETAILS))
2636 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2637 *overlaps_a = conflict_fn_not_known ();
2638 *overlaps_b = conflict_fn_not_known ();
2639 *last_conflicts = chrec_dont_know;
2642 end_analyze_subs_aa:
2643 obstack_free (&scratch_obstack, NULL);
2644 if (dump_file && (dump_flags & TDF_DETAILS))
2646 fprintf (dump_file, " (overlaps_a = ");
2647 dump_conflict_function (dump_file, *overlaps_a);
2648 fprintf (dump_file, ")\n (overlaps_b = ");
2649 dump_conflict_function (dump_file, *overlaps_b);
2650 fprintf (dump_file, ")\n");
2651 fprintf (dump_file, ")\n");
2655 /* Returns true when analyze_subscript_affine_affine can be used for
2656 determining the dependence relation between chrec_a and chrec_b,
2657 that contain symbols. This function modifies chrec_a and chrec_b
2658 such that the analysis result is the same, and such that they don't
2659 contain symbols, and then can safely be passed to the analyzer.
2661 Example: The analysis of the following tuples of evolutions produce
2662 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2663 vs. {0, +, 1}_1
2665 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2666 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2669 static bool
2670 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2672 tree diff, type, left_a, left_b, right_b;
2674 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2675 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2676 /* FIXME: For the moment not handled. Might be refined later. */
2677 return false;
2679 type = chrec_type (*chrec_a);
2680 left_a = CHREC_LEFT (*chrec_a);
2681 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
2682 diff = chrec_fold_minus (type, left_a, left_b);
2684 if (!evolution_function_is_constant_p (diff))
2685 return false;
2687 if (dump_file && (dump_flags & TDF_DETAILS))
2688 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2690 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2691 diff, CHREC_RIGHT (*chrec_a));
2692 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
2693 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2694 build_int_cst (type, 0),
2695 right_b);
2696 return true;
2699 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2700 *OVERLAPS_B are initialized to the functions that describe the
2701 relation between the elements accessed twice by CHREC_A and
2702 CHREC_B. For k >= 0, the following property is verified:
2704 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2706 static void
2707 analyze_siv_subscript (tree chrec_a,
2708 tree chrec_b,
2709 conflict_function **overlaps_a,
2710 conflict_function **overlaps_b,
2711 tree *last_conflicts,
2712 int loop_nest_num)
2714 dependence_stats.num_siv++;
2716 if (dump_file && (dump_flags & TDF_DETAILS))
2717 fprintf (dump_file, "(analyze_siv_subscript \n");
2719 if (evolution_function_is_constant_p (chrec_a)
2720 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2721 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2722 overlaps_a, overlaps_b, last_conflicts);
2724 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2725 && evolution_function_is_constant_p (chrec_b))
2726 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2727 overlaps_b, overlaps_a, last_conflicts);
2729 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2730 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2732 if (!chrec_contains_symbols (chrec_a)
2733 && !chrec_contains_symbols (chrec_b))
2735 analyze_subscript_affine_affine (chrec_a, chrec_b,
2736 overlaps_a, overlaps_b,
2737 last_conflicts);
2739 if (CF_NOT_KNOWN_P (*overlaps_a)
2740 || CF_NOT_KNOWN_P (*overlaps_b))
2741 dependence_stats.num_siv_unimplemented++;
2742 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2743 || CF_NO_DEPENDENCE_P (*overlaps_b))
2744 dependence_stats.num_siv_independent++;
2745 else
2746 dependence_stats.num_siv_dependent++;
2748 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2749 &chrec_b))
2751 analyze_subscript_affine_affine (chrec_a, chrec_b,
2752 overlaps_a, overlaps_b,
2753 last_conflicts);
2755 if (CF_NOT_KNOWN_P (*overlaps_a)
2756 || CF_NOT_KNOWN_P (*overlaps_b))
2757 dependence_stats.num_siv_unimplemented++;
2758 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2759 || CF_NO_DEPENDENCE_P (*overlaps_b))
2760 dependence_stats.num_siv_independent++;
2761 else
2762 dependence_stats.num_siv_dependent++;
2764 else
2765 goto siv_subscript_dontknow;
2768 else
2770 siv_subscript_dontknow:;
2771 if (dump_file && (dump_flags & TDF_DETAILS))
2772 fprintf (dump_file, "siv test failed: unimplemented.\n");
2773 *overlaps_a = conflict_fn_not_known ();
2774 *overlaps_b = conflict_fn_not_known ();
2775 *last_conflicts = chrec_dont_know;
2776 dependence_stats.num_siv_unimplemented++;
2779 if (dump_file && (dump_flags & TDF_DETAILS))
2780 fprintf (dump_file, ")\n");
2783 /* Returns false if we can prove that the greatest common divisor of the steps
2784 of CHREC does not divide CST, false otherwise. */
2786 static bool
2787 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2789 HOST_WIDE_INT cd = 0, val;
2790 tree step;
2792 if (!host_integerp (cst, 0))
2793 return true;
2794 val = tree_low_cst (cst, 0);
2796 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2798 step = CHREC_RIGHT (chrec);
2799 if (!host_integerp (step, 0))
2800 return true;
2801 cd = gcd (cd, tree_low_cst (step, 0));
2802 chrec = CHREC_LEFT (chrec);
2805 return val % cd == 0;
2808 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2809 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2810 functions that describe the relation between the elements accessed
2811 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2812 is verified:
2814 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2816 static void
2817 analyze_miv_subscript (tree chrec_a,
2818 tree chrec_b,
2819 conflict_function **overlaps_a,
2820 conflict_function **overlaps_b,
2821 tree *last_conflicts,
2822 struct loop *loop_nest)
2824 tree type, difference;
2826 dependence_stats.num_miv++;
2827 if (dump_file && (dump_flags & TDF_DETAILS))
2828 fprintf (dump_file, "(analyze_miv_subscript \n");
2830 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2831 chrec_a = chrec_convert (type, chrec_a, NULL);
2832 chrec_b = chrec_convert (type, chrec_b, NULL);
2833 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2835 if (eq_evolutions_p (chrec_a, chrec_b))
2837 /* Access functions are the same: all the elements are accessed
2838 in the same order. */
2839 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2840 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2841 *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
2842 dependence_stats.num_miv_dependent++;
2845 else if (evolution_function_is_constant_p (difference)
2846 /* For the moment, the following is verified:
2847 evolution_function_is_affine_multivariate_p (chrec_a,
2848 loop_nest->num) */
2849 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2851 /* testsuite/.../ssa-chrec-33.c
2852 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2854 The difference is 1, and all the evolution steps are multiples
2855 of 2, consequently there are no overlapping elements. */
2856 *overlaps_a = conflict_fn_no_dependence ();
2857 *overlaps_b = conflict_fn_no_dependence ();
2858 *last_conflicts = integer_zero_node;
2859 dependence_stats.num_miv_independent++;
2862 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2863 && !chrec_contains_symbols (chrec_a)
2864 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2865 && !chrec_contains_symbols (chrec_b))
2867 /* testsuite/.../ssa-chrec-35.c
2868 {0, +, 1}_2 vs. {0, +, 1}_3
2869 the overlapping elements are respectively located at iterations:
2870 {0, +, 1}_x and {0, +, 1}_x,
2871 in other words, we have the equality:
2872 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2874 Other examples:
2875 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2876 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2878 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2879 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2881 analyze_subscript_affine_affine (chrec_a, chrec_b,
2882 overlaps_a, overlaps_b, last_conflicts);
2884 if (CF_NOT_KNOWN_P (*overlaps_a)
2885 || CF_NOT_KNOWN_P (*overlaps_b))
2886 dependence_stats.num_miv_unimplemented++;
2887 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2888 || CF_NO_DEPENDENCE_P (*overlaps_b))
2889 dependence_stats.num_miv_independent++;
2890 else
2891 dependence_stats.num_miv_dependent++;
2894 else
2896 /* When the analysis is too difficult, answer "don't know". */
2897 if (dump_file && (dump_flags & TDF_DETAILS))
2898 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2900 *overlaps_a = conflict_fn_not_known ();
2901 *overlaps_b = conflict_fn_not_known ();
2902 *last_conflicts = chrec_dont_know;
2903 dependence_stats.num_miv_unimplemented++;
2906 if (dump_file && (dump_flags & TDF_DETAILS))
2907 fprintf (dump_file, ")\n");
2910 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2911 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2912 OVERLAP_ITERATIONS_B are initialized with two functions that
2913 describe the iterations that contain conflicting elements.
2915 Remark: For an integer k >= 0, the following equality is true:
2917 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2920 static void
2921 analyze_overlapping_iterations (tree chrec_a,
2922 tree chrec_b,
2923 conflict_function **overlap_iterations_a,
2924 conflict_function **overlap_iterations_b,
2925 tree *last_conflicts, struct loop *loop_nest)
2927 unsigned int lnn = loop_nest->num;
2929 dependence_stats.num_subscript_tests++;
2931 if (dump_file && (dump_flags & TDF_DETAILS))
2933 fprintf (dump_file, "(analyze_overlapping_iterations \n");
2934 fprintf (dump_file, " (chrec_a = ");
2935 print_generic_expr (dump_file, chrec_a, 0);
2936 fprintf (dump_file, ")\n (chrec_b = ");
2937 print_generic_expr (dump_file, chrec_b, 0);
2938 fprintf (dump_file, ")\n");
2941 if (chrec_a == NULL_TREE
2942 || chrec_b == NULL_TREE
2943 || chrec_contains_undetermined (chrec_a)
2944 || chrec_contains_undetermined (chrec_b))
2946 dependence_stats.num_subscript_undetermined++;
2948 *overlap_iterations_a = conflict_fn_not_known ();
2949 *overlap_iterations_b = conflict_fn_not_known ();
2952 /* If they are the same chrec, and are affine, they overlap
2953 on every iteration. */
2954 else if (eq_evolutions_p (chrec_a, chrec_b)
2955 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2956 || operand_equal_p (chrec_a, chrec_b, 0)))
2958 dependence_stats.num_same_subscript_function++;
2959 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2960 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2961 *last_conflicts = chrec_dont_know;
2964 /* If they aren't the same, and aren't affine, we can't do anything
2965 yet. */
2966 else if ((chrec_contains_symbols (chrec_a)
2967 || chrec_contains_symbols (chrec_b))
2968 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2969 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
2971 dependence_stats.num_subscript_undetermined++;
2972 *overlap_iterations_a = conflict_fn_not_known ();
2973 *overlap_iterations_b = conflict_fn_not_known ();
2976 else if (ziv_subscript_p (chrec_a, chrec_b))
2977 analyze_ziv_subscript (chrec_a, chrec_b,
2978 overlap_iterations_a, overlap_iterations_b,
2979 last_conflicts);
2981 else if (siv_subscript_p (chrec_a, chrec_b))
2982 analyze_siv_subscript (chrec_a, chrec_b,
2983 overlap_iterations_a, overlap_iterations_b,
2984 last_conflicts, lnn);
2986 else
2987 analyze_miv_subscript (chrec_a, chrec_b,
2988 overlap_iterations_a, overlap_iterations_b,
2989 last_conflicts, loop_nest);
2991 if (dump_file && (dump_flags & TDF_DETAILS))
2993 fprintf (dump_file, " (overlap_iterations_a = ");
2994 dump_conflict_function (dump_file, *overlap_iterations_a);
2995 fprintf (dump_file, ")\n (overlap_iterations_b = ");
2996 dump_conflict_function (dump_file, *overlap_iterations_b);
2997 fprintf (dump_file, ")\n");
2998 fprintf (dump_file, ")\n");
3002 /* Helper function for uniquely inserting distance vectors. */
3004 static void
3005 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
3007 unsigned i;
3008 lambda_vector v;
3010 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, v)
3011 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
3012 return;
3014 DDR_DIST_VECTS (ddr).safe_push (dist_v);
3017 /* Helper function for uniquely inserting direction vectors. */
3019 static void
3020 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
3022 unsigned i;
3023 lambda_vector v;
3025 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), i, v)
3026 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
3027 return;
3029 DDR_DIR_VECTS (ddr).safe_push (dir_v);
3032 /* Add a distance of 1 on all the loops outer than INDEX. If we
3033 haven't yet determined a distance for this outer loop, push a new
3034 distance vector composed of the previous distance, and a distance
3035 of 1 for this outer loop. Example:
3037 | loop_1
3038 | loop_2
3039 | A[10]
3040 | endloop_2
3041 | endloop_1
3043 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
3044 save (0, 1), then we have to save (1, 0). */
3046 static void
3047 add_outer_distances (struct data_dependence_relation *ddr,
3048 lambda_vector dist_v, int index)
3050 /* For each outer loop where init_v is not set, the accesses are
3051 in dependence of distance 1 in the loop. */
3052 while (--index >= 0)
3054 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3055 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3056 save_v[index] = 1;
3057 save_dist_v (ddr, save_v);
3061 /* Return false when fail to represent the data dependence as a
3062 distance vector. INIT_B is set to true when a component has been
3063 added to the distance vector DIST_V. INDEX_CARRY is then set to
3064 the index in DIST_V that carries the dependence. */
3066 static bool
3067 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
3068 struct data_reference *ddr_a,
3069 struct data_reference *ddr_b,
3070 lambda_vector dist_v, bool *init_b,
3071 int *index_carry)
3073 unsigned i;
3074 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3076 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3078 tree access_fn_a, access_fn_b;
3079 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
3081 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
3083 non_affine_dependence_relation (ddr);
3084 return false;
3087 access_fn_a = DR_ACCESS_FN (ddr_a, i);
3088 access_fn_b = DR_ACCESS_FN (ddr_b, i);
3090 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
3091 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
3093 int dist, index;
3094 int var_a = CHREC_VARIABLE (access_fn_a);
3095 int var_b = CHREC_VARIABLE (access_fn_b);
3097 if (var_a != var_b
3098 || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
3100 non_affine_dependence_relation (ddr);
3101 return false;
3104 dist = int_cst_value (SUB_DISTANCE (subscript));
3105 index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
3106 *index_carry = MIN (index, *index_carry);
3108 /* This is the subscript coupling test. If we have already
3109 recorded a distance for this loop (a distance coming from
3110 another subscript), it should be the same. For example,
3111 in the following code, there is no dependence:
3113 | loop i = 0, N, 1
3114 | T[i+1][i] = ...
3115 | ... = T[i][i]
3116 | endloop
3118 if (init_v[index] != 0 && dist_v[index] != dist)
3120 finalize_ddr_dependent (ddr, chrec_known);
3121 return false;
3124 dist_v[index] = dist;
3125 init_v[index] = 1;
3126 *init_b = true;
3128 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
3130 /* This can be for example an affine vs. constant dependence
3131 (T[i] vs. T[3]) that is not an affine dependence and is
3132 not representable as a distance vector. */
3133 non_affine_dependence_relation (ddr);
3134 return false;
3138 return true;
3141 /* Return true when the DDR contains only constant access functions. */
3143 static bool
3144 constant_access_functions (const struct data_dependence_relation *ddr)
3146 unsigned i;
3148 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3149 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
3150 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
3151 return false;
3153 return true;
3156 /* Helper function for the case where DDR_A and DDR_B are the same
3157 multivariate access function with a constant step. For an example
3158 see pr34635-1.c. */
3160 static void
3161 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
3163 int x_1, x_2;
3164 tree c_1 = CHREC_LEFT (c_2);
3165 tree c_0 = CHREC_LEFT (c_1);
3166 lambda_vector dist_v;
3167 int v1, v2, cd;
3169 /* Polynomials with more than 2 variables are not handled yet. When
3170 the evolution steps are parameters, it is not possible to
3171 represent the dependence using classical distance vectors. */
3172 if (TREE_CODE (c_0) != INTEGER_CST
3173 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
3174 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
3176 DDR_AFFINE_P (ddr) = false;
3177 return;
3180 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
3181 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
3183 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
3184 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3185 v1 = int_cst_value (CHREC_RIGHT (c_1));
3186 v2 = int_cst_value (CHREC_RIGHT (c_2));
3187 cd = gcd (v1, v2);
3188 v1 /= cd;
3189 v2 /= cd;
3191 if (v2 < 0)
3193 v2 = -v2;
3194 v1 = -v1;
3197 dist_v[x_1] = v2;
3198 dist_v[x_2] = -v1;
3199 save_dist_v (ddr, dist_v);
3201 add_outer_distances (ddr, dist_v, x_1);
3204 /* Helper function for the case where DDR_A and DDR_B are the same
3205 access functions. */
3207 static void
3208 add_other_self_distances (struct data_dependence_relation *ddr)
3210 lambda_vector dist_v;
3211 unsigned i;
3212 int index_carry = DDR_NB_LOOPS (ddr);
3214 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3216 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
3218 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
3220 if (!evolution_function_is_univariate_p (access_fun))
3222 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
3224 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
3225 return;
3228 access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
3230 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
3231 add_multivariate_self_dist (ddr, access_fun);
3232 else
3233 /* The evolution step is not constant: it varies in
3234 the outer loop, so this cannot be represented by a
3235 distance vector. For example in pr34635.c the
3236 evolution is {0, +, {0, +, 4}_1}_2. */
3237 DDR_AFFINE_P (ddr) = false;
3239 return;
3242 index_carry = MIN (index_carry,
3243 index_in_loop_nest (CHREC_VARIABLE (access_fun),
3244 DDR_LOOP_NEST (ddr)));
3248 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3249 add_outer_distances (ddr, dist_v, index_carry);
3252 static void
3253 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
3255 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3257 dist_v[DDR_INNER_LOOP (ddr)] = 1;
3258 save_dist_v (ddr, dist_v);
3261 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
3262 is the case for example when access functions are the same and
3263 equal to a constant, as in:
3265 | loop_1
3266 | A[3] = ...
3267 | ... = A[3]
3268 | endloop_1
3270 in which case the distance vectors are (0) and (1). */
3272 static void
3273 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
3275 unsigned i, j;
3277 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3279 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
3280 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
3281 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
3283 for (j = 0; j < ca->n; j++)
3284 if (affine_function_zero_p (ca->fns[j]))
3286 insert_innermost_unit_dist_vector (ddr);
3287 return;
3290 for (j = 0; j < cb->n; j++)
3291 if (affine_function_zero_p (cb->fns[j]))
3293 insert_innermost_unit_dist_vector (ddr);
3294 return;
3299 /* Compute the classic per loop distance vector. DDR is the data
3300 dependence relation to build a vector from. Return false when fail
3301 to represent the data dependence as a distance vector. */
3303 static bool
3304 build_classic_dist_vector (struct data_dependence_relation *ddr,
3305 struct loop *loop_nest)
3307 bool init_b = false;
3308 int index_carry = DDR_NB_LOOPS (ddr);
3309 lambda_vector dist_v;
3311 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3312 return false;
3314 if (same_access_functions (ddr))
3316 /* Save the 0 vector. */
3317 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3318 save_dist_v (ddr, dist_v);
3320 if (constant_access_functions (ddr))
3321 add_distance_for_zero_overlaps (ddr);
3323 if (DDR_NB_LOOPS (ddr) > 1)
3324 add_other_self_distances (ddr);
3326 return true;
3329 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3330 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3331 dist_v, &init_b, &index_carry))
3332 return false;
3334 /* Save the distance vector if we initialized one. */
3335 if (init_b)
3337 /* Verify a basic constraint: classic distance vectors should
3338 always be lexicographically positive.
3340 Data references are collected in the order of execution of
3341 the program, thus for the following loop
3343 | for (i = 1; i < 100; i++)
3344 | for (j = 1; j < 100; j++)
3346 | t = T[j+1][i-1]; // A
3347 | T[j][i] = t + 2; // B
3350 references are collected following the direction of the wind:
3351 A then B. The data dependence tests are performed also
3352 following this order, such that we're looking at the distance
3353 separating the elements accessed by A from the elements later
3354 accessed by B. But in this example, the distance returned by
3355 test_dep (A, B) is lexicographically negative (-1, 1), that
3356 means that the access A occurs later than B with respect to
3357 the outer loop, ie. we're actually looking upwind. In this
3358 case we solve test_dep (B, A) looking downwind to the
3359 lexicographically positive solution, that returns the
3360 distance vector (1, -1). */
3361 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3363 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3364 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3365 loop_nest))
3366 return false;
3367 compute_subscript_distance (ddr);
3368 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3369 save_v, &init_b, &index_carry))
3370 return false;
3371 save_dist_v (ddr, save_v);
3372 DDR_REVERSED_P (ddr) = true;
3374 /* In this case there is a dependence forward for all the
3375 outer loops:
3377 | for (k = 1; k < 100; k++)
3378 | for (i = 1; i < 100; i++)
3379 | for (j = 1; j < 100; j++)
3381 | t = T[j+1][i-1]; // A
3382 | T[j][i] = t + 2; // B
3385 the vectors are:
3386 (0, 1, -1)
3387 (1, 1, -1)
3388 (1, -1, 1)
3390 if (DDR_NB_LOOPS (ddr) > 1)
3392 add_outer_distances (ddr, save_v, index_carry);
3393 add_outer_distances (ddr, dist_v, index_carry);
3396 else
3398 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3399 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3401 if (DDR_NB_LOOPS (ddr) > 1)
3403 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3405 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3406 DDR_A (ddr), loop_nest))
3407 return false;
3408 compute_subscript_distance (ddr);
3409 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3410 opposite_v, &init_b,
3411 &index_carry))
3412 return false;
3414 save_dist_v (ddr, save_v);
3415 add_outer_distances (ddr, dist_v, index_carry);
3416 add_outer_distances (ddr, opposite_v, index_carry);
3418 else
3419 save_dist_v (ddr, save_v);
3422 else
3424 /* There is a distance of 1 on all the outer loops: Example:
3425 there is a dependence of distance 1 on loop_1 for the array A.
3427 | loop_1
3428 | A[5] = ...
3429 | endloop
3431 add_outer_distances (ddr, dist_v,
3432 lambda_vector_first_nz (dist_v,
3433 DDR_NB_LOOPS (ddr), 0));
3436 if (dump_file && (dump_flags & TDF_DETAILS))
3438 unsigned i;
3440 fprintf (dump_file, "(build_classic_dist_vector\n");
3441 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3443 fprintf (dump_file, " dist_vector = (");
3444 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3445 DDR_NB_LOOPS (ddr));
3446 fprintf (dump_file, " )\n");
3448 fprintf (dump_file, ")\n");
3451 return true;
3454 /* Return the direction for a given distance.
3455 FIXME: Computing dir this way is suboptimal, since dir can catch
3456 cases that dist is unable to represent. */
3458 static inline enum data_dependence_direction
3459 dir_from_dist (int dist)
3461 if (dist > 0)
3462 return dir_positive;
3463 else if (dist < 0)
3464 return dir_negative;
3465 else
3466 return dir_equal;
3469 /* Compute the classic per loop direction vector. DDR is the data
3470 dependence relation to build a vector from. */
3472 static void
3473 build_classic_dir_vector (struct data_dependence_relation *ddr)
3475 unsigned i, j;
3476 lambda_vector dist_v;
3478 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
3480 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3482 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3483 dir_v[j] = dir_from_dist (dist_v[j]);
3485 save_dir_v (ddr, dir_v);
3489 /* Helper function. Returns true when there is a dependence between
3490 data references DRA and DRB. */
3492 static bool
3493 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3494 struct data_reference *dra,
3495 struct data_reference *drb,
3496 struct loop *loop_nest)
3498 unsigned int i;
3499 tree last_conflicts;
3500 struct subscript *subscript;
3501 tree res = NULL_TREE;
3503 for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
3505 conflict_function *overlaps_a, *overlaps_b;
3507 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3508 DR_ACCESS_FN (drb, i),
3509 &overlaps_a, &overlaps_b,
3510 &last_conflicts, loop_nest);
3512 if (SUB_CONFLICTS_IN_A (subscript))
3513 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3514 if (SUB_CONFLICTS_IN_B (subscript))
3515 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3517 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3518 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3519 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3521 /* If there is any undetermined conflict function we have to
3522 give a conservative answer in case we cannot prove that
3523 no dependence exists when analyzing another subscript. */
3524 if (CF_NOT_KNOWN_P (overlaps_a)
3525 || CF_NOT_KNOWN_P (overlaps_b))
3527 res = chrec_dont_know;
3528 continue;
3531 /* When there is a subscript with no dependence we can stop. */
3532 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3533 || CF_NO_DEPENDENCE_P (overlaps_b))
3535 res = chrec_known;
3536 break;
3540 if (res == NULL_TREE)
3541 return true;
3543 if (res == chrec_known)
3544 dependence_stats.num_dependence_independent++;
3545 else
3546 dependence_stats.num_dependence_undetermined++;
3547 finalize_ddr_dependent (ddr, res);
3548 return false;
3551 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3553 static void
3554 subscript_dependence_tester (struct data_dependence_relation *ddr,
3555 struct loop *loop_nest)
3558 if (dump_file && (dump_flags & TDF_DETAILS))
3559 fprintf (dump_file, "(subscript_dependence_tester \n");
3561 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3562 dependence_stats.num_dependence_dependent++;
3564 compute_subscript_distance (ddr);
3565 if (build_classic_dist_vector (ddr, loop_nest))
3566 build_classic_dir_vector (ddr);
3568 if (dump_file && (dump_flags & TDF_DETAILS))
3569 fprintf (dump_file, ")\n");
3572 /* Returns true when all the access functions of A are affine or
3573 constant with respect to LOOP_NEST. */
3575 static bool
3576 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3577 const struct loop *loop_nest)
3579 unsigned int i;
3580 vec<tree> fns = DR_ACCESS_FNS (a);
3581 tree t;
3583 FOR_EACH_VEC_ELT (fns, i, t)
3584 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3585 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3586 return false;
3588 return true;
3591 /* Initializes an equation for an OMEGA problem using the information
3592 contained in the ACCESS_FUN. Returns true when the operation
3593 succeeded.
3595 PB is the omega constraint system.
3596 EQ is the number of the equation to be initialized.
3597 OFFSET is used for shifting the variables names in the constraints:
3598 a constrain is composed of 2 * the number of variables surrounding
3599 dependence accesses. OFFSET is set either to 0 for the first n variables,
3600 then it is set to n.
3601 ACCESS_FUN is expected to be an affine chrec. */
3603 static bool
3604 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3605 unsigned int offset, tree access_fun,
3606 struct data_dependence_relation *ddr)
3608 switch (TREE_CODE (access_fun))
3610 case POLYNOMIAL_CHREC:
3612 tree left = CHREC_LEFT (access_fun);
3613 tree right = CHREC_RIGHT (access_fun);
3614 int var = CHREC_VARIABLE (access_fun);
3615 unsigned var_idx;
3617 if (TREE_CODE (right) != INTEGER_CST)
3618 return false;
3620 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3621 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3623 /* Compute the innermost loop index. */
3624 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3626 if (offset == 0)
3627 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3628 += int_cst_value (right);
3630 switch (TREE_CODE (left))
3632 case POLYNOMIAL_CHREC:
3633 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3635 case INTEGER_CST:
3636 pb->eqs[eq].coef[0] += int_cst_value (left);
3637 return true;
3639 default:
3640 return false;
3644 case INTEGER_CST:
3645 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3646 return true;
3648 default:
3649 return false;
3653 /* As explained in the comments preceding init_omega_for_ddr, we have
3654 to set up a system for each loop level, setting outer loops
3655 variation to zero, and current loop variation to positive or zero.
3656 Save each lexico positive distance vector. */
3658 static void
3659 omega_extract_distance_vectors (omega_pb pb,
3660 struct data_dependence_relation *ddr)
3662 int eq, geq;
3663 unsigned i, j;
3664 struct loop *loopi, *loopj;
3665 enum omega_result res;
3667 /* Set a new problem for each loop in the nest. The basis is the
3668 problem that we have initialized until now. On top of this we
3669 add new constraints. */
3670 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3671 && DDR_LOOP_NEST (ddr).iterate (i, &loopi); i++)
3673 int dist = 0;
3674 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3675 DDR_NB_LOOPS (ddr));
3677 omega_copy_problem (copy, pb);
3679 /* For all the outer loops "loop_j", add "dj = 0". */
3680 for (j = 0; j < i && DDR_LOOP_NEST (ddr).iterate (j, &loopj); j++)
3682 eq = omega_add_zero_eq (copy, omega_black);
3683 copy->eqs[eq].coef[j + 1] = 1;
3686 /* For "loop_i", add "0 <= di". */
3687 geq = omega_add_zero_geq (copy, omega_black);
3688 copy->geqs[geq].coef[i + 1] = 1;
3690 /* Reduce the constraint system, and test that the current
3691 problem is feasible. */
3692 res = omega_simplify_problem (copy);
3693 if (res == omega_false
3694 || res == omega_unknown
3695 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3696 goto next_problem;
3698 for (eq = 0; eq < copy->num_subs; eq++)
3699 if (copy->subs[eq].key == (int) i + 1)
3701 dist = copy->subs[eq].coef[0];
3702 goto found_dist;
3705 if (dist == 0)
3707 /* Reinitialize problem... */
3708 omega_copy_problem (copy, pb);
3709 for (j = 0; j < i && DDR_LOOP_NEST (ddr).iterate (j, &loopj); j++)
3711 eq = omega_add_zero_eq (copy, omega_black);
3712 copy->eqs[eq].coef[j + 1] = 1;
3715 /* ..., but this time "di = 1". */
3716 eq = omega_add_zero_eq (copy, omega_black);
3717 copy->eqs[eq].coef[i + 1] = 1;
3718 copy->eqs[eq].coef[0] = -1;
3720 res = omega_simplify_problem (copy);
3721 if (res == omega_false
3722 || res == omega_unknown
3723 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3724 goto next_problem;
3726 for (eq = 0; eq < copy->num_subs; eq++)
3727 if (copy->subs[eq].key == (int) i + 1)
3729 dist = copy->subs[eq].coef[0];
3730 goto found_dist;
3734 found_dist:;
3735 /* Save the lexicographically positive distance vector. */
3736 if (dist >= 0)
3738 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3739 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3741 dist_v[i] = dist;
3743 for (eq = 0; eq < copy->num_subs; eq++)
3744 if (copy->subs[eq].key > 0)
3746 dist = copy->subs[eq].coef[0];
3747 dist_v[copy->subs[eq].key - 1] = dist;
3750 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3751 dir_v[j] = dir_from_dist (dist_v[j]);
3753 save_dist_v (ddr, dist_v);
3754 save_dir_v (ddr, dir_v);
3757 next_problem:;
3758 omega_free_problem (copy);
3762 /* This is called for each subscript of a tuple of data references:
3763 insert an equality for representing the conflicts. */
3765 static bool
3766 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3767 struct data_dependence_relation *ddr,
3768 omega_pb pb, bool *maybe_dependent)
3770 int eq;
3771 tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3772 TREE_TYPE (access_fun_b));
3773 tree fun_a = chrec_convert (type, access_fun_a, NULL);
3774 tree fun_b = chrec_convert (type, access_fun_b, NULL);
3775 tree difference = chrec_fold_minus (type, fun_a, fun_b);
3776 tree minus_one;
3778 /* When the fun_a - fun_b is not constant, the dependence is not
3779 captured by the classic distance vector representation. */
3780 if (TREE_CODE (difference) != INTEGER_CST)
3781 return false;
3783 /* ZIV test. */
3784 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3786 /* There is no dependence. */
3787 *maybe_dependent = false;
3788 return true;
3791 minus_one = build_int_cst (type, -1);
3792 fun_b = chrec_fold_multiply (type, fun_b, minus_one);
3794 eq = omega_add_zero_eq (pb, omega_black);
3795 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3796 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3797 /* There is probably a dependence, but the system of
3798 constraints cannot be built: answer "don't know". */
3799 return false;
3801 /* GCD test. */
3802 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3803 && !int_divides_p (lambda_vector_gcd
3804 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3805 2 * DDR_NB_LOOPS (ddr)),
3806 pb->eqs[eq].coef[0]))
3808 /* There is no dependence. */
3809 *maybe_dependent = false;
3810 return true;
3813 return true;
3816 /* Helper function, same as init_omega_for_ddr but specialized for
3817 data references A and B. */
3819 static bool
3820 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3821 struct data_dependence_relation *ddr,
3822 omega_pb pb, bool *maybe_dependent)
3824 unsigned i;
3825 int ineq;
3826 struct loop *loopi;
3827 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3829 /* Insert an equality per subscript. */
3830 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3832 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3833 ddr, pb, maybe_dependent))
3834 return false;
3835 else if (*maybe_dependent == false)
3837 /* There is no dependence. */
3838 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3839 return true;
3843 /* Insert inequalities: constraints corresponding to the iteration
3844 domain, i.e. the loops surrounding the references "loop_x" and
3845 the distance variables "dx". The layout of the OMEGA
3846 representation is as follows:
3847 - coef[0] is the constant
3848 - coef[1..nb_loops] are the protected variables that will not be
3849 removed by the solver: the "dx"
3850 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3852 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3853 && DDR_LOOP_NEST (ddr).iterate (i, &loopi); i++)
3855 HOST_WIDE_INT nbi = max_stmt_executions_int (loopi);
3857 /* 0 <= loop_x */
3858 ineq = omega_add_zero_geq (pb, omega_black);
3859 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3861 /* 0 <= loop_x + dx */
3862 ineq = omega_add_zero_geq (pb, omega_black);
3863 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3864 pb->geqs[ineq].coef[i + 1] = 1;
3866 if (nbi != -1)
3868 /* loop_x <= nb_iters */
3869 ineq = omega_add_zero_geq (pb, omega_black);
3870 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3871 pb->geqs[ineq].coef[0] = nbi;
3873 /* loop_x + dx <= nb_iters */
3874 ineq = omega_add_zero_geq (pb, omega_black);
3875 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3876 pb->geqs[ineq].coef[i + 1] = -1;
3877 pb->geqs[ineq].coef[0] = nbi;
3879 /* A step "dx" bigger than nb_iters is not feasible, so
3880 add "0 <= nb_iters + dx", */
3881 ineq = omega_add_zero_geq (pb, omega_black);
3882 pb->geqs[ineq].coef[i + 1] = 1;
3883 pb->geqs[ineq].coef[0] = nbi;
3884 /* and "dx <= nb_iters". */
3885 ineq = omega_add_zero_geq (pb, omega_black);
3886 pb->geqs[ineq].coef[i + 1] = -1;
3887 pb->geqs[ineq].coef[0] = nbi;
3891 omega_extract_distance_vectors (pb, ddr);
3893 return true;
3896 /* Sets up the Omega dependence problem for the data dependence
3897 relation DDR. Returns false when the constraint system cannot be
3898 built, ie. when the test answers "don't know". Returns true
3899 otherwise, and when independence has been proved (using one of the
3900 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3901 set MAYBE_DEPENDENT to true.
3903 Example: for setting up the dependence system corresponding to the
3904 conflicting accesses
3906 | loop_i
3907 | loop_j
3908 | A[i, i+1] = ...
3909 | ... A[2*j, 2*(i + j)]
3910 | endloop_j
3911 | endloop_i
3913 the following constraints come from the iteration domain:
3915 0 <= i <= Ni
3916 0 <= i + di <= Ni
3917 0 <= j <= Nj
3918 0 <= j + dj <= Nj
3920 where di, dj are the distance variables. The constraints
3921 representing the conflicting elements are:
3923 i = 2 * (j + dj)
3924 i + 1 = 2 * (i + di + j + dj)
3926 For asking that the resulting distance vector (di, dj) be
3927 lexicographically positive, we insert the constraint "di >= 0". If
3928 "di = 0" in the solution, we fix that component to zero, and we
3929 look at the inner loops: we set a new problem where all the outer
3930 loop distances are zero, and fix this inner component to be
3931 positive. When one of the components is positive, we save that
3932 distance, and set a new problem where the distance on this loop is
3933 zero, searching for other distances in the inner loops. Here is
3934 the classic example that illustrates that we have to set for each
3935 inner loop a new problem:
3937 | loop_1
3938 | loop_2
3939 | A[10]
3940 | endloop_2
3941 | endloop_1
3943 we have to save two distances (1, 0) and (0, 1).
3945 Given two array references, refA and refB, we have to set the
3946 dependence problem twice, refA vs. refB and refB vs. refA, and we
3947 cannot do a single test, as refB might occur before refA in the
3948 inner loops, and the contrary when considering outer loops: ex.
3950 | loop_0
3951 | loop_1
3952 | loop_2
3953 | T[{1,+,1}_2][{1,+,1}_1] // refA
3954 | T[{2,+,1}_2][{0,+,1}_1] // refB
3955 | endloop_2
3956 | endloop_1
3957 | endloop_0
3959 refB touches the elements in T before refA, and thus for the same
3960 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3961 but for successive loop_0 iterations, we have (1, -1, 1)
3963 The Omega solver expects the distance variables ("di" in the
3964 previous example) to come first in the constraint system (as
3965 variables to be protected, or "safe" variables), the constraint
3966 system is built using the following layout:
3968 "cst | distance vars | index vars".
3971 static bool
3972 init_omega_for_ddr (struct data_dependence_relation *ddr,
3973 bool *maybe_dependent)
3975 omega_pb pb;
3976 bool res = false;
3978 *maybe_dependent = true;
3980 if (same_access_functions (ddr))
3982 unsigned j;
3983 lambda_vector dir_v;
3985 /* Save the 0 vector. */
3986 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3987 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3988 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3989 dir_v[j] = dir_equal;
3990 save_dir_v (ddr, dir_v);
3992 /* Save the dependences carried by outer loops. */
3993 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3994 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3995 maybe_dependent);
3996 omega_free_problem (pb);
3997 return res;
4000 /* Omega expects the protected variables (those that have to be kept
4001 after elimination) to appear first in the constraint system.
4002 These variables are the distance variables. In the following
4003 initialization we declare NB_LOOPS safe variables, and the total
4004 number of variables for the constraint system is 2*NB_LOOPS. */
4005 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
4006 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
4007 maybe_dependent);
4008 omega_free_problem (pb);
4010 /* Stop computation if not decidable, or no dependence. */
4011 if (res == false || *maybe_dependent == false)
4012 return res;
4014 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
4015 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
4016 maybe_dependent);
4017 omega_free_problem (pb);
4019 return res;
4022 /* Return true when DDR contains the same information as that stored
4023 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
4025 static bool
4026 ddr_consistent_p (FILE *file,
4027 struct data_dependence_relation *ddr,
4028 vec<lambda_vector> dist_vects,
4029 vec<lambda_vector> dir_vects)
4031 unsigned int i, j;
4033 /* If dump_file is set, output there. */
4034 if (dump_file && (dump_flags & TDF_DETAILS))
4035 file = dump_file;
4037 if (dist_vects.length () != DDR_NUM_DIST_VECTS (ddr))
4039 lambda_vector b_dist_v;
4040 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
4041 dist_vects.length (),
4042 DDR_NUM_DIST_VECTS (ddr));
4044 fprintf (file, "Banerjee dist vectors:\n");
4045 FOR_EACH_VEC_ELT (dist_vects, i, b_dist_v)
4046 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
4048 fprintf (file, "Omega dist vectors:\n");
4049 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
4050 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
4052 fprintf (file, "data dependence relation:\n");
4053 dump_data_dependence_relation (file, ddr);
4055 fprintf (file, ")\n");
4056 return false;
4059 if (dir_vects.length () != DDR_NUM_DIR_VECTS (ddr))
4061 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
4062 dir_vects.length (),
4063 DDR_NUM_DIR_VECTS (ddr));
4064 return false;
4067 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
4069 lambda_vector a_dist_v;
4070 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
4072 /* Distance vectors are not ordered in the same way in the DDR
4073 and in the DIST_VECTS: search for a matching vector. */
4074 FOR_EACH_VEC_ELT (dist_vects, j, a_dist_v)
4075 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
4076 break;
4078 if (j == dist_vects.length ())
4080 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
4081 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
4082 fprintf (file, "not found in Omega dist vectors:\n");
4083 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
4084 fprintf (file, "data dependence relation:\n");
4085 dump_data_dependence_relation (file, ddr);
4086 fprintf (file, ")\n");
4090 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
4092 lambda_vector a_dir_v;
4093 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
4095 /* Direction vectors are not ordered in the same way in the DDR
4096 and in the DIR_VECTS: search for a matching vector. */
4097 FOR_EACH_VEC_ELT (dir_vects, j, a_dir_v)
4098 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
4099 break;
4101 if (j == dist_vects.length ())
4103 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
4104 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
4105 fprintf (file, "not found in Omega dir vectors:\n");
4106 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
4107 fprintf (file, "data dependence relation:\n");
4108 dump_data_dependence_relation (file, ddr);
4109 fprintf (file, ")\n");
4113 return true;
4116 /* This computes the affine dependence relation between A and B with
4117 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
4118 independence between two accesses, while CHREC_DONT_KNOW is used
4119 for representing the unknown relation.
4121 Note that it is possible to stop the computation of the dependence
4122 relation the first time we detect a CHREC_KNOWN element for a given
4123 subscript. */
4125 void
4126 compute_affine_dependence (struct data_dependence_relation *ddr,
4127 struct loop *loop_nest)
4129 struct data_reference *dra = DDR_A (ddr);
4130 struct data_reference *drb = DDR_B (ddr);
4132 if (dump_file && (dump_flags & TDF_DETAILS))
4134 fprintf (dump_file, "(compute_affine_dependence\n");
4135 fprintf (dump_file, " stmt_a: ");
4136 print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
4137 fprintf (dump_file, " stmt_b: ");
4138 print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
4141 /* Analyze only when the dependence relation is not yet known. */
4142 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4144 dependence_stats.num_dependence_tests++;
4146 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
4147 && access_functions_are_affine_or_constant_p (drb, loop_nest))
4149 if (flag_check_data_deps)
4151 /* Compute the dependences using the first algorithm. */
4152 subscript_dependence_tester (ddr, loop_nest);
4154 if (dump_file && (dump_flags & TDF_DETAILS))
4156 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
4157 dump_data_dependence_relation (dump_file, ddr);
4160 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4162 bool maybe_dependent;
4163 vec<lambda_vector> dir_vects, dist_vects;
4165 /* Save the result of the first DD analyzer. */
4166 dist_vects = DDR_DIST_VECTS (ddr);
4167 dir_vects = DDR_DIR_VECTS (ddr);
4169 /* Reset the information. */
4170 DDR_DIST_VECTS (ddr).create (0);
4171 DDR_DIR_VECTS (ddr).create (0);
4173 /* Compute the same information using Omega. */
4174 if (!init_omega_for_ddr (ddr, &maybe_dependent))
4175 goto csys_dont_know;
4177 if (dump_file && (dump_flags & TDF_DETAILS))
4179 fprintf (dump_file, "Omega Analyzer\n");
4180 dump_data_dependence_relation (dump_file, ddr);
4183 /* Check that we get the same information. */
4184 if (maybe_dependent)
4185 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
4186 dir_vects));
4189 else
4190 subscript_dependence_tester (ddr, loop_nest);
4193 /* As a last case, if the dependence cannot be determined, or if
4194 the dependence is considered too difficult to determine, answer
4195 "don't know". */
4196 else
4198 csys_dont_know:;
4199 dependence_stats.num_dependence_undetermined++;
4201 if (dump_file && (dump_flags & TDF_DETAILS))
4203 fprintf (dump_file, "Data ref a:\n");
4204 dump_data_reference (dump_file, dra);
4205 fprintf (dump_file, "Data ref b:\n");
4206 dump_data_reference (dump_file, drb);
4207 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4209 finalize_ddr_dependent (ddr, chrec_dont_know);
4213 if (dump_file && (dump_flags & TDF_DETAILS))
4215 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4216 fprintf (dump_file, ") -> no dependence\n");
4217 else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4218 fprintf (dump_file, ") -> dependence analysis failed\n");
4219 else
4220 fprintf (dump_file, ")\n");
4224 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4225 the data references in DATAREFS, in the LOOP_NEST. When
4226 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4227 relations. Return true when successful, i.e. data references number
4228 is small enough to be handled. */
4230 bool
4231 compute_all_dependences (vec<data_reference_p> datarefs,
4232 vec<ddr_p> *dependence_relations,
4233 vec<loop_p> loop_nest,
4234 bool compute_self_and_rr)
4236 struct data_dependence_relation *ddr;
4237 struct data_reference *a, *b;
4238 unsigned int i, j;
4240 if ((int) datarefs.length ()
4241 > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
4243 struct data_dependence_relation *ddr;
4245 /* Insert a single relation into dependence_relations:
4246 chrec_dont_know. */
4247 ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
4248 dependence_relations->safe_push (ddr);
4249 return false;
4252 FOR_EACH_VEC_ELT (datarefs, i, a)
4253 for (j = i + 1; datarefs.iterate (j, &b); j++)
4254 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4256 ddr = initialize_data_dependence_relation (a, b, loop_nest);
4257 dependence_relations->safe_push (ddr);
4258 if (loop_nest.exists ())
4259 compute_affine_dependence (ddr, loop_nest[0]);
4262 if (compute_self_and_rr)
4263 FOR_EACH_VEC_ELT (datarefs, i, a)
4265 ddr = initialize_data_dependence_relation (a, a, loop_nest);
4266 dependence_relations->safe_push (ddr);
4267 if (loop_nest.exists ())
4268 compute_affine_dependence (ddr, loop_nest[0]);
4271 return true;
4274 /* Describes a location of a memory reference. */
4276 typedef struct data_ref_loc_d
4278 /* Position of the memory reference. */
4279 tree *pos;
4281 /* True if the memory reference is read. */
4282 bool is_read;
4283 } data_ref_loc;
4286 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4287 true if STMT clobbers memory, false otherwise. */
4289 static bool
4290 get_references_in_stmt (gimple stmt, vec<data_ref_loc> *references)
4292 bool clobbers_memory = false;
4293 data_ref_loc ref;
4294 tree *op0, *op1;
4295 enum gimple_code stmt_code = gimple_code (stmt);
4297 references->create (0);
4299 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4300 As we cannot model data-references to not spelled out
4301 accesses give up if they may occur. */
4302 if ((stmt_code == GIMPLE_CALL
4303 && !(gimple_call_flags (stmt) & ECF_CONST))
4304 || (stmt_code == GIMPLE_ASM
4305 && (gimple_asm_volatile_p (stmt) || gimple_vuse (stmt))))
4306 clobbers_memory = true;
4308 if (!gimple_vuse (stmt))
4309 return clobbers_memory;
4311 if (stmt_code == GIMPLE_ASSIGN)
4313 tree base;
4314 op0 = gimple_assign_lhs_ptr (stmt);
4315 op1 = gimple_assign_rhs1_ptr (stmt);
4317 if (DECL_P (*op1)
4318 || (REFERENCE_CLASS_P (*op1)
4319 && (base = get_base_address (*op1))
4320 && TREE_CODE (base) != SSA_NAME))
4322 ref.pos = op1;
4323 ref.is_read = true;
4324 references->safe_push (ref);
4327 else if (stmt_code == GIMPLE_CALL)
4329 unsigned i, n;
4331 op0 = gimple_call_lhs_ptr (stmt);
4332 n = gimple_call_num_args (stmt);
4333 for (i = 0; i < n; i++)
4335 op1 = gimple_call_arg_ptr (stmt, i);
4337 if (DECL_P (*op1)
4338 || (REFERENCE_CLASS_P (*op1) && get_base_address (*op1)))
4340 ref.pos = op1;
4341 ref.is_read = true;
4342 references->safe_push (ref);
4346 else
4347 return clobbers_memory;
4349 if (*op0
4350 && (DECL_P (*op0)
4351 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0))))
4353 ref.pos = op0;
4354 ref.is_read = false;
4355 references->safe_push (ref);
4357 return clobbers_memory;
4360 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4361 reference, returns false, otherwise returns true. NEST is the outermost
4362 loop of the loop nest in which the references should be analyzed. */
4364 bool
4365 find_data_references_in_stmt (struct loop *nest, gimple stmt,
4366 vec<data_reference_p> *datarefs)
4368 unsigned i;
4369 vec<data_ref_loc> references;
4370 data_ref_loc *ref;
4371 bool ret = true;
4372 data_reference_p dr;
4374 if (get_references_in_stmt (stmt, &references))
4376 references.release ();
4377 return false;
4380 FOR_EACH_VEC_ELT (references, i, ref)
4382 dr = create_data_ref (nest, loop_containing_stmt (stmt),
4383 *ref->pos, stmt, ref->is_read);
4384 gcc_assert (dr != NULL);
4385 datarefs->safe_push (dr);
4387 references.release ();
4388 return ret;
4391 /* Stores the data references in STMT to DATAREFS. If there is an
4392 unanalyzable reference, returns false, otherwise returns true.
4393 NEST is the outermost loop of the loop nest in which the references
4394 should be instantiated, LOOP is the loop in which the references
4395 should be analyzed. */
4397 bool
4398 graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple stmt,
4399 vec<data_reference_p> *datarefs)
4401 unsigned i;
4402 vec<data_ref_loc> references;
4403 data_ref_loc *ref;
4404 bool ret = true;
4405 data_reference_p dr;
4407 if (get_references_in_stmt (stmt, &references))
4409 references.release ();
4410 return false;
4413 FOR_EACH_VEC_ELT (references, i, ref)
4415 dr = create_data_ref (nest, loop, *ref->pos, stmt, ref->is_read);
4416 gcc_assert (dr != NULL);
4417 datarefs->safe_push (dr);
4420 references.release ();
4421 return ret;
4424 /* Search the data references in LOOP, and record the information into
4425 DATAREFS. Returns chrec_dont_know when failing to analyze a
4426 difficult case, returns NULL_TREE otherwise. */
4428 tree
4429 find_data_references_in_bb (struct loop *loop, basic_block bb,
4430 vec<data_reference_p> *datarefs)
4432 gimple_stmt_iterator bsi;
4434 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4436 gimple stmt = gsi_stmt (bsi);
4438 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4440 struct data_reference *res;
4441 res = XCNEW (struct data_reference);
4442 datarefs->safe_push (res);
4444 return chrec_dont_know;
4448 return NULL_TREE;
4451 /* Search the data references in LOOP, and record the information into
4452 DATAREFS. Returns chrec_dont_know when failing to analyze a
4453 difficult case, returns NULL_TREE otherwise.
4455 TODO: This function should be made smarter so that it can handle address
4456 arithmetic as if they were array accesses, etc. */
4458 static tree
4459 find_data_references_in_loop (struct loop *loop,
4460 vec<data_reference_p> *datarefs)
4462 basic_block bb, *bbs;
4463 unsigned int i;
4465 bbs = get_loop_body_in_dom_order (loop);
4467 for (i = 0; i < loop->num_nodes; i++)
4469 bb = bbs[i];
4471 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4473 free (bbs);
4474 return chrec_dont_know;
4477 free (bbs);
4479 return NULL_TREE;
4482 /* Recursive helper function. */
4484 static bool
4485 find_loop_nest_1 (struct loop *loop, vec<loop_p> *loop_nest)
4487 /* Inner loops of the nest should not contain siblings. Example:
4488 when there are two consecutive loops,
4490 | loop_0
4491 | loop_1
4492 | A[{0, +, 1}_1]
4493 | endloop_1
4494 | loop_2
4495 | A[{0, +, 1}_2]
4496 | endloop_2
4497 | endloop_0
4499 the dependence relation cannot be captured by the distance
4500 abstraction. */
4501 if (loop->next)
4502 return false;
4504 loop_nest->safe_push (loop);
4505 if (loop->inner)
4506 return find_loop_nest_1 (loop->inner, loop_nest);
4507 return true;
4510 /* Return false when the LOOP is not well nested. Otherwise return
4511 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4512 contain the loops from the outermost to the innermost, as they will
4513 appear in the classic distance vector. */
4515 bool
4516 find_loop_nest (struct loop *loop, vec<loop_p> *loop_nest)
4518 loop_nest->safe_push (loop);
4519 if (loop->inner)
4520 return find_loop_nest_1 (loop->inner, loop_nest);
4521 return true;
4524 /* Returns true when the data dependences have been computed, false otherwise.
4525 Given a loop nest LOOP, the following vectors are returned:
4526 DATAREFS is initialized to all the array elements contained in this loop,
4527 DEPENDENCE_RELATIONS contains the relations between the data references.
4528 Compute read-read and self relations if
4529 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4531 bool
4532 compute_data_dependences_for_loop (struct loop *loop,
4533 bool compute_self_and_read_read_dependences,
4534 vec<loop_p> *loop_nest,
4535 vec<data_reference_p> *datarefs,
4536 vec<ddr_p> *dependence_relations)
4538 bool res = true;
4540 memset (&dependence_stats, 0, sizeof (dependence_stats));
4542 /* If the loop nest is not well formed, or one of the data references
4543 is not computable, give up without spending time to compute other
4544 dependences. */
4545 if (!loop
4546 || !find_loop_nest (loop, loop_nest)
4547 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
4548 || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
4549 compute_self_and_read_read_dependences))
4550 res = false;
4552 if (dump_file && (dump_flags & TDF_STATS))
4554 fprintf (dump_file, "Dependence tester statistics:\n");
4556 fprintf (dump_file, "Number of dependence tests: %d\n",
4557 dependence_stats.num_dependence_tests);
4558 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4559 dependence_stats.num_dependence_dependent);
4560 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4561 dependence_stats.num_dependence_independent);
4562 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4563 dependence_stats.num_dependence_undetermined);
4565 fprintf (dump_file, "Number of subscript tests: %d\n",
4566 dependence_stats.num_subscript_tests);
4567 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4568 dependence_stats.num_subscript_undetermined);
4569 fprintf (dump_file, "Number of same subscript function: %d\n",
4570 dependence_stats.num_same_subscript_function);
4572 fprintf (dump_file, "Number of ziv tests: %d\n",
4573 dependence_stats.num_ziv);
4574 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4575 dependence_stats.num_ziv_dependent);
4576 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4577 dependence_stats.num_ziv_independent);
4578 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4579 dependence_stats.num_ziv_unimplemented);
4581 fprintf (dump_file, "Number of siv tests: %d\n",
4582 dependence_stats.num_siv);
4583 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4584 dependence_stats.num_siv_dependent);
4585 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4586 dependence_stats.num_siv_independent);
4587 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4588 dependence_stats.num_siv_unimplemented);
4590 fprintf (dump_file, "Number of miv tests: %d\n",
4591 dependence_stats.num_miv);
4592 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4593 dependence_stats.num_miv_dependent);
4594 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4595 dependence_stats.num_miv_independent);
4596 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4597 dependence_stats.num_miv_unimplemented);
4600 return res;
4603 /* Returns true when the data dependences for the basic block BB have been
4604 computed, false otherwise.
4605 DATAREFS is initialized to all the array elements contained in this basic
4606 block, DEPENDENCE_RELATIONS contains the relations between the data
4607 references. Compute read-read and self relations if
4608 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4609 bool
4610 compute_data_dependences_for_bb (basic_block bb,
4611 bool compute_self_and_read_read_dependences,
4612 vec<data_reference_p> *datarefs,
4613 vec<ddr_p> *dependence_relations)
4615 if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know)
4616 return false;
4618 return compute_all_dependences (*datarefs, dependence_relations, vNULL,
4619 compute_self_and_read_read_dependences);
4622 /* Entry point (for testing only). Analyze all the data references
4623 and the dependence relations in LOOP.
4625 The data references are computed first.
4627 A relation on these nodes is represented by a complete graph. Some
4628 of the relations could be of no interest, thus the relations can be
4629 computed on demand.
4631 In the following function we compute all the relations. This is
4632 just a first implementation that is here for:
4633 - for showing how to ask for the dependence relations,
4634 - for the debugging the whole dependence graph,
4635 - for the dejagnu testcases and maintenance.
4637 It is possible to ask only for a part of the graph, avoiding to
4638 compute the whole dependence graph. The computed dependences are
4639 stored in a knowledge base (KB) such that later queries don't
4640 recompute the same information. The implementation of this KB is
4641 transparent to the optimizer, and thus the KB can be changed with a
4642 more efficient implementation, or the KB could be disabled. */
4643 static void
4644 analyze_all_data_dependences (struct loop *loop)
4646 unsigned int i;
4647 int nb_data_refs = 10;
4648 vec<data_reference_p> datarefs;
4649 datarefs.create (nb_data_refs);
4650 vec<ddr_p> dependence_relations;
4651 dependence_relations.create (nb_data_refs * nb_data_refs);
4652 vec<loop_p> loop_nest;
4653 loop_nest.create (3);
4655 /* Compute DDs on the whole function. */
4656 compute_data_dependences_for_loop (loop, false, &loop_nest, &datarefs,
4657 &dependence_relations);
4659 if (dump_file)
4661 dump_data_dependence_relations (dump_file, dependence_relations);
4662 fprintf (dump_file, "\n\n");
4664 if (dump_flags & TDF_DETAILS)
4665 dump_dist_dir_vectors (dump_file, dependence_relations);
4667 if (dump_flags & TDF_STATS)
4669 unsigned nb_top_relations = 0;
4670 unsigned nb_bot_relations = 0;
4671 unsigned nb_chrec_relations = 0;
4672 struct data_dependence_relation *ddr;
4674 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
4676 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4677 nb_top_relations++;
4679 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4680 nb_bot_relations++;
4682 else
4683 nb_chrec_relations++;
4686 gather_stats_on_scev_database ();
4690 loop_nest.release ();
4691 free_dependence_relations (dependence_relations);
4692 free_data_refs (datarefs);
4695 /* Computes all the data dependences and check that the results of
4696 several analyzers are the same. */
4698 void
4699 tree_check_data_deps (void)
4701 loop_iterator li;
4702 struct loop *loop_nest;
4704 FOR_EACH_LOOP (li, loop_nest, 0)
4705 analyze_all_data_dependences (loop_nest);
4708 /* Free the memory used by a data dependence relation DDR. */
4710 void
4711 free_dependence_relation (struct data_dependence_relation *ddr)
4713 if (ddr == NULL)
4714 return;
4716 if (DDR_SUBSCRIPTS (ddr).exists ())
4717 free_subscripts (DDR_SUBSCRIPTS (ddr));
4718 DDR_DIST_VECTS (ddr).release ();
4719 DDR_DIR_VECTS (ddr).release ();
4721 free (ddr);
4724 /* Free the memory used by the data dependence relations from
4725 DEPENDENCE_RELATIONS. */
4727 void
4728 free_dependence_relations (vec<ddr_p> dependence_relations)
4730 unsigned int i;
4731 struct data_dependence_relation *ddr;
4733 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
4734 if (ddr)
4735 free_dependence_relation (ddr);
4737 dependence_relations.release ();
4740 /* Free the memory used by the data references from DATAREFS. */
4742 void
4743 free_data_refs (vec<data_reference_p> datarefs)
4745 unsigned int i;
4746 struct data_reference *dr;
4748 FOR_EACH_VEC_ELT (datarefs, i, dr)
4749 free_data_ref (dr);
4750 datarefs.release ();
4755 /* Dump vertex I in RDG to FILE. */
4757 static void
4758 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
4760 struct vertex *v = &(rdg->vertices[i]);
4761 struct graph_edge *e;
4763 fprintf (file, "(vertex %d: (%s%s) (in:", i,
4764 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
4765 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
4767 if (v->pred)
4768 for (e = v->pred; e; e = e->pred_next)
4769 fprintf (file, " %d", e->src);
4771 fprintf (file, ") (out:");
4773 if (v->succ)
4774 for (e = v->succ; e; e = e->succ_next)
4775 fprintf (file, " %d", e->dest);
4777 fprintf (file, ")\n");
4778 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
4779 fprintf (file, ")\n");
4782 /* Call dump_rdg_vertex on stderr. */
4784 DEBUG_FUNCTION void
4785 debug_rdg_vertex (struct graph *rdg, int i)
4787 dump_rdg_vertex (stderr, rdg, i);
4790 /* Dump component C of RDG to FILE. If DUMPED is non-null, set the
4791 dumped vertices to that bitmap. */
4793 static void
4794 dump_rdg_component (FILE *file, struct graph *rdg, int c, bitmap dumped)
4796 int i;
4798 fprintf (file, "(%d\n", c);
4800 for (i = 0; i < rdg->n_vertices; i++)
4801 if (rdg->vertices[i].component == c)
4803 if (dumped)
4804 bitmap_set_bit (dumped, i);
4806 dump_rdg_vertex (file, rdg, i);
4809 fprintf (file, ")\n");
4812 /* Call dump_rdg_vertex on stderr. */
4814 DEBUG_FUNCTION void
4815 debug_rdg_component (struct graph *rdg, int c)
4817 dump_rdg_component (stderr, rdg, c, NULL);
4820 /* Dump the reduced dependence graph RDG to FILE. */
4822 void
4823 dump_rdg (FILE *file, struct graph *rdg)
4825 int i;
4826 bitmap dumped = BITMAP_ALLOC (NULL);
4828 fprintf (file, "(rdg\n");
4830 for (i = 0; i < rdg->n_vertices; i++)
4831 if (!bitmap_bit_p (dumped, i))
4832 dump_rdg_component (file, rdg, rdg->vertices[i].component, dumped);
4834 fprintf (file, ")\n");
4835 BITMAP_FREE (dumped);
4838 /* Call dump_rdg on stderr. */
4840 DEBUG_FUNCTION void
4841 debug_rdg (struct graph *rdg)
4843 dump_rdg (stderr, rdg);
4846 static void
4847 dot_rdg_1 (FILE *file, struct graph *rdg)
4849 int i;
4851 fprintf (file, "digraph RDG {\n");
4853 for (i = 0; i < rdg->n_vertices; i++)
4855 struct vertex *v = &(rdg->vertices[i]);
4856 struct graph_edge *e;
4858 /* Highlight reads from memory. */
4859 if (RDG_MEM_READS_STMT (rdg, i))
4860 fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
4862 /* Highlight stores to memory. */
4863 if (RDG_MEM_WRITE_STMT (rdg, i))
4864 fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
4866 if (v->succ)
4867 for (e = v->succ; e; e = e->succ_next)
4868 switch (RDGE_TYPE (e))
4870 case input_dd:
4871 fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
4872 break;
4874 case output_dd:
4875 fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
4876 break;
4878 case flow_dd:
4879 /* These are the most common dependences: don't print these. */
4880 fprintf (file, "%d -> %d \n", i, e->dest);
4881 break;
4883 case anti_dd:
4884 fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
4885 break;
4887 default:
4888 gcc_unreachable ();
4892 fprintf (file, "}\n\n");
4895 /* Display the Reduced Dependence Graph using dotty. */
4896 extern void dot_rdg (struct graph *);
4898 DEBUG_FUNCTION void
4899 dot_rdg (struct graph *rdg)
4901 /* When debugging, enable the following code. This cannot be used
4902 in production compilers because it calls "system". */
4903 #if 0
4904 FILE *file = fopen ("/tmp/rdg.dot", "w");
4905 gcc_assert (file != NULL);
4907 dot_rdg_1 (file, rdg);
4908 fclose (file);
4910 system ("dotty /tmp/rdg.dot &");
4911 #else
4912 dot_rdg_1 (stderr, rdg);
4913 #endif
4916 /* Returns the index of STMT in RDG. */
4919 rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple stmt)
4921 int index = gimple_uid (stmt);
4922 gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
4923 return index;
4926 /* Creates an edge in RDG for each distance vector from DDR. The
4927 order that we keep track of in the RDG is the order in which
4928 statements have to be executed. */
4930 static void
4931 create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4933 struct graph_edge *e;
4934 int va, vb;
4935 data_reference_p dra = DDR_A (ddr);
4936 data_reference_p drb = DDR_B (ddr);
4937 unsigned level = ddr_dependence_level (ddr);
4939 /* For non scalar dependences, when the dependence is REVERSED,
4940 statement B has to be executed before statement A. */
4941 if (level > 0
4942 && !DDR_REVERSED_P (ddr))
4944 data_reference_p tmp = dra;
4945 dra = drb;
4946 drb = tmp;
4949 va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
4950 vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
4952 if (va < 0 || vb < 0)
4953 return;
4955 e = add_edge (rdg, va, vb);
4956 e->data = XNEW (struct rdg_edge);
4958 RDGE_LEVEL (e) = level;
4959 RDGE_RELATION (e) = ddr;
4961 /* Determines the type of the data dependence. */
4962 if (DR_IS_READ (dra) && DR_IS_READ (drb))
4963 RDGE_TYPE (e) = input_dd;
4964 else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
4965 RDGE_TYPE (e) = output_dd;
4966 else if (DR_IS_WRITE (dra) && DR_IS_READ (drb))
4967 RDGE_TYPE (e) = flow_dd;
4968 else if (DR_IS_READ (dra) && DR_IS_WRITE (drb))
4969 RDGE_TYPE (e) = anti_dd;
4972 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
4973 the index of DEF in RDG. */
4975 static void
4976 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
4978 use_operand_p imm_use_p;
4979 imm_use_iterator iterator;
4981 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
4983 struct graph_edge *e;
4984 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
4986 if (use < 0)
4987 continue;
4989 e = add_edge (rdg, idef, use);
4990 e->data = XNEW (struct rdg_edge);
4991 RDGE_TYPE (e) = flow_dd;
4992 RDGE_RELATION (e) = NULL;
4996 /* Creates the edges of the reduced dependence graph RDG. */
4998 static void
4999 create_rdg_edges (struct graph *rdg, vec<ddr_p> ddrs)
5001 int i;
5002 struct data_dependence_relation *ddr;
5003 def_operand_p def_p;
5004 ssa_op_iter iter;
5006 FOR_EACH_VEC_ELT (ddrs, i, ddr)
5007 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
5008 create_rdg_edge_for_ddr (rdg, ddr);
5010 for (i = 0; i < rdg->n_vertices; i++)
5011 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
5012 iter, SSA_OP_DEF)
5013 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
5016 /* Build the vertices of the reduced dependence graph RDG. */
5018 static void
5019 create_rdg_vertices (struct graph *rdg, vec<gimple> stmts, loop_p loop)
5021 int i, j;
5022 gimple stmt;
5024 FOR_EACH_VEC_ELT (stmts, i, stmt)
5026 vec<data_ref_loc> references;
5027 data_ref_loc *ref;
5028 struct vertex *v = &(rdg->vertices[i]);
5030 /* Record statement to vertex mapping. */
5031 gimple_set_uid (stmt, i);
5033 v->data = XNEW (struct rdg_vertex);
5034 RDGV_STMT (v) = stmt;
5035 RDGV_DATAREFS (v).create (0);
5036 RDGV_HAS_MEM_WRITE (v) = false;
5037 RDGV_HAS_MEM_READS (v) = false;
5038 if (gimple_code (stmt) == GIMPLE_PHI)
5039 continue;
5041 get_references_in_stmt (stmt, &references);
5042 FOR_EACH_VEC_ELT (references, j, ref)
5044 data_reference_p dr;
5045 if (!ref->is_read)
5046 RDGV_HAS_MEM_WRITE (v) = true;
5047 else
5048 RDGV_HAS_MEM_READS (v) = true;
5049 dr = create_data_ref (loop, loop_containing_stmt (stmt),
5050 *ref->pos, stmt, ref->is_read);
5051 if (dr)
5052 RDGV_DATAREFS (v).safe_push (dr);
5054 references.release ();
5058 /* Initialize STMTS with all the statements of LOOP. When
5059 INCLUDE_PHIS is true, include also the PHI nodes. The order in
5060 which we discover statements is important as
5061 generate_loops_for_partition is using the same traversal for
5062 identifying statements. */
5064 static void
5065 stmts_from_loop (struct loop *loop, vec<gimple> *stmts)
5067 unsigned int i;
5068 basic_block *bbs = get_loop_body_in_dom_order (loop);
5070 for (i = 0; i < loop->num_nodes; i++)
5072 basic_block bb = bbs[i];
5073 gimple_stmt_iterator bsi;
5074 gimple stmt;
5076 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5077 stmts->safe_push (gsi_stmt (bsi));
5079 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5081 stmt = gsi_stmt (bsi);
5082 if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
5083 stmts->safe_push (stmt);
5087 free (bbs);
5090 /* Returns true when all the dependences are computable. */
5092 static bool
5093 known_dependences_p (vec<ddr_p> dependence_relations)
5095 ddr_p ddr;
5096 unsigned int i;
5098 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
5099 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
5100 return false;
5102 return true;
5105 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5106 statement of the loop nest, and one edge per data dependence or
5107 scalar dependence. */
5109 struct graph *
5110 build_empty_rdg (int n_stmts)
5112 struct graph *rdg = new_graph (n_stmts);
5113 return rdg;
5116 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5117 statement of the loop nest, and one edge per data dependence or
5118 scalar dependence. */
5120 struct graph *
5121 build_rdg (struct loop *loop,
5122 vec<loop_p> *loop_nest,
5123 vec<ddr_p> *dependence_relations,
5124 vec<data_reference_p> *datarefs)
5126 struct graph *rdg = NULL;
5128 if (compute_data_dependences_for_loop (loop, false, loop_nest, datarefs,
5129 dependence_relations)
5130 && known_dependences_p (*dependence_relations))
5132 vec<gimple> stmts;
5133 stmts.create (10);
5134 stmts_from_loop (loop, &stmts);
5135 rdg = build_empty_rdg (stmts.length ());
5136 create_rdg_vertices (rdg, stmts, loop);
5137 create_rdg_edges (rdg, *dependence_relations);
5138 stmts.release ();
5141 return rdg;
5144 /* Free the reduced dependence graph RDG. */
5146 void
5147 free_rdg (struct graph *rdg)
5149 int i;
5151 for (i = 0; i < rdg->n_vertices; i++)
5153 struct vertex *v = &(rdg->vertices[i]);
5154 struct graph_edge *e;
5156 for (e = v->succ; e; e = e->succ_next)
5157 free (e->data);
5159 gimple_set_uid (RDGV_STMT (v), -1);
5160 free_data_refs (RDGV_DATAREFS (v));
5161 free (v->data);
5164 free_graph (rdg);
5167 /* Determines whether the statement from vertex V of the RDG has a
5168 definition used outside the loop that contains this statement. */
5170 bool
5171 rdg_defs_used_in_other_loops_p (struct graph *rdg, int v)
5173 gimple stmt = RDG_STMT (rdg, v);
5174 struct loop *loop = loop_containing_stmt (stmt);
5175 use_operand_p imm_use_p;
5176 imm_use_iterator iterator;
5177 ssa_op_iter it;
5178 def_operand_p def_p;
5180 if (!loop)
5181 return true;
5183 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, it, SSA_OP_DEF)
5185 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, DEF_FROM_PTR (def_p))
5187 if (loop_containing_stmt (USE_STMT (imm_use_p)) != loop)
5188 return true;
5192 return false;