ChangeLog config
[official-gcc.git] / gcc / tree-data-ref.c
blobe1d2dfcadca5d6c2acf9288c001522cba94d62d4
1 /* Data references and dependences detectors.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* This pass walks a given loop structure searching for array
23 references. The information about the array accesses is recorded
24 in DATA_REFERENCE structures.
26 The basic test for determining the dependences is:
27 given two access functions chrec1 and chrec2 to a same array, and
28 x and y two vectors from the iteration domain, the same element of
29 the array is accessed twice at iterations x and y if and only if:
30 | chrec1 (x) == chrec2 (y).
32 The goals of this analysis are:
34 - to determine the independence: the relation between two
35 independent accesses is qualified with the chrec_known (this
36 information allows a loop parallelization),
38 - when two data references access the same data, to qualify the
39 dependence relation with classic dependence representations:
41 - distance vectors
42 - direction vectors
43 - loop carried level dependence
44 - polyhedron dependence
45 or with the chains of recurrences based representation,
47 - to define a knowledge base for storing the data dependence
48 information,
50 - to define an interface to access this data.
53 Definitions:
55 - subscript: given two array accesses a subscript is the tuple
56 composed of the access functions for a given dimension. Example:
57 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
58 (f1, g1), (f2, g2), (f3, g3).
60 - Diophantine equation: an equation whose coefficients and
61 solutions are integer constants, for example the equation
62 | 3*x + 2*y = 1
63 has an integer solution x = 1 and y = -1.
65 References:
67 - "Advanced Compilation for High Performance Computing" by Randy
68 Allen and Ken Kennedy.
69 http://citeseer.ist.psu.edu/goff91practical.html
71 - "Loop Transformations for Restructuring Compilers - The Foundations"
72 by Utpal Banerjee.
77 #include "config.h"
78 #include "system.h"
79 #include "coretypes.h"
80 #include "tm.h"
81 #include "ggc.h"
82 #include "flags.h"
83 #include "tree.h"
84 #include "basic-block.h"
85 #include "tree-pretty-print.h"
86 #include "gimple-pretty-print.h"
87 #include "tree-flow.h"
88 #include "tree-dump.h"
89 #include "timevar.h"
90 #include "cfgloop.h"
91 #include "tree-data-ref.h"
92 #include "tree-scalar-evolution.h"
93 #include "tree-pass.h"
94 #include "langhooks.h"
96 static struct datadep_stats
98 int num_dependence_tests;
99 int num_dependence_dependent;
100 int num_dependence_independent;
101 int num_dependence_undetermined;
103 int num_subscript_tests;
104 int num_subscript_undetermined;
105 int num_same_subscript_function;
107 int num_ziv;
108 int num_ziv_independent;
109 int num_ziv_dependent;
110 int num_ziv_unimplemented;
112 int num_siv;
113 int num_siv_independent;
114 int num_siv_dependent;
115 int num_siv_unimplemented;
117 int num_miv;
118 int num_miv_independent;
119 int num_miv_dependent;
120 int num_miv_unimplemented;
121 } dependence_stats;
123 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
124 struct data_reference *,
125 struct data_reference *,
126 struct loop *);
127 /* Returns true iff A divides B. */
129 static inline bool
130 tree_fold_divides_p (const_tree a, const_tree b)
132 gcc_assert (TREE_CODE (a) == INTEGER_CST);
133 gcc_assert (TREE_CODE (b) == INTEGER_CST);
134 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a, 0));
137 /* Returns true iff A divides B. */
139 static inline bool
140 int_divides_p (int a, int b)
142 return ((b % a) == 0);
147 /* Dump into FILE all the data references from DATAREFS. */
149 void
150 dump_data_references (FILE *file, VEC (data_reference_p, heap) *datarefs)
152 unsigned int i;
153 struct data_reference *dr;
155 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
156 dump_data_reference (file, dr);
159 /* Dump into STDERR all the data references from DATAREFS. */
161 DEBUG_FUNCTION void
162 debug_data_references (VEC (data_reference_p, heap) *datarefs)
164 dump_data_references (stderr, datarefs);
167 /* Dump to STDERR all the dependence relations from DDRS. */
169 DEBUG_FUNCTION void
170 debug_data_dependence_relations (VEC (ddr_p, heap) *ddrs)
172 dump_data_dependence_relations (stderr, ddrs);
175 /* Dump into FILE all the dependence relations from DDRS. */
177 void
178 dump_data_dependence_relations (FILE *file,
179 VEC (ddr_p, heap) *ddrs)
181 unsigned int i;
182 struct data_dependence_relation *ddr;
184 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
185 dump_data_dependence_relation (file, ddr);
188 /* Print to STDERR the data_reference DR. */
190 DEBUG_FUNCTION void
191 debug_data_reference (struct data_reference *dr)
193 dump_data_reference (stderr, dr);
196 /* Dump function for a DATA_REFERENCE structure. */
198 void
199 dump_data_reference (FILE *outf,
200 struct data_reference *dr)
202 unsigned int i;
204 fprintf (outf, "#(Data Ref: \n# stmt: ");
205 print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
206 fprintf (outf, "# ref: ");
207 print_generic_stmt (outf, DR_REF (dr), 0);
208 fprintf (outf, "# base_object: ");
209 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
211 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
213 fprintf (outf, "# Access function %d: ", i);
214 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
216 fprintf (outf, "#)\n");
219 /* Dumps the affine function described by FN to the file OUTF. */
221 static void
222 dump_affine_function (FILE *outf, affine_fn fn)
224 unsigned i;
225 tree coef;
227 print_generic_expr (outf, VEC_index (tree, fn, 0), TDF_SLIM);
228 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
230 fprintf (outf, " + ");
231 print_generic_expr (outf, coef, TDF_SLIM);
232 fprintf (outf, " * x_%u", i);
236 /* Dumps the conflict function CF to the file OUTF. */
238 static void
239 dump_conflict_function (FILE *outf, conflict_function *cf)
241 unsigned i;
243 if (cf->n == NO_DEPENDENCE)
244 fprintf (outf, "no dependence\n");
245 else if (cf->n == NOT_KNOWN)
246 fprintf (outf, "not known\n");
247 else
249 for (i = 0; i < cf->n; i++)
251 fprintf (outf, "[");
252 dump_affine_function (outf, cf->fns[i]);
253 fprintf (outf, "]\n");
258 /* Dump function for a SUBSCRIPT structure. */
260 void
261 dump_subscript (FILE *outf, struct subscript *subscript)
263 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
265 fprintf (outf, "\n (subscript \n");
266 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
267 dump_conflict_function (outf, cf);
268 if (CF_NONTRIVIAL_P (cf))
270 tree last_iteration = SUB_LAST_CONFLICT (subscript);
271 fprintf (outf, " last_conflict: ");
272 print_generic_stmt (outf, last_iteration, 0);
275 cf = SUB_CONFLICTS_IN_B (subscript);
276 fprintf (outf, " iterations_that_access_an_element_twice_in_B: ");
277 dump_conflict_function (outf, cf);
278 if (CF_NONTRIVIAL_P (cf))
280 tree last_iteration = SUB_LAST_CONFLICT (subscript);
281 fprintf (outf, " last_conflict: ");
282 print_generic_stmt (outf, last_iteration, 0);
285 fprintf (outf, " (Subscript distance: ");
286 print_generic_stmt (outf, SUB_DISTANCE (subscript), 0);
287 fprintf (outf, " )\n");
288 fprintf (outf, " )\n");
291 /* Print the classic direction vector DIRV to OUTF. */
293 void
294 print_direction_vector (FILE *outf,
295 lambda_vector dirv,
296 int length)
298 int eq;
300 for (eq = 0; eq < length; eq++)
302 enum data_dependence_direction dir = ((enum data_dependence_direction)
303 dirv[eq]);
305 switch (dir)
307 case dir_positive:
308 fprintf (outf, " +");
309 break;
310 case dir_negative:
311 fprintf (outf, " -");
312 break;
313 case dir_equal:
314 fprintf (outf, " =");
315 break;
316 case dir_positive_or_equal:
317 fprintf (outf, " +=");
318 break;
319 case dir_positive_or_negative:
320 fprintf (outf, " +-");
321 break;
322 case dir_negative_or_equal:
323 fprintf (outf, " -=");
324 break;
325 case dir_star:
326 fprintf (outf, " *");
327 break;
328 default:
329 fprintf (outf, "indep");
330 break;
333 fprintf (outf, "\n");
336 /* Print a vector of direction vectors. */
338 void
339 print_dir_vectors (FILE *outf, VEC (lambda_vector, heap) *dir_vects,
340 int length)
342 unsigned j;
343 lambda_vector v;
345 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, v)
346 print_direction_vector (outf, v, length);
349 /* Print a vector of distance vectors. */
351 void
352 print_dist_vectors (FILE *outf, VEC (lambda_vector, heap) *dist_vects,
353 int length)
355 unsigned j;
356 lambda_vector v;
358 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, v)
359 print_lambda_vector (outf, v, length);
362 /* Debug version. */
364 DEBUG_FUNCTION void
365 debug_data_dependence_relation (struct data_dependence_relation *ddr)
367 dump_data_dependence_relation (stderr, ddr);
370 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
372 void
373 dump_data_dependence_relation (FILE *outf,
374 struct data_dependence_relation *ddr)
376 struct data_reference *dra, *drb;
378 fprintf (outf, "(Data Dep: \n");
380 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
382 if (ddr)
384 dra = DDR_A (ddr);
385 drb = DDR_B (ddr);
386 if (dra)
387 dump_data_reference (outf, dra);
388 else
389 fprintf (outf, " (nil)\n");
390 if (drb)
391 dump_data_reference (outf, drb);
392 else
393 fprintf (outf, " (nil)\n");
395 fprintf (outf, " (don't know)\n)\n");
396 return;
399 dra = DDR_A (ddr);
400 drb = DDR_B (ddr);
401 dump_data_reference (outf, dra);
402 dump_data_reference (outf, drb);
404 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
405 fprintf (outf, " (no dependence)\n");
407 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
409 unsigned int i;
410 struct loop *loopi;
412 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
414 fprintf (outf, " access_fn_A: ");
415 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
416 fprintf (outf, " access_fn_B: ");
417 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
418 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
421 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
422 fprintf (outf, " loop nest: (");
423 FOR_EACH_VEC_ELT (loop_p, DDR_LOOP_NEST (ddr), i, loopi)
424 fprintf (outf, "%d ", loopi->num);
425 fprintf (outf, ")\n");
427 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
429 fprintf (outf, " distance_vector: ");
430 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
431 DDR_NB_LOOPS (ddr));
434 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
436 fprintf (outf, " direction_vector: ");
437 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
438 DDR_NB_LOOPS (ddr));
442 fprintf (outf, ")\n");
445 /* Dump function for a DATA_DEPENDENCE_DIRECTION structure. */
447 void
448 dump_data_dependence_direction (FILE *file,
449 enum data_dependence_direction dir)
451 switch (dir)
453 case dir_positive:
454 fprintf (file, "+");
455 break;
457 case dir_negative:
458 fprintf (file, "-");
459 break;
461 case dir_equal:
462 fprintf (file, "=");
463 break;
465 case dir_positive_or_negative:
466 fprintf (file, "+-");
467 break;
469 case dir_positive_or_equal:
470 fprintf (file, "+=");
471 break;
473 case dir_negative_or_equal:
474 fprintf (file, "-=");
475 break;
477 case dir_star:
478 fprintf (file, "*");
479 break;
481 default:
482 break;
486 /* Dumps the distance and direction vectors in FILE. DDRS contains
487 the dependence relations, and VECT_SIZE is the size of the
488 dependence vectors, or in other words the number of loops in the
489 considered nest. */
491 void
492 dump_dist_dir_vectors (FILE *file, VEC (ddr_p, heap) *ddrs)
494 unsigned int i, j;
495 struct data_dependence_relation *ddr;
496 lambda_vector v;
498 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
499 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
501 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), j, v)
503 fprintf (file, "DISTANCE_V (");
504 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
505 fprintf (file, ")\n");
508 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), j, v)
510 fprintf (file, "DIRECTION_V (");
511 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
512 fprintf (file, ")\n");
516 fprintf (file, "\n\n");
519 /* Dumps the data dependence relations DDRS in FILE. */
521 void
522 dump_ddrs (FILE *file, VEC (ddr_p, heap) *ddrs)
524 unsigned int i;
525 struct data_dependence_relation *ddr;
527 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
528 dump_data_dependence_relation (file, ddr);
530 fprintf (file, "\n\n");
533 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
534 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
535 constant of type ssizetype, and returns true. If we cannot do this
536 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
537 is returned. */
539 static bool
540 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
541 tree *var, tree *off)
543 tree var0, var1;
544 tree off0, off1;
545 enum tree_code ocode = code;
547 *var = NULL_TREE;
548 *off = NULL_TREE;
550 switch (code)
552 case INTEGER_CST:
553 *var = build_int_cst (type, 0);
554 *off = fold_convert (ssizetype, op0);
555 return true;
557 case POINTER_PLUS_EXPR:
558 ocode = PLUS_EXPR;
559 /* FALLTHROUGH */
560 case PLUS_EXPR:
561 case MINUS_EXPR:
562 split_constant_offset (op0, &var0, &off0);
563 split_constant_offset (op1, &var1, &off1);
564 *var = fold_build2 (code, type, var0, var1);
565 *off = size_binop (ocode, off0, off1);
566 return true;
568 case MULT_EXPR:
569 if (TREE_CODE (op1) != INTEGER_CST)
570 return false;
572 split_constant_offset (op0, &var0, &off0);
573 *var = fold_build2 (MULT_EXPR, type, var0, op1);
574 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
575 return true;
577 case ADDR_EXPR:
579 tree base, poffset;
580 HOST_WIDE_INT pbitsize, pbitpos;
581 enum machine_mode pmode;
582 int punsignedp, pvolatilep;
584 op0 = TREE_OPERAND (op0, 0);
585 if (!handled_component_p (op0))
586 return false;
588 base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
589 &pmode, &punsignedp, &pvolatilep, false);
591 if (pbitpos % BITS_PER_UNIT != 0)
592 return false;
593 base = build_fold_addr_expr (base);
594 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
596 if (poffset)
598 split_constant_offset (poffset, &poffset, &off1);
599 off0 = size_binop (PLUS_EXPR, off0, off1);
600 if (POINTER_TYPE_P (TREE_TYPE (base)))
601 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base),
602 base, fold_convert (sizetype, poffset));
603 else
604 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
605 fold_convert (TREE_TYPE (base), poffset));
608 var0 = fold_convert (type, base);
610 /* If variable length types are involved, punt, otherwise casts
611 might be converted into ARRAY_REFs in gimplify_conversion.
612 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
613 possibly no longer appears in current GIMPLE, might resurface.
614 This perhaps could run
615 if (CONVERT_EXPR_P (var0))
617 gimplify_conversion (&var0);
618 // Attempt to fill in any within var0 found ARRAY_REF's
619 // element size from corresponding op embedded ARRAY_REF,
620 // if unsuccessful, just punt.
621 } */
622 while (POINTER_TYPE_P (type))
623 type = TREE_TYPE (type);
624 if (int_size_in_bytes (type) < 0)
625 return false;
627 *var = var0;
628 *off = off0;
629 return true;
632 case SSA_NAME:
634 gimple def_stmt = SSA_NAME_DEF_STMT (op0);
635 enum tree_code subcode;
637 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
638 return false;
640 var0 = gimple_assign_rhs1 (def_stmt);
641 subcode = gimple_assign_rhs_code (def_stmt);
642 var1 = gimple_assign_rhs2 (def_stmt);
644 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
646 CASE_CONVERT:
648 /* We must not introduce undefined overflow, and we must not change the value.
649 Hence we're okay if the inner type doesn't overflow to start with
650 (pointer or signed), the outer type also is an integer or pointer
651 and the outer precision is at least as large as the inner. */
652 tree itype = TREE_TYPE (op0);
653 if ((POINTER_TYPE_P (itype)
654 || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
655 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
656 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
658 split_constant_offset (op0, &var0, off);
659 *var = fold_convert (type, var0);
660 return true;
662 return false;
665 default:
666 return false;
670 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
671 will be ssizetype. */
673 void
674 split_constant_offset (tree exp, tree *var, tree *off)
676 tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
677 enum tree_code code;
679 *var = exp;
680 *off = ssize_int (0);
681 STRIP_NOPS (exp);
683 if (automatically_generated_chrec_p (exp))
684 return;
686 otype = TREE_TYPE (exp);
687 code = TREE_CODE (exp);
688 extract_ops_from_tree (exp, &code, &op0, &op1);
689 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
691 *var = fold_convert (type, e);
692 *off = o;
696 /* Returns the address ADDR of an object in a canonical shape (without nop
697 casts, and with type of pointer to the object). */
699 static tree
700 canonicalize_base_object_address (tree addr)
702 tree orig = addr;
704 STRIP_NOPS (addr);
706 /* The base address may be obtained by casting from integer, in that case
707 keep the cast. */
708 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
709 return orig;
711 if (TREE_CODE (addr) != ADDR_EXPR)
712 return addr;
714 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
717 /* Analyzes the behavior of the memory reference DR in the innermost loop or
718 basic block that contains it. Returns true if analysis succeed or false
719 otherwise. */
721 bool
722 dr_analyze_innermost (struct data_reference *dr)
724 gimple stmt = DR_STMT (dr);
725 struct loop *loop = loop_containing_stmt (stmt);
726 tree ref = DR_REF (dr);
727 HOST_WIDE_INT pbitsize, pbitpos;
728 tree base, poffset;
729 enum machine_mode pmode;
730 int punsignedp, pvolatilep;
731 affine_iv base_iv, offset_iv;
732 tree init, dinit, step;
733 bool in_loop = (loop && loop->num);
735 if (dump_file && (dump_flags & TDF_DETAILS))
736 fprintf (dump_file, "analyze_innermost: ");
738 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
739 &pmode, &punsignedp, &pvolatilep, false);
740 gcc_assert (base != NULL_TREE);
742 if (pbitpos % BITS_PER_UNIT != 0)
744 if (dump_file && (dump_flags & TDF_DETAILS))
745 fprintf (dump_file, "failed: bit offset alignment.\n");
746 return false;
749 if (TREE_CODE (base) == MEM_REF)
751 if (!integer_zerop (TREE_OPERAND (base, 1)))
753 if (!poffset)
755 double_int moff = mem_ref_offset (base);
756 poffset = double_int_to_tree (sizetype, moff);
758 else
759 poffset = size_binop (PLUS_EXPR, poffset, TREE_OPERAND (base, 1));
761 base = TREE_OPERAND (base, 0);
763 else
764 base = build_fold_addr_expr (base);
765 if (in_loop)
767 if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
768 false))
770 if (dump_file && (dump_flags & TDF_DETAILS))
771 fprintf (dump_file, "failed: evolution of base is not affine.\n");
772 return false;
775 else
777 base_iv.base = base;
778 base_iv.step = ssize_int (0);
779 base_iv.no_overflow = true;
782 if (!poffset)
784 offset_iv.base = ssize_int (0);
785 offset_iv.step = ssize_int (0);
787 else
789 if (!in_loop)
791 offset_iv.base = poffset;
792 offset_iv.step = ssize_int (0);
794 else if (!simple_iv (loop, loop_containing_stmt (stmt),
795 poffset, &offset_iv, false))
797 if (dump_file && (dump_flags & TDF_DETAILS))
798 fprintf (dump_file, "failed: evolution of offset is not"
799 " affine.\n");
800 return false;
804 init = ssize_int (pbitpos / BITS_PER_UNIT);
805 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
806 init = size_binop (PLUS_EXPR, init, dinit);
807 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
808 init = size_binop (PLUS_EXPR, init, dinit);
810 step = size_binop (PLUS_EXPR,
811 fold_convert (ssizetype, base_iv.step),
812 fold_convert (ssizetype, offset_iv.step));
814 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
816 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
817 DR_INIT (dr) = init;
818 DR_STEP (dr) = step;
820 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
822 if (dump_file && (dump_flags & TDF_DETAILS))
823 fprintf (dump_file, "success.\n");
825 return true;
828 /* Determines the base object and the list of indices of memory reference
829 DR, analyzed in loop nest NEST. */
831 static void
832 dr_analyze_indices (struct data_reference *dr, struct loop *nest)
834 gimple stmt = DR_STMT (dr);
835 struct loop *loop = loop_containing_stmt (stmt);
836 VEC (tree, heap) *access_fns = NULL;
837 tree ref = unshare_expr (DR_REF (dr)), aref = ref, op;
838 tree base, off, access_fn = NULL_TREE;
839 basic_block before_loop = NULL;
841 if (nest)
842 before_loop = block_before_loop (nest);
844 while (handled_component_p (aref))
846 if (TREE_CODE (aref) == ARRAY_REF)
848 op = TREE_OPERAND (aref, 1);
849 if (nest)
851 access_fn = analyze_scalar_evolution (loop, op);
852 access_fn = instantiate_scev (before_loop, loop, access_fn);
853 VEC_safe_push (tree, heap, access_fns, access_fn);
856 TREE_OPERAND (aref, 1) = build_int_cst (TREE_TYPE (op), 0);
859 aref = TREE_OPERAND (aref, 0);
862 if (nest
863 && (INDIRECT_REF_P (aref)
864 || TREE_CODE (aref) == MEM_REF))
866 op = TREE_OPERAND (aref, 0);
867 access_fn = analyze_scalar_evolution (loop, op);
868 access_fn = instantiate_scev (before_loop, loop, access_fn);
869 base = initial_condition (access_fn);
870 split_constant_offset (base, &base, &off);
871 if (TREE_CODE (aref) == MEM_REF)
872 off = size_binop (PLUS_EXPR, off,
873 fold_convert (ssizetype, TREE_OPERAND (aref, 1)));
874 access_fn = chrec_replace_initial_condition (access_fn,
875 fold_convert (TREE_TYPE (base), off));
877 TREE_OPERAND (aref, 0) = base;
878 VEC_safe_push (tree, heap, access_fns, access_fn);
881 if (TREE_CODE (aref) == MEM_REF)
882 TREE_OPERAND (aref, 1)
883 = build_int_cst (TREE_TYPE (TREE_OPERAND (aref, 1)), 0);
885 if (TREE_CODE (ref) == MEM_REF
886 && TREE_CODE (TREE_OPERAND (ref, 0)) == ADDR_EXPR
887 && integer_zerop (TREE_OPERAND (ref, 1)))
888 ref = TREE_OPERAND (TREE_OPERAND (ref, 0), 0);
890 /* For canonicalization purposes we'd like to strip all outermost
891 zero-offset component-refs.
892 ??? For now simply handle zero-index array-refs. */
893 while (TREE_CODE (ref) == ARRAY_REF
894 && integer_zerop (TREE_OPERAND (ref, 1)))
895 ref = TREE_OPERAND (ref, 0);
897 DR_BASE_OBJECT (dr) = ref;
898 DR_ACCESS_FNS (dr) = access_fns;
901 /* Extracts the alias analysis information from the memory reference DR. */
903 static void
904 dr_analyze_alias (struct data_reference *dr)
906 tree ref = DR_REF (dr);
907 tree base = get_base_address (ref), addr;
909 if (INDIRECT_REF_P (base)
910 || TREE_CODE (base) == MEM_REF)
912 addr = TREE_OPERAND (base, 0);
913 if (TREE_CODE (addr) == SSA_NAME)
914 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
918 /* Returns true if the address of DR is invariant. */
920 static bool
921 dr_address_invariant_p (struct data_reference *dr)
923 unsigned i;
924 tree idx;
926 FOR_EACH_VEC_ELT (tree, DR_ACCESS_FNS (dr), i, idx)
927 if (tree_contains_chrecs (idx, NULL))
928 return false;
930 return true;
933 /* Frees data reference DR. */
935 void
936 free_data_ref (data_reference_p dr)
938 VEC_free (tree, heap, DR_ACCESS_FNS (dr));
939 free (dr);
942 /* Analyzes memory reference MEMREF accessed in STMT. The reference
943 is read if IS_READ is true, write otherwise. Returns the
944 data_reference description of MEMREF. NEST is the outermost loop of the
945 loop nest in that the reference should be analyzed. */
947 struct data_reference *
948 create_data_ref (struct loop *nest, tree memref, gimple stmt, bool is_read)
950 struct data_reference *dr;
952 if (dump_file && (dump_flags & TDF_DETAILS))
954 fprintf (dump_file, "Creating dr for ");
955 print_generic_expr (dump_file, memref, TDF_SLIM);
956 fprintf (dump_file, "\n");
959 dr = XCNEW (struct data_reference);
960 DR_STMT (dr) = stmt;
961 DR_REF (dr) = memref;
962 DR_IS_READ (dr) = is_read;
964 dr_analyze_innermost (dr);
965 dr_analyze_indices (dr, nest);
966 dr_analyze_alias (dr);
968 if (dump_file && (dump_flags & TDF_DETAILS))
970 fprintf (dump_file, "\tbase_address: ");
971 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
972 fprintf (dump_file, "\n\toffset from base address: ");
973 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
974 fprintf (dump_file, "\n\tconstant offset from base address: ");
975 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
976 fprintf (dump_file, "\n\tstep: ");
977 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
978 fprintf (dump_file, "\n\taligned to: ");
979 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
980 fprintf (dump_file, "\n\tbase_object: ");
981 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
982 fprintf (dump_file, "\n");
985 return dr;
988 /* Returns true if FNA == FNB. */
990 static bool
991 affine_function_equal_p (affine_fn fna, affine_fn fnb)
993 unsigned i, n = VEC_length (tree, fna);
995 if (n != VEC_length (tree, fnb))
996 return false;
998 for (i = 0; i < n; i++)
999 if (!operand_equal_p (VEC_index (tree, fna, i),
1000 VEC_index (tree, fnb, i), 0))
1001 return false;
1003 return true;
1006 /* If all the functions in CF are the same, returns one of them,
1007 otherwise returns NULL. */
1009 static affine_fn
1010 common_affine_function (conflict_function *cf)
1012 unsigned i;
1013 affine_fn comm;
1015 if (!CF_NONTRIVIAL_P (cf))
1016 return NULL;
1018 comm = cf->fns[0];
1020 for (i = 1; i < cf->n; i++)
1021 if (!affine_function_equal_p (comm, cf->fns[i]))
1022 return NULL;
1024 return comm;
1027 /* Returns the base of the affine function FN. */
1029 static tree
1030 affine_function_base (affine_fn fn)
1032 return VEC_index (tree, fn, 0);
1035 /* Returns true if FN is a constant. */
1037 static bool
1038 affine_function_constant_p (affine_fn fn)
1040 unsigned i;
1041 tree coef;
1043 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
1044 if (!integer_zerop (coef))
1045 return false;
1047 return true;
1050 /* Returns true if FN is the zero constant function. */
1052 static bool
1053 affine_function_zero_p (affine_fn fn)
1055 return (integer_zerop (affine_function_base (fn))
1056 && affine_function_constant_p (fn));
1059 /* Returns a signed integer type with the largest precision from TA
1060 and TB. */
1062 static tree
1063 signed_type_for_types (tree ta, tree tb)
1065 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1066 return signed_type_for (ta);
1067 else
1068 return signed_type_for (tb);
1071 /* Applies operation OP on affine functions FNA and FNB, and returns the
1072 result. */
1074 static affine_fn
1075 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1077 unsigned i, n, m;
1078 affine_fn ret;
1079 tree coef;
1081 if (VEC_length (tree, fnb) > VEC_length (tree, fna))
1083 n = VEC_length (tree, fna);
1084 m = VEC_length (tree, fnb);
1086 else
1088 n = VEC_length (tree, fnb);
1089 m = VEC_length (tree, fna);
1092 ret = VEC_alloc (tree, heap, m);
1093 for (i = 0; i < n; i++)
1095 tree type = signed_type_for_types (TREE_TYPE (VEC_index (tree, fna, i)),
1096 TREE_TYPE (VEC_index (tree, fnb, i)));
1098 VEC_quick_push (tree, ret,
1099 fold_build2 (op, type,
1100 VEC_index (tree, fna, i),
1101 VEC_index (tree, fnb, i)));
1104 for (; VEC_iterate (tree, fna, i, coef); i++)
1105 VEC_quick_push (tree, ret,
1106 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1107 coef, integer_zero_node));
1108 for (; VEC_iterate (tree, fnb, i, coef); i++)
1109 VEC_quick_push (tree, ret,
1110 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1111 integer_zero_node, coef));
1113 return ret;
1116 /* Returns the sum of affine functions FNA and FNB. */
1118 static affine_fn
1119 affine_fn_plus (affine_fn fna, affine_fn fnb)
1121 return affine_fn_op (PLUS_EXPR, fna, fnb);
1124 /* Returns the difference of affine functions FNA and FNB. */
1126 static affine_fn
1127 affine_fn_minus (affine_fn fna, affine_fn fnb)
1129 return affine_fn_op (MINUS_EXPR, fna, fnb);
1132 /* Frees affine function FN. */
1134 static void
1135 affine_fn_free (affine_fn fn)
1137 VEC_free (tree, heap, fn);
1140 /* Determine for each subscript in the data dependence relation DDR
1141 the distance. */
1143 static void
1144 compute_subscript_distance (struct data_dependence_relation *ddr)
1146 conflict_function *cf_a, *cf_b;
1147 affine_fn fn_a, fn_b, diff;
1149 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1151 unsigned int i;
1153 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1155 struct subscript *subscript;
1157 subscript = DDR_SUBSCRIPT (ddr, i);
1158 cf_a = SUB_CONFLICTS_IN_A (subscript);
1159 cf_b = SUB_CONFLICTS_IN_B (subscript);
1161 fn_a = common_affine_function (cf_a);
1162 fn_b = common_affine_function (cf_b);
1163 if (!fn_a || !fn_b)
1165 SUB_DISTANCE (subscript) = chrec_dont_know;
1166 return;
1168 diff = affine_fn_minus (fn_a, fn_b);
1170 if (affine_function_constant_p (diff))
1171 SUB_DISTANCE (subscript) = affine_function_base (diff);
1172 else
1173 SUB_DISTANCE (subscript) = chrec_dont_know;
1175 affine_fn_free (diff);
1180 /* Returns the conflict function for "unknown". */
1182 static conflict_function *
1183 conflict_fn_not_known (void)
1185 conflict_function *fn = XCNEW (conflict_function);
1186 fn->n = NOT_KNOWN;
1188 return fn;
1191 /* Returns the conflict function for "independent". */
1193 static conflict_function *
1194 conflict_fn_no_dependence (void)
1196 conflict_function *fn = XCNEW (conflict_function);
1197 fn->n = NO_DEPENDENCE;
1199 return fn;
1202 /* Returns true if the address of OBJ is invariant in LOOP. */
1204 static bool
1205 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1207 while (handled_component_p (obj))
1209 if (TREE_CODE (obj) == ARRAY_REF)
1211 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1212 need to check the stride and the lower bound of the reference. */
1213 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1214 loop->num)
1215 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1216 loop->num))
1217 return false;
1219 else if (TREE_CODE (obj) == COMPONENT_REF)
1221 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1222 loop->num))
1223 return false;
1225 obj = TREE_OPERAND (obj, 0);
1228 if (!INDIRECT_REF_P (obj)
1229 && TREE_CODE (obj) != MEM_REF)
1230 return true;
1232 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1233 loop->num);
1236 /* Returns true if A and B are accesses to different objects, or to different
1237 fields of the same object. */
1239 static bool
1240 disjoint_objects_p (tree a, tree b)
1242 tree base_a, base_b;
1243 VEC (tree, heap) *comp_a = NULL, *comp_b = NULL;
1244 bool ret;
1246 base_a = get_base_address (a);
1247 base_b = get_base_address (b);
1249 if (DECL_P (base_a)
1250 && DECL_P (base_b)
1251 && base_a != base_b)
1252 return true;
1254 if (!operand_equal_p (base_a, base_b, 0))
1255 return false;
1257 /* Compare the component references of A and B. We must start from the inner
1258 ones, so record them to the vector first. */
1259 while (handled_component_p (a))
1261 VEC_safe_push (tree, heap, comp_a, a);
1262 a = TREE_OPERAND (a, 0);
1264 while (handled_component_p (b))
1266 VEC_safe_push (tree, heap, comp_b, b);
1267 b = TREE_OPERAND (b, 0);
1270 ret = false;
1271 while (1)
1273 if (VEC_length (tree, comp_a) == 0
1274 || VEC_length (tree, comp_b) == 0)
1275 break;
1277 a = VEC_pop (tree, comp_a);
1278 b = VEC_pop (tree, comp_b);
1280 /* Real and imaginary part of a variable do not alias. */
1281 if ((TREE_CODE (a) == REALPART_EXPR
1282 && TREE_CODE (b) == IMAGPART_EXPR)
1283 || (TREE_CODE (a) == IMAGPART_EXPR
1284 && TREE_CODE (b) == REALPART_EXPR))
1286 ret = true;
1287 break;
1290 if (TREE_CODE (a) != TREE_CODE (b))
1291 break;
1293 /* Nothing to do for ARRAY_REFs, as the indices of array_refs in
1294 DR_BASE_OBJECT are always zero. */
1295 if (TREE_CODE (a) == ARRAY_REF)
1296 continue;
1297 else if (TREE_CODE (a) == COMPONENT_REF)
1299 if (operand_equal_p (TREE_OPERAND (a, 1), TREE_OPERAND (b, 1), 0))
1300 continue;
1302 /* Different fields of unions may overlap. */
1303 base_a = TREE_OPERAND (a, 0);
1304 if (TREE_CODE (TREE_TYPE (base_a)) == UNION_TYPE)
1305 break;
1307 /* Different fields of structures cannot. */
1308 ret = true;
1309 break;
1311 else
1312 break;
1315 VEC_free (tree, heap, comp_a);
1316 VEC_free (tree, heap, comp_b);
1318 return ret;
1321 /* Returns false if we can prove that data references A and B do not alias,
1322 true otherwise. */
1324 bool
1325 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b)
1327 const_tree addr_a = DR_BASE_ADDRESS (a);
1328 const_tree addr_b = DR_BASE_ADDRESS (b);
1329 const_tree type_a, type_b;
1330 const_tree decl_a = NULL_TREE, decl_b = NULL_TREE;
1332 /* If the accessed objects are disjoint, the memory references do not
1333 alias. */
1334 if (disjoint_objects_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b)))
1335 return false;
1337 /* Query the alias oracle. */
1338 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
1340 if (!refs_output_dependent_p (DR_REF (a), DR_REF (b)))
1341 return false;
1343 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
1345 if (!refs_anti_dependent_p (DR_REF (a), DR_REF (b)))
1346 return false;
1348 else if (!refs_may_alias_p (DR_REF (a), DR_REF (b)))
1349 return false;
1351 if (!addr_a || !addr_b)
1352 return true;
1354 /* If the references are based on different static objects, they cannot
1355 alias (PTA should be able to disambiguate such accesses, but often
1356 it fails to). */
1357 if (TREE_CODE (addr_a) == ADDR_EXPR
1358 && TREE_CODE (addr_b) == ADDR_EXPR)
1359 return TREE_OPERAND (addr_a, 0) == TREE_OPERAND (addr_b, 0);
1361 /* An instruction writing through a restricted pointer is "independent" of any
1362 instruction reading or writing through a different restricted pointer,
1363 in the same block/scope. */
1365 type_a = TREE_TYPE (addr_a);
1366 type_b = TREE_TYPE (addr_b);
1367 gcc_assert (POINTER_TYPE_P (type_a) && POINTER_TYPE_P (type_b));
1369 if (TREE_CODE (addr_a) == SSA_NAME)
1370 decl_a = SSA_NAME_VAR (addr_a);
1371 if (TREE_CODE (addr_b) == SSA_NAME)
1372 decl_b = SSA_NAME_VAR (addr_b);
1374 if (TYPE_RESTRICT (type_a) && TYPE_RESTRICT (type_b)
1375 && (DR_IS_WRITE (a) || DR_IS_WRITE (b))
1376 && decl_a && DECL_P (decl_a)
1377 && decl_b && DECL_P (decl_b)
1378 && decl_a != decl_b
1379 && TREE_CODE (DECL_CONTEXT (decl_a)) == FUNCTION_DECL
1380 && DECL_CONTEXT (decl_a) == DECL_CONTEXT (decl_b))
1381 return false;
1383 return true;
1386 static void compute_self_dependence (struct data_dependence_relation *);
1388 /* Initialize a data dependence relation between data accesses A and
1389 B. NB_LOOPS is the number of loops surrounding the references: the
1390 size of the classic distance/direction vectors. */
1392 static struct data_dependence_relation *
1393 initialize_data_dependence_relation (struct data_reference *a,
1394 struct data_reference *b,
1395 VEC (loop_p, heap) *loop_nest)
1397 struct data_dependence_relation *res;
1398 unsigned int i;
1400 res = XNEW (struct data_dependence_relation);
1401 DDR_A (res) = a;
1402 DDR_B (res) = b;
1403 DDR_LOOP_NEST (res) = NULL;
1404 DDR_REVERSED_P (res) = false;
1405 DDR_SUBSCRIPTS (res) = NULL;
1406 DDR_DIR_VECTS (res) = NULL;
1407 DDR_DIST_VECTS (res) = NULL;
1409 if (a == NULL || b == NULL)
1411 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1412 return res;
1415 /* If the data references do not alias, then they are independent. */
1416 if (!dr_may_alias_p (a, b))
1418 DDR_ARE_DEPENDENT (res) = chrec_known;
1419 return res;
1422 /* When the references are exactly the same, don't spend time doing
1423 the data dependence tests, just initialize the ddr and return. */
1424 if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1426 DDR_AFFINE_P (res) = true;
1427 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1428 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1429 DDR_LOOP_NEST (res) = loop_nest;
1430 DDR_INNER_LOOP (res) = 0;
1431 DDR_SELF_REFERENCE (res) = true;
1432 compute_self_dependence (res);
1433 return res;
1436 /* If the references do not access the same object, we do not know
1437 whether they alias or not. */
1438 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1440 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1441 return res;
1444 /* If the base of the object is not invariant in the loop nest, we cannot
1445 analyze it. TODO -- in fact, it would suffice to record that there may
1446 be arbitrary dependences in the loops where the base object varies. */
1447 if (loop_nest
1448 && !object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1449 DR_BASE_OBJECT (a)))
1451 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1452 return res;
1455 /* If the number of dimensions of the access to not agree we can have
1456 a pointer access to a component of the array element type and an
1457 array access while the base-objects are still the same. Punt. */
1458 if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
1460 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1461 return res;
1464 DDR_AFFINE_P (res) = true;
1465 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1466 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1467 DDR_LOOP_NEST (res) = loop_nest;
1468 DDR_INNER_LOOP (res) = 0;
1469 DDR_SELF_REFERENCE (res) = false;
1471 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1473 struct subscript *subscript;
1475 subscript = XNEW (struct subscript);
1476 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1477 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1478 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1479 SUB_DISTANCE (subscript) = chrec_dont_know;
1480 VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1483 return res;
1486 /* Frees memory used by the conflict function F. */
1488 static void
1489 free_conflict_function (conflict_function *f)
1491 unsigned i;
1493 if (CF_NONTRIVIAL_P (f))
1495 for (i = 0; i < f->n; i++)
1496 affine_fn_free (f->fns[i]);
1498 free (f);
1501 /* Frees memory used by SUBSCRIPTS. */
1503 static void
1504 free_subscripts (VEC (subscript_p, heap) *subscripts)
1506 unsigned i;
1507 subscript_p s;
1509 FOR_EACH_VEC_ELT (subscript_p, subscripts, i, s)
1511 free_conflict_function (s->conflicting_iterations_in_a);
1512 free_conflict_function (s->conflicting_iterations_in_b);
1513 free (s);
1515 VEC_free (subscript_p, heap, subscripts);
1518 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1519 description. */
1521 static inline void
1522 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1523 tree chrec)
1525 if (dump_file && (dump_flags & TDF_DETAILS))
1527 fprintf (dump_file, "(dependence classified: ");
1528 print_generic_expr (dump_file, chrec, 0);
1529 fprintf (dump_file, ")\n");
1532 DDR_ARE_DEPENDENT (ddr) = chrec;
1533 free_subscripts (DDR_SUBSCRIPTS (ddr));
1534 DDR_SUBSCRIPTS (ddr) = NULL;
1537 /* The dependence relation DDR cannot be represented by a distance
1538 vector. */
1540 static inline void
1541 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1543 if (dump_file && (dump_flags & TDF_DETAILS))
1544 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1546 DDR_AFFINE_P (ddr) = false;
1551 /* This section contains the classic Banerjee tests. */
1553 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1554 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1556 static inline bool
1557 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1559 return (evolution_function_is_constant_p (chrec_a)
1560 && evolution_function_is_constant_p (chrec_b));
1563 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1564 variable, i.e., if the SIV (Single Index Variable) test is true. */
1566 static bool
1567 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1569 if ((evolution_function_is_constant_p (chrec_a)
1570 && evolution_function_is_univariate_p (chrec_b))
1571 || (evolution_function_is_constant_p (chrec_b)
1572 && evolution_function_is_univariate_p (chrec_a)))
1573 return true;
1575 if (evolution_function_is_univariate_p (chrec_a)
1576 && evolution_function_is_univariate_p (chrec_b))
1578 switch (TREE_CODE (chrec_a))
1580 case POLYNOMIAL_CHREC:
1581 switch (TREE_CODE (chrec_b))
1583 case POLYNOMIAL_CHREC:
1584 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1585 return false;
1587 default:
1588 return true;
1591 default:
1592 return true;
1596 return false;
1599 /* Creates a conflict function with N dimensions. The affine functions
1600 in each dimension follow. */
1602 static conflict_function *
1603 conflict_fn (unsigned n, ...)
1605 unsigned i;
1606 conflict_function *ret = XCNEW (conflict_function);
1607 va_list ap;
1609 gcc_assert (0 < n && n <= MAX_DIM);
1610 va_start(ap, n);
1612 ret->n = n;
1613 for (i = 0; i < n; i++)
1614 ret->fns[i] = va_arg (ap, affine_fn);
1615 va_end(ap);
1617 return ret;
1620 /* Returns constant affine function with value CST. */
1622 static affine_fn
1623 affine_fn_cst (tree cst)
1625 affine_fn fn = VEC_alloc (tree, heap, 1);
1626 VEC_quick_push (tree, fn, cst);
1627 return fn;
1630 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1632 static affine_fn
1633 affine_fn_univar (tree cst, unsigned dim, tree coef)
1635 affine_fn fn = VEC_alloc (tree, heap, dim + 1);
1636 unsigned i;
1638 gcc_assert (dim > 0);
1639 VEC_quick_push (tree, fn, cst);
1640 for (i = 1; i < dim; i++)
1641 VEC_quick_push (tree, fn, integer_zero_node);
1642 VEC_quick_push (tree, fn, coef);
1643 return fn;
1646 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1647 *OVERLAPS_B are initialized to the functions that describe the
1648 relation between the elements accessed twice by CHREC_A and
1649 CHREC_B. For k >= 0, the following property is verified:
1651 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1653 static void
1654 analyze_ziv_subscript (tree chrec_a,
1655 tree chrec_b,
1656 conflict_function **overlaps_a,
1657 conflict_function **overlaps_b,
1658 tree *last_conflicts)
1660 tree type, difference;
1661 dependence_stats.num_ziv++;
1663 if (dump_file && (dump_flags & TDF_DETAILS))
1664 fprintf (dump_file, "(analyze_ziv_subscript \n");
1666 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1667 chrec_a = chrec_convert (type, chrec_a, NULL);
1668 chrec_b = chrec_convert (type, chrec_b, NULL);
1669 difference = chrec_fold_minus (type, chrec_a, chrec_b);
1671 switch (TREE_CODE (difference))
1673 case INTEGER_CST:
1674 if (integer_zerop (difference))
1676 /* The difference is equal to zero: the accessed index
1677 overlaps for each iteration in the loop. */
1678 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1679 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1680 *last_conflicts = chrec_dont_know;
1681 dependence_stats.num_ziv_dependent++;
1683 else
1685 /* The accesses do not overlap. */
1686 *overlaps_a = conflict_fn_no_dependence ();
1687 *overlaps_b = conflict_fn_no_dependence ();
1688 *last_conflicts = integer_zero_node;
1689 dependence_stats.num_ziv_independent++;
1691 break;
1693 default:
1694 /* We're not sure whether the indexes overlap. For the moment,
1695 conservatively answer "don't know". */
1696 if (dump_file && (dump_flags & TDF_DETAILS))
1697 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1699 *overlaps_a = conflict_fn_not_known ();
1700 *overlaps_b = conflict_fn_not_known ();
1701 *last_conflicts = chrec_dont_know;
1702 dependence_stats.num_ziv_unimplemented++;
1703 break;
1706 if (dump_file && (dump_flags & TDF_DETAILS))
1707 fprintf (dump_file, ")\n");
1710 /* Sets NIT to the estimated number of executions of the statements in
1711 LOOP. If CONSERVATIVE is true, we must be sure that NIT is at least as
1712 large as the number of iterations. If we have no reliable estimate,
1713 the function returns false, otherwise returns true. */
1715 bool
1716 estimated_loop_iterations (struct loop *loop, bool conservative,
1717 double_int *nit)
1719 estimate_numbers_of_iterations_loop (loop, true);
1720 if (conservative)
1722 if (!loop->any_upper_bound)
1723 return false;
1725 *nit = loop->nb_iterations_upper_bound;
1727 else
1729 if (!loop->any_estimate)
1730 return false;
1732 *nit = loop->nb_iterations_estimate;
1735 return true;
1738 /* Similar to estimated_loop_iterations, but returns the estimate only
1739 if it fits to HOST_WIDE_INT. If this is not the case, or the estimate
1740 on the number of iterations of LOOP could not be derived, returns -1. */
1742 HOST_WIDE_INT
1743 estimated_loop_iterations_int (struct loop *loop, bool conservative)
1745 double_int nit;
1746 HOST_WIDE_INT hwi_nit;
1748 if (!estimated_loop_iterations (loop, conservative, &nit))
1749 return -1;
1751 if (!double_int_fits_in_shwi_p (nit))
1752 return -1;
1753 hwi_nit = double_int_to_shwi (nit);
1755 return hwi_nit < 0 ? -1 : hwi_nit;
1758 /* Similar to estimated_loop_iterations, but returns the estimate as a tree,
1759 and only if it fits to the int type. If this is not the case, or the
1760 estimate on the number of iterations of LOOP could not be derived, returns
1761 chrec_dont_know. */
1763 static tree
1764 estimated_loop_iterations_tree (struct loop *loop, bool conservative)
1766 double_int nit;
1767 tree type;
1769 if (!estimated_loop_iterations (loop, conservative, &nit))
1770 return chrec_dont_know;
1772 type = lang_hooks.types.type_for_size (INT_TYPE_SIZE, true);
1773 if (!double_int_fits_to_tree_p (type, nit))
1774 return chrec_dont_know;
1776 return double_int_to_tree (type, nit);
1779 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1780 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1781 *OVERLAPS_B are initialized to the functions that describe the
1782 relation between the elements accessed twice by CHREC_A and
1783 CHREC_B. For k >= 0, the following property is verified:
1785 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1787 static void
1788 analyze_siv_subscript_cst_affine (tree chrec_a,
1789 tree chrec_b,
1790 conflict_function **overlaps_a,
1791 conflict_function **overlaps_b,
1792 tree *last_conflicts)
1794 bool value0, value1, value2;
1795 tree type, difference, tmp;
1797 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1798 chrec_a = chrec_convert (type, chrec_a, NULL);
1799 chrec_b = chrec_convert (type, chrec_b, NULL);
1800 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1802 if (!chrec_is_positive (initial_condition (difference), &value0))
1804 if (dump_file && (dump_flags & TDF_DETAILS))
1805 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1807 dependence_stats.num_siv_unimplemented++;
1808 *overlaps_a = conflict_fn_not_known ();
1809 *overlaps_b = conflict_fn_not_known ();
1810 *last_conflicts = chrec_dont_know;
1811 return;
1813 else
1815 if (value0 == false)
1817 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1819 if (dump_file && (dump_flags & TDF_DETAILS))
1820 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1822 *overlaps_a = conflict_fn_not_known ();
1823 *overlaps_b = conflict_fn_not_known ();
1824 *last_conflicts = chrec_dont_know;
1825 dependence_stats.num_siv_unimplemented++;
1826 return;
1828 else
1830 if (value1 == true)
1832 /* Example:
1833 chrec_a = 12
1834 chrec_b = {10, +, 1}
1837 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1839 HOST_WIDE_INT numiter;
1840 struct loop *loop = get_chrec_loop (chrec_b);
1842 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1843 tmp = fold_build2 (EXACT_DIV_EXPR, type,
1844 fold_build1 (ABS_EXPR, type, difference),
1845 CHREC_RIGHT (chrec_b));
1846 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1847 *last_conflicts = integer_one_node;
1850 /* Perform weak-zero siv test to see if overlap is
1851 outside the loop bounds. */
1852 numiter = estimated_loop_iterations_int (loop, false);
1854 if (numiter >= 0
1855 && compare_tree_int (tmp, numiter) > 0)
1857 free_conflict_function (*overlaps_a);
1858 free_conflict_function (*overlaps_b);
1859 *overlaps_a = conflict_fn_no_dependence ();
1860 *overlaps_b = conflict_fn_no_dependence ();
1861 *last_conflicts = integer_zero_node;
1862 dependence_stats.num_siv_independent++;
1863 return;
1865 dependence_stats.num_siv_dependent++;
1866 return;
1869 /* When the step does not divide the difference, there are
1870 no overlaps. */
1871 else
1873 *overlaps_a = conflict_fn_no_dependence ();
1874 *overlaps_b = conflict_fn_no_dependence ();
1875 *last_conflicts = integer_zero_node;
1876 dependence_stats.num_siv_independent++;
1877 return;
1881 else
1883 /* Example:
1884 chrec_a = 12
1885 chrec_b = {10, +, -1}
1887 In this case, chrec_a will not overlap with chrec_b. */
1888 *overlaps_a = conflict_fn_no_dependence ();
1889 *overlaps_b = conflict_fn_no_dependence ();
1890 *last_conflicts = integer_zero_node;
1891 dependence_stats.num_siv_independent++;
1892 return;
1896 else
1898 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1900 if (dump_file && (dump_flags & TDF_DETAILS))
1901 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1903 *overlaps_a = conflict_fn_not_known ();
1904 *overlaps_b = conflict_fn_not_known ();
1905 *last_conflicts = chrec_dont_know;
1906 dependence_stats.num_siv_unimplemented++;
1907 return;
1909 else
1911 if (value2 == false)
1913 /* Example:
1914 chrec_a = 3
1915 chrec_b = {10, +, -1}
1917 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1919 HOST_WIDE_INT numiter;
1920 struct loop *loop = get_chrec_loop (chrec_b);
1922 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1923 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
1924 CHREC_RIGHT (chrec_b));
1925 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1926 *last_conflicts = integer_one_node;
1928 /* Perform weak-zero siv test to see if overlap is
1929 outside the loop bounds. */
1930 numiter = estimated_loop_iterations_int (loop, false);
1932 if (numiter >= 0
1933 && compare_tree_int (tmp, numiter) > 0)
1935 free_conflict_function (*overlaps_a);
1936 free_conflict_function (*overlaps_b);
1937 *overlaps_a = conflict_fn_no_dependence ();
1938 *overlaps_b = conflict_fn_no_dependence ();
1939 *last_conflicts = integer_zero_node;
1940 dependence_stats.num_siv_independent++;
1941 return;
1943 dependence_stats.num_siv_dependent++;
1944 return;
1947 /* When the step does not divide the difference, there
1948 are no overlaps. */
1949 else
1951 *overlaps_a = conflict_fn_no_dependence ();
1952 *overlaps_b = conflict_fn_no_dependence ();
1953 *last_conflicts = integer_zero_node;
1954 dependence_stats.num_siv_independent++;
1955 return;
1958 else
1960 /* Example:
1961 chrec_a = 3
1962 chrec_b = {4, +, 1}
1964 In this case, chrec_a will not overlap with chrec_b. */
1965 *overlaps_a = conflict_fn_no_dependence ();
1966 *overlaps_b = conflict_fn_no_dependence ();
1967 *last_conflicts = integer_zero_node;
1968 dependence_stats.num_siv_independent++;
1969 return;
1976 /* Helper recursive function for initializing the matrix A. Returns
1977 the initial value of CHREC. */
1979 static tree
1980 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
1982 gcc_assert (chrec);
1984 switch (TREE_CODE (chrec))
1986 case POLYNOMIAL_CHREC:
1987 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
1989 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
1990 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
1992 case PLUS_EXPR:
1993 case MULT_EXPR:
1994 case MINUS_EXPR:
1996 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1997 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
1999 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
2002 case NOP_EXPR:
2004 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2005 return chrec_convert (chrec_type (chrec), op, NULL);
2008 case BIT_NOT_EXPR:
2010 /* Handle ~X as -1 - X. */
2011 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2012 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
2013 build_int_cst (TREE_TYPE (chrec), -1), op);
2016 case INTEGER_CST:
2017 return chrec;
2019 default:
2020 gcc_unreachable ();
2021 return NULL_TREE;
2025 #define FLOOR_DIV(x,y) ((x) / (y))
2027 /* Solves the special case of the Diophantine equation:
2028 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
2030 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
2031 number of iterations that loops X and Y run. The overlaps will be
2032 constructed as evolutions in dimension DIM. */
2034 static void
2035 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
2036 affine_fn *overlaps_a,
2037 affine_fn *overlaps_b,
2038 tree *last_conflicts, int dim)
2040 if (((step_a > 0 && step_b > 0)
2041 || (step_a < 0 && step_b < 0)))
2043 int step_overlaps_a, step_overlaps_b;
2044 int gcd_steps_a_b, last_conflict, tau2;
2046 gcd_steps_a_b = gcd (step_a, step_b);
2047 step_overlaps_a = step_b / gcd_steps_a_b;
2048 step_overlaps_b = step_a / gcd_steps_a_b;
2050 if (niter > 0)
2052 tau2 = FLOOR_DIV (niter, step_overlaps_a);
2053 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
2054 last_conflict = tau2;
2055 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2057 else
2058 *last_conflicts = chrec_dont_know;
2060 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
2061 build_int_cst (NULL_TREE,
2062 step_overlaps_a));
2063 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
2064 build_int_cst (NULL_TREE,
2065 step_overlaps_b));
2068 else
2070 *overlaps_a = affine_fn_cst (integer_zero_node);
2071 *overlaps_b = affine_fn_cst (integer_zero_node);
2072 *last_conflicts = integer_zero_node;
2076 /* Solves the special case of a Diophantine equation where CHREC_A is
2077 an affine bivariate function, and CHREC_B is an affine univariate
2078 function. For example,
2080 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
2082 has the following overlapping functions:
2084 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
2085 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
2086 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
2088 FORNOW: This is a specialized implementation for a case occurring in
2089 a common benchmark. Implement the general algorithm. */
2091 static void
2092 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
2093 conflict_function **overlaps_a,
2094 conflict_function **overlaps_b,
2095 tree *last_conflicts)
2097 bool xz_p, yz_p, xyz_p;
2098 int step_x, step_y, step_z;
2099 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
2100 affine_fn overlaps_a_xz, overlaps_b_xz;
2101 affine_fn overlaps_a_yz, overlaps_b_yz;
2102 affine_fn overlaps_a_xyz, overlaps_b_xyz;
2103 affine_fn ova1, ova2, ovb;
2104 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
2106 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
2107 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
2108 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
2110 niter_x =
2111 estimated_loop_iterations_int (get_chrec_loop (CHREC_LEFT (chrec_a)),
2112 false);
2113 niter_y = estimated_loop_iterations_int (get_chrec_loop (chrec_a), false);
2114 niter_z = estimated_loop_iterations_int (get_chrec_loop (chrec_b), false);
2116 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
2118 if (dump_file && (dump_flags & TDF_DETAILS))
2119 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
2121 *overlaps_a = conflict_fn_not_known ();
2122 *overlaps_b = conflict_fn_not_known ();
2123 *last_conflicts = chrec_dont_know;
2124 return;
2127 niter = MIN (niter_x, niter_z);
2128 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
2129 &overlaps_a_xz,
2130 &overlaps_b_xz,
2131 &last_conflicts_xz, 1);
2132 niter = MIN (niter_y, niter_z);
2133 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
2134 &overlaps_a_yz,
2135 &overlaps_b_yz,
2136 &last_conflicts_yz, 2);
2137 niter = MIN (niter_x, niter_z);
2138 niter = MIN (niter_y, niter);
2139 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
2140 &overlaps_a_xyz,
2141 &overlaps_b_xyz,
2142 &last_conflicts_xyz, 3);
2144 xz_p = !integer_zerop (last_conflicts_xz);
2145 yz_p = !integer_zerop (last_conflicts_yz);
2146 xyz_p = !integer_zerop (last_conflicts_xyz);
2148 if (xz_p || yz_p || xyz_p)
2150 ova1 = affine_fn_cst (integer_zero_node);
2151 ova2 = affine_fn_cst (integer_zero_node);
2152 ovb = affine_fn_cst (integer_zero_node);
2153 if (xz_p)
2155 affine_fn t0 = ova1;
2156 affine_fn t2 = ovb;
2158 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2159 ovb = affine_fn_plus (ovb, overlaps_b_xz);
2160 affine_fn_free (t0);
2161 affine_fn_free (t2);
2162 *last_conflicts = last_conflicts_xz;
2164 if (yz_p)
2166 affine_fn t0 = ova2;
2167 affine_fn t2 = ovb;
2169 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2170 ovb = affine_fn_plus (ovb, overlaps_b_yz);
2171 affine_fn_free (t0);
2172 affine_fn_free (t2);
2173 *last_conflicts = last_conflicts_yz;
2175 if (xyz_p)
2177 affine_fn t0 = ova1;
2178 affine_fn t2 = ova2;
2179 affine_fn t4 = ovb;
2181 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2182 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2183 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2184 affine_fn_free (t0);
2185 affine_fn_free (t2);
2186 affine_fn_free (t4);
2187 *last_conflicts = last_conflicts_xyz;
2189 *overlaps_a = conflict_fn (2, ova1, ova2);
2190 *overlaps_b = conflict_fn (1, ovb);
2192 else
2194 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2195 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2196 *last_conflicts = integer_zero_node;
2199 affine_fn_free (overlaps_a_xz);
2200 affine_fn_free (overlaps_b_xz);
2201 affine_fn_free (overlaps_a_yz);
2202 affine_fn_free (overlaps_b_yz);
2203 affine_fn_free (overlaps_a_xyz);
2204 affine_fn_free (overlaps_b_xyz);
2207 /* Determines the overlapping elements due to accesses CHREC_A and
2208 CHREC_B, that are affine functions. This function cannot handle
2209 symbolic evolution functions, ie. when initial conditions are
2210 parameters, because it uses lambda matrices of integers. */
2212 static void
2213 analyze_subscript_affine_affine (tree chrec_a,
2214 tree chrec_b,
2215 conflict_function **overlaps_a,
2216 conflict_function **overlaps_b,
2217 tree *last_conflicts)
2219 unsigned nb_vars_a, nb_vars_b, dim;
2220 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2221 lambda_matrix A, U, S;
2222 struct obstack scratch_obstack;
2224 if (eq_evolutions_p (chrec_a, chrec_b))
2226 /* The accessed index overlaps for each iteration in the
2227 loop. */
2228 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2229 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2230 *last_conflicts = chrec_dont_know;
2231 return;
2233 if (dump_file && (dump_flags & TDF_DETAILS))
2234 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2236 /* For determining the initial intersection, we have to solve a
2237 Diophantine equation. This is the most time consuming part.
2239 For answering to the question: "Is there a dependence?" we have
2240 to prove that there exists a solution to the Diophantine
2241 equation, and that the solution is in the iteration domain,
2242 i.e. the solution is positive or zero, and that the solution
2243 happens before the upper bound loop.nb_iterations. Otherwise
2244 there is no dependence. This function outputs a description of
2245 the iterations that hold the intersections. */
2247 nb_vars_a = nb_vars_in_chrec (chrec_a);
2248 nb_vars_b = nb_vars_in_chrec (chrec_b);
2250 gcc_obstack_init (&scratch_obstack);
2252 dim = nb_vars_a + nb_vars_b;
2253 U = lambda_matrix_new (dim, dim, &scratch_obstack);
2254 A = lambda_matrix_new (dim, 1, &scratch_obstack);
2255 S = lambda_matrix_new (dim, 1, &scratch_obstack);
2257 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2258 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
2259 gamma = init_b - init_a;
2261 /* Don't do all the hard work of solving the Diophantine equation
2262 when we already know the solution: for example,
2263 | {3, +, 1}_1
2264 | {3, +, 4}_2
2265 | gamma = 3 - 3 = 0.
2266 Then the first overlap occurs during the first iterations:
2267 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2269 if (gamma == 0)
2271 if (nb_vars_a == 1 && nb_vars_b == 1)
2273 HOST_WIDE_INT step_a, step_b;
2274 HOST_WIDE_INT niter, niter_a, niter_b;
2275 affine_fn ova, ovb;
2277 niter_a = estimated_loop_iterations_int (get_chrec_loop (chrec_a),
2278 false);
2279 niter_b = estimated_loop_iterations_int (get_chrec_loop (chrec_b),
2280 false);
2281 niter = MIN (niter_a, niter_b);
2282 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2283 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2285 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2286 &ova, &ovb,
2287 last_conflicts, 1);
2288 *overlaps_a = conflict_fn (1, ova);
2289 *overlaps_b = conflict_fn (1, ovb);
2292 else if (nb_vars_a == 2 && nb_vars_b == 1)
2293 compute_overlap_steps_for_affine_1_2
2294 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2296 else if (nb_vars_a == 1 && nb_vars_b == 2)
2297 compute_overlap_steps_for_affine_1_2
2298 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2300 else
2302 if (dump_file && (dump_flags & TDF_DETAILS))
2303 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2304 *overlaps_a = conflict_fn_not_known ();
2305 *overlaps_b = conflict_fn_not_known ();
2306 *last_conflicts = chrec_dont_know;
2308 goto end_analyze_subs_aa;
2311 /* U.A = S */
2312 lambda_matrix_right_hermite (A, dim, 1, S, U);
2314 if (S[0][0] < 0)
2316 S[0][0] *= -1;
2317 lambda_matrix_row_negate (U, dim, 0);
2319 gcd_alpha_beta = S[0][0];
2321 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2322 but that is a quite strange case. Instead of ICEing, answer
2323 don't know. */
2324 if (gcd_alpha_beta == 0)
2326 *overlaps_a = conflict_fn_not_known ();
2327 *overlaps_b = conflict_fn_not_known ();
2328 *last_conflicts = chrec_dont_know;
2329 goto end_analyze_subs_aa;
2332 /* The classic "gcd-test". */
2333 if (!int_divides_p (gcd_alpha_beta, gamma))
2335 /* The "gcd-test" has determined that there is no integer
2336 solution, i.e. there is no dependence. */
2337 *overlaps_a = conflict_fn_no_dependence ();
2338 *overlaps_b = conflict_fn_no_dependence ();
2339 *last_conflicts = integer_zero_node;
2342 /* Both access functions are univariate. This includes SIV and MIV cases. */
2343 else if (nb_vars_a == 1 && nb_vars_b == 1)
2345 /* Both functions should have the same evolution sign. */
2346 if (((A[0][0] > 0 && -A[1][0] > 0)
2347 || (A[0][0] < 0 && -A[1][0] < 0)))
2349 /* The solutions are given by:
2351 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2352 | [u21 u22] [y0]
2354 For a given integer t. Using the following variables,
2356 | i0 = u11 * gamma / gcd_alpha_beta
2357 | j0 = u12 * gamma / gcd_alpha_beta
2358 | i1 = u21
2359 | j1 = u22
2361 the solutions are:
2363 | x0 = i0 + i1 * t,
2364 | y0 = j0 + j1 * t. */
2365 HOST_WIDE_INT i0, j0, i1, j1;
2367 i0 = U[0][0] * gamma / gcd_alpha_beta;
2368 j0 = U[0][1] * gamma / gcd_alpha_beta;
2369 i1 = U[1][0];
2370 j1 = U[1][1];
2372 if ((i1 == 0 && i0 < 0)
2373 || (j1 == 0 && j0 < 0))
2375 /* There is no solution.
2376 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2377 falls in here, but for the moment we don't look at the
2378 upper bound of the iteration domain. */
2379 *overlaps_a = conflict_fn_no_dependence ();
2380 *overlaps_b = conflict_fn_no_dependence ();
2381 *last_conflicts = integer_zero_node;
2382 goto end_analyze_subs_aa;
2385 if (i1 > 0 && j1 > 0)
2387 HOST_WIDE_INT niter_a = estimated_loop_iterations_int
2388 (get_chrec_loop (chrec_a), false);
2389 HOST_WIDE_INT niter_b = estimated_loop_iterations_int
2390 (get_chrec_loop (chrec_b), false);
2391 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2393 /* (X0, Y0) is a solution of the Diophantine equation:
2394 "chrec_a (X0) = chrec_b (Y0)". */
2395 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2396 CEIL (-j0, j1));
2397 HOST_WIDE_INT x0 = i1 * tau1 + i0;
2398 HOST_WIDE_INT y0 = j1 * tau1 + j0;
2400 /* (X1, Y1) is the smallest positive solution of the eq
2401 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2402 first conflict occurs. */
2403 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2404 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2405 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2407 if (niter > 0)
2409 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2410 FLOOR_DIV (niter - j0, j1));
2411 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2413 /* If the overlap occurs outside of the bounds of the
2414 loop, there is no dependence. */
2415 if (x1 >= niter || y1 >= niter)
2417 *overlaps_a = conflict_fn_no_dependence ();
2418 *overlaps_b = conflict_fn_no_dependence ();
2419 *last_conflicts = integer_zero_node;
2420 goto end_analyze_subs_aa;
2422 else
2423 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2425 else
2426 *last_conflicts = chrec_dont_know;
2428 *overlaps_a
2429 = conflict_fn (1,
2430 affine_fn_univar (build_int_cst (NULL_TREE, x1),
2432 build_int_cst (NULL_TREE, i1)));
2433 *overlaps_b
2434 = conflict_fn (1,
2435 affine_fn_univar (build_int_cst (NULL_TREE, y1),
2437 build_int_cst (NULL_TREE, j1)));
2439 else
2441 /* FIXME: For the moment, the upper bound of the
2442 iteration domain for i and j is not checked. */
2443 if (dump_file && (dump_flags & TDF_DETAILS))
2444 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2445 *overlaps_a = conflict_fn_not_known ();
2446 *overlaps_b = conflict_fn_not_known ();
2447 *last_conflicts = chrec_dont_know;
2450 else
2452 if (dump_file && (dump_flags & TDF_DETAILS))
2453 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2454 *overlaps_a = conflict_fn_not_known ();
2455 *overlaps_b = conflict_fn_not_known ();
2456 *last_conflicts = chrec_dont_know;
2459 else
2461 if (dump_file && (dump_flags & TDF_DETAILS))
2462 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2463 *overlaps_a = conflict_fn_not_known ();
2464 *overlaps_b = conflict_fn_not_known ();
2465 *last_conflicts = chrec_dont_know;
2468 end_analyze_subs_aa:
2469 obstack_free (&scratch_obstack, NULL);
2470 if (dump_file && (dump_flags & TDF_DETAILS))
2472 fprintf (dump_file, " (overlaps_a = ");
2473 dump_conflict_function (dump_file, *overlaps_a);
2474 fprintf (dump_file, ")\n (overlaps_b = ");
2475 dump_conflict_function (dump_file, *overlaps_b);
2476 fprintf (dump_file, ")\n");
2477 fprintf (dump_file, ")\n");
2481 /* Returns true when analyze_subscript_affine_affine can be used for
2482 determining the dependence relation between chrec_a and chrec_b,
2483 that contain symbols. This function modifies chrec_a and chrec_b
2484 such that the analysis result is the same, and such that they don't
2485 contain symbols, and then can safely be passed to the analyzer.
2487 Example: The analysis of the following tuples of evolutions produce
2488 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2489 vs. {0, +, 1}_1
2491 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2492 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2495 static bool
2496 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2498 tree diff, type, left_a, left_b, right_b;
2500 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2501 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2502 /* FIXME: For the moment not handled. Might be refined later. */
2503 return false;
2505 type = chrec_type (*chrec_a);
2506 left_a = CHREC_LEFT (*chrec_a);
2507 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
2508 diff = chrec_fold_minus (type, left_a, left_b);
2510 if (!evolution_function_is_constant_p (diff))
2511 return false;
2513 if (dump_file && (dump_flags & TDF_DETAILS))
2514 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2516 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2517 diff, CHREC_RIGHT (*chrec_a));
2518 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
2519 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2520 build_int_cst (type, 0),
2521 right_b);
2522 return true;
2525 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2526 *OVERLAPS_B are initialized to the functions that describe the
2527 relation between the elements accessed twice by CHREC_A and
2528 CHREC_B. For k >= 0, the following property is verified:
2530 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2532 static void
2533 analyze_siv_subscript (tree chrec_a,
2534 tree chrec_b,
2535 conflict_function **overlaps_a,
2536 conflict_function **overlaps_b,
2537 tree *last_conflicts,
2538 int loop_nest_num)
2540 dependence_stats.num_siv++;
2542 if (dump_file && (dump_flags & TDF_DETAILS))
2543 fprintf (dump_file, "(analyze_siv_subscript \n");
2545 if (evolution_function_is_constant_p (chrec_a)
2546 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2547 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2548 overlaps_a, overlaps_b, last_conflicts);
2550 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2551 && evolution_function_is_constant_p (chrec_b))
2552 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2553 overlaps_b, overlaps_a, last_conflicts);
2555 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2556 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2558 if (!chrec_contains_symbols (chrec_a)
2559 && !chrec_contains_symbols (chrec_b))
2561 analyze_subscript_affine_affine (chrec_a, chrec_b,
2562 overlaps_a, overlaps_b,
2563 last_conflicts);
2565 if (CF_NOT_KNOWN_P (*overlaps_a)
2566 || CF_NOT_KNOWN_P (*overlaps_b))
2567 dependence_stats.num_siv_unimplemented++;
2568 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2569 || CF_NO_DEPENDENCE_P (*overlaps_b))
2570 dependence_stats.num_siv_independent++;
2571 else
2572 dependence_stats.num_siv_dependent++;
2574 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2575 &chrec_b))
2577 analyze_subscript_affine_affine (chrec_a, chrec_b,
2578 overlaps_a, overlaps_b,
2579 last_conflicts);
2581 if (CF_NOT_KNOWN_P (*overlaps_a)
2582 || CF_NOT_KNOWN_P (*overlaps_b))
2583 dependence_stats.num_siv_unimplemented++;
2584 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2585 || CF_NO_DEPENDENCE_P (*overlaps_b))
2586 dependence_stats.num_siv_independent++;
2587 else
2588 dependence_stats.num_siv_dependent++;
2590 else
2591 goto siv_subscript_dontknow;
2594 else
2596 siv_subscript_dontknow:;
2597 if (dump_file && (dump_flags & TDF_DETAILS))
2598 fprintf (dump_file, "siv test failed: unimplemented.\n");
2599 *overlaps_a = conflict_fn_not_known ();
2600 *overlaps_b = conflict_fn_not_known ();
2601 *last_conflicts = chrec_dont_know;
2602 dependence_stats.num_siv_unimplemented++;
2605 if (dump_file && (dump_flags & TDF_DETAILS))
2606 fprintf (dump_file, ")\n");
2609 /* Returns false if we can prove that the greatest common divisor of the steps
2610 of CHREC does not divide CST, false otherwise. */
2612 static bool
2613 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2615 HOST_WIDE_INT cd = 0, val;
2616 tree step;
2618 if (!host_integerp (cst, 0))
2619 return true;
2620 val = tree_low_cst (cst, 0);
2622 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2624 step = CHREC_RIGHT (chrec);
2625 if (!host_integerp (step, 0))
2626 return true;
2627 cd = gcd (cd, tree_low_cst (step, 0));
2628 chrec = CHREC_LEFT (chrec);
2631 return val % cd == 0;
2634 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2635 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2636 functions that describe the relation between the elements accessed
2637 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2638 is verified:
2640 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2642 static void
2643 analyze_miv_subscript (tree chrec_a,
2644 tree chrec_b,
2645 conflict_function **overlaps_a,
2646 conflict_function **overlaps_b,
2647 tree *last_conflicts,
2648 struct loop *loop_nest)
2650 /* FIXME: This is a MIV subscript, not yet handled.
2651 Example: (A[{1, +, 1}_1] vs. A[{1, +, 1}_2]) that comes from
2652 (A[i] vs. A[j]).
2654 In the SIV test we had to solve a Diophantine equation with two
2655 variables. In the MIV case we have to solve a Diophantine
2656 equation with 2*n variables (if the subscript uses n IVs).
2658 tree type, difference;
2660 dependence_stats.num_miv++;
2661 if (dump_file && (dump_flags & TDF_DETAILS))
2662 fprintf (dump_file, "(analyze_miv_subscript \n");
2664 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2665 chrec_a = chrec_convert (type, chrec_a, NULL);
2666 chrec_b = chrec_convert (type, chrec_b, NULL);
2667 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2669 if (eq_evolutions_p (chrec_a, chrec_b))
2671 /* Access functions are the same: all the elements are accessed
2672 in the same order. */
2673 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2674 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2675 *last_conflicts = estimated_loop_iterations_tree
2676 (get_chrec_loop (chrec_a), true);
2677 dependence_stats.num_miv_dependent++;
2680 else if (evolution_function_is_constant_p (difference)
2681 /* For the moment, the following is verified:
2682 evolution_function_is_affine_multivariate_p (chrec_a,
2683 loop_nest->num) */
2684 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2686 /* testsuite/.../ssa-chrec-33.c
2687 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2689 The difference is 1, and all the evolution steps are multiples
2690 of 2, consequently there are no overlapping elements. */
2691 *overlaps_a = conflict_fn_no_dependence ();
2692 *overlaps_b = conflict_fn_no_dependence ();
2693 *last_conflicts = integer_zero_node;
2694 dependence_stats.num_miv_independent++;
2697 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2698 && !chrec_contains_symbols (chrec_a)
2699 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2700 && !chrec_contains_symbols (chrec_b))
2702 /* testsuite/.../ssa-chrec-35.c
2703 {0, +, 1}_2 vs. {0, +, 1}_3
2704 the overlapping elements are respectively located at iterations:
2705 {0, +, 1}_x and {0, +, 1}_x,
2706 in other words, we have the equality:
2707 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2709 Other examples:
2710 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2711 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2713 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2714 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2716 analyze_subscript_affine_affine (chrec_a, chrec_b,
2717 overlaps_a, overlaps_b, last_conflicts);
2719 if (CF_NOT_KNOWN_P (*overlaps_a)
2720 || CF_NOT_KNOWN_P (*overlaps_b))
2721 dependence_stats.num_miv_unimplemented++;
2722 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2723 || CF_NO_DEPENDENCE_P (*overlaps_b))
2724 dependence_stats.num_miv_independent++;
2725 else
2726 dependence_stats.num_miv_dependent++;
2729 else
2731 /* When the analysis is too difficult, answer "don't know". */
2732 if (dump_file && (dump_flags & TDF_DETAILS))
2733 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2735 *overlaps_a = conflict_fn_not_known ();
2736 *overlaps_b = conflict_fn_not_known ();
2737 *last_conflicts = chrec_dont_know;
2738 dependence_stats.num_miv_unimplemented++;
2741 if (dump_file && (dump_flags & TDF_DETAILS))
2742 fprintf (dump_file, ")\n");
2745 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2746 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2747 OVERLAP_ITERATIONS_B are initialized with two functions that
2748 describe the iterations that contain conflicting elements.
2750 Remark: For an integer k >= 0, the following equality is true:
2752 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2755 static void
2756 analyze_overlapping_iterations (tree chrec_a,
2757 tree chrec_b,
2758 conflict_function **overlap_iterations_a,
2759 conflict_function **overlap_iterations_b,
2760 tree *last_conflicts, struct loop *loop_nest)
2762 unsigned int lnn = loop_nest->num;
2764 dependence_stats.num_subscript_tests++;
2766 if (dump_file && (dump_flags & TDF_DETAILS))
2768 fprintf (dump_file, "(analyze_overlapping_iterations \n");
2769 fprintf (dump_file, " (chrec_a = ");
2770 print_generic_expr (dump_file, chrec_a, 0);
2771 fprintf (dump_file, ")\n (chrec_b = ");
2772 print_generic_expr (dump_file, chrec_b, 0);
2773 fprintf (dump_file, ")\n");
2776 if (chrec_a == NULL_TREE
2777 || chrec_b == NULL_TREE
2778 || chrec_contains_undetermined (chrec_a)
2779 || chrec_contains_undetermined (chrec_b))
2781 dependence_stats.num_subscript_undetermined++;
2783 *overlap_iterations_a = conflict_fn_not_known ();
2784 *overlap_iterations_b = conflict_fn_not_known ();
2787 /* If they are the same chrec, and are affine, they overlap
2788 on every iteration. */
2789 else if (eq_evolutions_p (chrec_a, chrec_b)
2790 && evolution_function_is_affine_multivariate_p (chrec_a, lnn))
2792 dependence_stats.num_same_subscript_function++;
2793 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2794 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2795 *last_conflicts = chrec_dont_know;
2798 /* If they aren't the same, and aren't affine, we can't do anything
2799 yet. */
2800 else if ((chrec_contains_symbols (chrec_a)
2801 || chrec_contains_symbols (chrec_b))
2802 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2803 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
2805 dependence_stats.num_subscript_undetermined++;
2806 *overlap_iterations_a = conflict_fn_not_known ();
2807 *overlap_iterations_b = conflict_fn_not_known ();
2810 else if (ziv_subscript_p (chrec_a, chrec_b))
2811 analyze_ziv_subscript (chrec_a, chrec_b,
2812 overlap_iterations_a, overlap_iterations_b,
2813 last_conflicts);
2815 else if (siv_subscript_p (chrec_a, chrec_b))
2816 analyze_siv_subscript (chrec_a, chrec_b,
2817 overlap_iterations_a, overlap_iterations_b,
2818 last_conflicts, lnn);
2820 else
2821 analyze_miv_subscript (chrec_a, chrec_b,
2822 overlap_iterations_a, overlap_iterations_b,
2823 last_conflicts, loop_nest);
2825 if (dump_file && (dump_flags & TDF_DETAILS))
2827 fprintf (dump_file, " (overlap_iterations_a = ");
2828 dump_conflict_function (dump_file, *overlap_iterations_a);
2829 fprintf (dump_file, ")\n (overlap_iterations_b = ");
2830 dump_conflict_function (dump_file, *overlap_iterations_b);
2831 fprintf (dump_file, ")\n");
2832 fprintf (dump_file, ")\n");
2836 /* Helper function for uniquely inserting distance vectors. */
2838 static void
2839 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
2841 unsigned i;
2842 lambda_vector v;
2844 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, v)
2845 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
2846 return;
2848 VEC_safe_push (lambda_vector, heap, DDR_DIST_VECTS (ddr), dist_v);
2851 /* Helper function for uniquely inserting direction vectors. */
2853 static void
2854 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
2856 unsigned i;
2857 lambda_vector v;
2859 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), i, v)
2860 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
2861 return;
2863 VEC_safe_push (lambda_vector, heap, DDR_DIR_VECTS (ddr), dir_v);
2866 /* Add a distance of 1 on all the loops outer than INDEX. If we
2867 haven't yet determined a distance for this outer loop, push a new
2868 distance vector composed of the previous distance, and a distance
2869 of 1 for this outer loop. Example:
2871 | loop_1
2872 | loop_2
2873 | A[10]
2874 | endloop_2
2875 | endloop_1
2877 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
2878 save (0, 1), then we have to save (1, 0). */
2880 static void
2881 add_outer_distances (struct data_dependence_relation *ddr,
2882 lambda_vector dist_v, int index)
2884 /* For each outer loop where init_v is not set, the accesses are
2885 in dependence of distance 1 in the loop. */
2886 while (--index >= 0)
2888 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2889 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
2890 save_v[index] = 1;
2891 save_dist_v (ddr, save_v);
2895 /* Return false when fail to represent the data dependence as a
2896 distance vector. INIT_B is set to true when a component has been
2897 added to the distance vector DIST_V. INDEX_CARRY is then set to
2898 the index in DIST_V that carries the dependence. */
2900 static bool
2901 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
2902 struct data_reference *ddr_a,
2903 struct data_reference *ddr_b,
2904 lambda_vector dist_v, bool *init_b,
2905 int *index_carry)
2907 unsigned i;
2908 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2910 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2912 tree access_fn_a, access_fn_b;
2913 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
2915 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2917 non_affine_dependence_relation (ddr);
2918 return false;
2921 access_fn_a = DR_ACCESS_FN (ddr_a, i);
2922 access_fn_b = DR_ACCESS_FN (ddr_b, i);
2924 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
2925 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
2927 int dist, index;
2928 int index_a = index_in_loop_nest (CHREC_VARIABLE (access_fn_a),
2929 DDR_LOOP_NEST (ddr));
2930 int index_b = index_in_loop_nest (CHREC_VARIABLE (access_fn_b),
2931 DDR_LOOP_NEST (ddr));
2933 /* The dependence is carried by the outermost loop. Example:
2934 | loop_1
2935 | A[{4, +, 1}_1]
2936 | loop_2
2937 | A[{5, +, 1}_2]
2938 | endloop_2
2939 | endloop_1
2940 In this case, the dependence is carried by loop_1. */
2941 index = index_a < index_b ? index_a : index_b;
2942 *index_carry = MIN (index, *index_carry);
2944 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2946 non_affine_dependence_relation (ddr);
2947 return false;
2950 dist = int_cst_value (SUB_DISTANCE (subscript));
2952 /* This is the subscript coupling test. If we have already
2953 recorded a distance for this loop (a distance coming from
2954 another subscript), it should be the same. For example,
2955 in the following code, there is no dependence:
2957 | loop i = 0, N, 1
2958 | T[i+1][i] = ...
2959 | ... = T[i][i]
2960 | endloop
2962 if (init_v[index] != 0 && dist_v[index] != dist)
2964 finalize_ddr_dependent (ddr, chrec_known);
2965 return false;
2968 dist_v[index] = dist;
2969 init_v[index] = 1;
2970 *init_b = true;
2972 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
2974 /* This can be for example an affine vs. constant dependence
2975 (T[i] vs. T[3]) that is not an affine dependence and is
2976 not representable as a distance vector. */
2977 non_affine_dependence_relation (ddr);
2978 return false;
2982 return true;
2985 /* Return true when the DDR contains only constant access functions. */
2987 static bool
2988 constant_access_functions (const struct data_dependence_relation *ddr)
2990 unsigned i;
2992 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2993 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
2994 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
2995 return false;
2997 return true;
3000 /* Helper function for the case where DDR_A and DDR_B are the same
3001 multivariate access function with a constant step. For an example
3002 see pr34635-1.c. */
3004 static void
3005 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
3007 int x_1, x_2;
3008 tree c_1 = CHREC_LEFT (c_2);
3009 tree c_0 = CHREC_LEFT (c_1);
3010 lambda_vector dist_v;
3011 int v1, v2, cd;
3013 /* Polynomials with more than 2 variables are not handled yet. When
3014 the evolution steps are parameters, it is not possible to
3015 represent the dependence using classical distance vectors. */
3016 if (TREE_CODE (c_0) != INTEGER_CST
3017 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
3018 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
3020 DDR_AFFINE_P (ddr) = false;
3021 return;
3024 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
3025 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
3027 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
3028 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3029 v1 = int_cst_value (CHREC_RIGHT (c_1));
3030 v2 = int_cst_value (CHREC_RIGHT (c_2));
3031 cd = gcd (v1, v2);
3032 v1 /= cd;
3033 v2 /= cd;
3035 if (v2 < 0)
3037 v2 = -v2;
3038 v1 = -v1;
3041 dist_v[x_1] = v2;
3042 dist_v[x_2] = -v1;
3043 save_dist_v (ddr, dist_v);
3045 add_outer_distances (ddr, dist_v, x_1);
3048 /* Helper function for the case where DDR_A and DDR_B are the same
3049 access functions. */
3051 static void
3052 add_other_self_distances (struct data_dependence_relation *ddr)
3054 lambda_vector dist_v;
3055 unsigned i;
3056 int index_carry = DDR_NB_LOOPS (ddr);
3058 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3060 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
3062 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
3064 if (!evolution_function_is_univariate_p (access_fun))
3066 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
3068 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
3069 return;
3072 access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
3074 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
3075 add_multivariate_self_dist (ddr, access_fun);
3076 else
3077 /* The evolution step is not constant: it varies in
3078 the outer loop, so this cannot be represented by a
3079 distance vector. For example in pr34635.c the
3080 evolution is {0, +, {0, +, 4}_1}_2. */
3081 DDR_AFFINE_P (ddr) = false;
3083 return;
3086 index_carry = MIN (index_carry,
3087 index_in_loop_nest (CHREC_VARIABLE (access_fun),
3088 DDR_LOOP_NEST (ddr)));
3092 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3093 add_outer_distances (ddr, dist_v, index_carry);
3096 static void
3097 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
3099 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3101 dist_v[DDR_INNER_LOOP (ddr)] = 1;
3102 save_dist_v (ddr, dist_v);
3105 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
3106 is the case for example when access functions are the same and
3107 equal to a constant, as in:
3109 | loop_1
3110 | A[3] = ...
3111 | ... = A[3]
3112 | endloop_1
3114 in which case the distance vectors are (0) and (1). */
3116 static void
3117 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
3119 unsigned i, j;
3121 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3123 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
3124 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
3125 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
3127 for (j = 0; j < ca->n; j++)
3128 if (affine_function_zero_p (ca->fns[j]))
3130 insert_innermost_unit_dist_vector (ddr);
3131 return;
3134 for (j = 0; j < cb->n; j++)
3135 if (affine_function_zero_p (cb->fns[j]))
3137 insert_innermost_unit_dist_vector (ddr);
3138 return;
3143 /* Compute the classic per loop distance vector. DDR is the data
3144 dependence relation to build a vector from. Return false when fail
3145 to represent the data dependence as a distance vector. */
3147 static bool
3148 build_classic_dist_vector (struct data_dependence_relation *ddr,
3149 struct loop *loop_nest)
3151 bool init_b = false;
3152 int index_carry = DDR_NB_LOOPS (ddr);
3153 lambda_vector dist_v;
3155 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3156 return false;
3158 if (same_access_functions (ddr))
3160 /* Save the 0 vector. */
3161 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3162 save_dist_v (ddr, dist_v);
3164 if (constant_access_functions (ddr))
3165 add_distance_for_zero_overlaps (ddr);
3167 if (DDR_NB_LOOPS (ddr) > 1)
3168 add_other_self_distances (ddr);
3170 return true;
3173 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3174 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3175 dist_v, &init_b, &index_carry))
3176 return false;
3178 /* Save the distance vector if we initialized one. */
3179 if (init_b)
3181 /* Verify a basic constraint: classic distance vectors should
3182 always be lexicographically positive.
3184 Data references are collected in the order of execution of
3185 the program, thus for the following loop
3187 | for (i = 1; i < 100; i++)
3188 | for (j = 1; j < 100; j++)
3190 | t = T[j+1][i-1]; // A
3191 | T[j][i] = t + 2; // B
3194 references are collected following the direction of the wind:
3195 A then B. The data dependence tests are performed also
3196 following this order, such that we're looking at the distance
3197 separating the elements accessed by A from the elements later
3198 accessed by B. But in this example, the distance returned by
3199 test_dep (A, B) is lexicographically negative (-1, 1), that
3200 means that the access A occurs later than B with respect to
3201 the outer loop, ie. we're actually looking upwind. In this
3202 case we solve test_dep (B, A) looking downwind to the
3203 lexicographically positive solution, that returns the
3204 distance vector (1, -1). */
3205 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3207 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3208 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3209 loop_nest))
3210 return false;
3211 compute_subscript_distance (ddr);
3212 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3213 save_v, &init_b, &index_carry))
3214 return false;
3215 save_dist_v (ddr, save_v);
3216 DDR_REVERSED_P (ddr) = true;
3218 /* In this case there is a dependence forward for all the
3219 outer loops:
3221 | for (k = 1; k < 100; k++)
3222 | for (i = 1; i < 100; i++)
3223 | for (j = 1; j < 100; j++)
3225 | t = T[j+1][i-1]; // A
3226 | T[j][i] = t + 2; // B
3229 the vectors are:
3230 (0, 1, -1)
3231 (1, 1, -1)
3232 (1, -1, 1)
3234 if (DDR_NB_LOOPS (ddr) > 1)
3236 add_outer_distances (ddr, save_v, index_carry);
3237 add_outer_distances (ddr, dist_v, index_carry);
3240 else
3242 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3243 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3245 if (DDR_NB_LOOPS (ddr) > 1)
3247 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3249 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3250 DDR_A (ddr), loop_nest))
3251 return false;
3252 compute_subscript_distance (ddr);
3253 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3254 opposite_v, &init_b,
3255 &index_carry))
3256 return false;
3258 save_dist_v (ddr, save_v);
3259 add_outer_distances (ddr, dist_v, index_carry);
3260 add_outer_distances (ddr, opposite_v, index_carry);
3262 else
3263 save_dist_v (ddr, save_v);
3266 else
3268 /* There is a distance of 1 on all the outer loops: Example:
3269 there is a dependence of distance 1 on loop_1 for the array A.
3271 | loop_1
3272 | A[5] = ...
3273 | endloop
3275 add_outer_distances (ddr, dist_v,
3276 lambda_vector_first_nz (dist_v,
3277 DDR_NB_LOOPS (ddr), 0));
3280 if (dump_file && (dump_flags & TDF_DETAILS))
3282 unsigned i;
3284 fprintf (dump_file, "(build_classic_dist_vector\n");
3285 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3287 fprintf (dump_file, " dist_vector = (");
3288 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3289 DDR_NB_LOOPS (ddr));
3290 fprintf (dump_file, " )\n");
3292 fprintf (dump_file, ")\n");
3295 return true;
3298 /* Return the direction for a given distance.
3299 FIXME: Computing dir this way is suboptimal, since dir can catch
3300 cases that dist is unable to represent. */
3302 static inline enum data_dependence_direction
3303 dir_from_dist (int dist)
3305 if (dist > 0)
3306 return dir_positive;
3307 else if (dist < 0)
3308 return dir_negative;
3309 else
3310 return dir_equal;
3313 /* Compute the classic per loop direction vector. DDR is the data
3314 dependence relation to build a vector from. */
3316 static void
3317 build_classic_dir_vector (struct data_dependence_relation *ddr)
3319 unsigned i, j;
3320 lambda_vector dist_v;
3322 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
3324 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3326 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3327 dir_v[j] = dir_from_dist (dist_v[j]);
3329 save_dir_v (ddr, dir_v);
3333 /* Helper function. Returns true when there is a dependence between
3334 data references DRA and DRB. */
3336 static bool
3337 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3338 struct data_reference *dra,
3339 struct data_reference *drb,
3340 struct loop *loop_nest)
3342 unsigned int i;
3343 tree last_conflicts;
3344 struct subscript *subscript;
3346 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3347 i++)
3349 conflict_function *overlaps_a, *overlaps_b;
3351 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3352 DR_ACCESS_FN (drb, i),
3353 &overlaps_a, &overlaps_b,
3354 &last_conflicts, loop_nest);
3356 if (CF_NOT_KNOWN_P (overlaps_a)
3357 || CF_NOT_KNOWN_P (overlaps_b))
3359 finalize_ddr_dependent (ddr, chrec_dont_know);
3360 dependence_stats.num_dependence_undetermined++;
3361 free_conflict_function (overlaps_a);
3362 free_conflict_function (overlaps_b);
3363 return false;
3366 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3367 || CF_NO_DEPENDENCE_P (overlaps_b))
3369 finalize_ddr_dependent (ddr, chrec_known);
3370 dependence_stats.num_dependence_independent++;
3371 free_conflict_function (overlaps_a);
3372 free_conflict_function (overlaps_b);
3373 return false;
3376 else
3378 if (SUB_CONFLICTS_IN_A (subscript))
3379 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3380 if (SUB_CONFLICTS_IN_B (subscript))
3381 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3383 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3384 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3385 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3389 return true;
3392 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3394 static void
3395 subscript_dependence_tester (struct data_dependence_relation *ddr,
3396 struct loop *loop_nest)
3399 if (dump_file && (dump_flags & TDF_DETAILS))
3400 fprintf (dump_file, "(subscript_dependence_tester \n");
3402 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3403 dependence_stats.num_dependence_dependent++;
3405 compute_subscript_distance (ddr);
3406 if (build_classic_dist_vector (ddr, loop_nest))
3407 build_classic_dir_vector (ddr);
3409 if (dump_file && (dump_flags & TDF_DETAILS))
3410 fprintf (dump_file, ")\n");
3413 /* Returns true when all the access functions of A are affine or
3414 constant with respect to LOOP_NEST. */
3416 static bool
3417 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3418 const struct loop *loop_nest)
3420 unsigned int i;
3421 VEC(tree,heap) *fns = DR_ACCESS_FNS (a);
3422 tree t;
3424 FOR_EACH_VEC_ELT (tree, fns, i, t)
3425 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3426 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3427 return false;
3429 return true;
3432 /* Initializes an equation for an OMEGA problem using the information
3433 contained in the ACCESS_FUN. Returns true when the operation
3434 succeeded.
3436 PB is the omega constraint system.
3437 EQ is the number of the equation to be initialized.
3438 OFFSET is used for shifting the variables names in the constraints:
3439 a constrain is composed of 2 * the number of variables surrounding
3440 dependence accesses. OFFSET is set either to 0 for the first n variables,
3441 then it is set to n.
3442 ACCESS_FUN is expected to be an affine chrec. */
3444 static bool
3445 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3446 unsigned int offset, tree access_fun,
3447 struct data_dependence_relation *ddr)
3449 switch (TREE_CODE (access_fun))
3451 case POLYNOMIAL_CHREC:
3453 tree left = CHREC_LEFT (access_fun);
3454 tree right = CHREC_RIGHT (access_fun);
3455 int var = CHREC_VARIABLE (access_fun);
3456 unsigned var_idx;
3458 if (TREE_CODE (right) != INTEGER_CST)
3459 return false;
3461 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3462 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3464 /* Compute the innermost loop index. */
3465 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3467 if (offset == 0)
3468 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3469 += int_cst_value (right);
3471 switch (TREE_CODE (left))
3473 case POLYNOMIAL_CHREC:
3474 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3476 case INTEGER_CST:
3477 pb->eqs[eq].coef[0] += int_cst_value (left);
3478 return true;
3480 default:
3481 return false;
3485 case INTEGER_CST:
3486 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3487 return true;
3489 default:
3490 return false;
3494 /* As explained in the comments preceding init_omega_for_ddr, we have
3495 to set up a system for each loop level, setting outer loops
3496 variation to zero, and current loop variation to positive or zero.
3497 Save each lexico positive distance vector. */
3499 static void
3500 omega_extract_distance_vectors (omega_pb pb,
3501 struct data_dependence_relation *ddr)
3503 int eq, geq;
3504 unsigned i, j;
3505 struct loop *loopi, *loopj;
3506 enum omega_result res;
3508 /* Set a new problem for each loop in the nest. The basis is the
3509 problem that we have initialized until now. On top of this we
3510 add new constraints. */
3511 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3512 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3514 int dist = 0;
3515 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3516 DDR_NB_LOOPS (ddr));
3518 omega_copy_problem (copy, pb);
3520 /* For all the outer loops "loop_j", add "dj = 0". */
3521 for (j = 0;
3522 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3524 eq = omega_add_zero_eq (copy, omega_black);
3525 copy->eqs[eq].coef[j + 1] = 1;
3528 /* For "loop_i", add "0 <= di". */
3529 geq = omega_add_zero_geq (copy, omega_black);
3530 copy->geqs[geq].coef[i + 1] = 1;
3532 /* Reduce the constraint system, and test that the current
3533 problem is feasible. */
3534 res = omega_simplify_problem (copy);
3535 if (res == omega_false
3536 || res == omega_unknown
3537 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3538 goto next_problem;
3540 for (eq = 0; eq < copy->num_subs; eq++)
3541 if (copy->subs[eq].key == (int) i + 1)
3543 dist = copy->subs[eq].coef[0];
3544 goto found_dist;
3547 if (dist == 0)
3549 /* Reinitialize problem... */
3550 omega_copy_problem (copy, pb);
3551 for (j = 0;
3552 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3554 eq = omega_add_zero_eq (copy, omega_black);
3555 copy->eqs[eq].coef[j + 1] = 1;
3558 /* ..., but this time "di = 1". */
3559 eq = omega_add_zero_eq (copy, omega_black);
3560 copy->eqs[eq].coef[i + 1] = 1;
3561 copy->eqs[eq].coef[0] = -1;
3563 res = omega_simplify_problem (copy);
3564 if (res == omega_false
3565 || res == omega_unknown
3566 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3567 goto next_problem;
3569 for (eq = 0; eq < copy->num_subs; eq++)
3570 if (copy->subs[eq].key == (int) i + 1)
3572 dist = copy->subs[eq].coef[0];
3573 goto found_dist;
3577 found_dist:;
3578 /* Save the lexicographically positive distance vector. */
3579 if (dist >= 0)
3581 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3582 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3584 dist_v[i] = dist;
3586 for (eq = 0; eq < copy->num_subs; eq++)
3587 if (copy->subs[eq].key > 0)
3589 dist = copy->subs[eq].coef[0];
3590 dist_v[copy->subs[eq].key - 1] = dist;
3593 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3594 dir_v[j] = dir_from_dist (dist_v[j]);
3596 save_dist_v (ddr, dist_v);
3597 save_dir_v (ddr, dir_v);
3600 next_problem:;
3601 omega_free_problem (copy);
3605 /* This is called for each subscript of a tuple of data references:
3606 insert an equality for representing the conflicts. */
3608 static bool
3609 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3610 struct data_dependence_relation *ddr,
3611 omega_pb pb, bool *maybe_dependent)
3613 int eq;
3614 tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3615 TREE_TYPE (access_fun_b));
3616 tree fun_a = chrec_convert (type, access_fun_a, NULL);
3617 tree fun_b = chrec_convert (type, access_fun_b, NULL);
3618 tree difference = chrec_fold_minus (type, fun_a, fun_b);
3620 /* When the fun_a - fun_b is not constant, the dependence is not
3621 captured by the classic distance vector representation. */
3622 if (TREE_CODE (difference) != INTEGER_CST)
3623 return false;
3625 /* ZIV test. */
3626 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3628 /* There is no dependence. */
3629 *maybe_dependent = false;
3630 return true;
3633 fun_b = chrec_fold_multiply (type, fun_b, integer_minus_one_node);
3635 eq = omega_add_zero_eq (pb, omega_black);
3636 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3637 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3638 /* There is probably a dependence, but the system of
3639 constraints cannot be built: answer "don't know". */
3640 return false;
3642 /* GCD test. */
3643 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3644 && !int_divides_p (lambda_vector_gcd
3645 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3646 2 * DDR_NB_LOOPS (ddr)),
3647 pb->eqs[eq].coef[0]))
3649 /* There is no dependence. */
3650 *maybe_dependent = false;
3651 return true;
3654 return true;
3657 /* Helper function, same as init_omega_for_ddr but specialized for
3658 data references A and B. */
3660 static bool
3661 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3662 struct data_dependence_relation *ddr,
3663 omega_pb pb, bool *maybe_dependent)
3665 unsigned i;
3666 int ineq;
3667 struct loop *loopi;
3668 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3670 /* Insert an equality per subscript. */
3671 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3673 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3674 ddr, pb, maybe_dependent))
3675 return false;
3676 else if (*maybe_dependent == false)
3678 /* There is no dependence. */
3679 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3680 return true;
3684 /* Insert inequalities: constraints corresponding to the iteration
3685 domain, i.e. the loops surrounding the references "loop_x" and
3686 the distance variables "dx". The layout of the OMEGA
3687 representation is as follows:
3688 - coef[0] is the constant
3689 - coef[1..nb_loops] are the protected variables that will not be
3690 removed by the solver: the "dx"
3691 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3693 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3694 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3696 HOST_WIDE_INT nbi = estimated_loop_iterations_int (loopi, false);
3698 /* 0 <= loop_x */
3699 ineq = omega_add_zero_geq (pb, omega_black);
3700 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3702 /* 0 <= loop_x + dx */
3703 ineq = omega_add_zero_geq (pb, omega_black);
3704 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3705 pb->geqs[ineq].coef[i + 1] = 1;
3707 if (nbi != -1)
3709 /* loop_x <= nb_iters */
3710 ineq = omega_add_zero_geq (pb, omega_black);
3711 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3712 pb->geqs[ineq].coef[0] = nbi;
3714 /* loop_x + dx <= nb_iters */
3715 ineq = omega_add_zero_geq (pb, omega_black);
3716 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3717 pb->geqs[ineq].coef[i + 1] = -1;
3718 pb->geqs[ineq].coef[0] = nbi;
3720 /* A step "dx" bigger than nb_iters is not feasible, so
3721 add "0 <= nb_iters + dx", */
3722 ineq = omega_add_zero_geq (pb, omega_black);
3723 pb->geqs[ineq].coef[i + 1] = 1;
3724 pb->geqs[ineq].coef[0] = nbi;
3725 /* and "dx <= nb_iters". */
3726 ineq = omega_add_zero_geq (pb, omega_black);
3727 pb->geqs[ineq].coef[i + 1] = -1;
3728 pb->geqs[ineq].coef[0] = nbi;
3732 omega_extract_distance_vectors (pb, ddr);
3734 return true;
3737 /* Sets up the Omega dependence problem for the data dependence
3738 relation DDR. Returns false when the constraint system cannot be
3739 built, ie. when the test answers "don't know". Returns true
3740 otherwise, and when independence has been proved (using one of the
3741 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3742 set MAYBE_DEPENDENT to true.
3744 Example: for setting up the dependence system corresponding to the
3745 conflicting accesses
3747 | loop_i
3748 | loop_j
3749 | A[i, i+1] = ...
3750 | ... A[2*j, 2*(i + j)]
3751 | endloop_j
3752 | endloop_i
3754 the following constraints come from the iteration domain:
3756 0 <= i <= Ni
3757 0 <= i + di <= Ni
3758 0 <= j <= Nj
3759 0 <= j + dj <= Nj
3761 where di, dj are the distance variables. The constraints
3762 representing the conflicting elements are:
3764 i = 2 * (j + dj)
3765 i + 1 = 2 * (i + di + j + dj)
3767 For asking that the resulting distance vector (di, dj) be
3768 lexicographically positive, we insert the constraint "di >= 0". If
3769 "di = 0" in the solution, we fix that component to zero, and we
3770 look at the inner loops: we set a new problem where all the outer
3771 loop distances are zero, and fix this inner component to be
3772 positive. When one of the components is positive, we save that
3773 distance, and set a new problem where the distance on this loop is
3774 zero, searching for other distances in the inner loops. Here is
3775 the classic example that illustrates that we have to set for each
3776 inner loop a new problem:
3778 | loop_1
3779 | loop_2
3780 | A[10]
3781 | endloop_2
3782 | endloop_1
3784 we have to save two distances (1, 0) and (0, 1).
3786 Given two array references, refA and refB, we have to set the
3787 dependence problem twice, refA vs. refB and refB vs. refA, and we
3788 cannot do a single test, as refB might occur before refA in the
3789 inner loops, and the contrary when considering outer loops: ex.
3791 | loop_0
3792 | loop_1
3793 | loop_2
3794 | T[{1,+,1}_2][{1,+,1}_1] // refA
3795 | T[{2,+,1}_2][{0,+,1}_1] // refB
3796 | endloop_2
3797 | endloop_1
3798 | endloop_0
3800 refB touches the elements in T before refA, and thus for the same
3801 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3802 but for successive loop_0 iterations, we have (1, -1, 1)
3804 The Omega solver expects the distance variables ("di" in the
3805 previous example) to come first in the constraint system (as
3806 variables to be protected, or "safe" variables), the constraint
3807 system is built using the following layout:
3809 "cst | distance vars | index vars".
3812 static bool
3813 init_omega_for_ddr (struct data_dependence_relation *ddr,
3814 bool *maybe_dependent)
3816 omega_pb pb;
3817 bool res = false;
3819 *maybe_dependent = true;
3821 if (same_access_functions (ddr))
3823 unsigned j;
3824 lambda_vector dir_v;
3826 /* Save the 0 vector. */
3827 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3828 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3829 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3830 dir_v[j] = dir_equal;
3831 save_dir_v (ddr, dir_v);
3833 /* Save the dependences carried by outer loops. */
3834 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3835 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3836 maybe_dependent);
3837 omega_free_problem (pb);
3838 return res;
3841 /* Omega expects the protected variables (those that have to be kept
3842 after elimination) to appear first in the constraint system.
3843 These variables are the distance variables. In the following
3844 initialization we declare NB_LOOPS safe variables, and the total
3845 number of variables for the constraint system is 2*NB_LOOPS. */
3846 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3847 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3848 maybe_dependent);
3849 omega_free_problem (pb);
3851 /* Stop computation if not decidable, or no dependence. */
3852 if (res == false || *maybe_dependent == false)
3853 return res;
3855 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3856 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
3857 maybe_dependent);
3858 omega_free_problem (pb);
3860 return res;
3863 /* Return true when DDR contains the same information as that stored
3864 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
3866 static bool
3867 ddr_consistent_p (FILE *file,
3868 struct data_dependence_relation *ddr,
3869 VEC (lambda_vector, heap) *dist_vects,
3870 VEC (lambda_vector, heap) *dir_vects)
3872 unsigned int i, j;
3874 /* If dump_file is set, output there. */
3875 if (dump_file && (dump_flags & TDF_DETAILS))
3876 file = dump_file;
3878 if (VEC_length (lambda_vector, dist_vects) != DDR_NUM_DIST_VECTS (ddr))
3880 lambda_vector b_dist_v;
3881 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
3882 VEC_length (lambda_vector, dist_vects),
3883 DDR_NUM_DIST_VECTS (ddr));
3885 fprintf (file, "Banerjee dist vectors:\n");
3886 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, i, b_dist_v)
3887 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
3889 fprintf (file, "Omega dist vectors:\n");
3890 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3891 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
3893 fprintf (file, "data dependence relation:\n");
3894 dump_data_dependence_relation (file, ddr);
3896 fprintf (file, ")\n");
3897 return false;
3900 if (VEC_length (lambda_vector, dir_vects) != DDR_NUM_DIR_VECTS (ddr))
3902 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
3903 VEC_length (lambda_vector, dir_vects),
3904 DDR_NUM_DIR_VECTS (ddr));
3905 return false;
3908 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3910 lambda_vector a_dist_v;
3911 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
3913 /* Distance vectors are not ordered in the same way in the DDR
3914 and in the DIST_VECTS: search for a matching vector. */
3915 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, a_dist_v)
3916 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
3917 break;
3919 if (j == VEC_length (lambda_vector, dist_vects))
3921 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
3922 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
3923 fprintf (file, "not found in Omega dist vectors:\n");
3924 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
3925 fprintf (file, "data dependence relation:\n");
3926 dump_data_dependence_relation (file, ddr);
3927 fprintf (file, ")\n");
3931 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
3933 lambda_vector a_dir_v;
3934 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
3936 /* Direction vectors are not ordered in the same way in the DDR
3937 and in the DIR_VECTS: search for a matching vector. */
3938 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, a_dir_v)
3939 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
3940 break;
3942 if (j == VEC_length (lambda_vector, dist_vects))
3944 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
3945 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
3946 fprintf (file, "not found in Omega dir vectors:\n");
3947 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
3948 fprintf (file, "data dependence relation:\n");
3949 dump_data_dependence_relation (file, ddr);
3950 fprintf (file, ")\n");
3954 return true;
3957 /* This computes the affine dependence relation between A and B with
3958 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
3959 independence between two accesses, while CHREC_DONT_KNOW is used
3960 for representing the unknown relation.
3962 Note that it is possible to stop the computation of the dependence
3963 relation the first time we detect a CHREC_KNOWN element for a given
3964 subscript. */
3966 static void
3967 compute_affine_dependence (struct data_dependence_relation *ddr,
3968 struct loop *loop_nest)
3970 struct data_reference *dra = DDR_A (ddr);
3971 struct data_reference *drb = DDR_B (ddr);
3973 if (dump_file && (dump_flags & TDF_DETAILS))
3975 fprintf (dump_file, "(compute_affine_dependence\n");
3976 fprintf (dump_file, " (stmt_a = \n");
3977 print_gimple_stmt (dump_file, DR_STMT (dra), 0, 0);
3978 fprintf (dump_file, ")\n (stmt_b = \n");
3979 print_gimple_stmt (dump_file, DR_STMT (drb), 0, 0);
3980 fprintf (dump_file, ")\n");
3983 /* Analyze only when the dependence relation is not yet known. */
3984 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE
3985 && !DDR_SELF_REFERENCE (ddr))
3987 dependence_stats.num_dependence_tests++;
3989 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
3990 && access_functions_are_affine_or_constant_p (drb, loop_nest))
3992 if (flag_check_data_deps)
3994 /* Compute the dependences using the first algorithm. */
3995 subscript_dependence_tester (ddr, loop_nest);
3997 if (dump_file && (dump_flags & TDF_DETAILS))
3999 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
4000 dump_data_dependence_relation (dump_file, ddr);
4003 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4005 bool maybe_dependent;
4006 VEC (lambda_vector, heap) *dir_vects, *dist_vects;
4008 /* Save the result of the first DD analyzer. */
4009 dist_vects = DDR_DIST_VECTS (ddr);
4010 dir_vects = DDR_DIR_VECTS (ddr);
4012 /* Reset the information. */
4013 DDR_DIST_VECTS (ddr) = NULL;
4014 DDR_DIR_VECTS (ddr) = NULL;
4016 /* Compute the same information using Omega. */
4017 if (!init_omega_for_ddr (ddr, &maybe_dependent))
4018 goto csys_dont_know;
4020 if (dump_file && (dump_flags & TDF_DETAILS))
4022 fprintf (dump_file, "Omega Analyzer\n");
4023 dump_data_dependence_relation (dump_file, ddr);
4026 /* Check that we get the same information. */
4027 if (maybe_dependent)
4028 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
4029 dir_vects));
4032 else
4033 subscript_dependence_tester (ddr, loop_nest);
4036 /* As a last case, if the dependence cannot be determined, or if
4037 the dependence is considered too difficult to determine, answer
4038 "don't know". */
4039 else
4041 csys_dont_know:;
4042 dependence_stats.num_dependence_undetermined++;
4044 if (dump_file && (dump_flags & TDF_DETAILS))
4046 fprintf (dump_file, "Data ref a:\n");
4047 dump_data_reference (dump_file, dra);
4048 fprintf (dump_file, "Data ref b:\n");
4049 dump_data_reference (dump_file, drb);
4050 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4052 finalize_ddr_dependent (ddr, chrec_dont_know);
4056 if (dump_file && (dump_flags & TDF_DETAILS))
4057 fprintf (dump_file, ")\n");
4060 /* This computes the dependence relation for the same data
4061 reference into DDR. */
4063 static void
4064 compute_self_dependence (struct data_dependence_relation *ddr)
4066 unsigned int i;
4067 struct subscript *subscript;
4069 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
4070 return;
4072 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
4073 i++)
4075 if (SUB_CONFLICTS_IN_A (subscript))
4076 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
4077 if (SUB_CONFLICTS_IN_B (subscript))
4078 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
4080 /* The accessed index overlaps for each iteration. */
4081 SUB_CONFLICTS_IN_A (subscript)
4082 = conflict_fn (1, affine_fn_cst (integer_zero_node));
4083 SUB_CONFLICTS_IN_B (subscript)
4084 = conflict_fn (1, affine_fn_cst (integer_zero_node));
4085 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
4088 /* The distance vector is the zero vector. */
4089 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4090 save_dir_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4093 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4094 the data references in DATAREFS, in the LOOP_NEST. When
4095 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4096 relations. */
4098 void
4099 compute_all_dependences (VEC (data_reference_p, heap) *datarefs,
4100 VEC (ddr_p, heap) **dependence_relations,
4101 VEC (loop_p, heap) *loop_nest,
4102 bool compute_self_and_rr)
4104 struct data_dependence_relation *ddr;
4105 struct data_reference *a, *b;
4106 unsigned int i, j;
4108 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4109 for (j = i + 1; VEC_iterate (data_reference_p, datarefs, j, b); j++)
4110 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4112 ddr = initialize_data_dependence_relation (a, b, loop_nest);
4113 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4114 if (loop_nest)
4115 compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
4118 if (compute_self_and_rr)
4119 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4121 ddr = initialize_data_dependence_relation (a, a, loop_nest);
4122 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4123 compute_self_dependence (ddr);
4127 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4128 true if STMT clobbers memory, false otherwise. */
4130 bool
4131 get_references_in_stmt (gimple stmt, VEC (data_ref_loc, heap) **references)
4133 bool clobbers_memory = false;
4134 data_ref_loc *ref;
4135 tree *op0, *op1;
4136 enum gimple_code stmt_code = gimple_code (stmt);
4138 *references = NULL;
4140 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4141 Calls have side-effects, except those to const or pure
4142 functions. */
4143 if ((stmt_code == GIMPLE_CALL
4144 && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)))
4145 || (stmt_code == GIMPLE_ASM
4146 && gimple_asm_volatile_p (stmt)))
4147 clobbers_memory = true;
4149 if (!gimple_vuse (stmt))
4150 return clobbers_memory;
4152 if (stmt_code == GIMPLE_ASSIGN)
4154 tree base;
4155 op0 = gimple_assign_lhs_ptr (stmt);
4156 op1 = gimple_assign_rhs1_ptr (stmt);
4158 if (DECL_P (*op1)
4159 || (REFERENCE_CLASS_P (*op1)
4160 && (base = get_base_address (*op1))
4161 && TREE_CODE (base) != SSA_NAME))
4163 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4164 ref->pos = op1;
4165 ref->is_read = true;
4168 if (DECL_P (*op0)
4169 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0)))
4171 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4172 ref->pos = op0;
4173 ref->is_read = false;
4176 else if (stmt_code == GIMPLE_CALL)
4178 unsigned i, n = gimple_call_num_args (stmt);
4180 for (i = 0; i < n; i++)
4182 op0 = gimple_call_arg_ptr (stmt, i);
4184 if (DECL_P (*op0)
4185 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0)))
4187 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4188 ref->pos = op0;
4189 ref->is_read = true;
4194 return clobbers_memory;
4197 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4198 reference, returns false, otherwise returns true. NEST is the outermost
4199 loop of the loop nest in which the references should be analyzed. */
4201 bool
4202 find_data_references_in_stmt (struct loop *nest, gimple stmt,
4203 VEC (data_reference_p, heap) **datarefs)
4205 unsigned i;
4206 VEC (data_ref_loc, heap) *references;
4207 data_ref_loc *ref;
4208 bool ret = true;
4209 data_reference_p dr;
4211 if (get_references_in_stmt (stmt, &references))
4213 VEC_free (data_ref_loc, heap, references);
4214 return false;
4217 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4219 dr = create_data_ref (nest, *ref->pos, stmt, ref->is_read);
4220 gcc_assert (dr != NULL);
4222 /* FIXME -- data dependence analysis does not work correctly for objects
4223 with invariant addresses in loop nests. Let us fail here until the
4224 problem is fixed. */
4225 if (dr_address_invariant_p (dr) && nest)
4227 free_data_ref (dr);
4228 if (dump_file && (dump_flags & TDF_DETAILS))
4229 fprintf (dump_file, "\tFAILED as dr address is invariant\n");
4230 ret = false;
4231 break;
4234 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4236 VEC_free (data_ref_loc, heap, references);
4237 return ret;
4240 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4241 reference, returns false, otherwise returns true. NEST is the outermost
4242 loop of the loop nest in which the references should be analyzed. */
4244 bool
4245 graphite_find_data_references_in_stmt (struct loop *nest, gimple stmt,
4246 VEC (data_reference_p, heap) **datarefs)
4248 unsigned i;
4249 VEC (data_ref_loc, heap) *references;
4250 data_ref_loc *ref;
4251 bool ret = true;
4252 data_reference_p dr;
4254 if (get_references_in_stmt (stmt, &references))
4256 VEC_free (data_ref_loc, heap, references);
4257 return false;
4260 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4262 dr = create_data_ref (nest, *ref->pos, stmt, ref->is_read);
4263 gcc_assert (dr != NULL);
4264 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4267 VEC_free (data_ref_loc, heap, references);
4268 return ret;
4271 /* Search the data references in LOOP, and record the information into
4272 DATAREFS. Returns chrec_dont_know when failing to analyze a
4273 difficult case, returns NULL_TREE otherwise. */
4275 static tree
4276 find_data_references_in_bb (struct loop *loop, basic_block bb,
4277 VEC (data_reference_p, heap) **datarefs)
4279 gimple_stmt_iterator bsi;
4281 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4283 gimple stmt = gsi_stmt (bsi);
4285 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4287 struct data_reference *res;
4288 res = XCNEW (struct data_reference);
4289 VEC_safe_push (data_reference_p, heap, *datarefs, res);
4291 return chrec_dont_know;
4295 return NULL_TREE;
4298 /* Search the data references in LOOP, and record the information into
4299 DATAREFS. Returns chrec_dont_know when failing to analyze a
4300 difficult case, returns NULL_TREE otherwise.
4302 TODO: This function should be made smarter so that it can handle address
4303 arithmetic as if they were array accesses, etc. */
4305 tree
4306 find_data_references_in_loop (struct loop *loop,
4307 VEC (data_reference_p, heap) **datarefs)
4309 basic_block bb, *bbs;
4310 unsigned int i;
4312 bbs = get_loop_body_in_dom_order (loop);
4314 for (i = 0; i < loop->num_nodes; i++)
4316 bb = bbs[i];
4318 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4320 free (bbs);
4321 return chrec_dont_know;
4324 free (bbs);
4326 return NULL_TREE;
4329 /* Recursive helper function. */
4331 static bool
4332 find_loop_nest_1 (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4334 /* Inner loops of the nest should not contain siblings. Example:
4335 when there are two consecutive loops,
4337 | loop_0
4338 | loop_1
4339 | A[{0, +, 1}_1]
4340 | endloop_1
4341 | loop_2
4342 | A[{0, +, 1}_2]
4343 | endloop_2
4344 | endloop_0
4346 the dependence relation cannot be captured by the distance
4347 abstraction. */
4348 if (loop->next)
4349 return false;
4351 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4352 if (loop->inner)
4353 return find_loop_nest_1 (loop->inner, loop_nest);
4354 return true;
4357 /* Return false when the LOOP is not well nested. Otherwise return
4358 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4359 contain the loops from the outermost to the innermost, as they will
4360 appear in the classic distance vector. */
4362 bool
4363 find_loop_nest (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4365 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4366 if (loop->inner)
4367 return find_loop_nest_1 (loop->inner, loop_nest);
4368 return true;
4371 /* Returns true when the data dependences have been computed, false otherwise.
4372 Given a loop nest LOOP, the following vectors are returned:
4373 DATAREFS is initialized to all the array elements contained in this loop,
4374 DEPENDENCE_RELATIONS contains the relations between the data references.
4375 Compute read-read and self relations if
4376 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4378 bool
4379 compute_data_dependences_for_loop (struct loop *loop,
4380 bool compute_self_and_read_read_dependences,
4381 VEC (data_reference_p, heap) **datarefs,
4382 VEC (ddr_p, heap) **dependence_relations)
4384 bool res = true;
4385 VEC (loop_p, heap) *vloops = VEC_alloc (loop_p, heap, 3);
4387 memset (&dependence_stats, 0, sizeof (dependence_stats));
4389 /* If the loop nest is not well formed, or one of the data references
4390 is not computable, give up without spending time to compute other
4391 dependences. */
4392 if (!loop
4393 || !find_loop_nest (loop, &vloops)
4394 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know)
4396 struct data_dependence_relation *ddr;
4398 /* Insert a single relation into dependence_relations:
4399 chrec_dont_know. */
4400 ddr = initialize_data_dependence_relation (NULL, NULL, vloops);
4401 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4402 res = false;
4404 else
4405 compute_all_dependences (*datarefs, dependence_relations, vloops,
4406 compute_self_and_read_read_dependences);
4408 if (dump_file && (dump_flags & TDF_STATS))
4410 fprintf (dump_file, "Dependence tester statistics:\n");
4412 fprintf (dump_file, "Number of dependence tests: %d\n",
4413 dependence_stats.num_dependence_tests);
4414 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4415 dependence_stats.num_dependence_dependent);
4416 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4417 dependence_stats.num_dependence_independent);
4418 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4419 dependence_stats.num_dependence_undetermined);
4421 fprintf (dump_file, "Number of subscript tests: %d\n",
4422 dependence_stats.num_subscript_tests);
4423 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4424 dependence_stats.num_subscript_undetermined);
4425 fprintf (dump_file, "Number of same subscript function: %d\n",
4426 dependence_stats.num_same_subscript_function);
4428 fprintf (dump_file, "Number of ziv tests: %d\n",
4429 dependence_stats.num_ziv);
4430 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4431 dependence_stats.num_ziv_dependent);
4432 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4433 dependence_stats.num_ziv_independent);
4434 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4435 dependence_stats.num_ziv_unimplemented);
4437 fprintf (dump_file, "Number of siv tests: %d\n",
4438 dependence_stats.num_siv);
4439 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4440 dependence_stats.num_siv_dependent);
4441 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4442 dependence_stats.num_siv_independent);
4443 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4444 dependence_stats.num_siv_unimplemented);
4446 fprintf (dump_file, "Number of miv tests: %d\n",
4447 dependence_stats.num_miv);
4448 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4449 dependence_stats.num_miv_dependent);
4450 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4451 dependence_stats.num_miv_independent);
4452 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4453 dependence_stats.num_miv_unimplemented);
4456 return res;
4459 /* Returns true when the data dependences for the basic block BB have been
4460 computed, false otherwise.
4461 DATAREFS is initialized to all the array elements contained in this basic
4462 block, DEPENDENCE_RELATIONS contains the relations between the data
4463 references. Compute read-read and self relations if
4464 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4465 bool
4466 compute_data_dependences_for_bb (basic_block bb,
4467 bool compute_self_and_read_read_dependences,
4468 VEC (data_reference_p, heap) **datarefs,
4469 VEC (ddr_p, heap) **dependence_relations)
4471 if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know)
4472 return false;
4474 compute_all_dependences (*datarefs, dependence_relations, NULL,
4475 compute_self_and_read_read_dependences);
4476 return true;
4479 /* Entry point (for testing only). Analyze all the data references
4480 and the dependence relations in LOOP.
4482 The data references are computed first.
4484 A relation on these nodes is represented by a complete graph. Some
4485 of the relations could be of no interest, thus the relations can be
4486 computed on demand.
4488 In the following function we compute all the relations. This is
4489 just a first implementation that is here for:
4490 - for showing how to ask for the dependence relations,
4491 - for the debugging the whole dependence graph,
4492 - for the dejagnu testcases and maintenance.
4494 It is possible to ask only for a part of the graph, avoiding to
4495 compute the whole dependence graph. The computed dependences are
4496 stored in a knowledge base (KB) such that later queries don't
4497 recompute the same information. The implementation of this KB is
4498 transparent to the optimizer, and thus the KB can be changed with a
4499 more efficient implementation, or the KB could be disabled. */
4500 static void
4501 analyze_all_data_dependences (struct loop *loop)
4503 unsigned int i;
4504 int nb_data_refs = 10;
4505 VEC (data_reference_p, heap) *datarefs =
4506 VEC_alloc (data_reference_p, heap, nb_data_refs);
4507 VEC (ddr_p, heap) *dependence_relations =
4508 VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs);
4510 /* Compute DDs on the whole function. */
4511 compute_data_dependences_for_loop (loop, false, &datarefs,
4512 &dependence_relations);
4514 if (dump_file)
4516 dump_data_dependence_relations (dump_file, dependence_relations);
4517 fprintf (dump_file, "\n\n");
4519 if (dump_flags & TDF_DETAILS)
4520 dump_dist_dir_vectors (dump_file, dependence_relations);
4522 if (dump_flags & TDF_STATS)
4524 unsigned nb_top_relations = 0;
4525 unsigned nb_bot_relations = 0;
4526 unsigned nb_chrec_relations = 0;
4527 struct data_dependence_relation *ddr;
4529 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4531 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4532 nb_top_relations++;
4534 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4535 nb_bot_relations++;
4537 else
4538 nb_chrec_relations++;
4541 gather_stats_on_scev_database ();
4545 free_dependence_relations (dependence_relations);
4546 free_data_refs (datarefs);
4549 /* Computes all the data dependences and check that the results of
4550 several analyzers are the same. */
4552 void
4553 tree_check_data_deps (void)
4555 loop_iterator li;
4556 struct loop *loop_nest;
4558 FOR_EACH_LOOP (li, loop_nest, 0)
4559 analyze_all_data_dependences (loop_nest);
4562 /* Free the memory used by a data dependence relation DDR. */
4564 void
4565 free_dependence_relation (struct data_dependence_relation *ddr)
4567 if (ddr == NULL)
4568 return;
4570 if (DDR_SUBSCRIPTS (ddr))
4571 free_subscripts (DDR_SUBSCRIPTS (ddr));
4572 if (DDR_DIST_VECTS (ddr))
4573 VEC_free (lambda_vector, heap, DDR_DIST_VECTS (ddr));
4574 if (DDR_DIR_VECTS (ddr))
4575 VEC_free (lambda_vector, heap, DDR_DIR_VECTS (ddr));
4577 free (ddr);
4580 /* Free the memory used by the data dependence relations from
4581 DEPENDENCE_RELATIONS. */
4583 void
4584 free_dependence_relations (VEC (ddr_p, heap) *dependence_relations)
4586 unsigned int i;
4587 struct data_dependence_relation *ddr;
4588 VEC (loop_p, heap) *loop_nest = NULL;
4590 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4592 if (ddr == NULL)
4593 continue;
4594 if (loop_nest == NULL)
4595 loop_nest = DDR_LOOP_NEST (ddr);
4596 else
4597 gcc_assert (DDR_LOOP_NEST (ddr) == NULL
4598 || DDR_LOOP_NEST (ddr) == loop_nest);
4599 free_dependence_relation (ddr);
4602 if (loop_nest)
4603 VEC_free (loop_p, heap, loop_nest);
4604 VEC_free (ddr_p, heap, dependence_relations);
4607 /* Free the memory used by the data references from DATAREFS. */
4609 void
4610 free_data_refs (VEC (data_reference_p, heap) *datarefs)
4612 unsigned int i;
4613 struct data_reference *dr;
4615 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
4616 free_data_ref (dr);
4617 VEC_free (data_reference_p, heap, datarefs);
4622 /* Dump vertex I in RDG to FILE. */
4624 void
4625 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
4627 struct vertex *v = &(rdg->vertices[i]);
4628 struct graph_edge *e;
4630 fprintf (file, "(vertex %d: (%s%s) (in:", i,
4631 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
4632 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
4634 if (v->pred)
4635 for (e = v->pred; e; e = e->pred_next)
4636 fprintf (file, " %d", e->src);
4638 fprintf (file, ") (out:");
4640 if (v->succ)
4641 for (e = v->succ; e; e = e->succ_next)
4642 fprintf (file, " %d", e->dest);
4644 fprintf (file, ") \n");
4645 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
4646 fprintf (file, ")\n");
4649 /* Call dump_rdg_vertex on stderr. */
4651 DEBUG_FUNCTION void
4652 debug_rdg_vertex (struct graph *rdg, int i)
4654 dump_rdg_vertex (stderr, rdg, i);
4657 /* Dump component C of RDG to FILE. If DUMPED is non-null, set the
4658 dumped vertices to that bitmap. */
4660 void dump_rdg_component (FILE *file, struct graph *rdg, int c, bitmap dumped)
4662 int i;
4664 fprintf (file, "(%d\n", c);
4666 for (i = 0; i < rdg->n_vertices; i++)
4667 if (rdg->vertices[i].component == c)
4669 if (dumped)
4670 bitmap_set_bit (dumped, i);
4672 dump_rdg_vertex (file, rdg, i);
4675 fprintf (file, ")\n");
4678 /* Call dump_rdg_vertex on stderr. */
4680 DEBUG_FUNCTION void
4681 debug_rdg_component (struct graph *rdg, int c)
4683 dump_rdg_component (stderr, rdg, c, NULL);
4686 /* Dump the reduced dependence graph RDG to FILE. */
4688 void
4689 dump_rdg (FILE *file, struct graph *rdg)
4691 int i;
4692 bitmap dumped = BITMAP_ALLOC (NULL);
4694 fprintf (file, "(rdg\n");
4696 for (i = 0; i < rdg->n_vertices; i++)
4697 if (!bitmap_bit_p (dumped, i))
4698 dump_rdg_component (file, rdg, rdg->vertices[i].component, dumped);
4700 fprintf (file, ")\n");
4701 BITMAP_FREE (dumped);
4704 /* Call dump_rdg on stderr. */
4706 DEBUG_FUNCTION void
4707 debug_rdg (struct graph *rdg)
4709 dump_rdg (stderr, rdg);
4712 /* This structure is used for recording the mapping statement index in
4713 the RDG. */
4715 struct GTY(()) rdg_vertex_info
4717 gimple stmt;
4718 int index;
4721 /* Returns the index of STMT in RDG. */
4724 rdg_vertex_for_stmt (struct graph *rdg, gimple stmt)
4726 struct rdg_vertex_info rvi, *slot;
4728 rvi.stmt = stmt;
4729 slot = (struct rdg_vertex_info *) htab_find (rdg->indices, &rvi);
4731 if (!slot)
4732 return -1;
4734 return slot->index;
4737 /* Creates an edge in RDG for each distance vector from DDR. The
4738 order that we keep track of in the RDG is the order in which
4739 statements have to be executed. */
4741 static void
4742 create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4744 struct graph_edge *e;
4745 int va, vb;
4746 data_reference_p dra = DDR_A (ddr);
4747 data_reference_p drb = DDR_B (ddr);
4748 unsigned level = ddr_dependence_level (ddr);
4750 /* For non scalar dependences, when the dependence is REVERSED,
4751 statement B has to be executed before statement A. */
4752 if (level > 0
4753 && !DDR_REVERSED_P (ddr))
4755 data_reference_p tmp = dra;
4756 dra = drb;
4757 drb = tmp;
4760 va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
4761 vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
4763 if (va < 0 || vb < 0)
4764 return;
4766 e = add_edge (rdg, va, vb);
4767 e->data = XNEW (struct rdg_edge);
4769 RDGE_LEVEL (e) = level;
4770 RDGE_RELATION (e) = ddr;
4772 /* Determines the type of the data dependence. */
4773 if (DR_IS_READ (dra) && DR_IS_READ (drb))
4774 RDGE_TYPE (e) = input_dd;
4775 else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
4776 RDGE_TYPE (e) = output_dd;
4777 else if (DR_IS_WRITE (dra) && DR_IS_READ (drb))
4778 RDGE_TYPE (e) = flow_dd;
4779 else if (DR_IS_READ (dra) && DR_IS_WRITE (drb))
4780 RDGE_TYPE (e) = anti_dd;
4783 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
4784 the index of DEF in RDG. */
4786 static void
4787 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
4789 use_operand_p imm_use_p;
4790 imm_use_iterator iterator;
4792 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
4794 struct graph_edge *e;
4795 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
4797 if (use < 0)
4798 continue;
4800 e = add_edge (rdg, idef, use);
4801 e->data = XNEW (struct rdg_edge);
4802 RDGE_TYPE (e) = flow_dd;
4803 RDGE_RELATION (e) = NULL;
4807 /* Creates the edges of the reduced dependence graph RDG. */
4809 static void
4810 create_rdg_edges (struct graph *rdg, VEC (ddr_p, heap) *ddrs)
4812 int i;
4813 struct data_dependence_relation *ddr;
4814 def_operand_p def_p;
4815 ssa_op_iter iter;
4817 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
4818 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4819 create_rdg_edge_for_ddr (rdg, ddr);
4821 for (i = 0; i < rdg->n_vertices; i++)
4822 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
4823 iter, SSA_OP_DEF)
4824 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
4827 /* Build the vertices of the reduced dependence graph RDG. */
4829 void
4830 create_rdg_vertices (struct graph *rdg, VEC (gimple, heap) *stmts)
4832 int i, j;
4833 gimple stmt;
4835 FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
4837 VEC (data_ref_loc, heap) *references;
4838 data_ref_loc *ref;
4839 struct vertex *v = &(rdg->vertices[i]);
4840 struct rdg_vertex_info *rvi = XNEW (struct rdg_vertex_info);
4841 struct rdg_vertex_info **slot;
4843 rvi->stmt = stmt;
4844 rvi->index = i;
4845 slot = (struct rdg_vertex_info **) htab_find_slot (rdg->indices, rvi, INSERT);
4847 if (!*slot)
4848 *slot = rvi;
4849 else
4850 free (rvi);
4852 v->data = XNEW (struct rdg_vertex);
4853 RDG_STMT (rdg, i) = stmt;
4855 RDG_MEM_WRITE_STMT (rdg, i) = false;
4856 RDG_MEM_READS_STMT (rdg, i) = false;
4857 if (gimple_code (stmt) == GIMPLE_PHI)
4858 continue;
4860 get_references_in_stmt (stmt, &references);
4861 FOR_EACH_VEC_ELT (data_ref_loc, references, j, ref)
4862 if (!ref->is_read)
4863 RDG_MEM_WRITE_STMT (rdg, i) = true;
4864 else
4865 RDG_MEM_READS_STMT (rdg, i) = true;
4867 VEC_free (data_ref_loc, heap, references);
4871 /* Initialize STMTS with all the statements of LOOP. When
4872 INCLUDE_PHIS is true, include also the PHI nodes. The order in
4873 which we discover statements is important as
4874 generate_loops_for_partition is using the same traversal for
4875 identifying statements. */
4877 static void
4878 stmts_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
4880 unsigned int i;
4881 basic_block *bbs = get_loop_body_in_dom_order (loop);
4883 for (i = 0; i < loop->num_nodes; i++)
4885 basic_block bb = bbs[i];
4886 gimple_stmt_iterator bsi;
4887 gimple stmt;
4889 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4890 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
4892 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4894 stmt = gsi_stmt (bsi);
4895 if (gimple_code (stmt) != GIMPLE_LABEL)
4896 VEC_safe_push (gimple, heap, *stmts, stmt);
4900 free (bbs);
4903 /* Returns true when all the dependences are computable. */
4905 static bool
4906 known_dependences_p (VEC (ddr_p, heap) *dependence_relations)
4908 ddr_p ddr;
4909 unsigned int i;
4911 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4912 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4913 return false;
4915 return true;
4918 /* Computes a hash function for element ELT. */
4920 static hashval_t
4921 hash_stmt_vertex_info (const void *elt)
4923 const struct rdg_vertex_info *const rvi =
4924 (const struct rdg_vertex_info *) elt;
4925 gimple stmt = rvi->stmt;
4927 return htab_hash_pointer (stmt);
4930 /* Compares database elements E1 and E2. */
4932 static int
4933 eq_stmt_vertex_info (const void *e1, const void *e2)
4935 const struct rdg_vertex_info *elt1 = (const struct rdg_vertex_info *) e1;
4936 const struct rdg_vertex_info *elt2 = (const struct rdg_vertex_info *) e2;
4938 return elt1->stmt == elt2->stmt;
4941 /* Free the element E. */
4943 static void
4944 hash_stmt_vertex_del (void *e)
4946 free (e);
4949 /* Build the Reduced Dependence Graph (RDG) with one vertex per
4950 statement of the loop nest, and one edge per data dependence or
4951 scalar dependence. */
4953 struct graph *
4954 build_empty_rdg (int n_stmts)
4956 int nb_data_refs = 10;
4957 struct graph *rdg = new_graph (n_stmts);
4959 rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
4960 eq_stmt_vertex_info, hash_stmt_vertex_del);
4961 return rdg;
4964 /* Build the Reduced Dependence Graph (RDG) with one vertex per
4965 statement of the loop nest, and one edge per data dependence or
4966 scalar dependence. */
4968 struct graph *
4969 build_rdg (struct loop *loop)
4971 int nb_data_refs = 10;
4972 struct graph *rdg = NULL;
4973 VEC (ddr_p, heap) *dependence_relations;
4974 VEC (data_reference_p, heap) *datarefs;
4975 VEC (gimple, heap) *stmts = VEC_alloc (gimple, heap, nb_data_refs);
4977 dependence_relations = VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs) ;
4978 datarefs = VEC_alloc (data_reference_p, heap, nb_data_refs);
4979 compute_data_dependences_for_loop (loop,
4980 false,
4981 &datarefs,
4982 &dependence_relations);
4984 if (!known_dependences_p (dependence_relations))
4986 free_dependence_relations (dependence_relations);
4987 free_data_refs (datarefs);
4988 VEC_free (gimple, heap, stmts);
4990 return rdg;
4993 stmts_from_loop (loop, &stmts);
4994 rdg = build_empty_rdg (VEC_length (gimple, stmts));
4996 rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
4997 eq_stmt_vertex_info, hash_stmt_vertex_del);
4998 create_rdg_vertices (rdg, stmts);
4999 create_rdg_edges (rdg, dependence_relations);
5001 VEC_free (gimple, heap, stmts);
5002 return rdg;
5005 /* Free the reduced dependence graph RDG. */
5007 void
5008 free_rdg (struct graph *rdg)
5010 int i;
5012 for (i = 0; i < rdg->n_vertices; i++)
5013 free (rdg->vertices[i].data);
5015 htab_delete (rdg->indices);
5016 free_graph (rdg);
5019 /* Initialize STMTS with all the statements of LOOP that contain a
5020 store to memory. */
5022 void
5023 stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5025 unsigned int i;
5026 basic_block *bbs = get_loop_body_in_dom_order (loop);
5028 for (i = 0; i < loop->num_nodes; i++)
5030 basic_block bb = bbs[i];
5031 gimple_stmt_iterator bsi;
5033 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5034 if (gimple_vdef (gsi_stmt (bsi)))
5035 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
5038 free (bbs);
5041 /* Initialize STMTS with all the statements of LOOP that contain a
5042 store to memory of the form "A[i] = 0". */
5044 void
5045 stores_zero_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5047 unsigned int i;
5048 basic_block bb;
5049 gimple_stmt_iterator si;
5050 gimple stmt;
5051 tree op;
5052 basic_block *bbs = get_loop_body_in_dom_order (loop);
5054 for (i = 0; i < loop->num_nodes; i++)
5055 for (bb = bbs[i], si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5056 if ((stmt = gsi_stmt (si))
5057 && gimple_vdef (stmt)
5058 && is_gimple_assign (stmt)
5059 && gimple_assign_rhs_code (stmt) == INTEGER_CST
5060 && (op = gimple_assign_rhs1 (stmt))
5061 && (integer_zerop (op) || real_zerop (op)))
5062 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (si));
5064 free (bbs);
5067 /* For a data reference REF, return the declaration of its base
5068 address or NULL_TREE if the base is not determined. */
5070 static inline tree
5071 ref_base_address (gimple stmt, data_ref_loc *ref)
5073 tree base = NULL_TREE;
5074 tree base_address;
5075 struct data_reference *dr = XCNEW (struct data_reference);
5077 DR_STMT (dr) = stmt;
5078 DR_REF (dr) = *ref->pos;
5079 dr_analyze_innermost (dr);
5080 base_address = DR_BASE_ADDRESS (dr);
5082 if (!base_address)
5083 goto end;
5085 switch (TREE_CODE (base_address))
5087 case ADDR_EXPR:
5088 base = TREE_OPERAND (base_address, 0);
5089 break;
5091 default:
5092 base = base_address;
5093 break;
5096 end:
5097 free_data_ref (dr);
5098 return base;
5101 /* Determines whether the statement from vertex V of the RDG has a
5102 definition used outside the loop that contains this statement. */
5104 bool
5105 rdg_defs_used_in_other_loops_p (struct graph *rdg, int v)
5107 gimple stmt = RDG_STMT (rdg, v);
5108 struct loop *loop = loop_containing_stmt (stmt);
5109 use_operand_p imm_use_p;
5110 imm_use_iterator iterator;
5111 ssa_op_iter it;
5112 def_operand_p def_p;
5114 if (!loop)
5115 return true;
5117 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, it, SSA_OP_DEF)
5119 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, DEF_FROM_PTR (def_p))
5121 if (loop_containing_stmt (USE_STMT (imm_use_p)) != loop)
5122 return true;
5126 return false;
5129 /* Determines whether statements S1 and S2 access to similar memory
5130 locations. Two memory accesses are considered similar when they
5131 have the same base address declaration, i.e. when their
5132 ref_base_address is the same. */
5134 bool
5135 have_similar_memory_accesses (gimple s1, gimple s2)
5137 bool res = false;
5138 unsigned i, j;
5139 VEC (data_ref_loc, heap) *refs1, *refs2;
5140 data_ref_loc *ref1, *ref2;
5142 get_references_in_stmt (s1, &refs1);
5143 get_references_in_stmt (s2, &refs2);
5145 FOR_EACH_VEC_ELT (data_ref_loc, refs1, i, ref1)
5147 tree base1 = ref_base_address (s1, ref1);
5149 if (base1)
5150 FOR_EACH_VEC_ELT (data_ref_loc, refs2, j, ref2)
5151 if (base1 == ref_base_address (s2, ref2))
5153 res = true;
5154 goto end;
5158 end:
5159 VEC_free (data_ref_loc, heap, refs1);
5160 VEC_free (data_ref_loc, heap, refs2);
5161 return res;
5164 /* Helper function for the hashtab. */
5166 static int
5167 have_similar_memory_accesses_1 (const void *s1, const void *s2)
5169 return have_similar_memory_accesses (CONST_CAST_GIMPLE ((const_gimple) s1),
5170 CONST_CAST_GIMPLE ((const_gimple) s2));
5173 /* Helper function for the hashtab. */
5175 static hashval_t
5176 ref_base_address_1 (const void *s)
5178 gimple stmt = CONST_CAST_GIMPLE ((const_gimple) s);
5179 unsigned i;
5180 VEC (data_ref_loc, heap) *refs;
5181 data_ref_loc *ref;
5182 hashval_t res = 0;
5184 get_references_in_stmt (stmt, &refs);
5186 FOR_EACH_VEC_ELT (data_ref_loc, refs, i, ref)
5187 if (!ref->is_read)
5189 res = htab_hash_pointer (ref_base_address (stmt, ref));
5190 break;
5193 VEC_free (data_ref_loc, heap, refs);
5194 return res;
5197 /* Try to remove duplicated write data references from STMTS. */
5199 void
5200 remove_similar_memory_refs (VEC (gimple, heap) **stmts)
5202 unsigned i;
5203 gimple stmt;
5204 htab_t seen = htab_create (VEC_length (gimple, *stmts), ref_base_address_1,
5205 have_similar_memory_accesses_1, NULL);
5207 for (i = 0; VEC_iterate (gimple, *stmts, i, stmt); )
5209 void **slot;
5211 slot = htab_find_slot (seen, stmt, INSERT);
5213 if (*slot)
5214 VEC_ordered_remove (gimple, *stmts, i);
5215 else
5217 *slot = (void *) stmt;
5218 i++;
5222 htab_delete (seen);
5225 /* Returns the index of PARAMETER in the parameters vector of the
5226 ACCESS_MATRIX. If PARAMETER does not exist return -1. */
5229 access_matrix_get_index_for_parameter (tree parameter,
5230 struct access_matrix *access_matrix)
5232 int i;
5233 VEC (tree,heap) *lambda_parameters = AM_PARAMETERS (access_matrix);
5234 tree lambda_parameter;
5236 FOR_EACH_VEC_ELT (tree, lambda_parameters, i, lambda_parameter)
5237 if (lambda_parameter == parameter)
5238 return i + AM_NB_INDUCTION_VARS (access_matrix);
5240 return -1;