gcc:
[official-gcc.git] / gcc / tree-data-ref.c
blobd58542c91cae622dada11eccd4130c948c51d8ff
1 /* Data references and dependences detectors.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* This pass walks a given loop structure searching for array
23 references. The information about the array accesses is recorded
24 in DATA_REFERENCE structures.
26 The basic test for determining the dependences is:
27 given two access functions chrec1 and chrec2 to a same array, and
28 x and y two vectors from the iteration domain, the same element of
29 the array is accessed twice at iterations x and y if and only if:
30 | chrec1 (x) == chrec2 (y).
32 The goals of this analysis are:
34 - to determine the independence: the relation between two
35 independent accesses is qualified with the chrec_known (this
36 information allows a loop parallelization),
38 - when two data references access the same data, to qualify the
39 dependence relation with classic dependence representations:
41 - distance vectors
42 - direction vectors
43 - loop carried level dependence
44 - polyhedron dependence
45 or with the chains of recurrences based representation,
47 - to define a knowledge base for storing the data dependence
48 information,
50 - to define an interface to access this data.
53 Definitions:
55 - subscript: given two array accesses a subscript is the tuple
56 composed of the access functions for a given dimension. Example:
57 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
58 (f1, g1), (f2, g2), (f3, g3).
60 - Diophantine equation: an equation whose coefficients and
61 solutions are integer constants, for example the equation
62 | 3*x + 2*y = 1
63 has an integer solution x = 1 and y = -1.
65 References:
67 - "Advanced Compilation for High Performance Computing" by Randy
68 Allen and Ken Kennedy.
69 http://citeseer.ist.psu.edu/goff91practical.html
71 - "Loop Transformations for Restructuring Compilers - The Foundations"
72 by Utpal Banerjee.
77 #include "config.h"
78 #include "system.h"
79 #include "coretypes.h"
80 #include "gimple-pretty-print.h"
81 #include "tree-flow.h"
82 #include "cfgloop.h"
83 #include "tree-data-ref.h"
84 #include "tree-scalar-evolution.h"
85 #include "tree-pass.h"
86 #include "langhooks.h"
88 static struct datadep_stats
90 int num_dependence_tests;
91 int num_dependence_dependent;
92 int num_dependence_independent;
93 int num_dependence_undetermined;
95 int num_subscript_tests;
96 int num_subscript_undetermined;
97 int num_same_subscript_function;
99 int num_ziv;
100 int num_ziv_independent;
101 int num_ziv_dependent;
102 int num_ziv_unimplemented;
104 int num_siv;
105 int num_siv_independent;
106 int num_siv_dependent;
107 int num_siv_unimplemented;
109 int num_miv;
110 int num_miv_independent;
111 int num_miv_dependent;
112 int num_miv_unimplemented;
113 } dependence_stats;
115 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
116 struct data_reference *,
117 struct data_reference *,
118 struct loop *);
119 /* Returns true iff A divides B. */
121 static inline bool
122 tree_fold_divides_p (const_tree a, const_tree b)
124 gcc_assert (TREE_CODE (a) == INTEGER_CST);
125 gcc_assert (TREE_CODE (b) == INTEGER_CST);
126 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
129 /* Returns true iff A divides B. */
131 static inline bool
132 int_divides_p (int a, int b)
134 return ((b % a) == 0);
139 /* Dump into FILE all the data references from DATAREFS. */
141 void
142 dump_data_references (FILE *file, VEC (data_reference_p, heap) *datarefs)
144 unsigned int i;
145 struct data_reference *dr;
147 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
148 dump_data_reference (file, dr);
151 /* Dump into STDERR all the data references from DATAREFS. */
153 DEBUG_FUNCTION void
154 debug_data_references (VEC (data_reference_p, heap) *datarefs)
156 dump_data_references (stderr, datarefs);
159 /* Dump to STDERR all the dependence relations from DDRS. */
161 DEBUG_FUNCTION void
162 debug_data_dependence_relations (VEC (ddr_p, heap) *ddrs)
164 dump_data_dependence_relations (stderr, ddrs);
167 /* Dump into FILE all the dependence relations from DDRS. */
169 void
170 dump_data_dependence_relations (FILE *file,
171 VEC (ddr_p, heap) *ddrs)
173 unsigned int i;
174 struct data_dependence_relation *ddr;
176 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
177 dump_data_dependence_relation (file, ddr);
180 /* Print to STDERR the data_reference DR. */
182 DEBUG_FUNCTION void
183 debug_data_reference (struct data_reference *dr)
185 dump_data_reference (stderr, dr);
188 /* Dump function for a DATA_REFERENCE structure. */
190 void
191 dump_data_reference (FILE *outf,
192 struct data_reference *dr)
194 unsigned int i;
196 fprintf (outf, "#(Data Ref: \n");
197 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
198 fprintf (outf, "# stmt: ");
199 print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
200 fprintf (outf, "# ref: ");
201 print_generic_stmt (outf, DR_REF (dr), 0);
202 fprintf (outf, "# base_object: ");
203 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
205 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
207 fprintf (outf, "# Access function %d: ", i);
208 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
210 fprintf (outf, "#)\n");
213 /* Dumps the affine function described by FN to the file OUTF. */
215 static void
216 dump_affine_function (FILE *outf, affine_fn fn)
218 unsigned i;
219 tree coef;
221 print_generic_expr (outf, VEC_index (tree, fn, 0), TDF_SLIM);
222 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
224 fprintf (outf, " + ");
225 print_generic_expr (outf, coef, TDF_SLIM);
226 fprintf (outf, " * x_%u", i);
230 /* Dumps the conflict function CF to the file OUTF. */
232 static void
233 dump_conflict_function (FILE *outf, conflict_function *cf)
235 unsigned i;
237 if (cf->n == NO_DEPENDENCE)
238 fprintf (outf, "no dependence\n");
239 else if (cf->n == NOT_KNOWN)
240 fprintf (outf, "not known\n");
241 else
243 for (i = 0; i < cf->n; i++)
245 fprintf (outf, "[");
246 dump_affine_function (outf, cf->fns[i]);
247 fprintf (outf, "]\n");
252 /* Dump function for a SUBSCRIPT structure. */
254 void
255 dump_subscript (FILE *outf, struct subscript *subscript)
257 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
259 fprintf (outf, "\n (subscript \n");
260 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
261 dump_conflict_function (outf, cf);
262 if (CF_NONTRIVIAL_P (cf))
264 tree last_iteration = SUB_LAST_CONFLICT (subscript);
265 fprintf (outf, " last_conflict: ");
266 print_generic_stmt (outf, last_iteration, 0);
269 cf = SUB_CONFLICTS_IN_B (subscript);
270 fprintf (outf, " iterations_that_access_an_element_twice_in_B: ");
271 dump_conflict_function (outf, cf);
272 if (CF_NONTRIVIAL_P (cf))
274 tree last_iteration = SUB_LAST_CONFLICT (subscript);
275 fprintf (outf, " last_conflict: ");
276 print_generic_stmt (outf, last_iteration, 0);
279 fprintf (outf, " (Subscript distance: ");
280 print_generic_stmt (outf, SUB_DISTANCE (subscript), 0);
281 fprintf (outf, " )\n");
282 fprintf (outf, " )\n");
285 /* Print the classic direction vector DIRV to OUTF. */
287 void
288 print_direction_vector (FILE *outf,
289 lambda_vector dirv,
290 int length)
292 int eq;
294 for (eq = 0; eq < length; eq++)
296 enum data_dependence_direction dir = ((enum data_dependence_direction)
297 dirv[eq]);
299 switch (dir)
301 case dir_positive:
302 fprintf (outf, " +");
303 break;
304 case dir_negative:
305 fprintf (outf, " -");
306 break;
307 case dir_equal:
308 fprintf (outf, " =");
309 break;
310 case dir_positive_or_equal:
311 fprintf (outf, " +=");
312 break;
313 case dir_positive_or_negative:
314 fprintf (outf, " +-");
315 break;
316 case dir_negative_or_equal:
317 fprintf (outf, " -=");
318 break;
319 case dir_star:
320 fprintf (outf, " *");
321 break;
322 default:
323 fprintf (outf, "indep");
324 break;
327 fprintf (outf, "\n");
330 /* Print a vector of direction vectors. */
332 void
333 print_dir_vectors (FILE *outf, VEC (lambda_vector, heap) *dir_vects,
334 int length)
336 unsigned j;
337 lambda_vector v;
339 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, v)
340 print_direction_vector (outf, v, length);
343 /* Print out a vector VEC of length N to OUTFILE. */
345 static inline void
346 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
348 int i;
350 for (i = 0; i < n; i++)
351 fprintf (outfile, "%3d ", vector[i]);
352 fprintf (outfile, "\n");
355 /* Print a vector of distance vectors. */
357 void
358 print_dist_vectors (FILE *outf, VEC (lambda_vector, heap) *dist_vects,
359 int length)
361 unsigned j;
362 lambda_vector v;
364 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, v)
365 print_lambda_vector (outf, v, length);
368 /* Debug version. */
370 DEBUG_FUNCTION void
371 debug_data_dependence_relation (struct data_dependence_relation *ddr)
373 dump_data_dependence_relation (stderr, ddr);
376 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
378 void
379 dump_data_dependence_relation (FILE *outf,
380 struct data_dependence_relation *ddr)
382 struct data_reference *dra, *drb;
384 fprintf (outf, "(Data Dep: \n");
386 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
388 if (ddr)
390 dra = DDR_A (ddr);
391 drb = DDR_B (ddr);
392 if (dra)
393 dump_data_reference (outf, dra);
394 else
395 fprintf (outf, " (nil)\n");
396 if (drb)
397 dump_data_reference (outf, drb);
398 else
399 fprintf (outf, " (nil)\n");
401 fprintf (outf, " (don't know)\n)\n");
402 return;
405 dra = DDR_A (ddr);
406 drb = DDR_B (ddr);
407 dump_data_reference (outf, dra);
408 dump_data_reference (outf, drb);
410 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
411 fprintf (outf, " (no dependence)\n");
413 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
415 unsigned int i;
416 struct loop *loopi;
418 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
420 fprintf (outf, " access_fn_A: ");
421 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
422 fprintf (outf, " access_fn_B: ");
423 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
424 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
427 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
428 fprintf (outf, " loop nest: (");
429 FOR_EACH_VEC_ELT (loop_p, DDR_LOOP_NEST (ddr), i, loopi)
430 fprintf (outf, "%d ", loopi->num);
431 fprintf (outf, ")\n");
433 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
435 fprintf (outf, " distance_vector: ");
436 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
437 DDR_NB_LOOPS (ddr));
440 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
442 fprintf (outf, " direction_vector: ");
443 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
444 DDR_NB_LOOPS (ddr));
448 fprintf (outf, ")\n");
451 /* Dump function for a DATA_DEPENDENCE_DIRECTION structure. */
453 void
454 dump_data_dependence_direction (FILE *file,
455 enum data_dependence_direction dir)
457 switch (dir)
459 case dir_positive:
460 fprintf (file, "+");
461 break;
463 case dir_negative:
464 fprintf (file, "-");
465 break;
467 case dir_equal:
468 fprintf (file, "=");
469 break;
471 case dir_positive_or_negative:
472 fprintf (file, "+-");
473 break;
475 case dir_positive_or_equal:
476 fprintf (file, "+=");
477 break;
479 case dir_negative_or_equal:
480 fprintf (file, "-=");
481 break;
483 case dir_star:
484 fprintf (file, "*");
485 break;
487 default:
488 break;
492 /* Dumps the distance and direction vectors in FILE. DDRS contains
493 the dependence relations, and VECT_SIZE is the size of the
494 dependence vectors, or in other words the number of loops in the
495 considered nest. */
497 void
498 dump_dist_dir_vectors (FILE *file, VEC (ddr_p, heap) *ddrs)
500 unsigned int i, j;
501 struct data_dependence_relation *ddr;
502 lambda_vector v;
504 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
505 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
507 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), j, v)
509 fprintf (file, "DISTANCE_V (");
510 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
511 fprintf (file, ")\n");
514 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), j, v)
516 fprintf (file, "DIRECTION_V (");
517 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
518 fprintf (file, ")\n");
522 fprintf (file, "\n\n");
525 /* Dumps the data dependence relations DDRS in FILE. */
527 void
528 dump_ddrs (FILE *file, VEC (ddr_p, heap) *ddrs)
530 unsigned int i;
531 struct data_dependence_relation *ddr;
533 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
534 dump_data_dependence_relation (file, ddr);
536 fprintf (file, "\n\n");
539 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
540 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
541 constant of type ssizetype, and returns true. If we cannot do this
542 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
543 is returned. */
545 static bool
546 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
547 tree *var, tree *off)
549 tree var0, var1;
550 tree off0, off1;
551 enum tree_code ocode = code;
553 *var = NULL_TREE;
554 *off = NULL_TREE;
556 switch (code)
558 case INTEGER_CST:
559 *var = build_int_cst (type, 0);
560 *off = fold_convert (ssizetype, op0);
561 return true;
563 case POINTER_PLUS_EXPR:
564 ocode = PLUS_EXPR;
565 /* FALLTHROUGH */
566 case PLUS_EXPR:
567 case MINUS_EXPR:
568 split_constant_offset (op0, &var0, &off0);
569 split_constant_offset (op1, &var1, &off1);
570 *var = fold_build2 (code, type, var0, var1);
571 *off = size_binop (ocode, off0, off1);
572 return true;
574 case MULT_EXPR:
575 if (TREE_CODE (op1) != INTEGER_CST)
576 return false;
578 split_constant_offset (op0, &var0, &off0);
579 *var = fold_build2 (MULT_EXPR, type, var0, op1);
580 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
581 return true;
583 case ADDR_EXPR:
585 tree base, poffset;
586 HOST_WIDE_INT pbitsize, pbitpos;
587 enum machine_mode pmode;
588 int punsignedp, pvolatilep;
590 op0 = TREE_OPERAND (op0, 0);
591 if (!handled_component_p (op0))
592 return false;
594 base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
595 &pmode, &punsignedp, &pvolatilep, false);
597 if (pbitpos % BITS_PER_UNIT != 0)
598 return false;
599 base = build_fold_addr_expr (base);
600 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
602 if (poffset)
604 split_constant_offset (poffset, &poffset, &off1);
605 off0 = size_binop (PLUS_EXPR, off0, off1);
606 if (POINTER_TYPE_P (TREE_TYPE (base)))
607 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base),
608 base, fold_convert (sizetype, poffset));
609 else
610 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
611 fold_convert (TREE_TYPE (base), poffset));
614 var0 = fold_convert (type, base);
616 /* If variable length types are involved, punt, otherwise casts
617 might be converted into ARRAY_REFs in gimplify_conversion.
618 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
619 possibly no longer appears in current GIMPLE, might resurface.
620 This perhaps could run
621 if (CONVERT_EXPR_P (var0))
623 gimplify_conversion (&var0);
624 // Attempt to fill in any within var0 found ARRAY_REF's
625 // element size from corresponding op embedded ARRAY_REF,
626 // if unsuccessful, just punt.
627 } */
628 while (POINTER_TYPE_P (type))
629 type = TREE_TYPE (type);
630 if (int_size_in_bytes (type) < 0)
631 return false;
633 *var = var0;
634 *off = off0;
635 return true;
638 case SSA_NAME:
640 gimple def_stmt = SSA_NAME_DEF_STMT (op0);
641 enum tree_code subcode;
643 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
644 return false;
646 var0 = gimple_assign_rhs1 (def_stmt);
647 subcode = gimple_assign_rhs_code (def_stmt);
648 var1 = gimple_assign_rhs2 (def_stmt);
650 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
652 CASE_CONVERT:
654 /* We must not introduce undefined overflow, and we must not change the value.
655 Hence we're okay if the inner type doesn't overflow to start with
656 (pointer or signed), the outer type also is an integer or pointer
657 and the outer precision is at least as large as the inner. */
658 tree itype = TREE_TYPE (op0);
659 if ((POINTER_TYPE_P (itype)
660 || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
661 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
662 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
664 split_constant_offset (op0, &var0, off);
665 *var = fold_convert (type, var0);
666 return true;
668 return false;
671 default:
672 return false;
676 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
677 will be ssizetype. */
679 void
680 split_constant_offset (tree exp, tree *var, tree *off)
682 tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
683 enum tree_code code;
685 *var = exp;
686 *off = ssize_int (0);
687 STRIP_NOPS (exp);
689 if (tree_is_chrec (exp))
690 return;
692 otype = TREE_TYPE (exp);
693 code = TREE_CODE (exp);
694 extract_ops_from_tree (exp, &code, &op0, &op1);
695 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
697 *var = fold_convert (type, e);
698 *off = o;
702 /* Returns the address ADDR of an object in a canonical shape (without nop
703 casts, and with type of pointer to the object). */
705 static tree
706 canonicalize_base_object_address (tree addr)
708 tree orig = addr;
710 STRIP_NOPS (addr);
712 /* The base address may be obtained by casting from integer, in that case
713 keep the cast. */
714 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
715 return orig;
717 if (TREE_CODE (addr) != ADDR_EXPR)
718 return addr;
720 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
723 /* Analyzes the behavior of the memory reference DR in the innermost loop or
724 basic block that contains it. Returns true if analysis succeed or false
725 otherwise. */
727 bool
728 dr_analyze_innermost (struct data_reference *dr)
730 gimple stmt = DR_STMT (dr);
731 struct loop *loop = loop_containing_stmt (stmt);
732 tree ref = DR_REF (dr);
733 HOST_WIDE_INT pbitsize, pbitpos;
734 tree base, poffset;
735 enum machine_mode pmode;
736 int punsignedp, pvolatilep;
737 affine_iv base_iv, offset_iv;
738 tree init, dinit, step;
739 bool in_loop = (loop && loop->num);
741 if (dump_file && (dump_flags & TDF_DETAILS))
742 fprintf (dump_file, "analyze_innermost: ");
744 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
745 &pmode, &punsignedp, &pvolatilep, false);
746 gcc_assert (base != NULL_TREE);
748 if (pbitpos % BITS_PER_UNIT != 0)
750 if (dump_file && (dump_flags & TDF_DETAILS))
751 fprintf (dump_file, "failed: bit offset alignment.\n");
752 return false;
755 if (TREE_CODE (base) == MEM_REF)
757 if (!integer_zerop (TREE_OPERAND (base, 1)))
759 if (!poffset)
761 double_int moff = mem_ref_offset (base);
762 poffset = double_int_to_tree (sizetype, moff);
764 else
765 poffset = size_binop (PLUS_EXPR, poffset, TREE_OPERAND (base, 1));
767 base = TREE_OPERAND (base, 0);
769 else
770 base = build_fold_addr_expr (base);
771 if (in_loop)
773 if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
774 false))
776 if (dump_file && (dump_flags & TDF_DETAILS))
777 fprintf (dump_file, "failed: evolution of base is not affine.\n");
778 return false;
781 else
783 base_iv.base = base;
784 base_iv.step = ssize_int (0);
785 base_iv.no_overflow = true;
788 if (!poffset)
790 offset_iv.base = ssize_int (0);
791 offset_iv.step = ssize_int (0);
793 else
795 if (!in_loop)
797 offset_iv.base = poffset;
798 offset_iv.step = ssize_int (0);
800 else if (!simple_iv (loop, loop_containing_stmt (stmt),
801 poffset, &offset_iv, false))
803 if (dump_file && (dump_flags & TDF_DETAILS))
804 fprintf (dump_file, "failed: evolution of offset is not"
805 " affine.\n");
806 return false;
810 init = ssize_int (pbitpos / BITS_PER_UNIT);
811 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
812 init = size_binop (PLUS_EXPR, init, dinit);
813 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
814 init = size_binop (PLUS_EXPR, init, dinit);
816 step = size_binop (PLUS_EXPR,
817 fold_convert (ssizetype, base_iv.step),
818 fold_convert (ssizetype, offset_iv.step));
820 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
822 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
823 DR_INIT (dr) = init;
824 DR_STEP (dr) = step;
826 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
828 if (dump_file && (dump_flags & TDF_DETAILS))
829 fprintf (dump_file, "success.\n");
831 return true;
834 /* Determines the base object and the list of indices of memory reference
835 DR, analyzed in LOOP and instantiated in loop nest NEST. */
837 static void
838 dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
840 VEC (tree, heap) *access_fns = NULL;
841 tree ref = unshare_expr (DR_REF (dr)), aref = ref, op;
842 tree base, off, access_fn = NULL_TREE;
843 basic_block before_loop = NULL;
845 if (nest)
846 before_loop = block_before_loop (nest);
848 while (handled_component_p (aref))
850 if (TREE_CODE (aref) == ARRAY_REF)
852 op = TREE_OPERAND (aref, 1);
853 if (nest)
855 access_fn = analyze_scalar_evolution (loop, op);
856 access_fn = instantiate_scev (before_loop, loop, access_fn);
857 VEC_safe_push (tree, heap, access_fns, access_fn);
860 TREE_OPERAND (aref, 1) = build_int_cst (TREE_TYPE (op), 0);
863 aref = TREE_OPERAND (aref, 0);
866 if (nest
867 && (INDIRECT_REF_P (aref)
868 || TREE_CODE (aref) == MEM_REF))
870 op = TREE_OPERAND (aref, 0);
871 access_fn = analyze_scalar_evolution (loop, op);
872 access_fn = instantiate_scev (before_loop, loop, access_fn);
873 base = initial_condition (access_fn);
874 split_constant_offset (base, &base, &off);
875 if (TREE_CODE (aref) == MEM_REF)
876 off = size_binop (PLUS_EXPR, off,
877 fold_convert (ssizetype, TREE_OPERAND (aref, 1)));
878 access_fn = chrec_replace_initial_condition (access_fn,
879 fold_convert (TREE_TYPE (base), off));
881 TREE_OPERAND (aref, 0) = base;
882 VEC_safe_push (tree, heap, access_fns, access_fn);
885 if (TREE_CODE (aref) == MEM_REF)
886 TREE_OPERAND (aref, 1)
887 = build_int_cst (TREE_TYPE (TREE_OPERAND (aref, 1)), 0);
889 if (TREE_CODE (ref) == MEM_REF
890 && TREE_CODE (TREE_OPERAND (ref, 0)) == ADDR_EXPR
891 && integer_zerop (TREE_OPERAND (ref, 1)))
892 ref = TREE_OPERAND (TREE_OPERAND (ref, 0), 0);
894 /* For canonicalization purposes we'd like to strip all outermost
895 zero-offset component-refs.
896 ??? For now simply handle zero-index array-refs. */
897 while (TREE_CODE (ref) == ARRAY_REF
898 && integer_zerop (TREE_OPERAND (ref, 1)))
899 ref = TREE_OPERAND (ref, 0);
901 DR_BASE_OBJECT (dr) = ref;
902 DR_ACCESS_FNS (dr) = access_fns;
905 /* Extracts the alias analysis information from the memory reference DR. */
907 static void
908 dr_analyze_alias (struct data_reference *dr)
910 tree ref = DR_REF (dr);
911 tree base = get_base_address (ref), addr;
913 if (INDIRECT_REF_P (base)
914 || TREE_CODE (base) == MEM_REF)
916 addr = TREE_OPERAND (base, 0);
917 if (TREE_CODE (addr) == SSA_NAME)
918 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
922 /* Frees data reference DR. */
924 void
925 free_data_ref (data_reference_p dr)
927 VEC_free (tree, heap, DR_ACCESS_FNS (dr));
928 free (dr);
931 /* Analyzes memory reference MEMREF accessed in STMT. The reference
932 is read if IS_READ is true, write otherwise. Returns the
933 data_reference description of MEMREF. NEST is the outermost loop
934 in which the reference should be instantiated, LOOP is the loop in
935 which the data reference should be analyzed. */
937 struct data_reference *
938 create_data_ref (loop_p nest, loop_p loop, tree memref, gimple stmt,
939 bool is_read)
941 struct data_reference *dr;
943 if (dump_file && (dump_flags & TDF_DETAILS))
945 fprintf (dump_file, "Creating dr for ");
946 print_generic_expr (dump_file, memref, TDF_SLIM);
947 fprintf (dump_file, "\n");
950 dr = XCNEW (struct data_reference);
951 DR_STMT (dr) = stmt;
952 DR_REF (dr) = memref;
953 DR_IS_READ (dr) = is_read;
955 dr_analyze_innermost (dr);
956 dr_analyze_indices (dr, nest, loop);
957 dr_analyze_alias (dr);
959 if (dump_file && (dump_flags & TDF_DETAILS))
961 fprintf (dump_file, "\tbase_address: ");
962 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
963 fprintf (dump_file, "\n\toffset from base address: ");
964 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
965 fprintf (dump_file, "\n\tconstant offset from base address: ");
966 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
967 fprintf (dump_file, "\n\tstep: ");
968 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
969 fprintf (dump_file, "\n\taligned to: ");
970 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
971 fprintf (dump_file, "\n\tbase_object: ");
972 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
973 fprintf (dump_file, "\n");
976 return dr;
979 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
980 expressions. */
981 static bool
982 dr_equal_offsets_p1 (tree offset1, tree offset2)
984 bool res;
986 STRIP_NOPS (offset1);
987 STRIP_NOPS (offset2);
989 if (offset1 == offset2)
990 return true;
992 if (TREE_CODE (offset1) != TREE_CODE (offset2)
993 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
994 return false;
996 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
997 TREE_OPERAND (offset2, 0));
999 if (!res || !BINARY_CLASS_P (offset1))
1000 return res;
1002 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1003 TREE_OPERAND (offset2, 1));
1005 return res;
1008 /* Check if DRA and DRB have equal offsets. */
1009 bool
1010 dr_equal_offsets_p (struct data_reference *dra,
1011 struct data_reference *drb)
1013 tree offset1, offset2;
1015 offset1 = DR_OFFSET (dra);
1016 offset2 = DR_OFFSET (drb);
1018 return dr_equal_offsets_p1 (offset1, offset2);
1021 /* Returns true if FNA == FNB. */
1023 static bool
1024 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1026 unsigned i, n = VEC_length (tree, fna);
1028 if (n != VEC_length (tree, fnb))
1029 return false;
1031 for (i = 0; i < n; i++)
1032 if (!operand_equal_p (VEC_index (tree, fna, i),
1033 VEC_index (tree, fnb, i), 0))
1034 return false;
1036 return true;
1039 /* If all the functions in CF are the same, returns one of them,
1040 otherwise returns NULL. */
1042 static affine_fn
1043 common_affine_function (conflict_function *cf)
1045 unsigned i;
1046 affine_fn comm;
1048 if (!CF_NONTRIVIAL_P (cf))
1049 return NULL;
1051 comm = cf->fns[0];
1053 for (i = 1; i < cf->n; i++)
1054 if (!affine_function_equal_p (comm, cf->fns[i]))
1055 return NULL;
1057 return comm;
1060 /* Returns the base of the affine function FN. */
1062 static tree
1063 affine_function_base (affine_fn fn)
1065 return VEC_index (tree, fn, 0);
1068 /* Returns true if FN is a constant. */
1070 static bool
1071 affine_function_constant_p (affine_fn fn)
1073 unsigned i;
1074 tree coef;
1076 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
1077 if (!integer_zerop (coef))
1078 return false;
1080 return true;
1083 /* Returns true if FN is the zero constant function. */
1085 static bool
1086 affine_function_zero_p (affine_fn fn)
1088 return (integer_zerop (affine_function_base (fn))
1089 && affine_function_constant_p (fn));
1092 /* Returns a signed integer type with the largest precision from TA
1093 and TB. */
1095 static tree
1096 signed_type_for_types (tree ta, tree tb)
1098 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1099 return signed_type_for (ta);
1100 else
1101 return signed_type_for (tb);
1104 /* Applies operation OP on affine functions FNA and FNB, and returns the
1105 result. */
1107 static affine_fn
1108 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1110 unsigned i, n, m;
1111 affine_fn ret;
1112 tree coef;
1114 if (VEC_length (tree, fnb) > VEC_length (tree, fna))
1116 n = VEC_length (tree, fna);
1117 m = VEC_length (tree, fnb);
1119 else
1121 n = VEC_length (tree, fnb);
1122 m = VEC_length (tree, fna);
1125 ret = VEC_alloc (tree, heap, m);
1126 for (i = 0; i < n; i++)
1128 tree type = signed_type_for_types (TREE_TYPE (VEC_index (tree, fna, i)),
1129 TREE_TYPE (VEC_index (tree, fnb, i)));
1131 VEC_quick_push (tree, ret,
1132 fold_build2 (op, type,
1133 VEC_index (tree, fna, i),
1134 VEC_index (tree, fnb, i)));
1137 for (; VEC_iterate (tree, fna, i, coef); i++)
1138 VEC_quick_push (tree, ret,
1139 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1140 coef, integer_zero_node));
1141 for (; VEC_iterate (tree, fnb, i, coef); i++)
1142 VEC_quick_push (tree, ret,
1143 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1144 integer_zero_node, coef));
1146 return ret;
1149 /* Returns the sum of affine functions FNA and FNB. */
1151 static affine_fn
1152 affine_fn_plus (affine_fn fna, affine_fn fnb)
1154 return affine_fn_op (PLUS_EXPR, fna, fnb);
1157 /* Returns the difference of affine functions FNA and FNB. */
1159 static affine_fn
1160 affine_fn_minus (affine_fn fna, affine_fn fnb)
1162 return affine_fn_op (MINUS_EXPR, fna, fnb);
1165 /* Frees affine function FN. */
1167 static void
1168 affine_fn_free (affine_fn fn)
1170 VEC_free (tree, heap, fn);
1173 /* Determine for each subscript in the data dependence relation DDR
1174 the distance. */
1176 static void
1177 compute_subscript_distance (struct data_dependence_relation *ddr)
1179 conflict_function *cf_a, *cf_b;
1180 affine_fn fn_a, fn_b, diff;
1182 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1184 unsigned int i;
1186 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1188 struct subscript *subscript;
1190 subscript = DDR_SUBSCRIPT (ddr, i);
1191 cf_a = SUB_CONFLICTS_IN_A (subscript);
1192 cf_b = SUB_CONFLICTS_IN_B (subscript);
1194 fn_a = common_affine_function (cf_a);
1195 fn_b = common_affine_function (cf_b);
1196 if (!fn_a || !fn_b)
1198 SUB_DISTANCE (subscript) = chrec_dont_know;
1199 return;
1201 diff = affine_fn_minus (fn_a, fn_b);
1203 if (affine_function_constant_p (diff))
1204 SUB_DISTANCE (subscript) = affine_function_base (diff);
1205 else
1206 SUB_DISTANCE (subscript) = chrec_dont_know;
1208 affine_fn_free (diff);
1213 /* Returns the conflict function for "unknown". */
1215 static conflict_function *
1216 conflict_fn_not_known (void)
1218 conflict_function *fn = XCNEW (conflict_function);
1219 fn->n = NOT_KNOWN;
1221 return fn;
1224 /* Returns the conflict function for "independent". */
1226 static conflict_function *
1227 conflict_fn_no_dependence (void)
1229 conflict_function *fn = XCNEW (conflict_function);
1230 fn->n = NO_DEPENDENCE;
1232 return fn;
1235 /* Returns true if the address of OBJ is invariant in LOOP. */
1237 static bool
1238 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1240 while (handled_component_p (obj))
1242 if (TREE_CODE (obj) == ARRAY_REF)
1244 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1245 need to check the stride and the lower bound of the reference. */
1246 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1247 loop->num)
1248 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1249 loop->num))
1250 return false;
1252 else if (TREE_CODE (obj) == COMPONENT_REF)
1254 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1255 loop->num))
1256 return false;
1258 obj = TREE_OPERAND (obj, 0);
1261 if (!INDIRECT_REF_P (obj)
1262 && TREE_CODE (obj) != MEM_REF)
1263 return true;
1265 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1266 loop->num);
1269 /* Returns false if we can prove that data references A and B do not alias,
1270 true otherwise. */
1272 bool
1273 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b)
1275 tree addr_a = DR_BASE_OBJECT (a);
1276 tree addr_b = DR_BASE_OBJECT (b);
1278 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
1279 return refs_output_dependent_p (addr_a, addr_b);
1280 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
1281 return refs_anti_dependent_p (addr_a, addr_b);
1282 return refs_may_alias_p (addr_a, addr_b);
1285 static void compute_self_dependence (struct data_dependence_relation *);
1287 /* Initialize a data dependence relation between data accesses A and
1288 B. NB_LOOPS is the number of loops surrounding the references: the
1289 size of the classic distance/direction vectors. */
1291 static struct data_dependence_relation *
1292 initialize_data_dependence_relation (struct data_reference *a,
1293 struct data_reference *b,
1294 VEC (loop_p, heap) *loop_nest)
1296 struct data_dependence_relation *res;
1297 unsigned int i;
1299 res = XNEW (struct data_dependence_relation);
1300 DDR_A (res) = a;
1301 DDR_B (res) = b;
1302 DDR_LOOP_NEST (res) = NULL;
1303 DDR_REVERSED_P (res) = false;
1304 DDR_SUBSCRIPTS (res) = NULL;
1305 DDR_DIR_VECTS (res) = NULL;
1306 DDR_DIST_VECTS (res) = NULL;
1308 if (a == NULL || b == NULL)
1310 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1311 return res;
1314 /* If the data references do not alias, then they are independent. */
1315 if (!dr_may_alias_p (a, b))
1317 DDR_ARE_DEPENDENT (res) = chrec_known;
1318 return res;
1321 /* When the references are exactly the same, don't spend time doing
1322 the data dependence tests, just initialize the ddr and return. */
1323 if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1325 DDR_AFFINE_P (res) = true;
1326 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1327 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1328 DDR_LOOP_NEST (res) = loop_nest;
1329 DDR_INNER_LOOP (res) = 0;
1330 DDR_SELF_REFERENCE (res) = true;
1331 compute_self_dependence (res);
1332 return res;
1335 /* If the references do not access the same object, we do not know
1336 whether they alias or not. */
1337 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1339 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1340 return res;
1343 /* If the base of the object is not invariant in the loop nest, we cannot
1344 analyze it. TODO -- in fact, it would suffice to record that there may
1345 be arbitrary dependences in the loops where the base object varies. */
1346 if (loop_nest
1347 && !object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1348 DR_BASE_OBJECT (a)))
1350 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1351 return res;
1354 /* If the number of dimensions of the access to not agree we can have
1355 a pointer access to a component of the array element type and an
1356 array access while the base-objects are still the same. Punt. */
1357 if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
1359 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1360 return res;
1363 DDR_AFFINE_P (res) = true;
1364 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1365 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1366 DDR_LOOP_NEST (res) = loop_nest;
1367 DDR_INNER_LOOP (res) = 0;
1368 DDR_SELF_REFERENCE (res) = false;
1370 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1372 struct subscript *subscript;
1374 subscript = XNEW (struct subscript);
1375 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1376 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1377 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1378 SUB_DISTANCE (subscript) = chrec_dont_know;
1379 VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1382 return res;
1385 /* Frees memory used by the conflict function F. */
1387 static void
1388 free_conflict_function (conflict_function *f)
1390 unsigned i;
1392 if (CF_NONTRIVIAL_P (f))
1394 for (i = 0; i < f->n; i++)
1395 affine_fn_free (f->fns[i]);
1397 free (f);
1400 /* Frees memory used by SUBSCRIPTS. */
1402 static void
1403 free_subscripts (VEC (subscript_p, heap) *subscripts)
1405 unsigned i;
1406 subscript_p s;
1408 FOR_EACH_VEC_ELT (subscript_p, subscripts, i, s)
1410 free_conflict_function (s->conflicting_iterations_in_a);
1411 free_conflict_function (s->conflicting_iterations_in_b);
1412 free (s);
1414 VEC_free (subscript_p, heap, subscripts);
1417 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1418 description. */
1420 static inline void
1421 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1422 tree chrec)
1424 if (dump_file && (dump_flags & TDF_DETAILS))
1426 fprintf (dump_file, "(dependence classified: ");
1427 print_generic_expr (dump_file, chrec, 0);
1428 fprintf (dump_file, ")\n");
1431 DDR_ARE_DEPENDENT (ddr) = chrec;
1432 free_subscripts (DDR_SUBSCRIPTS (ddr));
1433 DDR_SUBSCRIPTS (ddr) = NULL;
1436 /* The dependence relation DDR cannot be represented by a distance
1437 vector. */
1439 static inline void
1440 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1442 if (dump_file && (dump_flags & TDF_DETAILS))
1443 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1445 DDR_AFFINE_P (ddr) = false;
1450 /* This section contains the classic Banerjee tests. */
1452 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1453 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1455 static inline bool
1456 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1458 return (evolution_function_is_constant_p (chrec_a)
1459 && evolution_function_is_constant_p (chrec_b));
1462 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1463 variable, i.e., if the SIV (Single Index Variable) test is true. */
1465 static bool
1466 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1468 if ((evolution_function_is_constant_p (chrec_a)
1469 && evolution_function_is_univariate_p (chrec_b))
1470 || (evolution_function_is_constant_p (chrec_b)
1471 && evolution_function_is_univariate_p (chrec_a)))
1472 return true;
1474 if (evolution_function_is_univariate_p (chrec_a)
1475 && evolution_function_is_univariate_p (chrec_b))
1477 switch (TREE_CODE (chrec_a))
1479 case POLYNOMIAL_CHREC:
1480 switch (TREE_CODE (chrec_b))
1482 case POLYNOMIAL_CHREC:
1483 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1484 return false;
1486 default:
1487 return true;
1490 default:
1491 return true;
1495 return false;
1498 /* Creates a conflict function with N dimensions. The affine functions
1499 in each dimension follow. */
1501 static conflict_function *
1502 conflict_fn (unsigned n, ...)
1504 unsigned i;
1505 conflict_function *ret = XCNEW (conflict_function);
1506 va_list ap;
1508 gcc_assert (0 < n && n <= MAX_DIM);
1509 va_start(ap, n);
1511 ret->n = n;
1512 for (i = 0; i < n; i++)
1513 ret->fns[i] = va_arg (ap, affine_fn);
1514 va_end(ap);
1516 return ret;
1519 /* Returns constant affine function with value CST. */
1521 static affine_fn
1522 affine_fn_cst (tree cst)
1524 affine_fn fn = VEC_alloc (tree, heap, 1);
1525 VEC_quick_push (tree, fn, cst);
1526 return fn;
1529 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1531 static affine_fn
1532 affine_fn_univar (tree cst, unsigned dim, tree coef)
1534 affine_fn fn = VEC_alloc (tree, heap, dim + 1);
1535 unsigned i;
1537 gcc_assert (dim > 0);
1538 VEC_quick_push (tree, fn, cst);
1539 for (i = 1; i < dim; i++)
1540 VEC_quick_push (tree, fn, integer_zero_node);
1541 VEC_quick_push (tree, fn, coef);
1542 return fn;
1545 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1546 *OVERLAPS_B are initialized to the functions that describe the
1547 relation between the elements accessed twice by CHREC_A and
1548 CHREC_B. For k >= 0, the following property is verified:
1550 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1552 static void
1553 analyze_ziv_subscript (tree chrec_a,
1554 tree chrec_b,
1555 conflict_function **overlaps_a,
1556 conflict_function **overlaps_b,
1557 tree *last_conflicts)
1559 tree type, difference;
1560 dependence_stats.num_ziv++;
1562 if (dump_file && (dump_flags & TDF_DETAILS))
1563 fprintf (dump_file, "(analyze_ziv_subscript \n");
1565 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1566 chrec_a = chrec_convert (type, chrec_a, NULL);
1567 chrec_b = chrec_convert (type, chrec_b, NULL);
1568 difference = chrec_fold_minus (type, chrec_a, chrec_b);
1570 switch (TREE_CODE (difference))
1572 case INTEGER_CST:
1573 if (integer_zerop (difference))
1575 /* The difference is equal to zero: the accessed index
1576 overlaps for each iteration in the loop. */
1577 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1578 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1579 *last_conflicts = chrec_dont_know;
1580 dependence_stats.num_ziv_dependent++;
1582 else
1584 /* The accesses do not overlap. */
1585 *overlaps_a = conflict_fn_no_dependence ();
1586 *overlaps_b = conflict_fn_no_dependence ();
1587 *last_conflicts = integer_zero_node;
1588 dependence_stats.num_ziv_independent++;
1590 break;
1592 default:
1593 /* We're not sure whether the indexes overlap. For the moment,
1594 conservatively answer "don't know". */
1595 if (dump_file && (dump_flags & TDF_DETAILS))
1596 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1598 *overlaps_a = conflict_fn_not_known ();
1599 *overlaps_b = conflict_fn_not_known ();
1600 *last_conflicts = chrec_dont_know;
1601 dependence_stats.num_ziv_unimplemented++;
1602 break;
1605 if (dump_file && (dump_flags & TDF_DETAILS))
1606 fprintf (dump_file, ")\n");
1609 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
1610 and only if it fits to the int type. If this is not the case, or the
1611 bound on the number of iterations of LOOP could not be derived, returns
1612 chrec_dont_know. */
1614 static tree
1615 max_stmt_executions_tree (struct loop *loop)
1617 double_int nit;
1618 tree type;
1620 if (!max_stmt_executions (loop, true, &nit))
1621 return chrec_dont_know;
1623 type = lang_hooks.types.type_for_size (INT_TYPE_SIZE, true);
1624 if (!double_int_fits_to_tree_p (type, nit))
1625 return chrec_dont_know;
1627 return double_int_to_tree (type, nit);
1630 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1631 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1632 *OVERLAPS_B are initialized to the functions that describe the
1633 relation between the elements accessed twice by CHREC_A and
1634 CHREC_B. For k >= 0, the following property is verified:
1636 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1638 static void
1639 analyze_siv_subscript_cst_affine (tree chrec_a,
1640 tree chrec_b,
1641 conflict_function **overlaps_a,
1642 conflict_function **overlaps_b,
1643 tree *last_conflicts)
1645 bool value0, value1, value2;
1646 tree type, difference, tmp;
1648 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1649 chrec_a = chrec_convert (type, chrec_a, NULL);
1650 chrec_b = chrec_convert (type, chrec_b, NULL);
1651 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1653 if (!chrec_is_positive (initial_condition (difference), &value0))
1655 if (dump_file && (dump_flags & TDF_DETAILS))
1656 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1658 dependence_stats.num_siv_unimplemented++;
1659 *overlaps_a = conflict_fn_not_known ();
1660 *overlaps_b = conflict_fn_not_known ();
1661 *last_conflicts = chrec_dont_know;
1662 return;
1664 else
1666 if (value0 == false)
1668 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1670 if (dump_file && (dump_flags & TDF_DETAILS))
1671 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1673 *overlaps_a = conflict_fn_not_known ();
1674 *overlaps_b = conflict_fn_not_known ();
1675 *last_conflicts = chrec_dont_know;
1676 dependence_stats.num_siv_unimplemented++;
1677 return;
1679 else
1681 if (value1 == true)
1683 /* Example:
1684 chrec_a = 12
1685 chrec_b = {10, +, 1}
1688 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1690 HOST_WIDE_INT numiter;
1691 struct loop *loop = get_chrec_loop (chrec_b);
1693 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1694 tmp = fold_build2 (EXACT_DIV_EXPR, type,
1695 fold_build1 (ABS_EXPR, type, difference),
1696 CHREC_RIGHT (chrec_b));
1697 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1698 *last_conflicts = integer_one_node;
1701 /* Perform weak-zero siv test to see if overlap is
1702 outside the loop bounds. */
1703 numiter = max_stmt_executions_int (loop, true);
1705 if (numiter >= 0
1706 && compare_tree_int (tmp, numiter) > 0)
1708 free_conflict_function (*overlaps_a);
1709 free_conflict_function (*overlaps_b);
1710 *overlaps_a = conflict_fn_no_dependence ();
1711 *overlaps_b = conflict_fn_no_dependence ();
1712 *last_conflicts = integer_zero_node;
1713 dependence_stats.num_siv_independent++;
1714 return;
1716 dependence_stats.num_siv_dependent++;
1717 return;
1720 /* When the step does not divide the difference, there are
1721 no overlaps. */
1722 else
1724 *overlaps_a = conflict_fn_no_dependence ();
1725 *overlaps_b = conflict_fn_no_dependence ();
1726 *last_conflicts = integer_zero_node;
1727 dependence_stats.num_siv_independent++;
1728 return;
1732 else
1734 /* Example:
1735 chrec_a = 12
1736 chrec_b = {10, +, -1}
1738 In this case, chrec_a will not overlap with chrec_b. */
1739 *overlaps_a = conflict_fn_no_dependence ();
1740 *overlaps_b = conflict_fn_no_dependence ();
1741 *last_conflicts = integer_zero_node;
1742 dependence_stats.num_siv_independent++;
1743 return;
1747 else
1749 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1751 if (dump_file && (dump_flags & TDF_DETAILS))
1752 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1754 *overlaps_a = conflict_fn_not_known ();
1755 *overlaps_b = conflict_fn_not_known ();
1756 *last_conflicts = chrec_dont_know;
1757 dependence_stats.num_siv_unimplemented++;
1758 return;
1760 else
1762 if (value2 == false)
1764 /* Example:
1765 chrec_a = 3
1766 chrec_b = {10, +, -1}
1768 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1770 HOST_WIDE_INT numiter;
1771 struct loop *loop = get_chrec_loop (chrec_b);
1773 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1774 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
1775 CHREC_RIGHT (chrec_b));
1776 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1777 *last_conflicts = integer_one_node;
1779 /* Perform weak-zero siv test to see if overlap is
1780 outside the loop bounds. */
1781 numiter = max_stmt_executions_int (loop, true);
1783 if (numiter >= 0
1784 && compare_tree_int (tmp, numiter) > 0)
1786 free_conflict_function (*overlaps_a);
1787 free_conflict_function (*overlaps_b);
1788 *overlaps_a = conflict_fn_no_dependence ();
1789 *overlaps_b = conflict_fn_no_dependence ();
1790 *last_conflicts = integer_zero_node;
1791 dependence_stats.num_siv_independent++;
1792 return;
1794 dependence_stats.num_siv_dependent++;
1795 return;
1798 /* When the step does not divide the difference, there
1799 are no overlaps. */
1800 else
1802 *overlaps_a = conflict_fn_no_dependence ();
1803 *overlaps_b = conflict_fn_no_dependence ();
1804 *last_conflicts = integer_zero_node;
1805 dependence_stats.num_siv_independent++;
1806 return;
1809 else
1811 /* Example:
1812 chrec_a = 3
1813 chrec_b = {4, +, 1}
1815 In this case, chrec_a will not overlap with chrec_b. */
1816 *overlaps_a = conflict_fn_no_dependence ();
1817 *overlaps_b = conflict_fn_no_dependence ();
1818 *last_conflicts = integer_zero_node;
1819 dependence_stats.num_siv_independent++;
1820 return;
1827 /* Helper recursive function for initializing the matrix A. Returns
1828 the initial value of CHREC. */
1830 static tree
1831 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
1833 gcc_assert (chrec);
1835 switch (TREE_CODE (chrec))
1837 case POLYNOMIAL_CHREC:
1838 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
1840 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
1841 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
1843 case PLUS_EXPR:
1844 case MULT_EXPR:
1845 case MINUS_EXPR:
1847 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1848 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
1850 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
1853 case NOP_EXPR:
1855 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1856 return chrec_convert (chrec_type (chrec), op, NULL);
1859 case BIT_NOT_EXPR:
1861 /* Handle ~X as -1 - X. */
1862 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1863 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
1864 build_int_cst (TREE_TYPE (chrec), -1), op);
1867 case INTEGER_CST:
1868 return chrec;
1870 default:
1871 gcc_unreachable ();
1872 return NULL_TREE;
1876 #define FLOOR_DIV(x,y) ((x) / (y))
1878 /* Solves the special case of the Diophantine equation:
1879 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
1881 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
1882 number of iterations that loops X and Y run. The overlaps will be
1883 constructed as evolutions in dimension DIM. */
1885 static void
1886 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
1887 affine_fn *overlaps_a,
1888 affine_fn *overlaps_b,
1889 tree *last_conflicts, int dim)
1891 if (((step_a > 0 && step_b > 0)
1892 || (step_a < 0 && step_b < 0)))
1894 int step_overlaps_a, step_overlaps_b;
1895 int gcd_steps_a_b, last_conflict, tau2;
1897 gcd_steps_a_b = gcd (step_a, step_b);
1898 step_overlaps_a = step_b / gcd_steps_a_b;
1899 step_overlaps_b = step_a / gcd_steps_a_b;
1901 if (niter > 0)
1903 tau2 = FLOOR_DIV (niter, step_overlaps_a);
1904 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
1905 last_conflict = tau2;
1906 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
1908 else
1909 *last_conflicts = chrec_dont_know;
1911 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
1912 build_int_cst (NULL_TREE,
1913 step_overlaps_a));
1914 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
1915 build_int_cst (NULL_TREE,
1916 step_overlaps_b));
1919 else
1921 *overlaps_a = affine_fn_cst (integer_zero_node);
1922 *overlaps_b = affine_fn_cst (integer_zero_node);
1923 *last_conflicts = integer_zero_node;
1927 /* Solves the special case of a Diophantine equation where CHREC_A is
1928 an affine bivariate function, and CHREC_B is an affine univariate
1929 function. For example,
1931 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
1933 has the following overlapping functions:
1935 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
1936 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
1937 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
1939 FORNOW: This is a specialized implementation for a case occurring in
1940 a common benchmark. Implement the general algorithm. */
1942 static void
1943 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
1944 conflict_function **overlaps_a,
1945 conflict_function **overlaps_b,
1946 tree *last_conflicts)
1948 bool xz_p, yz_p, xyz_p;
1949 int step_x, step_y, step_z;
1950 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
1951 affine_fn overlaps_a_xz, overlaps_b_xz;
1952 affine_fn overlaps_a_yz, overlaps_b_yz;
1953 affine_fn overlaps_a_xyz, overlaps_b_xyz;
1954 affine_fn ova1, ova2, ovb;
1955 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
1957 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
1958 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
1959 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
1961 niter_x =
1962 max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)), true);
1963 niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a), true);
1964 niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b), true);
1966 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
1968 if (dump_file && (dump_flags & TDF_DETAILS))
1969 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
1971 *overlaps_a = conflict_fn_not_known ();
1972 *overlaps_b = conflict_fn_not_known ();
1973 *last_conflicts = chrec_dont_know;
1974 return;
1977 niter = MIN (niter_x, niter_z);
1978 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
1979 &overlaps_a_xz,
1980 &overlaps_b_xz,
1981 &last_conflicts_xz, 1);
1982 niter = MIN (niter_y, niter_z);
1983 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
1984 &overlaps_a_yz,
1985 &overlaps_b_yz,
1986 &last_conflicts_yz, 2);
1987 niter = MIN (niter_x, niter_z);
1988 niter = MIN (niter_y, niter);
1989 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
1990 &overlaps_a_xyz,
1991 &overlaps_b_xyz,
1992 &last_conflicts_xyz, 3);
1994 xz_p = !integer_zerop (last_conflicts_xz);
1995 yz_p = !integer_zerop (last_conflicts_yz);
1996 xyz_p = !integer_zerop (last_conflicts_xyz);
1998 if (xz_p || yz_p || xyz_p)
2000 ova1 = affine_fn_cst (integer_zero_node);
2001 ova2 = affine_fn_cst (integer_zero_node);
2002 ovb = affine_fn_cst (integer_zero_node);
2003 if (xz_p)
2005 affine_fn t0 = ova1;
2006 affine_fn t2 = ovb;
2008 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2009 ovb = affine_fn_plus (ovb, overlaps_b_xz);
2010 affine_fn_free (t0);
2011 affine_fn_free (t2);
2012 *last_conflicts = last_conflicts_xz;
2014 if (yz_p)
2016 affine_fn t0 = ova2;
2017 affine_fn t2 = ovb;
2019 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2020 ovb = affine_fn_plus (ovb, overlaps_b_yz);
2021 affine_fn_free (t0);
2022 affine_fn_free (t2);
2023 *last_conflicts = last_conflicts_yz;
2025 if (xyz_p)
2027 affine_fn t0 = ova1;
2028 affine_fn t2 = ova2;
2029 affine_fn t4 = ovb;
2031 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2032 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2033 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2034 affine_fn_free (t0);
2035 affine_fn_free (t2);
2036 affine_fn_free (t4);
2037 *last_conflicts = last_conflicts_xyz;
2039 *overlaps_a = conflict_fn (2, ova1, ova2);
2040 *overlaps_b = conflict_fn (1, ovb);
2042 else
2044 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2045 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2046 *last_conflicts = integer_zero_node;
2049 affine_fn_free (overlaps_a_xz);
2050 affine_fn_free (overlaps_b_xz);
2051 affine_fn_free (overlaps_a_yz);
2052 affine_fn_free (overlaps_b_yz);
2053 affine_fn_free (overlaps_a_xyz);
2054 affine_fn_free (overlaps_b_xyz);
2057 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
2059 static void
2060 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
2061 int size)
2063 memcpy (vec2, vec1, size * sizeof (*vec1));
2066 /* Copy the elements of M x N matrix MAT1 to MAT2. */
2068 static void
2069 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
2070 int m, int n)
2072 int i;
2074 for (i = 0; i < m; i++)
2075 lambda_vector_copy (mat1[i], mat2[i], n);
2078 /* Store the N x N identity matrix in MAT. */
2080 static void
2081 lambda_matrix_id (lambda_matrix mat, int size)
2083 int i, j;
2085 for (i = 0; i < size; i++)
2086 for (j = 0; j < size; j++)
2087 mat[i][j] = (i == j) ? 1 : 0;
2090 /* Return the first nonzero element of vector VEC1 between START and N.
2091 We must have START <= N. Returns N if VEC1 is the zero vector. */
2093 static int
2094 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
2096 int j = start;
2097 while (j < n && vec1[j] == 0)
2098 j++;
2099 return j;
2102 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2103 R2 = R2 + CONST1 * R1. */
2105 static void
2106 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
2108 int i;
2110 if (const1 == 0)
2111 return;
2113 for (i = 0; i < n; i++)
2114 mat[r2][i] += const1 * mat[r1][i];
2117 /* Swap rows R1 and R2 in matrix MAT. */
2119 static void
2120 lambda_matrix_row_exchange (lambda_matrix mat, int r1, int r2)
2122 lambda_vector row;
2124 row = mat[r1];
2125 mat[r1] = mat[r2];
2126 mat[r2] = row;
2129 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
2130 and store the result in VEC2. */
2132 static void
2133 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
2134 int size, int const1)
2136 int i;
2138 if (const1 == 0)
2139 lambda_vector_clear (vec2, size);
2140 else
2141 for (i = 0; i < size; i++)
2142 vec2[i] = const1 * vec1[i];
2145 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
2147 static void
2148 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
2149 int size)
2151 lambda_vector_mult_const (vec1, vec2, size, -1);
2154 /* Negate row R1 of matrix MAT which has N columns. */
2156 static void
2157 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
2159 lambda_vector_negate (mat[r1], mat[r1], n);
2162 /* Return true if two vectors are equal. */
2164 static bool
2165 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
2167 int i;
2168 for (i = 0; i < size; i++)
2169 if (vec1[i] != vec2[i])
2170 return false;
2171 return true;
2174 /* Given an M x N integer matrix A, this function determines an M x
2175 M unimodular matrix U, and an M x N echelon matrix S such that
2176 "U.A = S". This decomposition is also known as "right Hermite".
2178 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2179 Restructuring Compilers" Utpal Banerjee. */
2181 static void
2182 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
2183 lambda_matrix S, lambda_matrix U)
2185 int i, j, i0 = 0;
2187 lambda_matrix_copy (A, S, m, n);
2188 lambda_matrix_id (U, m);
2190 for (j = 0; j < n; j++)
2192 if (lambda_vector_first_nz (S[j], m, i0) < m)
2194 ++i0;
2195 for (i = m - 1; i >= i0; i--)
2197 while (S[i][j] != 0)
2199 int sigma, factor, a, b;
2201 a = S[i-1][j];
2202 b = S[i][j];
2203 sigma = (a * b < 0) ? -1: 1;
2204 a = abs (a);
2205 b = abs (b);
2206 factor = sigma * (a / b);
2208 lambda_matrix_row_add (S, n, i, i-1, -factor);
2209 lambda_matrix_row_exchange (S, i, i-1);
2211 lambda_matrix_row_add (U, m, i, i-1, -factor);
2212 lambda_matrix_row_exchange (U, i, i-1);
2219 /* Determines the overlapping elements due to accesses CHREC_A and
2220 CHREC_B, that are affine functions. This function cannot handle
2221 symbolic evolution functions, ie. when initial conditions are
2222 parameters, because it uses lambda matrices of integers. */
2224 static void
2225 analyze_subscript_affine_affine (tree chrec_a,
2226 tree chrec_b,
2227 conflict_function **overlaps_a,
2228 conflict_function **overlaps_b,
2229 tree *last_conflicts)
2231 unsigned nb_vars_a, nb_vars_b, dim;
2232 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2233 lambda_matrix A, U, S;
2234 struct obstack scratch_obstack;
2236 if (eq_evolutions_p (chrec_a, chrec_b))
2238 /* The accessed index overlaps for each iteration in the
2239 loop. */
2240 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2241 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2242 *last_conflicts = chrec_dont_know;
2243 return;
2245 if (dump_file && (dump_flags & TDF_DETAILS))
2246 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2248 /* For determining the initial intersection, we have to solve a
2249 Diophantine equation. This is the most time consuming part.
2251 For answering to the question: "Is there a dependence?" we have
2252 to prove that there exists a solution to the Diophantine
2253 equation, and that the solution is in the iteration domain,
2254 i.e. the solution is positive or zero, and that the solution
2255 happens before the upper bound loop.nb_iterations. Otherwise
2256 there is no dependence. This function outputs a description of
2257 the iterations that hold the intersections. */
2259 nb_vars_a = nb_vars_in_chrec (chrec_a);
2260 nb_vars_b = nb_vars_in_chrec (chrec_b);
2262 gcc_obstack_init (&scratch_obstack);
2264 dim = nb_vars_a + nb_vars_b;
2265 U = lambda_matrix_new (dim, dim, &scratch_obstack);
2266 A = lambda_matrix_new (dim, 1, &scratch_obstack);
2267 S = lambda_matrix_new (dim, 1, &scratch_obstack);
2269 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2270 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
2271 gamma = init_b - init_a;
2273 /* Don't do all the hard work of solving the Diophantine equation
2274 when we already know the solution: for example,
2275 | {3, +, 1}_1
2276 | {3, +, 4}_2
2277 | gamma = 3 - 3 = 0.
2278 Then the first overlap occurs during the first iterations:
2279 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2281 if (gamma == 0)
2283 if (nb_vars_a == 1 && nb_vars_b == 1)
2285 HOST_WIDE_INT step_a, step_b;
2286 HOST_WIDE_INT niter, niter_a, niter_b;
2287 affine_fn ova, ovb;
2289 niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a), true);
2290 niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b), true);
2291 niter = MIN (niter_a, niter_b);
2292 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2293 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2295 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2296 &ova, &ovb,
2297 last_conflicts, 1);
2298 *overlaps_a = conflict_fn (1, ova);
2299 *overlaps_b = conflict_fn (1, ovb);
2302 else if (nb_vars_a == 2 && nb_vars_b == 1)
2303 compute_overlap_steps_for_affine_1_2
2304 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2306 else if (nb_vars_a == 1 && nb_vars_b == 2)
2307 compute_overlap_steps_for_affine_1_2
2308 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2310 else
2312 if (dump_file && (dump_flags & TDF_DETAILS))
2313 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2314 *overlaps_a = conflict_fn_not_known ();
2315 *overlaps_b = conflict_fn_not_known ();
2316 *last_conflicts = chrec_dont_know;
2318 goto end_analyze_subs_aa;
2321 /* U.A = S */
2322 lambda_matrix_right_hermite (A, dim, 1, S, U);
2324 if (S[0][0] < 0)
2326 S[0][0] *= -1;
2327 lambda_matrix_row_negate (U, dim, 0);
2329 gcd_alpha_beta = S[0][0];
2331 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2332 but that is a quite strange case. Instead of ICEing, answer
2333 don't know. */
2334 if (gcd_alpha_beta == 0)
2336 *overlaps_a = conflict_fn_not_known ();
2337 *overlaps_b = conflict_fn_not_known ();
2338 *last_conflicts = chrec_dont_know;
2339 goto end_analyze_subs_aa;
2342 /* The classic "gcd-test". */
2343 if (!int_divides_p (gcd_alpha_beta, gamma))
2345 /* The "gcd-test" has determined that there is no integer
2346 solution, i.e. there is no dependence. */
2347 *overlaps_a = conflict_fn_no_dependence ();
2348 *overlaps_b = conflict_fn_no_dependence ();
2349 *last_conflicts = integer_zero_node;
2352 /* Both access functions are univariate. This includes SIV and MIV cases. */
2353 else if (nb_vars_a == 1 && nb_vars_b == 1)
2355 /* Both functions should have the same evolution sign. */
2356 if (((A[0][0] > 0 && -A[1][0] > 0)
2357 || (A[0][0] < 0 && -A[1][0] < 0)))
2359 /* The solutions are given by:
2361 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2362 | [u21 u22] [y0]
2364 For a given integer t. Using the following variables,
2366 | i0 = u11 * gamma / gcd_alpha_beta
2367 | j0 = u12 * gamma / gcd_alpha_beta
2368 | i1 = u21
2369 | j1 = u22
2371 the solutions are:
2373 | x0 = i0 + i1 * t,
2374 | y0 = j0 + j1 * t. */
2375 HOST_WIDE_INT i0, j0, i1, j1;
2377 i0 = U[0][0] * gamma / gcd_alpha_beta;
2378 j0 = U[0][1] * gamma / gcd_alpha_beta;
2379 i1 = U[1][0];
2380 j1 = U[1][1];
2382 if ((i1 == 0 && i0 < 0)
2383 || (j1 == 0 && j0 < 0))
2385 /* There is no solution.
2386 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2387 falls in here, but for the moment we don't look at the
2388 upper bound of the iteration domain. */
2389 *overlaps_a = conflict_fn_no_dependence ();
2390 *overlaps_b = conflict_fn_no_dependence ();
2391 *last_conflicts = integer_zero_node;
2392 goto end_analyze_subs_aa;
2395 if (i1 > 0 && j1 > 0)
2397 HOST_WIDE_INT niter_a = max_stmt_executions_int
2398 (get_chrec_loop (chrec_a), true);
2399 HOST_WIDE_INT niter_b = max_stmt_executions_int
2400 (get_chrec_loop (chrec_b), true);
2401 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2403 /* (X0, Y0) is a solution of the Diophantine equation:
2404 "chrec_a (X0) = chrec_b (Y0)". */
2405 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2406 CEIL (-j0, j1));
2407 HOST_WIDE_INT x0 = i1 * tau1 + i0;
2408 HOST_WIDE_INT y0 = j1 * tau1 + j0;
2410 /* (X1, Y1) is the smallest positive solution of the eq
2411 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2412 first conflict occurs. */
2413 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2414 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2415 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2417 if (niter > 0)
2419 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2420 FLOOR_DIV (niter - j0, j1));
2421 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2423 /* If the overlap occurs outside of the bounds of the
2424 loop, there is no dependence. */
2425 if (x1 >= niter || y1 >= niter)
2427 *overlaps_a = conflict_fn_no_dependence ();
2428 *overlaps_b = conflict_fn_no_dependence ();
2429 *last_conflicts = integer_zero_node;
2430 goto end_analyze_subs_aa;
2432 else
2433 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2435 else
2436 *last_conflicts = chrec_dont_know;
2438 *overlaps_a
2439 = conflict_fn (1,
2440 affine_fn_univar (build_int_cst (NULL_TREE, x1),
2442 build_int_cst (NULL_TREE, i1)));
2443 *overlaps_b
2444 = conflict_fn (1,
2445 affine_fn_univar (build_int_cst (NULL_TREE, y1),
2447 build_int_cst (NULL_TREE, j1)));
2449 else
2451 /* FIXME: For the moment, the upper bound of the
2452 iteration domain for i and j is not checked. */
2453 if (dump_file && (dump_flags & TDF_DETAILS))
2454 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2455 *overlaps_a = conflict_fn_not_known ();
2456 *overlaps_b = conflict_fn_not_known ();
2457 *last_conflicts = chrec_dont_know;
2460 else
2462 if (dump_file && (dump_flags & TDF_DETAILS))
2463 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2464 *overlaps_a = conflict_fn_not_known ();
2465 *overlaps_b = conflict_fn_not_known ();
2466 *last_conflicts = chrec_dont_know;
2469 else
2471 if (dump_file && (dump_flags & TDF_DETAILS))
2472 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2473 *overlaps_a = conflict_fn_not_known ();
2474 *overlaps_b = conflict_fn_not_known ();
2475 *last_conflicts = chrec_dont_know;
2478 end_analyze_subs_aa:
2479 obstack_free (&scratch_obstack, NULL);
2480 if (dump_file && (dump_flags & TDF_DETAILS))
2482 fprintf (dump_file, " (overlaps_a = ");
2483 dump_conflict_function (dump_file, *overlaps_a);
2484 fprintf (dump_file, ")\n (overlaps_b = ");
2485 dump_conflict_function (dump_file, *overlaps_b);
2486 fprintf (dump_file, ")\n");
2487 fprintf (dump_file, ")\n");
2491 /* Returns true when analyze_subscript_affine_affine can be used for
2492 determining the dependence relation between chrec_a and chrec_b,
2493 that contain symbols. This function modifies chrec_a and chrec_b
2494 such that the analysis result is the same, and such that they don't
2495 contain symbols, and then can safely be passed to the analyzer.
2497 Example: The analysis of the following tuples of evolutions produce
2498 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2499 vs. {0, +, 1}_1
2501 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2502 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2505 static bool
2506 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2508 tree diff, type, left_a, left_b, right_b;
2510 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2511 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2512 /* FIXME: For the moment not handled. Might be refined later. */
2513 return false;
2515 type = chrec_type (*chrec_a);
2516 left_a = CHREC_LEFT (*chrec_a);
2517 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
2518 diff = chrec_fold_minus (type, left_a, left_b);
2520 if (!evolution_function_is_constant_p (diff))
2521 return false;
2523 if (dump_file && (dump_flags & TDF_DETAILS))
2524 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2526 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2527 diff, CHREC_RIGHT (*chrec_a));
2528 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
2529 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2530 build_int_cst (type, 0),
2531 right_b);
2532 return true;
2535 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2536 *OVERLAPS_B are initialized to the functions that describe the
2537 relation between the elements accessed twice by CHREC_A and
2538 CHREC_B. For k >= 0, the following property is verified:
2540 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2542 static void
2543 analyze_siv_subscript (tree chrec_a,
2544 tree chrec_b,
2545 conflict_function **overlaps_a,
2546 conflict_function **overlaps_b,
2547 tree *last_conflicts,
2548 int loop_nest_num)
2550 dependence_stats.num_siv++;
2552 if (dump_file && (dump_flags & TDF_DETAILS))
2553 fprintf (dump_file, "(analyze_siv_subscript \n");
2555 if (evolution_function_is_constant_p (chrec_a)
2556 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2557 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2558 overlaps_a, overlaps_b, last_conflicts);
2560 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2561 && evolution_function_is_constant_p (chrec_b))
2562 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2563 overlaps_b, overlaps_a, last_conflicts);
2565 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2566 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2568 if (!chrec_contains_symbols (chrec_a)
2569 && !chrec_contains_symbols (chrec_b))
2571 analyze_subscript_affine_affine (chrec_a, chrec_b,
2572 overlaps_a, overlaps_b,
2573 last_conflicts);
2575 if (CF_NOT_KNOWN_P (*overlaps_a)
2576 || CF_NOT_KNOWN_P (*overlaps_b))
2577 dependence_stats.num_siv_unimplemented++;
2578 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2579 || CF_NO_DEPENDENCE_P (*overlaps_b))
2580 dependence_stats.num_siv_independent++;
2581 else
2582 dependence_stats.num_siv_dependent++;
2584 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2585 &chrec_b))
2587 analyze_subscript_affine_affine (chrec_a, chrec_b,
2588 overlaps_a, overlaps_b,
2589 last_conflicts);
2591 if (CF_NOT_KNOWN_P (*overlaps_a)
2592 || CF_NOT_KNOWN_P (*overlaps_b))
2593 dependence_stats.num_siv_unimplemented++;
2594 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2595 || CF_NO_DEPENDENCE_P (*overlaps_b))
2596 dependence_stats.num_siv_independent++;
2597 else
2598 dependence_stats.num_siv_dependent++;
2600 else
2601 goto siv_subscript_dontknow;
2604 else
2606 siv_subscript_dontknow:;
2607 if (dump_file && (dump_flags & TDF_DETAILS))
2608 fprintf (dump_file, "siv test failed: unimplemented.\n");
2609 *overlaps_a = conflict_fn_not_known ();
2610 *overlaps_b = conflict_fn_not_known ();
2611 *last_conflicts = chrec_dont_know;
2612 dependence_stats.num_siv_unimplemented++;
2615 if (dump_file && (dump_flags & TDF_DETAILS))
2616 fprintf (dump_file, ")\n");
2619 /* Returns false if we can prove that the greatest common divisor of the steps
2620 of CHREC does not divide CST, false otherwise. */
2622 static bool
2623 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2625 HOST_WIDE_INT cd = 0, val;
2626 tree step;
2628 if (!host_integerp (cst, 0))
2629 return true;
2630 val = tree_low_cst (cst, 0);
2632 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2634 step = CHREC_RIGHT (chrec);
2635 if (!host_integerp (step, 0))
2636 return true;
2637 cd = gcd (cd, tree_low_cst (step, 0));
2638 chrec = CHREC_LEFT (chrec);
2641 return val % cd == 0;
2644 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2645 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2646 functions that describe the relation between the elements accessed
2647 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2648 is verified:
2650 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2652 static void
2653 analyze_miv_subscript (tree chrec_a,
2654 tree chrec_b,
2655 conflict_function **overlaps_a,
2656 conflict_function **overlaps_b,
2657 tree *last_conflicts,
2658 struct loop *loop_nest)
2660 tree type, difference;
2662 dependence_stats.num_miv++;
2663 if (dump_file && (dump_flags & TDF_DETAILS))
2664 fprintf (dump_file, "(analyze_miv_subscript \n");
2666 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2667 chrec_a = chrec_convert (type, chrec_a, NULL);
2668 chrec_b = chrec_convert (type, chrec_b, NULL);
2669 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2671 if (eq_evolutions_p (chrec_a, chrec_b))
2673 /* Access functions are the same: all the elements are accessed
2674 in the same order. */
2675 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2676 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2677 *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
2678 dependence_stats.num_miv_dependent++;
2681 else if (evolution_function_is_constant_p (difference)
2682 /* For the moment, the following is verified:
2683 evolution_function_is_affine_multivariate_p (chrec_a,
2684 loop_nest->num) */
2685 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2687 /* testsuite/.../ssa-chrec-33.c
2688 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2690 The difference is 1, and all the evolution steps are multiples
2691 of 2, consequently there are no overlapping elements. */
2692 *overlaps_a = conflict_fn_no_dependence ();
2693 *overlaps_b = conflict_fn_no_dependence ();
2694 *last_conflicts = integer_zero_node;
2695 dependence_stats.num_miv_independent++;
2698 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2699 && !chrec_contains_symbols (chrec_a)
2700 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2701 && !chrec_contains_symbols (chrec_b))
2703 /* testsuite/.../ssa-chrec-35.c
2704 {0, +, 1}_2 vs. {0, +, 1}_3
2705 the overlapping elements are respectively located at iterations:
2706 {0, +, 1}_x and {0, +, 1}_x,
2707 in other words, we have the equality:
2708 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2710 Other examples:
2711 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2712 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2714 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2715 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2717 analyze_subscript_affine_affine (chrec_a, chrec_b,
2718 overlaps_a, overlaps_b, last_conflicts);
2720 if (CF_NOT_KNOWN_P (*overlaps_a)
2721 || CF_NOT_KNOWN_P (*overlaps_b))
2722 dependence_stats.num_miv_unimplemented++;
2723 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2724 || CF_NO_DEPENDENCE_P (*overlaps_b))
2725 dependence_stats.num_miv_independent++;
2726 else
2727 dependence_stats.num_miv_dependent++;
2730 else
2732 /* When the analysis is too difficult, answer "don't know". */
2733 if (dump_file && (dump_flags & TDF_DETAILS))
2734 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2736 *overlaps_a = conflict_fn_not_known ();
2737 *overlaps_b = conflict_fn_not_known ();
2738 *last_conflicts = chrec_dont_know;
2739 dependence_stats.num_miv_unimplemented++;
2742 if (dump_file && (dump_flags & TDF_DETAILS))
2743 fprintf (dump_file, ")\n");
2746 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2747 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2748 OVERLAP_ITERATIONS_B are initialized with two functions that
2749 describe the iterations that contain conflicting elements.
2751 Remark: For an integer k >= 0, the following equality is true:
2753 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2756 static void
2757 analyze_overlapping_iterations (tree chrec_a,
2758 tree chrec_b,
2759 conflict_function **overlap_iterations_a,
2760 conflict_function **overlap_iterations_b,
2761 tree *last_conflicts, struct loop *loop_nest)
2763 unsigned int lnn = loop_nest->num;
2765 dependence_stats.num_subscript_tests++;
2767 if (dump_file && (dump_flags & TDF_DETAILS))
2769 fprintf (dump_file, "(analyze_overlapping_iterations \n");
2770 fprintf (dump_file, " (chrec_a = ");
2771 print_generic_expr (dump_file, chrec_a, 0);
2772 fprintf (dump_file, ")\n (chrec_b = ");
2773 print_generic_expr (dump_file, chrec_b, 0);
2774 fprintf (dump_file, ")\n");
2777 if (chrec_a == NULL_TREE
2778 || chrec_b == NULL_TREE
2779 || chrec_contains_undetermined (chrec_a)
2780 || chrec_contains_undetermined (chrec_b))
2782 dependence_stats.num_subscript_undetermined++;
2784 *overlap_iterations_a = conflict_fn_not_known ();
2785 *overlap_iterations_b = conflict_fn_not_known ();
2788 /* If they are the same chrec, and are affine, they overlap
2789 on every iteration. */
2790 else if (eq_evolutions_p (chrec_a, chrec_b)
2791 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2792 || operand_equal_p (chrec_a, chrec_b, 0)))
2794 dependence_stats.num_same_subscript_function++;
2795 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2796 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2797 *last_conflicts = chrec_dont_know;
2800 /* If they aren't the same, and aren't affine, we can't do anything
2801 yet. */
2802 else if ((chrec_contains_symbols (chrec_a)
2803 || chrec_contains_symbols (chrec_b))
2804 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2805 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
2807 dependence_stats.num_subscript_undetermined++;
2808 *overlap_iterations_a = conflict_fn_not_known ();
2809 *overlap_iterations_b = conflict_fn_not_known ();
2812 else if (ziv_subscript_p (chrec_a, chrec_b))
2813 analyze_ziv_subscript (chrec_a, chrec_b,
2814 overlap_iterations_a, overlap_iterations_b,
2815 last_conflicts);
2817 else if (siv_subscript_p (chrec_a, chrec_b))
2818 analyze_siv_subscript (chrec_a, chrec_b,
2819 overlap_iterations_a, overlap_iterations_b,
2820 last_conflicts, lnn);
2822 else
2823 analyze_miv_subscript (chrec_a, chrec_b,
2824 overlap_iterations_a, overlap_iterations_b,
2825 last_conflicts, loop_nest);
2827 if (dump_file && (dump_flags & TDF_DETAILS))
2829 fprintf (dump_file, " (overlap_iterations_a = ");
2830 dump_conflict_function (dump_file, *overlap_iterations_a);
2831 fprintf (dump_file, ")\n (overlap_iterations_b = ");
2832 dump_conflict_function (dump_file, *overlap_iterations_b);
2833 fprintf (dump_file, ")\n");
2834 fprintf (dump_file, ")\n");
2838 /* Helper function for uniquely inserting distance vectors. */
2840 static void
2841 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
2843 unsigned i;
2844 lambda_vector v;
2846 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, v)
2847 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
2848 return;
2850 VEC_safe_push (lambda_vector, heap, DDR_DIST_VECTS (ddr), dist_v);
2853 /* Helper function for uniquely inserting direction vectors. */
2855 static void
2856 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
2858 unsigned i;
2859 lambda_vector v;
2861 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), i, v)
2862 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
2863 return;
2865 VEC_safe_push (lambda_vector, heap, DDR_DIR_VECTS (ddr), dir_v);
2868 /* Add a distance of 1 on all the loops outer than INDEX. If we
2869 haven't yet determined a distance for this outer loop, push a new
2870 distance vector composed of the previous distance, and a distance
2871 of 1 for this outer loop. Example:
2873 | loop_1
2874 | loop_2
2875 | A[10]
2876 | endloop_2
2877 | endloop_1
2879 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
2880 save (0, 1), then we have to save (1, 0). */
2882 static void
2883 add_outer_distances (struct data_dependence_relation *ddr,
2884 lambda_vector dist_v, int index)
2886 /* For each outer loop where init_v is not set, the accesses are
2887 in dependence of distance 1 in the loop. */
2888 while (--index >= 0)
2890 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2891 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
2892 save_v[index] = 1;
2893 save_dist_v (ddr, save_v);
2897 /* Return false when fail to represent the data dependence as a
2898 distance vector. INIT_B is set to true when a component has been
2899 added to the distance vector DIST_V. INDEX_CARRY is then set to
2900 the index in DIST_V that carries the dependence. */
2902 static bool
2903 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
2904 struct data_reference *ddr_a,
2905 struct data_reference *ddr_b,
2906 lambda_vector dist_v, bool *init_b,
2907 int *index_carry)
2909 unsigned i;
2910 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2912 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2914 tree access_fn_a, access_fn_b;
2915 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
2917 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2919 non_affine_dependence_relation (ddr);
2920 return false;
2923 access_fn_a = DR_ACCESS_FN (ddr_a, i);
2924 access_fn_b = DR_ACCESS_FN (ddr_b, i);
2926 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
2927 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
2929 int dist, index;
2930 int var_a = CHREC_VARIABLE (access_fn_a);
2931 int var_b = CHREC_VARIABLE (access_fn_b);
2933 if (var_a != var_b
2934 || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2936 non_affine_dependence_relation (ddr);
2937 return false;
2940 dist = int_cst_value (SUB_DISTANCE (subscript));
2941 index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
2942 *index_carry = MIN (index, *index_carry);
2944 /* This is the subscript coupling test. If we have already
2945 recorded a distance for this loop (a distance coming from
2946 another subscript), it should be the same. For example,
2947 in the following code, there is no dependence:
2949 | loop i = 0, N, 1
2950 | T[i+1][i] = ...
2951 | ... = T[i][i]
2952 | endloop
2954 if (init_v[index] != 0 && dist_v[index] != dist)
2956 finalize_ddr_dependent (ddr, chrec_known);
2957 return false;
2960 dist_v[index] = dist;
2961 init_v[index] = 1;
2962 *init_b = true;
2964 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
2966 /* This can be for example an affine vs. constant dependence
2967 (T[i] vs. T[3]) that is not an affine dependence and is
2968 not representable as a distance vector. */
2969 non_affine_dependence_relation (ddr);
2970 return false;
2974 return true;
2977 /* Return true when the DDR contains only constant access functions. */
2979 static bool
2980 constant_access_functions (const struct data_dependence_relation *ddr)
2982 unsigned i;
2984 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2985 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
2986 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
2987 return false;
2989 return true;
2992 /* Helper function for the case where DDR_A and DDR_B are the same
2993 multivariate access function with a constant step. For an example
2994 see pr34635-1.c. */
2996 static void
2997 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
2999 int x_1, x_2;
3000 tree c_1 = CHREC_LEFT (c_2);
3001 tree c_0 = CHREC_LEFT (c_1);
3002 lambda_vector dist_v;
3003 int v1, v2, cd;
3005 /* Polynomials with more than 2 variables are not handled yet. When
3006 the evolution steps are parameters, it is not possible to
3007 represent the dependence using classical distance vectors. */
3008 if (TREE_CODE (c_0) != INTEGER_CST
3009 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
3010 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
3012 DDR_AFFINE_P (ddr) = false;
3013 return;
3016 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
3017 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
3019 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
3020 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3021 v1 = int_cst_value (CHREC_RIGHT (c_1));
3022 v2 = int_cst_value (CHREC_RIGHT (c_2));
3023 cd = gcd (v1, v2);
3024 v1 /= cd;
3025 v2 /= cd;
3027 if (v2 < 0)
3029 v2 = -v2;
3030 v1 = -v1;
3033 dist_v[x_1] = v2;
3034 dist_v[x_2] = -v1;
3035 save_dist_v (ddr, dist_v);
3037 add_outer_distances (ddr, dist_v, x_1);
3040 /* Helper function for the case where DDR_A and DDR_B are the same
3041 access functions. */
3043 static void
3044 add_other_self_distances (struct data_dependence_relation *ddr)
3046 lambda_vector dist_v;
3047 unsigned i;
3048 int index_carry = DDR_NB_LOOPS (ddr);
3050 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3052 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
3054 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
3056 if (!evolution_function_is_univariate_p (access_fun))
3058 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
3060 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
3061 return;
3064 access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
3066 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
3067 add_multivariate_self_dist (ddr, access_fun);
3068 else
3069 /* The evolution step is not constant: it varies in
3070 the outer loop, so this cannot be represented by a
3071 distance vector. For example in pr34635.c the
3072 evolution is {0, +, {0, +, 4}_1}_2. */
3073 DDR_AFFINE_P (ddr) = false;
3075 return;
3078 index_carry = MIN (index_carry,
3079 index_in_loop_nest (CHREC_VARIABLE (access_fun),
3080 DDR_LOOP_NEST (ddr)));
3084 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3085 add_outer_distances (ddr, dist_v, index_carry);
3088 static void
3089 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
3091 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3093 dist_v[DDR_INNER_LOOP (ddr)] = 1;
3094 save_dist_v (ddr, dist_v);
3097 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
3098 is the case for example when access functions are the same and
3099 equal to a constant, as in:
3101 | loop_1
3102 | A[3] = ...
3103 | ... = A[3]
3104 | endloop_1
3106 in which case the distance vectors are (0) and (1). */
3108 static void
3109 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
3111 unsigned i, j;
3113 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3115 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
3116 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
3117 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
3119 for (j = 0; j < ca->n; j++)
3120 if (affine_function_zero_p (ca->fns[j]))
3122 insert_innermost_unit_dist_vector (ddr);
3123 return;
3126 for (j = 0; j < cb->n; j++)
3127 if (affine_function_zero_p (cb->fns[j]))
3129 insert_innermost_unit_dist_vector (ddr);
3130 return;
3135 /* Compute the classic per loop distance vector. DDR is the data
3136 dependence relation to build a vector from. Return false when fail
3137 to represent the data dependence as a distance vector. */
3139 static bool
3140 build_classic_dist_vector (struct data_dependence_relation *ddr,
3141 struct loop *loop_nest)
3143 bool init_b = false;
3144 int index_carry = DDR_NB_LOOPS (ddr);
3145 lambda_vector dist_v;
3147 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3148 return false;
3150 if (same_access_functions (ddr))
3152 /* Save the 0 vector. */
3153 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3154 save_dist_v (ddr, dist_v);
3156 if (constant_access_functions (ddr))
3157 add_distance_for_zero_overlaps (ddr);
3159 if (DDR_NB_LOOPS (ddr) > 1)
3160 add_other_self_distances (ddr);
3162 return true;
3165 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3166 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3167 dist_v, &init_b, &index_carry))
3168 return false;
3170 /* Save the distance vector if we initialized one. */
3171 if (init_b)
3173 /* Verify a basic constraint: classic distance vectors should
3174 always be lexicographically positive.
3176 Data references are collected in the order of execution of
3177 the program, thus for the following loop
3179 | for (i = 1; i < 100; i++)
3180 | for (j = 1; j < 100; j++)
3182 | t = T[j+1][i-1]; // A
3183 | T[j][i] = t + 2; // B
3186 references are collected following the direction of the wind:
3187 A then B. The data dependence tests are performed also
3188 following this order, such that we're looking at the distance
3189 separating the elements accessed by A from the elements later
3190 accessed by B. But in this example, the distance returned by
3191 test_dep (A, B) is lexicographically negative (-1, 1), that
3192 means that the access A occurs later than B with respect to
3193 the outer loop, ie. we're actually looking upwind. In this
3194 case we solve test_dep (B, A) looking downwind to the
3195 lexicographically positive solution, that returns the
3196 distance vector (1, -1). */
3197 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3199 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3200 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3201 loop_nest))
3202 return false;
3203 compute_subscript_distance (ddr);
3204 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3205 save_v, &init_b, &index_carry))
3206 return false;
3207 save_dist_v (ddr, save_v);
3208 DDR_REVERSED_P (ddr) = true;
3210 /* In this case there is a dependence forward for all the
3211 outer loops:
3213 | for (k = 1; k < 100; k++)
3214 | for (i = 1; i < 100; i++)
3215 | for (j = 1; j < 100; j++)
3217 | t = T[j+1][i-1]; // A
3218 | T[j][i] = t + 2; // B
3221 the vectors are:
3222 (0, 1, -1)
3223 (1, 1, -1)
3224 (1, -1, 1)
3226 if (DDR_NB_LOOPS (ddr) > 1)
3228 add_outer_distances (ddr, save_v, index_carry);
3229 add_outer_distances (ddr, dist_v, index_carry);
3232 else
3234 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3235 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3237 if (DDR_NB_LOOPS (ddr) > 1)
3239 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3241 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3242 DDR_A (ddr), loop_nest))
3243 return false;
3244 compute_subscript_distance (ddr);
3245 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3246 opposite_v, &init_b,
3247 &index_carry))
3248 return false;
3250 save_dist_v (ddr, save_v);
3251 add_outer_distances (ddr, dist_v, index_carry);
3252 add_outer_distances (ddr, opposite_v, index_carry);
3254 else
3255 save_dist_v (ddr, save_v);
3258 else
3260 /* There is a distance of 1 on all the outer loops: Example:
3261 there is a dependence of distance 1 on loop_1 for the array A.
3263 | loop_1
3264 | A[5] = ...
3265 | endloop
3267 add_outer_distances (ddr, dist_v,
3268 lambda_vector_first_nz (dist_v,
3269 DDR_NB_LOOPS (ddr), 0));
3272 if (dump_file && (dump_flags & TDF_DETAILS))
3274 unsigned i;
3276 fprintf (dump_file, "(build_classic_dist_vector\n");
3277 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3279 fprintf (dump_file, " dist_vector = (");
3280 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3281 DDR_NB_LOOPS (ddr));
3282 fprintf (dump_file, " )\n");
3284 fprintf (dump_file, ")\n");
3287 return true;
3290 /* Return the direction for a given distance.
3291 FIXME: Computing dir this way is suboptimal, since dir can catch
3292 cases that dist is unable to represent. */
3294 static inline enum data_dependence_direction
3295 dir_from_dist (int dist)
3297 if (dist > 0)
3298 return dir_positive;
3299 else if (dist < 0)
3300 return dir_negative;
3301 else
3302 return dir_equal;
3305 /* Compute the classic per loop direction vector. DDR is the data
3306 dependence relation to build a vector from. */
3308 static void
3309 build_classic_dir_vector (struct data_dependence_relation *ddr)
3311 unsigned i, j;
3312 lambda_vector dist_v;
3314 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
3316 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3318 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3319 dir_v[j] = dir_from_dist (dist_v[j]);
3321 save_dir_v (ddr, dir_v);
3325 /* Helper function. Returns true when there is a dependence between
3326 data references DRA and DRB. */
3328 static bool
3329 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3330 struct data_reference *dra,
3331 struct data_reference *drb,
3332 struct loop *loop_nest)
3334 unsigned int i;
3335 tree last_conflicts;
3336 struct subscript *subscript;
3338 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3339 i++)
3341 conflict_function *overlaps_a, *overlaps_b;
3343 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3344 DR_ACCESS_FN (drb, i),
3345 &overlaps_a, &overlaps_b,
3346 &last_conflicts, loop_nest);
3348 if (CF_NOT_KNOWN_P (overlaps_a)
3349 || CF_NOT_KNOWN_P (overlaps_b))
3351 finalize_ddr_dependent (ddr, chrec_dont_know);
3352 dependence_stats.num_dependence_undetermined++;
3353 free_conflict_function (overlaps_a);
3354 free_conflict_function (overlaps_b);
3355 return false;
3358 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3359 || CF_NO_DEPENDENCE_P (overlaps_b))
3361 finalize_ddr_dependent (ddr, chrec_known);
3362 dependence_stats.num_dependence_independent++;
3363 free_conflict_function (overlaps_a);
3364 free_conflict_function (overlaps_b);
3365 return false;
3368 else
3370 if (SUB_CONFLICTS_IN_A (subscript))
3371 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3372 if (SUB_CONFLICTS_IN_B (subscript))
3373 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3375 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3376 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3377 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3381 return true;
3384 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3386 static void
3387 subscript_dependence_tester (struct data_dependence_relation *ddr,
3388 struct loop *loop_nest)
3391 if (dump_file && (dump_flags & TDF_DETAILS))
3392 fprintf (dump_file, "(subscript_dependence_tester \n");
3394 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3395 dependence_stats.num_dependence_dependent++;
3397 compute_subscript_distance (ddr);
3398 if (build_classic_dist_vector (ddr, loop_nest))
3399 build_classic_dir_vector (ddr);
3401 if (dump_file && (dump_flags & TDF_DETAILS))
3402 fprintf (dump_file, ")\n");
3405 /* Returns true when all the access functions of A are affine or
3406 constant with respect to LOOP_NEST. */
3408 static bool
3409 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3410 const struct loop *loop_nest)
3412 unsigned int i;
3413 VEC(tree,heap) *fns = DR_ACCESS_FNS (a);
3414 tree t;
3416 FOR_EACH_VEC_ELT (tree, fns, i, t)
3417 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3418 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3419 return false;
3421 return true;
3424 /* Initializes an equation for an OMEGA problem using the information
3425 contained in the ACCESS_FUN. Returns true when the operation
3426 succeeded.
3428 PB is the omega constraint system.
3429 EQ is the number of the equation to be initialized.
3430 OFFSET is used for shifting the variables names in the constraints:
3431 a constrain is composed of 2 * the number of variables surrounding
3432 dependence accesses. OFFSET is set either to 0 for the first n variables,
3433 then it is set to n.
3434 ACCESS_FUN is expected to be an affine chrec. */
3436 static bool
3437 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3438 unsigned int offset, tree access_fun,
3439 struct data_dependence_relation *ddr)
3441 switch (TREE_CODE (access_fun))
3443 case POLYNOMIAL_CHREC:
3445 tree left = CHREC_LEFT (access_fun);
3446 tree right = CHREC_RIGHT (access_fun);
3447 int var = CHREC_VARIABLE (access_fun);
3448 unsigned var_idx;
3450 if (TREE_CODE (right) != INTEGER_CST)
3451 return false;
3453 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3454 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3456 /* Compute the innermost loop index. */
3457 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3459 if (offset == 0)
3460 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3461 += int_cst_value (right);
3463 switch (TREE_CODE (left))
3465 case POLYNOMIAL_CHREC:
3466 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3468 case INTEGER_CST:
3469 pb->eqs[eq].coef[0] += int_cst_value (left);
3470 return true;
3472 default:
3473 return false;
3477 case INTEGER_CST:
3478 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3479 return true;
3481 default:
3482 return false;
3486 /* As explained in the comments preceding init_omega_for_ddr, we have
3487 to set up a system for each loop level, setting outer loops
3488 variation to zero, and current loop variation to positive or zero.
3489 Save each lexico positive distance vector. */
3491 static void
3492 omega_extract_distance_vectors (omega_pb pb,
3493 struct data_dependence_relation *ddr)
3495 int eq, geq;
3496 unsigned i, j;
3497 struct loop *loopi, *loopj;
3498 enum omega_result res;
3500 /* Set a new problem for each loop in the nest. The basis is the
3501 problem that we have initialized until now. On top of this we
3502 add new constraints. */
3503 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3504 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3506 int dist = 0;
3507 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3508 DDR_NB_LOOPS (ddr));
3510 omega_copy_problem (copy, pb);
3512 /* For all the outer loops "loop_j", add "dj = 0". */
3513 for (j = 0;
3514 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3516 eq = omega_add_zero_eq (copy, omega_black);
3517 copy->eqs[eq].coef[j + 1] = 1;
3520 /* For "loop_i", add "0 <= di". */
3521 geq = omega_add_zero_geq (copy, omega_black);
3522 copy->geqs[geq].coef[i + 1] = 1;
3524 /* Reduce the constraint system, and test that the current
3525 problem is feasible. */
3526 res = omega_simplify_problem (copy);
3527 if (res == omega_false
3528 || res == omega_unknown
3529 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3530 goto next_problem;
3532 for (eq = 0; eq < copy->num_subs; eq++)
3533 if (copy->subs[eq].key == (int) i + 1)
3535 dist = copy->subs[eq].coef[0];
3536 goto found_dist;
3539 if (dist == 0)
3541 /* Reinitialize problem... */
3542 omega_copy_problem (copy, pb);
3543 for (j = 0;
3544 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3546 eq = omega_add_zero_eq (copy, omega_black);
3547 copy->eqs[eq].coef[j + 1] = 1;
3550 /* ..., but this time "di = 1". */
3551 eq = omega_add_zero_eq (copy, omega_black);
3552 copy->eqs[eq].coef[i + 1] = 1;
3553 copy->eqs[eq].coef[0] = -1;
3555 res = omega_simplify_problem (copy);
3556 if (res == omega_false
3557 || res == omega_unknown
3558 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3559 goto next_problem;
3561 for (eq = 0; eq < copy->num_subs; eq++)
3562 if (copy->subs[eq].key == (int) i + 1)
3564 dist = copy->subs[eq].coef[0];
3565 goto found_dist;
3569 found_dist:;
3570 /* Save the lexicographically positive distance vector. */
3571 if (dist >= 0)
3573 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3574 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3576 dist_v[i] = dist;
3578 for (eq = 0; eq < copy->num_subs; eq++)
3579 if (copy->subs[eq].key > 0)
3581 dist = copy->subs[eq].coef[0];
3582 dist_v[copy->subs[eq].key - 1] = dist;
3585 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3586 dir_v[j] = dir_from_dist (dist_v[j]);
3588 save_dist_v (ddr, dist_v);
3589 save_dir_v (ddr, dir_v);
3592 next_problem:;
3593 omega_free_problem (copy);
3597 /* This is called for each subscript of a tuple of data references:
3598 insert an equality for representing the conflicts. */
3600 static bool
3601 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3602 struct data_dependence_relation *ddr,
3603 omega_pb pb, bool *maybe_dependent)
3605 int eq;
3606 tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3607 TREE_TYPE (access_fun_b));
3608 tree fun_a = chrec_convert (type, access_fun_a, NULL);
3609 tree fun_b = chrec_convert (type, access_fun_b, NULL);
3610 tree difference = chrec_fold_minus (type, fun_a, fun_b);
3611 tree minus_one;
3613 /* When the fun_a - fun_b is not constant, the dependence is not
3614 captured by the classic distance vector representation. */
3615 if (TREE_CODE (difference) != INTEGER_CST)
3616 return false;
3618 /* ZIV test. */
3619 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3621 /* There is no dependence. */
3622 *maybe_dependent = false;
3623 return true;
3626 minus_one = build_int_cst (type, -1);
3627 fun_b = chrec_fold_multiply (type, fun_b, minus_one);
3629 eq = omega_add_zero_eq (pb, omega_black);
3630 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3631 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3632 /* There is probably a dependence, but the system of
3633 constraints cannot be built: answer "don't know". */
3634 return false;
3636 /* GCD test. */
3637 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3638 && !int_divides_p (lambda_vector_gcd
3639 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3640 2 * DDR_NB_LOOPS (ddr)),
3641 pb->eqs[eq].coef[0]))
3643 /* There is no dependence. */
3644 *maybe_dependent = false;
3645 return true;
3648 return true;
3651 /* Helper function, same as init_omega_for_ddr but specialized for
3652 data references A and B. */
3654 static bool
3655 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3656 struct data_dependence_relation *ddr,
3657 omega_pb pb, bool *maybe_dependent)
3659 unsigned i;
3660 int ineq;
3661 struct loop *loopi;
3662 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3664 /* Insert an equality per subscript. */
3665 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3667 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3668 ddr, pb, maybe_dependent))
3669 return false;
3670 else if (*maybe_dependent == false)
3672 /* There is no dependence. */
3673 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3674 return true;
3678 /* Insert inequalities: constraints corresponding to the iteration
3679 domain, i.e. the loops surrounding the references "loop_x" and
3680 the distance variables "dx". The layout of the OMEGA
3681 representation is as follows:
3682 - coef[0] is the constant
3683 - coef[1..nb_loops] are the protected variables that will not be
3684 removed by the solver: the "dx"
3685 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3687 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3688 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3690 HOST_WIDE_INT nbi = max_stmt_executions_int (loopi, true);
3692 /* 0 <= loop_x */
3693 ineq = omega_add_zero_geq (pb, omega_black);
3694 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3696 /* 0 <= loop_x + dx */
3697 ineq = omega_add_zero_geq (pb, omega_black);
3698 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3699 pb->geqs[ineq].coef[i + 1] = 1;
3701 if (nbi != -1)
3703 /* loop_x <= nb_iters */
3704 ineq = omega_add_zero_geq (pb, omega_black);
3705 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3706 pb->geqs[ineq].coef[0] = nbi;
3708 /* loop_x + dx <= nb_iters */
3709 ineq = omega_add_zero_geq (pb, omega_black);
3710 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3711 pb->geqs[ineq].coef[i + 1] = -1;
3712 pb->geqs[ineq].coef[0] = nbi;
3714 /* A step "dx" bigger than nb_iters is not feasible, so
3715 add "0 <= nb_iters + dx", */
3716 ineq = omega_add_zero_geq (pb, omega_black);
3717 pb->geqs[ineq].coef[i + 1] = 1;
3718 pb->geqs[ineq].coef[0] = nbi;
3719 /* and "dx <= nb_iters". */
3720 ineq = omega_add_zero_geq (pb, omega_black);
3721 pb->geqs[ineq].coef[i + 1] = -1;
3722 pb->geqs[ineq].coef[0] = nbi;
3726 omega_extract_distance_vectors (pb, ddr);
3728 return true;
3731 /* Sets up the Omega dependence problem for the data dependence
3732 relation DDR. Returns false when the constraint system cannot be
3733 built, ie. when the test answers "don't know". Returns true
3734 otherwise, and when independence has been proved (using one of the
3735 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3736 set MAYBE_DEPENDENT to true.
3738 Example: for setting up the dependence system corresponding to the
3739 conflicting accesses
3741 | loop_i
3742 | loop_j
3743 | A[i, i+1] = ...
3744 | ... A[2*j, 2*(i + j)]
3745 | endloop_j
3746 | endloop_i
3748 the following constraints come from the iteration domain:
3750 0 <= i <= Ni
3751 0 <= i + di <= Ni
3752 0 <= j <= Nj
3753 0 <= j + dj <= Nj
3755 where di, dj are the distance variables. The constraints
3756 representing the conflicting elements are:
3758 i = 2 * (j + dj)
3759 i + 1 = 2 * (i + di + j + dj)
3761 For asking that the resulting distance vector (di, dj) be
3762 lexicographically positive, we insert the constraint "di >= 0". If
3763 "di = 0" in the solution, we fix that component to zero, and we
3764 look at the inner loops: we set a new problem where all the outer
3765 loop distances are zero, and fix this inner component to be
3766 positive. When one of the components is positive, we save that
3767 distance, and set a new problem where the distance on this loop is
3768 zero, searching for other distances in the inner loops. Here is
3769 the classic example that illustrates that we have to set for each
3770 inner loop a new problem:
3772 | loop_1
3773 | loop_2
3774 | A[10]
3775 | endloop_2
3776 | endloop_1
3778 we have to save two distances (1, 0) and (0, 1).
3780 Given two array references, refA and refB, we have to set the
3781 dependence problem twice, refA vs. refB and refB vs. refA, and we
3782 cannot do a single test, as refB might occur before refA in the
3783 inner loops, and the contrary when considering outer loops: ex.
3785 | loop_0
3786 | loop_1
3787 | loop_2
3788 | T[{1,+,1}_2][{1,+,1}_1] // refA
3789 | T[{2,+,1}_2][{0,+,1}_1] // refB
3790 | endloop_2
3791 | endloop_1
3792 | endloop_0
3794 refB touches the elements in T before refA, and thus for the same
3795 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3796 but for successive loop_0 iterations, we have (1, -1, 1)
3798 The Omega solver expects the distance variables ("di" in the
3799 previous example) to come first in the constraint system (as
3800 variables to be protected, or "safe" variables), the constraint
3801 system is built using the following layout:
3803 "cst | distance vars | index vars".
3806 static bool
3807 init_omega_for_ddr (struct data_dependence_relation *ddr,
3808 bool *maybe_dependent)
3810 omega_pb pb;
3811 bool res = false;
3813 *maybe_dependent = true;
3815 if (same_access_functions (ddr))
3817 unsigned j;
3818 lambda_vector dir_v;
3820 /* Save the 0 vector. */
3821 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3822 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3823 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3824 dir_v[j] = dir_equal;
3825 save_dir_v (ddr, dir_v);
3827 /* Save the dependences carried by outer loops. */
3828 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3829 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3830 maybe_dependent);
3831 omega_free_problem (pb);
3832 return res;
3835 /* Omega expects the protected variables (those that have to be kept
3836 after elimination) to appear first in the constraint system.
3837 These variables are the distance variables. In the following
3838 initialization we declare NB_LOOPS safe variables, and the total
3839 number of variables for the constraint system is 2*NB_LOOPS. */
3840 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3841 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3842 maybe_dependent);
3843 omega_free_problem (pb);
3845 /* Stop computation if not decidable, or no dependence. */
3846 if (res == false || *maybe_dependent == false)
3847 return res;
3849 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3850 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
3851 maybe_dependent);
3852 omega_free_problem (pb);
3854 return res;
3857 /* Return true when DDR contains the same information as that stored
3858 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
3860 static bool
3861 ddr_consistent_p (FILE *file,
3862 struct data_dependence_relation *ddr,
3863 VEC (lambda_vector, heap) *dist_vects,
3864 VEC (lambda_vector, heap) *dir_vects)
3866 unsigned int i, j;
3868 /* If dump_file is set, output there. */
3869 if (dump_file && (dump_flags & TDF_DETAILS))
3870 file = dump_file;
3872 if (VEC_length (lambda_vector, dist_vects) != DDR_NUM_DIST_VECTS (ddr))
3874 lambda_vector b_dist_v;
3875 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
3876 VEC_length (lambda_vector, dist_vects),
3877 DDR_NUM_DIST_VECTS (ddr));
3879 fprintf (file, "Banerjee dist vectors:\n");
3880 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, i, b_dist_v)
3881 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
3883 fprintf (file, "Omega dist vectors:\n");
3884 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3885 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
3887 fprintf (file, "data dependence relation:\n");
3888 dump_data_dependence_relation (file, ddr);
3890 fprintf (file, ")\n");
3891 return false;
3894 if (VEC_length (lambda_vector, dir_vects) != DDR_NUM_DIR_VECTS (ddr))
3896 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
3897 VEC_length (lambda_vector, dir_vects),
3898 DDR_NUM_DIR_VECTS (ddr));
3899 return false;
3902 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3904 lambda_vector a_dist_v;
3905 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
3907 /* Distance vectors are not ordered in the same way in the DDR
3908 and in the DIST_VECTS: search for a matching vector. */
3909 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, a_dist_v)
3910 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
3911 break;
3913 if (j == VEC_length (lambda_vector, dist_vects))
3915 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
3916 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
3917 fprintf (file, "not found in Omega dist vectors:\n");
3918 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
3919 fprintf (file, "data dependence relation:\n");
3920 dump_data_dependence_relation (file, ddr);
3921 fprintf (file, ")\n");
3925 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
3927 lambda_vector a_dir_v;
3928 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
3930 /* Direction vectors are not ordered in the same way in the DDR
3931 and in the DIR_VECTS: search for a matching vector. */
3932 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, a_dir_v)
3933 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
3934 break;
3936 if (j == VEC_length (lambda_vector, dist_vects))
3938 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
3939 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
3940 fprintf (file, "not found in Omega dir vectors:\n");
3941 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
3942 fprintf (file, "data dependence relation:\n");
3943 dump_data_dependence_relation (file, ddr);
3944 fprintf (file, ")\n");
3948 return true;
3951 /* This computes the affine dependence relation between A and B with
3952 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
3953 independence between two accesses, while CHREC_DONT_KNOW is used
3954 for representing the unknown relation.
3956 Note that it is possible to stop the computation of the dependence
3957 relation the first time we detect a CHREC_KNOWN element for a given
3958 subscript. */
3960 static void
3961 compute_affine_dependence (struct data_dependence_relation *ddr,
3962 struct loop *loop_nest)
3964 struct data_reference *dra = DDR_A (ddr);
3965 struct data_reference *drb = DDR_B (ddr);
3967 if (dump_file && (dump_flags & TDF_DETAILS))
3969 fprintf (dump_file, "(compute_affine_dependence\n");
3970 fprintf (dump_file, " (stmt_a = \n");
3971 print_gimple_stmt (dump_file, DR_STMT (dra), 0, 0);
3972 fprintf (dump_file, ")\n (stmt_b = \n");
3973 print_gimple_stmt (dump_file, DR_STMT (drb), 0, 0);
3974 fprintf (dump_file, ")\n");
3977 /* Analyze only when the dependence relation is not yet known. */
3978 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE
3979 && !DDR_SELF_REFERENCE (ddr))
3981 dependence_stats.num_dependence_tests++;
3983 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
3984 && access_functions_are_affine_or_constant_p (drb, loop_nest))
3986 if (flag_check_data_deps)
3988 /* Compute the dependences using the first algorithm. */
3989 subscript_dependence_tester (ddr, loop_nest);
3991 if (dump_file && (dump_flags & TDF_DETAILS))
3993 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
3994 dump_data_dependence_relation (dump_file, ddr);
3997 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
3999 bool maybe_dependent;
4000 VEC (lambda_vector, heap) *dir_vects, *dist_vects;
4002 /* Save the result of the first DD analyzer. */
4003 dist_vects = DDR_DIST_VECTS (ddr);
4004 dir_vects = DDR_DIR_VECTS (ddr);
4006 /* Reset the information. */
4007 DDR_DIST_VECTS (ddr) = NULL;
4008 DDR_DIR_VECTS (ddr) = NULL;
4010 /* Compute the same information using Omega. */
4011 if (!init_omega_for_ddr (ddr, &maybe_dependent))
4012 goto csys_dont_know;
4014 if (dump_file && (dump_flags & TDF_DETAILS))
4016 fprintf (dump_file, "Omega Analyzer\n");
4017 dump_data_dependence_relation (dump_file, ddr);
4020 /* Check that we get the same information. */
4021 if (maybe_dependent)
4022 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
4023 dir_vects));
4026 else
4027 subscript_dependence_tester (ddr, loop_nest);
4030 /* As a last case, if the dependence cannot be determined, or if
4031 the dependence is considered too difficult to determine, answer
4032 "don't know". */
4033 else
4035 csys_dont_know:;
4036 dependence_stats.num_dependence_undetermined++;
4038 if (dump_file && (dump_flags & TDF_DETAILS))
4040 fprintf (dump_file, "Data ref a:\n");
4041 dump_data_reference (dump_file, dra);
4042 fprintf (dump_file, "Data ref b:\n");
4043 dump_data_reference (dump_file, drb);
4044 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4046 finalize_ddr_dependent (ddr, chrec_dont_know);
4050 if (dump_file && (dump_flags & TDF_DETAILS))
4051 fprintf (dump_file, ")\n");
4054 /* This computes the dependence relation for the same data
4055 reference into DDR. */
4057 static void
4058 compute_self_dependence (struct data_dependence_relation *ddr)
4060 unsigned int i;
4061 struct subscript *subscript;
4063 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
4064 return;
4066 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
4067 i++)
4069 if (SUB_CONFLICTS_IN_A (subscript))
4070 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
4071 if (SUB_CONFLICTS_IN_B (subscript))
4072 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
4074 /* The accessed index overlaps for each iteration. */
4075 SUB_CONFLICTS_IN_A (subscript)
4076 = conflict_fn (1, affine_fn_cst (integer_zero_node));
4077 SUB_CONFLICTS_IN_B (subscript)
4078 = conflict_fn (1, affine_fn_cst (integer_zero_node));
4079 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
4082 /* The distance vector is the zero vector. */
4083 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4084 save_dir_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4087 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4088 the data references in DATAREFS, in the LOOP_NEST. When
4089 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4090 relations. */
4092 void
4093 compute_all_dependences (VEC (data_reference_p, heap) *datarefs,
4094 VEC (ddr_p, heap) **dependence_relations,
4095 VEC (loop_p, heap) *loop_nest,
4096 bool compute_self_and_rr)
4098 struct data_dependence_relation *ddr;
4099 struct data_reference *a, *b;
4100 unsigned int i, j;
4102 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4103 for (j = i + 1; VEC_iterate (data_reference_p, datarefs, j, b); j++)
4104 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4106 ddr = initialize_data_dependence_relation (a, b, loop_nest);
4107 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4108 if (loop_nest)
4109 compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
4112 if (compute_self_and_rr)
4113 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4115 ddr = initialize_data_dependence_relation (a, a, loop_nest);
4116 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4117 compute_self_dependence (ddr);
4121 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4122 true if STMT clobbers memory, false otherwise. */
4124 bool
4125 get_references_in_stmt (gimple stmt, VEC (data_ref_loc, heap) **references)
4127 bool clobbers_memory = false;
4128 data_ref_loc *ref;
4129 tree *op0, *op1;
4130 enum gimple_code stmt_code = gimple_code (stmt);
4132 *references = NULL;
4134 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4135 Calls have side-effects, except those to const or pure
4136 functions. */
4137 if ((stmt_code == GIMPLE_CALL
4138 && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)))
4139 || (stmt_code == GIMPLE_ASM
4140 && gimple_asm_volatile_p (stmt)))
4141 clobbers_memory = true;
4143 if (!gimple_vuse (stmt))
4144 return clobbers_memory;
4146 if (stmt_code == GIMPLE_ASSIGN)
4148 tree base;
4149 op0 = gimple_assign_lhs_ptr (stmt);
4150 op1 = gimple_assign_rhs1_ptr (stmt);
4152 if (DECL_P (*op1)
4153 || (REFERENCE_CLASS_P (*op1)
4154 && (base = get_base_address (*op1))
4155 && TREE_CODE (base) != SSA_NAME))
4157 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4158 ref->pos = op1;
4159 ref->is_read = true;
4162 if (DECL_P (*op0)
4163 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0)))
4165 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4166 ref->pos = op0;
4167 ref->is_read = false;
4170 else if (stmt_code == GIMPLE_CALL)
4172 unsigned i, n = gimple_call_num_args (stmt);
4174 for (i = 0; i < n; i++)
4176 op0 = gimple_call_arg_ptr (stmt, i);
4178 if (DECL_P (*op0)
4179 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0)))
4181 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4182 ref->pos = op0;
4183 ref->is_read = true;
4188 return clobbers_memory;
4191 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4192 reference, returns false, otherwise returns true. NEST is the outermost
4193 loop of the loop nest in which the references should be analyzed. */
4195 bool
4196 find_data_references_in_stmt (struct loop *nest, gimple stmt,
4197 VEC (data_reference_p, heap) **datarefs)
4199 unsigned i;
4200 VEC (data_ref_loc, heap) *references;
4201 data_ref_loc *ref;
4202 bool ret = true;
4203 data_reference_p dr;
4205 if (get_references_in_stmt (stmt, &references))
4207 VEC_free (data_ref_loc, heap, references);
4208 return false;
4211 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4213 dr = create_data_ref (nest, loop_containing_stmt (stmt),
4214 *ref->pos, stmt, ref->is_read);
4215 gcc_assert (dr != NULL);
4216 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4218 VEC_free (data_ref_loc, heap, references);
4219 return ret;
4222 /* Stores the data references in STMT to DATAREFS. If there is an
4223 unanalyzable reference, returns false, otherwise returns true.
4224 NEST is the outermost loop of the loop nest in which the references
4225 should be instantiated, LOOP is the loop in which the references
4226 should be analyzed. */
4228 bool
4229 graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple stmt,
4230 VEC (data_reference_p, heap) **datarefs)
4232 unsigned i;
4233 VEC (data_ref_loc, heap) *references;
4234 data_ref_loc *ref;
4235 bool ret = true;
4236 data_reference_p dr;
4238 if (get_references_in_stmt (stmt, &references))
4240 VEC_free (data_ref_loc, heap, references);
4241 return false;
4244 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4246 dr = create_data_ref (nest, loop, *ref->pos, stmt, ref->is_read);
4247 gcc_assert (dr != NULL);
4248 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4251 VEC_free (data_ref_loc, heap, references);
4252 return ret;
4255 /* Search the data references in LOOP, and record the information into
4256 DATAREFS. Returns chrec_dont_know when failing to analyze a
4257 difficult case, returns NULL_TREE otherwise. */
4259 tree
4260 find_data_references_in_bb (struct loop *loop, basic_block bb,
4261 VEC (data_reference_p, heap) **datarefs)
4263 gimple_stmt_iterator bsi;
4265 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4267 gimple stmt = gsi_stmt (bsi);
4269 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4271 struct data_reference *res;
4272 res = XCNEW (struct data_reference);
4273 VEC_safe_push (data_reference_p, heap, *datarefs, res);
4275 return chrec_dont_know;
4279 return NULL_TREE;
4282 /* Search the data references in LOOP, and record the information into
4283 DATAREFS. Returns chrec_dont_know when failing to analyze a
4284 difficult case, returns NULL_TREE otherwise.
4286 TODO: This function should be made smarter so that it can handle address
4287 arithmetic as if they were array accesses, etc. */
4289 tree
4290 find_data_references_in_loop (struct loop *loop,
4291 VEC (data_reference_p, heap) **datarefs)
4293 basic_block bb, *bbs;
4294 unsigned int i;
4296 bbs = get_loop_body_in_dom_order (loop);
4298 for (i = 0; i < loop->num_nodes; i++)
4300 bb = bbs[i];
4302 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4304 free (bbs);
4305 return chrec_dont_know;
4308 free (bbs);
4310 return NULL_TREE;
4313 /* Recursive helper function. */
4315 static bool
4316 find_loop_nest_1 (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4318 /* Inner loops of the nest should not contain siblings. Example:
4319 when there are two consecutive loops,
4321 | loop_0
4322 | loop_1
4323 | A[{0, +, 1}_1]
4324 | endloop_1
4325 | loop_2
4326 | A[{0, +, 1}_2]
4327 | endloop_2
4328 | endloop_0
4330 the dependence relation cannot be captured by the distance
4331 abstraction. */
4332 if (loop->next)
4333 return false;
4335 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4336 if (loop->inner)
4337 return find_loop_nest_1 (loop->inner, loop_nest);
4338 return true;
4341 /* Return false when the LOOP is not well nested. Otherwise return
4342 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4343 contain the loops from the outermost to the innermost, as they will
4344 appear in the classic distance vector. */
4346 bool
4347 find_loop_nest (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4349 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4350 if (loop->inner)
4351 return find_loop_nest_1 (loop->inner, loop_nest);
4352 return true;
4355 /* Returns true when the data dependences have been computed, false otherwise.
4356 Given a loop nest LOOP, the following vectors are returned:
4357 DATAREFS is initialized to all the array elements contained in this loop,
4358 DEPENDENCE_RELATIONS contains the relations between the data references.
4359 Compute read-read and self relations if
4360 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4362 bool
4363 compute_data_dependences_for_loop (struct loop *loop,
4364 bool compute_self_and_read_read_dependences,
4365 VEC (loop_p, heap) **loop_nest,
4366 VEC (data_reference_p, heap) **datarefs,
4367 VEC (ddr_p, heap) **dependence_relations)
4369 bool res = true;
4371 memset (&dependence_stats, 0, sizeof (dependence_stats));
4373 /* If the loop nest is not well formed, or one of the data references
4374 is not computable, give up without spending time to compute other
4375 dependences. */
4376 if (!loop
4377 || !find_loop_nest (loop, loop_nest)
4378 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know)
4380 struct data_dependence_relation *ddr;
4382 /* Insert a single relation into dependence_relations:
4383 chrec_dont_know. */
4384 ddr = initialize_data_dependence_relation (NULL, NULL, *loop_nest);
4385 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4386 res = false;
4388 else
4389 compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
4390 compute_self_and_read_read_dependences);
4392 if (dump_file && (dump_flags & TDF_STATS))
4394 fprintf (dump_file, "Dependence tester statistics:\n");
4396 fprintf (dump_file, "Number of dependence tests: %d\n",
4397 dependence_stats.num_dependence_tests);
4398 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4399 dependence_stats.num_dependence_dependent);
4400 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4401 dependence_stats.num_dependence_independent);
4402 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4403 dependence_stats.num_dependence_undetermined);
4405 fprintf (dump_file, "Number of subscript tests: %d\n",
4406 dependence_stats.num_subscript_tests);
4407 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4408 dependence_stats.num_subscript_undetermined);
4409 fprintf (dump_file, "Number of same subscript function: %d\n",
4410 dependence_stats.num_same_subscript_function);
4412 fprintf (dump_file, "Number of ziv tests: %d\n",
4413 dependence_stats.num_ziv);
4414 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4415 dependence_stats.num_ziv_dependent);
4416 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4417 dependence_stats.num_ziv_independent);
4418 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4419 dependence_stats.num_ziv_unimplemented);
4421 fprintf (dump_file, "Number of siv tests: %d\n",
4422 dependence_stats.num_siv);
4423 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4424 dependence_stats.num_siv_dependent);
4425 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4426 dependence_stats.num_siv_independent);
4427 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4428 dependence_stats.num_siv_unimplemented);
4430 fprintf (dump_file, "Number of miv tests: %d\n",
4431 dependence_stats.num_miv);
4432 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4433 dependence_stats.num_miv_dependent);
4434 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4435 dependence_stats.num_miv_independent);
4436 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4437 dependence_stats.num_miv_unimplemented);
4440 return res;
4443 /* Returns true when the data dependences for the basic block BB have been
4444 computed, false otherwise.
4445 DATAREFS is initialized to all the array elements contained in this basic
4446 block, DEPENDENCE_RELATIONS contains the relations between the data
4447 references. Compute read-read and self relations if
4448 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4449 bool
4450 compute_data_dependences_for_bb (basic_block bb,
4451 bool compute_self_and_read_read_dependences,
4452 VEC (data_reference_p, heap) **datarefs,
4453 VEC (ddr_p, heap) **dependence_relations)
4455 if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know)
4456 return false;
4458 compute_all_dependences (*datarefs, dependence_relations, NULL,
4459 compute_self_and_read_read_dependences);
4460 return true;
4463 /* Entry point (for testing only). Analyze all the data references
4464 and the dependence relations in LOOP.
4466 The data references are computed first.
4468 A relation on these nodes is represented by a complete graph. Some
4469 of the relations could be of no interest, thus the relations can be
4470 computed on demand.
4472 In the following function we compute all the relations. This is
4473 just a first implementation that is here for:
4474 - for showing how to ask for the dependence relations,
4475 - for the debugging the whole dependence graph,
4476 - for the dejagnu testcases and maintenance.
4478 It is possible to ask only for a part of the graph, avoiding to
4479 compute the whole dependence graph. The computed dependences are
4480 stored in a knowledge base (KB) such that later queries don't
4481 recompute the same information. The implementation of this KB is
4482 transparent to the optimizer, and thus the KB can be changed with a
4483 more efficient implementation, or the KB could be disabled. */
4484 static void
4485 analyze_all_data_dependences (struct loop *loop)
4487 unsigned int i;
4488 int nb_data_refs = 10;
4489 VEC (data_reference_p, heap) *datarefs =
4490 VEC_alloc (data_reference_p, heap, nb_data_refs);
4491 VEC (ddr_p, heap) *dependence_relations =
4492 VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs);
4493 VEC (loop_p, heap) *loop_nest = VEC_alloc (loop_p, heap, 3);
4495 /* Compute DDs on the whole function. */
4496 compute_data_dependences_for_loop (loop, false, &loop_nest, &datarefs,
4497 &dependence_relations);
4499 if (dump_file)
4501 dump_data_dependence_relations (dump_file, dependence_relations);
4502 fprintf (dump_file, "\n\n");
4504 if (dump_flags & TDF_DETAILS)
4505 dump_dist_dir_vectors (dump_file, dependence_relations);
4507 if (dump_flags & TDF_STATS)
4509 unsigned nb_top_relations = 0;
4510 unsigned nb_bot_relations = 0;
4511 unsigned nb_chrec_relations = 0;
4512 struct data_dependence_relation *ddr;
4514 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4516 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4517 nb_top_relations++;
4519 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4520 nb_bot_relations++;
4522 else
4523 nb_chrec_relations++;
4526 gather_stats_on_scev_database ();
4530 VEC_free (loop_p, heap, loop_nest);
4531 free_dependence_relations (dependence_relations);
4532 free_data_refs (datarefs);
4535 /* Computes all the data dependences and check that the results of
4536 several analyzers are the same. */
4538 void
4539 tree_check_data_deps (void)
4541 loop_iterator li;
4542 struct loop *loop_nest;
4544 FOR_EACH_LOOP (li, loop_nest, 0)
4545 analyze_all_data_dependences (loop_nest);
4548 /* Free the memory used by a data dependence relation DDR. */
4550 void
4551 free_dependence_relation (struct data_dependence_relation *ddr)
4553 if (ddr == NULL)
4554 return;
4556 if (DDR_SUBSCRIPTS (ddr))
4557 free_subscripts (DDR_SUBSCRIPTS (ddr));
4558 if (DDR_DIST_VECTS (ddr))
4559 VEC_free (lambda_vector, heap, DDR_DIST_VECTS (ddr));
4560 if (DDR_DIR_VECTS (ddr))
4561 VEC_free (lambda_vector, heap, DDR_DIR_VECTS (ddr));
4563 free (ddr);
4566 /* Free the memory used by the data dependence relations from
4567 DEPENDENCE_RELATIONS. */
4569 void
4570 free_dependence_relations (VEC (ddr_p, heap) *dependence_relations)
4572 unsigned int i;
4573 struct data_dependence_relation *ddr;
4575 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4576 if (ddr)
4577 free_dependence_relation (ddr);
4579 VEC_free (ddr_p, heap, dependence_relations);
4582 /* Free the memory used by the data references from DATAREFS. */
4584 void
4585 free_data_refs (VEC (data_reference_p, heap) *datarefs)
4587 unsigned int i;
4588 struct data_reference *dr;
4590 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
4591 free_data_ref (dr);
4592 VEC_free (data_reference_p, heap, datarefs);
4597 /* Dump vertex I in RDG to FILE. */
4599 void
4600 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
4602 struct vertex *v = &(rdg->vertices[i]);
4603 struct graph_edge *e;
4605 fprintf (file, "(vertex %d: (%s%s) (in:", i,
4606 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
4607 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
4609 if (v->pred)
4610 for (e = v->pred; e; e = e->pred_next)
4611 fprintf (file, " %d", e->src);
4613 fprintf (file, ") (out:");
4615 if (v->succ)
4616 for (e = v->succ; e; e = e->succ_next)
4617 fprintf (file, " %d", e->dest);
4619 fprintf (file, ")\n");
4620 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
4621 fprintf (file, ")\n");
4624 /* Call dump_rdg_vertex on stderr. */
4626 DEBUG_FUNCTION void
4627 debug_rdg_vertex (struct graph *rdg, int i)
4629 dump_rdg_vertex (stderr, rdg, i);
4632 /* Dump component C of RDG to FILE. If DUMPED is non-null, set the
4633 dumped vertices to that bitmap. */
4635 void dump_rdg_component (FILE *file, struct graph *rdg, int c, bitmap dumped)
4637 int i;
4639 fprintf (file, "(%d\n", c);
4641 for (i = 0; i < rdg->n_vertices; i++)
4642 if (rdg->vertices[i].component == c)
4644 if (dumped)
4645 bitmap_set_bit (dumped, i);
4647 dump_rdg_vertex (file, rdg, i);
4650 fprintf (file, ")\n");
4653 /* Call dump_rdg_vertex on stderr. */
4655 DEBUG_FUNCTION void
4656 debug_rdg_component (struct graph *rdg, int c)
4658 dump_rdg_component (stderr, rdg, c, NULL);
4661 /* Dump the reduced dependence graph RDG to FILE. */
4663 void
4664 dump_rdg (FILE *file, struct graph *rdg)
4666 int i;
4667 bitmap dumped = BITMAP_ALLOC (NULL);
4669 fprintf (file, "(rdg\n");
4671 for (i = 0; i < rdg->n_vertices; i++)
4672 if (!bitmap_bit_p (dumped, i))
4673 dump_rdg_component (file, rdg, rdg->vertices[i].component, dumped);
4675 fprintf (file, ")\n");
4676 BITMAP_FREE (dumped);
4679 /* Call dump_rdg on stderr. */
4681 DEBUG_FUNCTION void
4682 debug_rdg (struct graph *rdg)
4684 dump_rdg (stderr, rdg);
4687 static void
4688 dot_rdg_1 (FILE *file, struct graph *rdg)
4690 int i;
4692 fprintf (file, "digraph RDG {\n");
4694 for (i = 0; i < rdg->n_vertices; i++)
4696 struct vertex *v = &(rdg->vertices[i]);
4697 struct graph_edge *e;
4699 /* Highlight reads from memory. */
4700 if (RDG_MEM_READS_STMT (rdg, i))
4701 fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
4703 /* Highlight stores to memory. */
4704 if (RDG_MEM_WRITE_STMT (rdg, i))
4705 fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
4707 if (v->succ)
4708 for (e = v->succ; e; e = e->succ_next)
4709 switch (RDGE_TYPE (e))
4711 case input_dd:
4712 fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
4713 break;
4715 case output_dd:
4716 fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
4717 break;
4719 case flow_dd:
4720 /* These are the most common dependences: don't print these. */
4721 fprintf (file, "%d -> %d \n", i, e->dest);
4722 break;
4724 case anti_dd:
4725 fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
4726 break;
4728 default:
4729 gcc_unreachable ();
4733 fprintf (file, "}\n\n");
4736 /* Display the Reduced Dependence Graph using dotty. */
4737 extern void dot_rdg (struct graph *);
4739 DEBUG_FUNCTION void
4740 dot_rdg (struct graph *rdg)
4742 /* When debugging, enable the following code. This cannot be used
4743 in production compilers because it calls "system". */
4744 #if 0
4745 FILE *file = fopen ("/tmp/rdg.dot", "w");
4746 gcc_assert (file != NULL);
4748 dot_rdg_1 (file, rdg);
4749 fclose (file);
4751 system ("dotty /tmp/rdg.dot &");
4752 #else
4753 dot_rdg_1 (stderr, rdg);
4754 #endif
4757 /* This structure is used for recording the mapping statement index in
4758 the RDG. */
4760 struct GTY(()) rdg_vertex_info
4762 gimple stmt;
4763 int index;
4766 /* Returns the index of STMT in RDG. */
4769 rdg_vertex_for_stmt (struct graph *rdg, gimple stmt)
4771 struct rdg_vertex_info rvi, *slot;
4773 rvi.stmt = stmt;
4774 slot = (struct rdg_vertex_info *) htab_find (rdg->indices, &rvi);
4776 if (!slot)
4777 return -1;
4779 return slot->index;
4782 /* Creates an edge in RDG for each distance vector from DDR. The
4783 order that we keep track of in the RDG is the order in which
4784 statements have to be executed. */
4786 static void
4787 create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4789 struct graph_edge *e;
4790 int va, vb;
4791 data_reference_p dra = DDR_A (ddr);
4792 data_reference_p drb = DDR_B (ddr);
4793 unsigned level = ddr_dependence_level (ddr);
4795 /* For non scalar dependences, when the dependence is REVERSED,
4796 statement B has to be executed before statement A. */
4797 if (level > 0
4798 && !DDR_REVERSED_P (ddr))
4800 data_reference_p tmp = dra;
4801 dra = drb;
4802 drb = tmp;
4805 va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
4806 vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
4808 if (va < 0 || vb < 0)
4809 return;
4811 e = add_edge (rdg, va, vb);
4812 e->data = XNEW (struct rdg_edge);
4814 RDGE_LEVEL (e) = level;
4815 RDGE_RELATION (e) = ddr;
4817 /* Determines the type of the data dependence. */
4818 if (DR_IS_READ (dra) && DR_IS_READ (drb))
4819 RDGE_TYPE (e) = input_dd;
4820 else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
4821 RDGE_TYPE (e) = output_dd;
4822 else if (DR_IS_WRITE (dra) && DR_IS_READ (drb))
4823 RDGE_TYPE (e) = flow_dd;
4824 else if (DR_IS_READ (dra) && DR_IS_WRITE (drb))
4825 RDGE_TYPE (e) = anti_dd;
4828 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
4829 the index of DEF in RDG. */
4831 static void
4832 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
4834 use_operand_p imm_use_p;
4835 imm_use_iterator iterator;
4837 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
4839 struct graph_edge *e;
4840 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
4842 if (use < 0)
4843 continue;
4845 e = add_edge (rdg, idef, use);
4846 e->data = XNEW (struct rdg_edge);
4847 RDGE_TYPE (e) = flow_dd;
4848 RDGE_RELATION (e) = NULL;
4852 /* Creates the edges of the reduced dependence graph RDG. */
4854 static void
4855 create_rdg_edges (struct graph *rdg, VEC (ddr_p, heap) *ddrs)
4857 int i;
4858 struct data_dependence_relation *ddr;
4859 def_operand_p def_p;
4860 ssa_op_iter iter;
4862 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
4863 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4864 create_rdg_edge_for_ddr (rdg, ddr);
4866 for (i = 0; i < rdg->n_vertices; i++)
4867 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
4868 iter, SSA_OP_DEF)
4869 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
4872 /* Build the vertices of the reduced dependence graph RDG. */
4874 void
4875 create_rdg_vertices (struct graph *rdg, VEC (gimple, heap) *stmts)
4877 int i, j;
4878 gimple stmt;
4880 FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
4882 VEC (data_ref_loc, heap) *references;
4883 data_ref_loc *ref;
4884 struct vertex *v = &(rdg->vertices[i]);
4885 struct rdg_vertex_info *rvi = XNEW (struct rdg_vertex_info);
4886 struct rdg_vertex_info **slot;
4888 rvi->stmt = stmt;
4889 rvi->index = i;
4890 slot = (struct rdg_vertex_info **) htab_find_slot (rdg->indices, rvi, INSERT);
4892 if (!*slot)
4893 *slot = rvi;
4894 else
4895 free (rvi);
4897 v->data = XNEW (struct rdg_vertex);
4898 RDG_STMT (rdg, i) = stmt;
4900 RDG_MEM_WRITE_STMT (rdg, i) = false;
4901 RDG_MEM_READS_STMT (rdg, i) = false;
4902 if (gimple_code (stmt) == GIMPLE_PHI)
4903 continue;
4905 get_references_in_stmt (stmt, &references);
4906 FOR_EACH_VEC_ELT (data_ref_loc, references, j, ref)
4907 if (!ref->is_read)
4908 RDG_MEM_WRITE_STMT (rdg, i) = true;
4909 else
4910 RDG_MEM_READS_STMT (rdg, i) = true;
4912 VEC_free (data_ref_loc, heap, references);
4916 /* Initialize STMTS with all the statements of LOOP. When
4917 INCLUDE_PHIS is true, include also the PHI nodes. The order in
4918 which we discover statements is important as
4919 generate_loops_for_partition is using the same traversal for
4920 identifying statements. */
4922 static void
4923 stmts_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
4925 unsigned int i;
4926 basic_block *bbs = get_loop_body_in_dom_order (loop);
4928 for (i = 0; i < loop->num_nodes; i++)
4930 basic_block bb = bbs[i];
4931 gimple_stmt_iterator bsi;
4932 gimple stmt;
4934 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4935 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
4937 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4939 stmt = gsi_stmt (bsi);
4940 if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
4941 VEC_safe_push (gimple, heap, *stmts, stmt);
4945 free (bbs);
4948 /* Returns true when all the dependences are computable. */
4950 static bool
4951 known_dependences_p (VEC (ddr_p, heap) *dependence_relations)
4953 ddr_p ddr;
4954 unsigned int i;
4956 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4957 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4958 return false;
4960 return true;
4963 /* Computes a hash function for element ELT. */
4965 static hashval_t
4966 hash_stmt_vertex_info (const void *elt)
4968 const struct rdg_vertex_info *const rvi =
4969 (const struct rdg_vertex_info *) elt;
4970 gimple stmt = rvi->stmt;
4972 return htab_hash_pointer (stmt);
4975 /* Compares database elements E1 and E2. */
4977 static int
4978 eq_stmt_vertex_info (const void *e1, const void *e2)
4980 const struct rdg_vertex_info *elt1 = (const struct rdg_vertex_info *) e1;
4981 const struct rdg_vertex_info *elt2 = (const struct rdg_vertex_info *) e2;
4983 return elt1->stmt == elt2->stmt;
4986 /* Free the element E. */
4988 static void
4989 hash_stmt_vertex_del (void *e)
4991 free (e);
4994 /* Build the Reduced Dependence Graph (RDG) with one vertex per
4995 statement of the loop nest, and one edge per data dependence or
4996 scalar dependence. */
4998 struct graph *
4999 build_empty_rdg (int n_stmts)
5001 int nb_data_refs = 10;
5002 struct graph *rdg = new_graph (n_stmts);
5004 rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
5005 eq_stmt_vertex_info, hash_stmt_vertex_del);
5006 return rdg;
5009 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5010 statement of the loop nest, and one edge per data dependence or
5011 scalar dependence. */
5013 struct graph *
5014 build_rdg (struct loop *loop,
5015 VEC (loop_p, heap) **loop_nest,
5016 VEC (ddr_p, heap) **dependence_relations,
5017 VEC (data_reference_p, heap) **datarefs)
5019 struct graph *rdg = NULL;
5020 VEC (gimple, heap) *stmts = VEC_alloc (gimple, heap, 10);
5022 compute_data_dependences_for_loop (loop, false, loop_nest, datarefs,
5023 dependence_relations);
5025 if (known_dependences_p (*dependence_relations))
5027 stmts_from_loop (loop, &stmts);
5028 rdg = build_empty_rdg (VEC_length (gimple, stmts));
5029 create_rdg_vertices (rdg, stmts);
5030 create_rdg_edges (rdg, *dependence_relations);
5033 VEC_free (gimple, heap, stmts);
5034 return rdg;
5037 /* Free the reduced dependence graph RDG. */
5039 void
5040 free_rdg (struct graph *rdg)
5042 int i;
5044 for (i = 0; i < rdg->n_vertices; i++)
5046 struct vertex *v = &(rdg->vertices[i]);
5047 struct graph_edge *e;
5049 for (e = v->succ; e; e = e->succ_next)
5050 free (e->data);
5052 free (v->data);
5055 htab_delete (rdg->indices);
5056 free_graph (rdg);
5059 /* Initialize STMTS with all the statements of LOOP that contain a
5060 store to memory. */
5062 void
5063 stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5065 unsigned int i;
5066 basic_block *bbs = get_loop_body_in_dom_order (loop);
5068 for (i = 0; i < loop->num_nodes; i++)
5070 basic_block bb = bbs[i];
5071 gimple_stmt_iterator bsi;
5073 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5074 if (gimple_vdef (gsi_stmt (bsi)))
5075 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
5078 free (bbs);
5081 /* Returns true when the statement at STMT is of the form "A[i] = 0"
5082 that contains a data reference on its LHS with a stride of the same
5083 size as its unit type. */
5085 bool
5086 stmt_with_adjacent_zero_store_dr_p (gimple stmt)
5088 tree op0, op1;
5089 bool res;
5090 struct data_reference *dr;
5092 if (!stmt
5093 || !gimple_vdef (stmt)
5094 || !is_gimple_assign (stmt)
5095 || !gimple_assign_single_p (stmt)
5096 || !(op1 = gimple_assign_rhs1 (stmt))
5097 || !(integer_zerop (op1) || real_zerop (op1)))
5098 return false;
5100 dr = XCNEW (struct data_reference);
5101 op0 = gimple_assign_lhs (stmt);
5103 DR_STMT (dr) = stmt;
5104 DR_REF (dr) = op0;
5106 res = dr_analyze_innermost (dr)
5107 && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0));
5109 free_data_ref (dr);
5110 return res;
5113 /* Initialize STMTS with all the statements of LOOP that contain a
5114 store to memory of the form "A[i] = 0". */
5116 void
5117 stores_zero_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5119 unsigned int i;
5120 basic_block bb;
5121 gimple_stmt_iterator si;
5122 gimple stmt;
5123 basic_block *bbs = get_loop_body_in_dom_order (loop);
5125 for (i = 0; i < loop->num_nodes; i++)
5126 for (bb = bbs[i], si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5127 if ((stmt = gsi_stmt (si))
5128 && stmt_with_adjacent_zero_store_dr_p (stmt))
5129 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (si));
5131 free (bbs);
5134 /* For a data reference REF, return the declaration of its base
5135 address or NULL_TREE if the base is not determined. */
5137 static inline tree
5138 ref_base_address (gimple stmt, data_ref_loc *ref)
5140 tree base = NULL_TREE;
5141 tree base_address;
5142 struct data_reference *dr = XCNEW (struct data_reference);
5144 DR_STMT (dr) = stmt;
5145 DR_REF (dr) = *ref->pos;
5146 dr_analyze_innermost (dr);
5147 base_address = DR_BASE_ADDRESS (dr);
5149 if (!base_address)
5150 goto end;
5152 switch (TREE_CODE (base_address))
5154 case ADDR_EXPR:
5155 base = TREE_OPERAND (base_address, 0);
5156 break;
5158 default:
5159 base = base_address;
5160 break;
5163 end:
5164 free_data_ref (dr);
5165 return base;
5168 /* Determines whether the statement from vertex V of the RDG has a
5169 definition used outside the loop that contains this statement. */
5171 bool
5172 rdg_defs_used_in_other_loops_p (struct graph *rdg, int v)
5174 gimple stmt = RDG_STMT (rdg, v);
5175 struct loop *loop = loop_containing_stmt (stmt);
5176 use_operand_p imm_use_p;
5177 imm_use_iterator iterator;
5178 ssa_op_iter it;
5179 def_operand_p def_p;
5181 if (!loop)
5182 return true;
5184 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, it, SSA_OP_DEF)
5186 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, DEF_FROM_PTR (def_p))
5188 if (loop_containing_stmt (USE_STMT (imm_use_p)) != loop)
5189 return true;
5193 return false;
5196 /* Determines whether statements S1 and S2 access to similar memory
5197 locations. Two memory accesses are considered similar when they
5198 have the same base address declaration, i.e. when their
5199 ref_base_address is the same. */
5201 bool
5202 have_similar_memory_accesses (gimple s1, gimple s2)
5204 bool res = false;
5205 unsigned i, j;
5206 VEC (data_ref_loc, heap) *refs1, *refs2;
5207 data_ref_loc *ref1, *ref2;
5209 get_references_in_stmt (s1, &refs1);
5210 get_references_in_stmt (s2, &refs2);
5212 FOR_EACH_VEC_ELT (data_ref_loc, refs1, i, ref1)
5214 tree base1 = ref_base_address (s1, ref1);
5216 if (base1)
5217 FOR_EACH_VEC_ELT (data_ref_loc, refs2, j, ref2)
5218 if (base1 == ref_base_address (s2, ref2))
5220 res = true;
5221 goto end;
5225 end:
5226 VEC_free (data_ref_loc, heap, refs1);
5227 VEC_free (data_ref_loc, heap, refs2);
5228 return res;
5231 /* Helper function for the hashtab. */
5233 static int
5234 have_similar_memory_accesses_1 (const void *s1, const void *s2)
5236 return have_similar_memory_accesses (CONST_CAST_GIMPLE ((const_gimple) s1),
5237 CONST_CAST_GIMPLE ((const_gimple) s2));
5240 /* Helper function for the hashtab. */
5242 static hashval_t
5243 ref_base_address_1 (const void *s)
5245 gimple stmt = CONST_CAST_GIMPLE ((const_gimple) s);
5246 unsigned i;
5247 VEC (data_ref_loc, heap) *refs;
5248 data_ref_loc *ref;
5249 hashval_t res = 0;
5251 get_references_in_stmt (stmt, &refs);
5253 FOR_EACH_VEC_ELT (data_ref_loc, refs, i, ref)
5254 if (!ref->is_read)
5256 res = htab_hash_pointer (ref_base_address (stmt, ref));
5257 break;
5260 VEC_free (data_ref_loc, heap, refs);
5261 return res;
5264 /* Try to remove duplicated write data references from STMTS. */
5266 void
5267 remove_similar_memory_refs (VEC (gimple, heap) **stmts)
5269 unsigned i;
5270 gimple stmt;
5271 htab_t seen = htab_create (VEC_length (gimple, *stmts), ref_base_address_1,
5272 have_similar_memory_accesses_1, NULL);
5274 for (i = 0; VEC_iterate (gimple, *stmts, i, stmt); )
5276 void **slot;
5278 slot = htab_find_slot (seen, stmt, INSERT);
5280 if (*slot)
5281 VEC_ordered_remove (gimple, *stmts, i);
5282 else
5284 *slot = (void *) stmt;
5285 i++;
5289 htab_delete (seen);
5292 /* Returns the index of PARAMETER in the parameters vector of the
5293 ACCESS_MATRIX. If PARAMETER does not exist return -1. */
5296 access_matrix_get_index_for_parameter (tree parameter,
5297 struct access_matrix *access_matrix)
5299 int i;
5300 VEC (tree,heap) *lambda_parameters = AM_PARAMETERS (access_matrix);
5301 tree lambda_parameter;
5303 FOR_EACH_VEC_ELT (tree, lambda_parameters, i, lambda_parameter)
5304 if (lambda_parameter == parameter)
5305 return i + AM_NB_INDUCTION_VARS (access_matrix);
5307 return -1;