2011-08-19 Andrew Stubbs <ams@codesourcery.com>
[official-gcc.git] / gcc / tree-data-ref.c
blobf7c7ae5e45ae0d72a6b253196261e7fa6e5154b6
1 /* Data references and dependences detectors.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* This pass walks a given loop structure searching for array
23 references. The information about the array accesses is recorded
24 in DATA_REFERENCE structures.
26 The basic test for determining the dependences is:
27 given two access functions chrec1 and chrec2 to a same array, and
28 x and y two vectors from the iteration domain, the same element of
29 the array is accessed twice at iterations x and y if and only if:
30 | chrec1 (x) == chrec2 (y).
32 The goals of this analysis are:
34 - to determine the independence: the relation between two
35 independent accesses is qualified with the chrec_known (this
36 information allows a loop parallelization),
38 - when two data references access the same data, to qualify the
39 dependence relation with classic dependence representations:
41 - distance vectors
42 - direction vectors
43 - loop carried level dependence
44 - polyhedron dependence
45 or with the chains of recurrences based representation,
47 - to define a knowledge base for storing the data dependence
48 information,
50 - to define an interface to access this data.
53 Definitions:
55 - subscript: given two array accesses a subscript is the tuple
56 composed of the access functions for a given dimension. Example:
57 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
58 (f1, g1), (f2, g2), (f3, g3).
60 - Diophantine equation: an equation whose coefficients and
61 solutions are integer constants, for example the equation
62 | 3*x + 2*y = 1
63 has an integer solution x = 1 and y = -1.
65 References:
67 - "Advanced Compilation for High Performance Computing" by Randy
68 Allen and Ken Kennedy.
69 http://citeseer.ist.psu.edu/goff91practical.html
71 - "Loop Transformations for Restructuring Compilers - The Foundations"
72 by Utpal Banerjee.
77 #include "config.h"
78 #include "system.h"
79 #include "coretypes.h"
80 #include "gimple-pretty-print.h"
81 #include "tree-flow.h"
82 #include "cfgloop.h"
83 #include "tree-data-ref.h"
84 #include "tree-scalar-evolution.h"
85 #include "tree-pass.h"
86 #include "langhooks.h"
88 static struct datadep_stats
90 int num_dependence_tests;
91 int num_dependence_dependent;
92 int num_dependence_independent;
93 int num_dependence_undetermined;
95 int num_subscript_tests;
96 int num_subscript_undetermined;
97 int num_same_subscript_function;
99 int num_ziv;
100 int num_ziv_independent;
101 int num_ziv_dependent;
102 int num_ziv_unimplemented;
104 int num_siv;
105 int num_siv_independent;
106 int num_siv_dependent;
107 int num_siv_unimplemented;
109 int num_miv;
110 int num_miv_independent;
111 int num_miv_dependent;
112 int num_miv_unimplemented;
113 } dependence_stats;
115 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
116 struct data_reference *,
117 struct data_reference *,
118 struct loop *);
119 /* Returns true iff A divides B. */
121 static inline bool
122 tree_fold_divides_p (const_tree a, const_tree b)
124 gcc_assert (TREE_CODE (a) == INTEGER_CST);
125 gcc_assert (TREE_CODE (b) == INTEGER_CST);
126 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
129 /* Returns true iff A divides B. */
131 static inline bool
132 int_divides_p (int a, int b)
134 return ((b % a) == 0);
139 /* Dump into FILE all the data references from DATAREFS. */
141 void
142 dump_data_references (FILE *file, VEC (data_reference_p, heap) *datarefs)
144 unsigned int i;
145 struct data_reference *dr;
147 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
148 dump_data_reference (file, dr);
151 /* Dump into STDERR all the data references from DATAREFS. */
153 DEBUG_FUNCTION void
154 debug_data_references (VEC (data_reference_p, heap) *datarefs)
156 dump_data_references (stderr, datarefs);
159 /* Dump to STDERR all the dependence relations from DDRS. */
161 DEBUG_FUNCTION void
162 debug_data_dependence_relations (VEC (ddr_p, heap) *ddrs)
164 dump_data_dependence_relations (stderr, ddrs);
167 /* Dump into FILE all the dependence relations from DDRS. */
169 void
170 dump_data_dependence_relations (FILE *file,
171 VEC (ddr_p, heap) *ddrs)
173 unsigned int i;
174 struct data_dependence_relation *ddr;
176 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
177 dump_data_dependence_relation (file, ddr);
180 /* Print to STDERR the data_reference DR. */
182 DEBUG_FUNCTION void
183 debug_data_reference (struct data_reference *dr)
185 dump_data_reference (stderr, dr);
188 /* Dump function for a DATA_REFERENCE structure. */
190 void
191 dump_data_reference (FILE *outf,
192 struct data_reference *dr)
194 unsigned int i;
196 fprintf (outf, "#(Data Ref: \n");
197 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
198 fprintf (outf, "# stmt: ");
199 print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
200 fprintf (outf, "# ref: ");
201 print_generic_stmt (outf, DR_REF (dr), 0);
202 fprintf (outf, "# base_object: ");
203 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
205 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
207 fprintf (outf, "# Access function %d: ", i);
208 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
210 fprintf (outf, "#)\n");
213 /* Dumps the affine function described by FN to the file OUTF. */
215 static void
216 dump_affine_function (FILE *outf, affine_fn fn)
218 unsigned i;
219 tree coef;
221 print_generic_expr (outf, VEC_index (tree, fn, 0), TDF_SLIM);
222 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
224 fprintf (outf, " + ");
225 print_generic_expr (outf, coef, TDF_SLIM);
226 fprintf (outf, " * x_%u", i);
230 /* Dumps the conflict function CF to the file OUTF. */
232 static void
233 dump_conflict_function (FILE *outf, conflict_function *cf)
235 unsigned i;
237 if (cf->n == NO_DEPENDENCE)
238 fprintf (outf, "no dependence\n");
239 else if (cf->n == NOT_KNOWN)
240 fprintf (outf, "not known\n");
241 else
243 for (i = 0; i < cf->n; i++)
245 fprintf (outf, "[");
246 dump_affine_function (outf, cf->fns[i]);
247 fprintf (outf, "]\n");
252 /* Dump function for a SUBSCRIPT structure. */
254 void
255 dump_subscript (FILE *outf, struct subscript *subscript)
257 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
259 fprintf (outf, "\n (subscript \n");
260 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
261 dump_conflict_function (outf, cf);
262 if (CF_NONTRIVIAL_P (cf))
264 tree last_iteration = SUB_LAST_CONFLICT (subscript);
265 fprintf (outf, " last_conflict: ");
266 print_generic_stmt (outf, last_iteration, 0);
269 cf = SUB_CONFLICTS_IN_B (subscript);
270 fprintf (outf, " iterations_that_access_an_element_twice_in_B: ");
271 dump_conflict_function (outf, cf);
272 if (CF_NONTRIVIAL_P (cf))
274 tree last_iteration = SUB_LAST_CONFLICT (subscript);
275 fprintf (outf, " last_conflict: ");
276 print_generic_stmt (outf, last_iteration, 0);
279 fprintf (outf, " (Subscript distance: ");
280 print_generic_stmt (outf, SUB_DISTANCE (subscript), 0);
281 fprintf (outf, " )\n");
282 fprintf (outf, " )\n");
285 /* Print the classic direction vector DIRV to OUTF. */
287 void
288 print_direction_vector (FILE *outf,
289 lambda_vector dirv,
290 int length)
292 int eq;
294 for (eq = 0; eq < length; eq++)
296 enum data_dependence_direction dir = ((enum data_dependence_direction)
297 dirv[eq]);
299 switch (dir)
301 case dir_positive:
302 fprintf (outf, " +");
303 break;
304 case dir_negative:
305 fprintf (outf, " -");
306 break;
307 case dir_equal:
308 fprintf (outf, " =");
309 break;
310 case dir_positive_or_equal:
311 fprintf (outf, " +=");
312 break;
313 case dir_positive_or_negative:
314 fprintf (outf, " +-");
315 break;
316 case dir_negative_or_equal:
317 fprintf (outf, " -=");
318 break;
319 case dir_star:
320 fprintf (outf, " *");
321 break;
322 default:
323 fprintf (outf, "indep");
324 break;
327 fprintf (outf, "\n");
330 /* Print a vector of direction vectors. */
332 void
333 print_dir_vectors (FILE *outf, VEC (lambda_vector, heap) *dir_vects,
334 int length)
336 unsigned j;
337 lambda_vector v;
339 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, v)
340 print_direction_vector (outf, v, length);
343 /* Print out a vector VEC of length N to OUTFILE. */
345 static inline void
346 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
348 int i;
350 for (i = 0; i < n; i++)
351 fprintf (outfile, "%3d ", vector[i]);
352 fprintf (outfile, "\n");
355 /* Print a vector of distance vectors. */
357 void
358 print_dist_vectors (FILE *outf, VEC (lambda_vector, heap) *dist_vects,
359 int length)
361 unsigned j;
362 lambda_vector v;
364 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, v)
365 print_lambda_vector (outf, v, length);
368 /* Debug version. */
370 DEBUG_FUNCTION void
371 debug_data_dependence_relation (struct data_dependence_relation *ddr)
373 dump_data_dependence_relation (stderr, ddr);
376 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
378 void
379 dump_data_dependence_relation (FILE *outf,
380 struct data_dependence_relation *ddr)
382 struct data_reference *dra, *drb;
384 fprintf (outf, "(Data Dep: \n");
386 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
388 if (ddr)
390 dra = DDR_A (ddr);
391 drb = DDR_B (ddr);
392 if (dra)
393 dump_data_reference (outf, dra);
394 else
395 fprintf (outf, " (nil)\n");
396 if (drb)
397 dump_data_reference (outf, drb);
398 else
399 fprintf (outf, " (nil)\n");
401 fprintf (outf, " (don't know)\n)\n");
402 return;
405 dra = DDR_A (ddr);
406 drb = DDR_B (ddr);
407 dump_data_reference (outf, dra);
408 dump_data_reference (outf, drb);
410 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
411 fprintf (outf, " (no dependence)\n");
413 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
415 unsigned int i;
416 struct loop *loopi;
418 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
420 fprintf (outf, " access_fn_A: ");
421 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
422 fprintf (outf, " access_fn_B: ");
423 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
424 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
427 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
428 fprintf (outf, " loop nest: (");
429 FOR_EACH_VEC_ELT (loop_p, DDR_LOOP_NEST (ddr), i, loopi)
430 fprintf (outf, "%d ", loopi->num);
431 fprintf (outf, ")\n");
433 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
435 fprintf (outf, " distance_vector: ");
436 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
437 DDR_NB_LOOPS (ddr));
440 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
442 fprintf (outf, " direction_vector: ");
443 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
444 DDR_NB_LOOPS (ddr));
448 fprintf (outf, ")\n");
451 /* Dump function for a DATA_DEPENDENCE_DIRECTION structure. */
453 void
454 dump_data_dependence_direction (FILE *file,
455 enum data_dependence_direction dir)
457 switch (dir)
459 case dir_positive:
460 fprintf (file, "+");
461 break;
463 case dir_negative:
464 fprintf (file, "-");
465 break;
467 case dir_equal:
468 fprintf (file, "=");
469 break;
471 case dir_positive_or_negative:
472 fprintf (file, "+-");
473 break;
475 case dir_positive_or_equal:
476 fprintf (file, "+=");
477 break;
479 case dir_negative_or_equal:
480 fprintf (file, "-=");
481 break;
483 case dir_star:
484 fprintf (file, "*");
485 break;
487 default:
488 break;
492 /* Dumps the distance and direction vectors in FILE. DDRS contains
493 the dependence relations, and VECT_SIZE is the size of the
494 dependence vectors, or in other words the number of loops in the
495 considered nest. */
497 void
498 dump_dist_dir_vectors (FILE *file, VEC (ddr_p, heap) *ddrs)
500 unsigned int i, j;
501 struct data_dependence_relation *ddr;
502 lambda_vector v;
504 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
505 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
507 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), j, v)
509 fprintf (file, "DISTANCE_V (");
510 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
511 fprintf (file, ")\n");
514 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), j, v)
516 fprintf (file, "DIRECTION_V (");
517 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
518 fprintf (file, ")\n");
522 fprintf (file, "\n\n");
525 /* Dumps the data dependence relations DDRS in FILE. */
527 void
528 dump_ddrs (FILE *file, VEC (ddr_p, heap) *ddrs)
530 unsigned int i;
531 struct data_dependence_relation *ddr;
533 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
534 dump_data_dependence_relation (file, ddr);
536 fprintf (file, "\n\n");
539 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
540 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
541 constant of type ssizetype, and returns true. If we cannot do this
542 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
543 is returned. */
545 static bool
546 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
547 tree *var, tree *off)
549 tree var0, var1;
550 tree off0, off1;
551 enum tree_code ocode = code;
553 *var = NULL_TREE;
554 *off = NULL_TREE;
556 switch (code)
558 case INTEGER_CST:
559 *var = build_int_cst (type, 0);
560 *off = fold_convert (ssizetype, op0);
561 return true;
563 case POINTER_PLUS_EXPR:
564 ocode = PLUS_EXPR;
565 /* FALLTHROUGH */
566 case PLUS_EXPR:
567 case MINUS_EXPR:
568 split_constant_offset (op0, &var0, &off0);
569 split_constant_offset (op1, &var1, &off1);
570 *var = fold_build2 (code, type, var0, var1);
571 *off = size_binop (ocode, off0, off1);
572 return true;
574 case MULT_EXPR:
575 if (TREE_CODE (op1) != INTEGER_CST)
576 return false;
578 split_constant_offset (op0, &var0, &off0);
579 *var = fold_build2 (MULT_EXPR, type, var0, op1);
580 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
581 return true;
583 case ADDR_EXPR:
585 tree base, poffset;
586 HOST_WIDE_INT pbitsize, pbitpos;
587 enum machine_mode pmode;
588 int punsignedp, pvolatilep;
590 op0 = TREE_OPERAND (op0, 0);
591 if (!handled_component_p (op0))
592 return false;
594 base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
595 &pmode, &punsignedp, &pvolatilep, false);
597 if (pbitpos % BITS_PER_UNIT != 0)
598 return false;
599 base = build_fold_addr_expr (base);
600 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
602 if (poffset)
604 split_constant_offset (poffset, &poffset, &off1);
605 off0 = size_binop (PLUS_EXPR, off0, off1);
606 if (POINTER_TYPE_P (TREE_TYPE (base)))
607 base = fold_build_pointer_plus (base, poffset);
608 else
609 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
610 fold_convert (TREE_TYPE (base), poffset));
613 var0 = fold_convert (type, base);
615 /* If variable length types are involved, punt, otherwise casts
616 might be converted into ARRAY_REFs in gimplify_conversion.
617 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
618 possibly no longer appears in current GIMPLE, might resurface.
619 This perhaps could run
620 if (CONVERT_EXPR_P (var0))
622 gimplify_conversion (&var0);
623 // Attempt to fill in any within var0 found ARRAY_REF's
624 // element size from corresponding op embedded ARRAY_REF,
625 // if unsuccessful, just punt.
626 } */
627 while (POINTER_TYPE_P (type))
628 type = TREE_TYPE (type);
629 if (int_size_in_bytes (type) < 0)
630 return false;
632 *var = var0;
633 *off = off0;
634 return true;
637 case SSA_NAME:
639 gimple def_stmt = SSA_NAME_DEF_STMT (op0);
640 enum tree_code subcode;
642 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
643 return false;
645 var0 = gimple_assign_rhs1 (def_stmt);
646 subcode = gimple_assign_rhs_code (def_stmt);
647 var1 = gimple_assign_rhs2 (def_stmt);
649 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
651 CASE_CONVERT:
653 /* We must not introduce undefined overflow, and we must not change the value.
654 Hence we're okay if the inner type doesn't overflow to start with
655 (pointer or signed), the outer type also is an integer or pointer
656 and the outer precision is at least as large as the inner. */
657 tree itype = TREE_TYPE (op0);
658 if ((POINTER_TYPE_P (itype)
659 || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
660 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
661 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
663 split_constant_offset (op0, &var0, off);
664 *var = fold_convert (type, var0);
665 return true;
667 return false;
670 default:
671 return false;
675 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
676 will be ssizetype. */
678 void
679 split_constant_offset (tree exp, tree *var, tree *off)
681 tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
682 enum tree_code code;
684 *var = exp;
685 *off = ssize_int (0);
686 STRIP_NOPS (exp);
688 if (tree_is_chrec (exp))
689 return;
691 otype = TREE_TYPE (exp);
692 code = TREE_CODE (exp);
693 extract_ops_from_tree (exp, &code, &op0, &op1);
694 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
696 *var = fold_convert (type, e);
697 *off = o;
701 /* Returns the address ADDR of an object in a canonical shape (without nop
702 casts, and with type of pointer to the object). */
704 static tree
705 canonicalize_base_object_address (tree addr)
707 tree orig = addr;
709 STRIP_NOPS (addr);
711 /* The base address may be obtained by casting from integer, in that case
712 keep the cast. */
713 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
714 return orig;
716 if (TREE_CODE (addr) != ADDR_EXPR)
717 return addr;
719 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
722 /* Analyzes the behavior of the memory reference DR in the innermost loop or
723 basic block that contains it. Returns true if analysis succeed or false
724 otherwise. */
726 bool
727 dr_analyze_innermost (struct data_reference *dr)
729 gimple stmt = DR_STMT (dr);
730 struct loop *loop = loop_containing_stmt (stmt);
731 tree ref = DR_REF (dr);
732 HOST_WIDE_INT pbitsize, pbitpos;
733 tree base, poffset;
734 enum machine_mode pmode;
735 int punsignedp, pvolatilep;
736 affine_iv base_iv, offset_iv;
737 tree init, dinit, step;
738 bool in_loop = (loop && loop->num);
740 if (dump_file && (dump_flags & TDF_DETAILS))
741 fprintf (dump_file, "analyze_innermost: ");
743 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
744 &pmode, &punsignedp, &pvolatilep, false);
745 gcc_assert (base != NULL_TREE);
747 if (pbitpos % BITS_PER_UNIT != 0)
749 if (dump_file && (dump_flags & TDF_DETAILS))
750 fprintf (dump_file, "failed: bit offset alignment.\n");
751 return false;
754 if (TREE_CODE (base) == MEM_REF)
756 if (!integer_zerop (TREE_OPERAND (base, 1)))
758 if (!poffset)
760 double_int moff = mem_ref_offset (base);
761 poffset = double_int_to_tree (sizetype, moff);
763 else
764 poffset = size_binop (PLUS_EXPR, poffset, TREE_OPERAND (base, 1));
766 base = TREE_OPERAND (base, 0);
768 else
769 base = build_fold_addr_expr (base);
770 if (in_loop)
772 if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
773 false))
775 if (dump_file && (dump_flags & TDF_DETAILS))
776 fprintf (dump_file, "failed: evolution of base is not affine.\n");
777 return false;
780 else
782 base_iv.base = base;
783 base_iv.step = ssize_int (0);
784 base_iv.no_overflow = true;
787 if (!poffset)
789 offset_iv.base = ssize_int (0);
790 offset_iv.step = ssize_int (0);
792 else
794 if (!in_loop)
796 offset_iv.base = poffset;
797 offset_iv.step = ssize_int (0);
799 else if (!simple_iv (loop, loop_containing_stmt (stmt),
800 poffset, &offset_iv, false))
802 if (dump_file && (dump_flags & TDF_DETAILS))
803 fprintf (dump_file, "failed: evolution of offset is not"
804 " affine.\n");
805 return false;
809 init = ssize_int (pbitpos / BITS_PER_UNIT);
810 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
811 init = size_binop (PLUS_EXPR, init, dinit);
812 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
813 init = size_binop (PLUS_EXPR, init, dinit);
815 step = size_binop (PLUS_EXPR,
816 fold_convert (ssizetype, base_iv.step),
817 fold_convert (ssizetype, offset_iv.step));
819 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
821 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
822 DR_INIT (dr) = init;
823 DR_STEP (dr) = step;
825 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
827 if (dump_file && (dump_flags & TDF_DETAILS))
828 fprintf (dump_file, "success.\n");
830 return true;
833 /* Determines the base object and the list of indices of memory reference
834 DR, analyzed in LOOP and instantiated in loop nest NEST. */
836 static void
837 dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
839 VEC (tree, heap) *access_fns = NULL;
840 tree ref = unshare_expr (DR_REF (dr)), aref = ref, op;
841 tree base, off, access_fn = NULL_TREE;
842 basic_block before_loop = NULL;
844 if (nest)
845 before_loop = block_before_loop (nest);
847 while (handled_component_p (aref))
849 if (TREE_CODE (aref) == ARRAY_REF)
851 op = TREE_OPERAND (aref, 1);
852 if (nest)
854 access_fn = analyze_scalar_evolution (loop, op);
855 access_fn = instantiate_scev (before_loop, loop, access_fn);
856 VEC_safe_push (tree, heap, access_fns, access_fn);
859 TREE_OPERAND (aref, 1) = build_int_cst (TREE_TYPE (op), 0);
862 aref = TREE_OPERAND (aref, 0);
865 if (nest
866 && TREE_CODE (aref) == MEM_REF)
868 op = TREE_OPERAND (aref, 0);
869 access_fn = analyze_scalar_evolution (loop, op);
870 access_fn = instantiate_scev (before_loop, loop, access_fn);
871 base = initial_condition (access_fn);
872 split_constant_offset (base, &base, &off);
873 if (!integer_zerop (TREE_OPERAND (aref, 1)))
875 off = size_binop (PLUS_EXPR, off,
876 fold_convert (ssizetype, TREE_OPERAND (aref, 1)));
877 TREE_OPERAND (aref, 1)
878 = build_int_cst (TREE_TYPE (TREE_OPERAND (aref, 1)), 0);
880 access_fn = chrec_replace_initial_condition (access_fn,
881 fold_convert (TREE_TYPE (base), off));
883 TREE_OPERAND (aref, 0) = base;
884 VEC_safe_push (tree, heap, access_fns, access_fn);
887 if (TREE_CODE (ref) == MEM_REF
888 && TREE_CODE (TREE_OPERAND (ref, 0)) == ADDR_EXPR
889 && integer_zerop (TREE_OPERAND (ref, 1)))
890 ref = TREE_OPERAND (TREE_OPERAND (ref, 0), 0);
892 /* For canonicalization purposes we'd like to strip all outermost
893 zero-offset component-refs.
894 ??? For now simply handle zero-index array-refs. */
895 while (TREE_CODE (ref) == ARRAY_REF
896 && integer_zerop (TREE_OPERAND (ref, 1)))
897 ref = TREE_OPERAND (ref, 0);
899 DR_BASE_OBJECT (dr) = ref;
900 DR_ACCESS_FNS (dr) = access_fns;
903 /* Extracts the alias analysis information from the memory reference DR. */
905 static void
906 dr_analyze_alias (struct data_reference *dr)
908 tree ref = DR_REF (dr);
909 tree base = get_base_address (ref), addr;
911 if (INDIRECT_REF_P (base)
912 || TREE_CODE (base) == MEM_REF)
914 addr = TREE_OPERAND (base, 0);
915 if (TREE_CODE (addr) == SSA_NAME)
916 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
920 /* Frees data reference DR. */
922 void
923 free_data_ref (data_reference_p dr)
925 VEC_free (tree, heap, DR_ACCESS_FNS (dr));
926 free (dr);
929 /* Analyzes memory reference MEMREF accessed in STMT. The reference
930 is read if IS_READ is true, write otherwise. Returns the
931 data_reference description of MEMREF. NEST is the outermost loop
932 in which the reference should be instantiated, LOOP is the loop in
933 which the data reference should be analyzed. */
935 struct data_reference *
936 create_data_ref (loop_p nest, loop_p loop, tree memref, gimple stmt,
937 bool is_read)
939 struct data_reference *dr;
941 if (dump_file && (dump_flags & TDF_DETAILS))
943 fprintf (dump_file, "Creating dr for ");
944 print_generic_expr (dump_file, memref, TDF_SLIM);
945 fprintf (dump_file, "\n");
948 dr = XCNEW (struct data_reference);
949 DR_STMT (dr) = stmt;
950 DR_REF (dr) = memref;
951 DR_IS_READ (dr) = is_read;
953 dr_analyze_innermost (dr);
954 dr_analyze_indices (dr, nest, loop);
955 dr_analyze_alias (dr);
957 if (dump_file && (dump_flags & TDF_DETAILS))
959 fprintf (dump_file, "\tbase_address: ");
960 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
961 fprintf (dump_file, "\n\toffset from base address: ");
962 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
963 fprintf (dump_file, "\n\tconstant offset from base address: ");
964 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
965 fprintf (dump_file, "\n\tstep: ");
966 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
967 fprintf (dump_file, "\n\taligned to: ");
968 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
969 fprintf (dump_file, "\n\tbase_object: ");
970 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
971 fprintf (dump_file, "\n");
974 return dr;
977 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
978 expressions. */
979 static bool
980 dr_equal_offsets_p1 (tree offset1, tree offset2)
982 bool res;
984 STRIP_NOPS (offset1);
985 STRIP_NOPS (offset2);
987 if (offset1 == offset2)
988 return true;
990 if (TREE_CODE (offset1) != TREE_CODE (offset2)
991 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
992 return false;
994 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
995 TREE_OPERAND (offset2, 0));
997 if (!res || !BINARY_CLASS_P (offset1))
998 return res;
1000 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1001 TREE_OPERAND (offset2, 1));
1003 return res;
1006 /* Check if DRA and DRB have equal offsets. */
1007 bool
1008 dr_equal_offsets_p (struct data_reference *dra,
1009 struct data_reference *drb)
1011 tree offset1, offset2;
1013 offset1 = DR_OFFSET (dra);
1014 offset2 = DR_OFFSET (drb);
1016 return dr_equal_offsets_p1 (offset1, offset2);
1019 /* Returns true if FNA == FNB. */
1021 static bool
1022 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1024 unsigned i, n = VEC_length (tree, fna);
1026 if (n != VEC_length (tree, fnb))
1027 return false;
1029 for (i = 0; i < n; i++)
1030 if (!operand_equal_p (VEC_index (tree, fna, i),
1031 VEC_index (tree, fnb, i), 0))
1032 return false;
1034 return true;
1037 /* If all the functions in CF are the same, returns one of them,
1038 otherwise returns NULL. */
1040 static affine_fn
1041 common_affine_function (conflict_function *cf)
1043 unsigned i;
1044 affine_fn comm;
1046 if (!CF_NONTRIVIAL_P (cf))
1047 return NULL;
1049 comm = cf->fns[0];
1051 for (i = 1; i < cf->n; i++)
1052 if (!affine_function_equal_p (comm, cf->fns[i]))
1053 return NULL;
1055 return comm;
1058 /* Returns the base of the affine function FN. */
1060 static tree
1061 affine_function_base (affine_fn fn)
1063 return VEC_index (tree, fn, 0);
1066 /* Returns true if FN is a constant. */
1068 static bool
1069 affine_function_constant_p (affine_fn fn)
1071 unsigned i;
1072 tree coef;
1074 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
1075 if (!integer_zerop (coef))
1076 return false;
1078 return true;
1081 /* Returns true if FN is the zero constant function. */
1083 static bool
1084 affine_function_zero_p (affine_fn fn)
1086 return (integer_zerop (affine_function_base (fn))
1087 && affine_function_constant_p (fn));
1090 /* Returns a signed integer type with the largest precision from TA
1091 and TB. */
1093 static tree
1094 signed_type_for_types (tree ta, tree tb)
1096 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1097 return signed_type_for (ta);
1098 else
1099 return signed_type_for (tb);
1102 /* Applies operation OP on affine functions FNA and FNB, and returns the
1103 result. */
1105 static affine_fn
1106 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1108 unsigned i, n, m;
1109 affine_fn ret;
1110 tree coef;
1112 if (VEC_length (tree, fnb) > VEC_length (tree, fna))
1114 n = VEC_length (tree, fna);
1115 m = VEC_length (tree, fnb);
1117 else
1119 n = VEC_length (tree, fnb);
1120 m = VEC_length (tree, fna);
1123 ret = VEC_alloc (tree, heap, m);
1124 for (i = 0; i < n; i++)
1126 tree type = signed_type_for_types (TREE_TYPE (VEC_index (tree, fna, i)),
1127 TREE_TYPE (VEC_index (tree, fnb, i)));
1129 VEC_quick_push (tree, ret,
1130 fold_build2 (op, type,
1131 VEC_index (tree, fna, i),
1132 VEC_index (tree, fnb, i)));
1135 for (; VEC_iterate (tree, fna, i, coef); i++)
1136 VEC_quick_push (tree, ret,
1137 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1138 coef, integer_zero_node));
1139 for (; VEC_iterate (tree, fnb, i, coef); i++)
1140 VEC_quick_push (tree, ret,
1141 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1142 integer_zero_node, coef));
1144 return ret;
1147 /* Returns the sum of affine functions FNA and FNB. */
1149 static affine_fn
1150 affine_fn_plus (affine_fn fna, affine_fn fnb)
1152 return affine_fn_op (PLUS_EXPR, fna, fnb);
1155 /* Returns the difference of affine functions FNA and FNB. */
1157 static affine_fn
1158 affine_fn_minus (affine_fn fna, affine_fn fnb)
1160 return affine_fn_op (MINUS_EXPR, fna, fnb);
1163 /* Frees affine function FN. */
1165 static void
1166 affine_fn_free (affine_fn fn)
1168 VEC_free (tree, heap, fn);
1171 /* Determine for each subscript in the data dependence relation DDR
1172 the distance. */
1174 static void
1175 compute_subscript_distance (struct data_dependence_relation *ddr)
1177 conflict_function *cf_a, *cf_b;
1178 affine_fn fn_a, fn_b, diff;
1180 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1182 unsigned int i;
1184 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1186 struct subscript *subscript;
1188 subscript = DDR_SUBSCRIPT (ddr, i);
1189 cf_a = SUB_CONFLICTS_IN_A (subscript);
1190 cf_b = SUB_CONFLICTS_IN_B (subscript);
1192 fn_a = common_affine_function (cf_a);
1193 fn_b = common_affine_function (cf_b);
1194 if (!fn_a || !fn_b)
1196 SUB_DISTANCE (subscript) = chrec_dont_know;
1197 return;
1199 diff = affine_fn_minus (fn_a, fn_b);
1201 if (affine_function_constant_p (diff))
1202 SUB_DISTANCE (subscript) = affine_function_base (diff);
1203 else
1204 SUB_DISTANCE (subscript) = chrec_dont_know;
1206 affine_fn_free (diff);
1211 /* Returns the conflict function for "unknown". */
1213 static conflict_function *
1214 conflict_fn_not_known (void)
1216 conflict_function *fn = XCNEW (conflict_function);
1217 fn->n = NOT_KNOWN;
1219 return fn;
1222 /* Returns the conflict function for "independent". */
1224 static conflict_function *
1225 conflict_fn_no_dependence (void)
1227 conflict_function *fn = XCNEW (conflict_function);
1228 fn->n = NO_DEPENDENCE;
1230 return fn;
1233 /* Returns true if the address of OBJ is invariant in LOOP. */
1235 static bool
1236 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1238 while (handled_component_p (obj))
1240 if (TREE_CODE (obj) == ARRAY_REF)
1242 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1243 need to check the stride and the lower bound of the reference. */
1244 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1245 loop->num)
1246 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1247 loop->num))
1248 return false;
1250 else if (TREE_CODE (obj) == COMPONENT_REF)
1252 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1253 loop->num))
1254 return false;
1256 obj = TREE_OPERAND (obj, 0);
1259 if (!INDIRECT_REF_P (obj)
1260 && TREE_CODE (obj) != MEM_REF)
1261 return true;
1263 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1264 loop->num);
1267 /* Returns false if we can prove that data references A and B do not alias,
1268 true otherwise. */
1270 bool
1271 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b)
1273 tree addr_a = DR_BASE_OBJECT (a);
1274 tree addr_b = DR_BASE_OBJECT (b);
1276 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
1277 return refs_output_dependent_p (addr_a, addr_b);
1278 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
1279 return refs_anti_dependent_p (addr_a, addr_b);
1280 return refs_may_alias_p (addr_a, addr_b);
1283 static void compute_self_dependence (struct data_dependence_relation *);
1285 /* Initialize a data dependence relation between data accesses A and
1286 B. NB_LOOPS is the number of loops surrounding the references: the
1287 size of the classic distance/direction vectors. */
1289 static struct data_dependence_relation *
1290 initialize_data_dependence_relation (struct data_reference *a,
1291 struct data_reference *b,
1292 VEC (loop_p, heap) *loop_nest)
1294 struct data_dependence_relation *res;
1295 unsigned int i;
1297 res = XNEW (struct data_dependence_relation);
1298 DDR_A (res) = a;
1299 DDR_B (res) = b;
1300 DDR_LOOP_NEST (res) = NULL;
1301 DDR_REVERSED_P (res) = false;
1302 DDR_SUBSCRIPTS (res) = NULL;
1303 DDR_DIR_VECTS (res) = NULL;
1304 DDR_DIST_VECTS (res) = NULL;
1306 if (a == NULL || b == NULL)
1308 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1309 return res;
1312 /* If the data references do not alias, then they are independent. */
1313 if (!dr_may_alias_p (a, b))
1315 DDR_ARE_DEPENDENT (res) = chrec_known;
1316 return res;
1319 /* When the references are exactly the same, don't spend time doing
1320 the data dependence tests, just initialize the ddr and return. */
1321 if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1323 DDR_AFFINE_P (res) = true;
1324 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1325 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1326 DDR_LOOP_NEST (res) = loop_nest;
1327 DDR_INNER_LOOP (res) = 0;
1328 DDR_SELF_REFERENCE (res) = true;
1329 compute_self_dependence (res);
1330 return res;
1333 /* If the references do not access the same object, we do not know
1334 whether they alias or not. */
1335 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1337 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1338 return res;
1341 /* If the base of the object is not invariant in the loop nest, we cannot
1342 analyze it. TODO -- in fact, it would suffice to record that there may
1343 be arbitrary dependences in the loops where the base object varies. */
1344 if (loop_nest
1345 && !object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1346 DR_BASE_OBJECT (a)))
1348 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1349 return res;
1352 /* If the number of dimensions of the access to not agree we can have
1353 a pointer access to a component of the array element type and an
1354 array access while the base-objects are still the same. Punt. */
1355 if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
1357 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1358 return res;
1361 DDR_AFFINE_P (res) = true;
1362 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1363 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1364 DDR_LOOP_NEST (res) = loop_nest;
1365 DDR_INNER_LOOP (res) = 0;
1366 DDR_SELF_REFERENCE (res) = false;
1368 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1370 struct subscript *subscript;
1372 subscript = XNEW (struct subscript);
1373 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1374 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1375 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1376 SUB_DISTANCE (subscript) = chrec_dont_know;
1377 VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1380 return res;
1383 /* Frees memory used by the conflict function F. */
1385 static void
1386 free_conflict_function (conflict_function *f)
1388 unsigned i;
1390 if (CF_NONTRIVIAL_P (f))
1392 for (i = 0; i < f->n; i++)
1393 affine_fn_free (f->fns[i]);
1395 free (f);
1398 /* Frees memory used by SUBSCRIPTS. */
1400 static void
1401 free_subscripts (VEC (subscript_p, heap) *subscripts)
1403 unsigned i;
1404 subscript_p s;
1406 FOR_EACH_VEC_ELT (subscript_p, subscripts, i, s)
1408 free_conflict_function (s->conflicting_iterations_in_a);
1409 free_conflict_function (s->conflicting_iterations_in_b);
1410 free (s);
1412 VEC_free (subscript_p, heap, subscripts);
1415 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1416 description. */
1418 static inline void
1419 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1420 tree chrec)
1422 if (dump_file && (dump_flags & TDF_DETAILS))
1424 fprintf (dump_file, "(dependence classified: ");
1425 print_generic_expr (dump_file, chrec, 0);
1426 fprintf (dump_file, ")\n");
1429 DDR_ARE_DEPENDENT (ddr) = chrec;
1430 free_subscripts (DDR_SUBSCRIPTS (ddr));
1431 DDR_SUBSCRIPTS (ddr) = NULL;
1434 /* The dependence relation DDR cannot be represented by a distance
1435 vector. */
1437 static inline void
1438 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1440 if (dump_file && (dump_flags & TDF_DETAILS))
1441 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1443 DDR_AFFINE_P (ddr) = false;
1448 /* This section contains the classic Banerjee tests. */
1450 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1451 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1453 static inline bool
1454 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1456 return (evolution_function_is_constant_p (chrec_a)
1457 && evolution_function_is_constant_p (chrec_b));
1460 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1461 variable, i.e., if the SIV (Single Index Variable) test is true. */
1463 static bool
1464 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1466 if ((evolution_function_is_constant_p (chrec_a)
1467 && evolution_function_is_univariate_p (chrec_b))
1468 || (evolution_function_is_constant_p (chrec_b)
1469 && evolution_function_is_univariate_p (chrec_a)))
1470 return true;
1472 if (evolution_function_is_univariate_p (chrec_a)
1473 && evolution_function_is_univariate_p (chrec_b))
1475 switch (TREE_CODE (chrec_a))
1477 case POLYNOMIAL_CHREC:
1478 switch (TREE_CODE (chrec_b))
1480 case POLYNOMIAL_CHREC:
1481 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1482 return false;
1484 default:
1485 return true;
1488 default:
1489 return true;
1493 return false;
1496 /* Creates a conflict function with N dimensions. The affine functions
1497 in each dimension follow. */
1499 static conflict_function *
1500 conflict_fn (unsigned n, ...)
1502 unsigned i;
1503 conflict_function *ret = XCNEW (conflict_function);
1504 va_list ap;
1506 gcc_assert (0 < n && n <= MAX_DIM);
1507 va_start(ap, n);
1509 ret->n = n;
1510 for (i = 0; i < n; i++)
1511 ret->fns[i] = va_arg (ap, affine_fn);
1512 va_end(ap);
1514 return ret;
1517 /* Returns constant affine function with value CST. */
1519 static affine_fn
1520 affine_fn_cst (tree cst)
1522 affine_fn fn = VEC_alloc (tree, heap, 1);
1523 VEC_quick_push (tree, fn, cst);
1524 return fn;
1527 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1529 static affine_fn
1530 affine_fn_univar (tree cst, unsigned dim, tree coef)
1532 affine_fn fn = VEC_alloc (tree, heap, dim + 1);
1533 unsigned i;
1535 gcc_assert (dim > 0);
1536 VEC_quick_push (tree, fn, cst);
1537 for (i = 1; i < dim; i++)
1538 VEC_quick_push (tree, fn, integer_zero_node);
1539 VEC_quick_push (tree, fn, coef);
1540 return fn;
1543 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1544 *OVERLAPS_B are initialized to the functions that describe the
1545 relation between the elements accessed twice by CHREC_A and
1546 CHREC_B. For k >= 0, the following property is verified:
1548 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1550 static void
1551 analyze_ziv_subscript (tree chrec_a,
1552 tree chrec_b,
1553 conflict_function **overlaps_a,
1554 conflict_function **overlaps_b,
1555 tree *last_conflicts)
1557 tree type, difference;
1558 dependence_stats.num_ziv++;
1560 if (dump_file && (dump_flags & TDF_DETAILS))
1561 fprintf (dump_file, "(analyze_ziv_subscript \n");
1563 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1564 chrec_a = chrec_convert (type, chrec_a, NULL);
1565 chrec_b = chrec_convert (type, chrec_b, NULL);
1566 difference = chrec_fold_minus (type, chrec_a, chrec_b);
1568 switch (TREE_CODE (difference))
1570 case INTEGER_CST:
1571 if (integer_zerop (difference))
1573 /* The difference is equal to zero: the accessed index
1574 overlaps for each iteration in the loop. */
1575 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1576 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1577 *last_conflicts = chrec_dont_know;
1578 dependence_stats.num_ziv_dependent++;
1580 else
1582 /* The accesses do not overlap. */
1583 *overlaps_a = conflict_fn_no_dependence ();
1584 *overlaps_b = conflict_fn_no_dependence ();
1585 *last_conflicts = integer_zero_node;
1586 dependence_stats.num_ziv_independent++;
1588 break;
1590 default:
1591 /* We're not sure whether the indexes overlap. For the moment,
1592 conservatively answer "don't know". */
1593 if (dump_file && (dump_flags & TDF_DETAILS))
1594 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1596 *overlaps_a = conflict_fn_not_known ();
1597 *overlaps_b = conflict_fn_not_known ();
1598 *last_conflicts = chrec_dont_know;
1599 dependence_stats.num_ziv_unimplemented++;
1600 break;
1603 if (dump_file && (dump_flags & TDF_DETAILS))
1604 fprintf (dump_file, ")\n");
1607 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
1608 and only if it fits to the int type. If this is not the case, or the
1609 bound on the number of iterations of LOOP could not be derived, returns
1610 chrec_dont_know. */
1612 static tree
1613 max_stmt_executions_tree (struct loop *loop)
1615 double_int nit;
1617 if (!max_stmt_executions (loop, true, &nit))
1618 return chrec_dont_know;
1620 if (!double_int_fits_to_tree_p (unsigned_type_node, nit))
1621 return chrec_dont_know;
1623 return double_int_to_tree (unsigned_type_node, nit);
1626 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1627 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1628 *OVERLAPS_B are initialized to the functions that describe the
1629 relation between the elements accessed twice by CHREC_A and
1630 CHREC_B. For k >= 0, the following property is verified:
1632 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1634 static void
1635 analyze_siv_subscript_cst_affine (tree chrec_a,
1636 tree chrec_b,
1637 conflict_function **overlaps_a,
1638 conflict_function **overlaps_b,
1639 tree *last_conflicts)
1641 bool value0, value1, value2;
1642 tree type, difference, tmp;
1644 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1645 chrec_a = chrec_convert (type, chrec_a, NULL);
1646 chrec_b = chrec_convert (type, chrec_b, NULL);
1647 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1649 if (!chrec_is_positive (initial_condition (difference), &value0))
1651 if (dump_file && (dump_flags & TDF_DETAILS))
1652 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1654 dependence_stats.num_siv_unimplemented++;
1655 *overlaps_a = conflict_fn_not_known ();
1656 *overlaps_b = conflict_fn_not_known ();
1657 *last_conflicts = chrec_dont_know;
1658 return;
1660 else
1662 if (value0 == false)
1664 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1666 if (dump_file && (dump_flags & TDF_DETAILS))
1667 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1669 *overlaps_a = conflict_fn_not_known ();
1670 *overlaps_b = conflict_fn_not_known ();
1671 *last_conflicts = chrec_dont_know;
1672 dependence_stats.num_siv_unimplemented++;
1673 return;
1675 else
1677 if (value1 == true)
1679 /* Example:
1680 chrec_a = 12
1681 chrec_b = {10, +, 1}
1684 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1686 HOST_WIDE_INT numiter;
1687 struct loop *loop = get_chrec_loop (chrec_b);
1689 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1690 tmp = fold_build2 (EXACT_DIV_EXPR, type,
1691 fold_build1 (ABS_EXPR, type, difference),
1692 CHREC_RIGHT (chrec_b));
1693 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1694 *last_conflicts = integer_one_node;
1697 /* Perform weak-zero siv test to see if overlap is
1698 outside the loop bounds. */
1699 numiter = max_stmt_executions_int (loop, true);
1701 if (numiter >= 0
1702 && compare_tree_int (tmp, numiter) > 0)
1704 free_conflict_function (*overlaps_a);
1705 free_conflict_function (*overlaps_b);
1706 *overlaps_a = conflict_fn_no_dependence ();
1707 *overlaps_b = conflict_fn_no_dependence ();
1708 *last_conflicts = integer_zero_node;
1709 dependence_stats.num_siv_independent++;
1710 return;
1712 dependence_stats.num_siv_dependent++;
1713 return;
1716 /* When the step does not divide the difference, there are
1717 no overlaps. */
1718 else
1720 *overlaps_a = conflict_fn_no_dependence ();
1721 *overlaps_b = conflict_fn_no_dependence ();
1722 *last_conflicts = integer_zero_node;
1723 dependence_stats.num_siv_independent++;
1724 return;
1728 else
1730 /* Example:
1731 chrec_a = 12
1732 chrec_b = {10, +, -1}
1734 In this case, chrec_a will not overlap with chrec_b. */
1735 *overlaps_a = conflict_fn_no_dependence ();
1736 *overlaps_b = conflict_fn_no_dependence ();
1737 *last_conflicts = integer_zero_node;
1738 dependence_stats.num_siv_independent++;
1739 return;
1743 else
1745 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1747 if (dump_file && (dump_flags & TDF_DETAILS))
1748 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1750 *overlaps_a = conflict_fn_not_known ();
1751 *overlaps_b = conflict_fn_not_known ();
1752 *last_conflicts = chrec_dont_know;
1753 dependence_stats.num_siv_unimplemented++;
1754 return;
1756 else
1758 if (value2 == false)
1760 /* Example:
1761 chrec_a = 3
1762 chrec_b = {10, +, -1}
1764 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1766 HOST_WIDE_INT numiter;
1767 struct loop *loop = get_chrec_loop (chrec_b);
1769 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1770 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
1771 CHREC_RIGHT (chrec_b));
1772 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1773 *last_conflicts = integer_one_node;
1775 /* Perform weak-zero siv test to see if overlap is
1776 outside the loop bounds. */
1777 numiter = max_stmt_executions_int (loop, true);
1779 if (numiter >= 0
1780 && compare_tree_int (tmp, numiter) > 0)
1782 free_conflict_function (*overlaps_a);
1783 free_conflict_function (*overlaps_b);
1784 *overlaps_a = conflict_fn_no_dependence ();
1785 *overlaps_b = conflict_fn_no_dependence ();
1786 *last_conflicts = integer_zero_node;
1787 dependence_stats.num_siv_independent++;
1788 return;
1790 dependence_stats.num_siv_dependent++;
1791 return;
1794 /* When the step does not divide the difference, there
1795 are no overlaps. */
1796 else
1798 *overlaps_a = conflict_fn_no_dependence ();
1799 *overlaps_b = conflict_fn_no_dependence ();
1800 *last_conflicts = integer_zero_node;
1801 dependence_stats.num_siv_independent++;
1802 return;
1805 else
1807 /* Example:
1808 chrec_a = 3
1809 chrec_b = {4, +, 1}
1811 In this case, chrec_a will not overlap with chrec_b. */
1812 *overlaps_a = conflict_fn_no_dependence ();
1813 *overlaps_b = conflict_fn_no_dependence ();
1814 *last_conflicts = integer_zero_node;
1815 dependence_stats.num_siv_independent++;
1816 return;
1823 /* Helper recursive function for initializing the matrix A. Returns
1824 the initial value of CHREC. */
1826 static tree
1827 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
1829 gcc_assert (chrec);
1831 switch (TREE_CODE (chrec))
1833 case POLYNOMIAL_CHREC:
1834 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
1836 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
1837 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
1839 case PLUS_EXPR:
1840 case MULT_EXPR:
1841 case MINUS_EXPR:
1843 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1844 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
1846 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
1849 case NOP_EXPR:
1851 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1852 return chrec_convert (chrec_type (chrec), op, NULL);
1855 case BIT_NOT_EXPR:
1857 /* Handle ~X as -1 - X. */
1858 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1859 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
1860 build_int_cst (TREE_TYPE (chrec), -1), op);
1863 case INTEGER_CST:
1864 return chrec;
1866 default:
1867 gcc_unreachable ();
1868 return NULL_TREE;
1872 #define FLOOR_DIV(x,y) ((x) / (y))
1874 /* Solves the special case of the Diophantine equation:
1875 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
1877 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
1878 number of iterations that loops X and Y run. The overlaps will be
1879 constructed as evolutions in dimension DIM. */
1881 static void
1882 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
1883 affine_fn *overlaps_a,
1884 affine_fn *overlaps_b,
1885 tree *last_conflicts, int dim)
1887 if (((step_a > 0 && step_b > 0)
1888 || (step_a < 0 && step_b < 0)))
1890 int step_overlaps_a, step_overlaps_b;
1891 int gcd_steps_a_b, last_conflict, tau2;
1893 gcd_steps_a_b = gcd (step_a, step_b);
1894 step_overlaps_a = step_b / gcd_steps_a_b;
1895 step_overlaps_b = step_a / gcd_steps_a_b;
1897 if (niter > 0)
1899 tau2 = FLOOR_DIV (niter, step_overlaps_a);
1900 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
1901 last_conflict = tau2;
1902 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
1904 else
1905 *last_conflicts = chrec_dont_know;
1907 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
1908 build_int_cst (NULL_TREE,
1909 step_overlaps_a));
1910 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
1911 build_int_cst (NULL_TREE,
1912 step_overlaps_b));
1915 else
1917 *overlaps_a = affine_fn_cst (integer_zero_node);
1918 *overlaps_b = affine_fn_cst (integer_zero_node);
1919 *last_conflicts = integer_zero_node;
1923 /* Solves the special case of a Diophantine equation where CHREC_A is
1924 an affine bivariate function, and CHREC_B is an affine univariate
1925 function. For example,
1927 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
1929 has the following overlapping functions:
1931 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
1932 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
1933 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
1935 FORNOW: This is a specialized implementation for a case occurring in
1936 a common benchmark. Implement the general algorithm. */
1938 static void
1939 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
1940 conflict_function **overlaps_a,
1941 conflict_function **overlaps_b,
1942 tree *last_conflicts)
1944 bool xz_p, yz_p, xyz_p;
1945 int step_x, step_y, step_z;
1946 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
1947 affine_fn overlaps_a_xz, overlaps_b_xz;
1948 affine_fn overlaps_a_yz, overlaps_b_yz;
1949 affine_fn overlaps_a_xyz, overlaps_b_xyz;
1950 affine_fn ova1, ova2, ovb;
1951 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
1953 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
1954 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
1955 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
1957 niter_x =
1958 max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)), true);
1959 niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a), true);
1960 niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b), true);
1962 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
1964 if (dump_file && (dump_flags & TDF_DETAILS))
1965 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
1967 *overlaps_a = conflict_fn_not_known ();
1968 *overlaps_b = conflict_fn_not_known ();
1969 *last_conflicts = chrec_dont_know;
1970 return;
1973 niter = MIN (niter_x, niter_z);
1974 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
1975 &overlaps_a_xz,
1976 &overlaps_b_xz,
1977 &last_conflicts_xz, 1);
1978 niter = MIN (niter_y, niter_z);
1979 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
1980 &overlaps_a_yz,
1981 &overlaps_b_yz,
1982 &last_conflicts_yz, 2);
1983 niter = MIN (niter_x, niter_z);
1984 niter = MIN (niter_y, niter);
1985 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
1986 &overlaps_a_xyz,
1987 &overlaps_b_xyz,
1988 &last_conflicts_xyz, 3);
1990 xz_p = !integer_zerop (last_conflicts_xz);
1991 yz_p = !integer_zerop (last_conflicts_yz);
1992 xyz_p = !integer_zerop (last_conflicts_xyz);
1994 if (xz_p || yz_p || xyz_p)
1996 ova1 = affine_fn_cst (integer_zero_node);
1997 ova2 = affine_fn_cst (integer_zero_node);
1998 ovb = affine_fn_cst (integer_zero_node);
1999 if (xz_p)
2001 affine_fn t0 = ova1;
2002 affine_fn t2 = ovb;
2004 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2005 ovb = affine_fn_plus (ovb, overlaps_b_xz);
2006 affine_fn_free (t0);
2007 affine_fn_free (t2);
2008 *last_conflicts = last_conflicts_xz;
2010 if (yz_p)
2012 affine_fn t0 = ova2;
2013 affine_fn t2 = ovb;
2015 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2016 ovb = affine_fn_plus (ovb, overlaps_b_yz);
2017 affine_fn_free (t0);
2018 affine_fn_free (t2);
2019 *last_conflicts = last_conflicts_yz;
2021 if (xyz_p)
2023 affine_fn t0 = ova1;
2024 affine_fn t2 = ova2;
2025 affine_fn t4 = ovb;
2027 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2028 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2029 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2030 affine_fn_free (t0);
2031 affine_fn_free (t2);
2032 affine_fn_free (t4);
2033 *last_conflicts = last_conflicts_xyz;
2035 *overlaps_a = conflict_fn (2, ova1, ova2);
2036 *overlaps_b = conflict_fn (1, ovb);
2038 else
2040 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2041 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2042 *last_conflicts = integer_zero_node;
2045 affine_fn_free (overlaps_a_xz);
2046 affine_fn_free (overlaps_b_xz);
2047 affine_fn_free (overlaps_a_yz);
2048 affine_fn_free (overlaps_b_yz);
2049 affine_fn_free (overlaps_a_xyz);
2050 affine_fn_free (overlaps_b_xyz);
2053 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
2055 static void
2056 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
2057 int size)
2059 memcpy (vec2, vec1, size * sizeof (*vec1));
2062 /* Copy the elements of M x N matrix MAT1 to MAT2. */
2064 static void
2065 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
2066 int m, int n)
2068 int i;
2070 for (i = 0; i < m; i++)
2071 lambda_vector_copy (mat1[i], mat2[i], n);
2074 /* Store the N x N identity matrix in MAT. */
2076 static void
2077 lambda_matrix_id (lambda_matrix mat, int size)
2079 int i, j;
2081 for (i = 0; i < size; i++)
2082 for (j = 0; j < size; j++)
2083 mat[i][j] = (i == j) ? 1 : 0;
2086 /* Return the first nonzero element of vector VEC1 between START and N.
2087 We must have START <= N. Returns N if VEC1 is the zero vector. */
2089 static int
2090 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
2092 int j = start;
2093 while (j < n && vec1[j] == 0)
2094 j++;
2095 return j;
2098 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2099 R2 = R2 + CONST1 * R1. */
2101 static void
2102 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
2104 int i;
2106 if (const1 == 0)
2107 return;
2109 for (i = 0; i < n; i++)
2110 mat[r2][i] += const1 * mat[r1][i];
2113 /* Swap rows R1 and R2 in matrix MAT. */
2115 static void
2116 lambda_matrix_row_exchange (lambda_matrix mat, int r1, int r2)
2118 lambda_vector row;
2120 row = mat[r1];
2121 mat[r1] = mat[r2];
2122 mat[r2] = row;
2125 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
2126 and store the result in VEC2. */
2128 static void
2129 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
2130 int size, int const1)
2132 int i;
2134 if (const1 == 0)
2135 lambda_vector_clear (vec2, size);
2136 else
2137 for (i = 0; i < size; i++)
2138 vec2[i] = const1 * vec1[i];
2141 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
2143 static void
2144 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
2145 int size)
2147 lambda_vector_mult_const (vec1, vec2, size, -1);
2150 /* Negate row R1 of matrix MAT which has N columns. */
2152 static void
2153 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
2155 lambda_vector_negate (mat[r1], mat[r1], n);
2158 /* Return true if two vectors are equal. */
2160 static bool
2161 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
2163 int i;
2164 for (i = 0; i < size; i++)
2165 if (vec1[i] != vec2[i])
2166 return false;
2167 return true;
2170 /* Given an M x N integer matrix A, this function determines an M x
2171 M unimodular matrix U, and an M x N echelon matrix S such that
2172 "U.A = S". This decomposition is also known as "right Hermite".
2174 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2175 Restructuring Compilers" Utpal Banerjee. */
2177 static void
2178 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
2179 lambda_matrix S, lambda_matrix U)
2181 int i, j, i0 = 0;
2183 lambda_matrix_copy (A, S, m, n);
2184 lambda_matrix_id (U, m);
2186 for (j = 0; j < n; j++)
2188 if (lambda_vector_first_nz (S[j], m, i0) < m)
2190 ++i0;
2191 for (i = m - 1; i >= i0; i--)
2193 while (S[i][j] != 0)
2195 int sigma, factor, a, b;
2197 a = S[i-1][j];
2198 b = S[i][j];
2199 sigma = (a * b < 0) ? -1: 1;
2200 a = abs (a);
2201 b = abs (b);
2202 factor = sigma * (a / b);
2204 lambda_matrix_row_add (S, n, i, i-1, -factor);
2205 lambda_matrix_row_exchange (S, i, i-1);
2207 lambda_matrix_row_add (U, m, i, i-1, -factor);
2208 lambda_matrix_row_exchange (U, i, i-1);
2215 /* Determines the overlapping elements due to accesses CHREC_A and
2216 CHREC_B, that are affine functions. This function cannot handle
2217 symbolic evolution functions, ie. when initial conditions are
2218 parameters, because it uses lambda matrices of integers. */
2220 static void
2221 analyze_subscript_affine_affine (tree chrec_a,
2222 tree chrec_b,
2223 conflict_function **overlaps_a,
2224 conflict_function **overlaps_b,
2225 tree *last_conflicts)
2227 unsigned nb_vars_a, nb_vars_b, dim;
2228 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2229 lambda_matrix A, U, S;
2230 struct obstack scratch_obstack;
2232 if (eq_evolutions_p (chrec_a, chrec_b))
2234 /* The accessed index overlaps for each iteration in the
2235 loop. */
2236 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2237 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2238 *last_conflicts = chrec_dont_know;
2239 return;
2241 if (dump_file && (dump_flags & TDF_DETAILS))
2242 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2244 /* For determining the initial intersection, we have to solve a
2245 Diophantine equation. This is the most time consuming part.
2247 For answering to the question: "Is there a dependence?" we have
2248 to prove that there exists a solution to the Diophantine
2249 equation, and that the solution is in the iteration domain,
2250 i.e. the solution is positive or zero, and that the solution
2251 happens before the upper bound loop.nb_iterations. Otherwise
2252 there is no dependence. This function outputs a description of
2253 the iterations that hold the intersections. */
2255 nb_vars_a = nb_vars_in_chrec (chrec_a);
2256 nb_vars_b = nb_vars_in_chrec (chrec_b);
2258 gcc_obstack_init (&scratch_obstack);
2260 dim = nb_vars_a + nb_vars_b;
2261 U = lambda_matrix_new (dim, dim, &scratch_obstack);
2262 A = lambda_matrix_new (dim, 1, &scratch_obstack);
2263 S = lambda_matrix_new (dim, 1, &scratch_obstack);
2265 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2266 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
2267 gamma = init_b - init_a;
2269 /* Don't do all the hard work of solving the Diophantine equation
2270 when we already know the solution: for example,
2271 | {3, +, 1}_1
2272 | {3, +, 4}_2
2273 | gamma = 3 - 3 = 0.
2274 Then the first overlap occurs during the first iterations:
2275 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2277 if (gamma == 0)
2279 if (nb_vars_a == 1 && nb_vars_b == 1)
2281 HOST_WIDE_INT step_a, step_b;
2282 HOST_WIDE_INT niter, niter_a, niter_b;
2283 affine_fn ova, ovb;
2285 niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a), true);
2286 niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b), true);
2287 niter = MIN (niter_a, niter_b);
2288 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2289 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2291 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2292 &ova, &ovb,
2293 last_conflicts, 1);
2294 *overlaps_a = conflict_fn (1, ova);
2295 *overlaps_b = conflict_fn (1, ovb);
2298 else if (nb_vars_a == 2 && nb_vars_b == 1)
2299 compute_overlap_steps_for_affine_1_2
2300 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2302 else if (nb_vars_a == 1 && nb_vars_b == 2)
2303 compute_overlap_steps_for_affine_1_2
2304 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2306 else
2308 if (dump_file && (dump_flags & TDF_DETAILS))
2309 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2310 *overlaps_a = conflict_fn_not_known ();
2311 *overlaps_b = conflict_fn_not_known ();
2312 *last_conflicts = chrec_dont_know;
2314 goto end_analyze_subs_aa;
2317 /* U.A = S */
2318 lambda_matrix_right_hermite (A, dim, 1, S, U);
2320 if (S[0][0] < 0)
2322 S[0][0] *= -1;
2323 lambda_matrix_row_negate (U, dim, 0);
2325 gcd_alpha_beta = S[0][0];
2327 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2328 but that is a quite strange case. Instead of ICEing, answer
2329 don't know. */
2330 if (gcd_alpha_beta == 0)
2332 *overlaps_a = conflict_fn_not_known ();
2333 *overlaps_b = conflict_fn_not_known ();
2334 *last_conflicts = chrec_dont_know;
2335 goto end_analyze_subs_aa;
2338 /* The classic "gcd-test". */
2339 if (!int_divides_p (gcd_alpha_beta, gamma))
2341 /* The "gcd-test" has determined that there is no integer
2342 solution, i.e. there is no dependence. */
2343 *overlaps_a = conflict_fn_no_dependence ();
2344 *overlaps_b = conflict_fn_no_dependence ();
2345 *last_conflicts = integer_zero_node;
2348 /* Both access functions are univariate. This includes SIV and MIV cases. */
2349 else if (nb_vars_a == 1 && nb_vars_b == 1)
2351 /* Both functions should have the same evolution sign. */
2352 if (((A[0][0] > 0 && -A[1][0] > 0)
2353 || (A[0][0] < 0 && -A[1][0] < 0)))
2355 /* The solutions are given by:
2357 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2358 | [u21 u22] [y0]
2360 For a given integer t. Using the following variables,
2362 | i0 = u11 * gamma / gcd_alpha_beta
2363 | j0 = u12 * gamma / gcd_alpha_beta
2364 | i1 = u21
2365 | j1 = u22
2367 the solutions are:
2369 | x0 = i0 + i1 * t,
2370 | y0 = j0 + j1 * t. */
2371 HOST_WIDE_INT i0, j0, i1, j1;
2373 i0 = U[0][0] * gamma / gcd_alpha_beta;
2374 j0 = U[0][1] * gamma / gcd_alpha_beta;
2375 i1 = U[1][0];
2376 j1 = U[1][1];
2378 if ((i1 == 0 && i0 < 0)
2379 || (j1 == 0 && j0 < 0))
2381 /* There is no solution.
2382 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2383 falls in here, but for the moment we don't look at the
2384 upper bound of the iteration domain. */
2385 *overlaps_a = conflict_fn_no_dependence ();
2386 *overlaps_b = conflict_fn_no_dependence ();
2387 *last_conflicts = integer_zero_node;
2388 goto end_analyze_subs_aa;
2391 if (i1 > 0 && j1 > 0)
2393 HOST_WIDE_INT niter_a = max_stmt_executions_int
2394 (get_chrec_loop (chrec_a), true);
2395 HOST_WIDE_INT niter_b = max_stmt_executions_int
2396 (get_chrec_loop (chrec_b), true);
2397 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2399 /* (X0, Y0) is a solution of the Diophantine equation:
2400 "chrec_a (X0) = chrec_b (Y0)". */
2401 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2402 CEIL (-j0, j1));
2403 HOST_WIDE_INT x0 = i1 * tau1 + i0;
2404 HOST_WIDE_INT y0 = j1 * tau1 + j0;
2406 /* (X1, Y1) is the smallest positive solution of the eq
2407 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2408 first conflict occurs. */
2409 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2410 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2411 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2413 if (niter > 0)
2415 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2416 FLOOR_DIV (niter - j0, j1));
2417 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2419 /* If the overlap occurs outside of the bounds of the
2420 loop, there is no dependence. */
2421 if (x1 >= niter || y1 >= niter)
2423 *overlaps_a = conflict_fn_no_dependence ();
2424 *overlaps_b = conflict_fn_no_dependence ();
2425 *last_conflicts = integer_zero_node;
2426 goto end_analyze_subs_aa;
2428 else
2429 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2431 else
2432 *last_conflicts = chrec_dont_know;
2434 *overlaps_a
2435 = conflict_fn (1,
2436 affine_fn_univar (build_int_cst (NULL_TREE, x1),
2438 build_int_cst (NULL_TREE, i1)));
2439 *overlaps_b
2440 = conflict_fn (1,
2441 affine_fn_univar (build_int_cst (NULL_TREE, y1),
2443 build_int_cst (NULL_TREE, j1)));
2445 else
2447 /* FIXME: For the moment, the upper bound of the
2448 iteration domain for i and j is not checked. */
2449 if (dump_file && (dump_flags & TDF_DETAILS))
2450 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2451 *overlaps_a = conflict_fn_not_known ();
2452 *overlaps_b = conflict_fn_not_known ();
2453 *last_conflicts = chrec_dont_know;
2456 else
2458 if (dump_file && (dump_flags & TDF_DETAILS))
2459 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2460 *overlaps_a = conflict_fn_not_known ();
2461 *overlaps_b = conflict_fn_not_known ();
2462 *last_conflicts = chrec_dont_know;
2465 else
2467 if (dump_file && (dump_flags & TDF_DETAILS))
2468 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2469 *overlaps_a = conflict_fn_not_known ();
2470 *overlaps_b = conflict_fn_not_known ();
2471 *last_conflicts = chrec_dont_know;
2474 end_analyze_subs_aa:
2475 obstack_free (&scratch_obstack, NULL);
2476 if (dump_file && (dump_flags & TDF_DETAILS))
2478 fprintf (dump_file, " (overlaps_a = ");
2479 dump_conflict_function (dump_file, *overlaps_a);
2480 fprintf (dump_file, ")\n (overlaps_b = ");
2481 dump_conflict_function (dump_file, *overlaps_b);
2482 fprintf (dump_file, ")\n");
2483 fprintf (dump_file, ")\n");
2487 /* Returns true when analyze_subscript_affine_affine can be used for
2488 determining the dependence relation between chrec_a and chrec_b,
2489 that contain symbols. This function modifies chrec_a and chrec_b
2490 such that the analysis result is the same, and such that they don't
2491 contain symbols, and then can safely be passed to the analyzer.
2493 Example: The analysis of the following tuples of evolutions produce
2494 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2495 vs. {0, +, 1}_1
2497 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2498 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2501 static bool
2502 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2504 tree diff, type, left_a, left_b, right_b;
2506 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2507 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2508 /* FIXME: For the moment not handled. Might be refined later. */
2509 return false;
2511 type = chrec_type (*chrec_a);
2512 left_a = CHREC_LEFT (*chrec_a);
2513 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
2514 diff = chrec_fold_minus (type, left_a, left_b);
2516 if (!evolution_function_is_constant_p (diff))
2517 return false;
2519 if (dump_file && (dump_flags & TDF_DETAILS))
2520 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2522 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2523 diff, CHREC_RIGHT (*chrec_a));
2524 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
2525 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2526 build_int_cst (type, 0),
2527 right_b);
2528 return true;
2531 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2532 *OVERLAPS_B are initialized to the functions that describe the
2533 relation between the elements accessed twice by CHREC_A and
2534 CHREC_B. For k >= 0, the following property is verified:
2536 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2538 static void
2539 analyze_siv_subscript (tree chrec_a,
2540 tree chrec_b,
2541 conflict_function **overlaps_a,
2542 conflict_function **overlaps_b,
2543 tree *last_conflicts,
2544 int loop_nest_num)
2546 dependence_stats.num_siv++;
2548 if (dump_file && (dump_flags & TDF_DETAILS))
2549 fprintf (dump_file, "(analyze_siv_subscript \n");
2551 if (evolution_function_is_constant_p (chrec_a)
2552 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2553 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2554 overlaps_a, overlaps_b, last_conflicts);
2556 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2557 && evolution_function_is_constant_p (chrec_b))
2558 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2559 overlaps_b, overlaps_a, last_conflicts);
2561 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2562 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2564 if (!chrec_contains_symbols (chrec_a)
2565 && !chrec_contains_symbols (chrec_b))
2567 analyze_subscript_affine_affine (chrec_a, chrec_b,
2568 overlaps_a, overlaps_b,
2569 last_conflicts);
2571 if (CF_NOT_KNOWN_P (*overlaps_a)
2572 || CF_NOT_KNOWN_P (*overlaps_b))
2573 dependence_stats.num_siv_unimplemented++;
2574 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2575 || CF_NO_DEPENDENCE_P (*overlaps_b))
2576 dependence_stats.num_siv_independent++;
2577 else
2578 dependence_stats.num_siv_dependent++;
2580 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2581 &chrec_b))
2583 analyze_subscript_affine_affine (chrec_a, chrec_b,
2584 overlaps_a, overlaps_b,
2585 last_conflicts);
2587 if (CF_NOT_KNOWN_P (*overlaps_a)
2588 || CF_NOT_KNOWN_P (*overlaps_b))
2589 dependence_stats.num_siv_unimplemented++;
2590 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2591 || CF_NO_DEPENDENCE_P (*overlaps_b))
2592 dependence_stats.num_siv_independent++;
2593 else
2594 dependence_stats.num_siv_dependent++;
2596 else
2597 goto siv_subscript_dontknow;
2600 else
2602 siv_subscript_dontknow:;
2603 if (dump_file && (dump_flags & TDF_DETAILS))
2604 fprintf (dump_file, "siv test failed: unimplemented.\n");
2605 *overlaps_a = conflict_fn_not_known ();
2606 *overlaps_b = conflict_fn_not_known ();
2607 *last_conflicts = chrec_dont_know;
2608 dependence_stats.num_siv_unimplemented++;
2611 if (dump_file && (dump_flags & TDF_DETAILS))
2612 fprintf (dump_file, ")\n");
2615 /* Returns false if we can prove that the greatest common divisor of the steps
2616 of CHREC does not divide CST, false otherwise. */
2618 static bool
2619 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2621 HOST_WIDE_INT cd = 0, val;
2622 tree step;
2624 if (!host_integerp (cst, 0))
2625 return true;
2626 val = tree_low_cst (cst, 0);
2628 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2630 step = CHREC_RIGHT (chrec);
2631 if (!host_integerp (step, 0))
2632 return true;
2633 cd = gcd (cd, tree_low_cst (step, 0));
2634 chrec = CHREC_LEFT (chrec);
2637 return val % cd == 0;
2640 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2641 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2642 functions that describe the relation between the elements accessed
2643 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2644 is verified:
2646 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2648 static void
2649 analyze_miv_subscript (tree chrec_a,
2650 tree chrec_b,
2651 conflict_function **overlaps_a,
2652 conflict_function **overlaps_b,
2653 tree *last_conflicts,
2654 struct loop *loop_nest)
2656 tree type, difference;
2658 dependence_stats.num_miv++;
2659 if (dump_file && (dump_flags & TDF_DETAILS))
2660 fprintf (dump_file, "(analyze_miv_subscript \n");
2662 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2663 chrec_a = chrec_convert (type, chrec_a, NULL);
2664 chrec_b = chrec_convert (type, chrec_b, NULL);
2665 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2667 if (eq_evolutions_p (chrec_a, chrec_b))
2669 /* Access functions are the same: all the elements are accessed
2670 in the same order. */
2671 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2672 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2673 *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
2674 dependence_stats.num_miv_dependent++;
2677 else if (evolution_function_is_constant_p (difference)
2678 /* For the moment, the following is verified:
2679 evolution_function_is_affine_multivariate_p (chrec_a,
2680 loop_nest->num) */
2681 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2683 /* testsuite/.../ssa-chrec-33.c
2684 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2686 The difference is 1, and all the evolution steps are multiples
2687 of 2, consequently there are no overlapping elements. */
2688 *overlaps_a = conflict_fn_no_dependence ();
2689 *overlaps_b = conflict_fn_no_dependence ();
2690 *last_conflicts = integer_zero_node;
2691 dependence_stats.num_miv_independent++;
2694 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2695 && !chrec_contains_symbols (chrec_a)
2696 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2697 && !chrec_contains_symbols (chrec_b))
2699 /* testsuite/.../ssa-chrec-35.c
2700 {0, +, 1}_2 vs. {0, +, 1}_3
2701 the overlapping elements are respectively located at iterations:
2702 {0, +, 1}_x and {0, +, 1}_x,
2703 in other words, we have the equality:
2704 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2706 Other examples:
2707 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2708 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2710 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2711 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2713 analyze_subscript_affine_affine (chrec_a, chrec_b,
2714 overlaps_a, overlaps_b, last_conflicts);
2716 if (CF_NOT_KNOWN_P (*overlaps_a)
2717 || CF_NOT_KNOWN_P (*overlaps_b))
2718 dependence_stats.num_miv_unimplemented++;
2719 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2720 || CF_NO_DEPENDENCE_P (*overlaps_b))
2721 dependence_stats.num_miv_independent++;
2722 else
2723 dependence_stats.num_miv_dependent++;
2726 else
2728 /* When the analysis is too difficult, answer "don't know". */
2729 if (dump_file && (dump_flags & TDF_DETAILS))
2730 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2732 *overlaps_a = conflict_fn_not_known ();
2733 *overlaps_b = conflict_fn_not_known ();
2734 *last_conflicts = chrec_dont_know;
2735 dependence_stats.num_miv_unimplemented++;
2738 if (dump_file && (dump_flags & TDF_DETAILS))
2739 fprintf (dump_file, ")\n");
2742 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2743 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2744 OVERLAP_ITERATIONS_B are initialized with two functions that
2745 describe the iterations that contain conflicting elements.
2747 Remark: For an integer k >= 0, the following equality is true:
2749 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2752 static void
2753 analyze_overlapping_iterations (tree chrec_a,
2754 tree chrec_b,
2755 conflict_function **overlap_iterations_a,
2756 conflict_function **overlap_iterations_b,
2757 tree *last_conflicts, struct loop *loop_nest)
2759 unsigned int lnn = loop_nest->num;
2761 dependence_stats.num_subscript_tests++;
2763 if (dump_file && (dump_flags & TDF_DETAILS))
2765 fprintf (dump_file, "(analyze_overlapping_iterations \n");
2766 fprintf (dump_file, " (chrec_a = ");
2767 print_generic_expr (dump_file, chrec_a, 0);
2768 fprintf (dump_file, ")\n (chrec_b = ");
2769 print_generic_expr (dump_file, chrec_b, 0);
2770 fprintf (dump_file, ")\n");
2773 if (chrec_a == NULL_TREE
2774 || chrec_b == NULL_TREE
2775 || chrec_contains_undetermined (chrec_a)
2776 || chrec_contains_undetermined (chrec_b))
2778 dependence_stats.num_subscript_undetermined++;
2780 *overlap_iterations_a = conflict_fn_not_known ();
2781 *overlap_iterations_b = conflict_fn_not_known ();
2784 /* If they are the same chrec, and are affine, they overlap
2785 on every iteration. */
2786 else if (eq_evolutions_p (chrec_a, chrec_b)
2787 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2788 || operand_equal_p (chrec_a, chrec_b, 0)))
2790 dependence_stats.num_same_subscript_function++;
2791 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2792 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2793 *last_conflicts = chrec_dont_know;
2796 /* If they aren't the same, and aren't affine, we can't do anything
2797 yet. */
2798 else if ((chrec_contains_symbols (chrec_a)
2799 || chrec_contains_symbols (chrec_b))
2800 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2801 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
2803 dependence_stats.num_subscript_undetermined++;
2804 *overlap_iterations_a = conflict_fn_not_known ();
2805 *overlap_iterations_b = conflict_fn_not_known ();
2808 else if (ziv_subscript_p (chrec_a, chrec_b))
2809 analyze_ziv_subscript (chrec_a, chrec_b,
2810 overlap_iterations_a, overlap_iterations_b,
2811 last_conflicts);
2813 else if (siv_subscript_p (chrec_a, chrec_b))
2814 analyze_siv_subscript (chrec_a, chrec_b,
2815 overlap_iterations_a, overlap_iterations_b,
2816 last_conflicts, lnn);
2818 else
2819 analyze_miv_subscript (chrec_a, chrec_b,
2820 overlap_iterations_a, overlap_iterations_b,
2821 last_conflicts, loop_nest);
2823 if (dump_file && (dump_flags & TDF_DETAILS))
2825 fprintf (dump_file, " (overlap_iterations_a = ");
2826 dump_conflict_function (dump_file, *overlap_iterations_a);
2827 fprintf (dump_file, ")\n (overlap_iterations_b = ");
2828 dump_conflict_function (dump_file, *overlap_iterations_b);
2829 fprintf (dump_file, ")\n");
2830 fprintf (dump_file, ")\n");
2834 /* Helper function for uniquely inserting distance vectors. */
2836 static void
2837 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
2839 unsigned i;
2840 lambda_vector v;
2842 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, v)
2843 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
2844 return;
2846 VEC_safe_push (lambda_vector, heap, DDR_DIST_VECTS (ddr), dist_v);
2849 /* Helper function for uniquely inserting direction vectors. */
2851 static void
2852 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
2854 unsigned i;
2855 lambda_vector v;
2857 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), i, v)
2858 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
2859 return;
2861 VEC_safe_push (lambda_vector, heap, DDR_DIR_VECTS (ddr), dir_v);
2864 /* Add a distance of 1 on all the loops outer than INDEX. If we
2865 haven't yet determined a distance for this outer loop, push a new
2866 distance vector composed of the previous distance, and a distance
2867 of 1 for this outer loop. Example:
2869 | loop_1
2870 | loop_2
2871 | A[10]
2872 | endloop_2
2873 | endloop_1
2875 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
2876 save (0, 1), then we have to save (1, 0). */
2878 static void
2879 add_outer_distances (struct data_dependence_relation *ddr,
2880 lambda_vector dist_v, int index)
2882 /* For each outer loop where init_v is not set, the accesses are
2883 in dependence of distance 1 in the loop. */
2884 while (--index >= 0)
2886 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2887 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
2888 save_v[index] = 1;
2889 save_dist_v (ddr, save_v);
2893 /* Return false when fail to represent the data dependence as a
2894 distance vector. INIT_B is set to true when a component has been
2895 added to the distance vector DIST_V. INDEX_CARRY is then set to
2896 the index in DIST_V that carries the dependence. */
2898 static bool
2899 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
2900 struct data_reference *ddr_a,
2901 struct data_reference *ddr_b,
2902 lambda_vector dist_v, bool *init_b,
2903 int *index_carry)
2905 unsigned i;
2906 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2908 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2910 tree access_fn_a, access_fn_b;
2911 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
2913 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2915 non_affine_dependence_relation (ddr);
2916 return false;
2919 access_fn_a = DR_ACCESS_FN (ddr_a, i);
2920 access_fn_b = DR_ACCESS_FN (ddr_b, i);
2922 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
2923 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
2925 int dist, index;
2926 int var_a = CHREC_VARIABLE (access_fn_a);
2927 int var_b = CHREC_VARIABLE (access_fn_b);
2929 if (var_a != var_b
2930 || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2932 non_affine_dependence_relation (ddr);
2933 return false;
2936 dist = int_cst_value (SUB_DISTANCE (subscript));
2937 index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
2938 *index_carry = MIN (index, *index_carry);
2940 /* This is the subscript coupling test. If we have already
2941 recorded a distance for this loop (a distance coming from
2942 another subscript), it should be the same. For example,
2943 in the following code, there is no dependence:
2945 | loop i = 0, N, 1
2946 | T[i+1][i] = ...
2947 | ... = T[i][i]
2948 | endloop
2950 if (init_v[index] != 0 && dist_v[index] != dist)
2952 finalize_ddr_dependent (ddr, chrec_known);
2953 return false;
2956 dist_v[index] = dist;
2957 init_v[index] = 1;
2958 *init_b = true;
2960 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
2962 /* This can be for example an affine vs. constant dependence
2963 (T[i] vs. T[3]) that is not an affine dependence and is
2964 not representable as a distance vector. */
2965 non_affine_dependence_relation (ddr);
2966 return false;
2970 return true;
2973 /* Return true when the DDR contains only constant access functions. */
2975 static bool
2976 constant_access_functions (const struct data_dependence_relation *ddr)
2978 unsigned i;
2980 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2981 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
2982 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
2983 return false;
2985 return true;
2988 /* Helper function for the case where DDR_A and DDR_B are the same
2989 multivariate access function with a constant step. For an example
2990 see pr34635-1.c. */
2992 static void
2993 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
2995 int x_1, x_2;
2996 tree c_1 = CHREC_LEFT (c_2);
2997 tree c_0 = CHREC_LEFT (c_1);
2998 lambda_vector dist_v;
2999 int v1, v2, cd;
3001 /* Polynomials with more than 2 variables are not handled yet. When
3002 the evolution steps are parameters, it is not possible to
3003 represent the dependence using classical distance vectors. */
3004 if (TREE_CODE (c_0) != INTEGER_CST
3005 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
3006 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
3008 DDR_AFFINE_P (ddr) = false;
3009 return;
3012 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
3013 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
3015 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
3016 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3017 v1 = int_cst_value (CHREC_RIGHT (c_1));
3018 v2 = int_cst_value (CHREC_RIGHT (c_2));
3019 cd = gcd (v1, v2);
3020 v1 /= cd;
3021 v2 /= cd;
3023 if (v2 < 0)
3025 v2 = -v2;
3026 v1 = -v1;
3029 dist_v[x_1] = v2;
3030 dist_v[x_2] = -v1;
3031 save_dist_v (ddr, dist_v);
3033 add_outer_distances (ddr, dist_v, x_1);
3036 /* Helper function for the case where DDR_A and DDR_B are the same
3037 access functions. */
3039 static void
3040 add_other_self_distances (struct data_dependence_relation *ddr)
3042 lambda_vector dist_v;
3043 unsigned i;
3044 int index_carry = DDR_NB_LOOPS (ddr);
3046 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3048 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
3050 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
3052 if (!evolution_function_is_univariate_p (access_fun))
3054 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
3056 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
3057 return;
3060 access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
3062 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
3063 add_multivariate_self_dist (ddr, access_fun);
3064 else
3065 /* The evolution step is not constant: it varies in
3066 the outer loop, so this cannot be represented by a
3067 distance vector. For example in pr34635.c the
3068 evolution is {0, +, {0, +, 4}_1}_2. */
3069 DDR_AFFINE_P (ddr) = false;
3071 return;
3074 index_carry = MIN (index_carry,
3075 index_in_loop_nest (CHREC_VARIABLE (access_fun),
3076 DDR_LOOP_NEST (ddr)));
3080 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3081 add_outer_distances (ddr, dist_v, index_carry);
3084 static void
3085 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
3087 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3089 dist_v[DDR_INNER_LOOP (ddr)] = 1;
3090 save_dist_v (ddr, dist_v);
3093 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
3094 is the case for example when access functions are the same and
3095 equal to a constant, as in:
3097 | loop_1
3098 | A[3] = ...
3099 | ... = A[3]
3100 | endloop_1
3102 in which case the distance vectors are (0) and (1). */
3104 static void
3105 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
3107 unsigned i, j;
3109 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3111 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
3112 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
3113 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
3115 for (j = 0; j < ca->n; j++)
3116 if (affine_function_zero_p (ca->fns[j]))
3118 insert_innermost_unit_dist_vector (ddr);
3119 return;
3122 for (j = 0; j < cb->n; j++)
3123 if (affine_function_zero_p (cb->fns[j]))
3125 insert_innermost_unit_dist_vector (ddr);
3126 return;
3131 /* Compute the classic per loop distance vector. DDR is the data
3132 dependence relation to build a vector from. Return false when fail
3133 to represent the data dependence as a distance vector. */
3135 static bool
3136 build_classic_dist_vector (struct data_dependence_relation *ddr,
3137 struct loop *loop_nest)
3139 bool init_b = false;
3140 int index_carry = DDR_NB_LOOPS (ddr);
3141 lambda_vector dist_v;
3143 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3144 return false;
3146 if (same_access_functions (ddr))
3148 /* Save the 0 vector. */
3149 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3150 save_dist_v (ddr, dist_v);
3152 if (constant_access_functions (ddr))
3153 add_distance_for_zero_overlaps (ddr);
3155 if (DDR_NB_LOOPS (ddr) > 1)
3156 add_other_self_distances (ddr);
3158 return true;
3161 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3162 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3163 dist_v, &init_b, &index_carry))
3164 return false;
3166 /* Save the distance vector if we initialized one. */
3167 if (init_b)
3169 /* Verify a basic constraint: classic distance vectors should
3170 always be lexicographically positive.
3172 Data references are collected in the order of execution of
3173 the program, thus for the following loop
3175 | for (i = 1; i < 100; i++)
3176 | for (j = 1; j < 100; j++)
3178 | t = T[j+1][i-1]; // A
3179 | T[j][i] = t + 2; // B
3182 references are collected following the direction of the wind:
3183 A then B. The data dependence tests are performed also
3184 following this order, such that we're looking at the distance
3185 separating the elements accessed by A from the elements later
3186 accessed by B. But in this example, the distance returned by
3187 test_dep (A, B) is lexicographically negative (-1, 1), that
3188 means that the access A occurs later than B with respect to
3189 the outer loop, ie. we're actually looking upwind. In this
3190 case we solve test_dep (B, A) looking downwind to the
3191 lexicographically positive solution, that returns the
3192 distance vector (1, -1). */
3193 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3195 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3196 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3197 loop_nest))
3198 return false;
3199 compute_subscript_distance (ddr);
3200 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3201 save_v, &init_b, &index_carry))
3202 return false;
3203 save_dist_v (ddr, save_v);
3204 DDR_REVERSED_P (ddr) = true;
3206 /* In this case there is a dependence forward for all the
3207 outer loops:
3209 | for (k = 1; k < 100; k++)
3210 | for (i = 1; i < 100; i++)
3211 | for (j = 1; j < 100; j++)
3213 | t = T[j+1][i-1]; // A
3214 | T[j][i] = t + 2; // B
3217 the vectors are:
3218 (0, 1, -1)
3219 (1, 1, -1)
3220 (1, -1, 1)
3222 if (DDR_NB_LOOPS (ddr) > 1)
3224 add_outer_distances (ddr, save_v, index_carry);
3225 add_outer_distances (ddr, dist_v, index_carry);
3228 else
3230 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3231 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3233 if (DDR_NB_LOOPS (ddr) > 1)
3235 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3237 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3238 DDR_A (ddr), loop_nest))
3239 return false;
3240 compute_subscript_distance (ddr);
3241 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3242 opposite_v, &init_b,
3243 &index_carry))
3244 return false;
3246 save_dist_v (ddr, save_v);
3247 add_outer_distances (ddr, dist_v, index_carry);
3248 add_outer_distances (ddr, opposite_v, index_carry);
3250 else
3251 save_dist_v (ddr, save_v);
3254 else
3256 /* There is a distance of 1 on all the outer loops: Example:
3257 there is a dependence of distance 1 on loop_1 for the array A.
3259 | loop_1
3260 | A[5] = ...
3261 | endloop
3263 add_outer_distances (ddr, dist_v,
3264 lambda_vector_first_nz (dist_v,
3265 DDR_NB_LOOPS (ddr), 0));
3268 if (dump_file && (dump_flags & TDF_DETAILS))
3270 unsigned i;
3272 fprintf (dump_file, "(build_classic_dist_vector\n");
3273 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3275 fprintf (dump_file, " dist_vector = (");
3276 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3277 DDR_NB_LOOPS (ddr));
3278 fprintf (dump_file, " )\n");
3280 fprintf (dump_file, ")\n");
3283 return true;
3286 /* Return the direction for a given distance.
3287 FIXME: Computing dir this way is suboptimal, since dir can catch
3288 cases that dist is unable to represent. */
3290 static inline enum data_dependence_direction
3291 dir_from_dist (int dist)
3293 if (dist > 0)
3294 return dir_positive;
3295 else if (dist < 0)
3296 return dir_negative;
3297 else
3298 return dir_equal;
3301 /* Compute the classic per loop direction vector. DDR is the data
3302 dependence relation to build a vector from. */
3304 static void
3305 build_classic_dir_vector (struct data_dependence_relation *ddr)
3307 unsigned i, j;
3308 lambda_vector dist_v;
3310 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
3312 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3314 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3315 dir_v[j] = dir_from_dist (dist_v[j]);
3317 save_dir_v (ddr, dir_v);
3321 /* Helper function. Returns true when there is a dependence between
3322 data references DRA and DRB. */
3324 static bool
3325 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3326 struct data_reference *dra,
3327 struct data_reference *drb,
3328 struct loop *loop_nest)
3330 unsigned int i;
3331 tree last_conflicts;
3332 struct subscript *subscript;
3334 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3335 i++)
3337 conflict_function *overlaps_a, *overlaps_b;
3339 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3340 DR_ACCESS_FN (drb, i),
3341 &overlaps_a, &overlaps_b,
3342 &last_conflicts, loop_nest);
3344 if (CF_NOT_KNOWN_P (overlaps_a)
3345 || CF_NOT_KNOWN_P (overlaps_b))
3347 finalize_ddr_dependent (ddr, chrec_dont_know);
3348 dependence_stats.num_dependence_undetermined++;
3349 free_conflict_function (overlaps_a);
3350 free_conflict_function (overlaps_b);
3351 return false;
3354 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3355 || CF_NO_DEPENDENCE_P (overlaps_b))
3357 finalize_ddr_dependent (ddr, chrec_known);
3358 dependence_stats.num_dependence_independent++;
3359 free_conflict_function (overlaps_a);
3360 free_conflict_function (overlaps_b);
3361 return false;
3364 else
3366 if (SUB_CONFLICTS_IN_A (subscript))
3367 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3368 if (SUB_CONFLICTS_IN_B (subscript))
3369 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3371 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3372 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3373 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3377 return true;
3380 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3382 static void
3383 subscript_dependence_tester (struct data_dependence_relation *ddr,
3384 struct loop *loop_nest)
3387 if (dump_file && (dump_flags & TDF_DETAILS))
3388 fprintf (dump_file, "(subscript_dependence_tester \n");
3390 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3391 dependence_stats.num_dependence_dependent++;
3393 compute_subscript_distance (ddr);
3394 if (build_classic_dist_vector (ddr, loop_nest))
3395 build_classic_dir_vector (ddr);
3397 if (dump_file && (dump_flags & TDF_DETAILS))
3398 fprintf (dump_file, ")\n");
3401 /* Returns true when all the access functions of A are affine or
3402 constant with respect to LOOP_NEST. */
3404 static bool
3405 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3406 const struct loop *loop_nest)
3408 unsigned int i;
3409 VEC(tree,heap) *fns = DR_ACCESS_FNS (a);
3410 tree t;
3412 FOR_EACH_VEC_ELT (tree, fns, i, t)
3413 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3414 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3415 return false;
3417 return true;
3420 /* Initializes an equation for an OMEGA problem using the information
3421 contained in the ACCESS_FUN. Returns true when the operation
3422 succeeded.
3424 PB is the omega constraint system.
3425 EQ is the number of the equation to be initialized.
3426 OFFSET is used for shifting the variables names in the constraints:
3427 a constrain is composed of 2 * the number of variables surrounding
3428 dependence accesses. OFFSET is set either to 0 for the first n variables,
3429 then it is set to n.
3430 ACCESS_FUN is expected to be an affine chrec. */
3432 static bool
3433 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3434 unsigned int offset, tree access_fun,
3435 struct data_dependence_relation *ddr)
3437 switch (TREE_CODE (access_fun))
3439 case POLYNOMIAL_CHREC:
3441 tree left = CHREC_LEFT (access_fun);
3442 tree right = CHREC_RIGHT (access_fun);
3443 int var = CHREC_VARIABLE (access_fun);
3444 unsigned var_idx;
3446 if (TREE_CODE (right) != INTEGER_CST)
3447 return false;
3449 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3450 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3452 /* Compute the innermost loop index. */
3453 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3455 if (offset == 0)
3456 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3457 += int_cst_value (right);
3459 switch (TREE_CODE (left))
3461 case POLYNOMIAL_CHREC:
3462 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3464 case INTEGER_CST:
3465 pb->eqs[eq].coef[0] += int_cst_value (left);
3466 return true;
3468 default:
3469 return false;
3473 case INTEGER_CST:
3474 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3475 return true;
3477 default:
3478 return false;
3482 /* As explained in the comments preceding init_omega_for_ddr, we have
3483 to set up a system for each loop level, setting outer loops
3484 variation to zero, and current loop variation to positive or zero.
3485 Save each lexico positive distance vector. */
3487 static void
3488 omega_extract_distance_vectors (omega_pb pb,
3489 struct data_dependence_relation *ddr)
3491 int eq, geq;
3492 unsigned i, j;
3493 struct loop *loopi, *loopj;
3494 enum omega_result res;
3496 /* Set a new problem for each loop in the nest. The basis is the
3497 problem that we have initialized until now. On top of this we
3498 add new constraints. */
3499 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3500 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3502 int dist = 0;
3503 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3504 DDR_NB_LOOPS (ddr));
3506 omega_copy_problem (copy, pb);
3508 /* For all the outer loops "loop_j", add "dj = 0". */
3509 for (j = 0;
3510 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3512 eq = omega_add_zero_eq (copy, omega_black);
3513 copy->eqs[eq].coef[j + 1] = 1;
3516 /* For "loop_i", add "0 <= di". */
3517 geq = omega_add_zero_geq (copy, omega_black);
3518 copy->geqs[geq].coef[i + 1] = 1;
3520 /* Reduce the constraint system, and test that the current
3521 problem is feasible. */
3522 res = omega_simplify_problem (copy);
3523 if (res == omega_false
3524 || res == omega_unknown
3525 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3526 goto next_problem;
3528 for (eq = 0; eq < copy->num_subs; eq++)
3529 if (copy->subs[eq].key == (int) i + 1)
3531 dist = copy->subs[eq].coef[0];
3532 goto found_dist;
3535 if (dist == 0)
3537 /* Reinitialize problem... */
3538 omega_copy_problem (copy, pb);
3539 for (j = 0;
3540 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3542 eq = omega_add_zero_eq (copy, omega_black);
3543 copy->eqs[eq].coef[j + 1] = 1;
3546 /* ..., but this time "di = 1". */
3547 eq = omega_add_zero_eq (copy, omega_black);
3548 copy->eqs[eq].coef[i + 1] = 1;
3549 copy->eqs[eq].coef[0] = -1;
3551 res = omega_simplify_problem (copy);
3552 if (res == omega_false
3553 || res == omega_unknown
3554 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3555 goto next_problem;
3557 for (eq = 0; eq < copy->num_subs; eq++)
3558 if (copy->subs[eq].key == (int) i + 1)
3560 dist = copy->subs[eq].coef[0];
3561 goto found_dist;
3565 found_dist:;
3566 /* Save the lexicographically positive distance vector. */
3567 if (dist >= 0)
3569 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3570 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3572 dist_v[i] = dist;
3574 for (eq = 0; eq < copy->num_subs; eq++)
3575 if (copy->subs[eq].key > 0)
3577 dist = copy->subs[eq].coef[0];
3578 dist_v[copy->subs[eq].key - 1] = dist;
3581 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3582 dir_v[j] = dir_from_dist (dist_v[j]);
3584 save_dist_v (ddr, dist_v);
3585 save_dir_v (ddr, dir_v);
3588 next_problem:;
3589 omega_free_problem (copy);
3593 /* This is called for each subscript of a tuple of data references:
3594 insert an equality for representing the conflicts. */
3596 static bool
3597 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3598 struct data_dependence_relation *ddr,
3599 omega_pb pb, bool *maybe_dependent)
3601 int eq;
3602 tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3603 TREE_TYPE (access_fun_b));
3604 tree fun_a = chrec_convert (type, access_fun_a, NULL);
3605 tree fun_b = chrec_convert (type, access_fun_b, NULL);
3606 tree difference = chrec_fold_minus (type, fun_a, fun_b);
3607 tree minus_one;
3609 /* When the fun_a - fun_b is not constant, the dependence is not
3610 captured by the classic distance vector representation. */
3611 if (TREE_CODE (difference) != INTEGER_CST)
3612 return false;
3614 /* ZIV test. */
3615 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3617 /* There is no dependence. */
3618 *maybe_dependent = false;
3619 return true;
3622 minus_one = build_int_cst (type, -1);
3623 fun_b = chrec_fold_multiply (type, fun_b, minus_one);
3625 eq = omega_add_zero_eq (pb, omega_black);
3626 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3627 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3628 /* There is probably a dependence, but the system of
3629 constraints cannot be built: answer "don't know". */
3630 return false;
3632 /* GCD test. */
3633 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3634 && !int_divides_p (lambda_vector_gcd
3635 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3636 2 * DDR_NB_LOOPS (ddr)),
3637 pb->eqs[eq].coef[0]))
3639 /* There is no dependence. */
3640 *maybe_dependent = false;
3641 return true;
3644 return true;
3647 /* Helper function, same as init_omega_for_ddr but specialized for
3648 data references A and B. */
3650 static bool
3651 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3652 struct data_dependence_relation *ddr,
3653 omega_pb pb, bool *maybe_dependent)
3655 unsigned i;
3656 int ineq;
3657 struct loop *loopi;
3658 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3660 /* Insert an equality per subscript. */
3661 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3663 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3664 ddr, pb, maybe_dependent))
3665 return false;
3666 else if (*maybe_dependent == false)
3668 /* There is no dependence. */
3669 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3670 return true;
3674 /* Insert inequalities: constraints corresponding to the iteration
3675 domain, i.e. the loops surrounding the references "loop_x" and
3676 the distance variables "dx". The layout of the OMEGA
3677 representation is as follows:
3678 - coef[0] is the constant
3679 - coef[1..nb_loops] are the protected variables that will not be
3680 removed by the solver: the "dx"
3681 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3683 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3684 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3686 HOST_WIDE_INT nbi = max_stmt_executions_int (loopi, true);
3688 /* 0 <= loop_x */
3689 ineq = omega_add_zero_geq (pb, omega_black);
3690 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3692 /* 0 <= loop_x + dx */
3693 ineq = omega_add_zero_geq (pb, omega_black);
3694 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3695 pb->geqs[ineq].coef[i + 1] = 1;
3697 if (nbi != -1)
3699 /* loop_x <= nb_iters */
3700 ineq = omega_add_zero_geq (pb, omega_black);
3701 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3702 pb->geqs[ineq].coef[0] = nbi;
3704 /* loop_x + dx <= nb_iters */
3705 ineq = omega_add_zero_geq (pb, omega_black);
3706 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3707 pb->geqs[ineq].coef[i + 1] = -1;
3708 pb->geqs[ineq].coef[0] = nbi;
3710 /* A step "dx" bigger than nb_iters is not feasible, so
3711 add "0 <= nb_iters + dx", */
3712 ineq = omega_add_zero_geq (pb, omega_black);
3713 pb->geqs[ineq].coef[i + 1] = 1;
3714 pb->geqs[ineq].coef[0] = nbi;
3715 /* and "dx <= nb_iters". */
3716 ineq = omega_add_zero_geq (pb, omega_black);
3717 pb->geqs[ineq].coef[i + 1] = -1;
3718 pb->geqs[ineq].coef[0] = nbi;
3722 omega_extract_distance_vectors (pb, ddr);
3724 return true;
3727 /* Sets up the Omega dependence problem for the data dependence
3728 relation DDR. Returns false when the constraint system cannot be
3729 built, ie. when the test answers "don't know". Returns true
3730 otherwise, and when independence has been proved (using one of the
3731 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3732 set MAYBE_DEPENDENT to true.
3734 Example: for setting up the dependence system corresponding to the
3735 conflicting accesses
3737 | loop_i
3738 | loop_j
3739 | A[i, i+1] = ...
3740 | ... A[2*j, 2*(i + j)]
3741 | endloop_j
3742 | endloop_i
3744 the following constraints come from the iteration domain:
3746 0 <= i <= Ni
3747 0 <= i + di <= Ni
3748 0 <= j <= Nj
3749 0 <= j + dj <= Nj
3751 where di, dj are the distance variables. The constraints
3752 representing the conflicting elements are:
3754 i = 2 * (j + dj)
3755 i + 1 = 2 * (i + di + j + dj)
3757 For asking that the resulting distance vector (di, dj) be
3758 lexicographically positive, we insert the constraint "di >= 0". If
3759 "di = 0" in the solution, we fix that component to zero, and we
3760 look at the inner loops: we set a new problem where all the outer
3761 loop distances are zero, and fix this inner component to be
3762 positive. When one of the components is positive, we save that
3763 distance, and set a new problem where the distance on this loop is
3764 zero, searching for other distances in the inner loops. Here is
3765 the classic example that illustrates that we have to set for each
3766 inner loop a new problem:
3768 | loop_1
3769 | loop_2
3770 | A[10]
3771 | endloop_2
3772 | endloop_1
3774 we have to save two distances (1, 0) and (0, 1).
3776 Given two array references, refA and refB, we have to set the
3777 dependence problem twice, refA vs. refB and refB vs. refA, and we
3778 cannot do a single test, as refB might occur before refA in the
3779 inner loops, and the contrary when considering outer loops: ex.
3781 | loop_0
3782 | loop_1
3783 | loop_2
3784 | T[{1,+,1}_2][{1,+,1}_1] // refA
3785 | T[{2,+,1}_2][{0,+,1}_1] // refB
3786 | endloop_2
3787 | endloop_1
3788 | endloop_0
3790 refB touches the elements in T before refA, and thus for the same
3791 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3792 but for successive loop_0 iterations, we have (1, -1, 1)
3794 The Omega solver expects the distance variables ("di" in the
3795 previous example) to come first in the constraint system (as
3796 variables to be protected, or "safe" variables), the constraint
3797 system is built using the following layout:
3799 "cst | distance vars | index vars".
3802 static bool
3803 init_omega_for_ddr (struct data_dependence_relation *ddr,
3804 bool *maybe_dependent)
3806 omega_pb pb;
3807 bool res = false;
3809 *maybe_dependent = true;
3811 if (same_access_functions (ddr))
3813 unsigned j;
3814 lambda_vector dir_v;
3816 /* Save the 0 vector. */
3817 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3818 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3819 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3820 dir_v[j] = dir_equal;
3821 save_dir_v (ddr, dir_v);
3823 /* Save the dependences carried by outer loops. */
3824 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3825 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3826 maybe_dependent);
3827 omega_free_problem (pb);
3828 return res;
3831 /* Omega expects the protected variables (those that have to be kept
3832 after elimination) to appear first in the constraint system.
3833 These variables are the distance variables. In the following
3834 initialization we declare NB_LOOPS safe variables, and the total
3835 number of variables for the constraint system is 2*NB_LOOPS. */
3836 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3837 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3838 maybe_dependent);
3839 omega_free_problem (pb);
3841 /* Stop computation if not decidable, or no dependence. */
3842 if (res == false || *maybe_dependent == false)
3843 return res;
3845 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3846 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
3847 maybe_dependent);
3848 omega_free_problem (pb);
3850 return res;
3853 /* Return true when DDR contains the same information as that stored
3854 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
3856 static bool
3857 ddr_consistent_p (FILE *file,
3858 struct data_dependence_relation *ddr,
3859 VEC (lambda_vector, heap) *dist_vects,
3860 VEC (lambda_vector, heap) *dir_vects)
3862 unsigned int i, j;
3864 /* If dump_file is set, output there. */
3865 if (dump_file && (dump_flags & TDF_DETAILS))
3866 file = dump_file;
3868 if (VEC_length (lambda_vector, dist_vects) != DDR_NUM_DIST_VECTS (ddr))
3870 lambda_vector b_dist_v;
3871 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
3872 VEC_length (lambda_vector, dist_vects),
3873 DDR_NUM_DIST_VECTS (ddr));
3875 fprintf (file, "Banerjee dist vectors:\n");
3876 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, i, b_dist_v)
3877 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
3879 fprintf (file, "Omega dist vectors:\n");
3880 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3881 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
3883 fprintf (file, "data dependence relation:\n");
3884 dump_data_dependence_relation (file, ddr);
3886 fprintf (file, ")\n");
3887 return false;
3890 if (VEC_length (lambda_vector, dir_vects) != DDR_NUM_DIR_VECTS (ddr))
3892 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
3893 VEC_length (lambda_vector, dir_vects),
3894 DDR_NUM_DIR_VECTS (ddr));
3895 return false;
3898 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3900 lambda_vector a_dist_v;
3901 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
3903 /* Distance vectors are not ordered in the same way in the DDR
3904 and in the DIST_VECTS: search for a matching vector. */
3905 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, a_dist_v)
3906 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
3907 break;
3909 if (j == VEC_length (lambda_vector, dist_vects))
3911 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
3912 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
3913 fprintf (file, "not found in Omega dist vectors:\n");
3914 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
3915 fprintf (file, "data dependence relation:\n");
3916 dump_data_dependence_relation (file, ddr);
3917 fprintf (file, ")\n");
3921 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
3923 lambda_vector a_dir_v;
3924 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
3926 /* Direction vectors are not ordered in the same way in the DDR
3927 and in the DIR_VECTS: search for a matching vector. */
3928 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, a_dir_v)
3929 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
3930 break;
3932 if (j == VEC_length (lambda_vector, dist_vects))
3934 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
3935 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
3936 fprintf (file, "not found in Omega dir vectors:\n");
3937 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
3938 fprintf (file, "data dependence relation:\n");
3939 dump_data_dependence_relation (file, ddr);
3940 fprintf (file, ")\n");
3944 return true;
3947 /* This computes the affine dependence relation between A and B with
3948 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
3949 independence between two accesses, while CHREC_DONT_KNOW is used
3950 for representing the unknown relation.
3952 Note that it is possible to stop the computation of the dependence
3953 relation the first time we detect a CHREC_KNOWN element for a given
3954 subscript. */
3956 static void
3957 compute_affine_dependence (struct data_dependence_relation *ddr,
3958 struct loop *loop_nest)
3960 struct data_reference *dra = DDR_A (ddr);
3961 struct data_reference *drb = DDR_B (ddr);
3963 if (dump_file && (dump_flags & TDF_DETAILS))
3965 fprintf (dump_file, "(compute_affine_dependence\n");
3966 fprintf (dump_file, " (stmt_a = \n");
3967 print_gimple_stmt (dump_file, DR_STMT (dra), 0, 0);
3968 fprintf (dump_file, ")\n (stmt_b = \n");
3969 print_gimple_stmt (dump_file, DR_STMT (drb), 0, 0);
3970 fprintf (dump_file, ")\n");
3973 /* Analyze only when the dependence relation is not yet known. */
3974 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE
3975 && !DDR_SELF_REFERENCE (ddr))
3977 dependence_stats.num_dependence_tests++;
3979 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
3980 && access_functions_are_affine_or_constant_p (drb, loop_nest))
3982 if (flag_check_data_deps)
3984 /* Compute the dependences using the first algorithm. */
3985 subscript_dependence_tester (ddr, loop_nest);
3987 if (dump_file && (dump_flags & TDF_DETAILS))
3989 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
3990 dump_data_dependence_relation (dump_file, ddr);
3993 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
3995 bool maybe_dependent;
3996 VEC (lambda_vector, heap) *dir_vects, *dist_vects;
3998 /* Save the result of the first DD analyzer. */
3999 dist_vects = DDR_DIST_VECTS (ddr);
4000 dir_vects = DDR_DIR_VECTS (ddr);
4002 /* Reset the information. */
4003 DDR_DIST_VECTS (ddr) = NULL;
4004 DDR_DIR_VECTS (ddr) = NULL;
4006 /* Compute the same information using Omega. */
4007 if (!init_omega_for_ddr (ddr, &maybe_dependent))
4008 goto csys_dont_know;
4010 if (dump_file && (dump_flags & TDF_DETAILS))
4012 fprintf (dump_file, "Omega Analyzer\n");
4013 dump_data_dependence_relation (dump_file, ddr);
4016 /* Check that we get the same information. */
4017 if (maybe_dependent)
4018 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
4019 dir_vects));
4022 else
4023 subscript_dependence_tester (ddr, loop_nest);
4026 /* As a last case, if the dependence cannot be determined, or if
4027 the dependence is considered too difficult to determine, answer
4028 "don't know". */
4029 else
4031 csys_dont_know:;
4032 dependence_stats.num_dependence_undetermined++;
4034 if (dump_file && (dump_flags & TDF_DETAILS))
4036 fprintf (dump_file, "Data ref a:\n");
4037 dump_data_reference (dump_file, dra);
4038 fprintf (dump_file, "Data ref b:\n");
4039 dump_data_reference (dump_file, drb);
4040 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4042 finalize_ddr_dependent (ddr, chrec_dont_know);
4046 if (dump_file && (dump_flags & TDF_DETAILS))
4047 fprintf (dump_file, ")\n");
4050 /* This computes the dependence relation for the same data
4051 reference into DDR. */
4053 static void
4054 compute_self_dependence (struct data_dependence_relation *ddr)
4056 unsigned int i;
4057 struct subscript *subscript;
4059 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
4060 return;
4062 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
4063 i++)
4065 if (SUB_CONFLICTS_IN_A (subscript))
4066 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
4067 if (SUB_CONFLICTS_IN_B (subscript))
4068 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
4070 /* The accessed index overlaps for each iteration. */
4071 SUB_CONFLICTS_IN_A (subscript)
4072 = conflict_fn (1, affine_fn_cst (integer_zero_node));
4073 SUB_CONFLICTS_IN_B (subscript)
4074 = conflict_fn (1, affine_fn_cst (integer_zero_node));
4075 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
4078 /* The distance vector is the zero vector. */
4079 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4080 save_dir_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4083 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4084 the data references in DATAREFS, in the LOOP_NEST. When
4085 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4086 relations. */
4088 void
4089 compute_all_dependences (VEC (data_reference_p, heap) *datarefs,
4090 VEC (ddr_p, heap) **dependence_relations,
4091 VEC (loop_p, heap) *loop_nest,
4092 bool compute_self_and_rr)
4094 struct data_dependence_relation *ddr;
4095 struct data_reference *a, *b;
4096 unsigned int i, j;
4098 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4099 for (j = i + 1; VEC_iterate (data_reference_p, datarefs, j, b); j++)
4100 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4102 ddr = initialize_data_dependence_relation (a, b, loop_nest);
4103 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4104 if (loop_nest)
4105 compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
4108 if (compute_self_and_rr)
4109 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4111 ddr = initialize_data_dependence_relation (a, a, loop_nest);
4112 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4113 compute_self_dependence (ddr);
4117 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4118 true if STMT clobbers memory, false otherwise. */
4120 bool
4121 get_references_in_stmt (gimple stmt, VEC (data_ref_loc, heap) **references)
4123 bool clobbers_memory = false;
4124 data_ref_loc *ref;
4125 tree *op0, *op1;
4126 enum gimple_code stmt_code = gimple_code (stmt);
4128 *references = NULL;
4130 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4131 Calls have side-effects, except those to const or pure
4132 functions. */
4133 if ((stmt_code == GIMPLE_CALL
4134 && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)))
4135 || (stmt_code == GIMPLE_ASM
4136 && gimple_asm_volatile_p (stmt)))
4137 clobbers_memory = true;
4139 if (!gimple_vuse (stmt))
4140 return clobbers_memory;
4142 if (stmt_code == GIMPLE_ASSIGN)
4144 tree base;
4145 op0 = gimple_assign_lhs_ptr (stmt);
4146 op1 = gimple_assign_rhs1_ptr (stmt);
4148 if (DECL_P (*op1)
4149 || (REFERENCE_CLASS_P (*op1)
4150 && (base = get_base_address (*op1))
4151 && TREE_CODE (base) != SSA_NAME))
4153 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4154 ref->pos = op1;
4155 ref->is_read = true;
4158 else if (stmt_code == GIMPLE_CALL)
4160 unsigned i, n;
4162 op0 = gimple_call_lhs_ptr (stmt);
4163 n = gimple_call_num_args (stmt);
4164 for (i = 0; i < n; i++)
4166 op1 = gimple_call_arg_ptr (stmt, i);
4168 if (DECL_P (*op1)
4169 || (REFERENCE_CLASS_P (*op1) && get_base_address (*op1)))
4171 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4172 ref->pos = op1;
4173 ref->is_read = true;
4177 else
4178 return clobbers_memory;
4180 if (*op0
4181 && (DECL_P (*op0)
4182 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0))))
4184 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4185 ref->pos = op0;
4186 ref->is_read = false;
4188 return clobbers_memory;
4191 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4192 reference, returns false, otherwise returns true. NEST is the outermost
4193 loop of the loop nest in which the references should be analyzed. */
4195 bool
4196 find_data_references_in_stmt (struct loop *nest, gimple stmt,
4197 VEC (data_reference_p, heap) **datarefs)
4199 unsigned i;
4200 VEC (data_ref_loc, heap) *references;
4201 data_ref_loc *ref;
4202 bool ret = true;
4203 data_reference_p dr;
4205 if (get_references_in_stmt (stmt, &references))
4207 VEC_free (data_ref_loc, heap, references);
4208 return false;
4211 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4213 dr = create_data_ref (nest, loop_containing_stmt (stmt),
4214 *ref->pos, stmt, ref->is_read);
4215 gcc_assert (dr != NULL);
4216 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4218 VEC_free (data_ref_loc, heap, references);
4219 return ret;
4222 /* Stores the data references in STMT to DATAREFS. If there is an
4223 unanalyzable reference, returns false, otherwise returns true.
4224 NEST is the outermost loop of the loop nest in which the references
4225 should be instantiated, LOOP is the loop in which the references
4226 should be analyzed. */
4228 bool
4229 graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple stmt,
4230 VEC (data_reference_p, heap) **datarefs)
4232 unsigned i;
4233 VEC (data_ref_loc, heap) *references;
4234 data_ref_loc *ref;
4235 bool ret = true;
4236 data_reference_p dr;
4238 if (get_references_in_stmt (stmt, &references))
4240 VEC_free (data_ref_loc, heap, references);
4241 return false;
4244 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4246 dr = create_data_ref (nest, loop, *ref->pos, stmt, ref->is_read);
4247 gcc_assert (dr != NULL);
4248 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4251 VEC_free (data_ref_loc, heap, references);
4252 return ret;
4255 /* Search the data references in LOOP, and record the information into
4256 DATAREFS. Returns chrec_dont_know when failing to analyze a
4257 difficult case, returns NULL_TREE otherwise. */
4259 tree
4260 find_data_references_in_bb (struct loop *loop, basic_block bb,
4261 VEC (data_reference_p, heap) **datarefs)
4263 gimple_stmt_iterator bsi;
4265 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4267 gimple stmt = gsi_stmt (bsi);
4269 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4271 struct data_reference *res;
4272 res = XCNEW (struct data_reference);
4273 VEC_safe_push (data_reference_p, heap, *datarefs, res);
4275 return chrec_dont_know;
4279 return NULL_TREE;
4282 /* Search the data references in LOOP, and record the information into
4283 DATAREFS. Returns chrec_dont_know when failing to analyze a
4284 difficult case, returns NULL_TREE otherwise.
4286 TODO: This function should be made smarter so that it can handle address
4287 arithmetic as if they were array accesses, etc. */
4289 tree
4290 find_data_references_in_loop (struct loop *loop,
4291 VEC (data_reference_p, heap) **datarefs)
4293 basic_block bb, *bbs;
4294 unsigned int i;
4296 bbs = get_loop_body_in_dom_order (loop);
4298 for (i = 0; i < loop->num_nodes; i++)
4300 bb = bbs[i];
4302 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4304 free (bbs);
4305 return chrec_dont_know;
4308 free (bbs);
4310 return NULL_TREE;
4313 /* Recursive helper function. */
4315 static bool
4316 find_loop_nest_1 (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4318 /* Inner loops of the nest should not contain siblings. Example:
4319 when there are two consecutive loops,
4321 | loop_0
4322 | loop_1
4323 | A[{0, +, 1}_1]
4324 | endloop_1
4325 | loop_2
4326 | A[{0, +, 1}_2]
4327 | endloop_2
4328 | endloop_0
4330 the dependence relation cannot be captured by the distance
4331 abstraction. */
4332 if (loop->next)
4333 return false;
4335 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4336 if (loop->inner)
4337 return find_loop_nest_1 (loop->inner, loop_nest);
4338 return true;
4341 /* Return false when the LOOP is not well nested. Otherwise return
4342 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4343 contain the loops from the outermost to the innermost, as they will
4344 appear in the classic distance vector. */
4346 bool
4347 find_loop_nest (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4349 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4350 if (loop->inner)
4351 return find_loop_nest_1 (loop->inner, loop_nest);
4352 return true;
4355 /* Returns true when the data dependences have been computed, false otherwise.
4356 Given a loop nest LOOP, the following vectors are returned:
4357 DATAREFS is initialized to all the array elements contained in this loop,
4358 DEPENDENCE_RELATIONS contains the relations between the data references.
4359 Compute read-read and self relations if
4360 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4362 bool
4363 compute_data_dependences_for_loop (struct loop *loop,
4364 bool compute_self_and_read_read_dependences,
4365 VEC (loop_p, heap) **loop_nest,
4366 VEC (data_reference_p, heap) **datarefs,
4367 VEC (ddr_p, heap) **dependence_relations)
4369 bool res = true;
4371 memset (&dependence_stats, 0, sizeof (dependence_stats));
4373 /* If the loop nest is not well formed, or one of the data references
4374 is not computable, give up without spending time to compute other
4375 dependences. */
4376 if (!loop
4377 || !find_loop_nest (loop, loop_nest)
4378 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know)
4380 struct data_dependence_relation *ddr;
4382 /* Insert a single relation into dependence_relations:
4383 chrec_dont_know. */
4384 ddr = initialize_data_dependence_relation (NULL, NULL, *loop_nest);
4385 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4386 res = false;
4388 else
4389 compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
4390 compute_self_and_read_read_dependences);
4392 if (dump_file && (dump_flags & TDF_STATS))
4394 fprintf (dump_file, "Dependence tester statistics:\n");
4396 fprintf (dump_file, "Number of dependence tests: %d\n",
4397 dependence_stats.num_dependence_tests);
4398 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4399 dependence_stats.num_dependence_dependent);
4400 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4401 dependence_stats.num_dependence_independent);
4402 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4403 dependence_stats.num_dependence_undetermined);
4405 fprintf (dump_file, "Number of subscript tests: %d\n",
4406 dependence_stats.num_subscript_tests);
4407 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4408 dependence_stats.num_subscript_undetermined);
4409 fprintf (dump_file, "Number of same subscript function: %d\n",
4410 dependence_stats.num_same_subscript_function);
4412 fprintf (dump_file, "Number of ziv tests: %d\n",
4413 dependence_stats.num_ziv);
4414 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4415 dependence_stats.num_ziv_dependent);
4416 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4417 dependence_stats.num_ziv_independent);
4418 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4419 dependence_stats.num_ziv_unimplemented);
4421 fprintf (dump_file, "Number of siv tests: %d\n",
4422 dependence_stats.num_siv);
4423 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4424 dependence_stats.num_siv_dependent);
4425 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4426 dependence_stats.num_siv_independent);
4427 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4428 dependence_stats.num_siv_unimplemented);
4430 fprintf (dump_file, "Number of miv tests: %d\n",
4431 dependence_stats.num_miv);
4432 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4433 dependence_stats.num_miv_dependent);
4434 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4435 dependence_stats.num_miv_independent);
4436 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4437 dependence_stats.num_miv_unimplemented);
4440 return res;
4443 /* Returns true when the data dependences for the basic block BB have been
4444 computed, false otherwise.
4445 DATAREFS is initialized to all the array elements contained in this basic
4446 block, DEPENDENCE_RELATIONS contains the relations between the data
4447 references. Compute read-read and self relations if
4448 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4449 bool
4450 compute_data_dependences_for_bb (basic_block bb,
4451 bool compute_self_and_read_read_dependences,
4452 VEC (data_reference_p, heap) **datarefs,
4453 VEC (ddr_p, heap) **dependence_relations)
4455 if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know)
4456 return false;
4458 compute_all_dependences (*datarefs, dependence_relations, NULL,
4459 compute_self_and_read_read_dependences);
4460 return true;
4463 /* Entry point (for testing only). Analyze all the data references
4464 and the dependence relations in LOOP.
4466 The data references are computed first.
4468 A relation on these nodes is represented by a complete graph. Some
4469 of the relations could be of no interest, thus the relations can be
4470 computed on demand.
4472 In the following function we compute all the relations. This is
4473 just a first implementation that is here for:
4474 - for showing how to ask for the dependence relations,
4475 - for the debugging the whole dependence graph,
4476 - for the dejagnu testcases and maintenance.
4478 It is possible to ask only for a part of the graph, avoiding to
4479 compute the whole dependence graph. The computed dependences are
4480 stored in a knowledge base (KB) such that later queries don't
4481 recompute the same information. The implementation of this KB is
4482 transparent to the optimizer, and thus the KB can be changed with a
4483 more efficient implementation, or the KB could be disabled. */
4484 static void
4485 analyze_all_data_dependences (struct loop *loop)
4487 unsigned int i;
4488 int nb_data_refs = 10;
4489 VEC (data_reference_p, heap) *datarefs =
4490 VEC_alloc (data_reference_p, heap, nb_data_refs);
4491 VEC (ddr_p, heap) *dependence_relations =
4492 VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs);
4493 VEC (loop_p, heap) *loop_nest = VEC_alloc (loop_p, heap, 3);
4495 /* Compute DDs on the whole function. */
4496 compute_data_dependences_for_loop (loop, false, &loop_nest, &datarefs,
4497 &dependence_relations);
4499 if (dump_file)
4501 dump_data_dependence_relations (dump_file, dependence_relations);
4502 fprintf (dump_file, "\n\n");
4504 if (dump_flags & TDF_DETAILS)
4505 dump_dist_dir_vectors (dump_file, dependence_relations);
4507 if (dump_flags & TDF_STATS)
4509 unsigned nb_top_relations = 0;
4510 unsigned nb_bot_relations = 0;
4511 unsigned nb_chrec_relations = 0;
4512 struct data_dependence_relation *ddr;
4514 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4516 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4517 nb_top_relations++;
4519 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4520 nb_bot_relations++;
4522 else
4523 nb_chrec_relations++;
4526 gather_stats_on_scev_database ();
4530 VEC_free (loop_p, heap, loop_nest);
4531 free_dependence_relations (dependence_relations);
4532 free_data_refs (datarefs);
4535 /* Computes all the data dependences and check that the results of
4536 several analyzers are the same. */
4538 void
4539 tree_check_data_deps (void)
4541 loop_iterator li;
4542 struct loop *loop_nest;
4544 FOR_EACH_LOOP (li, loop_nest, 0)
4545 analyze_all_data_dependences (loop_nest);
4548 /* Free the memory used by a data dependence relation DDR. */
4550 void
4551 free_dependence_relation (struct data_dependence_relation *ddr)
4553 if (ddr == NULL)
4554 return;
4556 if (DDR_SUBSCRIPTS (ddr))
4557 free_subscripts (DDR_SUBSCRIPTS (ddr));
4558 if (DDR_DIST_VECTS (ddr))
4559 VEC_free (lambda_vector, heap, DDR_DIST_VECTS (ddr));
4560 if (DDR_DIR_VECTS (ddr))
4561 VEC_free (lambda_vector, heap, DDR_DIR_VECTS (ddr));
4563 free (ddr);
4566 /* Free the memory used by the data dependence relations from
4567 DEPENDENCE_RELATIONS. */
4569 void
4570 free_dependence_relations (VEC (ddr_p, heap) *dependence_relations)
4572 unsigned int i;
4573 struct data_dependence_relation *ddr;
4575 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4576 if (ddr)
4577 free_dependence_relation (ddr);
4579 VEC_free (ddr_p, heap, dependence_relations);
4582 /* Free the memory used by the data references from DATAREFS. */
4584 void
4585 free_data_refs (VEC (data_reference_p, heap) *datarefs)
4587 unsigned int i;
4588 struct data_reference *dr;
4590 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
4591 free_data_ref (dr);
4592 VEC_free (data_reference_p, heap, datarefs);
4597 /* Dump vertex I in RDG to FILE. */
4599 void
4600 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
4602 struct vertex *v = &(rdg->vertices[i]);
4603 struct graph_edge *e;
4605 fprintf (file, "(vertex %d: (%s%s) (in:", i,
4606 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
4607 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
4609 if (v->pred)
4610 for (e = v->pred; e; e = e->pred_next)
4611 fprintf (file, " %d", e->src);
4613 fprintf (file, ") (out:");
4615 if (v->succ)
4616 for (e = v->succ; e; e = e->succ_next)
4617 fprintf (file, " %d", e->dest);
4619 fprintf (file, ")\n");
4620 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
4621 fprintf (file, ")\n");
4624 /* Call dump_rdg_vertex on stderr. */
4626 DEBUG_FUNCTION void
4627 debug_rdg_vertex (struct graph *rdg, int i)
4629 dump_rdg_vertex (stderr, rdg, i);
4632 /* Dump component C of RDG to FILE. If DUMPED is non-null, set the
4633 dumped vertices to that bitmap. */
4635 void dump_rdg_component (FILE *file, struct graph *rdg, int c, bitmap dumped)
4637 int i;
4639 fprintf (file, "(%d\n", c);
4641 for (i = 0; i < rdg->n_vertices; i++)
4642 if (rdg->vertices[i].component == c)
4644 if (dumped)
4645 bitmap_set_bit (dumped, i);
4647 dump_rdg_vertex (file, rdg, i);
4650 fprintf (file, ")\n");
4653 /* Call dump_rdg_vertex on stderr. */
4655 DEBUG_FUNCTION void
4656 debug_rdg_component (struct graph *rdg, int c)
4658 dump_rdg_component (stderr, rdg, c, NULL);
4661 /* Dump the reduced dependence graph RDG to FILE. */
4663 void
4664 dump_rdg (FILE *file, struct graph *rdg)
4666 int i;
4667 bitmap dumped = BITMAP_ALLOC (NULL);
4669 fprintf (file, "(rdg\n");
4671 for (i = 0; i < rdg->n_vertices; i++)
4672 if (!bitmap_bit_p (dumped, i))
4673 dump_rdg_component (file, rdg, rdg->vertices[i].component, dumped);
4675 fprintf (file, ")\n");
4676 BITMAP_FREE (dumped);
4679 /* Call dump_rdg on stderr. */
4681 DEBUG_FUNCTION void
4682 debug_rdg (struct graph *rdg)
4684 dump_rdg (stderr, rdg);
4687 static void
4688 dot_rdg_1 (FILE *file, struct graph *rdg)
4690 int i;
4692 fprintf (file, "digraph RDG {\n");
4694 for (i = 0; i < rdg->n_vertices; i++)
4696 struct vertex *v = &(rdg->vertices[i]);
4697 struct graph_edge *e;
4699 /* Highlight reads from memory. */
4700 if (RDG_MEM_READS_STMT (rdg, i))
4701 fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
4703 /* Highlight stores to memory. */
4704 if (RDG_MEM_WRITE_STMT (rdg, i))
4705 fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
4707 if (v->succ)
4708 for (e = v->succ; e; e = e->succ_next)
4709 switch (RDGE_TYPE (e))
4711 case input_dd:
4712 fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
4713 break;
4715 case output_dd:
4716 fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
4717 break;
4719 case flow_dd:
4720 /* These are the most common dependences: don't print these. */
4721 fprintf (file, "%d -> %d \n", i, e->dest);
4722 break;
4724 case anti_dd:
4725 fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
4726 break;
4728 default:
4729 gcc_unreachable ();
4733 fprintf (file, "}\n\n");
4736 /* Display the Reduced Dependence Graph using dotty. */
4737 extern void dot_rdg (struct graph *);
4739 DEBUG_FUNCTION void
4740 dot_rdg (struct graph *rdg)
4742 /* When debugging, enable the following code. This cannot be used
4743 in production compilers because it calls "system". */
4744 #if 0
4745 FILE *file = fopen ("/tmp/rdg.dot", "w");
4746 gcc_assert (file != NULL);
4748 dot_rdg_1 (file, rdg);
4749 fclose (file);
4751 system ("dotty /tmp/rdg.dot &");
4752 #else
4753 dot_rdg_1 (stderr, rdg);
4754 #endif
4757 /* This structure is used for recording the mapping statement index in
4758 the RDG. */
4760 struct GTY(()) rdg_vertex_info
4762 gimple stmt;
4763 int index;
4766 /* Returns the index of STMT in RDG. */
4769 rdg_vertex_for_stmt (struct graph *rdg, gimple stmt)
4771 struct rdg_vertex_info rvi, *slot;
4773 rvi.stmt = stmt;
4774 slot = (struct rdg_vertex_info *) htab_find (rdg->indices, &rvi);
4776 if (!slot)
4777 return -1;
4779 return slot->index;
4782 /* Creates an edge in RDG for each distance vector from DDR. The
4783 order that we keep track of in the RDG is the order in which
4784 statements have to be executed. */
4786 static void
4787 create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4789 struct graph_edge *e;
4790 int va, vb;
4791 data_reference_p dra = DDR_A (ddr);
4792 data_reference_p drb = DDR_B (ddr);
4793 unsigned level = ddr_dependence_level (ddr);
4795 /* For non scalar dependences, when the dependence is REVERSED,
4796 statement B has to be executed before statement A. */
4797 if (level > 0
4798 && !DDR_REVERSED_P (ddr))
4800 data_reference_p tmp = dra;
4801 dra = drb;
4802 drb = tmp;
4805 va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
4806 vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
4808 if (va < 0 || vb < 0)
4809 return;
4811 e = add_edge (rdg, va, vb);
4812 e->data = XNEW (struct rdg_edge);
4814 RDGE_LEVEL (e) = level;
4815 RDGE_RELATION (e) = ddr;
4817 /* Determines the type of the data dependence. */
4818 if (DR_IS_READ (dra) && DR_IS_READ (drb))
4819 RDGE_TYPE (e) = input_dd;
4820 else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
4821 RDGE_TYPE (e) = output_dd;
4822 else if (DR_IS_WRITE (dra) && DR_IS_READ (drb))
4823 RDGE_TYPE (e) = flow_dd;
4824 else if (DR_IS_READ (dra) && DR_IS_WRITE (drb))
4825 RDGE_TYPE (e) = anti_dd;
4828 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
4829 the index of DEF in RDG. */
4831 static void
4832 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
4834 use_operand_p imm_use_p;
4835 imm_use_iterator iterator;
4837 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
4839 struct graph_edge *e;
4840 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
4842 if (use < 0)
4843 continue;
4845 e = add_edge (rdg, idef, use);
4846 e->data = XNEW (struct rdg_edge);
4847 RDGE_TYPE (e) = flow_dd;
4848 RDGE_RELATION (e) = NULL;
4852 /* Creates the edges of the reduced dependence graph RDG. */
4854 static void
4855 create_rdg_edges (struct graph *rdg, VEC (ddr_p, heap) *ddrs)
4857 int i;
4858 struct data_dependence_relation *ddr;
4859 def_operand_p def_p;
4860 ssa_op_iter iter;
4862 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
4863 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4864 create_rdg_edge_for_ddr (rdg, ddr);
4866 for (i = 0; i < rdg->n_vertices; i++)
4867 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
4868 iter, SSA_OP_DEF)
4869 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
4872 /* Build the vertices of the reduced dependence graph RDG. */
4874 void
4875 create_rdg_vertices (struct graph *rdg, VEC (gimple, heap) *stmts)
4877 int i, j;
4878 gimple stmt;
4880 FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
4882 VEC (data_ref_loc, heap) *references;
4883 data_ref_loc *ref;
4884 struct vertex *v = &(rdg->vertices[i]);
4885 struct rdg_vertex_info *rvi = XNEW (struct rdg_vertex_info);
4886 struct rdg_vertex_info **slot;
4888 rvi->stmt = stmt;
4889 rvi->index = i;
4890 slot = (struct rdg_vertex_info **) htab_find_slot (rdg->indices, rvi, INSERT);
4892 if (!*slot)
4893 *slot = rvi;
4894 else
4895 free (rvi);
4897 v->data = XNEW (struct rdg_vertex);
4898 RDG_STMT (rdg, i) = stmt;
4900 RDG_MEM_WRITE_STMT (rdg, i) = false;
4901 RDG_MEM_READS_STMT (rdg, i) = false;
4902 if (gimple_code (stmt) == GIMPLE_PHI)
4903 continue;
4905 get_references_in_stmt (stmt, &references);
4906 FOR_EACH_VEC_ELT (data_ref_loc, references, j, ref)
4907 if (!ref->is_read)
4908 RDG_MEM_WRITE_STMT (rdg, i) = true;
4909 else
4910 RDG_MEM_READS_STMT (rdg, i) = true;
4912 VEC_free (data_ref_loc, heap, references);
4916 /* Initialize STMTS with all the statements of LOOP. When
4917 INCLUDE_PHIS is true, include also the PHI nodes. The order in
4918 which we discover statements is important as
4919 generate_loops_for_partition is using the same traversal for
4920 identifying statements. */
4922 static void
4923 stmts_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
4925 unsigned int i;
4926 basic_block *bbs = get_loop_body_in_dom_order (loop);
4928 for (i = 0; i < loop->num_nodes; i++)
4930 basic_block bb = bbs[i];
4931 gimple_stmt_iterator bsi;
4932 gimple stmt;
4934 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4935 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
4937 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4939 stmt = gsi_stmt (bsi);
4940 if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
4941 VEC_safe_push (gimple, heap, *stmts, stmt);
4945 free (bbs);
4948 /* Returns true when all the dependences are computable. */
4950 static bool
4951 known_dependences_p (VEC (ddr_p, heap) *dependence_relations)
4953 ddr_p ddr;
4954 unsigned int i;
4956 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4957 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4958 return false;
4960 return true;
4963 /* Computes a hash function for element ELT. */
4965 static hashval_t
4966 hash_stmt_vertex_info (const void *elt)
4968 const struct rdg_vertex_info *const rvi =
4969 (const struct rdg_vertex_info *) elt;
4970 gimple stmt = rvi->stmt;
4972 return htab_hash_pointer (stmt);
4975 /* Compares database elements E1 and E2. */
4977 static int
4978 eq_stmt_vertex_info (const void *e1, const void *e2)
4980 const struct rdg_vertex_info *elt1 = (const struct rdg_vertex_info *) e1;
4981 const struct rdg_vertex_info *elt2 = (const struct rdg_vertex_info *) e2;
4983 return elt1->stmt == elt2->stmt;
4986 /* Free the element E. */
4988 static void
4989 hash_stmt_vertex_del (void *e)
4991 free (e);
4994 /* Build the Reduced Dependence Graph (RDG) with one vertex per
4995 statement of the loop nest, and one edge per data dependence or
4996 scalar dependence. */
4998 struct graph *
4999 build_empty_rdg (int n_stmts)
5001 int nb_data_refs = 10;
5002 struct graph *rdg = new_graph (n_stmts);
5004 rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
5005 eq_stmt_vertex_info, hash_stmt_vertex_del);
5006 return rdg;
5009 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5010 statement of the loop nest, and one edge per data dependence or
5011 scalar dependence. */
5013 struct graph *
5014 build_rdg (struct loop *loop,
5015 VEC (loop_p, heap) **loop_nest,
5016 VEC (ddr_p, heap) **dependence_relations,
5017 VEC (data_reference_p, heap) **datarefs)
5019 struct graph *rdg = NULL;
5020 VEC (gimple, heap) *stmts = VEC_alloc (gimple, heap, 10);
5022 compute_data_dependences_for_loop (loop, false, loop_nest, datarefs,
5023 dependence_relations);
5025 if (known_dependences_p (*dependence_relations))
5027 stmts_from_loop (loop, &stmts);
5028 rdg = build_empty_rdg (VEC_length (gimple, stmts));
5029 create_rdg_vertices (rdg, stmts);
5030 create_rdg_edges (rdg, *dependence_relations);
5033 VEC_free (gimple, heap, stmts);
5034 return rdg;
5037 /* Free the reduced dependence graph RDG. */
5039 void
5040 free_rdg (struct graph *rdg)
5042 int i;
5044 for (i = 0; i < rdg->n_vertices; i++)
5046 struct vertex *v = &(rdg->vertices[i]);
5047 struct graph_edge *e;
5049 for (e = v->succ; e; e = e->succ_next)
5050 free (e->data);
5052 free (v->data);
5055 htab_delete (rdg->indices);
5056 free_graph (rdg);
5059 /* Initialize STMTS with all the statements of LOOP that contain a
5060 store to memory. */
5062 void
5063 stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5065 unsigned int i;
5066 basic_block *bbs = get_loop_body_in_dom_order (loop);
5068 for (i = 0; i < loop->num_nodes; i++)
5070 basic_block bb = bbs[i];
5071 gimple_stmt_iterator bsi;
5073 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5074 if (gimple_vdef (gsi_stmt (bsi)))
5075 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
5078 free (bbs);
5081 /* Returns true when the statement at STMT is of the form "A[i] = 0"
5082 that contains a data reference on its LHS with a stride of the same
5083 size as its unit type. */
5085 bool
5086 stmt_with_adjacent_zero_store_dr_p (gimple stmt)
5088 tree op0, op1;
5089 bool res;
5090 struct data_reference *dr;
5092 if (!stmt
5093 || !gimple_vdef (stmt)
5094 || !is_gimple_assign (stmt)
5095 || !gimple_assign_single_p (stmt)
5096 || !(op1 = gimple_assign_rhs1 (stmt))
5097 || !(integer_zerop (op1) || real_zerop (op1)))
5098 return false;
5100 dr = XCNEW (struct data_reference);
5101 op0 = gimple_assign_lhs (stmt);
5103 DR_STMT (dr) = stmt;
5104 DR_REF (dr) = op0;
5106 res = dr_analyze_innermost (dr)
5107 && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0));
5109 free_data_ref (dr);
5110 return res;
5113 /* Initialize STMTS with all the statements of LOOP that contain a
5114 store to memory of the form "A[i] = 0". */
5116 void
5117 stores_zero_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5119 unsigned int i;
5120 basic_block bb;
5121 gimple_stmt_iterator si;
5122 gimple stmt;
5123 basic_block *bbs = get_loop_body_in_dom_order (loop);
5125 for (i = 0; i < loop->num_nodes; i++)
5126 for (bb = bbs[i], si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5127 if ((stmt = gsi_stmt (si))
5128 && stmt_with_adjacent_zero_store_dr_p (stmt))
5129 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (si));
5131 free (bbs);
5134 /* For a data reference REF, return the declaration of its base
5135 address or NULL_TREE if the base is not determined. */
5137 static inline tree
5138 ref_base_address (gimple stmt, data_ref_loc *ref)
5140 tree base = NULL_TREE;
5141 tree base_address;
5142 struct data_reference *dr = XCNEW (struct data_reference);
5144 DR_STMT (dr) = stmt;
5145 DR_REF (dr) = *ref->pos;
5146 dr_analyze_innermost (dr);
5147 base_address = DR_BASE_ADDRESS (dr);
5149 if (!base_address)
5150 goto end;
5152 switch (TREE_CODE (base_address))
5154 case ADDR_EXPR:
5155 base = TREE_OPERAND (base_address, 0);
5156 break;
5158 default:
5159 base = base_address;
5160 break;
5163 end:
5164 free_data_ref (dr);
5165 return base;
5168 /* Determines whether the statement from vertex V of the RDG has a
5169 definition used outside the loop that contains this statement. */
5171 bool
5172 rdg_defs_used_in_other_loops_p (struct graph *rdg, int v)
5174 gimple stmt = RDG_STMT (rdg, v);
5175 struct loop *loop = loop_containing_stmt (stmt);
5176 use_operand_p imm_use_p;
5177 imm_use_iterator iterator;
5178 ssa_op_iter it;
5179 def_operand_p def_p;
5181 if (!loop)
5182 return true;
5184 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, it, SSA_OP_DEF)
5186 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, DEF_FROM_PTR (def_p))
5188 if (loop_containing_stmt (USE_STMT (imm_use_p)) != loop)
5189 return true;
5193 return false;
5196 /* Determines whether statements S1 and S2 access to similar memory
5197 locations. Two memory accesses are considered similar when they
5198 have the same base address declaration, i.e. when their
5199 ref_base_address is the same. */
5201 bool
5202 have_similar_memory_accesses (gimple s1, gimple s2)
5204 bool res = false;
5205 unsigned i, j;
5206 VEC (data_ref_loc, heap) *refs1, *refs2;
5207 data_ref_loc *ref1, *ref2;
5209 get_references_in_stmt (s1, &refs1);
5210 get_references_in_stmt (s2, &refs2);
5212 FOR_EACH_VEC_ELT (data_ref_loc, refs1, i, ref1)
5214 tree base1 = ref_base_address (s1, ref1);
5216 if (base1)
5217 FOR_EACH_VEC_ELT (data_ref_loc, refs2, j, ref2)
5218 if (base1 == ref_base_address (s2, ref2))
5220 res = true;
5221 goto end;
5225 end:
5226 VEC_free (data_ref_loc, heap, refs1);
5227 VEC_free (data_ref_loc, heap, refs2);
5228 return res;
5231 /* Helper function for the hashtab. */
5233 static int
5234 have_similar_memory_accesses_1 (const void *s1, const void *s2)
5236 return have_similar_memory_accesses (CONST_CAST_GIMPLE ((const_gimple) s1),
5237 CONST_CAST_GIMPLE ((const_gimple) s2));
5240 /* Helper function for the hashtab. */
5242 static hashval_t
5243 ref_base_address_1 (const void *s)
5245 gimple stmt = CONST_CAST_GIMPLE ((const_gimple) s);
5246 unsigned i;
5247 VEC (data_ref_loc, heap) *refs;
5248 data_ref_loc *ref;
5249 hashval_t res = 0;
5251 get_references_in_stmt (stmt, &refs);
5253 FOR_EACH_VEC_ELT (data_ref_loc, refs, i, ref)
5254 if (!ref->is_read)
5256 res = htab_hash_pointer (ref_base_address (stmt, ref));
5257 break;
5260 VEC_free (data_ref_loc, heap, refs);
5261 return res;
5264 /* Try to remove duplicated write data references from STMTS. */
5266 void
5267 remove_similar_memory_refs (VEC (gimple, heap) **stmts)
5269 unsigned i;
5270 gimple stmt;
5271 htab_t seen = htab_create (VEC_length (gimple, *stmts), ref_base_address_1,
5272 have_similar_memory_accesses_1, NULL);
5274 for (i = 0; VEC_iterate (gimple, *stmts, i, stmt); )
5276 void **slot;
5278 slot = htab_find_slot (seen, stmt, INSERT);
5280 if (*slot)
5281 VEC_ordered_remove (gimple, *stmts, i);
5282 else
5284 *slot = (void *) stmt;
5285 i++;
5289 htab_delete (seen);
5292 /* Returns the index of PARAMETER in the parameters vector of the
5293 ACCESS_MATRIX. If PARAMETER does not exist return -1. */
5296 access_matrix_get_index_for_parameter (tree parameter,
5297 struct access_matrix *access_matrix)
5299 int i;
5300 VEC (tree,heap) *lambda_parameters = AM_PARAMETERS (access_matrix);
5301 tree lambda_parameter;
5303 FOR_EACH_VEC_ELT (tree, lambda_parameters, i, lambda_parameter)
5304 if (lambda_parameter == parameter)
5305 return i + AM_NB_INDUCTION_VARS (access_matrix);
5307 return -1;