2011-07-21 François Dumont <francois.cppdevs@free.fr>
[official-gcc.git] / gcc / tree-data-ref.c
blob3e18e8d1837ec2dd6b389bf631dd0bd3893059fd
1 /* Data references and dependences detectors.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* This pass walks a given loop structure searching for array
23 references. The information about the array accesses is recorded
24 in DATA_REFERENCE structures.
26 The basic test for determining the dependences is:
27 given two access functions chrec1 and chrec2 to a same array, and
28 x and y two vectors from the iteration domain, the same element of
29 the array is accessed twice at iterations x and y if and only if:
30 | chrec1 (x) == chrec2 (y).
32 The goals of this analysis are:
34 - to determine the independence: the relation between two
35 independent accesses is qualified with the chrec_known (this
36 information allows a loop parallelization),
38 - when two data references access the same data, to qualify the
39 dependence relation with classic dependence representations:
41 - distance vectors
42 - direction vectors
43 - loop carried level dependence
44 - polyhedron dependence
45 or with the chains of recurrences based representation,
47 - to define a knowledge base for storing the data dependence
48 information,
50 - to define an interface to access this data.
53 Definitions:
55 - subscript: given two array accesses a subscript is the tuple
56 composed of the access functions for a given dimension. Example:
57 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
58 (f1, g1), (f2, g2), (f3, g3).
60 - Diophantine equation: an equation whose coefficients and
61 solutions are integer constants, for example the equation
62 | 3*x + 2*y = 1
63 has an integer solution x = 1 and y = -1.
65 References:
67 - "Advanced Compilation for High Performance Computing" by Randy
68 Allen and Ken Kennedy.
69 http://citeseer.ist.psu.edu/goff91practical.html
71 - "Loop Transformations for Restructuring Compilers - The Foundations"
72 by Utpal Banerjee.
77 #include "config.h"
78 #include "system.h"
79 #include "coretypes.h"
80 #include "gimple-pretty-print.h"
81 #include "tree-flow.h"
82 #include "cfgloop.h"
83 #include "tree-data-ref.h"
84 #include "tree-scalar-evolution.h"
85 #include "tree-pass.h"
86 #include "langhooks.h"
88 static struct datadep_stats
90 int num_dependence_tests;
91 int num_dependence_dependent;
92 int num_dependence_independent;
93 int num_dependence_undetermined;
95 int num_subscript_tests;
96 int num_subscript_undetermined;
97 int num_same_subscript_function;
99 int num_ziv;
100 int num_ziv_independent;
101 int num_ziv_dependent;
102 int num_ziv_unimplemented;
104 int num_siv;
105 int num_siv_independent;
106 int num_siv_dependent;
107 int num_siv_unimplemented;
109 int num_miv;
110 int num_miv_independent;
111 int num_miv_dependent;
112 int num_miv_unimplemented;
113 } dependence_stats;
115 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
116 struct data_reference *,
117 struct data_reference *,
118 struct loop *);
119 /* Returns true iff A divides B. */
121 static inline bool
122 tree_fold_divides_p (const_tree a, const_tree b)
124 gcc_assert (TREE_CODE (a) == INTEGER_CST);
125 gcc_assert (TREE_CODE (b) == INTEGER_CST);
126 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
129 /* Returns true iff A divides B. */
131 static inline bool
132 int_divides_p (int a, int b)
134 return ((b % a) == 0);
139 /* Dump into FILE all the data references from DATAREFS. */
141 void
142 dump_data_references (FILE *file, VEC (data_reference_p, heap) *datarefs)
144 unsigned int i;
145 struct data_reference *dr;
147 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
148 dump_data_reference (file, dr);
151 /* Dump into STDERR all the data references from DATAREFS. */
153 DEBUG_FUNCTION void
154 debug_data_references (VEC (data_reference_p, heap) *datarefs)
156 dump_data_references (stderr, datarefs);
159 /* Dump to STDERR all the dependence relations from DDRS. */
161 DEBUG_FUNCTION void
162 debug_data_dependence_relations (VEC (ddr_p, heap) *ddrs)
164 dump_data_dependence_relations (stderr, ddrs);
167 /* Dump into FILE all the dependence relations from DDRS. */
169 void
170 dump_data_dependence_relations (FILE *file,
171 VEC (ddr_p, heap) *ddrs)
173 unsigned int i;
174 struct data_dependence_relation *ddr;
176 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
177 dump_data_dependence_relation (file, ddr);
180 /* Print to STDERR the data_reference DR. */
182 DEBUG_FUNCTION void
183 debug_data_reference (struct data_reference *dr)
185 dump_data_reference (stderr, dr);
188 /* Dump function for a DATA_REFERENCE structure. */
190 void
191 dump_data_reference (FILE *outf,
192 struct data_reference *dr)
194 unsigned int i;
196 fprintf (outf, "#(Data Ref: \n");
197 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
198 fprintf (outf, "# stmt: ");
199 print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
200 fprintf (outf, "# ref: ");
201 print_generic_stmt (outf, DR_REF (dr), 0);
202 fprintf (outf, "# base_object: ");
203 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
205 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
207 fprintf (outf, "# Access function %d: ", i);
208 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
210 fprintf (outf, "#)\n");
213 /* Dumps the affine function described by FN to the file OUTF. */
215 static void
216 dump_affine_function (FILE *outf, affine_fn fn)
218 unsigned i;
219 tree coef;
221 print_generic_expr (outf, VEC_index (tree, fn, 0), TDF_SLIM);
222 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
224 fprintf (outf, " + ");
225 print_generic_expr (outf, coef, TDF_SLIM);
226 fprintf (outf, " * x_%u", i);
230 /* Dumps the conflict function CF to the file OUTF. */
232 static void
233 dump_conflict_function (FILE *outf, conflict_function *cf)
235 unsigned i;
237 if (cf->n == NO_DEPENDENCE)
238 fprintf (outf, "no dependence\n");
239 else if (cf->n == NOT_KNOWN)
240 fprintf (outf, "not known\n");
241 else
243 for (i = 0; i < cf->n; i++)
245 fprintf (outf, "[");
246 dump_affine_function (outf, cf->fns[i]);
247 fprintf (outf, "]\n");
252 /* Dump function for a SUBSCRIPT structure. */
254 void
255 dump_subscript (FILE *outf, struct subscript *subscript)
257 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
259 fprintf (outf, "\n (subscript \n");
260 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
261 dump_conflict_function (outf, cf);
262 if (CF_NONTRIVIAL_P (cf))
264 tree last_iteration = SUB_LAST_CONFLICT (subscript);
265 fprintf (outf, " last_conflict: ");
266 print_generic_stmt (outf, last_iteration, 0);
269 cf = SUB_CONFLICTS_IN_B (subscript);
270 fprintf (outf, " iterations_that_access_an_element_twice_in_B: ");
271 dump_conflict_function (outf, cf);
272 if (CF_NONTRIVIAL_P (cf))
274 tree last_iteration = SUB_LAST_CONFLICT (subscript);
275 fprintf (outf, " last_conflict: ");
276 print_generic_stmt (outf, last_iteration, 0);
279 fprintf (outf, " (Subscript distance: ");
280 print_generic_stmt (outf, SUB_DISTANCE (subscript), 0);
281 fprintf (outf, " )\n");
282 fprintf (outf, " )\n");
285 /* Print the classic direction vector DIRV to OUTF. */
287 void
288 print_direction_vector (FILE *outf,
289 lambda_vector dirv,
290 int length)
292 int eq;
294 for (eq = 0; eq < length; eq++)
296 enum data_dependence_direction dir = ((enum data_dependence_direction)
297 dirv[eq]);
299 switch (dir)
301 case dir_positive:
302 fprintf (outf, " +");
303 break;
304 case dir_negative:
305 fprintf (outf, " -");
306 break;
307 case dir_equal:
308 fprintf (outf, " =");
309 break;
310 case dir_positive_or_equal:
311 fprintf (outf, " +=");
312 break;
313 case dir_positive_or_negative:
314 fprintf (outf, " +-");
315 break;
316 case dir_negative_or_equal:
317 fprintf (outf, " -=");
318 break;
319 case dir_star:
320 fprintf (outf, " *");
321 break;
322 default:
323 fprintf (outf, "indep");
324 break;
327 fprintf (outf, "\n");
330 /* Print a vector of direction vectors. */
332 void
333 print_dir_vectors (FILE *outf, VEC (lambda_vector, heap) *dir_vects,
334 int length)
336 unsigned j;
337 lambda_vector v;
339 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, v)
340 print_direction_vector (outf, v, length);
343 /* Print out a vector VEC of length N to OUTFILE. */
345 static inline void
346 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
348 int i;
350 for (i = 0; i < n; i++)
351 fprintf (outfile, "%3d ", vector[i]);
352 fprintf (outfile, "\n");
355 /* Print a vector of distance vectors. */
357 void
358 print_dist_vectors (FILE *outf, VEC (lambda_vector, heap) *dist_vects,
359 int length)
361 unsigned j;
362 lambda_vector v;
364 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, v)
365 print_lambda_vector (outf, v, length);
368 /* Debug version. */
370 DEBUG_FUNCTION void
371 debug_data_dependence_relation (struct data_dependence_relation *ddr)
373 dump_data_dependence_relation (stderr, ddr);
376 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
378 void
379 dump_data_dependence_relation (FILE *outf,
380 struct data_dependence_relation *ddr)
382 struct data_reference *dra, *drb;
384 fprintf (outf, "(Data Dep: \n");
386 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
388 if (ddr)
390 dra = DDR_A (ddr);
391 drb = DDR_B (ddr);
392 if (dra)
393 dump_data_reference (outf, dra);
394 else
395 fprintf (outf, " (nil)\n");
396 if (drb)
397 dump_data_reference (outf, drb);
398 else
399 fprintf (outf, " (nil)\n");
401 fprintf (outf, " (don't know)\n)\n");
402 return;
405 dra = DDR_A (ddr);
406 drb = DDR_B (ddr);
407 dump_data_reference (outf, dra);
408 dump_data_reference (outf, drb);
410 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
411 fprintf (outf, " (no dependence)\n");
413 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
415 unsigned int i;
416 struct loop *loopi;
418 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
420 fprintf (outf, " access_fn_A: ");
421 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
422 fprintf (outf, " access_fn_B: ");
423 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
424 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
427 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
428 fprintf (outf, " loop nest: (");
429 FOR_EACH_VEC_ELT (loop_p, DDR_LOOP_NEST (ddr), i, loopi)
430 fprintf (outf, "%d ", loopi->num);
431 fprintf (outf, ")\n");
433 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
435 fprintf (outf, " distance_vector: ");
436 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
437 DDR_NB_LOOPS (ddr));
440 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
442 fprintf (outf, " direction_vector: ");
443 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
444 DDR_NB_LOOPS (ddr));
448 fprintf (outf, ")\n");
451 /* Dump function for a DATA_DEPENDENCE_DIRECTION structure. */
453 void
454 dump_data_dependence_direction (FILE *file,
455 enum data_dependence_direction dir)
457 switch (dir)
459 case dir_positive:
460 fprintf (file, "+");
461 break;
463 case dir_negative:
464 fprintf (file, "-");
465 break;
467 case dir_equal:
468 fprintf (file, "=");
469 break;
471 case dir_positive_or_negative:
472 fprintf (file, "+-");
473 break;
475 case dir_positive_or_equal:
476 fprintf (file, "+=");
477 break;
479 case dir_negative_or_equal:
480 fprintf (file, "-=");
481 break;
483 case dir_star:
484 fprintf (file, "*");
485 break;
487 default:
488 break;
492 /* Dumps the distance and direction vectors in FILE. DDRS contains
493 the dependence relations, and VECT_SIZE is the size of the
494 dependence vectors, or in other words the number of loops in the
495 considered nest. */
497 void
498 dump_dist_dir_vectors (FILE *file, VEC (ddr_p, heap) *ddrs)
500 unsigned int i, j;
501 struct data_dependence_relation *ddr;
502 lambda_vector v;
504 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
505 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
507 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), j, v)
509 fprintf (file, "DISTANCE_V (");
510 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
511 fprintf (file, ")\n");
514 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), j, v)
516 fprintf (file, "DIRECTION_V (");
517 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
518 fprintf (file, ")\n");
522 fprintf (file, "\n\n");
525 /* Dumps the data dependence relations DDRS in FILE. */
527 void
528 dump_ddrs (FILE *file, VEC (ddr_p, heap) *ddrs)
530 unsigned int i;
531 struct data_dependence_relation *ddr;
533 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
534 dump_data_dependence_relation (file, ddr);
536 fprintf (file, "\n\n");
539 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
540 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
541 constant of type ssizetype, and returns true. If we cannot do this
542 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
543 is returned. */
545 static bool
546 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
547 tree *var, tree *off)
549 tree var0, var1;
550 tree off0, off1;
551 enum tree_code ocode = code;
553 *var = NULL_TREE;
554 *off = NULL_TREE;
556 switch (code)
558 case INTEGER_CST:
559 *var = build_int_cst (type, 0);
560 *off = fold_convert (ssizetype, op0);
561 return true;
563 case POINTER_PLUS_EXPR:
564 ocode = PLUS_EXPR;
565 /* FALLTHROUGH */
566 case PLUS_EXPR:
567 case MINUS_EXPR:
568 split_constant_offset (op0, &var0, &off0);
569 split_constant_offset (op1, &var1, &off1);
570 *var = fold_build2 (code, type, var0, var1);
571 *off = size_binop (ocode, off0, off1);
572 return true;
574 case MULT_EXPR:
575 if (TREE_CODE (op1) != INTEGER_CST)
576 return false;
578 split_constant_offset (op0, &var0, &off0);
579 *var = fold_build2 (MULT_EXPR, type, var0, op1);
580 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
581 return true;
583 case ADDR_EXPR:
585 tree base, poffset;
586 HOST_WIDE_INT pbitsize, pbitpos;
587 enum machine_mode pmode;
588 int punsignedp, pvolatilep;
590 op0 = TREE_OPERAND (op0, 0);
591 if (!handled_component_p (op0))
592 return false;
594 base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
595 &pmode, &punsignedp, &pvolatilep, false);
597 if (pbitpos % BITS_PER_UNIT != 0)
598 return false;
599 base = build_fold_addr_expr (base);
600 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
602 if (poffset)
604 split_constant_offset (poffset, &poffset, &off1);
605 off0 = size_binop (PLUS_EXPR, off0, off1);
606 if (POINTER_TYPE_P (TREE_TYPE (base)))
607 base = fold_build_pointer_plus (base, poffset);
608 else
609 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
610 fold_convert (TREE_TYPE (base), poffset));
613 var0 = fold_convert (type, base);
615 /* If variable length types are involved, punt, otherwise casts
616 might be converted into ARRAY_REFs in gimplify_conversion.
617 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
618 possibly no longer appears in current GIMPLE, might resurface.
619 This perhaps could run
620 if (CONVERT_EXPR_P (var0))
622 gimplify_conversion (&var0);
623 // Attempt to fill in any within var0 found ARRAY_REF's
624 // element size from corresponding op embedded ARRAY_REF,
625 // if unsuccessful, just punt.
626 } */
627 while (POINTER_TYPE_P (type))
628 type = TREE_TYPE (type);
629 if (int_size_in_bytes (type) < 0)
630 return false;
632 *var = var0;
633 *off = off0;
634 return true;
637 case SSA_NAME:
639 gimple def_stmt = SSA_NAME_DEF_STMT (op0);
640 enum tree_code subcode;
642 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
643 return false;
645 var0 = gimple_assign_rhs1 (def_stmt);
646 subcode = gimple_assign_rhs_code (def_stmt);
647 var1 = gimple_assign_rhs2 (def_stmt);
649 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
651 CASE_CONVERT:
653 /* We must not introduce undefined overflow, and we must not change the value.
654 Hence we're okay if the inner type doesn't overflow to start with
655 (pointer or signed), the outer type also is an integer or pointer
656 and the outer precision is at least as large as the inner. */
657 tree itype = TREE_TYPE (op0);
658 if ((POINTER_TYPE_P (itype)
659 || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
660 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
661 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
663 split_constant_offset (op0, &var0, off);
664 *var = fold_convert (type, var0);
665 return true;
667 return false;
670 default:
671 return false;
675 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
676 will be ssizetype. */
678 void
679 split_constant_offset (tree exp, tree *var, tree *off)
681 tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
682 enum tree_code code;
684 *var = exp;
685 *off = ssize_int (0);
686 STRIP_NOPS (exp);
688 if (tree_is_chrec (exp))
689 return;
691 otype = TREE_TYPE (exp);
692 code = TREE_CODE (exp);
693 extract_ops_from_tree (exp, &code, &op0, &op1);
694 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
696 *var = fold_convert (type, e);
697 *off = o;
701 /* Returns the address ADDR of an object in a canonical shape (without nop
702 casts, and with type of pointer to the object). */
704 static tree
705 canonicalize_base_object_address (tree addr)
707 tree orig = addr;
709 STRIP_NOPS (addr);
711 /* The base address may be obtained by casting from integer, in that case
712 keep the cast. */
713 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
714 return orig;
716 if (TREE_CODE (addr) != ADDR_EXPR)
717 return addr;
719 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
722 /* Analyzes the behavior of the memory reference DR in the innermost loop or
723 basic block that contains it. Returns true if analysis succeed or false
724 otherwise. */
726 bool
727 dr_analyze_innermost (struct data_reference *dr)
729 gimple stmt = DR_STMT (dr);
730 struct loop *loop = loop_containing_stmt (stmt);
731 tree ref = DR_REF (dr);
732 HOST_WIDE_INT pbitsize, pbitpos;
733 tree base, poffset;
734 enum machine_mode pmode;
735 int punsignedp, pvolatilep;
736 affine_iv base_iv, offset_iv;
737 tree init, dinit, step;
738 bool in_loop = (loop && loop->num);
740 if (dump_file && (dump_flags & TDF_DETAILS))
741 fprintf (dump_file, "analyze_innermost: ");
743 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
744 &pmode, &punsignedp, &pvolatilep, false);
745 gcc_assert (base != NULL_TREE);
747 if (pbitpos % BITS_PER_UNIT != 0)
749 if (dump_file && (dump_flags & TDF_DETAILS))
750 fprintf (dump_file, "failed: bit offset alignment.\n");
751 return false;
754 if (TREE_CODE (base) == MEM_REF)
756 if (!integer_zerop (TREE_OPERAND (base, 1)))
758 if (!poffset)
760 double_int moff = mem_ref_offset (base);
761 poffset = double_int_to_tree (sizetype, moff);
763 else
764 poffset = size_binop (PLUS_EXPR, poffset, TREE_OPERAND (base, 1));
766 base = TREE_OPERAND (base, 0);
768 else
769 base = build_fold_addr_expr (base);
770 if (in_loop)
772 if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
773 false))
775 if (dump_file && (dump_flags & TDF_DETAILS))
776 fprintf (dump_file, "failed: evolution of base is not affine.\n");
777 return false;
780 else
782 base_iv.base = base;
783 base_iv.step = ssize_int (0);
784 base_iv.no_overflow = true;
787 if (!poffset)
789 offset_iv.base = ssize_int (0);
790 offset_iv.step = ssize_int (0);
792 else
794 if (!in_loop)
796 offset_iv.base = poffset;
797 offset_iv.step = ssize_int (0);
799 else if (!simple_iv (loop, loop_containing_stmt (stmt),
800 poffset, &offset_iv, false))
802 if (dump_file && (dump_flags & TDF_DETAILS))
803 fprintf (dump_file, "failed: evolution of offset is not"
804 " affine.\n");
805 return false;
809 init = ssize_int (pbitpos / BITS_PER_UNIT);
810 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
811 init = size_binop (PLUS_EXPR, init, dinit);
812 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
813 init = size_binop (PLUS_EXPR, init, dinit);
815 step = size_binop (PLUS_EXPR,
816 fold_convert (ssizetype, base_iv.step),
817 fold_convert (ssizetype, offset_iv.step));
819 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
821 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
822 DR_INIT (dr) = init;
823 DR_STEP (dr) = step;
825 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
827 if (dump_file && (dump_flags & TDF_DETAILS))
828 fprintf (dump_file, "success.\n");
830 return true;
833 /* Determines the base object and the list of indices of memory reference
834 DR, analyzed in LOOP and instantiated in loop nest NEST. */
836 static void
837 dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
839 VEC (tree, heap) *access_fns = NULL;
840 tree ref = unshare_expr (DR_REF (dr)), aref = ref, op;
841 tree base, off, access_fn = NULL_TREE;
842 basic_block before_loop = NULL;
844 if (nest)
845 before_loop = block_before_loop (nest);
847 while (handled_component_p (aref))
849 if (TREE_CODE (aref) == ARRAY_REF)
851 op = TREE_OPERAND (aref, 1);
852 if (nest)
854 access_fn = analyze_scalar_evolution (loop, op);
855 access_fn = instantiate_scev (before_loop, loop, access_fn);
856 VEC_safe_push (tree, heap, access_fns, access_fn);
859 TREE_OPERAND (aref, 1) = build_int_cst (TREE_TYPE (op), 0);
862 aref = TREE_OPERAND (aref, 0);
865 if (nest
866 && (INDIRECT_REF_P (aref)
867 || TREE_CODE (aref) == MEM_REF))
869 op = TREE_OPERAND (aref, 0);
870 access_fn = analyze_scalar_evolution (loop, op);
871 access_fn = instantiate_scev (before_loop, loop, access_fn);
872 base = initial_condition (access_fn);
873 split_constant_offset (base, &base, &off);
874 if (TREE_CODE (aref) == MEM_REF)
875 off = size_binop (PLUS_EXPR, off,
876 fold_convert (ssizetype, TREE_OPERAND (aref, 1)));
877 access_fn = chrec_replace_initial_condition (access_fn,
878 fold_convert (TREE_TYPE (base), off));
880 TREE_OPERAND (aref, 0) = base;
881 VEC_safe_push (tree, heap, access_fns, access_fn);
884 if (TREE_CODE (aref) == MEM_REF)
885 TREE_OPERAND (aref, 1)
886 = build_int_cst (TREE_TYPE (TREE_OPERAND (aref, 1)), 0);
888 if (TREE_CODE (ref) == MEM_REF
889 && TREE_CODE (TREE_OPERAND (ref, 0)) == ADDR_EXPR
890 && integer_zerop (TREE_OPERAND (ref, 1)))
891 ref = TREE_OPERAND (TREE_OPERAND (ref, 0), 0);
893 /* For canonicalization purposes we'd like to strip all outermost
894 zero-offset component-refs.
895 ??? For now simply handle zero-index array-refs. */
896 while (TREE_CODE (ref) == ARRAY_REF
897 && integer_zerop (TREE_OPERAND (ref, 1)))
898 ref = TREE_OPERAND (ref, 0);
900 DR_BASE_OBJECT (dr) = ref;
901 DR_ACCESS_FNS (dr) = access_fns;
904 /* Extracts the alias analysis information from the memory reference DR. */
906 static void
907 dr_analyze_alias (struct data_reference *dr)
909 tree ref = DR_REF (dr);
910 tree base = get_base_address (ref), addr;
912 if (INDIRECT_REF_P (base)
913 || TREE_CODE (base) == MEM_REF)
915 addr = TREE_OPERAND (base, 0);
916 if (TREE_CODE (addr) == SSA_NAME)
917 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
921 /* Frees data reference DR. */
923 void
924 free_data_ref (data_reference_p dr)
926 VEC_free (tree, heap, DR_ACCESS_FNS (dr));
927 free (dr);
930 /* Analyzes memory reference MEMREF accessed in STMT. The reference
931 is read if IS_READ is true, write otherwise. Returns the
932 data_reference description of MEMREF. NEST is the outermost loop
933 in which the reference should be instantiated, LOOP is the loop in
934 which the data reference should be analyzed. */
936 struct data_reference *
937 create_data_ref (loop_p nest, loop_p loop, tree memref, gimple stmt,
938 bool is_read)
940 struct data_reference *dr;
942 if (dump_file && (dump_flags & TDF_DETAILS))
944 fprintf (dump_file, "Creating dr for ");
945 print_generic_expr (dump_file, memref, TDF_SLIM);
946 fprintf (dump_file, "\n");
949 dr = XCNEW (struct data_reference);
950 DR_STMT (dr) = stmt;
951 DR_REF (dr) = memref;
952 DR_IS_READ (dr) = is_read;
954 dr_analyze_innermost (dr);
955 dr_analyze_indices (dr, nest, loop);
956 dr_analyze_alias (dr);
958 if (dump_file && (dump_flags & TDF_DETAILS))
960 fprintf (dump_file, "\tbase_address: ");
961 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
962 fprintf (dump_file, "\n\toffset from base address: ");
963 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
964 fprintf (dump_file, "\n\tconstant offset from base address: ");
965 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
966 fprintf (dump_file, "\n\tstep: ");
967 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
968 fprintf (dump_file, "\n\taligned to: ");
969 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
970 fprintf (dump_file, "\n\tbase_object: ");
971 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
972 fprintf (dump_file, "\n");
975 return dr;
978 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
979 expressions. */
980 static bool
981 dr_equal_offsets_p1 (tree offset1, tree offset2)
983 bool res;
985 STRIP_NOPS (offset1);
986 STRIP_NOPS (offset2);
988 if (offset1 == offset2)
989 return true;
991 if (TREE_CODE (offset1) != TREE_CODE (offset2)
992 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
993 return false;
995 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
996 TREE_OPERAND (offset2, 0));
998 if (!res || !BINARY_CLASS_P (offset1))
999 return res;
1001 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1002 TREE_OPERAND (offset2, 1));
1004 return res;
1007 /* Check if DRA and DRB have equal offsets. */
1008 bool
1009 dr_equal_offsets_p (struct data_reference *dra,
1010 struct data_reference *drb)
1012 tree offset1, offset2;
1014 offset1 = DR_OFFSET (dra);
1015 offset2 = DR_OFFSET (drb);
1017 return dr_equal_offsets_p1 (offset1, offset2);
1020 /* Returns true if FNA == FNB. */
1022 static bool
1023 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1025 unsigned i, n = VEC_length (tree, fna);
1027 if (n != VEC_length (tree, fnb))
1028 return false;
1030 for (i = 0; i < n; i++)
1031 if (!operand_equal_p (VEC_index (tree, fna, i),
1032 VEC_index (tree, fnb, i), 0))
1033 return false;
1035 return true;
1038 /* If all the functions in CF are the same, returns one of them,
1039 otherwise returns NULL. */
1041 static affine_fn
1042 common_affine_function (conflict_function *cf)
1044 unsigned i;
1045 affine_fn comm;
1047 if (!CF_NONTRIVIAL_P (cf))
1048 return NULL;
1050 comm = cf->fns[0];
1052 for (i = 1; i < cf->n; i++)
1053 if (!affine_function_equal_p (comm, cf->fns[i]))
1054 return NULL;
1056 return comm;
1059 /* Returns the base of the affine function FN. */
1061 static tree
1062 affine_function_base (affine_fn fn)
1064 return VEC_index (tree, fn, 0);
1067 /* Returns true if FN is a constant. */
1069 static bool
1070 affine_function_constant_p (affine_fn fn)
1072 unsigned i;
1073 tree coef;
1075 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
1076 if (!integer_zerop (coef))
1077 return false;
1079 return true;
1082 /* Returns true if FN is the zero constant function. */
1084 static bool
1085 affine_function_zero_p (affine_fn fn)
1087 return (integer_zerop (affine_function_base (fn))
1088 && affine_function_constant_p (fn));
1091 /* Returns a signed integer type with the largest precision from TA
1092 and TB. */
1094 static tree
1095 signed_type_for_types (tree ta, tree tb)
1097 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1098 return signed_type_for (ta);
1099 else
1100 return signed_type_for (tb);
1103 /* Applies operation OP on affine functions FNA and FNB, and returns the
1104 result. */
1106 static affine_fn
1107 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1109 unsigned i, n, m;
1110 affine_fn ret;
1111 tree coef;
1113 if (VEC_length (tree, fnb) > VEC_length (tree, fna))
1115 n = VEC_length (tree, fna);
1116 m = VEC_length (tree, fnb);
1118 else
1120 n = VEC_length (tree, fnb);
1121 m = VEC_length (tree, fna);
1124 ret = VEC_alloc (tree, heap, m);
1125 for (i = 0; i < n; i++)
1127 tree type = signed_type_for_types (TREE_TYPE (VEC_index (tree, fna, i)),
1128 TREE_TYPE (VEC_index (tree, fnb, i)));
1130 VEC_quick_push (tree, ret,
1131 fold_build2 (op, type,
1132 VEC_index (tree, fna, i),
1133 VEC_index (tree, fnb, i)));
1136 for (; VEC_iterate (tree, fna, i, coef); i++)
1137 VEC_quick_push (tree, ret,
1138 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1139 coef, integer_zero_node));
1140 for (; VEC_iterate (tree, fnb, i, coef); i++)
1141 VEC_quick_push (tree, ret,
1142 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1143 integer_zero_node, coef));
1145 return ret;
1148 /* Returns the sum of affine functions FNA and FNB. */
1150 static affine_fn
1151 affine_fn_plus (affine_fn fna, affine_fn fnb)
1153 return affine_fn_op (PLUS_EXPR, fna, fnb);
1156 /* Returns the difference of affine functions FNA and FNB. */
1158 static affine_fn
1159 affine_fn_minus (affine_fn fna, affine_fn fnb)
1161 return affine_fn_op (MINUS_EXPR, fna, fnb);
1164 /* Frees affine function FN. */
1166 static void
1167 affine_fn_free (affine_fn fn)
1169 VEC_free (tree, heap, fn);
1172 /* Determine for each subscript in the data dependence relation DDR
1173 the distance. */
1175 static void
1176 compute_subscript_distance (struct data_dependence_relation *ddr)
1178 conflict_function *cf_a, *cf_b;
1179 affine_fn fn_a, fn_b, diff;
1181 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1183 unsigned int i;
1185 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1187 struct subscript *subscript;
1189 subscript = DDR_SUBSCRIPT (ddr, i);
1190 cf_a = SUB_CONFLICTS_IN_A (subscript);
1191 cf_b = SUB_CONFLICTS_IN_B (subscript);
1193 fn_a = common_affine_function (cf_a);
1194 fn_b = common_affine_function (cf_b);
1195 if (!fn_a || !fn_b)
1197 SUB_DISTANCE (subscript) = chrec_dont_know;
1198 return;
1200 diff = affine_fn_minus (fn_a, fn_b);
1202 if (affine_function_constant_p (diff))
1203 SUB_DISTANCE (subscript) = affine_function_base (diff);
1204 else
1205 SUB_DISTANCE (subscript) = chrec_dont_know;
1207 affine_fn_free (diff);
1212 /* Returns the conflict function for "unknown". */
1214 static conflict_function *
1215 conflict_fn_not_known (void)
1217 conflict_function *fn = XCNEW (conflict_function);
1218 fn->n = NOT_KNOWN;
1220 return fn;
1223 /* Returns the conflict function for "independent". */
1225 static conflict_function *
1226 conflict_fn_no_dependence (void)
1228 conflict_function *fn = XCNEW (conflict_function);
1229 fn->n = NO_DEPENDENCE;
1231 return fn;
1234 /* Returns true if the address of OBJ is invariant in LOOP. */
1236 static bool
1237 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1239 while (handled_component_p (obj))
1241 if (TREE_CODE (obj) == ARRAY_REF)
1243 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1244 need to check the stride and the lower bound of the reference. */
1245 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1246 loop->num)
1247 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1248 loop->num))
1249 return false;
1251 else if (TREE_CODE (obj) == COMPONENT_REF)
1253 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1254 loop->num))
1255 return false;
1257 obj = TREE_OPERAND (obj, 0);
1260 if (!INDIRECT_REF_P (obj)
1261 && TREE_CODE (obj) != MEM_REF)
1262 return true;
1264 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1265 loop->num);
1268 /* Returns false if we can prove that data references A and B do not alias,
1269 true otherwise. */
1271 bool
1272 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b)
1274 tree addr_a = DR_BASE_OBJECT (a);
1275 tree addr_b = DR_BASE_OBJECT (b);
1277 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
1278 return refs_output_dependent_p (addr_a, addr_b);
1279 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
1280 return refs_anti_dependent_p (addr_a, addr_b);
1281 return refs_may_alias_p (addr_a, addr_b);
1284 static void compute_self_dependence (struct data_dependence_relation *);
1286 /* Initialize a data dependence relation between data accesses A and
1287 B. NB_LOOPS is the number of loops surrounding the references: the
1288 size of the classic distance/direction vectors. */
1290 static struct data_dependence_relation *
1291 initialize_data_dependence_relation (struct data_reference *a,
1292 struct data_reference *b,
1293 VEC (loop_p, heap) *loop_nest)
1295 struct data_dependence_relation *res;
1296 unsigned int i;
1298 res = XNEW (struct data_dependence_relation);
1299 DDR_A (res) = a;
1300 DDR_B (res) = b;
1301 DDR_LOOP_NEST (res) = NULL;
1302 DDR_REVERSED_P (res) = false;
1303 DDR_SUBSCRIPTS (res) = NULL;
1304 DDR_DIR_VECTS (res) = NULL;
1305 DDR_DIST_VECTS (res) = NULL;
1307 if (a == NULL || b == NULL)
1309 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1310 return res;
1313 /* If the data references do not alias, then they are independent. */
1314 if (!dr_may_alias_p (a, b))
1316 DDR_ARE_DEPENDENT (res) = chrec_known;
1317 return res;
1320 /* When the references are exactly the same, don't spend time doing
1321 the data dependence tests, just initialize the ddr and return. */
1322 if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1324 DDR_AFFINE_P (res) = true;
1325 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1326 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1327 DDR_LOOP_NEST (res) = loop_nest;
1328 DDR_INNER_LOOP (res) = 0;
1329 DDR_SELF_REFERENCE (res) = true;
1330 compute_self_dependence (res);
1331 return res;
1334 /* If the references do not access the same object, we do not know
1335 whether they alias or not. */
1336 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1338 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1339 return res;
1342 /* If the base of the object is not invariant in the loop nest, we cannot
1343 analyze it. TODO -- in fact, it would suffice to record that there may
1344 be arbitrary dependences in the loops where the base object varies. */
1345 if (loop_nest
1346 && !object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1347 DR_BASE_OBJECT (a)))
1349 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1350 return res;
1353 /* If the number of dimensions of the access to not agree we can have
1354 a pointer access to a component of the array element type and an
1355 array access while the base-objects are still the same. Punt. */
1356 if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
1358 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1359 return res;
1362 DDR_AFFINE_P (res) = true;
1363 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1364 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1365 DDR_LOOP_NEST (res) = loop_nest;
1366 DDR_INNER_LOOP (res) = 0;
1367 DDR_SELF_REFERENCE (res) = false;
1369 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1371 struct subscript *subscript;
1373 subscript = XNEW (struct subscript);
1374 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1375 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1376 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1377 SUB_DISTANCE (subscript) = chrec_dont_know;
1378 VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1381 return res;
1384 /* Frees memory used by the conflict function F. */
1386 static void
1387 free_conflict_function (conflict_function *f)
1389 unsigned i;
1391 if (CF_NONTRIVIAL_P (f))
1393 for (i = 0; i < f->n; i++)
1394 affine_fn_free (f->fns[i]);
1396 free (f);
1399 /* Frees memory used by SUBSCRIPTS. */
1401 static void
1402 free_subscripts (VEC (subscript_p, heap) *subscripts)
1404 unsigned i;
1405 subscript_p s;
1407 FOR_EACH_VEC_ELT (subscript_p, subscripts, i, s)
1409 free_conflict_function (s->conflicting_iterations_in_a);
1410 free_conflict_function (s->conflicting_iterations_in_b);
1411 free (s);
1413 VEC_free (subscript_p, heap, subscripts);
1416 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1417 description. */
1419 static inline void
1420 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1421 tree chrec)
1423 if (dump_file && (dump_flags & TDF_DETAILS))
1425 fprintf (dump_file, "(dependence classified: ");
1426 print_generic_expr (dump_file, chrec, 0);
1427 fprintf (dump_file, ")\n");
1430 DDR_ARE_DEPENDENT (ddr) = chrec;
1431 free_subscripts (DDR_SUBSCRIPTS (ddr));
1432 DDR_SUBSCRIPTS (ddr) = NULL;
1435 /* The dependence relation DDR cannot be represented by a distance
1436 vector. */
1438 static inline void
1439 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1441 if (dump_file && (dump_flags & TDF_DETAILS))
1442 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1444 DDR_AFFINE_P (ddr) = false;
1449 /* This section contains the classic Banerjee tests. */
1451 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1452 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1454 static inline bool
1455 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1457 return (evolution_function_is_constant_p (chrec_a)
1458 && evolution_function_is_constant_p (chrec_b));
1461 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1462 variable, i.e., if the SIV (Single Index Variable) test is true. */
1464 static bool
1465 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1467 if ((evolution_function_is_constant_p (chrec_a)
1468 && evolution_function_is_univariate_p (chrec_b))
1469 || (evolution_function_is_constant_p (chrec_b)
1470 && evolution_function_is_univariate_p (chrec_a)))
1471 return true;
1473 if (evolution_function_is_univariate_p (chrec_a)
1474 && evolution_function_is_univariate_p (chrec_b))
1476 switch (TREE_CODE (chrec_a))
1478 case POLYNOMIAL_CHREC:
1479 switch (TREE_CODE (chrec_b))
1481 case POLYNOMIAL_CHREC:
1482 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1483 return false;
1485 default:
1486 return true;
1489 default:
1490 return true;
1494 return false;
1497 /* Creates a conflict function with N dimensions. The affine functions
1498 in each dimension follow. */
1500 static conflict_function *
1501 conflict_fn (unsigned n, ...)
1503 unsigned i;
1504 conflict_function *ret = XCNEW (conflict_function);
1505 va_list ap;
1507 gcc_assert (0 < n && n <= MAX_DIM);
1508 va_start(ap, n);
1510 ret->n = n;
1511 for (i = 0; i < n; i++)
1512 ret->fns[i] = va_arg (ap, affine_fn);
1513 va_end(ap);
1515 return ret;
1518 /* Returns constant affine function with value CST. */
1520 static affine_fn
1521 affine_fn_cst (tree cst)
1523 affine_fn fn = VEC_alloc (tree, heap, 1);
1524 VEC_quick_push (tree, fn, cst);
1525 return fn;
1528 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1530 static affine_fn
1531 affine_fn_univar (tree cst, unsigned dim, tree coef)
1533 affine_fn fn = VEC_alloc (tree, heap, dim + 1);
1534 unsigned i;
1536 gcc_assert (dim > 0);
1537 VEC_quick_push (tree, fn, cst);
1538 for (i = 1; i < dim; i++)
1539 VEC_quick_push (tree, fn, integer_zero_node);
1540 VEC_quick_push (tree, fn, coef);
1541 return fn;
1544 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1545 *OVERLAPS_B are initialized to the functions that describe the
1546 relation between the elements accessed twice by CHREC_A and
1547 CHREC_B. For k >= 0, the following property is verified:
1549 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1551 static void
1552 analyze_ziv_subscript (tree chrec_a,
1553 tree chrec_b,
1554 conflict_function **overlaps_a,
1555 conflict_function **overlaps_b,
1556 tree *last_conflicts)
1558 tree type, difference;
1559 dependence_stats.num_ziv++;
1561 if (dump_file && (dump_flags & TDF_DETAILS))
1562 fprintf (dump_file, "(analyze_ziv_subscript \n");
1564 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1565 chrec_a = chrec_convert (type, chrec_a, NULL);
1566 chrec_b = chrec_convert (type, chrec_b, NULL);
1567 difference = chrec_fold_minus (type, chrec_a, chrec_b);
1569 switch (TREE_CODE (difference))
1571 case INTEGER_CST:
1572 if (integer_zerop (difference))
1574 /* The difference is equal to zero: the accessed index
1575 overlaps for each iteration in the loop. */
1576 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1577 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1578 *last_conflicts = chrec_dont_know;
1579 dependence_stats.num_ziv_dependent++;
1581 else
1583 /* The accesses do not overlap. */
1584 *overlaps_a = conflict_fn_no_dependence ();
1585 *overlaps_b = conflict_fn_no_dependence ();
1586 *last_conflicts = integer_zero_node;
1587 dependence_stats.num_ziv_independent++;
1589 break;
1591 default:
1592 /* We're not sure whether the indexes overlap. For the moment,
1593 conservatively answer "don't know". */
1594 if (dump_file && (dump_flags & TDF_DETAILS))
1595 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1597 *overlaps_a = conflict_fn_not_known ();
1598 *overlaps_b = conflict_fn_not_known ();
1599 *last_conflicts = chrec_dont_know;
1600 dependence_stats.num_ziv_unimplemented++;
1601 break;
1604 if (dump_file && (dump_flags & TDF_DETAILS))
1605 fprintf (dump_file, ")\n");
1608 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
1609 and only if it fits to the int type. If this is not the case, or the
1610 bound on the number of iterations of LOOP could not be derived, returns
1611 chrec_dont_know. */
1613 static tree
1614 max_stmt_executions_tree (struct loop *loop)
1616 double_int nit;
1617 tree type;
1619 if (!max_stmt_executions (loop, true, &nit))
1620 return chrec_dont_know;
1622 type = lang_hooks.types.type_for_size (INT_TYPE_SIZE, true);
1623 if (!double_int_fits_to_tree_p (type, nit))
1624 return chrec_dont_know;
1626 return double_int_to_tree (type, nit);
1629 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1630 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1631 *OVERLAPS_B are initialized to the functions that describe the
1632 relation between the elements accessed twice by CHREC_A and
1633 CHREC_B. For k >= 0, the following property is verified:
1635 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1637 static void
1638 analyze_siv_subscript_cst_affine (tree chrec_a,
1639 tree chrec_b,
1640 conflict_function **overlaps_a,
1641 conflict_function **overlaps_b,
1642 tree *last_conflicts)
1644 bool value0, value1, value2;
1645 tree type, difference, tmp;
1647 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1648 chrec_a = chrec_convert (type, chrec_a, NULL);
1649 chrec_b = chrec_convert (type, chrec_b, NULL);
1650 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1652 if (!chrec_is_positive (initial_condition (difference), &value0))
1654 if (dump_file && (dump_flags & TDF_DETAILS))
1655 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1657 dependence_stats.num_siv_unimplemented++;
1658 *overlaps_a = conflict_fn_not_known ();
1659 *overlaps_b = conflict_fn_not_known ();
1660 *last_conflicts = chrec_dont_know;
1661 return;
1663 else
1665 if (value0 == false)
1667 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1669 if (dump_file && (dump_flags & TDF_DETAILS))
1670 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1672 *overlaps_a = conflict_fn_not_known ();
1673 *overlaps_b = conflict_fn_not_known ();
1674 *last_conflicts = chrec_dont_know;
1675 dependence_stats.num_siv_unimplemented++;
1676 return;
1678 else
1680 if (value1 == true)
1682 /* Example:
1683 chrec_a = 12
1684 chrec_b = {10, +, 1}
1687 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1689 HOST_WIDE_INT numiter;
1690 struct loop *loop = get_chrec_loop (chrec_b);
1692 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1693 tmp = fold_build2 (EXACT_DIV_EXPR, type,
1694 fold_build1 (ABS_EXPR, type, difference),
1695 CHREC_RIGHT (chrec_b));
1696 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1697 *last_conflicts = integer_one_node;
1700 /* Perform weak-zero siv test to see if overlap is
1701 outside the loop bounds. */
1702 numiter = max_stmt_executions_int (loop, true);
1704 if (numiter >= 0
1705 && compare_tree_int (tmp, numiter) > 0)
1707 free_conflict_function (*overlaps_a);
1708 free_conflict_function (*overlaps_b);
1709 *overlaps_a = conflict_fn_no_dependence ();
1710 *overlaps_b = conflict_fn_no_dependence ();
1711 *last_conflicts = integer_zero_node;
1712 dependence_stats.num_siv_independent++;
1713 return;
1715 dependence_stats.num_siv_dependent++;
1716 return;
1719 /* When the step does not divide the difference, there are
1720 no overlaps. */
1721 else
1723 *overlaps_a = conflict_fn_no_dependence ();
1724 *overlaps_b = conflict_fn_no_dependence ();
1725 *last_conflicts = integer_zero_node;
1726 dependence_stats.num_siv_independent++;
1727 return;
1731 else
1733 /* Example:
1734 chrec_a = 12
1735 chrec_b = {10, +, -1}
1737 In this case, chrec_a will not overlap with chrec_b. */
1738 *overlaps_a = conflict_fn_no_dependence ();
1739 *overlaps_b = conflict_fn_no_dependence ();
1740 *last_conflicts = integer_zero_node;
1741 dependence_stats.num_siv_independent++;
1742 return;
1746 else
1748 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1750 if (dump_file && (dump_flags & TDF_DETAILS))
1751 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1753 *overlaps_a = conflict_fn_not_known ();
1754 *overlaps_b = conflict_fn_not_known ();
1755 *last_conflicts = chrec_dont_know;
1756 dependence_stats.num_siv_unimplemented++;
1757 return;
1759 else
1761 if (value2 == false)
1763 /* Example:
1764 chrec_a = 3
1765 chrec_b = {10, +, -1}
1767 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1769 HOST_WIDE_INT numiter;
1770 struct loop *loop = get_chrec_loop (chrec_b);
1772 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1773 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
1774 CHREC_RIGHT (chrec_b));
1775 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1776 *last_conflicts = integer_one_node;
1778 /* Perform weak-zero siv test to see if overlap is
1779 outside the loop bounds. */
1780 numiter = max_stmt_executions_int (loop, true);
1782 if (numiter >= 0
1783 && compare_tree_int (tmp, numiter) > 0)
1785 free_conflict_function (*overlaps_a);
1786 free_conflict_function (*overlaps_b);
1787 *overlaps_a = conflict_fn_no_dependence ();
1788 *overlaps_b = conflict_fn_no_dependence ();
1789 *last_conflicts = integer_zero_node;
1790 dependence_stats.num_siv_independent++;
1791 return;
1793 dependence_stats.num_siv_dependent++;
1794 return;
1797 /* When the step does not divide the difference, there
1798 are no overlaps. */
1799 else
1801 *overlaps_a = conflict_fn_no_dependence ();
1802 *overlaps_b = conflict_fn_no_dependence ();
1803 *last_conflicts = integer_zero_node;
1804 dependence_stats.num_siv_independent++;
1805 return;
1808 else
1810 /* Example:
1811 chrec_a = 3
1812 chrec_b = {4, +, 1}
1814 In this case, chrec_a will not overlap with chrec_b. */
1815 *overlaps_a = conflict_fn_no_dependence ();
1816 *overlaps_b = conflict_fn_no_dependence ();
1817 *last_conflicts = integer_zero_node;
1818 dependence_stats.num_siv_independent++;
1819 return;
1826 /* Helper recursive function for initializing the matrix A. Returns
1827 the initial value of CHREC. */
1829 static tree
1830 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
1832 gcc_assert (chrec);
1834 switch (TREE_CODE (chrec))
1836 case POLYNOMIAL_CHREC:
1837 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
1839 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
1840 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
1842 case PLUS_EXPR:
1843 case MULT_EXPR:
1844 case MINUS_EXPR:
1846 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1847 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
1849 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
1852 case NOP_EXPR:
1854 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1855 return chrec_convert (chrec_type (chrec), op, NULL);
1858 case BIT_NOT_EXPR:
1860 /* Handle ~X as -1 - X. */
1861 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1862 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
1863 build_int_cst (TREE_TYPE (chrec), -1), op);
1866 case INTEGER_CST:
1867 return chrec;
1869 default:
1870 gcc_unreachable ();
1871 return NULL_TREE;
1875 #define FLOOR_DIV(x,y) ((x) / (y))
1877 /* Solves the special case of the Diophantine equation:
1878 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
1880 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
1881 number of iterations that loops X and Y run. The overlaps will be
1882 constructed as evolutions in dimension DIM. */
1884 static void
1885 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
1886 affine_fn *overlaps_a,
1887 affine_fn *overlaps_b,
1888 tree *last_conflicts, int dim)
1890 if (((step_a > 0 && step_b > 0)
1891 || (step_a < 0 && step_b < 0)))
1893 int step_overlaps_a, step_overlaps_b;
1894 int gcd_steps_a_b, last_conflict, tau2;
1896 gcd_steps_a_b = gcd (step_a, step_b);
1897 step_overlaps_a = step_b / gcd_steps_a_b;
1898 step_overlaps_b = step_a / gcd_steps_a_b;
1900 if (niter > 0)
1902 tau2 = FLOOR_DIV (niter, step_overlaps_a);
1903 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
1904 last_conflict = tau2;
1905 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
1907 else
1908 *last_conflicts = chrec_dont_know;
1910 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
1911 build_int_cst (NULL_TREE,
1912 step_overlaps_a));
1913 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
1914 build_int_cst (NULL_TREE,
1915 step_overlaps_b));
1918 else
1920 *overlaps_a = affine_fn_cst (integer_zero_node);
1921 *overlaps_b = affine_fn_cst (integer_zero_node);
1922 *last_conflicts = integer_zero_node;
1926 /* Solves the special case of a Diophantine equation where CHREC_A is
1927 an affine bivariate function, and CHREC_B is an affine univariate
1928 function. For example,
1930 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
1932 has the following overlapping functions:
1934 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
1935 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
1936 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
1938 FORNOW: This is a specialized implementation for a case occurring in
1939 a common benchmark. Implement the general algorithm. */
1941 static void
1942 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
1943 conflict_function **overlaps_a,
1944 conflict_function **overlaps_b,
1945 tree *last_conflicts)
1947 bool xz_p, yz_p, xyz_p;
1948 int step_x, step_y, step_z;
1949 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
1950 affine_fn overlaps_a_xz, overlaps_b_xz;
1951 affine_fn overlaps_a_yz, overlaps_b_yz;
1952 affine_fn overlaps_a_xyz, overlaps_b_xyz;
1953 affine_fn ova1, ova2, ovb;
1954 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
1956 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
1957 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
1958 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
1960 niter_x =
1961 max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)), true);
1962 niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a), true);
1963 niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b), true);
1965 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
1967 if (dump_file && (dump_flags & TDF_DETAILS))
1968 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
1970 *overlaps_a = conflict_fn_not_known ();
1971 *overlaps_b = conflict_fn_not_known ();
1972 *last_conflicts = chrec_dont_know;
1973 return;
1976 niter = MIN (niter_x, niter_z);
1977 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
1978 &overlaps_a_xz,
1979 &overlaps_b_xz,
1980 &last_conflicts_xz, 1);
1981 niter = MIN (niter_y, niter_z);
1982 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
1983 &overlaps_a_yz,
1984 &overlaps_b_yz,
1985 &last_conflicts_yz, 2);
1986 niter = MIN (niter_x, niter_z);
1987 niter = MIN (niter_y, niter);
1988 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
1989 &overlaps_a_xyz,
1990 &overlaps_b_xyz,
1991 &last_conflicts_xyz, 3);
1993 xz_p = !integer_zerop (last_conflicts_xz);
1994 yz_p = !integer_zerop (last_conflicts_yz);
1995 xyz_p = !integer_zerop (last_conflicts_xyz);
1997 if (xz_p || yz_p || xyz_p)
1999 ova1 = affine_fn_cst (integer_zero_node);
2000 ova2 = affine_fn_cst (integer_zero_node);
2001 ovb = affine_fn_cst (integer_zero_node);
2002 if (xz_p)
2004 affine_fn t0 = ova1;
2005 affine_fn t2 = ovb;
2007 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2008 ovb = affine_fn_plus (ovb, overlaps_b_xz);
2009 affine_fn_free (t0);
2010 affine_fn_free (t2);
2011 *last_conflicts = last_conflicts_xz;
2013 if (yz_p)
2015 affine_fn t0 = ova2;
2016 affine_fn t2 = ovb;
2018 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2019 ovb = affine_fn_plus (ovb, overlaps_b_yz);
2020 affine_fn_free (t0);
2021 affine_fn_free (t2);
2022 *last_conflicts = last_conflicts_yz;
2024 if (xyz_p)
2026 affine_fn t0 = ova1;
2027 affine_fn t2 = ova2;
2028 affine_fn t4 = ovb;
2030 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2031 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2032 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2033 affine_fn_free (t0);
2034 affine_fn_free (t2);
2035 affine_fn_free (t4);
2036 *last_conflicts = last_conflicts_xyz;
2038 *overlaps_a = conflict_fn (2, ova1, ova2);
2039 *overlaps_b = conflict_fn (1, ovb);
2041 else
2043 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2044 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2045 *last_conflicts = integer_zero_node;
2048 affine_fn_free (overlaps_a_xz);
2049 affine_fn_free (overlaps_b_xz);
2050 affine_fn_free (overlaps_a_yz);
2051 affine_fn_free (overlaps_b_yz);
2052 affine_fn_free (overlaps_a_xyz);
2053 affine_fn_free (overlaps_b_xyz);
2056 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
2058 static void
2059 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
2060 int size)
2062 memcpy (vec2, vec1, size * sizeof (*vec1));
2065 /* Copy the elements of M x N matrix MAT1 to MAT2. */
2067 static void
2068 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
2069 int m, int n)
2071 int i;
2073 for (i = 0; i < m; i++)
2074 lambda_vector_copy (mat1[i], mat2[i], n);
2077 /* Store the N x N identity matrix in MAT. */
2079 static void
2080 lambda_matrix_id (lambda_matrix mat, int size)
2082 int i, j;
2084 for (i = 0; i < size; i++)
2085 for (j = 0; j < size; j++)
2086 mat[i][j] = (i == j) ? 1 : 0;
2089 /* Return the first nonzero element of vector VEC1 between START and N.
2090 We must have START <= N. Returns N if VEC1 is the zero vector. */
2092 static int
2093 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
2095 int j = start;
2096 while (j < n && vec1[j] == 0)
2097 j++;
2098 return j;
2101 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2102 R2 = R2 + CONST1 * R1. */
2104 static void
2105 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
2107 int i;
2109 if (const1 == 0)
2110 return;
2112 for (i = 0; i < n; i++)
2113 mat[r2][i] += const1 * mat[r1][i];
2116 /* Swap rows R1 and R2 in matrix MAT. */
2118 static void
2119 lambda_matrix_row_exchange (lambda_matrix mat, int r1, int r2)
2121 lambda_vector row;
2123 row = mat[r1];
2124 mat[r1] = mat[r2];
2125 mat[r2] = row;
2128 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
2129 and store the result in VEC2. */
2131 static void
2132 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
2133 int size, int const1)
2135 int i;
2137 if (const1 == 0)
2138 lambda_vector_clear (vec2, size);
2139 else
2140 for (i = 0; i < size; i++)
2141 vec2[i] = const1 * vec1[i];
2144 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
2146 static void
2147 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
2148 int size)
2150 lambda_vector_mult_const (vec1, vec2, size, -1);
2153 /* Negate row R1 of matrix MAT which has N columns. */
2155 static void
2156 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
2158 lambda_vector_negate (mat[r1], mat[r1], n);
2161 /* Return true if two vectors are equal. */
2163 static bool
2164 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
2166 int i;
2167 for (i = 0; i < size; i++)
2168 if (vec1[i] != vec2[i])
2169 return false;
2170 return true;
2173 /* Given an M x N integer matrix A, this function determines an M x
2174 M unimodular matrix U, and an M x N echelon matrix S such that
2175 "U.A = S". This decomposition is also known as "right Hermite".
2177 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2178 Restructuring Compilers" Utpal Banerjee. */
2180 static void
2181 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
2182 lambda_matrix S, lambda_matrix U)
2184 int i, j, i0 = 0;
2186 lambda_matrix_copy (A, S, m, n);
2187 lambda_matrix_id (U, m);
2189 for (j = 0; j < n; j++)
2191 if (lambda_vector_first_nz (S[j], m, i0) < m)
2193 ++i0;
2194 for (i = m - 1; i >= i0; i--)
2196 while (S[i][j] != 0)
2198 int sigma, factor, a, b;
2200 a = S[i-1][j];
2201 b = S[i][j];
2202 sigma = (a * b < 0) ? -1: 1;
2203 a = abs (a);
2204 b = abs (b);
2205 factor = sigma * (a / b);
2207 lambda_matrix_row_add (S, n, i, i-1, -factor);
2208 lambda_matrix_row_exchange (S, i, i-1);
2210 lambda_matrix_row_add (U, m, i, i-1, -factor);
2211 lambda_matrix_row_exchange (U, i, i-1);
2218 /* Determines the overlapping elements due to accesses CHREC_A and
2219 CHREC_B, that are affine functions. This function cannot handle
2220 symbolic evolution functions, ie. when initial conditions are
2221 parameters, because it uses lambda matrices of integers. */
2223 static void
2224 analyze_subscript_affine_affine (tree chrec_a,
2225 tree chrec_b,
2226 conflict_function **overlaps_a,
2227 conflict_function **overlaps_b,
2228 tree *last_conflicts)
2230 unsigned nb_vars_a, nb_vars_b, dim;
2231 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2232 lambda_matrix A, U, S;
2233 struct obstack scratch_obstack;
2235 if (eq_evolutions_p (chrec_a, chrec_b))
2237 /* The accessed index overlaps for each iteration in the
2238 loop. */
2239 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2240 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2241 *last_conflicts = chrec_dont_know;
2242 return;
2244 if (dump_file && (dump_flags & TDF_DETAILS))
2245 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2247 /* For determining the initial intersection, we have to solve a
2248 Diophantine equation. This is the most time consuming part.
2250 For answering to the question: "Is there a dependence?" we have
2251 to prove that there exists a solution to the Diophantine
2252 equation, and that the solution is in the iteration domain,
2253 i.e. the solution is positive or zero, and that the solution
2254 happens before the upper bound loop.nb_iterations. Otherwise
2255 there is no dependence. This function outputs a description of
2256 the iterations that hold the intersections. */
2258 nb_vars_a = nb_vars_in_chrec (chrec_a);
2259 nb_vars_b = nb_vars_in_chrec (chrec_b);
2261 gcc_obstack_init (&scratch_obstack);
2263 dim = nb_vars_a + nb_vars_b;
2264 U = lambda_matrix_new (dim, dim, &scratch_obstack);
2265 A = lambda_matrix_new (dim, 1, &scratch_obstack);
2266 S = lambda_matrix_new (dim, 1, &scratch_obstack);
2268 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2269 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
2270 gamma = init_b - init_a;
2272 /* Don't do all the hard work of solving the Diophantine equation
2273 when we already know the solution: for example,
2274 | {3, +, 1}_1
2275 | {3, +, 4}_2
2276 | gamma = 3 - 3 = 0.
2277 Then the first overlap occurs during the first iterations:
2278 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2280 if (gamma == 0)
2282 if (nb_vars_a == 1 && nb_vars_b == 1)
2284 HOST_WIDE_INT step_a, step_b;
2285 HOST_WIDE_INT niter, niter_a, niter_b;
2286 affine_fn ova, ovb;
2288 niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a), true);
2289 niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b), true);
2290 niter = MIN (niter_a, niter_b);
2291 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2292 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2294 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2295 &ova, &ovb,
2296 last_conflicts, 1);
2297 *overlaps_a = conflict_fn (1, ova);
2298 *overlaps_b = conflict_fn (1, ovb);
2301 else if (nb_vars_a == 2 && nb_vars_b == 1)
2302 compute_overlap_steps_for_affine_1_2
2303 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2305 else if (nb_vars_a == 1 && nb_vars_b == 2)
2306 compute_overlap_steps_for_affine_1_2
2307 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2309 else
2311 if (dump_file && (dump_flags & TDF_DETAILS))
2312 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2313 *overlaps_a = conflict_fn_not_known ();
2314 *overlaps_b = conflict_fn_not_known ();
2315 *last_conflicts = chrec_dont_know;
2317 goto end_analyze_subs_aa;
2320 /* U.A = S */
2321 lambda_matrix_right_hermite (A, dim, 1, S, U);
2323 if (S[0][0] < 0)
2325 S[0][0] *= -1;
2326 lambda_matrix_row_negate (U, dim, 0);
2328 gcd_alpha_beta = S[0][0];
2330 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2331 but that is a quite strange case. Instead of ICEing, answer
2332 don't know. */
2333 if (gcd_alpha_beta == 0)
2335 *overlaps_a = conflict_fn_not_known ();
2336 *overlaps_b = conflict_fn_not_known ();
2337 *last_conflicts = chrec_dont_know;
2338 goto end_analyze_subs_aa;
2341 /* The classic "gcd-test". */
2342 if (!int_divides_p (gcd_alpha_beta, gamma))
2344 /* The "gcd-test" has determined that there is no integer
2345 solution, i.e. there is no dependence. */
2346 *overlaps_a = conflict_fn_no_dependence ();
2347 *overlaps_b = conflict_fn_no_dependence ();
2348 *last_conflicts = integer_zero_node;
2351 /* Both access functions are univariate. This includes SIV and MIV cases. */
2352 else if (nb_vars_a == 1 && nb_vars_b == 1)
2354 /* Both functions should have the same evolution sign. */
2355 if (((A[0][0] > 0 && -A[1][0] > 0)
2356 || (A[0][0] < 0 && -A[1][0] < 0)))
2358 /* The solutions are given by:
2360 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2361 | [u21 u22] [y0]
2363 For a given integer t. Using the following variables,
2365 | i0 = u11 * gamma / gcd_alpha_beta
2366 | j0 = u12 * gamma / gcd_alpha_beta
2367 | i1 = u21
2368 | j1 = u22
2370 the solutions are:
2372 | x0 = i0 + i1 * t,
2373 | y0 = j0 + j1 * t. */
2374 HOST_WIDE_INT i0, j0, i1, j1;
2376 i0 = U[0][0] * gamma / gcd_alpha_beta;
2377 j0 = U[0][1] * gamma / gcd_alpha_beta;
2378 i1 = U[1][0];
2379 j1 = U[1][1];
2381 if ((i1 == 0 && i0 < 0)
2382 || (j1 == 0 && j0 < 0))
2384 /* There is no solution.
2385 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2386 falls in here, but for the moment we don't look at the
2387 upper bound of the iteration domain. */
2388 *overlaps_a = conflict_fn_no_dependence ();
2389 *overlaps_b = conflict_fn_no_dependence ();
2390 *last_conflicts = integer_zero_node;
2391 goto end_analyze_subs_aa;
2394 if (i1 > 0 && j1 > 0)
2396 HOST_WIDE_INT niter_a = max_stmt_executions_int
2397 (get_chrec_loop (chrec_a), true);
2398 HOST_WIDE_INT niter_b = max_stmt_executions_int
2399 (get_chrec_loop (chrec_b), true);
2400 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2402 /* (X0, Y0) is a solution of the Diophantine equation:
2403 "chrec_a (X0) = chrec_b (Y0)". */
2404 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2405 CEIL (-j0, j1));
2406 HOST_WIDE_INT x0 = i1 * tau1 + i0;
2407 HOST_WIDE_INT y0 = j1 * tau1 + j0;
2409 /* (X1, Y1) is the smallest positive solution of the eq
2410 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2411 first conflict occurs. */
2412 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2413 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2414 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2416 if (niter > 0)
2418 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2419 FLOOR_DIV (niter - j0, j1));
2420 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2422 /* If the overlap occurs outside of the bounds of the
2423 loop, there is no dependence. */
2424 if (x1 >= niter || y1 >= niter)
2426 *overlaps_a = conflict_fn_no_dependence ();
2427 *overlaps_b = conflict_fn_no_dependence ();
2428 *last_conflicts = integer_zero_node;
2429 goto end_analyze_subs_aa;
2431 else
2432 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2434 else
2435 *last_conflicts = chrec_dont_know;
2437 *overlaps_a
2438 = conflict_fn (1,
2439 affine_fn_univar (build_int_cst (NULL_TREE, x1),
2441 build_int_cst (NULL_TREE, i1)));
2442 *overlaps_b
2443 = conflict_fn (1,
2444 affine_fn_univar (build_int_cst (NULL_TREE, y1),
2446 build_int_cst (NULL_TREE, j1)));
2448 else
2450 /* FIXME: For the moment, the upper bound of the
2451 iteration domain for i and j is not checked. */
2452 if (dump_file && (dump_flags & TDF_DETAILS))
2453 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2454 *overlaps_a = conflict_fn_not_known ();
2455 *overlaps_b = conflict_fn_not_known ();
2456 *last_conflicts = chrec_dont_know;
2459 else
2461 if (dump_file && (dump_flags & TDF_DETAILS))
2462 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2463 *overlaps_a = conflict_fn_not_known ();
2464 *overlaps_b = conflict_fn_not_known ();
2465 *last_conflicts = chrec_dont_know;
2468 else
2470 if (dump_file && (dump_flags & TDF_DETAILS))
2471 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2472 *overlaps_a = conflict_fn_not_known ();
2473 *overlaps_b = conflict_fn_not_known ();
2474 *last_conflicts = chrec_dont_know;
2477 end_analyze_subs_aa:
2478 obstack_free (&scratch_obstack, NULL);
2479 if (dump_file && (dump_flags & TDF_DETAILS))
2481 fprintf (dump_file, " (overlaps_a = ");
2482 dump_conflict_function (dump_file, *overlaps_a);
2483 fprintf (dump_file, ")\n (overlaps_b = ");
2484 dump_conflict_function (dump_file, *overlaps_b);
2485 fprintf (dump_file, ")\n");
2486 fprintf (dump_file, ")\n");
2490 /* Returns true when analyze_subscript_affine_affine can be used for
2491 determining the dependence relation between chrec_a and chrec_b,
2492 that contain symbols. This function modifies chrec_a and chrec_b
2493 such that the analysis result is the same, and such that they don't
2494 contain symbols, and then can safely be passed to the analyzer.
2496 Example: The analysis of the following tuples of evolutions produce
2497 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2498 vs. {0, +, 1}_1
2500 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2501 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2504 static bool
2505 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2507 tree diff, type, left_a, left_b, right_b;
2509 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2510 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2511 /* FIXME: For the moment not handled. Might be refined later. */
2512 return false;
2514 type = chrec_type (*chrec_a);
2515 left_a = CHREC_LEFT (*chrec_a);
2516 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
2517 diff = chrec_fold_minus (type, left_a, left_b);
2519 if (!evolution_function_is_constant_p (diff))
2520 return false;
2522 if (dump_file && (dump_flags & TDF_DETAILS))
2523 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2525 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2526 diff, CHREC_RIGHT (*chrec_a));
2527 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
2528 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2529 build_int_cst (type, 0),
2530 right_b);
2531 return true;
2534 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2535 *OVERLAPS_B are initialized to the functions that describe the
2536 relation between the elements accessed twice by CHREC_A and
2537 CHREC_B. For k >= 0, the following property is verified:
2539 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2541 static void
2542 analyze_siv_subscript (tree chrec_a,
2543 tree chrec_b,
2544 conflict_function **overlaps_a,
2545 conflict_function **overlaps_b,
2546 tree *last_conflicts,
2547 int loop_nest_num)
2549 dependence_stats.num_siv++;
2551 if (dump_file && (dump_flags & TDF_DETAILS))
2552 fprintf (dump_file, "(analyze_siv_subscript \n");
2554 if (evolution_function_is_constant_p (chrec_a)
2555 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2556 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2557 overlaps_a, overlaps_b, last_conflicts);
2559 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2560 && evolution_function_is_constant_p (chrec_b))
2561 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2562 overlaps_b, overlaps_a, last_conflicts);
2564 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2565 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2567 if (!chrec_contains_symbols (chrec_a)
2568 && !chrec_contains_symbols (chrec_b))
2570 analyze_subscript_affine_affine (chrec_a, chrec_b,
2571 overlaps_a, overlaps_b,
2572 last_conflicts);
2574 if (CF_NOT_KNOWN_P (*overlaps_a)
2575 || CF_NOT_KNOWN_P (*overlaps_b))
2576 dependence_stats.num_siv_unimplemented++;
2577 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2578 || CF_NO_DEPENDENCE_P (*overlaps_b))
2579 dependence_stats.num_siv_independent++;
2580 else
2581 dependence_stats.num_siv_dependent++;
2583 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2584 &chrec_b))
2586 analyze_subscript_affine_affine (chrec_a, chrec_b,
2587 overlaps_a, overlaps_b,
2588 last_conflicts);
2590 if (CF_NOT_KNOWN_P (*overlaps_a)
2591 || CF_NOT_KNOWN_P (*overlaps_b))
2592 dependence_stats.num_siv_unimplemented++;
2593 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2594 || CF_NO_DEPENDENCE_P (*overlaps_b))
2595 dependence_stats.num_siv_independent++;
2596 else
2597 dependence_stats.num_siv_dependent++;
2599 else
2600 goto siv_subscript_dontknow;
2603 else
2605 siv_subscript_dontknow:;
2606 if (dump_file && (dump_flags & TDF_DETAILS))
2607 fprintf (dump_file, "siv test failed: unimplemented.\n");
2608 *overlaps_a = conflict_fn_not_known ();
2609 *overlaps_b = conflict_fn_not_known ();
2610 *last_conflicts = chrec_dont_know;
2611 dependence_stats.num_siv_unimplemented++;
2614 if (dump_file && (dump_flags & TDF_DETAILS))
2615 fprintf (dump_file, ")\n");
2618 /* Returns false if we can prove that the greatest common divisor of the steps
2619 of CHREC does not divide CST, false otherwise. */
2621 static bool
2622 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2624 HOST_WIDE_INT cd = 0, val;
2625 tree step;
2627 if (!host_integerp (cst, 0))
2628 return true;
2629 val = tree_low_cst (cst, 0);
2631 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2633 step = CHREC_RIGHT (chrec);
2634 if (!host_integerp (step, 0))
2635 return true;
2636 cd = gcd (cd, tree_low_cst (step, 0));
2637 chrec = CHREC_LEFT (chrec);
2640 return val % cd == 0;
2643 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2644 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2645 functions that describe the relation between the elements accessed
2646 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2647 is verified:
2649 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2651 static void
2652 analyze_miv_subscript (tree chrec_a,
2653 tree chrec_b,
2654 conflict_function **overlaps_a,
2655 conflict_function **overlaps_b,
2656 tree *last_conflicts,
2657 struct loop *loop_nest)
2659 tree type, difference;
2661 dependence_stats.num_miv++;
2662 if (dump_file && (dump_flags & TDF_DETAILS))
2663 fprintf (dump_file, "(analyze_miv_subscript \n");
2665 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2666 chrec_a = chrec_convert (type, chrec_a, NULL);
2667 chrec_b = chrec_convert (type, chrec_b, NULL);
2668 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2670 if (eq_evolutions_p (chrec_a, chrec_b))
2672 /* Access functions are the same: all the elements are accessed
2673 in the same order. */
2674 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2675 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2676 *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
2677 dependence_stats.num_miv_dependent++;
2680 else if (evolution_function_is_constant_p (difference)
2681 /* For the moment, the following is verified:
2682 evolution_function_is_affine_multivariate_p (chrec_a,
2683 loop_nest->num) */
2684 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2686 /* testsuite/.../ssa-chrec-33.c
2687 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2689 The difference is 1, and all the evolution steps are multiples
2690 of 2, consequently there are no overlapping elements. */
2691 *overlaps_a = conflict_fn_no_dependence ();
2692 *overlaps_b = conflict_fn_no_dependence ();
2693 *last_conflicts = integer_zero_node;
2694 dependence_stats.num_miv_independent++;
2697 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2698 && !chrec_contains_symbols (chrec_a)
2699 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2700 && !chrec_contains_symbols (chrec_b))
2702 /* testsuite/.../ssa-chrec-35.c
2703 {0, +, 1}_2 vs. {0, +, 1}_3
2704 the overlapping elements are respectively located at iterations:
2705 {0, +, 1}_x and {0, +, 1}_x,
2706 in other words, we have the equality:
2707 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2709 Other examples:
2710 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2711 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2713 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2714 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2716 analyze_subscript_affine_affine (chrec_a, chrec_b,
2717 overlaps_a, overlaps_b, last_conflicts);
2719 if (CF_NOT_KNOWN_P (*overlaps_a)
2720 || CF_NOT_KNOWN_P (*overlaps_b))
2721 dependence_stats.num_miv_unimplemented++;
2722 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2723 || CF_NO_DEPENDENCE_P (*overlaps_b))
2724 dependence_stats.num_miv_independent++;
2725 else
2726 dependence_stats.num_miv_dependent++;
2729 else
2731 /* When the analysis is too difficult, answer "don't know". */
2732 if (dump_file && (dump_flags & TDF_DETAILS))
2733 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2735 *overlaps_a = conflict_fn_not_known ();
2736 *overlaps_b = conflict_fn_not_known ();
2737 *last_conflicts = chrec_dont_know;
2738 dependence_stats.num_miv_unimplemented++;
2741 if (dump_file && (dump_flags & TDF_DETAILS))
2742 fprintf (dump_file, ")\n");
2745 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2746 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2747 OVERLAP_ITERATIONS_B are initialized with two functions that
2748 describe the iterations that contain conflicting elements.
2750 Remark: For an integer k >= 0, the following equality is true:
2752 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2755 static void
2756 analyze_overlapping_iterations (tree chrec_a,
2757 tree chrec_b,
2758 conflict_function **overlap_iterations_a,
2759 conflict_function **overlap_iterations_b,
2760 tree *last_conflicts, struct loop *loop_nest)
2762 unsigned int lnn = loop_nest->num;
2764 dependence_stats.num_subscript_tests++;
2766 if (dump_file && (dump_flags & TDF_DETAILS))
2768 fprintf (dump_file, "(analyze_overlapping_iterations \n");
2769 fprintf (dump_file, " (chrec_a = ");
2770 print_generic_expr (dump_file, chrec_a, 0);
2771 fprintf (dump_file, ")\n (chrec_b = ");
2772 print_generic_expr (dump_file, chrec_b, 0);
2773 fprintf (dump_file, ")\n");
2776 if (chrec_a == NULL_TREE
2777 || chrec_b == NULL_TREE
2778 || chrec_contains_undetermined (chrec_a)
2779 || chrec_contains_undetermined (chrec_b))
2781 dependence_stats.num_subscript_undetermined++;
2783 *overlap_iterations_a = conflict_fn_not_known ();
2784 *overlap_iterations_b = conflict_fn_not_known ();
2787 /* If they are the same chrec, and are affine, they overlap
2788 on every iteration. */
2789 else if (eq_evolutions_p (chrec_a, chrec_b)
2790 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2791 || operand_equal_p (chrec_a, chrec_b, 0)))
2793 dependence_stats.num_same_subscript_function++;
2794 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2795 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2796 *last_conflicts = chrec_dont_know;
2799 /* If they aren't the same, and aren't affine, we can't do anything
2800 yet. */
2801 else if ((chrec_contains_symbols (chrec_a)
2802 || chrec_contains_symbols (chrec_b))
2803 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2804 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
2806 dependence_stats.num_subscript_undetermined++;
2807 *overlap_iterations_a = conflict_fn_not_known ();
2808 *overlap_iterations_b = conflict_fn_not_known ();
2811 else if (ziv_subscript_p (chrec_a, chrec_b))
2812 analyze_ziv_subscript (chrec_a, chrec_b,
2813 overlap_iterations_a, overlap_iterations_b,
2814 last_conflicts);
2816 else if (siv_subscript_p (chrec_a, chrec_b))
2817 analyze_siv_subscript (chrec_a, chrec_b,
2818 overlap_iterations_a, overlap_iterations_b,
2819 last_conflicts, lnn);
2821 else
2822 analyze_miv_subscript (chrec_a, chrec_b,
2823 overlap_iterations_a, overlap_iterations_b,
2824 last_conflicts, loop_nest);
2826 if (dump_file && (dump_flags & TDF_DETAILS))
2828 fprintf (dump_file, " (overlap_iterations_a = ");
2829 dump_conflict_function (dump_file, *overlap_iterations_a);
2830 fprintf (dump_file, ")\n (overlap_iterations_b = ");
2831 dump_conflict_function (dump_file, *overlap_iterations_b);
2832 fprintf (dump_file, ")\n");
2833 fprintf (dump_file, ")\n");
2837 /* Helper function for uniquely inserting distance vectors. */
2839 static void
2840 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
2842 unsigned i;
2843 lambda_vector v;
2845 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, v)
2846 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
2847 return;
2849 VEC_safe_push (lambda_vector, heap, DDR_DIST_VECTS (ddr), dist_v);
2852 /* Helper function for uniquely inserting direction vectors. */
2854 static void
2855 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
2857 unsigned i;
2858 lambda_vector v;
2860 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), i, v)
2861 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
2862 return;
2864 VEC_safe_push (lambda_vector, heap, DDR_DIR_VECTS (ddr), dir_v);
2867 /* Add a distance of 1 on all the loops outer than INDEX. If we
2868 haven't yet determined a distance for this outer loop, push a new
2869 distance vector composed of the previous distance, and a distance
2870 of 1 for this outer loop. Example:
2872 | loop_1
2873 | loop_2
2874 | A[10]
2875 | endloop_2
2876 | endloop_1
2878 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
2879 save (0, 1), then we have to save (1, 0). */
2881 static void
2882 add_outer_distances (struct data_dependence_relation *ddr,
2883 lambda_vector dist_v, int index)
2885 /* For each outer loop where init_v is not set, the accesses are
2886 in dependence of distance 1 in the loop. */
2887 while (--index >= 0)
2889 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2890 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
2891 save_v[index] = 1;
2892 save_dist_v (ddr, save_v);
2896 /* Return false when fail to represent the data dependence as a
2897 distance vector. INIT_B is set to true when a component has been
2898 added to the distance vector DIST_V. INDEX_CARRY is then set to
2899 the index in DIST_V that carries the dependence. */
2901 static bool
2902 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
2903 struct data_reference *ddr_a,
2904 struct data_reference *ddr_b,
2905 lambda_vector dist_v, bool *init_b,
2906 int *index_carry)
2908 unsigned i;
2909 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2911 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2913 tree access_fn_a, access_fn_b;
2914 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
2916 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2918 non_affine_dependence_relation (ddr);
2919 return false;
2922 access_fn_a = DR_ACCESS_FN (ddr_a, i);
2923 access_fn_b = DR_ACCESS_FN (ddr_b, i);
2925 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
2926 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
2928 int dist, index;
2929 int var_a = CHREC_VARIABLE (access_fn_a);
2930 int var_b = CHREC_VARIABLE (access_fn_b);
2932 if (var_a != var_b
2933 || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2935 non_affine_dependence_relation (ddr);
2936 return false;
2939 dist = int_cst_value (SUB_DISTANCE (subscript));
2940 index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
2941 *index_carry = MIN (index, *index_carry);
2943 /* This is the subscript coupling test. If we have already
2944 recorded a distance for this loop (a distance coming from
2945 another subscript), it should be the same. For example,
2946 in the following code, there is no dependence:
2948 | loop i = 0, N, 1
2949 | T[i+1][i] = ...
2950 | ... = T[i][i]
2951 | endloop
2953 if (init_v[index] != 0 && dist_v[index] != dist)
2955 finalize_ddr_dependent (ddr, chrec_known);
2956 return false;
2959 dist_v[index] = dist;
2960 init_v[index] = 1;
2961 *init_b = true;
2963 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
2965 /* This can be for example an affine vs. constant dependence
2966 (T[i] vs. T[3]) that is not an affine dependence and is
2967 not representable as a distance vector. */
2968 non_affine_dependence_relation (ddr);
2969 return false;
2973 return true;
2976 /* Return true when the DDR contains only constant access functions. */
2978 static bool
2979 constant_access_functions (const struct data_dependence_relation *ddr)
2981 unsigned i;
2983 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2984 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
2985 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
2986 return false;
2988 return true;
2991 /* Helper function for the case where DDR_A and DDR_B are the same
2992 multivariate access function with a constant step. For an example
2993 see pr34635-1.c. */
2995 static void
2996 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
2998 int x_1, x_2;
2999 tree c_1 = CHREC_LEFT (c_2);
3000 tree c_0 = CHREC_LEFT (c_1);
3001 lambda_vector dist_v;
3002 int v1, v2, cd;
3004 /* Polynomials with more than 2 variables are not handled yet. When
3005 the evolution steps are parameters, it is not possible to
3006 represent the dependence using classical distance vectors. */
3007 if (TREE_CODE (c_0) != INTEGER_CST
3008 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
3009 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
3011 DDR_AFFINE_P (ddr) = false;
3012 return;
3015 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
3016 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
3018 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
3019 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3020 v1 = int_cst_value (CHREC_RIGHT (c_1));
3021 v2 = int_cst_value (CHREC_RIGHT (c_2));
3022 cd = gcd (v1, v2);
3023 v1 /= cd;
3024 v2 /= cd;
3026 if (v2 < 0)
3028 v2 = -v2;
3029 v1 = -v1;
3032 dist_v[x_1] = v2;
3033 dist_v[x_2] = -v1;
3034 save_dist_v (ddr, dist_v);
3036 add_outer_distances (ddr, dist_v, x_1);
3039 /* Helper function for the case where DDR_A and DDR_B are the same
3040 access functions. */
3042 static void
3043 add_other_self_distances (struct data_dependence_relation *ddr)
3045 lambda_vector dist_v;
3046 unsigned i;
3047 int index_carry = DDR_NB_LOOPS (ddr);
3049 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3051 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
3053 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
3055 if (!evolution_function_is_univariate_p (access_fun))
3057 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
3059 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
3060 return;
3063 access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
3065 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
3066 add_multivariate_self_dist (ddr, access_fun);
3067 else
3068 /* The evolution step is not constant: it varies in
3069 the outer loop, so this cannot be represented by a
3070 distance vector. For example in pr34635.c the
3071 evolution is {0, +, {0, +, 4}_1}_2. */
3072 DDR_AFFINE_P (ddr) = false;
3074 return;
3077 index_carry = MIN (index_carry,
3078 index_in_loop_nest (CHREC_VARIABLE (access_fun),
3079 DDR_LOOP_NEST (ddr)));
3083 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3084 add_outer_distances (ddr, dist_v, index_carry);
3087 static void
3088 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
3090 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3092 dist_v[DDR_INNER_LOOP (ddr)] = 1;
3093 save_dist_v (ddr, dist_v);
3096 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
3097 is the case for example when access functions are the same and
3098 equal to a constant, as in:
3100 | loop_1
3101 | A[3] = ...
3102 | ... = A[3]
3103 | endloop_1
3105 in which case the distance vectors are (0) and (1). */
3107 static void
3108 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
3110 unsigned i, j;
3112 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3114 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
3115 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
3116 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
3118 for (j = 0; j < ca->n; j++)
3119 if (affine_function_zero_p (ca->fns[j]))
3121 insert_innermost_unit_dist_vector (ddr);
3122 return;
3125 for (j = 0; j < cb->n; j++)
3126 if (affine_function_zero_p (cb->fns[j]))
3128 insert_innermost_unit_dist_vector (ddr);
3129 return;
3134 /* Compute the classic per loop distance vector. DDR is the data
3135 dependence relation to build a vector from. Return false when fail
3136 to represent the data dependence as a distance vector. */
3138 static bool
3139 build_classic_dist_vector (struct data_dependence_relation *ddr,
3140 struct loop *loop_nest)
3142 bool init_b = false;
3143 int index_carry = DDR_NB_LOOPS (ddr);
3144 lambda_vector dist_v;
3146 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3147 return false;
3149 if (same_access_functions (ddr))
3151 /* Save the 0 vector. */
3152 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3153 save_dist_v (ddr, dist_v);
3155 if (constant_access_functions (ddr))
3156 add_distance_for_zero_overlaps (ddr);
3158 if (DDR_NB_LOOPS (ddr) > 1)
3159 add_other_self_distances (ddr);
3161 return true;
3164 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3165 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3166 dist_v, &init_b, &index_carry))
3167 return false;
3169 /* Save the distance vector if we initialized one. */
3170 if (init_b)
3172 /* Verify a basic constraint: classic distance vectors should
3173 always be lexicographically positive.
3175 Data references are collected in the order of execution of
3176 the program, thus for the following loop
3178 | for (i = 1; i < 100; i++)
3179 | for (j = 1; j < 100; j++)
3181 | t = T[j+1][i-1]; // A
3182 | T[j][i] = t + 2; // B
3185 references are collected following the direction of the wind:
3186 A then B. The data dependence tests are performed also
3187 following this order, such that we're looking at the distance
3188 separating the elements accessed by A from the elements later
3189 accessed by B. But in this example, the distance returned by
3190 test_dep (A, B) is lexicographically negative (-1, 1), that
3191 means that the access A occurs later than B with respect to
3192 the outer loop, ie. we're actually looking upwind. In this
3193 case we solve test_dep (B, A) looking downwind to the
3194 lexicographically positive solution, that returns the
3195 distance vector (1, -1). */
3196 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3198 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3199 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3200 loop_nest))
3201 return false;
3202 compute_subscript_distance (ddr);
3203 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3204 save_v, &init_b, &index_carry))
3205 return false;
3206 save_dist_v (ddr, save_v);
3207 DDR_REVERSED_P (ddr) = true;
3209 /* In this case there is a dependence forward for all the
3210 outer loops:
3212 | for (k = 1; k < 100; k++)
3213 | for (i = 1; i < 100; i++)
3214 | for (j = 1; j < 100; j++)
3216 | t = T[j+1][i-1]; // A
3217 | T[j][i] = t + 2; // B
3220 the vectors are:
3221 (0, 1, -1)
3222 (1, 1, -1)
3223 (1, -1, 1)
3225 if (DDR_NB_LOOPS (ddr) > 1)
3227 add_outer_distances (ddr, save_v, index_carry);
3228 add_outer_distances (ddr, dist_v, index_carry);
3231 else
3233 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3234 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3236 if (DDR_NB_LOOPS (ddr) > 1)
3238 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3240 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3241 DDR_A (ddr), loop_nest))
3242 return false;
3243 compute_subscript_distance (ddr);
3244 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3245 opposite_v, &init_b,
3246 &index_carry))
3247 return false;
3249 save_dist_v (ddr, save_v);
3250 add_outer_distances (ddr, dist_v, index_carry);
3251 add_outer_distances (ddr, opposite_v, index_carry);
3253 else
3254 save_dist_v (ddr, save_v);
3257 else
3259 /* There is a distance of 1 on all the outer loops: Example:
3260 there is a dependence of distance 1 on loop_1 for the array A.
3262 | loop_1
3263 | A[5] = ...
3264 | endloop
3266 add_outer_distances (ddr, dist_v,
3267 lambda_vector_first_nz (dist_v,
3268 DDR_NB_LOOPS (ddr), 0));
3271 if (dump_file && (dump_flags & TDF_DETAILS))
3273 unsigned i;
3275 fprintf (dump_file, "(build_classic_dist_vector\n");
3276 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3278 fprintf (dump_file, " dist_vector = (");
3279 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3280 DDR_NB_LOOPS (ddr));
3281 fprintf (dump_file, " )\n");
3283 fprintf (dump_file, ")\n");
3286 return true;
3289 /* Return the direction for a given distance.
3290 FIXME: Computing dir this way is suboptimal, since dir can catch
3291 cases that dist is unable to represent. */
3293 static inline enum data_dependence_direction
3294 dir_from_dist (int dist)
3296 if (dist > 0)
3297 return dir_positive;
3298 else if (dist < 0)
3299 return dir_negative;
3300 else
3301 return dir_equal;
3304 /* Compute the classic per loop direction vector. DDR is the data
3305 dependence relation to build a vector from. */
3307 static void
3308 build_classic_dir_vector (struct data_dependence_relation *ddr)
3310 unsigned i, j;
3311 lambda_vector dist_v;
3313 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
3315 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3317 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3318 dir_v[j] = dir_from_dist (dist_v[j]);
3320 save_dir_v (ddr, dir_v);
3324 /* Helper function. Returns true when there is a dependence between
3325 data references DRA and DRB. */
3327 static bool
3328 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3329 struct data_reference *dra,
3330 struct data_reference *drb,
3331 struct loop *loop_nest)
3333 unsigned int i;
3334 tree last_conflicts;
3335 struct subscript *subscript;
3337 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3338 i++)
3340 conflict_function *overlaps_a, *overlaps_b;
3342 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3343 DR_ACCESS_FN (drb, i),
3344 &overlaps_a, &overlaps_b,
3345 &last_conflicts, loop_nest);
3347 if (CF_NOT_KNOWN_P (overlaps_a)
3348 || CF_NOT_KNOWN_P (overlaps_b))
3350 finalize_ddr_dependent (ddr, chrec_dont_know);
3351 dependence_stats.num_dependence_undetermined++;
3352 free_conflict_function (overlaps_a);
3353 free_conflict_function (overlaps_b);
3354 return false;
3357 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3358 || CF_NO_DEPENDENCE_P (overlaps_b))
3360 finalize_ddr_dependent (ddr, chrec_known);
3361 dependence_stats.num_dependence_independent++;
3362 free_conflict_function (overlaps_a);
3363 free_conflict_function (overlaps_b);
3364 return false;
3367 else
3369 if (SUB_CONFLICTS_IN_A (subscript))
3370 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3371 if (SUB_CONFLICTS_IN_B (subscript))
3372 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3374 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3375 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3376 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3380 return true;
3383 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3385 static void
3386 subscript_dependence_tester (struct data_dependence_relation *ddr,
3387 struct loop *loop_nest)
3390 if (dump_file && (dump_flags & TDF_DETAILS))
3391 fprintf (dump_file, "(subscript_dependence_tester \n");
3393 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3394 dependence_stats.num_dependence_dependent++;
3396 compute_subscript_distance (ddr);
3397 if (build_classic_dist_vector (ddr, loop_nest))
3398 build_classic_dir_vector (ddr);
3400 if (dump_file && (dump_flags & TDF_DETAILS))
3401 fprintf (dump_file, ")\n");
3404 /* Returns true when all the access functions of A are affine or
3405 constant with respect to LOOP_NEST. */
3407 static bool
3408 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3409 const struct loop *loop_nest)
3411 unsigned int i;
3412 VEC(tree,heap) *fns = DR_ACCESS_FNS (a);
3413 tree t;
3415 FOR_EACH_VEC_ELT (tree, fns, i, t)
3416 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3417 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3418 return false;
3420 return true;
3423 /* Initializes an equation for an OMEGA problem using the information
3424 contained in the ACCESS_FUN. Returns true when the operation
3425 succeeded.
3427 PB is the omega constraint system.
3428 EQ is the number of the equation to be initialized.
3429 OFFSET is used for shifting the variables names in the constraints:
3430 a constrain is composed of 2 * the number of variables surrounding
3431 dependence accesses. OFFSET is set either to 0 for the first n variables,
3432 then it is set to n.
3433 ACCESS_FUN is expected to be an affine chrec. */
3435 static bool
3436 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3437 unsigned int offset, tree access_fun,
3438 struct data_dependence_relation *ddr)
3440 switch (TREE_CODE (access_fun))
3442 case POLYNOMIAL_CHREC:
3444 tree left = CHREC_LEFT (access_fun);
3445 tree right = CHREC_RIGHT (access_fun);
3446 int var = CHREC_VARIABLE (access_fun);
3447 unsigned var_idx;
3449 if (TREE_CODE (right) != INTEGER_CST)
3450 return false;
3452 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3453 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3455 /* Compute the innermost loop index. */
3456 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3458 if (offset == 0)
3459 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3460 += int_cst_value (right);
3462 switch (TREE_CODE (left))
3464 case POLYNOMIAL_CHREC:
3465 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3467 case INTEGER_CST:
3468 pb->eqs[eq].coef[0] += int_cst_value (left);
3469 return true;
3471 default:
3472 return false;
3476 case INTEGER_CST:
3477 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3478 return true;
3480 default:
3481 return false;
3485 /* As explained in the comments preceding init_omega_for_ddr, we have
3486 to set up a system for each loop level, setting outer loops
3487 variation to zero, and current loop variation to positive or zero.
3488 Save each lexico positive distance vector. */
3490 static void
3491 omega_extract_distance_vectors (omega_pb pb,
3492 struct data_dependence_relation *ddr)
3494 int eq, geq;
3495 unsigned i, j;
3496 struct loop *loopi, *loopj;
3497 enum omega_result res;
3499 /* Set a new problem for each loop in the nest. The basis is the
3500 problem that we have initialized until now. On top of this we
3501 add new constraints. */
3502 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3503 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3505 int dist = 0;
3506 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3507 DDR_NB_LOOPS (ddr));
3509 omega_copy_problem (copy, pb);
3511 /* For all the outer loops "loop_j", add "dj = 0". */
3512 for (j = 0;
3513 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3515 eq = omega_add_zero_eq (copy, omega_black);
3516 copy->eqs[eq].coef[j + 1] = 1;
3519 /* For "loop_i", add "0 <= di". */
3520 geq = omega_add_zero_geq (copy, omega_black);
3521 copy->geqs[geq].coef[i + 1] = 1;
3523 /* Reduce the constraint system, and test that the current
3524 problem is feasible. */
3525 res = omega_simplify_problem (copy);
3526 if (res == omega_false
3527 || res == omega_unknown
3528 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3529 goto next_problem;
3531 for (eq = 0; eq < copy->num_subs; eq++)
3532 if (copy->subs[eq].key == (int) i + 1)
3534 dist = copy->subs[eq].coef[0];
3535 goto found_dist;
3538 if (dist == 0)
3540 /* Reinitialize problem... */
3541 omega_copy_problem (copy, pb);
3542 for (j = 0;
3543 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3545 eq = omega_add_zero_eq (copy, omega_black);
3546 copy->eqs[eq].coef[j + 1] = 1;
3549 /* ..., but this time "di = 1". */
3550 eq = omega_add_zero_eq (copy, omega_black);
3551 copy->eqs[eq].coef[i + 1] = 1;
3552 copy->eqs[eq].coef[0] = -1;
3554 res = omega_simplify_problem (copy);
3555 if (res == omega_false
3556 || res == omega_unknown
3557 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3558 goto next_problem;
3560 for (eq = 0; eq < copy->num_subs; eq++)
3561 if (copy->subs[eq].key == (int) i + 1)
3563 dist = copy->subs[eq].coef[0];
3564 goto found_dist;
3568 found_dist:;
3569 /* Save the lexicographically positive distance vector. */
3570 if (dist >= 0)
3572 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3573 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3575 dist_v[i] = dist;
3577 for (eq = 0; eq < copy->num_subs; eq++)
3578 if (copy->subs[eq].key > 0)
3580 dist = copy->subs[eq].coef[0];
3581 dist_v[copy->subs[eq].key - 1] = dist;
3584 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3585 dir_v[j] = dir_from_dist (dist_v[j]);
3587 save_dist_v (ddr, dist_v);
3588 save_dir_v (ddr, dir_v);
3591 next_problem:;
3592 omega_free_problem (copy);
3596 /* This is called for each subscript of a tuple of data references:
3597 insert an equality for representing the conflicts. */
3599 static bool
3600 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3601 struct data_dependence_relation *ddr,
3602 omega_pb pb, bool *maybe_dependent)
3604 int eq;
3605 tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3606 TREE_TYPE (access_fun_b));
3607 tree fun_a = chrec_convert (type, access_fun_a, NULL);
3608 tree fun_b = chrec_convert (type, access_fun_b, NULL);
3609 tree difference = chrec_fold_minus (type, fun_a, fun_b);
3610 tree minus_one;
3612 /* When the fun_a - fun_b is not constant, the dependence is not
3613 captured by the classic distance vector representation. */
3614 if (TREE_CODE (difference) != INTEGER_CST)
3615 return false;
3617 /* ZIV test. */
3618 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3620 /* There is no dependence. */
3621 *maybe_dependent = false;
3622 return true;
3625 minus_one = build_int_cst (type, -1);
3626 fun_b = chrec_fold_multiply (type, fun_b, minus_one);
3628 eq = omega_add_zero_eq (pb, omega_black);
3629 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3630 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3631 /* There is probably a dependence, but the system of
3632 constraints cannot be built: answer "don't know". */
3633 return false;
3635 /* GCD test. */
3636 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3637 && !int_divides_p (lambda_vector_gcd
3638 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3639 2 * DDR_NB_LOOPS (ddr)),
3640 pb->eqs[eq].coef[0]))
3642 /* There is no dependence. */
3643 *maybe_dependent = false;
3644 return true;
3647 return true;
3650 /* Helper function, same as init_omega_for_ddr but specialized for
3651 data references A and B. */
3653 static bool
3654 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3655 struct data_dependence_relation *ddr,
3656 omega_pb pb, bool *maybe_dependent)
3658 unsigned i;
3659 int ineq;
3660 struct loop *loopi;
3661 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3663 /* Insert an equality per subscript. */
3664 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3666 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3667 ddr, pb, maybe_dependent))
3668 return false;
3669 else if (*maybe_dependent == false)
3671 /* There is no dependence. */
3672 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3673 return true;
3677 /* Insert inequalities: constraints corresponding to the iteration
3678 domain, i.e. the loops surrounding the references "loop_x" and
3679 the distance variables "dx". The layout of the OMEGA
3680 representation is as follows:
3681 - coef[0] is the constant
3682 - coef[1..nb_loops] are the protected variables that will not be
3683 removed by the solver: the "dx"
3684 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3686 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3687 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3689 HOST_WIDE_INT nbi = max_stmt_executions_int (loopi, true);
3691 /* 0 <= loop_x */
3692 ineq = omega_add_zero_geq (pb, omega_black);
3693 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3695 /* 0 <= loop_x + dx */
3696 ineq = omega_add_zero_geq (pb, omega_black);
3697 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3698 pb->geqs[ineq].coef[i + 1] = 1;
3700 if (nbi != -1)
3702 /* loop_x <= nb_iters */
3703 ineq = omega_add_zero_geq (pb, omega_black);
3704 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3705 pb->geqs[ineq].coef[0] = nbi;
3707 /* loop_x + dx <= nb_iters */
3708 ineq = omega_add_zero_geq (pb, omega_black);
3709 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3710 pb->geqs[ineq].coef[i + 1] = -1;
3711 pb->geqs[ineq].coef[0] = nbi;
3713 /* A step "dx" bigger than nb_iters is not feasible, so
3714 add "0 <= nb_iters + dx", */
3715 ineq = omega_add_zero_geq (pb, omega_black);
3716 pb->geqs[ineq].coef[i + 1] = 1;
3717 pb->geqs[ineq].coef[0] = nbi;
3718 /* and "dx <= nb_iters". */
3719 ineq = omega_add_zero_geq (pb, omega_black);
3720 pb->geqs[ineq].coef[i + 1] = -1;
3721 pb->geqs[ineq].coef[0] = nbi;
3725 omega_extract_distance_vectors (pb, ddr);
3727 return true;
3730 /* Sets up the Omega dependence problem for the data dependence
3731 relation DDR. Returns false when the constraint system cannot be
3732 built, ie. when the test answers "don't know". Returns true
3733 otherwise, and when independence has been proved (using one of the
3734 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3735 set MAYBE_DEPENDENT to true.
3737 Example: for setting up the dependence system corresponding to the
3738 conflicting accesses
3740 | loop_i
3741 | loop_j
3742 | A[i, i+1] = ...
3743 | ... A[2*j, 2*(i + j)]
3744 | endloop_j
3745 | endloop_i
3747 the following constraints come from the iteration domain:
3749 0 <= i <= Ni
3750 0 <= i + di <= Ni
3751 0 <= j <= Nj
3752 0 <= j + dj <= Nj
3754 where di, dj are the distance variables. The constraints
3755 representing the conflicting elements are:
3757 i = 2 * (j + dj)
3758 i + 1 = 2 * (i + di + j + dj)
3760 For asking that the resulting distance vector (di, dj) be
3761 lexicographically positive, we insert the constraint "di >= 0". If
3762 "di = 0" in the solution, we fix that component to zero, and we
3763 look at the inner loops: we set a new problem where all the outer
3764 loop distances are zero, and fix this inner component to be
3765 positive. When one of the components is positive, we save that
3766 distance, and set a new problem where the distance on this loop is
3767 zero, searching for other distances in the inner loops. Here is
3768 the classic example that illustrates that we have to set for each
3769 inner loop a new problem:
3771 | loop_1
3772 | loop_2
3773 | A[10]
3774 | endloop_2
3775 | endloop_1
3777 we have to save two distances (1, 0) and (0, 1).
3779 Given two array references, refA and refB, we have to set the
3780 dependence problem twice, refA vs. refB and refB vs. refA, and we
3781 cannot do a single test, as refB might occur before refA in the
3782 inner loops, and the contrary when considering outer loops: ex.
3784 | loop_0
3785 | loop_1
3786 | loop_2
3787 | T[{1,+,1}_2][{1,+,1}_1] // refA
3788 | T[{2,+,1}_2][{0,+,1}_1] // refB
3789 | endloop_2
3790 | endloop_1
3791 | endloop_0
3793 refB touches the elements in T before refA, and thus for the same
3794 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3795 but for successive loop_0 iterations, we have (1, -1, 1)
3797 The Omega solver expects the distance variables ("di" in the
3798 previous example) to come first in the constraint system (as
3799 variables to be protected, or "safe" variables), the constraint
3800 system is built using the following layout:
3802 "cst | distance vars | index vars".
3805 static bool
3806 init_omega_for_ddr (struct data_dependence_relation *ddr,
3807 bool *maybe_dependent)
3809 omega_pb pb;
3810 bool res = false;
3812 *maybe_dependent = true;
3814 if (same_access_functions (ddr))
3816 unsigned j;
3817 lambda_vector dir_v;
3819 /* Save the 0 vector. */
3820 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3821 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3822 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3823 dir_v[j] = dir_equal;
3824 save_dir_v (ddr, dir_v);
3826 /* Save the dependences carried by outer loops. */
3827 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3828 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3829 maybe_dependent);
3830 omega_free_problem (pb);
3831 return res;
3834 /* Omega expects the protected variables (those that have to be kept
3835 after elimination) to appear first in the constraint system.
3836 These variables are the distance variables. In the following
3837 initialization we declare NB_LOOPS safe variables, and the total
3838 number of variables for the constraint system is 2*NB_LOOPS. */
3839 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3840 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3841 maybe_dependent);
3842 omega_free_problem (pb);
3844 /* Stop computation if not decidable, or no dependence. */
3845 if (res == false || *maybe_dependent == false)
3846 return res;
3848 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3849 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
3850 maybe_dependent);
3851 omega_free_problem (pb);
3853 return res;
3856 /* Return true when DDR contains the same information as that stored
3857 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
3859 static bool
3860 ddr_consistent_p (FILE *file,
3861 struct data_dependence_relation *ddr,
3862 VEC (lambda_vector, heap) *dist_vects,
3863 VEC (lambda_vector, heap) *dir_vects)
3865 unsigned int i, j;
3867 /* If dump_file is set, output there. */
3868 if (dump_file && (dump_flags & TDF_DETAILS))
3869 file = dump_file;
3871 if (VEC_length (lambda_vector, dist_vects) != DDR_NUM_DIST_VECTS (ddr))
3873 lambda_vector b_dist_v;
3874 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
3875 VEC_length (lambda_vector, dist_vects),
3876 DDR_NUM_DIST_VECTS (ddr));
3878 fprintf (file, "Banerjee dist vectors:\n");
3879 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, i, b_dist_v)
3880 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
3882 fprintf (file, "Omega dist vectors:\n");
3883 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3884 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
3886 fprintf (file, "data dependence relation:\n");
3887 dump_data_dependence_relation (file, ddr);
3889 fprintf (file, ")\n");
3890 return false;
3893 if (VEC_length (lambda_vector, dir_vects) != DDR_NUM_DIR_VECTS (ddr))
3895 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
3896 VEC_length (lambda_vector, dir_vects),
3897 DDR_NUM_DIR_VECTS (ddr));
3898 return false;
3901 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3903 lambda_vector a_dist_v;
3904 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
3906 /* Distance vectors are not ordered in the same way in the DDR
3907 and in the DIST_VECTS: search for a matching vector. */
3908 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, a_dist_v)
3909 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
3910 break;
3912 if (j == VEC_length (lambda_vector, dist_vects))
3914 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
3915 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
3916 fprintf (file, "not found in Omega dist vectors:\n");
3917 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
3918 fprintf (file, "data dependence relation:\n");
3919 dump_data_dependence_relation (file, ddr);
3920 fprintf (file, ")\n");
3924 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
3926 lambda_vector a_dir_v;
3927 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
3929 /* Direction vectors are not ordered in the same way in the DDR
3930 and in the DIR_VECTS: search for a matching vector. */
3931 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, a_dir_v)
3932 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
3933 break;
3935 if (j == VEC_length (lambda_vector, dist_vects))
3937 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
3938 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
3939 fprintf (file, "not found in Omega dir vectors:\n");
3940 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
3941 fprintf (file, "data dependence relation:\n");
3942 dump_data_dependence_relation (file, ddr);
3943 fprintf (file, ")\n");
3947 return true;
3950 /* This computes the affine dependence relation between A and B with
3951 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
3952 independence between two accesses, while CHREC_DONT_KNOW is used
3953 for representing the unknown relation.
3955 Note that it is possible to stop the computation of the dependence
3956 relation the first time we detect a CHREC_KNOWN element for a given
3957 subscript. */
3959 static void
3960 compute_affine_dependence (struct data_dependence_relation *ddr,
3961 struct loop *loop_nest)
3963 struct data_reference *dra = DDR_A (ddr);
3964 struct data_reference *drb = DDR_B (ddr);
3966 if (dump_file && (dump_flags & TDF_DETAILS))
3968 fprintf (dump_file, "(compute_affine_dependence\n");
3969 fprintf (dump_file, " (stmt_a = \n");
3970 print_gimple_stmt (dump_file, DR_STMT (dra), 0, 0);
3971 fprintf (dump_file, ")\n (stmt_b = \n");
3972 print_gimple_stmt (dump_file, DR_STMT (drb), 0, 0);
3973 fprintf (dump_file, ")\n");
3976 /* Analyze only when the dependence relation is not yet known. */
3977 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE
3978 && !DDR_SELF_REFERENCE (ddr))
3980 dependence_stats.num_dependence_tests++;
3982 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
3983 && access_functions_are_affine_or_constant_p (drb, loop_nest))
3985 if (flag_check_data_deps)
3987 /* Compute the dependences using the first algorithm. */
3988 subscript_dependence_tester (ddr, loop_nest);
3990 if (dump_file && (dump_flags & TDF_DETAILS))
3992 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
3993 dump_data_dependence_relation (dump_file, ddr);
3996 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
3998 bool maybe_dependent;
3999 VEC (lambda_vector, heap) *dir_vects, *dist_vects;
4001 /* Save the result of the first DD analyzer. */
4002 dist_vects = DDR_DIST_VECTS (ddr);
4003 dir_vects = DDR_DIR_VECTS (ddr);
4005 /* Reset the information. */
4006 DDR_DIST_VECTS (ddr) = NULL;
4007 DDR_DIR_VECTS (ddr) = NULL;
4009 /* Compute the same information using Omega. */
4010 if (!init_omega_for_ddr (ddr, &maybe_dependent))
4011 goto csys_dont_know;
4013 if (dump_file && (dump_flags & TDF_DETAILS))
4015 fprintf (dump_file, "Omega Analyzer\n");
4016 dump_data_dependence_relation (dump_file, ddr);
4019 /* Check that we get the same information. */
4020 if (maybe_dependent)
4021 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
4022 dir_vects));
4025 else
4026 subscript_dependence_tester (ddr, loop_nest);
4029 /* As a last case, if the dependence cannot be determined, or if
4030 the dependence is considered too difficult to determine, answer
4031 "don't know". */
4032 else
4034 csys_dont_know:;
4035 dependence_stats.num_dependence_undetermined++;
4037 if (dump_file && (dump_flags & TDF_DETAILS))
4039 fprintf (dump_file, "Data ref a:\n");
4040 dump_data_reference (dump_file, dra);
4041 fprintf (dump_file, "Data ref b:\n");
4042 dump_data_reference (dump_file, drb);
4043 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4045 finalize_ddr_dependent (ddr, chrec_dont_know);
4049 if (dump_file && (dump_flags & TDF_DETAILS))
4050 fprintf (dump_file, ")\n");
4053 /* This computes the dependence relation for the same data
4054 reference into DDR. */
4056 static void
4057 compute_self_dependence (struct data_dependence_relation *ddr)
4059 unsigned int i;
4060 struct subscript *subscript;
4062 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
4063 return;
4065 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
4066 i++)
4068 if (SUB_CONFLICTS_IN_A (subscript))
4069 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
4070 if (SUB_CONFLICTS_IN_B (subscript))
4071 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
4073 /* The accessed index overlaps for each iteration. */
4074 SUB_CONFLICTS_IN_A (subscript)
4075 = conflict_fn (1, affine_fn_cst (integer_zero_node));
4076 SUB_CONFLICTS_IN_B (subscript)
4077 = conflict_fn (1, affine_fn_cst (integer_zero_node));
4078 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
4081 /* The distance vector is the zero vector. */
4082 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4083 save_dir_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4086 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4087 the data references in DATAREFS, in the LOOP_NEST. When
4088 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4089 relations. */
4091 void
4092 compute_all_dependences (VEC (data_reference_p, heap) *datarefs,
4093 VEC (ddr_p, heap) **dependence_relations,
4094 VEC (loop_p, heap) *loop_nest,
4095 bool compute_self_and_rr)
4097 struct data_dependence_relation *ddr;
4098 struct data_reference *a, *b;
4099 unsigned int i, j;
4101 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4102 for (j = i + 1; VEC_iterate (data_reference_p, datarefs, j, b); j++)
4103 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4105 ddr = initialize_data_dependence_relation (a, b, loop_nest);
4106 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4107 if (loop_nest)
4108 compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
4111 if (compute_self_and_rr)
4112 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4114 ddr = initialize_data_dependence_relation (a, a, loop_nest);
4115 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4116 compute_self_dependence (ddr);
4120 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4121 true if STMT clobbers memory, false otherwise. */
4123 bool
4124 get_references_in_stmt (gimple stmt, VEC (data_ref_loc, heap) **references)
4126 bool clobbers_memory = false;
4127 data_ref_loc *ref;
4128 tree *op0, *op1;
4129 enum gimple_code stmt_code = gimple_code (stmt);
4131 *references = NULL;
4133 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4134 Calls have side-effects, except those to const or pure
4135 functions. */
4136 if ((stmt_code == GIMPLE_CALL
4137 && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)))
4138 || (stmt_code == GIMPLE_ASM
4139 && gimple_asm_volatile_p (stmt)))
4140 clobbers_memory = true;
4142 if (!gimple_vuse (stmt))
4143 return clobbers_memory;
4145 if (stmt_code == GIMPLE_ASSIGN)
4147 tree base;
4148 op0 = gimple_assign_lhs_ptr (stmt);
4149 op1 = gimple_assign_rhs1_ptr (stmt);
4151 if (DECL_P (*op1)
4152 || (REFERENCE_CLASS_P (*op1)
4153 && (base = get_base_address (*op1))
4154 && TREE_CODE (base) != SSA_NAME))
4156 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4157 ref->pos = op1;
4158 ref->is_read = true;
4161 else if (stmt_code == GIMPLE_CALL)
4163 unsigned i, n;
4165 op0 = gimple_call_lhs_ptr (stmt);
4166 n = gimple_call_num_args (stmt);
4167 for (i = 0; i < n; i++)
4169 op1 = gimple_call_arg_ptr (stmt, i);
4171 if (DECL_P (*op1)
4172 || (REFERENCE_CLASS_P (*op1) && get_base_address (*op1)))
4174 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4175 ref->pos = op1;
4176 ref->is_read = true;
4180 else
4181 return clobbers_memory;
4183 if (*op0
4184 && (DECL_P (*op0)
4185 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0))))
4187 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4188 ref->pos = op0;
4189 ref->is_read = false;
4191 return clobbers_memory;
4194 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4195 reference, returns false, otherwise returns true. NEST is the outermost
4196 loop of the loop nest in which the references should be analyzed. */
4198 bool
4199 find_data_references_in_stmt (struct loop *nest, gimple stmt,
4200 VEC (data_reference_p, heap) **datarefs)
4202 unsigned i;
4203 VEC (data_ref_loc, heap) *references;
4204 data_ref_loc *ref;
4205 bool ret = true;
4206 data_reference_p dr;
4208 if (get_references_in_stmt (stmt, &references))
4210 VEC_free (data_ref_loc, heap, references);
4211 return false;
4214 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4216 dr = create_data_ref (nest, loop_containing_stmt (stmt),
4217 *ref->pos, stmt, ref->is_read);
4218 gcc_assert (dr != NULL);
4219 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4221 VEC_free (data_ref_loc, heap, references);
4222 return ret;
4225 /* Stores the data references in STMT to DATAREFS. If there is an
4226 unanalyzable reference, returns false, otherwise returns true.
4227 NEST is the outermost loop of the loop nest in which the references
4228 should be instantiated, LOOP is the loop in which the references
4229 should be analyzed. */
4231 bool
4232 graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple stmt,
4233 VEC (data_reference_p, heap) **datarefs)
4235 unsigned i;
4236 VEC (data_ref_loc, heap) *references;
4237 data_ref_loc *ref;
4238 bool ret = true;
4239 data_reference_p dr;
4241 if (get_references_in_stmt (stmt, &references))
4243 VEC_free (data_ref_loc, heap, references);
4244 return false;
4247 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4249 dr = create_data_ref (nest, loop, *ref->pos, stmt, ref->is_read);
4250 gcc_assert (dr != NULL);
4251 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4254 VEC_free (data_ref_loc, heap, references);
4255 return ret;
4258 /* Search the data references in LOOP, and record the information into
4259 DATAREFS. Returns chrec_dont_know when failing to analyze a
4260 difficult case, returns NULL_TREE otherwise. */
4262 tree
4263 find_data_references_in_bb (struct loop *loop, basic_block bb,
4264 VEC (data_reference_p, heap) **datarefs)
4266 gimple_stmt_iterator bsi;
4268 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4270 gimple stmt = gsi_stmt (bsi);
4272 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4274 struct data_reference *res;
4275 res = XCNEW (struct data_reference);
4276 VEC_safe_push (data_reference_p, heap, *datarefs, res);
4278 return chrec_dont_know;
4282 return NULL_TREE;
4285 /* Search the data references in LOOP, and record the information into
4286 DATAREFS. Returns chrec_dont_know when failing to analyze a
4287 difficult case, returns NULL_TREE otherwise.
4289 TODO: This function should be made smarter so that it can handle address
4290 arithmetic as if they were array accesses, etc. */
4292 tree
4293 find_data_references_in_loop (struct loop *loop,
4294 VEC (data_reference_p, heap) **datarefs)
4296 basic_block bb, *bbs;
4297 unsigned int i;
4299 bbs = get_loop_body_in_dom_order (loop);
4301 for (i = 0; i < loop->num_nodes; i++)
4303 bb = bbs[i];
4305 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4307 free (bbs);
4308 return chrec_dont_know;
4311 free (bbs);
4313 return NULL_TREE;
4316 /* Recursive helper function. */
4318 static bool
4319 find_loop_nest_1 (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4321 /* Inner loops of the nest should not contain siblings. Example:
4322 when there are two consecutive loops,
4324 | loop_0
4325 | loop_1
4326 | A[{0, +, 1}_1]
4327 | endloop_1
4328 | loop_2
4329 | A[{0, +, 1}_2]
4330 | endloop_2
4331 | endloop_0
4333 the dependence relation cannot be captured by the distance
4334 abstraction. */
4335 if (loop->next)
4336 return false;
4338 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4339 if (loop->inner)
4340 return find_loop_nest_1 (loop->inner, loop_nest);
4341 return true;
4344 /* Return false when the LOOP is not well nested. Otherwise return
4345 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4346 contain the loops from the outermost to the innermost, as they will
4347 appear in the classic distance vector. */
4349 bool
4350 find_loop_nest (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4352 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4353 if (loop->inner)
4354 return find_loop_nest_1 (loop->inner, loop_nest);
4355 return true;
4358 /* Returns true when the data dependences have been computed, false otherwise.
4359 Given a loop nest LOOP, the following vectors are returned:
4360 DATAREFS is initialized to all the array elements contained in this loop,
4361 DEPENDENCE_RELATIONS contains the relations between the data references.
4362 Compute read-read and self relations if
4363 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4365 bool
4366 compute_data_dependences_for_loop (struct loop *loop,
4367 bool compute_self_and_read_read_dependences,
4368 VEC (loop_p, heap) **loop_nest,
4369 VEC (data_reference_p, heap) **datarefs,
4370 VEC (ddr_p, heap) **dependence_relations)
4372 bool res = true;
4374 memset (&dependence_stats, 0, sizeof (dependence_stats));
4376 /* If the loop nest is not well formed, or one of the data references
4377 is not computable, give up without spending time to compute other
4378 dependences. */
4379 if (!loop
4380 || !find_loop_nest (loop, loop_nest)
4381 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know)
4383 struct data_dependence_relation *ddr;
4385 /* Insert a single relation into dependence_relations:
4386 chrec_dont_know. */
4387 ddr = initialize_data_dependence_relation (NULL, NULL, *loop_nest);
4388 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4389 res = false;
4391 else
4392 compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
4393 compute_self_and_read_read_dependences);
4395 if (dump_file && (dump_flags & TDF_STATS))
4397 fprintf (dump_file, "Dependence tester statistics:\n");
4399 fprintf (dump_file, "Number of dependence tests: %d\n",
4400 dependence_stats.num_dependence_tests);
4401 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4402 dependence_stats.num_dependence_dependent);
4403 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4404 dependence_stats.num_dependence_independent);
4405 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4406 dependence_stats.num_dependence_undetermined);
4408 fprintf (dump_file, "Number of subscript tests: %d\n",
4409 dependence_stats.num_subscript_tests);
4410 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4411 dependence_stats.num_subscript_undetermined);
4412 fprintf (dump_file, "Number of same subscript function: %d\n",
4413 dependence_stats.num_same_subscript_function);
4415 fprintf (dump_file, "Number of ziv tests: %d\n",
4416 dependence_stats.num_ziv);
4417 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4418 dependence_stats.num_ziv_dependent);
4419 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4420 dependence_stats.num_ziv_independent);
4421 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4422 dependence_stats.num_ziv_unimplemented);
4424 fprintf (dump_file, "Number of siv tests: %d\n",
4425 dependence_stats.num_siv);
4426 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4427 dependence_stats.num_siv_dependent);
4428 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4429 dependence_stats.num_siv_independent);
4430 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4431 dependence_stats.num_siv_unimplemented);
4433 fprintf (dump_file, "Number of miv tests: %d\n",
4434 dependence_stats.num_miv);
4435 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4436 dependence_stats.num_miv_dependent);
4437 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4438 dependence_stats.num_miv_independent);
4439 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4440 dependence_stats.num_miv_unimplemented);
4443 return res;
4446 /* Returns true when the data dependences for the basic block BB have been
4447 computed, false otherwise.
4448 DATAREFS is initialized to all the array elements contained in this basic
4449 block, DEPENDENCE_RELATIONS contains the relations between the data
4450 references. Compute read-read and self relations if
4451 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4452 bool
4453 compute_data_dependences_for_bb (basic_block bb,
4454 bool compute_self_and_read_read_dependences,
4455 VEC (data_reference_p, heap) **datarefs,
4456 VEC (ddr_p, heap) **dependence_relations)
4458 if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know)
4459 return false;
4461 compute_all_dependences (*datarefs, dependence_relations, NULL,
4462 compute_self_and_read_read_dependences);
4463 return true;
4466 /* Entry point (for testing only). Analyze all the data references
4467 and the dependence relations in LOOP.
4469 The data references are computed first.
4471 A relation on these nodes is represented by a complete graph. Some
4472 of the relations could be of no interest, thus the relations can be
4473 computed on demand.
4475 In the following function we compute all the relations. This is
4476 just a first implementation that is here for:
4477 - for showing how to ask for the dependence relations,
4478 - for the debugging the whole dependence graph,
4479 - for the dejagnu testcases and maintenance.
4481 It is possible to ask only for a part of the graph, avoiding to
4482 compute the whole dependence graph. The computed dependences are
4483 stored in a knowledge base (KB) such that later queries don't
4484 recompute the same information. The implementation of this KB is
4485 transparent to the optimizer, and thus the KB can be changed with a
4486 more efficient implementation, or the KB could be disabled. */
4487 static void
4488 analyze_all_data_dependences (struct loop *loop)
4490 unsigned int i;
4491 int nb_data_refs = 10;
4492 VEC (data_reference_p, heap) *datarefs =
4493 VEC_alloc (data_reference_p, heap, nb_data_refs);
4494 VEC (ddr_p, heap) *dependence_relations =
4495 VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs);
4496 VEC (loop_p, heap) *loop_nest = VEC_alloc (loop_p, heap, 3);
4498 /* Compute DDs on the whole function. */
4499 compute_data_dependences_for_loop (loop, false, &loop_nest, &datarefs,
4500 &dependence_relations);
4502 if (dump_file)
4504 dump_data_dependence_relations (dump_file, dependence_relations);
4505 fprintf (dump_file, "\n\n");
4507 if (dump_flags & TDF_DETAILS)
4508 dump_dist_dir_vectors (dump_file, dependence_relations);
4510 if (dump_flags & TDF_STATS)
4512 unsigned nb_top_relations = 0;
4513 unsigned nb_bot_relations = 0;
4514 unsigned nb_chrec_relations = 0;
4515 struct data_dependence_relation *ddr;
4517 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4519 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4520 nb_top_relations++;
4522 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4523 nb_bot_relations++;
4525 else
4526 nb_chrec_relations++;
4529 gather_stats_on_scev_database ();
4533 VEC_free (loop_p, heap, loop_nest);
4534 free_dependence_relations (dependence_relations);
4535 free_data_refs (datarefs);
4538 /* Computes all the data dependences and check that the results of
4539 several analyzers are the same. */
4541 void
4542 tree_check_data_deps (void)
4544 loop_iterator li;
4545 struct loop *loop_nest;
4547 FOR_EACH_LOOP (li, loop_nest, 0)
4548 analyze_all_data_dependences (loop_nest);
4551 /* Free the memory used by a data dependence relation DDR. */
4553 void
4554 free_dependence_relation (struct data_dependence_relation *ddr)
4556 if (ddr == NULL)
4557 return;
4559 if (DDR_SUBSCRIPTS (ddr))
4560 free_subscripts (DDR_SUBSCRIPTS (ddr));
4561 if (DDR_DIST_VECTS (ddr))
4562 VEC_free (lambda_vector, heap, DDR_DIST_VECTS (ddr));
4563 if (DDR_DIR_VECTS (ddr))
4564 VEC_free (lambda_vector, heap, DDR_DIR_VECTS (ddr));
4566 free (ddr);
4569 /* Free the memory used by the data dependence relations from
4570 DEPENDENCE_RELATIONS. */
4572 void
4573 free_dependence_relations (VEC (ddr_p, heap) *dependence_relations)
4575 unsigned int i;
4576 struct data_dependence_relation *ddr;
4578 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4579 if (ddr)
4580 free_dependence_relation (ddr);
4582 VEC_free (ddr_p, heap, dependence_relations);
4585 /* Free the memory used by the data references from DATAREFS. */
4587 void
4588 free_data_refs (VEC (data_reference_p, heap) *datarefs)
4590 unsigned int i;
4591 struct data_reference *dr;
4593 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
4594 free_data_ref (dr);
4595 VEC_free (data_reference_p, heap, datarefs);
4600 /* Dump vertex I in RDG to FILE. */
4602 void
4603 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
4605 struct vertex *v = &(rdg->vertices[i]);
4606 struct graph_edge *e;
4608 fprintf (file, "(vertex %d: (%s%s) (in:", i,
4609 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
4610 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
4612 if (v->pred)
4613 for (e = v->pred; e; e = e->pred_next)
4614 fprintf (file, " %d", e->src);
4616 fprintf (file, ") (out:");
4618 if (v->succ)
4619 for (e = v->succ; e; e = e->succ_next)
4620 fprintf (file, " %d", e->dest);
4622 fprintf (file, ")\n");
4623 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
4624 fprintf (file, ")\n");
4627 /* Call dump_rdg_vertex on stderr. */
4629 DEBUG_FUNCTION void
4630 debug_rdg_vertex (struct graph *rdg, int i)
4632 dump_rdg_vertex (stderr, rdg, i);
4635 /* Dump component C of RDG to FILE. If DUMPED is non-null, set the
4636 dumped vertices to that bitmap. */
4638 void dump_rdg_component (FILE *file, struct graph *rdg, int c, bitmap dumped)
4640 int i;
4642 fprintf (file, "(%d\n", c);
4644 for (i = 0; i < rdg->n_vertices; i++)
4645 if (rdg->vertices[i].component == c)
4647 if (dumped)
4648 bitmap_set_bit (dumped, i);
4650 dump_rdg_vertex (file, rdg, i);
4653 fprintf (file, ")\n");
4656 /* Call dump_rdg_vertex on stderr. */
4658 DEBUG_FUNCTION void
4659 debug_rdg_component (struct graph *rdg, int c)
4661 dump_rdg_component (stderr, rdg, c, NULL);
4664 /* Dump the reduced dependence graph RDG to FILE. */
4666 void
4667 dump_rdg (FILE *file, struct graph *rdg)
4669 int i;
4670 bitmap dumped = BITMAP_ALLOC (NULL);
4672 fprintf (file, "(rdg\n");
4674 for (i = 0; i < rdg->n_vertices; i++)
4675 if (!bitmap_bit_p (dumped, i))
4676 dump_rdg_component (file, rdg, rdg->vertices[i].component, dumped);
4678 fprintf (file, ")\n");
4679 BITMAP_FREE (dumped);
4682 /* Call dump_rdg on stderr. */
4684 DEBUG_FUNCTION void
4685 debug_rdg (struct graph *rdg)
4687 dump_rdg (stderr, rdg);
4690 static void
4691 dot_rdg_1 (FILE *file, struct graph *rdg)
4693 int i;
4695 fprintf (file, "digraph RDG {\n");
4697 for (i = 0; i < rdg->n_vertices; i++)
4699 struct vertex *v = &(rdg->vertices[i]);
4700 struct graph_edge *e;
4702 /* Highlight reads from memory. */
4703 if (RDG_MEM_READS_STMT (rdg, i))
4704 fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
4706 /* Highlight stores to memory. */
4707 if (RDG_MEM_WRITE_STMT (rdg, i))
4708 fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
4710 if (v->succ)
4711 for (e = v->succ; e; e = e->succ_next)
4712 switch (RDGE_TYPE (e))
4714 case input_dd:
4715 fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
4716 break;
4718 case output_dd:
4719 fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
4720 break;
4722 case flow_dd:
4723 /* These are the most common dependences: don't print these. */
4724 fprintf (file, "%d -> %d \n", i, e->dest);
4725 break;
4727 case anti_dd:
4728 fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
4729 break;
4731 default:
4732 gcc_unreachable ();
4736 fprintf (file, "}\n\n");
4739 /* Display the Reduced Dependence Graph using dotty. */
4740 extern void dot_rdg (struct graph *);
4742 DEBUG_FUNCTION void
4743 dot_rdg (struct graph *rdg)
4745 /* When debugging, enable the following code. This cannot be used
4746 in production compilers because it calls "system". */
4747 #if 0
4748 FILE *file = fopen ("/tmp/rdg.dot", "w");
4749 gcc_assert (file != NULL);
4751 dot_rdg_1 (file, rdg);
4752 fclose (file);
4754 system ("dotty /tmp/rdg.dot &");
4755 #else
4756 dot_rdg_1 (stderr, rdg);
4757 #endif
4760 /* This structure is used for recording the mapping statement index in
4761 the RDG. */
4763 struct GTY(()) rdg_vertex_info
4765 gimple stmt;
4766 int index;
4769 /* Returns the index of STMT in RDG. */
4772 rdg_vertex_for_stmt (struct graph *rdg, gimple stmt)
4774 struct rdg_vertex_info rvi, *slot;
4776 rvi.stmt = stmt;
4777 slot = (struct rdg_vertex_info *) htab_find (rdg->indices, &rvi);
4779 if (!slot)
4780 return -1;
4782 return slot->index;
4785 /* Creates an edge in RDG for each distance vector from DDR. The
4786 order that we keep track of in the RDG is the order in which
4787 statements have to be executed. */
4789 static void
4790 create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4792 struct graph_edge *e;
4793 int va, vb;
4794 data_reference_p dra = DDR_A (ddr);
4795 data_reference_p drb = DDR_B (ddr);
4796 unsigned level = ddr_dependence_level (ddr);
4798 /* For non scalar dependences, when the dependence is REVERSED,
4799 statement B has to be executed before statement A. */
4800 if (level > 0
4801 && !DDR_REVERSED_P (ddr))
4803 data_reference_p tmp = dra;
4804 dra = drb;
4805 drb = tmp;
4808 va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
4809 vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
4811 if (va < 0 || vb < 0)
4812 return;
4814 e = add_edge (rdg, va, vb);
4815 e->data = XNEW (struct rdg_edge);
4817 RDGE_LEVEL (e) = level;
4818 RDGE_RELATION (e) = ddr;
4820 /* Determines the type of the data dependence. */
4821 if (DR_IS_READ (dra) && DR_IS_READ (drb))
4822 RDGE_TYPE (e) = input_dd;
4823 else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
4824 RDGE_TYPE (e) = output_dd;
4825 else if (DR_IS_WRITE (dra) && DR_IS_READ (drb))
4826 RDGE_TYPE (e) = flow_dd;
4827 else if (DR_IS_READ (dra) && DR_IS_WRITE (drb))
4828 RDGE_TYPE (e) = anti_dd;
4831 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
4832 the index of DEF in RDG. */
4834 static void
4835 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
4837 use_operand_p imm_use_p;
4838 imm_use_iterator iterator;
4840 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
4842 struct graph_edge *e;
4843 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
4845 if (use < 0)
4846 continue;
4848 e = add_edge (rdg, idef, use);
4849 e->data = XNEW (struct rdg_edge);
4850 RDGE_TYPE (e) = flow_dd;
4851 RDGE_RELATION (e) = NULL;
4855 /* Creates the edges of the reduced dependence graph RDG. */
4857 static void
4858 create_rdg_edges (struct graph *rdg, VEC (ddr_p, heap) *ddrs)
4860 int i;
4861 struct data_dependence_relation *ddr;
4862 def_operand_p def_p;
4863 ssa_op_iter iter;
4865 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
4866 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4867 create_rdg_edge_for_ddr (rdg, ddr);
4869 for (i = 0; i < rdg->n_vertices; i++)
4870 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
4871 iter, SSA_OP_DEF)
4872 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
4875 /* Build the vertices of the reduced dependence graph RDG. */
4877 void
4878 create_rdg_vertices (struct graph *rdg, VEC (gimple, heap) *stmts)
4880 int i, j;
4881 gimple stmt;
4883 FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
4885 VEC (data_ref_loc, heap) *references;
4886 data_ref_loc *ref;
4887 struct vertex *v = &(rdg->vertices[i]);
4888 struct rdg_vertex_info *rvi = XNEW (struct rdg_vertex_info);
4889 struct rdg_vertex_info **slot;
4891 rvi->stmt = stmt;
4892 rvi->index = i;
4893 slot = (struct rdg_vertex_info **) htab_find_slot (rdg->indices, rvi, INSERT);
4895 if (!*slot)
4896 *slot = rvi;
4897 else
4898 free (rvi);
4900 v->data = XNEW (struct rdg_vertex);
4901 RDG_STMT (rdg, i) = stmt;
4903 RDG_MEM_WRITE_STMT (rdg, i) = false;
4904 RDG_MEM_READS_STMT (rdg, i) = false;
4905 if (gimple_code (stmt) == GIMPLE_PHI)
4906 continue;
4908 get_references_in_stmt (stmt, &references);
4909 FOR_EACH_VEC_ELT (data_ref_loc, references, j, ref)
4910 if (!ref->is_read)
4911 RDG_MEM_WRITE_STMT (rdg, i) = true;
4912 else
4913 RDG_MEM_READS_STMT (rdg, i) = true;
4915 VEC_free (data_ref_loc, heap, references);
4919 /* Initialize STMTS with all the statements of LOOP. When
4920 INCLUDE_PHIS is true, include also the PHI nodes. The order in
4921 which we discover statements is important as
4922 generate_loops_for_partition is using the same traversal for
4923 identifying statements. */
4925 static void
4926 stmts_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
4928 unsigned int i;
4929 basic_block *bbs = get_loop_body_in_dom_order (loop);
4931 for (i = 0; i < loop->num_nodes; i++)
4933 basic_block bb = bbs[i];
4934 gimple_stmt_iterator bsi;
4935 gimple stmt;
4937 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4938 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
4940 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4942 stmt = gsi_stmt (bsi);
4943 if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
4944 VEC_safe_push (gimple, heap, *stmts, stmt);
4948 free (bbs);
4951 /* Returns true when all the dependences are computable. */
4953 static bool
4954 known_dependences_p (VEC (ddr_p, heap) *dependence_relations)
4956 ddr_p ddr;
4957 unsigned int i;
4959 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4960 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4961 return false;
4963 return true;
4966 /* Computes a hash function for element ELT. */
4968 static hashval_t
4969 hash_stmt_vertex_info (const void *elt)
4971 const struct rdg_vertex_info *const rvi =
4972 (const struct rdg_vertex_info *) elt;
4973 gimple stmt = rvi->stmt;
4975 return htab_hash_pointer (stmt);
4978 /* Compares database elements E1 and E2. */
4980 static int
4981 eq_stmt_vertex_info (const void *e1, const void *e2)
4983 const struct rdg_vertex_info *elt1 = (const struct rdg_vertex_info *) e1;
4984 const struct rdg_vertex_info *elt2 = (const struct rdg_vertex_info *) e2;
4986 return elt1->stmt == elt2->stmt;
4989 /* Free the element E. */
4991 static void
4992 hash_stmt_vertex_del (void *e)
4994 free (e);
4997 /* Build the Reduced Dependence Graph (RDG) with one vertex per
4998 statement of the loop nest, and one edge per data dependence or
4999 scalar dependence. */
5001 struct graph *
5002 build_empty_rdg (int n_stmts)
5004 int nb_data_refs = 10;
5005 struct graph *rdg = new_graph (n_stmts);
5007 rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
5008 eq_stmt_vertex_info, hash_stmt_vertex_del);
5009 return rdg;
5012 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5013 statement of the loop nest, and one edge per data dependence or
5014 scalar dependence. */
5016 struct graph *
5017 build_rdg (struct loop *loop,
5018 VEC (loop_p, heap) **loop_nest,
5019 VEC (ddr_p, heap) **dependence_relations,
5020 VEC (data_reference_p, heap) **datarefs)
5022 struct graph *rdg = NULL;
5023 VEC (gimple, heap) *stmts = VEC_alloc (gimple, heap, 10);
5025 compute_data_dependences_for_loop (loop, false, loop_nest, datarefs,
5026 dependence_relations);
5028 if (known_dependences_p (*dependence_relations))
5030 stmts_from_loop (loop, &stmts);
5031 rdg = build_empty_rdg (VEC_length (gimple, stmts));
5032 create_rdg_vertices (rdg, stmts);
5033 create_rdg_edges (rdg, *dependence_relations);
5036 VEC_free (gimple, heap, stmts);
5037 return rdg;
5040 /* Free the reduced dependence graph RDG. */
5042 void
5043 free_rdg (struct graph *rdg)
5045 int i;
5047 for (i = 0; i < rdg->n_vertices; i++)
5049 struct vertex *v = &(rdg->vertices[i]);
5050 struct graph_edge *e;
5052 for (e = v->succ; e; e = e->succ_next)
5053 free (e->data);
5055 free (v->data);
5058 htab_delete (rdg->indices);
5059 free_graph (rdg);
5062 /* Initialize STMTS with all the statements of LOOP that contain a
5063 store to memory. */
5065 void
5066 stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5068 unsigned int i;
5069 basic_block *bbs = get_loop_body_in_dom_order (loop);
5071 for (i = 0; i < loop->num_nodes; i++)
5073 basic_block bb = bbs[i];
5074 gimple_stmt_iterator bsi;
5076 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5077 if (gimple_vdef (gsi_stmt (bsi)))
5078 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
5081 free (bbs);
5084 /* Returns true when the statement at STMT is of the form "A[i] = 0"
5085 that contains a data reference on its LHS with a stride of the same
5086 size as its unit type. */
5088 bool
5089 stmt_with_adjacent_zero_store_dr_p (gimple stmt)
5091 tree op0, op1;
5092 bool res;
5093 struct data_reference *dr;
5095 if (!stmt
5096 || !gimple_vdef (stmt)
5097 || !is_gimple_assign (stmt)
5098 || !gimple_assign_single_p (stmt)
5099 || !(op1 = gimple_assign_rhs1 (stmt))
5100 || !(integer_zerop (op1) || real_zerop (op1)))
5101 return false;
5103 dr = XCNEW (struct data_reference);
5104 op0 = gimple_assign_lhs (stmt);
5106 DR_STMT (dr) = stmt;
5107 DR_REF (dr) = op0;
5109 res = dr_analyze_innermost (dr)
5110 && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0));
5112 free_data_ref (dr);
5113 return res;
5116 /* Initialize STMTS with all the statements of LOOP that contain a
5117 store to memory of the form "A[i] = 0". */
5119 void
5120 stores_zero_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5122 unsigned int i;
5123 basic_block bb;
5124 gimple_stmt_iterator si;
5125 gimple stmt;
5126 basic_block *bbs = get_loop_body_in_dom_order (loop);
5128 for (i = 0; i < loop->num_nodes; i++)
5129 for (bb = bbs[i], si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5130 if ((stmt = gsi_stmt (si))
5131 && stmt_with_adjacent_zero_store_dr_p (stmt))
5132 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (si));
5134 free (bbs);
5137 /* For a data reference REF, return the declaration of its base
5138 address or NULL_TREE if the base is not determined. */
5140 static inline tree
5141 ref_base_address (gimple stmt, data_ref_loc *ref)
5143 tree base = NULL_TREE;
5144 tree base_address;
5145 struct data_reference *dr = XCNEW (struct data_reference);
5147 DR_STMT (dr) = stmt;
5148 DR_REF (dr) = *ref->pos;
5149 dr_analyze_innermost (dr);
5150 base_address = DR_BASE_ADDRESS (dr);
5152 if (!base_address)
5153 goto end;
5155 switch (TREE_CODE (base_address))
5157 case ADDR_EXPR:
5158 base = TREE_OPERAND (base_address, 0);
5159 break;
5161 default:
5162 base = base_address;
5163 break;
5166 end:
5167 free_data_ref (dr);
5168 return base;
5171 /* Determines whether the statement from vertex V of the RDG has a
5172 definition used outside the loop that contains this statement. */
5174 bool
5175 rdg_defs_used_in_other_loops_p (struct graph *rdg, int v)
5177 gimple stmt = RDG_STMT (rdg, v);
5178 struct loop *loop = loop_containing_stmt (stmt);
5179 use_operand_p imm_use_p;
5180 imm_use_iterator iterator;
5181 ssa_op_iter it;
5182 def_operand_p def_p;
5184 if (!loop)
5185 return true;
5187 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, it, SSA_OP_DEF)
5189 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, DEF_FROM_PTR (def_p))
5191 if (loop_containing_stmt (USE_STMT (imm_use_p)) != loop)
5192 return true;
5196 return false;
5199 /* Determines whether statements S1 and S2 access to similar memory
5200 locations. Two memory accesses are considered similar when they
5201 have the same base address declaration, i.e. when their
5202 ref_base_address is the same. */
5204 bool
5205 have_similar_memory_accesses (gimple s1, gimple s2)
5207 bool res = false;
5208 unsigned i, j;
5209 VEC (data_ref_loc, heap) *refs1, *refs2;
5210 data_ref_loc *ref1, *ref2;
5212 get_references_in_stmt (s1, &refs1);
5213 get_references_in_stmt (s2, &refs2);
5215 FOR_EACH_VEC_ELT (data_ref_loc, refs1, i, ref1)
5217 tree base1 = ref_base_address (s1, ref1);
5219 if (base1)
5220 FOR_EACH_VEC_ELT (data_ref_loc, refs2, j, ref2)
5221 if (base1 == ref_base_address (s2, ref2))
5223 res = true;
5224 goto end;
5228 end:
5229 VEC_free (data_ref_loc, heap, refs1);
5230 VEC_free (data_ref_loc, heap, refs2);
5231 return res;
5234 /* Helper function for the hashtab. */
5236 static int
5237 have_similar_memory_accesses_1 (const void *s1, const void *s2)
5239 return have_similar_memory_accesses (CONST_CAST_GIMPLE ((const_gimple) s1),
5240 CONST_CAST_GIMPLE ((const_gimple) s2));
5243 /* Helper function for the hashtab. */
5245 static hashval_t
5246 ref_base_address_1 (const void *s)
5248 gimple stmt = CONST_CAST_GIMPLE ((const_gimple) s);
5249 unsigned i;
5250 VEC (data_ref_loc, heap) *refs;
5251 data_ref_loc *ref;
5252 hashval_t res = 0;
5254 get_references_in_stmt (stmt, &refs);
5256 FOR_EACH_VEC_ELT (data_ref_loc, refs, i, ref)
5257 if (!ref->is_read)
5259 res = htab_hash_pointer (ref_base_address (stmt, ref));
5260 break;
5263 VEC_free (data_ref_loc, heap, refs);
5264 return res;
5267 /* Try to remove duplicated write data references from STMTS. */
5269 void
5270 remove_similar_memory_refs (VEC (gimple, heap) **stmts)
5272 unsigned i;
5273 gimple stmt;
5274 htab_t seen = htab_create (VEC_length (gimple, *stmts), ref_base_address_1,
5275 have_similar_memory_accesses_1, NULL);
5277 for (i = 0; VEC_iterate (gimple, *stmts, i, stmt); )
5279 void **slot;
5281 slot = htab_find_slot (seen, stmt, INSERT);
5283 if (*slot)
5284 VEC_ordered_remove (gimple, *stmts, i);
5285 else
5287 *slot = (void *) stmt;
5288 i++;
5292 htab_delete (seen);
5295 /* Returns the index of PARAMETER in the parameters vector of the
5296 ACCESS_MATRIX. If PARAMETER does not exist return -1. */
5299 access_matrix_get_index_for_parameter (tree parameter,
5300 struct access_matrix *access_matrix)
5302 int i;
5303 VEC (tree,heap) *lambda_parameters = AM_PARAMETERS (access_matrix);
5304 tree lambda_parameter;
5306 FOR_EACH_VEC_ELT (tree, lambda_parameters, i, lambda_parameter)
5307 if (lambda_parameter == parameter)
5308 return i + AM_NB_INDUCTION_VARS (access_matrix);
5310 return -1;