* config/sh/sh.c (sh_delegitimize_address): Handle UNSPEC_SYMOFF
[official-gcc.git] / gcc / tree-data-ref.c
blob54cb46c9464db191ff719f4af02e01f6e6897180
1 /* Data references and dependences detectors.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* This pass walks a given loop structure searching for array
23 references. The information about the array accesses is recorded
24 in DATA_REFERENCE structures.
26 The basic test for determining the dependences is:
27 given two access functions chrec1 and chrec2 to a same array, and
28 x and y two vectors from the iteration domain, the same element of
29 the array is accessed twice at iterations x and y if and only if:
30 | chrec1 (x) == chrec2 (y).
32 The goals of this analysis are:
34 - to determine the independence: the relation between two
35 independent accesses is qualified with the chrec_known (this
36 information allows a loop parallelization),
38 - when two data references access the same data, to qualify the
39 dependence relation with classic dependence representations:
41 - distance vectors
42 - direction vectors
43 - loop carried level dependence
44 - polyhedron dependence
45 or with the chains of recurrences based representation,
47 - to define a knowledge base for storing the data dependence
48 information,
50 - to define an interface to access this data.
53 Definitions:
55 - subscript: given two array accesses a subscript is the tuple
56 composed of the access functions for a given dimension. Example:
57 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
58 (f1, g1), (f2, g2), (f3, g3).
60 - Diophantine equation: an equation whose coefficients and
61 solutions are integer constants, for example the equation
62 | 3*x + 2*y = 1
63 has an integer solution x = 1 and y = -1.
65 References:
67 - "Advanced Compilation for High Performance Computing" by Randy
68 Allen and Ken Kennedy.
69 http://citeseer.ist.psu.edu/goff91practical.html
71 - "Loop Transformations for Restructuring Compilers - The Foundations"
72 by Utpal Banerjee.
77 #include "config.h"
78 #include "system.h"
79 #include "coretypes.h"
80 #include "gimple-pretty-print.h"
81 #include "tree-flow.h"
82 #include "cfgloop.h"
83 #include "tree-data-ref.h"
84 #include "tree-scalar-evolution.h"
85 #include "tree-pass.h"
86 #include "langhooks.h"
88 static struct datadep_stats
90 int num_dependence_tests;
91 int num_dependence_dependent;
92 int num_dependence_independent;
93 int num_dependence_undetermined;
95 int num_subscript_tests;
96 int num_subscript_undetermined;
97 int num_same_subscript_function;
99 int num_ziv;
100 int num_ziv_independent;
101 int num_ziv_dependent;
102 int num_ziv_unimplemented;
104 int num_siv;
105 int num_siv_independent;
106 int num_siv_dependent;
107 int num_siv_unimplemented;
109 int num_miv;
110 int num_miv_independent;
111 int num_miv_dependent;
112 int num_miv_unimplemented;
113 } dependence_stats;
115 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
116 struct data_reference *,
117 struct data_reference *,
118 struct loop *);
119 /* Returns true iff A divides B. */
121 static inline bool
122 tree_fold_divides_p (const_tree a, const_tree b)
124 gcc_assert (TREE_CODE (a) == INTEGER_CST);
125 gcc_assert (TREE_CODE (b) == INTEGER_CST);
126 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a, 0));
129 /* Returns true iff A divides B. */
131 static inline bool
132 int_divides_p (int a, int b)
134 return ((b % a) == 0);
139 /* Dump into FILE all the data references from DATAREFS. */
141 void
142 dump_data_references (FILE *file, VEC (data_reference_p, heap) *datarefs)
144 unsigned int i;
145 struct data_reference *dr;
147 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
148 dump_data_reference (file, dr);
151 /* Dump into STDERR all the data references from DATAREFS. */
153 DEBUG_FUNCTION void
154 debug_data_references (VEC (data_reference_p, heap) *datarefs)
156 dump_data_references (stderr, datarefs);
159 /* Dump to STDERR all the dependence relations from DDRS. */
161 DEBUG_FUNCTION void
162 debug_data_dependence_relations (VEC (ddr_p, heap) *ddrs)
164 dump_data_dependence_relations (stderr, ddrs);
167 /* Dump into FILE all the dependence relations from DDRS. */
169 void
170 dump_data_dependence_relations (FILE *file,
171 VEC (ddr_p, heap) *ddrs)
173 unsigned int i;
174 struct data_dependence_relation *ddr;
176 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
177 dump_data_dependence_relation (file, ddr);
180 /* Print to STDERR the data_reference DR. */
182 DEBUG_FUNCTION void
183 debug_data_reference (struct data_reference *dr)
185 dump_data_reference (stderr, dr);
188 /* Dump function for a DATA_REFERENCE structure. */
190 void
191 dump_data_reference (FILE *outf,
192 struct data_reference *dr)
194 unsigned int i;
196 fprintf (outf, "#(Data Ref: \n");
197 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
198 fprintf (outf, "# stmt: ");
199 print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
200 fprintf (outf, "# ref: ");
201 print_generic_stmt (outf, DR_REF (dr), 0);
202 fprintf (outf, "# base_object: ");
203 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
205 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
207 fprintf (outf, "# Access function %d: ", i);
208 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
210 fprintf (outf, "#)\n");
213 /* Dumps the affine function described by FN to the file OUTF. */
215 static void
216 dump_affine_function (FILE *outf, affine_fn fn)
218 unsigned i;
219 tree coef;
221 print_generic_expr (outf, VEC_index (tree, fn, 0), TDF_SLIM);
222 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
224 fprintf (outf, " + ");
225 print_generic_expr (outf, coef, TDF_SLIM);
226 fprintf (outf, " * x_%u", i);
230 /* Dumps the conflict function CF to the file OUTF. */
232 static void
233 dump_conflict_function (FILE *outf, conflict_function *cf)
235 unsigned i;
237 if (cf->n == NO_DEPENDENCE)
238 fprintf (outf, "no dependence\n");
239 else if (cf->n == NOT_KNOWN)
240 fprintf (outf, "not known\n");
241 else
243 for (i = 0; i < cf->n; i++)
245 fprintf (outf, "[");
246 dump_affine_function (outf, cf->fns[i]);
247 fprintf (outf, "]\n");
252 /* Dump function for a SUBSCRIPT structure. */
254 void
255 dump_subscript (FILE *outf, struct subscript *subscript)
257 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
259 fprintf (outf, "\n (subscript \n");
260 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
261 dump_conflict_function (outf, cf);
262 if (CF_NONTRIVIAL_P (cf))
264 tree last_iteration = SUB_LAST_CONFLICT (subscript);
265 fprintf (outf, " last_conflict: ");
266 print_generic_stmt (outf, last_iteration, 0);
269 cf = SUB_CONFLICTS_IN_B (subscript);
270 fprintf (outf, " iterations_that_access_an_element_twice_in_B: ");
271 dump_conflict_function (outf, cf);
272 if (CF_NONTRIVIAL_P (cf))
274 tree last_iteration = SUB_LAST_CONFLICT (subscript);
275 fprintf (outf, " last_conflict: ");
276 print_generic_stmt (outf, last_iteration, 0);
279 fprintf (outf, " (Subscript distance: ");
280 print_generic_stmt (outf, SUB_DISTANCE (subscript), 0);
281 fprintf (outf, " )\n");
282 fprintf (outf, " )\n");
285 /* Print the classic direction vector DIRV to OUTF. */
287 void
288 print_direction_vector (FILE *outf,
289 lambda_vector dirv,
290 int length)
292 int eq;
294 for (eq = 0; eq < length; eq++)
296 enum data_dependence_direction dir = ((enum data_dependence_direction)
297 dirv[eq]);
299 switch (dir)
301 case dir_positive:
302 fprintf (outf, " +");
303 break;
304 case dir_negative:
305 fprintf (outf, " -");
306 break;
307 case dir_equal:
308 fprintf (outf, " =");
309 break;
310 case dir_positive_or_equal:
311 fprintf (outf, " +=");
312 break;
313 case dir_positive_or_negative:
314 fprintf (outf, " +-");
315 break;
316 case dir_negative_or_equal:
317 fprintf (outf, " -=");
318 break;
319 case dir_star:
320 fprintf (outf, " *");
321 break;
322 default:
323 fprintf (outf, "indep");
324 break;
327 fprintf (outf, "\n");
330 /* Print a vector of direction vectors. */
332 void
333 print_dir_vectors (FILE *outf, VEC (lambda_vector, heap) *dir_vects,
334 int length)
336 unsigned j;
337 lambda_vector v;
339 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, v)
340 print_direction_vector (outf, v, length);
343 /* Print out a vector VEC of length N to OUTFILE. */
345 static inline void
346 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
348 int i;
350 for (i = 0; i < n; i++)
351 fprintf (outfile, "%3d ", vector[i]);
352 fprintf (outfile, "\n");
355 /* Print a vector of distance vectors. */
357 void
358 print_dist_vectors (FILE *outf, VEC (lambda_vector, heap) *dist_vects,
359 int length)
361 unsigned j;
362 lambda_vector v;
364 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, v)
365 print_lambda_vector (outf, v, length);
368 /* Debug version. */
370 DEBUG_FUNCTION void
371 debug_data_dependence_relation (struct data_dependence_relation *ddr)
373 dump_data_dependence_relation (stderr, ddr);
376 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
378 void
379 dump_data_dependence_relation (FILE *outf,
380 struct data_dependence_relation *ddr)
382 struct data_reference *dra, *drb;
384 fprintf (outf, "(Data Dep: \n");
386 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
388 if (ddr)
390 dra = DDR_A (ddr);
391 drb = DDR_B (ddr);
392 if (dra)
393 dump_data_reference (outf, dra);
394 else
395 fprintf (outf, " (nil)\n");
396 if (drb)
397 dump_data_reference (outf, drb);
398 else
399 fprintf (outf, " (nil)\n");
401 fprintf (outf, " (don't know)\n)\n");
402 return;
405 dra = DDR_A (ddr);
406 drb = DDR_B (ddr);
407 dump_data_reference (outf, dra);
408 dump_data_reference (outf, drb);
410 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
411 fprintf (outf, " (no dependence)\n");
413 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
415 unsigned int i;
416 struct loop *loopi;
418 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
420 fprintf (outf, " access_fn_A: ");
421 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
422 fprintf (outf, " access_fn_B: ");
423 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
424 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
427 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
428 fprintf (outf, " loop nest: (");
429 FOR_EACH_VEC_ELT (loop_p, DDR_LOOP_NEST (ddr), i, loopi)
430 fprintf (outf, "%d ", loopi->num);
431 fprintf (outf, ")\n");
433 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
435 fprintf (outf, " distance_vector: ");
436 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
437 DDR_NB_LOOPS (ddr));
440 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
442 fprintf (outf, " direction_vector: ");
443 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
444 DDR_NB_LOOPS (ddr));
448 fprintf (outf, ")\n");
451 /* Dump function for a DATA_DEPENDENCE_DIRECTION structure. */
453 void
454 dump_data_dependence_direction (FILE *file,
455 enum data_dependence_direction dir)
457 switch (dir)
459 case dir_positive:
460 fprintf (file, "+");
461 break;
463 case dir_negative:
464 fprintf (file, "-");
465 break;
467 case dir_equal:
468 fprintf (file, "=");
469 break;
471 case dir_positive_or_negative:
472 fprintf (file, "+-");
473 break;
475 case dir_positive_or_equal:
476 fprintf (file, "+=");
477 break;
479 case dir_negative_or_equal:
480 fprintf (file, "-=");
481 break;
483 case dir_star:
484 fprintf (file, "*");
485 break;
487 default:
488 break;
492 /* Dumps the distance and direction vectors in FILE. DDRS contains
493 the dependence relations, and VECT_SIZE is the size of the
494 dependence vectors, or in other words the number of loops in the
495 considered nest. */
497 void
498 dump_dist_dir_vectors (FILE *file, VEC (ddr_p, heap) *ddrs)
500 unsigned int i, j;
501 struct data_dependence_relation *ddr;
502 lambda_vector v;
504 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
505 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
507 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), j, v)
509 fprintf (file, "DISTANCE_V (");
510 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
511 fprintf (file, ")\n");
514 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), j, v)
516 fprintf (file, "DIRECTION_V (");
517 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
518 fprintf (file, ")\n");
522 fprintf (file, "\n\n");
525 /* Dumps the data dependence relations DDRS in FILE. */
527 void
528 dump_ddrs (FILE *file, VEC (ddr_p, heap) *ddrs)
530 unsigned int i;
531 struct data_dependence_relation *ddr;
533 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
534 dump_data_dependence_relation (file, ddr);
536 fprintf (file, "\n\n");
539 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
540 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
541 constant of type ssizetype, and returns true. If we cannot do this
542 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
543 is returned. */
545 static bool
546 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
547 tree *var, tree *off)
549 tree var0, var1;
550 tree off0, off1;
551 enum tree_code ocode = code;
553 *var = NULL_TREE;
554 *off = NULL_TREE;
556 switch (code)
558 case INTEGER_CST:
559 *var = build_int_cst (type, 0);
560 *off = fold_convert (ssizetype, op0);
561 return true;
563 case POINTER_PLUS_EXPR:
564 ocode = PLUS_EXPR;
565 /* FALLTHROUGH */
566 case PLUS_EXPR:
567 case MINUS_EXPR:
568 split_constant_offset (op0, &var0, &off0);
569 split_constant_offset (op1, &var1, &off1);
570 *var = fold_build2 (code, type, var0, var1);
571 *off = size_binop (ocode, off0, off1);
572 return true;
574 case MULT_EXPR:
575 if (TREE_CODE (op1) != INTEGER_CST)
576 return false;
578 split_constant_offset (op0, &var0, &off0);
579 *var = fold_build2 (MULT_EXPR, type, var0, op1);
580 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
581 return true;
583 case ADDR_EXPR:
585 tree base, poffset;
586 HOST_WIDE_INT pbitsize, pbitpos;
587 enum machine_mode pmode;
588 int punsignedp, pvolatilep;
590 op0 = TREE_OPERAND (op0, 0);
591 if (!handled_component_p (op0))
592 return false;
594 base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
595 &pmode, &punsignedp, &pvolatilep, false);
597 if (pbitpos % BITS_PER_UNIT != 0)
598 return false;
599 base = build_fold_addr_expr (base);
600 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
602 if (poffset)
604 split_constant_offset (poffset, &poffset, &off1);
605 off0 = size_binop (PLUS_EXPR, off0, off1);
606 if (POINTER_TYPE_P (TREE_TYPE (base)))
607 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base),
608 base, fold_convert (sizetype, poffset));
609 else
610 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
611 fold_convert (TREE_TYPE (base), poffset));
614 var0 = fold_convert (type, base);
616 /* If variable length types are involved, punt, otherwise casts
617 might be converted into ARRAY_REFs in gimplify_conversion.
618 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
619 possibly no longer appears in current GIMPLE, might resurface.
620 This perhaps could run
621 if (CONVERT_EXPR_P (var0))
623 gimplify_conversion (&var0);
624 // Attempt to fill in any within var0 found ARRAY_REF's
625 // element size from corresponding op embedded ARRAY_REF,
626 // if unsuccessful, just punt.
627 } */
628 while (POINTER_TYPE_P (type))
629 type = TREE_TYPE (type);
630 if (int_size_in_bytes (type) < 0)
631 return false;
633 *var = var0;
634 *off = off0;
635 return true;
638 case SSA_NAME:
640 gimple def_stmt = SSA_NAME_DEF_STMT (op0);
641 enum tree_code subcode;
643 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
644 return false;
646 var0 = gimple_assign_rhs1 (def_stmt);
647 subcode = gimple_assign_rhs_code (def_stmt);
648 var1 = gimple_assign_rhs2 (def_stmt);
650 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
652 CASE_CONVERT:
654 /* We must not introduce undefined overflow, and we must not change the value.
655 Hence we're okay if the inner type doesn't overflow to start with
656 (pointer or signed), the outer type also is an integer or pointer
657 and the outer precision is at least as large as the inner. */
658 tree itype = TREE_TYPE (op0);
659 if ((POINTER_TYPE_P (itype)
660 || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
661 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
662 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
664 split_constant_offset (op0, &var0, off);
665 *var = fold_convert (type, var0);
666 return true;
668 return false;
671 default:
672 return false;
676 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
677 will be ssizetype. */
679 void
680 split_constant_offset (tree exp, tree *var, tree *off)
682 tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
683 enum tree_code code;
685 *var = exp;
686 *off = ssize_int (0);
687 STRIP_NOPS (exp);
689 if (automatically_generated_chrec_p (exp))
690 return;
692 otype = TREE_TYPE (exp);
693 code = TREE_CODE (exp);
694 extract_ops_from_tree (exp, &code, &op0, &op1);
695 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
697 *var = fold_convert (type, e);
698 *off = o;
702 /* Returns the address ADDR of an object in a canonical shape (without nop
703 casts, and with type of pointer to the object). */
705 static tree
706 canonicalize_base_object_address (tree addr)
708 tree orig = addr;
710 STRIP_NOPS (addr);
712 /* The base address may be obtained by casting from integer, in that case
713 keep the cast. */
714 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
715 return orig;
717 if (TREE_CODE (addr) != ADDR_EXPR)
718 return addr;
720 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
723 /* Analyzes the behavior of the memory reference DR in the innermost loop or
724 basic block that contains it. Returns true if analysis succeed or false
725 otherwise. */
727 bool
728 dr_analyze_innermost (struct data_reference *dr)
730 gimple stmt = DR_STMT (dr);
731 struct loop *loop = loop_containing_stmt (stmt);
732 tree ref = DR_REF (dr);
733 HOST_WIDE_INT pbitsize, pbitpos;
734 tree base, poffset;
735 enum machine_mode pmode;
736 int punsignedp, pvolatilep;
737 affine_iv base_iv, offset_iv;
738 tree init, dinit, step;
739 bool in_loop = (loop && loop->num);
741 if (dump_file && (dump_flags & TDF_DETAILS))
742 fprintf (dump_file, "analyze_innermost: ");
744 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
745 &pmode, &punsignedp, &pvolatilep, false);
746 gcc_assert (base != NULL_TREE);
748 if (pbitpos % BITS_PER_UNIT != 0)
750 if (dump_file && (dump_flags & TDF_DETAILS))
751 fprintf (dump_file, "failed: bit offset alignment.\n");
752 return false;
755 if (TREE_CODE (base) == MEM_REF)
757 if (!integer_zerop (TREE_OPERAND (base, 1)))
759 if (!poffset)
761 double_int moff = mem_ref_offset (base);
762 poffset = double_int_to_tree (sizetype, moff);
764 else
765 poffset = size_binop (PLUS_EXPR, poffset, TREE_OPERAND (base, 1));
767 base = TREE_OPERAND (base, 0);
769 else
770 base = build_fold_addr_expr (base);
771 if (in_loop)
773 if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
774 false))
776 if (dump_file && (dump_flags & TDF_DETAILS))
777 fprintf (dump_file, "failed: evolution of base is not affine.\n");
778 return false;
781 else
783 base_iv.base = base;
784 base_iv.step = ssize_int (0);
785 base_iv.no_overflow = true;
788 if (!poffset)
790 offset_iv.base = ssize_int (0);
791 offset_iv.step = ssize_int (0);
793 else
795 if (!in_loop)
797 offset_iv.base = poffset;
798 offset_iv.step = ssize_int (0);
800 else if (!simple_iv (loop, loop_containing_stmt (stmt),
801 poffset, &offset_iv, false))
803 if (dump_file && (dump_flags & TDF_DETAILS))
804 fprintf (dump_file, "failed: evolution of offset is not"
805 " affine.\n");
806 return false;
810 init = ssize_int (pbitpos / BITS_PER_UNIT);
811 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
812 init = size_binop (PLUS_EXPR, init, dinit);
813 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
814 init = size_binop (PLUS_EXPR, init, dinit);
816 step = size_binop (PLUS_EXPR,
817 fold_convert (ssizetype, base_iv.step),
818 fold_convert (ssizetype, offset_iv.step));
820 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
822 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
823 DR_INIT (dr) = init;
824 DR_STEP (dr) = step;
826 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
828 if (dump_file && (dump_flags & TDF_DETAILS))
829 fprintf (dump_file, "success.\n");
831 return true;
834 /* Determines the base object and the list of indices of memory reference
835 DR, analyzed in LOOP and instantiated in loop nest NEST. */
837 static void
838 dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
840 VEC (tree, heap) *access_fns = NULL;
841 tree ref = unshare_expr (DR_REF (dr)), aref = ref, op;
842 tree base, off, access_fn = NULL_TREE;
843 basic_block before_loop = NULL;
845 if (nest)
846 before_loop = block_before_loop (nest);
848 while (handled_component_p (aref))
850 if (TREE_CODE (aref) == ARRAY_REF)
852 op = TREE_OPERAND (aref, 1);
853 if (nest)
855 access_fn = analyze_scalar_evolution (loop, op);
856 access_fn = instantiate_scev (before_loop, loop, access_fn);
857 VEC_safe_push (tree, heap, access_fns, access_fn);
860 TREE_OPERAND (aref, 1) = build_int_cst (TREE_TYPE (op), 0);
863 aref = TREE_OPERAND (aref, 0);
866 if (nest
867 && (INDIRECT_REF_P (aref)
868 || TREE_CODE (aref) == MEM_REF))
870 op = TREE_OPERAND (aref, 0);
871 access_fn = analyze_scalar_evolution (loop, op);
872 access_fn = instantiate_scev (before_loop, loop, access_fn);
873 base = initial_condition (access_fn);
874 split_constant_offset (base, &base, &off);
875 if (TREE_CODE (aref) == MEM_REF)
876 off = size_binop (PLUS_EXPR, off,
877 fold_convert (ssizetype, TREE_OPERAND (aref, 1)));
878 access_fn = chrec_replace_initial_condition (access_fn,
879 fold_convert (TREE_TYPE (base), off));
881 TREE_OPERAND (aref, 0) = base;
882 VEC_safe_push (tree, heap, access_fns, access_fn);
885 if (TREE_CODE (aref) == MEM_REF)
886 TREE_OPERAND (aref, 1)
887 = build_int_cst (TREE_TYPE (TREE_OPERAND (aref, 1)), 0);
889 if (TREE_CODE (ref) == MEM_REF
890 && TREE_CODE (TREE_OPERAND (ref, 0)) == ADDR_EXPR
891 && integer_zerop (TREE_OPERAND (ref, 1)))
892 ref = TREE_OPERAND (TREE_OPERAND (ref, 0), 0);
894 /* For canonicalization purposes we'd like to strip all outermost
895 zero-offset component-refs.
896 ??? For now simply handle zero-index array-refs. */
897 while (TREE_CODE (ref) == ARRAY_REF
898 && integer_zerop (TREE_OPERAND (ref, 1)))
899 ref = TREE_OPERAND (ref, 0);
901 DR_BASE_OBJECT (dr) = ref;
902 DR_ACCESS_FNS (dr) = access_fns;
905 /* Extracts the alias analysis information from the memory reference DR. */
907 static void
908 dr_analyze_alias (struct data_reference *dr)
910 tree ref = DR_REF (dr);
911 tree base = get_base_address (ref), addr;
913 if (INDIRECT_REF_P (base)
914 || TREE_CODE (base) == MEM_REF)
916 addr = TREE_OPERAND (base, 0);
917 if (TREE_CODE (addr) == SSA_NAME)
918 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
922 /* Returns true if the address of DR is invariant. */
924 static bool
925 dr_address_invariant_p (struct data_reference *dr)
927 unsigned i;
928 tree idx;
930 FOR_EACH_VEC_ELT (tree, DR_ACCESS_FNS (dr), i, idx)
931 if (tree_contains_chrecs (idx, NULL))
932 return false;
934 return true;
937 /* Frees data reference DR. */
939 void
940 free_data_ref (data_reference_p dr)
942 VEC_free (tree, heap, DR_ACCESS_FNS (dr));
943 free (dr);
946 /* Analyzes memory reference MEMREF accessed in STMT. The reference
947 is read if IS_READ is true, write otherwise. Returns the
948 data_reference description of MEMREF. NEST is the outermost loop
949 in which the reference should be instantiated, LOOP is the loop in
950 which the data reference should be analyzed. */
952 struct data_reference *
953 create_data_ref (loop_p nest, loop_p loop, tree memref, gimple stmt,
954 bool is_read)
956 struct data_reference *dr;
958 if (dump_file && (dump_flags & TDF_DETAILS))
960 fprintf (dump_file, "Creating dr for ");
961 print_generic_expr (dump_file, memref, TDF_SLIM);
962 fprintf (dump_file, "\n");
965 dr = XCNEW (struct data_reference);
966 DR_STMT (dr) = stmt;
967 DR_REF (dr) = memref;
968 DR_IS_READ (dr) = is_read;
970 dr_analyze_innermost (dr);
971 dr_analyze_indices (dr, nest, loop);
972 dr_analyze_alias (dr);
974 if (dump_file && (dump_flags & TDF_DETAILS))
976 fprintf (dump_file, "\tbase_address: ");
977 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
978 fprintf (dump_file, "\n\toffset from base address: ");
979 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
980 fprintf (dump_file, "\n\tconstant offset from base address: ");
981 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
982 fprintf (dump_file, "\n\tstep: ");
983 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
984 fprintf (dump_file, "\n\taligned to: ");
985 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
986 fprintf (dump_file, "\n\tbase_object: ");
987 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
988 fprintf (dump_file, "\n");
991 return dr;
994 /* Returns true if FNA == FNB. */
996 static bool
997 affine_function_equal_p (affine_fn fna, affine_fn fnb)
999 unsigned i, n = VEC_length (tree, fna);
1001 if (n != VEC_length (tree, fnb))
1002 return false;
1004 for (i = 0; i < n; i++)
1005 if (!operand_equal_p (VEC_index (tree, fna, i),
1006 VEC_index (tree, fnb, i), 0))
1007 return false;
1009 return true;
1012 /* If all the functions in CF are the same, returns one of them,
1013 otherwise returns NULL. */
1015 static affine_fn
1016 common_affine_function (conflict_function *cf)
1018 unsigned i;
1019 affine_fn comm;
1021 if (!CF_NONTRIVIAL_P (cf))
1022 return NULL;
1024 comm = cf->fns[0];
1026 for (i = 1; i < cf->n; i++)
1027 if (!affine_function_equal_p (comm, cf->fns[i]))
1028 return NULL;
1030 return comm;
1033 /* Returns the base of the affine function FN. */
1035 static tree
1036 affine_function_base (affine_fn fn)
1038 return VEC_index (tree, fn, 0);
1041 /* Returns true if FN is a constant. */
1043 static bool
1044 affine_function_constant_p (affine_fn fn)
1046 unsigned i;
1047 tree coef;
1049 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
1050 if (!integer_zerop (coef))
1051 return false;
1053 return true;
1056 /* Returns true if FN is the zero constant function. */
1058 static bool
1059 affine_function_zero_p (affine_fn fn)
1061 return (integer_zerop (affine_function_base (fn))
1062 && affine_function_constant_p (fn));
1065 /* Returns a signed integer type with the largest precision from TA
1066 and TB. */
1068 static tree
1069 signed_type_for_types (tree ta, tree tb)
1071 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1072 return signed_type_for (ta);
1073 else
1074 return signed_type_for (tb);
1077 /* Applies operation OP on affine functions FNA and FNB, and returns the
1078 result. */
1080 static affine_fn
1081 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1083 unsigned i, n, m;
1084 affine_fn ret;
1085 tree coef;
1087 if (VEC_length (tree, fnb) > VEC_length (tree, fna))
1089 n = VEC_length (tree, fna);
1090 m = VEC_length (tree, fnb);
1092 else
1094 n = VEC_length (tree, fnb);
1095 m = VEC_length (tree, fna);
1098 ret = VEC_alloc (tree, heap, m);
1099 for (i = 0; i < n; i++)
1101 tree type = signed_type_for_types (TREE_TYPE (VEC_index (tree, fna, i)),
1102 TREE_TYPE (VEC_index (tree, fnb, i)));
1104 VEC_quick_push (tree, ret,
1105 fold_build2 (op, type,
1106 VEC_index (tree, fna, i),
1107 VEC_index (tree, fnb, i)));
1110 for (; VEC_iterate (tree, fna, i, coef); i++)
1111 VEC_quick_push (tree, ret,
1112 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1113 coef, integer_zero_node));
1114 for (; VEC_iterate (tree, fnb, i, coef); i++)
1115 VEC_quick_push (tree, ret,
1116 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1117 integer_zero_node, coef));
1119 return ret;
1122 /* Returns the sum of affine functions FNA and FNB. */
1124 static affine_fn
1125 affine_fn_plus (affine_fn fna, affine_fn fnb)
1127 return affine_fn_op (PLUS_EXPR, fna, fnb);
1130 /* Returns the difference of affine functions FNA and FNB. */
1132 static affine_fn
1133 affine_fn_minus (affine_fn fna, affine_fn fnb)
1135 return affine_fn_op (MINUS_EXPR, fna, fnb);
1138 /* Frees affine function FN. */
1140 static void
1141 affine_fn_free (affine_fn fn)
1143 VEC_free (tree, heap, fn);
1146 /* Determine for each subscript in the data dependence relation DDR
1147 the distance. */
1149 static void
1150 compute_subscript_distance (struct data_dependence_relation *ddr)
1152 conflict_function *cf_a, *cf_b;
1153 affine_fn fn_a, fn_b, diff;
1155 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1157 unsigned int i;
1159 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1161 struct subscript *subscript;
1163 subscript = DDR_SUBSCRIPT (ddr, i);
1164 cf_a = SUB_CONFLICTS_IN_A (subscript);
1165 cf_b = SUB_CONFLICTS_IN_B (subscript);
1167 fn_a = common_affine_function (cf_a);
1168 fn_b = common_affine_function (cf_b);
1169 if (!fn_a || !fn_b)
1171 SUB_DISTANCE (subscript) = chrec_dont_know;
1172 return;
1174 diff = affine_fn_minus (fn_a, fn_b);
1176 if (affine_function_constant_p (diff))
1177 SUB_DISTANCE (subscript) = affine_function_base (diff);
1178 else
1179 SUB_DISTANCE (subscript) = chrec_dont_know;
1181 affine_fn_free (diff);
1186 /* Returns the conflict function for "unknown". */
1188 static conflict_function *
1189 conflict_fn_not_known (void)
1191 conflict_function *fn = XCNEW (conflict_function);
1192 fn->n = NOT_KNOWN;
1194 return fn;
1197 /* Returns the conflict function for "independent". */
1199 static conflict_function *
1200 conflict_fn_no_dependence (void)
1202 conflict_function *fn = XCNEW (conflict_function);
1203 fn->n = NO_DEPENDENCE;
1205 return fn;
1208 /* Returns true if the address of OBJ is invariant in LOOP. */
1210 static bool
1211 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1213 while (handled_component_p (obj))
1215 if (TREE_CODE (obj) == ARRAY_REF)
1217 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1218 need to check the stride and the lower bound of the reference. */
1219 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1220 loop->num)
1221 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1222 loop->num))
1223 return false;
1225 else if (TREE_CODE (obj) == COMPONENT_REF)
1227 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1228 loop->num))
1229 return false;
1231 obj = TREE_OPERAND (obj, 0);
1234 if (!INDIRECT_REF_P (obj)
1235 && TREE_CODE (obj) != MEM_REF)
1236 return true;
1238 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1239 loop->num);
1242 /* Returns false if we can prove that data references A and B do not alias,
1243 true otherwise. */
1245 bool
1246 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b)
1248 tree addr_a = DR_BASE_OBJECT (a);
1249 tree addr_b = DR_BASE_OBJECT (b);
1251 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
1252 return refs_output_dependent_p (addr_a, addr_b);
1253 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
1254 return refs_anti_dependent_p (addr_a, addr_b);
1255 return refs_may_alias_p (addr_a, addr_b);
1258 static void compute_self_dependence (struct data_dependence_relation *);
1260 /* Initialize a data dependence relation between data accesses A and
1261 B. NB_LOOPS is the number of loops surrounding the references: the
1262 size of the classic distance/direction vectors. */
1264 static struct data_dependence_relation *
1265 initialize_data_dependence_relation (struct data_reference *a,
1266 struct data_reference *b,
1267 VEC (loop_p, heap) *loop_nest)
1269 struct data_dependence_relation *res;
1270 unsigned int i;
1272 res = XNEW (struct data_dependence_relation);
1273 DDR_A (res) = a;
1274 DDR_B (res) = b;
1275 DDR_LOOP_NEST (res) = NULL;
1276 DDR_REVERSED_P (res) = false;
1277 DDR_SUBSCRIPTS (res) = NULL;
1278 DDR_DIR_VECTS (res) = NULL;
1279 DDR_DIST_VECTS (res) = NULL;
1281 if (a == NULL || b == NULL)
1283 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1284 return res;
1287 /* If the data references do not alias, then they are independent. */
1288 if (!dr_may_alias_p (a, b))
1290 DDR_ARE_DEPENDENT (res) = chrec_known;
1291 return res;
1294 /* When the references are exactly the same, don't spend time doing
1295 the data dependence tests, just initialize the ddr and return. */
1296 if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1298 DDR_AFFINE_P (res) = true;
1299 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1300 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1301 DDR_LOOP_NEST (res) = loop_nest;
1302 DDR_INNER_LOOP (res) = 0;
1303 DDR_SELF_REFERENCE (res) = true;
1304 compute_self_dependence (res);
1305 return res;
1308 /* If the references do not access the same object, we do not know
1309 whether they alias or not. */
1310 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1312 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1313 return res;
1316 /* If the base of the object is not invariant in the loop nest, we cannot
1317 analyze it. TODO -- in fact, it would suffice to record that there may
1318 be arbitrary dependences in the loops where the base object varies. */
1319 if (loop_nest
1320 && !object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1321 DR_BASE_OBJECT (a)))
1323 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1324 return res;
1327 /* If the number of dimensions of the access to not agree we can have
1328 a pointer access to a component of the array element type and an
1329 array access while the base-objects are still the same. Punt. */
1330 if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
1332 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1333 return res;
1336 DDR_AFFINE_P (res) = true;
1337 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1338 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1339 DDR_LOOP_NEST (res) = loop_nest;
1340 DDR_INNER_LOOP (res) = 0;
1341 DDR_SELF_REFERENCE (res) = false;
1343 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1345 struct subscript *subscript;
1347 subscript = XNEW (struct subscript);
1348 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1349 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1350 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1351 SUB_DISTANCE (subscript) = chrec_dont_know;
1352 VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1355 return res;
1358 /* Frees memory used by the conflict function F. */
1360 static void
1361 free_conflict_function (conflict_function *f)
1363 unsigned i;
1365 if (CF_NONTRIVIAL_P (f))
1367 for (i = 0; i < f->n; i++)
1368 affine_fn_free (f->fns[i]);
1370 free (f);
1373 /* Frees memory used by SUBSCRIPTS. */
1375 static void
1376 free_subscripts (VEC (subscript_p, heap) *subscripts)
1378 unsigned i;
1379 subscript_p s;
1381 FOR_EACH_VEC_ELT (subscript_p, subscripts, i, s)
1383 free_conflict_function (s->conflicting_iterations_in_a);
1384 free_conflict_function (s->conflicting_iterations_in_b);
1385 free (s);
1387 VEC_free (subscript_p, heap, subscripts);
1390 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1391 description. */
1393 static inline void
1394 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1395 tree chrec)
1397 if (dump_file && (dump_flags & TDF_DETAILS))
1399 fprintf (dump_file, "(dependence classified: ");
1400 print_generic_expr (dump_file, chrec, 0);
1401 fprintf (dump_file, ")\n");
1404 DDR_ARE_DEPENDENT (ddr) = chrec;
1405 free_subscripts (DDR_SUBSCRIPTS (ddr));
1406 DDR_SUBSCRIPTS (ddr) = NULL;
1409 /* The dependence relation DDR cannot be represented by a distance
1410 vector. */
1412 static inline void
1413 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1415 if (dump_file && (dump_flags & TDF_DETAILS))
1416 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1418 DDR_AFFINE_P (ddr) = false;
1423 /* This section contains the classic Banerjee tests. */
1425 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1426 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1428 static inline bool
1429 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1431 return (evolution_function_is_constant_p (chrec_a)
1432 && evolution_function_is_constant_p (chrec_b));
1435 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1436 variable, i.e., if the SIV (Single Index Variable) test is true. */
1438 static bool
1439 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1441 if ((evolution_function_is_constant_p (chrec_a)
1442 && evolution_function_is_univariate_p (chrec_b))
1443 || (evolution_function_is_constant_p (chrec_b)
1444 && evolution_function_is_univariate_p (chrec_a)))
1445 return true;
1447 if (evolution_function_is_univariate_p (chrec_a)
1448 && evolution_function_is_univariate_p (chrec_b))
1450 switch (TREE_CODE (chrec_a))
1452 case POLYNOMIAL_CHREC:
1453 switch (TREE_CODE (chrec_b))
1455 case POLYNOMIAL_CHREC:
1456 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1457 return false;
1459 default:
1460 return true;
1463 default:
1464 return true;
1468 return false;
1471 /* Creates a conflict function with N dimensions. The affine functions
1472 in each dimension follow. */
1474 static conflict_function *
1475 conflict_fn (unsigned n, ...)
1477 unsigned i;
1478 conflict_function *ret = XCNEW (conflict_function);
1479 va_list ap;
1481 gcc_assert (0 < n && n <= MAX_DIM);
1482 va_start(ap, n);
1484 ret->n = n;
1485 for (i = 0; i < n; i++)
1486 ret->fns[i] = va_arg (ap, affine_fn);
1487 va_end(ap);
1489 return ret;
1492 /* Returns constant affine function with value CST. */
1494 static affine_fn
1495 affine_fn_cst (tree cst)
1497 affine_fn fn = VEC_alloc (tree, heap, 1);
1498 VEC_quick_push (tree, fn, cst);
1499 return fn;
1502 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1504 static affine_fn
1505 affine_fn_univar (tree cst, unsigned dim, tree coef)
1507 affine_fn fn = VEC_alloc (tree, heap, dim + 1);
1508 unsigned i;
1510 gcc_assert (dim > 0);
1511 VEC_quick_push (tree, fn, cst);
1512 for (i = 1; i < dim; i++)
1513 VEC_quick_push (tree, fn, integer_zero_node);
1514 VEC_quick_push (tree, fn, coef);
1515 return fn;
1518 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1519 *OVERLAPS_B are initialized to the functions that describe the
1520 relation between the elements accessed twice by CHREC_A and
1521 CHREC_B. For k >= 0, the following property is verified:
1523 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1525 static void
1526 analyze_ziv_subscript (tree chrec_a,
1527 tree chrec_b,
1528 conflict_function **overlaps_a,
1529 conflict_function **overlaps_b,
1530 tree *last_conflicts)
1532 tree type, difference;
1533 dependence_stats.num_ziv++;
1535 if (dump_file && (dump_flags & TDF_DETAILS))
1536 fprintf (dump_file, "(analyze_ziv_subscript \n");
1538 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1539 chrec_a = chrec_convert (type, chrec_a, NULL);
1540 chrec_b = chrec_convert (type, chrec_b, NULL);
1541 difference = chrec_fold_minus (type, chrec_a, chrec_b);
1543 switch (TREE_CODE (difference))
1545 case INTEGER_CST:
1546 if (integer_zerop (difference))
1548 /* The difference is equal to zero: the accessed index
1549 overlaps for each iteration in the loop. */
1550 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1551 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1552 *last_conflicts = chrec_dont_know;
1553 dependence_stats.num_ziv_dependent++;
1555 else
1557 /* The accesses do not overlap. */
1558 *overlaps_a = conflict_fn_no_dependence ();
1559 *overlaps_b = conflict_fn_no_dependence ();
1560 *last_conflicts = integer_zero_node;
1561 dependence_stats.num_ziv_independent++;
1563 break;
1565 default:
1566 /* We're not sure whether the indexes overlap. For the moment,
1567 conservatively answer "don't know". */
1568 if (dump_file && (dump_flags & TDF_DETAILS))
1569 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1571 *overlaps_a = conflict_fn_not_known ();
1572 *overlaps_b = conflict_fn_not_known ();
1573 *last_conflicts = chrec_dont_know;
1574 dependence_stats.num_ziv_unimplemented++;
1575 break;
1578 if (dump_file && (dump_flags & TDF_DETAILS))
1579 fprintf (dump_file, ")\n");
1582 /* Sets NIT to the estimated number of executions of the statements in
1583 LOOP. If CONSERVATIVE is true, we must be sure that NIT is at least as
1584 large as the number of iterations. If we have no reliable estimate,
1585 the function returns false, otherwise returns true. */
1587 bool
1588 estimated_loop_iterations (struct loop *loop, bool conservative,
1589 double_int *nit)
1591 estimate_numbers_of_iterations_loop (loop, true);
1592 if (conservative)
1594 if (!loop->any_upper_bound)
1595 return false;
1597 *nit = loop->nb_iterations_upper_bound;
1599 else
1601 if (!loop->any_estimate)
1602 return false;
1604 *nit = loop->nb_iterations_estimate;
1607 return true;
1610 /* Similar to estimated_loop_iterations, but returns the estimate only
1611 if it fits to HOST_WIDE_INT. If this is not the case, or the estimate
1612 on the number of iterations of LOOP could not be derived, returns -1. */
1614 HOST_WIDE_INT
1615 estimated_loop_iterations_int (struct loop *loop, bool conservative)
1617 double_int nit;
1618 HOST_WIDE_INT hwi_nit;
1620 if (!estimated_loop_iterations (loop, conservative, &nit))
1621 return -1;
1623 if (!double_int_fits_in_shwi_p (nit))
1624 return -1;
1625 hwi_nit = double_int_to_shwi (nit);
1627 return hwi_nit < 0 ? -1 : hwi_nit;
1630 /* Similar to estimated_loop_iterations, but returns the estimate as a tree,
1631 and only if it fits to the int type. If this is not the case, or the
1632 estimate on the number of iterations of LOOP could not be derived, returns
1633 chrec_dont_know. */
1635 static tree
1636 estimated_loop_iterations_tree (struct loop *loop, bool conservative)
1638 double_int nit;
1639 tree type;
1641 if (!estimated_loop_iterations (loop, conservative, &nit))
1642 return chrec_dont_know;
1644 type = lang_hooks.types.type_for_size (INT_TYPE_SIZE, true);
1645 if (!double_int_fits_to_tree_p (type, nit))
1646 return chrec_dont_know;
1648 return double_int_to_tree (type, nit);
1651 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1652 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1653 *OVERLAPS_B are initialized to the functions that describe the
1654 relation between the elements accessed twice by CHREC_A and
1655 CHREC_B. For k >= 0, the following property is verified:
1657 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1659 static void
1660 analyze_siv_subscript_cst_affine (tree chrec_a,
1661 tree chrec_b,
1662 conflict_function **overlaps_a,
1663 conflict_function **overlaps_b,
1664 tree *last_conflicts)
1666 bool value0, value1, value2;
1667 tree type, difference, tmp;
1669 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1670 chrec_a = chrec_convert (type, chrec_a, NULL);
1671 chrec_b = chrec_convert (type, chrec_b, NULL);
1672 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1674 if (!chrec_is_positive (initial_condition (difference), &value0))
1676 if (dump_file && (dump_flags & TDF_DETAILS))
1677 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1679 dependence_stats.num_siv_unimplemented++;
1680 *overlaps_a = conflict_fn_not_known ();
1681 *overlaps_b = conflict_fn_not_known ();
1682 *last_conflicts = chrec_dont_know;
1683 return;
1685 else
1687 if (value0 == false)
1689 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1691 if (dump_file && (dump_flags & TDF_DETAILS))
1692 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1694 *overlaps_a = conflict_fn_not_known ();
1695 *overlaps_b = conflict_fn_not_known ();
1696 *last_conflicts = chrec_dont_know;
1697 dependence_stats.num_siv_unimplemented++;
1698 return;
1700 else
1702 if (value1 == true)
1704 /* Example:
1705 chrec_a = 12
1706 chrec_b = {10, +, 1}
1709 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1711 HOST_WIDE_INT numiter;
1712 struct loop *loop = get_chrec_loop (chrec_b);
1714 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1715 tmp = fold_build2 (EXACT_DIV_EXPR, type,
1716 fold_build1 (ABS_EXPR, type, difference),
1717 CHREC_RIGHT (chrec_b));
1718 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1719 *last_conflicts = integer_one_node;
1722 /* Perform weak-zero siv test to see if overlap is
1723 outside the loop bounds. */
1724 numiter = estimated_loop_iterations_int (loop, false);
1726 if (numiter >= 0
1727 && compare_tree_int (tmp, numiter) > 0)
1729 free_conflict_function (*overlaps_a);
1730 free_conflict_function (*overlaps_b);
1731 *overlaps_a = conflict_fn_no_dependence ();
1732 *overlaps_b = conflict_fn_no_dependence ();
1733 *last_conflicts = integer_zero_node;
1734 dependence_stats.num_siv_independent++;
1735 return;
1737 dependence_stats.num_siv_dependent++;
1738 return;
1741 /* When the step does not divide the difference, there are
1742 no overlaps. */
1743 else
1745 *overlaps_a = conflict_fn_no_dependence ();
1746 *overlaps_b = conflict_fn_no_dependence ();
1747 *last_conflicts = integer_zero_node;
1748 dependence_stats.num_siv_independent++;
1749 return;
1753 else
1755 /* Example:
1756 chrec_a = 12
1757 chrec_b = {10, +, -1}
1759 In this case, chrec_a will not overlap with chrec_b. */
1760 *overlaps_a = conflict_fn_no_dependence ();
1761 *overlaps_b = conflict_fn_no_dependence ();
1762 *last_conflicts = integer_zero_node;
1763 dependence_stats.num_siv_independent++;
1764 return;
1768 else
1770 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1772 if (dump_file && (dump_flags & TDF_DETAILS))
1773 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1775 *overlaps_a = conflict_fn_not_known ();
1776 *overlaps_b = conflict_fn_not_known ();
1777 *last_conflicts = chrec_dont_know;
1778 dependence_stats.num_siv_unimplemented++;
1779 return;
1781 else
1783 if (value2 == false)
1785 /* Example:
1786 chrec_a = 3
1787 chrec_b = {10, +, -1}
1789 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1791 HOST_WIDE_INT numiter;
1792 struct loop *loop = get_chrec_loop (chrec_b);
1794 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1795 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
1796 CHREC_RIGHT (chrec_b));
1797 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1798 *last_conflicts = integer_one_node;
1800 /* Perform weak-zero siv test to see if overlap is
1801 outside the loop bounds. */
1802 numiter = estimated_loop_iterations_int (loop, false);
1804 if (numiter >= 0
1805 && compare_tree_int (tmp, numiter) > 0)
1807 free_conflict_function (*overlaps_a);
1808 free_conflict_function (*overlaps_b);
1809 *overlaps_a = conflict_fn_no_dependence ();
1810 *overlaps_b = conflict_fn_no_dependence ();
1811 *last_conflicts = integer_zero_node;
1812 dependence_stats.num_siv_independent++;
1813 return;
1815 dependence_stats.num_siv_dependent++;
1816 return;
1819 /* When the step does not divide the difference, there
1820 are no overlaps. */
1821 else
1823 *overlaps_a = conflict_fn_no_dependence ();
1824 *overlaps_b = conflict_fn_no_dependence ();
1825 *last_conflicts = integer_zero_node;
1826 dependence_stats.num_siv_independent++;
1827 return;
1830 else
1832 /* Example:
1833 chrec_a = 3
1834 chrec_b = {4, +, 1}
1836 In this case, chrec_a will not overlap with chrec_b. */
1837 *overlaps_a = conflict_fn_no_dependence ();
1838 *overlaps_b = conflict_fn_no_dependence ();
1839 *last_conflicts = integer_zero_node;
1840 dependence_stats.num_siv_independent++;
1841 return;
1848 /* Helper recursive function for initializing the matrix A. Returns
1849 the initial value of CHREC. */
1851 static tree
1852 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
1854 gcc_assert (chrec);
1856 switch (TREE_CODE (chrec))
1858 case POLYNOMIAL_CHREC:
1859 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
1861 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
1862 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
1864 case PLUS_EXPR:
1865 case MULT_EXPR:
1866 case MINUS_EXPR:
1868 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1869 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
1871 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
1874 case NOP_EXPR:
1876 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1877 return chrec_convert (chrec_type (chrec), op, NULL);
1880 case BIT_NOT_EXPR:
1882 /* Handle ~X as -1 - X. */
1883 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1884 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
1885 build_int_cst (TREE_TYPE (chrec), -1), op);
1888 case INTEGER_CST:
1889 return chrec;
1891 default:
1892 gcc_unreachable ();
1893 return NULL_TREE;
1897 #define FLOOR_DIV(x,y) ((x) / (y))
1899 /* Solves the special case of the Diophantine equation:
1900 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
1902 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
1903 number of iterations that loops X and Y run. The overlaps will be
1904 constructed as evolutions in dimension DIM. */
1906 static void
1907 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
1908 affine_fn *overlaps_a,
1909 affine_fn *overlaps_b,
1910 tree *last_conflicts, int dim)
1912 if (((step_a > 0 && step_b > 0)
1913 || (step_a < 0 && step_b < 0)))
1915 int step_overlaps_a, step_overlaps_b;
1916 int gcd_steps_a_b, last_conflict, tau2;
1918 gcd_steps_a_b = gcd (step_a, step_b);
1919 step_overlaps_a = step_b / gcd_steps_a_b;
1920 step_overlaps_b = step_a / gcd_steps_a_b;
1922 if (niter > 0)
1924 tau2 = FLOOR_DIV (niter, step_overlaps_a);
1925 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
1926 last_conflict = tau2;
1927 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
1929 else
1930 *last_conflicts = chrec_dont_know;
1932 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
1933 build_int_cst (NULL_TREE,
1934 step_overlaps_a));
1935 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
1936 build_int_cst (NULL_TREE,
1937 step_overlaps_b));
1940 else
1942 *overlaps_a = affine_fn_cst (integer_zero_node);
1943 *overlaps_b = affine_fn_cst (integer_zero_node);
1944 *last_conflicts = integer_zero_node;
1948 /* Solves the special case of a Diophantine equation where CHREC_A is
1949 an affine bivariate function, and CHREC_B is an affine univariate
1950 function. For example,
1952 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
1954 has the following overlapping functions:
1956 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
1957 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
1958 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
1960 FORNOW: This is a specialized implementation for a case occurring in
1961 a common benchmark. Implement the general algorithm. */
1963 static void
1964 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
1965 conflict_function **overlaps_a,
1966 conflict_function **overlaps_b,
1967 tree *last_conflicts)
1969 bool xz_p, yz_p, xyz_p;
1970 int step_x, step_y, step_z;
1971 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
1972 affine_fn overlaps_a_xz, overlaps_b_xz;
1973 affine_fn overlaps_a_yz, overlaps_b_yz;
1974 affine_fn overlaps_a_xyz, overlaps_b_xyz;
1975 affine_fn ova1, ova2, ovb;
1976 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
1978 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
1979 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
1980 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
1982 niter_x =
1983 estimated_loop_iterations_int (get_chrec_loop (CHREC_LEFT (chrec_a)),
1984 false);
1985 niter_y = estimated_loop_iterations_int (get_chrec_loop (chrec_a), false);
1986 niter_z = estimated_loop_iterations_int (get_chrec_loop (chrec_b), false);
1988 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
1990 if (dump_file && (dump_flags & TDF_DETAILS))
1991 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
1993 *overlaps_a = conflict_fn_not_known ();
1994 *overlaps_b = conflict_fn_not_known ();
1995 *last_conflicts = chrec_dont_know;
1996 return;
1999 niter = MIN (niter_x, niter_z);
2000 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
2001 &overlaps_a_xz,
2002 &overlaps_b_xz,
2003 &last_conflicts_xz, 1);
2004 niter = MIN (niter_y, niter_z);
2005 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
2006 &overlaps_a_yz,
2007 &overlaps_b_yz,
2008 &last_conflicts_yz, 2);
2009 niter = MIN (niter_x, niter_z);
2010 niter = MIN (niter_y, niter);
2011 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
2012 &overlaps_a_xyz,
2013 &overlaps_b_xyz,
2014 &last_conflicts_xyz, 3);
2016 xz_p = !integer_zerop (last_conflicts_xz);
2017 yz_p = !integer_zerop (last_conflicts_yz);
2018 xyz_p = !integer_zerop (last_conflicts_xyz);
2020 if (xz_p || yz_p || xyz_p)
2022 ova1 = affine_fn_cst (integer_zero_node);
2023 ova2 = affine_fn_cst (integer_zero_node);
2024 ovb = affine_fn_cst (integer_zero_node);
2025 if (xz_p)
2027 affine_fn t0 = ova1;
2028 affine_fn t2 = ovb;
2030 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2031 ovb = affine_fn_plus (ovb, overlaps_b_xz);
2032 affine_fn_free (t0);
2033 affine_fn_free (t2);
2034 *last_conflicts = last_conflicts_xz;
2036 if (yz_p)
2038 affine_fn t0 = ova2;
2039 affine_fn t2 = ovb;
2041 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2042 ovb = affine_fn_plus (ovb, overlaps_b_yz);
2043 affine_fn_free (t0);
2044 affine_fn_free (t2);
2045 *last_conflicts = last_conflicts_yz;
2047 if (xyz_p)
2049 affine_fn t0 = ova1;
2050 affine_fn t2 = ova2;
2051 affine_fn t4 = ovb;
2053 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2054 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2055 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2056 affine_fn_free (t0);
2057 affine_fn_free (t2);
2058 affine_fn_free (t4);
2059 *last_conflicts = last_conflicts_xyz;
2061 *overlaps_a = conflict_fn (2, ova1, ova2);
2062 *overlaps_b = conflict_fn (1, ovb);
2064 else
2066 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2067 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2068 *last_conflicts = integer_zero_node;
2071 affine_fn_free (overlaps_a_xz);
2072 affine_fn_free (overlaps_b_xz);
2073 affine_fn_free (overlaps_a_yz);
2074 affine_fn_free (overlaps_b_yz);
2075 affine_fn_free (overlaps_a_xyz);
2076 affine_fn_free (overlaps_b_xyz);
2079 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
2081 static void
2082 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
2083 int size)
2085 memcpy (vec2, vec1, size * sizeof (*vec1));
2088 /* Copy the elements of M x N matrix MAT1 to MAT2. */
2090 static void
2091 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
2092 int m, int n)
2094 int i;
2096 for (i = 0; i < m; i++)
2097 lambda_vector_copy (mat1[i], mat2[i], n);
2100 /* Store the N x N identity matrix in MAT. */
2102 static void
2103 lambda_matrix_id (lambda_matrix mat, int size)
2105 int i, j;
2107 for (i = 0; i < size; i++)
2108 for (j = 0; j < size; j++)
2109 mat[i][j] = (i == j) ? 1 : 0;
2112 /* Return the first nonzero element of vector VEC1 between START and N.
2113 We must have START <= N. Returns N if VEC1 is the zero vector. */
2115 static int
2116 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
2118 int j = start;
2119 while (j < n && vec1[j] == 0)
2120 j++;
2121 return j;
2124 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2125 R2 = R2 + CONST1 * R1. */
2127 static void
2128 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
2130 int i;
2132 if (const1 == 0)
2133 return;
2135 for (i = 0; i < n; i++)
2136 mat[r2][i] += const1 * mat[r1][i];
2139 /* Swap rows R1 and R2 in matrix MAT. */
2141 static void
2142 lambda_matrix_row_exchange (lambda_matrix mat, int r1, int r2)
2144 lambda_vector row;
2146 row = mat[r1];
2147 mat[r1] = mat[r2];
2148 mat[r2] = row;
2151 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
2152 and store the result in VEC2. */
2154 static void
2155 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
2156 int size, int const1)
2158 int i;
2160 if (const1 == 0)
2161 lambda_vector_clear (vec2, size);
2162 else
2163 for (i = 0; i < size; i++)
2164 vec2[i] = const1 * vec1[i];
2167 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
2169 static void
2170 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
2171 int size)
2173 lambda_vector_mult_const (vec1, vec2, size, -1);
2176 /* Negate row R1 of matrix MAT which has N columns. */
2178 static void
2179 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
2181 lambda_vector_negate (mat[r1], mat[r1], n);
2184 /* Return true if two vectors are equal. */
2186 static bool
2187 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
2189 int i;
2190 for (i = 0; i < size; i++)
2191 if (vec1[i] != vec2[i])
2192 return false;
2193 return true;
2196 /* Given an M x N integer matrix A, this function determines an M x
2197 M unimodular matrix U, and an M x N echelon matrix S such that
2198 "U.A = S". This decomposition is also known as "right Hermite".
2200 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2201 Restructuring Compilers" Utpal Banerjee. */
2203 static void
2204 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
2205 lambda_matrix S, lambda_matrix U)
2207 int i, j, i0 = 0;
2209 lambda_matrix_copy (A, S, m, n);
2210 lambda_matrix_id (U, m);
2212 for (j = 0; j < n; j++)
2214 if (lambda_vector_first_nz (S[j], m, i0) < m)
2216 ++i0;
2217 for (i = m - 1; i >= i0; i--)
2219 while (S[i][j] != 0)
2221 int sigma, factor, a, b;
2223 a = S[i-1][j];
2224 b = S[i][j];
2225 sigma = (a * b < 0) ? -1: 1;
2226 a = abs (a);
2227 b = abs (b);
2228 factor = sigma * (a / b);
2230 lambda_matrix_row_add (S, n, i, i-1, -factor);
2231 lambda_matrix_row_exchange (S, i, i-1);
2233 lambda_matrix_row_add (U, m, i, i-1, -factor);
2234 lambda_matrix_row_exchange (U, i, i-1);
2241 /* Determines the overlapping elements due to accesses CHREC_A and
2242 CHREC_B, that are affine functions. This function cannot handle
2243 symbolic evolution functions, ie. when initial conditions are
2244 parameters, because it uses lambda matrices of integers. */
2246 static void
2247 analyze_subscript_affine_affine (tree chrec_a,
2248 tree chrec_b,
2249 conflict_function **overlaps_a,
2250 conflict_function **overlaps_b,
2251 tree *last_conflicts)
2253 unsigned nb_vars_a, nb_vars_b, dim;
2254 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2255 lambda_matrix A, U, S;
2256 struct obstack scratch_obstack;
2258 if (eq_evolutions_p (chrec_a, chrec_b))
2260 /* The accessed index overlaps for each iteration in the
2261 loop. */
2262 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2263 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2264 *last_conflicts = chrec_dont_know;
2265 return;
2267 if (dump_file && (dump_flags & TDF_DETAILS))
2268 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2270 /* For determining the initial intersection, we have to solve a
2271 Diophantine equation. This is the most time consuming part.
2273 For answering to the question: "Is there a dependence?" we have
2274 to prove that there exists a solution to the Diophantine
2275 equation, and that the solution is in the iteration domain,
2276 i.e. the solution is positive or zero, and that the solution
2277 happens before the upper bound loop.nb_iterations. Otherwise
2278 there is no dependence. This function outputs a description of
2279 the iterations that hold the intersections. */
2281 nb_vars_a = nb_vars_in_chrec (chrec_a);
2282 nb_vars_b = nb_vars_in_chrec (chrec_b);
2284 gcc_obstack_init (&scratch_obstack);
2286 dim = nb_vars_a + nb_vars_b;
2287 U = lambda_matrix_new (dim, dim, &scratch_obstack);
2288 A = lambda_matrix_new (dim, 1, &scratch_obstack);
2289 S = lambda_matrix_new (dim, 1, &scratch_obstack);
2291 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2292 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
2293 gamma = init_b - init_a;
2295 /* Don't do all the hard work of solving the Diophantine equation
2296 when we already know the solution: for example,
2297 | {3, +, 1}_1
2298 | {3, +, 4}_2
2299 | gamma = 3 - 3 = 0.
2300 Then the first overlap occurs during the first iterations:
2301 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2303 if (gamma == 0)
2305 if (nb_vars_a == 1 && nb_vars_b == 1)
2307 HOST_WIDE_INT step_a, step_b;
2308 HOST_WIDE_INT niter, niter_a, niter_b;
2309 affine_fn ova, ovb;
2311 niter_a = estimated_loop_iterations_int (get_chrec_loop (chrec_a),
2312 false);
2313 niter_b = estimated_loop_iterations_int (get_chrec_loop (chrec_b),
2314 false);
2315 niter = MIN (niter_a, niter_b);
2316 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2317 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2319 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2320 &ova, &ovb,
2321 last_conflicts, 1);
2322 *overlaps_a = conflict_fn (1, ova);
2323 *overlaps_b = conflict_fn (1, ovb);
2326 else if (nb_vars_a == 2 && nb_vars_b == 1)
2327 compute_overlap_steps_for_affine_1_2
2328 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2330 else if (nb_vars_a == 1 && nb_vars_b == 2)
2331 compute_overlap_steps_for_affine_1_2
2332 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2334 else
2336 if (dump_file && (dump_flags & TDF_DETAILS))
2337 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2338 *overlaps_a = conflict_fn_not_known ();
2339 *overlaps_b = conflict_fn_not_known ();
2340 *last_conflicts = chrec_dont_know;
2342 goto end_analyze_subs_aa;
2345 /* U.A = S */
2346 lambda_matrix_right_hermite (A, dim, 1, S, U);
2348 if (S[0][0] < 0)
2350 S[0][0] *= -1;
2351 lambda_matrix_row_negate (U, dim, 0);
2353 gcd_alpha_beta = S[0][0];
2355 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2356 but that is a quite strange case. Instead of ICEing, answer
2357 don't know. */
2358 if (gcd_alpha_beta == 0)
2360 *overlaps_a = conflict_fn_not_known ();
2361 *overlaps_b = conflict_fn_not_known ();
2362 *last_conflicts = chrec_dont_know;
2363 goto end_analyze_subs_aa;
2366 /* The classic "gcd-test". */
2367 if (!int_divides_p (gcd_alpha_beta, gamma))
2369 /* The "gcd-test" has determined that there is no integer
2370 solution, i.e. there is no dependence. */
2371 *overlaps_a = conflict_fn_no_dependence ();
2372 *overlaps_b = conflict_fn_no_dependence ();
2373 *last_conflicts = integer_zero_node;
2376 /* Both access functions are univariate. This includes SIV and MIV cases. */
2377 else if (nb_vars_a == 1 && nb_vars_b == 1)
2379 /* Both functions should have the same evolution sign. */
2380 if (((A[0][0] > 0 && -A[1][0] > 0)
2381 || (A[0][0] < 0 && -A[1][0] < 0)))
2383 /* The solutions are given by:
2385 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2386 | [u21 u22] [y0]
2388 For a given integer t. Using the following variables,
2390 | i0 = u11 * gamma / gcd_alpha_beta
2391 | j0 = u12 * gamma / gcd_alpha_beta
2392 | i1 = u21
2393 | j1 = u22
2395 the solutions are:
2397 | x0 = i0 + i1 * t,
2398 | y0 = j0 + j1 * t. */
2399 HOST_WIDE_INT i0, j0, i1, j1;
2401 i0 = U[0][0] * gamma / gcd_alpha_beta;
2402 j0 = U[0][1] * gamma / gcd_alpha_beta;
2403 i1 = U[1][0];
2404 j1 = U[1][1];
2406 if ((i1 == 0 && i0 < 0)
2407 || (j1 == 0 && j0 < 0))
2409 /* There is no solution.
2410 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2411 falls in here, but for the moment we don't look at the
2412 upper bound of the iteration domain. */
2413 *overlaps_a = conflict_fn_no_dependence ();
2414 *overlaps_b = conflict_fn_no_dependence ();
2415 *last_conflicts = integer_zero_node;
2416 goto end_analyze_subs_aa;
2419 if (i1 > 0 && j1 > 0)
2421 HOST_WIDE_INT niter_a = estimated_loop_iterations_int
2422 (get_chrec_loop (chrec_a), false);
2423 HOST_WIDE_INT niter_b = estimated_loop_iterations_int
2424 (get_chrec_loop (chrec_b), false);
2425 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2427 /* (X0, Y0) is a solution of the Diophantine equation:
2428 "chrec_a (X0) = chrec_b (Y0)". */
2429 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2430 CEIL (-j0, j1));
2431 HOST_WIDE_INT x0 = i1 * tau1 + i0;
2432 HOST_WIDE_INT y0 = j1 * tau1 + j0;
2434 /* (X1, Y1) is the smallest positive solution of the eq
2435 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2436 first conflict occurs. */
2437 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2438 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2439 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2441 if (niter > 0)
2443 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2444 FLOOR_DIV (niter - j0, j1));
2445 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2447 /* If the overlap occurs outside of the bounds of the
2448 loop, there is no dependence. */
2449 if (x1 >= niter || y1 >= niter)
2451 *overlaps_a = conflict_fn_no_dependence ();
2452 *overlaps_b = conflict_fn_no_dependence ();
2453 *last_conflicts = integer_zero_node;
2454 goto end_analyze_subs_aa;
2456 else
2457 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2459 else
2460 *last_conflicts = chrec_dont_know;
2462 *overlaps_a
2463 = conflict_fn (1,
2464 affine_fn_univar (build_int_cst (NULL_TREE, x1),
2466 build_int_cst (NULL_TREE, i1)));
2467 *overlaps_b
2468 = conflict_fn (1,
2469 affine_fn_univar (build_int_cst (NULL_TREE, y1),
2471 build_int_cst (NULL_TREE, j1)));
2473 else
2475 /* FIXME: For the moment, the upper bound of the
2476 iteration domain for i and j is not checked. */
2477 if (dump_file && (dump_flags & TDF_DETAILS))
2478 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2479 *overlaps_a = conflict_fn_not_known ();
2480 *overlaps_b = conflict_fn_not_known ();
2481 *last_conflicts = chrec_dont_know;
2484 else
2486 if (dump_file && (dump_flags & TDF_DETAILS))
2487 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2488 *overlaps_a = conflict_fn_not_known ();
2489 *overlaps_b = conflict_fn_not_known ();
2490 *last_conflicts = chrec_dont_know;
2493 else
2495 if (dump_file && (dump_flags & TDF_DETAILS))
2496 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2497 *overlaps_a = conflict_fn_not_known ();
2498 *overlaps_b = conflict_fn_not_known ();
2499 *last_conflicts = chrec_dont_know;
2502 end_analyze_subs_aa:
2503 obstack_free (&scratch_obstack, NULL);
2504 if (dump_file && (dump_flags & TDF_DETAILS))
2506 fprintf (dump_file, " (overlaps_a = ");
2507 dump_conflict_function (dump_file, *overlaps_a);
2508 fprintf (dump_file, ")\n (overlaps_b = ");
2509 dump_conflict_function (dump_file, *overlaps_b);
2510 fprintf (dump_file, ")\n");
2511 fprintf (dump_file, ")\n");
2515 /* Returns true when analyze_subscript_affine_affine can be used for
2516 determining the dependence relation between chrec_a and chrec_b,
2517 that contain symbols. This function modifies chrec_a and chrec_b
2518 such that the analysis result is the same, and such that they don't
2519 contain symbols, and then can safely be passed to the analyzer.
2521 Example: The analysis of the following tuples of evolutions produce
2522 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2523 vs. {0, +, 1}_1
2525 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2526 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2529 static bool
2530 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2532 tree diff, type, left_a, left_b, right_b;
2534 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2535 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2536 /* FIXME: For the moment not handled. Might be refined later. */
2537 return false;
2539 type = chrec_type (*chrec_a);
2540 left_a = CHREC_LEFT (*chrec_a);
2541 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
2542 diff = chrec_fold_minus (type, left_a, left_b);
2544 if (!evolution_function_is_constant_p (diff))
2545 return false;
2547 if (dump_file && (dump_flags & TDF_DETAILS))
2548 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2550 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2551 diff, CHREC_RIGHT (*chrec_a));
2552 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
2553 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2554 build_int_cst (type, 0),
2555 right_b);
2556 return true;
2559 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2560 *OVERLAPS_B are initialized to the functions that describe the
2561 relation between the elements accessed twice by CHREC_A and
2562 CHREC_B. For k >= 0, the following property is verified:
2564 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2566 static void
2567 analyze_siv_subscript (tree chrec_a,
2568 tree chrec_b,
2569 conflict_function **overlaps_a,
2570 conflict_function **overlaps_b,
2571 tree *last_conflicts,
2572 int loop_nest_num)
2574 dependence_stats.num_siv++;
2576 if (dump_file && (dump_flags & TDF_DETAILS))
2577 fprintf (dump_file, "(analyze_siv_subscript \n");
2579 if (evolution_function_is_constant_p (chrec_a)
2580 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2581 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2582 overlaps_a, overlaps_b, last_conflicts);
2584 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2585 && evolution_function_is_constant_p (chrec_b))
2586 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2587 overlaps_b, overlaps_a, last_conflicts);
2589 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2590 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2592 if (!chrec_contains_symbols (chrec_a)
2593 && !chrec_contains_symbols (chrec_b))
2595 analyze_subscript_affine_affine (chrec_a, chrec_b,
2596 overlaps_a, overlaps_b,
2597 last_conflicts);
2599 if (CF_NOT_KNOWN_P (*overlaps_a)
2600 || CF_NOT_KNOWN_P (*overlaps_b))
2601 dependence_stats.num_siv_unimplemented++;
2602 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2603 || CF_NO_DEPENDENCE_P (*overlaps_b))
2604 dependence_stats.num_siv_independent++;
2605 else
2606 dependence_stats.num_siv_dependent++;
2608 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2609 &chrec_b))
2611 analyze_subscript_affine_affine (chrec_a, chrec_b,
2612 overlaps_a, overlaps_b,
2613 last_conflicts);
2615 if (CF_NOT_KNOWN_P (*overlaps_a)
2616 || CF_NOT_KNOWN_P (*overlaps_b))
2617 dependence_stats.num_siv_unimplemented++;
2618 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2619 || CF_NO_DEPENDENCE_P (*overlaps_b))
2620 dependence_stats.num_siv_independent++;
2621 else
2622 dependence_stats.num_siv_dependent++;
2624 else
2625 goto siv_subscript_dontknow;
2628 else
2630 siv_subscript_dontknow:;
2631 if (dump_file && (dump_flags & TDF_DETAILS))
2632 fprintf (dump_file, "siv test failed: unimplemented.\n");
2633 *overlaps_a = conflict_fn_not_known ();
2634 *overlaps_b = conflict_fn_not_known ();
2635 *last_conflicts = chrec_dont_know;
2636 dependence_stats.num_siv_unimplemented++;
2639 if (dump_file && (dump_flags & TDF_DETAILS))
2640 fprintf (dump_file, ")\n");
2643 /* Returns false if we can prove that the greatest common divisor of the steps
2644 of CHREC does not divide CST, false otherwise. */
2646 static bool
2647 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2649 HOST_WIDE_INT cd = 0, val;
2650 tree step;
2652 if (!host_integerp (cst, 0))
2653 return true;
2654 val = tree_low_cst (cst, 0);
2656 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2658 step = CHREC_RIGHT (chrec);
2659 if (!host_integerp (step, 0))
2660 return true;
2661 cd = gcd (cd, tree_low_cst (step, 0));
2662 chrec = CHREC_LEFT (chrec);
2665 return val % cd == 0;
2668 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2669 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2670 functions that describe the relation between the elements accessed
2671 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2672 is verified:
2674 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2676 static void
2677 analyze_miv_subscript (tree chrec_a,
2678 tree chrec_b,
2679 conflict_function **overlaps_a,
2680 conflict_function **overlaps_b,
2681 tree *last_conflicts,
2682 struct loop *loop_nest)
2684 tree type, difference;
2686 dependence_stats.num_miv++;
2687 if (dump_file && (dump_flags & TDF_DETAILS))
2688 fprintf (dump_file, "(analyze_miv_subscript \n");
2690 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2691 chrec_a = chrec_convert (type, chrec_a, NULL);
2692 chrec_b = chrec_convert (type, chrec_b, NULL);
2693 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2695 if (eq_evolutions_p (chrec_a, chrec_b))
2697 /* Access functions are the same: all the elements are accessed
2698 in the same order. */
2699 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2700 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2701 *last_conflicts = estimated_loop_iterations_tree
2702 (get_chrec_loop (chrec_a), true);
2703 dependence_stats.num_miv_dependent++;
2706 else if (evolution_function_is_constant_p (difference)
2707 /* For the moment, the following is verified:
2708 evolution_function_is_affine_multivariate_p (chrec_a,
2709 loop_nest->num) */
2710 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2712 /* testsuite/.../ssa-chrec-33.c
2713 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2715 The difference is 1, and all the evolution steps are multiples
2716 of 2, consequently there are no overlapping elements. */
2717 *overlaps_a = conflict_fn_no_dependence ();
2718 *overlaps_b = conflict_fn_no_dependence ();
2719 *last_conflicts = integer_zero_node;
2720 dependence_stats.num_miv_independent++;
2723 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2724 && !chrec_contains_symbols (chrec_a)
2725 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2726 && !chrec_contains_symbols (chrec_b))
2728 /* testsuite/.../ssa-chrec-35.c
2729 {0, +, 1}_2 vs. {0, +, 1}_3
2730 the overlapping elements are respectively located at iterations:
2731 {0, +, 1}_x and {0, +, 1}_x,
2732 in other words, we have the equality:
2733 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2735 Other examples:
2736 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2737 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2739 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2740 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2742 analyze_subscript_affine_affine (chrec_a, chrec_b,
2743 overlaps_a, overlaps_b, last_conflicts);
2745 if (CF_NOT_KNOWN_P (*overlaps_a)
2746 || CF_NOT_KNOWN_P (*overlaps_b))
2747 dependence_stats.num_miv_unimplemented++;
2748 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2749 || CF_NO_DEPENDENCE_P (*overlaps_b))
2750 dependence_stats.num_miv_independent++;
2751 else
2752 dependence_stats.num_miv_dependent++;
2755 else
2757 /* When the analysis is too difficult, answer "don't know". */
2758 if (dump_file && (dump_flags & TDF_DETAILS))
2759 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2761 *overlaps_a = conflict_fn_not_known ();
2762 *overlaps_b = conflict_fn_not_known ();
2763 *last_conflicts = chrec_dont_know;
2764 dependence_stats.num_miv_unimplemented++;
2767 if (dump_file && (dump_flags & TDF_DETAILS))
2768 fprintf (dump_file, ")\n");
2771 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2772 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2773 OVERLAP_ITERATIONS_B are initialized with two functions that
2774 describe the iterations that contain conflicting elements.
2776 Remark: For an integer k >= 0, the following equality is true:
2778 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2781 static void
2782 analyze_overlapping_iterations (tree chrec_a,
2783 tree chrec_b,
2784 conflict_function **overlap_iterations_a,
2785 conflict_function **overlap_iterations_b,
2786 tree *last_conflicts, struct loop *loop_nest)
2788 unsigned int lnn = loop_nest->num;
2790 dependence_stats.num_subscript_tests++;
2792 if (dump_file && (dump_flags & TDF_DETAILS))
2794 fprintf (dump_file, "(analyze_overlapping_iterations \n");
2795 fprintf (dump_file, " (chrec_a = ");
2796 print_generic_expr (dump_file, chrec_a, 0);
2797 fprintf (dump_file, ")\n (chrec_b = ");
2798 print_generic_expr (dump_file, chrec_b, 0);
2799 fprintf (dump_file, ")\n");
2802 if (chrec_a == NULL_TREE
2803 || chrec_b == NULL_TREE
2804 || chrec_contains_undetermined (chrec_a)
2805 || chrec_contains_undetermined (chrec_b))
2807 dependence_stats.num_subscript_undetermined++;
2809 *overlap_iterations_a = conflict_fn_not_known ();
2810 *overlap_iterations_b = conflict_fn_not_known ();
2813 /* If they are the same chrec, and are affine, they overlap
2814 on every iteration. */
2815 else if (eq_evolutions_p (chrec_a, chrec_b)
2816 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2817 || operand_equal_p (chrec_a, chrec_b, 0)))
2819 dependence_stats.num_same_subscript_function++;
2820 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2821 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2822 *last_conflicts = chrec_dont_know;
2825 /* If they aren't the same, and aren't affine, we can't do anything
2826 yet. */
2827 else if ((chrec_contains_symbols (chrec_a)
2828 || chrec_contains_symbols (chrec_b))
2829 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2830 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
2832 dependence_stats.num_subscript_undetermined++;
2833 *overlap_iterations_a = conflict_fn_not_known ();
2834 *overlap_iterations_b = conflict_fn_not_known ();
2837 else if (ziv_subscript_p (chrec_a, chrec_b))
2838 analyze_ziv_subscript (chrec_a, chrec_b,
2839 overlap_iterations_a, overlap_iterations_b,
2840 last_conflicts);
2842 else if (siv_subscript_p (chrec_a, chrec_b))
2843 analyze_siv_subscript (chrec_a, chrec_b,
2844 overlap_iterations_a, overlap_iterations_b,
2845 last_conflicts, lnn);
2847 else
2848 analyze_miv_subscript (chrec_a, chrec_b,
2849 overlap_iterations_a, overlap_iterations_b,
2850 last_conflicts, loop_nest);
2852 if (dump_file && (dump_flags & TDF_DETAILS))
2854 fprintf (dump_file, " (overlap_iterations_a = ");
2855 dump_conflict_function (dump_file, *overlap_iterations_a);
2856 fprintf (dump_file, ")\n (overlap_iterations_b = ");
2857 dump_conflict_function (dump_file, *overlap_iterations_b);
2858 fprintf (dump_file, ")\n");
2859 fprintf (dump_file, ")\n");
2863 /* Helper function for uniquely inserting distance vectors. */
2865 static void
2866 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
2868 unsigned i;
2869 lambda_vector v;
2871 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, v)
2872 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
2873 return;
2875 VEC_safe_push (lambda_vector, heap, DDR_DIST_VECTS (ddr), dist_v);
2878 /* Helper function for uniquely inserting direction vectors. */
2880 static void
2881 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
2883 unsigned i;
2884 lambda_vector v;
2886 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), i, v)
2887 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
2888 return;
2890 VEC_safe_push (lambda_vector, heap, DDR_DIR_VECTS (ddr), dir_v);
2893 /* Add a distance of 1 on all the loops outer than INDEX. If we
2894 haven't yet determined a distance for this outer loop, push a new
2895 distance vector composed of the previous distance, and a distance
2896 of 1 for this outer loop. Example:
2898 | loop_1
2899 | loop_2
2900 | A[10]
2901 | endloop_2
2902 | endloop_1
2904 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
2905 save (0, 1), then we have to save (1, 0). */
2907 static void
2908 add_outer_distances (struct data_dependence_relation *ddr,
2909 lambda_vector dist_v, int index)
2911 /* For each outer loop where init_v is not set, the accesses are
2912 in dependence of distance 1 in the loop. */
2913 while (--index >= 0)
2915 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2916 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
2917 save_v[index] = 1;
2918 save_dist_v (ddr, save_v);
2922 /* Return false when fail to represent the data dependence as a
2923 distance vector. INIT_B is set to true when a component has been
2924 added to the distance vector DIST_V. INDEX_CARRY is then set to
2925 the index in DIST_V that carries the dependence. */
2927 static bool
2928 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
2929 struct data_reference *ddr_a,
2930 struct data_reference *ddr_b,
2931 lambda_vector dist_v, bool *init_b,
2932 int *index_carry)
2934 unsigned i;
2935 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2937 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2939 tree access_fn_a, access_fn_b;
2940 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
2942 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2944 non_affine_dependence_relation (ddr);
2945 return false;
2948 access_fn_a = DR_ACCESS_FN (ddr_a, i);
2949 access_fn_b = DR_ACCESS_FN (ddr_b, i);
2951 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
2952 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
2954 int dist, index;
2955 int var_a = CHREC_VARIABLE (access_fn_a);
2956 int var_b = CHREC_VARIABLE (access_fn_b);
2958 if (var_a != var_b
2959 || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2961 non_affine_dependence_relation (ddr);
2962 return false;
2965 dist = int_cst_value (SUB_DISTANCE (subscript));
2966 index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
2967 *index_carry = MIN (index, *index_carry);
2969 /* This is the subscript coupling test. If we have already
2970 recorded a distance for this loop (a distance coming from
2971 another subscript), it should be the same. For example,
2972 in the following code, there is no dependence:
2974 | loop i = 0, N, 1
2975 | T[i+1][i] = ...
2976 | ... = T[i][i]
2977 | endloop
2979 if (init_v[index] != 0 && dist_v[index] != dist)
2981 finalize_ddr_dependent (ddr, chrec_known);
2982 return false;
2985 dist_v[index] = dist;
2986 init_v[index] = 1;
2987 *init_b = true;
2989 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
2991 /* This can be for example an affine vs. constant dependence
2992 (T[i] vs. T[3]) that is not an affine dependence and is
2993 not representable as a distance vector. */
2994 non_affine_dependence_relation (ddr);
2995 return false;
2999 return true;
3002 /* Return true when the DDR contains only constant access functions. */
3004 static bool
3005 constant_access_functions (const struct data_dependence_relation *ddr)
3007 unsigned i;
3009 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3010 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
3011 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
3012 return false;
3014 return true;
3017 /* Helper function for the case where DDR_A and DDR_B are the same
3018 multivariate access function with a constant step. For an example
3019 see pr34635-1.c. */
3021 static void
3022 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
3024 int x_1, x_2;
3025 tree c_1 = CHREC_LEFT (c_2);
3026 tree c_0 = CHREC_LEFT (c_1);
3027 lambda_vector dist_v;
3028 int v1, v2, cd;
3030 /* Polynomials with more than 2 variables are not handled yet. When
3031 the evolution steps are parameters, it is not possible to
3032 represent the dependence using classical distance vectors. */
3033 if (TREE_CODE (c_0) != INTEGER_CST
3034 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
3035 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
3037 DDR_AFFINE_P (ddr) = false;
3038 return;
3041 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
3042 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
3044 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
3045 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3046 v1 = int_cst_value (CHREC_RIGHT (c_1));
3047 v2 = int_cst_value (CHREC_RIGHT (c_2));
3048 cd = gcd (v1, v2);
3049 v1 /= cd;
3050 v2 /= cd;
3052 if (v2 < 0)
3054 v2 = -v2;
3055 v1 = -v1;
3058 dist_v[x_1] = v2;
3059 dist_v[x_2] = -v1;
3060 save_dist_v (ddr, dist_v);
3062 add_outer_distances (ddr, dist_v, x_1);
3065 /* Helper function for the case where DDR_A and DDR_B are the same
3066 access functions. */
3068 static void
3069 add_other_self_distances (struct data_dependence_relation *ddr)
3071 lambda_vector dist_v;
3072 unsigned i;
3073 int index_carry = DDR_NB_LOOPS (ddr);
3075 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3077 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
3079 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
3081 if (!evolution_function_is_univariate_p (access_fun))
3083 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
3085 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
3086 return;
3089 access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
3091 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
3092 add_multivariate_self_dist (ddr, access_fun);
3093 else
3094 /* The evolution step is not constant: it varies in
3095 the outer loop, so this cannot be represented by a
3096 distance vector. For example in pr34635.c the
3097 evolution is {0, +, {0, +, 4}_1}_2. */
3098 DDR_AFFINE_P (ddr) = false;
3100 return;
3103 index_carry = MIN (index_carry,
3104 index_in_loop_nest (CHREC_VARIABLE (access_fun),
3105 DDR_LOOP_NEST (ddr)));
3109 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3110 add_outer_distances (ddr, dist_v, index_carry);
3113 static void
3114 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
3116 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3118 dist_v[DDR_INNER_LOOP (ddr)] = 1;
3119 save_dist_v (ddr, dist_v);
3122 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
3123 is the case for example when access functions are the same and
3124 equal to a constant, as in:
3126 | loop_1
3127 | A[3] = ...
3128 | ... = A[3]
3129 | endloop_1
3131 in which case the distance vectors are (0) and (1). */
3133 static void
3134 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
3136 unsigned i, j;
3138 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3140 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
3141 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
3142 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
3144 for (j = 0; j < ca->n; j++)
3145 if (affine_function_zero_p (ca->fns[j]))
3147 insert_innermost_unit_dist_vector (ddr);
3148 return;
3151 for (j = 0; j < cb->n; j++)
3152 if (affine_function_zero_p (cb->fns[j]))
3154 insert_innermost_unit_dist_vector (ddr);
3155 return;
3160 /* Compute the classic per loop distance vector. DDR is the data
3161 dependence relation to build a vector from. Return false when fail
3162 to represent the data dependence as a distance vector. */
3164 static bool
3165 build_classic_dist_vector (struct data_dependence_relation *ddr,
3166 struct loop *loop_nest)
3168 bool init_b = false;
3169 int index_carry = DDR_NB_LOOPS (ddr);
3170 lambda_vector dist_v;
3172 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3173 return false;
3175 if (same_access_functions (ddr))
3177 /* Save the 0 vector. */
3178 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3179 save_dist_v (ddr, dist_v);
3181 if (constant_access_functions (ddr))
3182 add_distance_for_zero_overlaps (ddr);
3184 if (DDR_NB_LOOPS (ddr) > 1)
3185 add_other_self_distances (ddr);
3187 return true;
3190 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3191 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3192 dist_v, &init_b, &index_carry))
3193 return false;
3195 /* Save the distance vector if we initialized one. */
3196 if (init_b)
3198 /* Verify a basic constraint: classic distance vectors should
3199 always be lexicographically positive.
3201 Data references are collected in the order of execution of
3202 the program, thus for the following loop
3204 | for (i = 1; i < 100; i++)
3205 | for (j = 1; j < 100; j++)
3207 | t = T[j+1][i-1]; // A
3208 | T[j][i] = t + 2; // B
3211 references are collected following the direction of the wind:
3212 A then B. The data dependence tests are performed also
3213 following this order, such that we're looking at the distance
3214 separating the elements accessed by A from the elements later
3215 accessed by B. But in this example, the distance returned by
3216 test_dep (A, B) is lexicographically negative (-1, 1), that
3217 means that the access A occurs later than B with respect to
3218 the outer loop, ie. we're actually looking upwind. In this
3219 case we solve test_dep (B, A) looking downwind to the
3220 lexicographically positive solution, that returns the
3221 distance vector (1, -1). */
3222 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3224 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3225 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3226 loop_nest))
3227 return false;
3228 compute_subscript_distance (ddr);
3229 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3230 save_v, &init_b, &index_carry))
3231 return false;
3232 save_dist_v (ddr, save_v);
3233 DDR_REVERSED_P (ddr) = true;
3235 /* In this case there is a dependence forward for all the
3236 outer loops:
3238 | for (k = 1; k < 100; k++)
3239 | for (i = 1; i < 100; i++)
3240 | for (j = 1; j < 100; j++)
3242 | t = T[j+1][i-1]; // A
3243 | T[j][i] = t + 2; // B
3246 the vectors are:
3247 (0, 1, -1)
3248 (1, 1, -1)
3249 (1, -1, 1)
3251 if (DDR_NB_LOOPS (ddr) > 1)
3253 add_outer_distances (ddr, save_v, index_carry);
3254 add_outer_distances (ddr, dist_v, index_carry);
3257 else
3259 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3260 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3262 if (DDR_NB_LOOPS (ddr) > 1)
3264 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3266 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3267 DDR_A (ddr), loop_nest))
3268 return false;
3269 compute_subscript_distance (ddr);
3270 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3271 opposite_v, &init_b,
3272 &index_carry))
3273 return false;
3275 save_dist_v (ddr, save_v);
3276 add_outer_distances (ddr, dist_v, index_carry);
3277 add_outer_distances (ddr, opposite_v, index_carry);
3279 else
3280 save_dist_v (ddr, save_v);
3283 else
3285 /* There is a distance of 1 on all the outer loops: Example:
3286 there is a dependence of distance 1 on loop_1 for the array A.
3288 | loop_1
3289 | A[5] = ...
3290 | endloop
3292 add_outer_distances (ddr, dist_v,
3293 lambda_vector_first_nz (dist_v,
3294 DDR_NB_LOOPS (ddr), 0));
3297 if (dump_file && (dump_flags & TDF_DETAILS))
3299 unsigned i;
3301 fprintf (dump_file, "(build_classic_dist_vector\n");
3302 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3304 fprintf (dump_file, " dist_vector = (");
3305 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3306 DDR_NB_LOOPS (ddr));
3307 fprintf (dump_file, " )\n");
3309 fprintf (dump_file, ")\n");
3312 return true;
3315 /* Return the direction for a given distance.
3316 FIXME: Computing dir this way is suboptimal, since dir can catch
3317 cases that dist is unable to represent. */
3319 static inline enum data_dependence_direction
3320 dir_from_dist (int dist)
3322 if (dist > 0)
3323 return dir_positive;
3324 else if (dist < 0)
3325 return dir_negative;
3326 else
3327 return dir_equal;
3330 /* Compute the classic per loop direction vector. DDR is the data
3331 dependence relation to build a vector from. */
3333 static void
3334 build_classic_dir_vector (struct data_dependence_relation *ddr)
3336 unsigned i, j;
3337 lambda_vector dist_v;
3339 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
3341 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3343 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3344 dir_v[j] = dir_from_dist (dist_v[j]);
3346 save_dir_v (ddr, dir_v);
3350 /* Helper function. Returns true when there is a dependence between
3351 data references DRA and DRB. */
3353 static bool
3354 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3355 struct data_reference *dra,
3356 struct data_reference *drb,
3357 struct loop *loop_nest)
3359 unsigned int i;
3360 tree last_conflicts;
3361 struct subscript *subscript;
3363 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3364 i++)
3366 conflict_function *overlaps_a, *overlaps_b;
3368 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3369 DR_ACCESS_FN (drb, i),
3370 &overlaps_a, &overlaps_b,
3371 &last_conflicts, loop_nest);
3373 if (CF_NOT_KNOWN_P (overlaps_a)
3374 || CF_NOT_KNOWN_P (overlaps_b))
3376 finalize_ddr_dependent (ddr, chrec_dont_know);
3377 dependence_stats.num_dependence_undetermined++;
3378 free_conflict_function (overlaps_a);
3379 free_conflict_function (overlaps_b);
3380 return false;
3383 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3384 || CF_NO_DEPENDENCE_P (overlaps_b))
3386 finalize_ddr_dependent (ddr, chrec_known);
3387 dependence_stats.num_dependence_independent++;
3388 free_conflict_function (overlaps_a);
3389 free_conflict_function (overlaps_b);
3390 return false;
3393 else
3395 if (SUB_CONFLICTS_IN_A (subscript))
3396 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3397 if (SUB_CONFLICTS_IN_B (subscript))
3398 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3400 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3401 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3402 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3406 return true;
3409 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3411 static void
3412 subscript_dependence_tester (struct data_dependence_relation *ddr,
3413 struct loop *loop_nest)
3416 if (dump_file && (dump_flags & TDF_DETAILS))
3417 fprintf (dump_file, "(subscript_dependence_tester \n");
3419 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3420 dependence_stats.num_dependence_dependent++;
3422 compute_subscript_distance (ddr);
3423 if (build_classic_dist_vector (ddr, loop_nest))
3424 build_classic_dir_vector (ddr);
3426 if (dump_file && (dump_flags & TDF_DETAILS))
3427 fprintf (dump_file, ")\n");
3430 /* Returns true when all the access functions of A are affine or
3431 constant with respect to LOOP_NEST. */
3433 static bool
3434 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3435 const struct loop *loop_nest)
3437 unsigned int i;
3438 VEC(tree,heap) *fns = DR_ACCESS_FNS (a);
3439 tree t;
3441 FOR_EACH_VEC_ELT (tree, fns, i, t)
3442 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3443 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3444 return false;
3446 return true;
3449 /* Initializes an equation for an OMEGA problem using the information
3450 contained in the ACCESS_FUN. Returns true when the operation
3451 succeeded.
3453 PB is the omega constraint system.
3454 EQ is the number of the equation to be initialized.
3455 OFFSET is used for shifting the variables names in the constraints:
3456 a constrain is composed of 2 * the number of variables surrounding
3457 dependence accesses. OFFSET is set either to 0 for the first n variables,
3458 then it is set to n.
3459 ACCESS_FUN is expected to be an affine chrec. */
3461 static bool
3462 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3463 unsigned int offset, tree access_fun,
3464 struct data_dependence_relation *ddr)
3466 switch (TREE_CODE (access_fun))
3468 case POLYNOMIAL_CHREC:
3470 tree left = CHREC_LEFT (access_fun);
3471 tree right = CHREC_RIGHT (access_fun);
3472 int var = CHREC_VARIABLE (access_fun);
3473 unsigned var_idx;
3475 if (TREE_CODE (right) != INTEGER_CST)
3476 return false;
3478 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3479 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3481 /* Compute the innermost loop index. */
3482 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3484 if (offset == 0)
3485 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3486 += int_cst_value (right);
3488 switch (TREE_CODE (left))
3490 case POLYNOMIAL_CHREC:
3491 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3493 case INTEGER_CST:
3494 pb->eqs[eq].coef[0] += int_cst_value (left);
3495 return true;
3497 default:
3498 return false;
3502 case INTEGER_CST:
3503 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3504 return true;
3506 default:
3507 return false;
3511 /* As explained in the comments preceding init_omega_for_ddr, we have
3512 to set up a system for each loop level, setting outer loops
3513 variation to zero, and current loop variation to positive or zero.
3514 Save each lexico positive distance vector. */
3516 static void
3517 omega_extract_distance_vectors (omega_pb pb,
3518 struct data_dependence_relation *ddr)
3520 int eq, geq;
3521 unsigned i, j;
3522 struct loop *loopi, *loopj;
3523 enum omega_result res;
3525 /* Set a new problem for each loop in the nest. The basis is the
3526 problem that we have initialized until now. On top of this we
3527 add new constraints. */
3528 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3529 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3531 int dist = 0;
3532 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3533 DDR_NB_LOOPS (ddr));
3535 omega_copy_problem (copy, pb);
3537 /* For all the outer loops "loop_j", add "dj = 0". */
3538 for (j = 0;
3539 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3541 eq = omega_add_zero_eq (copy, omega_black);
3542 copy->eqs[eq].coef[j + 1] = 1;
3545 /* For "loop_i", add "0 <= di". */
3546 geq = omega_add_zero_geq (copy, omega_black);
3547 copy->geqs[geq].coef[i + 1] = 1;
3549 /* Reduce the constraint system, and test that the current
3550 problem is feasible. */
3551 res = omega_simplify_problem (copy);
3552 if (res == omega_false
3553 || res == omega_unknown
3554 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3555 goto next_problem;
3557 for (eq = 0; eq < copy->num_subs; eq++)
3558 if (copy->subs[eq].key == (int) i + 1)
3560 dist = copy->subs[eq].coef[0];
3561 goto found_dist;
3564 if (dist == 0)
3566 /* Reinitialize problem... */
3567 omega_copy_problem (copy, pb);
3568 for (j = 0;
3569 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3571 eq = omega_add_zero_eq (copy, omega_black);
3572 copy->eqs[eq].coef[j + 1] = 1;
3575 /* ..., but this time "di = 1". */
3576 eq = omega_add_zero_eq (copy, omega_black);
3577 copy->eqs[eq].coef[i + 1] = 1;
3578 copy->eqs[eq].coef[0] = -1;
3580 res = omega_simplify_problem (copy);
3581 if (res == omega_false
3582 || res == omega_unknown
3583 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3584 goto next_problem;
3586 for (eq = 0; eq < copy->num_subs; eq++)
3587 if (copy->subs[eq].key == (int) i + 1)
3589 dist = copy->subs[eq].coef[0];
3590 goto found_dist;
3594 found_dist:;
3595 /* Save the lexicographically positive distance vector. */
3596 if (dist >= 0)
3598 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3599 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3601 dist_v[i] = dist;
3603 for (eq = 0; eq < copy->num_subs; eq++)
3604 if (copy->subs[eq].key > 0)
3606 dist = copy->subs[eq].coef[0];
3607 dist_v[copy->subs[eq].key - 1] = dist;
3610 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3611 dir_v[j] = dir_from_dist (dist_v[j]);
3613 save_dist_v (ddr, dist_v);
3614 save_dir_v (ddr, dir_v);
3617 next_problem:;
3618 omega_free_problem (copy);
3622 /* This is called for each subscript of a tuple of data references:
3623 insert an equality for representing the conflicts. */
3625 static bool
3626 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3627 struct data_dependence_relation *ddr,
3628 omega_pb pb, bool *maybe_dependent)
3630 int eq;
3631 tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3632 TREE_TYPE (access_fun_b));
3633 tree fun_a = chrec_convert (type, access_fun_a, NULL);
3634 tree fun_b = chrec_convert (type, access_fun_b, NULL);
3635 tree difference = chrec_fold_minus (type, fun_a, fun_b);
3636 tree minus_one;
3638 /* When the fun_a - fun_b is not constant, the dependence is not
3639 captured by the classic distance vector representation. */
3640 if (TREE_CODE (difference) != INTEGER_CST)
3641 return false;
3643 /* ZIV test. */
3644 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3646 /* There is no dependence. */
3647 *maybe_dependent = false;
3648 return true;
3651 minus_one = build_int_cst (type, -1);
3652 fun_b = chrec_fold_multiply (type, fun_b, minus_one);
3654 eq = omega_add_zero_eq (pb, omega_black);
3655 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3656 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3657 /* There is probably a dependence, but the system of
3658 constraints cannot be built: answer "don't know". */
3659 return false;
3661 /* GCD test. */
3662 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3663 && !int_divides_p (lambda_vector_gcd
3664 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3665 2 * DDR_NB_LOOPS (ddr)),
3666 pb->eqs[eq].coef[0]))
3668 /* There is no dependence. */
3669 *maybe_dependent = false;
3670 return true;
3673 return true;
3676 /* Helper function, same as init_omega_for_ddr but specialized for
3677 data references A and B. */
3679 static bool
3680 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3681 struct data_dependence_relation *ddr,
3682 omega_pb pb, bool *maybe_dependent)
3684 unsigned i;
3685 int ineq;
3686 struct loop *loopi;
3687 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3689 /* Insert an equality per subscript. */
3690 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3692 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3693 ddr, pb, maybe_dependent))
3694 return false;
3695 else if (*maybe_dependent == false)
3697 /* There is no dependence. */
3698 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3699 return true;
3703 /* Insert inequalities: constraints corresponding to the iteration
3704 domain, i.e. the loops surrounding the references "loop_x" and
3705 the distance variables "dx". The layout of the OMEGA
3706 representation is as follows:
3707 - coef[0] is the constant
3708 - coef[1..nb_loops] are the protected variables that will not be
3709 removed by the solver: the "dx"
3710 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3712 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3713 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3715 HOST_WIDE_INT nbi = estimated_loop_iterations_int (loopi, false);
3717 /* 0 <= loop_x */
3718 ineq = omega_add_zero_geq (pb, omega_black);
3719 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3721 /* 0 <= loop_x + dx */
3722 ineq = omega_add_zero_geq (pb, omega_black);
3723 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3724 pb->geqs[ineq].coef[i + 1] = 1;
3726 if (nbi != -1)
3728 /* loop_x <= nb_iters */
3729 ineq = omega_add_zero_geq (pb, omega_black);
3730 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3731 pb->geqs[ineq].coef[0] = nbi;
3733 /* loop_x + dx <= nb_iters */
3734 ineq = omega_add_zero_geq (pb, omega_black);
3735 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3736 pb->geqs[ineq].coef[i + 1] = -1;
3737 pb->geqs[ineq].coef[0] = nbi;
3739 /* A step "dx" bigger than nb_iters is not feasible, so
3740 add "0 <= nb_iters + dx", */
3741 ineq = omega_add_zero_geq (pb, omega_black);
3742 pb->geqs[ineq].coef[i + 1] = 1;
3743 pb->geqs[ineq].coef[0] = nbi;
3744 /* and "dx <= nb_iters". */
3745 ineq = omega_add_zero_geq (pb, omega_black);
3746 pb->geqs[ineq].coef[i + 1] = -1;
3747 pb->geqs[ineq].coef[0] = nbi;
3751 omega_extract_distance_vectors (pb, ddr);
3753 return true;
3756 /* Sets up the Omega dependence problem for the data dependence
3757 relation DDR. Returns false when the constraint system cannot be
3758 built, ie. when the test answers "don't know". Returns true
3759 otherwise, and when independence has been proved (using one of the
3760 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3761 set MAYBE_DEPENDENT to true.
3763 Example: for setting up the dependence system corresponding to the
3764 conflicting accesses
3766 | loop_i
3767 | loop_j
3768 | A[i, i+1] = ...
3769 | ... A[2*j, 2*(i + j)]
3770 | endloop_j
3771 | endloop_i
3773 the following constraints come from the iteration domain:
3775 0 <= i <= Ni
3776 0 <= i + di <= Ni
3777 0 <= j <= Nj
3778 0 <= j + dj <= Nj
3780 where di, dj are the distance variables. The constraints
3781 representing the conflicting elements are:
3783 i = 2 * (j + dj)
3784 i + 1 = 2 * (i + di + j + dj)
3786 For asking that the resulting distance vector (di, dj) be
3787 lexicographically positive, we insert the constraint "di >= 0". If
3788 "di = 0" in the solution, we fix that component to zero, and we
3789 look at the inner loops: we set a new problem where all the outer
3790 loop distances are zero, and fix this inner component to be
3791 positive. When one of the components is positive, we save that
3792 distance, and set a new problem where the distance on this loop is
3793 zero, searching for other distances in the inner loops. Here is
3794 the classic example that illustrates that we have to set for each
3795 inner loop a new problem:
3797 | loop_1
3798 | loop_2
3799 | A[10]
3800 | endloop_2
3801 | endloop_1
3803 we have to save two distances (1, 0) and (0, 1).
3805 Given two array references, refA and refB, we have to set the
3806 dependence problem twice, refA vs. refB and refB vs. refA, and we
3807 cannot do a single test, as refB might occur before refA in the
3808 inner loops, and the contrary when considering outer loops: ex.
3810 | loop_0
3811 | loop_1
3812 | loop_2
3813 | T[{1,+,1}_2][{1,+,1}_1] // refA
3814 | T[{2,+,1}_2][{0,+,1}_1] // refB
3815 | endloop_2
3816 | endloop_1
3817 | endloop_0
3819 refB touches the elements in T before refA, and thus for the same
3820 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3821 but for successive loop_0 iterations, we have (1, -1, 1)
3823 The Omega solver expects the distance variables ("di" in the
3824 previous example) to come first in the constraint system (as
3825 variables to be protected, or "safe" variables), the constraint
3826 system is built using the following layout:
3828 "cst | distance vars | index vars".
3831 static bool
3832 init_omega_for_ddr (struct data_dependence_relation *ddr,
3833 bool *maybe_dependent)
3835 omega_pb pb;
3836 bool res = false;
3838 *maybe_dependent = true;
3840 if (same_access_functions (ddr))
3842 unsigned j;
3843 lambda_vector dir_v;
3845 /* Save the 0 vector. */
3846 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3847 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3848 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3849 dir_v[j] = dir_equal;
3850 save_dir_v (ddr, dir_v);
3852 /* Save the dependences carried by outer loops. */
3853 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3854 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3855 maybe_dependent);
3856 omega_free_problem (pb);
3857 return res;
3860 /* Omega expects the protected variables (those that have to be kept
3861 after elimination) to appear first in the constraint system.
3862 These variables are the distance variables. In the following
3863 initialization we declare NB_LOOPS safe variables, and the total
3864 number of variables for the constraint system is 2*NB_LOOPS. */
3865 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3866 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3867 maybe_dependent);
3868 omega_free_problem (pb);
3870 /* Stop computation if not decidable, or no dependence. */
3871 if (res == false || *maybe_dependent == false)
3872 return res;
3874 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3875 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
3876 maybe_dependent);
3877 omega_free_problem (pb);
3879 return res;
3882 /* Return true when DDR contains the same information as that stored
3883 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
3885 static bool
3886 ddr_consistent_p (FILE *file,
3887 struct data_dependence_relation *ddr,
3888 VEC (lambda_vector, heap) *dist_vects,
3889 VEC (lambda_vector, heap) *dir_vects)
3891 unsigned int i, j;
3893 /* If dump_file is set, output there. */
3894 if (dump_file && (dump_flags & TDF_DETAILS))
3895 file = dump_file;
3897 if (VEC_length (lambda_vector, dist_vects) != DDR_NUM_DIST_VECTS (ddr))
3899 lambda_vector b_dist_v;
3900 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
3901 VEC_length (lambda_vector, dist_vects),
3902 DDR_NUM_DIST_VECTS (ddr));
3904 fprintf (file, "Banerjee dist vectors:\n");
3905 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, i, b_dist_v)
3906 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
3908 fprintf (file, "Omega dist vectors:\n");
3909 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3910 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
3912 fprintf (file, "data dependence relation:\n");
3913 dump_data_dependence_relation (file, ddr);
3915 fprintf (file, ")\n");
3916 return false;
3919 if (VEC_length (lambda_vector, dir_vects) != DDR_NUM_DIR_VECTS (ddr))
3921 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
3922 VEC_length (lambda_vector, dir_vects),
3923 DDR_NUM_DIR_VECTS (ddr));
3924 return false;
3927 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3929 lambda_vector a_dist_v;
3930 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
3932 /* Distance vectors are not ordered in the same way in the DDR
3933 and in the DIST_VECTS: search for a matching vector. */
3934 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, a_dist_v)
3935 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
3936 break;
3938 if (j == VEC_length (lambda_vector, dist_vects))
3940 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
3941 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
3942 fprintf (file, "not found in Omega dist vectors:\n");
3943 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
3944 fprintf (file, "data dependence relation:\n");
3945 dump_data_dependence_relation (file, ddr);
3946 fprintf (file, ")\n");
3950 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
3952 lambda_vector a_dir_v;
3953 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
3955 /* Direction vectors are not ordered in the same way in the DDR
3956 and in the DIR_VECTS: search for a matching vector. */
3957 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, a_dir_v)
3958 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
3959 break;
3961 if (j == VEC_length (lambda_vector, dist_vects))
3963 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
3964 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
3965 fprintf (file, "not found in Omega dir vectors:\n");
3966 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
3967 fprintf (file, "data dependence relation:\n");
3968 dump_data_dependence_relation (file, ddr);
3969 fprintf (file, ")\n");
3973 return true;
3976 /* This computes the affine dependence relation between A and B with
3977 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
3978 independence between two accesses, while CHREC_DONT_KNOW is used
3979 for representing the unknown relation.
3981 Note that it is possible to stop the computation of the dependence
3982 relation the first time we detect a CHREC_KNOWN element for a given
3983 subscript. */
3985 static void
3986 compute_affine_dependence (struct data_dependence_relation *ddr,
3987 struct loop *loop_nest)
3989 struct data_reference *dra = DDR_A (ddr);
3990 struct data_reference *drb = DDR_B (ddr);
3992 if (dump_file && (dump_flags & TDF_DETAILS))
3994 fprintf (dump_file, "(compute_affine_dependence\n");
3995 fprintf (dump_file, " (stmt_a = \n");
3996 print_gimple_stmt (dump_file, DR_STMT (dra), 0, 0);
3997 fprintf (dump_file, ")\n (stmt_b = \n");
3998 print_gimple_stmt (dump_file, DR_STMT (drb), 0, 0);
3999 fprintf (dump_file, ")\n");
4002 /* Analyze only when the dependence relation is not yet known. */
4003 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE
4004 && !DDR_SELF_REFERENCE (ddr))
4006 dependence_stats.num_dependence_tests++;
4008 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
4009 && access_functions_are_affine_or_constant_p (drb, loop_nest))
4011 if (flag_check_data_deps)
4013 /* Compute the dependences using the first algorithm. */
4014 subscript_dependence_tester (ddr, loop_nest);
4016 if (dump_file && (dump_flags & TDF_DETAILS))
4018 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
4019 dump_data_dependence_relation (dump_file, ddr);
4022 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4024 bool maybe_dependent;
4025 VEC (lambda_vector, heap) *dir_vects, *dist_vects;
4027 /* Save the result of the first DD analyzer. */
4028 dist_vects = DDR_DIST_VECTS (ddr);
4029 dir_vects = DDR_DIR_VECTS (ddr);
4031 /* Reset the information. */
4032 DDR_DIST_VECTS (ddr) = NULL;
4033 DDR_DIR_VECTS (ddr) = NULL;
4035 /* Compute the same information using Omega. */
4036 if (!init_omega_for_ddr (ddr, &maybe_dependent))
4037 goto csys_dont_know;
4039 if (dump_file && (dump_flags & TDF_DETAILS))
4041 fprintf (dump_file, "Omega Analyzer\n");
4042 dump_data_dependence_relation (dump_file, ddr);
4045 /* Check that we get the same information. */
4046 if (maybe_dependent)
4047 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
4048 dir_vects));
4051 else
4052 subscript_dependence_tester (ddr, loop_nest);
4055 /* As a last case, if the dependence cannot be determined, or if
4056 the dependence is considered too difficult to determine, answer
4057 "don't know". */
4058 else
4060 csys_dont_know:;
4061 dependence_stats.num_dependence_undetermined++;
4063 if (dump_file && (dump_flags & TDF_DETAILS))
4065 fprintf (dump_file, "Data ref a:\n");
4066 dump_data_reference (dump_file, dra);
4067 fprintf (dump_file, "Data ref b:\n");
4068 dump_data_reference (dump_file, drb);
4069 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4071 finalize_ddr_dependent (ddr, chrec_dont_know);
4075 if (dump_file && (dump_flags & TDF_DETAILS))
4076 fprintf (dump_file, ")\n");
4079 /* This computes the dependence relation for the same data
4080 reference into DDR. */
4082 static void
4083 compute_self_dependence (struct data_dependence_relation *ddr)
4085 unsigned int i;
4086 struct subscript *subscript;
4088 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
4089 return;
4091 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
4092 i++)
4094 if (SUB_CONFLICTS_IN_A (subscript))
4095 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
4096 if (SUB_CONFLICTS_IN_B (subscript))
4097 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
4099 /* The accessed index overlaps for each iteration. */
4100 SUB_CONFLICTS_IN_A (subscript)
4101 = conflict_fn (1, affine_fn_cst (integer_zero_node));
4102 SUB_CONFLICTS_IN_B (subscript)
4103 = conflict_fn (1, affine_fn_cst (integer_zero_node));
4104 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
4107 /* The distance vector is the zero vector. */
4108 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4109 save_dir_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4112 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4113 the data references in DATAREFS, in the LOOP_NEST. When
4114 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4115 relations. */
4117 void
4118 compute_all_dependences (VEC (data_reference_p, heap) *datarefs,
4119 VEC (ddr_p, heap) **dependence_relations,
4120 VEC (loop_p, heap) *loop_nest,
4121 bool compute_self_and_rr)
4123 struct data_dependence_relation *ddr;
4124 struct data_reference *a, *b;
4125 unsigned int i, j;
4127 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4128 for (j = i + 1; VEC_iterate (data_reference_p, datarefs, j, b); j++)
4129 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4131 ddr = initialize_data_dependence_relation (a, b, loop_nest);
4132 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4133 if (loop_nest)
4134 compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
4137 if (compute_self_and_rr)
4138 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4140 ddr = initialize_data_dependence_relation (a, a, loop_nest);
4141 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4142 compute_self_dependence (ddr);
4146 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4147 true if STMT clobbers memory, false otherwise. */
4149 bool
4150 get_references_in_stmt (gimple stmt, VEC (data_ref_loc, heap) **references)
4152 bool clobbers_memory = false;
4153 data_ref_loc *ref;
4154 tree *op0, *op1;
4155 enum gimple_code stmt_code = gimple_code (stmt);
4157 *references = NULL;
4159 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4160 Calls have side-effects, except those to const or pure
4161 functions. */
4162 if ((stmt_code == GIMPLE_CALL
4163 && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)))
4164 || (stmt_code == GIMPLE_ASM
4165 && gimple_asm_volatile_p (stmt)))
4166 clobbers_memory = true;
4168 if (!gimple_vuse (stmt))
4169 return clobbers_memory;
4171 if (stmt_code == GIMPLE_ASSIGN)
4173 tree base;
4174 op0 = gimple_assign_lhs_ptr (stmt);
4175 op1 = gimple_assign_rhs1_ptr (stmt);
4177 if (DECL_P (*op1)
4178 || (REFERENCE_CLASS_P (*op1)
4179 && (base = get_base_address (*op1))
4180 && TREE_CODE (base) != SSA_NAME))
4182 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4183 ref->pos = op1;
4184 ref->is_read = true;
4187 if (DECL_P (*op0)
4188 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0)))
4190 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4191 ref->pos = op0;
4192 ref->is_read = false;
4195 else if (stmt_code == GIMPLE_CALL)
4197 unsigned i, n = gimple_call_num_args (stmt);
4199 for (i = 0; i < n; i++)
4201 op0 = gimple_call_arg_ptr (stmt, i);
4203 if (DECL_P (*op0)
4204 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0)))
4206 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4207 ref->pos = op0;
4208 ref->is_read = true;
4213 return clobbers_memory;
4216 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4217 reference, returns false, otherwise returns true. NEST is the outermost
4218 loop of the loop nest in which the references should be analyzed. */
4220 bool
4221 find_data_references_in_stmt (struct loop *nest, gimple stmt,
4222 VEC (data_reference_p, heap) **datarefs)
4224 unsigned i;
4225 VEC (data_ref_loc, heap) *references;
4226 data_ref_loc *ref;
4227 bool ret = true;
4228 data_reference_p dr;
4230 if (get_references_in_stmt (stmt, &references))
4232 VEC_free (data_ref_loc, heap, references);
4233 return false;
4236 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4238 dr = create_data_ref (nest, loop_containing_stmt (stmt),
4239 *ref->pos, stmt, ref->is_read);
4240 gcc_assert (dr != NULL);
4242 /* FIXME -- data dependence analysis does not work correctly for objects
4243 with invariant addresses in loop nests. Let us fail here until the
4244 problem is fixed. */
4245 if (dr_address_invariant_p (dr) && nest)
4247 free_data_ref (dr);
4248 if (dump_file && (dump_flags & TDF_DETAILS))
4249 fprintf (dump_file, "\tFAILED as dr address is invariant\n");
4250 ret = false;
4251 break;
4254 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4256 VEC_free (data_ref_loc, heap, references);
4257 return ret;
4260 /* Stores the data references in STMT to DATAREFS. If there is an
4261 unanalyzable reference, returns false, otherwise returns true.
4262 NEST is the outermost loop of the loop nest in which the references
4263 should be instantiated, LOOP is the loop in which the references
4264 should be analyzed. */
4266 bool
4267 graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple stmt,
4268 VEC (data_reference_p, heap) **datarefs)
4270 unsigned i;
4271 VEC (data_ref_loc, heap) *references;
4272 data_ref_loc *ref;
4273 bool ret = true;
4274 data_reference_p dr;
4276 if (get_references_in_stmt (stmt, &references))
4278 VEC_free (data_ref_loc, heap, references);
4279 return false;
4282 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4284 dr = create_data_ref (nest, loop, *ref->pos, stmt, ref->is_read);
4285 gcc_assert (dr != NULL);
4286 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4289 VEC_free (data_ref_loc, heap, references);
4290 return ret;
4293 /* Search the data references in LOOP, and record the information into
4294 DATAREFS. Returns chrec_dont_know when failing to analyze a
4295 difficult case, returns NULL_TREE otherwise. */
4297 static tree
4298 find_data_references_in_bb (struct loop *loop, basic_block bb,
4299 VEC (data_reference_p, heap) **datarefs)
4301 gimple_stmt_iterator bsi;
4303 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4305 gimple stmt = gsi_stmt (bsi);
4307 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4309 struct data_reference *res;
4310 res = XCNEW (struct data_reference);
4311 VEC_safe_push (data_reference_p, heap, *datarefs, res);
4313 return chrec_dont_know;
4317 return NULL_TREE;
4320 /* Search the data references in LOOP, and record the information into
4321 DATAREFS. Returns chrec_dont_know when failing to analyze a
4322 difficult case, returns NULL_TREE otherwise.
4324 TODO: This function should be made smarter so that it can handle address
4325 arithmetic as if they were array accesses, etc. */
4327 tree
4328 find_data_references_in_loop (struct loop *loop,
4329 VEC (data_reference_p, heap) **datarefs)
4331 basic_block bb, *bbs;
4332 unsigned int i;
4334 bbs = get_loop_body_in_dom_order (loop);
4336 for (i = 0; i < loop->num_nodes; i++)
4338 bb = bbs[i];
4340 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4342 free (bbs);
4343 return chrec_dont_know;
4346 free (bbs);
4348 return NULL_TREE;
4351 /* Recursive helper function. */
4353 static bool
4354 find_loop_nest_1 (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4356 /* Inner loops of the nest should not contain siblings. Example:
4357 when there are two consecutive loops,
4359 | loop_0
4360 | loop_1
4361 | A[{0, +, 1}_1]
4362 | endloop_1
4363 | loop_2
4364 | A[{0, +, 1}_2]
4365 | endloop_2
4366 | endloop_0
4368 the dependence relation cannot be captured by the distance
4369 abstraction. */
4370 if (loop->next)
4371 return false;
4373 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4374 if (loop->inner)
4375 return find_loop_nest_1 (loop->inner, loop_nest);
4376 return true;
4379 /* Return false when the LOOP is not well nested. Otherwise return
4380 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4381 contain the loops from the outermost to the innermost, as they will
4382 appear in the classic distance vector. */
4384 bool
4385 find_loop_nest (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4387 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4388 if (loop->inner)
4389 return find_loop_nest_1 (loop->inner, loop_nest);
4390 return true;
4393 /* Returns true when the data dependences have been computed, false otherwise.
4394 Given a loop nest LOOP, the following vectors are returned:
4395 DATAREFS is initialized to all the array elements contained in this loop,
4396 DEPENDENCE_RELATIONS contains the relations between the data references.
4397 Compute read-read and self relations if
4398 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4400 bool
4401 compute_data_dependences_for_loop (struct loop *loop,
4402 bool compute_self_and_read_read_dependences,
4403 VEC (loop_p, heap) **loop_nest,
4404 VEC (data_reference_p, heap) **datarefs,
4405 VEC (ddr_p, heap) **dependence_relations)
4407 bool res = true;
4409 memset (&dependence_stats, 0, sizeof (dependence_stats));
4411 /* If the loop nest is not well formed, or one of the data references
4412 is not computable, give up without spending time to compute other
4413 dependences. */
4414 if (!loop
4415 || !find_loop_nest (loop, loop_nest)
4416 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know)
4418 struct data_dependence_relation *ddr;
4420 /* Insert a single relation into dependence_relations:
4421 chrec_dont_know. */
4422 ddr = initialize_data_dependence_relation (NULL, NULL, *loop_nest);
4423 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4424 res = false;
4426 else
4427 compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
4428 compute_self_and_read_read_dependences);
4430 if (dump_file && (dump_flags & TDF_STATS))
4432 fprintf (dump_file, "Dependence tester statistics:\n");
4434 fprintf (dump_file, "Number of dependence tests: %d\n",
4435 dependence_stats.num_dependence_tests);
4436 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4437 dependence_stats.num_dependence_dependent);
4438 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4439 dependence_stats.num_dependence_independent);
4440 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4441 dependence_stats.num_dependence_undetermined);
4443 fprintf (dump_file, "Number of subscript tests: %d\n",
4444 dependence_stats.num_subscript_tests);
4445 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4446 dependence_stats.num_subscript_undetermined);
4447 fprintf (dump_file, "Number of same subscript function: %d\n",
4448 dependence_stats.num_same_subscript_function);
4450 fprintf (dump_file, "Number of ziv tests: %d\n",
4451 dependence_stats.num_ziv);
4452 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4453 dependence_stats.num_ziv_dependent);
4454 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4455 dependence_stats.num_ziv_independent);
4456 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4457 dependence_stats.num_ziv_unimplemented);
4459 fprintf (dump_file, "Number of siv tests: %d\n",
4460 dependence_stats.num_siv);
4461 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4462 dependence_stats.num_siv_dependent);
4463 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4464 dependence_stats.num_siv_independent);
4465 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4466 dependence_stats.num_siv_unimplemented);
4468 fprintf (dump_file, "Number of miv tests: %d\n",
4469 dependence_stats.num_miv);
4470 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4471 dependence_stats.num_miv_dependent);
4472 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4473 dependence_stats.num_miv_independent);
4474 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4475 dependence_stats.num_miv_unimplemented);
4478 return res;
4481 /* Returns true when the data dependences for the basic block BB have been
4482 computed, false otherwise.
4483 DATAREFS is initialized to all the array elements contained in this basic
4484 block, DEPENDENCE_RELATIONS contains the relations between the data
4485 references. Compute read-read and self relations if
4486 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4487 bool
4488 compute_data_dependences_for_bb (basic_block bb,
4489 bool compute_self_and_read_read_dependences,
4490 VEC (data_reference_p, heap) **datarefs,
4491 VEC (ddr_p, heap) **dependence_relations)
4493 if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know)
4494 return false;
4496 compute_all_dependences (*datarefs, dependence_relations, NULL,
4497 compute_self_and_read_read_dependences);
4498 return true;
4501 /* Entry point (for testing only). Analyze all the data references
4502 and the dependence relations in LOOP.
4504 The data references are computed first.
4506 A relation on these nodes is represented by a complete graph. Some
4507 of the relations could be of no interest, thus the relations can be
4508 computed on demand.
4510 In the following function we compute all the relations. This is
4511 just a first implementation that is here for:
4512 - for showing how to ask for the dependence relations,
4513 - for the debugging the whole dependence graph,
4514 - for the dejagnu testcases and maintenance.
4516 It is possible to ask only for a part of the graph, avoiding to
4517 compute the whole dependence graph. The computed dependences are
4518 stored in a knowledge base (KB) such that later queries don't
4519 recompute the same information. The implementation of this KB is
4520 transparent to the optimizer, and thus the KB can be changed with a
4521 more efficient implementation, or the KB could be disabled. */
4522 static void
4523 analyze_all_data_dependences (struct loop *loop)
4525 unsigned int i;
4526 int nb_data_refs = 10;
4527 VEC (data_reference_p, heap) *datarefs =
4528 VEC_alloc (data_reference_p, heap, nb_data_refs);
4529 VEC (ddr_p, heap) *dependence_relations =
4530 VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs);
4531 VEC (loop_p, heap) *loop_nest = VEC_alloc (loop_p, heap, 3);
4533 /* Compute DDs on the whole function. */
4534 compute_data_dependences_for_loop (loop, false, &loop_nest, &datarefs,
4535 &dependence_relations);
4537 if (dump_file)
4539 dump_data_dependence_relations (dump_file, dependence_relations);
4540 fprintf (dump_file, "\n\n");
4542 if (dump_flags & TDF_DETAILS)
4543 dump_dist_dir_vectors (dump_file, dependence_relations);
4545 if (dump_flags & TDF_STATS)
4547 unsigned nb_top_relations = 0;
4548 unsigned nb_bot_relations = 0;
4549 unsigned nb_chrec_relations = 0;
4550 struct data_dependence_relation *ddr;
4552 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4554 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4555 nb_top_relations++;
4557 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4558 nb_bot_relations++;
4560 else
4561 nb_chrec_relations++;
4564 gather_stats_on_scev_database ();
4568 VEC_free (loop_p, heap, loop_nest);
4569 free_dependence_relations (dependence_relations);
4570 free_data_refs (datarefs);
4573 /* Computes all the data dependences and check that the results of
4574 several analyzers are the same. */
4576 void
4577 tree_check_data_deps (void)
4579 loop_iterator li;
4580 struct loop *loop_nest;
4582 FOR_EACH_LOOP (li, loop_nest, 0)
4583 analyze_all_data_dependences (loop_nest);
4586 /* Free the memory used by a data dependence relation DDR. */
4588 void
4589 free_dependence_relation (struct data_dependence_relation *ddr)
4591 if (ddr == NULL)
4592 return;
4594 if (DDR_SUBSCRIPTS (ddr))
4595 free_subscripts (DDR_SUBSCRIPTS (ddr));
4596 if (DDR_DIST_VECTS (ddr))
4597 VEC_free (lambda_vector, heap, DDR_DIST_VECTS (ddr));
4598 if (DDR_DIR_VECTS (ddr))
4599 VEC_free (lambda_vector, heap, DDR_DIR_VECTS (ddr));
4601 free (ddr);
4604 /* Free the memory used by the data dependence relations from
4605 DEPENDENCE_RELATIONS. */
4607 void
4608 free_dependence_relations (VEC (ddr_p, heap) *dependence_relations)
4610 unsigned int i;
4611 struct data_dependence_relation *ddr;
4613 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4614 if (ddr)
4615 free_dependence_relation (ddr);
4617 VEC_free (ddr_p, heap, dependence_relations);
4620 /* Free the memory used by the data references from DATAREFS. */
4622 void
4623 free_data_refs (VEC (data_reference_p, heap) *datarefs)
4625 unsigned int i;
4626 struct data_reference *dr;
4628 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
4629 free_data_ref (dr);
4630 VEC_free (data_reference_p, heap, datarefs);
4635 /* Dump vertex I in RDG to FILE. */
4637 void
4638 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
4640 struct vertex *v = &(rdg->vertices[i]);
4641 struct graph_edge *e;
4643 fprintf (file, "(vertex %d: (%s%s) (in:", i,
4644 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
4645 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
4647 if (v->pred)
4648 for (e = v->pred; e; e = e->pred_next)
4649 fprintf (file, " %d", e->src);
4651 fprintf (file, ") (out:");
4653 if (v->succ)
4654 for (e = v->succ; e; e = e->succ_next)
4655 fprintf (file, " %d", e->dest);
4657 fprintf (file, ")\n");
4658 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
4659 fprintf (file, ")\n");
4662 /* Call dump_rdg_vertex on stderr. */
4664 DEBUG_FUNCTION void
4665 debug_rdg_vertex (struct graph *rdg, int i)
4667 dump_rdg_vertex (stderr, rdg, i);
4670 /* Dump component C of RDG to FILE. If DUMPED is non-null, set the
4671 dumped vertices to that bitmap. */
4673 void dump_rdg_component (FILE *file, struct graph *rdg, int c, bitmap dumped)
4675 int i;
4677 fprintf (file, "(%d\n", c);
4679 for (i = 0; i < rdg->n_vertices; i++)
4680 if (rdg->vertices[i].component == c)
4682 if (dumped)
4683 bitmap_set_bit (dumped, i);
4685 dump_rdg_vertex (file, rdg, i);
4688 fprintf (file, ")\n");
4691 /* Call dump_rdg_vertex on stderr. */
4693 DEBUG_FUNCTION void
4694 debug_rdg_component (struct graph *rdg, int c)
4696 dump_rdg_component (stderr, rdg, c, NULL);
4699 /* Dump the reduced dependence graph RDG to FILE. */
4701 void
4702 dump_rdg (FILE *file, struct graph *rdg)
4704 int i;
4705 bitmap dumped = BITMAP_ALLOC (NULL);
4707 fprintf (file, "(rdg\n");
4709 for (i = 0; i < rdg->n_vertices; i++)
4710 if (!bitmap_bit_p (dumped, i))
4711 dump_rdg_component (file, rdg, rdg->vertices[i].component, dumped);
4713 fprintf (file, ")\n");
4714 BITMAP_FREE (dumped);
4717 /* Call dump_rdg on stderr. */
4719 DEBUG_FUNCTION void
4720 debug_rdg (struct graph *rdg)
4722 dump_rdg (stderr, rdg);
4725 static void
4726 dot_rdg_1 (FILE *file, struct graph *rdg)
4728 int i;
4730 fprintf (file, "digraph RDG {\n");
4732 for (i = 0; i < rdg->n_vertices; i++)
4734 struct vertex *v = &(rdg->vertices[i]);
4735 struct graph_edge *e;
4737 /* Highlight reads from memory. */
4738 if (RDG_MEM_READS_STMT (rdg, i))
4739 fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
4741 /* Highlight stores to memory. */
4742 if (RDG_MEM_WRITE_STMT (rdg, i))
4743 fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
4745 if (v->succ)
4746 for (e = v->succ; e; e = e->succ_next)
4747 switch (RDGE_TYPE (e))
4749 case input_dd:
4750 fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
4751 break;
4753 case output_dd:
4754 fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
4755 break;
4757 case flow_dd:
4758 /* These are the most common dependences: don't print these. */
4759 fprintf (file, "%d -> %d \n", i, e->dest);
4760 break;
4762 case anti_dd:
4763 fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
4764 break;
4766 default:
4767 gcc_unreachable ();
4771 fprintf (file, "}\n\n");
4774 /* Display the Reduced Dependence Graph using dotty. */
4775 extern void dot_rdg (struct graph *);
4777 DEBUG_FUNCTION void
4778 dot_rdg (struct graph *rdg)
4780 /* When debugging, enable the following code. This cannot be used
4781 in production compilers because it calls "system". */
4782 #if 0
4783 FILE *file = fopen ("/tmp/rdg.dot", "w");
4784 gcc_assert (file != NULL);
4786 dot_rdg_1 (file, rdg);
4787 fclose (file);
4789 system ("dotty /tmp/rdg.dot &");
4790 #else
4791 dot_rdg_1 (stderr, rdg);
4792 #endif
4795 /* This structure is used for recording the mapping statement index in
4796 the RDG. */
4798 struct GTY(()) rdg_vertex_info
4800 gimple stmt;
4801 int index;
4804 /* Returns the index of STMT in RDG. */
4807 rdg_vertex_for_stmt (struct graph *rdg, gimple stmt)
4809 struct rdg_vertex_info rvi, *slot;
4811 rvi.stmt = stmt;
4812 slot = (struct rdg_vertex_info *) htab_find (rdg->indices, &rvi);
4814 if (!slot)
4815 return -1;
4817 return slot->index;
4820 /* Creates an edge in RDG for each distance vector from DDR. The
4821 order that we keep track of in the RDG is the order in which
4822 statements have to be executed. */
4824 static void
4825 create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4827 struct graph_edge *e;
4828 int va, vb;
4829 data_reference_p dra = DDR_A (ddr);
4830 data_reference_p drb = DDR_B (ddr);
4831 unsigned level = ddr_dependence_level (ddr);
4833 /* For non scalar dependences, when the dependence is REVERSED,
4834 statement B has to be executed before statement A. */
4835 if (level > 0
4836 && !DDR_REVERSED_P (ddr))
4838 data_reference_p tmp = dra;
4839 dra = drb;
4840 drb = tmp;
4843 va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
4844 vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
4846 if (va < 0 || vb < 0)
4847 return;
4849 e = add_edge (rdg, va, vb);
4850 e->data = XNEW (struct rdg_edge);
4852 RDGE_LEVEL (e) = level;
4853 RDGE_RELATION (e) = ddr;
4855 /* Determines the type of the data dependence. */
4856 if (DR_IS_READ (dra) && DR_IS_READ (drb))
4857 RDGE_TYPE (e) = input_dd;
4858 else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
4859 RDGE_TYPE (e) = output_dd;
4860 else if (DR_IS_WRITE (dra) && DR_IS_READ (drb))
4861 RDGE_TYPE (e) = flow_dd;
4862 else if (DR_IS_READ (dra) && DR_IS_WRITE (drb))
4863 RDGE_TYPE (e) = anti_dd;
4866 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
4867 the index of DEF in RDG. */
4869 static void
4870 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
4872 use_operand_p imm_use_p;
4873 imm_use_iterator iterator;
4875 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
4877 struct graph_edge *e;
4878 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
4880 if (use < 0)
4881 continue;
4883 e = add_edge (rdg, idef, use);
4884 e->data = XNEW (struct rdg_edge);
4885 RDGE_TYPE (e) = flow_dd;
4886 RDGE_RELATION (e) = NULL;
4890 /* Creates the edges of the reduced dependence graph RDG. */
4892 static void
4893 create_rdg_edges (struct graph *rdg, VEC (ddr_p, heap) *ddrs)
4895 int i;
4896 struct data_dependence_relation *ddr;
4897 def_operand_p def_p;
4898 ssa_op_iter iter;
4900 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
4901 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4902 create_rdg_edge_for_ddr (rdg, ddr);
4904 for (i = 0; i < rdg->n_vertices; i++)
4905 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
4906 iter, SSA_OP_DEF)
4907 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
4910 /* Build the vertices of the reduced dependence graph RDG. */
4912 void
4913 create_rdg_vertices (struct graph *rdg, VEC (gimple, heap) *stmts)
4915 int i, j;
4916 gimple stmt;
4918 FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
4920 VEC (data_ref_loc, heap) *references;
4921 data_ref_loc *ref;
4922 struct vertex *v = &(rdg->vertices[i]);
4923 struct rdg_vertex_info *rvi = XNEW (struct rdg_vertex_info);
4924 struct rdg_vertex_info **slot;
4926 rvi->stmt = stmt;
4927 rvi->index = i;
4928 slot = (struct rdg_vertex_info **) htab_find_slot (rdg->indices, rvi, INSERT);
4930 if (!*slot)
4931 *slot = rvi;
4932 else
4933 free (rvi);
4935 v->data = XNEW (struct rdg_vertex);
4936 RDG_STMT (rdg, i) = stmt;
4938 RDG_MEM_WRITE_STMT (rdg, i) = false;
4939 RDG_MEM_READS_STMT (rdg, i) = false;
4940 if (gimple_code (stmt) == GIMPLE_PHI)
4941 continue;
4943 get_references_in_stmt (stmt, &references);
4944 FOR_EACH_VEC_ELT (data_ref_loc, references, j, ref)
4945 if (!ref->is_read)
4946 RDG_MEM_WRITE_STMT (rdg, i) = true;
4947 else
4948 RDG_MEM_READS_STMT (rdg, i) = true;
4950 VEC_free (data_ref_loc, heap, references);
4954 /* Initialize STMTS with all the statements of LOOP. When
4955 INCLUDE_PHIS is true, include also the PHI nodes. The order in
4956 which we discover statements is important as
4957 generate_loops_for_partition is using the same traversal for
4958 identifying statements. */
4960 static void
4961 stmts_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
4963 unsigned int i;
4964 basic_block *bbs = get_loop_body_in_dom_order (loop);
4966 for (i = 0; i < loop->num_nodes; i++)
4968 basic_block bb = bbs[i];
4969 gimple_stmt_iterator bsi;
4970 gimple stmt;
4972 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4973 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
4975 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4977 stmt = gsi_stmt (bsi);
4978 if (gimple_code (stmt) != GIMPLE_LABEL)
4979 VEC_safe_push (gimple, heap, *stmts, stmt);
4983 free (bbs);
4986 /* Returns true when all the dependences are computable. */
4988 static bool
4989 known_dependences_p (VEC (ddr_p, heap) *dependence_relations)
4991 ddr_p ddr;
4992 unsigned int i;
4994 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4995 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4996 return false;
4998 return true;
5001 /* Computes a hash function for element ELT. */
5003 static hashval_t
5004 hash_stmt_vertex_info (const void *elt)
5006 const struct rdg_vertex_info *const rvi =
5007 (const struct rdg_vertex_info *) elt;
5008 gimple stmt = rvi->stmt;
5010 return htab_hash_pointer (stmt);
5013 /* Compares database elements E1 and E2. */
5015 static int
5016 eq_stmt_vertex_info (const void *e1, const void *e2)
5018 const struct rdg_vertex_info *elt1 = (const struct rdg_vertex_info *) e1;
5019 const struct rdg_vertex_info *elt2 = (const struct rdg_vertex_info *) e2;
5021 return elt1->stmt == elt2->stmt;
5024 /* Free the element E. */
5026 static void
5027 hash_stmt_vertex_del (void *e)
5029 free (e);
5032 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5033 statement of the loop nest, and one edge per data dependence or
5034 scalar dependence. */
5036 struct graph *
5037 build_empty_rdg (int n_stmts)
5039 int nb_data_refs = 10;
5040 struct graph *rdg = new_graph (n_stmts);
5042 rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
5043 eq_stmt_vertex_info, hash_stmt_vertex_del);
5044 return rdg;
5047 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5048 statement of the loop nest, and one edge per data dependence or
5049 scalar dependence. */
5051 struct graph *
5052 build_rdg (struct loop *loop,
5053 VEC (loop_p, heap) **loop_nest,
5054 VEC (ddr_p, heap) **dependence_relations,
5055 VEC (data_reference_p, heap) **datarefs)
5057 struct graph *rdg = NULL;
5058 VEC (gimple, heap) *stmts = VEC_alloc (gimple, heap, 10);
5060 compute_data_dependences_for_loop (loop, false, loop_nest, datarefs,
5061 dependence_relations);
5063 if (known_dependences_p (*dependence_relations))
5065 stmts_from_loop (loop, &stmts);
5066 rdg = build_empty_rdg (VEC_length (gimple, stmts));
5067 create_rdg_vertices (rdg, stmts);
5068 create_rdg_edges (rdg, *dependence_relations);
5071 VEC_free (gimple, heap, stmts);
5072 return rdg;
5075 /* Free the reduced dependence graph RDG. */
5077 void
5078 free_rdg (struct graph *rdg)
5080 int i;
5082 for (i = 0; i < rdg->n_vertices; i++)
5084 struct vertex *v = &(rdg->vertices[i]);
5085 struct graph_edge *e;
5087 for (e = v->succ; e; e = e->succ_next)
5088 if (e->data)
5089 free (e->data);
5091 if (v->data)
5092 free (v->data);
5095 htab_delete (rdg->indices);
5096 free_graph (rdg);
5099 /* Initialize STMTS with all the statements of LOOP that contain a
5100 store to memory. */
5102 void
5103 stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5105 unsigned int i;
5106 basic_block *bbs = get_loop_body_in_dom_order (loop);
5108 for (i = 0; i < loop->num_nodes; i++)
5110 basic_block bb = bbs[i];
5111 gimple_stmt_iterator bsi;
5113 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5114 if (gimple_vdef (gsi_stmt (bsi)))
5115 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
5118 free (bbs);
5121 /* Returns true when the statement at STMT is of the form "A[i] = 0"
5122 that contains a data reference on its LHS with a stride of the same
5123 size as its unit type. */
5125 bool
5126 stmt_with_adjacent_zero_store_dr_p (gimple stmt)
5128 tree op0, op1;
5129 bool res;
5130 struct data_reference *dr;
5132 if (!stmt
5133 || !gimple_vdef (stmt)
5134 || !is_gimple_assign (stmt)
5135 || !gimple_assign_single_p (stmt)
5136 || !(op1 = gimple_assign_rhs1 (stmt))
5137 || !(integer_zerop (op1) || real_zerop (op1)))
5138 return false;
5140 dr = XCNEW (struct data_reference);
5141 op0 = gimple_assign_lhs (stmt);
5143 DR_STMT (dr) = stmt;
5144 DR_REF (dr) = op0;
5146 res = dr_analyze_innermost (dr)
5147 && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0));
5149 free_data_ref (dr);
5150 return res;
5153 /* Initialize STMTS with all the statements of LOOP that contain a
5154 store to memory of the form "A[i] = 0". */
5156 void
5157 stores_zero_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5159 unsigned int i;
5160 basic_block bb;
5161 gimple_stmt_iterator si;
5162 gimple stmt;
5163 basic_block *bbs = get_loop_body_in_dom_order (loop);
5165 for (i = 0; i < loop->num_nodes; i++)
5166 for (bb = bbs[i], si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5167 if ((stmt = gsi_stmt (si))
5168 && stmt_with_adjacent_zero_store_dr_p (stmt))
5169 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (si));
5171 free (bbs);
5174 /* For a data reference REF, return the declaration of its base
5175 address or NULL_TREE if the base is not determined. */
5177 static inline tree
5178 ref_base_address (gimple stmt, data_ref_loc *ref)
5180 tree base = NULL_TREE;
5181 tree base_address;
5182 struct data_reference *dr = XCNEW (struct data_reference);
5184 DR_STMT (dr) = stmt;
5185 DR_REF (dr) = *ref->pos;
5186 dr_analyze_innermost (dr);
5187 base_address = DR_BASE_ADDRESS (dr);
5189 if (!base_address)
5190 goto end;
5192 switch (TREE_CODE (base_address))
5194 case ADDR_EXPR:
5195 base = TREE_OPERAND (base_address, 0);
5196 break;
5198 default:
5199 base = base_address;
5200 break;
5203 end:
5204 free_data_ref (dr);
5205 return base;
5208 /* Determines whether the statement from vertex V of the RDG has a
5209 definition used outside the loop that contains this statement. */
5211 bool
5212 rdg_defs_used_in_other_loops_p (struct graph *rdg, int v)
5214 gimple stmt = RDG_STMT (rdg, v);
5215 struct loop *loop = loop_containing_stmt (stmt);
5216 use_operand_p imm_use_p;
5217 imm_use_iterator iterator;
5218 ssa_op_iter it;
5219 def_operand_p def_p;
5221 if (!loop)
5222 return true;
5224 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, it, SSA_OP_DEF)
5226 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, DEF_FROM_PTR (def_p))
5228 if (loop_containing_stmt (USE_STMT (imm_use_p)) != loop)
5229 return true;
5233 return false;
5236 /* Determines whether statements S1 and S2 access to similar memory
5237 locations. Two memory accesses are considered similar when they
5238 have the same base address declaration, i.e. when their
5239 ref_base_address is the same. */
5241 bool
5242 have_similar_memory_accesses (gimple s1, gimple s2)
5244 bool res = false;
5245 unsigned i, j;
5246 VEC (data_ref_loc, heap) *refs1, *refs2;
5247 data_ref_loc *ref1, *ref2;
5249 get_references_in_stmt (s1, &refs1);
5250 get_references_in_stmt (s2, &refs2);
5252 FOR_EACH_VEC_ELT (data_ref_loc, refs1, i, ref1)
5254 tree base1 = ref_base_address (s1, ref1);
5256 if (base1)
5257 FOR_EACH_VEC_ELT (data_ref_loc, refs2, j, ref2)
5258 if (base1 == ref_base_address (s2, ref2))
5260 res = true;
5261 goto end;
5265 end:
5266 VEC_free (data_ref_loc, heap, refs1);
5267 VEC_free (data_ref_loc, heap, refs2);
5268 return res;
5271 /* Helper function for the hashtab. */
5273 static int
5274 have_similar_memory_accesses_1 (const void *s1, const void *s2)
5276 return have_similar_memory_accesses (CONST_CAST_GIMPLE ((const_gimple) s1),
5277 CONST_CAST_GIMPLE ((const_gimple) s2));
5280 /* Helper function for the hashtab. */
5282 static hashval_t
5283 ref_base_address_1 (const void *s)
5285 gimple stmt = CONST_CAST_GIMPLE ((const_gimple) s);
5286 unsigned i;
5287 VEC (data_ref_loc, heap) *refs;
5288 data_ref_loc *ref;
5289 hashval_t res = 0;
5291 get_references_in_stmt (stmt, &refs);
5293 FOR_EACH_VEC_ELT (data_ref_loc, refs, i, ref)
5294 if (!ref->is_read)
5296 res = htab_hash_pointer (ref_base_address (stmt, ref));
5297 break;
5300 VEC_free (data_ref_loc, heap, refs);
5301 return res;
5304 /* Try to remove duplicated write data references from STMTS. */
5306 void
5307 remove_similar_memory_refs (VEC (gimple, heap) **stmts)
5309 unsigned i;
5310 gimple stmt;
5311 htab_t seen = htab_create (VEC_length (gimple, *stmts), ref_base_address_1,
5312 have_similar_memory_accesses_1, NULL);
5314 for (i = 0; VEC_iterate (gimple, *stmts, i, stmt); )
5316 void **slot;
5318 slot = htab_find_slot (seen, stmt, INSERT);
5320 if (*slot)
5321 VEC_ordered_remove (gimple, *stmts, i);
5322 else
5324 *slot = (void *) stmt;
5325 i++;
5329 htab_delete (seen);
5332 /* Returns the index of PARAMETER in the parameters vector of the
5333 ACCESS_MATRIX. If PARAMETER does not exist return -1. */
5336 access_matrix_get_index_for_parameter (tree parameter,
5337 struct access_matrix *access_matrix)
5339 int i;
5340 VEC (tree,heap) *lambda_parameters = AM_PARAMETERS (access_matrix);
5341 tree lambda_parameter;
5343 FOR_EACH_VEC_ELT (tree, lambda_parameters, i, lambda_parameter)
5344 if (lambda_parameter == parameter)
5345 return i + AM_NB_INDUCTION_VARS (access_matrix);
5347 return -1;