Restore 2012 entries that hasn't been saved.
[official-gcc.git] / gcc / tree-data-ref.c
bloba0d86ec9c117572dbcbe73df4e9250ebbe56df5d
1 /* Data references and dependences detectors.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* This pass walks a given loop structure searching for array
23 references. The information about the array accesses is recorded
24 in DATA_REFERENCE structures.
26 The basic test for determining the dependences is:
27 given two access functions chrec1 and chrec2 to a same array, and
28 x and y two vectors from the iteration domain, the same element of
29 the array is accessed twice at iterations x and y if and only if:
30 | chrec1 (x) == chrec2 (y).
32 The goals of this analysis are:
34 - to determine the independence: the relation between two
35 independent accesses is qualified with the chrec_known (this
36 information allows a loop parallelization),
38 - when two data references access the same data, to qualify the
39 dependence relation with classic dependence representations:
41 - distance vectors
42 - direction vectors
43 - loop carried level dependence
44 - polyhedron dependence
45 or with the chains of recurrences based representation,
47 - to define a knowledge base for storing the data dependence
48 information,
50 - to define an interface to access this data.
53 Definitions:
55 - subscript: given two array accesses a subscript is the tuple
56 composed of the access functions for a given dimension. Example:
57 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
58 (f1, g1), (f2, g2), (f3, g3).
60 - Diophantine equation: an equation whose coefficients and
61 solutions are integer constants, for example the equation
62 | 3*x + 2*y = 1
63 has an integer solution x = 1 and y = -1.
65 References:
67 - "Advanced Compilation for High Performance Computing" by Randy
68 Allen and Ken Kennedy.
69 http://citeseer.ist.psu.edu/goff91practical.html
71 - "Loop Transformations for Restructuring Compilers - The Foundations"
72 by Utpal Banerjee.
77 #include "config.h"
78 #include "system.h"
79 #include "coretypes.h"
80 #include "gimple-pretty-print.h"
81 #include "tree-flow.h"
82 #include "cfgloop.h"
83 #include "tree-data-ref.h"
84 #include "tree-scalar-evolution.h"
85 #include "tree-pass.h"
86 #include "langhooks.h"
87 #include "tree-affine.h"
89 static struct datadep_stats
91 int num_dependence_tests;
92 int num_dependence_dependent;
93 int num_dependence_independent;
94 int num_dependence_undetermined;
96 int num_subscript_tests;
97 int num_subscript_undetermined;
98 int num_same_subscript_function;
100 int num_ziv;
101 int num_ziv_independent;
102 int num_ziv_dependent;
103 int num_ziv_unimplemented;
105 int num_siv;
106 int num_siv_independent;
107 int num_siv_dependent;
108 int num_siv_unimplemented;
110 int num_miv;
111 int num_miv_independent;
112 int num_miv_dependent;
113 int num_miv_unimplemented;
114 } dependence_stats;
116 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
117 struct data_reference *,
118 struct data_reference *,
119 struct loop *);
120 /* Returns true iff A divides B. */
122 static inline bool
123 tree_fold_divides_p (const_tree a, const_tree b)
125 gcc_assert (TREE_CODE (a) == INTEGER_CST);
126 gcc_assert (TREE_CODE (b) == INTEGER_CST);
127 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
130 /* Returns true iff A divides B. */
132 static inline bool
133 int_divides_p (int a, int b)
135 return ((b % a) == 0);
140 /* Dump into FILE all the data references from DATAREFS. */
142 void
143 dump_data_references (FILE *file, VEC (data_reference_p, heap) *datarefs)
145 unsigned int i;
146 struct data_reference *dr;
148 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
149 dump_data_reference (file, dr);
152 /* Dump into STDERR all the data references from DATAREFS. */
154 DEBUG_FUNCTION void
155 debug_data_references (VEC (data_reference_p, heap) *datarefs)
157 dump_data_references (stderr, datarefs);
160 /* Dump to STDERR all the dependence relations from DDRS. */
162 DEBUG_FUNCTION void
163 debug_data_dependence_relations (VEC (ddr_p, heap) *ddrs)
165 dump_data_dependence_relations (stderr, ddrs);
168 /* Dump into FILE all the dependence relations from DDRS. */
170 void
171 dump_data_dependence_relations (FILE *file,
172 VEC (ddr_p, heap) *ddrs)
174 unsigned int i;
175 struct data_dependence_relation *ddr;
177 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
178 dump_data_dependence_relation (file, ddr);
181 /* Print to STDERR the data_reference DR. */
183 DEBUG_FUNCTION void
184 debug_data_reference (struct data_reference *dr)
186 dump_data_reference (stderr, dr);
189 /* Dump function for a DATA_REFERENCE structure. */
191 void
192 dump_data_reference (FILE *outf,
193 struct data_reference *dr)
195 unsigned int i;
197 fprintf (outf, "#(Data Ref: \n");
198 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
199 fprintf (outf, "# stmt: ");
200 print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
201 fprintf (outf, "# ref: ");
202 print_generic_stmt (outf, DR_REF (dr), 0);
203 fprintf (outf, "# base_object: ");
204 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
206 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
208 fprintf (outf, "# Access function %d: ", i);
209 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
211 fprintf (outf, "#)\n");
214 /* Dumps the affine function described by FN to the file OUTF. */
216 static void
217 dump_affine_function (FILE *outf, affine_fn fn)
219 unsigned i;
220 tree coef;
222 print_generic_expr (outf, VEC_index (tree, fn, 0), TDF_SLIM);
223 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
225 fprintf (outf, " + ");
226 print_generic_expr (outf, coef, TDF_SLIM);
227 fprintf (outf, " * x_%u", i);
231 /* Dumps the conflict function CF to the file OUTF. */
233 static void
234 dump_conflict_function (FILE *outf, conflict_function *cf)
236 unsigned i;
238 if (cf->n == NO_DEPENDENCE)
239 fprintf (outf, "no dependence\n");
240 else if (cf->n == NOT_KNOWN)
241 fprintf (outf, "not known\n");
242 else
244 for (i = 0; i < cf->n; i++)
246 fprintf (outf, "[");
247 dump_affine_function (outf, cf->fns[i]);
248 fprintf (outf, "]\n");
253 /* Dump function for a SUBSCRIPT structure. */
255 void
256 dump_subscript (FILE *outf, struct subscript *subscript)
258 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
260 fprintf (outf, "\n (subscript \n");
261 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
262 dump_conflict_function (outf, cf);
263 if (CF_NONTRIVIAL_P (cf))
265 tree last_iteration = SUB_LAST_CONFLICT (subscript);
266 fprintf (outf, " last_conflict: ");
267 print_generic_stmt (outf, last_iteration, 0);
270 cf = SUB_CONFLICTS_IN_B (subscript);
271 fprintf (outf, " iterations_that_access_an_element_twice_in_B: ");
272 dump_conflict_function (outf, cf);
273 if (CF_NONTRIVIAL_P (cf))
275 tree last_iteration = SUB_LAST_CONFLICT (subscript);
276 fprintf (outf, " last_conflict: ");
277 print_generic_stmt (outf, last_iteration, 0);
280 fprintf (outf, " (Subscript distance: ");
281 print_generic_stmt (outf, SUB_DISTANCE (subscript), 0);
282 fprintf (outf, " )\n");
283 fprintf (outf, " )\n");
286 /* Print the classic direction vector DIRV to OUTF. */
288 void
289 print_direction_vector (FILE *outf,
290 lambda_vector dirv,
291 int length)
293 int eq;
295 for (eq = 0; eq < length; eq++)
297 enum data_dependence_direction dir = ((enum data_dependence_direction)
298 dirv[eq]);
300 switch (dir)
302 case dir_positive:
303 fprintf (outf, " +");
304 break;
305 case dir_negative:
306 fprintf (outf, " -");
307 break;
308 case dir_equal:
309 fprintf (outf, " =");
310 break;
311 case dir_positive_or_equal:
312 fprintf (outf, " +=");
313 break;
314 case dir_positive_or_negative:
315 fprintf (outf, " +-");
316 break;
317 case dir_negative_or_equal:
318 fprintf (outf, " -=");
319 break;
320 case dir_star:
321 fprintf (outf, " *");
322 break;
323 default:
324 fprintf (outf, "indep");
325 break;
328 fprintf (outf, "\n");
331 /* Print a vector of direction vectors. */
333 void
334 print_dir_vectors (FILE *outf, VEC (lambda_vector, heap) *dir_vects,
335 int length)
337 unsigned j;
338 lambda_vector v;
340 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, v)
341 print_direction_vector (outf, v, length);
344 /* Print out a vector VEC of length N to OUTFILE. */
346 static inline void
347 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
349 int i;
351 for (i = 0; i < n; i++)
352 fprintf (outfile, "%3d ", vector[i]);
353 fprintf (outfile, "\n");
356 /* Print a vector of distance vectors. */
358 void
359 print_dist_vectors (FILE *outf, VEC (lambda_vector, heap) *dist_vects,
360 int length)
362 unsigned j;
363 lambda_vector v;
365 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, v)
366 print_lambda_vector (outf, v, length);
369 /* Debug version. */
371 DEBUG_FUNCTION void
372 debug_data_dependence_relation (struct data_dependence_relation *ddr)
374 dump_data_dependence_relation (stderr, ddr);
377 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
379 void
380 dump_data_dependence_relation (FILE *outf,
381 struct data_dependence_relation *ddr)
383 struct data_reference *dra, *drb;
385 fprintf (outf, "(Data Dep: \n");
387 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
389 if (ddr)
391 dra = DDR_A (ddr);
392 drb = DDR_B (ddr);
393 if (dra)
394 dump_data_reference (outf, dra);
395 else
396 fprintf (outf, " (nil)\n");
397 if (drb)
398 dump_data_reference (outf, drb);
399 else
400 fprintf (outf, " (nil)\n");
402 fprintf (outf, " (don't know)\n)\n");
403 return;
406 dra = DDR_A (ddr);
407 drb = DDR_B (ddr);
408 dump_data_reference (outf, dra);
409 dump_data_reference (outf, drb);
411 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
412 fprintf (outf, " (no dependence)\n");
414 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
416 unsigned int i;
417 struct loop *loopi;
419 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
421 fprintf (outf, " access_fn_A: ");
422 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
423 fprintf (outf, " access_fn_B: ");
424 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
425 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
428 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
429 fprintf (outf, " loop nest: (");
430 FOR_EACH_VEC_ELT (loop_p, DDR_LOOP_NEST (ddr), i, loopi)
431 fprintf (outf, "%d ", loopi->num);
432 fprintf (outf, ")\n");
434 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
436 fprintf (outf, " distance_vector: ");
437 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
438 DDR_NB_LOOPS (ddr));
441 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
443 fprintf (outf, " direction_vector: ");
444 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
445 DDR_NB_LOOPS (ddr));
449 fprintf (outf, ")\n");
452 /* Dump function for a DATA_DEPENDENCE_DIRECTION structure. */
454 void
455 dump_data_dependence_direction (FILE *file,
456 enum data_dependence_direction dir)
458 switch (dir)
460 case dir_positive:
461 fprintf (file, "+");
462 break;
464 case dir_negative:
465 fprintf (file, "-");
466 break;
468 case dir_equal:
469 fprintf (file, "=");
470 break;
472 case dir_positive_or_negative:
473 fprintf (file, "+-");
474 break;
476 case dir_positive_or_equal:
477 fprintf (file, "+=");
478 break;
480 case dir_negative_or_equal:
481 fprintf (file, "-=");
482 break;
484 case dir_star:
485 fprintf (file, "*");
486 break;
488 default:
489 break;
493 /* Dumps the distance and direction vectors in FILE. DDRS contains
494 the dependence relations, and VECT_SIZE is the size of the
495 dependence vectors, or in other words the number of loops in the
496 considered nest. */
498 void
499 dump_dist_dir_vectors (FILE *file, VEC (ddr_p, heap) *ddrs)
501 unsigned int i, j;
502 struct data_dependence_relation *ddr;
503 lambda_vector v;
505 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
506 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
508 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), j, v)
510 fprintf (file, "DISTANCE_V (");
511 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
512 fprintf (file, ")\n");
515 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), j, v)
517 fprintf (file, "DIRECTION_V (");
518 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
519 fprintf (file, ")\n");
523 fprintf (file, "\n\n");
526 /* Dumps the data dependence relations DDRS in FILE. */
528 void
529 dump_ddrs (FILE *file, VEC (ddr_p, heap) *ddrs)
531 unsigned int i;
532 struct data_dependence_relation *ddr;
534 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
535 dump_data_dependence_relation (file, ddr);
537 fprintf (file, "\n\n");
540 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
541 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
542 constant of type ssizetype, and returns true. If we cannot do this
543 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
544 is returned. */
546 static bool
547 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
548 tree *var, tree *off)
550 tree var0, var1;
551 tree off0, off1;
552 enum tree_code ocode = code;
554 *var = NULL_TREE;
555 *off = NULL_TREE;
557 switch (code)
559 case INTEGER_CST:
560 *var = build_int_cst (type, 0);
561 *off = fold_convert (ssizetype, op0);
562 return true;
564 case POINTER_PLUS_EXPR:
565 ocode = PLUS_EXPR;
566 /* FALLTHROUGH */
567 case PLUS_EXPR:
568 case MINUS_EXPR:
569 split_constant_offset (op0, &var0, &off0);
570 split_constant_offset (op1, &var1, &off1);
571 *var = fold_build2 (code, type, var0, var1);
572 *off = size_binop (ocode, off0, off1);
573 return true;
575 case MULT_EXPR:
576 if (TREE_CODE (op1) != INTEGER_CST)
577 return false;
579 split_constant_offset (op0, &var0, &off0);
580 *var = fold_build2 (MULT_EXPR, type, var0, op1);
581 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
582 return true;
584 case ADDR_EXPR:
586 tree base, poffset;
587 HOST_WIDE_INT pbitsize, pbitpos;
588 enum machine_mode pmode;
589 int punsignedp, pvolatilep;
591 op0 = TREE_OPERAND (op0, 0);
592 base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
593 &pmode, &punsignedp, &pvolatilep, false);
595 if (pbitpos % BITS_PER_UNIT != 0)
596 return false;
597 base = build_fold_addr_expr (base);
598 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
600 if (poffset)
602 split_constant_offset (poffset, &poffset, &off1);
603 off0 = size_binop (PLUS_EXPR, off0, off1);
604 if (POINTER_TYPE_P (TREE_TYPE (base)))
605 base = fold_build_pointer_plus (base, poffset);
606 else
607 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
608 fold_convert (TREE_TYPE (base), poffset));
611 var0 = fold_convert (type, base);
613 /* If variable length types are involved, punt, otherwise casts
614 might be converted into ARRAY_REFs in gimplify_conversion.
615 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
616 possibly no longer appears in current GIMPLE, might resurface.
617 This perhaps could run
618 if (CONVERT_EXPR_P (var0))
620 gimplify_conversion (&var0);
621 // Attempt to fill in any within var0 found ARRAY_REF's
622 // element size from corresponding op embedded ARRAY_REF,
623 // if unsuccessful, just punt.
624 } */
625 while (POINTER_TYPE_P (type))
626 type = TREE_TYPE (type);
627 if (int_size_in_bytes (type) < 0)
628 return false;
630 *var = var0;
631 *off = off0;
632 return true;
635 case SSA_NAME:
637 gimple def_stmt = SSA_NAME_DEF_STMT (op0);
638 enum tree_code subcode;
640 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
641 return false;
643 var0 = gimple_assign_rhs1 (def_stmt);
644 subcode = gimple_assign_rhs_code (def_stmt);
645 var1 = gimple_assign_rhs2 (def_stmt);
647 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
649 CASE_CONVERT:
651 /* We must not introduce undefined overflow, and we must not change the value.
652 Hence we're okay if the inner type doesn't overflow to start with
653 (pointer or signed), the outer type also is an integer or pointer
654 and the outer precision is at least as large as the inner. */
655 tree itype = TREE_TYPE (op0);
656 if ((POINTER_TYPE_P (itype)
657 || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
658 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
659 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
661 split_constant_offset (op0, &var0, off);
662 *var = fold_convert (type, var0);
663 return true;
665 return false;
668 default:
669 return false;
673 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
674 will be ssizetype. */
676 void
677 split_constant_offset (tree exp, tree *var, tree *off)
679 tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
680 enum tree_code code;
682 *var = exp;
683 *off = ssize_int (0);
684 STRIP_NOPS (exp);
686 if (tree_is_chrec (exp)
687 || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
688 return;
690 otype = TREE_TYPE (exp);
691 code = TREE_CODE (exp);
692 extract_ops_from_tree (exp, &code, &op0, &op1);
693 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
695 *var = fold_convert (type, e);
696 *off = o;
700 /* Returns the address ADDR of an object in a canonical shape (without nop
701 casts, and with type of pointer to the object). */
703 static tree
704 canonicalize_base_object_address (tree addr)
706 tree orig = addr;
708 STRIP_NOPS (addr);
710 /* The base address may be obtained by casting from integer, in that case
711 keep the cast. */
712 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
713 return orig;
715 if (TREE_CODE (addr) != ADDR_EXPR)
716 return addr;
718 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
721 /* Analyzes the behavior of the memory reference DR in the innermost loop or
722 basic block that contains it. Returns true if analysis succeed or false
723 otherwise. */
725 bool
726 dr_analyze_innermost (struct data_reference *dr, struct loop *nest)
728 gimple stmt = DR_STMT (dr);
729 struct loop *loop = loop_containing_stmt (stmt);
730 tree ref = DR_REF (dr);
731 HOST_WIDE_INT pbitsize, pbitpos;
732 tree base, poffset;
733 enum machine_mode pmode;
734 int punsignedp, pvolatilep;
735 affine_iv base_iv, offset_iv;
736 tree init, dinit, step;
737 bool in_loop = (loop && loop->num);
739 if (dump_file && (dump_flags & TDF_DETAILS))
740 fprintf (dump_file, "analyze_innermost: ");
742 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
743 &pmode, &punsignedp, &pvolatilep, false);
744 gcc_assert (base != NULL_TREE);
746 if (pbitpos % BITS_PER_UNIT != 0)
748 if (dump_file && (dump_flags & TDF_DETAILS))
749 fprintf (dump_file, "failed: bit offset alignment.\n");
750 return false;
753 if (TREE_CODE (base) == MEM_REF)
755 if (!integer_zerop (TREE_OPERAND (base, 1)))
757 if (!poffset)
759 double_int moff = mem_ref_offset (base);
760 poffset = double_int_to_tree (sizetype, moff);
762 else
763 poffset = size_binop (PLUS_EXPR, poffset, TREE_OPERAND (base, 1));
765 base = TREE_OPERAND (base, 0);
767 else
768 base = build_fold_addr_expr (base);
770 if (in_loop)
772 if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
773 false))
775 if (nest)
777 if (dump_file && (dump_flags & TDF_DETAILS))
778 fprintf (dump_file, "failed: evolution of base is not"
779 " affine.\n");
780 return false;
782 else
784 base_iv.base = base;
785 base_iv.step = ssize_int (0);
786 base_iv.no_overflow = true;
790 else
792 base_iv.base = base;
793 base_iv.step = ssize_int (0);
794 base_iv.no_overflow = true;
797 if (!poffset)
799 offset_iv.base = ssize_int (0);
800 offset_iv.step = ssize_int (0);
802 else
804 if (!in_loop)
806 offset_iv.base = poffset;
807 offset_iv.step = ssize_int (0);
809 else if (!simple_iv (loop, loop_containing_stmt (stmt),
810 poffset, &offset_iv, false))
812 if (nest)
814 if (dump_file && (dump_flags & TDF_DETAILS))
815 fprintf (dump_file, "failed: evolution of offset is not"
816 " affine.\n");
817 return false;
819 else
821 offset_iv.base = poffset;
822 offset_iv.step = ssize_int (0);
827 init = ssize_int (pbitpos / BITS_PER_UNIT);
828 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
829 init = size_binop (PLUS_EXPR, init, dinit);
830 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
831 init = size_binop (PLUS_EXPR, init, dinit);
833 step = size_binop (PLUS_EXPR,
834 fold_convert (ssizetype, base_iv.step),
835 fold_convert (ssizetype, offset_iv.step));
837 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
839 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
840 DR_INIT (dr) = init;
841 DR_STEP (dr) = step;
843 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
845 if (dump_file && (dump_flags & TDF_DETAILS))
846 fprintf (dump_file, "success.\n");
848 return true;
851 /* Determines the base object and the list of indices of memory reference
852 DR, analyzed in LOOP and instantiated in loop nest NEST. */
854 static void
855 dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
857 VEC (tree, heap) *access_fns = NULL;
858 tree ref, *aref, op;
859 tree base, off, access_fn;
860 basic_block before_loop;
862 /* If analyzing a basic-block there are no indices to analyze
863 and thus no access functions. */
864 if (!nest)
866 DR_BASE_OBJECT (dr) = DR_REF (dr);
867 DR_ACCESS_FNS (dr) = NULL;
868 return;
871 ref = unshare_expr (DR_REF (dr));
872 before_loop = block_before_loop (nest);
874 /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
875 into a two element array with a constant index. The base is
876 then just the immediate underlying object. */
877 if (TREE_CODE (ref) == REALPART_EXPR)
879 ref = TREE_OPERAND (ref, 0);
880 VEC_safe_push (tree, heap, access_fns, integer_zero_node);
882 else if (TREE_CODE (ref) == IMAGPART_EXPR)
884 ref = TREE_OPERAND (ref, 0);
885 VEC_safe_push (tree, heap, access_fns, integer_one_node);
888 /* Analyze access functions of dimensions we know to be independent. */
889 aref = &ref;
890 while (handled_component_p (*aref))
892 if (TREE_CODE (*aref) == ARRAY_REF)
894 op = TREE_OPERAND (*aref, 1);
895 access_fn = analyze_scalar_evolution (loop, op);
896 access_fn = instantiate_scev (before_loop, loop, access_fn);
897 VEC_safe_push (tree, heap, access_fns, access_fn);
898 /* For ARRAY_REFs the base is the reference with the index replaced
899 by zero if we can not strip it as the outermost component. */
900 if (*aref == ref)
902 *aref = TREE_OPERAND (*aref, 0);
903 continue;
905 else
906 TREE_OPERAND (*aref, 1) = build_int_cst (TREE_TYPE (op), 0);
909 aref = &TREE_OPERAND (*aref, 0);
912 /* If the address operand of a MEM_REF base has an evolution in the
913 analyzed nest, add it as an additional independent access-function. */
914 if (TREE_CODE (*aref) == MEM_REF)
916 op = TREE_OPERAND (*aref, 0);
917 access_fn = analyze_scalar_evolution (loop, op);
918 access_fn = instantiate_scev (before_loop, loop, access_fn);
919 if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
921 tree orig_type;
922 base = initial_condition (access_fn);
923 orig_type = TREE_TYPE (base);
924 STRIP_USELESS_TYPE_CONVERSION (base);
925 split_constant_offset (base, &base, &off);
926 /* Fold the MEM_REF offset into the evolutions initial
927 value to make more bases comparable. */
928 if (!integer_zerop (TREE_OPERAND (*aref, 1)))
930 off = size_binop (PLUS_EXPR, off,
931 fold_convert (ssizetype,
932 TREE_OPERAND (*aref, 1)));
933 TREE_OPERAND (*aref, 1)
934 = build_int_cst (TREE_TYPE (TREE_OPERAND (*aref, 1)), 0);
936 access_fn = chrec_replace_initial_condition
937 (access_fn, fold_convert (orig_type, off));
938 *aref = fold_build2_loc (EXPR_LOCATION (*aref),
939 MEM_REF, TREE_TYPE (*aref),
940 base, TREE_OPERAND (*aref, 1));
941 VEC_safe_push (tree, heap, access_fns, access_fn);
945 DR_BASE_OBJECT (dr) = ref;
946 DR_ACCESS_FNS (dr) = access_fns;
949 /* Extracts the alias analysis information from the memory reference DR. */
951 static void
952 dr_analyze_alias (struct data_reference *dr)
954 tree ref = DR_REF (dr);
955 tree base = get_base_address (ref), addr;
957 if (INDIRECT_REF_P (base)
958 || TREE_CODE (base) == MEM_REF)
960 addr = TREE_OPERAND (base, 0);
961 if (TREE_CODE (addr) == SSA_NAME)
962 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
966 /* Frees data reference DR. */
968 void
969 free_data_ref (data_reference_p dr)
971 VEC_free (tree, heap, DR_ACCESS_FNS (dr));
972 free (dr);
975 /* Analyzes memory reference MEMREF accessed in STMT. The reference
976 is read if IS_READ is true, write otherwise. Returns the
977 data_reference description of MEMREF. NEST is the outermost loop
978 in which the reference should be instantiated, LOOP is the loop in
979 which the data reference should be analyzed. */
981 struct data_reference *
982 create_data_ref (loop_p nest, loop_p loop, tree memref, gimple stmt,
983 bool is_read)
985 struct data_reference *dr;
987 if (dump_file && (dump_flags & TDF_DETAILS))
989 fprintf (dump_file, "Creating dr for ");
990 print_generic_expr (dump_file, memref, TDF_SLIM);
991 fprintf (dump_file, "\n");
994 dr = XCNEW (struct data_reference);
995 DR_STMT (dr) = stmt;
996 DR_REF (dr) = memref;
997 DR_IS_READ (dr) = is_read;
999 dr_analyze_innermost (dr, nest);
1000 dr_analyze_indices (dr, nest, loop);
1001 dr_analyze_alias (dr);
1003 if (dump_file && (dump_flags & TDF_DETAILS))
1005 unsigned i;
1006 fprintf (dump_file, "\tbase_address: ");
1007 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1008 fprintf (dump_file, "\n\toffset from base address: ");
1009 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1010 fprintf (dump_file, "\n\tconstant offset from base address: ");
1011 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1012 fprintf (dump_file, "\n\tstep: ");
1013 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1014 fprintf (dump_file, "\n\taligned to: ");
1015 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
1016 fprintf (dump_file, "\n\tbase_object: ");
1017 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1018 fprintf (dump_file, "\n");
1019 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1021 fprintf (dump_file, "\tAccess function %d: ", i);
1022 print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1026 return dr;
1029 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1030 expressions. */
1031 static bool
1032 dr_equal_offsets_p1 (tree offset1, tree offset2)
1034 bool res;
1036 STRIP_NOPS (offset1);
1037 STRIP_NOPS (offset2);
1039 if (offset1 == offset2)
1040 return true;
1042 if (TREE_CODE (offset1) != TREE_CODE (offset2)
1043 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1044 return false;
1046 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1047 TREE_OPERAND (offset2, 0));
1049 if (!res || !BINARY_CLASS_P (offset1))
1050 return res;
1052 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1053 TREE_OPERAND (offset2, 1));
1055 return res;
1058 /* Check if DRA and DRB have equal offsets. */
1059 bool
1060 dr_equal_offsets_p (struct data_reference *dra,
1061 struct data_reference *drb)
1063 tree offset1, offset2;
1065 offset1 = DR_OFFSET (dra);
1066 offset2 = DR_OFFSET (drb);
1068 return dr_equal_offsets_p1 (offset1, offset2);
1071 /* Returns true if FNA == FNB. */
1073 static bool
1074 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1076 unsigned i, n = VEC_length (tree, fna);
1078 if (n != VEC_length (tree, fnb))
1079 return false;
1081 for (i = 0; i < n; i++)
1082 if (!operand_equal_p (VEC_index (tree, fna, i),
1083 VEC_index (tree, fnb, i), 0))
1084 return false;
1086 return true;
1089 /* If all the functions in CF are the same, returns one of them,
1090 otherwise returns NULL. */
1092 static affine_fn
1093 common_affine_function (conflict_function *cf)
1095 unsigned i;
1096 affine_fn comm;
1098 if (!CF_NONTRIVIAL_P (cf))
1099 return NULL;
1101 comm = cf->fns[0];
1103 for (i = 1; i < cf->n; i++)
1104 if (!affine_function_equal_p (comm, cf->fns[i]))
1105 return NULL;
1107 return comm;
1110 /* Returns the base of the affine function FN. */
1112 static tree
1113 affine_function_base (affine_fn fn)
1115 return VEC_index (tree, fn, 0);
1118 /* Returns true if FN is a constant. */
1120 static bool
1121 affine_function_constant_p (affine_fn fn)
1123 unsigned i;
1124 tree coef;
1126 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
1127 if (!integer_zerop (coef))
1128 return false;
1130 return true;
1133 /* Returns true if FN is the zero constant function. */
1135 static bool
1136 affine_function_zero_p (affine_fn fn)
1138 return (integer_zerop (affine_function_base (fn))
1139 && affine_function_constant_p (fn));
1142 /* Returns a signed integer type with the largest precision from TA
1143 and TB. */
1145 static tree
1146 signed_type_for_types (tree ta, tree tb)
1148 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1149 return signed_type_for (ta);
1150 else
1151 return signed_type_for (tb);
1154 /* Applies operation OP on affine functions FNA and FNB, and returns the
1155 result. */
1157 static affine_fn
1158 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1160 unsigned i, n, m;
1161 affine_fn ret;
1162 tree coef;
1164 if (VEC_length (tree, fnb) > VEC_length (tree, fna))
1166 n = VEC_length (tree, fna);
1167 m = VEC_length (tree, fnb);
1169 else
1171 n = VEC_length (tree, fnb);
1172 m = VEC_length (tree, fna);
1175 ret = VEC_alloc (tree, heap, m);
1176 for (i = 0; i < n; i++)
1178 tree type = signed_type_for_types (TREE_TYPE (VEC_index (tree, fna, i)),
1179 TREE_TYPE (VEC_index (tree, fnb, i)));
1181 VEC_quick_push (tree, ret,
1182 fold_build2 (op, type,
1183 VEC_index (tree, fna, i),
1184 VEC_index (tree, fnb, i)));
1187 for (; VEC_iterate (tree, fna, i, coef); i++)
1188 VEC_quick_push (tree, ret,
1189 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1190 coef, integer_zero_node));
1191 for (; VEC_iterate (tree, fnb, i, coef); i++)
1192 VEC_quick_push (tree, ret,
1193 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1194 integer_zero_node, coef));
1196 return ret;
1199 /* Returns the sum of affine functions FNA and FNB. */
1201 static affine_fn
1202 affine_fn_plus (affine_fn fna, affine_fn fnb)
1204 return affine_fn_op (PLUS_EXPR, fna, fnb);
1207 /* Returns the difference of affine functions FNA and FNB. */
1209 static affine_fn
1210 affine_fn_minus (affine_fn fna, affine_fn fnb)
1212 return affine_fn_op (MINUS_EXPR, fna, fnb);
1215 /* Frees affine function FN. */
1217 static void
1218 affine_fn_free (affine_fn fn)
1220 VEC_free (tree, heap, fn);
1223 /* Determine for each subscript in the data dependence relation DDR
1224 the distance. */
1226 static void
1227 compute_subscript_distance (struct data_dependence_relation *ddr)
1229 conflict_function *cf_a, *cf_b;
1230 affine_fn fn_a, fn_b, diff;
1232 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1234 unsigned int i;
1236 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1238 struct subscript *subscript;
1240 subscript = DDR_SUBSCRIPT (ddr, i);
1241 cf_a = SUB_CONFLICTS_IN_A (subscript);
1242 cf_b = SUB_CONFLICTS_IN_B (subscript);
1244 fn_a = common_affine_function (cf_a);
1245 fn_b = common_affine_function (cf_b);
1246 if (!fn_a || !fn_b)
1248 SUB_DISTANCE (subscript) = chrec_dont_know;
1249 return;
1251 diff = affine_fn_minus (fn_a, fn_b);
1253 if (affine_function_constant_p (diff))
1254 SUB_DISTANCE (subscript) = affine_function_base (diff);
1255 else
1256 SUB_DISTANCE (subscript) = chrec_dont_know;
1258 affine_fn_free (diff);
1263 /* Returns the conflict function for "unknown". */
1265 static conflict_function *
1266 conflict_fn_not_known (void)
1268 conflict_function *fn = XCNEW (conflict_function);
1269 fn->n = NOT_KNOWN;
1271 return fn;
1274 /* Returns the conflict function for "independent". */
1276 static conflict_function *
1277 conflict_fn_no_dependence (void)
1279 conflict_function *fn = XCNEW (conflict_function);
1280 fn->n = NO_DEPENDENCE;
1282 return fn;
1285 /* Returns true if the address of OBJ is invariant in LOOP. */
1287 static bool
1288 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1290 while (handled_component_p (obj))
1292 if (TREE_CODE (obj) == ARRAY_REF)
1294 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1295 need to check the stride and the lower bound of the reference. */
1296 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1297 loop->num)
1298 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1299 loop->num))
1300 return false;
1302 else if (TREE_CODE (obj) == COMPONENT_REF)
1304 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1305 loop->num))
1306 return false;
1308 obj = TREE_OPERAND (obj, 0);
1311 if (!INDIRECT_REF_P (obj)
1312 && TREE_CODE (obj) != MEM_REF)
1313 return true;
1315 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1316 loop->num);
1319 /* Returns false if we can prove that data references A and B do not alias,
1320 true otherwise. If LOOP_NEST is false no cross-iteration aliases are
1321 considered. */
1323 bool
1324 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
1325 bool loop_nest)
1327 tree addr_a = DR_BASE_OBJECT (a);
1328 tree addr_b = DR_BASE_OBJECT (b);
1330 /* If we are not processing a loop nest but scalar code we
1331 do not need to care about possible cross-iteration dependences
1332 and thus can process the full original reference. Do so,
1333 similar to how loop invariant motion applies extra offset-based
1334 disambiguation. */
1335 if (!loop_nest)
1337 aff_tree off1, off2;
1338 double_int size1, size2;
1339 get_inner_reference_aff (DR_REF (a), &off1, &size1);
1340 get_inner_reference_aff (DR_REF (b), &off2, &size2);
1341 aff_combination_scale (&off1, double_int_minus_one);
1342 aff_combination_add (&off2, &off1);
1343 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1344 return false;
1347 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
1348 return refs_output_dependent_p (addr_a, addr_b);
1349 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
1350 return refs_anti_dependent_p (addr_a, addr_b);
1351 return refs_may_alias_p (addr_a, addr_b);
1354 /* Initialize a data dependence relation between data accesses A and
1355 B. NB_LOOPS is the number of loops surrounding the references: the
1356 size of the classic distance/direction vectors. */
1358 struct data_dependence_relation *
1359 initialize_data_dependence_relation (struct data_reference *a,
1360 struct data_reference *b,
1361 VEC (loop_p, heap) *loop_nest)
1363 struct data_dependence_relation *res;
1364 unsigned int i;
1366 res = XNEW (struct data_dependence_relation);
1367 DDR_A (res) = a;
1368 DDR_B (res) = b;
1369 DDR_LOOP_NEST (res) = NULL;
1370 DDR_REVERSED_P (res) = false;
1371 DDR_SUBSCRIPTS (res) = NULL;
1372 DDR_DIR_VECTS (res) = NULL;
1373 DDR_DIST_VECTS (res) = NULL;
1375 if (a == NULL || b == NULL)
1377 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1378 return res;
1381 /* If the data references do not alias, then they are independent. */
1382 if (!dr_may_alias_p (a, b, loop_nest != NULL))
1384 DDR_ARE_DEPENDENT (res) = chrec_known;
1385 return res;
1388 /* The case where the references are exactly the same. */
1389 if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1391 if (loop_nest
1392 && !object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1393 DR_BASE_OBJECT (a)))
1395 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1396 return res;
1398 DDR_AFFINE_P (res) = true;
1399 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1400 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1401 DDR_LOOP_NEST (res) = loop_nest;
1402 DDR_INNER_LOOP (res) = 0;
1403 DDR_SELF_REFERENCE (res) = true;
1404 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1406 struct subscript *subscript;
1408 subscript = XNEW (struct subscript);
1409 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1410 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1411 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1412 SUB_DISTANCE (subscript) = chrec_dont_know;
1413 VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1415 return res;
1418 /* If the references do not access the same object, we do not know
1419 whether they alias or not. */
1420 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1422 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1423 return res;
1426 /* If the base of the object is not invariant in the loop nest, we cannot
1427 analyze it. TODO -- in fact, it would suffice to record that there may
1428 be arbitrary dependences in the loops where the base object varies. */
1429 if (loop_nest
1430 && !object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1431 DR_BASE_OBJECT (a)))
1433 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1434 return res;
1437 /* If the number of dimensions of the access to not agree we can have
1438 a pointer access to a component of the array element type and an
1439 array access while the base-objects are still the same. Punt. */
1440 if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
1442 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1443 return res;
1446 DDR_AFFINE_P (res) = true;
1447 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1448 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1449 DDR_LOOP_NEST (res) = loop_nest;
1450 DDR_INNER_LOOP (res) = 0;
1451 DDR_SELF_REFERENCE (res) = false;
1453 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1455 struct subscript *subscript;
1457 subscript = XNEW (struct subscript);
1458 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1459 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1460 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1461 SUB_DISTANCE (subscript) = chrec_dont_know;
1462 VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1465 return res;
1468 /* Frees memory used by the conflict function F. */
1470 static void
1471 free_conflict_function (conflict_function *f)
1473 unsigned i;
1475 if (CF_NONTRIVIAL_P (f))
1477 for (i = 0; i < f->n; i++)
1478 affine_fn_free (f->fns[i]);
1480 free (f);
1483 /* Frees memory used by SUBSCRIPTS. */
1485 static void
1486 free_subscripts (VEC (subscript_p, heap) *subscripts)
1488 unsigned i;
1489 subscript_p s;
1491 FOR_EACH_VEC_ELT (subscript_p, subscripts, i, s)
1493 free_conflict_function (s->conflicting_iterations_in_a);
1494 free_conflict_function (s->conflicting_iterations_in_b);
1495 free (s);
1497 VEC_free (subscript_p, heap, subscripts);
1500 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1501 description. */
1503 static inline void
1504 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1505 tree chrec)
1507 if (dump_file && (dump_flags & TDF_DETAILS))
1509 fprintf (dump_file, "(dependence classified: ");
1510 print_generic_expr (dump_file, chrec, 0);
1511 fprintf (dump_file, ")\n");
1514 DDR_ARE_DEPENDENT (ddr) = chrec;
1515 free_subscripts (DDR_SUBSCRIPTS (ddr));
1516 DDR_SUBSCRIPTS (ddr) = NULL;
1519 /* The dependence relation DDR cannot be represented by a distance
1520 vector. */
1522 static inline void
1523 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1525 if (dump_file && (dump_flags & TDF_DETAILS))
1526 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1528 DDR_AFFINE_P (ddr) = false;
1533 /* This section contains the classic Banerjee tests. */
1535 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1536 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1538 static inline bool
1539 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1541 return (evolution_function_is_constant_p (chrec_a)
1542 && evolution_function_is_constant_p (chrec_b));
1545 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1546 variable, i.e., if the SIV (Single Index Variable) test is true. */
1548 static bool
1549 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1551 if ((evolution_function_is_constant_p (chrec_a)
1552 && evolution_function_is_univariate_p (chrec_b))
1553 || (evolution_function_is_constant_p (chrec_b)
1554 && evolution_function_is_univariate_p (chrec_a)))
1555 return true;
1557 if (evolution_function_is_univariate_p (chrec_a)
1558 && evolution_function_is_univariate_p (chrec_b))
1560 switch (TREE_CODE (chrec_a))
1562 case POLYNOMIAL_CHREC:
1563 switch (TREE_CODE (chrec_b))
1565 case POLYNOMIAL_CHREC:
1566 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1567 return false;
1569 default:
1570 return true;
1573 default:
1574 return true;
1578 return false;
1581 /* Creates a conflict function with N dimensions. The affine functions
1582 in each dimension follow. */
1584 static conflict_function *
1585 conflict_fn (unsigned n, ...)
1587 unsigned i;
1588 conflict_function *ret = XCNEW (conflict_function);
1589 va_list ap;
1591 gcc_assert (0 < n && n <= MAX_DIM);
1592 va_start(ap, n);
1594 ret->n = n;
1595 for (i = 0; i < n; i++)
1596 ret->fns[i] = va_arg (ap, affine_fn);
1597 va_end(ap);
1599 return ret;
1602 /* Returns constant affine function with value CST. */
1604 static affine_fn
1605 affine_fn_cst (tree cst)
1607 affine_fn fn = VEC_alloc (tree, heap, 1);
1608 VEC_quick_push (tree, fn, cst);
1609 return fn;
1612 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1614 static affine_fn
1615 affine_fn_univar (tree cst, unsigned dim, tree coef)
1617 affine_fn fn = VEC_alloc (tree, heap, dim + 1);
1618 unsigned i;
1620 gcc_assert (dim > 0);
1621 VEC_quick_push (tree, fn, cst);
1622 for (i = 1; i < dim; i++)
1623 VEC_quick_push (tree, fn, integer_zero_node);
1624 VEC_quick_push (tree, fn, coef);
1625 return fn;
1628 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1629 *OVERLAPS_B are initialized to the functions that describe the
1630 relation between the elements accessed twice by CHREC_A and
1631 CHREC_B. For k >= 0, the following property is verified:
1633 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1635 static void
1636 analyze_ziv_subscript (tree chrec_a,
1637 tree chrec_b,
1638 conflict_function **overlaps_a,
1639 conflict_function **overlaps_b,
1640 tree *last_conflicts)
1642 tree type, difference;
1643 dependence_stats.num_ziv++;
1645 if (dump_file && (dump_flags & TDF_DETAILS))
1646 fprintf (dump_file, "(analyze_ziv_subscript \n");
1648 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1649 chrec_a = chrec_convert (type, chrec_a, NULL);
1650 chrec_b = chrec_convert (type, chrec_b, NULL);
1651 difference = chrec_fold_minus (type, chrec_a, chrec_b);
1653 switch (TREE_CODE (difference))
1655 case INTEGER_CST:
1656 if (integer_zerop (difference))
1658 /* The difference is equal to zero: the accessed index
1659 overlaps for each iteration in the loop. */
1660 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1661 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1662 *last_conflicts = chrec_dont_know;
1663 dependence_stats.num_ziv_dependent++;
1665 else
1667 /* The accesses do not overlap. */
1668 *overlaps_a = conflict_fn_no_dependence ();
1669 *overlaps_b = conflict_fn_no_dependence ();
1670 *last_conflicts = integer_zero_node;
1671 dependence_stats.num_ziv_independent++;
1673 break;
1675 default:
1676 /* We're not sure whether the indexes overlap. For the moment,
1677 conservatively answer "don't know". */
1678 if (dump_file && (dump_flags & TDF_DETAILS))
1679 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1681 *overlaps_a = conflict_fn_not_known ();
1682 *overlaps_b = conflict_fn_not_known ();
1683 *last_conflicts = chrec_dont_know;
1684 dependence_stats.num_ziv_unimplemented++;
1685 break;
1688 if (dump_file && (dump_flags & TDF_DETAILS))
1689 fprintf (dump_file, ")\n");
1692 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
1693 and only if it fits to the int type. If this is not the case, or the
1694 bound on the number of iterations of LOOP could not be derived, returns
1695 chrec_dont_know. */
1697 static tree
1698 max_stmt_executions_tree (struct loop *loop)
1700 double_int nit;
1702 if (!max_stmt_executions (loop, true, &nit))
1703 return chrec_dont_know;
1705 if (!double_int_fits_to_tree_p (unsigned_type_node, nit))
1706 return chrec_dont_know;
1708 return double_int_to_tree (unsigned_type_node, nit);
1711 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1712 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1713 *OVERLAPS_B are initialized to the functions that describe the
1714 relation between the elements accessed twice by CHREC_A and
1715 CHREC_B. For k >= 0, the following property is verified:
1717 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1719 static void
1720 analyze_siv_subscript_cst_affine (tree chrec_a,
1721 tree chrec_b,
1722 conflict_function **overlaps_a,
1723 conflict_function **overlaps_b,
1724 tree *last_conflicts)
1726 bool value0, value1, value2;
1727 tree type, difference, tmp;
1729 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1730 chrec_a = chrec_convert (type, chrec_a, NULL);
1731 chrec_b = chrec_convert (type, chrec_b, NULL);
1732 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1734 if (!chrec_is_positive (initial_condition (difference), &value0))
1736 if (dump_file && (dump_flags & TDF_DETAILS))
1737 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1739 dependence_stats.num_siv_unimplemented++;
1740 *overlaps_a = conflict_fn_not_known ();
1741 *overlaps_b = conflict_fn_not_known ();
1742 *last_conflicts = chrec_dont_know;
1743 return;
1745 else
1747 if (value0 == false)
1749 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1751 if (dump_file && (dump_flags & TDF_DETAILS))
1752 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1754 *overlaps_a = conflict_fn_not_known ();
1755 *overlaps_b = conflict_fn_not_known ();
1756 *last_conflicts = chrec_dont_know;
1757 dependence_stats.num_siv_unimplemented++;
1758 return;
1760 else
1762 if (value1 == true)
1764 /* Example:
1765 chrec_a = 12
1766 chrec_b = {10, +, 1}
1769 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1771 HOST_WIDE_INT numiter;
1772 struct loop *loop = get_chrec_loop (chrec_b);
1774 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1775 tmp = fold_build2 (EXACT_DIV_EXPR, type,
1776 fold_build1 (ABS_EXPR, type, difference),
1777 CHREC_RIGHT (chrec_b));
1778 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1779 *last_conflicts = integer_one_node;
1782 /* Perform weak-zero siv test to see if overlap is
1783 outside the loop bounds. */
1784 numiter = max_stmt_executions_int (loop, true);
1786 if (numiter >= 0
1787 && compare_tree_int (tmp, numiter) > 0)
1789 free_conflict_function (*overlaps_a);
1790 free_conflict_function (*overlaps_b);
1791 *overlaps_a = conflict_fn_no_dependence ();
1792 *overlaps_b = conflict_fn_no_dependence ();
1793 *last_conflicts = integer_zero_node;
1794 dependence_stats.num_siv_independent++;
1795 return;
1797 dependence_stats.num_siv_dependent++;
1798 return;
1801 /* When the step does not divide the difference, there are
1802 no overlaps. */
1803 else
1805 *overlaps_a = conflict_fn_no_dependence ();
1806 *overlaps_b = conflict_fn_no_dependence ();
1807 *last_conflicts = integer_zero_node;
1808 dependence_stats.num_siv_independent++;
1809 return;
1813 else
1815 /* Example:
1816 chrec_a = 12
1817 chrec_b = {10, +, -1}
1819 In this case, chrec_a will not overlap with chrec_b. */
1820 *overlaps_a = conflict_fn_no_dependence ();
1821 *overlaps_b = conflict_fn_no_dependence ();
1822 *last_conflicts = integer_zero_node;
1823 dependence_stats.num_siv_independent++;
1824 return;
1828 else
1830 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1832 if (dump_file && (dump_flags & TDF_DETAILS))
1833 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1835 *overlaps_a = conflict_fn_not_known ();
1836 *overlaps_b = conflict_fn_not_known ();
1837 *last_conflicts = chrec_dont_know;
1838 dependence_stats.num_siv_unimplemented++;
1839 return;
1841 else
1843 if (value2 == false)
1845 /* Example:
1846 chrec_a = 3
1847 chrec_b = {10, +, -1}
1849 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1851 HOST_WIDE_INT numiter;
1852 struct loop *loop = get_chrec_loop (chrec_b);
1854 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1855 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
1856 CHREC_RIGHT (chrec_b));
1857 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1858 *last_conflicts = integer_one_node;
1860 /* Perform weak-zero siv test to see if overlap is
1861 outside the loop bounds. */
1862 numiter = max_stmt_executions_int (loop, true);
1864 if (numiter >= 0
1865 && compare_tree_int (tmp, numiter) > 0)
1867 free_conflict_function (*overlaps_a);
1868 free_conflict_function (*overlaps_b);
1869 *overlaps_a = conflict_fn_no_dependence ();
1870 *overlaps_b = conflict_fn_no_dependence ();
1871 *last_conflicts = integer_zero_node;
1872 dependence_stats.num_siv_independent++;
1873 return;
1875 dependence_stats.num_siv_dependent++;
1876 return;
1879 /* When the step does not divide the difference, there
1880 are no overlaps. */
1881 else
1883 *overlaps_a = conflict_fn_no_dependence ();
1884 *overlaps_b = conflict_fn_no_dependence ();
1885 *last_conflicts = integer_zero_node;
1886 dependence_stats.num_siv_independent++;
1887 return;
1890 else
1892 /* Example:
1893 chrec_a = 3
1894 chrec_b = {4, +, 1}
1896 In this case, chrec_a will not overlap with chrec_b. */
1897 *overlaps_a = conflict_fn_no_dependence ();
1898 *overlaps_b = conflict_fn_no_dependence ();
1899 *last_conflicts = integer_zero_node;
1900 dependence_stats.num_siv_independent++;
1901 return;
1908 /* Helper recursive function for initializing the matrix A. Returns
1909 the initial value of CHREC. */
1911 static tree
1912 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
1914 gcc_assert (chrec);
1916 switch (TREE_CODE (chrec))
1918 case POLYNOMIAL_CHREC:
1919 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
1921 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
1922 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
1924 case PLUS_EXPR:
1925 case MULT_EXPR:
1926 case MINUS_EXPR:
1928 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1929 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
1931 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
1934 case NOP_EXPR:
1936 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1937 return chrec_convert (chrec_type (chrec), op, NULL);
1940 case BIT_NOT_EXPR:
1942 /* Handle ~X as -1 - X. */
1943 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1944 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
1945 build_int_cst (TREE_TYPE (chrec), -1), op);
1948 case INTEGER_CST:
1949 return chrec;
1951 default:
1952 gcc_unreachable ();
1953 return NULL_TREE;
1957 #define FLOOR_DIV(x,y) ((x) / (y))
1959 /* Solves the special case of the Diophantine equation:
1960 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
1962 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
1963 number of iterations that loops X and Y run. The overlaps will be
1964 constructed as evolutions in dimension DIM. */
1966 static void
1967 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
1968 affine_fn *overlaps_a,
1969 affine_fn *overlaps_b,
1970 tree *last_conflicts, int dim)
1972 if (((step_a > 0 && step_b > 0)
1973 || (step_a < 0 && step_b < 0)))
1975 int step_overlaps_a, step_overlaps_b;
1976 int gcd_steps_a_b, last_conflict, tau2;
1978 gcd_steps_a_b = gcd (step_a, step_b);
1979 step_overlaps_a = step_b / gcd_steps_a_b;
1980 step_overlaps_b = step_a / gcd_steps_a_b;
1982 if (niter > 0)
1984 tau2 = FLOOR_DIV (niter, step_overlaps_a);
1985 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
1986 last_conflict = tau2;
1987 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
1989 else
1990 *last_conflicts = chrec_dont_know;
1992 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
1993 build_int_cst (NULL_TREE,
1994 step_overlaps_a));
1995 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
1996 build_int_cst (NULL_TREE,
1997 step_overlaps_b));
2000 else
2002 *overlaps_a = affine_fn_cst (integer_zero_node);
2003 *overlaps_b = affine_fn_cst (integer_zero_node);
2004 *last_conflicts = integer_zero_node;
2008 /* Solves the special case of a Diophantine equation where CHREC_A is
2009 an affine bivariate function, and CHREC_B is an affine univariate
2010 function. For example,
2012 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
2014 has the following overlapping functions:
2016 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
2017 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
2018 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
2020 FORNOW: This is a specialized implementation for a case occurring in
2021 a common benchmark. Implement the general algorithm. */
2023 static void
2024 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
2025 conflict_function **overlaps_a,
2026 conflict_function **overlaps_b,
2027 tree *last_conflicts)
2029 bool xz_p, yz_p, xyz_p;
2030 int step_x, step_y, step_z;
2031 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
2032 affine_fn overlaps_a_xz, overlaps_b_xz;
2033 affine_fn overlaps_a_yz, overlaps_b_yz;
2034 affine_fn overlaps_a_xyz, overlaps_b_xyz;
2035 affine_fn ova1, ova2, ovb;
2036 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
2038 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
2039 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
2040 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
2042 niter_x =
2043 max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)), true);
2044 niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a), true);
2045 niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b), true);
2047 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
2049 if (dump_file && (dump_flags & TDF_DETAILS))
2050 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
2052 *overlaps_a = conflict_fn_not_known ();
2053 *overlaps_b = conflict_fn_not_known ();
2054 *last_conflicts = chrec_dont_know;
2055 return;
2058 niter = MIN (niter_x, niter_z);
2059 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
2060 &overlaps_a_xz,
2061 &overlaps_b_xz,
2062 &last_conflicts_xz, 1);
2063 niter = MIN (niter_y, niter_z);
2064 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
2065 &overlaps_a_yz,
2066 &overlaps_b_yz,
2067 &last_conflicts_yz, 2);
2068 niter = MIN (niter_x, niter_z);
2069 niter = MIN (niter_y, niter);
2070 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
2071 &overlaps_a_xyz,
2072 &overlaps_b_xyz,
2073 &last_conflicts_xyz, 3);
2075 xz_p = !integer_zerop (last_conflicts_xz);
2076 yz_p = !integer_zerop (last_conflicts_yz);
2077 xyz_p = !integer_zerop (last_conflicts_xyz);
2079 if (xz_p || yz_p || xyz_p)
2081 ova1 = affine_fn_cst (integer_zero_node);
2082 ova2 = affine_fn_cst (integer_zero_node);
2083 ovb = affine_fn_cst (integer_zero_node);
2084 if (xz_p)
2086 affine_fn t0 = ova1;
2087 affine_fn t2 = ovb;
2089 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2090 ovb = affine_fn_plus (ovb, overlaps_b_xz);
2091 affine_fn_free (t0);
2092 affine_fn_free (t2);
2093 *last_conflicts = last_conflicts_xz;
2095 if (yz_p)
2097 affine_fn t0 = ova2;
2098 affine_fn t2 = ovb;
2100 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2101 ovb = affine_fn_plus (ovb, overlaps_b_yz);
2102 affine_fn_free (t0);
2103 affine_fn_free (t2);
2104 *last_conflicts = last_conflicts_yz;
2106 if (xyz_p)
2108 affine_fn t0 = ova1;
2109 affine_fn t2 = ova2;
2110 affine_fn t4 = ovb;
2112 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2113 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2114 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2115 affine_fn_free (t0);
2116 affine_fn_free (t2);
2117 affine_fn_free (t4);
2118 *last_conflicts = last_conflicts_xyz;
2120 *overlaps_a = conflict_fn (2, ova1, ova2);
2121 *overlaps_b = conflict_fn (1, ovb);
2123 else
2125 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2126 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2127 *last_conflicts = integer_zero_node;
2130 affine_fn_free (overlaps_a_xz);
2131 affine_fn_free (overlaps_b_xz);
2132 affine_fn_free (overlaps_a_yz);
2133 affine_fn_free (overlaps_b_yz);
2134 affine_fn_free (overlaps_a_xyz);
2135 affine_fn_free (overlaps_b_xyz);
2138 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
2140 static void
2141 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
2142 int size)
2144 memcpy (vec2, vec1, size * sizeof (*vec1));
2147 /* Copy the elements of M x N matrix MAT1 to MAT2. */
2149 static void
2150 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
2151 int m, int n)
2153 int i;
2155 for (i = 0; i < m; i++)
2156 lambda_vector_copy (mat1[i], mat2[i], n);
2159 /* Store the N x N identity matrix in MAT. */
2161 static void
2162 lambda_matrix_id (lambda_matrix mat, int size)
2164 int i, j;
2166 for (i = 0; i < size; i++)
2167 for (j = 0; j < size; j++)
2168 mat[i][j] = (i == j) ? 1 : 0;
2171 /* Return the first nonzero element of vector VEC1 between START and N.
2172 We must have START <= N. Returns N if VEC1 is the zero vector. */
2174 static int
2175 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
2177 int j = start;
2178 while (j < n && vec1[j] == 0)
2179 j++;
2180 return j;
2183 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2184 R2 = R2 + CONST1 * R1. */
2186 static void
2187 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
2189 int i;
2191 if (const1 == 0)
2192 return;
2194 for (i = 0; i < n; i++)
2195 mat[r2][i] += const1 * mat[r1][i];
2198 /* Swap rows R1 and R2 in matrix MAT. */
2200 static void
2201 lambda_matrix_row_exchange (lambda_matrix mat, int r1, int r2)
2203 lambda_vector row;
2205 row = mat[r1];
2206 mat[r1] = mat[r2];
2207 mat[r2] = row;
2210 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
2211 and store the result in VEC2. */
2213 static void
2214 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
2215 int size, int const1)
2217 int i;
2219 if (const1 == 0)
2220 lambda_vector_clear (vec2, size);
2221 else
2222 for (i = 0; i < size; i++)
2223 vec2[i] = const1 * vec1[i];
2226 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
2228 static void
2229 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
2230 int size)
2232 lambda_vector_mult_const (vec1, vec2, size, -1);
2235 /* Negate row R1 of matrix MAT which has N columns. */
2237 static void
2238 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
2240 lambda_vector_negate (mat[r1], mat[r1], n);
2243 /* Return true if two vectors are equal. */
2245 static bool
2246 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
2248 int i;
2249 for (i = 0; i < size; i++)
2250 if (vec1[i] != vec2[i])
2251 return false;
2252 return true;
2255 /* Given an M x N integer matrix A, this function determines an M x
2256 M unimodular matrix U, and an M x N echelon matrix S such that
2257 "U.A = S". This decomposition is also known as "right Hermite".
2259 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2260 Restructuring Compilers" Utpal Banerjee. */
2262 static void
2263 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
2264 lambda_matrix S, lambda_matrix U)
2266 int i, j, i0 = 0;
2268 lambda_matrix_copy (A, S, m, n);
2269 lambda_matrix_id (U, m);
2271 for (j = 0; j < n; j++)
2273 if (lambda_vector_first_nz (S[j], m, i0) < m)
2275 ++i0;
2276 for (i = m - 1; i >= i0; i--)
2278 while (S[i][j] != 0)
2280 int sigma, factor, a, b;
2282 a = S[i-1][j];
2283 b = S[i][j];
2284 sigma = (a * b < 0) ? -1: 1;
2285 a = abs (a);
2286 b = abs (b);
2287 factor = sigma * (a / b);
2289 lambda_matrix_row_add (S, n, i, i-1, -factor);
2290 lambda_matrix_row_exchange (S, i, i-1);
2292 lambda_matrix_row_add (U, m, i, i-1, -factor);
2293 lambda_matrix_row_exchange (U, i, i-1);
2300 /* Determines the overlapping elements due to accesses CHREC_A and
2301 CHREC_B, that are affine functions. This function cannot handle
2302 symbolic evolution functions, ie. when initial conditions are
2303 parameters, because it uses lambda matrices of integers. */
2305 static void
2306 analyze_subscript_affine_affine (tree chrec_a,
2307 tree chrec_b,
2308 conflict_function **overlaps_a,
2309 conflict_function **overlaps_b,
2310 tree *last_conflicts)
2312 unsigned nb_vars_a, nb_vars_b, dim;
2313 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2314 lambda_matrix A, U, S;
2315 struct obstack scratch_obstack;
2317 if (eq_evolutions_p (chrec_a, chrec_b))
2319 /* The accessed index overlaps for each iteration in the
2320 loop. */
2321 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2322 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2323 *last_conflicts = chrec_dont_know;
2324 return;
2326 if (dump_file && (dump_flags & TDF_DETAILS))
2327 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2329 /* For determining the initial intersection, we have to solve a
2330 Diophantine equation. This is the most time consuming part.
2332 For answering to the question: "Is there a dependence?" we have
2333 to prove that there exists a solution to the Diophantine
2334 equation, and that the solution is in the iteration domain,
2335 i.e. the solution is positive or zero, and that the solution
2336 happens before the upper bound loop.nb_iterations. Otherwise
2337 there is no dependence. This function outputs a description of
2338 the iterations that hold the intersections. */
2340 nb_vars_a = nb_vars_in_chrec (chrec_a);
2341 nb_vars_b = nb_vars_in_chrec (chrec_b);
2343 gcc_obstack_init (&scratch_obstack);
2345 dim = nb_vars_a + nb_vars_b;
2346 U = lambda_matrix_new (dim, dim, &scratch_obstack);
2347 A = lambda_matrix_new (dim, 1, &scratch_obstack);
2348 S = lambda_matrix_new (dim, 1, &scratch_obstack);
2350 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2351 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
2352 gamma = init_b - init_a;
2354 /* Don't do all the hard work of solving the Diophantine equation
2355 when we already know the solution: for example,
2356 | {3, +, 1}_1
2357 | {3, +, 4}_2
2358 | gamma = 3 - 3 = 0.
2359 Then the first overlap occurs during the first iterations:
2360 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2362 if (gamma == 0)
2364 if (nb_vars_a == 1 && nb_vars_b == 1)
2366 HOST_WIDE_INT step_a, step_b;
2367 HOST_WIDE_INT niter, niter_a, niter_b;
2368 affine_fn ova, ovb;
2370 niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a), true);
2371 niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b), true);
2372 niter = MIN (niter_a, niter_b);
2373 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2374 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2376 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2377 &ova, &ovb,
2378 last_conflicts, 1);
2379 *overlaps_a = conflict_fn (1, ova);
2380 *overlaps_b = conflict_fn (1, ovb);
2383 else if (nb_vars_a == 2 && nb_vars_b == 1)
2384 compute_overlap_steps_for_affine_1_2
2385 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2387 else if (nb_vars_a == 1 && nb_vars_b == 2)
2388 compute_overlap_steps_for_affine_1_2
2389 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2391 else
2393 if (dump_file && (dump_flags & TDF_DETAILS))
2394 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2395 *overlaps_a = conflict_fn_not_known ();
2396 *overlaps_b = conflict_fn_not_known ();
2397 *last_conflicts = chrec_dont_know;
2399 goto end_analyze_subs_aa;
2402 /* U.A = S */
2403 lambda_matrix_right_hermite (A, dim, 1, S, U);
2405 if (S[0][0] < 0)
2407 S[0][0] *= -1;
2408 lambda_matrix_row_negate (U, dim, 0);
2410 gcd_alpha_beta = S[0][0];
2412 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2413 but that is a quite strange case. Instead of ICEing, answer
2414 don't know. */
2415 if (gcd_alpha_beta == 0)
2417 *overlaps_a = conflict_fn_not_known ();
2418 *overlaps_b = conflict_fn_not_known ();
2419 *last_conflicts = chrec_dont_know;
2420 goto end_analyze_subs_aa;
2423 /* The classic "gcd-test". */
2424 if (!int_divides_p (gcd_alpha_beta, gamma))
2426 /* The "gcd-test" has determined that there is no integer
2427 solution, i.e. there is no dependence. */
2428 *overlaps_a = conflict_fn_no_dependence ();
2429 *overlaps_b = conflict_fn_no_dependence ();
2430 *last_conflicts = integer_zero_node;
2433 /* Both access functions are univariate. This includes SIV and MIV cases. */
2434 else if (nb_vars_a == 1 && nb_vars_b == 1)
2436 /* Both functions should have the same evolution sign. */
2437 if (((A[0][0] > 0 && -A[1][0] > 0)
2438 || (A[0][0] < 0 && -A[1][0] < 0)))
2440 /* The solutions are given by:
2442 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2443 | [u21 u22] [y0]
2445 For a given integer t. Using the following variables,
2447 | i0 = u11 * gamma / gcd_alpha_beta
2448 | j0 = u12 * gamma / gcd_alpha_beta
2449 | i1 = u21
2450 | j1 = u22
2452 the solutions are:
2454 | x0 = i0 + i1 * t,
2455 | y0 = j0 + j1 * t. */
2456 HOST_WIDE_INT i0, j0, i1, j1;
2458 i0 = U[0][0] * gamma / gcd_alpha_beta;
2459 j0 = U[0][1] * gamma / gcd_alpha_beta;
2460 i1 = U[1][0];
2461 j1 = U[1][1];
2463 if ((i1 == 0 && i0 < 0)
2464 || (j1 == 0 && j0 < 0))
2466 /* There is no solution.
2467 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2468 falls in here, but for the moment we don't look at the
2469 upper bound of the iteration domain. */
2470 *overlaps_a = conflict_fn_no_dependence ();
2471 *overlaps_b = conflict_fn_no_dependence ();
2472 *last_conflicts = integer_zero_node;
2473 goto end_analyze_subs_aa;
2476 if (i1 > 0 && j1 > 0)
2478 HOST_WIDE_INT niter_a = max_stmt_executions_int
2479 (get_chrec_loop (chrec_a), true);
2480 HOST_WIDE_INT niter_b = max_stmt_executions_int
2481 (get_chrec_loop (chrec_b), true);
2482 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2484 /* (X0, Y0) is a solution of the Diophantine equation:
2485 "chrec_a (X0) = chrec_b (Y0)". */
2486 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2487 CEIL (-j0, j1));
2488 HOST_WIDE_INT x0 = i1 * tau1 + i0;
2489 HOST_WIDE_INT y0 = j1 * tau1 + j0;
2491 /* (X1, Y1) is the smallest positive solution of the eq
2492 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2493 first conflict occurs. */
2494 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2495 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2496 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2498 if (niter > 0)
2500 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2501 FLOOR_DIV (niter - j0, j1));
2502 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2504 /* If the overlap occurs outside of the bounds of the
2505 loop, there is no dependence. */
2506 if (x1 >= niter || y1 >= niter)
2508 *overlaps_a = conflict_fn_no_dependence ();
2509 *overlaps_b = conflict_fn_no_dependence ();
2510 *last_conflicts = integer_zero_node;
2511 goto end_analyze_subs_aa;
2513 else
2514 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2516 else
2517 *last_conflicts = chrec_dont_know;
2519 *overlaps_a
2520 = conflict_fn (1,
2521 affine_fn_univar (build_int_cst (NULL_TREE, x1),
2523 build_int_cst (NULL_TREE, i1)));
2524 *overlaps_b
2525 = conflict_fn (1,
2526 affine_fn_univar (build_int_cst (NULL_TREE, y1),
2528 build_int_cst (NULL_TREE, j1)));
2530 else
2532 /* FIXME: For the moment, the upper bound of the
2533 iteration domain for i and j is not checked. */
2534 if (dump_file && (dump_flags & TDF_DETAILS))
2535 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2536 *overlaps_a = conflict_fn_not_known ();
2537 *overlaps_b = conflict_fn_not_known ();
2538 *last_conflicts = chrec_dont_know;
2541 else
2543 if (dump_file && (dump_flags & TDF_DETAILS))
2544 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2545 *overlaps_a = conflict_fn_not_known ();
2546 *overlaps_b = conflict_fn_not_known ();
2547 *last_conflicts = chrec_dont_know;
2550 else
2552 if (dump_file && (dump_flags & TDF_DETAILS))
2553 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2554 *overlaps_a = conflict_fn_not_known ();
2555 *overlaps_b = conflict_fn_not_known ();
2556 *last_conflicts = chrec_dont_know;
2559 end_analyze_subs_aa:
2560 obstack_free (&scratch_obstack, NULL);
2561 if (dump_file && (dump_flags & TDF_DETAILS))
2563 fprintf (dump_file, " (overlaps_a = ");
2564 dump_conflict_function (dump_file, *overlaps_a);
2565 fprintf (dump_file, ")\n (overlaps_b = ");
2566 dump_conflict_function (dump_file, *overlaps_b);
2567 fprintf (dump_file, ")\n");
2568 fprintf (dump_file, ")\n");
2572 /* Returns true when analyze_subscript_affine_affine can be used for
2573 determining the dependence relation between chrec_a and chrec_b,
2574 that contain symbols. This function modifies chrec_a and chrec_b
2575 such that the analysis result is the same, and such that they don't
2576 contain symbols, and then can safely be passed to the analyzer.
2578 Example: The analysis of the following tuples of evolutions produce
2579 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2580 vs. {0, +, 1}_1
2582 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2583 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2586 static bool
2587 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2589 tree diff, type, left_a, left_b, right_b;
2591 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2592 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2593 /* FIXME: For the moment not handled. Might be refined later. */
2594 return false;
2596 type = chrec_type (*chrec_a);
2597 left_a = CHREC_LEFT (*chrec_a);
2598 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
2599 diff = chrec_fold_minus (type, left_a, left_b);
2601 if (!evolution_function_is_constant_p (diff))
2602 return false;
2604 if (dump_file && (dump_flags & TDF_DETAILS))
2605 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2607 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2608 diff, CHREC_RIGHT (*chrec_a));
2609 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
2610 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2611 build_int_cst (type, 0),
2612 right_b);
2613 return true;
2616 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2617 *OVERLAPS_B are initialized to the functions that describe the
2618 relation between the elements accessed twice by CHREC_A and
2619 CHREC_B. For k >= 0, the following property is verified:
2621 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2623 static void
2624 analyze_siv_subscript (tree chrec_a,
2625 tree chrec_b,
2626 conflict_function **overlaps_a,
2627 conflict_function **overlaps_b,
2628 tree *last_conflicts,
2629 int loop_nest_num)
2631 dependence_stats.num_siv++;
2633 if (dump_file && (dump_flags & TDF_DETAILS))
2634 fprintf (dump_file, "(analyze_siv_subscript \n");
2636 if (evolution_function_is_constant_p (chrec_a)
2637 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2638 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2639 overlaps_a, overlaps_b, last_conflicts);
2641 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2642 && evolution_function_is_constant_p (chrec_b))
2643 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2644 overlaps_b, overlaps_a, last_conflicts);
2646 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2647 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2649 if (!chrec_contains_symbols (chrec_a)
2650 && !chrec_contains_symbols (chrec_b))
2652 analyze_subscript_affine_affine (chrec_a, chrec_b,
2653 overlaps_a, overlaps_b,
2654 last_conflicts);
2656 if (CF_NOT_KNOWN_P (*overlaps_a)
2657 || CF_NOT_KNOWN_P (*overlaps_b))
2658 dependence_stats.num_siv_unimplemented++;
2659 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2660 || CF_NO_DEPENDENCE_P (*overlaps_b))
2661 dependence_stats.num_siv_independent++;
2662 else
2663 dependence_stats.num_siv_dependent++;
2665 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2666 &chrec_b))
2668 analyze_subscript_affine_affine (chrec_a, chrec_b,
2669 overlaps_a, overlaps_b,
2670 last_conflicts);
2672 if (CF_NOT_KNOWN_P (*overlaps_a)
2673 || CF_NOT_KNOWN_P (*overlaps_b))
2674 dependence_stats.num_siv_unimplemented++;
2675 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2676 || CF_NO_DEPENDENCE_P (*overlaps_b))
2677 dependence_stats.num_siv_independent++;
2678 else
2679 dependence_stats.num_siv_dependent++;
2681 else
2682 goto siv_subscript_dontknow;
2685 else
2687 siv_subscript_dontknow:;
2688 if (dump_file && (dump_flags & TDF_DETAILS))
2689 fprintf (dump_file, "siv test failed: unimplemented.\n");
2690 *overlaps_a = conflict_fn_not_known ();
2691 *overlaps_b = conflict_fn_not_known ();
2692 *last_conflicts = chrec_dont_know;
2693 dependence_stats.num_siv_unimplemented++;
2696 if (dump_file && (dump_flags & TDF_DETAILS))
2697 fprintf (dump_file, ")\n");
2700 /* Returns false if we can prove that the greatest common divisor of the steps
2701 of CHREC does not divide CST, false otherwise. */
2703 static bool
2704 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2706 HOST_WIDE_INT cd = 0, val;
2707 tree step;
2709 if (!host_integerp (cst, 0))
2710 return true;
2711 val = tree_low_cst (cst, 0);
2713 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2715 step = CHREC_RIGHT (chrec);
2716 if (!host_integerp (step, 0))
2717 return true;
2718 cd = gcd (cd, tree_low_cst (step, 0));
2719 chrec = CHREC_LEFT (chrec);
2722 return val % cd == 0;
2725 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2726 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2727 functions that describe the relation between the elements accessed
2728 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2729 is verified:
2731 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2733 static void
2734 analyze_miv_subscript (tree chrec_a,
2735 tree chrec_b,
2736 conflict_function **overlaps_a,
2737 conflict_function **overlaps_b,
2738 tree *last_conflicts,
2739 struct loop *loop_nest)
2741 tree type, difference;
2743 dependence_stats.num_miv++;
2744 if (dump_file && (dump_flags & TDF_DETAILS))
2745 fprintf (dump_file, "(analyze_miv_subscript \n");
2747 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2748 chrec_a = chrec_convert (type, chrec_a, NULL);
2749 chrec_b = chrec_convert (type, chrec_b, NULL);
2750 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2752 if (eq_evolutions_p (chrec_a, chrec_b))
2754 /* Access functions are the same: all the elements are accessed
2755 in the same order. */
2756 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2757 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2758 *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
2759 dependence_stats.num_miv_dependent++;
2762 else if (evolution_function_is_constant_p (difference)
2763 /* For the moment, the following is verified:
2764 evolution_function_is_affine_multivariate_p (chrec_a,
2765 loop_nest->num) */
2766 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2768 /* testsuite/.../ssa-chrec-33.c
2769 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2771 The difference is 1, and all the evolution steps are multiples
2772 of 2, consequently there are no overlapping elements. */
2773 *overlaps_a = conflict_fn_no_dependence ();
2774 *overlaps_b = conflict_fn_no_dependence ();
2775 *last_conflicts = integer_zero_node;
2776 dependence_stats.num_miv_independent++;
2779 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2780 && !chrec_contains_symbols (chrec_a)
2781 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2782 && !chrec_contains_symbols (chrec_b))
2784 /* testsuite/.../ssa-chrec-35.c
2785 {0, +, 1}_2 vs. {0, +, 1}_3
2786 the overlapping elements are respectively located at iterations:
2787 {0, +, 1}_x and {0, +, 1}_x,
2788 in other words, we have the equality:
2789 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2791 Other examples:
2792 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2793 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2795 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2796 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2798 analyze_subscript_affine_affine (chrec_a, chrec_b,
2799 overlaps_a, overlaps_b, last_conflicts);
2801 if (CF_NOT_KNOWN_P (*overlaps_a)
2802 || CF_NOT_KNOWN_P (*overlaps_b))
2803 dependence_stats.num_miv_unimplemented++;
2804 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2805 || CF_NO_DEPENDENCE_P (*overlaps_b))
2806 dependence_stats.num_miv_independent++;
2807 else
2808 dependence_stats.num_miv_dependent++;
2811 else
2813 /* When the analysis is too difficult, answer "don't know". */
2814 if (dump_file && (dump_flags & TDF_DETAILS))
2815 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2817 *overlaps_a = conflict_fn_not_known ();
2818 *overlaps_b = conflict_fn_not_known ();
2819 *last_conflicts = chrec_dont_know;
2820 dependence_stats.num_miv_unimplemented++;
2823 if (dump_file && (dump_flags & TDF_DETAILS))
2824 fprintf (dump_file, ")\n");
2827 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2828 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2829 OVERLAP_ITERATIONS_B are initialized with two functions that
2830 describe the iterations that contain conflicting elements.
2832 Remark: For an integer k >= 0, the following equality is true:
2834 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2837 static void
2838 analyze_overlapping_iterations (tree chrec_a,
2839 tree chrec_b,
2840 conflict_function **overlap_iterations_a,
2841 conflict_function **overlap_iterations_b,
2842 tree *last_conflicts, struct loop *loop_nest)
2844 unsigned int lnn = loop_nest->num;
2846 dependence_stats.num_subscript_tests++;
2848 if (dump_file && (dump_flags & TDF_DETAILS))
2850 fprintf (dump_file, "(analyze_overlapping_iterations \n");
2851 fprintf (dump_file, " (chrec_a = ");
2852 print_generic_expr (dump_file, chrec_a, 0);
2853 fprintf (dump_file, ")\n (chrec_b = ");
2854 print_generic_expr (dump_file, chrec_b, 0);
2855 fprintf (dump_file, ")\n");
2858 if (chrec_a == NULL_TREE
2859 || chrec_b == NULL_TREE
2860 || chrec_contains_undetermined (chrec_a)
2861 || chrec_contains_undetermined (chrec_b))
2863 dependence_stats.num_subscript_undetermined++;
2865 *overlap_iterations_a = conflict_fn_not_known ();
2866 *overlap_iterations_b = conflict_fn_not_known ();
2869 /* If they are the same chrec, and are affine, they overlap
2870 on every iteration. */
2871 else if (eq_evolutions_p (chrec_a, chrec_b)
2872 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2873 || operand_equal_p (chrec_a, chrec_b, 0)))
2875 dependence_stats.num_same_subscript_function++;
2876 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2877 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2878 *last_conflicts = chrec_dont_know;
2881 /* If they aren't the same, and aren't affine, we can't do anything
2882 yet. */
2883 else if ((chrec_contains_symbols (chrec_a)
2884 || chrec_contains_symbols (chrec_b))
2885 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2886 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
2888 dependence_stats.num_subscript_undetermined++;
2889 *overlap_iterations_a = conflict_fn_not_known ();
2890 *overlap_iterations_b = conflict_fn_not_known ();
2893 else if (ziv_subscript_p (chrec_a, chrec_b))
2894 analyze_ziv_subscript (chrec_a, chrec_b,
2895 overlap_iterations_a, overlap_iterations_b,
2896 last_conflicts);
2898 else if (siv_subscript_p (chrec_a, chrec_b))
2899 analyze_siv_subscript (chrec_a, chrec_b,
2900 overlap_iterations_a, overlap_iterations_b,
2901 last_conflicts, lnn);
2903 else
2904 analyze_miv_subscript (chrec_a, chrec_b,
2905 overlap_iterations_a, overlap_iterations_b,
2906 last_conflicts, loop_nest);
2908 if (dump_file && (dump_flags & TDF_DETAILS))
2910 fprintf (dump_file, " (overlap_iterations_a = ");
2911 dump_conflict_function (dump_file, *overlap_iterations_a);
2912 fprintf (dump_file, ")\n (overlap_iterations_b = ");
2913 dump_conflict_function (dump_file, *overlap_iterations_b);
2914 fprintf (dump_file, ")\n");
2915 fprintf (dump_file, ")\n");
2919 /* Helper function for uniquely inserting distance vectors. */
2921 static void
2922 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
2924 unsigned i;
2925 lambda_vector v;
2927 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, v)
2928 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
2929 return;
2931 VEC_safe_push (lambda_vector, heap, DDR_DIST_VECTS (ddr), dist_v);
2934 /* Helper function for uniquely inserting direction vectors. */
2936 static void
2937 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
2939 unsigned i;
2940 lambda_vector v;
2942 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), i, v)
2943 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
2944 return;
2946 VEC_safe_push (lambda_vector, heap, DDR_DIR_VECTS (ddr), dir_v);
2949 /* Add a distance of 1 on all the loops outer than INDEX. If we
2950 haven't yet determined a distance for this outer loop, push a new
2951 distance vector composed of the previous distance, and a distance
2952 of 1 for this outer loop. Example:
2954 | loop_1
2955 | loop_2
2956 | A[10]
2957 | endloop_2
2958 | endloop_1
2960 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
2961 save (0, 1), then we have to save (1, 0). */
2963 static void
2964 add_outer_distances (struct data_dependence_relation *ddr,
2965 lambda_vector dist_v, int index)
2967 /* For each outer loop where init_v is not set, the accesses are
2968 in dependence of distance 1 in the loop. */
2969 while (--index >= 0)
2971 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2972 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
2973 save_v[index] = 1;
2974 save_dist_v (ddr, save_v);
2978 /* Return false when fail to represent the data dependence as a
2979 distance vector. INIT_B is set to true when a component has been
2980 added to the distance vector DIST_V. INDEX_CARRY is then set to
2981 the index in DIST_V that carries the dependence. */
2983 static bool
2984 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
2985 struct data_reference *ddr_a,
2986 struct data_reference *ddr_b,
2987 lambda_vector dist_v, bool *init_b,
2988 int *index_carry)
2990 unsigned i;
2991 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2993 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2995 tree access_fn_a, access_fn_b;
2996 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
2998 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
3000 non_affine_dependence_relation (ddr);
3001 return false;
3004 access_fn_a = DR_ACCESS_FN (ddr_a, i);
3005 access_fn_b = DR_ACCESS_FN (ddr_b, i);
3007 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
3008 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
3010 int dist, index;
3011 int var_a = CHREC_VARIABLE (access_fn_a);
3012 int var_b = CHREC_VARIABLE (access_fn_b);
3014 if (var_a != var_b
3015 || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
3017 non_affine_dependence_relation (ddr);
3018 return false;
3021 dist = int_cst_value (SUB_DISTANCE (subscript));
3022 index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
3023 *index_carry = MIN (index, *index_carry);
3025 /* This is the subscript coupling test. If we have already
3026 recorded a distance for this loop (a distance coming from
3027 another subscript), it should be the same. For example,
3028 in the following code, there is no dependence:
3030 | loop i = 0, N, 1
3031 | T[i+1][i] = ...
3032 | ... = T[i][i]
3033 | endloop
3035 if (init_v[index] != 0 && dist_v[index] != dist)
3037 finalize_ddr_dependent (ddr, chrec_known);
3038 return false;
3041 dist_v[index] = dist;
3042 init_v[index] = 1;
3043 *init_b = true;
3045 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
3047 /* This can be for example an affine vs. constant dependence
3048 (T[i] vs. T[3]) that is not an affine dependence and is
3049 not representable as a distance vector. */
3050 non_affine_dependence_relation (ddr);
3051 return false;
3055 return true;
3058 /* Return true when the DDR contains only constant access functions. */
3060 static bool
3061 constant_access_functions (const struct data_dependence_relation *ddr)
3063 unsigned i;
3065 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3066 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
3067 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
3068 return false;
3070 return true;
3073 /* Helper function for the case where DDR_A and DDR_B are the same
3074 multivariate access function with a constant step. For an example
3075 see pr34635-1.c. */
3077 static void
3078 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
3080 int x_1, x_2;
3081 tree c_1 = CHREC_LEFT (c_2);
3082 tree c_0 = CHREC_LEFT (c_1);
3083 lambda_vector dist_v;
3084 int v1, v2, cd;
3086 /* Polynomials with more than 2 variables are not handled yet. When
3087 the evolution steps are parameters, it is not possible to
3088 represent the dependence using classical distance vectors. */
3089 if (TREE_CODE (c_0) != INTEGER_CST
3090 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
3091 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
3093 DDR_AFFINE_P (ddr) = false;
3094 return;
3097 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
3098 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
3100 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
3101 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3102 v1 = int_cst_value (CHREC_RIGHT (c_1));
3103 v2 = int_cst_value (CHREC_RIGHT (c_2));
3104 cd = gcd (v1, v2);
3105 v1 /= cd;
3106 v2 /= cd;
3108 if (v2 < 0)
3110 v2 = -v2;
3111 v1 = -v1;
3114 dist_v[x_1] = v2;
3115 dist_v[x_2] = -v1;
3116 save_dist_v (ddr, dist_v);
3118 add_outer_distances (ddr, dist_v, x_1);
3121 /* Helper function for the case where DDR_A and DDR_B are the same
3122 access functions. */
3124 static void
3125 add_other_self_distances (struct data_dependence_relation *ddr)
3127 lambda_vector dist_v;
3128 unsigned i;
3129 int index_carry = DDR_NB_LOOPS (ddr);
3131 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3133 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
3135 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
3137 if (!evolution_function_is_univariate_p (access_fun))
3139 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
3141 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
3142 return;
3145 access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
3147 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
3148 add_multivariate_self_dist (ddr, access_fun);
3149 else
3150 /* The evolution step is not constant: it varies in
3151 the outer loop, so this cannot be represented by a
3152 distance vector. For example in pr34635.c the
3153 evolution is {0, +, {0, +, 4}_1}_2. */
3154 DDR_AFFINE_P (ddr) = false;
3156 return;
3159 index_carry = MIN (index_carry,
3160 index_in_loop_nest (CHREC_VARIABLE (access_fun),
3161 DDR_LOOP_NEST (ddr)));
3165 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3166 add_outer_distances (ddr, dist_v, index_carry);
3169 static void
3170 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
3172 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3174 dist_v[DDR_INNER_LOOP (ddr)] = 1;
3175 save_dist_v (ddr, dist_v);
3178 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
3179 is the case for example when access functions are the same and
3180 equal to a constant, as in:
3182 | loop_1
3183 | A[3] = ...
3184 | ... = A[3]
3185 | endloop_1
3187 in which case the distance vectors are (0) and (1). */
3189 static void
3190 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
3192 unsigned i, j;
3194 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3196 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
3197 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
3198 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
3200 for (j = 0; j < ca->n; j++)
3201 if (affine_function_zero_p (ca->fns[j]))
3203 insert_innermost_unit_dist_vector (ddr);
3204 return;
3207 for (j = 0; j < cb->n; j++)
3208 if (affine_function_zero_p (cb->fns[j]))
3210 insert_innermost_unit_dist_vector (ddr);
3211 return;
3216 /* Compute the classic per loop distance vector. DDR is the data
3217 dependence relation to build a vector from. Return false when fail
3218 to represent the data dependence as a distance vector. */
3220 static bool
3221 build_classic_dist_vector (struct data_dependence_relation *ddr,
3222 struct loop *loop_nest)
3224 bool init_b = false;
3225 int index_carry = DDR_NB_LOOPS (ddr);
3226 lambda_vector dist_v;
3228 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3229 return false;
3231 if (same_access_functions (ddr))
3233 /* Save the 0 vector. */
3234 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3235 save_dist_v (ddr, dist_v);
3237 if (constant_access_functions (ddr))
3238 add_distance_for_zero_overlaps (ddr);
3240 if (DDR_NB_LOOPS (ddr) > 1)
3241 add_other_self_distances (ddr);
3243 return true;
3246 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3247 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3248 dist_v, &init_b, &index_carry))
3249 return false;
3251 /* Save the distance vector if we initialized one. */
3252 if (init_b)
3254 /* Verify a basic constraint: classic distance vectors should
3255 always be lexicographically positive.
3257 Data references are collected in the order of execution of
3258 the program, thus for the following loop
3260 | for (i = 1; i < 100; i++)
3261 | for (j = 1; j < 100; j++)
3263 | t = T[j+1][i-1]; // A
3264 | T[j][i] = t + 2; // B
3267 references are collected following the direction of the wind:
3268 A then B. The data dependence tests are performed also
3269 following this order, such that we're looking at the distance
3270 separating the elements accessed by A from the elements later
3271 accessed by B. But in this example, the distance returned by
3272 test_dep (A, B) is lexicographically negative (-1, 1), that
3273 means that the access A occurs later than B with respect to
3274 the outer loop, ie. we're actually looking upwind. In this
3275 case we solve test_dep (B, A) looking downwind to the
3276 lexicographically positive solution, that returns the
3277 distance vector (1, -1). */
3278 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3280 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3281 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3282 loop_nest))
3283 return false;
3284 compute_subscript_distance (ddr);
3285 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3286 save_v, &init_b, &index_carry))
3287 return false;
3288 save_dist_v (ddr, save_v);
3289 DDR_REVERSED_P (ddr) = true;
3291 /* In this case there is a dependence forward for all the
3292 outer loops:
3294 | for (k = 1; k < 100; k++)
3295 | for (i = 1; i < 100; i++)
3296 | for (j = 1; j < 100; j++)
3298 | t = T[j+1][i-1]; // A
3299 | T[j][i] = t + 2; // B
3302 the vectors are:
3303 (0, 1, -1)
3304 (1, 1, -1)
3305 (1, -1, 1)
3307 if (DDR_NB_LOOPS (ddr) > 1)
3309 add_outer_distances (ddr, save_v, index_carry);
3310 add_outer_distances (ddr, dist_v, index_carry);
3313 else
3315 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3316 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3318 if (DDR_NB_LOOPS (ddr) > 1)
3320 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3322 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3323 DDR_A (ddr), loop_nest))
3324 return false;
3325 compute_subscript_distance (ddr);
3326 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3327 opposite_v, &init_b,
3328 &index_carry))
3329 return false;
3331 save_dist_v (ddr, save_v);
3332 add_outer_distances (ddr, dist_v, index_carry);
3333 add_outer_distances (ddr, opposite_v, index_carry);
3335 else
3336 save_dist_v (ddr, save_v);
3339 else
3341 /* There is a distance of 1 on all the outer loops: Example:
3342 there is a dependence of distance 1 on loop_1 for the array A.
3344 | loop_1
3345 | A[5] = ...
3346 | endloop
3348 add_outer_distances (ddr, dist_v,
3349 lambda_vector_first_nz (dist_v,
3350 DDR_NB_LOOPS (ddr), 0));
3353 if (dump_file && (dump_flags & TDF_DETAILS))
3355 unsigned i;
3357 fprintf (dump_file, "(build_classic_dist_vector\n");
3358 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3360 fprintf (dump_file, " dist_vector = (");
3361 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3362 DDR_NB_LOOPS (ddr));
3363 fprintf (dump_file, " )\n");
3365 fprintf (dump_file, ")\n");
3368 return true;
3371 /* Return the direction for a given distance.
3372 FIXME: Computing dir this way is suboptimal, since dir can catch
3373 cases that dist is unable to represent. */
3375 static inline enum data_dependence_direction
3376 dir_from_dist (int dist)
3378 if (dist > 0)
3379 return dir_positive;
3380 else if (dist < 0)
3381 return dir_negative;
3382 else
3383 return dir_equal;
3386 /* Compute the classic per loop direction vector. DDR is the data
3387 dependence relation to build a vector from. */
3389 static void
3390 build_classic_dir_vector (struct data_dependence_relation *ddr)
3392 unsigned i, j;
3393 lambda_vector dist_v;
3395 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
3397 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3399 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3400 dir_v[j] = dir_from_dist (dist_v[j]);
3402 save_dir_v (ddr, dir_v);
3406 /* Helper function. Returns true when there is a dependence between
3407 data references DRA and DRB. */
3409 static bool
3410 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3411 struct data_reference *dra,
3412 struct data_reference *drb,
3413 struct loop *loop_nest)
3415 unsigned int i;
3416 tree last_conflicts;
3417 struct subscript *subscript;
3419 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3420 i++)
3422 conflict_function *overlaps_a, *overlaps_b;
3424 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3425 DR_ACCESS_FN (drb, i),
3426 &overlaps_a, &overlaps_b,
3427 &last_conflicts, loop_nest);
3429 if (CF_NOT_KNOWN_P (overlaps_a)
3430 || CF_NOT_KNOWN_P (overlaps_b))
3432 finalize_ddr_dependent (ddr, chrec_dont_know);
3433 dependence_stats.num_dependence_undetermined++;
3434 free_conflict_function (overlaps_a);
3435 free_conflict_function (overlaps_b);
3436 return false;
3439 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3440 || CF_NO_DEPENDENCE_P (overlaps_b))
3442 finalize_ddr_dependent (ddr, chrec_known);
3443 dependence_stats.num_dependence_independent++;
3444 free_conflict_function (overlaps_a);
3445 free_conflict_function (overlaps_b);
3446 return false;
3449 else
3451 if (SUB_CONFLICTS_IN_A (subscript))
3452 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3453 if (SUB_CONFLICTS_IN_B (subscript))
3454 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3456 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3457 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3458 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3462 return true;
3465 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3467 static void
3468 subscript_dependence_tester (struct data_dependence_relation *ddr,
3469 struct loop *loop_nest)
3472 if (dump_file && (dump_flags & TDF_DETAILS))
3473 fprintf (dump_file, "(subscript_dependence_tester \n");
3475 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3476 dependence_stats.num_dependence_dependent++;
3478 compute_subscript_distance (ddr);
3479 if (build_classic_dist_vector (ddr, loop_nest))
3480 build_classic_dir_vector (ddr);
3482 if (dump_file && (dump_flags & TDF_DETAILS))
3483 fprintf (dump_file, ")\n");
3486 /* Returns true when all the access functions of A are affine or
3487 constant with respect to LOOP_NEST. */
3489 static bool
3490 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3491 const struct loop *loop_nest)
3493 unsigned int i;
3494 VEC(tree,heap) *fns = DR_ACCESS_FNS (a);
3495 tree t;
3497 FOR_EACH_VEC_ELT (tree, fns, i, t)
3498 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3499 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3500 return false;
3502 return true;
3505 /* Initializes an equation for an OMEGA problem using the information
3506 contained in the ACCESS_FUN. Returns true when the operation
3507 succeeded.
3509 PB is the omega constraint system.
3510 EQ is the number of the equation to be initialized.
3511 OFFSET is used for shifting the variables names in the constraints:
3512 a constrain is composed of 2 * the number of variables surrounding
3513 dependence accesses. OFFSET is set either to 0 for the first n variables,
3514 then it is set to n.
3515 ACCESS_FUN is expected to be an affine chrec. */
3517 static bool
3518 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3519 unsigned int offset, tree access_fun,
3520 struct data_dependence_relation *ddr)
3522 switch (TREE_CODE (access_fun))
3524 case POLYNOMIAL_CHREC:
3526 tree left = CHREC_LEFT (access_fun);
3527 tree right = CHREC_RIGHT (access_fun);
3528 int var = CHREC_VARIABLE (access_fun);
3529 unsigned var_idx;
3531 if (TREE_CODE (right) != INTEGER_CST)
3532 return false;
3534 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3535 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3537 /* Compute the innermost loop index. */
3538 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3540 if (offset == 0)
3541 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3542 += int_cst_value (right);
3544 switch (TREE_CODE (left))
3546 case POLYNOMIAL_CHREC:
3547 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3549 case INTEGER_CST:
3550 pb->eqs[eq].coef[0] += int_cst_value (left);
3551 return true;
3553 default:
3554 return false;
3558 case INTEGER_CST:
3559 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3560 return true;
3562 default:
3563 return false;
3567 /* As explained in the comments preceding init_omega_for_ddr, we have
3568 to set up a system for each loop level, setting outer loops
3569 variation to zero, and current loop variation to positive or zero.
3570 Save each lexico positive distance vector. */
3572 static void
3573 omega_extract_distance_vectors (omega_pb pb,
3574 struct data_dependence_relation *ddr)
3576 int eq, geq;
3577 unsigned i, j;
3578 struct loop *loopi, *loopj;
3579 enum omega_result res;
3581 /* Set a new problem for each loop in the nest. The basis is the
3582 problem that we have initialized until now. On top of this we
3583 add new constraints. */
3584 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3585 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3587 int dist = 0;
3588 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3589 DDR_NB_LOOPS (ddr));
3591 omega_copy_problem (copy, pb);
3593 /* For all the outer loops "loop_j", add "dj = 0". */
3594 for (j = 0;
3595 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3597 eq = omega_add_zero_eq (copy, omega_black);
3598 copy->eqs[eq].coef[j + 1] = 1;
3601 /* For "loop_i", add "0 <= di". */
3602 geq = omega_add_zero_geq (copy, omega_black);
3603 copy->geqs[geq].coef[i + 1] = 1;
3605 /* Reduce the constraint system, and test that the current
3606 problem is feasible. */
3607 res = omega_simplify_problem (copy);
3608 if (res == omega_false
3609 || res == omega_unknown
3610 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3611 goto next_problem;
3613 for (eq = 0; eq < copy->num_subs; eq++)
3614 if (copy->subs[eq].key == (int) i + 1)
3616 dist = copy->subs[eq].coef[0];
3617 goto found_dist;
3620 if (dist == 0)
3622 /* Reinitialize problem... */
3623 omega_copy_problem (copy, pb);
3624 for (j = 0;
3625 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3627 eq = omega_add_zero_eq (copy, omega_black);
3628 copy->eqs[eq].coef[j + 1] = 1;
3631 /* ..., but this time "di = 1". */
3632 eq = omega_add_zero_eq (copy, omega_black);
3633 copy->eqs[eq].coef[i + 1] = 1;
3634 copy->eqs[eq].coef[0] = -1;
3636 res = omega_simplify_problem (copy);
3637 if (res == omega_false
3638 || res == omega_unknown
3639 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3640 goto next_problem;
3642 for (eq = 0; eq < copy->num_subs; eq++)
3643 if (copy->subs[eq].key == (int) i + 1)
3645 dist = copy->subs[eq].coef[0];
3646 goto found_dist;
3650 found_dist:;
3651 /* Save the lexicographically positive distance vector. */
3652 if (dist >= 0)
3654 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3655 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3657 dist_v[i] = dist;
3659 for (eq = 0; eq < copy->num_subs; eq++)
3660 if (copy->subs[eq].key > 0)
3662 dist = copy->subs[eq].coef[0];
3663 dist_v[copy->subs[eq].key - 1] = dist;
3666 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3667 dir_v[j] = dir_from_dist (dist_v[j]);
3669 save_dist_v (ddr, dist_v);
3670 save_dir_v (ddr, dir_v);
3673 next_problem:;
3674 omega_free_problem (copy);
3678 /* This is called for each subscript of a tuple of data references:
3679 insert an equality for representing the conflicts. */
3681 static bool
3682 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3683 struct data_dependence_relation *ddr,
3684 omega_pb pb, bool *maybe_dependent)
3686 int eq;
3687 tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3688 TREE_TYPE (access_fun_b));
3689 tree fun_a = chrec_convert (type, access_fun_a, NULL);
3690 tree fun_b = chrec_convert (type, access_fun_b, NULL);
3691 tree difference = chrec_fold_minus (type, fun_a, fun_b);
3692 tree minus_one;
3694 /* When the fun_a - fun_b is not constant, the dependence is not
3695 captured by the classic distance vector representation. */
3696 if (TREE_CODE (difference) != INTEGER_CST)
3697 return false;
3699 /* ZIV test. */
3700 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3702 /* There is no dependence. */
3703 *maybe_dependent = false;
3704 return true;
3707 minus_one = build_int_cst (type, -1);
3708 fun_b = chrec_fold_multiply (type, fun_b, minus_one);
3710 eq = omega_add_zero_eq (pb, omega_black);
3711 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3712 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3713 /* There is probably a dependence, but the system of
3714 constraints cannot be built: answer "don't know". */
3715 return false;
3717 /* GCD test. */
3718 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3719 && !int_divides_p (lambda_vector_gcd
3720 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3721 2 * DDR_NB_LOOPS (ddr)),
3722 pb->eqs[eq].coef[0]))
3724 /* There is no dependence. */
3725 *maybe_dependent = false;
3726 return true;
3729 return true;
3732 /* Helper function, same as init_omega_for_ddr but specialized for
3733 data references A and B. */
3735 static bool
3736 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3737 struct data_dependence_relation *ddr,
3738 omega_pb pb, bool *maybe_dependent)
3740 unsigned i;
3741 int ineq;
3742 struct loop *loopi;
3743 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3745 /* Insert an equality per subscript. */
3746 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3748 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3749 ddr, pb, maybe_dependent))
3750 return false;
3751 else if (*maybe_dependent == false)
3753 /* There is no dependence. */
3754 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3755 return true;
3759 /* Insert inequalities: constraints corresponding to the iteration
3760 domain, i.e. the loops surrounding the references "loop_x" and
3761 the distance variables "dx". The layout of the OMEGA
3762 representation is as follows:
3763 - coef[0] is the constant
3764 - coef[1..nb_loops] are the protected variables that will not be
3765 removed by the solver: the "dx"
3766 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3768 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3769 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3771 HOST_WIDE_INT nbi = max_stmt_executions_int (loopi, true);
3773 /* 0 <= loop_x */
3774 ineq = omega_add_zero_geq (pb, omega_black);
3775 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3777 /* 0 <= loop_x + dx */
3778 ineq = omega_add_zero_geq (pb, omega_black);
3779 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3780 pb->geqs[ineq].coef[i + 1] = 1;
3782 if (nbi != -1)
3784 /* loop_x <= nb_iters */
3785 ineq = omega_add_zero_geq (pb, omega_black);
3786 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3787 pb->geqs[ineq].coef[0] = nbi;
3789 /* loop_x + dx <= nb_iters */
3790 ineq = omega_add_zero_geq (pb, omega_black);
3791 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3792 pb->geqs[ineq].coef[i + 1] = -1;
3793 pb->geqs[ineq].coef[0] = nbi;
3795 /* A step "dx" bigger than nb_iters is not feasible, so
3796 add "0 <= nb_iters + dx", */
3797 ineq = omega_add_zero_geq (pb, omega_black);
3798 pb->geqs[ineq].coef[i + 1] = 1;
3799 pb->geqs[ineq].coef[0] = nbi;
3800 /* and "dx <= nb_iters". */
3801 ineq = omega_add_zero_geq (pb, omega_black);
3802 pb->geqs[ineq].coef[i + 1] = -1;
3803 pb->geqs[ineq].coef[0] = nbi;
3807 omega_extract_distance_vectors (pb, ddr);
3809 return true;
3812 /* Sets up the Omega dependence problem for the data dependence
3813 relation DDR. Returns false when the constraint system cannot be
3814 built, ie. when the test answers "don't know". Returns true
3815 otherwise, and when independence has been proved (using one of the
3816 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3817 set MAYBE_DEPENDENT to true.
3819 Example: for setting up the dependence system corresponding to the
3820 conflicting accesses
3822 | loop_i
3823 | loop_j
3824 | A[i, i+1] = ...
3825 | ... A[2*j, 2*(i + j)]
3826 | endloop_j
3827 | endloop_i
3829 the following constraints come from the iteration domain:
3831 0 <= i <= Ni
3832 0 <= i + di <= Ni
3833 0 <= j <= Nj
3834 0 <= j + dj <= Nj
3836 where di, dj are the distance variables. The constraints
3837 representing the conflicting elements are:
3839 i = 2 * (j + dj)
3840 i + 1 = 2 * (i + di + j + dj)
3842 For asking that the resulting distance vector (di, dj) be
3843 lexicographically positive, we insert the constraint "di >= 0". If
3844 "di = 0" in the solution, we fix that component to zero, and we
3845 look at the inner loops: we set a new problem where all the outer
3846 loop distances are zero, and fix this inner component to be
3847 positive. When one of the components is positive, we save that
3848 distance, and set a new problem where the distance on this loop is
3849 zero, searching for other distances in the inner loops. Here is
3850 the classic example that illustrates that we have to set for each
3851 inner loop a new problem:
3853 | loop_1
3854 | loop_2
3855 | A[10]
3856 | endloop_2
3857 | endloop_1
3859 we have to save two distances (1, 0) and (0, 1).
3861 Given two array references, refA and refB, we have to set the
3862 dependence problem twice, refA vs. refB and refB vs. refA, and we
3863 cannot do a single test, as refB might occur before refA in the
3864 inner loops, and the contrary when considering outer loops: ex.
3866 | loop_0
3867 | loop_1
3868 | loop_2
3869 | T[{1,+,1}_2][{1,+,1}_1] // refA
3870 | T[{2,+,1}_2][{0,+,1}_1] // refB
3871 | endloop_2
3872 | endloop_1
3873 | endloop_0
3875 refB touches the elements in T before refA, and thus for the same
3876 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3877 but for successive loop_0 iterations, we have (1, -1, 1)
3879 The Omega solver expects the distance variables ("di" in the
3880 previous example) to come first in the constraint system (as
3881 variables to be protected, or "safe" variables), the constraint
3882 system is built using the following layout:
3884 "cst | distance vars | index vars".
3887 static bool
3888 init_omega_for_ddr (struct data_dependence_relation *ddr,
3889 bool *maybe_dependent)
3891 omega_pb pb;
3892 bool res = false;
3894 *maybe_dependent = true;
3896 if (same_access_functions (ddr))
3898 unsigned j;
3899 lambda_vector dir_v;
3901 /* Save the 0 vector. */
3902 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3903 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3904 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3905 dir_v[j] = dir_equal;
3906 save_dir_v (ddr, dir_v);
3908 /* Save the dependences carried by outer loops. */
3909 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3910 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3911 maybe_dependent);
3912 omega_free_problem (pb);
3913 return res;
3916 /* Omega expects the protected variables (those that have to be kept
3917 after elimination) to appear first in the constraint system.
3918 These variables are the distance variables. In the following
3919 initialization we declare NB_LOOPS safe variables, and the total
3920 number of variables for the constraint system is 2*NB_LOOPS. */
3921 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3922 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3923 maybe_dependent);
3924 omega_free_problem (pb);
3926 /* Stop computation if not decidable, or no dependence. */
3927 if (res == false || *maybe_dependent == false)
3928 return res;
3930 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3931 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
3932 maybe_dependent);
3933 omega_free_problem (pb);
3935 return res;
3938 /* Return true when DDR contains the same information as that stored
3939 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
3941 static bool
3942 ddr_consistent_p (FILE *file,
3943 struct data_dependence_relation *ddr,
3944 VEC (lambda_vector, heap) *dist_vects,
3945 VEC (lambda_vector, heap) *dir_vects)
3947 unsigned int i, j;
3949 /* If dump_file is set, output there. */
3950 if (dump_file && (dump_flags & TDF_DETAILS))
3951 file = dump_file;
3953 if (VEC_length (lambda_vector, dist_vects) != DDR_NUM_DIST_VECTS (ddr))
3955 lambda_vector b_dist_v;
3956 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
3957 VEC_length (lambda_vector, dist_vects),
3958 DDR_NUM_DIST_VECTS (ddr));
3960 fprintf (file, "Banerjee dist vectors:\n");
3961 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, i, b_dist_v)
3962 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
3964 fprintf (file, "Omega dist vectors:\n");
3965 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3966 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
3968 fprintf (file, "data dependence relation:\n");
3969 dump_data_dependence_relation (file, ddr);
3971 fprintf (file, ")\n");
3972 return false;
3975 if (VEC_length (lambda_vector, dir_vects) != DDR_NUM_DIR_VECTS (ddr))
3977 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
3978 VEC_length (lambda_vector, dir_vects),
3979 DDR_NUM_DIR_VECTS (ddr));
3980 return false;
3983 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3985 lambda_vector a_dist_v;
3986 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
3988 /* Distance vectors are not ordered in the same way in the DDR
3989 and in the DIST_VECTS: search for a matching vector. */
3990 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, a_dist_v)
3991 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
3992 break;
3994 if (j == VEC_length (lambda_vector, dist_vects))
3996 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
3997 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
3998 fprintf (file, "not found in Omega dist vectors:\n");
3999 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
4000 fprintf (file, "data dependence relation:\n");
4001 dump_data_dependence_relation (file, ddr);
4002 fprintf (file, ")\n");
4006 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
4008 lambda_vector a_dir_v;
4009 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
4011 /* Direction vectors are not ordered in the same way in the DDR
4012 and in the DIR_VECTS: search for a matching vector. */
4013 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, a_dir_v)
4014 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
4015 break;
4017 if (j == VEC_length (lambda_vector, dist_vects))
4019 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
4020 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
4021 fprintf (file, "not found in Omega dir vectors:\n");
4022 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
4023 fprintf (file, "data dependence relation:\n");
4024 dump_data_dependence_relation (file, ddr);
4025 fprintf (file, ")\n");
4029 return true;
4032 /* This computes the affine dependence relation between A and B with
4033 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
4034 independence between two accesses, while CHREC_DONT_KNOW is used
4035 for representing the unknown relation.
4037 Note that it is possible to stop the computation of the dependence
4038 relation the first time we detect a CHREC_KNOWN element for a given
4039 subscript. */
4041 static void
4042 compute_affine_dependence (struct data_dependence_relation *ddr,
4043 struct loop *loop_nest)
4045 struct data_reference *dra = DDR_A (ddr);
4046 struct data_reference *drb = DDR_B (ddr);
4048 if (dump_file && (dump_flags & TDF_DETAILS))
4050 fprintf (dump_file, "(compute_affine_dependence\n");
4051 fprintf (dump_file, " (stmt_a = \n");
4052 print_gimple_stmt (dump_file, DR_STMT (dra), 0, 0);
4053 fprintf (dump_file, ")\n (stmt_b = \n");
4054 print_gimple_stmt (dump_file, DR_STMT (drb), 0, 0);
4055 fprintf (dump_file, ")\n");
4058 /* Analyze only when the dependence relation is not yet known. */
4059 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4061 dependence_stats.num_dependence_tests++;
4063 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
4064 && access_functions_are_affine_or_constant_p (drb, loop_nest))
4066 if (flag_check_data_deps)
4068 /* Compute the dependences using the first algorithm. */
4069 subscript_dependence_tester (ddr, loop_nest);
4071 if (dump_file && (dump_flags & TDF_DETAILS))
4073 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
4074 dump_data_dependence_relation (dump_file, ddr);
4077 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4079 bool maybe_dependent;
4080 VEC (lambda_vector, heap) *dir_vects, *dist_vects;
4082 /* Save the result of the first DD analyzer. */
4083 dist_vects = DDR_DIST_VECTS (ddr);
4084 dir_vects = DDR_DIR_VECTS (ddr);
4086 /* Reset the information. */
4087 DDR_DIST_VECTS (ddr) = NULL;
4088 DDR_DIR_VECTS (ddr) = NULL;
4090 /* Compute the same information using Omega. */
4091 if (!init_omega_for_ddr (ddr, &maybe_dependent))
4092 goto csys_dont_know;
4094 if (dump_file && (dump_flags & TDF_DETAILS))
4096 fprintf (dump_file, "Omega Analyzer\n");
4097 dump_data_dependence_relation (dump_file, ddr);
4100 /* Check that we get the same information. */
4101 if (maybe_dependent)
4102 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
4103 dir_vects));
4106 else
4107 subscript_dependence_tester (ddr, loop_nest);
4110 /* As a last case, if the dependence cannot be determined, or if
4111 the dependence is considered too difficult to determine, answer
4112 "don't know". */
4113 else
4115 csys_dont_know:;
4116 dependence_stats.num_dependence_undetermined++;
4118 if (dump_file && (dump_flags & TDF_DETAILS))
4120 fprintf (dump_file, "Data ref a:\n");
4121 dump_data_reference (dump_file, dra);
4122 fprintf (dump_file, "Data ref b:\n");
4123 dump_data_reference (dump_file, drb);
4124 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4126 finalize_ddr_dependent (ddr, chrec_dont_know);
4130 if (dump_file && (dump_flags & TDF_DETAILS))
4131 fprintf (dump_file, ")\n");
4134 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4135 the data references in DATAREFS, in the LOOP_NEST. When
4136 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4137 relations. */
4139 void
4140 compute_all_dependences (VEC (data_reference_p, heap) *datarefs,
4141 VEC (ddr_p, heap) **dependence_relations,
4142 VEC (loop_p, heap) *loop_nest,
4143 bool compute_self_and_rr)
4145 struct data_dependence_relation *ddr;
4146 struct data_reference *a, *b;
4147 unsigned int i, j;
4149 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4150 for (j = i + 1; VEC_iterate (data_reference_p, datarefs, j, b); j++)
4151 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4153 ddr = initialize_data_dependence_relation (a, b, loop_nest);
4154 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4155 if (loop_nest)
4156 compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
4159 if (compute_self_and_rr)
4160 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4162 ddr = initialize_data_dependence_relation (a, a, loop_nest);
4163 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4164 if (loop_nest)
4165 compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
4169 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4170 true if STMT clobbers memory, false otherwise. */
4172 bool
4173 get_references_in_stmt (gimple stmt, VEC (data_ref_loc, heap) **references)
4175 bool clobbers_memory = false;
4176 data_ref_loc *ref;
4177 tree *op0, *op1;
4178 enum gimple_code stmt_code = gimple_code (stmt);
4180 *references = NULL;
4182 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4183 Calls have side-effects, except those to const or pure
4184 functions. */
4185 if ((stmt_code == GIMPLE_CALL
4186 && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)))
4187 || (stmt_code == GIMPLE_ASM
4188 && gimple_asm_volatile_p (stmt)))
4189 clobbers_memory = true;
4191 if (!gimple_vuse (stmt))
4192 return clobbers_memory;
4194 if (stmt_code == GIMPLE_ASSIGN)
4196 tree base;
4197 op0 = gimple_assign_lhs_ptr (stmt);
4198 op1 = gimple_assign_rhs1_ptr (stmt);
4200 if (DECL_P (*op1)
4201 || (REFERENCE_CLASS_P (*op1)
4202 && (base = get_base_address (*op1))
4203 && TREE_CODE (base) != SSA_NAME))
4205 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4206 ref->pos = op1;
4207 ref->is_read = true;
4210 else if (stmt_code == GIMPLE_CALL)
4212 unsigned i, n;
4214 op0 = gimple_call_lhs_ptr (stmt);
4215 n = gimple_call_num_args (stmt);
4216 for (i = 0; i < n; i++)
4218 op1 = gimple_call_arg_ptr (stmt, i);
4220 if (DECL_P (*op1)
4221 || (REFERENCE_CLASS_P (*op1) && get_base_address (*op1)))
4223 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4224 ref->pos = op1;
4225 ref->is_read = true;
4229 else
4230 return clobbers_memory;
4232 if (*op0
4233 && (DECL_P (*op0)
4234 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0))))
4236 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4237 ref->pos = op0;
4238 ref->is_read = false;
4240 return clobbers_memory;
4243 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4244 reference, returns false, otherwise returns true. NEST is the outermost
4245 loop of the loop nest in which the references should be analyzed. */
4247 bool
4248 find_data_references_in_stmt (struct loop *nest, gimple stmt,
4249 VEC (data_reference_p, heap) **datarefs)
4251 unsigned i;
4252 VEC (data_ref_loc, heap) *references;
4253 data_ref_loc *ref;
4254 bool ret = true;
4255 data_reference_p dr;
4257 if (get_references_in_stmt (stmt, &references))
4259 VEC_free (data_ref_loc, heap, references);
4260 return false;
4263 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4265 dr = create_data_ref (nest, loop_containing_stmt (stmt),
4266 *ref->pos, stmt, ref->is_read);
4267 gcc_assert (dr != NULL);
4268 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4270 VEC_free (data_ref_loc, heap, references);
4271 return ret;
4274 /* Stores the data references in STMT to DATAREFS. If there is an
4275 unanalyzable reference, returns false, otherwise returns true.
4276 NEST is the outermost loop of the loop nest in which the references
4277 should be instantiated, LOOP is the loop in which the references
4278 should be analyzed. */
4280 bool
4281 graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple stmt,
4282 VEC (data_reference_p, heap) **datarefs)
4284 unsigned i;
4285 VEC (data_ref_loc, heap) *references;
4286 data_ref_loc *ref;
4287 bool ret = true;
4288 data_reference_p dr;
4290 if (get_references_in_stmt (stmt, &references))
4292 VEC_free (data_ref_loc, heap, references);
4293 return false;
4296 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4298 dr = create_data_ref (nest, loop, *ref->pos, stmt, ref->is_read);
4299 gcc_assert (dr != NULL);
4300 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4303 VEC_free (data_ref_loc, heap, references);
4304 return ret;
4307 /* Search the data references in LOOP, and record the information into
4308 DATAREFS. Returns chrec_dont_know when failing to analyze a
4309 difficult case, returns NULL_TREE otherwise. */
4311 tree
4312 find_data_references_in_bb (struct loop *loop, basic_block bb,
4313 VEC (data_reference_p, heap) **datarefs)
4315 gimple_stmt_iterator bsi;
4317 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4319 gimple stmt = gsi_stmt (bsi);
4321 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4323 struct data_reference *res;
4324 res = XCNEW (struct data_reference);
4325 VEC_safe_push (data_reference_p, heap, *datarefs, res);
4327 return chrec_dont_know;
4331 return NULL_TREE;
4334 /* Search the data references in LOOP, and record the information into
4335 DATAREFS. Returns chrec_dont_know when failing to analyze a
4336 difficult case, returns NULL_TREE otherwise.
4338 TODO: This function should be made smarter so that it can handle address
4339 arithmetic as if they were array accesses, etc. */
4341 tree
4342 find_data_references_in_loop (struct loop *loop,
4343 VEC (data_reference_p, heap) **datarefs)
4345 basic_block bb, *bbs;
4346 unsigned int i;
4348 bbs = get_loop_body_in_dom_order (loop);
4350 for (i = 0; i < loop->num_nodes; i++)
4352 bb = bbs[i];
4354 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4356 free (bbs);
4357 return chrec_dont_know;
4360 free (bbs);
4362 return NULL_TREE;
4365 /* Recursive helper function. */
4367 static bool
4368 find_loop_nest_1 (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4370 /* Inner loops of the nest should not contain siblings. Example:
4371 when there are two consecutive loops,
4373 | loop_0
4374 | loop_1
4375 | A[{0, +, 1}_1]
4376 | endloop_1
4377 | loop_2
4378 | A[{0, +, 1}_2]
4379 | endloop_2
4380 | endloop_0
4382 the dependence relation cannot be captured by the distance
4383 abstraction. */
4384 if (loop->next)
4385 return false;
4387 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4388 if (loop->inner)
4389 return find_loop_nest_1 (loop->inner, loop_nest);
4390 return true;
4393 /* Return false when the LOOP is not well nested. Otherwise return
4394 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4395 contain the loops from the outermost to the innermost, as they will
4396 appear in the classic distance vector. */
4398 bool
4399 find_loop_nest (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4401 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4402 if (loop->inner)
4403 return find_loop_nest_1 (loop->inner, loop_nest);
4404 return true;
4407 /* Returns true when the data dependences have been computed, false otherwise.
4408 Given a loop nest LOOP, the following vectors are returned:
4409 DATAREFS is initialized to all the array elements contained in this loop,
4410 DEPENDENCE_RELATIONS contains the relations between the data references.
4411 Compute read-read and self relations if
4412 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4414 bool
4415 compute_data_dependences_for_loop (struct loop *loop,
4416 bool compute_self_and_read_read_dependences,
4417 VEC (loop_p, heap) **loop_nest,
4418 VEC (data_reference_p, heap) **datarefs,
4419 VEC (ddr_p, heap) **dependence_relations)
4421 bool res = true;
4423 memset (&dependence_stats, 0, sizeof (dependence_stats));
4425 /* If the loop nest is not well formed, or one of the data references
4426 is not computable, give up without spending time to compute other
4427 dependences. */
4428 if (!loop
4429 || !find_loop_nest (loop, loop_nest)
4430 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know)
4432 struct data_dependence_relation *ddr;
4434 /* Insert a single relation into dependence_relations:
4435 chrec_dont_know. */
4436 ddr = initialize_data_dependence_relation (NULL, NULL, *loop_nest);
4437 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4438 res = false;
4440 else
4441 compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
4442 compute_self_and_read_read_dependences);
4444 if (dump_file && (dump_flags & TDF_STATS))
4446 fprintf (dump_file, "Dependence tester statistics:\n");
4448 fprintf (dump_file, "Number of dependence tests: %d\n",
4449 dependence_stats.num_dependence_tests);
4450 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4451 dependence_stats.num_dependence_dependent);
4452 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4453 dependence_stats.num_dependence_independent);
4454 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4455 dependence_stats.num_dependence_undetermined);
4457 fprintf (dump_file, "Number of subscript tests: %d\n",
4458 dependence_stats.num_subscript_tests);
4459 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4460 dependence_stats.num_subscript_undetermined);
4461 fprintf (dump_file, "Number of same subscript function: %d\n",
4462 dependence_stats.num_same_subscript_function);
4464 fprintf (dump_file, "Number of ziv tests: %d\n",
4465 dependence_stats.num_ziv);
4466 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4467 dependence_stats.num_ziv_dependent);
4468 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4469 dependence_stats.num_ziv_independent);
4470 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4471 dependence_stats.num_ziv_unimplemented);
4473 fprintf (dump_file, "Number of siv tests: %d\n",
4474 dependence_stats.num_siv);
4475 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4476 dependence_stats.num_siv_dependent);
4477 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4478 dependence_stats.num_siv_independent);
4479 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4480 dependence_stats.num_siv_unimplemented);
4482 fprintf (dump_file, "Number of miv tests: %d\n",
4483 dependence_stats.num_miv);
4484 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4485 dependence_stats.num_miv_dependent);
4486 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4487 dependence_stats.num_miv_independent);
4488 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4489 dependence_stats.num_miv_unimplemented);
4492 return res;
4495 /* Returns true when the data dependences for the basic block BB have been
4496 computed, false otherwise.
4497 DATAREFS is initialized to all the array elements contained in this basic
4498 block, DEPENDENCE_RELATIONS contains the relations between the data
4499 references. Compute read-read and self relations if
4500 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4501 bool
4502 compute_data_dependences_for_bb (basic_block bb,
4503 bool compute_self_and_read_read_dependences,
4504 VEC (data_reference_p, heap) **datarefs,
4505 VEC (ddr_p, heap) **dependence_relations)
4507 if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know)
4508 return false;
4510 compute_all_dependences (*datarefs, dependence_relations, NULL,
4511 compute_self_and_read_read_dependences);
4512 return true;
4515 /* Entry point (for testing only). Analyze all the data references
4516 and the dependence relations in LOOP.
4518 The data references are computed first.
4520 A relation on these nodes is represented by a complete graph. Some
4521 of the relations could be of no interest, thus the relations can be
4522 computed on demand.
4524 In the following function we compute all the relations. This is
4525 just a first implementation that is here for:
4526 - for showing how to ask for the dependence relations,
4527 - for the debugging the whole dependence graph,
4528 - for the dejagnu testcases and maintenance.
4530 It is possible to ask only for a part of the graph, avoiding to
4531 compute the whole dependence graph. The computed dependences are
4532 stored in a knowledge base (KB) such that later queries don't
4533 recompute the same information. The implementation of this KB is
4534 transparent to the optimizer, and thus the KB can be changed with a
4535 more efficient implementation, or the KB could be disabled. */
4536 static void
4537 analyze_all_data_dependences (struct loop *loop)
4539 unsigned int i;
4540 int nb_data_refs = 10;
4541 VEC (data_reference_p, heap) *datarefs =
4542 VEC_alloc (data_reference_p, heap, nb_data_refs);
4543 VEC (ddr_p, heap) *dependence_relations =
4544 VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs);
4545 VEC (loop_p, heap) *loop_nest = VEC_alloc (loop_p, heap, 3);
4547 /* Compute DDs on the whole function. */
4548 compute_data_dependences_for_loop (loop, false, &loop_nest, &datarefs,
4549 &dependence_relations);
4551 if (dump_file)
4553 dump_data_dependence_relations (dump_file, dependence_relations);
4554 fprintf (dump_file, "\n\n");
4556 if (dump_flags & TDF_DETAILS)
4557 dump_dist_dir_vectors (dump_file, dependence_relations);
4559 if (dump_flags & TDF_STATS)
4561 unsigned nb_top_relations = 0;
4562 unsigned nb_bot_relations = 0;
4563 unsigned nb_chrec_relations = 0;
4564 struct data_dependence_relation *ddr;
4566 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4568 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4569 nb_top_relations++;
4571 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4572 nb_bot_relations++;
4574 else
4575 nb_chrec_relations++;
4578 gather_stats_on_scev_database ();
4582 VEC_free (loop_p, heap, loop_nest);
4583 free_dependence_relations (dependence_relations);
4584 free_data_refs (datarefs);
4587 /* Computes all the data dependences and check that the results of
4588 several analyzers are the same. */
4590 void
4591 tree_check_data_deps (void)
4593 loop_iterator li;
4594 struct loop *loop_nest;
4596 FOR_EACH_LOOP (li, loop_nest, 0)
4597 analyze_all_data_dependences (loop_nest);
4600 /* Free the memory used by a data dependence relation DDR. */
4602 void
4603 free_dependence_relation (struct data_dependence_relation *ddr)
4605 if (ddr == NULL)
4606 return;
4608 if (DDR_SUBSCRIPTS (ddr))
4609 free_subscripts (DDR_SUBSCRIPTS (ddr));
4610 if (DDR_DIST_VECTS (ddr))
4611 VEC_free (lambda_vector, heap, DDR_DIST_VECTS (ddr));
4612 if (DDR_DIR_VECTS (ddr))
4613 VEC_free (lambda_vector, heap, DDR_DIR_VECTS (ddr));
4615 free (ddr);
4618 /* Free the memory used by the data dependence relations from
4619 DEPENDENCE_RELATIONS. */
4621 void
4622 free_dependence_relations (VEC (ddr_p, heap) *dependence_relations)
4624 unsigned int i;
4625 struct data_dependence_relation *ddr;
4627 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4628 if (ddr)
4629 free_dependence_relation (ddr);
4631 VEC_free (ddr_p, heap, dependence_relations);
4634 /* Free the memory used by the data references from DATAREFS. */
4636 void
4637 free_data_refs (VEC (data_reference_p, heap) *datarefs)
4639 unsigned int i;
4640 struct data_reference *dr;
4642 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
4643 free_data_ref (dr);
4644 VEC_free (data_reference_p, heap, datarefs);
4649 /* Dump vertex I in RDG to FILE. */
4651 void
4652 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
4654 struct vertex *v = &(rdg->vertices[i]);
4655 struct graph_edge *e;
4657 fprintf (file, "(vertex %d: (%s%s) (in:", i,
4658 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
4659 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
4661 if (v->pred)
4662 for (e = v->pred; e; e = e->pred_next)
4663 fprintf (file, " %d", e->src);
4665 fprintf (file, ") (out:");
4667 if (v->succ)
4668 for (e = v->succ; e; e = e->succ_next)
4669 fprintf (file, " %d", e->dest);
4671 fprintf (file, ")\n");
4672 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
4673 fprintf (file, ")\n");
4676 /* Call dump_rdg_vertex on stderr. */
4678 DEBUG_FUNCTION void
4679 debug_rdg_vertex (struct graph *rdg, int i)
4681 dump_rdg_vertex (stderr, rdg, i);
4684 /* Dump component C of RDG to FILE. If DUMPED is non-null, set the
4685 dumped vertices to that bitmap. */
4687 void dump_rdg_component (FILE *file, struct graph *rdg, int c, bitmap dumped)
4689 int i;
4691 fprintf (file, "(%d\n", c);
4693 for (i = 0; i < rdg->n_vertices; i++)
4694 if (rdg->vertices[i].component == c)
4696 if (dumped)
4697 bitmap_set_bit (dumped, i);
4699 dump_rdg_vertex (file, rdg, i);
4702 fprintf (file, ")\n");
4705 /* Call dump_rdg_vertex on stderr. */
4707 DEBUG_FUNCTION void
4708 debug_rdg_component (struct graph *rdg, int c)
4710 dump_rdg_component (stderr, rdg, c, NULL);
4713 /* Dump the reduced dependence graph RDG to FILE. */
4715 void
4716 dump_rdg (FILE *file, struct graph *rdg)
4718 int i;
4719 bitmap dumped = BITMAP_ALLOC (NULL);
4721 fprintf (file, "(rdg\n");
4723 for (i = 0; i < rdg->n_vertices; i++)
4724 if (!bitmap_bit_p (dumped, i))
4725 dump_rdg_component (file, rdg, rdg->vertices[i].component, dumped);
4727 fprintf (file, ")\n");
4728 BITMAP_FREE (dumped);
4731 /* Call dump_rdg on stderr. */
4733 DEBUG_FUNCTION void
4734 debug_rdg (struct graph *rdg)
4736 dump_rdg (stderr, rdg);
4739 static void
4740 dot_rdg_1 (FILE *file, struct graph *rdg)
4742 int i;
4744 fprintf (file, "digraph RDG {\n");
4746 for (i = 0; i < rdg->n_vertices; i++)
4748 struct vertex *v = &(rdg->vertices[i]);
4749 struct graph_edge *e;
4751 /* Highlight reads from memory. */
4752 if (RDG_MEM_READS_STMT (rdg, i))
4753 fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
4755 /* Highlight stores to memory. */
4756 if (RDG_MEM_WRITE_STMT (rdg, i))
4757 fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
4759 if (v->succ)
4760 for (e = v->succ; e; e = e->succ_next)
4761 switch (RDGE_TYPE (e))
4763 case input_dd:
4764 fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
4765 break;
4767 case output_dd:
4768 fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
4769 break;
4771 case flow_dd:
4772 /* These are the most common dependences: don't print these. */
4773 fprintf (file, "%d -> %d \n", i, e->dest);
4774 break;
4776 case anti_dd:
4777 fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
4778 break;
4780 default:
4781 gcc_unreachable ();
4785 fprintf (file, "}\n\n");
4788 /* Display the Reduced Dependence Graph using dotty. */
4789 extern void dot_rdg (struct graph *);
4791 DEBUG_FUNCTION void
4792 dot_rdg (struct graph *rdg)
4794 /* When debugging, enable the following code. This cannot be used
4795 in production compilers because it calls "system". */
4796 #if 0
4797 FILE *file = fopen ("/tmp/rdg.dot", "w");
4798 gcc_assert (file != NULL);
4800 dot_rdg_1 (file, rdg);
4801 fclose (file);
4803 system ("dotty /tmp/rdg.dot &");
4804 #else
4805 dot_rdg_1 (stderr, rdg);
4806 #endif
4809 /* This structure is used for recording the mapping statement index in
4810 the RDG. */
4812 struct GTY(()) rdg_vertex_info
4814 gimple stmt;
4815 int index;
4818 /* Returns the index of STMT in RDG. */
4821 rdg_vertex_for_stmt (struct graph *rdg, gimple stmt)
4823 struct rdg_vertex_info rvi, *slot;
4825 rvi.stmt = stmt;
4826 slot = (struct rdg_vertex_info *) htab_find (rdg->indices, &rvi);
4828 if (!slot)
4829 return -1;
4831 return slot->index;
4834 /* Creates an edge in RDG for each distance vector from DDR. The
4835 order that we keep track of in the RDG is the order in which
4836 statements have to be executed. */
4838 static void
4839 create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4841 struct graph_edge *e;
4842 int va, vb;
4843 data_reference_p dra = DDR_A (ddr);
4844 data_reference_p drb = DDR_B (ddr);
4845 unsigned level = ddr_dependence_level (ddr);
4847 /* For non scalar dependences, when the dependence is REVERSED,
4848 statement B has to be executed before statement A. */
4849 if (level > 0
4850 && !DDR_REVERSED_P (ddr))
4852 data_reference_p tmp = dra;
4853 dra = drb;
4854 drb = tmp;
4857 va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
4858 vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
4860 if (va < 0 || vb < 0)
4861 return;
4863 e = add_edge (rdg, va, vb);
4864 e->data = XNEW (struct rdg_edge);
4866 RDGE_LEVEL (e) = level;
4867 RDGE_RELATION (e) = ddr;
4869 /* Determines the type of the data dependence. */
4870 if (DR_IS_READ (dra) && DR_IS_READ (drb))
4871 RDGE_TYPE (e) = input_dd;
4872 else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
4873 RDGE_TYPE (e) = output_dd;
4874 else if (DR_IS_WRITE (dra) && DR_IS_READ (drb))
4875 RDGE_TYPE (e) = flow_dd;
4876 else if (DR_IS_READ (dra) && DR_IS_WRITE (drb))
4877 RDGE_TYPE (e) = anti_dd;
4880 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
4881 the index of DEF in RDG. */
4883 static void
4884 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
4886 use_operand_p imm_use_p;
4887 imm_use_iterator iterator;
4889 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
4891 struct graph_edge *e;
4892 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
4894 if (use < 0)
4895 continue;
4897 e = add_edge (rdg, idef, use);
4898 e->data = XNEW (struct rdg_edge);
4899 RDGE_TYPE (e) = flow_dd;
4900 RDGE_RELATION (e) = NULL;
4904 /* Creates the edges of the reduced dependence graph RDG. */
4906 static void
4907 create_rdg_edges (struct graph *rdg, VEC (ddr_p, heap) *ddrs)
4909 int i;
4910 struct data_dependence_relation *ddr;
4911 def_operand_p def_p;
4912 ssa_op_iter iter;
4914 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
4915 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4916 create_rdg_edge_for_ddr (rdg, ddr);
4918 for (i = 0; i < rdg->n_vertices; i++)
4919 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
4920 iter, SSA_OP_DEF)
4921 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
4924 /* Build the vertices of the reduced dependence graph RDG. */
4926 void
4927 create_rdg_vertices (struct graph *rdg, VEC (gimple, heap) *stmts)
4929 int i, j;
4930 gimple stmt;
4932 FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
4934 VEC (data_ref_loc, heap) *references;
4935 data_ref_loc *ref;
4936 struct vertex *v = &(rdg->vertices[i]);
4937 struct rdg_vertex_info *rvi = XNEW (struct rdg_vertex_info);
4938 struct rdg_vertex_info **slot;
4940 rvi->stmt = stmt;
4941 rvi->index = i;
4942 slot = (struct rdg_vertex_info **) htab_find_slot (rdg->indices, rvi, INSERT);
4944 if (!*slot)
4945 *slot = rvi;
4946 else
4947 free (rvi);
4949 v->data = XNEW (struct rdg_vertex);
4950 RDG_STMT (rdg, i) = stmt;
4952 RDG_MEM_WRITE_STMT (rdg, i) = false;
4953 RDG_MEM_READS_STMT (rdg, i) = false;
4954 if (gimple_code (stmt) == GIMPLE_PHI)
4955 continue;
4957 get_references_in_stmt (stmt, &references);
4958 FOR_EACH_VEC_ELT (data_ref_loc, references, j, ref)
4959 if (!ref->is_read)
4960 RDG_MEM_WRITE_STMT (rdg, i) = true;
4961 else
4962 RDG_MEM_READS_STMT (rdg, i) = true;
4964 VEC_free (data_ref_loc, heap, references);
4968 /* Initialize STMTS with all the statements of LOOP. When
4969 INCLUDE_PHIS is true, include also the PHI nodes. The order in
4970 which we discover statements is important as
4971 generate_loops_for_partition is using the same traversal for
4972 identifying statements. */
4974 static void
4975 stmts_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
4977 unsigned int i;
4978 basic_block *bbs = get_loop_body_in_dom_order (loop);
4980 for (i = 0; i < loop->num_nodes; i++)
4982 basic_block bb = bbs[i];
4983 gimple_stmt_iterator bsi;
4984 gimple stmt;
4986 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4987 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
4989 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4991 stmt = gsi_stmt (bsi);
4992 if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
4993 VEC_safe_push (gimple, heap, *stmts, stmt);
4997 free (bbs);
5000 /* Returns true when all the dependences are computable. */
5002 static bool
5003 known_dependences_p (VEC (ddr_p, heap) *dependence_relations)
5005 ddr_p ddr;
5006 unsigned int i;
5008 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
5009 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
5010 return false;
5012 return true;
5015 /* Computes a hash function for element ELT. */
5017 static hashval_t
5018 hash_stmt_vertex_info (const void *elt)
5020 const struct rdg_vertex_info *const rvi =
5021 (const struct rdg_vertex_info *) elt;
5022 gimple stmt = rvi->stmt;
5024 return htab_hash_pointer (stmt);
5027 /* Compares database elements E1 and E2. */
5029 static int
5030 eq_stmt_vertex_info (const void *e1, const void *e2)
5032 const struct rdg_vertex_info *elt1 = (const struct rdg_vertex_info *) e1;
5033 const struct rdg_vertex_info *elt2 = (const struct rdg_vertex_info *) e2;
5035 return elt1->stmt == elt2->stmt;
5038 /* Free the element E. */
5040 static void
5041 hash_stmt_vertex_del (void *e)
5043 free (e);
5046 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5047 statement of the loop nest, and one edge per data dependence or
5048 scalar dependence. */
5050 struct graph *
5051 build_empty_rdg (int n_stmts)
5053 int nb_data_refs = 10;
5054 struct graph *rdg = new_graph (n_stmts);
5056 rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
5057 eq_stmt_vertex_info, hash_stmt_vertex_del);
5058 return rdg;
5061 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5062 statement of the loop nest, and one edge per data dependence or
5063 scalar dependence. */
5065 struct graph *
5066 build_rdg (struct loop *loop,
5067 VEC (loop_p, heap) **loop_nest,
5068 VEC (ddr_p, heap) **dependence_relations,
5069 VEC (data_reference_p, heap) **datarefs)
5071 struct graph *rdg = NULL;
5072 VEC (gimple, heap) *stmts = VEC_alloc (gimple, heap, 10);
5074 compute_data_dependences_for_loop (loop, false, loop_nest, datarefs,
5075 dependence_relations);
5077 if (known_dependences_p (*dependence_relations))
5079 stmts_from_loop (loop, &stmts);
5080 rdg = build_empty_rdg (VEC_length (gimple, stmts));
5081 create_rdg_vertices (rdg, stmts);
5082 create_rdg_edges (rdg, *dependence_relations);
5085 VEC_free (gimple, heap, stmts);
5086 return rdg;
5089 /* Free the reduced dependence graph RDG. */
5091 void
5092 free_rdg (struct graph *rdg)
5094 int i;
5096 for (i = 0; i < rdg->n_vertices; i++)
5098 struct vertex *v = &(rdg->vertices[i]);
5099 struct graph_edge *e;
5101 for (e = v->succ; e; e = e->succ_next)
5102 free (e->data);
5104 free (v->data);
5107 htab_delete (rdg->indices);
5108 free_graph (rdg);
5111 /* Initialize STMTS with all the statements of LOOP that contain a
5112 store to memory. */
5114 void
5115 stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5117 unsigned int i;
5118 basic_block *bbs = get_loop_body_in_dom_order (loop);
5120 for (i = 0; i < loop->num_nodes; i++)
5122 basic_block bb = bbs[i];
5123 gimple_stmt_iterator bsi;
5125 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5126 if (gimple_vdef (gsi_stmt (bsi)))
5127 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
5130 free (bbs);
5133 /* Returns true when the statement at STMT is of the form "A[i] = 0"
5134 that contains a data reference on its LHS with a stride of the same
5135 size as its unit type. */
5137 bool
5138 stmt_with_adjacent_zero_store_dr_p (gimple stmt)
5140 tree op0, op1;
5141 bool res;
5142 struct data_reference *dr;
5144 if (!stmt
5145 || !gimple_vdef (stmt)
5146 || !is_gimple_assign (stmt)
5147 || !gimple_assign_single_p (stmt)
5148 || !(op1 = gimple_assign_rhs1 (stmt))
5149 || !(integer_zerop (op1) || real_zerop (op1)))
5150 return false;
5152 dr = XCNEW (struct data_reference);
5153 op0 = gimple_assign_lhs (stmt);
5155 DR_STMT (dr) = stmt;
5156 DR_REF (dr) = op0;
5158 res = dr_analyze_innermost (dr, loop_containing_stmt (stmt))
5159 && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0));
5161 free_data_ref (dr);
5162 return res;
5165 /* Initialize STMTS with all the statements of LOOP that contain a
5166 store to memory of the form "A[i] = 0". */
5168 void
5169 stores_zero_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5171 unsigned int i;
5172 basic_block bb;
5173 gimple_stmt_iterator si;
5174 gimple stmt;
5175 basic_block *bbs = get_loop_body_in_dom_order (loop);
5177 for (i = 0; i < loop->num_nodes; i++)
5178 for (bb = bbs[i], si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5179 if ((stmt = gsi_stmt (si))
5180 && stmt_with_adjacent_zero_store_dr_p (stmt))
5181 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (si));
5183 free (bbs);
5186 /* For a data reference REF, return the declaration of its base
5187 address or NULL_TREE if the base is not determined. */
5189 static inline tree
5190 ref_base_address (gimple stmt, data_ref_loc *ref)
5192 tree base = NULL_TREE;
5193 tree base_address;
5194 struct data_reference *dr = XCNEW (struct data_reference);
5196 DR_STMT (dr) = stmt;
5197 DR_REF (dr) = *ref->pos;
5198 dr_analyze_innermost (dr, loop_containing_stmt (stmt));
5199 base_address = DR_BASE_ADDRESS (dr);
5201 if (!base_address)
5202 goto end;
5204 switch (TREE_CODE (base_address))
5206 case ADDR_EXPR:
5207 base = TREE_OPERAND (base_address, 0);
5208 break;
5210 default:
5211 base = base_address;
5212 break;
5215 end:
5216 free_data_ref (dr);
5217 return base;
5220 /* Determines whether the statement from vertex V of the RDG has a
5221 definition used outside the loop that contains this statement. */
5223 bool
5224 rdg_defs_used_in_other_loops_p (struct graph *rdg, int v)
5226 gimple stmt = RDG_STMT (rdg, v);
5227 struct loop *loop = loop_containing_stmt (stmt);
5228 use_operand_p imm_use_p;
5229 imm_use_iterator iterator;
5230 ssa_op_iter it;
5231 def_operand_p def_p;
5233 if (!loop)
5234 return true;
5236 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, it, SSA_OP_DEF)
5238 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, DEF_FROM_PTR (def_p))
5240 if (loop_containing_stmt (USE_STMT (imm_use_p)) != loop)
5241 return true;
5245 return false;
5248 /* Determines whether statements S1 and S2 access to similar memory
5249 locations. Two memory accesses are considered similar when they
5250 have the same base address declaration, i.e. when their
5251 ref_base_address is the same. */
5253 bool
5254 have_similar_memory_accesses (gimple s1, gimple s2)
5256 bool res = false;
5257 unsigned i, j;
5258 VEC (data_ref_loc, heap) *refs1, *refs2;
5259 data_ref_loc *ref1, *ref2;
5261 get_references_in_stmt (s1, &refs1);
5262 get_references_in_stmt (s2, &refs2);
5264 FOR_EACH_VEC_ELT (data_ref_loc, refs1, i, ref1)
5266 tree base1 = ref_base_address (s1, ref1);
5268 if (base1)
5269 FOR_EACH_VEC_ELT (data_ref_loc, refs2, j, ref2)
5270 if (base1 == ref_base_address (s2, ref2))
5272 res = true;
5273 goto end;
5277 end:
5278 VEC_free (data_ref_loc, heap, refs1);
5279 VEC_free (data_ref_loc, heap, refs2);
5280 return res;
5283 /* Helper function for the hashtab. */
5285 static int
5286 have_similar_memory_accesses_1 (const void *s1, const void *s2)
5288 return have_similar_memory_accesses (CONST_CAST_GIMPLE ((const_gimple) s1),
5289 CONST_CAST_GIMPLE ((const_gimple) s2));
5292 /* Helper function for the hashtab. */
5294 static hashval_t
5295 ref_base_address_1 (const void *s)
5297 gimple stmt = CONST_CAST_GIMPLE ((const_gimple) s);
5298 unsigned i;
5299 VEC (data_ref_loc, heap) *refs;
5300 data_ref_loc *ref;
5301 hashval_t res = 0;
5303 get_references_in_stmt (stmt, &refs);
5305 FOR_EACH_VEC_ELT (data_ref_loc, refs, i, ref)
5306 if (!ref->is_read)
5308 res = htab_hash_pointer (ref_base_address (stmt, ref));
5309 break;
5312 VEC_free (data_ref_loc, heap, refs);
5313 return res;
5316 /* Try to remove duplicated write data references from STMTS. */
5318 void
5319 remove_similar_memory_refs (VEC (gimple, heap) **stmts)
5321 unsigned i;
5322 gimple stmt;
5323 htab_t seen = htab_create (VEC_length (gimple, *stmts), ref_base_address_1,
5324 have_similar_memory_accesses_1, NULL);
5326 for (i = 0; VEC_iterate (gimple, *stmts, i, stmt); )
5328 void **slot;
5330 slot = htab_find_slot (seen, stmt, INSERT);
5332 if (*slot)
5333 VEC_ordered_remove (gimple, *stmts, i);
5334 else
5336 *slot = (void *) stmt;
5337 i++;
5341 htab_delete (seen);
5344 /* Returns the index of PARAMETER in the parameters vector of the
5345 ACCESS_MATRIX. If PARAMETER does not exist return -1. */
5348 access_matrix_get_index_for_parameter (tree parameter,
5349 struct access_matrix *access_matrix)
5351 int i;
5352 VEC (tree,heap) *lambda_parameters = AM_PARAMETERS (access_matrix);
5353 tree lambda_parameter;
5355 FOR_EACH_VEC_ELT (tree, lambda_parameters, i, lambda_parameter)
5356 if (lambda_parameter == parameter)
5357 return i + AM_NB_INDUCTION_VARS (access_matrix);
5359 return -1;