2013-10-11 Marc Glisse <marc.glisse@inria.fr>
[official-gcc.git] / gcc / tree-data-ref.c
blob9133df4a2b75eac8015d88a36602f326917e661e
1 /* Data references and dependences detectors.
2 Copyright (C) 2003-2013 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass walks a given loop structure searching for array
22 references. The information about the array accesses is recorded
23 in DATA_REFERENCE structures.
25 The basic test for determining the dependences is:
26 given two access functions chrec1 and chrec2 to a same array, and
27 x and y two vectors from the iteration domain, the same element of
28 the array is accessed twice at iterations x and y if and only if:
29 | chrec1 (x) == chrec2 (y).
31 The goals of this analysis are:
33 - to determine the independence: the relation between two
34 independent accesses is qualified with the chrec_known (this
35 information allows a loop parallelization),
37 - when two data references access the same data, to qualify the
38 dependence relation with classic dependence representations:
40 - distance vectors
41 - direction vectors
42 - loop carried level dependence
43 - polyhedron dependence
44 or with the chains of recurrences based representation,
46 - to define a knowledge base for storing the data dependence
47 information,
49 - to define an interface to access this data.
52 Definitions:
54 - subscript: given two array accesses a subscript is the tuple
55 composed of the access functions for a given dimension. Example:
56 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57 (f1, g1), (f2, g2), (f3, g3).
59 - Diophantine equation: an equation whose coefficients and
60 solutions are integer constants, for example the equation
61 | 3*x + 2*y = 1
62 has an integer solution x = 1 and y = -1.
64 References:
66 - "Advanced Compilation for High Performance Computing" by Randy
67 Allen and Ken Kennedy.
68 http://citeseer.ist.psu.edu/goff91practical.html
70 - "Loop Transformations for Restructuring Compilers - The Foundations"
71 by Utpal Banerjee.
76 #include "config.h"
77 #include "system.h"
78 #include "coretypes.h"
79 #include "tree.h"
80 #include "gimple-pretty-print.h"
81 #include "tree-ssa.h"
82 #include "cfgloop.h"
83 #include "tree-data-ref.h"
84 #include "tree-scalar-evolution.h"
85 #include "dumpfile.h"
86 #include "langhooks.h"
87 #include "tree-affine.h"
88 #include "params.h"
90 static struct datadep_stats
92 int num_dependence_tests;
93 int num_dependence_dependent;
94 int num_dependence_independent;
95 int num_dependence_undetermined;
97 int num_subscript_tests;
98 int num_subscript_undetermined;
99 int num_same_subscript_function;
101 int num_ziv;
102 int num_ziv_independent;
103 int num_ziv_dependent;
104 int num_ziv_unimplemented;
106 int num_siv;
107 int num_siv_independent;
108 int num_siv_dependent;
109 int num_siv_unimplemented;
111 int num_miv;
112 int num_miv_independent;
113 int num_miv_dependent;
114 int num_miv_unimplemented;
115 } dependence_stats;
117 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
118 struct data_reference *,
119 struct data_reference *,
120 struct loop *);
121 /* Returns true iff A divides B. */
123 static inline bool
124 tree_fold_divides_p (const_tree a, const_tree b)
126 gcc_assert (TREE_CODE (a) == INTEGER_CST);
127 gcc_assert (TREE_CODE (b) == INTEGER_CST);
128 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
131 /* Returns true iff A divides B. */
133 static inline bool
134 int_divides_p (int a, int b)
136 return ((b % a) == 0);
141 /* Dump into FILE all the data references from DATAREFS. */
143 static void
144 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
146 unsigned int i;
147 struct data_reference *dr;
149 FOR_EACH_VEC_ELT (datarefs, i, dr)
150 dump_data_reference (file, dr);
153 /* Unified dump into FILE all the data references from DATAREFS. */
155 DEBUG_FUNCTION void
156 debug (vec<data_reference_p> &ref)
158 dump_data_references (stderr, ref);
161 DEBUG_FUNCTION void
162 debug (vec<data_reference_p> *ptr)
164 if (ptr)
165 debug (*ptr);
166 else
167 fprintf (stderr, "<nil>\n");
171 /* Dump into STDERR all the data references from DATAREFS. */
173 DEBUG_FUNCTION void
174 debug_data_references (vec<data_reference_p> datarefs)
176 dump_data_references (stderr, datarefs);
179 /* Print to STDERR the data_reference DR. */
181 DEBUG_FUNCTION void
182 debug_data_reference (struct data_reference *dr)
184 dump_data_reference (stderr, dr);
187 /* Dump function for a DATA_REFERENCE structure. */
189 void
190 dump_data_reference (FILE *outf,
191 struct data_reference *dr)
193 unsigned int i;
195 fprintf (outf, "#(Data Ref: \n");
196 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
197 fprintf (outf, "# stmt: ");
198 print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
199 fprintf (outf, "# ref: ");
200 print_generic_stmt (outf, DR_REF (dr), 0);
201 fprintf (outf, "# base_object: ");
202 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
204 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
206 fprintf (outf, "# Access function %d: ", i);
207 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
209 fprintf (outf, "#)\n");
212 /* Unified dump function for a DATA_REFERENCE structure. */
214 DEBUG_FUNCTION void
215 debug (data_reference &ref)
217 dump_data_reference (stderr, &ref);
220 DEBUG_FUNCTION void
221 debug (data_reference *ptr)
223 if (ptr)
224 debug (*ptr);
225 else
226 fprintf (stderr, "<nil>\n");
230 /* Dumps the affine function described by FN to the file OUTF. */
232 static void
233 dump_affine_function (FILE *outf, affine_fn fn)
235 unsigned i;
236 tree coef;
238 print_generic_expr (outf, fn[0], TDF_SLIM);
239 for (i = 1; fn.iterate (i, &coef); i++)
241 fprintf (outf, " + ");
242 print_generic_expr (outf, coef, TDF_SLIM);
243 fprintf (outf, " * x_%u", i);
247 /* Dumps the conflict function CF to the file OUTF. */
249 static void
250 dump_conflict_function (FILE *outf, conflict_function *cf)
252 unsigned i;
254 if (cf->n == NO_DEPENDENCE)
255 fprintf (outf, "no dependence");
256 else if (cf->n == NOT_KNOWN)
257 fprintf (outf, "not known");
258 else
260 for (i = 0; i < cf->n; i++)
262 if (i != 0)
263 fprintf (outf, " ");
264 fprintf (outf, "[");
265 dump_affine_function (outf, cf->fns[i]);
266 fprintf (outf, "]");
271 /* Dump function for a SUBSCRIPT structure. */
273 static void
274 dump_subscript (FILE *outf, struct subscript *subscript)
276 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
278 fprintf (outf, "\n (subscript \n");
279 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
280 dump_conflict_function (outf, cf);
281 if (CF_NONTRIVIAL_P (cf))
283 tree last_iteration = SUB_LAST_CONFLICT (subscript);
284 fprintf (outf, "\n last_conflict: ");
285 print_generic_expr (outf, last_iteration, 0);
288 cf = SUB_CONFLICTS_IN_B (subscript);
289 fprintf (outf, "\n iterations_that_access_an_element_twice_in_B: ");
290 dump_conflict_function (outf, cf);
291 if (CF_NONTRIVIAL_P (cf))
293 tree last_iteration = SUB_LAST_CONFLICT (subscript);
294 fprintf (outf, "\n last_conflict: ");
295 print_generic_expr (outf, last_iteration, 0);
298 fprintf (outf, "\n (Subscript distance: ");
299 print_generic_expr (outf, SUB_DISTANCE (subscript), 0);
300 fprintf (outf, " ))\n");
303 /* Print the classic direction vector DIRV to OUTF. */
305 static void
306 print_direction_vector (FILE *outf,
307 lambda_vector dirv,
308 int length)
310 int eq;
312 for (eq = 0; eq < length; eq++)
314 enum data_dependence_direction dir = ((enum data_dependence_direction)
315 dirv[eq]);
317 switch (dir)
319 case dir_positive:
320 fprintf (outf, " +");
321 break;
322 case dir_negative:
323 fprintf (outf, " -");
324 break;
325 case dir_equal:
326 fprintf (outf, " =");
327 break;
328 case dir_positive_or_equal:
329 fprintf (outf, " +=");
330 break;
331 case dir_positive_or_negative:
332 fprintf (outf, " +-");
333 break;
334 case dir_negative_or_equal:
335 fprintf (outf, " -=");
336 break;
337 case dir_star:
338 fprintf (outf, " *");
339 break;
340 default:
341 fprintf (outf, "indep");
342 break;
345 fprintf (outf, "\n");
348 /* Print a vector of direction vectors. */
350 static void
351 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
352 int length)
354 unsigned j;
355 lambda_vector v;
357 FOR_EACH_VEC_ELT (dir_vects, j, v)
358 print_direction_vector (outf, v, length);
361 /* Print out a vector VEC of length N to OUTFILE. */
363 static inline void
364 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
366 int i;
368 for (i = 0; i < n; i++)
369 fprintf (outfile, "%3d ", vector[i]);
370 fprintf (outfile, "\n");
373 /* Print a vector of distance vectors. */
375 static void
376 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
377 int length)
379 unsigned j;
380 lambda_vector v;
382 FOR_EACH_VEC_ELT (dist_vects, j, v)
383 print_lambda_vector (outf, v, length);
386 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
388 static void
389 dump_data_dependence_relation (FILE *outf,
390 struct data_dependence_relation *ddr)
392 struct data_reference *dra, *drb;
394 fprintf (outf, "(Data Dep: \n");
396 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
398 if (ddr)
400 dra = DDR_A (ddr);
401 drb = DDR_B (ddr);
402 if (dra)
403 dump_data_reference (outf, dra);
404 else
405 fprintf (outf, " (nil)\n");
406 if (drb)
407 dump_data_reference (outf, drb);
408 else
409 fprintf (outf, " (nil)\n");
411 fprintf (outf, " (don't know)\n)\n");
412 return;
415 dra = DDR_A (ddr);
416 drb = DDR_B (ddr);
417 dump_data_reference (outf, dra);
418 dump_data_reference (outf, drb);
420 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
421 fprintf (outf, " (no dependence)\n");
423 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
425 unsigned int i;
426 struct loop *loopi;
428 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
430 fprintf (outf, " access_fn_A: ");
431 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
432 fprintf (outf, " access_fn_B: ");
433 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
434 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
437 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
438 fprintf (outf, " loop nest: (");
439 FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
440 fprintf (outf, "%d ", loopi->num);
441 fprintf (outf, ")\n");
443 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
445 fprintf (outf, " distance_vector: ");
446 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
447 DDR_NB_LOOPS (ddr));
450 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
452 fprintf (outf, " direction_vector: ");
453 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
454 DDR_NB_LOOPS (ddr));
458 fprintf (outf, ")\n");
461 /* Debug version. */
463 DEBUG_FUNCTION void
464 debug_data_dependence_relation (struct data_dependence_relation *ddr)
466 dump_data_dependence_relation (stderr, ddr);
469 /* Dump into FILE all the dependence relations from DDRS. */
471 void
472 dump_data_dependence_relations (FILE *file,
473 vec<ddr_p> ddrs)
475 unsigned int i;
476 struct data_dependence_relation *ddr;
478 FOR_EACH_VEC_ELT (ddrs, i, ddr)
479 dump_data_dependence_relation (file, ddr);
482 DEBUG_FUNCTION void
483 debug (vec<ddr_p> &ref)
485 dump_data_dependence_relations (stderr, ref);
488 DEBUG_FUNCTION void
489 debug (vec<ddr_p> *ptr)
491 if (ptr)
492 debug (*ptr);
493 else
494 fprintf (stderr, "<nil>\n");
498 /* Dump to STDERR all the dependence relations from DDRS. */
500 DEBUG_FUNCTION void
501 debug_data_dependence_relations (vec<ddr_p> ddrs)
503 dump_data_dependence_relations (stderr, ddrs);
506 /* Dumps the distance and direction vectors in FILE. DDRS contains
507 the dependence relations, and VECT_SIZE is the size of the
508 dependence vectors, or in other words the number of loops in the
509 considered nest. */
511 static void
512 dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
514 unsigned int i, j;
515 struct data_dependence_relation *ddr;
516 lambda_vector v;
518 FOR_EACH_VEC_ELT (ddrs, i, ddr)
519 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
521 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), j, v)
523 fprintf (file, "DISTANCE_V (");
524 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
525 fprintf (file, ")\n");
528 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), j, v)
530 fprintf (file, "DIRECTION_V (");
531 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
532 fprintf (file, ")\n");
536 fprintf (file, "\n\n");
539 /* Dumps the data dependence relations DDRS in FILE. */
541 static void
542 dump_ddrs (FILE *file, vec<ddr_p> ddrs)
544 unsigned int i;
545 struct data_dependence_relation *ddr;
547 FOR_EACH_VEC_ELT (ddrs, i, ddr)
548 dump_data_dependence_relation (file, ddr);
550 fprintf (file, "\n\n");
553 DEBUG_FUNCTION void
554 debug_ddrs (vec<ddr_p> ddrs)
556 dump_ddrs (stderr, ddrs);
559 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
560 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
561 constant of type ssizetype, and returns true. If we cannot do this
562 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
563 is returned. */
565 static bool
566 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
567 tree *var, tree *off)
569 tree var0, var1;
570 tree off0, off1;
571 enum tree_code ocode = code;
573 *var = NULL_TREE;
574 *off = NULL_TREE;
576 switch (code)
578 case INTEGER_CST:
579 *var = build_int_cst (type, 0);
580 *off = fold_convert (ssizetype, op0);
581 return true;
583 case POINTER_PLUS_EXPR:
584 ocode = PLUS_EXPR;
585 /* FALLTHROUGH */
586 case PLUS_EXPR:
587 case MINUS_EXPR:
588 split_constant_offset (op0, &var0, &off0);
589 split_constant_offset (op1, &var1, &off1);
590 *var = fold_build2 (code, type, var0, var1);
591 *off = size_binop (ocode, off0, off1);
592 return true;
594 case MULT_EXPR:
595 if (TREE_CODE (op1) != INTEGER_CST)
596 return false;
598 split_constant_offset (op0, &var0, &off0);
599 *var = fold_build2 (MULT_EXPR, type, var0, op1);
600 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
601 return true;
603 case ADDR_EXPR:
605 tree base, poffset;
606 HOST_WIDE_INT pbitsize, pbitpos;
607 enum machine_mode pmode;
608 int punsignedp, pvolatilep;
610 op0 = TREE_OPERAND (op0, 0);
611 base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
612 &pmode, &punsignedp, &pvolatilep, false);
614 if (pbitpos % BITS_PER_UNIT != 0)
615 return false;
616 base = build_fold_addr_expr (base);
617 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
619 if (poffset)
621 split_constant_offset (poffset, &poffset, &off1);
622 off0 = size_binop (PLUS_EXPR, off0, off1);
623 if (POINTER_TYPE_P (TREE_TYPE (base)))
624 base = fold_build_pointer_plus (base, poffset);
625 else
626 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
627 fold_convert (TREE_TYPE (base), poffset));
630 var0 = fold_convert (type, base);
632 /* If variable length types are involved, punt, otherwise casts
633 might be converted into ARRAY_REFs in gimplify_conversion.
634 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
635 possibly no longer appears in current GIMPLE, might resurface.
636 This perhaps could run
637 if (CONVERT_EXPR_P (var0))
639 gimplify_conversion (&var0);
640 // Attempt to fill in any within var0 found ARRAY_REF's
641 // element size from corresponding op embedded ARRAY_REF,
642 // if unsuccessful, just punt.
643 } */
644 while (POINTER_TYPE_P (type))
645 type = TREE_TYPE (type);
646 if (int_size_in_bytes (type) < 0)
647 return false;
649 *var = var0;
650 *off = off0;
651 return true;
654 case SSA_NAME:
656 gimple def_stmt = SSA_NAME_DEF_STMT (op0);
657 enum tree_code subcode;
659 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
660 return false;
662 var0 = gimple_assign_rhs1 (def_stmt);
663 subcode = gimple_assign_rhs_code (def_stmt);
664 var1 = gimple_assign_rhs2 (def_stmt);
666 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
668 CASE_CONVERT:
670 /* We must not introduce undefined overflow, and we must not change the value.
671 Hence we're okay if the inner type doesn't overflow to start with
672 (pointer or signed), the outer type also is an integer or pointer
673 and the outer precision is at least as large as the inner. */
674 tree itype = TREE_TYPE (op0);
675 if ((POINTER_TYPE_P (itype)
676 || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
677 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
678 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
680 split_constant_offset (op0, &var0, off);
681 *var = fold_convert (type, var0);
682 return true;
684 return false;
687 default:
688 return false;
692 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
693 will be ssizetype. */
695 void
696 split_constant_offset (tree exp, tree *var, tree *off)
698 tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
699 enum tree_code code;
701 *var = exp;
702 *off = ssize_int (0);
703 STRIP_NOPS (exp);
705 if (tree_is_chrec (exp)
706 || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
707 return;
709 otype = TREE_TYPE (exp);
710 code = TREE_CODE (exp);
711 extract_ops_from_tree (exp, &code, &op0, &op1);
712 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
714 *var = fold_convert (type, e);
715 *off = o;
719 /* Returns the address ADDR of an object in a canonical shape (without nop
720 casts, and with type of pointer to the object). */
722 static tree
723 canonicalize_base_object_address (tree addr)
725 tree orig = addr;
727 STRIP_NOPS (addr);
729 /* The base address may be obtained by casting from integer, in that case
730 keep the cast. */
731 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
732 return orig;
734 if (TREE_CODE (addr) != ADDR_EXPR)
735 return addr;
737 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
740 /* Analyzes the behavior of the memory reference DR in the innermost loop or
741 basic block that contains it. Returns true if analysis succeed or false
742 otherwise. */
744 bool
745 dr_analyze_innermost (struct data_reference *dr, struct loop *nest)
747 gimple stmt = DR_STMT (dr);
748 struct loop *loop = loop_containing_stmt (stmt);
749 tree ref = DR_REF (dr);
750 HOST_WIDE_INT pbitsize, pbitpos;
751 tree base, poffset;
752 enum machine_mode pmode;
753 int punsignedp, pvolatilep;
754 affine_iv base_iv, offset_iv;
755 tree init, dinit, step;
756 bool in_loop = (loop && loop->num);
758 if (dump_file && (dump_flags & TDF_DETAILS))
759 fprintf (dump_file, "analyze_innermost: ");
761 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
762 &pmode, &punsignedp, &pvolatilep, false);
763 gcc_assert (base != NULL_TREE);
765 if (pbitpos % BITS_PER_UNIT != 0)
767 if (dump_file && (dump_flags & TDF_DETAILS))
768 fprintf (dump_file, "failed: bit offset alignment.\n");
769 return false;
772 if (TREE_CODE (base) == MEM_REF)
774 if (!integer_zerop (TREE_OPERAND (base, 1)))
776 double_int moff = mem_ref_offset (base);
777 tree mofft = double_int_to_tree (sizetype, moff);
778 if (!poffset)
779 poffset = mofft;
780 else
781 poffset = size_binop (PLUS_EXPR, poffset, mofft);
783 base = TREE_OPERAND (base, 0);
785 else
786 base = build_fold_addr_expr (base);
788 if (in_loop)
790 if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
791 nest ? true : false))
793 if (nest)
795 if (dump_file && (dump_flags & TDF_DETAILS))
796 fprintf (dump_file, "failed: evolution of base is not"
797 " affine.\n");
798 return false;
800 else
802 base_iv.base = base;
803 base_iv.step = ssize_int (0);
804 base_iv.no_overflow = true;
808 else
810 base_iv.base = base;
811 base_iv.step = ssize_int (0);
812 base_iv.no_overflow = true;
815 if (!poffset)
817 offset_iv.base = ssize_int (0);
818 offset_iv.step = ssize_int (0);
820 else
822 if (!in_loop)
824 offset_iv.base = poffset;
825 offset_iv.step = ssize_int (0);
827 else if (!simple_iv (loop, loop_containing_stmt (stmt),
828 poffset, &offset_iv,
829 nest ? true : false))
831 if (nest)
833 if (dump_file && (dump_flags & TDF_DETAILS))
834 fprintf (dump_file, "failed: evolution of offset is not"
835 " affine.\n");
836 return false;
838 else
840 offset_iv.base = poffset;
841 offset_iv.step = ssize_int (0);
846 init = ssize_int (pbitpos / BITS_PER_UNIT);
847 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
848 init = size_binop (PLUS_EXPR, init, dinit);
849 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
850 init = size_binop (PLUS_EXPR, init, dinit);
852 step = size_binop (PLUS_EXPR,
853 fold_convert (ssizetype, base_iv.step),
854 fold_convert (ssizetype, offset_iv.step));
856 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
858 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
859 DR_INIT (dr) = init;
860 DR_STEP (dr) = step;
862 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
864 if (dump_file && (dump_flags & TDF_DETAILS))
865 fprintf (dump_file, "success.\n");
867 return true;
870 /* Determines the base object and the list of indices of memory reference
871 DR, analyzed in LOOP and instantiated in loop nest NEST. */
873 static void
874 dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
876 vec<tree> access_fns = vNULL;
877 tree ref, op;
878 tree base, off, access_fn;
879 basic_block before_loop;
881 /* If analyzing a basic-block there are no indices to analyze
882 and thus no access functions. */
883 if (!nest)
885 DR_BASE_OBJECT (dr) = DR_REF (dr);
886 DR_ACCESS_FNS (dr).create (0);
887 return;
890 ref = DR_REF (dr);
891 before_loop = block_before_loop (nest);
893 /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
894 into a two element array with a constant index. The base is
895 then just the immediate underlying object. */
896 if (TREE_CODE (ref) == REALPART_EXPR)
898 ref = TREE_OPERAND (ref, 0);
899 access_fns.safe_push (integer_zero_node);
901 else if (TREE_CODE (ref) == IMAGPART_EXPR)
903 ref = TREE_OPERAND (ref, 0);
904 access_fns.safe_push (integer_one_node);
907 /* Analyze access functions of dimensions we know to be independent. */
908 while (handled_component_p (ref))
910 if (TREE_CODE (ref) == ARRAY_REF)
912 op = TREE_OPERAND (ref, 1);
913 access_fn = analyze_scalar_evolution (loop, op);
914 access_fn = instantiate_scev (before_loop, loop, access_fn);
915 access_fns.safe_push (access_fn);
917 else if (TREE_CODE (ref) == COMPONENT_REF
918 && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
920 /* For COMPONENT_REFs of records (but not unions!) use the
921 FIELD_DECL offset as constant access function so we can
922 disambiguate a[i].f1 and a[i].f2. */
923 tree off = component_ref_field_offset (ref);
924 off = size_binop (PLUS_EXPR,
925 size_binop (MULT_EXPR,
926 fold_convert (bitsizetype, off),
927 bitsize_int (BITS_PER_UNIT)),
928 DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
929 access_fns.safe_push (off);
931 else
932 /* If we have an unhandled component we could not translate
933 to an access function stop analyzing. We have determined
934 our base object in this case. */
935 break;
937 ref = TREE_OPERAND (ref, 0);
940 /* If the address operand of a MEM_REF base has an evolution in the
941 analyzed nest, add it as an additional independent access-function. */
942 if (TREE_CODE (ref) == MEM_REF)
944 op = TREE_OPERAND (ref, 0);
945 access_fn = analyze_scalar_evolution (loop, op);
946 access_fn = instantiate_scev (before_loop, loop, access_fn);
947 if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
949 tree orig_type;
950 tree memoff = TREE_OPERAND (ref, 1);
951 base = initial_condition (access_fn);
952 orig_type = TREE_TYPE (base);
953 STRIP_USELESS_TYPE_CONVERSION (base);
954 split_constant_offset (base, &base, &off);
955 /* Fold the MEM_REF offset into the evolutions initial
956 value to make more bases comparable. */
957 if (!integer_zerop (memoff))
959 off = size_binop (PLUS_EXPR, off,
960 fold_convert (ssizetype, memoff));
961 memoff = build_int_cst (TREE_TYPE (memoff), 0);
963 access_fn = chrec_replace_initial_condition
964 (access_fn, fold_convert (orig_type, off));
965 /* ??? This is still not a suitable base object for
966 dr_may_alias_p - the base object needs to be an
967 access that covers the object as whole. With
968 an evolution in the pointer this cannot be
969 guaranteed.
970 As a band-aid, mark the access so we can special-case
971 it in dr_may_alias_p. */
972 ref = fold_build2_loc (EXPR_LOCATION (ref),
973 MEM_REF, TREE_TYPE (ref),
974 base, memoff);
975 DR_UNCONSTRAINED_BASE (dr) = true;
976 access_fns.safe_push (access_fn);
979 else if (DECL_P (ref))
981 /* Canonicalize DR_BASE_OBJECT to MEM_REF form. */
982 ref = build2 (MEM_REF, TREE_TYPE (ref),
983 build_fold_addr_expr (ref),
984 build_int_cst (reference_alias_ptr_type (ref), 0));
987 DR_BASE_OBJECT (dr) = ref;
988 DR_ACCESS_FNS (dr) = access_fns;
991 /* Extracts the alias analysis information from the memory reference DR. */
993 static void
994 dr_analyze_alias (struct data_reference *dr)
996 tree ref = DR_REF (dr);
997 tree base = get_base_address (ref), addr;
999 if (INDIRECT_REF_P (base)
1000 || TREE_CODE (base) == MEM_REF)
1002 addr = TREE_OPERAND (base, 0);
1003 if (TREE_CODE (addr) == SSA_NAME)
1004 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1008 /* Frees data reference DR. */
1010 void
1011 free_data_ref (data_reference_p dr)
1013 DR_ACCESS_FNS (dr).release ();
1014 free (dr);
1017 /* Analyzes memory reference MEMREF accessed in STMT. The reference
1018 is read if IS_READ is true, write otherwise. Returns the
1019 data_reference description of MEMREF. NEST is the outermost loop
1020 in which the reference should be instantiated, LOOP is the loop in
1021 which the data reference should be analyzed. */
1023 struct data_reference *
1024 create_data_ref (loop_p nest, loop_p loop, tree memref, gimple stmt,
1025 bool is_read)
1027 struct data_reference *dr;
1029 if (dump_file && (dump_flags & TDF_DETAILS))
1031 fprintf (dump_file, "Creating dr for ");
1032 print_generic_expr (dump_file, memref, TDF_SLIM);
1033 fprintf (dump_file, "\n");
1036 dr = XCNEW (struct data_reference);
1037 DR_STMT (dr) = stmt;
1038 DR_REF (dr) = memref;
1039 DR_IS_READ (dr) = is_read;
1041 dr_analyze_innermost (dr, nest);
1042 dr_analyze_indices (dr, nest, loop);
1043 dr_analyze_alias (dr);
1045 if (dump_file && (dump_flags & TDF_DETAILS))
1047 unsigned i;
1048 fprintf (dump_file, "\tbase_address: ");
1049 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1050 fprintf (dump_file, "\n\toffset from base address: ");
1051 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1052 fprintf (dump_file, "\n\tconstant offset from base address: ");
1053 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1054 fprintf (dump_file, "\n\tstep: ");
1055 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1056 fprintf (dump_file, "\n\taligned to: ");
1057 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
1058 fprintf (dump_file, "\n\tbase_object: ");
1059 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1060 fprintf (dump_file, "\n");
1061 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1063 fprintf (dump_file, "\tAccess function %d: ", i);
1064 print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1068 return dr;
1071 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1072 expressions. */
1073 static bool
1074 dr_equal_offsets_p1 (tree offset1, tree offset2)
1076 bool res;
1078 STRIP_NOPS (offset1);
1079 STRIP_NOPS (offset2);
1081 if (offset1 == offset2)
1082 return true;
1084 if (TREE_CODE (offset1) != TREE_CODE (offset2)
1085 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1086 return false;
1088 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1089 TREE_OPERAND (offset2, 0));
1091 if (!res || !BINARY_CLASS_P (offset1))
1092 return res;
1094 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1095 TREE_OPERAND (offset2, 1));
1097 return res;
1100 /* Check if DRA and DRB have equal offsets. */
1101 bool
1102 dr_equal_offsets_p (struct data_reference *dra,
1103 struct data_reference *drb)
1105 tree offset1, offset2;
1107 offset1 = DR_OFFSET (dra);
1108 offset2 = DR_OFFSET (drb);
1110 return dr_equal_offsets_p1 (offset1, offset2);
1113 /* Returns true if FNA == FNB. */
1115 static bool
1116 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1118 unsigned i, n = fna.length ();
1120 if (n != fnb.length ())
1121 return false;
1123 for (i = 0; i < n; i++)
1124 if (!operand_equal_p (fna[i], fnb[i], 0))
1125 return false;
1127 return true;
1130 /* If all the functions in CF are the same, returns one of them,
1131 otherwise returns NULL. */
1133 static affine_fn
1134 common_affine_function (conflict_function *cf)
1136 unsigned i;
1137 affine_fn comm;
1139 if (!CF_NONTRIVIAL_P (cf))
1140 return affine_fn ();
1142 comm = cf->fns[0];
1144 for (i = 1; i < cf->n; i++)
1145 if (!affine_function_equal_p (comm, cf->fns[i]))
1146 return affine_fn ();
1148 return comm;
1151 /* Returns the base of the affine function FN. */
1153 static tree
1154 affine_function_base (affine_fn fn)
1156 return fn[0];
1159 /* Returns true if FN is a constant. */
1161 static bool
1162 affine_function_constant_p (affine_fn fn)
1164 unsigned i;
1165 tree coef;
1167 for (i = 1; fn.iterate (i, &coef); i++)
1168 if (!integer_zerop (coef))
1169 return false;
1171 return true;
1174 /* Returns true if FN is the zero constant function. */
1176 static bool
1177 affine_function_zero_p (affine_fn fn)
1179 return (integer_zerop (affine_function_base (fn))
1180 && affine_function_constant_p (fn));
1183 /* Returns a signed integer type with the largest precision from TA
1184 and TB. */
1186 static tree
1187 signed_type_for_types (tree ta, tree tb)
1189 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1190 return signed_type_for (ta);
1191 else
1192 return signed_type_for (tb);
1195 /* Applies operation OP on affine functions FNA and FNB, and returns the
1196 result. */
1198 static affine_fn
1199 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1201 unsigned i, n, m;
1202 affine_fn ret;
1203 tree coef;
1205 if (fnb.length () > fna.length ())
1207 n = fna.length ();
1208 m = fnb.length ();
1210 else
1212 n = fnb.length ();
1213 m = fna.length ();
1216 ret.create (m);
1217 for (i = 0; i < n; i++)
1219 tree type = signed_type_for_types (TREE_TYPE (fna[i]),
1220 TREE_TYPE (fnb[i]));
1221 ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
1224 for (; fna.iterate (i, &coef); i++)
1225 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1226 coef, integer_zero_node));
1227 for (; fnb.iterate (i, &coef); i++)
1228 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1229 integer_zero_node, coef));
1231 return ret;
1234 /* Returns the sum of affine functions FNA and FNB. */
1236 static affine_fn
1237 affine_fn_plus (affine_fn fna, affine_fn fnb)
1239 return affine_fn_op (PLUS_EXPR, fna, fnb);
1242 /* Returns the difference of affine functions FNA and FNB. */
1244 static affine_fn
1245 affine_fn_minus (affine_fn fna, affine_fn fnb)
1247 return affine_fn_op (MINUS_EXPR, fna, fnb);
1250 /* Frees affine function FN. */
1252 static void
1253 affine_fn_free (affine_fn fn)
1255 fn.release ();
1258 /* Determine for each subscript in the data dependence relation DDR
1259 the distance. */
1261 static void
1262 compute_subscript_distance (struct data_dependence_relation *ddr)
1264 conflict_function *cf_a, *cf_b;
1265 affine_fn fn_a, fn_b, diff;
1267 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1269 unsigned int i;
1271 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1273 struct subscript *subscript;
1275 subscript = DDR_SUBSCRIPT (ddr, i);
1276 cf_a = SUB_CONFLICTS_IN_A (subscript);
1277 cf_b = SUB_CONFLICTS_IN_B (subscript);
1279 fn_a = common_affine_function (cf_a);
1280 fn_b = common_affine_function (cf_b);
1281 if (!fn_a.exists () || !fn_b.exists ())
1283 SUB_DISTANCE (subscript) = chrec_dont_know;
1284 return;
1286 diff = affine_fn_minus (fn_a, fn_b);
1288 if (affine_function_constant_p (diff))
1289 SUB_DISTANCE (subscript) = affine_function_base (diff);
1290 else
1291 SUB_DISTANCE (subscript) = chrec_dont_know;
1293 affine_fn_free (diff);
1298 /* Returns the conflict function for "unknown". */
1300 static conflict_function *
1301 conflict_fn_not_known (void)
1303 conflict_function *fn = XCNEW (conflict_function);
1304 fn->n = NOT_KNOWN;
1306 return fn;
1309 /* Returns the conflict function for "independent". */
1311 static conflict_function *
1312 conflict_fn_no_dependence (void)
1314 conflict_function *fn = XCNEW (conflict_function);
1315 fn->n = NO_DEPENDENCE;
1317 return fn;
1320 /* Returns true if the address of OBJ is invariant in LOOP. */
1322 static bool
1323 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1325 while (handled_component_p (obj))
1327 if (TREE_CODE (obj) == ARRAY_REF)
1329 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1330 need to check the stride and the lower bound of the reference. */
1331 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1332 loop->num)
1333 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1334 loop->num))
1335 return false;
1337 else if (TREE_CODE (obj) == COMPONENT_REF)
1339 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1340 loop->num))
1341 return false;
1343 obj = TREE_OPERAND (obj, 0);
1346 if (!INDIRECT_REF_P (obj)
1347 && TREE_CODE (obj) != MEM_REF)
1348 return true;
1350 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1351 loop->num);
1354 /* Returns false if we can prove that data references A and B do not alias,
1355 true otherwise. If LOOP_NEST is false no cross-iteration aliases are
1356 considered. */
1358 bool
1359 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
1360 bool loop_nest)
1362 tree addr_a = DR_BASE_OBJECT (a);
1363 tree addr_b = DR_BASE_OBJECT (b);
1365 /* If we are not processing a loop nest but scalar code we
1366 do not need to care about possible cross-iteration dependences
1367 and thus can process the full original reference. Do so,
1368 similar to how loop invariant motion applies extra offset-based
1369 disambiguation. */
1370 if (!loop_nest)
1372 aff_tree off1, off2;
1373 double_int size1, size2;
1374 get_inner_reference_aff (DR_REF (a), &off1, &size1);
1375 get_inner_reference_aff (DR_REF (b), &off2, &size2);
1376 aff_combination_scale (&off1, double_int_minus_one);
1377 aff_combination_add (&off2, &off1);
1378 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1379 return false;
1382 /* If we had an evolution in a MEM_REF BASE_OBJECT we do not know
1383 the size of the base-object. So we cannot do any offset/overlap
1384 based analysis but have to rely on points-to information only. */
1385 if (TREE_CODE (addr_a) == MEM_REF
1386 && DR_UNCONSTRAINED_BASE (a))
1388 if (TREE_CODE (addr_b) == MEM_REF
1389 && DR_UNCONSTRAINED_BASE (b))
1390 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
1391 TREE_OPERAND (addr_b, 0));
1392 else
1393 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
1394 build_fold_addr_expr (addr_b));
1396 else if (TREE_CODE (addr_b) == MEM_REF
1397 && DR_UNCONSTRAINED_BASE (b))
1398 return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
1399 TREE_OPERAND (addr_b, 0));
1401 /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
1402 that is being subsetted in the loop nest. */
1403 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
1404 return refs_output_dependent_p (addr_a, addr_b);
1405 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
1406 return refs_anti_dependent_p (addr_a, addr_b);
1407 return refs_may_alias_p (addr_a, addr_b);
1410 /* Initialize a data dependence relation between data accesses A and
1411 B. NB_LOOPS is the number of loops surrounding the references: the
1412 size of the classic distance/direction vectors. */
1414 struct data_dependence_relation *
1415 initialize_data_dependence_relation (struct data_reference *a,
1416 struct data_reference *b,
1417 vec<loop_p> loop_nest)
1419 struct data_dependence_relation *res;
1420 unsigned int i;
1422 res = XNEW (struct data_dependence_relation);
1423 DDR_A (res) = a;
1424 DDR_B (res) = b;
1425 DDR_LOOP_NEST (res).create (0);
1426 DDR_REVERSED_P (res) = false;
1427 DDR_SUBSCRIPTS (res).create (0);
1428 DDR_DIR_VECTS (res).create (0);
1429 DDR_DIST_VECTS (res).create (0);
1431 if (a == NULL || b == NULL)
1433 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1434 return res;
1437 /* If the data references do not alias, then they are independent. */
1438 if (!dr_may_alias_p (a, b, loop_nest.exists ()))
1440 DDR_ARE_DEPENDENT (res) = chrec_known;
1441 return res;
1444 /* The case where the references are exactly the same. */
1445 if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1447 if (loop_nest.exists ()
1448 && !object_address_invariant_in_loop_p (loop_nest[0],
1449 DR_BASE_OBJECT (a)))
1451 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1452 return res;
1454 DDR_AFFINE_P (res) = true;
1455 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1456 DDR_SUBSCRIPTS (res).create (DR_NUM_DIMENSIONS (a));
1457 DDR_LOOP_NEST (res) = loop_nest;
1458 DDR_INNER_LOOP (res) = 0;
1459 DDR_SELF_REFERENCE (res) = true;
1460 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1462 struct subscript *subscript;
1464 subscript = XNEW (struct subscript);
1465 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1466 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1467 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1468 SUB_DISTANCE (subscript) = chrec_dont_know;
1469 DDR_SUBSCRIPTS (res).safe_push (subscript);
1471 return res;
1474 /* If the references do not access the same object, we do not know
1475 whether they alias or not. */
1476 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1478 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1479 return res;
1482 /* If the base of the object is not invariant in the loop nest, we cannot
1483 analyze it. TODO -- in fact, it would suffice to record that there may
1484 be arbitrary dependences in the loops where the base object varies. */
1485 if (loop_nest.exists ()
1486 && !object_address_invariant_in_loop_p (loop_nest[0],
1487 DR_BASE_OBJECT (a)))
1489 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1490 return res;
1493 /* If the number of dimensions of the access to not agree we can have
1494 a pointer access to a component of the array element type and an
1495 array access while the base-objects are still the same. Punt. */
1496 if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
1498 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1499 return res;
1502 DDR_AFFINE_P (res) = true;
1503 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1504 DDR_SUBSCRIPTS (res).create (DR_NUM_DIMENSIONS (a));
1505 DDR_LOOP_NEST (res) = loop_nest;
1506 DDR_INNER_LOOP (res) = 0;
1507 DDR_SELF_REFERENCE (res) = false;
1509 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1511 struct subscript *subscript;
1513 subscript = XNEW (struct subscript);
1514 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1515 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1516 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1517 SUB_DISTANCE (subscript) = chrec_dont_know;
1518 DDR_SUBSCRIPTS (res).safe_push (subscript);
1521 return res;
1524 /* Frees memory used by the conflict function F. */
1526 static void
1527 free_conflict_function (conflict_function *f)
1529 unsigned i;
1531 if (CF_NONTRIVIAL_P (f))
1533 for (i = 0; i < f->n; i++)
1534 affine_fn_free (f->fns[i]);
1536 free (f);
1539 /* Frees memory used by SUBSCRIPTS. */
1541 static void
1542 free_subscripts (vec<subscript_p> subscripts)
1544 unsigned i;
1545 subscript_p s;
1547 FOR_EACH_VEC_ELT (subscripts, i, s)
1549 free_conflict_function (s->conflicting_iterations_in_a);
1550 free_conflict_function (s->conflicting_iterations_in_b);
1551 free (s);
1553 subscripts.release ();
1556 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1557 description. */
1559 static inline void
1560 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1561 tree chrec)
1563 DDR_ARE_DEPENDENT (ddr) = chrec;
1564 free_subscripts (DDR_SUBSCRIPTS (ddr));
1565 DDR_SUBSCRIPTS (ddr).create (0);
1568 /* The dependence relation DDR cannot be represented by a distance
1569 vector. */
1571 static inline void
1572 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1574 if (dump_file && (dump_flags & TDF_DETAILS))
1575 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1577 DDR_AFFINE_P (ddr) = false;
1582 /* This section contains the classic Banerjee tests. */
1584 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1585 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1587 static inline bool
1588 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1590 return (evolution_function_is_constant_p (chrec_a)
1591 && evolution_function_is_constant_p (chrec_b));
1594 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1595 variable, i.e., if the SIV (Single Index Variable) test is true. */
1597 static bool
1598 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1600 if ((evolution_function_is_constant_p (chrec_a)
1601 && evolution_function_is_univariate_p (chrec_b))
1602 || (evolution_function_is_constant_p (chrec_b)
1603 && evolution_function_is_univariate_p (chrec_a)))
1604 return true;
1606 if (evolution_function_is_univariate_p (chrec_a)
1607 && evolution_function_is_univariate_p (chrec_b))
1609 switch (TREE_CODE (chrec_a))
1611 case POLYNOMIAL_CHREC:
1612 switch (TREE_CODE (chrec_b))
1614 case POLYNOMIAL_CHREC:
1615 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1616 return false;
1618 default:
1619 return true;
1622 default:
1623 return true;
1627 return false;
1630 /* Creates a conflict function with N dimensions. The affine functions
1631 in each dimension follow. */
1633 static conflict_function *
1634 conflict_fn (unsigned n, ...)
1636 unsigned i;
1637 conflict_function *ret = XCNEW (conflict_function);
1638 va_list ap;
1640 gcc_assert (0 < n && n <= MAX_DIM);
1641 va_start (ap, n);
1643 ret->n = n;
1644 for (i = 0; i < n; i++)
1645 ret->fns[i] = va_arg (ap, affine_fn);
1646 va_end (ap);
1648 return ret;
1651 /* Returns constant affine function with value CST. */
1653 static affine_fn
1654 affine_fn_cst (tree cst)
1656 affine_fn fn;
1657 fn.create (1);
1658 fn.quick_push (cst);
1659 return fn;
1662 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1664 static affine_fn
1665 affine_fn_univar (tree cst, unsigned dim, tree coef)
1667 affine_fn fn;
1668 fn.create (dim + 1);
1669 unsigned i;
1671 gcc_assert (dim > 0);
1672 fn.quick_push (cst);
1673 for (i = 1; i < dim; i++)
1674 fn.quick_push (integer_zero_node);
1675 fn.quick_push (coef);
1676 return fn;
1679 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1680 *OVERLAPS_B are initialized to the functions that describe the
1681 relation between the elements accessed twice by CHREC_A and
1682 CHREC_B. For k >= 0, the following property is verified:
1684 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1686 static void
1687 analyze_ziv_subscript (tree chrec_a,
1688 tree chrec_b,
1689 conflict_function **overlaps_a,
1690 conflict_function **overlaps_b,
1691 tree *last_conflicts)
1693 tree type, difference;
1694 dependence_stats.num_ziv++;
1696 if (dump_file && (dump_flags & TDF_DETAILS))
1697 fprintf (dump_file, "(analyze_ziv_subscript \n");
1699 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1700 chrec_a = chrec_convert (type, chrec_a, NULL);
1701 chrec_b = chrec_convert (type, chrec_b, NULL);
1702 difference = chrec_fold_minus (type, chrec_a, chrec_b);
1704 switch (TREE_CODE (difference))
1706 case INTEGER_CST:
1707 if (integer_zerop (difference))
1709 /* The difference is equal to zero: the accessed index
1710 overlaps for each iteration in the loop. */
1711 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1712 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1713 *last_conflicts = chrec_dont_know;
1714 dependence_stats.num_ziv_dependent++;
1716 else
1718 /* The accesses do not overlap. */
1719 *overlaps_a = conflict_fn_no_dependence ();
1720 *overlaps_b = conflict_fn_no_dependence ();
1721 *last_conflicts = integer_zero_node;
1722 dependence_stats.num_ziv_independent++;
1724 break;
1726 default:
1727 /* We're not sure whether the indexes overlap. For the moment,
1728 conservatively answer "don't know". */
1729 if (dump_file && (dump_flags & TDF_DETAILS))
1730 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1732 *overlaps_a = conflict_fn_not_known ();
1733 *overlaps_b = conflict_fn_not_known ();
1734 *last_conflicts = chrec_dont_know;
1735 dependence_stats.num_ziv_unimplemented++;
1736 break;
1739 if (dump_file && (dump_flags & TDF_DETAILS))
1740 fprintf (dump_file, ")\n");
1743 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
1744 and only if it fits to the int type. If this is not the case, or the
1745 bound on the number of iterations of LOOP could not be derived, returns
1746 chrec_dont_know. */
1748 static tree
1749 max_stmt_executions_tree (struct loop *loop)
1751 double_int nit;
1753 if (!max_stmt_executions (loop, &nit))
1754 return chrec_dont_know;
1756 if (!double_int_fits_to_tree_p (unsigned_type_node, nit))
1757 return chrec_dont_know;
1759 return double_int_to_tree (unsigned_type_node, nit);
1762 /* Determine whether the CHREC is always positive/negative. If the expression
1763 cannot be statically analyzed, return false, otherwise set the answer into
1764 VALUE. */
1766 static bool
1767 chrec_is_positive (tree chrec, bool *value)
1769 bool value0, value1, value2;
1770 tree end_value, nb_iter;
1772 switch (TREE_CODE (chrec))
1774 case POLYNOMIAL_CHREC:
1775 if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
1776 || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
1777 return false;
1779 /* FIXME -- overflows. */
1780 if (value0 == value1)
1782 *value = value0;
1783 return true;
1786 /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
1787 and the proof consists in showing that the sign never
1788 changes during the execution of the loop, from 0 to
1789 loop->nb_iterations. */
1790 if (!evolution_function_is_affine_p (chrec))
1791 return false;
1793 nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
1794 if (chrec_contains_undetermined (nb_iter))
1795 return false;
1797 #if 0
1798 /* TODO -- If the test is after the exit, we may decrease the number of
1799 iterations by one. */
1800 if (after_exit)
1801 nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
1802 #endif
1804 end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
1806 if (!chrec_is_positive (end_value, &value2))
1807 return false;
1809 *value = value0;
1810 return value0 == value1;
1812 case INTEGER_CST:
1813 switch (tree_int_cst_sgn (chrec))
1815 case -1:
1816 *value = false;
1817 break;
1818 case 1:
1819 *value = true;
1820 break;
1821 default:
1822 return false;
1824 return true;
1826 default:
1827 return false;
1832 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1833 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1834 *OVERLAPS_B are initialized to the functions that describe the
1835 relation between the elements accessed twice by CHREC_A and
1836 CHREC_B. For k >= 0, the following property is verified:
1838 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1840 static void
1841 analyze_siv_subscript_cst_affine (tree chrec_a,
1842 tree chrec_b,
1843 conflict_function **overlaps_a,
1844 conflict_function **overlaps_b,
1845 tree *last_conflicts)
1847 bool value0, value1, value2;
1848 tree type, difference, tmp;
1850 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1851 chrec_a = chrec_convert (type, chrec_a, NULL);
1852 chrec_b = chrec_convert (type, chrec_b, NULL);
1853 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1855 /* Special case overlap in the first iteration. */
1856 if (integer_zerop (difference))
1858 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1859 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1860 *last_conflicts = integer_one_node;
1861 return;
1864 if (!chrec_is_positive (initial_condition (difference), &value0))
1866 if (dump_file && (dump_flags & TDF_DETAILS))
1867 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1869 dependence_stats.num_siv_unimplemented++;
1870 *overlaps_a = conflict_fn_not_known ();
1871 *overlaps_b = conflict_fn_not_known ();
1872 *last_conflicts = chrec_dont_know;
1873 return;
1875 else
1877 if (value0 == false)
1879 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1881 if (dump_file && (dump_flags & TDF_DETAILS))
1882 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1884 *overlaps_a = conflict_fn_not_known ();
1885 *overlaps_b = conflict_fn_not_known ();
1886 *last_conflicts = chrec_dont_know;
1887 dependence_stats.num_siv_unimplemented++;
1888 return;
1890 else
1892 if (value1 == true)
1894 /* Example:
1895 chrec_a = 12
1896 chrec_b = {10, +, 1}
1899 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1901 HOST_WIDE_INT numiter;
1902 struct loop *loop = get_chrec_loop (chrec_b);
1904 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1905 tmp = fold_build2 (EXACT_DIV_EXPR, type,
1906 fold_build1 (ABS_EXPR, type, difference),
1907 CHREC_RIGHT (chrec_b));
1908 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1909 *last_conflicts = integer_one_node;
1912 /* Perform weak-zero siv test to see if overlap is
1913 outside the loop bounds. */
1914 numiter = max_stmt_executions_int (loop);
1916 if (numiter >= 0
1917 && compare_tree_int (tmp, numiter) > 0)
1919 free_conflict_function (*overlaps_a);
1920 free_conflict_function (*overlaps_b);
1921 *overlaps_a = conflict_fn_no_dependence ();
1922 *overlaps_b = conflict_fn_no_dependence ();
1923 *last_conflicts = integer_zero_node;
1924 dependence_stats.num_siv_independent++;
1925 return;
1927 dependence_stats.num_siv_dependent++;
1928 return;
1931 /* When the step does not divide the difference, there are
1932 no overlaps. */
1933 else
1935 *overlaps_a = conflict_fn_no_dependence ();
1936 *overlaps_b = conflict_fn_no_dependence ();
1937 *last_conflicts = integer_zero_node;
1938 dependence_stats.num_siv_independent++;
1939 return;
1943 else
1945 /* Example:
1946 chrec_a = 12
1947 chrec_b = {10, +, -1}
1949 In this case, chrec_a will not overlap with chrec_b. */
1950 *overlaps_a = conflict_fn_no_dependence ();
1951 *overlaps_b = conflict_fn_no_dependence ();
1952 *last_conflicts = integer_zero_node;
1953 dependence_stats.num_siv_independent++;
1954 return;
1958 else
1960 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1962 if (dump_file && (dump_flags & TDF_DETAILS))
1963 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1965 *overlaps_a = conflict_fn_not_known ();
1966 *overlaps_b = conflict_fn_not_known ();
1967 *last_conflicts = chrec_dont_know;
1968 dependence_stats.num_siv_unimplemented++;
1969 return;
1971 else
1973 if (value2 == false)
1975 /* Example:
1976 chrec_a = 3
1977 chrec_b = {10, +, -1}
1979 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1981 HOST_WIDE_INT numiter;
1982 struct loop *loop = get_chrec_loop (chrec_b);
1984 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1985 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
1986 CHREC_RIGHT (chrec_b));
1987 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1988 *last_conflicts = integer_one_node;
1990 /* Perform weak-zero siv test to see if overlap is
1991 outside the loop bounds. */
1992 numiter = max_stmt_executions_int (loop);
1994 if (numiter >= 0
1995 && compare_tree_int (tmp, numiter) > 0)
1997 free_conflict_function (*overlaps_a);
1998 free_conflict_function (*overlaps_b);
1999 *overlaps_a = conflict_fn_no_dependence ();
2000 *overlaps_b = conflict_fn_no_dependence ();
2001 *last_conflicts = integer_zero_node;
2002 dependence_stats.num_siv_independent++;
2003 return;
2005 dependence_stats.num_siv_dependent++;
2006 return;
2009 /* When the step does not divide the difference, there
2010 are no overlaps. */
2011 else
2013 *overlaps_a = conflict_fn_no_dependence ();
2014 *overlaps_b = conflict_fn_no_dependence ();
2015 *last_conflicts = integer_zero_node;
2016 dependence_stats.num_siv_independent++;
2017 return;
2020 else
2022 /* Example:
2023 chrec_a = 3
2024 chrec_b = {4, +, 1}
2026 In this case, chrec_a will not overlap with chrec_b. */
2027 *overlaps_a = conflict_fn_no_dependence ();
2028 *overlaps_b = conflict_fn_no_dependence ();
2029 *last_conflicts = integer_zero_node;
2030 dependence_stats.num_siv_independent++;
2031 return;
2038 /* Helper recursive function for initializing the matrix A. Returns
2039 the initial value of CHREC. */
2041 static tree
2042 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
2044 gcc_assert (chrec);
2046 switch (TREE_CODE (chrec))
2048 case POLYNOMIAL_CHREC:
2049 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
2051 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
2052 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
2054 case PLUS_EXPR:
2055 case MULT_EXPR:
2056 case MINUS_EXPR:
2058 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2059 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
2061 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
2064 case NOP_EXPR:
2066 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2067 return chrec_convert (chrec_type (chrec), op, NULL);
2070 case BIT_NOT_EXPR:
2072 /* Handle ~X as -1 - X. */
2073 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2074 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
2075 build_int_cst (TREE_TYPE (chrec), -1), op);
2078 case INTEGER_CST:
2079 return chrec;
2081 default:
2082 gcc_unreachable ();
2083 return NULL_TREE;
2087 #define FLOOR_DIV(x,y) ((x) / (y))
2089 /* Solves the special case of the Diophantine equation:
2090 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
2092 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
2093 number of iterations that loops X and Y run. The overlaps will be
2094 constructed as evolutions in dimension DIM. */
2096 static void
2097 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
2098 affine_fn *overlaps_a,
2099 affine_fn *overlaps_b,
2100 tree *last_conflicts, int dim)
2102 if (((step_a > 0 && step_b > 0)
2103 || (step_a < 0 && step_b < 0)))
2105 int step_overlaps_a, step_overlaps_b;
2106 int gcd_steps_a_b, last_conflict, tau2;
2108 gcd_steps_a_b = gcd (step_a, step_b);
2109 step_overlaps_a = step_b / gcd_steps_a_b;
2110 step_overlaps_b = step_a / gcd_steps_a_b;
2112 if (niter > 0)
2114 tau2 = FLOOR_DIV (niter, step_overlaps_a);
2115 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
2116 last_conflict = tau2;
2117 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2119 else
2120 *last_conflicts = chrec_dont_know;
2122 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
2123 build_int_cst (NULL_TREE,
2124 step_overlaps_a));
2125 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
2126 build_int_cst (NULL_TREE,
2127 step_overlaps_b));
2130 else
2132 *overlaps_a = affine_fn_cst (integer_zero_node);
2133 *overlaps_b = affine_fn_cst (integer_zero_node);
2134 *last_conflicts = integer_zero_node;
2138 /* Solves the special case of a Diophantine equation where CHREC_A is
2139 an affine bivariate function, and CHREC_B is an affine univariate
2140 function. For example,
2142 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
2144 has the following overlapping functions:
2146 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
2147 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
2148 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
2150 FORNOW: This is a specialized implementation for a case occurring in
2151 a common benchmark. Implement the general algorithm. */
2153 static void
2154 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
2155 conflict_function **overlaps_a,
2156 conflict_function **overlaps_b,
2157 tree *last_conflicts)
2159 bool xz_p, yz_p, xyz_p;
2160 int step_x, step_y, step_z;
2161 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
2162 affine_fn overlaps_a_xz, overlaps_b_xz;
2163 affine_fn overlaps_a_yz, overlaps_b_yz;
2164 affine_fn overlaps_a_xyz, overlaps_b_xyz;
2165 affine_fn ova1, ova2, ovb;
2166 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
2168 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
2169 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
2170 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
2172 niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
2173 niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
2174 niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
2176 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
2178 if (dump_file && (dump_flags & TDF_DETAILS))
2179 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
2181 *overlaps_a = conflict_fn_not_known ();
2182 *overlaps_b = conflict_fn_not_known ();
2183 *last_conflicts = chrec_dont_know;
2184 return;
2187 niter = MIN (niter_x, niter_z);
2188 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
2189 &overlaps_a_xz,
2190 &overlaps_b_xz,
2191 &last_conflicts_xz, 1);
2192 niter = MIN (niter_y, niter_z);
2193 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
2194 &overlaps_a_yz,
2195 &overlaps_b_yz,
2196 &last_conflicts_yz, 2);
2197 niter = MIN (niter_x, niter_z);
2198 niter = MIN (niter_y, niter);
2199 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
2200 &overlaps_a_xyz,
2201 &overlaps_b_xyz,
2202 &last_conflicts_xyz, 3);
2204 xz_p = !integer_zerop (last_conflicts_xz);
2205 yz_p = !integer_zerop (last_conflicts_yz);
2206 xyz_p = !integer_zerop (last_conflicts_xyz);
2208 if (xz_p || yz_p || xyz_p)
2210 ova1 = affine_fn_cst (integer_zero_node);
2211 ova2 = affine_fn_cst (integer_zero_node);
2212 ovb = affine_fn_cst (integer_zero_node);
2213 if (xz_p)
2215 affine_fn t0 = ova1;
2216 affine_fn t2 = ovb;
2218 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2219 ovb = affine_fn_plus (ovb, overlaps_b_xz);
2220 affine_fn_free (t0);
2221 affine_fn_free (t2);
2222 *last_conflicts = last_conflicts_xz;
2224 if (yz_p)
2226 affine_fn t0 = ova2;
2227 affine_fn t2 = ovb;
2229 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2230 ovb = affine_fn_plus (ovb, overlaps_b_yz);
2231 affine_fn_free (t0);
2232 affine_fn_free (t2);
2233 *last_conflicts = last_conflicts_yz;
2235 if (xyz_p)
2237 affine_fn t0 = ova1;
2238 affine_fn t2 = ova2;
2239 affine_fn t4 = ovb;
2241 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2242 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2243 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2244 affine_fn_free (t0);
2245 affine_fn_free (t2);
2246 affine_fn_free (t4);
2247 *last_conflicts = last_conflicts_xyz;
2249 *overlaps_a = conflict_fn (2, ova1, ova2);
2250 *overlaps_b = conflict_fn (1, ovb);
2252 else
2254 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2255 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2256 *last_conflicts = integer_zero_node;
2259 affine_fn_free (overlaps_a_xz);
2260 affine_fn_free (overlaps_b_xz);
2261 affine_fn_free (overlaps_a_yz);
2262 affine_fn_free (overlaps_b_yz);
2263 affine_fn_free (overlaps_a_xyz);
2264 affine_fn_free (overlaps_b_xyz);
2267 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
2269 static void
2270 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
2271 int size)
2273 memcpy (vec2, vec1, size * sizeof (*vec1));
2276 /* Copy the elements of M x N matrix MAT1 to MAT2. */
2278 static void
2279 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
2280 int m, int n)
2282 int i;
2284 for (i = 0; i < m; i++)
2285 lambda_vector_copy (mat1[i], mat2[i], n);
2288 /* Store the N x N identity matrix in MAT. */
2290 static void
2291 lambda_matrix_id (lambda_matrix mat, int size)
2293 int i, j;
2295 for (i = 0; i < size; i++)
2296 for (j = 0; j < size; j++)
2297 mat[i][j] = (i == j) ? 1 : 0;
2300 /* Return the first nonzero element of vector VEC1 between START and N.
2301 We must have START <= N. Returns N if VEC1 is the zero vector. */
2303 static int
2304 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
2306 int j = start;
2307 while (j < n && vec1[j] == 0)
2308 j++;
2309 return j;
2312 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2313 R2 = R2 + CONST1 * R1. */
2315 static void
2316 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
2318 int i;
2320 if (const1 == 0)
2321 return;
2323 for (i = 0; i < n; i++)
2324 mat[r2][i] += const1 * mat[r1][i];
2327 /* Swap rows R1 and R2 in matrix MAT. */
2329 static void
2330 lambda_matrix_row_exchange (lambda_matrix mat, int r1, int r2)
2332 lambda_vector row;
2334 row = mat[r1];
2335 mat[r1] = mat[r2];
2336 mat[r2] = row;
2339 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
2340 and store the result in VEC2. */
2342 static void
2343 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
2344 int size, int const1)
2346 int i;
2348 if (const1 == 0)
2349 lambda_vector_clear (vec2, size);
2350 else
2351 for (i = 0; i < size; i++)
2352 vec2[i] = const1 * vec1[i];
2355 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
2357 static void
2358 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
2359 int size)
2361 lambda_vector_mult_const (vec1, vec2, size, -1);
2364 /* Negate row R1 of matrix MAT which has N columns. */
2366 static void
2367 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
2369 lambda_vector_negate (mat[r1], mat[r1], n);
2372 /* Return true if two vectors are equal. */
2374 static bool
2375 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
2377 int i;
2378 for (i = 0; i < size; i++)
2379 if (vec1[i] != vec2[i])
2380 return false;
2381 return true;
2384 /* Given an M x N integer matrix A, this function determines an M x
2385 M unimodular matrix U, and an M x N echelon matrix S such that
2386 "U.A = S". This decomposition is also known as "right Hermite".
2388 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2389 Restructuring Compilers" Utpal Banerjee. */
2391 static void
2392 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
2393 lambda_matrix S, lambda_matrix U)
2395 int i, j, i0 = 0;
2397 lambda_matrix_copy (A, S, m, n);
2398 lambda_matrix_id (U, m);
2400 for (j = 0; j < n; j++)
2402 if (lambda_vector_first_nz (S[j], m, i0) < m)
2404 ++i0;
2405 for (i = m - 1; i >= i0; i--)
2407 while (S[i][j] != 0)
2409 int sigma, factor, a, b;
2411 a = S[i-1][j];
2412 b = S[i][j];
2413 sigma = (a * b < 0) ? -1: 1;
2414 a = abs (a);
2415 b = abs (b);
2416 factor = sigma * (a / b);
2418 lambda_matrix_row_add (S, n, i, i-1, -factor);
2419 lambda_matrix_row_exchange (S, i, i-1);
2421 lambda_matrix_row_add (U, m, i, i-1, -factor);
2422 lambda_matrix_row_exchange (U, i, i-1);
2429 /* Determines the overlapping elements due to accesses CHREC_A and
2430 CHREC_B, that are affine functions. This function cannot handle
2431 symbolic evolution functions, ie. when initial conditions are
2432 parameters, because it uses lambda matrices of integers. */
2434 static void
2435 analyze_subscript_affine_affine (tree chrec_a,
2436 tree chrec_b,
2437 conflict_function **overlaps_a,
2438 conflict_function **overlaps_b,
2439 tree *last_conflicts)
2441 unsigned nb_vars_a, nb_vars_b, dim;
2442 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2443 lambda_matrix A, U, S;
2444 struct obstack scratch_obstack;
2446 if (eq_evolutions_p (chrec_a, chrec_b))
2448 /* The accessed index overlaps for each iteration in the
2449 loop. */
2450 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2451 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2452 *last_conflicts = chrec_dont_know;
2453 return;
2455 if (dump_file && (dump_flags & TDF_DETAILS))
2456 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2458 /* For determining the initial intersection, we have to solve a
2459 Diophantine equation. This is the most time consuming part.
2461 For answering to the question: "Is there a dependence?" we have
2462 to prove that there exists a solution to the Diophantine
2463 equation, and that the solution is in the iteration domain,
2464 i.e. the solution is positive or zero, and that the solution
2465 happens before the upper bound loop.nb_iterations. Otherwise
2466 there is no dependence. This function outputs a description of
2467 the iterations that hold the intersections. */
2469 nb_vars_a = nb_vars_in_chrec (chrec_a);
2470 nb_vars_b = nb_vars_in_chrec (chrec_b);
2472 gcc_obstack_init (&scratch_obstack);
2474 dim = nb_vars_a + nb_vars_b;
2475 U = lambda_matrix_new (dim, dim, &scratch_obstack);
2476 A = lambda_matrix_new (dim, 1, &scratch_obstack);
2477 S = lambda_matrix_new (dim, 1, &scratch_obstack);
2479 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2480 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
2481 gamma = init_b - init_a;
2483 /* Don't do all the hard work of solving the Diophantine equation
2484 when we already know the solution: for example,
2485 | {3, +, 1}_1
2486 | {3, +, 4}_2
2487 | gamma = 3 - 3 = 0.
2488 Then the first overlap occurs during the first iterations:
2489 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2491 if (gamma == 0)
2493 if (nb_vars_a == 1 && nb_vars_b == 1)
2495 HOST_WIDE_INT step_a, step_b;
2496 HOST_WIDE_INT niter, niter_a, niter_b;
2497 affine_fn ova, ovb;
2499 niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
2500 niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
2501 niter = MIN (niter_a, niter_b);
2502 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2503 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2505 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2506 &ova, &ovb,
2507 last_conflicts, 1);
2508 *overlaps_a = conflict_fn (1, ova);
2509 *overlaps_b = conflict_fn (1, ovb);
2512 else if (nb_vars_a == 2 && nb_vars_b == 1)
2513 compute_overlap_steps_for_affine_1_2
2514 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2516 else if (nb_vars_a == 1 && nb_vars_b == 2)
2517 compute_overlap_steps_for_affine_1_2
2518 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2520 else
2522 if (dump_file && (dump_flags & TDF_DETAILS))
2523 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2524 *overlaps_a = conflict_fn_not_known ();
2525 *overlaps_b = conflict_fn_not_known ();
2526 *last_conflicts = chrec_dont_know;
2528 goto end_analyze_subs_aa;
2531 /* U.A = S */
2532 lambda_matrix_right_hermite (A, dim, 1, S, U);
2534 if (S[0][0] < 0)
2536 S[0][0] *= -1;
2537 lambda_matrix_row_negate (U, dim, 0);
2539 gcd_alpha_beta = S[0][0];
2541 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2542 but that is a quite strange case. Instead of ICEing, answer
2543 don't know. */
2544 if (gcd_alpha_beta == 0)
2546 *overlaps_a = conflict_fn_not_known ();
2547 *overlaps_b = conflict_fn_not_known ();
2548 *last_conflicts = chrec_dont_know;
2549 goto end_analyze_subs_aa;
2552 /* The classic "gcd-test". */
2553 if (!int_divides_p (gcd_alpha_beta, gamma))
2555 /* The "gcd-test" has determined that there is no integer
2556 solution, i.e. there is no dependence. */
2557 *overlaps_a = conflict_fn_no_dependence ();
2558 *overlaps_b = conflict_fn_no_dependence ();
2559 *last_conflicts = integer_zero_node;
2562 /* Both access functions are univariate. This includes SIV and MIV cases. */
2563 else if (nb_vars_a == 1 && nb_vars_b == 1)
2565 /* Both functions should have the same evolution sign. */
2566 if (((A[0][0] > 0 && -A[1][0] > 0)
2567 || (A[0][0] < 0 && -A[1][0] < 0)))
2569 /* The solutions are given by:
2571 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2572 | [u21 u22] [y0]
2574 For a given integer t. Using the following variables,
2576 | i0 = u11 * gamma / gcd_alpha_beta
2577 | j0 = u12 * gamma / gcd_alpha_beta
2578 | i1 = u21
2579 | j1 = u22
2581 the solutions are:
2583 | x0 = i0 + i1 * t,
2584 | y0 = j0 + j1 * t. */
2585 HOST_WIDE_INT i0, j0, i1, j1;
2587 i0 = U[0][0] * gamma / gcd_alpha_beta;
2588 j0 = U[0][1] * gamma / gcd_alpha_beta;
2589 i1 = U[1][0];
2590 j1 = U[1][1];
2592 if ((i1 == 0 && i0 < 0)
2593 || (j1 == 0 && j0 < 0))
2595 /* There is no solution.
2596 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2597 falls in here, but for the moment we don't look at the
2598 upper bound of the iteration domain. */
2599 *overlaps_a = conflict_fn_no_dependence ();
2600 *overlaps_b = conflict_fn_no_dependence ();
2601 *last_conflicts = integer_zero_node;
2602 goto end_analyze_subs_aa;
2605 if (i1 > 0 && j1 > 0)
2607 HOST_WIDE_INT niter_a
2608 = max_stmt_executions_int (get_chrec_loop (chrec_a));
2609 HOST_WIDE_INT niter_b
2610 = max_stmt_executions_int (get_chrec_loop (chrec_b));
2611 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2613 /* (X0, Y0) is a solution of the Diophantine equation:
2614 "chrec_a (X0) = chrec_b (Y0)". */
2615 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2616 CEIL (-j0, j1));
2617 HOST_WIDE_INT x0 = i1 * tau1 + i0;
2618 HOST_WIDE_INT y0 = j1 * tau1 + j0;
2620 /* (X1, Y1) is the smallest positive solution of the eq
2621 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2622 first conflict occurs. */
2623 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2624 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2625 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2627 if (niter > 0)
2629 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2630 FLOOR_DIV (niter - j0, j1));
2631 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2633 /* If the overlap occurs outside of the bounds of the
2634 loop, there is no dependence. */
2635 if (x1 >= niter || y1 >= niter)
2637 *overlaps_a = conflict_fn_no_dependence ();
2638 *overlaps_b = conflict_fn_no_dependence ();
2639 *last_conflicts = integer_zero_node;
2640 goto end_analyze_subs_aa;
2642 else
2643 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2645 else
2646 *last_conflicts = chrec_dont_know;
2648 *overlaps_a
2649 = conflict_fn (1,
2650 affine_fn_univar (build_int_cst (NULL_TREE, x1),
2652 build_int_cst (NULL_TREE, i1)));
2653 *overlaps_b
2654 = conflict_fn (1,
2655 affine_fn_univar (build_int_cst (NULL_TREE, y1),
2657 build_int_cst (NULL_TREE, j1)));
2659 else
2661 /* FIXME: For the moment, the upper bound of the
2662 iteration domain for i and j is not checked. */
2663 if (dump_file && (dump_flags & TDF_DETAILS))
2664 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2665 *overlaps_a = conflict_fn_not_known ();
2666 *overlaps_b = conflict_fn_not_known ();
2667 *last_conflicts = chrec_dont_know;
2670 else
2672 if (dump_file && (dump_flags & TDF_DETAILS))
2673 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2674 *overlaps_a = conflict_fn_not_known ();
2675 *overlaps_b = conflict_fn_not_known ();
2676 *last_conflicts = chrec_dont_know;
2679 else
2681 if (dump_file && (dump_flags & TDF_DETAILS))
2682 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2683 *overlaps_a = conflict_fn_not_known ();
2684 *overlaps_b = conflict_fn_not_known ();
2685 *last_conflicts = chrec_dont_know;
2688 end_analyze_subs_aa:
2689 obstack_free (&scratch_obstack, NULL);
2690 if (dump_file && (dump_flags & TDF_DETAILS))
2692 fprintf (dump_file, " (overlaps_a = ");
2693 dump_conflict_function (dump_file, *overlaps_a);
2694 fprintf (dump_file, ")\n (overlaps_b = ");
2695 dump_conflict_function (dump_file, *overlaps_b);
2696 fprintf (dump_file, "))\n");
2700 /* Returns true when analyze_subscript_affine_affine can be used for
2701 determining the dependence relation between chrec_a and chrec_b,
2702 that contain symbols. This function modifies chrec_a and chrec_b
2703 such that the analysis result is the same, and such that they don't
2704 contain symbols, and then can safely be passed to the analyzer.
2706 Example: The analysis of the following tuples of evolutions produce
2707 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2708 vs. {0, +, 1}_1
2710 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2711 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2714 static bool
2715 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2717 tree diff, type, left_a, left_b, right_b;
2719 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2720 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2721 /* FIXME: For the moment not handled. Might be refined later. */
2722 return false;
2724 type = chrec_type (*chrec_a);
2725 left_a = CHREC_LEFT (*chrec_a);
2726 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
2727 diff = chrec_fold_minus (type, left_a, left_b);
2729 if (!evolution_function_is_constant_p (diff))
2730 return false;
2732 if (dump_file && (dump_flags & TDF_DETAILS))
2733 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2735 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2736 diff, CHREC_RIGHT (*chrec_a));
2737 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
2738 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2739 build_int_cst (type, 0),
2740 right_b);
2741 return true;
2744 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2745 *OVERLAPS_B are initialized to the functions that describe the
2746 relation between the elements accessed twice by CHREC_A and
2747 CHREC_B. For k >= 0, the following property is verified:
2749 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2751 static void
2752 analyze_siv_subscript (tree chrec_a,
2753 tree chrec_b,
2754 conflict_function **overlaps_a,
2755 conflict_function **overlaps_b,
2756 tree *last_conflicts,
2757 int loop_nest_num)
2759 dependence_stats.num_siv++;
2761 if (dump_file && (dump_flags & TDF_DETAILS))
2762 fprintf (dump_file, "(analyze_siv_subscript \n");
2764 if (evolution_function_is_constant_p (chrec_a)
2765 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2766 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2767 overlaps_a, overlaps_b, last_conflicts);
2769 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2770 && evolution_function_is_constant_p (chrec_b))
2771 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2772 overlaps_b, overlaps_a, last_conflicts);
2774 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2775 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2777 if (!chrec_contains_symbols (chrec_a)
2778 && !chrec_contains_symbols (chrec_b))
2780 analyze_subscript_affine_affine (chrec_a, chrec_b,
2781 overlaps_a, overlaps_b,
2782 last_conflicts);
2784 if (CF_NOT_KNOWN_P (*overlaps_a)
2785 || CF_NOT_KNOWN_P (*overlaps_b))
2786 dependence_stats.num_siv_unimplemented++;
2787 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2788 || CF_NO_DEPENDENCE_P (*overlaps_b))
2789 dependence_stats.num_siv_independent++;
2790 else
2791 dependence_stats.num_siv_dependent++;
2793 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2794 &chrec_b))
2796 analyze_subscript_affine_affine (chrec_a, chrec_b,
2797 overlaps_a, overlaps_b,
2798 last_conflicts);
2800 if (CF_NOT_KNOWN_P (*overlaps_a)
2801 || CF_NOT_KNOWN_P (*overlaps_b))
2802 dependence_stats.num_siv_unimplemented++;
2803 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2804 || CF_NO_DEPENDENCE_P (*overlaps_b))
2805 dependence_stats.num_siv_independent++;
2806 else
2807 dependence_stats.num_siv_dependent++;
2809 else
2810 goto siv_subscript_dontknow;
2813 else
2815 siv_subscript_dontknow:;
2816 if (dump_file && (dump_flags & TDF_DETAILS))
2817 fprintf (dump_file, " siv test failed: unimplemented");
2818 *overlaps_a = conflict_fn_not_known ();
2819 *overlaps_b = conflict_fn_not_known ();
2820 *last_conflicts = chrec_dont_know;
2821 dependence_stats.num_siv_unimplemented++;
2824 if (dump_file && (dump_flags & TDF_DETAILS))
2825 fprintf (dump_file, ")\n");
2828 /* Returns false if we can prove that the greatest common divisor of the steps
2829 of CHREC does not divide CST, false otherwise. */
2831 static bool
2832 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2834 HOST_WIDE_INT cd = 0, val;
2835 tree step;
2837 if (!host_integerp (cst, 0))
2838 return true;
2839 val = tree_low_cst (cst, 0);
2841 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2843 step = CHREC_RIGHT (chrec);
2844 if (!host_integerp (step, 0))
2845 return true;
2846 cd = gcd (cd, tree_low_cst (step, 0));
2847 chrec = CHREC_LEFT (chrec);
2850 return val % cd == 0;
2853 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2854 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2855 functions that describe the relation between the elements accessed
2856 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2857 is verified:
2859 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2861 static void
2862 analyze_miv_subscript (tree chrec_a,
2863 tree chrec_b,
2864 conflict_function **overlaps_a,
2865 conflict_function **overlaps_b,
2866 tree *last_conflicts,
2867 struct loop *loop_nest)
2869 tree type, difference;
2871 dependence_stats.num_miv++;
2872 if (dump_file && (dump_flags & TDF_DETAILS))
2873 fprintf (dump_file, "(analyze_miv_subscript \n");
2875 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2876 chrec_a = chrec_convert (type, chrec_a, NULL);
2877 chrec_b = chrec_convert (type, chrec_b, NULL);
2878 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2880 if (eq_evolutions_p (chrec_a, chrec_b))
2882 /* Access functions are the same: all the elements are accessed
2883 in the same order. */
2884 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2885 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2886 *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
2887 dependence_stats.num_miv_dependent++;
2890 else if (evolution_function_is_constant_p (difference)
2891 /* For the moment, the following is verified:
2892 evolution_function_is_affine_multivariate_p (chrec_a,
2893 loop_nest->num) */
2894 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2896 /* testsuite/.../ssa-chrec-33.c
2897 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2899 The difference is 1, and all the evolution steps are multiples
2900 of 2, consequently there are no overlapping elements. */
2901 *overlaps_a = conflict_fn_no_dependence ();
2902 *overlaps_b = conflict_fn_no_dependence ();
2903 *last_conflicts = integer_zero_node;
2904 dependence_stats.num_miv_independent++;
2907 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2908 && !chrec_contains_symbols (chrec_a)
2909 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2910 && !chrec_contains_symbols (chrec_b))
2912 /* testsuite/.../ssa-chrec-35.c
2913 {0, +, 1}_2 vs. {0, +, 1}_3
2914 the overlapping elements are respectively located at iterations:
2915 {0, +, 1}_x and {0, +, 1}_x,
2916 in other words, we have the equality:
2917 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2919 Other examples:
2920 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2921 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2923 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2924 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2926 analyze_subscript_affine_affine (chrec_a, chrec_b,
2927 overlaps_a, overlaps_b, last_conflicts);
2929 if (CF_NOT_KNOWN_P (*overlaps_a)
2930 || CF_NOT_KNOWN_P (*overlaps_b))
2931 dependence_stats.num_miv_unimplemented++;
2932 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2933 || CF_NO_DEPENDENCE_P (*overlaps_b))
2934 dependence_stats.num_miv_independent++;
2935 else
2936 dependence_stats.num_miv_dependent++;
2939 else
2941 /* When the analysis is too difficult, answer "don't know". */
2942 if (dump_file && (dump_flags & TDF_DETAILS))
2943 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2945 *overlaps_a = conflict_fn_not_known ();
2946 *overlaps_b = conflict_fn_not_known ();
2947 *last_conflicts = chrec_dont_know;
2948 dependence_stats.num_miv_unimplemented++;
2951 if (dump_file && (dump_flags & TDF_DETAILS))
2952 fprintf (dump_file, ")\n");
2955 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2956 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2957 OVERLAP_ITERATIONS_B are initialized with two functions that
2958 describe the iterations that contain conflicting elements.
2960 Remark: For an integer k >= 0, the following equality is true:
2962 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2965 static void
2966 analyze_overlapping_iterations (tree chrec_a,
2967 tree chrec_b,
2968 conflict_function **overlap_iterations_a,
2969 conflict_function **overlap_iterations_b,
2970 tree *last_conflicts, struct loop *loop_nest)
2972 unsigned int lnn = loop_nest->num;
2974 dependence_stats.num_subscript_tests++;
2976 if (dump_file && (dump_flags & TDF_DETAILS))
2978 fprintf (dump_file, "(analyze_overlapping_iterations \n");
2979 fprintf (dump_file, " (chrec_a = ");
2980 print_generic_expr (dump_file, chrec_a, 0);
2981 fprintf (dump_file, ")\n (chrec_b = ");
2982 print_generic_expr (dump_file, chrec_b, 0);
2983 fprintf (dump_file, ")\n");
2986 if (chrec_a == NULL_TREE
2987 || chrec_b == NULL_TREE
2988 || chrec_contains_undetermined (chrec_a)
2989 || chrec_contains_undetermined (chrec_b))
2991 dependence_stats.num_subscript_undetermined++;
2993 *overlap_iterations_a = conflict_fn_not_known ();
2994 *overlap_iterations_b = conflict_fn_not_known ();
2997 /* If they are the same chrec, and are affine, they overlap
2998 on every iteration. */
2999 else if (eq_evolutions_p (chrec_a, chrec_b)
3000 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
3001 || operand_equal_p (chrec_a, chrec_b, 0)))
3003 dependence_stats.num_same_subscript_function++;
3004 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3005 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3006 *last_conflicts = chrec_dont_know;
3009 /* If they aren't the same, and aren't affine, we can't do anything
3010 yet. */
3011 else if ((chrec_contains_symbols (chrec_a)
3012 || chrec_contains_symbols (chrec_b))
3013 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
3014 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
3016 dependence_stats.num_subscript_undetermined++;
3017 *overlap_iterations_a = conflict_fn_not_known ();
3018 *overlap_iterations_b = conflict_fn_not_known ();
3021 else if (ziv_subscript_p (chrec_a, chrec_b))
3022 analyze_ziv_subscript (chrec_a, chrec_b,
3023 overlap_iterations_a, overlap_iterations_b,
3024 last_conflicts);
3026 else if (siv_subscript_p (chrec_a, chrec_b))
3027 analyze_siv_subscript (chrec_a, chrec_b,
3028 overlap_iterations_a, overlap_iterations_b,
3029 last_conflicts, lnn);
3031 else
3032 analyze_miv_subscript (chrec_a, chrec_b,
3033 overlap_iterations_a, overlap_iterations_b,
3034 last_conflicts, loop_nest);
3036 if (dump_file && (dump_flags & TDF_DETAILS))
3038 fprintf (dump_file, " (overlap_iterations_a = ");
3039 dump_conflict_function (dump_file, *overlap_iterations_a);
3040 fprintf (dump_file, ")\n (overlap_iterations_b = ");
3041 dump_conflict_function (dump_file, *overlap_iterations_b);
3042 fprintf (dump_file, "))\n");
3046 /* Helper function for uniquely inserting distance vectors. */
3048 static void
3049 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
3051 unsigned i;
3052 lambda_vector v;
3054 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, v)
3055 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
3056 return;
3058 DDR_DIST_VECTS (ddr).safe_push (dist_v);
3061 /* Helper function for uniquely inserting direction vectors. */
3063 static void
3064 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
3066 unsigned i;
3067 lambda_vector v;
3069 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), i, v)
3070 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
3071 return;
3073 DDR_DIR_VECTS (ddr).safe_push (dir_v);
3076 /* Add a distance of 1 on all the loops outer than INDEX. If we
3077 haven't yet determined a distance for this outer loop, push a new
3078 distance vector composed of the previous distance, and a distance
3079 of 1 for this outer loop. Example:
3081 | loop_1
3082 | loop_2
3083 | A[10]
3084 | endloop_2
3085 | endloop_1
3087 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
3088 save (0, 1), then we have to save (1, 0). */
3090 static void
3091 add_outer_distances (struct data_dependence_relation *ddr,
3092 lambda_vector dist_v, int index)
3094 /* For each outer loop where init_v is not set, the accesses are
3095 in dependence of distance 1 in the loop. */
3096 while (--index >= 0)
3098 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3099 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3100 save_v[index] = 1;
3101 save_dist_v (ddr, save_v);
3105 /* Return false when fail to represent the data dependence as a
3106 distance vector. INIT_B is set to true when a component has been
3107 added to the distance vector DIST_V. INDEX_CARRY is then set to
3108 the index in DIST_V that carries the dependence. */
3110 static bool
3111 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
3112 struct data_reference *ddr_a,
3113 struct data_reference *ddr_b,
3114 lambda_vector dist_v, bool *init_b,
3115 int *index_carry)
3117 unsigned i;
3118 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3120 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3122 tree access_fn_a, access_fn_b;
3123 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
3125 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
3127 non_affine_dependence_relation (ddr);
3128 return false;
3131 access_fn_a = DR_ACCESS_FN (ddr_a, i);
3132 access_fn_b = DR_ACCESS_FN (ddr_b, i);
3134 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
3135 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
3137 int dist, index;
3138 int var_a = CHREC_VARIABLE (access_fn_a);
3139 int var_b = CHREC_VARIABLE (access_fn_b);
3141 if (var_a != var_b
3142 || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
3144 non_affine_dependence_relation (ddr);
3145 return false;
3148 dist = int_cst_value (SUB_DISTANCE (subscript));
3149 index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
3150 *index_carry = MIN (index, *index_carry);
3152 /* This is the subscript coupling test. If we have already
3153 recorded a distance for this loop (a distance coming from
3154 another subscript), it should be the same. For example,
3155 in the following code, there is no dependence:
3157 | loop i = 0, N, 1
3158 | T[i+1][i] = ...
3159 | ... = T[i][i]
3160 | endloop
3162 if (init_v[index] != 0 && dist_v[index] != dist)
3164 finalize_ddr_dependent (ddr, chrec_known);
3165 return false;
3168 dist_v[index] = dist;
3169 init_v[index] = 1;
3170 *init_b = true;
3172 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
3174 /* This can be for example an affine vs. constant dependence
3175 (T[i] vs. T[3]) that is not an affine dependence and is
3176 not representable as a distance vector. */
3177 non_affine_dependence_relation (ddr);
3178 return false;
3182 return true;
3185 /* Return true when the DDR contains only constant access functions. */
3187 static bool
3188 constant_access_functions (const struct data_dependence_relation *ddr)
3190 unsigned i;
3192 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3193 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
3194 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
3195 return false;
3197 return true;
3200 /* Helper function for the case where DDR_A and DDR_B are the same
3201 multivariate access function with a constant step. For an example
3202 see pr34635-1.c. */
3204 static void
3205 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
3207 int x_1, x_2;
3208 tree c_1 = CHREC_LEFT (c_2);
3209 tree c_0 = CHREC_LEFT (c_1);
3210 lambda_vector dist_v;
3211 int v1, v2, cd;
3213 /* Polynomials with more than 2 variables are not handled yet. When
3214 the evolution steps are parameters, it is not possible to
3215 represent the dependence using classical distance vectors. */
3216 if (TREE_CODE (c_0) != INTEGER_CST
3217 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
3218 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
3220 DDR_AFFINE_P (ddr) = false;
3221 return;
3224 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
3225 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
3227 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
3228 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3229 v1 = int_cst_value (CHREC_RIGHT (c_1));
3230 v2 = int_cst_value (CHREC_RIGHT (c_2));
3231 cd = gcd (v1, v2);
3232 v1 /= cd;
3233 v2 /= cd;
3235 if (v2 < 0)
3237 v2 = -v2;
3238 v1 = -v1;
3241 dist_v[x_1] = v2;
3242 dist_v[x_2] = -v1;
3243 save_dist_v (ddr, dist_v);
3245 add_outer_distances (ddr, dist_v, x_1);
3248 /* Helper function for the case where DDR_A and DDR_B are the same
3249 access functions. */
3251 static void
3252 add_other_self_distances (struct data_dependence_relation *ddr)
3254 lambda_vector dist_v;
3255 unsigned i;
3256 int index_carry = DDR_NB_LOOPS (ddr);
3258 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3260 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
3262 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
3264 if (!evolution_function_is_univariate_p (access_fun))
3266 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
3268 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
3269 return;
3272 access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
3274 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
3275 add_multivariate_self_dist (ddr, access_fun);
3276 else
3277 /* The evolution step is not constant: it varies in
3278 the outer loop, so this cannot be represented by a
3279 distance vector. For example in pr34635.c the
3280 evolution is {0, +, {0, +, 4}_1}_2. */
3281 DDR_AFFINE_P (ddr) = false;
3283 return;
3286 index_carry = MIN (index_carry,
3287 index_in_loop_nest (CHREC_VARIABLE (access_fun),
3288 DDR_LOOP_NEST (ddr)));
3292 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3293 add_outer_distances (ddr, dist_v, index_carry);
3296 static void
3297 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
3299 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3301 dist_v[DDR_INNER_LOOP (ddr)] = 1;
3302 save_dist_v (ddr, dist_v);
3305 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
3306 is the case for example when access functions are the same and
3307 equal to a constant, as in:
3309 | loop_1
3310 | A[3] = ...
3311 | ... = A[3]
3312 | endloop_1
3314 in which case the distance vectors are (0) and (1). */
3316 static void
3317 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
3319 unsigned i, j;
3321 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3323 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
3324 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
3325 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
3327 for (j = 0; j < ca->n; j++)
3328 if (affine_function_zero_p (ca->fns[j]))
3330 insert_innermost_unit_dist_vector (ddr);
3331 return;
3334 for (j = 0; j < cb->n; j++)
3335 if (affine_function_zero_p (cb->fns[j]))
3337 insert_innermost_unit_dist_vector (ddr);
3338 return;
3343 /* Compute the classic per loop distance vector. DDR is the data
3344 dependence relation to build a vector from. Return false when fail
3345 to represent the data dependence as a distance vector. */
3347 static bool
3348 build_classic_dist_vector (struct data_dependence_relation *ddr,
3349 struct loop *loop_nest)
3351 bool init_b = false;
3352 int index_carry = DDR_NB_LOOPS (ddr);
3353 lambda_vector dist_v;
3355 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3356 return false;
3358 if (same_access_functions (ddr))
3360 /* Save the 0 vector. */
3361 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3362 save_dist_v (ddr, dist_v);
3364 if (constant_access_functions (ddr))
3365 add_distance_for_zero_overlaps (ddr);
3367 if (DDR_NB_LOOPS (ddr) > 1)
3368 add_other_self_distances (ddr);
3370 return true;
3373 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3374 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3375 dist_v, &init_b, &index_carry))
3376 return false;
3378 /* Save the distance vector if we initialized one. */
3379 if (init_b)
3381 /* Verify a basic constraint: classic distance vectors should
3382 always be lexicographically positive.
3384 Data references are collected in the order of execution of
3385 the program, thus for the following loop
3387 | for (i = 1; i < 100; i++)
3388 | for (j = 1; j < 100; j++)
3390 | t = T[j+1][i-1]; // A
3391 | T[j][i] = t + 2; // B
3394 references are collected following the direction of the wind:
3395 A then B. The data dependence tests are performed also
3396 following this order, such that we're looking at the distance
3397 separating the elements accessed by A from the elements later
3398 accessed by B. But in this example, the distance returned by
3399 test_dep (A, B) is lexicographically negative (-1, 1), that
3400 means that the access A occurs later than B with respect to
3401 the outer loop, ie. we're actually looking upwind. In this
3402 case we solve test_dep (B, A) looking downwind to the
3403 lexicographically positive solution, that returns the
3404 distance vector (1, -1). */
3405 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3407 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3408 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3409 loop_nest))
3410 return false;
3411 compute_subscript_distance (ddr);
3412 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3413 save_v, &init_b, &index_carry))
3414 return false;
3415 save_dist_v (ddr, save_v);
3416 DDR_REVERSED_P (ddr) = true;
3418 /* In this case there is a dependence forward for all the
3419 outer loops:
3421 | for (k = 1; k < 100; k++)
3422 | for (i = 1; i < 100; i++)
3423 | for (j = 1; j < 100; j++)
3425 | t = T[j+1][i-1]; // A
3426 | T[j][i] = t + 2; // B
3429 the vectors are:
3430 (0, 1, -1)
3431 (1, 1, -1)
3432 (1, -1, 1)
3434 if (DDR_NB_LOOPS (ddr) > 1)
3436 add_outer_distances (ddr, save_v, index_carry);
3437 add_outer_distances (ddr, dist_v, index_carry);
3440 else
3442 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3443 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3445 if (DDR_NB_LOOPS (ddr) > 1)
3447 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3449 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3450 DDR_A (ddr), loop_nest))
3451 return false;
3452 compute_subscript_distance (ddr);
3453 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3454 opposite_v, &init_b,
3455 &index_carry))
3456 return false;
3458 save_dist_v (ddr, save_v);
3459 add_outer_distances (ddr, dist_v, index_carry);
3460 add_outer_distances (ddr, opposite_v, index_carry);
3462 else
3463 save_dist_v (ddr, save_v);
3466 else
3468 /* There is a distance of 1 on all the outer loops: Example:
3469 there is a dependence of distance 1 on loop_1 for the array A.
3471 | loop_1
3472 | A[5] = ...
3473 | endloop
3475 add_outer_distances (ddr, dist_v,
3476 lambda_vector_first_nz (dist_v,
3477 DDR_NB_LOOPS (ddr), 0));
3480 if (dump_file && (dump_flags & TDF_DETAILS))
3482 unsigned i;
3484 fprintf (dump_file, "(build_classic_dist_vector\n");
3485 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3487 fprintf (dump_file, " dist_vector = (");
3488 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3489 DDR_NB_LOOPS (ddr));
3490 fprintf (dump_file, " )\n");
3492 fprintf (dump_file, ")\n");
3495 return true;
3498 /* Return the direction for a given distance.
3499 FIXME: Computing dir this way is suboptimal, since dir can catch
3500 cases that dist is unable to represent. */
3502 static inline enum data_dependence_direction
3503 dir_from_dist (int dist)
3505 if (dist > 0)
3506 return dir_positive;
3507 else if (dist < 0)
3508 return dir_negative;
3509 else
3510 return dir_equal;
3513 /* Compute the classic per loop direction vector. DDR is the data
3514 dependence relation to build a vector from. */
3516 static void
3517 build_classic_dir_vector (struct data_dependence_relation *ddr)
3519 unsigned i, j;
3520 lambda_vector dist_v;
3522 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
3524 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3526 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3527 dir_v[j] = dir_from_dist (dist_v[j]);
3529 save_dir_v (ddr, dir_v);
3533 /* Helper function. Returns true when there is a dependence between
3534 data references DRA and DRB. */
3536 static bool
3537 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3538 struct data_reference *dra,
3539 struct data_reference *drb,
3540 struct loop *loop_nest)
3542 unsigned int i;
3543 tree last_conflicts;
3544 struct subscript *subscript;
3545 tree res = NULL_TREE;
3547 for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
3549 conflict_function *overlaps_a, *overlaps_b;
3551 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3552 DR_ACCESS_FN (drb, i),
3553 &overlaps_a, &overlaps_b,
3554 &last_conflicts, loop_nest);
3556 if (SUB_CONFLICTS_IN_A (subscript))
3557 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3558 if (SUB_CONFLICTS_IN_B (subscript))
3559 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3561 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3562 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3563 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3565 /* If there is any undetermined conflict function we have to
3566 give a conservative answer in case we cannot prove that
3567 no dependence exists when analyzing another subscript. */
3568 if (CF_NOT_KNOWN_P (overlaps_a)
3569 || CF_NOT_KNOWN_P (overlaps_b))
3571 res = chrec_dont_know;
3572 continue;
3575 /* When there is a subscript with no dependence we can stop. */
3576 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3577 || CF_NO_DEPENDENCE_P (overlaps_b))
3579 res = chrec_known;
3580 break;
3584 if (res == NULL_TREE)
3585 return true;
3587 if (res == chrec_known)
3588 dependence_stats.num_dependence_independent++;
3589 else
3590 dependence_stats.num_dependence_undetermined++;
3591 finalize_ddr_dependent (ddr, res);
3592 return false;
3595 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3597 static void
3598 subscript_dependence_tester (struct data_dependence_relation *ddr,
3599 struct loop *loop_nest)
3601 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3602 dependence_stats.num_dependence_dependent++;
3604 compute_subscript_distance (ddr);
3605 if (build_classic_dist_vector (ddr, loop_nest))
3606 build_classic_dir_vector (ddr);
3609 /* Returns true when all the access functions of A are affine or
3610 constant with respect to LOOP_NEST. */
3612 static bool
3613 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3614 const struct loop *loop_nest)
3616 unsigned int i;
3617 vec<tree> fns = DR_ACCESS_FNS (a);
3618 tree t;
3620 FOR_EACH_VEC_ELT (fns, i, t)
3621 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3622 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3623 return false;
3625 return true;
3628 /* Initializes an equation for an OMEGA problem using the information
3629 contained in the ACCESS_FUN. Returns true when the operation
3630 succeeded.
3632 PB is the omega constraint system.
3633 EQ is the number of the equation to be initialized.
3634 OFFSET is used for shifting the variables names in the constraints:
3635 a constrain is composed of 2 * the number of variables surrounding
3636 dependence accesses. OFFSET is set either to 0 for the first n variables,
3637 then it is set to n.
3638 ACCESS_FUN is expected to be an affine chrec. */
3640 static bool
3641 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3642 unsigned int offset, tree access_fun,
3643 struct data_dependence_relation *ddr)
3645 switch (TREE_CODE (access_fun))
3647 case POLYNOMIAL_CHREC:
3649 tree left = CHREC_LEFT (access_fun);
3650 tree right = CHREC_RIGHT (access_fun);
3651 int var = CHREC_VARIABLE (access_fun);
3652 unsigned var_idx;
3654 if (TREE_CODE (right) != INTEGER_CST)
3655 return false;
3657 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3658 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3660 /* Compute the innermost loop index. */
3661 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3663 if (offset == 0)
3664 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3665 += int_cst_value (right);
3667 switch (TREE_CODE (left))
3669 case POLYNOMIAL_CHREC:
3670 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3672 case INTEGER_CST:
3673 pb->eqs[eq].coef[0] += int_cst_value (left);
3674 return true;
3676 default:
3677 return false;
3681 case INTEGER_CST:
3682 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3683 return true;
3685 default:
3686 return false;
3690 /* As explained in the comments preceding init_omega_for_ddr, we have
3691 to set up a system for each loop level, setting outer loops
3692 variation to zero, and current loop variation to positive or zero.
3693 Save each lexico positive distance vector. */
3695 static void
3696 omega_extract_distance_vectors (omega_pb pb,
3697 struct data_dependence_relation *ddr)
3699 int eq, geq;
3700 unsigned i, j;
3701 struct loop *loopi, *loopj;
3702 enum omega_result res;
3704 /* Set a new problem for each loop in the nest. The basis is the
3705 problem that we have initialized until now. On top of this we
3706 add new constraints. */
3707 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3708 && DDR_LOOP_NEST (ddr).iterate (i, &loopi); i++)
3710 int dist = 0;
3711 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3712 DDR_NB_LOOPS (ddr));
3714 omega_copy_problem (copy, pb);
3716 /* For all the outer loops "loop_j", add "dj = 0". */
3717 for (j = 0; j < i && DDR_LOOP_NEST (ddr).iterate (j, &loopj); j++)
3719 eq = omega_add_zero_eq (copy, omega_black);
3720 copy->eqs[eq].coef[j + 1] = 1;
3723 /* For "loop_i", add "0 <= di". */
3724 geq = omega_add_zero_geq (copy, omega_black);
3725 copy->geqs[geq].coef[i + 1] = 1;
3727 /* Reduce the constraint system, and test that the current
3728 problem is feasible. */
3729 res = omega_simplify_problem (copy);
3730 if (res == omega_false
3731 || res == omega_unknown
3732 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3733 goto next_problem;
3735 for (eq = 0; eq < copy->num_subs; eq++)
3736 if (copy->subs[eq].key == (int) i + 1)
3738 dist = copy->subs[eq].coef[0];
3739 goto found_dist;
3742 if (dist == 0)
3744 /* Reinitialize problem... */
3745 omega_copy_problem (copy, pb);
3746 for (j = 0; j < i && DDR_LOOP_NEST (ddr).iterate (j, &loopj); j++)
3748 eq = omega_add_zero_eq (copy, omega_black);
3749 copy->eqs[eq].coef[j + 1] = 1;
3752 /* ..., but this time "di = 1". */
3753 eq = omega_add_zero_eq (copy, omega_black);
3754 copy->eqs[eq].coef[i + 1] = 1;
3755 copy->eqs[eq].coef[0] = -1;
3757 res = omega_simplify_problem (copy);
3758 if (res == omega_false
3759 || res == omega_unknown
3760 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3761 goto next_problem;
3763 for (eq = 0; eq < copy->num_subs; eq++)
3764 if (copy->subs[eq].key == (int) i + 1)
3766 dist = copy->subs[eq].coef[0];
3767 goto found_dist;
3771 found_dist:;
3772 /* Save the lexicographically positive distance vector. */
3773 if (dist >= 0)
3775 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3776 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3778 dist_v[i] = dist;
3780 for (eq = 0; eq < copy->num_subs; eq++)
3781 if (copy->subs[eq].key > 0)
3783 dist = copy->subs[eq].coef[0];
3784 dist_v[copy->subs[eq].key - 1] = dist;
3787 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3788 dir_v[j] = dir_from_dist (dist_v[j]);
3790 save_dist_v (ddr, dist_v);
3791 save_dir_v (ddr, dir_v);
3794 next_problem:;
3795 omega_free_problem (copy);
3799 /* This is called for each subscript of a tuple of data references:
3800 insert an equality for representing the conflicts. */
3802 static bool
3803 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3804 struct data_dependence_relation *ddr,
3805 omega_pb pb, bool *maybe_dependent)
3807 int eq;
3808 tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3809 TREE_TYPE (access_fun_b));
3810 tree fun_a = chrec_convert (type, access_fun_a, NULL);
3811 tree fun_b = chrec_convert (type, access_fun_b, NULL);
3812 tree difference = chrec_fold_minus (type, fun_a, fun_b);
3813 tree minus_one;
3815 /* When the fun_a - fun_b is not constant, the dependence is not
3816 captured by the classic distance vector representation. */
3817 if (TREE_CODE (difference) != INTEGER_CST)
3818 return false;
3820 /* ZIV test. */
3821 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3823 /* There is no dependence. */
3824 *maybe_dependent = false;
3825 return true;
3828 minus_one = build_int_cst (type, -1);
3829 fun_b = chrec_fold_multiply (type, fun_b, minus_one);
3831 eq = omega_add_zero_eq (pb, omega_black);
3832 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3833 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3834 /* There is probably a dependence, but the system of
3835 constraints cannot be built: answer "don't know". */
3836 return false;
3838 /* GCD test. */
3839 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3840 && !int_divides_p (lambda_vector_gcd
3841 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3842 2 * DDR_NB_LOOPS (ddr)),
3843 pb->eqs[eq].coef[0]))
3845 /* There is no dependence. */
3846 *maybe_dependent = false;
3847 return true;
3850 return true;
3853 /* Helper function, same as init_omega_for_ddr but specialized for
3854 data references A and B. */
3856 static bool
3857 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3858 struct data_dependence_relation *ddr,
3859 omega_pb pb, bool *maybe_dependent)
3861 unsigned i;
3862 int ineq;
3863 struct loop *loopi;
3864 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3866 /* Insert an equality per subscript. */
3867 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3869 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3870 ddr, pb, maybe_dependent))
3871 return false;
3872 else if (*maybe_dependent == false)
3874 /* There is no dependence. */
3875 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3876 return true;
3880 /* Insert inequalities: constraints corresponding to the iteration
3881 domain, i.e. the loops surrounding the references "loop_x" and
3882 the distance variables "dx". The layout of the OMEGA
3883 representation is as follows:
3884 - coef[0] is the constant
3885 - coef[1..nb_loops] are the protected variables that will not be
3886 removed by the solver: the "dx"
3887 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3889 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3890 && DDR_LOOP_NEST (ddr).iterate (i, &loopi); i++)
3892 HOST_WIDE_INT nbi = max_stmt_executions_int (loopi);
3894 /* 0 <= loop_x */
3895 ineq = omega_add_zero_geq (pb, omega_black);
3896 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3898 /* 0 <= loop_x + dx */
3899 ineq = omega_add_zero_geq (pb, omega_black);
3900 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3901 pb->geqs[ineq].coef[i + 1] = 1;
3903 if (nbi != -1)
3905 /* loop_x <= nb_iters */
3906 ineq = omega_add_zero_geq (pb, omega_black);
3907 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3908 pb->geqs[ineq].coef[0] = nbi;
3910 /* loop_x + dx <= nb_iters */
3911 ineq = omega_add_zero_geq (pb, omega_black);
3912 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3913 pb->geqs[ineq].coef[i + 1] = -1;
3914 pb->geqs[ineq].coef[0] = nbi;
3916 /* A step "dx" bigger than nb_iters is not feasible, so
3917 add "0 <= nb_iters + dx", */
3918 ineq = omega_add_zero_geq (pb, omega_black);
3919 pb->geqs[ineq].coef[i + 1] = 1;
3920 pb->geqs[ineq].coef[0] = nbi;
3921 /* and "dx <= nb_iters". */
3922 ineq = omega_add_zero_geq (pb, omega_black);
3923 pb->geqs[ineq].coef[i + 1] = -1;
3924 pb->geqs[ineq].coef[0] = nbi;
3928 omega_extract_distance_vectors (pb, ddr);
3930 return true;
3933 /* Sets up the Omega dependence problem for the data dependence
3934 relation DDR. Returns false when the constraint system cannot be
3935 built, ie. when the test answers "don't know". Returns true
3936 otherwise, and when independence has been proved (using one of the
3937 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3938 set MAYBE_DEPENDENT to true.
3940 Example: for setting up the dependence system corresponding to the
3941 conflicting accesses
3943 | loop_i
3944 | loop_j
3945 | A[i, i+1] = ...
3946 | ... A[2*j, 2*(i + j)]
3947 | endloop_j
3948 | endloop_i
3950 the following constraints come from the iteration domain:
3952 0 <= i <= Ni
3953 0 <= i + di <= Ni
3954 0 <= j <= Nj
3955 0 <= j + dj <= Nj
3957 where di, dj are the distance variables. The constraints
3958 representing the conflicting elements are:
3960 i = 2 * (j + dj)
3961 i + 1 = 2 * (i + di + j + dj)
3963 For asking that the resulting distance vector (di, dj) be
3964 lexicographically positive, we insert the constraint "di >= 0". If
3965 "di = 0" in the solution, we fix that component to zero, and we
3966 look at the inner loops: we set a new problem where all the outer
3967 loop distances are zero, and fix this inner component to be
3968 positive. When one of the components is positive, we save that
3969 distance, and set a new problem where the distance on this loop is
3970 zero, searching for other distances in the inner loops. Here is
3971 the classic example that illustrates that we have to set for each
3972 inner loop a new problem:
3974 | loop_1
3975 | loop_2
3976 | A[10]
3977 | endloop_2
3978 | endloop_1
3980 we have to save two distances (1, 0) and (0, 1).
3982 Given two array references, refA and refB, we have to set the
3983 dependence problem twice, refA vs. refB and refB vs. refA, and we
3984 cannot do a single test, as refB might occur before refA in the
3985 inner loops, and the contrary when considering outer loops: ex.
3987 | loop_0
3988 | loop_1
3989 | loop_2
3990 | T[{1,+,1}_2][{1,+,1}_1] // refA
3991 | T[{2,+,1}_2][{0,+,1}_1] // refB
3992 | endloop_2
3993 | endloop_1
3994 | endloop_0
3996 refB touches the elements in T before refA, and thus for the same
3997 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3998 but for successive loop_0 iterations, we have (1, -1, 1)
4000 The Omega solver expects the distance variables ("di" in the
4001 previous example) to come first in the constraint system (as
4002 variables to be protected, or "safe" variables), the constraint
4003 system is built using the following layout:
4005 "cst | distance vars | index vars".
4008 static bool
4009 init_omega_for_ddr (struct data_dependence_relation *ddr,
4010 bool *maybe_dependent)
4012 omega_pb pb;
4013 bool res = false;
4015 *maybe_dependent = true;
4017 if (same_access_functions (ddr))
4019 unsigned j;
4020 lambda_vector dir_v;
4022 /* Save the 0 vector. */
4023 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4024 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4025 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
4026 dir_v[j] = dir_equal;
4027 save_dir_v (ddr, dir_v);
4029 /* Save the dependences carried by outer loops. */
4030 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
4031 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
4032 maybe_dependent);
4033 omega_free_problem (pb);
4034 return res;
4037 /* Omega expects the protected variables (those that have to be kept
4038 after elimination) to appear first in the constraint system.
4039 These variables are the distance variables. In the following
4040 initialization we declare NB_LOOPS safe variables, and the total
4041 number of variables for the constraint system is 2*NB_LOOPS. */
4042 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
4043 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
4044 maybe_dependent);
4045 omega_free_problem (pb);
4047 /* Stop computation if not decidable, or no dependence. */
4048 if (res == false || *maybe_dependent == false)
4049 return res;
4051 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
4052 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
4053 maybe_dependent);
4054 omega_free_problem (pb);
4056 return res;
4059 /* Return true when DDR contains the same information as that stored
4060 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
4062 static bool
4063 ddr_consistent_p (FILE *file,
4064 struct data_dependence_relation *ddr,
4065 vec<lambda_vector> dist_vects,
4066 vec<lambda_vector> dir_vects)
4068 unsigned int i, j;
4070 /* If dump_file is set, output there. */
4071 if (dump_file && (dump_flags & TDF_DETAILS))
4072 file = dump_file;
4074 if (dist_vects.length () != DDR_NUM_DIST_VECTS (ddr))
4076 lambda_vector b_dist_v;
4077 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
4078 dist_vects.length (),
4079 DDR_NUM_DIST_VECTS (ddr));
4081 fprintf (file, "Banerjee dist vectors:\n");
4082 FOR_EACH_VEC_ELT (dist_vects, i, b_dist_v)
4083 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
4085 fprintf (file, "Omega dist vectors:\n");
4086 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
4087 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
4089 fprintf (file, "data dependence relation:\n");
4090 dump_data_dependence_relation (file, ddr);
4092 fprintf (file, ")\n");
4093 return false;
4096 if (dir_vects.length () != DDR_NUM_DIR_VECTS (ddr))
4098 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
4099 dir_vects.length (),
4100 DDR_NUM_DIR_VECTS (ddr));
4101 return false;
4104 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
4106 lambda_vector a_dist_v;
4107 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
4109 /* Distance vectors are not ordered in the same way in the DDR
4110 and in the DIST_VECTS: search for a matching vector. */
4111 FOR_EACH_VEC_ELT (dist_vects, j, a_dist_v)
4112 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
4113 break;
4115 if (j == dist_vects.length ())
4117 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
4118 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
4119 fprintf (file, "not found in Omega dist vectors:\n");
4120 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
4121 fprintf (file, "data dependence relation:\n");
4122 dump_data_dependence_relation (file, ddr);
4123 fprintf (file, ")\n");
4127 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
4129 lambda_vector a_dir_v;
4130 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
4132 /* Direction vectors are not ordered in the same way in the DDR
4133 and in the DIR_VECTS: search for a matching vector. */
4134 FOR_EACH_VEC_ELT (dir_vects, j, a_dir_v)
4135 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
4136 break;
4138 if (j == dist_vects.length ())
4140 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
4141 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
4142 fprintf (file, "not found in Omega dir vectors:\n");
4143 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
4144 fprintf (file, "data dependence relation:\n");
4145 dump_data_dependence_relation (file, ddr);
4146 fprintf (file, ")\n");
4150 return true;
4153 /* This computes the affine dependence relation between A and B with
4154 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
4155 independence between two accesses, while CHREC_DONT_KNOW is used
4156 for representing the unknown relation.
4158 Note that it is possible to stop the computation of the dependence
4159 relation the first time we detect a CHREC_KNOWN element for a given
4160 subscript. */
4162 void
4163 compute_affine_dependence (struct data_dependence_relation *ddr,
4164 struct loop *loop_nest)
4166 struct data_reference *dra = DDR_A (ddr);
4167 struct data_reference *drb = DDR_B (ddr);
4169 if (dump_file && (dump_flags & TDF_DETAILS))
4171 fprintf (dump_file, "(compute_affine_dependence\n");
4172 fprintf (dump_file, " stmt_a: ");
4173 print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
4174 fprintf (dump_file, " stmt_b: ");
4175 print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
4178 /* Analyze only when the dependence relation is not yet known. */
4179 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4181 dependence_stats.num_dependence_tests++;
4183 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
4184 && access_functions_are_affine_or_constant_p (drb, loop_nest))
4186 subscript_dependence_tester (ddr, loop_nest);
4188 if (flag_check_data_deps)
4190 /* Dump the dependences from the first algorithm. */
4191 if (dump_file && (dump_flags & TDF_DETAILS))
4193 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
4194 dump_data_dependence_relation (dump_file, ddr);
4197 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4199 bool maybe_dependent;
4200 vec<lambda_vector> dir_vects, dist_vects;
4202 /* Save the result of the first DD analyzer. */
4203 dist_vects = DDR_DIST_VECTS (ddr);
4204 dir_vects = DDR_DIR_VECTS (ddr);
4206 /* Reset the information. */
4207 DDR_DIST_VECTS (ddr).create (0);
4208 DDR_DIR_VECTS (ddr).create (0);
4210 /* Compute the same information using Omega. */
4211 if (!init_omega_for_ddr (ddr, &maybe_dependent))
4212 goto csys_dont_know;
4214 if (dump_file && (dump_flags & TDF_DETAILS))
4216 fprintf (dump_file, "Omega Analyzer\n");
4217 dump_data_dependence_relation (dump_file, ddr);
4220 /* Check that we get the same information. */
4221 if (maybe_dependent)
4222 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
4223 dir_vects));
4228 /* As a last case, if the dependence cannot be determined, or if
4229 the dependence is considered too difficult to determine, answer
4230 "don't know". */
4231 else
4233 csys_dont_know:;
4234 dependence_stats.num_dependence_undetermined++;
4236 if (dump_file && (dump_flags & TDF_DETAILS))
4238 fprintf (dump_file, "Data ref a:\n");
4239 dump_data_reference (dump_file, dra);
4240 fprintf (dump_file, "Data ref b:\n");
4241 dump_data_reference (dump_file, drb);
4242 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4244 finalize_ddr_dependent (ddr, chrec_dont_know);
4248 if (dump_file && (dump_flags & TDF_DETAILS))
4250 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4251 fprintf (dump_file, ") -> no dependence\n");
4252 else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4253 fprintf (dump_file, ") -> dependence analysis failed\n");
4254 else
4255 fprintf (dump_file, ")\n");
4259 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4260 the data references in DATAREFS, in the LOOP_NEST. When
4261 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4262 relations. Return true when successful, i.e. data references number
4263 is small enough to be handled. */
4265 bool
4266 compute_all_dependences (vec<data_reference_p> datarefs,
4267 vec<ddr_p> *dependence_relations,
4268 vec<loop_p> loop_nest,
4269 bool compute_self_and_rr)
4271 struct data_dependence_relation *ddr;
4272 struct data_reference *a, *b;
4273 unsigned int i, j;
4275 if ((int) datarefs.length ()
4276 > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
4278 struct data_dependence_relation *ddr;
4280 /* Insert a single relation into dependence_relations:
4281 chrec_dont_know. */
4282 ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
4283 dependence_relations->safe_push (ddr);
4284 return false;
4287 FOR_EACH_VEC_ELT (datarefs, i, a)
4288 for (j = i + 1; datarefs.iterate (j, &b); j++)
4289 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4291 ddr = initialize_data_dependence_relation (a, b, loop_nest);
4292 dependence_relations->safe_push (ddr);
4293 if (loop_nest.exists ())
4294 compute_affine_dependence (ddr, loop_nest[0]);
4297 if (compute_self_and_rr)
4298 FOR_EACH_VEC_ELT (datarefs, i, a)
4300 ddr = initialize_data_dependence_relation (a, a, loop_nest);
4301 dependence_relations->safe_push (ddr);
4302 if (loop_nest.exists ())
4303 compute_affine_dependence (ddr, loop_nest[0]);
4306 return true;
4309 /* Describes a location of a memory reference. */
4311 typedef struct data_ref_loc_d
4313 /* Position of the memory reference. */
4314 tree *pos;
4316 /* True if the memory reference is read. */
4317 bool is_read;
4318 } data_ref_loc;
4321 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4322 true if STMT clobbers memory, false otherwise. */
4324 static bool
4325 get_references_in_stmt (gimple stmt, vec<data_ref_loc, va_stack> *references)
4327 bool clobbers_memory = false;
4328 data_ref_loc ref;
4329 tree *op0, *op1;
4330 enum gimple_code stmt_code = gimple_code (stmt);
4332 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4333 As we cannot model data-references to not spelled out
4334 accesses give up if they may occur. */
4335 if (stmt_code == GIMPLE_CALL
4336 && !(gimple_call_flags (stmt) & ECF_CONST))
4338 /* Allow IFN_GOMP_SIMD_LANE in their own loops. */
4339 if (gimple_call_internal_p (stmt)
4340 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
4342 struct loop *loop = gimple_bb (stmt)->loop_father;
4343 tree uid = gimple_call_arg (stmt, 0);
4344 gcc_assert (TREE_CODE (uid) == SSA_NAME);
4345 if (loop == NULL
4346 || loop->simduid != SSA_NAME_VAR (uid))
4347 clobbers_memory = true;
4349 else
4350 clobbers_memory = true;
4352 else if (stmt_code == GIMPLE_ASM
4353 && (gimple_asm_volatile_p (stmt) || gimple_vuse (stmt)))
4354 clobbers_memory = true;
4356 if (!gimple_vuse (stmt))
4357 return clobbers_memory;
4359 if (stmt_code == GIMPLE_ASSIGN)
4361 tree base;
4362 op0 = gimple_assign_lhs_ptr (stmt);
4363 op1 = gimple_assign_rhs1_ptr (stmt);
4365 if (DECL_P (*op1)
4366 || (REFERENCE_CLASS_P (*op1)
4367 && (base = get_base_address (*op1))
4368 && TREE_CODE (base) != SSA_NAME))
4370 ref.pos = op1;
4371 ref.is_read = true;
4372 references->safe_push (ref);
4375 else if (stmt_code == GIMPLE_CALL)
4377 unsigned i, n;
4379 op0 = gimple_call_lhs_ptr (stmt);
4380 n = gimple_call_num_args (stmt);
4381 for (i = 0; i < n; i++)
4383 op1 = gimple_call_arg_ptr (stmt, i);
4385 if (DECL_P (*op1)
4386 || (REFERENCE_CLASS_P (*op1) && get_base_address (*op1)))
4388 ref.pos = op1;
4389 ref.is_read = true;
4390 references->safe_push (ref);
4394 else
4395 return clobbers_memory;
4397 if (*op0
4398 && (DECL_P (*op0)
4399 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0))))
4401 ref.pos = op0;
4402 ref.is_read = false;
4403 references->safe_push (ref);
4405 return clobbers_memory;
4408 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4409 reference, returns false, otherwise returns true. NEST is the outermost
4410 loop of the loop nest in which the references should be analyzed. */
4412 bool
4413 find_data_references_in_stmt (struct loop *nest, gimple stmt,
4414 vec<data_reference_p> *datarefs)
4416 unsigned i;
4417 vec<data_ref_loc, va_stack> references;
4418 data_ref_loc *ref;
4419 bool ret = true;
4420 data_reference_p dr;
4422 vec_stack_alloc (data_ref_loc, references, 2);
4423 if (get_references_in_stmt (stmt, &references))
4425 references.release ();
4426 return false;
4429 FOR_EACH_VEC_ELT (references, i, ref)
4431 dr = create_data_ref (nest, loop_containing_stmt (stmt),
4432 *ref->pos, stmt, ref->is_read);
4433 gcc_assert (dr != NULL);
4434 datarefs->safe_push (dr);
4436 references.release ();
4437 return ret;
4440 /* Stores the data references in STMT to DATAREFS. If there is an
4441 unanalyzable reference, returns false, otherwise returns true.
4442 NEST is the outermost loop of the loop nest in which the references
4443 should be instantiated, LOOP is the loop in which the references
4444 should be analyzed. */
4446 bool
4447 graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple stmt,
4448 vec<data_reference_p> *datarefs)
4450 unsigned i;
4451 vec<data_ref_loc, va_stack> references;
4452 data_ref_loc *ref;
4453 bool ret = true;
4454 data_reference_p dr;
4456 vec_stack_alloc (data_ref_loc, references, 2);
4457 if (get_references_in_stmt (stmt, &references))
4459 references.release ();
4460 return false;
4463 FOR_EACH_VEC_ELT (references, i, ref)
4465 dr = create_data_ref (nest, loop, *ref->pos, stmt, ref->is_read);
4466 gcc_assert (dr != NULL);
4467 datarefs->safe_push (dr);
4470 references.release ();
4471 return ret;
4474 /* Search the data references in LOOP, and record the information into
4475 DATAREFS. Returns chrec_dont_know when failing to analyze a
4476 difficult case, returns NULL_TREE otherwise. */
4478 tree
4479 find_data_references_in_bb (struct loop *loop, basic_block bb,
4480 vec<data_reference_p> *datarefs)
4482 gimple_stmt_iterator bsi;
4484 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4486 gimple stmt = gsi_stmt (bsi);
4488 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4490 struct data_reference *res;
4491 res = XCNEW (struct data_reference);
4492 datarefs->safe_push (res);
4494 return chrec_dont_know;
4498 return NULL_TREE;
4501 /* Search the data references in LOOP, and record the information into
4502 DATAREFS. Returns chrec_dont_know when failing to analyze a
4503 difficult case, returns NULL_TREE otherwise.
4505 TODO: This function should be made smarter so that it can handle address
4506 arithmetic as if they were array accesses, etc. */
4508 tree
4509 find_data_references_in_loop (struct loop *loop,
4510 vec<data_reference_p> *datarefs)
4512 basic_block bb, *bbs;
4513 unsigned int i;
4515 bbs = get_loop_body_in_dom_order (loop);
4517 for (i = 0; i < loop->num_nodes; i++)
4519 bb = bbs[i];
4521 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4523 free (bbs);
4524 return chrec_dont_know;
4527 free (bbs);
4529 return NULL_TREE;
4532 /* Recursive helper function. */
4534 static bool
4535 find_loop_nest_1 (struct loop *loop, vec<loop_p> *loop_nest)
4537 /* Inner loops of the nest should not contain siblings. Example:
4538 when there are two consecutive loops,
4540 | loop_0
4541 | loop_1
4542 | A[{0, +, 1}_1]
4543 | endloop_1
4544 | loop_2
4545 | A[{0, +, 1}_2]
4546 | endloop_2
4547 | endloop_0
4549 the dependence relation cannot be captured by the distance
4550 abstraction. */
4551 if (loop->next)
4552 return false;
4554 loop_nest->safe_push (loop);
4555 if (loop->inner)
4556 return find_loop_nest_1 (loop->inner, loop_nest);
4557 return true;
4560 /* Return false when the LOOP is not well nested. Otherwise return
4561 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4562 contain the loops from the outermost to the innermost, as they will
4563 appear in the classic distance vector. */
4565 bool
4566 find_loop_nest (struct loop *loop, vec<loop_p> *loop_nest)
4568 loop_nest->safe_push (loop);
4569 if (loop->inner)
4570 return find_loop_nest_1 (loop->inner, loop_nest);
4571 return true;
4574 /* Returns true when the data dependences have been computed, false otherwise.
4575 Given a loop nest LOOP, the following vectors are returned:
4576 DATAREFS is initialized to all the array elements contained in this loop,
4577 DEPENDENCE_RELATIONS contains the relations between the data references.
4578 Compute read-read and self relations if
4579 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4581 bool
4582 compute_data_dependences_for_loop (struct loop *loop,
4583 bool compute_self_and_read_read_dependences,
4584 vec<loop_p> *loop_nest,
4585 vec<data_reference_p> *datarefs,
4586 vec<ddr_p> *dependence_relations)
4588 bool res = true;
4590 memset (&dependence_stats, 0, sizeof (dependence_stats));
4592 /* If the loop nest is not well formed, or one of the data references
4593 is not computable, give up without spending time to compute other
4594 dependences. */
4595 if (!loop
4596 || !find_loop_nest (loop, loop_nest)
4597 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
4598 || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
4599 compute_self_and_read_read_dependences))
4600 res = false;
4602 if (dump_file && (dump_flags & TDF_STATS))
4604 fprintf (dump_file, "Dependence tester statistics:\n");
4606 fprintf (dump_file, "Number of dependence tests: %d\n",
4607 dependence_stats.num_dependence_tests);
4608 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4609 dependence_stats.num_dependence_dependent);
4610 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4611 dependence_stats.num_dependence_independent);
4612 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4613 dependence_stats.num_dependence_undetermined);
4615 fprintf (dump_file, "Number of subscript tests: %d\n",
4616 dependence_stats.num_subscript_tests);
4617 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4618 dependence_stats.num_subscript_undetermined);
4619 fprintf (dump_file, "Number of same subscript function: %d\n",
4620 dependence_stats.num_same_subscript_function);
4622 fprintf (dump_file, "Number of ziv tests: %d\n",
4623 dependence_stats.num_ziv);
4624 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4625 dependence_stats.num_ziv_dependent);
4626 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4627 dependence_stats.num_ziv_independent);
4628 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4629 dependence_stats.num_ziv_unimplemented);
4631 fprintf (dump_file, "Number of siv tests: %d\n",
4632 dependence_stats.num_siv);
4633 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4634 dependence_stats.num_siv_dependent);
4635 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4636 dependence_stats.num_siv_independent);
4637 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4638 dependence_stats.num_siv_unimplemented);
4640 fprintf (dump_file, "Number of miv tests: %d\n",
4641 dependence_stats.num_miv);
4642 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4643 dependence_stats.num_miv_dependent);
4644 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4645 dependence_stats.num_miv_independent);
4646 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4647 dependence_stats.num_miv_unimplemented);
4650 return res;
4653 /* Returns true when the data dependences for the basic block BB have been
4654 computed, false otherwise.
4655 DATAREFS is initialized to all the array elements contained in this basic
4656 block, DEPENDENCE_RELATIONS contains the relations between the data
4657 references. Compute read-read and self relations if
4658 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4659 bool
4660 compute_data_dependences_for_bb (basic_block bb,
4661 bool compute_self_and_read_read_dependences,
4662 vec<data_reference_p> *datarefs,
4663 vec<ddr_p> *dependence_relations)
4665 if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know)
4666 return false;
4668 return compute_all_dependences (*datarefs, dependence_relations, vNULL,
4669 compute_self_and_read_read_dependences);
4672 /* Entry point (for testing only). Analyze all the data references
4673 and the dependence relations in LOOP.
4675 The data references are computed first.
4677 A relation on these nodes is represented by a complete graph. Some
4678 of the relations could be of no interest, thus the relations can be
4679 computed on demand.
4681 In the following function we compute all the relations. This is
4682 just a first implementation that is here for:
4683 - for showing how to ask for the dependence relations,
4684 - for the debugging the whole dependence graph,
4685 - for the dejagnu testcases and maintenance.
4687 It is possible to ask only for a part of the graph, avoiding to
4688 compute the whole dependence graph. The computed dependences are
4689 stored in a knowledge base (KB) such that later queries don't
4690 recompute the same information. The implementation of this KB is
4691 transparent to the optimizer, and thus the KB can be changed with a
4692 more efficient implementation, or the KB could be disabled. */
4693 static void
4694 analyze_all_data_dependences (struct loop *loop)
4696 unsigned int i;
4697 int nb_data_refs = 10;
4698 vec<data_reference_p> datarefs;
4699 datarefs.create (nb_data_refs);
4700 vec<ddr_p> dependence_relations;
4701 dependence_relations.create (nb_data_refs * nb_data_refs);
4702 vec<loop_p> loop_nest;
4703 loop_nest.create (3);
4705 /* Compute DDs on the whole function. */
4706 compute_data_dependences_for_loop (loop, false, &loop_nest, &datarefs,
4707 &dependence_relations);
4709 if (dump_file)
4711 dump_data_dependence_relations (dump_file, dependence_relations);
4712 fprintf (dump_file, "\n\n");
4714 if (dump_flags & TDF_DETAILS)
4715 dump_dist_dir_vectors (dump_file, dependence_relations);
4717 if (dump_flags & TDF_STATS)
4719 unsigned nb_top_relations = 0;
4720 unsigned nb_bot_relations = 0;
4721 unsigned nb_chrec_relations = 0;
4722 struct data_dependence_relation *ddr;
4724 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
4726 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4727 nb_top_relations++;
4729 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4730 nb_bot_relations++;
4732 else
4733 nb_chrec_relations++;
4736 gather_stats_on_scev_database ();
4740 loop_nest.release ();
4741 free_dependence_relations (dependence_relations);
4742 free_data_refs (datarefs);
4745 /* Computes all the data dependences and check that the results of
4746 several analyzers are the same. */
4748 void
4749 tree_check_data_deps (void)
4751 loop_iterator li;
4752 struct loop *loop_nest;
4754 FOR_EACH_LOOP (li, loop_nest, 0)
4755 analyze_all_data_dependences (loop_nest);
4758 /* Free the memory used by a data dependence relation DDR. */
4760 void
4761 free_dependence_relation (struct data_dependence_relation *ddr)
4763 if (ddr == NULL)
4764 return;
4766 if (DDR_SUBSCRIPTS (ddr).exists ())
4767 free_subscripts (DDR_SUBSCRIPTS (ddr));
4768 DDR_DIST_VECTS (ddr).release ();
4769 DDR_DIR_VECTS (ddr).release ();
4771 free (ddr);
4774 /* Free the memory used by the data dependence relations from
4775 DEPENDENCE_RELATIONS. */
4777 void
4778 free_dependence_relations (vec<ddr_p> dependence_relations)
4780 unsigned int i;
4781 struct data_dependence_relation *ddr;
4783 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
4784 if (ddr)
4785 free_dependence_relation (ddr);
4787 dependence_relations.release ();
4790 /* Free the memory used by the data references from DATAREFS. */
4792 void
4793 free_data_refs (vec<data_reference_p> datarefs)
4795 unsigned int i;
4796 struct data_reference *dr;
4798 FOR_EACH_VEC_ELT (datarefs, i, dr)
4799 free_data_ref (dr);
4800 datarefs.release ();