* config/mips/mips.md (mulsi3_mul3, muldi3_mul3): Merge these ...
[official-gcc.git] / gcc / graphite.c
blob4531936b154ce068ce6333b90fbc161f5126284d
1 /* Gimple Represented as Polyhedra.
2 Copyright (C) 2006, 2007, 2008 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <sebastian.pop@inria.fr>.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass converts GIMPLE to GRAPHITE, performs some loop
22 transformations and then converts the resulting representation back
23 to GIMPLE.
25 An early description of this pass can be found in the GCC Summit'06
26 paper "GRAPHITE: Polyhedral Analyses and Optimizations for GCC".
27 The wiki page http://gcc.gnu.org/wiki/Graphite contains pointers to
28 the related work.
30 One important document to read is CLooG's internal manual:
31 http://repo.or.cz/w/cloog-ppl.git?a=blob_plain;f=doc/cloog.texi;hb=HEAD
32 that describes the data structure of loops used in this file, and
33 the functions that are used for transforming the code. */
35 #include "config.h"
36 #include "system.h"
37 #include "coretypes.h"
38 #include "tm.h"
39 #include "ggc.h"
40 #include "tree.h"
41 #include "rtl.h"
42 #include "basic-block.h"
43 #include "diagnostic.h"
44 #include "tree-flow.h"
45 #include "toplev.h"
46 #include "tree-dump.h"
47 #include "timevar.h"
48 #include "cfgloop.h"
49 #include "tree-chrec.h"
50 #include "tree-data-ref.h"
51 #include "tree-scalar-evolution.h"
52 #include "tree-pass.h"
53 #include "domwalk.h"
54 #include "pointer-set.h"
55 #include "gimple.h"
57 #ifdef HAVE_cloog
58 #include "cloog/cloog.h"
59 #include "graphite.h"
61 static VEC (scop_p, heap) *current_scops;
63 /* Debug the list of old induction variables for this SCOP. */
65 void
66 debug_oldivs (scop_p scop)
68 int i;
69 name_tree oldiv;
71 fprintf (stderr, "Old IVs:");
73 for (i = 0; VEC_iterate (name_tree, SCOP_OLDIVS (scop), i, oldiv); i++)
75 fprintf (stderr, "(");
76 print_generic_expr (stderr, oldiv->t, 0);
77 fprintf (stderr, ", %s, %d)\n", oldiv->name, oldiv->loop->num);
79 fprintf (stderr, "\n");
82 /* Debug the loops around basic block GB. */
84 void
85 debug_loop_vec (graphite_bb_p gb)
87 int i;
88 loop_p loop;
90 fprintf (stderr, "Loop Vec:");
92 for (i = 0; VEC_iterate (loop_p, GBB_LOOPS (gb), i, loop); i++)
93 fprintf (stderr, "%d: %d, ", i, loop ? loop->num : -1);
95 fprintf (stderr, "\n");
98 /* Push (IV, NAME) on STACK. */
100 static void
101 loop_iv_stack_push (loop_iv_stack stack, tree iv, const char *name)
103 name_tree named_iv = XNEW (struct name_tree);
105 named_iv->t = iv;
106 named_iv->name = name;
107 VEC_safe_push (name_tree, heap, *stack, named_iv);
110 /* Pops an element out of STACK. */
112 static void
113 loop_iv_stack_pop (loop_iv_stack stack)
115 VEC_pop (name_tree, *stack);
118 /* Get the IV at INDEX in STACK. */
120 static tree
121 loop_iv_stack_get_iv (loop_iv_stack stack, int index)
123 name_tree named_iv = VEC_index (name_tree, *stack, index);
125 return named_iv->t;
128 /* Get the IV from its NAME in STACK. */
130 static tree
131 loop_iv_stack_get_iv_from_name (loop_iv_stack stack, const char* name)
133 int i;
134 name_tree iv;
136 for (i = 0; VEC_iterate (name_tree, *stack, i, iv); i++)
137 if (!strcmp (name, iv->name))
138 return iv->t;
140 return NULL;
143 /* Prints on stderr the contents of STACK. */
145 void
146 loop_iv_stack_debug (loop_iv_stack stack)
148 int i;
149 name_tree iv;
150 bool first = true;
152 fprintf (stderr, "(");
154 for (i = 0; VEC_iterate (name_tree, *stack, i, iv); i++)
156 if (first)
157 first = false;
158 else
159 fprintf (stderr, " ");
160 fprintf (stderr, "%s:", iv->name);
161 print_generic_expr (stderr, iv->t, 0);
164 fprintf (stderr, ")\n");
167 /* In SCOP, get the induction variable from NAME. OLD is the original
168 loop that contained the definition of NAME. */
170 static name_tree
171 get_old_iv_from_ssa_name (scop_p scop, loop_p old, tree name)
173 tree var = SSA_NAME_VAR (name);
174 int i;
175 name_tree oldiv;
177 for (i = 0; VEC_iterate (name_tree, SCOP_OLDIVS (scop), i, oldiv); i++)
179 loop_p current = old;
181 while (current)
183 if (var == oldiv->t
184 && oldiv->loop == current)
185 return oldiv;
187 current = loop_outer (current);
190 return NULL;
194 /* Returns a new loop_to_cloog_loop_str structure. */
196 static inline struct loop_to_cloog_loop_str *
197 new_loop_to_cloog_loop_str (int loop_num,
198 int loop_position,
199 CloogLoop *cloog_loop)
201 struct loop_to_cloog_loop_str *result;
203 result = XNEW (struct loop_to_cloog_loop_str);
204 result->loop_num = loop_num;
205 result->cloog_loop = cloog_loop;
206 result->loop_position = loop_position;
208 return result;
211 /* Hash function for SCOP_LOOP2CLOOG_LOOP hash table. */
213 static hashval_t
214 hash_loop_to_cloog_loop (const void *elt)
216 return ((const struct loop_to_cloog_loop_str *) elt)->loop_num;
219 /* Equality function for SCOP_LOOP2CLOOG_LOOP hash table. */
221 static int
222 eq_loop_to_cloog_loop (const void *el1, const void *el2)
224 const struct loop_to_cloog_loop_str *elt1, *elt2;
226 elt1 = (const struct loop_to_cloog_loop_str *) el1;
227 elt2 = (const struct loop_to_cloog_loop_str *) el2;
228 return elt1->loop_num == elt2->loop_num;
231 /* Compares two graphite bbs and returns an integer less than, equal to, or
232 greater than zero if the first argument is considered to be respectively
233 less than, equal to, or greater than the second.
234 We compare using the lexicographic order of the static schedules. */
236 static int
237 gbb_compare (const void *p_1, const void *p_2)
239 const struct graphite_bb *const gbb_1
240 = *(const struct graphite_bb *const*) p_1;
241 const struct graphite_bb *const gbb_2
242 = *(const struct graphite_bb *const*) p_2;
244 return lambda_vector_compare (GBB_STATIC_SCHEDULE (gbb_1),
245 gbb_nb_loops (gbb_1) + 1,
246 GBB_STATIC_SCHEDULE (gbb_2),
247 gbb_nb_loops (gbb_2) + 1);
250 /* Sort graphite bbs in SCOP. */
252 static void
253 graphite_sort_gbbs (scop_p scop)
255 VEC (graphite_bb_p, heap) *bbs = SCOP_BBS (scop);
257 qsort (VEC_address (graphite_bb_p, bbs),
258 VEC_length (graphite_bb_p, bbs),
259 sizeof (graphite_bb_p), gbb_compare);
262 /* Dump conditions of a graphite basic block GBB on FILE. */
264 static void
265 dump_gbb_conditions (FILE *file, graphite_bb_p gbb)
267 int i;
268 gimple stmt;
269 VEC (gimple, heap) *conditions = GBB_CONDITIONS (gbb);
271 if (VEC_empty (gimple, conditions))
272 return;
274 fprintf (file, "\tbb %d\t: cond = {", GBB_BB (gbb)->index);
276 for (i = 0; VEC_iterate (gimple, conditions, i, stmt); i++)
277 print_gimple_stmt (file, stmt, 0, 0);
279 fprintf (file, "}\n");
282 /* Converts the graphite scheduling function into a cloog scattering
283 matrix. This scattering matrix is used to limit the possible cloog
284 output to valid programs in respect to the scheduling function.
286 SCATTERING_DIMENSIONS specifies the dimensionality of the scattering
287 matrix. CLooG 0.14.0 and previous versions require, that all scattering
288 functions of one CloogProgram have the same dimensionality, therefore we
289 allow to specify it. (Should be removed in future versions) */
291 static CloogMatrix *
292 schedule_to_scattering (graphite_bb_p gb, int scattering_dimensions)
294 int i;
295 scop_p scop = GBB_SCOP (gb);
297 int nb_iterators = gbb_nb_loops (gb);
299 /* The cloog scattering matrix consists of these colums:
300 1 col = Eq/Inq,
301 scattering_dimensions cols = Scattering dimensions,
302 nb_iterators cols = bb's iterators,
303 scop_nb_params cols = Parameters,
304 1 col = Constant 1.
306 Example:
308 scattering_dimensions = 5
309 max_nb_iterators = 2
310 nb_iterators = 1
311 scop_nb_params = 2
313 Schedule:
317 Scattering Matrix:
318 s1 s2 s3 s4 s5 i p1 p2 1
319 1 0 0 0 0 0 0 0 -4 = 0
320 0 1 0 0 0 -1 0 0 0 = 0
321 0 0 1 0 0 0 0 0 -5 = 0 */
322 int nb_params = scop_nb_params (scop);
323 int nb_cols = 1 + scattering_dimensions + nb_iterators + nb_params + 1;
324 int col_const = nb_cols - 1;
325 int col_iter_offset = 1 + scattering_dimensions;
327 CloogMatrix *scat = cloog_matrix_alloc (scattering_dimensions, nb_cols);
329 gcc_assert (scattering_dimensions >= nb_iterators * 2 + 1);
331 /* Initialize the identity matrix. */
332 for (i = 0; i < scattering_dimensions; i++)
333 value_set_si (scat->p[i][i + 1], 1);
335 /* Textual order outside the first loop */
336 value_set_si (scat->p[0][col_const], -GBB_STATIC_SCHEDULE (gb)[0]);
338 /* For all surrounding loops. */
339 for (i = 0; i < nb_iterators; i++)
341 int schedule = GBB_STATIC_SCHEDULE (gb)[i + 1];
343 /* Iterations of this loop. */
344 value_set_si (scat->p[2 * i + 1][col_iter_offset + i], -1);
346 /* Textual order inside this loop. */
347 value_set_si (scat->p[2 * i + 2][col_const], -schedule);
350 return scat;
353 /* Print the schedules of GB to FILE with INDENT white spaces before.
354 VERBOSITY determines how verbose the code pretty printers are. */
356 void
357 print_graphite_bb (FILE *file, graphite_bb_p gb, int indent, int verbosity)
359 CloogMatrix *scattering;
360 int i;
361 loop_p loop;
362 fprintf (file, "\nGBB (\n");
364 print_loops_bb (file, GBB_BB (gb), indent+2, verbosity);
366 if (GBB_DOMAIN (gb))
368 fprintf (file, " (domain: \n");
369 cloog_matrix_print (dump_file, GBB_DOMAIN (gb));
370 fprintf (file, " )\n");
373 if (GBB_STATIC_SCHEDULE (gb))
375 fprintf (file, " (static schedule: ");
376 print_lambda_vector (file, GBB_STATIC_SCHEDULE (gb),
377 gbb_nb_loops (gb) + 1);
378 fprintf (file, " )\n");
381 if (GBB_LOOPS (gb))
383 fprintf (file, " (contained loops: \n");
384 for (i = 0; VEC_iterate (loop_p, GBB_LOOPS (gb), i, loop); i++)
385 if (loop == NULL)
386 fprintf (file, " iterator %d => NULL \n", i);
387 else
388 fprintf (file, " iterator %d => loop %d \n", i,
389 loop->num);
390 fprintf (file, " )\n");
393 if (GBB_DATA_REFS (gb))
394 dump_data_references (file, GBB_DATA_REFS (gb));
396 if (GBB_CONDITIONS (gb))
398 fprintf (file, " (conditions: \n");
399 dump_gbb_conditions (dump_file, gb);
400 fprintf (file, " )\n");
403 if (GBB_SCOP (gb)
404 && GBB_STATIC_SCHEDULE (gb))
406 fprintf (file, " (scattering: \n");
407 scattering = schedule_to_scattering (gb, 2 * gbb_nb_loops (gb) + 1);
408 cloog_matrix_print (file, scattering);
409 cloog_matrix_free (scattering);
410 fprintf (file, " )\n");
413 fprintf (file, ")\n");
416 /* Print to STDERR the schedules of GB with VERBOSITY level. */
418 void
419 debug_gbb (graphite_bb_p gb, int verbosity)
421 print_graphite_bb (stderr, gb, 0, verbosity);
425 /* Print SCOP to FILE. VERBOSITY determines how verbose the pretty
426 printers are. */
428 static void
429 print_scop (FILE *file, scop_p scop, int verbosity)
431 if (scop == NULL)
432 return;
434 fprintf (file, "\nSCoP_%d_%d (\n",
435 SCOP_ENTRY (scop)->index, SCOP_EXIT (scop)->index);
437 fprintf (file, " (cloog: \n");
438 cloog_program_print (file, SCOP_PROG (scop));
439 fprintf (file, " )\n");
441 if (SCOP_BBS (scop))
443 graphite_bb_p gb;
444 int i;
446 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++)
447 print_graphite_bb (file, gb, 0, verbosity);
450 fprintf (file, ")\n");
453 /* Print all the SCOPs to FILE. VERBOSITY determines how verbose the
454 code pretty printers are. */
456 static void
457 print_scops (FILE *file, int verbosity)
459 int i;
460 scop_p scop;
462 for (i = 0; VEC_iterate (scop_p, current_scops, i, scop); i++)
463 print_scop (file, scop, verbosity);
466 /* Debug SCOP. VERBOSITY determines how verbose the code pretty
467 printers are. */
469 void
470 debug_scop (scop_p scop, int verbosity)
472 print_scop (stderr, scop, verbosity);
475 /* Debug all SCOPs from CURRENT_SCOPS. VERBOSITY determines how
476 verbose the code pretty printers are. */
478 void
479 debug_scops (int verbosity)
481 print_scops (stderr, verbosity);
484 /* Return true when BB is contained in SCOP. */
486 static inline bool
487 bb_in_scop_p (basic_block bb, scop_p scop)
489 return bitmap_bit_p (SCOP_BBS_B (scop), bb->index);
492 /* Pretty print to FILE the SCOP in DOT format. */
494 static void
495 dot_scop_1 (FILE *file, scop_p scop)
497 edge e;
498 edge_iterator ei;
499 basic_block bb;
500 basic_block entry = SCOP_ENTRY (scop);
501 basic_block exit = SCOP_EXIT (scop);
503 fprintf (file, "digraph SCoP_%d_%d {\n", entry->index,
504 exit->index);
506 FOR_ALL_BB (bb)
508 if (bb == entry)
509 fprintf (file, "%d [shape=triangle];\n", bb->index);
511 if (bb == exit)
512 fprintf (file, "%d [shape=box];\n", bb->index);
514 if (bb_in_scop_p (bb, scop))
515 fprintf (file, "%d [color=red];\n", bb->index);
517 FOR_EACH_EDGE (e, ei, bb->succs)
518 fprintf (file, "%d -> %d;\n", bb->index, e->dest->index);
521 fputs ("}\n\n", file);
524 /* Display SCOP using dotty. */
526 void
527 dot_scop (scop_p scop)
529 dot_scop_1 (stderr, scop);
532 /* Pretty print all SCoPs in DOT format and mark them with different colors.
533 If there are not enough colors, paint later SCoPs gray.
534 Special nodes:
535 - "*" after the node number: entry of a SCoP,
536 - "#" after the node number: exit of a SCoP,
537 - "()" entry or exit not part of SCoP. */
539 static void
540 dot_all_scops_1 (FILE *file)
542 basic_block bb;
543 edge e;
544 edge_iterator ei;
545 scop_p scop;
546 const char* color;
547 int i;
549 /* Disable debugging while printing graph. */
550 int tmp_dump_flags = dump_flags;
551 dump_flags = 0;
553 fprintf (file, "digraph all {\n");
555 FOR_ALL_BB (bb)
557 int part_of_scop = false;
559 /* Use HTML for every bb label. So we are able to print bbs
560 which are part of two different SCoPs, with two different
561 background colors. */
562 fprintf (file, "%d [label=<\n <TABLE BORDER=\"0\" CELLBORDER=\"1\" ",
563 bb->index);
564 fprintf (file, "CELLSPACING=\"0\">\n");
566 /* Select color for SCoP. */
567 for (i = 0; VEC_iterate (scop_p, current_scops, i, scop); i++)
568 if (bb_in_scop_p (bb, scop)
569 || (SCOP_EXIT (scop) == bb)
570 || (SCOP_ENTRY (scop) == bb))
572 switch (i % 17)
574 case 0: /* red */
575 color = "#e41a1c";
576 break;
577 case 1: /* blue */
578 color = "#377eb8";
579 break;
580 case 2: /* green */
581 color = "#4daf4a";
582 break;
583 case 3: /* purple */
584 color = "#984ea3";
585 break;
586 case 4: /* orange */
587 color = "#ff7f00";
588 break;
589 case 5: /* yellow */
590 color = "#ffff33";
591 break;
592 case 6: /* brown */
593 color = "#a65628";
594 break;
595 case 7: /* rose */
596 color = "#f781bf";
597 break;
598 case 8:
599 color = "#8dd3c7";
600 break;
601 case 9:
602 color = "#ffffb3";
603 break;
604 case 10:
605 color = "#bebada";
606 break;
607 case 11:
608 color = "#fb8072";
609 break;
610 case 12:
611 color = "#80b1d3";
612 break;
613 case 13:
614 color = "#fdb462";
615 break;
616 case 14:
617 color = "#b3de69";
618 break;
619 case 15:
620 color = "#fccde5";
621 break;
622 case 16:
623 color = "#bc80bd";
624 break;
625 default: /* gray */
626 color = "#999999";
629 fprintf (file, " <TR><TD WIDTH=\"50\" BGCOLOR=\"%s\">", color);
631 if (!bb_in_scop_p (bb, scop))
632 fprintf (file, " (");
634 if (bb == SCOP_ENTRY (scop)
635 && bb == SCOP_EXIT (scop))
636 fprintf (file, " %d*# ", bb->index);
637 else if (bb == SCOP_ENTRY (scop))
638 fprintf (file, " %d* ", bb->index);
639 else if (bb == SCOP_EXIT (scop))
640 fprintf (file, " %d# ", bb->index);
641 else
642 fprintf (file, " %d ", bb->index);
644 if (!bb_in_scop_p (bb, scop))
645 fprintf (file, ")");
647 fprintf (file, "</TD></TR>\n");
648 part_of_scop = true;
651 if (!part_of_scop)
653 fprintf (file, " <TR><TD WIDTH=\"50\" BGCOLOR=\"#ffffff\">");
654 fprintf (file, " %d </TD></TR>\n", bb->index);
657 fprintf (file, " </TABLE>>, shape=box, style=\"setlinewidth(0)\"]\n");
660 FOR_ALL_BB (bb)
662 FOR_EACH_EDGE (e, ei, bb->succs)
663 fprintf (file, "%d -> %d;\n", bb->index, e->dest->index);
666 fputs ("}\n\n", file);
668 /* Enable debugging again. */
669 dump_flags = tmp_dump_flags;
672 /* Display all SCoPs using dotty. */
674 void
675 dot_all_scops (void)
677 /* When debugging, enable the following code. This cannot be used
678 in production compilers because it calls "system". */
679 #if 0
680 FILE *stream = fopen ("/tmp/allscops.dot", "w");
681 gcc_assert (stream);
683 dot_all_scops_1 (stream);
684 fclose (stream);
686 system ("dotty /tmp/allscops.dot");
687 #else
688 dot_all_scops_1 (stderr);
689 #endif
692 /* Returns true when LOOP is in SCOP. */
694 static inline bool
695 loop_in_scop_p (struct loop *loop, scop_p scop)
697 return (bb_in_scop_p (loop->header, scop)
698 && bb_in_scop_p (loop->latch, scop));
701 /* Returns the outermost loop in SCOP that contains BB. */
703 static struct loop *
704 outermost_loop_in_scop (scop_p scop, basic_block bb)
706 struct loop *nest;
708 nest = bb->loop_father;
709 while (loop_outer (nest) && loop_in_scop_p (loop_outer (nest), scop))
710 nest = loop_outer (nest);
712 return nest;
715 /* Returns the block preceding the entry of SCOP. */
717 static basic_block
718 block_before_scop (scop_p scop)
720 return SESE_ENTRY (SCOP_REGION (scop))->src;
723 /* Return true when EXPR is an affine function in LOOP with parameters
724 instantiated relative to SCOP_ENTRY. */
726 static bool
727 loop_affine_expr (basic_block scop_entry, struct loop *loop, tree expr)
729 int n = loop->num;
730 tree scev = analyze_scalar_evolution (loop, expr);
732 scev = instantiate_scev (scop_entry, loop, scev);
734 return (evolution_function_is_invariant_p (scev, n)
735 || evolution_function_is_affine_multivariate_p (scev, n));
738 /* Return true if the operand OP is simple. */
740 static bool
741 is_simple_operand (loop_p loop, gimple stmt, tree op)
743 /* It is not a simple operand when it is a declaration, */
744 if (DECL_P (op)
745 /* or a structure, */
746 || AGGREGATE_TYPE_P (TREE_TYPE (op))
747 /* or a memory access that cannot be analyzed by the data
748 reference analysis. */
749 || ((handled_component_p (op) || INDIRECT_REF_P (op))
750 && !stmt_simple_memref_p (loop, stmt, op)))
751 return false;
753 return true;
756 /* Return true only when STMT is simple enough for being handled by
757 Graphite. This depends on SCOP_ENTRY, as the parametetrs are
758 initialized relatively to this basic block. */
760 static bool
761 stmt_simple_for_scop_p (basic_block scop_entry, gimple stmt)
763 basic_block bb = gimple_bb (stmt);
764 struct loop *loop = bb->loop_father;
766 /* GIMPLE_ASM and GIMPLE_CALL may embed arbitrary side effects.
767 Calls have side-effects, except those to const or pure
768 functions. */
769 if (gimple_has_volatile_ops (stmt)
770 || (gimple_code (stmt) == GIMPLE_CALL
771 && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)))
772 || (gimple_code (stmt) == GIMPLE_ASM))
773 return false;
775 switch (gimple_code (stmt))
777 case GIMPLE_RETURN:
778 case GIMPLE_LABEL:
779 return true;
781 case GIMPLE_COND:
783 tree op;
784 ssa_op_iter op_iter;
785 enum tree_code code = gimple_cond_code (stmt);
787 /* We can only handle this kind of conditional expressions.
788 For inequalities like "if (i != 3 * k)" we need unions of
789 polyhedrons. Expressions like "if (a)" or "if (a == 15)" need
790 them for the else branch. */
791 if (!(code == LT_EXPR
792 || code == GT_EXPR
793 || code == LE_EXPR
794 || code == GE_EXPR))
795 return false;
797 if (!scop_entry)
798 return false;
800 FOR_EACH_SSA_TREE_OPERAND (op, stmt, op_iter, SSA_OP_ALL_USES)
801 if (!loop_affine_expr (scop_entry, loop, op))
802 return false;
804 return true;
807 case GIMPLE_ASSIGN:
809 enum tree_code code = gimple_assign_rhs_code (stmt);
811 switch (get_gimple_rhs_class (code))
813 case GIMPLE_UNARY_RHS:
814 case GIMPLE_SINGLE_RHS:
815 return (is_simple_operand (loop, stmt, gimple_assign_lhs (stmt))
816 && is_simple_operand (loop, stmt, gimple_assign_rhs1 (stmt)));
818 case GIMPLE_BINARY_RHS:
819 return (is_simple_operand (loop, stmt, gimple_assign_lhs (stmt))
820 && is_simple_operand (loop, stmt, gimple_assign_rhs1 (stmt))
821 && is_simple_operand (loop, stmt, gimple_assign_rhs2 (stmt)));
823 case GIMPLE_INVALID_RHS:
824 default:
825 gcc_unreachable ();
829 case GIMPLE_CALL:
831 size_t i;
832 size_t n = gimple_call_num_args (stmt);
833 tree lhs = gimple_call_lhs (stmt);
835 for (i = 0; i < n; i++)
837 tree arg = gimple_call_arg (stmt, i);
839 if (!(is_simple_operand (loop, stmt, lhs)
840 && is_simple_operand (loop, stmt, arg)))
841 return false;
844 return true;
847 default:
848 /* These nodes cut a new scope. */
849 return false;
852 return false;
855 /* Returns the statement of BB that contains a harmful operation: that
856 can be a function call with side effects, the induction variables
857 are not linear with respect to SCOP_ENTRY, etc. The current open
858 scop should end before this statement. */
860 static gimple
861 harmful_stmt_in_bb (basic_block scop_entry, basic_block bb)
863 gimple_stmt_iterator gsi;
865 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
866 if (!stmt_simple_for_scop_p (scop_entry, gsi_stmt (gsi)))
867 return gsi_stmt (gsi);
869 return NULL;
872 /* Store the GRAPHITE representation of BB. */
874 static void
875 new_graphite_bb (scop_p scop, basic_block bb)
877 struct graphite_bb *gbb = XNEW (struct graphite_bb);
879 bb->aux = gbb;
880 GBB_BB (gbb) = bb;
881 GBB_SCOP (gbb) = scop;
882 GBB_DATA_REFS (gbb) = NULL;
883 GBB_DOMAIN (gbb) = NULL;
884 GBB_CONDITIONS (gbb) = NULL;
885 GBB_CONDITION_CASES (gbb) = NULL;
886 GBB_LOOPS (gbb) = NULL;
887 VEC_safe_push (graphite_bb_p, heap, SCOP_BBS (scop), gbb);
888 bitmap_set_bit (SCOP_BBS_B (scop), bb->index);
891 /* Frees GBB. */
893 static void
894 free_graphite_bb (struct graphite_bb *gbb)
896 if (GBB_DOMAIN (gbb))
897 cloog_matrix_free (GBB_DOMAIN (gbb));
899 free_data_refs (GBB_DATA_REFS (gbb));
900 VEC_free (gimple, heap, GBB_CONDITIONS (gbb));
901 VEC_free (gimple, heap, GBB_CONDITION_CASES (gbb));
902 VEC_free (loop_p, heap, GBB_LOOPS (gbb));
904 GBB_BB (gbb)->aux = 0;
905 XDELETE (gbb);
908 /* Creates a new scop starting with ENTRY. */
910 static scop_p
911 new_scop (edge entry, edge exit)
913 scop_p scop = XNEW (struct scop);
915 gcc_assert (entry && exit);
917 SCOP_REGION (scop) = XNEW (struct sese);
918 SESE_ENTRY (SCOP_REGION (scop)) = entry;
919 SESE_EXIT (SCOP_REGION (scop)) = exit;
920 SCOP_BBS (scop) = VEC_alloc (graphite_bb_p, heap, 3);
921 SCOP_OLDIVS (scop) = VEC_alloc (name_tree, heap, 3);
922 SCOP_BBS_B (scop) = BITMAP_ALLOC (NULL);
923 SCOP_LOOPS (scop) = BITMAP_ALLOC (NULL);
924 SCOP_LOOP_NEST (scop) = VEC_alloc (loop_p, heap, 3);
925 SCOP_PARAMS (scop) = VEC_alloc (name_tree, heap, 3);
926 SCOP_PROG (scop) = cloog_program_malloc ();
927 cloog_program_set_names (SCOP_PROG (scop), cloog_names_malloc ());
928 SCOP_LOOP2CLOOG_LOOP (scop) = htab_create (10, hash_loop_to_cloog_loop,
929 eq_loop_to_cloog_loop,
930 free);
931 return scop;
934 /* Deletes SCOP. */
936 static void
937 free_scop (scop_p scop)
939 int i;
940 name_tree p;
941 struct graphite_bb *gb;
943 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++)
944 free_graphite_bb (gb);
946 VEC_free (graphite_bb_p, heap, SCOP_BBS (scop));
947 BITMAP_FREE (SCOP_BBS_B (scop));
948 BITMAP_FREE (SCOP_LOOPS (scop));
949 VEC_free (loop_p, heap, SCOP_LOOP_NEST (scop));
950 VEC_free (name_tree, heap, SCOP_OLDIVS (scop));
952 for (i = 0; VEC_iterate (name_tree, SCOP_PARAMS (scop), i, p); i++)
953 free (p);
955 VEC_free (name_tree, heap, SCOP_PARAMS (scop));
956 cloog_program_free (SCOP_PROG (scop));
957 htab_delete (SCOP_LOOP2CLOOG_LOOP (scop));
958 XDELETE (SCOP_REGION (scop));
959 XDELETE (scop);
962 /* Deletes all scops in SCOPS. */
964 static void
965 free_scops (VEC (scop_p, heap) *scops)
967 int i;
968 scop_p scop;
970 for (i = 0; VEC_iterate (scop_p, scops, i, scop); i++)
971 free_scop (scop);
973 VEC_free (scop_p, heap, scops);
976 typedef enum gbb_type {
977 GBB_UNKNOWN,
978 GBB_LOOP_SING_EXIT_HEADER,
979 GBB_LOOP_MULT_EXIT_HEADER,
980 GBB_LOOP_EXIT,
981 GBB_COND_HEADER,
982 GBB_SIMPLE,
983 GBB_LAST
984 } gbb_type;
986 /* Detect the type of BB. Loop headers are only marked, if they are
987 new. This means their loop_father is different to LAST_LOOP.
988 Otherwise they are treated like any other bb and their type can be
989 any other type. */
991 static gbb_type
992 get_bb_type (basic_block bb, struct loop *last_loop)
994 VEC (basic_block, heap) *dom;
995 int nb_dom, nb_suc;
996 struct loop *loop = bb->loop_father;
998 /* Check, if we entry into a new loop. */
999 if (loop != last_loop)
1001 if (single_exit (loop) != NULL)
1002 return GBB_LOOP_SING_EXIT_HEADER;
1003 else if (loop->num != 0)
1004 return GBB_LOOP_MULT_EXIT_HEADER;
1005 else
1006 return GBB_COND_HEADER;
1009 dom = get_dominated_by (CDI_DOMINATORS, bb);
1010 nb_dom = VEC_length (basic_block, dom);
1011 VEC_free (basic_block, heap, dom);
1013 if (nb_dom == 0)
1014 return GBB_LAST;
1016 nb_suc = VEC_length (edge, bb->succs);
1018 if (nb_dom == 1 && nb_suc == 1)
1019 return GBB_SIMPLE;
1021 return GBB_COND_HEADER;
1024 /* A SCoP detection region, defined using bbs as borders.
1025 All control flow touching this region, comes in passing basic_block ENTRY and
1026 leaves passing basic_block EXIT. By using bbs instead of edges for the
1027 borders we are able to represent also regions that do not have a single
1028 entry or exit edge.
1029 But as they have a single entry basic_block and a single exit basic_block, we
1030 are able to generate for every sd_region a single entry and exit edge.
1034 3 <- entry
1037 / \ This region contains: {3, 4, 5, 6, 7, 8}
1042 9 <- exit */
1045 typedef struct sd_region_p
1047 /* The entry bb dominates all bbs in the sd_region. It is part of the
1048 region. */
1049 basic_block entry;
1051 /* The exit bb postdominates all bbs in the sd_region, but is not
1052 part of the region. */
1053 basic_block exit;
1054 } sd_region;
1056 DEF_VEC_O(sd_region);
1057 DEF_VEC_ALLOC_O(sd_region, heap);
1060 /* Moves the scops from SOURCE to TARGET and clean up SOURCE. */
1062 static void
1063 move_sd_regions (VEC (sd_region, heap) **source, VEC (sd_region, heap) **target)
1065 sd_region *s;
1066 int i;
1068 for (i = 0; VEC_iterate (sd_region, *source, i, s); i++)
1069 VEC_safe_push (sd_region, heap, *target, s);
1071 VEC_free (sd_region, heap, *source);
1074 /* Store information needed by scopdet_* functions. */
1076 struct scopdet_info
1078 /* Where the last open scop would stop if the current BB is harmful. */
1079 basic_block last;
1081 /* Where the next scop would start if the current BB is harmful. */
1082 basic_block next;
1084 /* The bb or one of its children contains open loop exits. That means
1085 loop exit nodes that are not surrounded by a loop dominated by bb. */
1086 bool exits;
1088 /* The bb or one of its children contains only structures we can handle. */
1089 bool difficult;
1093 static struct scopdet_info build_scops_1 (basic_block, VEC (sd_region, heap) **,
1094 loop_p);
1096 /* Calculates BB infos. If bb is difficult we add valid SCoPs dominated by BB
1097 to SCOPS. TYPE is the gbb_type of BB. */
1099 static struct scopdet_info
1100 scopdet_basic_block_info (basic_block bb, VEC (sd_region, heap) **scops,
1101 gbb_type type)
1103 struct loop *loop = bb->loop_father;
1104 struct scopdet_info result;
1105 gimple stmt;
1107 /* XXX: ENTRY_BLOCK_PTR could be optimized in later steps. */
1108 stmt = harmful_stmt_in_bb (ENTRY_BLOCK_PTR, bb);
1109 result.difficult = (stmt != NULL);
1110 result.last = NULL;
1112 switch (type)
1114 case GBB_LAST:
1115 result.next = NULL;
1116 result.exits = false;
1117 result.last = bb;
1118 break;
1120 case GBB_SIMPLE:
1121 result.next = single_succ (bb);
1122 result.exits = false;
1123 result.last = bb;
1124 break;
1126 case GBB_LOOP_SING_EXIT_HEADER:
1128 VEC (sd_region, heap) *tmp_scops = VEC_alloc (sd_region, heap,3);
1129 struct scopdet_info sinfo;
1131 sinfo = build_scops_1 (bb, &tmp_scops, loop);
1133 result.last = single_exit (bb->loop_father)->src;
1134 result.next = single_exit (bb->loop_father)->dest;
1136 /* If we do not dominate result.next, remove it. It's either
1137 the EXIT_BLOCK_PTR, or another bb dominates it and will
1138 call the scop detection for this bb. */
1139 if (!dominated_by_p (CDI_DOMINATORS, result.next, bb))
1140 result.next = NULL;
1142 if (result.last->loop_father != loop)
1143 result.next = NULL;
1145 if (TREE_CODE (number_of_latch_executions (loop))
1146 == SCEV_NOT_KNOWN)
1147 result.difficult = true;
1149 if (sinfo.difficult)
1150 move_sd_regions (&tmp_scops, scops);
1151 else
1152 VEC_free (sd_region, heap, tmp_scops);
1154 result.exits = false;
1155 result.difficult |= sinfo.difficult;
1156 break;
1159 case GBB_LOOP_MULT_EXIT_HEADER:
1161 /* XXX: For now we just do not join loops with multiple exits. If the
1162 exits lead to the same bb it may be possible to join the loop. */
1163 VEC (sd_region, heap) *tmp_scops = VEC_alloc (sd_region, heap, 3);
1164 VEC (edge, heap) *exits = get_loop_exit_edges (loop);
1165 edge e;
1166 int i;
1167 build_scops_1 (bb, &tmp_scops, loop);
1169 /* XXX: Use 'e->src' ot better 'bb'? */
1170 for (i = 0; VEC_iterate (edge, exits, i, e); i++)
1171 if (dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
1172 && e->src->loop_father == loop)
1173 build_scops_1 (e->dest, &tmp_scops, e->dest->loop_father);
1175 result.next = NULL;
1176 result.last = NULL;
1177 result.difficult = true;
1178 result.exits = false;
1179 move_sd_regions (&tmp_scops, scops);
1180 VEC_free (edge, heap, exits);
1181 break;
1183 case GBB_COND_HEADER:
1185 VEC (sd_region, heap) *tmp_scops = VEC_alloc (sd_region, heap, 3);
1186 struct scopdet_info sinfo;
1187 VEC (basic_block, heap) *dominated;
1188 int i;
1189 basic_block dom_bb;
1190 basic_block last_bb = NULL;
1191 edge e;
1192 result.exits = false;
1194 /* First check the successors of BB, and check if it is possible to join
1195 the different branches. */
1196 for (i = 0; VEC_iterate (edge, bb->succs, i, e); i++)
1198 /* Ignore loop exits. They will be handled after the loop body. */
1199 if (is_loop_exit (loop, e->dest))
1201 result.exits = true;
1202 continue;
1205 /* Do not follow edges that lead to the end of the
1206 conditions block. For example, in
1209 | /|\
1210 | 1 2 |
1211 | | | |
1212 | 3 4 |
1213 | \|/
1216 the edge from 0 => 6. Only check if all paths lead to
1217 the same node 6. */
1219 if (!single_pred_p (e->dest))
1221 /* Check, if edge leads directly to the end of this
1222 condition. */
1223 if (!last_bb)
1225 last_bb = e->dest;
1228 if (e->dest != last_bb)
1229 result.difficult = true;
1231 continue;
1234 if (!dominated_by_p (CDI_DOMINATORS, e->dest, bb))
1236 result.difficult = true;
1237 continue;
1240 sinfo = build_scops_1 (e->dest, &tmp_scops, loop);
1242 result.exits |= sinfo.exits;
1243 result.last = sinfo.last;
1244 result.difficult |= sinfo.difficult;
1246 /* Checks, if all branches end at the same point.
1247 If that is true, the condition stays joinable.
1248 Have a look at the example above. */
1249 if (sinfo.last && single_succ_p (sinfo.last))
1251 basic_block next_tmp = single_succ (sinfo.last);
1253 if (!last_bb)
1254 last_bb = next_tmp;
1256 if (next_tmp != last_bb)
1257 result.difficult = true;
1259 else
1260 result.difficult = true;
1263 /* If the condition is joinable. */
1264 if (!result.exits && !result.difficult)
1266 /* Only return a next pointer if we dominate this pointer.
1267 Otherwise it will be handled by the bb dominating it. */
1268 if (dominated_by_p (CDI_DOMINATORS, last_bb, bb) && last_bb != bb)
1269 result.next = last_bb;
1270 else
1271 result.next = NULL;
1273 VEC_free (sd_region, heap, tmp_scops);
1274 break;
1277 /* Scan remaining bbs dominated by BB. */
1278 dominated = get_dominated_by (CDI_DOMINATORS, bb);
1280 for (i = 0; VEC_iterate (basic_block, dominated, i, dom_bb); i++)
1282 /* Ignore loop exits: they will be handled after the loop body. */
1283 if (is_loop_exit (loop, dom_bb))
1285 result.exits = true;
1286 continue;
1289 /* Ignore the bbs processed above. */
1290 if (single_pred_p (dom_bb) && single_pred (dom_bb) == bb)
1291 continue;
1293 if (loop_depth (loop) > loop_depth (dom_bb->loop_father))
1294 sinfo = build_scops_1 (dom_bb, &tmp_scops, loop_outer (loop));
1295 else
1296 sinfo = build_scops_1 (dom_bb, &tmp_scops, loop);
1299 result.exits |= sinfo.exits;
1300 result.difficult = true;
1301 result.last = NULL;
1304 VEC_free (basic_block, heap, dominated);
1306 result.next = NULL;
1307 move_sd_regions (&tmp_scops, scops);
1309 break;
1312 default:
1313 gcc_unreachable ();
1316 return result;
1319 /* Creates the SCoPs and writes entry and exit points for every SCoP. */
1321 static struct scopdet_info
1322 build_scops_1 (basic_block current, VEC (sd_region, heap) **scops, loop_p loop)
1325 bool in_scop = false;
1326 sd_region open_scop;
1327 struct scopdet_info sinfo;
1329 /* Initialize result. */
1330 struct scopdet_info result;
1331 result.exits = false;
1332 result.difficult = false;
1333 result.next = NULL;
1334 result.last = NULL;
1335 open_scop.entry = NULL;
1337 /* Loop over the dominance tree. If we meet a difficult bb, close
1338 the current SCoP. Loop and condition header start a new layer,
1339 and can only be added if all bbs in deeper layers are simple. */
1340 while (current != NULL)
1342 sinfo = scopdet_basic_block_info (current, scops, get_bb_type (current,
1343 loop));
1345 if (!in_scop && !(sinfo.exits || sinfo.difficult))
1347 open_scop.entry = current;
1348 open_scop.exit = NULL;
1349 in_scop = true;
1351 else if (in_scop && (sinfo.exits || sinfo.difficult))
1353 open_scop.exit = current;
1354 VEC_safe_push (sd_region, heap, *scops, &open_scop);
1355 in_scop = false;
1358 result.difficult |= sinfo.difficult;
1359 result.exits |= sinfo.exits;
1361 current = sinfo.next;
1364 /* Try to close open_scop, if we are still in an open SCoP. */
1365 if (in_scop)
1367 int i;
1368 edge e;
1370 for (i = 0; VEC_iterate (edge, sinfo.last->succs, i, e); i++)
1371 if (dominated_by_p (CDI_POST_DOMINATORS, sinfo.last, e->dest))
1372 open_scop.exit = e->dest;
1374 if (!open_scop.exit && open_scop.entry != sinfo.last)
1375 open_scop.exit = sinfo.last;
1377 if (open_scop.exit)
1378 VEC_safe_push (sd_region, heap, *scops, &open_scop);
1382 result.last = sinfo.last;
1383 return result;
1386 /* Checks if a bb is contained in REGION. */
1388 static bool
1389 bb_in_sd_region (basic_block bb, sd_region *region)
1391 return dominated_by_p (CDI_DOMINATORS, bb, region->entry)
1392 && !(dominated_by_p (CDI_DOMINATORS, bb, region->exit)
1393 && !dominated_by_p (CDI_DOMINATORS, region->entry,
1394 region->exit));
1397 /* Returns the single entry edge of REGION, if it does not exits NULL. */
1399 static edge
1400 find_single_entry_edge (sd_region *region)
1402 edge e;
1403 edge_iterator ei;
1404 edge entry = NULL;
1406 FOR_EACH_EDGE (e, ei, region->entry->preds)
1407 if (!bb_in_sd_region (e->src, region))
1409 if (entry)
1411 entry = NULL;
1412 break;
1415 else
1416 entry = e;
1419 return entry;
1422 /* Returns the single exit edge of REGION, if it does not exits NULL. */
1424 static edge
1425 find_single_exit_edge (sd_region *region)
1427 edge e;
1428 edge_iterator ei;
1429 edge exit = NULL;
1431 FOR_EACH_EDGE (e, ei, region->exit->preds)
1432 if (bb_in_sd_region (e->src, region))
1434 if (exit)
1436 exit = NULL;
1437 break;
1440 else
1441 exit = e;
1444 return exit;
1447 /* Create a single entry edge for REGION. */
1449 static void
1450 create_single_entry_edge (sd_region *region)
1452 if (find_single_entry_edge (region))
1453 return;
1455 /* There are multiple predecessors for bb_3
1457 | 1 2
1458 | | /
1459 | |/
1460 | 3 <- entry
1461 | |\
1462 | | |
1463 | 4 ^
1464 | | |
1465 | |/
1468 There are two edges (1->3, 2->3), that point from outside into the region,
1469 and another one (5->3), a loop latch, lead to bb_3.
1471 We split bb_3.
1473 | 1 2
1474 | | /
1475 | |/
1476 |3.0
1477 | |\ (3.0 -> 3.1) = single entry edge
1478 |3.1 | <- entry
1479 | | |
1480 | | |
1481 | 4 ^
1482 | | |
1483 | |/
1486 If the loop is part of the SCoP, we have to redirect the loop latches.
1488 | 1 2
1489 | | /
1490 | |/
1491 |3.0
1492 | | (3.0 -> 3.1) = entry edge
1493 |3.1 <- entry
1494 | |\
1495 | | |
1496 | 4 ^
1497 | | |
1498 | |/
1499 | 5 */
1501 if (region->entry->loop_father->header != region->entry
1502 || dominated_by_p (CDI_DOMINATORS,
1503 loop_latch_edge (region->entry->loop_father)->src,
1504 region->exit))
1506 edge forwarder = split_block_after_labels (region->entry);
1507 region->entry = forwarder->dest;
1509 else
1510 /* This case is never executed, as the loop headers seem always to have a
1511 single edge pointing from outside into the loop. */
1512 gcc_unreachable ();
1514 #ifdef ENABLE_CHECKING
1515 gcc_assert (find_single_entry_edge (region));
1516 #endif
1519 /* Check if the sd_region, mentioned in EDGE, has no exit bb. */
1521 static bool
1522 sd_region_without_exit (edge e)
1524 sd_region *r = (sd_region *) e->aux;
1526 if (r)
1527 return r->exit == NULL;
1528 else
1529 return false;
1532 /* Create a single exit edge for REGION. */
1534 static void
1535 create_single_exit_edge (sd_region *region)
1537 edge e;
1538 edge_iterator ei;
1539 edge forwarder = NULL;
1540 basic_block exit;
1542 if (find_single_exit_edge (region))
1543 return;
1545 /* We create a forwarder bb (5) for all edges leaving this region
1546 (3->5, 4->5). All other edges leading to the same bb, are moved
1547 to a new bb (6). If these edges where part of another region (2->5)
1548 we update the region->exit pointer, of this region.
1550 To identify which edge belongs to which region we depend on the e->aux
1551 pointer in every edge. It points to the region of the edge or to NULL,
1552 if the edge is not part of any region.
1554 1 2 3 4 1->5 no region, 2->5 region->exit = 5,
1555 \| |/ 3->5 region->exit = NULL, 4->5 region->exit = NULL
1556 5 <- exit
1558 changes to
1560 1 2 3 4 1->6 no region, 2->6 region->exit = 6,
1561 | | \/ 3->5 no region, 4->5 no region,
1562 | | 5
1563 \| / 5->6 region->exit = 6
1566 Now there is only a single exit edge (5->6). */
1567 exit = region->exit;
1568 region->exit = NULL;
1569 forwarder = make_forwarder_block (exit, &sd_region_without_exit, NULL);
1571 /* Unmark the edges, that are no longer exit edges. */
1572 FOR_EACH_EDGE (e, ei, forwarder->src->preds)
1573 if (e->aux)
1574 e->aux = NULL;
1576 /* Mark the new exit edge. */
1577 single_succ_edge (forwarder->src)->aux = region;
1579 /* Update the exit bb of all regions, where exit edges lead to
1580 forwarder->dest. */
1581 FOR_EACH_EDGE (e, ei, forwarder->dest->preds)
1582 if (e->aux)
1583 ((sd_region *) e->aux)->exit = forwarder->dest;
1585 #ifdef ENABLE_CHECKING
1586 gcc_assert (find_single_exit_edge (region));
1587 #endif
1590 /* Unmark the exit edges of all REGIONS.
1591 See comment in "create_single_exit_edge". */
1593 static void
1594 unmark_exit_edges (VEC (sd_region, heap) *regions)
1596 int i;
1597 sd_region *s;
1598 edge e;
1599 edge_iterator ei;
1601 for (i = 0; VEC_iterate (sd_region, regions, i, s); i++)
1602 FOR_EACH_EDGE (e, ei, s->exit->preds)
1603 e->aux = NULL;
1607 /* Mark the exit edges of all REGIONS.
1608 See comment in "create_single_exit_edge". */
1610 static void
1611 mark_exit_edges (VEC (sd_region, heap) *regions)
1613 int i;
1614 sd_region *s;
1615 edge e;
1616 edge_iterator ei;
1618 for (i = 0; VEC_iterate (sd_region, regions, i, s); i++)
1619 FOR_EACH_EDGE (e, ei, s->exit->preds)
1620 if (bb_in_sd_region (e->src, s))
1621 e->aux = s;
1625 /* Create for all scop regions a single entry and a single exit edge. */
1627 static void
1628 create_sese_edges (VEC (sd_region, heap) *regions)
1630 int i;
1631 sd_region *s;
1634 for (i = 0; VEC_iterate (sd_region, regions, i, s); i++)
1635 create_single_entry_edge (s);
1637 mark_exit_edges (regions);
1639 for (i = 0; VEC_iterate (sd_region, regions, i, s); i++)
1640 create_single_exit_edge (s);
1642 unmark_exit_edges (regions);
1644 #ifdef ENABLE_CHECKING
1645 verify_loop_structure ();
1646 verify_dominators (CDI_DOMINATORS);
1647 verify_ssa (false);
1648 #endif
1651 /* Create graphite SCoPs from an array of scop detection regions. */
1653 static void
1654 build_graphite_scops (VEC (sd_region, heap) *scop_regions)
1656 int i;
1657 sd_region *s;
1659 for (i = 0; VEC_iterate (sd_region, scop_regions, i, s); i++)
1661 edge entry = find_single_entry_edge (s);
1662 edge exit = find_single_exit_edge (s);
1663 scop_p scop = new_scop (entry, exit);
1664 VEC_safe_push (scop_p, heap, current_scops, scop);
1666 /* Are there overlapping SCoPs? */
1667 #ifdef ENABLE_CHECKING
1669 int j;
1670 sd_region *s2;
1672 for (j = 0; VEC_iterate (sd_region, scop_regions, j, s2); j++)
1673 if (s != s2)
1674 gcc_assert (!bb_in_sd_region (s->entry, s2));
1676 #endif
1680 /* Find static control parts. */
1682 static void
1683 build_scops (void)
1685 struct loop *loop = current_loops->tree_root;
1686 VEC (sd_region, heap) *tmp_scops = VEC_alloc (sd_region, heap, 3);
1688 build_scops_1 (single_succ (ENTRY_BLOCK_PTR), &tmp_scops, loop);
1689 create_sese_edges (tmp_scops);
1690 build_graphite_scops (tmp_scops);
1693 /* Gather the basic blocks belonging to the SCOP. */
1695 static void
1696 build_scop_bbs (scop_p scop)
1698 basic_block *stack = XNEWVEC (basic_block, n_basic_blocks + 1);
1699 sbitmap visited = sbitmap_alloc (last_basic_block);
1700 int sp = 0;
1702 sbitmap_zero (visited);
1703 stack[sp++] = SCOP_ENTRY (scop);
1705 while (sp)
1707 basic_block bb = stack[--sp];
1708 int depth = loop_depth (bb->loop_father);
1709 int num = bb->loop_father->num;
1710 edge_iterator ei;
1711 edge e;
1713 /* Scop's exit is not in the scop. Exclude also bbs, which are
1714 dominated by the SCoP exit. These are e.g. loop latches. */
1715 if (TEST_BIT (visited, bb->index)
1716 || dominated_by_p (CDI_DOMINATORS, bb, SCOP_EXIT (scop))
1717 /* Every block in the scop is dominated by scop's entry. */
1718 || !dominated_by_p (CDI_DOMINATORS, bb, SCOP_ENTRY (scop)))
1719 continue;
1721 new_graphite_bb (scop, bb);
1722 SET_BIT (visited, bb->index);
1724 /* First push the blocks that have to be processed last. Note
1725 that this means that the order in which the code is organized
1726 below is important: do not reorder the following code. */
1727 FOR_EACH_EDGE (e, ei, bb->succs)
1728 if (! TEST_BIT (visited, e->dest->index)
1729 && (int) loop_depth (e->dest->loop_father) < depth)
1730 stack[sp++] = e->dest;
1732 FOR_EACH_EDGE (e, ei, bb->succs)
1733 if (! TEST_BIT (visited, e->dest->index)
1734 && (int) loop_depth (e->dest->loop_father) == depth
1735 && e->dest->loop_father->num != num)
1736 stack[sp++] = e->dest;
1738 FOR_EACH_EDGE (e, ei, bb->succs)
1739 if (! TEST_BIT (visited, e->dest->index)
1740 && (int) loop_depth (e->dest->loop_father) == depth
1741 && e->dest->loop_father->num == num
1742 && EDGE_COUNT (e->dest->preds) > 1)
1743 stack[sp++] = e->dest;
1745 FOR_EACH_EDGE (e, ei, bb->succs)
1746 if (! TEST_BIT (visited, e->dest->index)
1747 && (int) loop_depth (e->dest->loop_father) == depth
1748 && e->dest->loop_father->num == num
1749 && EDGE_COUNT (e->dest->preds) == 1)
1750 stack[sp++] = e->dest;
1752 FOR_EACH_EDGE (e, ei, bb->succs)
1753 if (! TEST_BIT (visited, e->dest->index)
1754 && (int) loop_depth (e->dest->loop_father) > depth)
1755 stack[sp++] = e->dest;
1758 free (stack);
1759 sbitmap_free (visited);
1763 /* Record LOOP as occuring in SCOP. */
1765 static void
1766 scop_record_loop (scop_p scop, struct loop *loop)
1768 loop_p parent;
1769 tree induction_var;
1771 if (bitmap_bit_p (SCOP_LOOPS (scop), loop->num))
1772 return;
1774 parent = loop_outer (loop);
1775 induction_var = find_induction_var_from_exit_cond (loop);
1777 if (!bb_in_scop_p (parent->latch, scop))
1778 parent = NULL;
1780 if (induction_var != NULL_TREE)
1782 name_tree oldiv = XNEW (struct name_tree);
1783 oldiv->t = SSA_NAME_VAR (induction_var);
1784 if (DECL_NAME (oldiv->t))
1785 oldiv->name = IDENTIFIER_POINTER (DECL_NAME (oldiv->t));
1786 else
1788 int len = 2 + 16;
1789 char *n = XNEWVEC (char, len);
1790 snprintf (n, len, "D.%u", DECL_UID (oldiv->t));
1791 oldiv->name = n;
1793 oldiv->loop = loop;
1795 VEC_safe_push (name_tree, heap, SCOP_OLDIVS (scop), oldiv);
1798 bitmap_set_bit (SCOP_LOOPS (scop), loop->num);
1799 VEC_safe_push (loop_p, heap, SCOP_LOOP_NEST (scop), loop);
1802 /* Build the loop nests contained in SCOP. */
1804 static void
1805 build_scop_loop_nests (scop_p scop)
1807 unsigned i;
1808 graphite_bb_p gb;
1809 struct loop *loop0, *loop1;
1811 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++)
1813 struct loop *loop = gbb_loop (gb);
1815 /* Only add loops, if they are completely contained in the SCoP. */
1816 if (loop->header == GBB_BB (gb)
1817 && bb_in_scop_p (loop->latch, scop))
1818 scop_record_loop (scop, gbb_loop (gb));
1821 /* Make sure that the loops in the SCOP_LOOP_NEST are ordered. It
1822 can be the case that an inner loop is inserted before an outer
1823 loop. To avoid this, semi-sort once. */
1824 for (i = 0; VEC_iterate (loop_p, SCOP_LOOP_NEST (scop), i, loop0); i++)
1826 if (VEC_length (loop_p, SCOP_LOOP_NEST (scop)) == i + 1)
1827 break;
1829 loop1 = VEC_index (loop_p, SCOP_LOOP_NEST (scop), i + 1);
1830 if (loop0->num > loop1->num)
1832 VEC_replace (loop_p, SCOP_LOOP_NEST (scop), i, loop1);
1833 VEC_replace (loop_p, SCOP_LOOP_NEST (scop), i + 1, loop0);
1838 /* Calculate the number of loops around GB in the current SCOP. */
1840 static inline int
1841 nb_loops_around_gb (graphite_bb_p gb)
1843 scop_p scop = GBB_SCOP (gb);
1844 struct loop *l = gbb_loop (gb);
1845 int d = 0;
1847 for (; loop_in_scop_p (l, scop); d++, l = loop_outer (l));
1849 return d;
1852 /* Build for BB the static schedule.
1854 The STATIC_SCHEDULE is defined like this:
1857 for (i: ...)
1859 for (j: ...)
1865 for (k: ...)
1873 Static schedules for A to F:
1875 DEPTH
1876 0 1 2
1878 B 1 0 0
1879 C 1 0 1
1880 D 1 1 0
1881 E 1 1 1
1885 static void
1886 build_scop_canonical_schedules (scop_p scop)
1888 int i, j;
1889 graphite_bb_p gb;
1890 int nb = scop_nb_loops (scop) + 1;
1892 SCOP_STATIC_SCHEDULE (scop) = lambda_vector_new (nb);
1894 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++)
1896 int offset = nb_loops_around_gb (gb);
1898 /* After leaving a loop, it is possible that the schedule is not
1899 set at zero. This loop reinitializes components located
1900 after OFFSET. */
1902 for (j = offset + 1; j < nb; j++)
1903 if (SCOP_STATIC_SCHEDULE (scop)[j])
1905 memset (&(SCOP_STATIC_SCHEDULE (scop)[j]), 0,
1906 sizeof (int) * (nb - j));
1907 ++SCOP_STATIC_SCHEDULE (scop)[offset];
1908 break;
1911 GBB_STATIC_SCHEDULE (gb) = lambda_vector_new (offset + 1);
1912 lambda_vector_copy (SCOP_STATIC_SCHEDULE (scop),
1913 GBB_STATIC_SCHEDULE (gb), offset + 1);
1915 ++SCOP_STATIC_SCHEDULE (scop)[offset];
1919 /* Build the LOOPS vector for all bbs in SCOP. */
1921 static void
1922 build_bb_loops (scop_p scop)
1924 graphite_bb_p gb;
1925 int i;
1927 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++)
1929 loop_p loop;
1930 int depth;
1932 depth = nb_loops_around_gb (gb) - 1;
1934 GBB_LOOPS (gb) = VEC_alloc (loop_p, heap, 3);
1935 VEC_safe_grow_cleared (loop_p, heap, GBB_LOOPS (gb), depth + 1);
1937 loop = GBB_BB (gb)->loop_father;
1939 while (scop_contains_loop (scop, loop))
1941 VEC_replace (loop_p, GBB_LOOPS (gb), depth, loop);
1942 loop = loop_outer (loop);
1943 depth--;
1948 /* Get the index for parameter VAR in SCOP. */
1950 static int
1951 param_index (tree var, scop_p scop)
1953 int i;
1954 name_tree p;
1955 name_tree nvar;
1957 gcc_assert (TREE_CODE (var) == SSA_NAME);
1959 for (i = 0; VEC_iterate (name_tree, SCOP_PARAMS (scop), i, p); i++)
1960 if (p->t == var)
1961 return i;
1963 nvar = XNEW (struct name_tree);
1964 nvar->t = var;
1965 nvar->name = NULL;
1966 VEC_safe_push (name_tree, heap, SCOP_PARAMS (scop), nvar);
1967 return VEC_length (name_tree, SCOP_PARAMS (scop)) - 1;
1970 /* Scan EXPR and translate it to an inequality vector INEQ that will
1971 be added, or subtracted, in the constraint domain matrix C at row
1972 R. K is the number of columns for loop iterators in C. */
1974 static void
1975 scan_tree_for_params (scop_p s, tree e, CloogMatrix *c, int r, Value k,
1976 bool subtract)
1978 int cst_col, param_col;
1980 if (e == chrec_dont_know)
1981 return;
1983 switch (TREE_CODE (e))
1985 case POLYNOMIAL_CHREC:
1987 tree left = CHREC_LEFT (e);
1988 tree right = CHREC_RIGHT (e);
1989 int var = CHREC_VARIABLE (e);
1991 if (TREE_CODE (right) != INTEGER_CST)
1992 return;
1994 if (c)
1996 int loop_col = scop_gimple_loop_depth (s, get_loop (var)) + 1;
1998 if (subtract)
1999 value_sub_int (c->p[r][loop_col], c->p[r][loop_col],
2000 int_cst_value (right));
2001 else
2002 value_add_int (c->p[r][loop_col], c->p[r][loop_col],
2003 int_cst_value (right));
2006 switch (TREE_CODE (left))
2008 case POLYNOMIAL_CHREC:
2009 scan_tree_for_params (s, left, c, r, k, subtract);
2010 return;
2012 case INTEGER_CST:
2013 /* Constant part. */
2014 if (c)
2016 int v = int_cst_value (left);
2017 cst_col = c->NbColumns - 1;
2019 if (v < 0)
2021 v = -v;
2022 subtract = subtract ? false : true;
2025 if (subtract)
2026 value_sub_int (c->p[r][cst_col], c->p[r][cst_col], v);
2027 else
2028 value_add_int (c->p[r][cst_col], c->p[r][cst_col], v);
2030 return;
2032 default:
2033 scan_tree_for_params (s, left, c, r, k, subtract);
2034 return;
2037 break;
2039 case MULT_EXPR:
2040 if (chrec_contains_symbols (TREE_OPERAND (e, 0)))
2042 Value val;
2044 gcc_assert (host_integerp (TREE_OPERAND (e, 1), 0));
2046 value_init (val);
2047 value_set_si (val, int_cst_value (TREE_OPERAND (e, 1)));
2048 value_multiply (k, k, val);
2049 value_clear (val);
2050 scan_tree_for_params (s, TREE_OPERAND (e, 0), c, r, k, subtract);
2052 else
2054 Value val;
2056 gcc_assert (host_integerp (TREE_OPERAND (e, 0), 0));
2058 value_init (val);
2059 value_set_si (val, int_cst_value (TREE_OPERAND (e, 0)));
2060 value_multiply (k, k, val);
2061 value_clear (val);
2062 scan_tree_for_params (s, TREE_OPERAND (e, 1), c, r, k, subtract);
2064 break;
2066 case PLUS_EXPR:
2067 scan_tree_for_params (s, TREE_OPERAND (e, 0), c, r, k, subtract);
2068 scan_tree_for_params (s, TREE_OPERAND (e, 1), c, r, k, subtract);
2069 break;
2071 case MINUS_EXPR:
2072 scan_tree_for_params (s, TREE_OPERAND (e, 0), c, r, k, subtract);
2073 value_oppose (k, k);
2074 scan_tree_for_params (s, TREE_OPERAND (e, 1), c, r, k, subtract);
2075 break;
2077 case NEGATE_EXPR:
2078 value_oppose (k, k);
2079 scan_tree_for_params (s, TREE_OPERAND (e, 0), c, r, k, subtract);
2080 break;
2082 case SSA_NAME:
2083 param_col = param_index (e, s);
2085 if (c)
2087 param_col += c->NbColumns - scop_nb_params (s) - 1;
2089 if (subtract)
2090 value_subtract (c->p[r][param_col], c->p[r][param_col], k);
2091 else
2092 value_addto (c->p[r][param_col], c->p[r][param_col], k);
2094 break;
2096 case INTEGER_CST:
2097 if (c)
2099 int v = int_cst_value (e);
2100 cst_col = c->NbColumns - 1;
2102 if (v < 0)
2104 v = -v;
2105 subtract = subtract ? false : true;
2108 if (subtract)
2109 value_sub_int (c->p[r][cst_col], c->p[r][cst_col], v);
2110 else
2111 value_add_int (c->p[r][cst_col], c->p[r][cst_col], v);
2113 break;
2115 case NOP_EXPR:
2116 case CONVERT_EXPR:
2117 case NON_LVALUE_EXPR:
2118 scan_tree_for_params (s, TREE_OPERAND (e, 0), c, r, k, subtract);
2119 break;
2121 default:
2122 gcc_unreachable ();
2123 break;
2127 /* Data structure for idx_record_params. */
2129 struct irp_data
2131 struct loop *loop;
2132 scop_p scop;
2135 /* For a data reference with an ARRAY_REF as its BASE, record the
2136 parameters occurring in IDX. DTA is passed in as complementary
2137 information, and is used by the automatic walker function. This
2138 function is a callback for for_each_index. */
2140 static bool
2141 idx_record_params (tree base, tree *idx, void *dta)
2143 struct irp_data *data = (struct irp_data *) dta;
2145 if (TREE_CODE (base) != ARRAY_REF)
2146 return true;
2148 if (TREE_CODE (*idx) == SSA_NAME)
2150 tree scev;
2151 scop_p scop = data->scop;
2152 struct loop *loop = data->loop;
2153 Value one;
2155 scev = analyze_scalar_evolution (loop, *idx);
2156 scev = instantiate_scev (block_before_scop (scop), loop, scev);
2158 value_init (one);
2159 value_set_si (one, 1);
2160 scan_tree_for_params (scop, scev, NULL, 0, one, false);
2161 value_clear (one);
2164 return true;
2167 /* Find parameters with respect to SCOP in BB. We are looking in memory
2168 access functions, conditions and loop bounds. */
2170 static void
2171 find_params_in_bb (scop_p scop, basic_block bb)
2173 int i;
2174 data_reference_p dr;
2175 VEC (data_reference_p, heap) *drs;
2176 gimple_stmt_iterator gsi;
2177 struct loop *nest = outermost_loop_in_scop (scop, bb);
2179 /* Find the parameters used in the memory access functions. */
2180 drs = VEC_alloc (data_reference_p, heap, 5);
2181 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2182 find_data_references_in_stmt (nest, gsi_stmt (gsi), &drs);
2184 for (i = 0; VEC_iterate (data_reference_p, drs, i, dr); i++)
2186 struct irp_data irp;
2188 irp.loop = bb->loop_father;
2189 irp.scop = scop;
2190 for_each_index (&dr->ref, idx_record_params, &irp);
2191 free_data_ref (dr);
2194 VEC_free (data_reference_p, heap, drs);
2196 /* Find parameters in conditional statements. */
2197 gsi = gsi_last_bb (bb);
2198 if (!gsi_end_p (gsi))
2200 gimple stmt = gsi_stmt (gsi);
2202 if (gimple_code (stmt) == GIMPLE_COND)
2204 Value one;
2205 loop_p loop = bb->loop_father;
2207 tree lhs, rhs;
2209 lhs = gimple_cond_lhs (stmt);
2210 lhs = analyze_scalar_evolution (loop, lhs);
2211 lhs = instantiate_scev (block_before_scop (scop), loop, lhs);
2213 rhs = gimple_cond_rhs (stmt);
2214 rhs = analyze_scalar_evolution (loop, rhs);
2215 rhs = instantiate_scev (block_before_scop (scop), loop, rhs);
2217 value_init (one);
2218 scan_tree_for_params (scop, lhs, NULL, 0, one, false);
2219 value_set_si (one, 1);
2220 scan_tree_for_params (scop, rhs, NULL, 0, one, false);
2221 value_clear (one);
2226 /* Saves in NV the name of variable P->T. */
2228 static void
2229 save_var_name (char **nv, int i, name_tree p)
2231 const char *name = get_name (SSA_NAME_VAR (p->t));
2233 if (name)
2235 int len = strlen (name) + 16;
2236 nv[i] = XNEWVEC (char, len);
2237 snprintf (nv[i], len, "%s_%d", name, SSA_NAME_VERSION (p->t));
2239 else
2241 nv[i] = XNEWVEC (char, 16);
2242 snprintf (nv[i], 2 + 16, "T_%d", SSA_NAME_VERSION (p->t));
2245 p->name = nv[i];
2248 /* Return the maximal loop depth in SCOP. */
2250 static int
2251 scop_max_loop_depth (scop_p scop)
2253 int i;
2254 graphite_bb_p gbb;
2255 int max_nb_loops = 0;
2257 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gbb); i++)
2259 int nb_loops = gbb_nb_loops (gbb);
2260 if (max_nb_loops < nb_loops)
2261 max_nb_loops = nb_loops;
2264 return max_nb_loops;
2267 /* Initialize Cloog's parameter names from the names used in GIMPLE.
2268 Initialize Cloog's iterator names, using 'graphite_iterator_%d'
2269 from 0 to scop_nb_loops (scop). */
2271 static void
2272 initialize_cloog_names (scop_p scop)
2274 int i, nb_params = VEC_length (name_tree, SCOP_PARAMS (scop));
2275 char **params = XNEWVEC (char *, nb_params);
2276 int nb_iterators = scop_max_loop_depth (scop);
2277 int nb_scattering= cloog_program_nb_scattdims (SCOP_PROG (scop));
2278 char **iterators = XNEWVEC (char *, nb_iterators * 2);
2279 char **scattering = XNEWVEC (char *, nb_scattering);
2280 name_tree p;
2282 for (i = 0; VEC_iterate (name_tree, SCOP_PARAMS (scop), i, p); i++)
2283 save_var_name (params, i, p);
2285 cloog_names_set_nb_parameters (cloog_program_names (SCOP_PROG (scop)),
2286 nb_params);
2287 cloog_names_set_parameters (cloog_program_names (SCOP_PROG (scop)),
2288 params);
2290 for (i = 0; i < nb_iterators; i++)
2292 int len = 18 + 16;
2293 iterators[i] = XNEWVEC (char, len);
2294 snprintf (iterators[i], len, "graphite_iterator_%d", i);
2297 cloog_names_set_nb_iterators (cloog_program_names (SCOP_PROG (scop)),
2298 nb_iterators);
2299 cloog_names_set_iterators (cloog_program_names (SCOP_PROG (scop)),
2300 iterators);
2302 for (i = 0; i < nb_scattering; i++)
2304 int len = 2 + 16;
2305 scattering[i] = XNEWVEC (char, len);
2306 snprintf (scattering[i], len, "s_%d", i);
2309 cloog_names_set_nb_scattering (cloog_program_names (SCOP_PROG (scop)),
2310 nb_scattering);
2311 cloog_names_set_scattering (cloog_program_names (SCOP_PROG (scop)),
2312 scattering);
2315 /* Record the parameters used in the SCOP. A variable is a parameter
2316 in a scop if it does not vary during the execution of that scop. */
2318 static void
2319 find_scop_parameters (scop_p scop)
2321 graphite_bb_p gb;
2322 unsigned i;
2323 struct loop *loop;
2324 Value one;
2326 value_init (one);
2327 value_set_si (one, 1);
2329 /* Find the parameters used in the loop bounds. */
2330 for (i = 0; VEC_iterate (loop_p, SCOP_LOOP_NEST (scop), i, loop); i++)
2332 tree nb_iters = number_of_latch_executions (loop);
2334 if (!chrec_contains_symbols (nb_iters))
2335 continue;
2337 nb_iters = analyze_scalar_evolution (loop, nb_iters);
2338 nb_iters = instantiate_scev (block_before_scop (scop), loop, nb_iters);
2339 scan_tree_for_params (scop, nb_iters, NULL, 0, one, false);
2342 value_clear (one);
2344 /* Find the parameters used in data accesses. */
2345 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++)
2346 find_params_in_bb (scop, GBB_BB (gb));
2349 /* Build the context constraints for SCOP: constraints and relations
2350 on parameters. */
2352 static void
2353 build_scop_context (scop_p scop)
2355 int nb_params = scop_nb_params (scop);
2356 CloogMatrix *matrix = cloog_matrix_alloc (1, nb_params + 2);
2358 /* Insert '0 >= 0' in the context matrix, as it is not allowed to be
2359 empty. */
2361 value_set_si (matrix->p[0][0], 1);
2363 value_set_si (matrix->p[0][nb_params + 1], 0);
2365 cloog_program_set_context (SCOP_PROG (scop),
2366 cloog_domain_matrix2domain (matrix));
2367 cloog_matrix_free (matrix);
2370 /* Returns a graphite_bb from BB. */
2372 static inline graphite_bb_p
2373 gbb_from_bb (basic_block bb)
2375 return (graphite_bb_p) bb->aux;
2378 /* Add DOMAIN to all the basic blocks in LOOP. */
2380 static void
2381 add_bb_domains (struct loop *loop, CloogMatrix *domain)
2383 basic_block *bbs = get_loop_body (loop);
2384 unsigned i;
2386 for (i = 0; i < loop->num_nodes; i++)
2387 if (bbs[i]->loop_father == loop)
2389 graphite_bb_p gbb = gbb_from_bb (bbs[i]);
2390 GBB_DOMAIN (gbb) = cloog_matrix_copy (domain);
2393 free (bbs);
2396 /* Builds the constraint matrix for LOOP in SCOP. NB_OUTER_LOOPS is the
2397 number of loops surrounding LOOP in SCOP. OUTER_CSTR gives the
2398 constraints matrix for the surrounding loops. */
2400 static void
2401 build_loop_iteration_domains (scop_p scop, struct loop *loop,
2402 CloogMatrix *outer_cstr, int nb_outer_loops)
2404 int i, j, row;
2405 CloogMatrix *cstr;
2407 int nb_rows = outer_cstr->NbRows + 1;
2408 int nb_cols = outer_cstr->NbColumns + 1;
2410 /* Last column of CSTR is the column of constants. */
2411 int cst_col = nb_cols - 1;
2413 /* The column for the current loop is just after the columns of
2414 other outer loops. */
2415 int loop_col = nb_outer_loops + 1;
2417 tree nb_iters = number_of_latch_executions (loop);
2419 /* When the number of iterations is a constant or a parameter, we
2420 add a constraint for the upper bound of the loop. So add a row
2421 to the constraint matrix before allocating it. */
2422 if (TREE_CODE (nb_iters) == INTEGER_CST
2423 || !chrec_contains_undetermined (nb_iters))
2424 nb_rows++;
2426 cstr = cloog_matrix_alloc (nb_rows, nb_cols);
2428 /* Copy the outer constraints. */
2429 for (i = 0; i < outer_cstr->NbRows; i++)
2431 /* Copy the eq/ineq and loops columns. */
2432 for (j = 0; j < loop_col; j++)
2433 value_assign (cstr->p[i][j], outer_cstr->p[i][j]);
2435 /* Leave an empty column in CSTR for the current loop, and then
2436 copy the parameter columns. */
2437 for (j = loop_col; j < outer_cstr->NbColumns; j++)
2438 value_assign (cstr->p[i][j + 1], outer_cstr->p[i][j]);
2441 /* 0 <= loop_i */
2442 row = outer_cstr->NbRows;
2443 value_set_si (cstr->p[row][0], 1);
2444 value_set_si (cstr->p[row][loop_col], 1);
2446 /* loop_i <= nb_iters */
2447 if (TREE_CODE (nb_iters) == INTEGER_CST)
2449 row++;
2450 value_set_si (cstr->p[row][0], 1);
2451 value_set_si (cstr->p[row][loop_col], -1);
2453 value_set_si (cstr->p[row][cst_col],
2454 int_cst_value (nb_iters));
2456 else if (!chrec_contains_undetermined (nb_iters))
2458 /* Otherwise nb_iters contains parameters: scan the nb_iters
2459 expression and build its matrix representation. */
2460 Value one;
2462 row++;
2463 value_set_si (cstr->p[row][0], 1);
2464 value_set_si (cstr->p[row][loop_col], -1);
2466 nb_iters = analyze_scalar_evolution (loop, nb_iters);
2467 nb_iters = instantiate_scev (block_before_scop (scop), loop, nb_iters);
2469 value_init (one);
2470 value_set_si (one, 1);
2471 scan_tree_for_params (scop, nb_iters, cstr, row, one, false);
2472 value_clear (one);
2474 else
2475 gcc_unreachable ();
2477 if (loop->inner && loop_in_scop_p (loop->inner, scop))
2478 build_loop_iteration_domains (scop, loop->inner, cstr, nb_outer_loops + 1);
2480 /* Only go to the next loops, if we are not at the outermost layer. These
2481 have to be handled seperately, as we can be sure, that the chain at this
2482 layer will be connected. */
2483 if (nb_outer_loops != 0 && loop->next && loop_in_scop_p (loop->next, scop))
2484 build_loop_iteration_domains (scop, loop->next, outer_cstr, nb_outer_loops);
2486 add_bb_domains (loop, cstr);
2488 cloog_matrix_free (cstr);
2491 /* Add conditions to the domain of GB. */
2493 static void
2494 add_conditions_to_domain (graphite_bb_p gb)
2496 unsigned int i,j;
2497 gimple stmt;
2498 VEC (gimple, heap) *conditions = GBB_CONDITIONS (gb);
2499 CloogMatrix *domain = GBB_DOMAIN (gb);
2500 scop_p scop = GBB_SCOP (gb);
2502 unsigned nb_rows;
2503 unsigned nb_cols;
2504 unsigned nb_new_rows = 0;
2505 unsigned row;
2507 if (VEC_empty (gimple, conditions))
2508 return;
2510 if (domain)
2512 nb_rows = domain->NbRows;
2513 nb_cols = domain->NbColumns;
2515 else
2517 nb_rows = 0;
2518 nb_cols = scop_nb_params (scop) + 2;
2521 /* Count number of necessary new rows to add the conditions to the
2522 domain. */
2523 for (i = 0; VEC_iterate (gimple, conditions, i, stmt); i++)
2525 switch (gimple_code (stmt))
2527 case GIMPLE_COND:
2529 enum tree_code code = gimple_cond_code (stmt);
2531 switch (code)
2533 case NE_EXPR:
2534 case EQ_EXPR:
2535 /* NE and EQ statements are not supported right know. */
2536 gcc_unreachable ();
2537 break;
2538 case LT_EXPR:
2539 case GT_EXPR:
2540 case LE_EXPR:
2541 case GE_EXPR:
2542 nb_new_rows++;
2543 break;
2544 default:
2545 gcc_unreachable ();
2546 break;
2548 break;
2550 case SWITCH_EXPR:
2551 /* Switch statements are not supported right know. */
2552 gcc_unreachable ();
2553 break;
2555 default:
2556 gcc_unreachable ();
2557 break;
2562 /* Enlarge the matrix. */
2564 CloogMatrix *new_domain;
2565 new_domain = cloog_matrix_alloc (nb_rows + nb_new_rows, nb_cols);
2567 for (i = 0; i < nb_rows; i++)
2568 for (j = 0; j < nb_cols; j++)
2569 value_assign (new_domain->p[i][j], domain->p[i][j]);
2571 cloog_matrix_free (domain);
2572 domain = new_domain;
2573 GBB_DOMAIN (gb) = new_domain;
2576 /* Add the conditions to the new enlarged domain matrix. */
2577 row = nb_rows;
2578 for (i = 0; VEC_iterate (gimple, conditions, i, stmt); i++)
2580 switch (gimple_code (stmt))
2582 case GIMPLE_COND:
2584 Value one;
2585 enum tree_code code;
2586 tree left;
2587 tree right;
2588 loop_p loop = GBB_BB (gb)->loop_father;
2590 left = gimple_cond_lhs (stmt);
2591 right = gimple_cond_rhs (stmt);
2593 left = analyze_scalar_evolution (loop, left);
2594 right = analyze_scalar_evolution (loop, right);
2596 left = instantiate_scev (block_before_scop (scop), loop, left);
2597 right = instantiate_scev (block_before_scop (scop), loop, right);
2599 code = gimple_cond_code (stmt);
2601 /* The conditions for ELSE-branches are inverted. */
2602 if (VEC_index (gimple, gb->condition_cases, i) == NULL)
2603 code = invert_tree_comparison (code, false);
2605 switch (code)
2607 case NE_EXPR:
2608 /* NE statements are not supported right know. */
2609 gcc_unreachable ();
2610 break;
2611 case EQ_EXPR:
2612 value_set_si (domain->p[row][0], 1);
2613 value_init (one);
2614 value_set_si (one, 1);
2615 scan_tree_for_params (scop, left, domain, row, one, true);
2616 value_set_si (one, 1);
2617 scan_tree_for_params (scop, right, domain, row, one, false);
2618 row++;
2619 value_set_si (domain->p[row][0], 1);
2620 value_set_si (one, 1);
2621 scan_tree_for_params (scop, left, domain, row, one, false);
2622 value_set_si (one, 1);
2623 scan_tree_for_params (scop, right, domain, row, one, true);
2624 value_clear (one);
2625 row++;
2626 break;
2627 case LT_EXPR:
2628 value_set_si (domain->p[row][0], 1);
2629 value_init (one);
2630 value_set_si (one, 1);
2631 scan_tree_for_params (scop, left, domain, row, one, true);
2632 value_set_si (one, 1);
2633 scan_tree_for_params (scop, right, domain, row, one, false);
2634 value_sub_int (domain->p[row][nb_cols - 1],
2635 domain->p[row][nb_cols - 1], 1);
2636 value_clear (one);
2637 row++;
2638 break;
2639 case GT_EXPR:
2640 value_set_si (domain->p[row][0], 1);
2641 value_init (one);
2642 value_set_si (one, 1);
2643 scan_tree_for_params (scop, left, domain, row, one, false);
2644 value_set_si (one, 1);
2645 scan_tree_for_params (scop, right, domain, row, one, true);
2646 value_sub_int (domain->p[row][nb_cols - 1],
2647 domain->p[row][nb_cols - 1], 1);
2648 value_clear (one);
2649 row++;
2650 break;
2651 case LE_EXPR:
2652 value_set_si (domain->p[row][0], 1);
2653 value_init (one);
2654 value_set_si (one, 1);
2655 scan_tree_for_params (scop, left, domain, row, one, true);
2656 value_set_si (one, 1);
2657 scan_tree_for_params (scop, right, domain, row, one, false);
2658 value_clear (one);
2659 row++;
2660 break;
2661 case GE_EXPR:
2662 value_set_si (domain->p[row][0], 1);
2663 value_init (one);
2664 value_set_si (one, 1);
2665 scan_tree_for_params (scop, left, domain, row, one, false);
2666 value_set_si (one, 1);
2667 scan_tree_for_params (scop, right, domain, row, one, true);
2668 value_clear (one);
2669 row++;
2670 break;
2671 default:
2672 gcc_unreachable ();
2673 break;
2675 break;
2677 case GIMPLE_SWITCH:
2678 /* Switch statements are not supported right know. */
2679 gcc_unreachable ();
2680 break;
2682 default:
2683 gcc_unreachable ();
2684 break;
2689 /* Helper recursive function. */
2691 static void
2692 build_scop_conditions_1 (VEC (gimple, heap) **conditions,
2693 VEC (gimple, heap) **cases, basic_block bb,
2694 scop_p scop)
2696 int i, j;
2697 graphite_bb_p gbb;
2698 gimple_stmt_iterator gsi;
2699 basic_block bb_child, bb_iter;
2700 VEC (basic_block, heap) *dom;
2702 /* Make sure we are in the SCoP. */
2703 if (!bb_in_scop_p (bb, scop))
2704 return;
2706 /* Record conditions in graphite_bb. */
2707 gbb = gbb_from_bb (bb);
2708 GBB_CONDITIONS (gbb) = VEC_copy (gimple, heap, *conditions);
2709 GBB_CONDITION_CASES (gbb) = VEC_copy (gimple, heap, *cases);
2711 add_conditions_to_domain (gbb);
2713 dom = get_dominated_by (CDI_DOMINATORS, bb);
2715 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2717 gimple stmt = gsi_stmt (gsi);
2718 VEC (edge, gc) *edges;
2719 edge e;
2721 switch (gimple_code (stmt))
2723 case GIMPLE_COND:
2724 edges = bb->succs;
2725 for (i = 0; VEC_iterate (edge, edges, i, e); i++)
2726 if ((dominated_by_p (CDI_DOMINATORS, e->dest, bb))
2727 && VEC_length (edge, e->dest->preds) == 1)
2729 /* Remove the scanned block from the dominator successors. */
2730 for (j = 0; VEC_iterate (basic_block, dom, j, bb_iter); j++)
2731 if (bb_iter == e->dest)
2733 VEC_unordered_remove (basic_block, dom, j);
2734 break;
2737 /* Recursively scan the then or else part. */
2738 if (e->flags & EDGE_TRUE_VALUE)
2739 VEC_safe_push (gimple, heap, *cases, stmt);
2740 else if (e->flags & EDGE_FALSE_VALUE)
2741 VEC_safe_push (gimple, heap, *cases, NULL);
2742 else
2743 gcc_unreachable ();
2745 VEC_safe_push (gimple, heap, *conditions, stmt);
2746 build_scop_conditions_1 (conditions, cases, e->dest, scop);
2747 VEC_pop (gimple, *conditions);
2748 VEC_pop (gimple, *cases);
2750 break;
2752 case GIMPLE_SWITCH:
2754 unsigned i;
2755 gimple_stmt_iterator gsi_search_gimple_label;
2757 for (i = 0; i < gimple_switch_num_labels (stmt); ++i)
2759 basic_block bb_iter;
2760 size_t k;
2761 size_t n_cases = VEC_length (gimple, *conditions);
2762 unsigned n = gimple_switch_num_labels (stmt);
2764 bb_child = label_to_block
2765 (CASE_LABEL (gimple_switch_label (stmt, i)));
2767 /* Do not handle multiple values for the same block. */
2768 for (k = 0; k < n; k++)
2769 if (i != k
2770 && label_to_block
2771 (CASE_LABEL (gimple_switch_label (stmt, k))) == bb_child)
2772 break;
2774 if (k != n)
2775 continue;
2777 /* Switch cases with more than one predecessor are not
2778 handled. */
2779 if (VEC_length (edge, bb_child->preds) != 1)
2780 continue;
2782 /* Recursively scan the corresponding 'case' block. */
2784 for (gsi_search_gimple_label = gsi_start_bb (bb_child);
2785 !gsi_end_p (gsi_search_gimple_label);
2786 gsi_next (&gsi_search_gimple_label))
2788 gimple stmt_gimple_label
2789 = gsi_stmt (gsi_search_gimple_label);
2791 if (gimple_code (stmt_gimple_label) == GIMPLE_LABEL)
2793 tree t = gimple_label_label (stmt_gimple_label);
2795 if (t == gimple_switch_label (stmt, i))
2796 VEC_replace (gimple, *cases, n_cases,
2797 stmt_gimple_label);
2798 else
2799 gcc_unreachable ();
2803 build_scop_conditions_1 (conditions, cases, bb_child, scop);
2805 /* Remove the scanned block from the dominator successors. */
2806 for (j = 0; VEC_iterate (basic_block, dom, j, bb_iter); j++)
2807 if (bb_iter == bb_child)
2809 VEC_unordered_remove (basic_block, dom, j);
2810 break;
2814 VEC_pop (gimple, *conditions);
2815 VEC_pop (gimple, *cases);
2816 break;
2818 default:
2819 break;
2823 /* Scan all immediate dominated successors. */
2824 for (i = 0; VEC_iterate (basic_block, dom, i, bb_child); i++)
2825 build_scop_conditions_1 (conditions, cases, bb_child, scop);
2827 VEC_free (basic_block, heap, dom);
2830 /* Record all 'if' and 'switch' conditions in each gbb of SCOP. */
2832 static void
2833 build_scop_conditions (scop_p scop)
2835 VEC (gimple, heap) *conditions = NULL;
2836 VEC (gimple, heap) *cases = NULL;
2838 build_scop_conditions_1 (&conditions, &cases, SCOP_ENTRY (scop), scop);
2840 VEC_free (gimple, heap, conditions);
2841 VEC_free (gimple, heap, cases);
2844 /* Build the current domain matrix: the loops belonging to the current
2845 SCOP, and that vary for the execution of the current basic block.
2846 Returns false if there is no loop in SCOP. */
2848 static bool
2849 build_scop_iteration_domain (scop_p scop)
2851 struct loop *loop;
2852 CloogMatrix *outer_cstr;
2853 int i;
2855 /* Build cloog loop for all loops, that are in the uppermost loop layer of
2856 this SCoP. */
2857 for (i = 0; VEC_iterate (loop_p, SCOP_LOOP_NEST (scop), i, loop); i++)
2858 if (!loop_in_scop_p (loop_outer (loop), scop))
2860 /* The outermost constraints is a matrix that has:
2861 -first column: eq/ineq boolean
2862 -last column: a constant
2863 -scop_nb_params columns for the parameters used in the scop. */
2864 outer_cstr = cloog_matrix_alloc (0, scop_nb_params (scop) + 2);
2865 build_loop_iteration_domains (scop, loop, outer_cstr, 0);
2866 cloog_matrix_free (outer_cstr);
2869 return (i != 0);
2872 /* Initializes an equation CY of the access matrix using the
2873 information for a subscript from ACCESS_FUN, relatively to the loop
2874 indexes from LOOP_NEST and parameter indexes from PARAMS. NDIM is
2875 the dimension of the array access, i.e. the number of
2876 subscripts. Returns true when the operation succeeds. */
2878 static bool
2879 build_access_matrix_with_af (tree access_fun, lambda_vector cy,
2880 scop_p scop, int ndim)
2882 switch (TREE_CODE (access_fun))
2884 case POLYNOMIAL_CHREC:
2886 tree left = CHREC_LEFT (access_fun);
2887 tree right = CHREC_RIGHT (access_fun);
2888 int var;
2890 if (TREE_CODE (right) != INTEGER_CST)
2891 return false;
2893 var = loop_iteration_vector_dim (CHREC_VARIABLE (access_fun), scop);
2894 cy[var] = int_cst_value (right);
2896 switch (TREE_CODE (left))
2898 case POLYNOMIAL_CHREC:
2899 return build_access_matrix_with_af (left, cy, scop, ndim);
2901 case INTEGER_CST:
2902 cy[ndim - 1] = int_cst_value (left);
2903 return true;
2905 default:
2906 /* FIXME: access_fn can have parameters. */
2907 return false;
2910 case INTEGER_CST:
2911 cy[ndim - 1] = int_cst_value (access_fun);
2912 return true;
2914 default:
2915 /* FIXME: access_fn can have parameters. */
2916 return false;
2920 /* Initialize the access matrix in the data reference REF with respect
2921 to the loop nesting LOOP_NEST. Return true when the operation
2922 succeeded. */
2924 static bool
2925 build_access_matrix (data_reference_p ref, graphite_bb_p gb)
2927 int i, ndim = DR_NUM_DIMENSIONS (ref);
2928 struct access_matrix *am = GGC_NEW (struct access_matrix);
2930 AM_MATRIX (am) = VEC_alloc (lambda_vector, heap, ndim);
2931 DR_SCOP (ref) = GBB_SCOP (gb);
2933 for (i = 0; i < ndim; i++)
2935 lambda_vector v = lambda_vector_new (ref_nb_loops (ref));
2936 scop_p scop = GBB_SCOP (gb);
2937 tree af = DR_ACCESS_FN (ref, i);
2939 if (!build_access_matrix_with_af (af, v, scop, ref_nb_loops (ref)))
2940 return false;
2942 VEC_safe_push (lambda_vector, heap, AM_MATRIX (am), v);
2945 DR_ACCESS_MATRIX (ref) = am;
2946 return true;
2949 /* Build the access matrices for the data references in the SCOP. */
2951 static void
2952 build_scop_data_accesses (scop_p scop)
2954 int i;
2955 graphite_bb_p gb;
2957 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++)
2959 int j;
2960 gimple_stmt_iterator gsi;
2961 data_reference_p dr;
2962 struct loop *nest = outermost_loop_in_scop (scop, GBB_BB (gb));
2964 /* On each statement of the basic block, gather all the occurences
2965 to read/write memory. */
2966 GBB_DATA_REFS (gb) = VEC_alloc (data_reference_p, heap, 5);
2967 for (gsi = gsi_start_bb (GBB_BB (gb)); !gsi_end_p (gsi); gsi_next (&gsi))
2968 find_data_references_in_stmt (nest, gsi_stmt (gsi),
2969 &GBB_DATA_REFS (gb));
2971 /* FIXME: Construction of access matrix is disabled until some
2972 pass, like the data dependence analysis, is using it. */
2973 continue;
2975 /* Construct the access matrix for each data ref, with respect to
2976 the loop nest of the current BB in the considered SCOP. */
2977 for (j = 0;
2978 VEC_iterate (data_reference_p, GBB_DATA_REFS (gb), j, dr);
2979 j++)
2981 bool res = build_access_matrix (dr, gb);
2983 /* FIXME: At this point the DRs should always have an affine
2984 form. For the moment this fails as build_access_matrix
2985 does not build matrices with parameters. */
2986 gcc_assert (res);
2991 /* Converts a GMP constant value to a tree and returns it. */
2993 static tree
2994 gmp_cst_to_tree (Value v)
2996 return build_int_cst (integer_type_node, value_get_si (v));
2999 /* Returns the tree variable from the name NAME that was given in
3000 Cloog representation. All the parameters are stored in PARAMS, and
3001 all the loop induction variables are stored in IVSTACK.
3003 FIXME: This is a hack, and Cloog should be fixed to not work with
3004 variable names represented as "char *string", but with void
3005 pointers that could be casted back to a tree. The only problem in
3006 doing that is that Cloog's pretty printer still assumes that
3007 variable names are char *strings. The solution would be to have a
3008 function pointer for pretty-printing that can be redirected to be
3009 print_generic_stmt in our case, or fprintf by default.
3010 ??? Too ugly to live. */
3012 static tree
3013 clast_name_to_gcc (const char *name, VEC (name_tree, heap) *params,
3014 loop_iv_stack ivstack)
3016 int i;
3017 name_tree t;
3018 tree iv;
3020 for (i = 0; VEC_iterate (name_tree, params, i, t); i++)
3021 if (!strcmp (name, t->name))
3022 return t->t;
3024 iv = loop_iv_stack_get_iv_from_name (ivstack, name);
3025 if (iv)
3026 return iv;
3028 gcc_unreachable ();
3031 /* Converts a Cloog AST expression E back to a GCC expression tree. */
3033 static tree
3034 clast_to_gcc_expression (struct clast_expr *e,
3035 VEC (name_tree, heap) *params,
3036 loop_iv_stack ivstack)
3038 tree type = integer_type_node;
3040 gcc_assert (e);
3042 switch (e->type)
3044 case expr_term:
3046 struct clast_term *t = (struct clast_term *) e;
3048 if (t->var)
3050 if (value_one_p (t->val))
3051 return clast_name_to_gcc (t->var, params, ivstack);
3053 else if (value_mone_p (t->val))
3054 return fold_build1 (NEGATE_EXPR, type,
3055 clast_name_to_gcc (t->var, params, ivstack));
3056 else
3057 return fold_build2 (MULT_EXPR, type,
3058 gmp_cst_to_tree (t->val),
3059 clast_name_to_gcc (t->var, params, ivstack));
3061 else
3062 return gmp_cst_to_tree (t->val);
3065 case expr_red:
3067 struct clast_reduction *r = (struct clast_reduction *) e;
3068 tree left, right;
3070 switch (r->type)
3072 case clast_red_sum:
3073 if (r->n == 1)
3074 return clast_to_gcc_expression (r->elts[0], params, ivstack);
3076 else
3078 gcc_assert (r->n >= 1
3079 && r->elts[0]->type == expr_term
3080 && r->elts[1]->type == expr_term);
3082 left = clast_to_gcc_expression (r->elts[0], params, ivstack);
3083 right = clast_to_gcc_expression (r->elts[1], params, ivstack);
3084 return fold_build2 (PLUS_EXPR, type, left, right);
3087 break;
3089 case clast_red_min:
3090 if (r->n == 1)
3091 return clast_to_gcc_expression (r->elts[0], params, ivstack);
3093 else if (r->n == 2)
3095 left = clast_to_gcc_expression (r->elts[0], params, ivstack);
3096 right = clast_to_gcc_expression (r->elts[1], params, ivstack);
3097 return fold_build2 (MIN_EXPR, type, left, right);
3100 else
3101 gcc_unreachable();
3103 break;
3105 case clast_red_max:
3106 if (r->n == 1)
3107 return clast_to_gcc_expression (r->elts[0], params, ivstack);
3109 else if (r->n == 2)
3111 left = clast_to_gcc_expression (r->elts[0], params, ivstack);
3112 right = clast_to_gcc_expression (r->elts[1], params, ivstack);
3113 return fold_build2 (MAX_EXPR, type, left, right);
3116 else
3117 gcc_unreachable();
3119 break;
3121 default:
3122 gcc_unreachable ();
3124 break;
3127 case expr_bin:
3129 struct clast_binary *b = (struct clast_binary *) e;
3130 struct clast_expr *lhs = (struct clast_expr *) b->LHS;
3131 struct clast_expr *rhs = (struct clast_expr *) b->RHS;
3132 tree tl = clast_to_gcc_expression (lhs, params, ivstack);
3134 /* FIXME: The next statement produces a warning: Cloog assumes
3135 that the RHS is a constant, but this is a "void *" pointer
3136 that should be casted into a Value, but this cast cannot be
3137 done as Value is a GMP type, that is an array. Cloog must
3138 be fixed for removing this warning. */
3139 tree tr = gmp_cst_to_tree (rhs);
3141 switch (b->type)
3143 case clast_bin_fdiv:
3144 return fold_build2 (FLOOR_DIV_EXPR, type, tl, tr);
3146 case clast_bin_cdiv:
3147 return fold_build2 (CEIL_DIV_EXPR, type, tl, tr);
3149 case clast_bin_div:
3150 return fold_build2 (EXACT_DIV_EXPR, type, tl, tr);
3152 case clast_bin_mod:
3153 return fold_build2 (TRUNC_MOD_EXPR, type, tl, tr);
3155 default:
3156 gcc_unreachable ();
3160 default:
3161 gcc_unreachable ();
3164 return NULL_TREE;
3167 /* Translates a clast equation CLEQ to a tree. */
3169 static tree
3170 graphite_translate_clast_equation (scop_p scop,
3171 struct clast_equation *cleq,
3172 loop_iv_stack ivstack)
3174 enum tree_code comp;
3175 tree lhs = clast_to_gcc_expression (cleq->LHS, SCOP_PARAMS (scop), ivstack);
3176 tree rhs = clast_to_gcc_expression (cleq->RHS, SCOP_PARAMS (scop), ivstack);
3178 if (cleq->sign == 0)
3179 comp = EQ_EXPR;
3181 else if (cleq->sign > 0)
3182 comp = GE_EXPR;
3184 else
3185 comp = LE_EXPR;
3187 return fold_build2 (comp, integer_type_node, lhs, rhs);
3190 /* Creates the test for the condition in STMT. */
3192 static tree
3193 graphite_create_guard_cond_expr (scop_p scop, struct clast_guard *stmt,
3194 loop_iv_stack ivstack)
3196 tree cond = NULL;
3197 int i;
3199 for (i = 0; i < stmt->n; i++)
3201 tree eq = graphite_translate_clast_equation (scop, &stmt->eq[i], ivstack);
3203 if (cond)
3204 cond = fold_build2 (TRUTH_AND_EXPR, integer_type_node, cond, eq);
3205 else
3206 cond = eq;
3209 return cond;
3212 /* Creates a new if region corresponding to Cloog's guard. */
3214 static edge
3215 graphite_create_new_guard (scop_p scop, edge entry_edge,
3216 struct clast_guard *stmt,
3217 loop_iv_stack ivstack)
3219 tree cond_expr = graphite_create_guard_cond_expr (scop, stmt, ivstack);
3220 edge exit_edge = create_empty_if_region_on_edge (entry_edge, cond_expr);
3221 return exit_edge;
3225 /* Creates a new LOOP corresponding to Cloog's STMT. Inserts an induction
3226 variable for the new LOOP. New LOOP is attached to CFG starting at
3227 ENTRY_EDGE. LOOP is inserted into the loop tree and becomes the child
3228 loop of the OUTER_LOOP. */
3230 static struct loop *
3231 graphite_create_new_loop (scop_p scop, edge entry_edge,
3232 struct clast_for *stmt, loop_iv_stack ivstack,
3233 loop_p outer)
3235 struct loop *loop;
3236 tree ivvar;
3237 tree stride, lowb, upb;
3238 tree iv_before;
3240 gcc_assert (stmt->LB
3241 && stmt->UB);
3243 stride = gmp_cst_to_tree (stmt->stride);
3244 lowb = clast_to_gcc_expression (stmt->LB, SCOP_PARAMS (scop), ivstack);
3245 ivvar = create_tmp_var (integer_type_node, "graphiteIV");
3246 add_referenced_var (ivvar);
3248 upb = clast_to_gcc_expression (stmt->UB, SCOP_PARAMS (scop), ivstack);
3249 loop = create_empty_loop_on_edge (entry_edge, lowb, stride, upb, ivvar,
3250 &iv_before, outer ? outer
3251 : entry_edge->src->loop_father);
3253 loop_iv_stack_push (ivstack, iv_before, stmt->iterator);
3255 return loop;
3258 /* Remove all the edges from EDGES except the edge KEEP. */
3260 static void
3261 remove_all_edges_1 (VEC (edge, gc) *edges, edge keep)
3263 edge e;
3264 edge_iterator ei;
3266 for (ei = ei_start (edges); (e = ei_safe_edge (ei)); )
3268 if (e != keep)
3270 remove_edge (e);
3271 e = ei_safe_edge (ei);
3273 else
3274 ei_next (&ei);
3278 /* Remove all the edges from BB except the edge KEEP. */
3280 static void
3281 remove_all_edges (basic_block bb, edge keep)
3283 remove_all_edges_1 (bb->succs, keep);
3284 remove_all_edges_1 (bb->preds, keep);
3287 /* Rename the SSA_NAMEs used in STMT and that appear in IVSTACK. */
3289 static void
3290 graphite_rename_ivs_stmt (gimple stmt, graphite_bb_p gbb, scop_p scop,
3291 loop_p old, loop_iv_stack ivstack)
3293 ssa_op_iter iter;
3294 use_operand_p use_p;
3296 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
3298 tree use = USE_FROM_PTR (use_p);
3299 tree new_iv = NULL;
3300 name_tree old_iv = get_old_iv_from_ssa_name (scop, old, use);
3302 if (old_iv)
3303 new_iv = loop_iv_stack_get_iv (ivstack,
3304 gbb_loop_index (gbb, old_iv->loop));
3306 if (new_iv)
3307 SET_USE (use_p, new_iv);
3311 /* Returns true if SSA_NAME is a parameter of SCOP. */
3313 static bool
3314 is_parameter (scop_p scop, tree ssa_name)
3316 int i;
3317 VEC (name_tree, heap) *params = SCOP_PARAMS (scop);
3318 name_tree param;
3320 for (i = 0; VEC_iterate (name_tree, params, i, param); i++)
3321 if (param->t == ssa_name)
3322 return true;
3324 return false;
3327 /* Returns true if NAME is an old induction variable in SCOP. OLD is
3328 the original loop that contained the definition of NAME. */
3330 static bool
3331 is_old_iv (scop_p scop, loop_p old, tree name)
3333 return get_old_iv_from_ssa_name (scop, old, name) != NULL;
3337 static void expand_scalar_variables_stmt (gimple, graphite_bb_p, scop_p, loop_p,
3338 loop_iv_stack);
3340 /* Constructs a tree which only contains old_ivs and parameters. Any
3341 other variables that are defined outside GBB will be eliminated by
3342 using their definitions in the constructed tree. OLD_LOOP_FATHER
3343 is the original loop that contained GBB. */
3345 static tree
3346 expand_scalar_variables_expr (tree type, tree op0, enum tree_code code,
3347 tree op1, graphite_bb_p gbb, scop_p scop,
3348 loop_p old_loop_father, loop_iv_stack ivstack)
3350 if (TREE_CODE_CLASS (code) == tcc_constant
3351 && code == INTEGER_CST)
3352 return op0;
3354 if (TREE_CODE_CLASS (code) == tcc_unary)
3356 tree op0_type = TREE_TYPE (op0);
3357 enum tree_code op0_code = TREE_CODE (op0);
3358 tree op0_expr =
3359 expand_scalar_variables_expr (op0_type, op0, op0_code,
3360 NULL, gbb, scop, old_loop_father,
3361 ivstack);
3363 return fold_build1 (code, type, op0_expr);
3366 if (TREE_CODE_CLASS (code) == tcc_binary)
3368 tree op0_type = TREE_TYPE (op0);
3369 enum tree_code op0_code = TREE_CODE (op0);
3370 tree op0_expr =
3371 expand_scalar_variables_expr (op0_type, op0, op0_code,
3372 NULL, gbb, scop, old_loop_father,
3373 ivstack);
3374 tree op1_type = TREE_TYPE (op1);
3375 enum tree_code op1_code = TREE_CODE (op1);
3376 tree op1_expr =
3377 expand_scalar_variables_expr (op1_type, op1, op1_code,
3378 NULL, gbb, scop, old_loop_father,
3379 ivstack);
3381 return fold_build2 (code, type, op0_expr, op1_expr);
3384 if (code == SSA_NAME)
3386 tree var0, var1;
3387 gimple def_stmt;
3388 enum tree_code subcode;
3390 if(is_parameter (scop, op0) ||
3391 is_old_iv (scop, old_loop_father, op0))
3392 return op0;
3394 def_stmt = SSA_NAME_DEF_STMT (op0);
3396 if (gimple_bb (def_stmt) == GBB_BB (gbb))
3398 /* If the defining statement is in the basic block already
3399 we do not need to create a new expression for it, we
3400 only need to ensure its operands are expanded. */
3401 expand_scalar_variables_stmt (def_stmt, gbb, scop,
3402 old_loop_father, ivstack);
3403 return op0;
3406 else
3408 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
3409 return op0;
3411 var0 = gimple_assign_rhs1 (def_stmt);
3412 subcode = gimple_assign_rhs_code (def_stmt);
3413 var1 = gimple_assign_rhs2 (def_stmt);
3415 return expand_scalar_variables_expr (type, var0, subcode, var1,
3416 gbb, scop, old_loop_father,
3417 ivstack);
3421 gcc_unreachable ();
3422 return NULL;
3425 /* Replicates any uses of non-parameters and non-old-ivs variablesthat
3426 are defind outside GBB with code that is inserted in GBB.
3427 OLD_LOOP_FATHER is the original loop that contained STMT. */
3429 static void
3430 expand_scalar_variables_stmt (gimple stmt, graphite_bb_p gbb, scop_p scop,
3431 loop_p old_loop_father, loop_iv_stack ivstack)
3433 ssa_op_iter iter;
3434 use_operand_p use_p;
3435 basic_block bb = GBB_BB (gbb);
3437 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
3439 tree use = USE_FROM_PTR (use_p);
3440 tree type = TREE_TYPE (use);
3441 enum tree_code code = TREE_CODE (use);
3442 tree use_expr = expand_scalar_variables_expr (type, use, code, NULL,
3443 gbb, scop, old_loop_father,
3444 ivstack);
3445 if (use_expr != use)
3447 gimple_stmt_iterator gsi = gsi_after_labels (bb);
3448 tree new_use =
3449 force_gimple_operand_gsi (&gsi, use_expr, true, NULL,
3450 true, GSI_NEW_STMT);
3451 SET_USE (use_p, new_use);
3456 /* Copies the definitions outside of GBB of variables that are not
3457 induction variables nor parameters. GBB must only contain
3458 "external" references to these types of variables. OLD_LOOP_FATHER
3459 is the original loop that contained GBB. */
3461 static void
3462 expand_scalar_variables (graphite_bb_p gbb, scop_p scop,
3463 loop_p old_loop_father, loop_iv_stack ivstack)
3465 basic_block bb = GBB_BB (gbb);
3466 gimple_stmt_iterator gsi;
3468 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
3470 gimple stmt = gsi_stmt (gsi);
3471 expand_scalar_variables_stmt (stmt, gbb, scop, old_loop_father,
3472 ivstack);
3473 gsi_next (&gsi);
3477 /* Rename all the SSA_NAMEs from block GBB that appear in IVSTACK in
3478 terms of new induction variables. OLD_LOOP_FATHER is the original
3479 loop that contained GBB. */
3481 static void
3482 graphite_rename_ivs (graphite_bb_p gbb, scop_p scop, loop_p old_loop_father,
3483 loop_iv_stack ivstack)
3485 basic_block bb = GBB_BB (gbb);
3486 gimple_stmt_iterator gsi;
3488 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
3490 gimple stmt = gsi_stmt (gsi);
3492 if (gimple_get_lhs (stmt)
3493 && TREE_CODE (gimple_get_lhs (stmt)) == SSA_NAME
3494 && get_old_iv_from_ssa_name (scop, old_loop_father,
3495 gimple_get_lhs (stmt)))
3496 gsi_remove (&gsi, false);
3497 else
3499 graphite_rename_ivs_stmt (stmt, gbb, scop, old_loop_father, ivstack);
3500 gsi_next (&gsi);
3505 /* Move all the PHI nodes from block FROM to block TO.
3506 OLD_LOOP_FATHER is the original loop that contained FROM. */
3508 static void
3509 move_phi_nodes (scop_p scop, loop_p old_loop_father, basic_block from,
3510 basic_block to)
3512 gimple_stmt_iterator gsi;
3514 for (gsi = gsi_start_phis (from); !gsi_end_p (gsi);)
3516 gimple phi = gsi_stmt (gsi);
3517 tree op = gimple_phi_result (phi);
3519 if (get_old_iv_from_ssa_name (scop, old_loop_father, op) == NULL)
3521 gimple new_phi = make_phi_node (op, 0);
3522 add_phi_node_to_bb (new_phi, to);
3524 remove_phi_node (&gsi, false);
3528 /* Remove condition from BB. */
3530 static void
3531 remove_condition (basic_block bb)
3533 gimple last = last_stmt (bb);
3535 if (last && gimple_code (last) == GIMPLE_COND)
3537 gimple_stmt_iterator gsi = gsi_last_bb (bb);
3538 gsi_remove (&gsi, true);
3542 /* Returns the first successor edge of BB with EDGE_TRUE_VALUE flag set. */
3544 static edge
3545 get_true_edge_from_guard_bb (basic_block bb)
3547 edge e;
3548 edge_iterator ei;
3550 FOR_EACH_EDGE (e, ei, bb->succs)
3551 if (e->flags & EDGE_TRUE_VALUE)
3552 return e;
3554 gcc_unreachable ();
3555 return NULL;
3558 /* Translates a CLAST statement STMT to GCC representation. NEXT_E is
3559 the edge where new generated code should be attached. BB_EXIT is the last
3560 basic block that defines the scope of code generation. CONTEXT_LOOP is the
3561 loop in which the generated code will be placed (might be NULL). */
3563 static edge
3564 translate_clast (scop_p scop, struct loop *context_loop,
3565 struct clast_stmt *stmt, edge next_e, loop_iv_stack ivstack)
3567 if (!stmt)
3568 return next_e;
3570 if (CLAST_STMT_IS_A (stmt, stmt_root))
3571 return translate_clast (scop, context_loop, stmt->next, next_e, ivstack);
3573 if (CLAST_STMT_IS_A (stmt, stmt_user))
3575 CloogStatement *cs = ((struct clast_user_stmt *) stmt)->statement;
3576 graphite_bb_p gbb = (graphite_bb_p) cloog_statement_usr (cs);
3577 basic_block bb = gbb->bb;
3578 loop_p old_loop_father = bb->loop_father;
3580 if (bb == ENTRY_BLOCK_PTR)
3581 return next_e;
3583 remove_condition (bb);
3584 expand_scalar_variables (gbb, scop, old_loop_father, ivstack);
3585 remove_all_edges (bb, next_e);
3586 move_phi_nodes (scop, old_loop_father, bb, next_e->src);
3587 redirect_edge_succ_nodup (next_e, bb);
3589 if (context_loop)
3591 remove_bb_from_loops (bb);
3592 add_bb_to_loop (bb, context_loop);
3595 set_immediate_dominator (CDI_DOMINATORS, next_e->dest, next_e->src);
3596 mark_virtual_ops_in_bb (bb);
3597 next_e = make_edge (bb,
3598 context_loop ? context_loop->latch : EXIT_BLOCK_PTR,
3599 EDGE_FALLTHRU);;
3600 graphite_rename_ivs (gbb, scop, old_loop_father, ivstack);
3601 return translate_clast (scop, context_loop, stmt->next, next_e, ivstack);
3604 if (CLAST_STMT_IS_A (stmt, stmt_for))
3606 struct loop *loop
3607 = graphite_create_new_loop (scop, next_e, (struct clast_for *) stmt,
3608 ivstack, context_loop ? context_loop
3609 : get_loop (0));
3610 edge last_e = single_exit (loop);
3612 next_e = translate_clast (scop, loop, ((struct clast_for *) stmt)->body,
3613 single_pred_edge (loop->latch), ivstack);
3614 redirect_edge_succ_nodup (next_e, loop->latch);
3616 set_immediate_dominator (CDI_DOMINATORS, next_e->dest, next_e->src);
3617 loop_iv_stack_pop (ivstack);
3619 return translate_clast (scop, context_loop, stmt->next, last_e, ivstack);
3622 if (CLAST_STMT_IS_A (stmt, stmt_guard))
3624 edge last_e = graphite_create_new_guard (scop, next_e,
3625 ((struct clast_guard *) stmt),
3626 ivstack);
3627 edge true_e = get_true_edge_from_guard_bb (next_e->dest);
3628 next_e = translate_clast (scop, context_loop,
3629 ((struct clast_guard *) stmt)->then,
3630 true_e, ivstack);
3631 redirect_edge_succ_nodup (next_e, last_e->src);
3632 return translate_clast (scop, context_loop, stmt->next, last_e, ivstack);
3635 if (CLAST_STMT_IS_A (stmt, stmt_block))
3637 next_e = translate_clast (scop, context_loop,
3638 ((struct clast_block *) stmt)->body,
3639 next_e, ivstack);
3640 return translate_clast (scop, context_loop, stmt->next, next_e, ivstack);
3643 gcc_unreachable ();
3646 /* Build cloog program for SCoP. */
3648 static void
3649 build_cloog_prog (scop_p scop)
3651 int i;
3652 int max_nb_loops = scop_max_loop_depth (scop);
3653 graphite_bb_p gbb;
3654 CloogLoop *loop_list = NULL;
3655 CloogBlockList *block_list = NULL;
3656 CloogDomainList *scattering = NULL;
3657 CloogProgram *prog = SCOP_PROG (scop);
3658 int nbs = 2 * max_nb_loops + 1;
3659 int *scaldims = (int *) xmalloc (nbs * (sizeof (int)));
3661 cloog_program_set_nb_scattdims (prog, nbs);
3662 initialize_cloog_names (scop);
3664 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gbb); i++)
3666 /* Build new block. */
3667 CloogMatrix *domain = GBB_DOMAIN (gbb);
3668 CloogStatement *stmt = cloog_statement_alloc (GBB_BB (gbb)->index);
3669 CloogBlock *block = cloog_block_alloc (stmt, 0, NULL,
3670 nb_loops_around_gb (gbb));
3671 cloog_statement_set_usr (stmt, gbb);
3673 /* Add empty domain to all bbs, which do not yet have a domain, as they
3674 are not part of any loop. */
3675 if (domain == NULL)
3677 domain = cloog_matrix_alloc (0, scop_nb_params (scop) + 2);
3678 GBB_DOMAIN (gbb) = domain;
3681 /* Build loop list. */
3683 CloogLoop *new_loop_list = cloog_loop_malloc ();
3684 cloog_loop_set_next (new_loop_list, loop_list);
3685 cloog_loop_set_domain (new_loop_list,
3686 cloog_domain_matrix2domain (domain));
3687 cloog_loop_set_block (new_loop_list, block);
3688 loop_list = new_loop_list;
3691 /* Build block list. */
3693 CloogBlockList *new_block_list = cloog_block_list_malloc ();
3695 cloog_block_list_set_next (new_block_list, block_list);
3696 cloog_block_list_set_block (new_block_list, block);
3697 block_list = new_block_list;
3700 /* Build scattering list. */
3702 /* XXX: Replace with cloog_domain_list_alloc(), when available. */
3703 CloogDomainList *new_scattering
3704 = (CloogDomainList *) xmalloc (sizeof (CloogDomainList));
3705 CloogMatrix *scat_mat = schedule_to_scattering (gbb, nbs);
3707 cloog_set_next_domain (new_scattering, scattering);
3708 cloog_set_domain (new_scattering,
3709 cloog_domain_matrix2domain (scat_mat));
3710 scattering = new_scattering;
3711 cloog_matrix_free (scat_mat);
3715 cloog_program_set_loop (prog, loop_list);
3716 cloog_program_set_blocklist (prog, block_list);
3718 for (i = 0; i < nbs; i++)
3719 scaldims[i] = 0 ;
3721 cloog_program_set_scaldims (prog, scaldims);
3723 /* Extract scalar dimensions to simplify the code generation problem. */
3724 cloog_program_extract_scalars (prog, scattering);
3726 /* Apply scattering. */
3727 cloog_program_scatter (prog, scattering);
3729 /* Iterators corresponding to scalar dimensions have to be extracted. */
3730 cloog_names_scalarize (cloog_program_names (prog), nbs,
3731 cloog_program_scaldims (prog));
3733 /* Free blocklist. */
3735 CloogBlockList *next = cloog_program_blocklist (prog);
3737 while (next)
3739 CloogBlockList *toDelete = next;
3740 next = cloog_block_list_next (next);
3741 cloog_block_list_set_next (toDelete, NULL);
3742 cloog_block_list_set_block (toDelete, NULL);
3743 cloog_block_list_free (toDelete);
3745 cloog_program_set_blocklist (prog, NULL);
3749 /* Return the options that will be used in GLOOG. */
3751 static CloogOptions *
3752 set_cloog_options (void)
3754 CloogOptions *options = cloog_options_malloc ();
3756 /* Change cloog output language to C. If we do use FORTRAN instead, cloog
3757 will stop e.g. with "ERROR: unbounded loops not allowed in FORTRAN.", if
3758 we pass an incomplete program to cloog. */
3759 options->language = LANGUAGE_C;
3761 /* Enable complex equality spreading: removes dummy statements
3762 (assignments) in the generated code which repeats the
3763 substitution equations for statements. This is useless for
3764 GLooG. */
3765 options->esp = 1;
3767 /* Enable C pretty-printing mode: normalizes the substitution
3768 equations for statements. */
3769 options->cpp = 1;
3771 /* Allow cloog to build strides with a stride width different to one.
3772 This example has stride = 4:
3774 for (i = 0; i < 20; i += 4)
3775 A */
3776 options->strides = 1;
3778 /* Disable optimizations and make cloog generate source code closer to the
3779 input. This is useful for debugging, but later we want the optimized
3780 code.
3782 XXX: We can not disable optimizations, as loop blocking is not working
3783 without them. */
3784 if (0)
3786 options->f = -1;
3787 options->l = INT_MAX;
3790 return options;
3793 /* Prints STMT to STDERR. */
3795 void
3796 debug_clast_stmt (struct clast_stmt *stmt)
3798 CloogOptions *options = set_cloog_options ();
3800 pprint (stderr, stmt, 0, options);
3803 /* Find the right transform for the SCOP, and return a Cloog AST
3804 representing the new form of the program. */
3806 static struct clast_stmt *
3807 find_transform (scop_p scop)
3809 CloogProgram *prog;
3810 struct clast_stmt *stmt;
3811 CloogOptions *options = set_cloog_options ();
3813 /* Connect new cloog prog generation to graphite. */
3814 build_cloog_prog (scop);
3816 if (dump_file && (dump_flags & TDF_DETAILS))
3818 fprintf (dump_file, "Cloog Input [\n");
3819 cloog_program_print (dump_file, SCOP_PROG(scop));
3820 fprintf (dump_file, "]\n");
3823 prog = cloog_program_generate (SCOP_PROG (scop), options);
3824 stmt = cloog_clast_create (prog, options);
3826 if (dump_file && (dump_flags & TDF_DETAILS))
3828 fprintf (dump_file, "Cloog Output[\n");
3829 pprint (dump_file, stmt, 0, options);
3830 cloog_program_dump_cloog (dump_file, prog);
3831 fprintf (dump_file, "]\n");
3834 cloog_options_free (options);
3835 return stmt;
3838 /* Return a vector of all the virtual phi nodes in the current
3839 function. */
3841 static VEC (gimple, heap) *
3842 collect_virtual_phis (void)
3844 gimple_stmt_iterator si;
3845 gimple_vec phis = VEC_alloc (gimple, heap, 3);
3846 basic_block bb;
3848 FOR_EACH_BB (bb)
3849 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
3850 /* The phis we moved will have 0 arguments because the
3851 original edges were removed. */
3852 if (gimple_phi_num_args (gsi_stmt (si)) == 0)
3853 VEC_safe_push (gimple, heap, phis, gsi_stmt (si));
3855 /* Deallocate if we did not find any. */
3856 if (VEC_length (gimple, phis) == 0)
3858 VEC_free (gimple, heap, phis);
3859 phis = NULL;
3862 return phis;
3865 /* Find a virtual definition for variable VAR in BB. */
3867 static tree
3868 find_vdef_for_var_in_bb (basic_block bb, tree var)
3870 gimple_stmt_iterator gsi;
3871 gimple phi;
3872 def_operand_p def_var;
3873 vuse_vec_p vv;
3874 ssa_op_iter op_iter;
3876 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
3877 FOR_EACH_SSA_VDEF_OPERAND (def_var, vv, gsi_stmt (gsi), op_iter)
3878 if (SSA_NAME_VAR (*def_var) == var)
3879 return *def_var;
3881 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
3882 FOR_EACH_SSA_DEF_OPERAND (def_var, gsi_stmt (gsi), op_iter, SSA_OP_DEF)
3883 if (SSA_NAME_VAR (*def_var) == var)
3884 return *def_var;
3886 for (gsi = gsi_start_phis (bb); !gsi_end_p(gsi); gsi_next (&gsi))
3888 phi = gsi_stmt (gsi);
3889 if (SSA_NAME_VAR (PHI_RESULT (phi)) == var)
3890 return PHI_RESULT (phi);
3893 return NULL;
3896 /* Recursive helper. */
3898 static tree
3899 find_vdef_for_var_1 (basic_block bb, struct pointer_set_t *visited, tree var)
3901 tree result = NULL;
3902 edge_iterator ei;
3903 edge pred_edge;
3905 if (pointer_set_contains (visited, bb))
3906 return NULL;
3908 pointer_set_insert (visited, bb);
3909 result = find_vdef_for_var_in_bb (bb, var);
3911 if (!result)
3912 FOR_EACH_EDGE (pred_edge, ei, bb->preds)
3913 if (!result)
3914 result = find_vdef_for_var_1 (pred_edge->src, visited, var);
3916 return result;
3919 /* Finds a virtual definition for variable VAR. */
3921 static tree
3922 find_vdef_for_var (basic_block bb, tree var)
3924 struct pointer_set_t *visited = pointer_set_create ();
3925 tree def = find_vdef_for_var_1 (bb, visited, var);
3927 pointer_set_destroy (visited);
3928 return def;
3931 /* Update the virtual phis after loop bodies are moved to new
3932 loops. */
3934 static void
3935 patch_phis_for_virtual_defs (void)
3937 int i;
3938 gimple phi;
3939 VEC (gimple, heap) *virtual_phis = collect_virtual_phis ();
3941 for (i = 0; VEC_iterate (gimple, virtual_phis, i, phi); i++)
3943 basic_block bb = gimple_bb (phi);
3944 edge_iterator ei;
3945 edge pred_edge;
3946 gimple_stmt_iterator gsi;
3947 gimple new_phi;
3948 tree phi_result = PHI_RESULT (phi);
3949 tree var = SSA_NAME_VAR (phi_result);
3951 new_phi = create_phi_node (phi_result, bb);
3952 SSA_NAME_DEF_STMT (phi_result) = new_phi;
3954 FOR_EACH_EDGE (pred_edge, ei, bb->preds)
3956 tree def = find_vdef_for_var (pred_edge->src, var);
3958 if (def)
3959 add_phi_arg (new_phi, def, pred_edge);
3960 else
3961 add_phi_arg (new_phi, gimple_default_def (cfun, var), pred_edge);
3964 gsi = gsi_for_stmt (phi);
3965 remove_phi_node (&gsi, false);
3969 /* Mark the original loops of SCOP for removal, replacing their header
3970 field with NULL. */
3972 static void
3973 mark_old_loops (scop_p scop)
3975 int i;
3976 struct loop *loop;
3978 for (i = 0; VEC_iterate (loop_p, SCOP_LOOP_NEST (scop), i, loop); i++)
3980 loop->header = NULL;
3981 loop->latch = NULL;
3985 /* Scan the loops and remove the ones that have been marked for
3986 removal. */
3988 static void
3989 remove_dead_loops (void)
3991 struct loop *loop, *ploop;
3992 loop_iterator li;
3994 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
3996 /* Remove only those loops that we marked to be removed with
3997 mark_old_loops. */
3998 if (loop->header)
3999 continue;
4001 while (loop->inner)
4003 ploop = loop->inner;
4004 flow_loop_tree_node_remove (ploop);
4005 flow_loop_tree_node_add (loop_outer (loop), ploop);
4008 /* Remove the loop and free its data. */
4009 delete_loop (loop);
4013 /* Returns true when it is possible to generate code for this STMT.
4014 For the moment we cannot generate code when Cloog decides to
4015 duplicate a statement, as we do not do a copy, but a move.
4016 USED_BASIC_BLOCKS records the blocks that have already been seen.
4017 We return false if we have to generate code twice for the same
4018 block. */
4020 static bool
4021 can_generate_code_stmt (struct clast_stmt *stmt,
4022 struct pointer_set_t *used_basic_blocks)
4024 if (!stmt)
4025 return true;
4027 if (CLAST_STMT_IS_A (stmt, stmt_root))
4028 return can_generate_code_stmt (stmt->next, used_basic_blocks);
4030 if (CLAST_STMT_IS_A (stmt, stmt_user))
4032 CloogStatement *cs = ((struct clast_user_stmt *) stmt)->statement;
4033 graphite_bb_p gbb = (graphite_bb_p) cloog_statement_usr (cs);
4035 if (pointer_set_contains (used_basic_blocks, gbb))
4036 return false;
4037 pointer_set_insert (used_basic_blocks, gbb);
4038 return can_generate_code_stmt (stmt->next, used_basic_blocks);
4041 if (CLAST_STMT_IS_A (stmt, stmt_for))
4042 return can_generate_code_stmt (((struct clast_for *) stmt)->body,
4043 used_basic_blocks)
4044 && can_generate_code_stmt (stmt->next, used_basic_blocks);
4046 if (CLAST_STMT_IS_A (stmt, stmt_guard))
4047 return can_generate_code_stmt (((struct clast_guard *) stmt)->then,
4048 used_basic_blocks);
4050 if (CLAST_STMT_IS_A (stmt, stmt_block))
4051 return can_generate_code_stmt (((struct clast_block *) stmt)->body,
4052 used_basic_blocks)
4053 && can_generate_code_stmt (stmt->next, used_basic_blocks);
4055 return false;
4058 /* Returns true when it is possible to generate code for this STMT. */
4060 static bool
4061 can_generate_code (struct clast_stmt *stmt)
4063 bool result;
4064 struct pointer_set_t *used_basic_blocks = pointer_set_create ();
4066 result = can_generate_code_stmt (stmt, used_basic_blocks);
4067 pointer_set_destroy (used_basic_blocks);
4068 return result;
4071 /* Skip any definition that is a phi node with a single phi def. */
4073 static tree
4074 skip_phi_defs (tree ssa_name)
4076 tree result = ssa_name;
4077 gimple def_stmt = SSA_NAME_DEF_STMT (ssa_name);
4079 if (gimple_code (def_stmt) == GIMPLE_PHI
4080 && gimple_phi_num_args (def_stmt) == 1)
4081 result = skip_phi_defs (gimple_phi_arg(def_stmt,0)->def);
4083 return result;
4086 /* Returns a VEC containing the phi-arg defs coming from SCOP_EXIT in
4087 the destination block of SCOP_EXIT. */
4089 static VEC (tree, heap) *
4090 collect_scop_exit_phi_args (edge scop_exit)
4092 VEC (tree, heap) *phi_args = VEC_alloc (tree, heap, 1);
4093 gimple_stmt_iterator gsi;
4095 for (gsi = gsi_start_phis (scop_exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
4097 gimple phi = gsi_stmt (gsi);
4098 tree phi_arg = skip_phi_defs(PHI_ARG_DEF_FROM_EDGE (phi, scop_exit));
4100 VEC_safe_push (tree, heap, phi_args, phi_arg);
4103 return phi_args;
4106 /* Patches (adds) PHI_ARGS to the phi nodes in SCOP_EXIT destination. */
4108 static void
4109 patch_scop_exit_phi_args (edge scop_exit,
4110 VEC (tree, heap) *phi_args)
4112 int i = 0;
4113 gimple_stmt_iterator gsi;
4115 for (gsi = gsi_start_phis (scop_exit->dest); !gsi_end_p (gsi);
4116 gsi_next (&gsi), i++)
4118 tree def = VEC_index (tree, phi_args, i);
4119 gimple phi = gsi_stmt (gsi);
4121 gcc_assert (PHI_ARG_DEF_FROM_EDGE (phi, scop_exit) == NULL);
4123 add_phi_arg (phi, def, scop_exit);
4127 /* GIMPLE Loop Generator: generates loops from STMT in GIMPLE form for
4128 the given SCOP. */
4130 static void
4131 gloog (scop_p scop, struct clast_stmt *stmt)
4133 edge new_scop_exit_edge = NULL;
4134 basic_block scop_exit = SCOP_EXIT (scop);
4135 VEC (tree, heap)* phi_args =
4136 collect_scop_exit_phi_args (SESE_EXIT (SCOP_REGION (scop)));
4137 VEC (name_tree, heap) *ivstack = VEC_alloc (name_tree, heap, 10);
4138 edge construction_edge = SESE_ENTRY (SCOP_REGION (scop));
4139 basic_block old_scop_exit_idom = get_immediate_dominator (CDI_DOMINATORS,
4140 scop_exit);
4142 if (!can_generate_code (stmt))
4144 cloog_clast_free (stmt);
4145 return;
4148 new_scop_exit_edge = translate_clast (scop,
4149 construction_edge->src->loop_father,
4150 stmt, construction_edge, &ivstack);
4151 redirect_edge_succ (new_scop_exit_edge, scop_exit);
4152 if (!old_scop_exit_idom
4153 || !dominated_by_p (CDI_DOMINATORS, SCOP_ENTRY (scop),
4154 old_scop_exit_idom)
4155 || SCOP_ENTRY (scop) == old_scop_exit_idom)
4156 set_immediate_dominator (CDI_DOMINATORS,
4157 new_scop_exit_edge->dest,
4158 new_scop_exit_edge->src);
4160 cloog_clast_free (stmt);
4162 if (new_scop_exit_edge->dest == EXIT_BLOCK_PTR)
4163 new_scop_exit_edge->flags = 0;
4165 find_unreachable_blocks ();
4166 delete_unreachable_blocks ();
4167 patch_phis_for_virtual_defs ();
4168 patch_scop_exit_phi_args (new_scop_exit_edge, phi_args);
4169 mark_old_loops (scop);
4170 remove_dead_loops ();
4171 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
4173 #ifdef ENABLE_CHECKING
4174 verify_loop_structure ();
4175 verify_dominators (CDI_DOMINATORS);
4176 verify_ssa (false);
4177 #endif
4180 /* Returns the number of data references in SCOP. */
4182 static int
4183 nb_data_refs_in_scop (scop_p scop)
4185 int i;
4186 graphite_bb_p gbb;
4187 int res = 0;
4189 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gbb); i++)
4190 res += VEC_length (data_reference_p, GBB_DATA_REFS (gbb));
4192 return res;
4195 /* Check if a graphite bb can be ignored in graphite. We ignore all
4196 bbs, that only contain code, that will be eliminated later.
4198 TODO: - Move PHI nodes and scalar variables out of these bbs, that only
4199 remain conditions and induction variables. */
4201 static bool
4202 gbb_can_be_ignored (graphite_bb_p gb)
4204 gimple_stmt_iterator gsi;
4205 scop_p scop = GBB_SCOP (gb);
4206 loop_p loop = GBB_BB (gb)->loop_father;
4208 if (VEC_length (data_reference_p, GBB_DATA_REFS(gb)))
4209 return false;
4211 /* Check statements. */
4212 for (gsi = gsi_start_bb (GBB_BB (gb)); !gsi_end_p (gsi); gsi_next (&gsi))
4214 gimple stmt = gsi_stmt (gsi);
4215 switch (gimple_code (stmt))
4217 /* Control flow expressions can be ignored, as they are
4218 represented in the iteration domains and will be
4219 regenerated by graphite. */
4220 case GIMPLE_COND:
4221 case GIMPLE_GOTO:
4222 case GIMPLE_SWITCH:
4223 break;
4225 /* Scalar variables can be ignored, if we can regenerate
4226 them later using their scalar evolution function.
4227 XXX: Just a heuristic, that needs further investigation. */
4228 case GIMPLE_ASSIGN:
4230 tree var = gimple_assign_lhs (stmt);
4231 var = analyze_scalar_evolution (loop, var);
4232 var = instantiate_scev (block_before_scop (scop), loop, var);
4234 if (TREE_CODE (var) == SCEV_NOT_KNOWN)
4235 return false;
4237 break;
4239 /* Otherwise not ignoreable. */
4240 default:
4241 return false;
4245 return true;
4248 /* Remove all ignoreable gbbs from SCOP. */
4250 static void
4251 scop_remove_ignoreable_gbbs (scop_p scop)
4253 graphite_bb_p gb;
4254 int i;
4256 int max_schedule = scop_max_loop_depth (scop) + 1;
4257 lambda_vector last_schedule = lambda_vector_new (max_schedule);
4258 lambda_vector_clear (last_schedule, max_schedule);
4260 /* Update schedules. */
4261 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++)
4263 int nb_loops = gbb_nb_loops (gb);
4265 if (GBB_STATIC_SCHEDULE (gb) [nb_loops] == 0)
4266 last_schedule [nb_loops] = 0;
4268 if (gbb_can_be_ignored (gb))
4270 /* Mark gbb for remove. */
4271 bitmap_clear_bit (SCOP_BBS_B (scop), gb->bb->index);
4272 GBB_SCOP (gb) = NULL;
4273 last_schedule [nb_loops]--;
4275 else
4276 lambda_vector_add (GBB_STATIC_SCHEDULE (gb), last_schedule,
4277 GBB_STATIC_SCHEDULE (gb), nb_loops + 1);
4280 /* Remove gbbs. */
4281 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++)
4282 if (GBB_SCOP (gb) == NULL)
4284 VEC_unordered_remove (graphite_bb_p, SCOP_BBS (scop), i);
4285 free_graphite_bb (gb);
4286 /* XXX: Hackish? But working. */
4287 i--;
4290 graphite_sort_gbbs (scop);
4293 /* Move the loop at index LOOP and insert it before index NEW_LOOP_POS.
4294 This transformartion is only valid, if the loop nest between i and k is
4295 perfectly nested. Therefore we do not need to change the static schedule.
4297 Example:
4299 for (i = 0; i < 50; i++)
4300 for (j ...)
4301 for (k = 5; k < 100; k++)
4304 To move k before i use:
4306 graphite_trans_bb_move_loop (A, 2, 0)
4308 for (k = 5; k < 100; k++)
4309 for (i = 0; i < 50; i++)
4310 for (j ...)
4313 And to move k back:
4315 graphite_trans_bb_move_loop (A, 0, 2)
4317 This function does not check the validity of interchanging loops.
4318 This should be checked before calling this function. */
4320 static void
4321 graphite_trans_bb_move_loop (graphite_bb_p gb, int loop,
4322 int new_loop_pos)
4324 CloogMatrix *domain = GBB_DOMAIN (gb);
4325 int row, j;
4326 loop_p tmp_loop_p;
4328 gcc_assert (loop < gbb_nb_loops (gb)
4329 && new_loop_pos < gbb_nb_loops (gb));
4331 /* Update LOOPS vector. */
4332 tmp_loop_p = VEC_index (loop_p, GBB_LOOPS (gb), loop);
4333 VEC_ordered_remove (loop_p, GBB_LOOPS (gb), loop);
4334 VEC_safe_insert (loop_p, heap, GBB_LOOPS (gb), new_loop_pos, tmp_loop_p);
4336 /* Move the domain columns. */
4337 if (loop < new_loop_pos)
4338 for (row = 0; row < domain->NbRows; row++)
4340 Value tmp;
4341 value_init (tmp);
4342 value_assign (tmp, domain->p[row][loop + 1]);
4344 for (j = loop ; j < new_loop_pos - 1; j++)
4345 value_assign (domain->p[row][j + 1], domain->p[row][j + 2]);
4347 value_assign (domain->p[row][new_loop_pos], tmp);
4348 value_clear (tmp);
4350 else
4351 for (row = 0; row < domain->NbRows; row++)
4353 Value tmp;
4354 value_init (tmp);
4355 value_assign (tmp, domain->p[row][loop + 1]);
4357 for (j = loop ; j > new_loop_pos; j--)
4358 value_assign (domain->p[row][j + 1], domain->p[row][j]);
4360 value_assign (domain->p[row][new_loop_pos + 1], tmp);
4361 value_clear (tmp);
4365 /* Get the index of the column representing constants in the DOMAIN
4366 matrix. */
4368 static int
4369 const_column_index (CloogMatrix *domain)
4371 return domain->NbColumns - 1;
4375 /* Get the first index that is positive or negative, determined
4376 following the value of POSITIVE, in matrix DOMAIN in COLUMN. */
4378 static int
4379 get_first_matching_sign_row_index (CloogMatrix *domain, int column,
4380 bool positive)
4382 int row;
4384 for (row = 0; row < domain->NbRows; row++)
4386 int val = value_get_si (domain->p[row][column]);
4388 if (val > 0 && positive)
4389 return row;
4391 else if (val < 0 && !positive)
4392 return row;
4395 gcc_unreachable ();
4398 /* Get the lower bound of COLUMN in matrix DOMAIN. */
4400 static int
4401 get_lower_bound_row (CloogMatrix *domain, int column)
4403 return get_first_matching_sign_row_index (domain, column, true);
4406 /* Get the upper bound of COLUMN in matrix DOMAIN. */
4408 static int
4409 get_upper_bound_row (CloogMatrix *domain, int column)
4411 return get_first_matching_sign_row_index (domain, column, false);
4414 /* Get the lower bound of LOOP. */
4416 static void
4417 get_lower_bound (CloogMatrix *domain, int loop, Value lower_bound_result)
4419 int lower_bound_row = get_lower_bound_row (domain, loop);
4420 value_init (lower_bound_result);
4421 value_assign (lower_bound_result,
4422 domain->p[lower_bound_row][const_column_index(domain)]);
4425 /* Get the upper bound of LOOP. */
4427 static void
4428 get_upper_bound (CloogMatrix *domain, int loop, Value upper_bound_result)
4430 int upper_bound_row = get_upper_bound_row (domain, loop);
4431 value_init (upper_bound_result);
4432 value_assign (upper_bound_result,
4433 domain->p[upper_bound_row][const_column_index(domain)]);
4436 /* Strip mines the loop of BB at the position LOOP_DEPTH with STRIDE.
4437 Always valid, but not always a performance improvement. */
4439 static void
4440 graphite_trans_bb_strip_mine (graphite_bb_p gb, int loop_depth, int stride)
4442 int row, col;
4444 CloogMatrix *domain = GBB_DOMAIN (gb);
4445 CloogMatrix *new_domain = cloog_matrix_alloc (domain->NbRows + 3,
4446 domain->NbColumns + 1);
4448 int col_loop_old = loop_depth + 2;
4449 int col_loop_strip = col_loop_old - 1;
4451 Value old_lower_bound;
4452 Value old_upper_bound;
4455 gcc_assert (loop_depth <= gbb_nb_loops (gb) - 1);
4457 VEC_safe_insert (loop_p, heap, GBB_LOOPS (gb), loop_depth, NULL);
4459 GBB_DOMAIN (gb) = new_domain;
4462 nrows = 4, ncols = 4
4463 eq i j c
4464 1 1 0 0
4465 1 -1 0 99
4466 1 0 1 0
4467 1 0 -1 99
4470 /* Move domain. */
4471 for (row = 0; row < domain->NbRows; row++)
4472 for (col = 0; col < domain->NbColumns; col++)
4473 if (col <= loop_depth)
4475 value_assign (new_domain->p[row][col], domain->p[row][col]);
4477 else
4479 value_assign (new_domain->p[row][col + 1], domain->p[row][col]);
4484 nrows = 6, ncols = 5
4485 outer inner
4486 eq i jj j c
4487 1 1 0 0 0
4488 1 -1 0 0 99
4489 1 0 0 1 0
4490 1 0 0 -1 99
4491 0 0 0 0 0
4492 0 0 0 0 0
4493 0 0 0 0 0
4496 row = domain->NbRows;
4498 /* Add outer loop. */
4500 get_lower_bound (new_domain, col_loop_old, old_lower_bound);
4501 get_upper_bound (new_domain, col_loop_old, old_upper_bound);
4503 /* Set Lower Bound */
4504 value_set_si (new_domain->p[row][0], 1);
4505 value_set_si (new_domain->p[row][col_loop_strip], 1);
4506 value_assign (new_domain->p[row][const_column_index (new_domain)],
4507 old_lower_bound);
4508 row++;
4513 eq i jj j c
4514 1 1 0 0 0
4515 1 -1 0 0 99
4516 1 0 0 1 0 -
4517 1 0 0 -1 99 | copy old lower bound
4518 1 0 1 0 0 <-
4519 0 0 0 0 0
4520 0 0 0 0 0
4524 Value new_upper_bound;
4525 Value strip_size_value;
4527 value_init (new_upper_bound);
4529 value_init (strip_size_value);
4530 value_set_si (strip_size_value, (int) stride);
4533 value_pdivision(new_upper_bound,old_upper_bound,strip_size_value);
4534 value_add_int (new_upper_bound, new_upper_bound, 1);
4536 /* Set Upper Bound */
4537 value_set_si (new_domain->p[row][0], 1);
4538 value_set_si (new_domain->p[row][col_loop_strip], -1);
4539 value_assign (new_domain->p[row][const_column_index (new_domain)],
4540 new_upper_bound);
4541 row++;
4545 eq i jj j c
4546 1 1 0 0 0
4547 1 -1 0 0 99
4548 1 0 0 1 0
4549 1 0 0 -1 99
4550 1 0 1 0 0
4551 1 0 -1 0 25 (divide old upper bound with stride)
4552 0 0 0 0 0
4556 row = get_lower_bound_row (new_domain, col_loop_old);
4557 /* Add local variable to keep linear representation. */
4558 value_set_si (new_domain->p[row][0], 1);
4559 value_set_si (new_domain->p[row][const_column_index (new_domain)],0);
4560 value_set_si (new_domain->p[row][col_loop_old], 1);
4561 value_set_si (new_domain->p[row][col_loop_strip], -1*((int)stride));
4566 eq i jj j c
4567 1 1 0 0 0
4568 1 -1 0 0 99
4569 1 0 -1 1 0
4570 1 0 0 -1 99
4571 1 0 1 0 0
4572 1 0 -1 0 25 (divide old upper bound with stride)
4573 0 0 0 0 0
4577 row = new_domain->NbRows-1;
4579 value_set_si (new_domain->p[row][0], 1);
4580 value_set_si (new_domain->p[row][col_loop_old], -1);
4581 value_set_si (new_domain->p[row][col_loop_strip], stride);
4582 value_set_si (new_domain->p[row][const_column_index (new_domain)],
4583 stride-1);
4588 eq i jj j c
4589 1 1 0 0 0 i >= 0
4590 1 -1 0 0 99 99 >= i
4591 1 0 -4 1 0 j >= 4*jj
4592 1 0 0 -1 99 99 >= j
4593 1 0 1 0 0 jj >= 0
4594 1 0 -1 0 25 25 >= jj
4595 0 0 4 -1 3 jj+3 >= j
4598 cloog_matrix_free (domain);
4600 /* Update static schedule. */
4602 int i;
4603 int nb_loops = gbb_nb_loops (gb);
4604 lambda_vector new_schedule = lambda_vector_new (nb_loops + 1);
4606 for (i = 0; i <= loop_depth; i++)
4607 new_schedule[i] = GBB_STATIC_SCHEDULE (gb)[i];
4609 for (i = loop_depth + 1; i <= nb_loops - 2; i++)
4610 new_schedule[i + 2] = GBB_STATIC_SCHEDULE (gb)[i];
4612 GBB_STATIC_SCHEDULE (gb) = new_schedule;
4616 /* Returns true when the strip mining of LOOP_INDEX by STRIDE is
4617 profitable or undecidable. GB is the statement around which the
4618 loops will be strip mined. */
4620 static bool
4621 strip_mine_profitable_p (graphite_bb_p gb, int stride,
4622 int loop_index)
4624 bool res = true;
4625 edge exit = NULL;
4626 tree niter;
4627 loop_p loop;
4628 long niter_val;
4630 loop = VEC_index (loop_p, GBB_LOOPS (gb), loop_index);
4631 exit = single_exit (loop);
4633 niter = find_loop_niter (loop, &exit);
4634 if (niter == chrec_dont_know
4635 || TREE_CODE (niter) != INTEGER_CST)
4636 return true;
4638 niter_val = int_cst_value (niter);
4640 if (niter_val < stride)
4642 res = false;
4643 if (dump_file && (dump_flags & TDF_DETAILS))
4645 fprintf (dump_file, "\nStrip Mining is not profitable for loop %d:",
4646 loop_index);
4647 fprintf (dump_file, "number of iterations is too low.\n");
4651 return res;
4654 /* Determines when the interchange of LOOP_A and LOOP_B belonging to
4655 SCOP is legal. */
4657 static bool
4658 is_interchange_valid (scop_p scop, int loop_a, int loop_b)
4660 bool res;
4661 VEC (ddr_p, heap) *dependence_relations;
4662 VEC (data_reference_p, heap) *datarefs;
4664 struct loop *nest = VEC_index (loop_p, SCOP_LOOP_NEST (scop), loop_a);
4665 int depth = perfect_loop_nest_depth (nest);
4666 lambda_trans_matrix trans;
4668 gcc_assert (loop_a < loop_b);
4670 dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10);
4671 datarefs = VEC_alloc (data_reference_p, heap, 10);
4673 if (!compute_data_dependences_for_loop (nest, true, &datarefs,
4674 &dependence_relations))
4675 return false;
4677 if (dump_file && (dump_flags & TDF_DETAILS))
4678 dump_ddrs (dump_file, dependence_relations);
4680 trans = lambda_trans_matrix_new (depth, depth);
4681 lambda_matrix_id (LTM_MATRIX (trans), depth);
4683 lambda_matrix_row_exchange (LTM_MATRIX (trans), 0, loop_b - loop_a);
4685 if (!lambda_transform_legal_p (trans, depth, dependence_relations))
4687 lambda_matrix_row_exchange (LTM_MATRIX (trans), 0, loop_b - loop_a);
4688 res = false;
4690 else
4691 res = true;
4693 free_dependence_relations (dependence_relations);
4694 free_data_refs (datarefs);
4695 return res;
4698 /* Loop block the LOOPS innermost loops of GB with stride size STRIDE.
4700 Example
4702 for (i = 0; i <= 50; i++=4)
4703 for (k = 0; k <= 100; k++=4)
4704 for (l = 0; l <= 200; l++=4)
4707 To strip mine the two inner most loops with stride = 4 call:
4709 graphite_trans_bb_block (A, 4, 2)
4711 for (i = 0; i <= 50; i++)
4712 for (kk = 0; kk <= 100; kk+=4)
4713 for (ll = 0; ll <= 200; ll+=4)
4714 for (k = kk; k <= min (100, kk + 3); k++)
4715 for (l = ll; l <= min (200, ll + 3); l++)
4719 static bool
4720 graphite_trans_bb_block (graphite_bb_p gb, int stride, int loops)
4722 int i, j;
4723 int nb_loops = gbb_nb_loops (gb);
4724 int start = nb_loops - loops;
4725 scop_p scop = GBB_SCOP (gb);
4727 gcc_assert (scop_contains_loop (scop, gbb_loop (gb)));
4729 for (i = start ; i < nb_loops; i++)
4730 for (j = i + 1; j < nb_loops; j++)
4731 if (!is_interchange_valid (scop, i, j))
4733 if (dump_file && (dump_flags & TDF_DETAILS))
4734 fprintf (dump_file,
4735 "\nInterchange not valid for loops %d and %d:\n", i, j);
4736 return false;
4738 else if (dump_file && (dump_flags & TDF_DETAILS))
4739 fprintf (dump_file,
4740 "\nInterchange valid for loops %d and %d:\n", i, j);
4742 /* Check if strip mining is profitable for every loop. */
4743 for (i = 0; i < nb_loops - start; i++)
4744 if (!strip_mine_profitable_p (gb, stride, start + i))
4745 return false;
4747 /* Strip mine loops. */
4748 for (i = 0; i < nb_loops - start; i++)
4749 graphite_trans_bb_strip_mine (gb, start + 2 * i, stride);
4751 /* Interchange loops. */
4752 for (i = 1; i < nb_loops - start; i++)
4753 graphite_trans_bb_move_loop (gb, start + 2 * i, start + i);
4755 return true;
4758 /* Loop block LOOPS innermost loops of a loop nest. BBS represent the
4759 basic blocks that belong to the loop nest to be blocked. */
4761 static bool
4762 graphite_trans_loop_block (VEC (graphite_bb_p, heap) *bbs, int loops)
4764 graphite_bb_p gb;
4765 int i;
4766 bool transform_done = false;
4768 /* TODO: - Calculate the stride size automatically. */
4769 int stride_size = 64;
4771 for (i = 0; VEC_iterate (graphite_bb_p, bbs, i, gb); i++)
4772 transform_done |= graphite_trans_bb_block (gb, stride_size, loops);
4774 return transform_done;
4777 /* Loop block all basic blocks of SCOP. */
4779 static bool
4780 graphite_trans_scop_block (scop_p scop)
4782 graphite_bb_p gb;
4783 int i, j;
4784 int last_nb_loops;
4785 int nb_loops;
4786 bool perfect = true;
4787 bool transform_done = false;
4789 VEC (graphite_bb_p, heap) *bbs = VEC_alloc (graphite_bb_p, heap, 3);
4790 int max_schedule = scop_max_loop_depth (scop) + 1;
4791 lambda_vector last_schedule = lambda_vector_new (max_schedule);
4793 if (VEC_length (graphite_bb_p, SCOP_BBS (scop)) == 0)
4794 return transform_done;
4796 /* Get the data of the first bb. */
4797 gb = VEC_index (graphite_bb_p, SCOP_BBS (scop), 0);
4798 last_nb_loops = gbb_nb_loops (gb);
4799 lambda_vector_copy (GBB_STATIC_SCHEDULE (gb), last_schedule,
4800 last_nb_loops + 1);
4801 VEC_safe_push (graphite_bb_p, heap, bbs, gb);
4803 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++)
4805 /* We did the first bb before. */
4806 if (i == 0)
4807 continue;
4809 nb_loops = gbb_nb_loops (gb);
4811 /* If the number of loops is unchanged and only the last element of the
4812 schedule changes, we stay in the loop nest. */
4813 if (nb_loops == last_nb_loops
4814 && (last_schedule [nb_loops + 1]
4815 != GBB_STATIC_SCHEDULE (gb)[nb_loops + 1]))
4817 VEC_safe_push (graphite_bb_p, heap, bbs, gb);
4818 continue;
4821 /* Otherwise, we left the innermost loop. So check, if the last bb was in
4822 a perfect loop nest and how many loops are contained in this perfect
4823 loop nest.
4825 Count the number of zeros from the end of the schedule. They are the
4826 number of surrounding loops.
4828 Example:
4829 last_bb 2 3 2 0 0 0 0 3
4830 bb 2 4 0
4831 <------ j = 4
4833 last_bb 2 3 2 0 0 0 0 3
4834 bb 2 3 2 0 1
4835 <-- j = 2
4837 If there is no zero, there were other bbs in outer loops and the loop
4838 nest is not perfect. */
4839 for (j = last_nb_loops - 1; j >= 0; j--)
4841 if (last_schedule [j] != 0
4842 || (j <= nb_loops && GBB_STATIC_SCHEDULE (gb)[j] == 1))
4844 j--;
4845 break;
4849 j++;
4851 /* Found perfect loop nest. */
4852 if (perfect && last_nb_loops - j > 0)
4853 transform_done |= graphite_trans_loop_block (bbs, last_nb_loops - j);
4855 /* Check if we start with a new loop.
4857 Example:
4859 last_bb 2 3 2 0 0 0 0 3
4860 bb 2 3 2 0 0 1 0
4862 Here we start with the loop "2 3 2 0 0 1"
4864 last_bb 2 3 2 0 0 0 0 3
4865 bb 2 3 2 0 0 1
4867 But here not, so the loop nest can never be perfect. */
4869 perfect = (GBB_STATIC_SCHEDULE (gb)[nb_loops] == 0);
4871 /* Update the last_bb infos. We do not do that for the bbs in the same
4872 loop, as the data we use is not changed. */
4873 last_nb_loops = nb_loops;
4874 lambda_vector_copy (GBB_STATIC_SCHEDULE (gb), last_schedule,
4875 nb_loops + 1);
4876 VEC_truncate (graphite_bb_p, bbs, 0);
4877 VEC_safe_push (graphite_bb_p, heap, bbs, gb);
4880 /* Check if the last loop nest was perfect. It is the same check as above,
4881 but the comparison with the next bb is missing. */
4882 for (j = last_nb_loops - 1; j >= 0; j--)
4883 if (last_schedule [j] != 0)
4885 j--;
4886 break;
4889 j++;
4891 /* Found perfect loop nest. */
4892 if (last_nb_loops - j > 0)
4893 transform_done |= graphite_trans_loop_block (bbs, last_nb_loops - j);
4894 VEC_free (graphite_bb_p, heap, bbs);
4896 if (dump_file && (dump_flags & TDF_DETAILS))
4897 fprintf (dump_file, "\nLoop blocked.\n");
4899 return transform_done;
4902 /* Apply graphite transformations to all the basic blocks of SCOP. */
4904 static bool
4905 graphite_apply_transformations (scop_p scop)
4907 bool transform_done = false;
4909 /* Sort the list of bbs. Keep them always sorted. */
4910 graphite_sort_gbbs (scop);
4911 scop_remove_ignoreable_gbbs (scop);
4913 if (flag_loop_block)
4914 transform_done = graphite_trans_scop_block (scop);
4916 #if 0 && ENABLE_CHECKING
4917 /* When the compiler is configured with ENABLE_CHECKING, always
4918 generate code, even if we did not apply any transformation. This
4919 provides better code coverage of the backend code generator.
4921 This also allows to check the performance for an identity
4922 transform: GIMPLE -> GRAPHITE -> GIMPLE; and the output of CLooG
4923 is never an identity: if CLooG optimizations are not disabled,
4924 the CLooG output is always optimized in control flow. */
4925 transform_done = true;
4926 #endif
4928 return transform_done;
4931 /* We limit all SCoPs to SCoPs, that are completely surrounded by a loop.
4933 Example:
4935 for (i |
4937 for (j | SCoP 1
4938 for (k |
4941 * SCoP frontier, as this line is not surrounded by any loop. *
4943 for (l | SCoP 2
4945 This is necessary as scalar evolution and parameter detection need a
4946 outermost loop to initialize parameters correctly.
4948 TODO: FIX scalar evolution and parameter detection to allow more flexible
4949 SCoP frontiers. */
4951 static void
4952 limit_scops (void)
4954 VEC (sd_region, heap) *tmp_scops = VEC_alloc (sd_region, heap, 3);
4956 int i;
4957 scop_p scop;
4959 for (i = 0; VEC_iterate (scop_p, current_scops, i, scop); i++)
4961 int j;
4962 loop_p loop;
4963 build_scop_bbs (scop);
4964 build_scop_loop_nests (scop);
4966 for (j = 0; VEC_iterate (loop_p, SCOP_LOOP_NEST (scop), j, loop); j++)
4967 if (!loop_in_scop_p (loop_outer (loop), scop))
4969 sd_region open_scop;
4970 open_scop.entry = loop_preheader_edge (loop)->dest;
4971 open_scop.exit = single_exit (loop)->dest;
4972 VEC_safe_push (sd_region, heap, tmp_scops, &open_scop);
4976 free_scops (current_scops);
4977 current_scops = VEC_alloc (scop_p, heap, 3);
4979 create_sese_edges (tmp_scops);
4980 build_graphite_scops (tmp_scops);
4983 /* Perform a set of linear transforms on the loops of the current
4984 function. */
4986 void
4987 graphite_transform_loops (void)
4989 int i;
4990 scop_p scop;
4992 if (number_of_loops () <= 1)
4993 return;
4995 current_scops = VEC_alloc (scop_p, heap, 3);
4997 calculate_dominance_info (CDI_DOMINATORS);
4998 calculate_dominance_info (CDI_POST_DOMINATORS);
5000 if (dump_file && (dump_flags & TDF_DETAILS))
5001 fprintf (dump_file, "Graphite loop transformations \n");
5003 cloog_initialize ();
5004 build_scops ();
5005 limit_scops ();
5007 if (dump_file && (dump_flags & TDF_DETAILS))
5008 fprintf (dump_file, "\nnumber of SCoPs: %d\n",
5009 VEC_length (scop_p, current_scops));
5011 for (i = 0; VEC_iterate (scop_p, current_scops, i, scop); i++)
5013 build_scop_bbs (scop);
5014 build_scop_loop_nests (scop);
5015 build_scop_canonical_schedules (scop);
5016 build_bb_loops (scop);
5017 find_scop_parameters (scop);
5018 build_scop_context (scop);
5020 if (dump_file && (dump_flags & TDF_DETAILS))
5022 fprintf (dump_file, "\n(In SCoP %d:\n", i);
5023 fprintf (dump_file, "\nnumber of bbs: %d\n",
5024 VEC_length (graphite_bb_p, SCOP_BBS (scop)));
5025 fprintf (dump_file, "\nnumber of loops: %d)\n",
5026 VEC_length (loop_p, SCOP_LOOP_NEST (scop)));
5029 if (!build_scop_iteration_domain (scop))
5030 continue;
5032 build_scop_conditions (scop);
5033 build_scop_data_accesses (scop);
5035 if (dump_file && (dump_flags & TDF_DETAILS))
5037 int nbrefs = nb_data_refs_in_scop (scop);
5038 fprintf (dump_file, "\nnumber of data refs: %d\n", nbrefs);
5041 if (graphite_apply_transformations (scop))
5042 gloog (scop, find_transform (scop));
5045 /* Cleanup. */
5046 free_scops (current_scops);
5047 cloog_finalize ();
5050 #else /* If Cloog is not available: #ifndef HAVE_cloog. */
5052 void
5053 graphite_transform_loops (void)
5055 sorry ("Graphite loop optimizations cannot be used");
5058 #endif