2008-11-30 Paul Thomas <pault@gcc.gnu.org>
[official-gcc.git] / gcc / graphite.c
blob8a464c19b3df969ee4546ac540975723126910bf
1 /* Gimple Represented as Polyhedra.
2 Copyright (C) 2006, 2007, 2008 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <sebastian.pop@inria.fr>.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass converts GIMPLE to GRAPHITE, performs some loop
22 transformations and then converts the resulting representation back
23 to GIMPLE.
25 An early description of this pass can be found in the GCC Summit'06
26 paper "GRAPHITE: Polyhedral Analyses and Optimizations for GCC".
27 The wiki page http://gcc.gnu.org/wiki/Graphite contains pointers to
28 the related work.
30 One important document to read is CLooG's internal manual:
31 http://repo.or.cz/w/cloog-ppl.git?a=blob_plain;f=doc/cloog.texi;hb=HEAD
32 that describes the data structure of loops used in this file, and
33 the functions that are used for transforming the code. */
35 #include "config.h"
36 #include "system.h"
37 #include "coretypes.h"
38 #include "tm.h"
39 #include "ggc.h"
40 #include "tree.h"
41 #include "rtl.h"
42 #include "basic-block.h"
43 #include "diagnostic.h"
44 #include "tree-flow.h"
45 #include "toplev.h"
46 #include "tree-dump.h"
47 #include "timevar.h"
48 #include "cfgloop.h"
49 #include "tree-chrec.h"
50 #include "tree-data-ref.h"
51 #include "tree-scalar-evolution.h"
52 #include "tree-pass.h"
53 #include "domwalk.h"
54 #include "pointer-set.h"
55 #include "gimple.h"
57 #ifdef HAVE_cloog
58 #include "cloog/cloog.h"
59 #include "graphite.h"
61 static VEC (scop_p, heap) *current_scops;
63 /* Converts a GMP constant V to a tree and returns it. */
65 static tree
66 gmp_cst_to_tree (Value v)
68 return build_int_cst (integer_type_node, value_get_si (v));
71 /* Debug the list of old induction variables for this SCOP. */
73 void
74 debug_oldivs (scop_p scop)
76 int i;
77 name_tree oldiv;
79 fprintf (stderr, "Old IVs:");
81 for (i = 0; VEC_iterate (name_tree, SCOP_OLDIVS (scop), i, oldiv); i++)
83 fprintf (stderr, "(");
84 print_generic_expr (stderr, oldiv->t, 0);
85 fprintf (stderr, ", %s, %d)\n", oldiv->name, oldiv->loop->num);
87 fprintf (stderr, "\n");
90 /* Debug the loops around basic block GB. */
92 void
93 debug_loop_vec (graphite_bb_p gb)
95 int i;
96 loop_p loop;
98 fprintf (stderr, "Loop Vec:");
100 for (i = 0; VEC_iterate (loop_p, GBB_LOOPS (gb), i, loop); i++)
101 fprintf (stderr, "%d: %d, ", i, loop ? loop->num : -1);
103 fprintf (stderr, "\n");
106 /* Returns true if stack ENTRY is a constant. */
108 static bool
109 iv_stack_entry_is_constant (iv_stack_entry *entry)
111 return entry->kind == iv_stack_entry_const;
114 /* Returns true if stack ENTRY is an induction variable. */
116 static bool
117 iv_stack_entry_is_iv (iv_stack_entry *entry)
119 return entry->kind == iv_stack_entry_iv;
122 /* Push (IV, NAME) on STACK. */
124 static void
125 loop_iv_stack_push_iv (loop_iv_stack stack, tree iv, const char *name)
127 iv_stack_entry *entry = XNEW (iv_stack_entry);
128 name_tree named_iv = XNEW (struct name_tree);
130 named_iv->t = iv;
131 named_iv->name = name;
133 entry->kind = iv_stack_entry_iv;
134 entry->data.iv = named_iv;
136 VEC_safe_push (iv_stack_entry_p, heap, *stack, entry);
139 /* Inserts a CONSTANT in STACK at INDEX. */
141 static void
142 loop_iv_stack_insert_constant (loop_iv_stack stack, int index,
143 tree constant)
145 iv_stack_entry *entry = XNEW (iv_stack_entry);
147 entry->kind = iv_stack_entry_const;
148 entry->data.constant = constant;
150 VEC_safe_insert (iv_stack_entry_p, heap, *stack, index, entry);
153 /* Pops and frees an element out of STACK. */
155 static void
156 loop_iv_stack_pop (loop_iv_stack stack)
158 iv_stack_entry_p entry = VEC_pop (iv_stack_entry_p, *stack);
160 free (entry->data.iv);
161 free (entry);
164 /* Get the IV at INDEX in STACK. */
166 static tree
167 loop_iv_stack_get_iv (loop_iv_stack stack, int index)
169 iv_stack_entry_p entry = VEC_index (iv_stack_entry_p, *stack, index);
171 tree result = NULL;
173 if (entry->kind != iv_stack_entry_const)
174 result = entry->data.iv->t;
176 return result;
179 /* Get the IV from its NAME in STACK. */
181 static tree
182 loop_iv_stack_get_iv_from_name (loop_iv_stack stack, const char* name)
184 int i;
185 iv_stack_entry_p entry;
187 for (i = 0; VEC_iterate (iv_stack_entry_p, *stack, i, entry); i++)
189 name_tree iv = entry->data.iv;
190 if (!strcmp (name, iv->name))
191 return iv->t;
194 return NULL;
197 /* Prints on stderr the contents of STACK. */
199 void
200 debug_loop_iv_stack (loop_iv_stack stack)
202 int i;
203 iv_stack_entry_p entry;
204 bool first = true;
206 fprintf (stderr, "(");
208 for (i = 0; VEC_iterate (iv_stack_entry_p, *stack, i, entry); i++)
210 if (first)
211 first = false;
212 else
213 fprintf (stderr, " ");
215 if (iv_stack_entry_is_iv (entry))
217 name_tree iv = entry->data.iv;
218 fprintf (stderr, "%s:", iv->name);
219 print_generic_expr (stderr, iv->t, 0);
221 else
223 tree constant = entry->data.constant;
224 print_generic_expr (stderr, constant, 0);
225 fprintf (stderr, ":");
226 print_generic_expr (stderr, constant, 0);
230 fprintf (stderr, ")\n");
233 /* Frees STACK. */
235 static void
236 free_loop_iv_stack (loop_iv_stack stack)
238 int i;
239 iv_stack_entry_p entry;
241 for (i = 0; VEC_iterate (iv_stack_entry_p, *stack, i, entry); i++)
243 free (entry->data.iv);
244 free (entry);
247 VEC_free (iv_stack_entry_p, heap, *stack);
250 /* Inserts constants derived from the USER_STMT argument list into the
251 STACK. This is needed to map old ivs to constants when loops have
252 been eliminated. */
254 static void
255 loop_iv_stack_patch_for_consts (loop_iv_stack stack,
256 struct clast_user_stmt *user_stmt)
258 struct clast_stmt *t;
259 int index = 0;
260 for (t = user_stmt->substitutions; t; t = t->next)
262 struct clast_term *term = (struct clast_term*)
263 ((struct clast_assignment *)t)->RHS;
265 /* FIXME: What should be done with expr_bin, expr_red? */
266 if (((struct clast_assignment *)t)->RHS->type == expr_term
267 && !term->var)
269 tree value = gmp_cst_to_tree (term->val);
270 loop_iv_stack_insert_constant (stack, index, value);
272 index = index + 1;
276 /* Removes all constants in the iv STACK. */
278 static void
279 loop_iv_stack_remove_constants (loop_iv_stack stack)
281 int i;
282 iv_stack_entry *entry;
284 for (i = 0; VEC_iterate (iv_stack_entry_p, *stack, i, entry);)
286 if (iv_stack_entry_is_constant (entry))
288 free (VEC_index (iv_stack_entry_p, *stack, i));
289 VEC_ordered_remove (iv_stack_entry_p, *stack, i);
291 else
292 i++;
296 /* In SCOP, get the induction variable from NAME. OLD is the original
297 loop that contained the definition of NAME. */
299 static name_tree
300 get_old_iv_from_ssa_name (scop_p scop, loop_p old, tree name)
302 tree var = SSA_NAME_VAR (name);
303 int i;
304 name_tree oldiv;
306 for (i = 0; VEC_iterate (name_tree, SCOP_OLDIVS (scop), i, oldiv); i++)
308 loop_p current = old;
310 while (current)
312 if (var == oldiv->t
313 && oldiv->loop == current)
314 return oldiv;
316 current = loop_outer (current);
319 return NULL;
323 /* Returns a new loop_to_cloog_loop_str structure. */
325 static inline struct loop_to_cloog_loop_str *
326 new_loop_to_cloog_loop_str (int loop_num,
327 int loop_position,
328 CloogLoop *cloog_loop)
330 struct loop_to_cloog_loop_str *result;
332 result = XNEW (struct loop_to_cloog_loop_str);
333 result->loop_num = loop_num;
334 result->cloog_loop = cloog_loop;
335 result->loop_position = loop_position;
337 return result;
340 /* Hash function for SCOP_LOOP2CLOOG_LOOP hash table. */
342 static hashval_t
343 hash_loop_to_cloog_loop (const void *elt)
345 return ((const struct loop_to_cloog_loop_str *) elt)->loop_num;
348 /* Equality function for SCOP_LOOP2CLOOG_LOOP hash table. */
350 static int
351 eq_loop_to_cloog_loop (const void *el1, const void *el2)
353 const struct loop_to_cloog_loop_str *elt1, *elt2;
355 elt1 = (const struct loop_to_cloog_loop_str *) el1;
356 elt2 = (const struct loop_to_cloog_loop_str *) el2;
357 return elt1->loop_num == elt2->loop_num;
360 /* Compares two graphite bbs and returns an integer less than, equal to, or
361 greater than zero if the first argument is considered to be respectively
362 less than, equal to, or greater than the second.
363 We compare using the lexicographic order of the static schedules. */
365 static int
366 gbb_compare (const void *p_1, const void *p_2)
368 const struct graphite_bb *const gbb_1
369 = *(const struct graphite_bb *const*) p_1;
370 const struct graphite_bb *const gbb_2
371 = *(const struct graphite_bb *const*) p_2;
373 return lambda_vector_compare (GBB_STATIC_SCHEDULE (gbb_1),
374 gbb_nb_loops (gbb_1) + 1,
375 GBB_STATIC_SCHEDULE (gbb_2),
376 gbb_nb_loops (gbb_2) + 1);
379 /* Sort graphite bbs in SCOP. */
381 static void
382 graphite_sort_gbbs (scop_p scop)
384 VEC (graphite_bb_p, heap) *bbs = SCOP_BBS (scop);
386 qsort (VEC_address (graphite_bb_p, bbs),
387 VEC_length (graphite_bb_p, bbs),
388 sizeof (graphite_bb_p), gbb_compare);
391 /* Dump conditions of a graphite basic block GBB on FILE. */
393 static void
394 dump_gbb_conditions (FILE *file, graphite_bb_p gbb)
396 int i;
397 gimple stmt;
398 VEC (gimple, heap) *conditions = GBB_CONDITIONS (gbb);
400 if (VEC_empty (gimple, conditions))
401 return;
403 fprintf (file, "\tbb %d\t: cond = {", GBB_BB (gbb)->index);
405 for (i = 0; VEC_iterate (gimple, conditions, i, stmt); i++)
406 print_gimple_stmt (file, stmt, 0, 0);
408 fprintf (file, "}\n");
411 /* Converts the graphite scheduling function into a cloog scattering
412 matrix. This scattering matrix is used to limit the possible cloog
413 output to valid programs in respect to the scheduling function.
415 SCATTERING_DIMENSIONS specifies the dimensionality of the scattering
416 matrix. CLooG 0.14.0 and previous versions require, that all scattering
417 functions of one CloogProgram have the same dimensionality, therefore we
418 allow to specify it. (Should be removed in future versions) */
420 static CloogMatrix *
421 schedule_to_scattering (graphite_bb_p gb, int scattering_dimensions)
423 int i;
424 scop_p scop = GBB_SCOP (gb);
426 int nb_iterators = gbb_nb_loops (gb);
428 /* The cloog scattering matrix consists of these colums:
429 1 col = Eq/Inq,
430 scattering_dimensions cols = Scattering dimensions,
431 nb_iterators cols = bb's iterators,
432 scop_nb_params cols = Parameters,
433 1 col = Constant 1.
435 Example:
437 scattering_dimensions = 5
438 max_nb_iterators = 2
439 nb_iterators = 1
440 scop_nb_params = 2
442 Schedule:
446 Scattering Matrix:
447 s1 s2 s3 s4 s5 i p1 p2 1
448 1 0 0 0 0 0 0 0 -4 = 0
449 0 1 0 0 0 -1 0 0 0 = 0
450 0 0 1 0 0 0 0 0 -5 = 0 */
451 int nb_params = scop_nb_params (scop);
452 int nb_cols = 1 + scattering_dimensions + nb_iterators + nb_params + 1;
453 int col_const = nb_cols - 1;
454 int col_iter_offset = 1 + scattering_dimensions;
456 CloogMatrix *scat = cloog_matrix_alloc (scattering_dimensions, nb_cols);
458 gcc_assert (scattering_dimensions >= nb_iterators * 2 + 1);
460 /* Initialize the identity matrix. */
461 for (i = 0; i < scattering_dimensions; i++)
462 value_set_si (scat->p[i][i + 1], 1);
464 /* Textual order outside the first loop */
465 value_set_si (scat->p[0][col_const], -GBB_STATIC_SCHEDULE (gb)[0]);
467 /* For all surrounding loops. */
468 for (i = 0; i < nb_iterators; i++)
470 int schedule = GBB_STATIC_SCHEDULE (gb)[i + 1];
472 /* Iterations of this loop. */
473 value_set_si (scat->p[2 * i + 1][col_iter_offset + i], -1);
475 /* Textual order inside this loop. */
476 value_set_si (scat->p[2 * i + 2][col_const], -schedule);
479 return scat;
482 /* Print the schedules of GB to FILE with INDENT white spaces before.
483 VERBOSITY determines how verbose the code pretty printers are. */
485 void
486 print_graphite_bb (FILE *file, graphite_bb_p gb, int indent, int verbosity)
488 CloogMatrix *scattering;
489 int i;
490 loop_p loop;
491 fprintf (file, "\nGBB (\n");
493 print_loops_bb (file, GBB_BB (gb), indent+2, verbosity);
495 if (GBB_DOMAIN (gb))
497 fprintf (file, " (domain: \n");
498 cloog_matrix_print (dump_file, GBB_DOMAIN (gb));
499 fprintf (file, " )\n");
502 if (GBB_STATIC_SCHEDULE (gb))
504 fprintf (file, " (static schedule: ");
505 print_lambda_vector (file, GBB_STATIC_SCHEDULE (gb),
506 gbb_nb_loops (gb) + 1);
507 fprintf (file, " )\n");
510 if (GBB_LOOPS (gb))
512 fprintf (file, " (contained loops: \n");
513 for (i = 0; VEC_iterate (loop_p, GBB_LOOPS (gb), i, loop); i++)
514 if (loop == NULL)
515 fprintf (file, " iterator %d => NULL \n", i);
516 else
517 fprintf (file, " iterator %d => loop %d \n", i,
518 loop->num);
519 fprintf (file, " )\n");
522 if (GBB_DATA_REFS (gb))
523 dump_data_references (file, GBB_DATA_REFS (gb));
525 if (GBB_CONDITIONS (gb))
527 fprintf (file, " (conditions: \n");
528 dump_gbb_conditions (dump_file, gb);
529 fprintf (file, " )\n");
532 if (GBB_SCOP (gb)
533 && GBB_STATIC_SCHEDULE (gb))
535 fprintf (file, " (scattering: \n");
536 scattering = schedule_to_scattering (gb, 2 * gbb_nb_loops (gb) + 1);
537 cloog_matrix_print (file, scattering);
538 cloog_matrix_free (scattering);
539 fprintf (file, " )\n");
542 fprintf (file, ")\n");
545 /* Print to STDERR the schedules of GB with VERBOSITY level. */
547 void
548 debug_gbb (graphite_bb_p gb, int verbosity)
550 print_graphite_bb (stderr, gb, 0, verbosity);
554 /* Print SCOP to FILE. VERBOSITY determines how verbose the pretty
555 printers are. */
557 static void
558 print_scop (FILE *file, scop_p scop, int verbosity)
560 if (scop == NULL)
561 return;
563 fprintf (file, "\nSCoP_%d_%d (\n",
564 SCOP_ENTRY (scop)->index, SCOP_EXIT (scop)->index);
566 fprintf (file, " (cloog: \n");
567 cloog_program_print (file, SCOP_PROG (scop));
568 fprintf (file, " )\n");
570 if (SCOP_BBS (scop))
572 graphite_bb_p gb;
573 int i;
575 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++)
576 print_graphite_bb (file, gb, 0, verbosity);
579 fprintf (file, ")\n");
582 /* Print all the SCOPs to FILE. VERBOSITY determines how verbose the
583 code pretty printers are. */
585 static void
586 print_scops (FILE *file, int verbosity)
588 int i;
589 scop_p scop;
591 for (i = 0; VEC_iterate (scop_p, current_scops, i, scop); i++)
592 print_scop (file, scop, verbosity);
595 /* Debug SCOP. VERBOSITY determines how verbose the code pretty
596 printers are. */
598 void
599 debug_scop (scop_p scop, int verbosity)
601 print_scop (stderr, scop, verbosity);
604 /* Debug all SCOPs from CURRENT_SCOPS. VERBOSITY determines how
605 verbose the code pretty printers are. */
607 void
608 debug_scops (int verbosity)
610 print_scops (stderr, verbosity);
613 /* Return true when BB is contained in SCOP. */
615 static inline bool
616 bb_in_scop_p (basic_block bb, scop_p scop)
618 return bitmap_bit_p (SCOP_BBS_B (scop), bb->index);
621 /* Pretty print to FILE the SCOP in DOT format. */
623 static void
624 dot_scop_1 (FILE *file, scop_p scop)
626 edge e;
627 edge_iterator ei;
628 basic_block bb;
629 basic_block entry = SCOP_ENTRY (scop);
630 basic_block exit = SCOP_EXIT (scop);
632 fprintf (file, "digraph SCoP_%d_%d {\n", entry->index,
633 exit->index);
635 FOR_ALL_BB (bb)
637 if (bb == entry)
638 fprintf (file, "%d [shape=triangle];\n", bb->index);
640 if (bb == exit)
641 fprintf (file, "%d [shape=box];\n", bb->index);
643 if (bb_in_scop_p (bb, scop))
644 fprintf (file, "%d [color=red];\n", bb->index);
646 FOR_EACH_EDGE (e, ei, bb->succs)
647 fprintf (file, "%d -> %d;\n", bb->index, e->dest->index);
650 fputs ("}\n\n", file);
653 /* Display SCOP using dotty. */
655 void
656 dot_scop (scop_p scop)
658 dot_scop_1 (stderr, scop);
661 /* Pretty print all SCoPs in DOT format and mark them with different colors.
662 If there are not enough colors, paint later SCoPs gray.
663 Special nodes:
664 - "*" after the node number: entry of a SCoP,
665 - "#" after the node number: exit of a SCoP,
666 - "()" entry or exit not part of SCoP. */
668 static void
669 dot_all_scops_1 (FILE *file)
671 basic_block bb;
672 edge e;
673 edge_iterator ei;
674 scop_p scop;
675 const char* color;
676 int i;
678 /* Disable debugging while printing graph. */
679 int tmp_dump_flags = dump_flags;
680 dump_flags = 0;
682 fprintf (file, "digraph all {\n");
684 FOR_ALL_BB (bb)
686 int part_of_scop = false;
688 /* Use HTML for every bb label. So we are able to print bbs
689 which are part of two different SCoPs, with two different
690 background colors. */
691 fprintf (file, "%d [label=<\n <TABLE BORDER=\"0\" CELLBORDER=\"1\" ",
692 bb->index);
693 fprintf (file, "CELLSPACING=\"0\">\n");
695 /* Select color for SCoP. */
696 for (i = 0; VEC_iterate (scop_p, current_scops, i, scop); i++)
697 if (bb_in_scop_p (bb, scop)
698 || (SCOP_EXIT (scop) == bb)
699 || (SCOP_ENTRY (scop) == bb))
701 switch (i % 17)
703 case 0: /* red */
704 color = "#e41a1c";
705 break;
706 case 1: /* blue */
707 color = "#377eb8";
708 break;
709 case 2: /* green */
710 color = "#4daf4a";
711 break;
712 case 3: /* purple */
713 color = "#984ea3";
714 break;
715 case 4: /* orange */
716 color = "#ff7f00";
717 break;
718 case 5: /* yellow */
719 color = "#ffff33";
720 break;
721 case 6: /* brown */
722 color = "#a65628";
723 break;
724 case 7: /* rose */
725 color = "#f781bf";
726 break;
727 case 8:
728 color = "#8dd3c7";
729 break;
730 case 9:
731 color = "#ffffb3";
732 break;
733 case 10:
734 color = "#bebada";
735 break;
736 case 11:
737 color = "#fb8072";
738 break;
739 case 12:
740 color = "#80b1d3";
741 break;
742 case 13:
743 color = "#fdb462";
744 break;
745 case 14:
746 color = "#b3de69";
747 break;
748 case 15:
749 color = "#fccde5";
750 break;
751 case 16:
752 color = "#bc80bd";
753 break;
754 default: /* gray */
755 color = "#999999";
758 fprintf (file, " <TR><TD WIDTH=\"50\" BGCOLOR=\"%s\">", color);
760 if (!bb_in_scop_p (bb, scop))
761 fprintf (file, " (");
763 if (bb == SCOP_ENTRY (scop)
764 && bb == SCOP_EXIT (scop))
765 fprintf (file, " %d*# ", bb->index);
766 else if (bb == SCOP_ENTRY (scop))
767 fprintf (file, " %d* ", bb->index);
768 else if (bb == SCOP_EXIT (scop))
769 fprintf (file, " %d# ", bb->index);
770 else
771 fprintf (file, " %d ", bb->index);
773 if (!bb_in_scop_p (bb, scop))
774 fprintf (file, ")");
776 fprintf (file, "</TD></TR>\n");
777 part_of_scop = true;
780 if (!part_of_scop)
782 fprintf (file, " <TR><TD WIDTH=\"50\" BGCOLOR=\"#ffffff\">");
783 fprintf (file, " %d </TD></TR>\n", bb->index);
786 fprintf (file, " </TABLE>>, shape=box, style=\"setlinewidth(0)\"]\n");
789 FOR_ALL_BB (bb)
791 FOR_EACH_EDGE (e, ei, bb->succs)
792 fprintf (file, "%d -> %d;\n", bb->index, e->dest->index);
795 fputs ("}\n\n", file);
797 /* Enable debugging again. */
798 dump_flags = tmp_dump_flags;
801 /* Display all SCoPs using dotty. */
803 void
804 dot_all_scops (void)
806 /* When debugging, enable the following code. This cannot be used
807 in production compilers because it calls "system". */
808 #if 0
809 FILE *stream = fopen ("/tmp/allscops.dot", "w");
810 gcc_assert (stream);
812 dot_all_scops_1 (stream);
813 fclose (stream);
815 system ("dotty /tmp/allscops.dot");
816 #else
817 dot_all_scops_1 (stderr);
818 #endif
821 /* Returns true when LOOP is in SCOP. */
823 static inline bool
824 loop_in_scop_p (struct loop *loop, scop_p scop)
826 return (bb_in_scop_p (loop->header, scop)
827 && bb_in_scop_p (loop->latch, scop));
830 /* Returns the outermost loop in SCOP that contains BB. */
832 static struct loop *
833 outermost_loop_in_scop (scop_p scop, basic_block bb)
835 struct loop *nest;
837 nest = bb->loop_father;
838 while (loop_outer (nest) && loop_in_scop_p (loop_outer (nest), scop))
839 nest = loop_outer (nest);
841 return nest;
844 /* Returns the block preceding the entry of SCOP. */
846 static basic_block
847 block_before_scop (scop_p scop)
849 return SESE_ENTRY (SCOP_REGION (scop))->src;
852 /* Return true when EXPR is an affine function in LOOP with parameters
853 instantiated relative to SCOP_ENTRY. */
855 static bool
856 loop_affine_expr (basic_block scop_entry, struct loop *loop, tree expr)
858 int n = loop->num;
859 tree scev = analyze_scalar_evolution (loop, expr);
861 scev = instantiate_scev (scop_entry, loop, scev);
863 return (evolution_function_is_invariant_p (scev, n)
864 || evolution_function_is_affine_multivariate_p (scev, n));
867 /* Return false if the tree_code of the operand OP or any of its operands
868 is component_ref. */
870 static bool
871 exclude_component_ref (tree op)
873 int i;
874 int len;
876 if (op)
878 if (TREE_CODE (op) == COMPONENT_REF)
879 return false;
880 else
882 len = TREE_OPERAND_LENGTH (op);
883 for (i = 0; i < len; ++i)
885 if (!exclude_component_ref (TREE_OPERAND (op, i)))
886 return false;
891 return true;
894 /* Return true if the operand OP is simple. */
896 static bool
897 is_simple_operand (loop_p loop, gimple stmt, tree op)
899 /* It is not a simple operand when it is a declaration, */
900 if (DECL_P (op)
901 /* or a structure, */
902 || AGGREGATE_TYPE_P (TREE_TYPE (op))
903 /* or a memory access that cannot be analyzed by the data
904 reference analysis. */
905 || ((handled_component_p (op) || INDIRECT_REF_P (op))
906 && !stmt_simple_memref_p (loop, stmt, op)))
907 return false;
909 return exclude_component_ref (op);
912 /* Return true only when STMT is simple enough for being handled by
913 Graphite. This depends on SCOP_ENTRY, as the parametetrs are
914 initialized relatively to this basic block. */
916 static bool
917 stmt_simple_for_scop_p (basic_block scop_entry, gimple stmt)
919 basic_block bb = gimple_bb (stmt);
920 struct loop *loop = bb->loop_father;
922 /* GIMPLE_ASM and GIMPLE_CALL may embed arbitrary side effects.
923 Calls have side-effects, except those to const or pure
924 functions. */
925 if (gimple_has_volatile_ops (stmt)
926 || (gimple_code (stmt) == GIMPLE_CALL
927 && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)))
928 || (gimple_code (stmt) == GIMPLE_ASM))
929 return false;
931 switch (gimple_code (stmt))
933 case GIMPLE_RETURN:
934 case GIMPLE_LABEL:
935 return true;
937 case GIMPLE_COND:
939 tree op;
940 ssa_op_iter op_iter;
941 enum tree_code code = gimple_cond_code (stmt);
943 /* We can only handle this kind of conditional expressions.
944 For inequalities like "if (i != 3 * k)" we need unions of
945 polyhedrons. Expressions like "if (a)" or "if (a == 15)" need
946 them for the else branch. */
947 if (!(code == LT_EXPR
948 || code == GT_EXPR
949 || code == LE_EXPR
950 || code == GE_EXPR))
951 return false;
953 if (!scop_entry)
954 return false;
956 FOR_EACH_SSA_TREE_OPERAND (op, stmt, op_iter, SSA_OP_ALL_USES)
957 if (!loop_affine_expr (scop_entry, loop, op))
958 return false;
960 return true;
963 case GIMPLE_ASSIGN:
965 enum tree_code code = gimple_assign_rhs_code (stmt);
967 switch (get_gimple_rhs_class (code))
969 case GIMPLE_UNARY_RHS:
970 case GIMPLE_SINGLE_RHS:
971 return (is_simple_operand (loop, stmt, gimple_assign_lhs (stmt))
972 && is_simple_operand (loop, stmt, gimple_assign_rhs1 (stmt)));
974 case GIMPLE_BINARY_RHS:
975 return (is_simple_operand (loop, stmt, gimple_assign_lhs (stmt))
976 && is_simple_operand (loop, stmt, gimple_assign_rhs1 (stmt))
977 && is_simple_operand (loop, stmt, gimple_assign_rhs2 (stmt)));
979 case GIMPLE_INVALID_RHS:
980 default:
981 gcc_unreachable ();
985 case GIMPLE_CALL:
987 size_t i;
988 size_t n = gimple_call_num_args (stmt);
989 tree lhs = gimple_call_lhs (stmt);
991 for (i = 0; i < n; i++)
993 tree arg = gimple_call_arg (stmt, i);
995 if (!(is_simple_operand (loop, stmt, lhs)
996 && is_simple_operand (loop, stmt, arg)))
997 return false;
1000 return true;
1003 default:
1004 /* These nodes cut a new scope. */
1005 return false;
1008 return false;
1011 /* Returns the statement of BB that contains a harmful operation: that
1012 can be a function call with side effects, the induction variables
1013 are not linear with respect to SCOP_ENTRY, etc. The current open
1014 scop should end before this statement. */
1016 static gimple
1017 harmful_stmt_in_bb (basic_block scop_entry, basic_block bb)
1019 gimple_stmt_iterator gsi;
1021 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1022 if (!stmt_simple_for_scop_p (scop_entry, gsi_stmt (gsi)))
1023 return gsi_stmt (gsi);
1025 return NULL;
1028 /* Store the GRAPHITE representation of BB. */
1030 static void
1031 new_graphite_bb (scop_p scop, basic_block bb)
1033 struct graphite_bb *gbb = XNEW (struct graphite_bb);
1035 bb->aux = gbb;
1036 GBB_BB (gbb) = bb;
1037 GBB_SCOP (gbb) = scop;
1038 GBB_DATA_REFS (gbb) = NULL;
1039 GBB_DOMAIN (gbb) = NULL;
1040 GBB_CONDITIONS (gbb) = NULL;
1041 GBB_CONDITION_CASES (gbb) = NULL;
1042 GBB_LOOPS (gbb) = NULL;
1043 VEC_safe_push (graphite_bb_p, heap, SCOP_BBS (scop), gbb);
1044 bitmap_set_bit (SCOP_BBS_B (scop), bb->index);
1047 /* Frees GBB. */
1049 static void
1050 free_graphite_bb (struct graphite_bb *gbb)
1052 if (GBB_DOMAIN (gbb))
1053 cloog_matrix_free (GBB_DOMAIN (gbb));
1055 free_data_refs (GBB_DATA_REFS (gbb));
1056 VEC_free (gimple, heap, GBB_CONDITIONS (gbb));
1057 VEC_free (gimple, heap, GBB_CONDITION_CASES (gbb));
1058 VEC_free (loop_p, heap, GBB_LOOPS (gbb));
1060 GBB_BB (gbb)->aux = 0;
1061 XDELETE (gbb);
1064 /* Creates a new scop starting with ENTRY. */
1066 static scop_p
1067 new_scop (edge entry, edge exit)
1069 scop_p scop = XNEW (struct scop);
1071 gcc_assert (entry && exit);
1073 SCOP_REGION (scop) = XNEW (struct sese);
1074 SESE_ENTRY (SCOP_REGION (scop)) = entry;
1075 SESE_EXIT (SCOP_REGION (scop)) = exit;
1076 SCOP_BBS (scop) = VEC_alloc (graphite_bb_p, heap, 3);
1077 SCOP_OLDIVS (scop) = VEC_alloc (name_tree, heap, 3);
1078 SCOP_BBS_B (scop) = BITMAP_ALLOC (NULL);
1079 SCOP_LOOPS (scop) = BITMAP_ALLOC (NULL);
1080 SCOP_LOOP_NEST (scop) = VEC_alloc (loop_p, heap, 3);
1081 SCOP_PARAMS (scop) = VEC_alloc (name_tree, heap, 3);
1082 SCOP_PROG (scop) = cloog_program_malloc ();
1083 cloog_program_set_names (SCOP_PROG (scop), cloog_names_malloc ());
1084 SCOP_LOOP2CLOOG_LOOP (scop) = htab_create (10, hash_loop_to_cloog_loop,
1085 eq_loop_to_cloog_loop,
1086 free);
1087 return scop;
1090 /* Deletes SCOP. */
1092 static void
1093 free_scop (scop_p scop)
1095 int i;
1096 name_tree p;
1097 struct graphite_bb *gb;
1098 name_tree iv;
1100 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++)
1101 free_graphite_bb (gb);
1103 VEC_free (graphite_bb_p, heap, SCOP_BBS (scop));
1104 BITMAP_FREE (SCOP_BBS_B (scop));
1105 BITMAP_FREE (SCOP_LOOPS (scop));
1106 VEC_free (loop_p, heap, SCOP_LOOP_NEST (scop));
1108 for (i = 0; VEC_iterate (name_tree, SCOP_OLDIVS (scop), i, iv); i++)
1109 free (iv);
1110 VEC_free (name_tree, heap, SCOP_OLDIVS (scop));
1112 for (i = 0; VEC_iterate (name_tree, SCOP_PARAMS (scop), i, p); i++)
1113 free (p);
1115 VEC_free (name_tree, heap, SCOP_PARAMS (scop));
1116 cloog_program_free (SCOP_PROG (scop));
1117 htab_delete (SCOP_LOOP2CLOOG_LOOP (scop));
1118 XDELETE (SCOP_REGION (scop));
1119 XDELETE (scop);
1122 /* Deletes all scops in SCOPS. */
1124 static void
1125 free_scops (VEC (scop_p, heap) *scops)
1127 int i;
1128 scop_p scop;
1130 for (i = 0; VEC_iterate (scop_p, scops, i, scop); i++)
1131 free_scop (scop);
1133 VEC_free (scop_p, heap, scops);
1136 typedef enum gbb_type {
1137 GBB_UNKNOWN,
1138 GBB_LOOP_SING_EXIT_HEADER,
1139 GBB_LOOP_MULT_EXIT_HEADER,
1140 GBB_LOOP_EXIT,
1141 GBB_COND_HEADER,
1142 GBB_SIMPLE,
1143 GBB_LAST
1144 } gbb_type;
1146 /* Detect the type of BB. Loop headers are only marked, if they are
1147 new. This means their loop_father is different to LAST_LOOP.
1148 Otherwise they are treated like any other bb and their type can be
1149 any other type. */
1151 static gbb_type
1152 get_bb_type (basic_block bb, struct loop *last_loop)
1154 VEC (basic_block, heap) *dom;
1155 int nb_dom, nb_suc;
1156 struct loop *loop = bb->loop_father;
1158 /* Check, if we entry into a new loop. */
1159 if (loop != last_loop)
1161 if (single_exit (loop) != NULL)
1162 return GBB_LOOP_SING_EXIT_HEADER;
1163 else if (loop->num != 0)
1164 return GBB_LOOP_MULT_EXIT_HEADER;
1165 else
1166 return GBB_COND_HEADER;
1169 dom = get_dominated_by (CDI_DOMINATORS, bb);
1170 nb_dom = VEC_length (basic_block, dom);
1171 VEC_free (basic_block, heap, dom);
1173 if (nb_dom == 0)
1174 return GBB_LAST;
1176 nb_suc = VEC_length (edge, bb->succs);
1178 if (nb_dom == 1 && nb_suc == 1)
1179 return GBB_SIMPLE;
1181 return GBB_COND_HEADER;
1184 /* A SCoP detection region, defined using bbs as borders.
1185 All control flow touching this region, comes in passing basic_block ENTRY and
1186 leaves passing basic_block EXIT. By using bbs instead of edges for the
1187 borders we are able to represent also regions that do not have a single
1188 entry or exit edge.
1189 But as they have a single entry basic_block and a single exit basic_block, we
1190 are able to generate for every sd_region a single entry and exit edge.
1194 3 <- entry
1197 / \ This region contains: {3, 4, 5, 6, 7, 8}
1202 9 <- exit */
1205 typedef struct sd_region_p
1207 /* The entry bb dominates all bbs in the sd_region. It is part of the
1208 region. */
1209 basic_block entry;
1211 /* The exit bb postdominates all bbs in the sd_region, but is not
1212 part of the region. */
1213 basic_block exit;
1214 } sd_region;
1216 DEF_VEC_O(sd_region);
1217 DEF_VEC_ALLOC_O(sd_region, heap);
1220 /* Moves the scops from SOURCE to TARGET and clean up SOURCE. */
1222 static void
1223 move_sd_regions (VEC (sd_region, heap) **source, VEC (sd_region, heap) **target)
1225 sd_region *s;
1226 int i;
1228 for (i = 0; VEC_iterate (sd_region, *source, i, s); i++)
1229 VEC_safe_push (sd_region, heap, *target, s);
1231 VEC_free (sd_region, heap, *source);
1234 /* Store information needed by scopdet_* functions. */
1236 struct scopdet_info
1238 /* Where the last open scop would stop if the current BB is harmful. */
1239 basic_block last;
1241 /* Where the next scop would start if the current BB is harmful. */
1242 basic_block next;
1244 /* The bb or one of its children contains open loop exits. That means
1245 loop exit nodes that are not surrounded by a loop dominated by bb. */
1246 bool exits;
1248 /* The bb or one of its children contains only structures we can handle. */
1249 bool difficult;
1253 static struct scopdet_info build_scops_1 (basic_block, VEC (sd_region, heap) **,
1254 loop_p);
1256 /* Calculates BB infos. If bb is difficult we add valid SCoPs dominated by BB
1257 to SCOPS. TYPE is the gbb_type of BB. */
1259 static struct scopdet_info
1260 scopdet_basic_block_info (basic_block bb, VEC (sd_region, heap) **scops,
1261 gbb_type type)
1263 struct loop *loop = bb->loop_father;
1264 struct scopdet_info result;
1265 gimple stmt;
1267 /* XXX: ENTRY_BLOCK_PTR could be optimized in later steps. */
1268 stmt = harmful_stmt_in_bb (ENTRY_BLOCK_PTR, bb);
1269 result.difficult = (stmt != NULL);
1270 result.last = NULL;
1272 switch (type)
1274 case GBB_LAST:
1275 result.next = NULL;
1276 result.exits = false;
1277 result.last = bb;
1278 break;
1280 case GBB_SIMPLE:
1281 result.next = single_succ (bb);
1282 result.exits = false;
1283 result.last = bb;
1284 break;
1286 case GBB_LOOP_SING_EXIT_HEADER:
1288 VEC (sd_region, heap) *tmp_scops = VEC_alloc (sd_region, heap,3);
1289 struct scopdet_info sinfo;
1291 sinfo = build_scops_1 (bb, &tmp_scops, loop);
1293 result.last = single_exit (bb->loop_father)->src;
1294 result.next = single_exit (bb->loop_father)->dest;
1296 /* If we do not dominate result.next, remove it. It's either
1297 the EXIT_BLOCK_PTR, or another bb dominates it and will
1298 call the scop detection for this bb. */
1299 if (!dominated_by_p (CDI_DOMINATORS, result.next, bb))
1300 result.next = NULL;
1302 if (result.last->loop_father != loop)
1303 result.next = NULL;
1305 if (TREE_CODE (number_of_latch_executions (loop))
1306 == SCEV_NOT_KNOWN)
1307 result.difficult = true;
1309 if (sinfo.difficult)
1310 move_sd_regions (&tmp_scops, scops);
1311 else
1312 VEC_free (sd_region, heap, tmp_scops);
1314 result.exits = false;
1315 result.difficult |= sinfo.difficult;
1316 break;
1319 case GBB_LOOP_MULT_EXIT_HEADER:
1321 /* XXX: For now we just do not join loops with multiple exits. If the
1322 exits lead to the same bb it may be possible to join the loop. */
1323 VEC (sd_region, heap) *tmp_scops = VEC_alloc (sd_region, heap, 3);
1324 VEC (edge, heap) *exits = get_loop_exit_edges (loop);
1325 edge e;
1326 int i;
1327 build_scops_1 (bb, &tmp_scops, loop);
1330 /* Start at all bbs dominated by a loop exit that only exists in this
1331 loop. */
1332 for (i = 0; VEC_iterate (edge, exits, i, e); i++)
1333 if (e->src->loop_father == loop)
1335 VEC (basic_block, heap) *dominated;
1336 basic_block b;
1337 int j;
1338 dominated = get_dominated_by (CDI_DOMINATORS, e->src);
1339 for (j = 0; VEC_iterate (basic_block, dominated, j, b); j++)
1340 /* Loop exit. */
1341 if (loop_depth (find_common_loop (loop, b->loop_father))
1342 < loop_depth (loop))
1344 /* Pass loop_outer to recognize b as loop header in
1345 build_scops_1. */
1346 if (b->loop_father->header == b)
1347 build_scops_1 (b, &tmp_scops, loop_outer (b->loop_father));
1348 else
1349 build_scops_1 (b, &tmp_scops, b->loop_father);
1353 result.next = NULL;
1354 result.last = NULL;
1355 result.difficult = true;
1356 result.exits = false;
1357 move_sd_regions (&tmp_scops, scops);
1358 VEC_free (edge, heap, exits);
1359 break;
1361 case GBB_COND_HEADER:
1363 VEC (sd_region, heap) *tmp_scops = VEC_alloc (sd_region, heap, 3);
1364 struct scopdet_info sinfo;
1365 VEC (basic_block, heap) *dominated;
1366 int i;
1367 basic_block dom_bb;
1368 basic_block last_bb = NULL;
1369 edge e;
1370 result.exits = false;
1372 /* First check the successors of BB, and check if it is possible to join
1373 the different branches. */
1374 for (i = 0; VEC_iterate (edge, bb->succs, i, e); i++)
1376 /* Ignore loop exits. They will be handled after the loop body. */
1377 if (is_loop_exit (loop, e->dest))
1379 result.exits = true;
1380 continue;
1383 /* Do not follow edges that lead to the end of the
1384 conditions block. For example, in
1387 | /|\
1388 | 1 2 |
1389 | | | |
1390 | 3 4 |
1391 | \|/
1394 the edge from 0 => 6. Only check if all paths lead to
1395 the same node 6. */
1397 if (!single_pred_p (e->dest))
1399 /* Check, if edge leads directly to the end of this
1400 condition. */
1401 if (!last_bb)
1403 last_bb = e->dest;
1406 if (e->dest != last_bb)
1407 result.difficult = true;
1409 continue;
1412 if (!dominated_by_p (CDI_DOMINATORS, e->dest, bb))
1414 result.difficult = true;
1415 continue;
1418 sinfo = build_scops_1 (e->dest, &tmp_scops, loop);
1420 result.exits |= sinfo.exits;
1421 result.last = sinfo.last;
1422 result.difficult |= sinfo.difficult;
1424 /* Checks, if all branches end at the same point.
1425 If that is true, the condition stays joinable.
1426 Have a look at the example above. */
1427 if (sinfo.last && single_succ_p (sinfo.last))
1429 basic_block next_tmp = single_succ (sinfo.last);
1431 if (!last_bb)
1432 last_bb = next_tmp;
1434 if (next_tmp != last_bb)
1435 result.difficult = true;
1437 else
1438 result.difficult = true;
1441 /* If the condition is joinable. */
1442 if (!result.exits && !result.difficult)
1444 /* Only return a next pointer if we dominate this pointer.
1445 Otherwise it will be handled by the bb dominating it. */
1446 if (dominated_by_p (CDI_DOMINATORS, last_bb, bb) && last_bb != bb)
1447 result.next = last_bb;
1448 else
1449 result.next = NULL;
1451 VEC_free (sd_region, heap, tmp_scops);
1452 break;
1455 /* Scan remaining bbs dominated by BB. */
1456 dominated = get_dominated_by (CDI_DOMINATORS, bb);
1458 for (i = 0; VEC_iterate (basic_block, dominated, i, dom_bb); i++)
1460 /* Ignore loop exits: they will be handled after the loop body. */
1461 if (loop_depth (find_common_loop (loop, dom_bb->loop_father))
1462 < loop_depth (loop))
1464 result.exits = true;
1465 continue;
1468 /* Ignore the bbs processed above. */
1469 if (single_pred_p (dom_bb) && single_pred (dom_bb) == bb)
1470 continue;
1472 if (loop_depth (loop) > loop_depth (dom_bb->loop_father))
1473 sinfo = build_scops_1 (dom_bb, &tmp_scops, loop_outer (loop));
1474 else
1475 sinfo = build_scops_1 (dom_bb, &tmp_scops, loop);
1478 result.exits |= sinfo.exits;
1479 result.difficult = true;
1480 result.last = NULL;
1483 VEC_free (basic_block, heap, dominated);
1485 result.next = NULL;
1486 move_sd_regions (&tmp_scops, scops);
1488 break;
1491 default:
1492 gcc_unreachable ();
1495 return result;
1498 /* Creates the SCoPs and writes entry and exit points for every SCoP. */
1500 static struct scopdet_info
1501 build_scops_1 (basic_block current, VEC (sd_region, heap) **scops, loop_p loop)
1504 bool in_scop = false;
1505 sd_region open_scop;
1506 struct scopdet_info sinfo;
1508 /* Initialize result. */
1509 struct scopdet_info result;
1510 result.exits = false;
1511 result.difficult = false;
1512 result.next = NULL;
1513 result.last = NULL;
1514 open_scop.entry = NULL;
1516 /* Loop over the dominance tree. If we meet a difficult bb, close
1517 the current SCoP. Loop and condition header start a new layer,
1518 and can only be added if all bbs in deeper layers are simple. */
1519 while (current != NULL)
1521 sinfo = scopdet_basic_block_info (current, scops, get_bb_type (current,
1522 loop));
1524 if (!in_scop && !(sinfo.exits || sinfo.difficult))
1526 open_scop.entry = current;
1527 open_scop.exit = NULL;
1528 in_scop = true;
1530 else if (in_scop && (sinfo.exits || sinfo.difficult))
1532 open_scop.exit = current;
1533 VEC_safe_push (sd_region, heap, *scops, &open_scop);
1534 in_scop = false;
1537 result.difficult |= sinfo.difficult;
1538 result.exits |= sinfo.exits;
1540 current = sinfo.next;
1543 /* Try to close open_scop, if we are still in an open SCoP. */
1544 if (in_scop)
1546 int i;
1547 edge e;
1549 for (i = 0; VEC_iterate (edge, sinfo.last->succs, i, e); i++)
1550 if (dominated_by_p (CDI_POST_DOMINATORS, sinfo.last, e->dest))
1551 open_scop.exit = e->dest;
1553 if (!open_scop.exit && open_scop.entry != sinfo.last)
1554 open_scop.exit = sinfo.last;
1556 if (open_scop.exit)
1557 VEC_safe_push (sd_region, heap, *scops, &open_scop);
1561 result.last = sinfo.last;
1562 return result;
1565 /* Checks if a bb is contained in REGION. */
1567 static bool
1568 bb_in_sd_region (basic_block bb, sd_region *region)
1570 return dominated_by_p (CDI_DOMINATORS, bb, region->entry)
1571 && !(dominated_by_p (CDI_DOMINATORS, bb, region->exit)
1572 && !dominated_by_p (CDI_DOMINATORS, region->entry,
1573 region->exit));
1576 /* Returns the single entry edge of REGION, if it does not exits NULL. */
1578 static edge
1579 find_single_entry_edge (sd_region *region)
1581 edge e;
1582 edge_iterator ei;
1583 edge entry = NULL;
1585 FOR_EACH_EDGE (e, ei, region->entry->preds)
1586 if (!bb_in_sd_region (e->src, region))
1588 if (entry)
1590 entry = NULL;
1591 break;
1594 else
1595 entry = e;
1598 return entry;
1601 /* Returns the single exit edge of REGION, if it does not exits NULL. */
1603 static edge
1604 find_single_exit_edge (sd_region *region)
1606 edge e;
1607 edge_iterator ei;
1608 edge exit = NULL;
1610 FOR_EACH_EDGE (e, ei, region->exit->preds)
1611 if (bb_in_sd_region (e->src, region))
1613 if (exit)
1615 exit = NULL;
1616 break;
1619 else
1620 exit = e;
1623 return exit;
1626 /* Create a single entry edge for REGION. */
1628 static void
1629 create_single_entry_edge (sd_region *region)
1631 if (find_single_entry_edge (region))
1632 return;
1634 /* There are multiple predecessors for bb_3
1636 | 1 2
1637 | | /
1638 | |/
1639 | 3 <- entry
1640 | |\
1641 | | |
1642 | 4 ^
1643 | | |
1644 | |/
1647 There are two edges (1->3, 2->3), that point from outside into the region,
1648 and another one (5->3), a loop latch, lead to bb_3.
1650 We split bb_3.
1652 | 1 2
1653 | | /
1654 | |/
1655 |3.0
1656 | |\ (3.0 -> 3.1) = single entry edge
1657 |3.1 | <- entry
1658 | | |
1659 | | |
1660 | 4 ^
1661 | | |
1662 | |/
1665 If the loop is part of the SCoP, we have to redirect the loop latches.
1667 | 1 2
1668 | | /
1669 | |/
1670 |3.0
1671 | | (3.0 -> 3.1) = entry edge
1672 |3.1 <- entry
1673 | |\
1674 | | |
1675 | 4 ^
1676 | | |
1677 | |/
1678 | 5 */
1680 if (region->entry->loop_father->header != region->entry
1681 || dominated_by_p (CDI_DOMINATORS,
1682 loop_latch_edge (region->entry->loop_father)->src,
1683 region->exit))
1685 edge forwarder = split_block_after_labels (region->entry);
1686 region->entry = forwarder->dest;
1688 else
1689 /* This case is never executed, as the loop headers seem always to have a
1690 single edge pointing from outside into the loop. */
1691 gcc_unreachable ();
1693 #ifdef ENABLE_CHECKING
1694 gcc_assert (find_single_entry_edge (region));
1695 #endif
1698 /* Check if the sd_region, mentioned in EDGE, has no exit bb. */
1700 static bool
1701 sd_region_without_exit (edge e)
1703 sd_region *r = (sd_region *) e->aux;
1705 if (r)
1706 return r->exit == NULL;
1707 else
1708 return false;
1711 /* Create a single exit edge for REGION. */
1713 static void
1714 create_single_exit_edge (sd_region *region)
1716 edge e;
1717 edge_iterator ei;
1718 edge forwarder = NULL;
1719 basic_block exit;
1721 if (find_single_exit_edge (region))
1722 return;
1724 /* We create a forwarder bb (5) for all edges leaving this region
1725 (3->5, 4->5). All other edges leading to the same bb, are moved
1726 to a new bb (6). If these edges where part of another region (2->5)
1727 we update the region->exit pointer, of this region.
1729 To identify which edge belongs to which region we depend on the e->aux
1730 pointer in every edge. It points to the region of the edge or to NULL,
1731 if the edge is not part of any region.
1733 1 2 3 4 1->5 no region, 2->5 region->exit = 5,
1734 \| |/ 3->5 region->exit = NULL, 4->5 region->exit = NULL
1735 5 <- exit
1737 changes to
1739 1 2 3 4 1->6 no region, 2->6 region->exit = 6,
1740 | | \/ 3->5 no region, 4->5 no region,
1741 | | 5
1742 \| / 5->6 region->exit = 6
1745 Now there is only a single exit edge (5->6). */
1746 exit = region->exit;
1747 region->exit = NULL;
1748 forwarder = make_forwarder_block (exit, &sd_region_without_exit, NULL);
1750 /* Unmark the edges, that are no longer exit edges. */
1751 FOR_EACH_EDGE (e, ei, forwarder->src->preds)
1752 if (e->aux)
1753 e->aux = NULL;
1755 /* Mark the new exit edge. */
1756 single_succ_edge (forwarder->src)->aux = region;
1758 /* Update the exit bb of all regions, where exit edges lead to
1759 forwarder->dest. */
1760 FOR_EACH_EDGE (e, ei, forwarder->dest->preds)
1761 if (e->aux)
1762 ((sd_region *) e->aux)->exit = forwarder->dest;
1764 #ifdef ENABLE_CHECKING
1765 gcc_assert (find_single_exit_edge (region));
1766 #endif
1769 /* Unmark the exit edges of all REGIONS.
1770 See comment in "create_single_exit_edge". */
1772 static void
1773 unmark_exit_edges (VEC (sd_region, heap) *regions)
1775 int i;
1776 sd_region *s;
1777 edge e;
1778 edge_iterator ei;
1780 for (i = 0; VEC_iterate (sd_region, regions, i, s); i++)
1781 FOR_EACH_EDGE (e, ei, s->exit->preds)
1782 e->aux = NULL;
1786 /* Mark the exit edges of all REGIONS.
1787 See comment in "create_single_exit_edge". */
1789 static void
1790 mark_exit_edges (VEC (sd_region, heap) *regions)
1792 int i;
1793 sd_region *s;
1794 edge e;
1795 edge_iterator ei;
1797 for (i = 0; VEC_iterate (sd_region, regions, i, s); i++)
1798 FOR_EACH_EDGE (e, ei, s->exit->preds)
1799 if (bb_in_sd_region (e->src, s))
1800 e->aux = s;
1804 /* Create for all scop regions a single entry and a single exit edge. */
1806 static void
1807 create_sese_edges (VEC (sd_region, heap) *regions)
1809 int i;
1810 sd_region *s;
1812 for (i = 0; VEC_iterate (sd_region, regions, i, s); i++)
1813 create_single_entry_edge (s);
1815 mark_exit_edges (regions);
1817 for (i = 0; VEC_iterate (sd_region, regions, i, s); i++)
1818 create_single_exit_edge (s);
1820 unmark_exit_edges (regions);
1822 #ifdef ENABLE_CHECKING
1823 verify_loop_structure ();
1824 verify_dominators (CDI_DOMINATORS);
1825 verify_ssa (false);
1826 #endif
1829 /* Create graphite SCoPs from an array of scop detection regions. */
1831 static void
1832 build_graphite_scops (VEC (sd_region, heap) *scop_regions)
1834 int i;
1835 sd_region *s;
1837 for (i = 0; VEC_iterate (sd_region, scop_regions, i, s); i++)
1839 edge entry = find_single_entry_edge (s);
1840 edge exit = find_single_exit_edge (s);
1841 scop_p scop = new_scop (entry, exit);
1842 VEC_safe_push (scop_p, heap, current_scops, scop);
1844 /* Are there overlapping SCoPs? */
1845 #ifdef ENABLE_CHECKING
1847 int j;
1848 sd_region *s2;
1850 for (j = 0; VEC_iterate (sd_region, scop_regions, j, s2); j++)
1851 if (s != s2)
1852 gcc_assert (!bb_in_sd_region (s->entry, s2));
1854 #endif
1858 /* Find static control parts. */
1860 static void
1861 build_scops (void)
1863 struct loop *loop = current_loops->tree_root;
1864 VEC (sd_region, heap) *tmp_scops = VEC_alloc (sd_region, heap, 3);
1866 build_scops_1 (single_succ (ENTRY_BLOCK_PTR), &tmp_scops, loop);
1867 create_sese_edges (tmp_scops);
1868 build_graphite_scops (tmp_scops);
1869 VEC_free (sd_region, heap, tmp_scops);
1872 /* Gather the basic blocks belonging to the SCOP. */
1874 static void
1875 build_scop_bbs (scop_p scop)
1877 basic_block *stack = XNEWVEC (basic_block, n_basic_blocks + 1);
1878 sbitmap visited = sbitmap_alloc (last_basic_block);
1879 int sp = 0;
1881 sbitmap_zero (visited);
1882 stack[sp++] = SCOP_ENTRY (scop);
1884 while (sp)
1886 basic_block bb = stack[--sp];
1887 int depth = loop_depth (bb->loop_father);
1888 int num = bb->loop_father->num;
1889 edge_iterator ei;
1890 edge e;
1892 /* Scop's exit is not in the scop. Exclude also bbs, which are
1893 dominated by the SCoP exit. These are e.g. loop latches. */
1894 if (TEST_BIT (visited, bb->index)
1895 || dominated_by_p (CDI_DOMINATORS, bb, SCOP_EXIT (scop))
1896 /* Every block in the scop is dominated by scop's entry. */
1897 || !dominated_by_p (CDI_DOMINATORS, bb, SCOP_ENTRY (scop)))
1898 continue;
1900 new_graphite_bb (scop, bb);
1901 SET_BIT (visited, bb->index);
1903 /* First push the blocks that have to be processed last. Note
1904 that this means that the order in which the code is organized
1905 below is important: do not reorder the following code. */
1906 FOR_EACH_EDGE (e, ei, bb->succs)
1907 if (! TEST_BIT (visited, e->dest->index)
1908 && (int) loop_depth (e->dest->loop_father) < depth)
1909 stack[sp++] = e->dest;
1911 FOR_EACH_EDGE (e, ei, bb->succs)
1912 if (! TEST_BIT (visited, e->dest->index)
1913 && (int) loop_depth (e->dest->loop_father) == depth
1914 && e->dest->loop_father->num != num)
1915 stack[sp++] = e->dest;
1917 FOR_EACH_EDGE (e, ei, bb->succs)
1918 if (! TEST_BIT (visited, e->dest->index)
1919 && (int) loop_depth (e->dest->loop_father) == depth
1920 && e->dest->loop_father->num == num
1921 && EDGE_COUNT (e->dest->preds) > 1)
1922 stack[sp++] = e->dest;
1924 FOR_EACH_EDGE (e, ei, bb->succs)
1925 if (! TEST_BIT (visited, e->dest->index)
1926 && (int) loop_depth (e->dest->loop_father) == depth
1927 && e->dest->loop_father->num == num
1928 && EDGE_COUNT (e->dest->preds) == 1)
1929 stack[sp++] = e->dest;
1931 FOR_EACH_EDGE (e, ei, bb->succs)
1932 if (! TEST_BIT (visited, e->dest->index)
1933 && (int) loop_depth (e->dest->loop_father) > depth)
1934 stack[sp++] = e->dest;
1937 free (stack);
1938 sbitmap_free (visited);
1942 /* Record LOOP as occuring in SCOP. */
1944 static void
1945 scop_record_loop (scop_p scop, struct loop *loop)
1947 loop_p parent;
1948 tree induction_var;
1950 if (bitmap_bit_p (SCOP_LOOPS (scop), loop->num))
1951 return;
1953 parent = loop_outer (loop);
1954 induction_var = find_induction_var_from_exit_cond (loop);
1956 if (!bb_in_scop_p (parent->latch, scop))
1957 parent = NULL;
1959 if (induction_var != NULL_TREE)
1961 name_tree oldiv = XNEW (struct name_tree);
1962 oldiv->t = SSA_NAME_VAR (induction_var);
1963 if (DECL_NAME (oldiv->t))
1964 oldiv->name = IDENTIFIER_POINTER (DECL_NAME (oldiv->t));
1965 else
1967 int len = 2 + 16;
1968 char *n = XNEWVEC (char, len);
1969 snprintf (n, len, "D.%u", DECL_UID (oldiv->t));
1970 oldiv->name = n;
1972 oldiv->loop = loop;
1974 VEC_safe_push (name_tree, heap, SCOP_OLDIVS (scop), oldiv);
1977 bitmap_set_bit (SCOP_LOOPS (scop), loop->num);
1978 VEC_safe_push (loop_p, heap, SCOP_LOOP_NEST (scop), loop);
1981 /* Build the loop nests contained in SCOP. */
1983 static void
1984 build_scop_loop_nests (scop_p scop)
1986 unsigned i;
1987 graphite_bb_p gb;
1988 struct loop *loop0, *loop1;
1990 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++)
1992 struct loop *loop = gbb_loop (gb);
1994 /* Only add loops, if they are completely contained in the SCoP. */
1995 if (loop->header == GBB_BB (gb)
1996 && bb_in_scop_p (loop->latch, scop))
1997 scop_record_loop (scop, gbb_loop (gb));
2000 /* Make sure that the loops in the SCOP_LOOP_NEST are ordered. It
2001 can be the case that an inner loop is inserted before an outer
2002 loop. To avoid this, semi-sort once. */
2003 for (i = 0; VEC_iterate (loop_p, SCOP_LOOP_NEST (scop), i, loop0); i++)
2005 if (VEC_length (loop_p, SCOP_LOOP_NEST (scop)) == i + 1)
2006 break;
2008 loop1 = VEC_index (loop_p, SCOP_LOOP_NEST (scop), i + 1);
2009 if (loop0->num > loop1->num)
2011 VEC_replace (loop_p, SCOP_LOOP_NEST (scop), i, loop1);
2012 VEC_replace (loop_p, SCOP_LOOP_NEST (scop), i + 1, loop0);
2017 /* Calculate the number of loops around GB in the current SCOP. */
2019 static inline int
2020 nb_loops_around_gb (graphite_bb_p gb)
2022 scop_p scop = GBB_SCOP (gb);
2023 struct loop *l = gbb_loop (gb);
2024 int d = 0;
2026 for (; loop_in_scop_p (l, scop); d++, l = loop_outer (l));
2028 return d;
2031 /* Build for BB the static schedule.
2033 The STATIC_SCHEDULE is defined like this:
2036 for (i: ...)
2038 for (j: ...)
2044 for (k: ...)
2052 Static schedules for A to F:
2054 DEPTH
2055 0 1 2
2057 B 1 0 0
2058 C 1 0 1
2059 D 1 1 0
2060 E 1 1 1
2064 static void
2065 build_scop_canonical_schedules (scop_p scop)
2067 int i, j;
2068 graphite_bb_p gb;
2069 int nb = scop_nb_loops (scop) + 1;
2071 SCOP_STATIC_SCHEDULE (scop) = lambda_vector_new (nb);
2073 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++)
2075 int offset = nb_loops_around_gb (gb);
2077 /* After leaving a loop, it is possible that the schedule is not
2078 set at zero. This loop reinitializes components located
2079 after OFFSET. */
2081 for (j = offset + 1; j < nb; j++)
2082 if (SCOP_STATIC_SCHEDULE (scop)[j])
2084 memset (&(SCOP_STATIC_SCHEDULE (scop)[j]), 0,
2085 sizeof (int) * (nb - j));
2086 ++SCOP_STATIC_SCHEDULE (scop)[offset];
2087 break;
2090 GBB_STATIC_SCHEDULE (gb) = lambda_vector_new (offset + 1);
2091 lambda_vector_copy (SCOP_STATIC_SCHEDULE (scop),
2092 GBB_STATIC_SCHEDULE (gb), offset + 1);
2094 ++SCOP_STATIC_SCHEDULE (scop)[offset];
2098 /* Build the LOOPS vector for all bbs in SCOP. */
2100 static void
2101 build_bb_loops (scop_p scop)
2103 graphite_bb_p gb;
2104 int i;
2106 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++)
2108 loop_p loop;
2109 int depth;
2111 depth = nb_loops_around_gb (gb) - 1;
2113 GBB_LOOPS (gb) = VEC_alloc (loop_p, heap, 3);
2114 VEC_safe_grow_cleared (loop_p, heap, GBB_LOOPS (gb), depth + 1);
2116 loop = GBB_BB (gb)->loop_father;
2118 while (scop_contains_loop (scop, loop))
2120 VEC_replace (loop_p, GBB_LOOPS (gb), depth, loop);
2121 loop = loop_outer (loop);
2122 depth--;
2127 /* Get the index for parameter VAR in SCOP. */
2129 static int
2130 param_index (tree var, scop_p scop)
2132 int i;
2133 name_tree p;
2134 name_tree nvar;
2136 gcc_assert (TREE_CODE (var) == SSA_NAME);
2138 for (i = 0; VEC_iterate (name_tree, SCOP_PARAMS (scop), i, p); i++)
2139 if (p->t == var)
2140 return i;
2142 nvar = XNEW (struct name_tree);
2143 nvar->t = var;
2144 nvar->name = NULL;
2145 VEC_safe_push (name_tree, heap, SCOP_PARAMS (scop), nvar);
2146 return VEC_length (name_tree, SCOP_PARAMS (scop)) - 1;
2149 /* Scan EXPR and translate it to an inequality vector INEQ that will
2150 be added, or subtracted, in the constraint domain matrix C at row
2151 R. K is the number of columns for loop iterators in C. */
2153 static void
2154 scan_tree_for_params (scop_p s, tree e, CloogMatrix *c, int r, Value k,
2155 bool subtract)
2157 int cst_col, param_col;
2159 if (e == chrec_dont_know)
2160 return;
2162 switch (TREE_CODE (e))
2164 case POLYNOMIAL_CHREC:
2166 tree left = CHREC_LEFT (e);
2167 tree right = CHREC_RIGHT (e);
2168 int var = CHREC_VARIABLE (e);
2170 if (TREE_CODE (right) != INTEGER_CST)
2171 return;
2173 if (c)
2175 int loop_col = scop_gimple_loop_depth (s, get_loop (var)) + 1;
2177 if (subtract)
2178 value_sub_int (c->p[r][loop_col], c->p[r][loop_col],
2179 int_cst_value (right));
2180 else
2181 value_add_int (c->p[r][loop_col], c->p[r][loop_col],
2182 int_cst_value (right));
2185 switch (TREE_CODE (left))
2187 case POLYNOMIAL_CHREC:
2188 scan_tree_for_params (s, left, c, r, k, subtract);
2189 return;
2191 case INTEGER_CST:
2192 /* Constant part. */
2193 if (c)
2195 int v = int_cst_value (left);
2196 cst_col = c->NbColumns - 1;
2198 if (v < 0)
2200 v = -v;
2201 subtract = subtract ? false : true;
2204 if (subtract)
2205 value_sub_int (c->p[r][cst_col], c->p[r][cst_col], v);
2206 else
2207 value_add_int (c->p[r][cst_col], c->p[r][cst_col], v);
2209 return;
2211 default:
2212 scan_tree_for_params (s, left, c, r, k, subtract);
2213 return;
2216 break;
2218 case MULT_EXPR:
2219 if (chrec_contains_symbols (TREE_OPERAND (e, 0)))
2221 Value val;
2223 gcc_assert (host_integerp (TREE_OPERAND (e, 1), 0));
2225 value_init (val);
2226 value_set_si (val, int_cst_value (TREE_OPERAND (e, 1)));
2227 value_multiply (k, k, val);
2228 value_clear (val);
2229 scan_tree_for_params (s, TREE_OPERAND (e, 0), c, r, k, subtract);
2231 else
2233 Value val;
2235 gcc_assert (host_integerp (TREE_OPERAND (e, 0), 0));
2237 value_init (val);
2238 value_set_si (val, int_cst_value (TREE_OPERAND (e, 0)));
2239 value_multiply (k, k, val);
2240 value_clear (val);
2241 scan_tree_for_params (s, TREE_OPERAND (e, 1), c, r, k, subtract);
2243 break;
2245 case PLUS_EXPR:
2246 case POINTER_PLUS_EXPR:
2247 scan_tree_for_params (s, TREE_OPERAND (e, 0), c, r, k, subtract);
2248 scan_tree_for_params (s, TREE_OPERAND (e, 1), c, r, k, subtract);
2249 break;
2251 case MINUS_EXPR:
2252 scan_tree_for_params (s, TREE_OPERAND (e, 0), c, r, k, subtract);
2253 value_oppose (k, k);
2254 scan_tree_for_params (s, TREE_OPERAND (e, 1), c, r, k, subtract);
2255 break;
2257 case NEGATE_EXPR:
2258 value_oppose (k, k);
2259 scan_tree_for_params (s, TREE_OPERAND (e, 0), c, r, k, subtract);
2260 break;
2262 case SSA_NAME:
2263 param_col = param_index (e, s);
2265 if (c)
2267 param_col += c->NbColumns - scop_nb_params (s) - 1;
2269 if (subtract)
2270 value_subtract (c->p[r][param_col], c->p[r][param_col], k);
2271 else
2272 value_addto (c->p[r][param_col], c->p[r][param_col], k);
2274 break;
2276 case INTEGER_CST:
2277 if (c)
2279 int v = int_cst_value (e);
2280 cst_col = c->NbColumns - 1;
2282 if (v < 0)
2284 v = -v;
2285 subtract = subtract ? false : true;
2288 if (subtract)
2289 value_sub_int (c->p[r][cst_col], c->p[r][cst_col], v);
2290 else
2291 value_add_int (c->p[r][cst_col], c->p[r][cst_col], v);
2293 break;
2295 case NOP_EXPR:
2296 case CONVERT_EXPR:
2297 case NON_LVALUE_EXPR:
2298 scan_tree_for_params (s, TREE_OPERAND (e, 0), c, r, k, subtract);
2299 break;
2301 default:
2302 gcc_unreachable ();
2303 break;
2307 /* Data structure for idx_record_params. */
2309 struct irp_data
2311 struct loop *loop;
2312 scop_p scop;
2315 /* For a data reference with an ARRAY_REF as its BASE, record the
2316 parameters occurring in IDX. DTA is passed in as complementary
2317 information, and is used by the automatic walker function. This
2318 function is a callback for for_each_index. */
2320 static bool
2321 idx_record_params (tree base, tree *idx, void *dta)
2323 struct irp_data *data = (struct irp_data *) dta;
2325 if (TREE_CODE (base) != ARRAY_REF)
2326 return true;
2328 if (TREE_CODE (*idx) == SSA_NAME)
2330 tree scev;
2331 scop_p scop = data->scop;
2332 struct loop *loop = data->loop;
2333 Value one;
2335 scev = analyze_scalar_evolution (loop, *idx);
2336 scev = instantiate_scev (block_before_scop (scop), loop, scev);
2338 value_init (one);
2339 value_set_si (one, 1);
2340 scan_tree_for_params (scop, scev, NULL, 0, one, false);
2341 value_clear (one);
2344 return true;
2347 /* Find parameters with respect to SCOP in BB. We are looking in memory
2348 access functions, conditions and loop bounds. */
2350 static void
2351 find_params_in_bb (scop_p scop, basic_block bb)
2353 int i;
2354 data_reference_p dr;
2355 VEC (data_reference_p, heap) *drs;
2356 gimple_stmt_iterator gsi;
2357 struct loop *nest = outermost_loop_in_scop (scop, bb);
2359 /* Find the parameters used in the memory access functions. */
2360 drs = VEC_alloc (data_reference_p, heap, 5);
2361 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2362 find_data_references_in_stmt (nest, gsi_stmt (gsi), &drs);
2364 for (i = 0; VEC_iterate (data_reference_p, drs, i, dr); i++)
2366 struct irp_data irp;
2368 irp.loop = bb->loop_father;
2369 irp.scop = scop;
2370 for_each_index (&dr->ref, idx_record_params, &irp);
2371 free_data_ref (dr);
2374 VEC_free (data_reference_p, heap, drs);
2376 /* Find parameters in conditional statements. */
2377 gsi = gsi_last_bb (bb);
2378 if (!gsi_end_p (gsi))
2380 gimple stmt = gsi_stmt (gsi);
2382 if (gimple_code (stmt) == GIMPLE_COND)
2384 Value one;
2385 loop_p loop = bb->loop_father;
2387 tree lhs, rhs;
2389 lhs = gimple_cond_lhs (stmt);
2390 lhs = analyze_scalar_evolution (loop, lhs);
2391 lhs = instantiate_scev (block_before_scop (scop), loop, lhs);
2393 rhs = gimple_cond_rhs (stmt);
2394 rhs = analyze_scalar_evolution (loop, rhs);
2395 rhs = instantiate_scev (block_before_scop (scop), loop, rhs);
2397 value_init (one);
2398 scan_tree_for_params (scop, lhs, NULL, 0, one, false);
2399 value_set_si (one, 1);
2400 scan_tree_for_params (scop, rhs, NULL, 0, one, false);
2401 value_clear (one);
2406 /* Saves in NV the name of variable P->T. */
2408 static void
2409 save_var_name (char **nv, int i, name_tree p)
2411 const char *name = get_name (SSA_NAME_VAR (p->t));
2413 if (name)
2415 int len = strlen (name) + 16;
2416 nv[i] = XNEWVEC (char, len);
2417 snprintf (nv[i], len, "%s_%d", name, SSA_NAME_VERSION (p->t));
2419 else
2421 nv[i] = XNEWVEC (char, 16);
2422 snprintf (nv[i], 2 + 16, "T_%d", SSA_NAME_VERSION (p->t));
2425 p->name = nv[i];
2428 /* Return the maximal loop depth in SCOP. */
2430 static int
2431 scop_max_loop_depth (scop_p scop)
2433 int i;
2434 graphite_bb_p gbb;
2435 int max_nb_loops = 0;
2437 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gbb); i++)
2439 int nb_loops = gbb_nb_loops (gbb);
2440 if (max_nb_loops < nb_loops)
2441 max_nb_loops = nb_loops;
2444 return max_nb_loops;
2447 /* Initialize Cloog's parameter names from the names used in GIMPLE.
2448 Initialize Cloog's iterator names, using 'graphite_iterator_%d'
2449 from 0 to scop_nb_loops (scop). */
2451 static void
2452 initialize_cloog_names (scop_p scop)
2454 int i, nb_params = VEC_length (name_tree, SCOP_PARAMS (scop));
2455 char **params = XNEWVEC (char *, nb_params);
2456 int nb_iterators = scop_max_loop_depth (scop);
2457 int nb_scattering= cloog_program_nb_scattdims (SCOP_PROG (scop));
2458 char **iterators = XNEWVEC (char *, nb_iterators * 2);
2459 char **scattering = XNEWVEC (char *, nb_scattering);
2460 name_tree p;
2462 for (i = 0; VEC_iterate (name_tree, SCOP_PARAMS (scop), i, p); i++)
2463 save_var_name (params, i, p);
2465 cloog_names_set_nb_parameters (cloog_program_names (SCOP_PROG (scop)),
2466 nb_params);
2467 cloog_names_set_parameters (cloog_program_names (SCOP_PROG (scop)),
2468 params);
2470 for (i = 0; i < nb_iterators; i++)
2472 int len = 18 + 16;
2473 iterators[i] = XNEWVEC (char, len);
2474 snprintf (iterators[i], len, "graphite_iterator_%d", i);
2477 cloog_names_set_nb_iterators (cloog_program_names (SCOP_PROG (scop)),
2478 nb_iterators);
2479 cloog_names_set_iterators (cloog_program_names (SCOP_PROG (scop)),
2480 iterators);
2482 for (i = 0; i < nb_scattering; i++)
2484 int len = 2 + 16;
2485 scattering[i] = XNEWVEC (char, len);
2486 snprintf (scattering[i], len, "s_%d", i);
2489 cloog_names_set_nb_scattering (cloog_program_names (SCOP_PROG (scop)),
2490 nb_scattering);
2491 cloog_names_set_scattering (cloog_program_names (SCOP_PROG (scop)),
2492 scattering);
2495 /* Record the parameters used in the SCOP. A variable is a parameter
2496 in a scop if it does not vary during the execution of that scop. */
2498 static void
2499 find_scop_parameters (scop_p scop)
2501 graphite_bb_p gb;
2502 unsigned i;
2503 struct loop *loop;
2504 Value one;
2506 value_init (one);
2507 value_set_si (one, 1);
2509 /* Find the parameters used in the loop bounds. */
2510 for (i = 0; VEC_iterate (loop_p, SCOP_LOOP_NEST (scop), i, loop); i++)
2512 tree nb_iters = number_of_latch_executions (loop);
2514 if (!chrec_contains_symbols (nb_iters))
2515 continue;
2517 nb_iters = analyze_scalar_evolution (loop, nb_iters);
2518 nb_iters = instantiate_scev (block_before_scop (scop), loop, nb_iters);
2519 scan_tree_for_params (scop, nb_iters, NULL, 0, one, false);
2522 value_clear (one);
2524 /* Find the parameters used in data accesses. */
2525 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++)
2526 find_params_in_bb (scop, GBB_BB (gb));
2529 /* Build the context constraints for SCOP: constraints and relations
2530 on parameters. */
2532 static void
2533 build_scop_context (scop_p scop)
2535 int nb_params = scop_nb_params (scop);
2536 CloogMatrix *matrix = cloog_matrix_alloc (1, nb_params + 2);
2538 /* Insert '0 >= 0' in the context matrix, as it is not allowed to be
2539 empty. */
2541 value_set_si (matrix->p[0][0], 1);
2543 value_set_si (matrix->p[0][nb_params + 1], 0);
2545 cloog_program_set_context (SCOP_PROG (scop),
2546 cloog_domain_matrix2domain (matrix));
2547 cloog_matrix_free (matrix);
2550 /* Returns a graphite_bb from BB. */
2552 static inline graphite_bb_p
2553 gbb_from_bb (basic_block bb)
2555 return (graphite_bb_p) bb->aux;
2558 /* Add DOMAIN to all the basic blocks in LOOP. */
2560 static void
2561 add_bb_domains (struct loop *loop, CloogMatrix *domain)
2563 basic_block *bbs = get_loop_body (loop);
2564 unsigned i;
2566 for (i = 0; i < loop->num_nodes; i++)
2567 if (bbs[i]->loop_father == loop)
2569 graphite_bb_p gbb = gbb_from_bb (bbs[i]);
2570 GBB_DOMAIN (gbb) = cloog_matrix_copy (domain);
2573 free (bbs);
2576 /* Builds the constraint matrix for LOOP in SCOP. NB_OUTER_LOOPS is the
2577 number of loops surrounding LOOP in SCOP. OUTER_CSTR gives the
2578 constraints matrix for the surrounding loops. */
2580 static void
2581 build_loop_iteration_domains (scop_p scop, struct loop *loop,
2582 CloogMatrix *outer_cstr, int nb_outer_loops)
2584 int i, j, row;
2585 CloogMatrix *cstr;
2587 int nb_rows = outer_cstr->NbRows + 1;
2588 int nb_cols = outer_cstr->NbColumns + 1;
2590 /* Last column of CSTR is the column of constants. */
2591 int cst_col = nb_cols - 1;
2593 /* The column for the current loop is just after the columns of
2594 other outer loops. */
2595 int loop_col = nb_outer_loops + 1;
2597 tree nb_iters = number_of_latch_executions (loop);
2599 /* When the number of iterations is a constant or a parameter, we
2600 add a constraint for the upper bound of the loop. So add a row
2601 to the constraint matrix before allocating it. */
2602 if (TREE_CODE (nb_iters) == INTEGER_CST
2603 || !chrec_contains_undetermined (nb_iters))
2604 nb_rows++;
2606 cstr = cloog_matrix_alloc (nb_rows, nb_cols);
2608 /* Copy the outer constraints. */
2609 for (i = 0; i < outer_cstr->NbRows; i++)
2611 /* Copy the eq/ineq and loops columns. */
2612 for (j = 0; j < loop_col; j++)
2613 value_assign (cstr->p[i][j], outer_cstr->p[i][j]);
2615 /* Leave an empty column in CSTR for the current loop, and then
2616 copy the parameter columns. */
2617 for (j = loop_col; j < outer_cstr->NbColumns; j++)
2618 value_assign (cstr->p[i][j + 1], outer_cstr->p[i][j]);
2621 /* 0 <= loop_i */
2622 row = outer_cstr->NbRows;
2623 value_set_si (cstr->p[row][0], 1);
2624 value_set_si (cstr->p[row][loop_col], 1);
2626 /* loop_i <= nb_iters */
2627 if (TREE_CODE (nb_iters) == INTEGER_CST)
2629 row++;
2630 value_set_si (cstr->p[row][0], 1);
2631 value_set_si (cstr->p[row][loop_col], -1);
2633 value_set_si (cstr->p[row][cst_col],
2634 int_cst_value (nb_iters));
2636 else if (!chrec_contains_undetermined (nb_iters))
2638 /* Otherwise nb_iters contains parameters: scan the nb_iters
2639 expression and build its matrix representation. */
2640 Value one;
2642 row++;
2643 value_set_si (cstr->p[row][0], 1);
2644 value_set_si (cstr->p[row][loop_col], -1);
2646 nb_iters = analyze_scalar_evolution (loop, nb_iters);
2647 nb_iters = instantiate_scev (block_before_scop (scop), loop, nb_iters);
2649 value_init (one);
2650 value_set_si (one, 1);
2651 scan_tree_for_params (scop, nb_iters, cstr, row, one, false);
2652 value_clear (one);
2654 else
2655 gcc_unreachable ();
2657 if (loop->inner && loop_in_scop_p (loop->inner, scop))
2658 build_loop_iteration_domains (scop, loop->inner, cstr, nb_outer_loops + 1);
2660 /* Only go to the next loops, if we are not at the outermost layer. These
2661 have to be handled seperately, as we can be sure, that the chain at this
2662 layer will be connected. */
2663 if (nb_outer_loops != 0 && loop->next && loop_in_scop_p (loop->next, scop))
2664 build_loop_iteration_domains (scop, loop->next, outer_cstr, nb_outer_loops);
2666 add_bb_domains (loop, cstr);
2668 cloog_matrix_free (cstr);
2671 /* Add conditions to the domain of GB. */
2673 static void
2674 add_conditions_to_domain (graphite_bb_p gb)
2676 unsigned int i,j;
2677 gimple stmt;
2678 VEC (gimple, heap) *conditions = GBB_CONDITIONS (gb);
2679 CloogMatrix *domain = GBB_DOMAIN (gb);
2680 scop_p scop = GBB_SCOP (gb);
2682 unsigned nb_rows;
2683 unsigned nb_cols;
2684 unsigned nb_new_rows = 0;
2685 unsigned row;
2687 if (VEC_empty (gimple, conditions))
2688 return;
2690 if (domain)
2692 nb_rows = domain->NbRows;
2693 nb_cols = domain->NbColumns;
2695 else
2697 nb_rows = 0;
2698 nb_cols = scop_nb_params (scop) + 2;
2701 /* Count number of necessary new rows to add the conditions to the
2702 domain. */
2703 for (i = 0; VEC_iterate (gimple, conditions, i, stmt); i++)
2705 switch (gimple_code (stmt))
2707 case GIMPLE_COND:
2709 enum tree_code code = gimple_cond_code (stmt);
2711 switch (code)
2713 case NE_EXPR:
2714 case EQ_EXPR:
2715 /* NE and EQ statements are not supported right know. */
2716 gcc_unreachable ();
2717 break;
2718 case LT_EXPR:
2719 case GT_EXPR:
2720 case LE_EXPR:
2721 case GE_EXPR:
2722 nb_new_rows++;
2723 break;
2724 default:
2725 gcc_unreachable ();
2726 break;
2728 break;
2730 case SWITCH_EXPR:
2731 /* Switch statements are not supported right know. */
2732 gcc_unreachable ();
2733 break;
2735 default:
2736 gcc_unreachable ();
2737 break;
2742 /* Enlarge the matrix. */
2744 CloogMatrix *new_domain;
2745 new_domain = cloog_matrix_alloc (nb_rows + nb_new_rows, nb_cols);
2747 for (i = 0; i < nb_rows; i++)
2748 for (j = 0; j < nb_cols; j++)
2749 value_assign (new_domain->p[i][j], domain->p[i][j]);
2751 cloog_matrix_free (domain);
2752 domain = new_domain;
2753 GBB_DOMAIN (gb) = new_domain;
2756 /* Add the conditions to the new enlarged domain matrix. */
2757 row = nb_rows;
2758 for (i = 0; VEC_iterate (gimple, conditions, i, stmt); i++)
2760 switch (gimple_code (stmt))
2762 case GIMPLE_COND:
2764 Value one;
2765 enum tree_code code;
2766 tree left;
2767 tree right;
2768 loop_p loop = GBB_BB (gb)->loop_father;
2770 left = gimple_cond_lhs (stmt);
2771 right = gimple_cond_rhs (stmt);
2773 left = analyze_scalar_evolution (loop, left);
2774 right = analyze_scalar_evolution (loop, right);
2776 left = instantiate_scev (block_before_scop (scop), loop, left);
2777 right = instantiate_scev (block_before_scop (scop), loop, right);
2779 code = gimple_cond_code (stmt);
2781 /* The conditions for ELSE-branches are inverted. */
2782 if (VEC_index (gimple, gb->condition_cases, i) == NULL)
2783 code = invert_tree_comparison (code, false);
2785 switch (code)
2787 case NE_EXPR:
2788 /* NE statements are not supported right know. */
2789 gcc_unreachable ();
2790 break;
2791 case EQ_EXPR:
2792 value_set_si (domain->p[row][0], 1);
2793 value_init (one);
2794 value_set_si (one, 1);
2795 scan_tree_for_params (scop, left, domain, row, one, true);
2796 value_set_si (one, 1);
2797 scan_tree_for_params (scop, right, domain, row, one, false);
2798 row++;
2799 value_set_si (domain->p[row][0], 1);
2800 value_set_si (one, 1);
2801 scan_tree_for_params (scop, left, domain, row, one, false);
2802 value_set_si (one, 1);
2803 scan_tree_for_params (scop, right, domain, row, one, true);
2804 value_clear (one);
2805 row++;
2806 break;
2807 case LT_EXPR:
2808 value_set_si (domain->p[row][0], 1);
2809 value_init (one);
2810 value_set_si (one, 1);
2811 scan_tree_for_params (scop, left, domain, row, one, true);
2812 value_set_si (one, 1);
2813 scan_tree_for_params (scop, right, domain, row, one, false);
2814 value_sub_int (domain->p[row][nb_cols - 1],
2815 domain->p[row][nb_cols - 1], 1);
2816 value_clear (one);
2817 row++;
2818 break;
2819 case GT_EXPR:
2820 value_set_si (domain->p[row][0], 1);
2821 value_init (one);
2822 value_set_si (one, 1);
2823 scan_tree_for_params (scop, left, domain, row, one, false);
2824 value_set_si (one, 1);
2825 scan_tree_for_params (scop, right, domain, row, one, true);
2826 value_sub_int (domain->p[row][nb_cols - 1],
2827 domain->p[row][nb_cols - 1], 1);
2828 value_clear (one);
2829 row++;
2830 break;
2831 case LE_EXPR:
2832 value_set_si (domain->p[row][0], 1);
2833 value_init (one);
2834 value_set_si (one, 1);
2835 scan_tree_for_params (scop, left, domain, row, one, true);
2836 value_set_si (one, 1);
2837 scan_tree_for_params (scop, right, domain, row, one, false);
2838 value_clear (one);
2839 row++;
2840 break;
2841 case GE_EXPR:
2842 value_set_si (domain->p[row][0], 1);
2843 value_init (one);
2844 value_set_si (one, 1);
2845 scan_tree_for_params (scop, left, domain, row, one, false);
2846 value_set_si (one, 1);
2847 scan_tree_for_params (scop, right, domain, row, one, true);
2848 value_clear (one);
2849 row++;
2850 break;
2851 default:
2852 gcc_unreachable ();
2853 break;
2855 break;
2857 case GIMPLE_SWITCH:
2858 /* Switch statements are not supported right know. */
2859 gcc_unreachable ();
2860 break;
2862 default:
2863 gcc_unreachable ();
2864 break;
2869 /* Helper recursive function. */
2871 static void
2872 build_scop_conditions_1 (VEC (gimple, heap) **conditions,
2873 VEC (gimple, heap) **cases, basic_block bb,
2874 scop_p scop)
2876 int i, j;
2877 graphite_bb_p gbb;
2878 gimple_stmt_iterator gsi;
2879 basic_block bb_child, bb_iter;
2880 VEC (basic_block, heap) *dom;
2882 /* Make sure we are in the SCoP. */
2883 if (!bb_in_scop_p (bb, scop))
2884 return;
2886 /* Record conditions in graphite_bb. */
2887 gbb = gbb_from_bb (bb);
2888 GBB_CONDITIONS (gbb) = VEC_copy (gimple, heap, *conditions);
2889 GBB_CONDITION_CASES (gbb) = VEC_copy (gimple, heap, *cases);
2891 add_conditions_to_domain (gbb);
2893 dom = get_dominated_by (CDI_DOMINATORS, bb);
2895 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2897 gimple stmt = gsi_stmt (gsi);
2898 VEC (edge, gc) *edges;
2899 edge e;
2901 switch (gimple_code (stmt))
2903 case GIMPLE_COND:
2904 edges = bb->succs;
2905 for (i = 0; VEC_iterate (edge, edges, i, e); i++)
2906 if ((dominated_by_p (CDI_DOMINATORS, e->dest, bb))
2907 && VEC_length (edge, e->dest->preds) == 1)
2909 /* Remove the scanned block from the dominator successors. */
2910 for (j = 0; VEC_iterate (basic_block, dom, j, bb_iter); j++)
2911 if (bb_iter == e->dest)
2913 VEC_unordered_remove (basic_block, dom, j);
2914 break;
2917 /* Recursively scan the then or else part. */
2918 if (e->flags & EDGE_TRUE_VALUE)
2919 VEC_safe_push (gimple, heap, *cases, stmt);
2920 else if (e->flags & EDGE_FALSE_VALUE)
2921 VEC_safe_push (gimple, heap, *cases, NULL);
2922 else
2923 gcc_unreachable ();
2925 VEC_safe_push (gimple, heap, *conditions, stmt);
2926 build_scop_conditions_1 (conditions, cases, e->dest, scop);
2927 VEC_pop (gimple, *conditions);
2928 VEC_pop (gimple, *cases);
2930 break;
2932 case GIMPLE_SWITCH:
2934 unsigned i;
2935 gimple_stmt_iterator gsi_search_gimple_label;
2937 for (i = 0; i < gimple_switch_num_labels (stmt); ++i)
2939 basic_block bb_iter;
2940 size_t k;
2941 size_t n_cases = VEC_length (gimple, *conditions);
2942 unsigned n = gimple_switch_num_labels (stmt);
2944 bb_child = label_to_block
2945 (CASE_LABEL (gimple_switch_label (stmt, i)));
2947 /* Do not handle multiple values for the same block. */
2948 for (k = 0; k < n; k++)
2949 if (i != k
2950 && label_to_block
2951 (CASE_LABEL (gimple_switch_label (stmt, k))) == bb_child)
2952 break;
2954 if (k != n)
2955 continue;
2957 /* Switch cases with more than one predecessor are not
2958 handled. */
2959 if (VEC_length (edge, bb_child->preds) != 1)
2960 continue;
2962 /* Recursively scan the corresponding 'case' block. */
2964 for (gsi_search_gimple_label = gsi_start_bb (bb_child);
2965 !gsi_end_p (gsi_search_gimple_label);
2966 gsi_next (&gsi_search_gimple_label))
2968 gimple stmt_gimple_label
2969 = gsi_stmt (gsi_search_gimple_label);
2971 if (gimple_code (stmt_gimple_label) == GIMPLE_LABEL)
2973 tree t = gimple_label_label (stmt_gimple_label);
2975 if (t == gimple_switch_label (stmt, i))
2976 VEC_replace (gimple, *cases, n_cases,
2977 stmt_gimple_label);
2978 else
2979 gcc_unreachable ();
2983 build_scop_conditions_1 (conditions, cases, bb_child, scop);
2985 /* Remove the scanned block from the dominator successors. */
2986 for (j = 0; VEC_iterate (basic_block, dom, j, bb_iter); j++)
2987 if (bb_iter == bb_child)
2989 VEC_unordered_remove (basic_block, dom, j);
2990 break;
2994 VEC_pop (gimple, *conditions);
2995 VEC_pop (gimple, *cases);
2996 break;
2998 default:
2999 break;
3003 /* Scan all immediate dominated successors. */
3004 for (i = 0; VEC_iterate (basic_block, dom, i, bb_child); i++)
3005 build_scop_conditions_1 (conditions, cases, bb_child, scop);
3007 VEC_free (basic_block, heap, dom);
3010 /* Record all 'if' and 'switch' conditions in each gbb of SCOP. */
3012 static void
3013 build_scop_conditions (scop_p scop)
3015 VEC (gimple, heap) *conditions = NULL;
3016 VEC (gimple, heap) *cases = NULL;
3018 build_scop_conditions_1 (&conditions, &cases, SCOP_ENTRY (scop), scop);
3020 VEC_free (gimple, heap, conditions);
3021 VEC_free (gimple, heap, cases);
3024 /* Build the current domain matrix: the loops belonging to the current
3025 SCOP, and that vary for the execution of the current basic block.
3026 Returns false if there is no loop in SCOP. */
3028 static bool
3029 build_scop_iteration_domain (scop_p scop)
3031 struct loop *loop;
3032 CloogMatrix *outer_cstr;
3033 int i;
3035 /* Build cloog loop for all loops, that are in the uppermost loop layer of
3036 this SCoP. */
3037 for (i = 0; VEC_iterate (loop_p, SCOP_LOOP_NEST (scop), i, loop); i++)
3038 if (!loop_in_scop_p (loop_outer (loop), scop))
3040 /* The outermost constraints is a matrix that has:
3041 -first column: eq/ineq boolean
3042 -last column: a constant
3043 -scop_nb_params columns for the parameters used in the scop. */
3044 outer_cstr = cloog_matrix_alloc (0, scop_nb_params (scop) + 2);
3045 build_loop_iteration_domains (scop, loop, outer_cstr, 0);
3046 cloog_matrix_free (outer_cstr);
3049 return (i != 0);
3052 /* Initializes an equation CY of the access matrix using the
3053 information for a subscript from ACCESS_FUN, relatively to the loop
3054 indexes from LOOP_NEST and parameter indexes from PARAMS. NDIM is
3055 the dimension of the array access, i.e. the number of
3056 subscripts. Returns true when the operation succeeds. */
3058 static bool
3059 build_access_matrix_with_af (tree access_fun, lambda_vector cy,
3060 scop_p scop, int ndim)
3062 switch (TREE_CODE (access_fun))
3064 case POLYNOMIAL_CHREC:
3066 tree left = CHREC_LEFT (access_fun);
3067 tree right = CHREC_RIGHT (access_fun);
3068 int var;
3070 if (TREE_CODE (right) != INTEGER_CST)
3071 return false;
3073 var = loop_iteration_vector_dim (CHREC_VARIABLE (access_fun), scop);
3074 cy[var] = int_cst_value (right);
3076 switch (TREE_CODE (left))
3078 case POLYNOMIAL_CHREC:
3079 return build_access_matrix_with_af (left, cy, scop, ndim);
3081 case INTEGER_CST:
3082 cy[ndim - 1] = int_cst_value (left);
3083 return true;
3085 default:
3086 /* FIXME: access_fn can have parameters. */
3087 return false;
3090 case INTEGER_CST:
3091 cy[ndim - 1] = int_cst_value (access_fun);
3092 return true;
3094 default:
3095 /* FIXME: access_fn can have parameters. */
3096 return false;
3100 /* Initialize the access matrix in the data reference REF with respect
3101 to the loop nesting LOOP_NEST. Return true when the operation
3102 succeeded. */
3104 static bool
3105 build_access_matrix (data_reference_p ref, graphite_bb_p gb)
3107 int i, ndim = DR_NUM_DIMENSIONS (ref);
3108 struct access_matrix *am = GGC_NEW (struct access_matrix);
3110 AM_MATRIX (am) = VEC_alloc (lambda_vector, heap, ndim);
3111 DR_SCOP (ref) = GBB_SCOP (gb);
3113 for (i = 0; i < ndim; i++)
3115 lambda_vector v = lambda_vector_new (ref_nb_loops (ref));
3116 scop_p scop = GBB_SCOP (gb);
3117 tree af = DR_ACCESS_FN (ref, i);
3119 if (!build_access_matrix_with_af (af, v, scop, ref_nb_loops (ref)))
3120 return false;
3122 VEC_safe_push (lambda_vector, heap, AM_MATRIX (am), v);
3125 DR_ACCESS_MATRIX (ref) = am;
3126 return true;
3129 /* Build the access matrices for the data references in the SCOP. */
3131 static void
3132 build_scop_data_accesses (scop_p scop)
3134 int i;
3135 graphite_bb_p gb;
3137 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++)
3139 int j;
3140 gimple_stmt_iterator gsi;
3141 data_reference_p dr;
3142 struct loop *nest = outermost_loop_in_scop (scop, GBB_BB (gb));
3144 /* On each statement of the basic block, gather all the occurences
3145 to read/write memory. */
3146 GBB_DATA_REFS (gb) = VEC_alloc (data_reference_p, heap, 5);
3147 for (gsi = gsi_start_bb (GBB_BB (gb)); !gsi_end_p (gsi); gsi_next (&gsi))
3148 find_data_references_in_stmt (nest, gsi_stmt (gsi),
3149 &GBB_DATA_REFS (gb));
3151 /* FIXME: Construction of access matrix is disabled until some
3152 pass, like the data dependence analysis, is using it. */
3153 continue;
3155 /* Construct the access matrix for each data ref, with respect to
3156 the loop nest of the current BB in the considered SCOP. */
3157 for (j = 0;
3158 VEC_iterate (data_reference_p, GBB_DATA_REFS (gb), j, dr);
3159 j++)
3161 bool res = build_access_matrix (dr, gb);
3163 /* FIXME: At this point the DRs should always have an affine
3164 form. For the moment this fails as build_access_matrix
3165 does not build matrices with parameters. */
3166 gcc_assert (res);
3171 /* Returns the tree variable from the name NAME that was given in
3172 Cloog representation. All the parameters are stored in PARAMS, and
3173 all the loop induction variables are stored in IVSTACK.
3175 FIXME: This is a hack, and Cloog should be fixed to not work with
3176 variable names represented as "char *string", but with void
3177 pointers that could be casted back to a tree. The only problem in
3178 doing that is that Cloog's pretty printer still assumes that
3179 variable names are char *strings. The solution would be to have a
3180 function pointer for pretty-printing that can be redirected to be
3181 print_generic_stmt in our case, or fprintf by default.
3182 ??? Too ugly to live. */
3184 static tree
3185 clast_name_to_gcc (const char *name, VEC (name_tree, heap) *params,
3186 loop_iv_stack ivstack)
3188 int i;
3189 name_tree t;
3190 tree iv;
3192 for (i = 0; VEC_iterate (name_tree, params, i, t); i++)
3193 if (!strcmp (name, t->name))
3194 return t->t;
3196 iv = loop_iv_stack_get_iv_from_name (ivstack, name);
3197 if (iv)
3198 return iv;
3200 gcc_unreachable ();
3203 /* Converts a Cloog AST expression E back to a GCC expression tree. */
3205 static tree
3206 clast_to_gcc_expression (struct clast_expr *e,
3207 VEC (name_tree, heap) *params,
3208 loop_iv_stack ivstack)
3210 tree type = integer_type_node;
3212 gcc_assert (e);
3214 switch (e->type)
3216 case expr_term:
3218 struct clast_term *t = (struct clast_term *) e;
3220 if (t->var)
3222 if (value_one_p (t->val))
3223 return clast_name_to_gcc (t->var, params, ivstack);
3225 else if (value_mone_p (t->val))
3226 return fold_build1 (NEGATE_EXPR, type,
3227 clast_name_to_gcc (t->var, params, ivstack));
3228 else
3229 return fold_build2 (MULT_EXPR, type,
3230 gmp_cst_to_tree (t->val),
3231 clast_name_to_gcc (t->var, params, ivstack));
3233 else
3234 return gmp_cst_to_tree (t->val);
3237 case expr_red:
3239 struct clast_reduction *r = (struct clast_reduction *) e;
3240 tree left, right;
3242 switch (r->type)
3244 case clast_red_sum:
3245 if (r->n == 1)
3246 return clast_to_gcc_expression (r->elts[0], params, ivstack);
3248 else
3250 gcc_assert (r->n >= 1
3251 && r->elts[0]->type == expr_term
3252 && r->elts[1]->type == expr_term);
3254 left = clast_to_gcc_expression (r->elts[0], params, ivstack);
3255 right = clast_to_gcc_expression (r->elts[1], params, ivstack);
3256 return fold_build2 (PLUS_EXPR, type, left, right);
3259 break;
3261 case clast_red_min:
3262 if (r->n == 1)
3263 return clast_to_gcc_expression (r->elts[0], params, ivstack);
3265 else if (r->n == 2)
3267 left = clast_to_gcc_expression (r->elts[0], params, ivstack);
3268 right = clast_to_gcc_expression (r->elts[1], params, ivstack);
3269 return fold_build2 (MIN_EXPR, type, left, right);
3272 else
3273 gcc_unreachable();
3275 break;
3277 case clast_red_max:
3278 if (r->n == 1)
3279 return clast_to_gcc_expression (r->elts[0], params, ivstack);
3281 else if (r->n == 2)
3283 left = clast_to_gcc_expression (r->elts[0], params, ivstack);
3284 right = clast_to_gcc_expression (r->elts[1], params, ivstack);
3285 return fold_build2 (MAX_EXPR, type, left, right);
3288 else
3289 gcc_unreachable();
3291 break;
3293 default:
3294 gcc_unreachable ();
3296 break;
3299 case expr_bin:
3301 struct clast_binary *b = (struct clast_binary *) e;
3302 struct clast_expr *lhs = (struct clast_expr *) b->LHS;
3303 struct clast_expr *rhs = (struct clast_expr *) b->RHS;
3304 tree tl = clast_to_gcc_expression (lhs, params, ivstack);
3306 /* FIXME: The next statement produces a warning: Cloog assumes
3307 that the RHS is a constant, but this is a "void *" pointer
3308 that should be casted into a Value, but this cast cannot be
3309 done as Value is a GMP type, that is an array. Cloog must
3310 be fixed for removing this warning. */
3311 tree tr = gmp_cst_to_tree (rhs);
3313 switch (b->type)
3315 case clast_bin_fdiv:
3316 return fold_build2 (FLOOR_DIV_EXPR, type, tl, tr);
3318 case clast_bin_cdiv:
3319 return fold_build2 (CEIL_DIV_EXPR, type, tl, tr);
3321 case clast_bin_div:
3322 return fold_build2 (EXACT_DIV_EXPR, type, tl, tr);
3324 case clast_bin_mod:
3325 return fold_build2 (TRUNC_MOD_EXPR, type, tl, tr);
3327 default:
3328 gcc_unreachable ();
3332 default:
3333 gcc_unreachable ();
3336 return NULL_TREE;
3339 /* Translates a clast equation CLEQ to a tree. */
3341 static tree
3342 graphite_translate_clast_equation (scop_p scop,
3343 struct clast_equation *cleq,
3344 loop_iv_stack ivstack)
3346 enum tree_code comp;
3347 tree lhs = clast_to_gcc_expression (cleq->LHS, SCOP_PARAMS (scop), ivstack);
3348 tree rhs = clast_to_gcc_expression (cleq->RHS, SCOP_PARAMS (scop), ivstack);
3350 if (cleq->sign == 0)
3351 comp = EQ_EXPR;
3353 else if (cleq->sign > 0)
3354 comp = GE_EXPR;
3356 else
3357 comp = LE_EXPR;
3359 return fold_build2 (comp, integer_type_node, lhs, rhs);
3362 /* Creates the test for the condition in STMT. */
3364 static tree
3365 graphite_create_guard_cond_expr (scop_p scop, struct clast_guard *stmt,
3366 loop_iv_stack ivstack)
3368 tree cond = NULL;
3369 int i;
3371 for (i = 0; i < stmt->n; i++)
3373 tree eq = graphite_translate_clast_equation (scop, &stmt->eq[i], ivstack);
3375 if (cond)
3376 cond = fold_build2 (TRUTH_AND_EXPR, integer_type_node, cond, eq);
3377 else
3378 cond = eq;
3381 return cond;
3384 /* Creates a new if region corresponding to Cloog's guard. */
3386 static edge
3387 graphite_create_new_guard (scop_p scop, edge entry_edge,
3388 struct clast_guard *stmt,
3389 loop_iv_stack ivstack)
3391 tree cond_expr = graphite_create_guard_cond_expr (scop, stmt, ivstack);
3392 edge exit_edge = create_empty_if_region_on_edge (entry_edge, cond_expr);
3393 return exit_edge;
3397 /* Creates a new LOOP corresponding to Cloog's STMT. Inserts an induction
3398 variable for the new LOOP. New LOOP is attached to CFG starting at
3399 ENTRY_EDGE. LOOP is inserted into the loop tree and becomes the child
3400 loop of the OUTER_LOOP. */
3402 static struct loop *
3403 graphite_create_new_loop (scop_p scop, edge entry_edge,
3404 struct clast_for *stmt, loop_iv_stack ivstack,
3405 loop_p outer)
3407 struct loop *loop;
3408 tree ivvar;
3409 tree stride, lowb, upb;
3410 tree iv_before;
3412 gcc_assert (stmt->LB
3413 && stmt->UB);
3415 stride = gmp_cst_to_tree (stmt->stride);
3416 lowb = clast_to_gcc_expression (stmt->LB, SCOP_PARAMS (scop), ivstack);
3417 ivvar = create_tmp_var (integer_type_node, "graphiteIV");
3418 add_referenced_var (ivvar);
3420 upb = clast_to_gcc_expression (stmt->UB, SCOP_PARAMS (scop), ivstack);
3421 loop = create_empty_loop_on_edge (entry_edge, lowb, stride, upb, ivvar,
3422 &iv_before, outer ? outer
3423 : entry_edge->src->loop_father);
3425 loop_iv_stack_push_iv (ivstack, iv_before, stmt->iterator);
3427 return loop;
3430 /* Remove all the edges from EDGES except the edge KEEP. */
3432 static void
3433 remove_all_edges_1 (VEC (edge, gc) *edges, edge keep)
3435 edge e;
3436 edge_iterator ei;
3438 for (ei = ei_start (edges); (e = ei_safe_edge (ei)); )
3440 if (e != keep)
3442 remove_edge (e);
3443 e = ei_safe_edge (ei);
3445 else
3446 ei_next (&ei);
3450 /* Remove all the edges from BB except the edge KEEP. */
3452 static void
3453 remove_all_edges (basic_block bb, edge keep)
3455 remove_all_edges_1 (bb->succs, keep);
3456 remove_all_edges_1 (bb->preds, keep);
3459 /* Rename the SSA_NAMEs used in STMT and that appear in IVSTACK. */
3461 static void
3462 graphite_rename_ivs_stmt (gimple stmt, graphite_bb_p gbb, scop_p scop,
3463 loop_p old, loop_iv_stack ivstack)
3465 ssa_op_iter iter;
3466 use_operand_p use_p;
3468 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
3470 tree use = USE_FROM_PTR (use_p);
3471 tree new_iv = NULL;
3472 name_tree old_iv = get_old_iv_from_ssa_name (scop, old, use);
3474 if (old_iv)
3475 new_iv = loop_iv_stack_get_iv (ivstack,
3476 gbb_loop_index (gbb, old_iv->loop));
3478 if (new_iv)
3479 SET_USE (use_p, new_iv);
3483 /* Returns true if SSA_NAME is a parameter of SCOP. */
3485 static bool
3486 is_parameter (scop_p scop, tree ssa_name)
3488 int i;
3489 VEC (name_tree, heap) *params = SCOP_PARAMS (scop);
3490 name_tree param;
3492 for (i = 0; VEC_iterate (name_tree, params, i, param); i++)
3493 if (param->t == ssa_name)
3494 return true;
3496 return false;
3499 /* Returns true if NAME is an old induction variable in SCOP. OLD is
3500 the original loop that contained the definition of NAME. */
3502 static bool
3503 is_old_iv (scop_p scop, loop_p old, tree name)
3505 return get_old_iv_from_ssa_name (scop, old, name) != NULL;
3509 static void expand_scalar_variables_stmt (gimple, graphite_bb_p, scop_p, loop_p,
3510 loop_iv_stack);
3512 /* Constructs a tree which only contains old_ivs and parameters. Any
3513 other variables that are defined outside GBB will be eliminated by
3514 using their definitions in the constructed tree. OLD_LOOP_FATHER
3515 is the original loop that contained GBB. */
3517 static tree
3518 expand_scalar_variables_expr (tree type, tree op0, enum tree_code code,
3519 tree op1, graphite_bb_p gbb, scop_p scop,
3520 loop_p old_loop_father, loop_iv_stack ivstack)
3522 if ((TREE_CODE_CLASS (code) == tcc_constant
3523 && code == INTEGER_CST)
3524 || TREE_CODE_CLASS (code) == tcc_reference)
3525 return op0;
3527 if (TREE_CODE_CLASS (code) == tcc_unary)
3529 tree op0_type = TREE_TYPE (op0);
3530 enum tree_code op0_code = TREE_CODE (op0);
3531 tree op0_expr =
3532 expand_scalar_variables_expr (op0_type, op0, op0_code,
3533 NULL, gbb, scop, old_loop_father,
3534 ivstack);
3536 return fold_build1 (code, type, op0_expr);
3539 if (TREE_CODE_CLASS (code) == tcc_binary)
3541 tree op0_type = TREE_TYPE (op0);
3542 enum tree_code op0_code = TREE_CODE (op0);
3543 tree op0_expr =
3544 expand_scalar_variables_expr (op0_type, op0, op0_code,
3545 NULL, gbb, scop, old_loop_father,
3546 ivstack);
3547 tree op1_type = TREE_TYPE (op1);
3548 enum tree_code op1_code = TREE_CODE (op1);
3549 tree op1_expr =
3550 expand_scalar_variables_expr (op1_type, op1, op1_code,
3551 NULL, gbb, scop, old_loop_father,
3552 ivstack);
3554 return fold_build2 (code, type, op0_expr, op1_expr);
3557 if (code == SSA_NAME)
3559 tree var0, var1;
3560 gimple def_stmt;
3561 enum tree_code subcode;
3563 if(is_parameter (scop, op0) ||
3564 is_old_iv (scop, old_loop_father, op0))
3565 return op0;
3567 def_stmt = SSA_NAME_DEF_STMT (op0);
3569 if (gimple_bb (def_stmt) == GBB_BB (gbb))
3571 /* If the defining statement is in the basic block already
3572 we do not need to create a new expression for it, we
3573 only need to ensure its operands are expanded. */
3574 expand_scalar_variables_stmt (def_stmt, gbb, scop,
3575 old_loop_father, ivstack);
3576 return op0;
3579 else
3581 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
3582 return op0;
3584 var0 = gimple_assign_rhs1 (def_stmt);
3585 subcode = gimple_assign_rhs_code (def_stmt);
3586 var1 = gimple_assign_rhs2 (def_stmt);
3588 return expand_scalar_variables_expr (type, var0, subcode, var1,
3589 gbb, scop, old_loop_father,
3590 ivstack);
3594 gcc_unreachable ();
3595 return NULL;
3598 /* Replicates any uses of non-parameters and non-old-ivs variablesthat
3599 are defind outside GBB with code that is inserted in GBB.
3600 OLD_LOOP_FATHER is the original loop that contained STMT. */
3602 static void
3603 expand_scalar_variables_stmt (gimple stmt, graphite_bb_p gbb, scop_p scop,
3604 loop_p old_loop_father, loop_iv_stack ivstack)
3606 ssa_op_iter iter;
3607 use_operand_p use_p;
3608 basic_block bb = GBB_BB (gbb);
3610 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
3612 tree use = USE_FROM_PTR (use_p);
3613 tree type = TREE_TYPE (use);
3614 enum tree_code code = TREE_CODE (use);
3615 tree use_expr = expand_scalar_variables_expr (type, use, code, NULL,
3616 gbb, scop, old_loop_father,
3617 ivstack);
3618 if (use_expr != use)
3620 gimple_stmt_iterator gsi = gsi_after_labels (bb);
3621 tree new_use =
3622 force_gimple_operand_gsi (&gsi, use_expr, true, NULL,
3623 true, GSI_NEW_STMT);
3624 SET_USE (use_p, new_use);
3629 /* Copies the definitions outside of GBB of variables that are not
3630 induction variables nor parameters. GBB must only contain
3631 "external" references to these types of variables. OLD_LOOP_FATHER
3632 is the original loop that contained GBB. */
3634 static void
3635 expand_scalar_variables (graphite_bb_p gbb, scop_p scop,
3636 loop_p old_loop_father, loop_iv_stack ivstack)
3638 basic_block bb = GBB_BB (gbb);
3639 gimple_stmt_iterator gsi;
3641 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
3643 gimple stmt = gsi_stmt (gsi);
3644 expand_scalar_variables_stmt (stmt, gbb, scop, old_loop_father,
3645 ivstack);
3646 gsi_next (&gsi);
3650 /* Rename all the SSA_NAMEs from block GBB that appear in IVSTACK in
3651 terms of new induction variables. OLD_LOOP_FATHER is the original
3652 loop that contained GBB. */
3654 static void
3655 graphite_rename_ivs (graphite_bb_p gbb, scop_p scop, loop_p old_loop_father,
3656 loop_iv_stack ivstack)
3658 basic_block bb = GBB_BB (gbb);
3659 gimple_stmt_iterator gsi;
3661 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
3663 gimple stmt = gsi_stmt (gsi);
3665 if (gimple_get_lhs (stmt)
3666 && TREE_CODE (gimple_get_lhs (stmt)) == SSA_NAME
3667 && get_old_iv_from_ssa_name (scop, old_loop_father,
3668 gimple_get_lhs (stmt)))
3669 gsi_remove (&gsi, false);
3670 else
3672 graphite_rename_ivs_stmt (stmt, gbb, scop, old_loop_father, ivstack);
3673 gsi_next (&gsi);
3678 /* Move all the PHI nodes from block FROM to block TO.
3679 OLD_LOOP_FATHER is the original loop that contained FROM. */
3681 static void
3682 move_phi_nodes (scop_p scop, loop_p old_loop_father, basic_block from,
3683 basic_block to)
3685 gimple_stmt_iterator gsi;
3687 for (gsi = gsi_start_phis (from); !gsi_end_p (gsi);)
3689 gimple phi = gsi_stmt (gsi);
3690 tree op = gimple_phi_result (phi);
3692 if (get_old_iv_from_ssa_name (scop, old_loop_father, op) == NULL)
3694 gimple new_phi = make_phi_node (op, 0);
3695 add_phi_node_to_bb (new_phi, to);
3697 remove_phi_node (&gsi, false);
3701 /* Remove condition from BB. */
3703 static void
3704 remove_condition (basic_block bb)
3706 gimple last = last_stmt (bb);
3708 if (last && gimple_code (last) == GIMPLE_COND)
3710 gimple_stmt_iterator gsi = gsi_last_bb (bb);
3711 gsi_remove (&gsi, true);
3715 /* Returns the first successor edge of BB with EDGE_TRUE_VALUE flag set. */
3717 static edge
3718 get_true_edge_from_guard_bb (basic_block bb)
3720 edge e;
3721 edge_iterator ei;
3723 FOR_EACH_EDGE (e, ei, bb->succs)
3724 if (e->flags & EDGE_TRUE_VALUE)
3725 return e;
3727 gcc_unreachable ();
3728 return NULL;
3731 /* Translates a CLAST statement STMT to GCC representation. NEXT_E is
3732 the edge where new generated code should be attached. BB_EXIT is the last
3733 basic block that defines the scope of code generation. CONTEXT_LOOP is the
3734 loop in which the generated code will be placed (might be NULL). */
3736 static edge
3737 translate_clast (scop_p scop, struct loop *context_loop,
3738 struct clast_stmt *stmt, edge next_e, loop_iv_stack ivstack)
3740 if (!stmt)
3741 return next_e;
3743 if (CLAST_STMT_IS_A (stmt, stmt_root))
3744 return translate_clast (scop, context_loop, stmt->next, next_e, ivstack);
3746 if (CLAST_STMT_IS_A (stmt, stmt_user))
3748 CloogStatement *cs = ((struct clast_user_stmt *) stmt)->statement;
3749 graphite_bb_p gbb = (graphite_bb_p) cloog_statement_usr (cs);
3750 basic_block bb = gbb->bb;
3751 loop_p old_loop_father = bb->loop_father;
3753 if (bb == ENTRY_BLOCK_PTR)
3754 return next_e;
3756 remove_condition (bb);
3757 expand_scalar_variables (gbb, scop, old_loop_father, ivstack);
3758 remove_all_edges (bb, next_e);
3759 move_phi_nodes (scop, old_loop_father, bb, next_e->src);
3760 redirect_edge_succ_nodup (next_e, bb);
3762 if (context_loop)
3764 remove_bb_from_loops (bb);
3765 add_bb_to_loop (bb, context_loop);
3768 set_immediate_dominator (CDI_DOMINATORS, next_e->dest, next_e->src);
3769 mark_virtual_ops_in_bb (bb);
3770 next_e = make_edge (bb,
3771 context_loop ? context_loop->latch : EXIT_BLOCK_PTR,
3772 EDGE_FALLTHRU);
3773 loop_iv_stack_patch_for_consts (ivstack,
3774 (struct clast_user_stmt *) stmt);
3775 graphite_rename_ivs (gbb, scop, old_loop_father, ivstack);
3776 loop_iv_stack_remove_constants (ivstack);
3777 return translate_clast (scop, context_loop, stmt->next, next_e, ivstack);
3780 if (CLAST_STMT_IS_A (stmt, stmt_for))
3782 struct loop *loop
3783 = graphite_create_new_loop (scop, next_e, (struct clast_for *) stmt,
3784 ivstack, context_loop ? context_loop
3785 : get_loop (0));
3786 edge last_e = single_exit (loop);
3788 next_e = translate_clast (scop, loop, ((struct clast_for *) stmt)->body,
3789 single_pred_edge (loop->latch), ivstack);
3790 redirect_edge_succ_nodup (next_e, loop->latch);
3792 set_immediate_dominator (CDI_DOMINATORS, next_e->dest, next_e->src);
3793 loop_iv_stack_pop (ivstack);
3795 return translate_clast (scop, context_loop, stmt->next, last_e, ivstack);
3798 if (CLAST_STMT_IS_A (stmt, stmt_guard))
3800 edge last_e = graphite_create_new_guard (scop, next_e,
3801 ((struct clast_guard *) stmt),
3802 ivstack);
3803 edge true_e = get_true_edge_from_guard_bb (next_e->dest);
3804 next_e = translate_clast (scop, context_loop,
3805 ((struct clast_guard *) stmt)->then,
3806 true_e, ivstack);
3807 redirect_edge_succ_nodup (next_e, last_e->src);
3808 return translate_clast (scop, context_loop, stmt->next, last_e, ivstack);
3811 if (CLAST_STMT_IS_A (stmt, stmt_block))
3813 next_e = translate_clast (scop, context_loop,
3814 ((struct clast_block *) stmt)->body,
3815 next_e, ivstack);
3816 return translate_clast (scop, context_loop, stmt->next, next_e, ivstack);
3819 gcc_unreachable ();
3822 /* Free the SCATTERING domain list. */
3824 static void
3825 free_scattering (CloogDomainList *scattering)
3827 while (scattering)
3829 CloogDomain *dom = cloog_domain (scattering);
3830 CloogDomainList *next = cloog_next_domain (scattering);
3832 cloog_domain_free (dom);
3833 free (scattering);
3834 scattering = next;
3838 /* Build cloog program for SCoP. */
3840 static void
3841 build_cloog_prog (scop_p scop)
3843 int i;
3844 int max_nb_loops = scop_max_loop_depth (scop);
3845 graphite_bb_p gbb;
3846 CloogLoop *loop_list = NULL;
3847 CloogBlockList *block_list = NULL;
3848 CloogDomainList *scattering = NULL;
3849 CloogProgram *prog = SCOP_PROG (scop);
3850 int nbs = 2 * max_nb_loops + 1;
3851 int *scaldims = (int *) xmalloc (nbs * (sizeof (int)));
3853 cloog_program_set_nb_scattdims (prog, nbs);
3854 initialize_cloog_names (scop);
3856 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gbb); i++)
3858 /* Build new block. */
3859 CloogMatrix *domain = GBB_DOMAIN (gbb);
3860 CloogStatement *stmt = cloog_statement_alloc (GBB_BB (gbb)->index);
3861 CloogBlock *block = cloog_block_alloc (stmt, 0, NULL,
3862 nb_loops_around_gb (gbb));
3863 cloog_statement_set_usr (stmt, gbb);
3865 /* Add empty domain to all bbs, which do not yet have a domain, as they
3866 are not part of any loop. */
3867 if (domain == NULL)
3869 domain = cloog_matrix_alloc (0, scop_nb_params (scop) + 2);
3870 GBB_DOMAIN (gbb) = domain;
3873 /* Build loop list. */
3875 CloogLoop *new_loop_list = cloog_loop_malloc ();
3876 cloog_loop_set_next (new_loop_list, loop_list);
3877 cloog_loop_set_domain (new_loop_list,
3878 cloog_domain_matrix2domain (domain));
3879 cloog_loop_set_block (new_loop_list, block);
3880 loop_list = new_loop_list;
3883 /* Build block list. */
3885 CloogBlockList *new_block_list = cloog_block_list_malloc ();
3887 cloog_block_list_set_next (new_block_list, block_list);
3888 cloog_block_list_set_block (new_block_list, block);
3889 block_list = new_block_list;
3892 /* Build scattering list. */
3894 /* XXX: Replace with cloog_domain_list_alloc(), when available. */
3895 CloogDomainList *new_scattering
3896 = (CloogDomainList *) xmalloc (sizeof (CloogDomainList));
3897 CloogMatrix *scat_mat = schedule_to_scattering (gbb, nbs);
3899 cloog_set_next_domain (new_scattering, scattering);
3900 cloog_set_domain (new_scattering,
3901 cloog_domain_matrix2domain (scat_mat));
3902 scattering = new_scattering;
3903 cloog_matrix_free (scat_mat);
3907 cloog_program_set_loop (prog, loop_list);
3908 cloog_program_set_blocklist (prog, block_list);
3910 for (i = 0; i < nbs; i++)
3911 scaldims[i] = 0 ;
3913 cloog_program_set_scaldims (prog, scaldims);
3915 /* Extract scalar dimensions to simplify the code generation problem. */
3916 cloog_program_extract_scalars (prog, scattering);
3918 /* Apply scattering. */
3919 cloog_program_scatter (prog, scattering);
3920 free_scattering (scattering);
3922 /* Iterators corresponding to scalar dimensions have to be extracted. */
3923 cloog_names_scalarize (cloog_program_names (prog), nbs,
3924 cloog_program_scaldims (prog));
3926 /* Free blocklist. */
3928 CloogBlockList *next = cloog_program_blocklist (prog);
3930 while (next)
3932 CloogBlockList *toDelete = next;
3933 next = cloog_block_list_next (next);
3934 cloog_block_list_set_next (toDelete, NULL);
3935 cloog_block_list_set_block (toDelete, NULL);
3936 cloog_block_list_free (toDelete);
3938 cloog_program_set_blocklist (prog, NULL);
3942 /* Return the options that will be used in GLOOG. */
3944 static CloogOptions *
3945 set_cloog_options (void)
3947 CloogOptions *options = cloog_options_malloc ();
3949 /* Change cloog output language to C. If we do use FORTRAN instead, cloog
3950 will stop e.g. with "ERROR: unbounded loops not allowed in FORTRAN.", if
3951 we pass an incomplete program to cloog. */
3952 options->language = LANGUAGE_C;
3954 /* Enable complex equality spreading: removes dummy statements
3955 (assignments) in the generated code which repeats the
3956 substitution equations for statements. This is useless for
3957 GLooG. */
3958 options->esp = 1;
3960 /* Enable C pretty-printing mode: normalizes the substitution
3961 equations for statements. */
3962 options->cpp = 1;
3964 /* Allow cloog to build strides with a stride width different to one.
3965 This example has stride = 4:
3967 for (i = 0; i < 20; i += 4)
3968 A */
3969 options->strides = 1;
3971 /* Disable optimizations and make cloog generate source code closer to the
3972 input. This is useful for debugging, but later we want the optimized
3973 code.
3975 XXX: We can not disable optimizations, as loop blocking is not working
3976 without them. */
3977 if (0)
3979 options->f = -1;
3980 options->l = INT_MAX;
3983 return options;
3986 /* Prints STMT to STDERR. */
3988 void
3989 debug_clast_stmt (struct clast_stmt *stmt)
3991 CloogOptions *options = set_cloog_options ();
3993 pprint (stderr, stmt, 0, options);
3996 /* Find the right transform for the SCOP, and return a Cloog AST
3997 representing the new form of the program. */
3999 static struct clast_stmt *
4000 find_transform (scop_p scop)
4002 struct clast_stmt *stmt;
4003 CloogOptions *options = set_cloog_options ();
4005 /* Connect new cloog prog generation to graphite. */
4006 build_cloog_prog (scop);
4008 if (dump_file && (dump_flags & TDF_DETAILS))
4010 fprintf (dump_file, "Cloog Input [\n");
4011 cloog_program_print (dump_file, SCOP_PROG(scop));
4012 fprintf (dump_file, "]\n");
4015 SCOP_PROG (scop) = cloog_program_generate (SCOP_PROG (scop), options);
4016 stmt = cloog_clast_create (SCOP_PROG (scop), options);
4018 if (dump_file && (dump_flags & TDF_DETAILS))
4020 fprintf (dump_file, "Cloog Output[\n");
4021 pprint (dump_file, stmt, 0, options);
4022 cloog_program_dump_cloog (dump_file, SCOP_PROG (scop));
4023 fprintf (dump_file, "]\n");
4026 cloog_options_free (options);
4027 return stmt;
4030 /* Return a vector of all the virtual phi nodes in the current
4031 function. */
4033 static VEC (gimple, heap) *
4034 collect_virtual_phis (void)
4036 gimple_stmt_iterator si;
4037 gimple_vec phis = VEC_alloc (gimple, heap, 3);
4038 basic_block bb;
4040 FOR_EACH_BB (bb)
4041 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
4042 /* The phis we moved will have 0 arguments because the
4043 original edges were removed. */
4044 if (gimple_phi_num_args (gsi_stmt (si)) == 0)
4045 VEC_safe_push (gimple, heap, phis, gsi_stmt (si));
4047 /* Deallocate if we did not find any. */
4048 if (VEC_length (gimple, phis) == 0)
4050 VEC_free (gimple, heap, phis);
4051 phis = NULL;
4054 return phis;
4057 /* Find a virtual definition for variable VAR in BB. */
4059 static tree
4060 find_vdef_for_var_in_bb (basic_block bb, tree var)
4062 gimple_stmt_iterator gsi;
4063 gimple phi;
4064 def_operand_p def_var;
4065 vuse_vec_p vv;
4066 ssa_op_iter op_iter;
4068 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4069 FOR_EACH_SSA_VDEF_OPERAND (def_var, vv, gsi_stmt (gsi), op_iter)
4070 if (SSA_NAME_VAR (*def_var) == var)
4071 return *def_var;
4073 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4074 FOR_EACH_SSA_DEF_OPERAND (def_var, gsi_stmt (gsi), op_iter, SSA_OP_DEF)
4075 if (SSA_NAME_VAR (*def_var) == var)
4076 return *def_var;
4078 for (gsi = gsi_start_phis (bb); !gsi_end_p(gsi); gsi_next (&gsi))
4080 phi = gsi_stmt (gsi);
4081 if (SSA_NAME_VAR (PHI_RESULT (phi)) == var)
4082 return PHI_RESULT (phi);
4085 return NULL;
4088 /* Recursive helper. */
4090 static tree
4091 find_vdef_for_var_1 (basic_block bb, struct pointer_set_t *visited, tree var)
4093 tree result = NULL;
4094 edge_iterator ei;
4095 edge pred_edge;
4097 if (pointer_set_contains (visited, bb))
4098 return NULL;
4100 pointer_set_insert (visited, bb);
4101 result = find_vdef_for_var_in_bb (bb, var);
4103 if (!result)
4104 FOR_EACH_EDGE (pred_edge, ei, bb->preds)
4105 if (!result)
4106 result = find_vdef_for_var_1 (pred_edge->src, visited, var);
4108 return result;
4111 /* Finds a virtual definition for variable VAR. */
4113 static tree
4114 find_vdef_for_var (basic_block bb, tree var)
4116 struct pointer_set_t *visited = pointer_set_create ();
4117 tree def = find_vdef_for_var_1 (bb, visited, var);
4119 pointer_set_destroy (visited);
4120 return def;
4123 /* Update the virtual phis after loop bodies are moved to new
4124 loops. */
4126 static void
4127 patch_phis_for_virtual_defs (void)
4129 int i;
4130 gimple phi;
4131 VEC (gimple, heap) *virtual_phis = collect_virtual_phis ();
4133 for (i = 0; VEC_iterate (gimple, virtual_phis, i, phi); i++)
4135 basic_block bb = gimple_bb (phi);
4136 edge_iterator ei;
4137 edge pred_edge;
4138 gimple_stmt_iterator gsi;
4139 gimple new_phi;
4140 tree phi_result = PHI_RESULT (phi);
4141 tree var = SSA_NAME_VAR (phi_result);
4143 new_phi = create_phi_node (phi_result, bb);
4144 SSA_NAME_DEF_STMT (phi_result) = new_phi;
4146 FOR_EACH_EDGE (pred_edge, ei, bb->preds)
4148 tree def = find_vdef_for_var (pred_edge->src, var);
4150 if (def)
4151 add_phi_arg (new_phi, def, pred_edge);
4152 else
4153 add_phi_arg (new_phi, gimple_default_def (cfun, var), pred_edge);
4156 gsi = gsi_for_stmt (phi);
4157 remove_phi_node (&gsi, false);
4160 VEC_free (gimple, heap, virtual_phis);
4163 /* Mark the original loops of SCOP for removal, replacing their header
4164 field with NULL. */
4166 static void
4167 mark_old_loops (scop_p scop)
4169 int i;
4170 struct loop *loop;
4172 for (i = 0; VEC_iterate (loop_p, SCOP_LOOP_NEST (scop), i, loop); i++)
4174 loop->header = NULL;
4175 loop->latch = NULL;
4179 /* Scan the loops and remove the ones that have been marked for
4180 removal. */
4182 static void
4183 remove_dead_loops (void)
4185 struct loop *loop, *ploop;
4186 loop_iterator li;
4188 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
4190 /* Remove only those loops that we marked to be removed with
4191 mark_old_loops. */
4192 if (loop->header)
4193 continue;
4195 while (loop->inner)
4197 ploop = loop->inner;
4198 flow_loop_tree_node_remove (ploop);
4199 flow_loop_tree_node_add (loop_outer (loop), ploop);
4202 /* Remove the loop and free its data. */
4203 delete_loop (loop);
4207 /* Returns true when it is possible to generate code for this STMT.
4208 For the moment we cannot generate code when Cloog decides to
4209 duplicate a statement, as we do not do a copy, but a move.
4210 USED_BASIC_BLOCKS records the blocks that have already been seen.
4211 We return false if we have to generate code twice for the same
4212 block. */
4214 static bool
4215 can_generate_code_stmt (struct clast_stmt *stmt,
4216 struct pointer_set_t *used_basic_blocks)
4218 if (!stmt)
4219 return true;
4221 if (CLAST_STMT_IS_A (stmt, stmt_root))
4222 return can_generate_code_stmt (stmt->next, used_basic_blocks);
4224 if (CLAST_STMT_IS_A (stmt, stmt_user))
4226 CloogStatement *cs = ((struct clast_user_stmt *) stmt)->statement;
4227 graphite_bb_p gbb = (graphite_bb_p) cloog_statement_usr (cs);
4229 if (pointer_set_contains (used_basic_blocks, gbb))
4230 return false;
4231 pointer_set_insert (used_basic_blocks, gbb);
4232 return can_generate_code_stmt (stmt->next, used_basic_blocks);
4235 if (CLAST_STMT_IS_A (stmt, stmt_for))
4236 return can_generate_code_stmt (((struct clast_for *) stmt)->body,
4237 used_basic_blocks)
4238 && can_generate_code_stmt (stmt->next, used_basic_blocks);
4240 if (CLAST_STMT_IS_A (stmt, stmt_guard))
4241 return can_generate_code_stmt (((struct clast_guard *) stmt)->then,
4242 used_basic_blocks);
4244 if (CLAST_STMT_IS_A (stmt, stmt_block))
4245 return can_generate_code_stmt (((struct clast_block *) stmt)->body,
4246 used_basic_blocks)
4247 && can_generate_code_stmt (stmt->next, used_basic_blocks);
4249 return false;
4252 /* Returns true when it is possible to generate code for this STMT. */
4254 static bool
4255 can_generate_code (struct clast_stmt *stmt)
4257 bool result;
4258 struct pointer_set_t *used_basic_blocks = pointer_set_create ();
4260 result = can_generate_code_stmt (stmt, used_basic_blocks);
4261 pointer_set_destroy (used_basic_blocks);
4262 return result;
4265 /* Skip any definition that is a phi node with a single phi def. */
4267 static tree
4268 skip_phi_defs (tree ssa_name)
4270 tree result = ssa_name;
4271 gimple def_stmt = SSA_NAME_DEF_STMT (ssa_name);
4273 if (gimple_code (def_stmt) == GIMPLE_PHI
4274 && gimple_phi_num_args (def_stmt) == 1)
4275 result = skip_phi_defs (gimple_phi_arg(def_stmt,0)->def);
4277 return result;
4280 /* Returns a VEC containing the phi-arg defs coming from SCOP_EXIT in
4281 the destination block of SCOP_EXIT. */
4283 static VEC (tree, heap) *
4284 collect_scop_exit_phi_args (edge scop_exit)
4286 VEC (tree, heap) *phi_args = VEC_alloc (tree, heap, 1);
4287 gimple_stmt_iterator gsi;
4289 for (gsi = gsi_start_phis (scop_exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
4291 gimple phi = gsi_stmt (gsi);
4292 tree phi_arg = skip_phi_defs(PHI_ARG_DEF_FROM_EDGE (phi, scop_exit));
4294 VEC_safe_push (tree, heap, phi_args, phi_arg);
4297 return phi_args;
4300 /* Patches (adds) PHI_ARGS to the phi nodes in SCOP_EXIT destination. */
4302 static void
4303 patch_scop_exit_phi_args (edge scop_exit,
4304 VEC (tree, heap) *phi_args)
4306 int i = 0;
4307 gimple_stmt_iterator gsi;
4309 for (gsi = gsi_start_phis (scop_exit->dest); !gsi_end_p (gsi);
4310 gsi_next (&gsi), i++)
4312 tree def = VEC_index (tree, phi_args, i);
4313 gimple phi = gsi_stmt (gsi);
4315 gcc_assert (PHI_ARG_DEF_FROM_EDGE (phi, scop_exit) == NULL);
4317 add_phi_arg (phi, def, scop_exit);
4321 /* GIMPLE Loop Generator: generates loops from STMT in GIMPLE form for
4322 the given SCOP. */
4324 static void
4325 gloog (scop_p scop, struct clast_stmt *stmt)
4327 edge new_scop_exit_edge = NULL;
4328 basic_block scop_exit = SCOP_EXIT (scop);
4329 VEC (tree, heap) *phi_args =
4330 collect_scop_exit_phi_args (SESE_EXIT (SCOP_REGION (scop)));
4331 VEC (iv_stack_entry_p, heap) *ivstack =
4332 VEC_alloc (iv_stack_entry_p, heap, 10);
4333 edge construction_edge = SESE_ENTRY (SCOP_REGION (scop));
4334 basic_block old_scop_exit_idom = get_immediate_dominator (CDI_DOMINATORS,
4335 scop_exit);
4337 if (!can_generate_code (stmt))
4339 cloog_clast_free (stmt);
4340 return;
4343 redirect_edge_succ_nodup (construction_edge, EXIT_BLOCK_PTR);
4344 new_scop_exit_edge = translate_clast (scop,
4345 construction_edge->src->loop_father,
4346 stmt, construction_edge, &ivstack);
4347 free_loop_iv_stack (&ivstack);
4348 redirect_edge_succ (new_scop_exit_edge, scop_exit);
4350 if (!old_scop_exit_idom
4351 || !dominated_by_p (CDI_DOMINATORS, SCOP_ENTRY (scop),
4352 old_scop_exit_idom)
4353 || SCOP_ENTRY (scop) == old_scop_exit_idom)
4354 set_immediate_dominator (CDI_DOMINATORS,
4355 new_scop_exit_edge->dest,
4356 new_scop_exit_edge->src);
4358 cloog_clast_free (stmt);
4360 if (new_scop_exit_edge->dest == EXIT_BLOCK_PTR)
4361 new_scop_exit_edge->flags = 0;
4363 delete_unreachable_blocks ();
4364 patch_phis_for_virtual_defs ();
4365 patch_scop_exit_phi_args (new_scop_exit_edge, phi_args);
4366 VEC_free (tree, heap, phi_args);
4367 mark_old_loops (scop);
4368 remove_dead_loops ();
4369 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
4371 #ifdef ENABLE_CHECKING
4372 verify_loop_structure ();
4373 verify_dominators (CDI_DOMINATORS);
4374 verify_ssa (false);
4375 #endif
4378 /* Returns the number of data references in SCOP. */
4380 static int
4381 nb_data_refs_in_scop (scop_p scop)
4383 int i;
4384 graphite_bb_p gbb;
4385 int res = 0;
4387 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gbb); i++)
4388 res += VEC_length (data_reference_p, GBB_DATA_REFS (gbb));
4390 return res;
4393 /* Check if a graphite bb can be ignored in graphite. We ignore all
4394 bbs, that only contain code, that will be eliminated later.
4396 TODO: - Move PHI nodes and scalar variables out of these bbs, that only
4397 remain conditions and induction variables. */
4399 static bool
4400 gbb_can_be_ignored (graphite_bb_p gb)
4402 gimple_stmt_iterator gsi;
4403 scop_p scop = GBB_SCOP (gb);
4404 loop_p loop = GBB_BB (gb)->loop_father;
4406 if (VEC_length (data_reference_p, GBB_DATA_REFS(gb)))
4407 return false;
4409 /* Check statements. */
4410 for (gsi = gsi_start_bb (GBB_BB (gb)); !gsi_end_p (gsi); gsi_next (&gsi))
4412 gimple stmt = gsi_stmt (gsi);
4413 switch (gimple_code (stmt))
4415 /* Control flow expressions can be ignored, as they are
4416 represented in the iteration domains and will be
4417 regenerated by graphite. */
4418 case GIMPLE_COND:
4419 case GIMPLE_GOTO:
4420 case GIMPLE_SWITCH:
4421 break;
4423 /* Scalar variables can be ignored, if we can regenerate
4424 them later using their scalar evolution function.
4425 XXX: Just a heuristic, that needs further investigation. */
4426 case GIMPLE_ASSIGN:
4428 tree var = gimple_assign_lhs (stmt);
4429 var = analyze_scalar_evolution (loop, var);
4430 var = instantiate_scev (block_before_scop (scop), loop, var);
4432 if (TREE_CODE (var) == SCEV_NOT_KNOWN)
4433 return false;
4435 break;
4437 /* Otherwise not ignoreable. */
4438 default:
4439 return false;
4443 return true;
4446 /* Remove all ignoreable gbbs from SCOP. */
4448 static void
4449 scop_remove_ignoreable_gbbs (scop_p scop)
4451 graphite_bb_p gb;
4452 int i;
4454 int max_schedule = scop_max_loop_depth (scop) + 1;
4455 lambda_vector last_schedule = lambda_vector_new (max_schedule);
4456 lambda_vector_clear (last_schedule, max_schedule);
4458 /* Update schedules. */
4459 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++)
4461 int nb_loops = gbb_nb_loops (gb);
4463 if (GBB_STATIC_SCHEDULE (gb) [nb_loops] == 0)
4464 last_schedule [nb_loops] = 0;
4466 if (gbb_can_be_ignored (gb))
4468 /* Mark gbb for remove. */
4469 bitmap_clear_bit (SCOP_BBS_B (scop), gb->bb->index);
4470 GBB_SCOP (gb) = NULL;
4471 last_schedule [nb_loops]--;
4473 else
4474 lambda_vector_add (GBB_STATIC_SCHEDULE (gb), last_schedule,
4475 GBB_STATIC_SCHEDULE (gb), nb_loops + 1);
4478 /* Remove gbbs. */
4479 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++)
4480 if (GBB_SCOP (gb) == NULL)
4482 VEC_unordered_remove (graphite_bb_p, SCOP_BBS (scop), i);
4483 free_graphite_bb (gb);
4484 /* XXX: Hackish? But working. */
4485 i--;
4488 graphite_sort_gbbs (scop);
4491 /* Move the loop at index LOOP and insert it before index NEW_LOOP_POS.
4492 This transformartion is only valid, if the loop nest between i and k is
4493 perfectly nested. Therefore we do not need to change the static schedule.
4495 Example:
4497 for (i = 0; i < 50; i++)
4498 for (j ...)
4499 for (k = 5; k < 100; k++)
4502 To move k before i use:
4504 graphite_trans_bb_move_loop (A, 2, 0)
4506 for (k = 5; k < 100; k++)
4507 for (i = 0; i < 50; i++)
4508 for (j ...)
4511 And to move k back:
4513 graphite_trans_bb_move_loop (A, 0, 2)
4515 This function does not check the validity of interchanging loops.
4516 This should be checked before calling this function. */
4518 static void
4519 graphite_trans_bb_move_loop (graphite_bb_p gb, int loop,
4520 int new_loop_pos)
4522 CloogMatrix *domain = GBB_DOMAIN (gb);
4523 int row, j;
4524 loop_p tmp_loop_p;
4526 gcc_assert (loop < gbb_nb_loops (gb)
4527 && new_loop_pos < gbb_nb_loops (gb));
4529 /* Update LOOPS vector. */
4530 tmp_loop_p = VEC_index (loop_p, GBB_LOOPS (gb), loop);
4531 VEC_ordered_remove (loop_p, GBB_LOOPS (gb), loop);
4532 VEC_safe_insert (loop_p, heap, GBB_LOOPS (gb), new_loop_pos, tmp_loop_p);
4534 /* Move the domain columns. */
4535 if (loop < new_loop_pos)
4536 for (row = 0; row < domain->NbRows; row++)
4538 Value tmp;
4539 value_init (tmp);
4540 value_assign (tmp, domain->p[row][loop + 1]);
4542 for (j = loop ; j < new_loop_pos - 1; j++)
4543 value_assign (domain->p[row][j + 1], domain->p[row][j + 2]);
4545 value_assign (domain->p[row][new_loop_pos], tmp);
4546 value_clear (tmp);
4548 else
4549 for (row = 0; row < domain->NbRows; row++)
4551 Value tmp;
4552 value_init (tmp);
4553 value_assign (tmp, domain->p[row][loop + 1]);
4555 for (j = loop ; j > new_loop_pos; j--)
4556 value_assign (domain->p[row][j + 1], domain->p[row][j]);
4558 value_assign (domain->p[row][new_loop_pos + 1], tmp);
4559 value_clear (tmp);
4563 /* Get the index of the column representing constants in the DOMAIN
4564 matrix. */
4566 static int
4567 const_column_index (CloogMatrix *domain)
4569 return domain->NbColumns - 1;
4573 /* Get the first index that is positive or negative, determined
4574 following the value of POSITIVE, in matrix DOMAIN in COLUMN. */
4576 static int
4577 get_first_matching_sign_row_index (CloogMatrix *domain, int column,
4578 bool positive)
4580 int row;
4582 for (row = 0; row < domain->NbRows; row++)
4584 int val = value_get_si (domain->p[row][column]);
4586 if (val > 0 && positive)
4587 return row;
4589 else if (val < 0 && !positive)
4590 return row;
4593 gcc_unreachable ();
4596 /* Get the lower bound of COLUMN in matrix DOMAIN. */
4598 static int
4599 get_lower_bound_row (CloogMatrix *domain, int column)
4601 return get_first_matching_sign_row_index (domain, column, true);
4604 /* Get the upper bound of COLUMN in matrix DOMAIN. */
4606 static int
4607 get_upper_bound_row (CloogMatrix *domain, int column)
4609 return get_first_matching_sign_row_index (domain, column, false);
4612 /* Get the lower bound of LOOP. */
4614 static void
4615 get_lower_bound (CloogMatrix *domain, int loop, Value lower_bound_result)
4617 int lower_bound_row = get_lower_bound_row (domain, loop);
4618 value_assign (lower_bound_result,
4619 domain->p[lower_bound_row][const_column_index(domain)]);
4622 /* Get the upper bound of LOOP. */
4624 static void
4625 get_upper_bound (CloogMatrix *domain, int loop, Value upper_bound_result)
4627 int upper_bound_row = get_upper_bound_row (domain, loop);
4628 value_assign (upper_bound_result,
4629 domain->p[upper_bound_row][const_column_index(domain)]);
4632 /* Strip mines the loop of BB at the position LOOP_DEPTH with STRIDE.
4633 Always valid, but not always a performance improvement. */
4635 static void
4636 graphite_trans_bb_strip_mine (graphite_bb_p gb, int loop_depth, int stride)
4638 int row, col;
4640 CloogMatrix *domain = GBB_DOMAIN (gb);
4641 CloogMatrix *new_domain = cloog_matrix_alloc (domain->NbRows + 3,
4642 domain->NbColumns + 1);
4644 int col_loop_old = loop_depth + 2;
4645 int col_loop_strip = col_loop_old - 1;
4647 Value old_lower_bound;
4648 Value old_upper_bound;
4650 gcc_assert (loop_depth <= gbb_nb_loops (gb) - 1);
4652 VEC_safe_insert (loop_p, heap, GBB_LOOPS (gb), loop_depth, NULL);
4654 GBB_DOMAIN (gb) = new_domain;
4657 nrows = 4, ncols = 4
4658 eq i j c
4659 1 1 0 0
4660 1 -1 0 99
4661 1 0 1 0
4662 1 0 -1 99
4665 /* Move domain. */
4666 for (row = 0; row < domain->NbRows; row++)
4667 for (col = 0; col < domain->NbColumns; col++)
4668 if (col <= loop_depth)
4669 value_assign (new_domain->p[row][col], domain->p[row][col]);
4670 else
4671 value_assign (new_domain->p[row][col + 1], domain->p[row][col]);
4675 nrows = 6, ncols = 5
4676 outer inner
4677 eq i jj j c
4678 1 1 0 0 0
4679 1 -1 0 0 99
4680 1 0 0 1 0
4681 1 0 0 -1 99
4682 0 0 0 0 0
4683 0 0 0 0 0
4684 0 0 0 0 0
4687 row = domain->NbRows;
4689 /* Add outer loop. */
4690 value_init (old_lower_bound);
4691 value_init (old_upper_bound);
4692 get_lower_bound (new_domain, col_loop_old, old_lower_bound);
4693 get_upper_bound (new_domain, col_loop_old, old_upper_bound);
4695 /* Set Lower Bound */
4696 value_set_si (new_domain->p[row][0], 1);
4697 value_set_si (new_domain->p[row][col_loop_strip], 1);
4698 value_assign (new_domain->p[row][const_column_index (new_domain)],
4699 old_lower_bound);
4700 value_clear (old_lower_bound);
4701 row++;
4706 eq i jj j c
4707 1 1 0 0 0
4708 1 -1 0 0 99
4709 1 0 0 1 0 -
4710 1 0 0 -1 99 | copy old lower bound
4711 1 0 1 0 0 <-
4712 0 0 0 0 0
4713 0 0 0 0 0
4717 Value new_upper_bound;
4718 Value strip_size_value;
4720 value_init (new_upper_bound);
4721 value_init (strip_size_value);
4722 value_set_si (strip_size_value, (int) stride);
4724 value_pdivision (new_upper_bound, old_upper_bound, strip_size_value);
4725 value_add_int (new_upper_bound, new_upper_bound, 1);
4727 /* Set Upper Bound */
4728 value_set_si (new_domain->p[row][0], 1);
4729 value_set_si (new_domain->p[row][col_loop_strip], -1);
4730 value_assign (new_domain->p[row][const_column_index (new_domain)],
4731 new_upper_bound);
4733 value_clear (strip_size_value);
4734 value_clear (old_upper_bound);
4735 value_clear (new_upper_bound);
4736 row++;
4740 eq i jj j c
4741 1 1 0 0 0
4742 1 -1 0 0 99
4743 1 0 0 1 0
4744 1 0 0 -1 99
4745 1 0 1 0 0
4746 1 0 -1 0 25 (divide old upper bound with stride)
4747 0 0 0 0 0
4751 row = get_lower_bound_row (new_domain, col_loop_old);
4752 /* Add local variable to keep linear representation. */
4753 value_set_si (new_domain->p[row][0], 1);
4754 value_set_si (new_domain->p[row][const_column_index (new_domain)],0);
4755 value_set_si (new_domain->p[row][col_loop_old], 1);
4756 value_set_si (new_domain->p[row][col_loop_strip], -1*((int)stride));
4761 eq i jj j c
4762 1 1 0 0 0
4763 1 -1 0 0 99
4764 1 0 -1 1 0
4765 1 0 0 -1 99
4766 1 0 1 0 0
4767 1 0 -1 0 25 (divide old upper bound with stride)
4768 0 0 0 0 0
4772 row = new_domain->NbRows-1;
4774 value_set_si (new_domain->p[row][0], 1);
4775 value_set_si (new_domain->p[row][col_loop_old], -1);
4776 value_set_si (new_domain->p[row][col_loop_strip], stride);
4777 value_set_si (new_domain->p[row][const_column_index (new_domain)],
4778 stride-1);
4783 eq i jj j c
4784 1 1 0 0 0 i >= 0
4785 1 -1 0 0 99 99 >= i
4786 1 0 -4 1 0 j >= 4*jj
4787 1 0 0 -1 99 99 >= j
4788 1 0 1 0 0 jj >= 0
4789 1 0 -1 0 25 25 >= jj
4790 0 0 4 -1 3 jj+3 >= j
4793 cloog_matrix_free (domain);
4795 /* Update static schedule. */
4797 int i;
4798 int nb_loops = gbb_nb_loops (gb);
4799 lambda_vector new_schedule = lambda_vector_new (nb_loops + 1);
4801 for (i = 0; i <= loop_depth; i++)
4802 new_schedule[i] = GBB_STATIC_SCHEDULE (gb)[i];
4804 for (i = loop_depth + 1; i <= nb_loops - 2; i++)
4805 new_schedule[i + 2] = GBB_STATIC_SCHEDULE (gb)[i];
4807 GBB_STATIC_SCHEDULE (gb) = new_schedule;
4811 /* Returns true when the strip mining of LOOP_INDEX by STRIDE is
4812 profitable or undecidable. GB is the statement around which the
4813 loops will be strip mined. */
4815 static bool
4816 strip_mine_profitable_p (graphite_bb_p gb, int stride,
4817 int loop_index)
4819 bool res = true;
4820 edge exit = NULL;
4821 tree niter;
4822 loop_p loop;
4823 long niter_val;
4825 loop = VEC_index (loop_p, GBB_LOOPS (gb), loop_index);
4826 exit = single_exit (loop);
4828 niter = find_loop_niter (loop, &exit);
4829 if (niter == chrec_dont_know
4830 || TREE_CODE (niter) != INTEGER_CST)
4831 return true;
4833 niter_val = int_cst_value (niter);
4835 if (niter_val < stride)
4837 res = false;
4838 if (dump_file && (dump_flags & TDF_DETAILS))
4840 fprintf (dump_file, "\nStrip Mining is not profitable for loop %d:",
4841 loop_index);
4842 fprintf (dump_file, "number of iterations is too low.\n");
4846 return res;
4849 /* Determines when the interchange of LOOP_A and LOOP_B belonging to
4850 SCOP is legal. */
4852 static bool
4853 is_interchange_valid (scop_p scop, int loop_a, int loop_b)
4855 bool res;
4856 VEC (ddr_p, heap) *dependence_relations;
4857 VEC (data_reference_p, heap) *datarefs;
4859 struct loop *nest = VEC_index (loop_p, SCOP_LOOP_NEST (scop), loop_a);
4860 int depth = perfect_loop_nest_depth (nest);
4861 lambda_trans_matrix trans;
4863 gcc_assert (loop_a < loop_b);
4865 dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10);
4866 datarefs = VEC_alloc (data_reference_p, heap, 10);
4868 if (!compute_data_dependences_for_loop (nest, true, &datarefs,
4869 &dependence_relations))
4870 return false;
4872 if (dump_file && (dump_flags & TDF_DETAILS))
4873 dump_ddrs (dump_file, dependence_relations);
4875 trans = lambda_trans_matrix_new (depth, depth);
4876 lambda_matrix_id (LTM_MATRIX (trans), depth);
4878 lambda_matrix_row_exchange (LTM_MATRIX (trans), 0, loop_b - loop_a);
4880 if (!lambda_transform_legal_p (trans, depth, dependence_relations))
4882 lambda_matrix_row_exchange (LTM_MATRIX (trans), 0, loop_b - loop_a);
4883 res = false;
4885 else
4886 res = true;
4888 free_dependence_relations (dependence_relations);
4889 free_data_refs (datarefs);
4890 return res;
4893 /* Loop block the LOOPS innermost loops of GB with stride size STRIDE.
4895 Example
4897 for (i = 0; i <= 50; i++=4)
4898 for (k = 0; k <= 100; k++=4)
4899 for (l = 0; l <= 200; l++=4)
4902 To strip mine the two inner most loops with stride = 4 call:
4904 graphite_trans_bb_block (A, 4, 2)
4906 for (i = 0; i <= 50; i++)
4907 for (kk = 0; kk <= 100; kk+=4)
4908 for (ll = 0; ll <= 200; ll+=4)
4909 for (k = kk; k <= min (100, kk + 3); k++)
4910 for (l = ll; l <= min (200, ll + 3); l++)
4914 static bool
4915 graphite_trans_bb_block (graphite_bb_p gb, int stride, int loops)
4917 int i, j;
4918 int nb_loops = gbb_nb_loops (gb);
4919 int start = nb_loops - loops;
4920 scop_p scop = GBB_SCOP (gb);
4922 gcc_assert (scop_contains_loop (scop, gbb_loop (gb)));
4924 for (i = start ; i < nb_loops; i++)
4925 for (j = i + 1; j < nb_loops; j++)
4926 if (!is_interchange_valid (scop, i, j))
4928 if (dump_file && (dump_flags & TDF_DETAILS))
4929 fprintf (dump_file,
4930 "\nInterchange not valid for loops %d and %d:\n", i, j);
4931 return false;
4933 else if (dump_file && (dump_flags & TDF_DETAILS))
4934 fprintf (dump_file,
4935 "\nInterchange valid for loops %d and %d:\n", i, j);
4937 /* Check if strip mining is profitable for every loop. */
4938 for (i = 0; i < nb_loops - start; i++)
4939 if (!strip_mine_profitable_p (gb, stride, start + i))
4940 return false;
4942 /* Strip mine loops. */
4943 for (i = 0; i < nb_loops - start; i++)
4944 graphite_trans_bb_strip_mine (gb, start + 2 * i, stride);
4946 /* Interchange loops. */
4947 for (i = 1; i < nb_loops - start; i++)
4948 graphite_trans_bb_move_loop (gb, start + 2 * i, start + i);
4950 return true;
4953 /* Loop block LOOPS innermost loops of a loop nest. BBS represent the
4954 basic blocks that belong to the loop nest to be blocked. */
4956 static bool
4957 graphite_trans_loop_block (VEC (graphite_bb_p, heap) *bbs, int loops)
4959 graphite_bb_p gb;
4960 int i;
4961 bool transform_done = false;
4963 /* TODO: - Calculate the stride size automatically. */
4964 int stride_size = 64;
4966 /* It makes no sense to block a single loop. */
4967 for (i = 0; VEC_iterate (graphite_bb_p, bbs, i, gb); i++)
4968 if (gbb_nb_loops (gb) < 2)
4969 return false;
4971 for (i = 0; VEC_iterate (graphite_bb_p, bbs, i, gb); i++)
4972 transform_done |= graphite_trans_bb_block (gb, stride_size, loops);
4974 return transform_done;
4977 /* Loop block all basic blocks of SCOP. Return false when the
4978 transform is not performed. */
4980 static bool
4981 graphite_trans_scop_block (scop_p scop)
4983 graphite_bb_p gb;
4984 int i, j;
4985 int last_nb_loops;
4986 int nb_loops;
4987 bool perfect = true;
4988 bool transform_done = false;
4990 VEC (graphite_bb_p, heap) *bbs = VEC_alloc (graphite_bb_p, heap, 3);
4991 int max_schedule = scop_max_loop_depth (scop) + 1;
4992 lambda_vector last_schedule = lambda_vector_new (max_schedule);
4994 if (VEC_length (graphite_bb_p, SCOP_BBS (scop)) == 0)
4995 return false;
4997 /* Get the data of the first bb. */
4998 gb = VEC_index (graphite_bb_p, SCOP_BBS (scop), 0);
4999 last_nb_loops = gbb_nb_loops (gb);
5000 lambda_vector_copy (GBB_STATIC_SCHEDULE (gb), last_schedule,
5001 last_nb_loops + 1);
5002 VEC_safe_push (graphite_bb_p, heap, bbs, gb);
5004 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++)
5006 /* We did the first bb before. */
5007 if (i == 0)
5008 continue;
5010 nb_loops = gbb_nb_loops (gb);
5012 /* If the number of loops is unchanged and only the last element of the
5013 schedule changes, we stay in the loop nest. */
5014 if (nb_loops == last_nb_loops
5015 && (last_schedule [nb_loops + 1]
5016 != GBB_STATIC_SCHEDULE (gb)[nb_loops + 1]))
5018 VEC_safe_push (graphite_bb_p, heap, bbs, gb);
5019 continue;
5022 /* Otherwise, we left the innermost loop. So check, if the last bb was in
5023 a perfect loop nest and how many loops are contained in this perfect
5024 loop nest.
5026 Count the number of zeros from the end of the schedule. They are the
5027 number of surrounding loops.
5029 Example:
5030 last_bb 2 3 2 0 0 0 0 3
5031 bb 2 4 0
5032 <------ j = 4
5034 last_bb 2 3 2 0 0 0 0 3
5035 bb 2 3 2 0 1
5036 <-- j = 2
5038 If there is no zero, there were other bbs in outer loops and the loop
5039 nest is not perfect. */
5040 for (j = last_nb_loops - 1; j >= 0; j--)
5042 if (last_schedule [j] != 0
5043 || (j <= nb_loops && GBB_STATIC_SCHEDULE (gb)[j] == 1))
5045 j--;
5046 break;
5050 j++;
5052 /* Found perfect loop nest. */
5053 if (perfect && last_nb_loops - j > 0)
5054 transform_done |= graphite_trans_loop_block (bbs, last_nb_loops - j);
5056 /* Check if we start with a new loop.
5058 Example:
5060 last_bb 2 3 2 0 0 0 0 3
5061 bb 2 3 2 0 0 1 0
5063 Here we start with the loop "2 3 2 0 0 1"
5065 last_bb 2 3 2 0 0 0 0 3
5066 bb 2 3 2 0 0 1
5068 But here not, so the loop nest can never be perfect. */
5070 perfect = (GBB_STATIC_SCHEDULE (gb)[nb_loops] == 0);
5072 /* Update the last_bb infos. We do not do that for the bbs in the same
5073 loop, as the data we use is not changed. */
5074 last_nb_loops = nb_loops;
5075 lambda_vector_copy (GBB_STATIC_SCHEDULE (gb), last_schedule,
5076 nb_loops + 1);
5077 VEC_truncate (graphite_bb_p, bbs, 0);
5078 VEC_safe_push (graphite_bb_p, heap, bbs, gb);
5081 /* Check if the last loop nest was perfect. It is the same check as above,
5082 but the comparison with the next bb is missing. */
5083 for (j = last_nb_loops - 1; j >= 0; j--)
5084 if (last_schedule [j] != 0)
5086 j--;
5087 break;
5090 j++;
5092 /* Found perfect loop nest. */
5093 if (last_nb_loops - j > 0)
5094 transform_done |= graphite_trans_loop_block (bbs, last_nb_loops - j);
5095 VEC_free (graphite_bb_p, heap, bbs);
5097 if (dump_file && (dump_flags & TDF_DETAILS))
5098 fprintf (dump_file, "\nLoop blocked.\n");
5100 return transform_done;
5103 /* Apply graphite transformations to all the basic blocks of SCOP. */
5105 static bool
5106 graphite_apply_transformations (scop_p scop)
5108 bool transform_done = false;
5110 /* Sort the list of bbs. Keep them always sorted. */
5111 graphite_sort_gbbs (scop);
5112 scop_remove_ignoreable_gbbs (scop);
5114 if (flag_loop_block)
5115 transform_done = graphite_trans_scop_block (scop);
5117 /* Generate code even if we did not apply any real transformation.
5118 This also allows to check the performance for the identity
5119 transformation: GIMPLE -> GRAPHITE -> GIMPLE
5120 Keep in mind that CLooG optimizes in control, so the loop structure
5121 may change, even if we only use -fgraphite-identity. */
5122 if (flag_graphite_identity)
5123 transform_done = true;
5125 return transform_done;
5128 /* We limit all SCoPs to SCoPs, that are completely surrounded by a loop.
5130 Example:
5132 for (i |
5134 for (j | SCoP 1
5135 for (k |
5138 * SCoP frontier, as this line is not surrounded by any loop. *
5140 for (l | SCoP 2
5142 This is necessary as scalar evolution and parameter detection need a
5143 outermost loop to initialize parameters correctly.
5145 TODO: FIX scalar evolution and parameter detection to allow more flexible
5146 SCoP frontiers. */
5148 static void
5149 limit_scops (void)
5151 VEC (sd_region, heap) *tmp_scops = VEC_alloc (sd_region, heap, 3);
5153 int i;
5154 scop_p scop;
5156 for (i = 0; VEC_iterate (scop_p, current_scops, i, scop); i++)
5158 int j;
5159 loop_p loop;
5160 build_scop_bbs (scop);
5161 build_scop_loop_nests (scop);
5163 for (j = 0; VEC_iterate (loop_p, SCOP_LOOP_NEST (scop), j, loop); j++)
5164 if (!loop_in_scop_p (loop_outer (loop), scop))
5166 sd_region open_scop;
5167 open_scop.entry = loop_preheader_edge (loop)->dest;
5168 open_scop.exit = single_exit (loop)->dest;
5169 VEC_safe_push (sd_region, heap, tmp_scops, &open_scop);
5173 free_scops (current_scops);
5174 current_scops = VEC_alloc (scop_p, heap, 3);
5176 create_sese_edges (tmp_scops);
5177 build_graphite_scops (tmp_scops);
5178 VEC_free (sd_region, heap, tmp_scops);
5181 /* Perform a set of linear transforms on the loops of the current
5182 function. */
5184 void
5185 graphite_transform_loops (void)
5187 int i;
5188 scop_p scop;
5190 if (number_of_loops () <= 1)
5191 return;
5193 current_scops = VEC_alloc (scop_p, heap, 3);
5195 calculate_dominance_info (CDI_DOMINATORS);
5196 calculate_dominance_info (CDI_POST_DOMINATORS);
5198 if (dump_file && (dump_flags & TDF_DETAILS))
5199 fprintf (dump_file, "Graphite loop transformations \n");
5201 cloog_initialize ();
5202 build_scops ();
5203 limit_scops ();
5205 if (dump_file && (dump_flags & TDF_DETAILS))
5206 fprintf (dump_file, "\nnumber of SCoPs: %d\n",
5207 VEC_length (scop_p, current_scops));
5209 for (i = 0; VEC_iterate (scop_p, current_scops, i, scop); i++)
5211 build_scop_bbs (scop);
5212 build_scop_loop_nests (scop);
5213 build_scop_canonical_schedules (scop);
5214 build_bb_loops (scop);
5215 find_scop_parameters (scop);
5216 build_scop_context (scop);
5218 if (dump_file && (dump_flags & TDF_DETAILS))
5220 fprintf (dump_file, "\n(In SCoP %d:\n", i);
5221 fprintf (dump_file, "\nnumber of bbs: %d\n",
5222 VEC_length (graphite_bb_p, SCOP_BBS (scop)));
5223 fprintf (dump_file, "\nnumber of loops: %d)\n",
5224 VEC_length (loop_p, SCOP_LOOP_NEST (scop)));
5227 if (!build_scop_iteration_domain (scop))
5228 continue;
5230 build_scop_conditions (scop);
5231 build_scop_data_accesses (scop);
5233 if (dump_file && (dump_flags & TDF_DETAILS))
5235 int nbrefs = nb_data_refs_in_scop (scop);
5236 fprintf (dump_file, "\nnumber of data refs: %d\n", nbrefs);
5239 if (graphite_apply_transformations (scop))
5240 gloog (scop, find_transform (scop));
5243 /* Cleanup. */
5244 free_scops (current_scops);
5245 cloog_finalize ();
5248 #else /* If Cloog is not available: #ifndef HAVE_cloog. */
5250 void
5251 graphite_transform_loops (void)
5253 sorry ("Graphite loop optimizations cannot be used");
5256 #endif