2008-12-09 Richard Guenther <rguenther@suse.de>
[official-gcc.git] / gcc / graphite.c
blob179d8757cabcc1d998010cb8c86825580d9e280d
1 /* Gimple Represented as Polyhedra.
2 Copyright (C) 2006, 2007, 2008 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <sebastian.pop@inria.fr>.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass converts GIMPLE to GRAPHITE, performs some loop
22 transformations and then converts the resulting representation back
23 to GIMPLE.
25 An early description of this pass can be found in the GCC Summit'06
26 paper "GRAPHITE: Polyhedral Analyses and Optimizations for GCC".
27 The wiki page http://gcc.gnu.org/wiki/Graphite contains pointers to
28 the related work.
30 One important document to read is CLooG's internal manual:
31 http://repo.or.cz/w/cloog-ppl.git?a=blob_plain;f=doc/cloog.texi;hb=HEAD
32 that describes the data structure of loops used in this file, and
33 the functions that are used for transforming the code. */
35 #include "config.h"
36 #include "system.h"
37 #include "coretypes.h"
38 #include "tm.h"
39 #include "ggc.h"
40 #include "tree.h"
41 #include "rtl.h"
42 #include "basic-block.h"
43 #include "diagnostic.h"
44 #include "tree-flow.h"
45 #include "toplev.h"
46 #include "tree-dump.h"
47 #include "timevar.h"
48 #include "cfgloop.h"
49 #include "tree-chrec.h"
50 #include "tree-data-ref.h"
51 #include "tree-scalar-evolution.h"
52 #include "tree-pass.h"
53 #include "domwalk.h"
54 #include "pointer-set.h"
55 #include "gimple.h"
57 #ifdef HAVE_cloog
58 #include "cloog/cloog.h"
59 #include "graphite.h"
61 static VEC (scop_p, heap) *current_scops;
63 /* Converts a GMP constant V to a tree and returns it. */
65 static tree
66 gmp_cst_to_tree (Value v)
68 return build_int_cst (integer_type_node, value_get_si (v));
71 /* Debug the list of old induction variables for this SCOP. */
73 void
74 debug_oldivs (scop_p scop)
76 int i;
77 name_tree oldiv;
79 fprintf (stderr, "Old IVs:");
81 for (i = 0; VEC_iterate (name_tree, SCOP_OLDIVS (scop), i, oldiv); i++)
83 fprintf (stderr, "(");
84 print_generic_expr (stderr, oldiv->t, 0);
85 fprintf (stderr, ", %s, %d)\n", oldiv->name, oldiv->loop->num);
87 fprintf (stderr, "\n");
90 /* Debug the loops around basic block GB. */
92 void
93 debug_loop_vec (graphite_bb_p gb)
95 int i;
96 loop_p loop;
98 fprintf (stderr, "Loop Vec:");
100 for (i = 0; VEC_iterate (loop_p, GBB_LOOPS (gb), i, loop); i++)
101 fprintf (stderr, "%d: %d, ", i, loop ? loop->num : -1);
103 fprintf (stderr, "\n");
106 /* Returns true if stack ENTRY is a constant. */
108 static bool
109 iv_stack_entry_is_constant (iv_stack_entry *entry)
111 return entry->kind == iv_stack_entry_const;
114 /* Returns true if stack ENTRY is an induction variable. */
116 static bool
117 iv_stack_entry_is_iv (iv_stack_entry *entry)
119 return entry->kind == iv_stack_entry_iv;
122 /* Push (IV, NAME) on STACK. */
124 static void
125 loop_iv_stack_push_iv (loop_iv_stack stack, tree iv, const char *name)
127 iv_stack_entry *entry = XNEW (iv_stack_entry);
128 name_tree named_iv = XNEW (struct name_tree);
130 named_iv->t = iv;
131 named_iv->name = name;
133 entry->kind = iv_stack_entry_iv;
134 entry->data.iv = named_iv;
136 VEC_safe_push (iv_stack_entry_p, heap, *stack, entry);
139 /* Inserts a CONSTANT in STACK at INDEX. */
141 static void
142 loop_iv_stack_insert_constant (loop_iv_stack stack, int index,
143 tree constant)
145 iv_stack_entry *entry = XNEW (iv_stack_entry);
147 entry->kind = iv_stack_entry_const;
148 entry->data.constant = constant;
150 VEC_safe_insert (iv_stack_entry_p, heap, *stack, index, entry);
153 /* Pops and frees an element out of STACK. */
155 static void
156 loop_iv_stack_pop (loop_iv_stack stack)
158 iv_stack_entry_p entry = VEC_pop (iv_stack_entry_p, *stack);
160 free (entry->data.iv);
161 free (entry);
164 /* Get the IV at INDEX in STACK. */
166 static tree
167 loop_iv_stack_get_iv (loop_iv_stack stack, int index)
169 iv_stack_entry_p entry = VEC_index (iv_stack_entry_p, *stack, index);
171 tree result = NULL;
173 if (entry->kind != iv_stack_entry_const)
174 result = entry->data.iv->t;
176 return result;
179 /* Get the IV from its NAME in STACK. */
181 static tree
182 loop_iv_stack_get_iv_from_name (loop_iv_stack stack, const char* name)
184 int i;
185 iv_stack_entry_p entry;
187 for (i = 0; VEC_iterate (iv_stack_entry_p, *stack, i, entry); i++)
189 name_tree iv = entry->data.iv;
190 if (!strcmp (name, iv->name))
191 return iv->t;
194 return NULL;
197 /* Prints on stderr the contents of STACK. */
199 void
200 debug_loop_iv_stack (loop_iv_stack stack)
202 int i;
203 iv_stack_entry_p entry;
204 bool first = true;
206 fprintf (stderr, "(");
208 for (i = 0; VEC_iterate (iv_stack_entry_p, *stack, i, entry); i++)
210 if (first)
211 first = false;
212 else
213 fprintf (stderr, " ");
215 if (iv_stack_entry_is_iv (entry))
217 name_tree iv = entry->data.iv;
218 fprintf (stderr, "%s:", iv->name);
219 print_generic_expr (stderr, iv->t, 0);
221 else
223 tree constant = entry->data.constant;
224 print_generic_expr (stderr, constant, 0);
225 fprintf (stderr, ":");
226 print_generic_expr (stderr, constant, 0);
230 fprintf (stderr, ")\n");
233 /* Frees STACK. */
235 static void
236 free_loop_iv_stack (loop_iv_stack stack)
238 int i;
239 iv_stack_entry_p entry;
241 for (i = 0; VEC_iterate (iv_stack_entry_p, *stack, i, entry); i++)
243 free (entry->data.iv);
244 free (entry);
247 VEC_free (iv_stack_entry_p, heap, *stack);
250 /* Inserts constants derived from the USER_STMT argument list into the
251 STACK. This is needed to map old ivs to constants when loops have
252 been eliminated. */
254 static void
255 loop_iv_stack_patch_for_consts (loop_iv_stack stack,
256 struct clast_user_stmt *user_stmt)
258 struct clast_stmt *t;
259 int index = 0;
260 for (t = user_stmt->substitutions; t; t = t->next)
262 struct clast_term *term = (struct clast_term*)
263 ((struct clast_assignment *)t)->RHS;
265 /* FIXME: What should be done with expr_bin, expr_red? */
266 if (((struct clast_assignment *)t)->RHS->type == expr_term
267 && !term->var)
269 tree value = gmp_cst_to_tree (term->val);
270 loop_iv_stack_insert_constant (stack, index, value);
272 index = index + 1;
276 /* Removes all constants in the iv STACK. */
278 static void
279 loop_iv_stack_remove_constants (loop_iv_stack stack)
281 int i;
282 iv_stack_entry *entry;
284 for (i = 0; VEC_iterate (iv_stack_entry_p, *stack, i, entry);)
286 if (iv_stack_entry_is_constant (entry))
288 free (VEC_index (iv_stack_entry_p, *stack, i));
289 VEC_ordered_remove (iv_stack_entry_p, *stack, i);
291 else
292 i++;
296 /* In SCOP, get the induction variable from NAME. OLD is the original
297 loop that contained the definition of NAME. */
299 static name_tree
300 get_old_iv_from_ssa_name (scop_p scop, loop_p old, tree name)
302 tree var = SSA_NAME_VAR (name);
303 int i;
304 name_tree oldiv;
306 for (i = 0; VEC_iterate (name_tree, SCOP_OLDIVS (scop), i, oldiv); i++)
308 loop_p current = old;
310 while (current)
312 if (var == oldiv->t
313 && oldiv->loop == current)
314 return oldiv;
316 current = loop_outer (current);
319 return NULL;
323 /* Returns a new loop_to_cloog_loop_str structure. */
325 static inline struct loop_to_cloog_loop_str *
326 new_loop_to_cloog_loop_str (int loop_num,
327 int loop_position,
328 CloogLoop *cloog_loop)
330 struct loop_to_cloog_loop_str *result;
332 result = XNEW (struct loop_to_cloog_loop_str);
333 result->loop_num = loop_num;
334 result->cloog_loop = cloog_loop;
335 result->loop_position = loop_position;
337 return result;
340 /* Hash function for SCOP_LOOP2CLOOG_LOOP hash table. */
342 static hashval_t
343 hash_loop_to_cloog_loop (const void *elt)
345 return ((const struct loop_to_cloog_loop_str *) elt)->loop_num;
348 /* Equality function for SCOP_LOOP2CLOOG_LOOP hash table. */
350 static int
351 eq_loop_to_cloog_loop (const void *el1, const void *el2)
353 const struct loop_to_cloog_loop_str *elt1, *elt2;
355 elt1 = (const struct loop_to_cloog_loop_str *) el1;
356 elt2 = (const struct loop_to_cloog_loop_str *) el2;
357 return elt1->loop_num == elt2->loop_num;
360 /* Compares two graphite bbs and returns an integer less than, equal to, or
361 greater than zero if the first argument is considered to be respectively
362 less than, equal to, or greater than the second.
363 We compare using the lexicographic order of the static schedules. */
365 static int
366 gbb_compare (const void *p_1, const void *p_2)
368 const struct graphite_bb *const gbb_1
369 = *(const struct graphite_bb *const*) p_1;
370 const struct graphite_bb *const gbb_2
371 = *(const struct graphite_bb *const*) p_2;
373 return lambda_vector_compare (GBB_STATIC_SCHEDULE (gbb_1),
374 gbb_nb_loops (gbb_1) + 1,
375 GBB_STATIC_SCHEDULE (gbb_2),
376 gbb_nb_loops (gbb_2) + 1);
379 /* Sort graphite bbs in SCOP. */
381 static void
382 graphite_sort_gbbs (scop_p scop)
384 VEC (graphite_bb_p, heap) *bbs = SCOP_BBS (scop);
386 qsort (VEC_address (graphite_bb_p, bbs),
387 VEC_length (graphite_bb_p, bbs),
388 sizeof (graphite_bb_p), gbb_compare);
391 /* Dump conditions of a graphite basic block GBB on FILE. */
393 static void
394 dump_gbb_conditions (FILE *file, graphite_bb_p gbb)
396 int i;
397 gimple stmt;
398 VEC (gimple, heap) *conditions = GBB_CONDITIONS (gbb);
400 if (VEC_empty (gimple, conditions))
401 return;
403 fprintf (file, "\tbb %d\t: cond = {", GBB_BB (gbb)->index);
405 for (i = 0; VEC_iterate (gimple, conditions, i, stmt); i++)
406 print_gimple_stmt (file, stmt, 0, 0);
408 fprintf (file, "}\n");
411 /* Converts the graphite scheduling function into a cloog scattering
412 matrix. This scattering matrix is used to limit the possible cloog
413 output to valid programs in respect to the scheduling function.
415 SCATTERING_DIMENSIONS specifies the dimensionality of the scattering
416 matrix. CLooG 0.14.0 and previous versions require, that all scattering
417 functions of one CloogProgram have the same dimensionality, therefore we
418 allow to specify it. (Should be removed in future versions) */
420 static CloogMatrix *
421 schedule_to_scattering (graphite_bb_p gb, int scattering_dimensions)
423 int i;
424 scop_p scop = GBB_SCOP (gb);
426 int nb_iterators = gbb_nb_loops (gb);
428 /* The cloog scattering matrix consists of these colums:
429 1 col = Eq/Inq,
430 scattering_dimensions cols = Scattering dimensions,
431 nb_iterators cols = bb's iterators,
432 scop_nb_params cols = Parameters,
433 1 col = Constant 1.
435 Example:
437 scattering_dimensions = 5
438 max_nb_iterators = 2
439 nb_iterators = 1
440 scop_nb_params = 2
442 Schedule:
446 Scattering Matrix:
447 s1 s2 s3 s4 s5 i p1 p2 1
448 1 0 0 0 0 0 0 0 -4 = 0
449 0 1 0 0 0 -1 0 0 0 = 0
450 0 0 1 0 0 0 0 0 -5 = 0 */
451 int nb_params = scop_nb_params (scop);
452 int nb_cols = 1 + scattering_dimensions + nb_iterators + nb_params + 1;
453 int col_const = nb_cols - 1;
454 int col_iter_offset = 1 + scattering_dimensions;
456 CloogMatrix *scat = cloog_matrix_alloc (scattering_dimensions, nb_cols);
458 gcc_assert (scattering_dimensions >= nb_iterators * 2 + 1);
460 /* Initialize the identity matrix. */
461 for (i = 0; i < scattering_dimensions; i++)
462 value_set_si (scat->p[i][i + 1], 1);
464 /* Textual order outside the first loop */
465 value_set_si (scat->p[0][col_const], -GBB_STATIC_SCHEDULE (gb)[0]);
467 /* For all surrounding loops. */
468 for (i = 0; i < nb_iterators; i++)
470 int schedule = GBB_STATIC_SCHEDULE (gb)[i + 1];
472 /* Iterations of this loop. */
473 value_set_si (scat->p[2 * i + 1][col_iter_offset + i], -1);
475 /* Textual order inside this loop. */
476 value_set_si (scat->p[2 * i + 2][col_const], -schedule);
479 return scat;
482 /* Print the schedules of GB to FILE with INDENT white spaces before.
483 VERBOSITY determines how verbose the code pretty printers are. */
485 void
486 print_graphite_bb (FILE *file, graphite_bb_p gb, int indent, int verbosity)
488 CloogMatrix *scattering;
489 int i;
490 loop_p loop;
491 fprintf (file, "\nGBB (\n");
493 print_loops_bb (file, GBB_BB (gb), indent+2, verbosity);
495 if (GBB_DOMAIN (gb))
497 fprintf (file, " (domain: \n");
498 cloog_matrix_print (dump_file, GBB_DOMAIN (gb));
499 fprintf (file, " )\n");
502 if (GBB_STATIC_SCHEDULE (gb))
504 fprintf (file, " (static schedule: ");
505 print_lambda_vector (file, GBB_STATIC_SCHEDULE (gb),
506 gbb_nb_loops (gb) + 1);
507 fprintf (file, " )\n");
510 if (GBB_LOOPS (gb))
512 fprintf (file, " (contained loops: \n");
513 for (i = 0; VEC_iterate (loop_p, GBB_LOOPS (gb), i, loop); i++)
514 if (loop == NULL)
515 fprintf (file, " iterator %d => NULL \n", i);
516 else
517 fprintf (file, " iterator %d => loop %d \n", i,
518 loop->num);
519 fprintf (file, " )\n");
522 if (GBB_DATA_REFS (gb))
523 dump_data_references (file, GBB_DATA_REFS (gb));
525 if (GBB_CONDITIONS (gb))
527 fprintf (file, " (conditions: \n");
528 dump_gbb_conditions (dump_file, gb);
529 fprintf (file, " )\n");
532 if (GBB_SCOP (gb)
533 && GBB_STATIC_SCHEDULE (gb))
535 fprintf (file, " (scattering: \n");
536 scattering = schedule_to_scattering (gb, 2 * gbb_nb_loops (gb) + 1);
537 cloog_matrix_print (file, scattering);
538 cloog_matrix_free (scattering);
539 fprintf (file, " )\n");
542 fprintf (file, ")\n");
545 /* Print to STDERR the schedules of GB with VERBOSITY level. */
547 void
548 debug_gbb (graphite_bb_p gb, int verbosity)
550 print_graphite_bb (stderr, gb, 0, verbosity);
554 /* Print SCOP to FILE. VERBOSITY determines how verbose the pretty
555 printers are. */
557 static void
558 print_scop (FILE *file, scop_p scop, int verbosity)
560 if (scop == NULL)
561 return;
563 fprintf (file, "\nSCoP_%d_%d (\n",
564 SCOP_ENTRY (scop)->index, SCOP_EXIT (scop)->index);
566 fprintf (file, " (cloog: \n");
567 cloog_program_print (file, SCOP_PROG (scop));
568 fprintf (file, " )\n");
570 if (SCOP_BBS (scop))
572 graphite_bb_p gb;
573 int i;
575 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++)
576 print_graphite_bb (file, gb, 0, verbosity);
579 fprintf (file, ")\n");
582 /* Print all the SCOPs to FILE. VERBOSITY determines how verbose the
583 code pretty printers are. */
585 static void
586 print_scops (FILE *file, int verbosity)
588 int i;
589 scop_p scop;
591 for (i = 0; VEC_iterate (scop_p, current_scops, i, scop); i++)
592 print_scop (file, scop, verbosity);
595 /* Debug SCOP. VERBOSITY determines how verbose the code pretty
596 printers are. */
598 void
599 debug_scop (scop_p scop, int verbosity)
601 print_scop (stderr, scop, verbosity);
604 /* Debug all SCOPs from CURRENT_SCOPS. VERBOSITY determines how
605 verbose the code pretty printers are. */
607 void
608 debug_scops (int verbosity)
610 print_scops (stderr, verbosity);
613 /* Return true when BB is contained in SCOP. */
615 static inline bool
616 bb_in_scop_p (basic_block bb, scop_p scop)
618 return bitmap_bit_p (SCOP_BBS_B (scop), bb->index);
621 /* Pretty print to FILE the SCOP in DOT format. */
623 static void
624 dot_scop_1 (FILE *file, scop_p scop)
626 edge e;
627 edge_iterator ei;
628 basic_block bb;
629 basic_block entry = SCOP_ENTRY (scop);
630 basic_block exit = SCOP_EXIT (scop);
632 fprintf (file, "digraph SCoP_%d_%d {\n", entry->index,
633 exit->index);
635 FOR_ALL_BB (bb)
637 if (bb == entry)
638 fprintf (file, "%d [shape=triangle];\n", bb->index);
640 if (bb == exit)
641 fprintf (file, "%d [shape=box];\n", bb->index);
643 if (bb_in_scop_p (bb, scop))
644 fprintf (file, "%d [color=red];\n", bb->index);
646 FOR_EACH_EDGE (e, ei, bb->succs)
647 fprintf (file, "%d -> %d;\n", bb->index, e->dest->index);
650 fputs ("}\n\n", file);
653 /* Display SCOP using dotty. */
655 void
656 dot_scop (scop_p scop)
658 dot_scop_1 (stderr, scop);
661 /* Pretty print all SCoPs in DOT format and mark them with different colors.
662 If there are not enough colors, paint later SCoPs gray.
663 Special nodes:
664 - "*" after the node number: entry of a SCoP,
665 - "#" after the node number: exit of a SCoP,
666 - "()" entry or exit not part of SCoP. */
668 static void
669 dot_all_scops_1 (FILE *file)
671 basic_block bb;
672 edge e;
673 edge_iterator ei;
674 scop_p scop;
675 const char* color;
676 int i;
678 /* Disable debugging while printing graph. */
679 int tmp_dump_flags = dump_flags;
680 dump_flags = 0;
682 fprintf (file, "digraph all {\n");
684 FOR_ALL_BB (bb)
686 int part_of_scop = false;
688 /* Use HTML for every bb label. So we are able to print bbs
689 which are part of two different SCoPs, with two different
690 background colors. */
691 fprintf (file, "%d [label=<\n <TABLE BORDER=\"0\" CELLBORDER=\"1\" ",
692 bb->index);
693 fprintf (file, "CELLSPACING=\"0\">\n");
695 /* Select color for SCoP. */
696 for (i = 0; VEC_iterate (scop_p, current_scops, i, scop); i++)
697 if (bb_in_scop_p (bb, scop)
698 || (SCOP_EXIT (scop) == bb)
699 || (SCOP_ENTRY (scop) == bb))
701 switch (i % 17)
703 case 0: /* red */
704 color = "#e41a1c";
705 break;
706 case 1: /* blue */
707 color = "#377eb8";
708 break;
709 case 2: /* green */
710 color = "#4daf4a";
711 break;
712 case 3: /* purple */
713 color = "#984ea3";
714 break;
715 case 4: /* orange */
716 color = "#ff7f00";
717 break;
718 case 5: /* yellow */
719 color = "#ffff33";
720 break;
721 case 6: /* brown */
722 color = "#a65628";
723 break;
724 case 7: /* rose */
725 color = "#f781bf";
726 break;
727 case 8:
728 color = "#8dd3c7";
729 break;
730 case 9:
731 color = "#ffffb3";
732 break;
733 case 10:
734 color = "#bebada";
735 break;
736 case 11:
737 color = "#fb8072";
738 break;
739 case 12:
740 color = "#80b1d3";
741 break;
742 case 13:
743 color = "#fdb462";
744 break;
745 case 14:
746 color = "#b3de69";
747 break;
748 case 15:
749 color = "#fccde5";
750 break;
751 case 16:
752 color = "#bc80bd";
753 break;
754 default: /* gray */
755 color = "#999999";
758 fprintf (file, " <TR><TD WIDTH=\"50\" BGCOLOR=\"%s\">", color);
760 if (!bb_in_scop_p (bb, scop))
761 fprintf (file, " (");
763 if (bb == SCOP_ENTRY (scop)
764 && bb == SCOP_EXIT (scop))
765 fprintf (file, " %d*# ", bb->index);
766 else if (bb == SCOP_ENTRY (scop))
767 fprintf (file, " %d* ", bb->index);
768 else if (bb == SCOP_EXIT (scop))
769 fprintf (file, " %d# ", bb->index);
770 else
771 fprintf (file, " %d ", bb->index);
773 if (!bb_in_scop_p (bb, scop))
774 fprintf (file, ")");
776 fprintf (file, "</TD></TR>\n");
777 part_of_scop = true;
780 if (!part_of_scop)
782 fprintf (file, " <TR><TD WIDTH=\"50\" BGCOLOR=\"#ffffff\">");
783 fprintf (file, " %d </TD></TR>\n", bb->index);
786 fprintf (file, " </TABLE>>, shape=box, style=\"setlinewidth(0)\"]\n");
789 FOR_ALL_BB (bb)
791 FOR_EACH_EDGE (e, ei, bb->succs)
792 fprintf (file, "%d -> %d;\n", bb->index, e->dest->index);
795 fputs ("}\n\n", file);
797 /* Enable debugging again. */
798 dump_flags = tmp_dump_flags;
801 /* Display all SCoPs using dotty. */
803 void
804 dot_all_scops (void)
806 /* When debugging, enable the following code. This cannot be used
807 in production compilers because it calls "system". */
808 #if 0
809 FILE *stream = fopen ("/tmp/allscops.dot", "w");
810 gcc_assert (stream);
812 dot_all_scops_1 (stream);
813 fclose (stream);
815 system ("dotty /tmp/allscops.dot");
816 #else
817 dot_all_scops_1 (stderr);
818 #endif
821 /* Returns true when LOOP is in SCOP. */
823 static inline bool
824 loop_in_scop_p (struct loop *loop, scop_p scop)
826 return (bb_in_scop_p (loop->header, scop)
827 && bb_in_scop_p (loop->latch, scop));
830 /* Returns the outermost loop in SCOP that contains BB. */
832 static struct loop *
833 outermost_loop_in_scop (scop_p scop, basic_block bb)
835 struct loop *nest;
837 nest = bb->loop_father;
838 while (loop_outer (nest) && loop_in_scop_p (loop_outer (nest), scop))
839 nest = loop_outer (nest);
841 return nest;
844 /* Returns the block preceding the entry of SCOP. */
846 static basic_block
847 block_before_scop (scop_p scop)
849 return SESE_ENTRY (SCOP_REGION (scop))->src;
852 /* Return true when EXPR is an affine function in LOOP with parameters
853 instantiated relative to SCOP_ENTRY. */
855 static bool
856 loop_affine_expr (basic_block scop_entry, struct loop *loop, tree expr)
858 int n = loop->num;
859 tree scev = analyze_scalar_evolution (loop, expr);
861 scev = instantiate_scev (scop_entry, loop, scev);
863 return (evolution_function_is_invariant_p (scev, n)
864 || evolution_function_is_affine_multivariate_p (scev, n));
867 /* Return false if the tree_code of the operand OP or any of its operands
868 is component_ref. */
870 static bool
871 exclude_component_ref (tree op)
873 int i;
874 int len;
876 if (op)
878 if (TREE_CODE (op) == COMPONENT_REF)
879 return false;
880 else
882 len = TREE_OPERAND_LENGTH (op);
883 for (i = 0; i < len; ++i)
885 if (!exclude_component_ref (TREE_OPERAND (op, i)))
886 return false;
891 return true;
894 /* Return true if the operand OP is simple. */
896 static bool
897 is_simple_operand (loop_p loop, gimple stmt, tree op)
899 /* It is not a simple operand when it is a declaration, */
900 if (DECL_P (op)
901 /* or a structure, */
902 || AGGREGATE_TYPE_P (TREE_TYPE (op))
903 /* or a memory access that cannot be analyzed by the data
904 reference analysis. */
905 || ((handled_component_p (op) || INDIRECT_REF_P (op))
906 && !stmt_simple_memref_p (loop, stmt, op)))
907 return false;
909 return exclude_component_ref (op);
912 /* Return true only when STMT is simple enough for being handled by
913 Graphite. This depends on SCOP_ENTRY, as the parametetrs are
914 initialized relatively to this basic block. */
916 static bool
917 stmt_simple_for_scop_p (basic_block scop_entry, gimple stmt)
919 basic_block bb = gimple_bb (stmt);
920 struct loop *loop = bb->loop_father;
922 /* GIMPLE_ASM and GIMPLE_CALL may embed arbitrary side effects.
923 Calls have side-effects, except those to const or pure
924 functions. */
925 if (gimple_has_volatile_ops (stmt)
926 || (gimple_code (stmt) == GIMPLE_CALL
927 && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)))
928 || (gimple_code (stmt) == GIMPLE_ASM))
929 return false;
931 switch (gimple_code (stmt))
933 case GIMPLE_RETURN:
934 case GIMPLE_LABEL:
935 return true;
937 case GIMPLE_COND:
939 tree op;
940 ssa_op_iter op_iter;
941 enum tree_code code = gimple_cond_code (stmt);
943 /* We can only handle this kind of conditional expressions.
944 For inequalities like "if (i != 3 * k)" we need unions of
945 polyhedrons. Expressions like "if (a)" or "if (a == 15)" need
946 them for the else branch. */
947 if (!(code == LT_EXPR
948 || code == GT_EXPR
949 || code == LE_EXPR
950 || code == GE_EXPR))
951 return false;
953 if (!scop_entry)
954 return false;
956 FOR_EACH_SSA_TREE_OPERAND (op, stmt, op_iter, SSA_OP_ALL_USES)
957 if (!loop_affine_expr (scop_entry, loop, op))
958 return false;
960 return true;
963 case GIMPLE_ASSIGN:
965 enum tree_code code = gimple_assign_rhs_code (stmt);
967 switch (get_gimple_rhs_class (code))
969 case GIMPLE_UNARY_RHS:
970 case GIMPLE_SINGLE_RHS:
971 return (is_simple_operand (loop, stmt, gimple_assign_lhs (stmt))
972 && is_simple_operand (loop, stmt, gimple_assign_rhs1 (stmt)));
974 case GIMPLE_BINARY_RHS:
975 return (is_simple_operand (loop, stmt, gimple_assign_lhs (stmt))
976 && is_simple_operand (loop, stmt, gimple_assign_rhs1 (stmt))
977 && is_simple_operand (loop, stmt, gimple_assign_rhs2 (stmt)));
979 case GIMPLE_INVALID_RHS:
980 default:
981 gcc_unreachable ();
985 case GIMPLE_CALL:
987 size_t i;
988 size_t n = gimple_call_num_args (stmt);
989 tree lhs = gimple_call_lhs (stmt);
991 for (i = 0; i < n; i++)
993 tree arg = gimple_call_arg (stmt, i);
995 if (!(is_simple_operand (loop, stmt, lhs)
996 && is_simple_operand (loop, stmt, arg)))
997 return false;
1000 return true;
1003 default:
1004 /* These nodes cut a new scope. */
1005 return false;
1008 return false;
1011 /* Returns the statement of BB that contains a harmful operation: that
1012 can be a function call with side effects, the induction variables
1013 are not linear with respect to SCOP_ENTRY, etc. The current open
1014 scop should end before this statement. */
1016 static gimple
1017 harmful_stmt_in_bb (basic_block scop_entry, basic_block bb)
1019 gimple_stmt_iterator gsi;
1021 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1022 if (!stmt_simple_for_scop_p (scop_entry, gsi_stmt (gsi)))
1023 return gsi_stmt (gsi);
1025 return NULL;
1028 /* Store the GRAPHITE representation of BB. */
1030 static void
1031 new_graphite_bb (scop_p scop, basic_block bb)
1033 struct graphite_bb *gbb = XNEW (struct graphite_bb);
1035 bb->aux = gbb;
1036 GBB_BB (gbb) = bb;
1037 GBB_SCOP (gbb) = scop;
1038 GBB_DATA_REFS (gbb) = NULL;
1039 GBB_DOMAIN (gbb) = NULL;
1040 GBB_CONDITIONS (gbb) = NULL;
1041 GBB_CONDITION_CASES (gbb) = NULL;
1042 GBB_LOOPS (gbb) = NULL;
1043 VEC_safe_push (graphite_bb_p, heap, SCOP_BBS (scop), gbb);
1044 bitmap_set_bit (SCOP_BBS_B (scop), bb->index);
1047 /* Frees GBB. */
1049 static void
1050 free_graphite_bb (struct graphite_bb *gbb)
1052 if (GBB_DOMAIN (gbb))
1053 cloog_matrix_free (GBB_DOMAIN (gbb));
1055 free_data_refs (GBB_DATA_REFS (gbb));
1056 VEC_free (gimple, heap, GBB_CONDITIONS (gbb));
1057 VEC_free (gimple, heap, GBB_CONDITION_CASES (gbb));
1058 VEC_free (loop_p, heap, GBB_LOOPS (gbb));
1060 GBB_BB (gbb)->aux = 0;
1061 XDELETE (gbb);
1064 /* Creates a new scop starting with ENTRY. */
1066 static scop_p
1067 new_scop (edge entry, edge exit)
1069 scop_p scop = XNEW (struct scop);
1071 gcc_assert (entry && exit);
1073 SCOP_REGION (scop) = XNEW (struct sese);
1074 SESE_ENTRY (SCOP_REGION (scop)) = entry;
1075 SESE_EXIT (SCOP_REGION (scop)) = exit;
1076 SCOP_BBS (scop) = VEC_alloc (graphite_bb_p, heap, 3);
1077 SCOP_OLDIVS (scop) = VEC_alloc (name_tree, heap, 3);
1078 SCOP_BBS_B (scop) = BITMAP_ALLOC (NULL);
1079 SCOP_LOOPS (scop) = BITMAP_ALLOC (NULL);
1080 SCOP_LOOP_NEST (scop) = VEC_alloc (loop_p, heap, 3);
1081 SCOP_PARAMS (scop) = VEC_alloc (name_tree, heap, 3);
1082 SCOP_PROG (scop) = cloog_program_malloc ();
1083 cloog_program_set_names (SCOP_PROG (scop), cloog_names_malloc ());
1084 SCOP_LOOP2CLOOG_LOOP (scop) = htab_create (10, hash_loop_to_cloog_loop,
1085 eq_loop_to_cloog_loop,
1086 free);
1087 return scop;
1090 /* Deletes SCOP. */
1092 static void
1093 free_scop (scop_p scop)
1095 int i;
1096 name_tree p;
1097 struct graphite_bb *gb;
1098 name_tree iv;
1100 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++)
1101 free_graphite_bb (gb);
1103 VEC_free (graphite_bb_p, heap, SCOP_BBS (scop));
1104 BITMAP_FREE (SCOP_BBS_B (scop));
1105 BITMAP_FREE (SCOP_LOOPS (scop));
1106 VEC_free (loop_p, heap, SCOP_LOOP_NEST (scop));
1108 for (i = 0; VEC_iterate (name_tree, SCOP_OLDIVS (scop), i, iv); i++)
1109 free (iv);
1110 VEC_free (name_tree, heap, SCOP_OLDIVS (scop));
1112 for (i = 0; VEC_iterate (name_tree, SCOP_PARAMS (scop), i, p); i++)
1113 free (p);
1115 VEC_free (name_tree, heap, SCOP_PARAMS (scop));
1116 cloog_program_free (SCOP_PROG (scop));
1117 htab_delete (SCOP_LOOP2CLOOG_LOOP (scop));
1118 XDELETE (SCOP_REGION (scop));
1119 XDELETE (scop);
1122 /* Deletes all scops in SCOPS. */
1124 static void
1125 free_scops (VEC (scop_p, heap) *scops)
1127 int i;
1128 scop_p scop;
1130 for (i = 0; VEC_iterate (scop_p, scops, i, scop); i++)
1131 free_scop (scop);
1133 VEC_free (scop_p, heap, scops);
1136 typedef enum gbb_type {
1137 GBB_UNKNOWN,
1138 GBB_LOOP_SING_EXIT_HEADER,
1139 GBB_LOOP_MULT_EXIT_HEADER,
1140 GBB_LOOP_EXIT,
1141 GBB_COND_HEADER,
1142 GBB_SIMPLE,
1143 GBB_LAST
1144 } gbb_type;
1146 /* Detect the type of BB. Loop headers are only marked, if they are
1147 new. This means their loop_father is different to LAST_LOOP.
1148 Otherwise they are treated like any other bb and their type can be
1149 any other type. */
1151 static gbb_type
1152 get_bb_type (basic_block bb, struct loop *last_loop)
1154 VEC (basic_block, heap) *dom;
1155 int nb_dom, nb_suc;
1156 struct loop *loop = bb->loop_father;
1158 /* Check, if we entry into a new loop. */
1159 if (loop != last_loop)
1161 if (single_exit (loop) != NULL)
1162 return GBB_LOOP_SING_EXIT_HEADER;
1163 else if (loop->num != 0)
1164 return GBB_LOOP_MULT_EXIT_HEADER;
1165 else
1166 return GBB_COND_HEADER;
1169 dom = get_dominated_by (CDI_DOMINATORS, bb);
1170 nb_dom = VEC_length (basic_block, dom);
1171 VEC_free (basic_block, heap, dom);
1173 if (nb_dom == 0)
1174 return GBB_LAST;
1176 nb_suc = VEC_length (edge, bb->succs);
1178 if (nb_dom == 1 && nb_suc == 1)
1179 return GBB_SIMPLE;
1181 return GBB_COND_HEADER;
1184 /* A SCoP detection region, defined using bbs as borders.
1185 All control flow touching this region, comes in passing basic_block ENTRY and
1186 leaves passing basic_block EXIT. By using bbs instead of edges for the
1187 borders we are able to represent also regions that do not have a single
1188 entry or exit edge.
1189 But as they have a single entry basic_block and a single exit basic_block, we
1190 are able to generate for every sd_region a single entry and exit edge.
1194 3 <- entry
1197 / \ This region contains: {3, 4, 5, 6, 7, 8}
1202 9 <- exit */
1205 typedef struct sd_region_p
1207 /* The entry bb dominates all bbs in the sd_region. It is part of the
1208 region. */
1209 basic_block entry;
1211 /* The exit bb postdominates all bbs in the sd_region, but is not
1212 part of the region. */
1213 basic_block exit;
1214 } sd_region;
1216 DEF_VEC_O(sd_region);
1217 DEF_VEC_ALLOC_O(sd_region, heap);
1220 /* Moves the scops from SOURCE to TARGET and clean up SOURCE. */
1222 static void
1223 move_sd_regions (VEC (sd_region, heap) **source, VEC (sd_region, heap) **target)
1225 sd_region *s;
1226 int i;
1228 for (i = 0; VEC_iterate (sd_region, *source, i, s); i++)
1229 VEC_safe_push (sd_region, heap, *target, s);
1231 VEC_free (sd_region, heap, *source);
1234 /* Store information needed by scopdet_* functions. */
1236 struct scopdet_info
1238 /* Where the last open scop would stop if the current BB is harmful. */
1239 basic_block last;
1241 /* Where the next scop would start if the current BB is harmful. */
1242 basic_block next;
1244 /* The bb or one of its children contains open loop exits. That means
1245 loop exit nodes that are not surrounded by a loop dominated by bb. */
1246 bool exits;
1248 /* The bb or one of its children contains only structures we can handle. */
1249 bool difficult;
1253 static struct scopdet_info build_scops_1 (basic_block, VEC (sd_region, heap) **,
1254 loop_p);
1256 /* Calculates BB infos. If bb is difficult we add valid SCoPs dominated by BB
1257 to SCOPS. TYPE is the gbb_type of BB. */
1259 static struct scopdet_info
1260 scopdet_basic_block_info (basic_block bb, VEC (sd_region, heap) **scops,
1261 gbb_type type)
1263 struct loop *loop = bb->loop_father;
1264 struct scopdet_info result;
1265 gimple stmt;
1267 /* XXX: ENTRY_BLOCK_PTR could be optimized in later steps. */
1268 stmt = harmful_stmt_in_bb (ENTRY_BLOCK_PTR, bb);
1269 result.difficult = (stmt != NULL);
1270 result.last = NULL;
1272 switch (type)
1274 case GBB_LAST:
1275 result.next = NULL;
1276 result.exits = false;
1277 result.last = bb;
1278 break;
1280 case GBB_SIMPLE:
1281 result.next = single_succ (bb);
1282 result.exits = false;
1283 result.last = bb;
1284 break;
1286 case GBB_LOOP_SING_EXIT_HEADER:
1288 VEC (sd_region, heap) *tmp_scops = VEC_alloc (sd_region, heap,3);
1289 struct scopdet_info sinfo;
1291 sinfo = build_scops_1 (bb, &tmp_scops, loop);
1293 result.last = single_exit (bb->loop_father)->src;
1294 result.next = single_exit (bb->loop_father)->dest;
1296 /* If we do not dominate result.next, remove it. It's either
1297 the EXIT_BLOCK_PTR, or another bb dominates it and will
1298 call the scop detection for this bb. */
1299 if (!dominated_by_p (CDI_DOMINATORS, result.next, bb))
1300 result.next = NULL;
1302 if (result.last->loop_father != loop)
1303 result.next = NULL;
1305 if (TREE_CODE (number_of_latch_executions (loop))
1306 == SCEV_NOT_KNOWN)
1307 result.difficult = true;
1309 if (sinfo.difficult)
1310 move_sd_regions (&tmp_scops, scops);
1311 else
1312 VEC_free (sd_region, heap, tmp_scops);
1314 result.exits = false;
1315 result.difficult |= sinfo.difficult;
1316 break;
1319 case GBB_LOOP_MULT_EXIT_HEADER:
1321 /* XXX: For now we just do not join loops with multiple exits. If the
1322 exits lead to the same bb it may be possible to join the loop. */
1323 VEC (sd_region, heap) *tmp_scops = VEC_alloc (sd_region, heap, 3);
1324 VEC (edge, heap) *exits = get_loop_exit_edges (loop);
1325 edge e;
1326 int i;
1327 build_scops_1 (bb, &tmp_scops, loop);
1329 /* Scan the code dominated by this loop. This means all bbs, that are
1330 are dominated by a bb in this loop, but are not part of this loop.
1332 The easiest case:
1333 - The loop exit destination is dominated by the exit sources.
1335 TODO: We miss here the more complex cases:
1336 - The exit destinations are dominated by another bb inside the
1337 loop.
1338 - The loop dominates bbs, that are not exit destinations. */
1339 for (i = 0; VEC_iterate (edge, exits, i, e); i++)
1340 if (e->src->loop_father == loop
1341 && dominated_by_p (CDI_DOMINATORS, e->dest, e->src))
1343 /* Pass loop_outer to recognize e->dest as loop header in
1344 build_scops_1. */
1345 if (e->dest->loop_father->header == e->dest)
1346 build_scops_1 (e->dest, &tmp_scops,
1347 loop_outer (e->dest->loop_father));
1348 else
1349 build_scops_1 (e->dest, &tmp_scops, e->dest->loop_father);
1352 result.next = NULL;
1353 result.last = NULL;
1354 result.difficult = true;
1355 result.exits = false;
1356 move_sd_regions (&tmp_scops, scops);
1357 VEC_free (edge, heap, exits);
1358 break;
1360 case GBB_COND_HEADER:
1362 VEC (sd_region, heap) *tmp_scops = VEC_alloc (sd_region, heap, 3);
1363 struct scopdet_info sinfo;
1364 VEC (basic_block, heap) *dominated;
1365 int i;
1366 basic_block dom_bb;
1367 basic_block last_bb = NULL;
1368 edge e;
1369 result.exits = false;
1371 /* First check the successors of BB, and check if it is possible to join
1372 the different branches. */
1373 for (i = 0; VEC_iterate (edge, bb->succs, i, e); i++)
1375 /* Ignore loop exits. They will be handled after the loop body. */
1376 if (is_loop_exit (loop, e->dest))
1378 result.exits = true;
1379 continue;
1382 /* Do not follow edges that lead to the end of the
1383 conditions block. For example, in
1386 | /|\
1387 | 1 2 |
1388 | | | |
1389 | 3 4 |
1390 | \|/
1393 the edge from 0 => 6. Only check if all paths lead to
1394 the same node 6. */
1396 if (!single_pred_p (e->dest))
1398 /* Check, if edge leads directly to the end of this
1399 condition. */
1400 if (!last_bb)
1402 last_bb = e->dest;
1405 if (e->dest != last_bb)
1406 result.difficult = true;
1408 continue;
1411 if (!dominated_by_p (CDI_DOMINATORS, e->dest, bb))
1413 result.difficult = true;
1414 continue;
1417 sinfo = build_scops_1 (e->dest, &tmp_scops, loop);
1419 result.exits |= sinfo.exits;
1420 result.last = sinfo.last;
1421 result.difficult |= sinfo.difficult;
1423 /* Checks, if all branches end at the same point.
1424 If that is true, the condition stays joinable.
1425 Have a look at the example above. */
1426 if (sinfo.last && single_succ_p (sinfo.last))
1428 basic_block next_tmp = single_succ (sinfo.last);
1430 if (!last_bb)
1431 last_bb = next_tmp;
1433 if (next_tmp != last_bb)
1434 result.difficult = true;
1436 else
1437 result.difficult = true;
1440 /* If the condition is joinable. */
1441 if (!result.exits && !result.difficult)
1443 /* Only return a next pointer if we dominate this pointer.
1444 Otherwise it will be handled by the bb dominating it. */
1445 if (dominated_by_p (CDI_DOMINATORS, last_bb, bb) && last_bb != bb)
1446 result.next = last_bb;
1447 else
1448 result.next = NULL;
1450 VEC_free (sd_region, heap, tmp_scops);
1451 break;
1454 /* Scan remaining bbs dominated by BB. */
1455 dominated = get_dominated_by (CDI_DOMINATORS, bb);
1457 for (i = 0; VEC_iterate (basic_block, dominated, i, dom_bb); i++)
1459 /* Ignore loop exits: they will be handled after the loop body. */
1460 if (loop_depth (find_common_loop (loop, dom_bb->loop_father))
1461 < loop_depth (loop))
1463 result.exits = true;
1464 continue;
1467 /* Ignore the bbs processed above. */
1468 if (single_pred_p (dom_bb) && single_pred (dom_bb) == bb)
1469 continue;
1471 if (loop_depth (loop) > loop_depth (dom_bb->loop_father))
1472 sinfo = build_scops_1 (dom_bb, &tmp_scops, loop_outer (loop));
1473 else
1474 sinfo = build_scops_1 (dom_bb, &tmp_scops, loop);
1477 result.exits |= sinfo.exits;
1478 result.difficult = true;
1479 result.last = NULL;
1482 VEC_free (basic_block, heap, dominated);
1484 result.next = NULL;
1485 move_sd_regions (&tmp_scops, scops);
1487 break;
1490 default:
1491 gcc_unreachable ();
1494 return result;
1497 /* Creates the SCoPs and writes entry and exit points for every SCoP. */
1499 static struct scopdet_info
1500 build_scops_1 (basic_block current, VEC (sd_region, heap) **scops, loop_p loop)
1503 bool in_scop = false;
1504 sd_region open_scop;
1505 struct scopdet_info sinfo;
1507 /* Initialize result. */
1508 struct scopdet_info result;
1509 result.exits = false;
1510 result.difficult = false;
1511 result.next = NULL;
1512 result.last = NULL;
1513 open_scop.entry = NULL;
1515 /* Loop over the dominance tree. If we meet a difficult bb, close
1516 the current SCoP. Loop and condition header start a new layer,
1517 and can only be added if all bbs in deeper layers are simple. */
1518 while (current != NULL)
1520 sinfo = scopdet_basic_block_info (current, scops, get_bb_type (current,
1521 loop));
1523 if (!in_scop && !(sinfo.exits || sinfo.difficult))
1525 open_scop.entry = current;
1526 open_scop.exit = NULL;
1527 in_scop = true;
1529 else if (in_scop && (sinfo.exits || sinfo.difficult))
1531 open_scop.exit = current;
1532 VEC_safe_push (sd_region, heap, *scops, &open_scop);
1533 in_scop = false;
1536 result.difficult |= sinfo.difficult;
1537 result.exits |= sinfo.exits;
1539 current = sinfo.next;
1542 /* Try to close open_scop, if we are still in an open SCoP. */
1543 if (in_scop)
1545 int i;
1546 edge e;
1548 for (i = 0; VEC_iterate (edge, sinfo.last->succs, i, e); i++)
1549 if (dominated_by_p (CDI_POST_DOMINATORS, sinfo.last, e->dest))
1550 open_scop.exit = e->dest;
1552 if (!open_scop.exit && open_scop.entry != sinfo.last)
1553 open_scop.exit = sinfo.last;
1555 if (open_scop.exit)
1556 VEC_safe_push (sd_region, heap, *scops, &open_scop);
1560 result.last = sinfo.last;
1561 return result;
1564 /* Checks if a bb is contained in REGION. */
1566 static bool
1567 bb_in_sd_region (basic_block bb, sd_region *region)
1569 return dominated_by_p (CDI_DOMINATORS, bb, region->entry)
1570 && !(dominated_by_p (CDI_DOMINATORS, bb, region->exit)
1571 && !dominated_by_p (CDI_DOMINATORS, region->entry,
1572 region->exit));
1575 /* Returns the single entry edge of REGION, if it does not exits NULL. */
1577 static edge
1578 find_single_entry_edge (sd_region *region)
1580 edge e;
1581 edge_iterator ei;
1582 edge entry = NULL;
1584 FOR_EACH_EDGE (e, ei, region->entry->preds)
1585 if (!bb_in_sd_region (e->src, region))
1587 if (entry)
1589 entry = NULL;
1590 break;
1593 else
1594 entry = e;
1597 return entry;
1600 /* Returns the single exit edge of REGION, if it does not exits NULL. */
1602 static edge
1603 find_single_exit_edge (sd_region *region)
1605 edge e;
1606 edge_iterator ei;
1607 edge exit = NULL;
1609 FOR_EACH_EDGE (e, ei, region->exit->preds)
1610 if (bb_in_sd_region (e->src, region))
1612 if (exit)
1614 exit = NULL;
1615 break;
1618 else
1619 exit = e;
1622 return exit;
1625 /* Create a single entry edge for REGION. */
1627 static void
1628 create_single_entry_edge (sd_region *region)
1630 if (find_single_entry_edge (region))
1631 return;
1633 /* There are multiple predecessors for bb_3
1635 | 1 2
1636 | | /
1637 | |/
1638 | 3 <- entry
1639 | |\
1640 | | |
1641 | 4 ^
1642 | | |
1643 | |/
1646 There are two edges (1->3, 2->3), that point from outside into the region,
1647 and another one (5->3), a loop latch, lead to bb_3.
1649 We split bb_3.
1651 | 1 2
1652 | | /
1653 | |/
1654 |3.0
1655 | |\ (3.0 -> 3.1) = single entry edge
1656 |3.1 | <- entry
1657 | | |
1658 | | |
1659 | 4 ^
1660 | | |
1661 | |/
1664 If the loop is part of the SCoP, we have to redirect the loop latches.
1666 | 1 2
1667 | | /
1668 | |/
1669 |3.0
1670 | | (3.0 -> 3.1) = entry edge
1671 |3.1 <- entry
1672 | |\
1673 | | |
1674 | 4 ^
1675 | | |
1676 | |/
1677 | 5 */
1679 if (region->entry->loop_father->header != region->entry
1680 || dominated_by_p (CDI_DOMINATORS,
1681 loop_latch_edge (region->entry->loop_father)->src,
1682 region->exit))
1684 edge forwarder = split_block_after_labels (region->entry);
1685 region->entry = forwarder->dest;
1687 else
1688 /* This case is never executed, as the loop headers seem always to have a
1689 single edge pointing from outside into the loop. */
1690 gcc_unreachable ();
1692 #ifdef ENABLE_CHECKING
1693 gcc_assert (find_single_entry_edge (region));
1694 #endif
1697 /* Check if the sd_region, mentioned in EDGE, has no exit bb. */
1699 static bool
1700 sd_region_without_exit (edge e)
1702 sd_region *r = (sd_region *) e->aux;
1704 if (r)
1705 return r->exit == NULL;
1706 else
1707 return false;
1710 /* Create a single exit edge for REGION. */
1712 static void
1713 create_single_exit_edge (sd_region *region)
1715 edge e;
1716 edge_iterator ei;
1717 edge forwarder = NULL;
1718 basic_block exit;
1720 if (find_single_exit_edge (region))
1721 return;
1723 /* We create a forwarder bb (5) for all edges leaving this region
1724 (3->5, 4->5). All other edges leading to the same bb, are moved
1725 to a new bb (6). If these edges where part of another region (2->5)
1726 we update the region->exit pointer, of this region.
1728 To identify which edge belongs to which region we depend on the e->aux
1729 pointer in every edge. It points to the region of the edge or to NULL,
1730 if the edge is not part of any region.
1732 1 2 3 4 1->5 no region, 2->5 region->exit = 5,
1733 \| |/ 3->5 region->exit = NULL, 4->5 region->exit = NULL
1734 5 <- exit
1736 changes to
1738 1 2 3 4 1->6 no region, 2->6 region->exit = 6,
1739 | | \/ 3->5 no region, 4->5 no region,
1740 | | 5
1741 \| / 5->6 region->exit = 6
1744 Now there is only a single exit edge (5->6). */
1745 exit = region->exit;
1746 region->exit = NULL;
1747 forwarder = make_forwarder_block (exit, &sd_region_without_exit, NULL);
1749 /* Unmark the edges, that are no longer exit edges. */
1750 FOR_EACH_EDGE (e, ei, forwarder->src->preds)
1751 if (e->aux)
1752 e->aux = NULL;
1754 /* Mark the new exit edge. */
1755 single_succ_edge (forwarder->src)->aux = region;
1757 /* Update the exit bb of all regions, where exit edges lead to
1758 forwarder->dest. */
1759 FOR_EACH_EDGE (e, ei, forwarder->dest->preds)
1760 if (e->aux)
1761 ((sd_region *) e->aux)->exit = forwarder->dest;
1763 #ifdef ENABLE_CHECKING
1764 gcc_assert (find_single_exit_edge (region));
1765 #endif
1768 /* Unmark the exit edges of all REGIONS.
1769 See comment in "create_single_exit_edge". */
1771 static void
1772 unmark_exit_edges (VEC (sd_region, heap) *regions)
1774 int i;
1775 sd_region *s;
1776 edge e;
1777 edge_iterator ei;
1779 for (i = 0; VEC_iterate (sd_region, regions, i, s); i++)
1780 FOR_EACH_EDGE (e, ei, s->exit->preds)
1781 e->aux = NULL;
1785 /* Mark the exit edges of all REGIONS.
1786 See comment in "create_single_exit_edge". */
1788 static void
1789 mark_exit_edges (VEC (sd_region, heap) *regions)
1791 int i;
1792 sd_region *s;
1793 edge e;
1794 edge_iterator ei;
1796 for (i = 0; VEC_iterate (sd_region, regions, i, s); i++)
1797 FOR_EACH_EDGE (e, ei, s->exit->preds)
1798 if (bb_in_sd_region (e->src, s))
1799 e->aux = s;
1803 /* Create for all scop regions a single entry and a single exit edge. */
1805 static void
1806 create_sese_edges (VEC (sd_region, heap) *regions)
1808 int i;
1809 sd_region *s;
1811 for (i = 0; VEC_iterate (sd_region, regions, i, s); i++)
1812 create_single_entry_edge (s);
1814 mark_exit_edges (regions);
1816 for (i = 0; VEC_iterate (sd_region, regions, i, s); i++)
1817 create_single_exit_edge (s);
1819 unmark_exit_edges (regions);
1821 #ifdef ENABLE_CHECKING
1822 verify_loop_structure ();
1823 verify_dominators (CDI_DOMINATORS);
1824 verify_ssa (false);
1825 #endif
1828 /* Create graphite SCoPs from an array of scop detection regions. */
1830 static void
1831 build_graphite_scops (VEC (sd_region, heap) *scop_regions)
1833 int i;
1834 sd_region *s;
1836 for (i = 0; VEC_iterate (sd_region, scop_regions, i, s); i++)
1838 edge entry = find_single_entry_edge (s);
1839 edge exit = find_single_exit_edge (s);
1840 scop_p scop = new_scop (entry, exit);
1841 VEC_safe_push (scop_p, heap, current_scops, scop);
1843 /* Are there overlapping SCoPs? */
1844 #ifdef ENABLE_CHECKING
1846 int j;
1847 sd_region *s2;
1849 for (j = 0; VEC_iterate (sd_region, scop_regions, j, s2); j++)
1850 if (s != s2)
1851 gcc_assert (!bb_in_sd_region (s->entry, s2));
1853 #endif
1857 /* Find static control parts. */
1859 static void
1860 build_scops (void)
1862 struct loop *loop = current_loops->tree_root;
1863 VEC (sd_region, heap) *tmp_scops = VEC_alloc (sd_region, heap, 3);
1865 build_scops_1 (single_succ (ENTRY_BLOCK_PTR), &tmp_scops, loop);
1866 create_sese_edges (tmp_scops);
1867 build_graphite_scops (tmp_scops);
1868 VEC_free (sd_region, heap, tmp_scops);
1871 /* Gather the basic blocks belonging to the SCOP. */
1873 static void
1874 build_scop_bbs (scop_p scop)
1876 basic_block *stack = XNEWVEC (basic_block, n_basic_blocks + 1);
1877 sbitmap visited = sbitmap_alloc (last_basic_block);
1878 int sp = 0;
1880 sbitmap_zero (visited);
1881 stack[sp++] = SCOP_ENTRY (scop);
1883 while (sp)
1885 basic_block bb = stack[--sp];
1886 int depth = loop_depth (bb->loop_father);
1887 int num = bb->loop_father->num;
1888 edge_iterator ei;
1889 edge e;
1891 /* Scop's exit is not in the scop. Exclude also bbs, which are
1892 dominated by the SCoP exit. These are e.g. loop latches. */
1893 if (TEST_BIT (visited, bb->index)
1894 || dominated_by_p (CDI_DOMINATORS, bb, SCOP_EXIT (scop))
1895 /* Every block in the scop is dominated by scop's entry. */
1896 || !dominated_by_p (CDI_DOMINATORS, bb, SCOP_ENTRY (scop)))
1897 continue;
1899 new_graphite_bb (scop, bb);
1900 SET_BIT (visited, bb->index);
1902 /* First push the blocks that have to be processed last. Note
1903 that this means that the order in which the code is organized
1904 below is important: do not reorder the following code. */
1905 FOR_EACH_EDGE (e, ei, bb->succs)
1906 if (! TEST_BIT (visited, e->dest->index)
1907 && (int) loop_depth (e->dest->loop_father) < depth)
1908 stack[sp++] = e->dest;
1910 FOR_EACH_EDGE (e, ei, bb->succs)
1911 if (! TEST_BIT (visited, e->dest->index)
1912 && (int) loop_depth (e->dest->loop_father) == depth
1913 && e->dest->loop_father->num != num)
1914 stack[sp++] = e->dest;
1916 FOR_EACH_EDGE (e, ei, bb->succs)
1917 if (! TEST_BIT (visited, e->dest->index)
1918 && (int) loop_depth (e->dest->loop_father) == depth
1919 && e->dest->loop_father->num == num
1920 && EDGE_COUNT (e->dest->preds) > 1)
1921 stack[sp++] = e->dest;
1923 FOR_EACH_EDGE (e, ei, bb->succs)
1924 if (! TEST_BIT (visited, e->dest->index)
1925 && (int) loop_depth (e->dest->loop_father) == depth
1926 && e->dest->loop_father->num == num
1927 && EDGE_COUNT (e->dest->preds) == 1)
1928 stack[sp++] = e->dest;
1930 FOR_EACH_EDGE (e, ei, bb->succs)
1931 if (! TEST_BIT (visited, e->dest->index)
1932 && (int) loop_depth (e->dest->loop_father) > depth)
1933 stack[sp++] = e->dest;
1936 free (stack);
1937 sbitmap_free (visited);
1941 /* Record LOOP as occuring in SCOP. */
1943 static void
1944 scop_record_loop (scop_p scop, struct loop *loop)
1946 loop_p parent;
1947 tree induction_var;
1949 if (bitmap_bit_p (SCOP_LOOPS (scop), loop->num))
1950 return;
1952 parent = loop_outer (loop);
1953 induction_var = find_induction_var_from_exit_cond (loop);
1955 if (!bb_in_scop_p (parent->latch, scop))
1956 parent = NULL;
1958 if (induction_var != NULL_TREE)
1960 name_tree oldiv = XNEW (struct name_tree);
1961 oldiv->t = SSA_NAME_VAR (induction_var);
1962 if (DECL_NAME (oldiv->t))
1963 oldiv->name = IDENTIFIER_POINTER (DECL_NAME (oldiv->t));
1964 else
1966 int len = 2 + 16;
1967 char *n = XNEWVEC (char, len);
1968 snprintf (n, len, "D.%u", DECL_UID (oldiv->t));
1969 oldiv->name = n;
1971 oldiv->loop = loop;
1973 VEC_safe_push (name_tree, heap, SCOP_OLDIVS (scop), oldiv);
1976 bitmap_set_bit (SCOP_LOOPS (scop), loop->num);
1977 VEC_safe_push (loop_p, heap, SCOP_LOOP_NEST (scop), loop);
1980 /* Build the loop nests contained in SCOP. */
1982 static void
1983 build_scop_loop_nests (scop_p scop)
1985 unsigned i;
1986 graphite_bb_p gb;
1987 struct loop *loop0, *loop1;
1989 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++)
1991 struct loop *loop = gbb_loop (gb);
1993 /* Only add loops, if they are completely contained in the SCoP. */
1994 if (loop->header == GBB_BB (gb)
1995 && bb_in_scop_p (loop->latch, scop))
1996 scop_record_loop (scop, gbb_loop (gb));
1999 /* Make sure that the loops in the SCOP_LOOP_NEST are ordered. It
2000 can be the case that an inner loop is inserted before an outer
2001 loop. To avoid this, semi-sort once. */
2002 for (i = 0; VEC_iterate (loop_p, SCOP_LOOP_NEST (scop), i, loop0); i++)
2004 if (VEC_length (loop_p, SCOP_LOOP_NEST (scop)) == i + 1)
2005 break;
2007 loop1 = VEC_index (loop_p, SCOP_LOOP_NEST (scop), i + 1);
2008 if (loop0->num > loop1->num)
2010 VEC_replace (loop_p, SCOP_LOOP_NEST (scop), i, loop1);
2011 VEC_replace (loop_p, SCOP_LOOP_NEST (scop), i + 1, loop0);
2016 /* Calculate the number of loops around GB in the current SCOP. */
2018 static inline int
2019 nb_loops_around_gb (graphite_bb_p gb)
2021 scop_p scop = GBB_SCOP (gb);
2022 struct loop *l = gbb_loop (gb);
2023 int d = 0;
2025 for (; loop_in_scop_p (l, scop); d++, l = loop_outer (l));
2027 return d;
2030 /* Build for BB the static schedule.
2032 The STATIC_SCHEDULE is defined like this:
2035 for (i: ...)
2037 for (j: ...)
2043 for (k: ...)
2051 Static schedules for A to F:
2053 DEPTH
2054 0 1 2
2056 B 1 0 0
2057 C 1 0 1
2058 D 1 1 0
2059 E 1 1 1
2063 static void
2064 build_scop_canonical_schedules (scop_p scop)
2066 int i, j;
2067 graphite_bb_p gb;
2068 int nb = scop_nb_loops (scop) + 1;
2070 SCOP_STATIC_SCHEDULE (scop) = lambda_vector_new (nb);
2072 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++)
2074 int offset = nb_loops_around_gb (gb);
2076 /* After leaving a loop, it is possible that the schedule is not
2077 set at zero. This loop reinitializes components located
2078 after OFFSET. */
2080 for (j = offset + 1; j < nb; j++)
2081 if (SCOP_STATIC_SCHEDULE (scop)[j])
2083 memset (&(SCOP_STATIC_SCHEDULE (scop)[j]), 0,
2084 sizeof (int) * (nb - j));
2085 ++SCOP_STATIC_SCHEDULE (scop)[offset];
2086 break;
2089 GBB_STATIC_SCHEDULE (gb) = lambda_vector_new (offset + 1);
2090 lambda_vector_copy (SCOP_STATIC_SCHEDULE (scop),
2091 GBB_STATIC_SCHEDULE (gb), offset + 1);
2093 ++SCOP_STATIC_SCHEDULE (scop)[offset];
2097 /* Build the LOOPS vector for all bbs in SCOP. */
2099 static void
2100 build_bb_loops (scop_p scop)
2102 graphite_bb_p gb;
2103 int i;
2105 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++)
2107 loop_p loop;
2108 int depth;
2110 depth = nb_loops_around_gb (gb) - 1;
2112 GBB_LOOPS (gb) = VEC_alloc (loop_p, heap, 3);
2113 VEC_safe_grow_cleared (loop_p, heap, GBB_LOOPS (gb), depth + 1);
2115 loop = GBB_BB (gb)->loop_father;
2117 while (scop_contains_loop (scop, loop))
2119 VEC_replace (loop_p, GBB_LOOPS (gb), depth, loop);
2120 loop = loop_outer (loop);
2121 depth--;
2126 /* Get the index for parameter VAR in SCOP. */
2128 static int
2129 param_index (tree var, scop_p scop)
2131 int i;
2132 name_tree p;
2133 name_tree nvar;
2135 gcc_assert (TREE_CODE (var) == SSA_NAME);
2137 for (i = 0; VEC_iterate (name_tree, SCOP_PARAMS (scop), i, p); i++)
2138 if (p->t == var)
2139 return i;
2141 nvar = XNEW (struct name_tree);
2142 nvar->t = var;
2143 nvar->name = NULL;
2144 VEC_safe_push (name_tree, heap, SCOP_PARAMS (scop), nvar);
2145 return VEC_length (name_tree, SCOP_PARAMS (scop)) - 1;
2148 /* Scan EXPR and translate it to an inequality vector INEQ that will
2149 be added, or subtracted, in the constraint domain matrix C at row
2150 R. K is the number of columns for loop iterators in C. */
2152 static void
2153 scan_tree_for_params (scop_p s, tree e, CloogMatrix *c, int r, Value k,
2154 bool subtract)
2156 int cst_col, param_col;
2158 if (e == chrec_dont_know)
2159 return;
2161 switch (TREE_CODE (e))
2163 case POLYNOMIAL_CHREC:
2165 tree left = CHREC_LEFT (e);
2166 tree right = CHREC_RIGHT (e);
2167 int var = CHREC_VARIABLE (e);
2169 if (TREE_CODE (right) != INTEGER_CST)
2170 return;
2172 if (c)
2174 int loop_col = scop_gimple_loop_depth (s, get_loop (var)) + 1;
2176 if (subtract)
2177 value_sub_int (c->p[r][loop_col], c->p[r][loop_col],
2178 int_cst_value (right));
2179 else
2180 value_add_int (c->p[r][loop_col], c->p[r][loop_col],
2181 int_cst_value (right));
2184 switch (TREE_CODE (left))
2186 case POLYNOMIAL_CHREC:
2187 scan_tree_for_params (s, left, c, r, k, subtract);
2188 return;
2190 case INTEGER_CST:
2191 /* Constant part. */
2192 if (c)
2194 int v = int_cst_value (left);
2195 cst_col = c->NbColumns - 1;
2197 if (v < 0)
2199 v = -v;
2200 subtract = subtract ? false : true;
2203 if (subtract)
2204 value_sub_int (c->p[r][cst_col], c->p[r][cst_col], v);
2205 else
2206 value_add_int (c->p[r][cst_col], c->p[r][cst_col], v);
2208 return;
2210 default:
2211 scan_tree_for_params (s, left, c, r, k, subtract);
2212 return;
2215 break;
2217 case MULT_EXPR:
2218 if (chrec_contains_symbols (TREE_OPERAND (e, 0)))
2220 Value val;
2222 gcc_assert (host_integerp (TREE_OPERAND (e, 1), 0));
2224 value_init (val);
2225 value_set_si (val, int_cst_value (TREE_OPERAND (e, 1)));
2226 value_multiply (k, k, val);
2227 value_clear (val);
2228 scan_tree_for_params (s, TREE_OPERAND (e, 0), c, r, k, subtract);
2230 else
2232 Value val;
2234 gcc_assert (host_integerp (TREE_OPERAND (e, 0), 0));
2236 value_init (val);
2237 value_set_si (val, int_cst_value (TREE_OPERAND (e, 0)));
2238 value_multiply (k, k, val);
2239 value_clear (val);
2240 scan_tree_for_params (s, TREE_OPERAND (e, 1), c, r, k, subtract);
2242 break;
2244 case PLUS_EXPR:
2245 case POINTER_PLUS_EXPR:
2246 scan_tree_for_params (s, TREE_OPERAND (e, 0), c, r, k, subtract);
2247 scan_tree_for_params (s, TREE_OPERAND (e, 1), c, r, k, subtract);
2248 break;
2250 case MINUS_EXPR:
2251 scan_tree_for_params (s, TREE_OPERAND (e, 0), c, r, k, subtract);
2252 value_oppose (k, k);
2253 scan_tree_for_params (s, TREE_OPERAND (e, 1), c, r, k, subtract);
2254 break;
2256 case NEGATE_EXPR:
2257 value_oppose (k, k);
2258 scan_tree_for_params (s, TREE_OPERAND (e, 0), c, r, k, subtract);
2259 break;
2261 case SSA_NAME:
2262 param_col = param_index (e, s);
2264 if (c)
2266 param_col += c->NbColumns - scop_nb_params (s) - 1;
2268 if (subtract)
2269 value_subtract (c->p[r][param_col], c->p[r][param_col], k);
2270 else
2271 value_addto (c->p[r][param_col], c->p[r][param_col], k);
2273 break;
2275 case INTEGER_CST:
2276 if (c)
2278 int v = int_cst_value (e);
2279 cst_col = c->NbColumns - 1;
2281 if (v < 0)
2283 v = -v;
2284 subtract = subtract ? false : true;
2287 if (subtract)
2288 value_sub_int (c->p[r][cst_col], c->p[r][cst_col], v);
2289 else
2290 value_add_int (c->p[r][cst_col], c->p[r][cst_col], v);
2292 break;
2294 case NOP_EXPR:
2295 case CONVERT_EXPR:
2296 case NON_LVALUE_EXPR:
2297 scan_tree_for_params (s, TREE_OPERAND (e, 0), c, r, k, subtract);
2298 break;
2300 default:
2301 gcc_unreachable ();
2302 break;
2306 /* Data structure for idx_record_params. */
2308 struct irp_data
2310 struct loop *loop;
2311 scop_p scop;
2314 /* For a data reference with an ARRAY_REF as its BASE, record the
2315 parameters occurring in IDX. DTA is passed in as complementary
2316 information, and is used by the automatic walker function. This
2317 function is a callback for for_each_index. */
2319 static bool
2320 idx_record_params (tree base, tree *idx, void *dta)
2322 struct irp_data *data = (struct irp_data *) dta;
2324 if (TREE_CODE (base) != ARRAY_REF)
2325 return true;
2327 if (TREE_CODE (*idx) == SSA_NAME)
2329 tree scev;
2330 scop_p scop = data->scop;
2331 struct loop *loop = data->loop;
2332 Value one;
2334 scev = analyze_scalar_evolution (loop, *idx);
2335 scev = instantiate_scev (block_before_scop (scop), loop, scev);
2337 value_init (one);
2338 value_set_si (one, 1);
2339 scan_tree_for_params (scop, scev, NULL, 0, one, false);
2340 value_clear (one);
2343 return true;
2346 /* Find parameters with respect to SCOP in BB. We are looking in memory
2347 access functions, conditions and loop bounds. */
2349 static void
2350 find_params_in_bb (scop_p scop, basic_block bb)
2352 int i;
2353 data_reference_p dr;
2354 VEC (data_reference_p, heap) *drs;
2355 gimple_stmt_iterator gsi;
2356 struct loop *nest = outermost_loop_in_scop (scop, bb);
2358 /* Find the parameters used in the memory access functions. */
2359 drs = VEC_alloc (data_reference_p, heap, 5);
2360 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2361 find_data_references_in_stmt (nest, gsi_stmt (gsi), &drs);
2363 for (i = 0; VEC_iterate (data_reference_p, drs, i, dr); i++)
2365 struct irp_data irp;
2367 irp.loop = bb->loop_father;
2368 irp.scop = scop;
2369 for_each_index (&dr->ref, idx_record_params, &irp);
2370 free_data_ref (dr);
2373 VEC_free (data_reference_p, heap, drs);
2375 /* Find parameters in conditional statements. */
2376 gsi = gsi_last_bb (bb);
2377 if (!gsi_end_p (gsi))
2379 gimple stmt = gsi_stmt (gsi);
2381 if (gimple_code (stmt) == GIMPLE_COND)
2383 Value one;
2384 loop_p loop = bb->loop_father;
2386 tree lhs, rhs;
2388 lhs = gimple_cond_lhs (stmt);
2389 lhs = analyze_scalar_evolution (loop, lhs);
2390 lhs = instantiate_scev (block_before_scop (scop), loop, lhs);
2392 rhs = gimple_cond_rhs (stmt);
2393 rhs = analyze_scalar_evolution (loop, rhs);
2394 rhs = instantiate_scev (block_before_scop (scop), loop, rhs);
2396 value_init (one);
2397 scan_tree_for_params (scop, lhs, NULL, 0, one, false);
2398 value_set_si (one, 1);
2399 scan_tree_for_params (scop, rhs, NULL, 0, one, false);
2400 value_clear (one);
2405 /* Saves in NV the name of variable P->T. */
2407 static void
2408 save_var_name (char **nv, int i, name_tree p)
2410 const char *name = get_name (SSA_NAME_VAR (p->t));
2412 if (name)
2414 int len = strlen (name) + 16;
2415 nv[i] = XNEWVEC (char, len);
2416 snprintf (nv[i], len, "%s_%d", name, SSA_NAME_VERSION (p->t));
2418 else
2420 nv[i] = XNEWVEC (char, 16);
2421 snprintf (nv[i], 2 + 16, "T_%d", SSA_NAME_VERSION (p->t));
2424 p->name = nv[i];
2427 /* Return the maximal loop depth in SCOP. */
2429 static int
2430 scop_max_loop_depth (scop_p scop)
2432 int i;
2433 graphite_bb_p gbb;
2434 int max_nb_loops = 0;
2436 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gbb); i++)
2438 int nb_loops = gbb_nb_loops (gbb);
2439 if (max_nb_loops < nb_loops)
2440 max_nb_loops = nb_loops;
2443 return max_nb_loops;
2446 /* Initialize Cloog's parameter names from the names used in GIMPLE.
2447 Initialize Cloog's iterator names, using 'graphite_iterator_%d'
2448 from 0 to scop_nb_loops (scop). */
2450 static void
2451 initialize_cloog_names (scop_p scop)
2453 int i, nb_params = VEC_length (name_tree, SCOP_PARAMS (scop));
2454 char **params = XNEWVEC (char *, nb_params);
2455 int nb_iterators = scop_max_loop_depth (scop);
2456 int nb_scattering= cloog_program_nb_scattdims (SCOP_PROG (scop));
2457 char **iterators = XNEWVEC (char *, nb_iterators * 2);
2458 char **scattering = XNEWVEC (char *, nb_scattering);
2459 name_tree p;
2461 for (i = 0; VEC_iterate (name_tree, SCOP_PARAMS (scop), i, p); i++)
2462 save_var_name (params, i, p);
2464 cloog_names_set_nb_parameters (cloog_program_names (SCOP_PROG (scop)),
2465 nb_params);
2466 cloog_names_set_parameters (cloog_program_names (SCOP_PROG (scop)),
2467 params);
2469 for (i = 0; i < nb_iterators; i++)
2471 int len = 18 + 16;
2472 iterators[i] = XNEWVEC (char, len);
2473 snprintf (iterators[i], len, "graphite_iterator_%d", i);
2476 cloog_names_set_nb_iterators (cloog_program_names (SCOP_PROG (scop)),
2477 nb_iterators);
2478 cloog_names_set_iterators (cloog_program_names (SCOP_PROG (scop)),
2479 iterators);
2481 for (i = 0; i < nb_scattering; i++)
2483 int len = 2 + 16;
2484 scattering[i] = XNEWVEC (char, len);
2485 snprintf (scattering[i], len, "s_%d", i);
2488 cloog_names_set_nb_scattering (cloog_program_names (SCOP_PROG (scop)),
2489 nb_scattering);
2490 cloog_names_set_scattering (cloog_program_names (SCOP_PROG (scop)),
2491 scattering);
2494 /* Record the parameters used in the SCOP. A variable is a parameter
2495 in a scop if it does not vary during the execution of that scop. */
2497 static void
2498 find_scop_parameters (scop_p scop)
2500 graphite_bb_p gb;
2501 unsigned i;
2502 struct loop *loop;
2503 Value one;
2505 value_init (one);
2506 value_set_si (one, 1);
2508 /* Find the parameters used in the loop bounds. */
2509 for (i = 0; VEC_iterate (loop_p, SCOP_LOOP_NEST (scop), i, loop); i++)
2511 tree nb_iters = number_of_latch_executions (loop);
2513 if (!chrec_contains_symbols (nb_iters))
2514 continue;
2516 nb_iters = analyze_scalar_evolution (loop, nb_iters);
2517 nb_iters = instantiate_scev (block_before_scop (scop), loop, nb_iters);
2518 scan_tree_for_params (scop, nb_iters, NULL, 0, one, false);
2521 value_clear (one);
2523 /* Find the parameters used in data accesses. */
2524 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++)
2525 find_params_in_bb (scop, GBB_BB (gb));
2528 /* Build the context constraints for SCOP: constraints and relations
2529 on parameters. */
2531 static void
2532 build_scop_context (scop_p scop)
2534 int nb_params = scop_nb_params (scop);
2535 CloogMatrix *matrix = cloog_matrix_alloc (1, nb_params + 2);
2537 /* Insert '0 >= 0' in the context matrix, as it is not allowed to be
2538 empty. */
2540 value_set_si (matrix->p[0][0], 1);
2542 value_set_si (matrix->p[0][nb_params + 1], 0);
2544 cloog_program_set_context (SCOP_PROG (scop),
2545 cloog_domain_matrix2domain (matrix));
2546 cloog_matrix_free (matrix);
2549 /* Returns a graphite_bb from BB. */
2551 static inline graphite_bb_p
2552 gbb_from_bb (basic_block bb)
2554 return (graphite_bb_p) bb->aux;
2557 /* Add DOMAIN to all the basic blocks in LOOP. */
2559 static void
2560 add_bb_domains (struct loop *loop, CloogMatrix *domain)
2562 basic_block *bbs = get_loop_body (loop);
2563 unsigned i;
2565 for (i = 0; i < loop->num_nodes; i++)
2566 if (bbs[i]->loop_father == loop)
2568 graphite_bb_p gbb = gbb_from_bb (bbs[i]);
2569 GBB_DOMAIN (gbb) = cloog_matrix_copy (domain);
2572 free (bbs);
2575 /* Builds the constraint matrix for LOOP in SCOP. NB_OUTER_LOOPS is the
2576 number of loops surrounding LOOP in SCOP. OUTER_CSTR gives the
2577 constraints matrix for the surrounding loops. */
2579 static void
2580 build_loop_iteration_domains (scop_p scop, struct loop *loop,
2581 CloogMatrix *outer_cstr, int nb_outer_loops)
2583 int i, j, row;
2584 CloogMatrix *cstr;
2586 int nb_rows = outer_cstr->NbRows + 1;
2587 int nb_cols = outer_cstr->NbColumns + 1;
2589 /* Last column of CSTR is the column of constants. */
2590 int cst_col = nb_cols - 1;
2592 /* The column for the current loop is just after the columns of
2593 other outer loops. */
2594 int loop_col = nb_outer_loops + 1;
2596 tree nb_iters = number_of_latch_executions (loop);
2598 /* When the number of iterations is a constant or a parameter, we
2599 add a constraint for the upper bound of the loop. So add a row
2600 to the constraint matrix before allocating it. */
2601 if (TREE_CODE (nb_iters) == INTEGER_CST
2602 || !chrec_contains_undetermined (nb_iters))
2603 nb_rows++;
2605 cstr = cloog_matrix_alloc (nb_rows, nb_cols);
2607 /* Copy the outer constraints. */
2608 for (i = 0; i < outer_cstr->NbRows; i++)
2610 /* Copy the eq/ineq and loops columns. */
2611 for (j = 0; j < loop_col; j++)
2612 value_assign (cstr->p[i][j], outer_cstr->p[i][j]);
2614 /* Leave an empty column in CSTR for the current loop, and then
2615 copy the parameter columns. */
2616 for (j = loop_col; j < outer_cstr->NbColumns; j++)
2617 value_assign (cstr->p[i][j + 1], outer_cstr->p[i][j]);
2620 /* 0 <= loop_i */
2621 row = outer_cstr->NbRows;
2622 value_set_si (cstr->p[row][0], 1);
2623 value_set_si (cstr->p[row][loop_col], 1);
2625 /* loop_i <= nb_iters */
2626 if (TREE_CODE (nb_iters) == INTEGER_CST)
2628 row++;
2629 value_set_si (cstr->p[row][0], 1);
2630 value_set_si (cstr->p[row][loop_col], -1);
2632 value_set_si (cstr->p[row][cst_col],
2633 int_cst_value (nb_iters));
2635 else if (!chrec_contains_undetermined (nb_iters))
2637 /* Otherwise nb_iters contains parameters: scan the nb_iters
2638 expression and build its matrix representation. */
2639 Value one;
2641 row++;
2642 value_set_si (cstr->p[row][0], 1);
2643 value_set_si (cstr->p[row][loop_col], -1);
2645 nb_iters = analyze_scalar_evolution (loop, nb_iters);
2646 nb_iters = instantiate_scev (block_before_scop (scop), loop, nb_iters);
2648 value_init (one);
2649 value_set_si (one, 1);
2650 scan_tree_for_params (scop, nb_iters, cstr, row, one, false);
2651 value_clear (one);
2653 else
2654 gcc_unreachable ();
2656 if (loop->inner && loop_in_scop_p (loop->inner, scop))
2657 build_loop_iteration_domains (scop, loop->inner, cstr, nb_outer_loops + 1);
2659 /* Only go to the next loops, if we are not at the outermost layer. These
2660 have to be handled seperately, as we can be sure, that the chain at this
2661 layer will be connected. */
2662 if (nb_outer_loops != 0 && loop->next && loop_in_scop_p (loop->next, scop))
2663 build_loop_iteration_domains (scop, loop->next, outer_cstr, nb_outer_loops);
2665 add_bb_domains (loop, cstr);
2667 cloog_matrix_free (cstr);
2670 /* Add conditions to the domain of GB. */
2672 static void
2673 add_conditions_to_domain (graphite_bb_p gb)
2675 unsigned int i,j;
2676 gimple stmt;
2677 VEC (gimple, heap) *conditions = GBB_CONDITIONS (gb);
2678 CloogMatrix *domain = GBB_DOMAIN (gb);
2679 scop_p scop = GBB_SCOP (gb);
2681 unsigned nb_rows;
2682 unsigned nb_cols;
2683 unsigned nb_new_rows = 0;
2684 unsigned row;
2686 if (VEC_empty (gimple, conditions))
2687 return;
2689 if (domain)
2691 nb_rows = domain->NbRows;
2692 nb_cols = domain->NbColumns;
2694 else
2696 nb_rows = 0;
2697 nb_cols = scop_nb_params (scop) + 2;
2700 /* Count number of necessary new rows to add the conditions to the
2701 domain. */
2702 for (i = 0; VEC_iterate (gimple, conditions, i, stmt); i++)
2704 switch (gimple_code (stmt))
2706 case GIMPLE_COND:
2708 enum tree_code code = gimple_cond_code (stmt);
2710 switch (code)
2712 case NE_EXPR:
2713 case EQ_EXPR:
2714 /* NE and EQ statements are not supported right know. */
2715 gcc_unreachable ();
2716 break;
2717 case LT_EXPR:
2718 case GT_EXPR:
2719 case LE_EXPR:
2720 case GE_EXPR:
2721 nb_new_rows++;
2722 break;
2723 default:
2724 gcc_unreachable ();
2725 break;
2727 break;
2729 case SWITCH_EXPR:
2730 /* Switch statements are not supported right know. */
2731 gcc_unreachable ();
2732 break;
2734 default:
2735 gcc_unreachable ();
2736 break;
2741 /* Enlarge the matrix. */
2743 CloogMatrix *new_domain;
2744 new_domain = cloog_matrix_alloc (nb_rows + nb_new_rows, nb_cols);
2746 for (i = 0; i < nb_rows; i++)
2747 for (j = 0; j < nb_cols; j++)
2748 value_assign (new_domain->p[i][j], domain->p[i][j]);
2750 cloog_matrix_free (domain);
2751 domain = new_domain;
2752 GBB_DOMAIN (gb) = new_domain;
2755 /* Add the conditions to the new enlarged domain matrix. */
2756 row = nb_rows;
2757 for (i = 0; VEC_iterate (gimple, conditions, i, stmt); i++)
2759 switch (gimple_code (stmt))
2761 case GIMPLE_COND:
2763 Value one;
2764 enum tree_code code;
2765 tree left;
2766 tree right;
2767 loop_p loop = GBB_BB (gb)->loop_father;
2769 left = gimple_cond_lhs (stmt);
2770 right = gimple_cond_rhs (stmt);
2772 left = analyze_scalar_evolution (loop, left);
2773 right = analyze_scalar_evolution (loop, right);
2775 left = instantiate_scev (block_before_scop (scop), loop, left);
2776 right = instantiate_scev (block_before_scop (scop), loop, right);
2778 code = gimple_cond_code (stmt);
2780 /* The conditions for ELSE-branches are inverted. */
2781 if (VEC_index (gimple, gb->condition_cases, i) == NULL)
2782 code = invert_tree_comparison (code, false);
2784 switch (code)
2786 case NE_EXPR:
2787 /* NE statements are not supported right know. */
2788 gcc_unreachable ();
2789 break;
2790 case EQ_EXPR:
2791 value_set_si (domain->p[row][0], 1);
2792 value_init (one);
2793 value_set_si (one, 1);
2794 scan_tree_for_params (scop, left, domain, row, one, true);
2795 value_set_si (one, 1);
2796 scan_tree_for_params (scop, right, domain, row, one, false);
2797 row++;
2798 value_set_si (domain->p[row][0], 1);
2799 value_set_si (one, 1);
2800 scan_tree_for_params (scop, left, domain, row, one, false);
2801 value_set_si (one, 1);
2802 scan_tree_for_params (scop, right, domain, row, one, true);
2803 value_clear (one);
2804 row++;
2805 break;
2806 case LT_EXPR:
2807 value_set_si (domain->p[row][0], 1);
2808 value_init (one);
2809 value_set_si (one, 1);
2810 scan_tree_for_params (scop, left, domain, row, one, true);
2811 value_set_si (one, 1);
2812 scan_tree_for_params (scop, right, domain, row, one, false);
2813 value_sub_int (domain->p[row][nb_cols - 1],
2814 domain->p[row][nb_cols - 1], 1);
2815 value_clear (one);
2816 row++;
2817 break;
2818 case GT_EXPR:
2819 value_set_si (domain->p[row][0], 1);
2820 value_init (one);
2821 value_set_si (one, 1);
2822 scan_tree_for_params (scop, left, domain, row, one, false);
2823 value_set_si (one, 1);
2824 scan_tree_for_params (scop, right, domain, row, one, true);
2825 value_sub_int (domain->p[row][nb_cols - 1],
2826 domain->p[row][nb_cols - 1], 1);
2827 value_clear (one);
2828 row++;
2829 break;
2830 case LE_EXPR:
2831 value_set_si (domain->p[row][0], 1);
2832 value_init (one);
2833 value_set_si (one, 1);
2834 scan_tree_for_params (scop, left, domain, row, one, true);
2835 value_set_si (one, 1);
2836 scan_tree_for_params (scop, right, domain, row, one, false);
2837 value_clear (one);
2838 row++;
2839 break;
2840 case GE_EXPR:
2841 value_set_si (domain->p[row][0], 1);
2842 value_init (one);
2843 value_set_si (one, 1);
2844 scan_tree_for_params (scop, left, domain, row, one, false);
2845 value_set_si (one, 1);
2846 scan_tree_for_params (scop, right, domain, row, one, true);
2847 value_clear (one);
2848 row++;
2849 break;
2850 default:
2851 gcc_unreachable ();
2852 break;
2854 break;
2856 case GIMPLE_SWITCH:
2857 /* Switch statements are not supported right know. */
2858 gcc_unreachable ();
2859 break;
2861 default:
2862 gcc_unreachable ();
2863 break;
2868 /* Helper recursive function. */
2870 static void
2871 build_scop_conditions_1 (VEC (gimple, heap) **conditions,
2872 VEC (gimple, heap) **cases, basic_block bb,
2873 scop_p scop)
2875 int i, j;
2876 graphite_bb_p gbb;
2877 gimple_stmt_iterator gsi;
2878 basic_block bb_child, bb_iter;
2879 VEC (basic_block, heap) *dom;
2881 /* Make sure we are in the SCoP. */
2882 if (!bb_in_scop_p (bb, scop))
2883 return;
2885 /* Record conditions in graphite_bb. */
2886 gbb = gbb_from_bb (bb);
2887 GBB_CONDITIONS (gbb) = VEC_copy (gimple, heap, *conditions);
2888 GBB_CONDITION_CASES (gbb) = VEC_copy (gimple, heap, *cases);
2890 add_conditions_to_domain (gbb);
2892 dom = get_dominated_by (CDI_DOMINATORS, bb);
2894 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2896 gimple stmt = gsi_stmt (gsi);
2897 VEC (edge, gc) *edges;
2898 edge e;
2900 switch (gimple_code (stmt))
2902 case GIMPLE_COND:
2903 edges = bb->succs;
2904 for (i = 0; VEC_iterate (edge, edges, i, e); i++)
2905 if ((dominated_by_p (CDI_DOMINATORS, e->dest, bb))
2906 && VEC_length (edge, e->dest->preds) == 1)
2908 /* Remove the scanned block from the dominator successors. */
2909 for (j = 0; VEC_iterate (basic_block, dom, j, bb_iter); j++)
2910 if (bb_iter == e->dest)
2912 VEC_unordered_remove (basic_block, dom, j);
2913 break;
2916 /* Recursively scan the then or else part. */
2917 if (e->flags & EDGE_TRUE_VALUE)
2918 VEC_safe_push (gimple, heap, *cases, stmt);
2919 else if (e->flags & EDGE_FALSE_VALUE)
2920 VEC_safe_push (gimple, heap, *cases, NULL);
2921 else
2922 gcc_unreachable ();
2924 VEC_safe_push (gimple, heap, *conditions, stmt);
2925 build_scop_conditions_1 (conditions, cases, e->dest, scop);
2926 VEC_pop (gimple, *conditions);
2927 VEC_pop (gimple, *cases);
2929 break;
2931 case GIMPLE_SWITCH:
2933 unsigned i;
2934 gimple_stmt_iterator gsi_search_gimple_label;
2936 for (i = 0; i < gimple_switch_num_labels (stmt); ++i)
2938 basic_block bb_iter;
2939 size_t k;
2940 size_t n_cases = VEC_length (gimple, *conditions);
2941 unsigned n = gimple_switch_num_labels (stmt);
2943 bb_child = label_to_block
2944 (CASE_LABEL (gimple_switch_label (stmt, i)));
2946 /* Do not handle multiple values for the same block. */
2947 for (k = 0; k < n; k++)
2948 if (i != k
2949 && label_to_block
2950 (CASE_LABEL (gimple_switch_label (stmt, k))) == bb_child)
2951 break;
2953 if (k != n)
2954 continue;
2956 /* Switch cases with more than one predecessor are not
2957 handled. */
2958 if (VEC_length (edge, bb_child->preds) != 1)
2959 continue;
2961 /* Recursively scan the corresponding 'case' block. */
2963 for (gsi_search_gimple_label = gsi_start_bb (bb_child);
2964 !gsi_end_p (gsi_search_gimple_label);
2965 gsi_next (&gsi_search_gimple_label))
2967 gimple stmt_gimple_label
2968 = gsi_stmt (gsi_search_gimple_label);
2970 if (gimple_code (stmt_gimple_label) == GIMPLE_LABEL)
2972 tree t = gimple_label_label (stmt_gimple_label);
2974 if (t == gimple_switch_label (stmt, i))
2975 VEC_replace (gimple, *cases, n_cases,
2976 stmt_gimple_label);
2977 else
2978 gcc_unreachable ();
2982 build_scop_conditions_1 (conditions, cases, bb_child, scop);
2984 /* Remove the scanned block from the dominator successors. */
2985 for (j = 0; VEC_iterate (basic_block, dom, j, bb_iter); j++)
2986 if (bb_iter == bb_child)
2988 VEC_unordered_remove (basic_block, dom, j);
2989 break;
2993 VEC_pop (gimple, *conditions);
2994 VEC_pop (gimple, *cases);
2995 break;
2997 default:
2998 break;
3002 /* Scan all immediate dominated successors. */
3003 for (i = 0; VEC_iterate (basic_block, dom, i, bb_child); i++)
3004 build_scop_conditions_1 (conditions, cases, bb_child, scop);
3006 VEC_free (basic_block, heap, dom);
3009 /* Record all 'if' and 'switch' conditions in each gbb of SCOP. */
3011 static void
3012 build_scop_conditions (scop_p scop)
3014 VEC (gimple, heap) *conditions = NULL;
3015 VEC (gimple, heap) *cases = NULL;
3017 build_scop_conditions_1 (&conditions, &cases, SCOP_ENTRY (scop), scop);
3019 VEC_free (gimple, heap, conditions);
3020 VEC_free (gimple, heap, cases);
3023 /* Build the current domain matrix: the loops belonging to the current
3024 SCOP, and that vary for the execution of the current basic block.
3025 Returns false if there is no loop in SCOP. */
3027 static bool
3028 build_scop_iteration_domain (scop_p scop)
3030 struct loop *loop;
3031 CloogMatrix *outer_cstr;
3032 int i;
3034 /* Build cloog loop for all loops, that are in the uppermost loop layer of
3035 this SCoP. */
3036 for (i = 0; VEC_iterate (loop_p, SCOP_LOOP_NEST (scop), i, loop); i++)
3037 if (!loop_in_scop_p (loop_outer (loop), scop))
3039 /* The outermost constraints is a matrix that has:
3040 -first column: eq/ineq boolean
3041 -last column: a constant
3042 -scop_nb_params columns for the parameters used in the scop. */
3043 outer_cstr = cloog_matrix_alloc (0, scop_nb_params (scop) + 2);
3044 build_loop_iteration_domains (scop, loop, outer_cstr, 0);
3045 cloog_matrix_free (outer_cstr);
3048 return (i != 0);
3051 /* Initializes an equation CY of the access matrix using the
3052 information for a subscript from ACCESS_FUN, relatively to the loop
3053 indexes from LOOP_NEST and parameter indexes from PARAMS. NDIM is
3054 the dimension of the array access, i.e. the number of
3055 subscripts. Returns true when the operation succeeds. */
3057 static bool
3058 build_access_matrix_with_af (tree access_fun, lambda_vector cy,
3059 scop_p scop, int ndim)
3061 switch (TREE_CODE (access_fun))
3063 case POLYNOMIAL_CHREC:
3065 tree left = CHREC_LEFT (access_fun);
3066 tree right = CHREC_RIGHT (access_fun);
3067 int var;
3069 if (TREE_CODE (right) != INTEGER_CST)
3070 return false;
3072 var = loop_iteration_vector_dim (CHREC_VARIABLE (access_fun), scop);
3073 cy[var] = int_cst_value (right);
3075 switch (TREE_CODE (left))
3077 case POLYNOMIAL_CHREC:
3078 return build_access_matrix_with_af (left, cy, scop, ndim);
3080 case INTEGER_CST:
3081 cy[ndim - 1] = int_cst_value (left);
3082 return true;
3084 default:
3085 /* FIXME: access_fn can have parameters. */
3086 return false;
3089 case INTEGER_CST:
3090 cy[ndim - 1] = int_cst_value (access_fun);
3091 return true;
3093 default:
3094 /* FIXME: access_fn can have parameters. */
3095 return false;
3099 /* Initialize the access matrix in the data reference REF with respect
3100 to the loop nesting LOOP_NEST. Return true when the operation
3101 succeeded. */
3103 static bool
3104 build_access_matrix (data_reference_p ref, graphite_bb_p gb)
3106 int i, ndim = DR_NUM_DIMENSIONS (ref);
3107 struct access_matrix *am = GGC_NEW (struct access_matrix);
3109 AM_MATRIX (am) = VEC_alloc (lambda_vector, heap, ndim);
3110 DR_SCOP (ref) = GBB_SCOP (gb);
3112 for (i = 0; i < ndim; i++)
3114 lambda_vector v = lambda_vector_new (ref_nb_loops (ref));
3115 scop_p scop = GBB_SCOP (gb);
3116 tree af = DR_ACCESS_FN (ref, i);
3118 if (!build_access_matrix_with_af (af, v, scop, ref_nb_loops (ref)))
3119 return false;
3121 VEC_safe_push (lambda_vector, heap, AM_MATRIX (am), v);
3124 DR_ACCESS_MATRIX (ref) = am;
3125 return true;
3128 /* Build the access matrices for the data references in the SCOP. */
3130 static void
3131 build_scop_data_accesses (scop_p scop)
3133 int i;
3134 graphite_bb_p gb;
3136 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++)
3138 int j;
3139 gimple_stmt_iterator gsi;
3140 data_reference_p dr;
3141 struct loop *nest = outermost_loop_in_scop (scop, GBB_BB (gb));
3143 /* On each statement of the basic block, gather all the occurences
3144 to read/write memory. */
3145 GBB_DATA_REFS (gb) = VEC_alloc (data_reference_p, heap, 5);
3146 for (gsi = gsi_start_bb (GBB_BB (gb)); !gsi_end_p (gsi); gsi_next (&gsi))
3147 find_data_references_in_stmt (nest, gsi_stmt (gsi),
3148 &GBB_DATA_REFS (gb));
3150 /* FIXME: Construction of access matrix is disabled until some
3151 pass, like the data dependence analysis, is using it. */
3152 continue;
3154 /* Construct the access matrix for each data ref, with respect to
3155 the loop nest of the current BB in the considered SCOP. */
3156 for (j = 0;
3157 VEC_iterate (data_reference_p, GBB_DATA_REFS (gb), j, dr);
3158 j++)
3160 bool res = build_access_matrix (dr, gb);
3162 /* FIXME: At this point the DRs should always have an affine
3163 form. For the moment this fails as build_access_matrix
3164 does not build matrices with parameters. */
3165 gcc_assert (res);
3170 /* Returns the tree variable from the name NAME that was given in
3171 Cloog representation. All the parameters are stored in PARAMS, and
3172 all the loop induction variables are stored in IVSTACK.
3174 FIXME: This is a hack, and Cloog should be fixed to not work with
3175 variable names represented as "char *string", but with void
3176 pointers that could be casted back to a tree. The only problem in
3177 doing that is that Cloog's pretty printer still assumes that
3178 variable names are char *strings. The solution would be to have a
3179 function pointer for pretty-printing that can be redirected to be
3180 print_generic_stmt in our case, or fprintf by default.
3181 ??? Too ugly to live. */
3183 static tree
3184 clast_name_to_gcc (const char *name, VEC (name_tree, heap) *params,
3185 loop_iv_stack ivstack)
3187 int i;
3188 name_tree t;
3189 tree iv;
3191 for (i = 0; VEC_iterate (name_tree, params, i, t); i++)
3192 if (!strcmp (name, t->name))
3193 return t->t;
3195 iv = loop_iv_stack_get_iv_from_name (ivstack, name);
3196 if (iv)
3197 return iv;
3199 gcc_unreachable ();
3202 /* Converts a Cloog AST expression E back to a GCC expression tree. */
3204 static tree
3205 clast_to_gcc_expression (struct clast_expr *e,
3206 VEC (name_tree, heap) *params,
3207 loop_iv_stack ivstack)
3209 tree type = integer_type_node;
3211 gcc_assert (e);
3213 switch (e->type)
3215 case expr_term:
3217 struct clast_term *t = (struct clast_term *) e;
3219 if (t->var)
3221 if (value_one_p (t->val))
3222 return clast_name_to_gcc (t->var, params, ivstack);
3224 else if (value_mone_p (t->val))
3225 return fold_build1 (NEGATE_EXPR, type,
3226 clast_name_to_gcc (t->var, params, ivstack));
3227 else
3228 return fold_build2 (MULT_EXPR, type,
3229 gmp_cst_to_tree (t->val),
3230 clast_name_to_gcc (t->var, params, ivstack));
3232 else
3233 return gmp_cst_to_tree (t->val);
3236 case expr_red:
3238 struct clast_reduction *r = (struct clast_reduction *) e;
3239 tree left, right;
3241 switch (r->type)
3243 case clast_red_sum:
3244 if (r->n == 1)
3245 return clast_to_gcc_expression (r->elts[0], params, ivstack);
3247 else
3249 gcc_assert (r->n >= 1
3250 && r->elts[0]->type == expr_term
3251 && r->elts[1]->type == expr_term);
3253 left = clast_to_gcc_expression (r->elts[0], params, ivstack);
3254 right = clast_to_gcc_expression (r->elts[1], params, ivstack);
3255 return fold_build2 (PLUS_EXPR, type, left, right);
3258 break;
3260 case clast_red_min:
3261 if (r->n == 1)
3262 return clast_to_gcc_expression (r->elts[0], params, ivstack);
3264 else if (r->n == 2)
3266 left = clast_to_gcc_expression (r->elts[0], params, ivstack);
3267 right = clast_to_gcc_expression (r->elts[1], params, ivstack);
3268 return fold_build2 (MIN_EXPR, type, left, right);
3271 else
3272 gcc_unreachable();
3274 break;
3276 case clast_red_max:
3277 if (r->n == 1)
3278 return clast_to_gcc_expression (r->elts[0], params, ivstack);
3280 else if (r->n == 2)
3282 left = clast_to_gcc_expression (r->elts[0], params, ivstack);
3283 right = clast_to_gcc_expression (r->elts[1], params, ivstack);
3284 return fold_build2 (MAX_EXPR, type, left, right);
3287 else
3288 gcc_unreachable();
3290 break;
3292 default:
3293 gcc_unreachable ();
3295 break;
3298 case expr_bin:
3300 struct clast_binary *b = (struct clast_binary *) e;
3301 struct clast_expr *lhs = (struct clast_expr *) b->LHS;
3302 struct clast_expr *rhs = (struct clast_expr *) b->RHS;
3303 tree tl = clast_to_gcc_expression (lhs, params, ivstack);
3305 /* FIXME: The next statement produces a warning: Cloog assumes
3306 that the RHS is a constant, but this is a "void *" pointer
3307 that should be casted into a Value, but this cast cannot be
3308 done as Value is a GMP type, that is an array. Cloog must
3309 be fixed for removing this warning. */
3310 tree tr = gmp_cst_to_tree (rhs);
3312 switch (b->type)
3314 case clast_bin_fdiv:
3315 return fold_build2 (FLOOR_DIV_EXPR, type, tl, tr);
3317 case clast_bin_cdiv:
3318 return fold_build2 (CEIL_DIV_EXPR, type, tl, tr);
3320 case clast_bin_div:
3321 return fold_build2 (EXACT_DIV_EXPR, type, tl, tr);
3323 case clast_bin_mod:
3324 return fold_build2 (TRUNC_MOD_EXPR, type, tl, tr);
3326 default:
3327 gcc_unreachable ();
3331 default:
3332 gcc_unreachable ();
3335 return NULL_TREE;
3338 /* Translates a clast equation CLEQ to a tree. */
3340 static tree
3341 graphite_translate_clast_equation (scop_p scop,
3342 struct clast_equation *cleq,
3343 loop_iv_stack ivstack)
3345 enum tree_code comp;
3346 tree lhs = clast_to_gcc_expression (cleq->LHS, SCOP_PARAMS (scop), ivstack);
3347 tree rhs = clast_to_gcc_expression (cleq->RHS, SCOP_PARAMS (scop), ivstack);
3349 if (cleq->sign == 0)
3350 comp = EQ_EXPR;
3352 else if (cleq->sign > 0)
3353 comp = GE_EXPR;
3355 else
3356 comp = LE_EXPR;
3358 return fold_build2 (comp, integer_type_node, lhs, rhs);
3361 /* Creates the test for the condition in STMT. */
3363 static tree
3364 graphite_create_guard_cond_expr (scop_p scop, struct clast_guard *stmt,
3365 loop_iv_stack ivstack)
3367 tree cond = NULL;
3368 int i;
3370 for (i = 0; i < stmt->n; i++)
3372 tree eq = graphite_translate_clast_equation (scop, &stmt->eq[i], ivstack);
3374 if (cond)
3375 cond = fold_build2 (TRUTH_AND_EXPR, integer_type_node, cond, eq);
3376 else
3377 cond = eq;
3380 return cond;
3383 /* Creates a new if region corresponding to Cloog's guard. */
3385 static edge
3386 graphite_create_new_guard (scop_p scop, edge entry_edge,
3387 struct clast_guard *stmt,
3388 loop_iv_stack ivstack)
3390 tree cond_expr = graphite_create_guard_cond_expr (scop, stmt, ivstack);
3391 edge exit_edge = create_empty_if_region_on_edge (entry_edge, cond_expr);
3392 return exit_edge;
3396 /* Creates a new LOOP corresponding to Cloog's STMT. Inserts an induction
3397 variable for the new LOOP. New LOOP is attached to CFG starting at
3398 ENTRY_EDGE. LOOP is inserted into the loop tree and becomes the child
3399 loop of the OUTER_LOOP. */
3401 static struct loop *
3402 graphite_create_new_loop (scop_p scop, edge entry_edge,
3403 struct clast_for *stmt, loop_iv_stack ivstack,
3404 loop_p outer)
3406 struct loop *loop;
3407 tree ivvar;
3408 tree stride, lowb, upb;
3409 tree iv_before;
3411 gcc_assert (stmt->LB
3412 && stmt->UB);
3414 stride = gmp_cst_to_tree (stmt->stride);
3415 lowb = clast_to_gcc_expression (stmt->LB, SCOP_PARAMS (scop), ivstack);
3416 ivvar = create_tmp_var (integer_type_node, "graphiteIV");
3417 add_referenced_var (ivvar);
3419 upb = clast_to_gcc_expression (stmt->UB, SCOP_PARAMS (scop), ivstack);
3420 loop = create_empty_loop_on_edge (entry_edge, lowb, stride, upb, ivvar,
3421 &iv_before, outer ? outer
3422 : entry_edge->src->loop_father);
3424 loop_iv_stack_push_iv (ivstack, iv_before, stmt->iterator);
3426 return loop;
3429 /* Remove all the edges from EDGES except the edge KEEP. */
3431 static void
3432 remove_all_edges_1 (VEC (edge, gc) *edges, edge keep)
3434 edge e;
3435 edge_iterator ei;
3437 for (ei = ei_start (edges); (e = ei_safe_edge (ei)); )
3439 if (e != keep)
3441 remove_edge (e);
3442 e = ei_safe_edge (ei);
3444 else
3445 ei_next (&ei);
3449 /* Remove all the edges from BB except the edge KEEP. */
3451 static void
3452 remove_all_edges (basic_block bb, edge keep)
3454 remove_all_edges_1 (bb->succs, keep);
3455 remove_all_edges_1 (bb->preds, keep);
3458 /* Rename the SSA_NAMEs used in STMT and that appear in IVSTACK. */
3460 static void
3461 graphite_rename_ivs_stmt (gimple stmt, graphite_bb_p gbb, scop_p scop,
3462 loop_p old, loop_iv_stack ivstack)
3464 ssa_op_iter iter;
3465 use_operand_p use_p;
3467 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
3469 tree use = USE_FROM_PTR (use_p);
3470 tree new_iv = NULL;
3471 name_tree old_iv = get_old_iv_from_ssa_name (scop, old, use);
3473 if (old_iv)
3474 new_iv = loop_iv_stack_get_iv (ivstack,
3475 gbb_loop_index (gbb, old_iv->loop));
3477 if (new_iv)
3478 SET_USE (use_p, new_iv);
3482 /* Returns true if SSA_NAME is a parameter of SCOP. */
3484 static bool
3485 is_parameter (scop_p scop, tree ssa_name)
3487 int i;
3488 VEC (name_tree, heap) *params = SCOP_PARAMS (scop);
3489 name_tree param;
3491 for (i = 0; VEC_iterate (name_tree, params, i, param); i++)
3492 if (param->t == ssa_name)
3493 return true;
3495 return false;
3498 /* Returns true if NAME is an old induction variable in SCOP. OLD is
3499 the original loop that contained the definition of NAME. */
3501 static bool
3502 is_old_iv (scop_p scop, loop_p old, tree name)
3504 return get_old_iv_from_ssa_name (scop, old, name) != NULL;
3508 static void expand_scalar_variables_stmt (gimple, graphite_bb_p, scop_p, loop_p,
3509 loop_iv_stack);
3511 /* Constructs a tree which only contains old_ivs and parameters. Any
3512 other variables that are defined outside GBB will be eliminated by
3513 using their definitions in the constructed tree. OLD_LOOP_FATHER
3514 is the original loop that contained GBB. */
3516 static tree
3517 expand_scalar_variables_expr (tree type, tree op0, enum tree_code code,
3518 tree op1, graphite_bb_p gbb, scop_p scop,
3519 loop_p old_loop_father, loop_iv_stack ivstack)
3521 if ((TREE_CODE_CLASS (code) == tcc_constant
3522 && code == INTEGER_CST)
3523 || TREE_CODE_CLASS (code) == tcc_reference)
3524 return op0;
3526 if (TREE_CODE_CLASS (code) == tcc_unary)
3528 tree op0_type = TREE_TYPE (op0);
3529 enum tree_code op0_code = TREE_CODE (op0);
3530 tree op0_expr =
3531 expand_scalar_variables_expr (op0_type, op0, op0_code,
3532 NULL, gbb, scop, old_loop_father,
3533 ivstack);
3535 return fold_build1 (code, type, op0_expr);
3538 if (TREE_CODE_CLASS (code) == tcc_binary)
3540 tree op0_type = TREE_TYPE (op0);
3541 enum tree_code op0_code = TREE_CODE (op0);
3542 tree op0_expr =
3543 expand_scalar_variables_expr (op0_type, op0, op0_code,
3544 NULL, gbb, scop, old_loop_father,
3545 ivstack);
3546 tree op1_type = TREE_TYPE (op1);
3547 enum tree_code op1_code = TREE_CODE (op1);
3548 tree op1_expr =
3549 expand_scalar_variables_expr (op1_type, op1, op1_code,
3550 NULL, gbb, scop, old_loop_father,
3551 ivstack);
3553 return fold_build2 (code, type, op0_expr, op1_expr);
3556 if (code == SSA_NAME)
3558 tree var0, var1;
3559 gimple def_stmt;
3560 enum tree_code subcode;
3562 if(is_parameter (scop, op0) ||
3563 is_old_iv (scop, old_loop_father, op0))
3564 return op0;
3566 def_stmt = SSA_NAME_DEF_STMT (op0);
3568 if (gimple_bb (def_stmt) == GBB_BB (gbb))
3570 /* If the defining statement is in the basic block already
3571 we do not need to create a new expression for it, we
3572 only need to ensure its operands are expanded. */
3573 expand_scalar_variables_stmt (def_stmt, gbb, scop,
3574 old_loop_father, ivstack);
3575 return op0;
3578 else
3580 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
3581 return op0;
3583 var0 = gimple_assign_rhs1 (def_stmt);
3584 subcode = gimple_assign_rhs_code (def_stmt);
3585 var1 = gimple_assign_rhs2 (def_stmt);
3587 return expand_scalar_variables_expr (type, var0, subcode, var1,
3588 gbb, scop, old_loop_father,
3589 ivstack);
3593 gcc_unreachable ();
3594 return NULL;
3597 /* Replicates any uses of non-parameters and non-old-ivs variablesthat
3598 are defind outside GBB with code that is inserted in GBB.
3599 OLD_LOOP_FATHER is the original loop that contained STMT. */
3601 static void
3602 expand_scalar_variables_stmt (gimple stmt, graphite_bb_p gbb, scop_p scop,
3603 loop_p old_loop_father, loop_iv_stack ivstack)
3605 ssa_op_iter iter;
3606 use_operand_p use_p;
3607 basic_block bb = GBB_BB (gbb);
3609 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
3611 tree use = USE_FROM_PTR (use_p);
3612 tree type = TREE_TYPE (use);
3613 enum tree_code code = TREE_CODE (use);
3614 tree use_expr = expand_scalar_variables_expr (type, use, code, NULL,
3615 gbb, scop, old_loop_father,
3616 ivstack);
3617 if (use_expr != use)
3619 gimple_stmt_iterator gsi = gsi_after_labels (bb);
3620 tree new_use =
3621 force_gimple_operand_gsi (&gsi, use_expr, true, NULL,
3622 true, GSI_NEW_STMT);
3623 SET_USE (use_p, new_use);
3628 /* Copies the definitions outside of GBB of variables that are not
3629 induction variables nor parameters. GBB must only contain
3630 "external" references to these types of variables. OLD_LOOP_FATHER
3631 is the original loop that contained GBB. */
3633 static void
3634 expand_scalar_variables (graphite_bb_p gbb, scop_p scop,
3635 loop_p old_loop_father, loop_iv_stack ivstack)
3637 basic_block bb = GBB_BB (gbb);
3638 gimple_stmt_iterator gsi;
3640 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
3642 gimple stmt = gsi_stmt (gsi);
3643 expand_scalar_variables_stmt (stmt, gbb, scop, old_loop_father,
3644 ivstack);
3645 gsi_next (&gsi);
3649 /* Rename all the SSA_NAMEs from block GBB that appear in IVSTACK in
3650 terms of new induction variables. OLD_LOOP_FATHER is the original
3651 loop that contained GBB. */
3653 static void
3654 graphite_rename_ivs (graphite_bb_p gbb, scop_p scop, loop_p old_loop_father,
3655 loop_iv_stack ivstack)
3657 basic_block bb = GBB_BB (gbb);
3658 gimple_stmt_iterator gsi;
3660 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
3662 gimple stmt = gsi_stmt (gsi);
3664 if (gimple_get_lhs (stmt)
3665 && TREE_CODE (gimple_get_lhs (stmt)) == SSA_NAME
3666 && get_old_iv_from_ssa_name (scop, old_loop_father,
3667 gimple_get_lhs (stmt)))
3668 gsi_remove (&gsi, false);
3669 else
3671 graphite_rename_ivs_stmt (stmt, gbb, scop, old_loop_father, ivstack);
3672 gsi_next (&gsi);
3677 /* Move all the PHI nodes from block FROM to block TO.
3678 OLD_LOOP_FATHER is the original loop that contained FROM. */
3680 static void
3681 move_phi_nodes (scop_p scop, loop_p old_loop_father, basic_block from,
3682 basic_block to)
3684 gimple_stmt_iterator gsi;
3686 for (gsi = gsi_start_phis (from); !gsi_end_p (gsi);)
3688 gimple phi = gsi_stmt (gsi);
3689 tree op = gimple_phi_result (phi);
3691 if (get_old_iv_from_ssa_name (scop, old_loop_father, op) == NULL)
3693 gimple new_phi = make_phi_node (op, 0);
3694 add_phi_node_to_bb (new_phi, to);
3696 remove_phi_node (&gsi, false);
3700 /* Remove condition from BB. */
3702 static void
3703 remove_condition (basic_block bb)
3705 gimple last = last_stmt (bb);
3707 if (last && gimple_code (last) == GIMPLE_COND)
3709 gimple_stmt_iterator gsi = gsi_last_bb (bb);
3710 gsi_remove (&gsi, true);
3714 /* Returns the first successor edge of BB with EDGE_TRUE_VALUE flag set. */
3716 static edge
3717 get_true_edge_from_guard_bb (basic_block bb)
3719 edge e;
3720 edge_iterator ei;
3722 FOR_EACH_EDGE (e, ei, bb->succs)
3723 if (e->flags & EDGE_TRUE_VALUE)
3724 return e;
3726 gcc_unreachable ();
3727 return NULL;
3730 /* Translates a CLAST statement STMT to GCC representation. NEXT_E is
3731 the edge where new generated code should be attached. BB_EXIT is the last
3732 basic block that defines the scope of code generation. CONTEXT_LOOP is the
3733 loop in which the generated code will be placed (might be NULL). */
3735 static edge
3736 translate_clast (scop_p scop, struct loop *context_loop,
3737 struct clast_stmt *stmt, edge next_e, loop_iv_stack ivstack)
3739 if (!stmt)
3740 return next_e;
3742 if (CLAST_STMT_IS_A (stmt, stmt_root))
3743 return translate_clast (scop, context_loop, stmt->next, next_e, ivstack);
3745 if (CLAST_STMT_IS_A (stmt, stmt_user))
3747 CloogStatement *cs = ((struct clast_user_stmt *) stmt)->statement;
3748 graphite_bb_p gbb = (graphite_bb_p) cloog_statement_usr (cs);
3749 basic_block bb = gbb->bb;
3750 loop_p old_loop_father = bb->loop_father;
3752 if (bb == ENTRY_BLOCK_PTR)
3753 return next_e;
3755 remove_condition (bb);
3756 expand_scalar_variables (gbb, scop, old_loop_father, ivstack);
3757 remove_all_edges (bb, next_e);
3758 move_phi_nodes (scop, old_loop_father, bb, next_e->src);
3759 redirect_edge_succ_nodup (next_e, bb);
3761 if (context_loop)
3763 remove_bb_from_loops (bb);
3764 add_bb_to_loop (bb, context_loop);
3767 set_immediate_dominator (CDI_DOMINATORS, next_e->dest, next_e->src);
3768 mark_virtual_ops_in_bb (bb);
3769 next_e = make_edge (bb,
3770 context_loop ? context_loop->latch : EXIT_BLOCK_PTR,
3771 EDGE_FALLTHRU);
3772 loop_iv_stack_patch_for_consts (ivstack,
3773 (struct clast_user_stmt *) stmt);
3774 graphite_rename_ivs (gbb, scop, old_loop_father, ivstack);
3775 loop_iv_stack_remove_constants (ivstack);
3776 return translate_clast (scop, context_loop, stmt->next, next_e, ivstack);
3779 if (CLAST_STMT_IS_A (stmt, stmt_for))
3781 struct loop *loop
3782 = graphite_create_new_loop (scop, next_e, (struct clast_for *) stmt,
3783 ivstack, context_loop ? context_loop
3784 : get_loop (0));
3785 edge last_e = single_exit (loop);
3787 next_e = translate_clast (scop, loop, ((struct clast_for *) stmt)->body,
3788 single_pred_edge (loop->latch), ivstack);
3789 redirect_edge_succ_nodup (next_e, loop->latch);
3791 set_immediate_dominator (CDI_DOMINATORS, next_e->dest, next_e->src);
3792 loop_iv_stack_pop (ivstack);
3794 return translate_clast (scop, context_loop, stmt->next, last_e, ivstack);
3797 if (CLAST_STMT_IS_A (stmt, stmt_guard))
3799 edge last_e = graphite_create_new_guard (scop, next_e,
3800 ((struct clast_guard *) stmt),
3801 ivstack);
3802 edge true_e = get_true_edge_from_guard_bb (next_e->dest);
3803 next_e = translate_clast (scop, context_loop,
3804 ((struct clast_guard *) stmt)->then,
3805 true_e, ivstack);
3806 redirect_edge_succ_nodup (next_e, last_e->src);
3807 return translate_clast (scop, context_loop, stmt->next, last_e, ivstack);
3810 if (CLAST_STMT_IS_A (stmt, stmt_block))
3812 next_e = translate_clast (scop, context_loop,
3813 ((struct clast_block *) stmt)->body,
3814 next_e, ivstack);
3815 return translate_clast (scop, context_loop, stmt->next, next_e, ivstack);
3818 gcc_unreachable ();
3821 /* Free the SCATTERING domain list. */
3823 static void
3824 free_scattering (CloogDomainList *scattering)
3826 while (scattering)
3828 CloogDomain *dom = cloog_domain (scattering);
3829 CloogDomainList *next = cloog_next_domain (scattering);
3831 cloog_domain_free (dom);
3832 free (scattering);
3833 scattering = next;
3837 /* Build cloog program for SCoP. */
3839 static void
3840 build_cloog_prog (scop_p scop)
3842 int i;
3843 int max_nb_loops = scop_max_loop_depth (scop);
3844 graphite_bb_p gbb;
3845 CloogLoop *loop_list = NULL;
3846 CloogBlockList *block_list = NULL;
3847 CloogDomainList *scattering = NULL;
3848 CloogProgram *prog = SCOP_PROG (scop);
3849 int nbs = 2 * max_nb_loops + 1;
3850 int *scaldims = (int *) xmalloc (nbs * (sizeof (int)));
3852 cloog_program_set_nb_scattdims (prog, nbs);
3853 initialize_cloog_names (scop);
3855 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gbb); i++)
3857 /* Build new block. */
3858 CloogMatrix *domain = GBB_DOMAIN (gbb);
3859 CloogStatement *stmt = cloog_statement_alloc (GBB_BB (gbb)->index);
3860 CloogBlock *block = cloog_block_alloc (stmt, 0, NULL,
3861 nb_loops_around_gb (gbb));
3862 cloog_statement_set_usr (stmt, gbb);
3864 /* Add empty domain to all bbs, which do not yet have a domain, as they
3865 are not part of any loop. */
3866 if (domain == NULL)
3868 domain = cloog_matrix_alloc (0, scop_nb_params (scop) + 2);
3869 GBB_DOMAIN (gbb) = domain;
3872 /* Build loop list. */
3874 CloogLoop *new_loop_list = cloog_loop_malloc ();
3875 cloog_loop_set_next (new_loop_list, loop_list);
3876 cloog_loop_set_domain (new_loop_list,
3877 cloog_domain_matrix2domain (domain));
3878 cloog_loop_set_block (new_loop_list, block);
3879 loop_list = new_loop_list;
3882 /* Build block list. */
3884 CloogBlockList *new_block_list = cloog_block_list_malloc ();
3886 cloog_block_list_set_next (new_block_list, block_list);
3887 cloog_block_list_set_block (new_block_list, block);
3888 block_list = new_block_list;
3891 /* Build scattering list. */
3893 /* XXX: Replace with cloog_domain_list_alloc(), when available. */
3894 CloogDomainList *new_scattering
3895 = (CloogDomainList *) xmalloc (sizeof (CloogDomainList));
3896 CloogMatrix *scat_mat = schedule_to_scattering (gbb, nbs);
3898 cloog_set_next_domain (new_scattering, scattering);
3899 cloog_set_domain (new_scattering,
3900 cloog_domain_matrix2domain (scat_mat));
3901 scattering = new_scattering;
3902 cloog_matrix_free (scat_mat);
3906 cloog_program_set_loop (prog, loop_list);
3907 cloog_program_set_blocklist (prog, block_list);
3909 for (i = 0; i < nbs; i++)
3910 scaldims[i] = 0 ;
3912 cloog_program_set_scaldims (prog, scaldims);
3914 /* Extract scalar dimensions to simplify the code generation problem. */
3915 cloog_program_extract_scalars (prog, scattering);
3917 /* Apply scattering. */
3918 cloog_program_scatter (prog, scattering);
3919 free_scattering (scattering);
3921 /* Iterators corresponding to scalar dimensions have to be extracted. */
3922 cloog_names_scalarize (cloog_program_names (prog), nbs,
3923 cloog_program_scaldims (prog));
3925 /* Free blocklist. */
3927 CloogBlockList *next = cloog_program_blocklist (prog);
3929 while (next)
3931 CloogBlockList *toDelete = next;
3932 next = cloog_block_list_next (next);
3933 cloog_block_list_set_next (toDelete, NULL);
3934 cloog_block_list_set_block (toDelete, NULL);
3935 cloog_block_list_free (toDelete);
3937 cloog_program_set_blocklist (prog, NULL);
3941 /* Return the options that will be used in GLOOG. */
3943 static CloogOptions *
3944 set_cloog_options (void)
3946 CloogOptions *options = cloog_options_malloc ();
3948 /* Change cloog output language to C. If we do use FORTRAN instead, cloog
3949 will stop e.g. with "ERROR: unbounded loops not allowed in FORTRAN.", if
3950 we pass an incomplete program to cloog. */
3951 options->language = LANGUAGE_C;
3953 /* Enable complex equality spreading: removes dummy statements
3954 (assignments) in the generated code which repeats the
3955 substitution equations for statements. This is useless for
3956 GLooG. */
3957 options->esp = 1;
3959 /* Enable C pretty-printing mode: normalizes the substitution
3960 equations for statements. */
3961 options->cpp = 1;
3963 /* Allow cloog to build strides with a stride width different to one.
3964 This example has stride = 4:
3966 for (i = 0; i < 20; i += 4)
3967 A */
3968 options->strides = 1;
3970 /* Disable optimizations and make cloog generate source code closer to the
3971 input. This is useful for debugging, but later we want the optimized
3972 code.
3974 XXX: We can not disable optimizations, as loop blocking is not working
3975 without them. */
3976 if (0)
3978 options->f = -1;
3979 options->l = INT_MAX;
3982 return options;
3985 /* Prints STMT to STDERR. */
3987 void
3988 debug_clast_stmt (struct clast_stmt *stmt)
3990 CloogOptions *options = set_cloog_options ();
3992 pprint (stderr, stmt, 0, options);
3995 /* Find the right transform for the SCOP, and return a Cloog AST
3996 representing the new form of the program. */
3998 static struct clast_stmt *
3999 find_transform (scop_p scop)
4001 struct clast_stmt *stmt;
4002 CloogOptions *options = set_cloog_options ();
4004 /* Connect new cloog prog generation to graphite. */
4005 build_cloog_prog (scop);
4007 if (dump_file && (dump_flags & TDF_DETAILS))
4009 fprintf (dump_file, "Cloog Input [\n");
4010 cloog_program_print (dump_file, SCOP_PROG(scop));
4011 fprintf (dump_file, "]\n");
4014 SCOP_PROG (scop) = cloog_program_generate (SCOP_PROG (scop), options);
4015 stmt = cloog_clast_create (SCOP_PROG (scop), options);
4017 if (dump_file && (dump_flags & TDF_DETAILS))
4019 fprintf (dump_file, "Cloog Output[\n");
4020 pprint (dump_file, stmt, 0, options);
4021 cloog_program_dump_cloog (dump_file, SCOP_PROG (scop));
4022 fprintf (dump_file, "]\n");
4025 cloog_options_free (options);
4026 return stmt;
4029 /* Return a vector of all the virtual phi nodes in the current
4030 function. */
4032 static VEC (gimple, heap) *
4033 collect_virtual_phis (void)
4035 gimple_stmt_iterator si;
4036 gimple_vec phis = VEC_alloc (gimple, heap, 3);
4037 basic_block bb;
4039 FOR_EACH_BB (bb)
4040 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
4041 /* The phis we moved will have 0 arguments because the
4042 original edges were removed. */
4043 if (gimple_phi_num_args (gsi_stmt (si)) == 0)
4044 VEC_safe_push (gimple, heap, phis, gsi_stmt (si));
4046 /* Deallocate if we did not find any. */
4047 if (VEC_length (gimple, phis) == 0)
4049 VEC_free (gimple, heap, phis);
4050 phis = NULL;
4053 return phis;
4056 /* Find a virtual definition for variable VAR in BB. */
4058 static tree
4059 find_vdef_for_var_in_bb (basic_block bb, tree var)
4061 gimple_stmt_iterator gsi;
4062 gimple phi;
4063 def_operand_p def_var;
4064 vuse_vec_p vv;
4065 ssa_op_iter op_iter;
4067 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4068 FOR_EACH_SSA_VDEF_OPERAND (def_var, vv, gsi_stmt (gsi), op_iter)
4069 if (SSA_NAME_VAR (*def_var) == var)
4070 return *def_var;
4072 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4073 FOR_EACH_SSA_DEF_OPERAND (def_var, gsi_stmt (gsi), op_iter, SSA_OP_DEF)
4074 if (SSA_NAME_VAR (*def_var) == var)
4075 return *def_var;
4077 for (gsi = gsi_start_phis (bb); !gsi_end_p(gsi); gsi_next (&gsi))
4079 phi = gsi_stmt (gsi);
4080 if (SSA_NAME_VAR (PHI_RESULT (phi)) == var)
4081 return PHI_RESULT (phi);
4084 return NULL;
4087 /* Recursive helper. */
4089 static tree
4090 find_vdef_for_var_1 (basic_block bb, struct pointer_set_t *visited, tree var)
4092 tree result = NULL;
4093 edge_iterator ei;
4094 edge pred_edge;
4096 if (pointer_set_contains (visited, bb))
4097 return NULL;
4099 pointer_set_insert (visited, bb);
4100 result = find_vdef_for_var_in_bb (bb, var);
4102 if (!result)
4103 FOR_EACH_EDGE (pred_edge, ei, bb->preds)
4104 if (!result)
4105 result = find_vdef_for_var_1 (pred_edge->src, visited, var);
4107 return result;
4110 /* Finds a virtual definition for variable VAR. */
4112 static tree
4113 find_vdef_for_var (basic_block bb, tree var)
4115 struct pointer_set_t *visited = pointer_set_create ();
4116 tree def = find_vdef_for_var_1 (bb, visited, var);
4118 pointer_set_destroy (visited);
4119 return def;
4122 /* Update the virtual phis after loop bodies are moved to new
4123 loops. */
4125 static void
4126 patch_phis_for_virtual_defs (void)
4128 int i;
4129 gimple phi;
4130 VEC (gimple, heap) *virtual_phis = collect_virtual_phis ();
4132 for (i = 0; VEC_iterate (gimple, virtual_phis, i, phi); i++)
4134 basic_block bb = gimple_bb (phi);
4135 edge_iterator ei;
4136 edge pred_edge;
4137 gimple_stmt_iterator gsi;
4138 gimple new_phi;
4139 tree phi_result = PHI_RESULT (phi);
4140 tree var = SSA_NAME_VAR (phi_result);
4142 new_phi = create_phi_node (phi_result, bb);
4143 SSA_NAME_DEF_STMT (phi_result) = new_phi;
4145 FOR_EACH_EDGE (pred_edge, ei, bb->preds)
4147 tree def = find_vdef_for_var (pred_edge->src, var);
4149 if (def)
4150 add_phi_arg (new_phi, def, pred_edge);
4151 else
4152 add_phi_arg (new_phi, gimple_default_def (cfun, var), pred_edge);
4155 gsi = gsi_for_stmt (phi);
4156 remove_phi_node (&gsi, false);
4159 VEC_free (gimple, heap, virtual_phis);
4162 /* Mark the original loops of SCOP for removal, replacing their header
4163 field with NULL. */
4165 static void
4166 mark_old_loops (scop_p scop)
4168 int i;
4169 struct loop *loop;
4171 for (i = 0; VEC_iterate (loop_p, SCOP_LOOP_NEST (scop), i, loop); i++)
4173 loop->header = NULL;
4174 loop->latch = NULL;
4178 /* Scan the loops and remove the ones that have been marked for
4179 removal. */
4181 static void
4182 remove_dead_loops (void)
4184 struct loop *loop, *ploop;
4185 loop_iterator li;
4187 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
4189 /* Remove only those loops that we marked to be removed with
4190 mark_old_loops. */
4191 if (loop->header)
4192 continue;
4194 while (loop->inner)
4196 ploop = loop->inner;
4197 flow_loop_tree_node_remove (ploop);
4198 flow_loop_tree_node_add (loop_outer (loop), ploop);
4201 /* Remove the loop and free its data. */
4202 delete_loop (loop);
4206 /* Returns true when it is possible to generate code for this STMT.
4207 For the moment we cannot generate code when Cloog decides to
4208 duplicate a statement, as we do not do a copy, but a move.
4209 USED_BASIC_BLOCKS records the blocks that have already been seen.
4210 We return false if we have to generate code twice for the same
4211 block. */
4213 static bool
4214 can_generate_code_stmt (struct clast_stmt *stmt,
4215 struct pointer_set_t *used_basic_blocks)
4217 if (!stmt)
4218 return true;
4220 if (CLAST_STMT_IS_A (stmt, stmt_root))
4221 return can_generate_code_stmt (stmt->next, used_basic_blocks);
4223 if (CLAST_STMT_IS_A (stmt, stmt_user))
4225 CloogStatement *cs = ((struct clast_user_stmt *) stmt)->statement;
4226 graphite_bb_p gbb = (graphite_bb_p) cloog_statement_usr (cs);
4228 if (pointer_set_contains (used_basic_blocks, gbb))
4229 return false;
4230 pointer_set_insert (used_basic_blocks, gbb);
4231 return can_generate_code_stmt (stmt->next, used_basic_blocks);
4234 if (CLAST_STMT_IS_A (stmt, stmt_for))
4235 return can_generate_code_stmt (((struct clast_for *) stmt)->body,
4236 used_basic_blocks)
4237 && can_generate_code_stmt (stmt->next, used_basic_blocks);
4239 if (CLAST_STMT_IS_A (stmt, stmt_guard))
4240 return can_generate_code_stmt (((struct clast_guard *) stmt)->then,
4241 used_basic_blocks);
4243 if (CLAST_STMT_IS_A (stmt, stmt_block))
4244 return can_generate_code_stmt (((struct clast_block *) stmt)->body,
4245 used_basic_blocks)
4246 && can_generate_code_stmt (stmt->next, used_basic_blocks);
4248 return false;
4251 /* Returns true when it is possible to generate code for this STMT. */
4253 static bool
4254 can_generate_code (struct clast_stmt *stmt)
4256 bool result;
4257 struct pointer_set_t *used_basic_blocks = pointer_set_create ();
4259 result = can_generate_code_stmt (stmt, used_basic_blocks);
4260 pointer_set_destroy (used_basic_blocks);
4261 return result;
4264 /* Skip any definition that is a phi node with a single phi def. */
4266 static tree
4267 skip_phi_defs (tree ssa_name)
4269 tree result = ssa_name;
4270 gimple def_stmt = SSA_NAME_DEF_STMT (ssa_name);
4272 if (gimple_code (def_stmt) == GIMPLE_PHI
4273 && gimple_phi_num_args (def_stmt) == 1)
4274 result = skip_phi_defs (gimple_phi_arg(def_stmt,0)->def);
4276 return result;
4279 /* Returns a VEC containing the phi-arg defs coming from SCOP_EXIT in
4280 the destination block of SCOP_EXIT. */
4282 static VEC (tree, heap) *
4283 collect_scop_exit_phi_args (edge scop_exit)
4285 VEC (tree, heap) *phi_args = VEC_alloc (tree, heap, 1);
4286 gimple_stmt_iterator gsi;
4288 for (gsi = gsi_start_phis (scop_exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
4290 gimple phi = gsi_stmt (gsi);
4291 tree phi_arg = skip_phi_defs(PHI_ARG_DEF_FROM_EDGE (phi, scop_exit));
4293 VEC_safe_push (tree, heap, phi_args, phi_arg);
4296 return phi_args;
4299 /* Patches (adds) PHI_ARGS to the phi nodes in SCOP_EXIT destination. */
4301 static void
4302 patch_scop_exit_phi_args (edge scop_exit,
4303 VEC (tree, heap) *phi_args)
4305 int i = 0;
4306 gimple_stmt_iterator gsi;
4308 for (gsi = gsi_start_phis (scop_exit->dest); !gsi_end_p (gsi);
4309 gsi_next (&gsi), i++)
4311 tree def = VEC_index (tree, phi_args, i);
4312 gimple phi = gsi_stmt (gsi);
4314 gcc_assert (PHI_ARG_DEF_FROM_EDGE (phi, scop_exit) == NULL);
4316 add_phi_arg (phi, def, scop_exit);
4320 /* GIMPLE Loop Generator: generates loops from STMT in GIMPLE form for
4321 the given SCOP. */
4323 static void
4324 gloog (scop_p scop, struct clast_stmt *stmt)
4326 edge new_scop_exit_edge = NULL;
4327 basic_block scop_exit = SCOP_EXIT (scop);
4328 VEC (tree, heap) *phi_args =
4329 collect_scop_exit_phi_args (SESE_EXIT (SCOP_REGION (scop)));
4330 VEC (iv_stack_entry_p, heap) *ivstack =
4331 VEC_alloc (iv_stack_entry_p, heap, 10);
4332 edge construction_edge = SESE_ENTRY (SCOP_REGION (scop));
4333 basic_block old_scop_exit_idom = get_immediate_dominator (CDI_DOMINATORS,
4334 scop_exit);
4336 if (!can_generate_code (stmt))
4338 cloog_clast_free (stmt);
4339 return;
4342 redirect_edge_succ_nodup (construction_edge, EXIT_BLOCK_PTR);
4343 new_scop_exit_edge = translate_clast (scop,
4344 construction_edge->src->loop_father,
4345 stmt, construction_edge, &ivstack);
4346 free_loop_iv_stack (&ivstack);
4347 redirect_edge_succ (new_scop_exit_edge, scop_exit);
4349 if (!old_scop_exit_idom
4350 || !dominated_by_p (CDI_DOMINATORS, SCOP_ENTRY (scop),
4351 old_scop_exit_idom)
4352 || SCOP_ENTRY (scop) == old_scop_exit_idom)
4353 set_immediate_dominator (CDI_DOMINATORS,
4354 new_scop_exit_edge->dest,
4355 new_scop_exit_edge->src);
4357 cloog_clast_free (stmt);
4359 if (new_scop_exit_edge->dest == EXIT_BLOCK_PTR)
4360 new_scop_exit_edge->flags = 0;
4362 delete_unreachable_blocks ();
4363 patch_phis_for_virtual_defs ();
4364 patch_scop_exit_phi_args (new_scop_exit_edge, phi_args);
4365 VEC_free (tree, heap, phi_args);
4366 mark_old_loops (scop);
4367 remove_dead_loops ();
4368 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
4370 #ifdef ENABLE_CHECKING
4371 verify_loop_structure ();
4372 verify_dominators (CDI_DOMINATORS);
4373 verify_ssa (false);
4374 #endif
4377 /* Returns the number of data references in SCOP. */
4379 static int
4380 nb_data_refs_in_scop (scop_p scop)
4382 int i;
4383 graphite_bb_p gbb;
4384 int res = 0;
4386 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gbb); i++)
4387 res += VEC_length (data_reference_p, GBB_DATA_REFS (gbb));
4389 return res;
4392 /* Check if a graphite bb can be ignored in graphite. We ignore all
4393 bbs, that only contain code, that will be eliminated later.
4395 TODO: - Move PHI nodes and scalar variables out of these bbs, that only
4396 remain conditions and induction variables. */
4398 static bool
4399 gbb_can_be_ignored (graphite_bb_p gb)
4401 gimple_stmt_iterator gsi;
4402 scop_p scop = GBB_SCOP (gb);
4403 loop_p loop = GBB_BB (gb)->loop_father;
4405 if (VEC_length (data_reference_p, GBB_DATA_REFS(gb)))
4406 return false;
4408 /* Check statements. */
4409 for (gsi = gsi_start_bb (GBB_BB (gb)); !gsi_end_p (gsi); gsi_next (&gsi))
4411 gimple stmt = gsi_stmt (gsi);
4412 switch (gimple_code (stmt))
4414 /* Control flow expressions can be ignored, as they are
4415 represented in the iteration domains and will be
4416 regenerated by graphite. */
4417 case GIMPLE_COND:
4418 case GIMPLE_GOTO:
4419 case GIMPLE_SWITCH:
4420 break;
4422 /* Scalar variables can be ignored, if we can regenerate
4423 them later using their scalar evolution function.
4424 XXX: Just a heuristic, that needs further investigation. */
4425 case GIMPLE_ASSIGN:
4427 tree var = gimple_assign_lhs (stmt);
4428 var = analyze_scalar_evolution (loop, var);
4429 var = instantiate_scev (block_before_scop (scop), loop, var);
4431 if (TREE_CODE (var) == SCEV_NOT_KNOWN)
4432 return false;
4434 break;
4436 /* Otherwise not ignoreable. */
4437 default:
4438 return false;
4442 return true;
4445 /* Remove all ignoreable gbbs from SCOP. */
4447 static void
4448 scop_remove_ignoreable_gbbs (scop_p scop)
4450 graphite_bb_p gb;
4451 int i;
4453 int max_schedule = scop_max_loop_depth (scop) + 1;
4454 lambda_vector last_schedule = lambda_vector_new (max_schedule);
4455 lambda_vector_clear (last_schedule, max_schedule);
4457 /* Update schedules. */
4458 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++)
4460 int nb_loops = gbb_nb_loops (gb);
4462 if (GBB_STATIC_SCHEDULE (gb) [nb_loops] == 0)
4463 last_schedule [nb_loops] = 0;
4465 if (gbb_can_be_ignored (gb))
4467 /* Mark gbb for remove. */
4468 bitmap_clear_bit (SCOP_BBS_B (scop), gb->bb->index);
4469 GBB_SCOP (gb) = NULL;
4470 last_schedule [nb_loops]--;
4472 else
4473 lambda_vector_add (GBB_STATIC_SCHEDULE (gb), last_schedule,
4474 GBB_STATIC_SCHEDULE (gb), nb_loops + 1);
4477 /* Remove gbbs. */
4478 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++)
4479 if (GBB_SCOP (gb) == NULL)
4481 VEC_unordered_remove (graphite_bb_p, SCOP_BBS (scop), i);
4482 free_graphite_bb (gb);
4483 /* XXX: Hackish? But working. */
4484 i--;
4487 graphite_sort_gbbs (scop);
4490 /* Move the loop at index LOOP and insert it before index NEW_LOOP_POS.
4491 This transformartion is only valid, if the loop nest between i and k is
4492 perfectly nested. Therefore we do not need to change the static schedule.
4494 Example:
4496 for (i = 0; i < 50; i++)
4497 for (j ...)
4498 for (k = 5; k < 100; k++)
4501 To move k before i use:
4503 graphite_trans_bb_move_loop (A, 2, 0)
4505 for (k = 5; k < 100; k++)
4506 for (i = 0; i < 50; i++)
4507 for (j ...)
4510 And to move k back:
4512 graphite_trans_bb_move_loop (A, 0, 2)
4514 This function does not check the validity of interchanging loops.
4515 This should be checked before calling this function. */
4517 static void
4518 graphite_trans_bb_move_loop (graphite_bb_p gb, int loop,
4519 int new_loop_pos)
4521 CloogMatrix *domain = GBB_DOMAIN (gb);
4522 int row, j;
4523 loop_p tmp_loop_p;
4525 gcc_assert (loop < gbb_nb_loops (gb)
4526 && new_loop_pos < gbb_nb_loops (gb));
4528 /* Update LOOPS vector. */
4529 tmp_loop_p = VEC_index (loop_p, GBB_LOOPS (gb), loop);
4530 VEC_ordered_remove (loop_p, GBB_LOOPS (gb), loop);
4531 VEC_safe_insert (loop_p, heap, GBB_LOOPS (gb), new_loop_pos, tmp_loop_p);
4533 /* Move the domain columns. */
4534 if (loop < new_loop_pos)
4535 for (row = 0; row < domain->NbRows; row++)
4537 Value tmp;
4538 value_init (tmp);
4539 value_assign (tmp, domain->p[row][loop + 1]);
4541 for (j = loop ; j < new_loop_pos - 1; j++)
4542 value_assign (domain->p[row][j + 1], domain->p[row][j + 2]);
4544 value_assign (domain->p[row][new_loop_pos], tmp);
4545 value_clear (tmp);
4547 else
4548 for (row = 0; row < domain->NbRows; row++)
4550 Value tmp;
4551 value_init (tmp);
4552 value_assign (tmp, domain->p[row][loop + 1]);
4554 for (j = loop ; j > new_loop_pos; j--)
4555 value_assign (domain->p[row][j + 1], domain->p[row][j]);
4557 value_assign (domain->p[row][new_loop_pos + 1], tmp);
4558 value_clear (tmp);
4562 /* Get the index of the column representing constants in the DOMAIN
4563 matrix. */
4565 static int
4566 const_column_index (CloogMatrix *domain)
4568 return domain->NbColumns - 1;
4572 /* Get the first index that is positive or negative, determined
4573 following the value of POSITIVE, in matrix DOMAIN in COLUMN. */
4575 static int
4576 get_first_matching_sign_row_index (CloogMatrix *domain, int column,
4577 bool positive)
4579 int row;
4581 for (row = 0; row < domain->NbRows; row++)
4583 int val = value_get_si (domain->p[row][column]);
4585 if (val > 0 && positive)
4586 return row;
4588 else if (val < 0 && !positive)
4589 return row;
4592 gcc_unreachable ();
4595 /* Get the lower bound of COLUMN in matrix DOMAIN. */
4597 static int
4598 get_lower_bound_row (CloogMatrix *domain, int column)
4600 return get_first_matching_sign_row_index (domain, column, true);
4603 /* Get the upper bound of COLUMN in matrix DOMAIN. */
4605 static int
4606 get_upper_bound_row (CloogMatrix *domain, int column)
4608 return get_first_matching_sign_row_index (domain, column, false);
4611 /* Get the lower bound of LOOP. */
4613 static void
4614 get_lower_bound (CloogMatrix *domain, int loop, Value lower_bound_result)
4616 int lower_bound_row = get_lower_bound_row (domain, loop);
4617 value_assign (lower_bound_result,
4618 domain->p[lower_bound_row][const_column_index(domain)]);
4621 /* Get the upper bound of LOOP. */
4623 static void
4624 get_upper_bound (CloogMatrix *domain, int loop, Value upper_bound_result)
4626 int upper_bound_row = get_upper_bound_row (domain, loop);
4627 value_assign (upper_bound_result,
4628 domain->p[upper_bound_row][const_column_index(domain)]);
4631 /* Strip mines the loop of BB at the position LOOP_DEPTH with STRIDE.
4632 Always valid, but not always a performance improvement. */
4634 static void
4635 graphite_trans_bb_strip_mine (graphite_bb_p gb, int loop_depth, int stride)
4637 int row, col;
4639 CloogMatrix *domain = GBB_DOMAIN (gb);
4640 CloogMatrix *new_domain = cloog_matrix_alloc (domain->NbRows + 3,
4641 domain->NbColumns + 1);
4643 int col_loop_old = loop_depth + 2;
4644 int col_loop_strip = col_loop_old - 1;
4646 Value old_lower_bound;
4647 Value old_upper_bound;
4649 gcc_assert (loop_depth <= gbb_nb_loops (gb) - 1);
4651 VEC_safe_insert (loop_p, heap, GBB_LOOPS (gb), loop_depth, NULL);
4653 GBB_DOMAIN (gb) = new_domain;
4656 nrows = 4, ncols = 4
4657 eq i j c
4658 1 1 0 0
4659 1 -1 0 99
4660 1 0 1 0
4661 1 0 -1 99
4664 /* Move domain. */
4665 for (row = 0; row < domain->NbRows; row++)
4666 for (col = 0; col < domain->NbColumns; col++)
4667 if (col <= loop_depth)
4668 value_assign (new_domain->p[row][col], domain->p[row][col]);
4669 else
4670 value_assign (new_domain->p[row][col + 1], domain->p[row][col]);
4674 nrows = 6, ncols = 5
4675 outer inner
4676 eq i jj j c
4677 1 1 0 0 0
4678 1 -1 0 0 99
4679 1 0 0 1 0
4680 1 0 0 -1 99
4681 0 0 0 0 0
4682 0 0 0 0 0
4683 0 0 0 0 0
4686 row = domain->NbRows;
4688 /* Add outer loop. */
4689 value_init (old_lower_bound);
4690 value_init (old_upper_bound);
4691 get_lower_bound (new_domain, col_loop_old, old_lower_bound);
4692 get_upper_bound (new_domain, col_loop_old, old_upper_bound);
4694 /* Set Lower Bound */
4695 value_set_si (new_domain->p[row][0], 1);
4696 value_set_si (new_domain->p[row][col_loop_strip], 1);
4697 value_assign (new_domain->p[row][const_column_index (new_domain)],
4698 old_lower_bound);
4699 value_clear (old_lower_bound);
4700 row++;
4705 eq i jj j c
4706 1 1 0 0 0
4707 1 -1 0 0 99
4708 1 0 0 1 0 -
4709 1 0 0 -1 99 | copy old lower bound
4710 1 0 1 0 0 <-
4711 0 0 0 0 0
4712 0 0 0 0 0
4716 Value new_upper_bound;
4717 Value strip_size_value;
4719 value_init (new_upper_bound);
4720 value_init (strip_size_value);
4721 value_set_si (strip_size_value, (int) stride);
4723 value_pdivision (new_upper_bound, old_upper_bound, strip_size_value);
4724 value_add_int (new_upper_bound, new_upper_bound, 1);
4726 /* Set Upper Bound */
4727 value_set_si (new_domain->p[row][0], 1);
4728 value_set_si (new_domain->p[row][col_loop_strip], -1);
4729 value_assign (new_domain->p[row][const_column_index (new_domain)],
4730 new_upper_bound);
4732 value_clear (strip_size_value);
4733 value_clear (old_upper_bound);
4734 value_clear (new_upper_bound);
4735 row++;
4739 eq i jj j c
4740 1 1 0 0 0
4741 1 -1 0 0 99
4742 1 0 0 1 0
4743 1 0 0 -1 99
4744 1 0 1 0 0
4745 1 0 -1 0 25 (divide old upper bound with stride)
4746 0 0 0 0 0
4750 row = get_lower_bound_row (new_domain, col_loop_old);
4751 /* Add local variable to keep linear representation. */
4752 value_set_si (new_domain->p[row][0], 1);
4753 value_set_si (new_domain->p[row][const_column_index (new_domain)],0);
4754 value_set_si (new_domain->p[row][col_loop_old], 1);
4755 value_set_si (new_domain->p[row][col_loop_strip], -1*((int)stride));
4760 eq i jj j c
4761 1 1 0 0 0
4762 1 -1 0 0 99
4763 1 0 -1 1 0
4764 1 0 0 -1 99
4765 1 0 1 0 0
4766 1 0 -1 0 25 (divide old upper bound with stride)
4767 0 0 0 0 0
4771 row = new_domain->NbRows-1;
4773 value_set_si (new_domain->p[row][0], 1);
4774 value_set_si (new_domain->p[row][col_loop_old], -1);
4775 value_set_si (new_domain->p[row][col_loop_strip], stride);
4776 value_set_si (new_domain->p[row][const_column_index (new_domain)],
4777 stride-1);
4782 eq i jj j c
4783 1 1 0 0 0 i >= 0
4784 1 -1 0 0 99 99 >= i
4785 1 0 -4 1 0 j >= 4*jj
4786 1 0 0 -1 99 99 >= j
4787 1 0 1 0 0 jj >= 0
4788 1 0 -1 0 25 25 >= jj
4789 0 0 4 -1 3 jj+3 >= j
4792 cloog_matrix_free (domain);
4794 /* Update static schedule. */
4796 int i;
4797 int nb_loops = gbb_nb_loops (gb);
4798 lambda_vector new_schedule = lambda_vector_new (nb_loops + 1);
4800 for (i = 0; i <= loop_depth; i++)
4801 new_schedule[i] = GBB_STATIC_SCHEDULE (gb)[i];
4803 for (i = loop_depth + 1; i <= nb_loops - 2; i++)
4804 new_schedule[i + 2] = GBB_STATIC_SCHEDULE (gb)[i];
4806 GBB_STATIC_SCHEDULE (gb) = new_schedule;
4810 /* Returns true when the strip mining of LOOP_INDEX by STRIDE is
4811 profitable or undecidable. GB is the statement around which the
4812 loops will be strip mined. */
4814 static bool
4815 strip_mine_profitable_p (graphite_bb_p gb, int stride,
4816 int loop_index)
4818 bool res = true;
4819 edge exit = NULL;
4820 tree niter;
4821 loop_p loop;
4822 long niter_val;
4824 loop = VEC_index (loop_p, GBB_LOOPS (gb), loop_index);
4825 exit = single_exit (loop);
4827 niter = find_loop_niter (loop, &exit);
4828 if (niter == chrec_dont_know
4829 || TREE_CODE (niter) != INTEGER_CST)
4830 return true;
4832 niter_val = int_cst_value (niter);
4834 if (niter_val < stride)
4836 res = false;
4837 if (dump_file && (dump_flags & TDF_DETAILS))
4839 fprintf (dump_file, "\nStrip Mining is not profitable for loop %d:",
4840 loop_index);
4841 fprintf (dump_file, "number of iterations is too low.\n");
4845 return res;
4848 /* Determines when the interchange of LOOP_A and LOOP_B belonging to
4849 SCOP is legal. */
4851 static bool
4852 is_interchange_valid (scop_p scop, int loop_a, int loop_b)
4854 bool res;
4855 VEC (ddr_p, heap) *dependence_relations;
4856 VEC (data_reference_p, heap) *datarefs;
4858 struct loop *nest = VEC_index (loop_p, SCOP_LOOP_NEST (scop), loop_a);
4859 int depth = perfect_loop_nest_depth (nest);
4860 lambda_trans_matrix trans;
4862 gcc_assert (loop_a < loop_b);
4864 dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10);
4865 datarefs = VEC_alloc (data_reference_p, heap, 10);
4867 if (!compute_data_dependences_for_loop (nest, true, &datarefs,
4868 &dependence_relations))
4869 return false;
4871 if (dump_file && (dump_flags & TDF_DETAILS))
4872 dump_ddrs (dump_file, dependence_relations);
4874 trans = lambda_trans_matrix_new (depth, depth);
4875 lambda_matrix_id (LTM_MATRIX (trans), depth);
4877 lambda_matrix_row_exchange (LTM_MATRIX (trans), 0, loop_b - loop_a);
4879 if (!lambda_transform_legal_p (trans, depth, dependence_relations))
4881 lambda_matrix_row_exchange (LTM_MATRIX (trans), 0, loop_b - loop_a);
4882 res = false;
4884 else
4885 res = true;
4887 free_dependence_relations (dependence_relations);
4888 free_data_refs (datarefs);
4889 return res;
4892 /* Loop block the LOOPS innermost loops of GB with stride size STRIDE.
4894 Example
4896 for (i = 0; i <= 50; i++=4)
4897 for (k = 0; k <= 100; k++=4)
4898 for (l = 0; l <= 200; l++=4)
4901 To strip mine the two inner most loops with stride = 4 call:
4903 graphite_trans_bb_block (A, 4, 2)
4905 for (i = 0; i <= 50; i++)
4906 for (kk = 0; kk <= 100; kk+=4)
4907 for (ll = 0; ll <= 200; ll+=4)
4908 for (k = kk; k <= min (100, kk + 3); k++)
4909 for (l = ll; l <= min (200, ll + 3); l++)
4913 static bool
4914 graphite_trans_bb_block (graphite_bb_p gb, int stride, int loops)
4916 int i, j;
4917 int nb_loops = gbb_nb_loops (gb);
4918 int start = nb_loops - loops;
4919 scop_p scop = GBB_SCOP (gb);
4921 gcc_assert (scop_contains_loop (scop, gbb_loop (gb)));
4923 for (i = start ; i < nb_loops; i++)
4924 for (j = i + 1; j < nb_loops; j++)
4925 if (!is_interchange_valid (scop, i, j))
4927 if (dump_file && (dump_flags & TDF_DETAILS))
4928 fprintf (dump_file,
4929 "\nInterchange not valid for loops %d and %d:\n", i, j);
4930 return false;
4932 else if (dump_file && (dump_flags & TDF_DETAILS))
4933 fprintf (dump_file,
4934 "\nInterchange valid for loops %d and %d:\n", i, j);
4936 /* Check if strip mining is profitable for every loop. */
4937 for (i = 0; i < nb_loops - start; i++)
4938 if (!strip_mine_profitable_p (gb, stride, start + i))
4939 return false;
4941 /* Strip mine loops. */
4942 for (i = 0; i < nb_loops - start; i++)
4943 graphite_trans_bb_strip_mine (gb, start + 2 * i, stride);
4945 /* Interchange loops. */
4946 for (i = 1; i < nb_loops - start; i++)
4947 graphite_trans_bb_move_loop (gb, start + 2 * i, start + i);
4949 return true;
4952 /* Loop block LOOPS innermost loops of a loop nest. BBS represent the
4953 basic blocks that belong to the loop nest to be blocked. */
4955 static bool
4956 graphite_trans_loop_block (VEC (graphite_bb_p, heap) *bbs, int loops)
4958 graphite_bb_p gb;
4959 int i;
4960 bool transform_done = false;
4962 /* TODO: - Calculate the stride size automatically. */
4963 int stride_size = 64;
4965 /* It makes no sense to block a single loop. */
4966 for (i = 0; VEC_iterate (graphite_bb_p, bbs, i, gb); i++)
4967 if (gbb_nb_loops (gb) < 2)
4968 return false;
4970 for (i = 0; VEC_iterate (graphite_bb_p, bbs, i, gb); i++)
4971 transform_done |= graphite_trans_bb_block (gb, stride_size, loops);
4973 return transform_done;
4976 /* Loop block all basic blocks of SCOP. Return false when the
4977 transform is not performed. */
4979 static bool
4980 graphite_trans_scop_block (scop_p scop)
4982 graphite_bb_p gb;
4983 int i, j;
4984 int last_nb_loops;
4985 int nb_loops;
4986 bool perfect = true;
4987 bool transform_done = false;
4989 VEC (graphite_bb_p, heap) *bbs = VEC_alloc (graphite_bb_p, heap, 3);
4990 int max_schedule = scop_max_loop_depth (scop) + 1;
4991 lambda_vector last_schedule = lambda_vector_new (max_schedule);
4993 if (VEC_length (graphite_bb_p, SCOP_BBS (scop)) == 0)
4994 return false;
4996 /* Get the data of the first bb. */
4997 gb = VEC_index (graphite_bb_p, SCOP_BBS (scop), 0);
4998 last_nb_loops = gbb_nb_loops (gb);
4999 lambda_vector_copy (GBB_STATIC_SCHEDULE (gb), last_schedule,
5000 last_nb_loops + 1);
5001 VEC_safe_push (graphite_bb_p, heap, bbs, gb);
5003 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++)
5005 /* We did the first bb before. */
5006 if (i == 0)
5007 continue;
5009 nb_loops = gbb_nb_loops (gb);
5011 /* If the number of loops is unchanged and only the last element of the
5012 schedule changes, we stay in the loop nest. */
5013 if (nb_loops == last_nb_loops
5014 && (last_schedule [nb_loops + 1]
5015 != GBB_STATIC_SCHEDULE (gb)[nb_loops + 1]))
5017 VEC_safe_push (graphite_bb_p, heap, bbs, gb);
5018 continue;
5021 /* Otherwise, we left the innermost loop. So check, if the last bb was in
5022 a perfect loop nest and how many loops are contained in this perfect
5023 loop nest.
5025 Count the number of zeros from the end of the schedule. They are the
5026 number of surrounding loops.
5028 Example:
5029 last_bb 2 3 2 0 0 0 0 3
5030 bb 2 4 0
5031 <------ j = 4
5033 last_bb 2 3 2 0 0 0 0 3
5034 bb 2 3 2 0 1
5035 <-- j = 2
5037 If there is no zero, there were other bbs in outer loops and the loop
5038 nest is not perfect. */
5039 for (j = last_nb_loops - 1; j >= 0; j--)
5041 if (last_schedule [j] != 0
5042 || (j <= nb_loops && GBB_STATIC_SCHEDULE (gb)[j] == 1))
5044 j--;
5045 break;
5049 j++;
5051 /* Found perfect loop nest. */
5052 if (perfect && last_nb_loops - j > 0)
5053 transform_done |= graphite_trans_loop_block (bbs, last_nb_loops - j);
5055 /* Check if we start with a new loop.
5057 Example:
5059 last_bb 2 3 2 0 0 0 0 3
5060 bb 2 3 2 0 0 1 0
5062 Here we start with the loop "2 3 2 0 0 1"
5064 last_bb 2 3 2 0 0 0 0 3
5065 bb 2 3 2 0 0 1
5067 But here not, so the loop nest can never be perfect. */
5069 perfect = (GBB_STATIC_SCHEDULE (gb)[nb_loops] == 0);
5071 /* Update the last_bb infos. We do not do that for the bbs in the same
5072 loop, as the data we use is not changed. */
5073 last_nb_loops = nb_loops;
5074 lambda_vector_copy (GBB_STATIC_SCHEDULE (gb), last_schedule,
5075 nb_loops + 1);
5076 VEC_truncate (graphite_bb_p, bbs, 0);
5077 VEC_safe_push (graphite_bb_p, heap, bbs, gb);
5080 /* Check if the last loop nest was perfect. It is the same check as above,
5081 but the comparison with the next bb is missing. */
5082 for (j = last_nb_loops - 1; j >= 0; j--)
5083 if (last_schedule [j] != 0)
5085 j--;
5086 break;
5089 j++;
5091 /* Found perfect loop nest. */
5092 if (last_nb_loops - j > 0)
5093 transform_done |= graphite_trans_loop_block (bbs, last_nb_loops - j);
5094 VEC_free (graphite_bb_p, heap, bbs);
5096 if (dump_file && (dump_flags & TDF_DETAILS))
5097 fprintf (dump_file, "\nLoop blocked.\n");
5099 return transform_done;
5102 /* Apply graphite transformations to all the basic blocks of SCOP. */
5104 static bool
5105 graphite_apply_transformations (scop_p scop)
5107 bool transform_done = false;
5109 /* Sort the list of bbs. Keep them always sorted. */
5110 graphite_sort_gbbs (scop);
5111 scop_remove_ignoreable_gbbs (scop);
5113 if (flag_loop_block)
5114 transform_done = graphite_trans_scop_block (scop);
5116 /* Generate code even if we did not apply any real transformation.
5117 This also allows to check the performance for the identity
5118 transformation: GIMPLE -> GRAPHITE -> GIMPLE
5119 Keep in mind that CLooG optimizes in control, so the loop structure
5120 may change, even if we only use -fgraphite-identity. */
5121 if (flag_graphite_identity)
5122 transform_done = true;
5124 return transform_done;
5127 /* We limit all SCoPs to SCoPs, that are completely surrounded by a loop.
5129 Example:
5131 for (i |
5133 for (j | SCoP 1
5134 for (k |
5137 * SCoP frontier, as this line is not surrounded by any loop. *
5139 for (l | SCoP 2
5141 This is necessary as scalar evolution and parameter detection need a
5142 outermost loop to initialize parameters correctly.
5144 TODO: FIX scalar evolution and parameter detection to allow more flexible
5145 SCoP frontiers. */
5147 static void
5148 limit_scops (void)
5150 VEC (sd_region, heap) *tmp_scops = VEC_alloc (sd_region, heap, 3);
5152 int i;
5153 scop_p scop;
5155 for (i = 0; VEC_iterate (scop_p, current_scops, i, scop); i++)
5157 int j;
5158 loop_p loop;
5159 build_scop_bbs (scop);
5160 build_scop_loop_nests (scop);
5162 for (j = 0; VEC_iterate (loop_p, SCOP_LOOP_NEST (scop), j, loop); j++)
5163 if (!loop_in_scop_p (loop_outer (loop), scop))
5165 sd_region open_scop;
5166 open_scop.entry = loop_preheader_edge (loop)->dest;
5167 open_scop.exit = single_exit (loop)->dest;
5168 VEC_safe_push (sd_region, heap, tmp_scops, &open_scop);
5172 free_scops (current_scops);
5173 current_scops = VEC_alloc (scop_p, heap, 3);
5175 create_sese_edges (tmp_scops);
5176 build_graphite_scops (tmp_scops);
5177 VEC_free (sd_region, heap, tmp_scops);
5180 /* Perform a set of linear transforms on the loops of the current
5181 function. */
5183 void
5184 graphite_transform_loops (void)
5186 int i;
5187 scop_p scop;
5189 if (number_of_loops () <= 1)
5190 return;
5192 current_scops = VEC_alloc (scop_p, heap, 3);
5194 calculate_dominance_info (CDI_DOMINATORS);
5195 calculate_dominance_info (CDI_POST_DOMINATORS);
5197 if (dump_file && (dump_flags & TDF_DETAILS))
5198 fprintf (dump_file, "Graphite loop transformations \n");
5200 cloog_initialize ();
5201 build_scops ();
5202 limit_scops ();
5204 if (dump_file && (dump_flags & TDF_DETAILS))
5205 fprintf (dump_file, "\nnumber of SCoPs: %d\n",
5206 VEC_length (scop_p, current_scops));
5208 for (i = 0; VEC_iterate (scop_p, current_scops, i, scop); i++)
5210 build_scop_bbs (scop);
5211 build_scop_loop_nests (scop);
5212 build_scop_canonical_schedules (scop);
5213 build_bb_loops (scop);
5214 find_scop_parameters (scop);
5215 build_scop_context (scop);
5217 if (dump_file && (dump_flags & TDF_DETAILS))
5219 fprintf (dump_file, "\n(In SCoP %d:\n", i);
5220 fprintf (dump_file, "\nnumber of bbs: %d\n",
5221 VEC_length (graphite_bb_p, SCOP_BBS (scop)));
5222 fprintf (dump_file, "\nnumber of loops: %d)\n",
5223 VEC_length (loop_p, SCOP_LOOP_NEST (scop)));
5226 if (!build_scop_iteration_domain (scop))
5227 continue;
5229 build_scop_conditions (scop);
5230 build_scop_data_accesses (scop);
5232 if (dump_file && (dump_flags & TDF_DETAILS))
5234 int nbrefs = nb_data_refs_in_scop (scop);
5235 fprintf (dump_file, "\nnumber of data refs: %d\n", nbrefs);
5238 if (graphite_apply_transformations (scop))
5239 gloog (scop, find_transform (scop));
5242 /* Cleanup. */
5243 free_scops (current_scops);
5244 cloog_finalize ();
5247 #else /* If Cloog is not available: #ifndef HAVE_cloog. */
5249 void
5250 graphite_transform_loops (void)
5252 sorry ("Graphite loop optimizations cannot be used");
5255 #endif