2013-09-12 Paolo Carlini <paolo.carlini@oracle.com>
[official-gcc.git] / gcc / tree-loop-distribution.c
blobb740545c83f4f75fd844eecde97ed2990381b3bc
1 /* Loop distribution.
2 Copyright (C) 2006-2013 Free Software Foundation, Inc.
3 Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
4 and Sebastian Pop <sebastian.pop@amd.com>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 3, or (at your option) any
11 later version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* This pass performs loop distribution: for example, the loop
24 |DO I = 2, N
25 | A(I) = B(I) + C
26 | D(I) = A(I-1)*E
27 |ENDDO
29 is transformed to
31 |DOALL I = 2, N
32 | A(I) = B(I) + C
33 |ENDDO
35 |DOALL I = 2, N
36 | D(I) = A(I-1)*E
37 |ENDDO
39 This pass uses an RDG, Reduced Dependence Graph built on top of the
40 data dependence relations. The RDG is then topologically sorted to
41 obtain a map of information producers/consumers based on which it
42 generates the new loops. */
44 #include "config.h"
45 #include "system.h"
46 #include "coretypes.h"
47 #include "tree-ssa.h"
48 #include "cfgloop.h"
49 #include "tree-chrec.h"
50 #include "tree-data-ref.h"
51 #include "tree-scalar-evolution.h"
52 #include "tree-pass.h"
53 #include "gimple-pretty-print.h"
/* A Reduced Dependence Graph (RDG) vertex representing a statement.  */
typedef struct rdg_vertex
{
  /* The statement represented by this vertex.  */
  gimple stmt;

  /* Vector of data-references in this statement.  */
  vec<data_reference_p> datarefs;

  /* True when the statement contains a write to memory.  */
  bool has_mem_write;

  /* True when the statement contains a read from memory.  */
  bool has_mem_reads;
} *rdg_vertex_p;

/* Accessors for the rdg_vertex payload attached to a graph vertex V,
   and convenience forms taking an RDG and a vertex index I.  */
#define RDGV_STMT(V) ((struct rdg_vertex *) ((V)->data))->stmt
#define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
#define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
#define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
#define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
#define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
#define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
#define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
/* Data dependence type.  The char values are used when printing edges
   in dot format (see dot_rdg_1).  */

enum rdg_dep_type
{
  /* Read After Write (RAW).  */
  flow_dd = 'f',

  /* Write After Read (WAR).  */
  anti_dd = 'a',

  /* Write After Write (WAW).  */
  output_dd = 'o',

  /* Read After Read (RAR).  */
  input_dd = 'i'
};
/* Dependence information attached to an edge of the RDG.  */

typedef struct rdg_edge
{
  /* Type of the dependence.  */
  enum rdg_dep_type type;

  /* Levels of the dependence: the depth of the loops that carry the
     dependence.  */
  unsigned level;

  /* Dependence relation between data dependences, NULL when one of
     the vertices is a scalar.  */
  ddr_p relation;
} *rdg_edge_p;

/* Accessors for the rdg_edge payload attached to a graph edge E.  */
#define RDGE_TYPE(E) ((struct rdg_edge *) ((E)->data))->type
#define RDGE_LEVEL(E) ((struct rdg_edge *) ((E)->data))->level
#define RDGE_RELATION(E) ((struct rdg_edge *) ((E)->data))->relation
/* Strongly connected components of the reduced data dependence graph.  */

typedef struct rdg_component
{
  /* Component number assigned by graphds_scc.  */
  int num;
  /* Vertex indices belonging to this component.  */
  vec<int> vertices;
} *rdgc;
/* Dump vertex I in RDG to FILE: prints the read/write flags, the
   predecessor and successor vertex numbers, and the GIMPLE statement.  */

static void
dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
{
  struct vertex *v = &(rdg->vertices[i]);
  struct graph_edge *e;

  fprintf (file, "(vertex %d: (%s%s) (in:", i,
	   RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
	   RDG_MEM_READS_STMT (rdg, i) ? "r" : "");

  if (v->pred)
    for (e = v->pred; e; e = e->pred_next)
      fprintf (file, " %d", e->src);

  fprintf (file, ") (out:");

  if (v->succ)
    for (e = v->succ; e; e = e->succ_next)
      fprintf (file, " %d", e->dest);

  fprintf (file, ")\n");
  print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
  fprintf (file, ")\n");
}
153 /* Call dump_rdg_vertex on stderr. */
155 DEBUG_FUNCTION void
156 debug_rdg_vertex (struct graph *rdg, int i)
158 dump_rdg_vertex (stderr, rdg, i);
/* Dump component C of RDG to FILE.  If DUMPED is non-null, set the
   dumped vertices to that bitmap.  */

static void
dump_rdg_component (FILE *file, struct graph *rdg, int c, bitmap dumped)
{
  int i;

  fprintf (file, "(%d\n", c);

  /* Walk all vertices; those whose component number matches C belong
     to this component.  */
  for (i = 0; i < rdg->n_vertices; i++)
    if (rdg->vertices[i].component == c)
      {
	if (dumped)
	  bitmap_set_bit (dumped, i);

	dump_rdg_vertex (file, rdg, i);
      }

  fprintf (file, ")\n");
}
183 /* Call dump_rdg_vertex on stderr. */
185 DEBUG_FUNCTION void
186 debug_rdg_component (struct graph *rdg, int c)
188 dump_rdg_component (stderr, rdg, c, NULL);
/* Dump the reduced dependence graph RDG to FILE, one component at a
   time.  The DUMPED bitmap ensures each component is printed once.  */

static void
dump_rdg (FILE *file, struct graph *rdg)
{
  int i;
  bitmap dumped = BITMAP_ALLOC (NULL);

  fprintf (file, "(rdg\n");

  for (i = 0; i < rdg->n_vertices; i++)
    if (!bitmap_bit_p (dumped, i))
      dump_rdg_component (file, rdg, rdg->vertices[i].component, dumped);

  fprintf (file, ")\n");
  BITMAP_FREE (dumped);
}
209 /* Call dump_rdg on stderr. */
211 DEBUG_FUNCTION void
212 debug_rdg (struct graph *rdg)
214 dump_rdg (stderr, rdg);
/* Emit RDG in graphviz dot format to FILE.  Memory reads are filled
   green, memory writes red; edges are labeled with their dependence
   type except for flow dependences, which are left unlabeled.  */

static void
dot_rdg_1 (FILE *file, struct graph *rdg)
{
  int i;
  pretty_printer buffer;
  pp_needs_newline (&buffer) = false;
  /* Make the pretty-printer flush directly to FILE.  */
  buffer.buffer->stream = file;

  fprintf (file, "digraph RDG {\n");

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      fprintf (file, "%d [label=\"[%d] ", i, i);
      pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
      pp_flush (&buffer);
      fprintf (file, "\"]\n");

      /* Highlight reads from memory.  */
      if (RDG_MEM_READS_STMT (rdg, i))
	fprintf (file, "%d [style=filled, fillcolor=green]\n", i);

      /* Highlight stores to memory.  */
      if (RDG_MEM_WRITE_STMT (rdg, i))
	fprintf (file, "%d [style=filled, fillcolor=red]\n", i);

      if (v->succ)
	for (e = v->succ; e; e = e->succ_next)
	  switch (RDGE_TYPE (e))
	    {
	    case input_dd:
	      fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
	      break;

	    case output_dd:
	      fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
	      break;

	    case flow_dd:
	      /* These are the most common dependences: don't print these. */
	      fprintf (file, "%d -> %d \n", i, e->dest);
	      break;

	    case anti_dd:
	      fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
	      break;

	    default:
	      gcc_unreachable ();
	    }
    }

  fprintf (file, "}\n\n");
}
274 /* Display the Reduced Dependence Graph using dotty. */
276 DEBUG_FUNCTION void
277 dot_rdg (struct graph *rdg)
279 /* When debugging, you may want to enable the following code. */
280 #if 1
281 FILE *file = popen("dot -Tx11", "w");
282 if (!file)
283 return;
284 dot_rdg_1 (file, rdg);
285 fflush (file);
286 close (fileno (file));
287 pclose (file);
288 #else
289 dot_rdg_1 (stderr, rdg);
290 #endif
/* Returns the index of STMT in RDG, or -1 when STMT is not part of
   the graph.  The index is stored in the statement's uid (see
   create_rdg_vertices); the assert cross-checks that mapping.  */

static int
rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple stmt)
{
  int index = gimple_uid (stmt);
  gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
  return index;
}
/* Creates an edge in RDG for each distance vector from DDR.  The
   order that we keep track of in the RDG is the order in which
   statements have to be executed.  */

static void
create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
{
  struct graph_edge *e;
  int va, vb;
  data_reference_p dra = DDR_A (ddr);
  data_reference_p drb = DDR_B (ddr);
  unsigned level = ddr_dependence_level (ddr);

  /* For non scalar dependences, when the dependence is REVERSED,
     statement B has to be executed before statement A.  */
  if (level > 0
      && !DDR_REVERSED_P (ddr))
    {
      data_reference_p tmp = dra;
      dra = drb;
      drb = tmp;
    }

  va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
  vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));

  /* Either statement may not be represented in the RDG (uid -1).  */
  if (va < 0 || vb < 0)
    return;

  e = add_edge (rdg, va, vb);
  e->data = XNEW (struct rdg_edge);

  RDGE_LEVEL (e) = level;
  RDGE_RELATION (e) = ddr;

  /* Determines the type of the data dependence.  */
  if (DR_IS_READ (dra) && DR_IS_READ (drb))
    RDGE_TYPE (e) = input_dd;
  else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
    RDGE_TYPE (e) = output_dd;
  else if (DR_IS_WRITE (dra) && DR_IS_READ (drb))
    RDGE_TYPE (e) = flow_dd;
  else if (DR_IS_READ (dra) && DR_IS_WRITE (drb))
    RDGE_TYPE (e) = anti_dd;
}
/* Creates dependence edges in RDG for all the uses of DEF.  IDEF is
   the index of DEF in RDG.  Scalar dependences carry a NULL relation
   and are always flow (def-before-use) dependences.  */

static void
create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
{
  use_operand_p imm_use_p;
  imm_use_iterator iterator;

  FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
    {
      struct graph_edge *e;
      int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));

      /* Skip uses in statements outside the RDG.  */
      if (use < 0)
	continue;

      e = add_edge (rdg, idef, use);
      e->data = XNEW (struct rdg_edge);
      RDGE_TYPE (e) = flow_dd;
      RDGE_RELATION (e) = NULL;
    }
}
/* Creates the edges of the reduced dependence graph RDG: one edge per
   known data dependence in DDRS, plus one edge per scalar def-use pair.
   Dependence relations proved independent are freed here.  */

static void
create_rdg_edges (struct graph *rdg, vec<ddr_p> ddrs)
{
  int i;
  struct data_dependence_relation *ddr;
  def_operand_p def_p;
  ssa_op_iter iter;

  FOR_EACH_VEC_ELT (ddrs, i, ddr)
    if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
      create_rdg_edge_for_ddr (rdg, ddr);
    else
      /* Proven independent: the relation is not attached to any edge,
	 so release it now.  */
      free_dependence_relation (ddr);

  for (i = 0; i < rdg->n_vertices; i++)
    FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
			      iter, SSA_OP_DEF)
      create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
}
/* Build the vertices of the reduced dependence graph RDG.  Return false
   if that failed (i.e. data references of some statement could not be
   analyzed).  Side effect: sets each statement's uid to its vertex
   index, which rdg_vertex_for_stmt relies on.  */

static bool
create_rdg_vertices (struct graph *rdg, vec<gimple> stmts, loop_p loop,
		     vec<data_reference_p> *datarefs)
{
  int i;
  gimple stmt;

  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      struct vertex *v = &(rdg->vertices[i]);

      /* Record statement to vertex mapping.  */
      gimple_set_uid (stmt, i);

      v->data = XNEW (struct rdg_vertex);
      RDGV_STMT (v) = stmt;
      RDGV_DATAREFS (v).create (0);
      RDGV_HAS_MEM_WRITE (v) = false;
      RDGV_HAS_MEM_READS (v) = false;
      /* PHI nodes carry no memory references.  */
      if (gimple_code (stmt) == GIMPLE_PHI)
	continue;

      /* Remember where this statement's data references start in
	 DATAREFS so we can attach exactly the new ones below.  */
      unsigned drp = datarefs->length ();
      if (!find_data_references_in_stmt (loop, stmt, datarefs))
	return false;
      for (unsigned j = drp; j < datarefs->length (); ++j)
	{
	  data_reference_p dr = (*datarefs)[j];
	  if (DR_IS_READ (dr))
	    RDGV_HAS_MEM_READS (v) = true;
	  else
	    RDGV_HAS_MEM_WRITE (v) = true;
	  RDGV_DATAREFS (v).safe_push (dr);
	}
    }
  return true;
}
/* Initialize STMTS with all the statements of LOOP, including PHI
   nodes.  The order in which we discover statements is important as
   generate_loops_for_partition is using the same traversal for
   identifying statements: blocks in dominator order, PHIs before
   regular statements, labels and debug statements skipped.  */

static void
stmts_from_loop (struct loop *loop, vec<gimple> *stmts)
{
  unsigned int i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];
      gimple_stmt_iterator bsi;
      gimple stmt;

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	stmts->safe_push (gsi_stmt (bsi));

      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	{
	  stmt = gsi_stmt (bsi);
	  if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
	    stmts->safe_push (stmt);
	}
    }

  free (bbs);
}
468 /* Returns true when all the dependences are computable. */
470 static bool
471 known_dependences_p (vec<ddr_p> dependence_relations)
473 ddr_p ddr;
474 unsigned int i;
476 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
477 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
478 return false;
480 return true;
483 /* Build the Reduced Dependence Graph (RDG) with one vertex per
484 statement of the loop nest, and one edge per data dependence or
485 scalar dependence. */
487 struct graph *
488 build_empty_rdg (int n_stmts)
490 struct graph *rdg = new_graph (n_stmts);
491 return rdg;
/* Free the reduced dependence graph RDG: edge payloads (including any
   attached dependence relation), vertex payloads (resetting statement
   uids and freeing data refs), then the graph itself.  */

static void
free_rdg (struct graph *rdg)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      for (e = v->succ; e; e = e->succ_next)
	{
	  /* RDGE_RELATION is NULL for scalar edges; assumes
	     free_dependence_relation tolerates NULL.  */
	  free_dependence_relation (RDGE_RELATION (e));
	  free (e->data);
	}

      if (v->data)
	{
	  /* Undo the stmt -> vertex mapping set up in
	     create_rdg_vertices.  */
	  gimple_set_uid (RDGV_STMT (v), -1);
	  free_data_refs (RDGV_DATAREFS (v));
	  free (v->data);
	}
    }

  free_graph (rdg);
}
/* Build the Reduced Dependence Graph (RDG) with one vertex per
   statement of the loop nest LOOP_NEST, and one edge per data dependence
   or scalar dependence.  Returns NULL when vertex creation fails or
   some dependence could not be computed.  */

static struct graph *
build_rdg (vec<loop_p> loop_nest)
{
  struct graph *rdg;
  vec<gimple> stmts;
  vec<data_reference_p> datarefs;
  vec<ddr_p> dependence_relations;

  /* Create the RDG vertices from the stmts of the loop nest.  */
  stmts.create (10);
  stmts_from_loop (loop_nest[0], &stmts);
  rdg = build_empty_rdg (stmts.length ());
  datarefs.create (10);
  if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs))
    {
      stmts.release ();
      datarefs.release ();
      free_rdg (rdg);
      return NULL;
    }
  stmts.release ();

  /* Create the RDG edges from the data dependences in the loop nest.  */
  dependence_relations.create (100);
  if (!compute_all_dependences (datarefs, &dependence_relations, loop_nest,
				false)
      || !known_dependences_p (dependence_relations))
    {
      free_dependence_relations (dependence_relations);
      datarefs.release ();
      free_rdg (rdg);
      return NULL;
    }
  create_rdg_edges (rdg, dependence_relations);
  dependence_relations.release ();
  datarefs.release ();

  return rdg;
}
/* Determines whether the statement from vertex V of the RDG has a
   definition used outside the loop that contains this statement.
   Conservatively returns true when the statement is not inside any
   loop.  */

static bool
rdg_defs_used_in_other_loops_p (struct graph *rdg, int v)
{
  gimple stmt = RDG_STMT (rdg, v);
  struct loop *loop = loop_containing_stmt (stmt);
  use_operand_p imm_use_p;
  imm_use_iterator iterator;
  ssa_op_iter it;
  def_operand_p def_p;

  if (!loop)
    return true;

  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, it, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, DEF_FROM_PTR (def_p))
	{
	  if (loop_containing_stmt (USE_STMT (imm_use_p)) != loop)
	    return true;
	}
    }

  return false;
}
/* Kind of code we will generate for a partition.  Kinds greater than
   PKIND_REDUCTION are replaced by a builtin call (see
   partition_builtin_p).  */
enum partition_kind {
  PKIND_NORMAL, PKIND_REDUCTION, PKIND_MEMSET, PKIND_MEMCPY
};

/* A partition of the loop's statements.  */
typedef struct partition_s
{
  /* Bitmap of statement indices (stmts_from_loop order) in this
     partition.  */
  bitmap stmts;
  /* Bitmap of loop numbers whose statements the partition touches.  */
  bitmap loops;
  /* True when the partition contains a statement that cannot be
     recomputed (currently: a memory write).  */
  bool has_writes;
  enum partition_kind kind;
  /* data-references a kind != PKIND_NORMAL partition is about.  */
  data_reference_p main_dr;
  data_reference_p secondary_dr;
} *partition_t;
613 /* Allocate and initialize a partition from BITMAP. */
615 static partition_t
616 partition_alloc (bitmap stmts, bitmap loops)
618 partition_t partition = XCNEW (struct partition_s);
619 partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
620 partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
621 partition->has_writes = false;
622 partition->kind = PKIND_NORMAL;
623 return partition;
626 /* Free PARTITION. */
628 static void
629 partition_free (partition_t partition)
631 BITMAP_FREE (partition->stmts);
632 BITMAP_FREE (partition->loops);
633 free (partition);
636 /* Returns true if the partition can be generated as a builtin. */
638 static bool
639 partition_builtin_p (partition_t partition)
641 return partition->kind > PKIND_REDUCTION;
644 /* Returns true if the partition has an writes. */
646 static bool
647 partition_has_writes (partition_t partition)
649 return partition->has_writes;
652 /* Returns true when DEF is an SSA_NAME defined in LOOP and used after
653 the LOOP. */
655 static bool
656 ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
658 imm_use_iterator imm_iter;
659 use_operand_p use_p;
661 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
663 gimple use_stmt = USE_STMT (use_p);
664 if (!is_gimple_debug (use_stmt)
665 && loop != loop_containing_stmt (use_stmt))
666 return true;
669 return false;
/* Returns true when STMT defines a scalar variable used after the
   loop LOOP.  For a PHI node, only the PHI result is checked; for
   other statements, every SSA def is checked.  */

static bool
stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple stmt)
{
  def_operand_p def_p;
  ssa_op_iter op_iter;

  if (gimple_code (stmt) == GIMPLE_PHI)
    return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);

  FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
    if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
      return true;

  return false;
}
/* Return a copy of LOOP placed before LOOP.  The copy is inserted on
   LOOP's preheader edge so it executes first.  NOTE: the original-copy
   tables must bracket the duplication, and delete_update_ssa is needed
   because duplication leaves SSA form in need of an update.  */

static struct loop *
copy_loop_before (struct loop *loop)
{
  struct loop *res;
  edge preheader = loop_preheader_edge (loop);

  initialize_original_copy_tables ();
  res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, preheader);
  gcc_assert (res != NULL);
  free_original_copy_tables ();
  delete_update_ssa ();

  return res;
}
708 /* Creates an empty basic block after LOOP. */
710 static void
711 create_bb_after_loop (struct loop *loop)
713 edge exit = single_exit (loop);
715 if (!exit)
716 return;
718 split_edge (exit);
/* Generate code for PARTITION from the code in LOOP.  The loop is
   copied when COPY_P is true.  All the statements not flagged in the
   PARTITION bitmap are removed from the loop or from its copy.  The
   statements are indexed in sequence inside a basic block, and the
   basic blocks of a loop are taken in dom order.  This indexing must
   match stmts_from_loop exactly, or the bitmap lookups go wrong.  */

static void
generate_loops_for_partition (struct loop *loop, partition_t partition,
			      bool copy_p)
{
  unsigned i, x;
  gimple_stmt_iterator bsi;
  basic_block *bbs;

  if (copy_p)
    {
      loop = copy_loop_before (loop);
      gcc_assert (loop != NULL);
      create_preheader (loop, CP_SIMPLE_PREHEADERS);
      create_bb_after_loop (loop);
    }

  /* Remove stmts not in the PARTITION bitmap.  The order in which we
     visit the phi nodes and the statements is exactly as in
     stmts_from_loop.  */
  bbs = get_loop_body_in_dom_order (loop);

  /* First pass: reset debug uses of soon-to-be-removed statements so
     we do not leave dangling debug binds behind.  */
  if (MAY_HAVE_DEBUG_STMTS)
    for (x = 0, i = 0; i < loop->num_nodes; i++)
      {
	basic_block bb = bbs[i];

	for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	  if (!bitmap_bit_p (partition->stmts, x++))
	    reset_debug_uses (gsi_stmt (bsi));

	for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	  {
	    gimple stmt = gsi_stmt (bsi);
	    if (gimple_code (stmt) != GIMPLE_LABEL
		&& !is_gimple_debug (stmt)
		&& !bitmap_bit_p (partition->stmts, x++))
	      reset_debug_uses (stmt);
	  }
      }

  /* Second pass: actually remove the statements, restarting the
     statement index X from zero with the same traversal.  */
  for (x = 0, i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
	if (!bitmap_bit_p (partition->stmts, x++))
	  {
	    gimple phi = gsi_stmt (bsi);
	    if (virtual_operand_p (gimple_phi_result (phi)))
	      mark_virtual_phi_result_for_renaming (phi);
	    remove_phi_node (&bsi, true);
	  }
	else
	  gsi_next (&bsi);

      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
	{
	  gimple stmt = gsi_stmt (bsi);
	  if (gimple_code (stmt) != GIMPLE_LABEL
	      && !is_gimple_debug (stmt)
	      && !bitmap_bit_p (partition->stmts, x++))
	    {
	      unlink_stmt_vdef (stmt);
	      gsi_remove (&bsi, true);
	      release_defs (stmt);
	    }
	  else
	    gsi_next (&bsi);
	}
    }

  free (bbs);
}
801 /* Build the size argument for a memory operation call. */
803 static tree
804 build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter)
806 tree size;
807 size = fold_build2_loc (loc, MULT_EXPR, sizetype,
808 fold_convert_loc (loc, sizetype, nb_iter),
809 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
810 return fold_convert_loc (loc, size_type_node, size);
/* Build an address argument for a memory operation call.  NB_BYTES is
   the total transfer size; for a negative stride the start address is
   adjusted so the builtin still operates on the lowest-addressed byte
   of the accessed region.  */

static tree
build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
{
  tree addr_base;

  addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
  addr_base = fold_convert_loc (loc, sizetype, addr_base);

  /* Test for a negative stride, iterating over every element.  */
  if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
    {
      /* Step back by the full transfer size, then forward by one
	 element so we point at the first byte actually written.  */
      addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
				  fold_convert_loc (loc, sizetype, nb_bytes));
      addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
				  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
    }

  return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
}
/* If VAL memory representation contains the same value in all bytes,
   return that value, otherwise return -1.
   E.g. for 0x24242424 return 0x24, for IEEE double
   747708026454360457216.0 return 0x44, etc.  */

static int
const_with_all_bytes_same (tree val)
{
  unsigned char buf[64];
  int i, len;

  /* Fast path: literal zeros and empty non-clobber constructors are
     all-zero-bytes without needing an encode.  */
  if (integer_zerop (val)
      || real_zerop (val)
      || (TREE_CODE (val) == CONSTRUCTOR
	  && !TREE_CLOBBER_P (val)
	  && CONSTRUCTOR_NELTS (val) == 0))
    return 0;

  /* The byte-wise comparison below assumes 8-bit bytes.  */
  if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
    return -1;

  len = native_encode_expr (val, buf, sizeof (buf));
  if (len == 0)
    return -1;
  for (i = 1; i < len; i++)
    if (buf[i] != buf[0])
      return -1;
  return buf[0];
}
/* Generate a call to memset for PARTITION in LOOP, replacing the store
   recorded in partition->main_dr.  The call is emitted in the loop's
   preheader block.  */

static void
generate_memset_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree nb_iter, mem, fn, nb_bytes;
  location_t loc;
  tree val;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);
  /* A store in the latch executes once per latch execution; elsewhere
     it executes once per exit-condition evaluation.  */
  if (gimple_bb (stmt) == loop->latch)
    nb_iter = number_of_latch_executions (loop);
  else
    nb_iter = number_of_exit_cond_executions (loop);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, nb_iter);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
				       false, GSI_CONTINUE_LINKING);
  mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);

  /* This exactly matches the pattern recognition in classify_partition.  */
  val = gimple_assign_rhs1 (stmt);
  /* Handle constants like 0x15151515 and similarly
     floating point constants etc. where all bytes are the same.  */
  int bytev = const_with_all_bytes_same (val);
  if (bytev != -1)
    val = build_int_cst (integer_type_node, bytev);
  else if (TREE_CODE (val) == INTEGER_CST)
    val = fold_convert (integer_type_node, val);
  else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
    {
      /* memset takes an int; insert an explicit conversion.  */
      gimple cstmt;
      tree tem = make_ssa_name (integer_type_node, NULL);
      cstmt = gimple_build_assign_with_ops (NOP_EXPR, tem, val, NULL_TREE);
      gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
      val = tem;
    }

  fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
  fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "generated memset");
      if (bytev == 0)
	fprintf (dump_file, " zero\n");
      else
	fprintf (dump_file, "\n");
    }
}
/* Generate a call to memcpy for PARTITION in LOOP, copying from
   partition->secondary_dr to partition->main_dr.  Falls back to
   memmove when the two regions may alias.  */

static void
generate_memcpy_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree nb_iter, dest, src, fn, nb_bytes;
  location_t loc;
  enum built_in_function kind;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);
  /* See generate_memset_builtin for the latch distinction.  */
  if (gimple_bb (stmt) == loop->latch)
    nb_iter = number_of_latch_executions (loop);
  else
    nb_iter = number_of_exit_cond_executions (loop);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, nb_iter);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
				       false, GSI_CONTINUE_LINKING);
  dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
  /* memcpy requires non-overlapping operands; use memmove otherwise.  */
  if (ptr_derefs_may_alias_p (dest, src))
    kind = BUILT_IN_MEMMOVE;
  else
    kind = BUILT_IN_MEMCPY;

  dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
				   false, GSI_CONTINUE_LINKING);
  src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);
  fn = build_fold_addr_expr (builtin_decl_implicit (kind));
  fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (kind == BUILT_IN_MEMCPY)
	fprintf (dump_file, "generated memcpy\n");
      else
	fprintf (dump_file, "generated memmove\n");
    }
}
/* Remove and destroy the loop LOOP.  Redirects the single exit edge to
   come from the preheader, deletes every block of the loop body, and
   repairs the dominator of the exit destination.  Assumes LOOP has a
   single exit (exit is dereferenced unconditionally).  */

static void
destroy_loop (struct loop *loop)
{
  unsigned nbbs = loop->num_nodes;
  edge exit = single_exit (loop);
  basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
  basic_block *bbs;
  unsigned i;

  bbs = get_loop_body_in_dom_order (loop);

  redirect_edge_pred (exit, src);
  exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
  exit->flags |= EDGE_FALLTHRU;
  cancel_loop_tree (loop);
  rescan_loop_exit (exit, false, true);

  for (i = 0; i < nbbs; i++)
    {
      /* We have made sure to not leave any dangling uses of SSA
	 names defined in the loop.  With the exception of virtuals.
	 Make sure we replace all uses of virtual defs that will remain
	 outside of the loop with the bare symbol as delete_basic_block
	 will release them.  */
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple phi = gsi_stmt (gsi);
	  if (virtual_operand_p (gimple_phi_result (phi)))
	    mark_virtual_phi_result_for_renaming (phi);
	}
      for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple stmt = gsi_stmt (gsi);
	  tree vdef = gimple_vdef (stmt);
	  if (vdef && TREE_CODE (vdef) == SSA_NAME)
	    mark_virtual_operand_for_renaming (vdef);
	}
      delete_basic_block (bbs[i]);
    }
  free (bbs);

  set_immediate_dominator (CDI_DOMINATORS, dest,
			   recompute_dominator (CDI_DOMINATORS, dest));
}
/* Generates code for PARTITION.  COPY_P is false for the last partition
   generated, in which case the original loop may be destroyed once a
   builtin replaces it.  */

static void
generate_code_for_partition (struct loop *loop,
			     partition_t partition, bool copy_p)
{
  switch (partition->kind)
    {
    case PKIND_MEMSET:
      generate_memset_builtin (loop, partition);
      /* If this is the last partition for which we generate code, we have
	 to destroy the loop.  */
      if (!copy_p)
	destroy_loop (loop);
      break;

    case PKIND_MEMCPY:
      generate_memcpy_builtin (loop, partition);
      /* If this is the last partition for which we generate code, we have
	 to destroy the loop.  */
      if (!copy_p)
	destroy_loop (loop);
      break;

    case PKIND_REDUCTION:
      /* Reductions all have to be in the last partition.  */
      gcc_assert (!copy_p);
      /* FALLTHRU: reductions are generated as a normal loop.  */
    case PKIND_NORMAL:
      generate_loops_for_partition (loop, partition, copy_p);
      break;

    default:
      gcc_unreachable ();
    }
}
1058 /* Returns true if the node V of RDG cannot be recomputed. */
1060 static bool
1061 rdg_cannot_recompute_vertex_p (struct graph *rdg, int v)
1063 if (RDG_MEM_WRITE_STMT (rdg, v))
1064 return true;
1066 return false;
/* Returns true when the vertex V has already been generated in the
   current partition (V is in PROCESSED).  */

static inline bool
already_processed_vertex_p (bitmap processed, int v)
{
  return bitmap_bit_p (processed, v);
}

static void rdg_flag_vertex_and_dependent (struct graph *, int, partition_t,
					   bitmap);
/* Flag V from RDG as part of PARTITION, and also flag its loop number
   in the partition's loops bitmap.  A no-op when V is already in the
   partition.  */

static void
rdg_flag_vertex (struct graph *rdg, int v, partition_t partition)
{
  struct loop *loop;

  /* bitmap_set_bit returns false when the bit was already set.  */
  if (!bitmap_set_bit (partition->stmts, v))
    return;

  loop = loop_containing_stmt (RDG_STMT (rdg, v));
  bitmap_set_bit (partition->loops, loop->num);

  if (rdg_cannot_recompute_vertex_p (rdg, v))
    partition->has_writes = true;
}
1100 /* Flag in the bitmap PARTITION the vertex V and all its predecessors.
1101 Also flag their loop number in LOOPS. */
1103 static void
1104 rdg_flag_vertex_and_dependent (struct graph *rdg, int v, partition_t partition,
1105 bitmap processed)
1107 unsigned i;
1108 vec<int> nodes;
1109 nodes.create (3);
1110 int x;
1112 graphds_dfs (rdg, &v, 1, &nodes, false, NULL);
1114 FOR_EACH_VEC_ELT (nodes, i, x)
1115 if (bitmap_set_bit (processed, x))
1116 rdg_flag_vertex (rdg, x, partition);
1118 nodes.release ();
1121 /* Initialize CONDS with all the condition statements from the basic
1122 blocks of LOOP. */
1124 static void
1125 collect_condition_stmts (struct loop *loop, vec<gimple> *conds)
1127 unsigned i;
1128 edge e;
1129 vec<edge> exits = get_loop_exit_edges (loop);
1131 FOR_EACH_VEC_ELT (exits, i, e)
1133 gimple cond = last_stmt (e->src);
1135 if (cond)
1136 conds->safe_push (cond);
1139 exits.release ();
1142 /* Add to PARTITION all the exit condition statements for LOOPS
1143 together with all their dependent statements determined from
1144 RDG. */
1146 static void
1147 rdg_flag_loop_exits (struct graph *rdg, partition_t partition,
1148 bitmap processed)
1150 unsigned i;
1151 bitmap_iterator bi;
1152 vec<gimple> conds;
1153 conds.create (3);
1155 EXECUTE_IF_SET_IN_BITMAP (partition->loops, 0, i, bi)
1156 collect_condition_stmts (get_loop (cfun, i), &conds);
1158 while (!conds.is_empty ())
1160 gimple cond = conds.pop ();
1161 int v = rdg_vertex_for_stmt (rdg, cond);
1162 if (!already_processed_vertex_p (processed, v))
1164 bitmap saved_loops = BITMAP_ALLOC (NULL);
1165 bitmap_copy (saved_loops, partition->loops);
1166 rdg_flag_vertex_and_dependent (rdg, v, partition, processed);
1167 EXECUTE_IF_AND_COMPL_IN_BITMAP (partition->loops, saved_loops,
1168 0, i, bi)
1169 collect_condition_stmts (get_loop (cfun, i), &conds);
1170 BITMAP_FREE (saved_loops);
1174 conds.release ();
/* Returns a partition in which all the statements needed for computing
   the strongly connected component C of the RDG are flagged, also
   including the loop exit conditions.  */

static partition_t
build_rdg_partition_for_component (struct graph *rdg, rdgc c)
{
  partition_t partition = partition_alloc (NULL, NULL);
  bitmap processed = BITMAP_ALLOC (NULL);

  /* Flag the first vertex of the component and its dependent nodes.
     Other members of the component are included in its dependencies.
     ??? What do we need components for again?  To early merge initial
     vertices that are in a SCC of the RDG?  */
  rdg_flag_vertex_and_dependent (rdg, c->vertices[0], partition, processed);

  rdg_flag_loop_exits (rdg, partition, processed);

  BITMAP_FREE (processed);
  return partition;
}
1199 /* Free memory for COMPONENTS. */
1201 static void
1202 free_rdg_components (vec<rdgc> components)
1204 int i;
1205 rdgc x;
1207 FOR_EACH_VEC_ELT (components, i, x)
1209 x->vertices.release ();
1210 free (x);
1213 components.release ();
1216 /* Build the COMPONENTS vector with the strongly connected components
1217 of RDG in which the STARTING_VERTICES occur. */
1219 static void
1220 rdg_build_components (struct graph *rdg, vec<int> starting_vertices,
1221 vec<rdgc> *components)
1223 int i, v;
1224 bitmap saved_components = BITMAP_ALLOC (NULL);
1225 int n_components = graphds_scc (rdg, NULL);
1226 /* ??? Macros cannot process template types with more than one
1227 argument, so we need this typedef. */
1228 typedef vec<int> vec_int_heap;
1229 vec<int> *all_components = XNEWVEC (vec_int_heap, n_components);
1231 for (i = 0; i < n_components; i++)
1232 all_components[i].create (3);
1234 for (i = 0; i < rdg->n_vertices; i++)
1235 all_components[rdg->vertices[i].component].safe_push (i);
1237 FOR_EACH_VEC_ELT (starting_vertices, i, v)
1239 int c = rdg->vertices[v].component;
1241 if (bitmap_set_bit (saved_components, c))
1243 rdgc x = XCNEW (struct rdg_component);
1244 x->num = c;
1245 x->vertices = all_components[c];
1247 components->safe_push (x);
1251 for (i = 0; i < n_components; i++)
1252 if (!bitmap_bit_p (saved_components, i))
1253 all_components[i].release ();
1255 free (all_components);
1256 BITMAP_FREE (saved_components);
1259 /* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
1260 For the moment we detect only the memset zero pattern. */
1262 static void
1263 classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
1265 bitmap_iterator bi;
1266 unsigned i;
1267 tree nb_iter;
1268 data_reference_p single_load, single_store;
1269 bool volatiles_p = false;
1271 partition->kind = PKIND_NORMAL;
1272 partition->main_dr = NULL;
1273 partition->secondary_dr = NULL;
1275 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
1277 gimple stmt = RDG_STMT (rdg, i);
1279 if (gimple_has_volatile_ops (stmt))
1280 volatiles_p = true;
1282 /* If the stmt has uses outside of the loop fail.
1283 ??? If the stmt is generated in another partition that
1284 is not created as builtin we can ignore this. */
1285 if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
1287 if (dump_file && (dump_flags & TDF_DETAILS))
1288 fprintf (dump_file, "not generating builtin, partition has "
1289 "scalar uses outside of the loop\n");
1290 partition->kind = PKIND_REDUCTION;
1291 return;
1295 /* Perform general partition disqualification for builtins. */
1296 if (volatiles_p
1297 || !flag_tree_loop_distribute_patterns)
1298 return;
1300 nb_iter = number_of_exit_cond_executions (loop);
1301 if (!nb_iter || nb_iter == chrec_dont_know)
1302 return;
1304 /* Detect memset and memcpy. */
1305 single_load = NULL;
1306 single_store = NULL;
1307 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
1309 gimple stmt = RDG_STMT (rdg, i);
1310 data_reference_p dr;
1311 unsigned j;
1313 if (gimple_code (stmt) == GIMPLE_PHI)
1314 continue;
1316 /* Any scalar stmts are ok. */
1317 if (!gimple_vuse (stmt))
1318 continue;
1320 /* Otherwise just regular loads/stores. */
1321 if (!gimple_assign_single_p (stmt))
1322 return;
1324 /* But exactly one store and/or load. */
1325 for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
1327 if (DR_IS_READ (dr))
1329 if (single_load != NULL)
1330 return;
1331 single_load = dr;
1333 else
1335 if (single_store != NULL)
1336 return;
1337 single_store = dr;
1342 if (single_store && !single_load)
1344 gimple stmt = DR_STMT (single_store);
1345 tree rhs = gimple_assign_rhs1 (stmt);
1346 if (const_with_all_bytes_same (rhs) == -1
1347 && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
1348 || (TYPE_MODE (TREE_TYPE (rhs))
1349 != TYPE_MODE (unsigned_char_type_node))))
1350 return;
1351 if (TREE_CODE (rhs) == SSA_NAME
1352 && !SSA_NAME_IS_DEFAULT_DEF (rhs)
1353 && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
1354 return;
1355 if (!adjacent_dr_p (single_store))
1356 return;
1357 partition->kind = PKIND_MEMSET;
1358 partition->main_dr = single_store;
1360 else if (single_store && single_load)
1362 gimple store = DR_STMT (single_store);
1363 gimple load = DR_STMT (single_load);
1364 /* Direct aggregate copy or via an SSA name temporary. */
1365 if (load != store
1366 && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
1367 return;
1368 if (!adjacent_dr_p (single_store)
1369 || !adjacent_dr_p (single_load)
1370 || !operand_equal_p (DR_STEP (single_store),
1371 DR_STEP (single_load), 0))
1372 return;
1373 /* Now check that if there is a dependence this dependence is
1374 of a suitable form for memmove. */
1375 vec<loop_p> loops = vNULL;
1376 ddr_p ddr;
1377 loops.safe_push (loop);
1378 ddr = initialize_data_dependence_relation (single_load, single_store,
1379 loops);
1380 compute_affine_dependence (ddr, loop);
1381 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1383 free_dependence_relation (ddr);
1384 loops.release ();
1385 return;
1387 if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
1389 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1391 free_dependence_relation (ddr);
1392 loops.release ();
1393 return;
1395 lambda_vector dist_v;
1396 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
1398 int dist = dist_v[index_in_loop_nest (loop->num,
1399 DDR_LOOP_NEST (ddr))];
1400 if (dist > 0 && !DDR_REVERSED_P (ddr))
1402 free_dependence_relation (ddr);
1403 loops.release ();
1404 return;
1408 free_dependence_relation (ddr);
1409 loops.release ();
1410 partition->kind = PKIND_MEMCPY;
1411 partition->main_dr = single_store;
1412 partition->secondary_dr = single_load;
1416 /* For a data reference REF, return the declaration of its base
1417 address or NULL_TREE if the base is not determined. */
1419 static tree
1420 ref_base_address (data_reference_p dr)
1422 tree base_address = DR_BASE_ADDRESS (dr);
1423 if (base_address
1424 && TREE_CODE (base_address) == ADDR_EXPR)
1425 return TREE_OPERAND (base_address, 0);
1427 return base_address;
1430 /* Returns true when PARTITION1 and PARTITION2 have similar memory
1431 accesses in RDG. */
1433 static bool
1434 similar_memory_accesses (struct graph *rdg, partition_t partition1,
1435 partition_t partition2)
1437 unsigned i, j, k, l;
1438 bitmap_iterator bi, bj;
1439 data_reference_p ref1, ref2;
1441 /* First check whether in the intersection of the two partitions are
1442 any loads or stores. Common loads are the situation that happens
1443 most often. */
1444 EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
1445 if (RDG_MEM_WRITE_STMT (rdg, i)
1446 || RDG_MEM_READS_STMT (rdg, i))
1447 return true;
1449 /* Then check all data-references against each other. */
1450 EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
1451 if (RDG_MEM_WRITE_STMT (rdg, i)
1452 || RDG_MEM_READS_STMT (rdg, i))
1453 EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
1454 if (RDG_MEM_WRITE_STMT (rdg, j)
1455 || RDG_MEM_READS_STMT (rdg, j))
1457 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
1459 tree base1 = ref_base_address (ref1);
1460 if (base1)
1461 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
1462 if (base1 == ref_base_address (ref2))
1463 return true;
1467 return false;
1470 /* Aggregate several components into a useful partition that is
1471 registered in the PARTITIONS vector. Partitions will be
1472 distributed in different loops. */
1474 static void
1475 rdg_build_partitions (struct graph *rdg, vec<rdgc> components,
1476 vec<int> *other_stores,
1477 vec<partition_t> *partitions, bitmap processed)
1479 int i;
1480 rdgc x;
1481 partition_t partition = partition_alloc (NULL, NULL);
1483 FOR_EACH_VEC_ELT (components, i, x)
1485 partition_t np;
1486 int v = x->vertices[0];
1488 if (bitmap_bit_p (processed, v))
1489 continue;
1491 np = build_rdg_partition_for_component (rdg, x);
1492 bitmap_ior_into (partition->stmts, np->stmts);
1493 partition->has_writes = partition_has_writes (np);
1494 bitmap_ior_into (processed, np->stmts);
1495 partition_free (np);
1497 if (partition_has_writes (partition))
1499 if (dump_file && (dump_flags & TDF_DETAILS))
1501 fprintf (dump_file, "ldist useful partition:\n");
1502 dump_bitmap (dump_file, partition->stmts);
1505 partitions->safe_push (partition);
1506 partition = partition_alloc (NULL, NULL);
1510 /* Add the nodes from the RDG that were not marked as processed, and
1511 that are used outside the current loop. These are scalar
1512 computations that are not yet part of previous partitions. */
1513 for (i = 0; i < rdg->n_vertices; i++)
1514 if (!bitmap_bit_p (processed, i)
1515 && rdg_defs_used_in_other_loops_p (rdg, i))
1516 other_stores->safe_push (i);
1518 /* If there are still statements left in the OTHER_STORES array,
1519 create other components and partitions with these stores and
1520 their dependences. */
1521 if (other_stores->length () > 0)
1523 vec<rdgc> comps;
1524 comps.create (3);
1525 vec<int> foo;
1526 foo.create (3);
1528 rdg_build_components (rdg, *other_stores, &comps);
1529 rdg_build_partitions (rdg, comps, &foo, partitions, processed);
1531 foo.release ();
1532 free_rdg_components (comps);
1535 /* If there is something left in the last partition, save it. */
1536 if (bitmap_count_bits (partition->stmts) > 0)
1537 partitions->safe_push (partition);
1538 else
1539 partition_free (partition);
1542 /* Dump to FILE the PARTITIONS. */
1544 static void
1545 dump_rdg_partitions (FILE *file, vec<partition_t> partitions)
1547 int i;
1548 partition_t partition;
1550 FOR_EACH_VEC_ELT (partitions, i, partition)
1551 debug_bitmap_file (file, partition->stmts);
1554 /* Debug PARTITIONS. */
1555 extern void debug_rdg_partitions (vec<partition_t> );
1557 DEBUG_FUNCTION void
1558 debug_rdg_partitions (vec<partition_t> partitions)
1560 dump_rdg_partitions (stderr, partitions);
1563 /* Returns the number of read and write operations in the RDG. */
1565 static int
1566 number_of_rw_in_rdg (struct graph *rdg)
1568 int i, res = 0;
1570 for (i = 0; i < rdg->n_vertices; i++)
1572 if (RDG_MEM_WRITE_STMT (rdg, i))
1573 ++res;
1575 if (RDG_MEM_READS_STMT (rdg, i))
1576 ++res;
1579 return res;
1582 /* Returns the number of read and write operations in a PARTITION of
1583 the RDG. */
1585 static int
1586 number_of_rw_in_partition (struct graph *rdg, partition_t partition)
1588 int res = 0;
1589 unsigned i;
1590 bitmap_iterator ii;
1592 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
1594 if (RDG_MEM_WRITE_STMT (rdg, i))
1595 ++res;
1597 if (RDG_MEM_READS_STMT (rdg, i))
1598 ++res;
1601 return res;
1604 /* Returns true when one of the PARTITIONS contains all the read or
1605 write operations of RDG. */
1607 static bool
1608 partition_contains_all_rw (struct graph *rdg,
1609 vec<partition_t> partitions)
1611 int i;
1612 partition_t partition;
1613 int nrw = number_of_rw_in_rdg (rdg);
1615 FOR_EACH_VEC_ELT (partitions, i, partition)
1616 if (nrw == number_of_rw_in_partition (rdg, partition))
1617 return true;
1619 return false;
1622 /* Generate code from STARTING_VERTICES in RDG. Returns the number of
1623 distributed loops. */
1625 static int
1626 ldist_gen (struct loop *loop, struct graph *rdg,
1627 vec<int> starting_vertices)
1629 int i, nbp;
1630 vec<rdgc> components;
1631 components.create (3);
1632 vec<partition_t> partitions;
1633 partitions.create (3);
1634 vec<int> other_stores;
1635 other_stores.create (3);
1636 partition_t partition;
1637 bitmap processed = BITMAP_ALLOC (NULL);
1638 bool any_builtin;
1640 for (i = 0; i < rdg->n_vertices; i++)
1642 /* Save in OTHER_STORES all the memory writes that are not in
1643 STARTING_VERTICES. */
1644 if (RDG_MEM_WRITE_STMT (rdg, i))
1646 int v;
1647 unsigned j;
1648 bool found = false;
1650 FOR_EACH_VEC_ELT (starting_vertices, j, v)
1651 if (i == v)
1653 found = true;
1654 break;
1657 if (!found)
1658 other_stores.safe_push (i);
1662 rdg_build_components (rdg, starting_vertices, &components);
1663 rdg_build_partitions (rdg, components, &other_stores, &partitions,
1664 processed);
1665 BITMAP_FREE (processed);
1667 any_builtin = false;
1668 FOR_EACH_VEC_ELT (partitions, i, partition)
1670 classify_partition (loop, rdg, partition);
1671 any_builtin |= partition_builtin_p (partition);
1674 /* If we are only distributing patterns fuse all partitions that
1675 were not properly classified as builtins. Else fuse partitions
1676 with similar memory accesses. */
1677 if (!flag_tree_loop_distribution)
1679 partition_t into;
1680 /* If we did not detect any builtin simply bail out. */
1681 if (!any_builtin)
1683 nbp = 0;
1684 goto ldist_done;
1686 /* Only fuse adjacent non-builtin partitions, see PR53616.
1687 ??? Use dependence information to improve partition ordering. */
1688 i = 0;
1691 for (; partitions.iterate (i, &into); ++i)
1692 if (!partition_builtin_p (into))
1693 break;
1694 for (++i; partitions.iterate (i, &partition); ++i)
1695 if (!partition_builtin_p (partition))
1697 bitmap_ior_into (into->stmts, partition->stmts);
1698 if (partition->kind == PKIND_REDUCTION)
1699 into->kind = PKIND_REDUCTION;
1700 partitions.ordered_remove (i);
1701 partition_free (partition);
1702 i--;
1704 else
1705 break;
1707 while ((unsigned) i < partitions.length ());
1709 else
1711 partition_t into;
1712 int j;
1713 for (i = 0; partitions.iterate (i, &into); ++i)
1715 if (partition_builtin_p (into))
1716 continue;
1717 for (j = i + 1;
1718 partitions.iterate (j, &partition); ++j)
1720 if (!partition_builtin_p (partition)
1721 /* ??? The following is horribly inefficient,
1722 we are re-computing and analyzing data-references
1723 of the stmts in the partitions all the time. */
1724 && similar_memory_accesses (rdg, into, partition))
1726 if (dump_file && (dump_flags & TDF_DETAILS))
1728 fprintf (dump_file, "fusing partitions\n");
1729 dump_bitmap (dump_file, into->stmts);
1730 dump_bitmap (dump_file, partition->stmts);
1731 fprintf (dump_file, "because they have similar "
1732 "memory accesses\n");
1734 bitmap_ior_into (into->stmts, partition->stmts);
1735 if (partition->kind == PKIND_REDUCTION)
1736 into->kind = PKIND_REDUCTION;
1737 partitions.ordered_remove (j);
1738 partition_free (partition);
1739 j--;
1745 /* Fuse all reduction partitions into the last. */
1746 if (partitions.length () > 1)
1748 partition_t into = partitions.last ();
1749 for (i = partitions.length () - 2; i >= 0; --i)
1751 partition_t what = partitions[i];
1752 if (what->kind == PKIND_REDUCTION)
1754 if (dump_file && (dump_flags & TDF_DETAILS))
1756 fprintf (dump_file, "fusing partitions\n");
1757 dump_bitmap (dump_file, into->stmts);
1758 dump_bitmap (dump_file, what->stmts);
1759 fprintf (dump_file, "because the latter has reductions\n");
1761 bitmap_ior_into (into->stmts, what->stmts);
1762 into->kind = PKIND_REDUCTION;
1763 partitions.ordered_remove (i);
1764 partition_free (what);
1769 nbp = partitions.length ();
1770 if (nbp == 0
1771 || (nbp == 1 && !partition_builtin_p (partitions[0]))
1772 || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
1774 nbp = 0;
1775 goto ldist_done;
1778 if (dump_file && (dump_flags & TDF_DETAILS))
1779 dump_rdg_partitions (dump_file, partitions);
1781 FOR_EACH_VEC_ELT (partitions, i, partition)
1782 generate_code_for_partition (loop, partition, i < nbp - 1);
1784 ldist_done:
1786 FOR_EACH_VEC_ELT (partitions, i, partition)
1787 partition_free (partition);
1789 other_stores.release ();
1790 partitions.release ();
1791 free_rdg_components (components);
1792 return nbp;
1795 /* Distributes the code from LOOP in such a way that producer
1796 statements are placed before consumer statements. When STMTS is
1797 NULL, performs the maximal distribution, if STMTS is not NULL,
1798 tries to separate only these statements from the LOOP's body.
1799 Returns the number of distributed loops. */
1801 static int
1802 distribute_loop (struct loop *loop, vec<gimple> stmts)
1804 int res = 0;
1805 struct graph *rdg;
1806 gimple s;
1807 unsigned i;
1808 vec<int> vertices;
1809 vec<loop_p> loop_nest;
1811 loop_nest.create (3);
1812 if (!find_loop_nest (loop, &loop_nest))
1814 loop_nest.release ();
1815 return 0;
1818 rdg = build_rdg (loop_nest);
1819 if (!rdg)
1821 if (dump_file && (dump_flags & TDF_DETAILS))
1822 fprintf (dump_file,
1823 "FIXME: Loop %d not distributed: failed to build the RDG.\n",
1824 loop->num);
1826 loop_nest.release ();
1827 return res;
1830 vertices.create (3);
1832 if (dump_file && (dump_flags & TDF_DETAILS))
1833 dump_rdg (dump_file, rdg);
1835 FOR_EACH_VEC_ELT (stmts, i, s)
1837 int v = rdg_vertex_for_stmt (rdg, s);
1839 if (v >= 0)
1841 vertices.safe_push (v);
1843 if (dump_file && (dump_flags & TDF_DETAILS))
1844 fprintf (dump_file,
1845 "ldist asked to generate code for vertex %d\n", v);
1849 res = ldist_gen (loop, rdg, vertices);
1850 vertices.release ();
1851 free_rdg (rdg);
1852 loop_nest.release ();
1853 return res;
1856 /* Distribute all loops in the current function. */
1858 static unsigned int
1859 tree_loop_distribution (void)
1861 struct loop *loop;
1862 loop_iterator li;
1863 bool changed = false;
1864 basic_block bb;
1866 FOR_ALL_BB (bb)
1868 gimple_stmt_iterator gsi;
1869 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1870 gimple_set_uid (gsi_stmt (gsi), -1);
1871 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1872 gimple_set_uid (gsi_stmt (gsi), -1);
1875 /* We can at the moment only distribute non-nested loops, thus restrict
1876 walking to innermost loops. */
1877 FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
1879 vec<gimple> work_list = vNULL;
1880 basic_block *bbs;
1881 int num = loop->num;
1882 int nb_generated_loops = 0;
1883 unsigned int i;
1885 /* If the loop doesn't have a single exit we will fail anyway,
1886 so do that early. */
1887 if (!single_exit (loop))
1888 continue;
1890 /* Only optimize hot loops. */
1891 if (!optimize_loop_for_speed_p (loop))
1892 continue;
1894 /* Only distribute loops with a header and latch for now. */
1895 if (loop->num_nodes > 2)
1896 continue;
1898 /* Initialize the worklist with stmts we seed the partitions with. */
1899 bbs = get_loop_body_in_dom_order (loop);
1900 for (i = 0; i < loop->num_nodes; ++i)
1902 gimple_stmt_iterator gsi;
1903 for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
1905 gimple stmt = gsi_stmt (gsi);
1906 /* Distribute stmts which have defs that are used outside of
1907 the loop. */
1908 if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
1910 /* Otherwise only distribute stores for now. */
1911 else if (!gimple_assign_single_p (stmt)
1912 || is_gimple_reg (gimple_assign_lhs (stmt)))
1913 continue;
1915 work_list.safe_push (stmt);
1918 free (bbs);
1920 if (work_list.length () > 0)
1921 nb_generated_loops = distribute_loop (loop, work_list);
1923 if (nb_generated_loops > 0)
1924 changed = true;
1926 if (dump_file && (dump_flags & TDF_DETAILS))
1928 if (nb_generated_loops > 1)
1929 fprintf (dump_file, "Loop %d distributed: split to %d loops.\n",
1930 num, nb_generated_loops);
1931 else
1932 fprintf (dump_file, "Loop %d is the same.\n", num);
1935 work_list.release ();
1938 if (changed)
1940 mark_virtual_operands_for_renaming (cfun);
1941 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1944 #ifdef ENABLE_CHECKING
1945 verify_loop_structure ();
1946 #endif
1948 return 0;
1951 static bool
1952 gate_tree_loop_distribution (void)
1954 return flag_tree_loop_distribution
1955 || flag_tree_loop_distribute_patterns;
1958 namespace {
1960 const pass_data pass_data_loop_distribution =
1962 GIMPLE_PASS, /* type */
1963 "ldist", /* name */
1964 OPTGROUP_LOOP, /* optinfo_flags */
1965 true, /* has_gate */
1966 true, /* has_execute */
1967 TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
1968 ( PROP_cfg | PROP_ssa ), /* properties_required */
1969 0, /* properties_provided */
1970 0, /* properties_destroyed */
1971 0, /* todo_flags_start */
1972 TODO_verify_ssa, /* todo_flags_finish */
1975 class pass_loop_distribution : public gimple_opt_pass
1977 public:
1978 pass_loop_distribution(gcc::context *ctxt)
1979 : gimple_opt_pass(pass_data_loop_distribution, ctxt)
1982 /* opt_pass methods: */
1983 bool gate () { return gate_tree_loop_distribution (); }
1984 unsigned int execute () { return tree_loop_distribution (); }
1986 }; // class pass_loop_distribution
1988 } // anon namespace
1990 gimple_opt_pass *
1991 make_pass_loop_distribution (gcc::context *ctxt)
1993 return new pass_loop_distribution (ctxt);