/* Loop distribution.
   Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.
   Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
   and Sebastian Pop <sebastian.pop@amd.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass performs loop distribution: for example, the loop

   |DO I = 2, N
   |    A(I) = B(I) + C
   |    D(I) = A(I-1)*E
   |ENDDO

   is transformed to

   |DOALL I = 2, N
   |   A(I) = B(I) + C
   |ENDDO
   |
   |DOALL I = 2, N
   |   D(I) = A(I-1)*E
   |ENDDO

   This pass uses an RDG, Reduced Dependence Graph built on top of the
   data dependence relations.  The RDG is then topologically sorted to
   obtain a map of information producers/consumers based on which it
   generates the new loops.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree-flow.h"
#include "cfgloop.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-pass.h"
enum partition_kind { PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY };
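/* An editor's illustration (not from the original sources): a loop such
   as

     for (i = 0; i < n; i++)
       a[i] = 0;

   can be classified as PKIND_MEMSET and emitted as a single call to
   memset, while

     for (i = 0; i < n; i++)
       a[i] = b[i];

   can be classified as PKIND_MEMCPY and emitted as a call to memcpy
   (or memmove when the references may overlap).  Anything else stays
   PKIND_NORMAL and is generated as a regular loop.  */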
typedef struct partition_s
{
  bitmap stmts;
  bool has_writes;
  enum partition_kind kind;
  /* Data references a kind != PKIND_NORMAL partition is about.  */
  data_reference_p main_dr;
  data_reference_p secondary_dr;
} *partition_t;

DEF_VEC_P (partition_t);
DEF_VEC_ALLOC_P (partition_t, heap);
/* Allocate and initialize a partition from BITMAP.  */

static partition_t
partition_alloc (bitmap stmts)
{
  partition_t partition = XCNEW (struct partition_s);
  partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
  partition->has_writes = false;
  partition->kind = PKIND_NORMAL;
  return partition;
}

/* Free PARTITION.  */

static void
partition_free (partition_t partition)
{
  BITMAP_FREE (partition->stmts);
  free (partition);
}

/* Returns true if the partition can be generated as a builtin.  */

static bool
partition_builtin_p (partition_t partition)
{
  return partition->kind != PKIND_NORMAL;
}

/* Returns true if the partition has any writes.  */

static bool
partition_has_writes (partition_t partition)
{
  return partition->has_writes;
}
/* If bit I is not set, it means that this node represents an
   operation that has already been performed, and that should not be
   performed again.  This is the subgraph of remaining important
   computations that is passed to the DFS algorithm to avoid including
   the same stores several times in different loops.  */
static bitmap remaining_stmts;

/* A node of the RDG is marked in this bitmap when it has as a
   predecessor a node that writes to memory.  */
static bitmap upstream_mem_writes;
/* Returns true when DEF is an SSA_NAME defined in LOOP and used after
   the LOOP.  */

static bool
ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
{
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
    if (loop != loop_containing_stmt (USE_STMT (use_p)))
      return true;

  return false;
}

/* Returns true when STMT defines a scalar variable used after the
   loop LOOP.  */

static bool
stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple stmt)
{
  def_operand_p def_p;
  ssa_op_iter op_iter;

  if (gimple_code (stmt) == GIMPLE_PHI)
    return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);

  FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
    if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
      return true;

  return false;
}
/* Update the PHI nodes of NEW_LOOP.  NEW_LOOP is a duplicate of
   ORIG_LOOP.  */

static void
update_phis_for_loop_copy (struct loop *orig_loop, struct loop *new_loop)
{
  tree new_ssa_name;
  gimple_stmt_iterator si_new, si_orig;
  edge orig_loop_latch = loop_latch_edge (orig_loop);
  edge orig_entry_e = loop_preheader_edge (orig_loop);
  edge new_loop_entry_e = loop_preheader_edge (new_loop);

  /* Scan the phis in the headers of the old and new loops
     (they are organized in exactly the same order).  */
  for (si_new = gsi_start_phis (new_loop->header),
       si_orig = gsi_start_phis (orig_loop->header);
       !gsi_end_p (si_new) && !gsi_end_p (si_orig);
       gsi_next (&si_new), gsi_next (&si_orig))
    {
      tree def;
      source_location locus;
      gimple phi_new = gsi_stmt (si_new);
      gimple phi_orig = gsi_stmt (si_orig);

      /* Add the first phi argument for the phi in NEW_LOOP (the one
         associated with the entry of NEW_LOOP).  */
      def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_entry_e);
      locus = gimple_phi_arg_location_from_edge (phi_orig, orig_entry_e);
      add_phi_arg (phi_new, def, new_loop_entry_e, locus);

      /* Add the second phi argument for the phi in NEW_LOOP (the one
         associated with the latch of NEW_LOOP).  */
      def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
      locus = gimple_phi_arg_location_from_edge (phi_orig, orig_loop_latch);

      if (TREE_CODE (def) == SSA_NAME)
        {
          new_ssa_name = get_current_def (def);

          if (!new_ssa_name)
            /* This only happens if there are no definitions inside the
               loop.  Use the invariant in the new loop as is.  */
            new_ssa_name = def;
        }
      else
        /* Could be an integer.  */
        new_ssa_name = def;

      add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop), locus);
    }
}
/* Return a copy of LOOP placed before LOOP.  */

static struct loop *
copy_loop_before (struct loop *loop)
{
  struct loop *res;
  edge preheader = loop_preheader_edge (loop);

  initialize_original_copy_tables ();
  res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, preheader);
  gcc_assert (res != NULL);
  free_original_copy_tables ();

  update_phis_for_loop_copy (loop, res);
  rename_variables_in_loop (res);

  return res;
}

/* Creates an empty basic block after LOOP.  */

static void
create_bb_after_loop (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return;

  split_edge (exit);
}
/* Generate code for PARTITION from the code in LOOP.  The loop is
   copied when COPY_P is true.  All the statements not flagged in the
   PARTITION bitmap are removed from the loop or from its copy.  The
   statements are indexed in sequence inside a basic block, and the
   basic blocks of a loop are taken in dom order.  */

static void
generate_loops_for_partition (struct loop *loop, partition_t partition,
                              bool copy_p)
{
  unsigned i, x;
  gimple_stmt_iterator bsi;
  basic_block *bbs;

  if (copy_p)
    {
      loop = copy_loop_before (loop);
      gcc_assert (loop != NULL);
      create_preheader (loop, CP_SIMPLE_PREHEADERS);
      create_bb_after_loop (loop);
    }

  /* Remove stmts not in the PARTITION bitmap.  The order in which we
     visit the phi nodes and the statements is exactly as in
     stmts_from_loop.  */
  bbs = get_loop_body_in_dom_order (loop);

  if (MAY_HAVE_DEBUG_STMTS)
    for (x = 0, i = 0; i < loop->num_nodes; i++)
      {
        basic_block bb = bbs[i];

        for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
          if (!bitmap_bit_p (partition->stmts, x++))
            reset_debug_uses (gsi_stmt (bsi));

        for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
          {
            gimple stmt = gsi_stmt (bsi);
            if (gimple_code (stmt) != GIMPLE_LABEL
                && !is_gimple_debug (stmt)
                && !bitmap_bit_p (partition->stmts, x++))
              reset_debug_uses (stmt);
          }
      }

  for (x = 0, i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
        if (!bitmap_bit_p (partition->stmts, x++))
          {
            gimple phi = gsi_stmt (bsi);
            if (virtual_operand_p (gimple_phi_result (phi)))
              mark_virtual_phi_result_for_renaming (phi);
            remove_phi_node (&bsi, true);
          }
        else
          gsi_next (&bsi);

      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
        {
          gimple stmt = gsi_stmt (bsi);
          if (gimple_code (stmt) != GIMPLE_LABEL
              && !is_gimple_debug (stmt)
              && !bitmap_bit_p (partition->stmts, x++))
            {
              unlink_stmt_vdef (stmt);
              gsi_remove (&bsi, true);
              release_defs (stmt);
            }
          else
            gsi_next (&bsi);
        }
    }

  free (bbs);
}
/* Build the size argument for a memory operation call.  */

static tree
build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter)
{
  tree size;
  size = fold_build2_loc (loc, MULT_EXPR, sizetype,
                          fold_convert_loc (loc, sizetype, nb_iter),
                          TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
  return fold_convert_loc (loc, size_type_node, size);
}
/* Build an address argument for a memory operation call.  */

static tree
build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
{
  tree addr_base;

  addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
  addr_base = fold_convert_loc (loc, sizetype, addr_base);

  /* Test for a negative stride, iterating over every element.  */
  if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
    {
      addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
                                  fold_convert_loc (loc, sizetype, nb_bytes));
      addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
                                  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
    }

  return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
}
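/* An editor's illustration of the negative-stride case above (not from
   the original sources): for

     for (i = n - 1; i >= 0; i--)
       a[i] = 0;

   DR_OFFSET + DR_INIT point at the last element written, so the address
   is rewound by NB_BYTES and then advanced by one element size, yielding
   the lowest address touched by the loop, which is what the memory
   builtins expect as their destination.  */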
/* Generate a call to memset for PARTITION in LOOP.  */

static void
generate_memset_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree nb_iter, mem, fn, nb_bytes;
  location_t loc;
  tree val;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);
  if (gimple_bb (stmt) == loop->latch)
    nb_iter = number_of_latch_executions (loop);
  else
    nb_iter = number_of_exit_cond_executions (loop);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, nb_iter);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
                                       false, GSI_CONTINUE_LINKING);
  mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
                                  false, GSI_CONTINUE_LINKING);

  /* This exactly matches the pattern recognition in classify_partition.  */
  val = gimple_assign_rhs1 (stmt);
  if (integer_zerop (val)
      || real_zerop (val)
      || TREE_CODE (val) == CONSTRUCTOR)
    val = integer_zero_node;
  else if (integer_all_onesp (val))
    val = build_int_cst (integer_type_node, -1);
  else
    {
      if (TREE_CODE (val) == INTEGER_CST)
        val = fold_convert (integer_type_node, val);
      else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
        {
          gimple cstmt;
          tree tem = make_ssa_name (integer_type_node, NULL);
          cstmt = gimple_build_assign_with_ops (NOP_EXPR, tem, val, NULL_TREE);
          gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
          val = tem;
        }
    }

  fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
  fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "generated memset");
      if (integer_zerop (val))
        fprintf (dump_file, " zero\n");
      else if (integer_all_onesp (val))
        fprintf (dump_file, " minus one\n");
      else
        fprintf (dump_file, "\n");
    }
}
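/* An editor's sketch of the result (not taken from an actual dump): for

     for (i = 0; i < n; i++)
       a[i] = 0;

   the statements emitted in the preheader are equivalent to

     _nb = (size_t) n * sizeof (a[0]);
     memset (&a[0], 0, _nb);

   and the now-redundant loop body is removed later by destroy_loop.  */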
/* Generate a call to memcpy for PARTITION in LOOP.  */

static void
generate_memcpy_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree nb_iter, dest, src, fn, nb_bytes;
  location_t loc;
  enum built_in_function kind;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);
  if (gimple_bb (stmt) == loop->latch)
    nb_iter = number_of_latch_executions (loop);
  else
    nb_iter = number_of_exit_cond_executions (loop);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, nb_iter);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
                                       false, GSI_CONTINUE_LINKING);
  dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
  if (ptr_derefs_may_alias_p (dest, src))
    kind = BUILT_IN_MEMMOVE;
  else
    kind = BUILT_IN_MEMCPY;

  dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
                                   false, GSI_CONTINUE_LINKING);
  src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
                                  false, GSI_CONTINUE_LINKING);
  fn = build_fold_addr_expr (builtin_decl_implicit (kind));
  fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (kind == BUILT_IN_MEMCPY)
        fprintf (dump_file, "generated memcpy\n");
      else
        fprintf (dump_file, "generated memmove\n");
    }
}
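/* An editor's sketch of the result (not taken from an actual dump): for

     for (i = 0; i < n; i++)
       a[i] = b[i];

   the generated call is equivalent to

     memcpy (&a[0], &b[0], (size_t) n * sizeof (a[0]));

   with memmove used instead whenever ptr_derefs_may_alias_p cannot
   prove that destination and source do not overlap.  */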
/* Remove and destroy the loop LOOP.  */

static void
destroy_loop (struct loop *loop)
{
  unsigned nbbs = loop->num_nodes;
  edge exit = single_exit (loop);
  basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
  basic_block *bbs;
  unsigned i;

  bbs = get_loop_body_in_dom_order (loop);

  redirect_edge_pred (exit, src);
  exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
  exit->flags |= EDGE_FALLTHRU;
  cancel_loop_tree (loop);
  rescan_loop_exit (exit, false, true);

  for (i = 0; i < nbbs; i++)
    {
      /* We have made sure not to leave any dangling uses of SSA
         names defined in the loop, with the exception of virtuals.
         Make sure we replace all uses of virtual defs that will remain
         outside of the loop with the bare symbol as delete_basic_block
         will release them.  */
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple phi = gsi_stmt (gsi);
          if (virtual_operand_p (gimple_phi_result (phi)))
            mark_virtual_phi_result_for_renaming (phi);
        }
      for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple stmt = gsi_stmt (gsi);
          tree vdef = gimple_vdef (stmt);
          if (vdef && TREE_CODE (vdef) == SSA_NAME)
            mark_virtual_operand_for_renaming (vdef);
        }
      delete_basic_block (bbs[i]);
    }
  free (bbs);

  set_immediate_dominator (CDI_DOMINATORS, dest,
                           recompute_dominator (CDI_DOMINATORS, dest));
}
/* Generates code for PARTITION.  */

static void
generate_code_for_partition (struct loop *loop,
                             partition_t partition, bool copy_p)
{
  switch (partition->kind)
    {
    case PKIND_MEMSET:
      generate_memset_builtin (loop, partition);
      /* If this is the last partition for which we generate code, we have
         to destroy the loop.  */
      if (!copy_p)
        destroy_loop (loop);
      break;

    case PKIND_MEMCPY:
      generate_memcpy_builtin (loop, partition);
      /* If this is the last partition for which we generate code, we have
         to destroy the loop.  */
      if (!copy_p)
        destroy_loop (loop);
      break;

    case PKIND_NORMAL:
      generate_loops_for_partition (loop, partition, copy_p);
      break;

    default:
      gcc_unreachable ();
    }
}
/* Returns true if the node V of RDG cannot be recomputed.  */

static bool
rdg_cannot_recompute_vertex_p (struct graph *rdg, int v)
{
  if (RDG_MEM_WRITE_STMT (rdg, v))
    return true;

  return false;
}

/* Returns true when the vertex V has already been generated in the
   current partition (V is in PROCESSED), or when V belongs to another
   partition and cannot be recomputed (V is not in REMAINING_STMTS).  */

static inline bool
already_processed_vertex_p (bitmap processed, int v)
{
  return (bitmap_bit_p (processed, v)
          || !bitmap_bit_p (remaining_stmts, v));
}
/* Returns NULL when there is no anti-dependence among the successors
   of vertex V, otherwise returns the edge with the anti-dep.  */

static struct graph_edge *
has_anti_dependence (struct vertex *v)
{
  struct graph_edge *e;

  if (v->succ)
    for (e = v->succ; e; e = e->succ_next)
      if (RDGE_TYPE (e) == anti_dd)
        return e;

  return NULL;
}

/* Returns true when one of the predecessors of V is marked as having
   upstream memory writes, not counting flow channels.  */

static bool
predecessor_has_mem_write (struct graph *rdg, struct vertex *v)
{
  struct graph_edge *e;

  if (v->pred)
    for (e = v->pred; e; e = e->pred_next)
      if (bitmap_bit_p (upstream_mem_writes, e->src)
          /* Don't consider flow channels: a write to memory followed
             by a read from memory.  These channels allow the split of
             the RDG in different partitions.  */
          && !RDG_MEM_WRITE_STMT (rdg, e->src))
        return true;

  return false;
}
/* Initializes the upstream_mem_writes bitmap following the
   information from RDG.  */

static void
mark_nodes_having_upstream_mem_writes (struct graph *rdg)
{
  int v, x;
  bitmap seen = BITMAP_ALLOC (NULL);

  for (v = rdg->n_vertices - 1; v >= 0; v--)
    if (!bitmap_bit_p (seen, v))
      {
        unsigned i;
        VEC (int, heap) *nodes = VEC_alloc (int, heap, 3);

        graphds_dfs (rdg, &v, 1, &nodes, false, NULL);

        FOR_EACH_VEC_ELT (int, nodes, i, x)
          {
            if (!bitmap_set_bit (seen, x))
              continue;

            if (RDG_MEM_WRITE_STMT (rdg, x)
                || predecessor_has_mem_write (rdg, &(rdg->vertices[x]))
                /* In anti dependences the read should occur before
                   the write, this is why both the read and the write
                   should be placed in the same partition.  */
                || has_anti_dependence (&(rdg->vertices[x])))
              bitmap_set_bit (upstream_mem_writes, x);
          }

        VEC_free (int, heap, nodes);
      }
}

/* Returns true when vertex U has a memory write node as a predecessor
   in RDG.  */

static bool
has_upstream_mem_writes (int u)
{
  return bitmap_bit_p (upstream_mem_writes, u);
}
static void rdg_flag_vertex_and_dependent (struct graph *, int, partition_t,
                                           bitmap, bitmap);

/* Flag the uses of U, stopping the traversal according to the
   information in upstream_mem_writes.  */

static void
rdg_flag_uses (struct graph *rdg, int u, partition_t partition, bitmap loops,
               bitmap processed)
{
  use_operand_p use_p;
  struct vertex *x = &(rdg->vertices[u]);
  gimple stmt = RDGV_STMT (x);
  struct graph_edge *anti_dep = has_anti_dependence (x);

  /* Keep in the same partition the destination of an antidependence,
     because this is a store to the exact same location.  Putting this
     in another partition is bad for cache locality.  */
  if (anti_dep)
    {
      int v = anti_dep->dest;

      if (!already_processed_vertex_p (processed, v))
        rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
                                       processed);
    }

  if (gimple_code (stmt) != GIMPLE_PHI)
    {
      if ((use_p = gimple_vuse_op (stmt)) != NULL_USE_OPERAND_P)
        {
          tree use = USE_FROM_PTR (use_p);

          if (TREE_CODE (use) == SSA_NAME)
            {
              gimple def_stmt = SSA_NAME_DEF_STMT (use);
              int v = rdg_vertex_for_stmt (rdg, def_stmt);

              if (v >= 0
                  && !already_processed_vertex_p (processed, v))
                rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
                                               processed);
            }
        }
    }

  if (is_gimple_assign (stmt) && has_upstream_mem_writes (u))
    {
      tree op0 = gimple_assign_lhs (stmt);

      /* Scalar channels don't have enough space for transmitting data
         between tasks, unless we add more storage by privatizing.  */
      if (is_gimple_reg (op0))
        {
          use_operand_p use_p;
          imm_use_iterator iter;

          FOR_EACH_IMM_USE_FAST (use_p, iter, op0)
            {
              int v = rdg_vertex_for_stmt (rdg, USE_STMT (use_p));

              if (!already_processed_vertex_p (processed, v))
                rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
                                               processed);
            }
        }
    }
}
/* Flag V from RDG as part of PARTITION, and also flag its loop number
   in LOOPS.  */

static void
rdg_flag_vertex (struct graph *rdg, int v, partition_t partition, bitmap loops)
{
  struct loop *loop;

  if (!bitmap_set_bit (partition->stmts, v))
    return;

  loop = loop_containing_stmt (RDG_STMT (rdg, v));
  bitmap_set_bit (loops, loop->num);

  if (rdg_cannot_recompute_vertex_p (rdg, v))
    {
      partition->has_writes = true;
      bitmap_clear_bit (remaining_stmts, v);
    }
}

/* Flag in the bitmap PARTITION the vertex V and all its predecessors.
   Also flag their loop number in LOOPS.  */

static void
rdg_flag_vertex_and_dependent (struct graph *rdg, int v, partition_t partition,
                               bitmap loops, bitmap processed)
{
  unsigned i;
  VEC (int, heap) *nodes = VEC_alloc (int, heap, 3);
  int x;

  bitmap_set_bit (processed, v);
  rdg_flag_uses (rdg, v, partition, loops, processed);
  graphds_dfs (rdg, &v, 1, &nodes, false, remaining_stmts);
  rdg_flag_vertex (rdg, v, partition, loops);

  FOR_EACH_VEC_ELT (int, nodes, i, x)
    if (!already_processed_vertex_p (processed, x))
      rdg_flag_vertex_and_dependent (rdg, x, partition, loops, processed);

  VEC_free (int, heap, nodes);
}
/* Initialize CONDS with all the condition statements from the basic
   blocks of LOOP.  */

static void
collect_condition_stmts (struct loop *loop, VEC (gimple, heap) **conds)
{
  unsigned i;
  edge e;
  VEC (edge, heap) *exits = get_loop_exit_edges (loop);

  FOR_EACH_VEC_ELT (edge, exits, i, e)
    {
      gimple cond = last_stmt (e->src);

      if (cond)
        VEC_safe_push (gimple, heap, *conds, cond);
    }

  VEC_free (edge, heap, exits);
}

/* Add to PARTITION all the exit condition statements for LOOPS
   together with all their dependent statements determined from
   RDG.  */

static void
rdg_flag_loop_exits (struct graph *rdg, bitmap loops, partition_t partition,
                     bitmap processed)
{
  unsigned i;
  bitmap_iterator bi;
  VEC (gimple, heap) *conds = VEC_alloc (gimple, heap, 3);

  EXECUTE_IF_SET_IN_BITMAP (loops, 0, i, bi)
    collect_condition_stmts (get_loop (i), &conds);

  while (!VEC_empty (gimple, conds))
    {
      gimple cond = VEC_pop (gimple, conds);
      int v = rdg_vertex_for_stmt (rdg, cond);
      bitmap new_loops = BITMAP_ALLOC (NULL);

      if (!already_processed_vertex_p (processed, v))
        rdg_flag_vertex_and_dependent (rdg, v, partition, new_loops, processed);

      EXECUTE_IF_SET_IN_BITMAP (new_loops, 0, i, bi)
        if (bitmap_set_bit (loops, i))
          collect_condition_stmts (get_loop (i), &conds);

      BITMAP_FREE (new_loops);
    }

  VEC_free (gimple, heap, conds);
}
/* Returns a partition in which all the statements needed for computing
   the strongly connected component C of the RDG are flagged, also
   including the loop exit conditions.  */

static partition_t
build_rdg_partition_for_component (struct graph *rdg, rdgc c)
{
  int i, v;
  partition_t partition = partition_alloc (NULL);
  bitmap loops = BITMAP_ALLOC (NULL);
  bitmap processed = BITMAP_ALLOC (NULL);

  FOR_EACH_VEC_ELT (int, c->vertices, i, v)
    if (!already_processed_vertex_p (processed, v))
      rdg_flag_vertex_and_dependent (rdg, v, partition, loops, processed);

  rdg_flag_loop_exits (rdg, loops, partition, processed);

  BITMAP_FREE (processed);
  BITMAP_FREE (loops);
  return partition;
}
/* Free memory for COMPONENTS.  */

static void
free_rdg_components (VEC (rdgc, heap) *components)
{
  int i;
  rdgc x;

  FOR_EACH_VEC_ELT (rdgc, components, i, x)
    {
      VEC_free (int, heap, x->vertices);
      free (x);
    }

  VEC_free (rdgc, heap, components);
}

/* Build the COMPONENTS vector with the strongly connected components
   of RDG in which the STARTING_VERTICES occur.  */

static void
rdg_build_components (struct graph *rdg, VEC (int, heap) *starting_vertices,
                      VEC (rdgc, heap) **components)
{
  int i, v;
  bitmap saved_components = BITMAP_ALLOC (NULL);
  int n_components = graphds_scc (rdg, NULL);
  VEC (int, heap) **all_components = XNEWVEC (VEC (int, heap) *, n_components);

  for (i = 0; i < n_components; i++)
    all_components[i] = VEC_alloc (int, heap, 3);

  for (i = 0; i < rdg->n_vertices; i++)
    VEC_safe_push (int, heap, all_components[rdg->vertices[i].component], i);

  FOR_EACH_VEC_ELT (int, starting_vertices, i, v)
    {
      int c = rdg->vertices[v].component;

      if (bitmap_set_bit (saved_components, c))
        {
          rdgc x = XCNEW (struct rdg_component);
          x->num = c;
          x->vertices = all_components[c];

          VEC_safe_push (rdgc, heap, *components, x);
        }
    }

  for (i = 0; i < n_components; i++)
    if (!bitmap_bit_p (saved_components, i))
      VEC_free (int, heap, all_components[i]);

  free (all_components);
  BITMAP_FREE (saved_components);
}
/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
   For the moment we detect the memset and memcpy patterns.  */

static void
classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
{
  bitmap_iterator bi;
  unsigned i;
  tree nb_iter;
  data_reference_p single_load, single_store;

  partition->kind = PKIND_NORMAL;
  partition->main_dr = NULL;
  partition->secondary_dr = NULL;

  if (!flag_tree_loop_distribute_patterns)
    return;

  /* Perform general partition disqualification for builtins.  */
  nb_iter = number_of_exit_cond_executions (loop);
  if (!nb_iter || nb_iter == chrec_dont_know)
    return;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);

      if (gimple_has_volatile_ops (stmt))
        return;

      /* If the stmt has uses outside of the loop fail.
         ??? If the stmt is generated in another partition that
         is not created as builtin we can ignore this.  */
      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "not generating builtin, partition has "
                     "scalar uses outside of the loop\n");
          return;
        }
    }

  /* Detect memset and memcpy.  */
  single_load = NULL;
  single_store = NULL;
  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);
      data_reference_p dr;
      unsigned j;

      if (gimple_code (stmt) == GIMPLE_PHI)
        continue;

      /* Any scalar stmts are ok.  */
      if (!gimple_vuse (stmt))
        continue;

      /* Otherwise just regular loads/stores.  */
      if (!gimple_assign_single_p (stmt))
        return;

      /* But exactly one store and/or load.  */
      for (j = 0;
           VEC_iterate (data_reference_p, RDG_DATAREFS (rdg, i), j, dr); ++j)
        {
          if (DR_IS_READ (dr))
            {
              if (single_load != NULL)
                return;
              single_load = dr;
            }
          else
            {
              if (single_store != NULL)
                return;
              single_store = dr;
            }
        }
    }

  if (single_store && !single_load)
    {
      gimple stmt = DR_STMT (single_store);
      tree rhs = gimple_assign_rhs1 (stmt);
      if (!(integer_zerop (rhs)
            || integer_all_onesp (rhs)
            || real_zerop (rhs)
            || (TREE_CODE (rhs) == CONSTRUCTOR
                && !TREE_CLOBBER_P (rhs))
            || (INTEGRAL_TYPE_P (TREE_TYPE (rhs))
                && (TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt)))
                    == TYPE_MODE (unsigned_char_type_node)))))
        return;
      if (TREE_CODE (rhs) == SSA_NAME
          && !SSA_NAME_IS_DEFAULT_DEF (rhs)
          && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
        return;
      if (!adjacent_dr_p (single_store))
        return;
      partition->kind = PKIND_MEMSET;
      partition->main_dr = single_store;
    }
  else if (single_store && single_load)
    {
      gimple store = DR_STMT (single_store);
      gimple load = DR_STMT (single_load);
      /* Direct aggregate copy or via an SSA name temporary.  */
      if (load != store
          && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
        return;
      if (!adjacent_dr_p (single_store)
          || !adjacent_dr_p (single_load)
          || !operand_equal_p (DR_STEP (single_store),
                               DR_STEP (single_load), 0))
        return;
      /* Now check that if there is a dependence this dependence is
         of a suitable form for memmove.  */
      VEC (loop_p, heap) *loops = NULL;
      ddr_p ddr;
      VEC_safe_push (loop_p, heap, loops, loop);
      ddr = initialize_data_dependence_relation (single_load, single_store,
                                                 loops);
      compute_affine_dependence (ddr, loop);
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
        {
          free_dependence_relation (ddr);
          VEC_free (loop_p, heap, loops);
          return;
        }
      if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
        {
          if (DDR_NUM_DIST_VECTS (ddr) == 0)
            {
              free_dependence_relation (ddr);
              VEC_free (loop_p, heap, loops);
              return;
            }
          lambda_vector dist_v;
          FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
            {
              int dist = dist_v[index_in_loop_nest (loop->num,
                                                    DDR_LOOP_NEST (ddr))];
              if (dist > 0 && !DDR_REVERSED_P (ddr))
                {
                  free_dependence_relation (ddr);
                  VEC_free (loop_p, heap, loops);
                  return;
                }
            }
        }
      free_dependence_relation (ddr);
      VEC_free (loop_p, heap, loops);
      partition->kind = PKIND_MEMCPY;
      partition->main_dr = single_store;
      partition->secondary_dr = single_load;
    }
}
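/* Editor's examples of stores classify_partition accepts for
   PKIND_MEMSET (illustrative, not from the original sources):

     a[i] = 0;      zero: memset with value 0
     a[i] = -1;     all bits set: memset with value -1
     c[i] = ch;     any loop-invariant value whose stored type has the
                    mode of a char

   A right-hand side defined inside the loop, or a store that is not
   unit-stride (adjacent_dr_p), keeps the partition PKIND_NORMAL.  */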
/* For a data reference DR, return the declaration of its base
   address or NULL_TREE if the base is not determined.  */

static tree
ref_base_address (data_reference_p dr)
{
  tree base_address = DR_BASE_ADDRESS (dr);
  if (base_address
      && TREE_CODE (base_address) == ADDR_EXPR)
    return TREE_OPERAND (base_address, 0);

  return base_address;
}
/* Returns true when PARTITION1 and PARTITION2 have similar memory
   accesses in RDG.  */

static bool
similar_memory_accesses (struct graph *rdg, partition_t partition1,
                         partition_t partition2)
{
  unsigned i, j, k, l;
  bitmap_iterator bi, bj;
  data_reference_p ref1, ref2;

  /* First check whether in the intersection of the two partitions are
     any loads or stores.  Common loads are the situation that happens
     most often.  */
  EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
        || RDG_MEM_READS_STMT (rdg, i))
      return true;

  /* Then check all data-references against each other.  */
  EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
        || RDG_MEM_READS_STMT (rdg, i))
      EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
        if (RDG_MEM_WRITE_STMT (rdg, j)
            || RDG_MEM_READS_STMT (rdg, j))
          {
            FOR_EACH_VEC_ELT (data_reference_p, RDG_DATAREFS (rdg, i), k, ref1)
              {
                tree base1 = ref_base_address (ref1);
                if (base1)
                  FOR_EACH_VEC_ELT (data_reference_p,
                                    RDG_DATAREFS (rdg, j), l, ref2)
                    if (base1 == ref_base_address (ref2))
                      return true;
              }
          }

  return false;
}
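/* Editor's illustration (not from the original sources): partitions
   containing respectively

     a[i] = ...;    and    ... = a[i-1];

   share the base "a", so similar_memory_accesses returns true and the
   two partitions are fused rather than being distributed into separate
   loops with poor cache behavior.  */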
/* Aggregate several components into a useful partition that is
   registered in the PARTITIONS vector.  Partitions will be
   distributed in different loops.  */

static void
rdg_build_partitions (struct graph *rdg, VEC (rdgc, heap) *components,
                      VEC (int, heap) **other_stores,
                      VEC (partition_t, heap) **partitions, bitmap processed)
{
  int i;
  rdgc x;
  partition_t partition = partition_alloc (NULL);

  FOR_EACH_VEC_ELT (rdgc, components, i, x)
    {
      partition_t np;
      int v = VEC_index (int, x->vertices, 0);

      if (bitmap_bit_p (processed, v))
        continue;

      np = build_rdg_partition_for_component (rdg, x);
      bitmap_ior_into (partition->stmts, np->stmts);
      partition->has_writes = partition_has_writes (np);
      bitmap_ior_into (processed, np->stmts);
      partition_free (np);

      if (partition_has_writes (partition))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "ldist useful partition:\n");
              dump_bitmap (dump_file, partition->stmts);
            }

          VEC_safe_push (partition_t, heap, *partitions, partition);
          partition = partition_alloc (NULL);
        }
    }

  /* Add the nodes from the RDG that were not marked as processed, and
     that are used outside the current loop.  These are scalar
     computations that are not yet part of previous partitions.  */
  for (i = 0; i < rdg->n_vertices; i++)
    if (!bitmap_bit_p (processed, i)
        && rdg_defs_used_in_other_loops_p (rdg, i))
      VEC_safe_push (int, heap, *other_stores, i);

  /* If there are still statements left in the OTHER_STORES array,
     create other components and partitions with these stores and
     their dependences.  */
  if (VEC_length (int, *other_stores) > 0)
    {
      VEC (rdgc, heap) *comps = VEC_alloc (rdgc, heap, 3);
      VEC (int, heap) *foo = VEC_alloc (int, heap, 3);

      rdg_build_components (rdg, *other_stores, &comps);
      rdg_build_partitions (rdg, comps, &foo, partitions, processed);

      VEC_free (int, heap, foo);
      free_rdg_components (comps);
    }

  /* If there is something left in the last partition, save it.  */
  if (bitmap_count_bits (partition->stmts) > 0)
    VEC_safe_push (partition_t, heap, *partitions, partition);
  else
    partition_free (partition);
}
/* Dump to FILE the PARTITIONS.  */

static void
dump_rdg_partitions (FILE *file, VEC (partition_t, heap) *partitions)
{
  int i;
  partition_t partition;

  FOR_EACH_VEC_ELT (partition_t, partitions, i, partition)
    debug_bitmap_file (file, partition->stmts);
}

/* Debug PARTITIONS.  */
extern void debug_rdg_partitions (VEC (partition_t, heap) *);

DEBUG_FUNCTION void
debug_rdg_partitions (VEC (partition_t, heap) *partitions)
{
  dump_rdg_partitions (stderr, partitions);
}
/* Returns the number of read and write operations in the RDG.  */

static int
number_of_rw_in_rdg (struct graph *rdg)
{
  int i, res = 0;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
        ++res;

      if (RDG_MEM_READS_STMT (rdg, i))
        ++res;
    }

  return res;
}

/* Returns the number of read and write operations in a PARTITION of
   the RDG.  */

static int
number_of_rw_in_partition (struct graph *rdg, partition_t partition)
{
  int res = 0;
  unsigned i;
  bitmap_iterator ii;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
        ++res;

      if (RDG_MEM_READS_STMT (rdg, i))
        ++res;
    }

  return res;
}

/* Returns true when one of the PARTITIONS contains all the read or
   write operations of RDG.  */

static bool
partition_contains_all_rw (struct graph *rdg,
                           VEC (partition_t, heap) *partitions)
{
  int i;
  partition_t partition;
  int nrw = number_of_rw_in_rdg (rdg);

  FOR_EACH_VEC_ELT (partition_t, partitions, i, partition)
    if (nrw == number_of_rw_in_partition (rdg, partition))
      return true;

  return false;
}
/* Generate code from STARTING_VERTICES in RDG.  Returns the number of
   distributed loops.  */

static int
ldist_gen (struct loop *loop, struct graph *rdg,
           VEC (int, heap) *starting_vertices)
{
  int i, nbp;
  VEC (rdgc, heap) *components = VEC_alloc (rdgc, heap, 3);
  VEC (partition_t, heap) *partitions = VEC_alloc (partition_t, heap, 3);
  VEC (int, heap) *other_stores = VEC_alloc (int, heap, 3);
  partition_t partition;
  bitmap processed = BITMAP_ALLOC (NULL);
  bool any_builtin;

  remaining_stmts = BITMAP_ALLOC (NULL);
  upstream_mem_writes = BITMAP_ALLOC (NULL);

  for (i = 0; i < rdg->n_vertices; i++)
    {
      bitmap_set_bit (remaining_stmts, i);

      /* Save in OTHER_STORES all the memory writes that are not in
         STARTING_VERTICES.  */
      if (RDG_MEM_WRITE_STMT (rdg, i))
        {
          int v;
          unsigned j;
          bool found = false;

          FOR_EACH_VEC_ELT (int, starting_vertices, j, v)
            if (i == v)
              {
                found = true;
                break;
              }

          if (!found)
            VEC_safe_push (int, heap, other_stores, i);
        }
    }

  mark_nodes_having_upstream_mem_writes (rdg);
  rdg_build_components (rdg, starting_vertices, &components);
  rdg_build_partitions (rdg, components, &other_stores, &partitions,
                        processed);
  BITMAP_FREE (processed);

  any_builtin = false;
  FOR_EACH_VEC_ELT (partition_t, partitions, i, partition)
    {
      classify_partition (loop, rdg, partition);
      any_builtin |= partition_builtin_p (partition);
    }

  /* If we are only distributing patterns fuse all partitions that
     were not properly classified as builtins.  Else fuse partitions
     with similar memory accesses.  */
  if (!flag_tree_loop_distribution)
    {
      partition_t into;
      /* If we did not detect any builtin simply bail out.  */
      if (!any_builtin)
        {
          nbp = 0;
          goto ldist_done;
        }
      /* Only fuse adjacent non-builtin partitions, see PR53616.
         ???  Use dependence information to improve partition ordering.  */
      i = 0;
      do
        {
          for (; VEC_iterate (partition_t, partitions, i, into); ++i)
            if (!partition_builtin_p (into))
              break;
          for (++i; VEC_iterate (partition_t, partitions, i, partition); ++i)
            if (!partition_builtin_p (partition))
              {
                bitmap_ior_into (into->stmts, partition->stmts);
                VEC_ordered_remove (partition_t, partitions, i);
                i--;
              }
            else
              break;
        }
      while ((unsigned) i < VEC_length (partition_t, partitions));
    }
  else
    {
      partition_t into;
      int j;
      for (i = 0; VEC_iterate (partition_t, partitions, i, into); ++i)
        {
          if (partition_builtin_p (into))
            continue;
          for (j = i + 1;
               VEC_iterate (partition_t, partitions, j, partition); ++j)
            {
              if (!partition_builtin_p (partition)
                  /* ???  The following is horribly inefficient,
                     we are re-computing and analyzing data-references
                     of the stmts in the partitions all the time.  */
                  && similar_memory_accesses (rdg, into, partition))
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    {
                      fprintf (dump_file, "fusing partitions\n");
                      dump_bitmap (dump_file, into->stmts);
                      dump_bitmap (dump_file, partition->stmts);
                      fprintf (dump_file, "because they have similar "
                               "memory accesses\n");
                    }
                  bitmap_ior_into (into->stmts, partition->stmts);
                  VEC_ordered_remove (partition_t, partitions, j);
                  j--;
                }
            }
        }
    }

  nbp = VEC_length (partition_t, partitions);
  if (nbp == 0
      || (nbp == 1
          && !partition_builtin_p (VEC_index (partition_t, partitions, 0)))
      || (nbp > 1
          && partition_contains_all_rw (rdg, partitions)))
    {
      nbp = 0;
      goto ldist_done;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg_partitions (dump_file, partitions);

  FOR_EACH_VEC_ELT (partition_t, partitions, i, partition)
    generate_code_for_partition (loop, partition, i < nbp - 1);

 ldist_done:

  BITMAP_FREE (remaining_stmts);
  BITMAP_FREE (upstream_mem_writes);

  FOR_EACH_VEC_ELT (partition_t, partitions, i, partition)
    partition_free (partition);

  VEC_free (int, heap, other_stores);
  VEC_free (partition_t, heap, partitions);
  free_rdg_components (components);
  return nbp;
}
/* Distributes the code from LOOP in such a way that producer
   statements are placed before consumer statements.  When STMTS is
   NULL, performs the maximal distribution; if STMTS is not NULL,
   tries to separate only these statements from the LOOP's body.
   Returns the number of distributed loops.  */

static int
distribute_loop (struct loop *loop, VEC (gimple, heap) *stmts)
{
  int res = 0;
  struct graph *rdg;
  gimple s;
  unsigned i;
  VEC (int, heap) *vertices;
  VEC (ddr_p, heap) *dependence_relations;
  VEC (data_reference_p, heap) *datarefs;
  VEC (loop_p, heap) *loop_nest;

  datarefs = VEC_alloc (data_reference_p, heap, 10);
  dependence_relations = VEC_alloc (ddr_p, heap, 100);
  loop_nest = VEC_alloc (loop_p, heap, 3);
  rdg = build_rdg (loop, &loop_nest, &dependence_relations, &datarefs);

  if (!rdg)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "FIXME: Loop %d not distributed: failed to build the RDG.\n",
                 loop->num);

      free_dependence_relations (dependence_relations);
      free_data_refs (datarefs);
      VEC_free (loop_p, heap, loop_nest);
      return res;
    }

  vertices = VEC_alloc (int, heap, 3);

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg (dump_file, rdg);

  FOR_EACH_VEC_ELT (gimple, stmts, i, s)
    {
      int v = rdg_vertex_for_stmt (rdg, s);

      if (v >= 0)
        {
          VEC_safe_push (int, heap, vertices, v);

          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file,
                     "ldist asked to generate code for vertex %d\n", v);
        }
    }

  res = ldist_gen (loop, rdg, vertices);
  VEC_free (int, heap, vertices);
  free_rdg (rdg);
  free_dependence_relations (dependence_relations);
  free_data_refs (datarefs);
  VEC_free (loop_p, heap, loop_nest);
  return res;
}
/* Distribute all loops in the current function.  */

static unsigned int
tree_loop_distribution (void)
{
  struct loop *loop;
  loop_iterator li;
  bool changed = false;
  basic_block bb;

  FOR_ALL_BB (bb)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        gimple_set_uid (gsi_stmt (gsi), -1);
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        gimple_set_uid (gsi_stmt (gsi), -1);
    }

  /* We can at the moment only distribute non-nested loops, thus restrict
     walking to innermost loops.  */
  FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
    {
      VEC (gimple, heap) *work_list = NULL;
      basic_block *bbs;
      int num = loop->num;
      int nb_generated_loops = 0;
      unsigned int i;

      /* If the loop doesn't have a single exit we will fail anyway,
         so do that early.  */
      if (!single_exit (loop))
        continue;

      /* Only distribute loops with a header and latch for now.  */
      if (loop->num_nodes > 2)
        continue;

      /* Initialize the worklist with stmts we seed the partitions with.  */
      bbs = get_loop_body_in_dom_order (loop);
      for (i = 0; i < loop->num_nodes; ++i)
        {
          gimple_stmt_iterator gsi;
          for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
            {
              gimple stmt = gsi_stmt (gsi);
              /* Only distribute stores for now.
                 ???  We should also try to distribute scalar reductions,
                 thus SSA defs that have scalar uses outside of the loop.  */
              if (!gimple_assign_single_p (stmt)
                  || is_gimple_reg (gimple_assign_lhs (stmt)))
                continue;

              VEC_safe_push (gimple, heap, work_list, stmt);
            }
        }
      free (bbs);

      if (VEC_length (gimple, work_list) > 0)
        nb_generated_loops = distribute_loop (loop, work_list);

      if (nb_generated_loops > 0)
        changed = true;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          if (nb_generated_loops > 1)
            fprintf (dump_file, "Loop %d distributed: split to %d loops.\n",
                     num, nb_generated_loops);
          else
            fprintf (dump_file, "Loop %d is the same.\n", num);
        }

      VEC_free (gimple, heap, work_list);
    }

  if (changed)
    {
      mark_virtual_operands_for_renaming (cfun);
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }

#ifdef ENABLE_CHECKING
  verify_loop_structure ();
#endif

  return 0;
}
static bool
gate_tree_loop_distribution (void)
{
  return flag_tree_loop_distribution
    || flag_tree_loop_distribute_patterns;
}

struct gimple_opt_pass pass_loop_distribution =
{
 {
  GIMPLE_PASS,
  "ldist",                      /* name */
  gate_tree_loop_distribution,  /* gate */
  tree_loop_distribution,       /* execute */
  NULL,                         /* sub */
  NULL,                         /* next */
  0,                            /* static_pass_number */
  TV_TREE_LOOP_DISTRIBUTION,    /* tv_id */
  PROP_cfg | PROP_ssa,          /* properties_required */
  0,                            /* properties_provided */
  0,                            /* properties_destroyed */
  0,                            /* todo_flags_start */
  TODO_ggc_collect
  | TODO_verify_ssa             /* todo_flags_finish */
 }
};