/* Loop distribution.
   Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.
   Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
   and Sebastian Pop <sebastian.pop@amd.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass performs loop distribution: for example, the loop

   |DO I = 2, N
   |    A(I) = B(I) + C
   |    D(I) = A(I-1)*E
   |ENDDO

   is transformed to

   |DOALL I = 2, N
   |   A(I) = B(I) + C
   |ENDDO
   |
   |DOALL I = 2, N
   |   D(I) = A(I-1)*E
   |ENDDO

   This pass uses an RDG, Reduced Dependence Graph built on top of the
   data dependence relations.  The RDG is then topologically sorted to
   obtain a map of information producers/consumers based on which it
   generates the new loops.  */
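/* In addition, when -ftree-loop-distribute-patterns is enabled, distributed
   sub-loops that match a known memory pattern are emitted as builtin calls
   instead of loops; see classify_partition below.  For example (a sketch):

   |DO I = 1, N
   |    A(I) = 0
   |ENDDO

   is emitted as a single call to memset.  */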
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree-flow.h"
#include "cfgloop.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-pass.h"
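/* The kind of code generated for a partition: either the original loop
   statements (PKIND_NORMAL) or a call to a memory builtin.  */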
enum partition_kind { PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY };

typedef struct partition_s
{
  bitmap stmts;
  bool has_writes;
  enum partition_kind kind;
  /* Data references a kind != PKIND_NORMAL partition is about.  */
  data_reference_p main_dr;
  data_reference_p secondary_dr;
} *partition_t;

DEF_VEC_P (partition_t);
DEF_VEC_ALLOC_P (partition_t, heap);
/* Allocate and initialize a partition from BITMAP.  */

static partition_t
partition_alloc (bitmap stmts)
{
  partition_t partition = XCNEW (struct partition_s);
  partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
  partition->has_writes = false;
  partition->kind = PKIND_NORMAL;
  return partition;
}

/* Free PARTITION.  */

static void
partition_free (partition_t partition)
{
  BITMAP_FREE (partition->stmts);
  free (partition);
}

/* Returns true if the partition can be generated as a builtin.  */

static bool
partition_builtin_p (partition_t partition)
{
  return partition->kind != PKIND_NORMAL;
}
/* Returns true if the partition contains any writes.  */

static bool
partition_has_writes (partition_t partition)
{
  return partition->has_writes;
}
/* If bit I is not set, it means that this node represents an
   operation that has already been performed, and that should not be
   performed again.  This is the subgraph of remaining important
   computations that is passed to the DFS algorithm to avoid including
   the same stores several times in different loops.  */
static bitmap remaining_stmts;

/* A node of the RDG is marked in this bitmap when it has as a
   predecessor a node that writes to memory.  */
static bitmap upstream_mem_writes;
/* Returns true when DEF is an SSA_NAME defined in LOOP and used after
   the LOOP.  */

static bool
ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
{
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
    if (loop != loop_containing_stmt (USE_STMT (use_p)))
      return true;

  return false;
}
/* Returns true when STMT defines a scalar variable used after the
   loop LOOP.  */

static bool
stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple stmt)
{
  def_operand_p def_p;
  ssa_op_iter op_iter;

  if (gimple_code (stmt) == GIMPLE_PHI)
    return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);

  FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
    if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
      return true;

  return false;
}
/* Update the PHI nodes of NEW_LOOP.  NEW_LOOP is a duplicate of
   ORIG_LOOP.  */

static void
update_phis_for_loop_copy (struct loop *orig_loop, struct loop *new_loop)
{
  tree new_ssa_name;
  gimple_stmt_iterator si_new, si_orig;
  edge orig_loop_latch = loop_latch_edge (orig_loop);
  edge orig_entry_e = loop_preheader_edge (orig_loop);
  edge new_loop_entry_e = loop_preheader_edge (new_loop);

  /* Scan the phis in the headers of the old and new loops
     (they are organized in exactly the same order).  */
  for (si_new = gsi_start_phis (new_loop->header),
       si_orig = gsi_start_phis (orig_loop->header);
       !gsi_end_p (si_new) && !gsi_end_p (si_orig);
       gsi_next (&si_new), gsi_next (&si_orig))
    {
      tree def;
      source_location locus;
      gimple phi_new = gsi_stmt (si_new);
      gimple phi_orig = gsi_stmt (si_orig);

      /* Add the first phi argument for the phi in NEW_LOOP (the one
         associated with the entry of NEW_LOOP).  */
      def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_entry_e);
      locus = gimple_phi_arg_location_from_edge (phi_orig, orig_entry_e);
      add_phi_arg (phi_new, def, new_loop_entry_e, locus);

      /* Add the second phi argument for the phi in NEW_LOOP (the one
         associated with the latch of NEW_LOOP).  */
      def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
      locus = gimple_phi_arg_location_from_edge (phi_orig, orig_loop_latch);

      if (TREE_CODE (def) == SSA_NAME)
        {
          new_ssa_name = get_current_def (def);

          if (!new_ssa_name)
            /* This only happens if there are no definitions inside the
               loop.  Use the invariant in the new loop as is.  */
            new_ssa_name = def;
        }
      else
        /* Could be an integer.  */
        new_ssa_name = def;

      add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop), locus);
    }
}
/* Return a copy of LOOP placed before LOOP.  */

static struct loop *
copy_loop_before (struct loop *loop)
{
  struct loop *res;
  edge preheader = loop_preheader_edge (loop);

  initialize_original_copy_tables ();
  res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, preheader);
  gcc_assert (res != NULL);
  free_original_copy_tables ();

  update_phis_for_loop_copy (loop, res);
  rename_variables_in_loop (res);

  return res;
}
/* Creates an empty basic block after LOOP.  */

static void
create_bb_after_loop (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return;

  split_edge (exit);
}
/* Generate code for PARTITION from the code in LOOP.  The loop is
   copied when COPY_P is true.  All the statements not flagged in the
   PARTITION bitmap are removed from the loop or from its copy.  The
   statements are indexed in sequence inside a basic block, and the
   basic blocks of a loop are taken in dom order.  */

static void
generate_loops_for_partition (struct loop *loop, partition_t partition,
                              bool copy_p)
{
  unsigned i, x;
  gimple_stmt_iterator bsi;
  basic_block *bbs;

  if (copy_p)
    {
      loop = copy_loop_before (loop);
      gcc_assert (loop != NULL);
      create_preheader (loop, CP_SIMPLE_PREHEADERS);
      create_bb_after_loop (loop);
    }

  /* Remove stmts not in the PARTITION bitmap.  The order in which we
     visit the phi nodes and the statements is exactly as in
     stmts_from_loop.  */
  bbs = get_loop_body_in_dom_order (loop);

  if (MAY_HAVE_DEBUG_STMTS)
    for (x = 0, i = 0; i < loop->num_nodes; i++)
      {
        basic_block bb = bbs[i];

        for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
          if (!bitmap_bit_p (partition->stmts, x++))
            reset_debug_uses (gsi_stmt (bsi));

        for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
          {
            gimple stmt = gsi_stmt (bsi);
            if (gimple_code (stmt) != GIMPLE_LABEL
                && !is_gimple_debug (stmt)
                && !bitmap_bit_p (partition->stmts, x++))
              reset_debug_uses (stmt);
          }
      }

  for (x = 0, i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
        if (!bitmap_bit_p (partition->stmts, x++))
          {
            gimple phi = gsi_stmt (bsi);
            if (!is_gimple_reg (gimple_phi_result (phi)))
              mark_virtual_phi_result_for_renaming (phi);
            remove_phi_node (&bsi, true);
          }
        else
          gsi_next (&bsi);

      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
        {
          gimple stmt = gsi_stmt (bsi);
          if (gimple_code (stmt) != GIMPLE_LABEL
              && !is_gimple_debug (stmt)
              && !bitmap_bit_p (partition->stmts, x++))
            {
              unlink_stmt_vdef (stmt);
              gsi_remove (&bsi, true);
              release_defs (stmt);
            }
          else
            gsi_next (&bsi);
        }
    }

  free (bbs);
}
/* Build the size argument for a memory operation call.  */

static tree
build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter)
{
  tree size;
  size = fold_build2_loc (loc, MULT_EXPR, sizetype,
                          fold_convert_loc (loc, sizetype, nb_iter),
                          TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
  return fold_convert_loc (loc, size_type_node, size);
}
/* Build an address argument for a memory operation call.  */

static tree
build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
{
  tree addr_base;

  addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
  addr_base = fold_convert_loc (loc, sizetype, addr_base);

  /* Test for a negative stride, iterating over every element.  */
  if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
    {
      addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
                                  fold_convert_loc (loc, sizetype, nb_bytes));
      addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
                                  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
    }

  return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
}
/* Generate a call to memset for PARTITION in LOOP.  */

static void
generate_memset_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree nb_iter, mem, fn, nb_bytes;
  location_t loc;
  tree val;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);
  if (gimple_bb (stmt) == loop->latch)
    nb_iter = number_of_latch_executions (loop);
  else
    nb_iter = number_of_exit_cond_executions (loop);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, nb_iter);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
                                       false, GSI_CONTINUE_LINKING);
  mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
                                  false, GSI_CONTINUE_LINKING);

  /* This exactly matches the pattern recognition in classify_partition.  */
  val = gimple_assign_rhs1 (stmt);
  if (integer_zerop (val)
      || real_zerop (val)
      || TREE_CODE (val) == CONSTRUCTOR)
    val = integer_zero_node;
  else if (integer_all_onesp (val))
    val = build_int_cst (integer_type_node, -1);
  else
    {
      if (TREE_CODE (val) == INTEGER_CST)
        val = fold_convert (integer_type_node, val);
      else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
        {
          gimple cstmt;
          tree tem = create_tmp_reg (integer_type_node, NULL);
          tem = make_ssa_name (tem, NULL);
          cstmt = gimple_build_assign_with_ops (NOP_EXPR, tem, val, NULL_TREE);
          gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
          val = tem;
        }
    }

  fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
  fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "generated memset");
      if (integer_zerop (val))
        fprintf (dump_file, " zero\n");
      else if (integer_all_onesp (val))
        fprintf (dump_file, " minus one\n");
      else
        fprintf (dump_file, "\n");
    }
}
/* Generate a call to memcpy for PARTITION in LOOP.  */

static void
generate_memcpy_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree nb_iter, dest, src, fn, nb_bytes;
  location_t loc;
  enum built_in_function kind;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);
  if (gimple_bb (stmt) == loop->latch)
    nb_iter = number_of_latch_executions (loop);
  else
    nb_iter = number_of_exit_cond_executions (loop);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, nb_iter);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
                                       false, GSI_CONTINUE_LINKING);
  dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
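  /* If the source and destination pointers may alias we cannot use memcpy,
     whose behavior is undefined for overlapping regions; fall back to
     memmove in that case.  */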
  if (ptr_derefs_may_alias_p (dest, src))
    kind = BUILT_IN_MEMMOVE;
  else
    kind = BUILT_IN_MEMCPY;

  dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
                                   false, GSI_CONTINUE_LINKING);
  src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
                                  false, GSI_CONTINUE_LINKING);
  fn = build_fold_addr_expr (builtin_decl_implicit (kind));
  fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (kind == BUILT_IN_MEMCPY)
        fprintf (dump_file, "generated memcpy\n");
      else
        fprintf (dump_file, "generated memmove\n");
    }
}
/* Remove and destroy the loop LOOP.  */

static void
destroy_loop (struct loop *loop)
{
  unsigned nbbs = loop->num_nodes;
  edge exit = single_exit (loop);
  basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
  basic_block *bbs;
  unsigned i;

  bbs = get_loop_body_in_dom_order (loop);

  redirect_edge_pred (exit, src);
  exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
  exit->flags |= EDGE_FALLTHRU;
  cancel_loop_tree (loop);
  rescan_loop_exit (exit, false, true);

  for (i = 0; i < nbbs; i++)
    {
      /* We have made sure to not leave any dangling uses of SSA
         names defined in the loop, with the exception of virtuals.
         Make sure we replace all uses of virtual defs that will remain
         outside of the loop with the bare symbol as delete_basic_block
         will release them.  */
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple phi = gsi_stmt (gsi);
          if (!is_gimple_reg (gimple_phi_result (phi)))
            mark_virtual_phi_result_for_renaming (phi);
        }
      for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple stmt = gsi_stmt (gsi);
          tree vdef = gimple_vdef (stmt);
          if (vdef && TREE_CODE (vdef) == SSA_NAME)
            mark_virtual_operand_for_renaming (vdef);
        }
      delete_basic_block (bbs[i]);
    }
  free (bbs);

  set_immediate_dominator (CDI_DOMINATORS, dest,
                           recompute_dominator (CDI_DOMINATORS, dest));
}
/* Generates code for PARTITION.  */

static void
generate_code_for_partition (struct loop *loop,
                             partition_t partition, bool copy_p)
{
  switch (partition->kind)
    {
    case PKIND_MEMSET:
      generate_memset_builtin (loop, partition);
      /* If this is the last partition for which we generate code, we have
         to destroy the loop.  */
      if (!copy_p)
        destroy_loop (loop);
      break;

    case PKIND_MEMCPY:
      generate_memcpy_builtin (loop, partition);
      /* If this is the last partition for which we generate code, we have
         to destroy the loop.  */
      if (!copy_p)
        destroy_loop (loop);
      break;

    case PKIND_NORMAL:
      generate_loops_for_partition (loop, partition, copy_p);
      break;

    default:
      gcc_unreachable ();
    }
}
/* Returns true if the node V of RDG cannot be recomputed.  */

static bool
rdg_cannot_recompute_vertex_p (struct graph *rdg, int v)
{
  if (RDG_MEM_WRITE_STMT (rdg, v))
    return true;

  return false;
}
/* Returns true when the vertex V has already been generated in the
   current partition (V is in PROCESSED), or when V belongs to another
   partition and cannot be recomputed (V is not in REMAINING_STMTS).  */

static inline bool
already_processed_vertex_p (bitmap processed, int v)
{
  return (bitmap_bit_p (processed, v)
          || !bitmap_bit_p (remaining_stmts, v));
}
/* Returns NULL when there is no anti-dependence among the successors
   of vertex V, otherwise returns the edge with the anti-dep.  */

static struct graph_edge *
has_anti_dependence (struct vertex *v)
{
  struct graph_edge *e;

  if (v->succ)
    for (e = v->succ; e; e = e->succ_next)
      if (RDGE_TYPE (e) == anti_dd)
        return e;

  return NULL;
}
/* Returns true when one of the predecessors of V is marked as having
   upstream memory writes.  */

static bool
predecessor_has_mem_write (struct graph *rdg, struct vertex *v)
{
  struct graph_edge *e;

  if (v->pred)
    for (e = v->pred; e; e = e->pred_next)
      if (bitmap_bit_p (upstream_mem_writes, e->src)
          /* Don't consider flow channels: a write to memory followed
             by a read from memory.  These channels allow splitting the
             RDG into different partitions.  */
          && !RDG_MEM_WRITE_STMT (rdg, e->src))
        return true;

  return false;
}
/* Initializes the upstream_mem_writes bitmap following the
   information from RDG.  */

static void
mark_nodes_having_upstream_mem_writes (struct graph *rdg)
{
  int v, x;
  bitmap seen = BITMAP_ALLOC (NULL);

  for (v = rdg->n_vertices - 1; v >= 0; v--)
    if (!bitmap_bit_p (seen, v))
      {
        unsigned i;
        VEC (int, heap) *nodes = VEC_alloc (int, heap, 3);

        graphds_dfs (rdg, &v, 1, &nodes, false, NULL);

        FOR_EACH_VEC_ELT (int, nodes, i, x)
          {
            if (!bitmap_set_bit (seen, x))
              continue;

            if (RDG_MEM_WRITE_STMT (rdg, x)
                || predecessor_has_mem_write (rdg, &(rdg->vertices[x]))
                /* In an anti-dependence the read should occur before
                   the write; this is why both the read and the write
                   should be placed in the same partition.  */
                || has_anti_dependence (&(rdg->vertices[x])))
              {
                bitmap_set_bit (upstream_mem_writes, x);
              }
          }

        VEC_free (int, heap, nodes);
      }
}
/* Returns true when vertex U has a memory write node as a predecessor
   in RDG.  */

static bool
has_upstream_mem_writes (int u)
{
  return bitmap_bit_p (upstream_mem_writes, u);
}

static void rdg_flag_vertex_and_dependent (struct graph *, int, partition_t,
                                           bitmap, bitmap);
/* Flag all the uses of U, stopping the recursion according to the
   information recorded in upstream_mem_writes.  */

static void
rdg_flag_uses (struct graph *rdg, int u, partition_t partition, bitmap loops,
               bitmap processed)
{
  use_operand_p use_p;
  struct vertex *x = &(rdg->vertices[u]);
  gimple stmt = RDGV_STMT (x);
  struct graph_edge *anti_dep = has_anti_dependence (x);

  /* Keep in the same partition the destination of an antidependence,
     because this is a store to the exact same location.  Putting this
     in another partition is bad for cache locality.  */
  if (anti_dep)
    {
      int v = anti_dep->dest;

      if (!already_processed_vertex_p (processed, v))
        rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
                                       processed);
    }

  if (gimple_code (stmt) != GIMPLE_PHI)
    {
      if ((use_p = gimple_vuse_op (stmt)) != NULL_USE_OPERAND_P)
        {
          tree use = USE_FROM_PTR (use_p);

          if (TREE_CODE (use) == SSA_NAME)
            {
              gimple def_stmt = SSA_NAME_DEF_STMT (use);
              int v = rdg_vertex_for_stmt (rdg, def_stmt);

              if (v >= 0
                  && !already_processed_vertex_p (processed, v))
                rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
                                               processed);
            }
        }
    }

  if (is_gimple_assign (stmt) && has_upstream_mem_writes (u))
    {
      tree op0 = gimple_assign_lhs (stmt);

      /* Scalar channels don't have enough space for transmitting data
         between tasks, unless we add more storage by privatizing.  */
      if (is_gimple_reg (op0))
        {
          use_operand_p use_p;
          imm_use_iterator iter;

          FOR_EACH_IMM_USE_FAST (use_p, iter, op0)
            {
              int v = rdg_vertex_for_stmt (rdg, USE_STMT (use_p));

              if (!already_processed_vertex_p (processed, v))
                rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
                                               processed);
            }
        }
    }
}
/* Flag V from RDG as part of PARTITION, and also flag its loop number
   in LOOPS.  */

static void
rdg_flag_vertex (struct graph *rdg, int v, partition_t partition, bitmap loops)
{
  struct loop *loop;

  if (!bitmap_set_bit (partition->stmts, v))
    return;

  loop = loop_containing_stmt (RDG_STMT (rdg, v));
  bitmap_set_bit (loops, loop->num);

  if (rdg_cannot_recompute_vertex_p (rdg, v))
    {
      partition->has_writes = true;
      bitmap_clear_bit (remaining_stmts, v);
    }
}
/* Flag in the bitmap PARTITION the vertex V and all its predecessors.
   Also flag their loop number in LOOPS.  */

static void
rdg_flag_vertex_and_dependent (struct graph *rdg, int v, partition_t partition,
                               bitmap loops, bitmap processed)
{
  unsigned i;
  VEC (int, heap) *nodes = VEC_alloc (int, heap, 3);
  int x;

  bitmap_set_bit (processed, v);
  rdg_flag_uses (rdg, v, partition, loops, processed);
  graphds_dfs (rdg, &v, 1, &nodes, false, remaining_stmts);
  rdg_flag_vertex (rdg, v, partition, loops);

  FOR_EACH_VEC_ELT (int, nodes, i, x)
    if (!already_processed_vertex_p (processed, x))
      rdg_flag_vertex_and_dependent (rdg, x, partition, loops, processed);

  VEC_free (int, heap, nodes);
}
/* Initialize CONDS with all the condition statements from the exit
   edges of LOOP.  */

static void
collect_condition_stmts (struct loop *loop, VEC (gimple, heap) **conds)
{
  unsigned i;
  edge e;
  VEC (edge, heap) *exits = get_loop_exit_edges (loop);

  FOR_EACH_VEC_ELT (edge, exits, i, e)
    {
      gimple cond = last_stmt (e->src);

      if (cond)
        VEC_safe_push (gimple, heap, *conds, cond);
    }

  VEC_free (edge, heap, exits);
}
/* Add to PARTITION all the exit condition statements for LOOPS
   together with all their dependent statements determined from
   RDG.  */

static void
rdg_flag_loop_exits (struct graph *rdg, bitmap loops, partition_t partition,
                     bitmap processed)
{
  unsigned i;
  bitmap_iterator bi;
  VEC (gimple, heap) *conds = VEC_alloc (gimple, heap, 3);

  EXECUTE_IF_SET_IN_BITMAP (loops, 0, i, bi)
    collect_condition_stmts (get_loop (i), &conds);

  while (!VEC_empty (gimple, conds))
    {
      gimple cond = VEC_pop (gimple, conds);
      int v = rdg_vertex_for_stmt (rdg, cond);
      bitmap new_loops = BITMAP_ALLOC (NULL);

      if (!already_processed_vertex_p (processed, v))
        rdg_flag_vertex_and_dependent (rdg, v, partition, new_loops, processed);

      EXECUTE_IF_SET_IN_BITMAP (new_loops, 0, i, bi)
        if (bitmap_set_bit (loops, i))
          collect_condition_stmts (get_loop (i), &conds);

      BITMAP_FREE (new_loops);
    }

  VEC_free (gimple, heap, conds);
}
/* Returns a partition in which all the statements needed for computing
   the strongly connected component C of the RDG are flagged, also
   including the loop exit conditions.  */

static partition_t
build_rdg_partition_for_component (struct graph *rdg, rdgc c)
{
  int i, v;
  partition_t partition = partition_alloc (NULL);
  bitmap loops = BITMAP_ALLOC (NULL);
  bitmap processed = BITMAP_ALLOC (NULL);

  FOR_EACH_VEC_ELT (int, c->vertices, i, v)
    if (!already_processed_vertex_p (processed, v))
      rdg_flag_vertex_and_dependent (rdg, v, partition, loops, processed);

  rdg_flag_loop_exits (rdg, loops, partition, processed);

  BITMAP_FREE (processed);
  BITMAP_FREE (loops);
  return partition;
}
/* Free memory for COMPONENTS.  */

static void
free_rdg_components (VEC (rdgc, heap) *components)
{
  int i;
  rdgc x;

  FOR_EACH_VEC_ELT (rdgc, components, i, x)
    {
      VEC_free (int, heap, x->vertices);
      free (x);
    }

  VEC_free (rdgc, heap, components);
}
/* Build the COMPONENTS vector with the strongly connected components
   of RDG in which the STARTING_VERTICES occur.  */

static void
rdg_build_components (struct graph *rdg, VEC (int, heap) *starting_vertices,
                      VEC (rdgc, heap) **components)
{
  int i, v;
  bitmap saved_components = BITMAP_ALLOC (NULL);
  int n_components = graphds_scc (rdg, NULL);
  VEC (int, heap) **all_components = XNEWVEC (VEC (int, heap) *, n_components);

  for (i = 0; i < n_components; i++)
    all_components[i] = VEC_alloc (int, heap, 3);

  for (i = 0; i < rdg->n_vertices; i++)
    VEC_safe_push (int, heap, all_components[rdg->vertices[i].component], i);

  FOR_EACH_VEC_ELT (int, starting_vertices, i, v)
    {
      int c = rdg->vertices[v].component;

      if (bitmap_set_bit (saved_components, c))
        {
          rdgc x = XCNEW (struct rdg_component);
          x->num = c;
          x->vertices = all_components[c];

          VEC_safe_push (rdgc, heap, *components, x);
        }
    }

  for (i = 0; i < n_components; i++)
    if (!bitmap_bit_p (saved_components, i))
      VEC_free (int, heap, all_components[i]);

  free (all_components);
  BITMAP_FREE (saved_components);
}
/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
   For the moment we detect memset and memcpy/memmove patterns.  */
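/* For example (a sketch of the shapes recognized below, assuming
   unit-stride accesses):

     for (i = 0; i < n; ++i)  a[i] = 0;      is classified PKIND_MEMSET,
     for (i = 0; i < n; ++i)  a[i] = b[i];   is classified PKIND_MEMCPY.  */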
static void
classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
{
  bitmap_iterator bi;
  unsigned i;
  tree nb_iter;
  data_reference_p single_load, single_store;

  partition->kind = PKIND_NORMAL;
  partition->main_dr = NULL;
  partition->secondary_dr = NULL;

  if (!flag_tree_loop_distribute_patterns)
    return;

  /* Perform general partition disqualification for builtins.  */
  nb_iter = number_of_exit_cond_executions (loop);
  if (!nb_iter || nb_iter == chrec_dont_know)
    return;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);

      if (gimple_has_volatile_ops (stmt))
        return;

      /* If the stmt has uses outside of the loop fail.
         ??? If the stmt is generated in another partition that
         is not created as builtin we can ignore this.  */
      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "not generating builtin, partition has "
                     "scalar uses outside of the loop\n");
          return;
        }
    }

  /* Detect memset and memcpy.  */
  single_load = NULL;
  single_store = NULL;
  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);
      data_reference_p dr;
      unsigned j;

      if (gimple_code (stmt) == GIMPLE_PHI)
        continue;

      /* Any scalar stmts are ok.  */
      if (!gimple_vuse (stmt))
        continue;

      /* Otherwise just regular loads/stores.  */
      if (!gimple_assign_single_p (stmt))
        return;

      /* But exactly one store and/or load.  */
      for (j = 0;
           VEC_iterate (data_reference_p, RDG_DATAREFS (rdg, i), j, dr); ++j)
        {
          if (DR_IS_READ (dr))
            {
              if (single_load != NULL)
                return;
              single_load = dr;
            }
          else
            {
              if (single_store != NULL)
                return;
              single_store = dr;
            }
        }
    }

  if (single_store && !single_load)
    {
      gimple stmt = DR_STMT (single_store);
      tree rhs = gimple_assign_rhs1 (stmt);
      if (!(integer_zerop (rhs)
            || integer_all_onesp (rhs)
            || real_zerop (rhs)
            || (TREE_CODE (rhs) == CONSTRUCTOR
                && !TREE_CLOBBER_P (rhs))
            || (INTEGRAL_TYPE_P (TREE_TYPE (rhs))
                && (TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt)))
                    == TYPE_MODE (unsigned_char_type_node)))))
        return;
      if (TREE_CODE (rhs) == SSA_NAME
          && !SSA_NAME_IS_DEFAULT_DEF (rhs)
          && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
        return;
      if (!adjacent_dr_p (single_store))
        return;
      partition->kind = PKIND_MEMSET;
      partition->main_dr = single_store;
    }
  else if (single_store && single_load)
    {
      gimple store = DR_STMT (single_store);
      gimple load = DR_STMT (single_load);
      /* Direct aggregate copy or via an SSA name temporary.  */
      if (load != store
          && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
        return;
      if (!adjacent_dr_p (single_store)
          || !adjacent_dr_p (single_load)
          || !operand_equal_p (DR_STEP (single_store),
                               DR_STEP (single_load), 0))
        return;
      partition->kind = PKIND_MEMCPY;
      partition->main_dr = single_store;
      partition->secondary_dr = single_load;
    }
}
/* For a data reference DR, return the declaration of its base
   address or NULL_TREE if the base is not determined.  */

static tree
ref_base_address (data_reference_p dr)
{
  tree base_address = DR_BASE_ADDRESS (dr);
  if (base_address
      && TREE_CODE (base_address) == ADDR_EXPR)
    return TREE_OPERAND (base_address, 0);

  return base_address;
}
/* Returns true when PARTITION1 and PARTITION2 have similar memory
   accesses in RDG.  */

static bool
similar_memory_accesses (struct graph *rdg, partition_t partition1,
                         partition_t partition2)
{
  unsigned i, j, k, l;
  bitmap_iterator bi, bj;
  data_reference_p ref1, ref2;

  /* First check whether the intersection of the two partitions
     contains any loads or stores.  Common loads are the most
     frequent case.  */
  EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
        || RDG_MEM_READS_STMT (rdg, i))
      return true;

  /* Then check all data-references against each other.  */
  EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
        || RDG_MEM_READS_STMT (rdg, i))
      EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
        if (RDG_MEM_WRITE_STMT (rdg, j)
            || RDG_MEM_READS_STMT (rdg, j))
          {
            FOR_EACH_VEC_ELT (data_reference_p, RDG_DATAREFS (rdg, i), k, ref1)
              {
                tree base1 = ref_base_address (ref1);
                if (base1)
                  FOR_EACH_VEC_ELT (data_reference_p,
                                    RDG_DATAREFS (rdg, j), l, ref2)
                    if (base1 == ref_base_address (ref2))
                      return true;
              }
          }

  return false;
}
/* Aggregate several components into a useful partition that is
   registered in the PARTITIONS vector.  Partitions will be
   distributed in different loops.  */

static void
rdg_build_partitions (struct graph *rdg, VEC (rdgc, heap) *components,
                      VEC (int, heap) **other_stores,
                      VEC (partition_t, heap) **partitions, bitmap processed)
{
  int i;
  rdgc x;
  partition_t partition = partition_alloc (NULL);

  FOR_EACH_VEC_ELT (rdgc, components, i, x)
    {
      partition_t np;
      int v = VEC_index (int, x->vertices, 0);

      if (bitmap_bit_p (processed, v))
        continue;

      np = build_rdg_partition_for_component (rdg, x);
      bitmap_ior_into (partition->stmts, np->stmts);
      partition->has_writes = partition_has_writes (np);
      bitmap_ior_into (processed, np->stmts);
      partition_free (np);

      if (partition_has_writes (partition))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "ldist useful partition:\n");
              dump_bitmap (dump_file, partition->stmts);
            }

          VEC_safe_push (partition_t, heap, *partitions, partition);
          partition = partition_alloc (NULL);
        }
    }

  /* Add the nodes from the RDG that were not marked as processed, and
     that are used outside the current loop.  These are scalar
     computations that are not yet part of previous partitions.  */
  for (i = 0; i < rdg->n_vertices; i++)
    if (!bitmap_bit_p (processed, i)
        && rdg_defs_used_in_other_loops_p (rdg, i))
      VEC_safe_push (int, heap, *other_stores, i);

  /* If there are still statements left in the OTHER_STORES array,
     create other components and partitions with these stores and
     their dependences.  */
  if (VEC_length (int, *other_stores) > 0)
    {
      VEC (rdgc, heap) *comps = VEC_alloc (rdgc, heap, 3);
      VEC (int, heap) *foo = VEC_alloc (int, heap, 3);

      rdg_build_components (rdg, *other_stores, &comps);
      rdg_build_partitions (rdg, comps, &foo, partitions, processed);

      VEC_free (int, heap, foo);
      free_rdg_components (comps);
    }

  /* If there is something left in the last partition, save it.  */
  if (bitmap_count_bits (partition->stmts) > 0)
    VEC_safe_push (partition_t, heap, *partitions, partition);
  else
    partition_free (partition);
}
/* Dump to FILE the PARTITIONS.  */

static void
dump_rdg_partitions (FILE *file, VEC (partition_t, heap) *partitions)
{
  int i;
  partition_t partition;

  FOR_EACH_VEC_ELT (partition_t, partitions, i, partition)
    debug_bitmap_file (file, partition->stmts);
}

/* Debug PARTITIONS.  */
extern void debug_rdg_partitions (VEC (partition_t, heap) *);

DEBUG_FUNCTION void
debug_rdg_partitions (VEC (partition_t, heap) *partitions)
{
  dump_rdg_partitions (stderr, partitions);
}
/* Returns the number of read and write operations in the RDG.  */

static int
number_of_rw_in_rdg (struct graph *rdg)
{
  int i, res = 0;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
        ++res;

      if (RDG_MEM_READS_STMT (rdg, i))
        ++res;
    }

  return res;
}
/* Returns the number of read and write operations in a PARTITION of
   the RDG.  */

static int
number_of_rw_in_partition (struct graph *rdg, partition_t partition)
{
  int res = 0;
  unsigned i;
  bitmap_iterator ii;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
        ++res;

      if (RDG_MEM_READS_STMT (rdg, i))
        ++res;
    }

  return res;
}
/* Returns true when one of the PARTITIONS contains all the read or
   write operations of RDG.  */

static bool
partition_contains_all_rw (struct graph *rdg,
                           VEC (partition_t, heap) *partitions)
{
  int i;
  partition_t partition;
  int nrw = number_of_rw_in_rdg (rdg);

  FOR_EACH_VEC_ELT (partition_t, partitions, i, partition)
    if (nrw == number_of_rw_in_partition (rdg, partition))
      return true;

  return false;
}
/* Generate code from STARTING_VERTICES in RDG.  Returns the number of
   distributed loops.  */

static int
ldist_gen (struct loop *loop, struct graph *rdg,
           VEC (int, heap) *starting_vertices)
{
  int i, nbp;
  VEC (rdgc, heap) *components = VEC_alloc (rdgc, heap, 3);
  VEC (partition_t, heap) *partitions = VEC_alloc (partition_t, heap, 3);
  VEC (int, heap) *other_stores = VEC_alloc (int, heap, 3);
  partition_t partition;
  bitmap processed = BITMAP_ALLOC (NULL);
  bool any_builtin;

  remaining_stmts = BITMAP_ALLOC (NULL);
  upstream_mem_writes = BITMAP_ALLOC (NULL);

  for (i = 0; i < rdg->n_vertices; i++)
    {
      bitmap_set_bit (remaining_stmts, i);

      /* Save in OTHER_STORES all the memory writes that are not in
         STARTING_VERTICES.  */
      if (RDG_MEM_WRITE_STMT (rdg, i))
        {
          int v;
          unsigned j;
          bool found = false;

          FOR_EACH_VEC_ELT (int, starting_vertices, j, v)
            if (i == v)
              {
                found = true;
                break;
              }

          if (!found)
            VEC_safe_push (int, heap, other_stores, i);
        }
    }

  mark_nodes_having_upstream_mem_writes (rdg);
  rdg_build_components (rdg, starting_vertices, &components);
  rdg_build_partitions (rdg, components, &other_stores, &partitions,
                        processed);
  BITMAP_FREE (processed);

  any_builtin = false;
  FOR_EACH_VEC_ELT (partition_t, partitions, i, partition)
    {
      classify_partition (loop, rdg, partition);
      any_builtin |= partition_builtin_p (partition);
    }

  /* If we are only distributing patterns, fuse all partitions that
     were not properly classified as builtins.  Else fuse partitions
     with similar memory accesses.  */
  if (!flag_tree_loop_distribution)
    {
      partition_t into;
      /* If we did not detect any builtin simply bail out.  */
      if (!any_builtin)
        {
          nbp = 0;
          goto ldist_done;
        }
      /* Only fuse adjacent non-builtin partitions, see PR53616.
         ??? Use dependence information to improve partition ordering.  */
      i = 0;
      do
        {
          for (; VEC_iterate (partition_t, partitions, i, into); ++i)
            if (!partition_builtin_p (into))
              break;
          for (++i; VEC_iterate (partition_t, partitions, i, partition); ++i)
            if (!partition_builtin_p (partition))
              {
                bitmap_ior_into (into->stmts, partition->stmts);
                VEC_ordered_remove (partition_t, partitions, i);
                i--;
              }
            else
              break;
        }
      while ((unsigned) i < VEC_length (partition_t, partitions));
    }
  else
    {
      partition_t into;
      int j;
      for (i = 0; VEC_iterate (partition_t, partitions, i, into); ++i)
        {
          if (partition_builtin_p (into))
            continue;
          for (j = i + 1;
               VEC_iterate (partition_t, partitions, j, partition); ++j)
            {
              if (!partition_builtin_p (partition)
                  /* ??? The following is horribly inefficient; we are
                     re-computing and analyzing data-references of the
                     stmts in the partitions all the time.  */
                  && similar_memory_accesses (rdg, into, partition))
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    {
                      fprintf (dump_file, "fusing partitions\n");
                      dump_bitmap (dump_file, into->stmts);
                      dump_bitmap (dump_file, partition->stmts);
                      fprintf (dump_file, "because they have similar "
                               "memory accesses\n");
                    }
                  bitmap_ior_into (into->stmts, partition->stmts);
                  VEC_ordered_remove (partition_t, partitions, j);
                  j--;
                }
            }
        }
    }

  nbp = VEC_length (partition_t, partitions);
  if (nbp == 0
      || (nbp == 1
          && !partition_builtin_p (VEC_index (partition_t, partitions, 0)))
      || (nbp > 1
          && partition_contains_all_rw (rdg, partitions)))
    {
      nbp = 0;
      goto ldist_done;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg_partitions (dump_file, partitions);

  FOR_EACH_VEC_ELT (partition_t, partitions, i, partition)
    generate_code_for_partition (loop, partition, i < nbp - 1);

 ldist_done:

  BITMAP_FREE (remaining_stmts);
  BITMAP_FREE (upstream_mem_writes);

  FOR_EACH_VEC_ELT (partition_t, partitions, i, partition)
    partition_free (partition);

  VEC_free (int, heap, other_stores);
  VEC_free (partition_t, heap, partitions);
  free_rdg_components (components);
  return nbp;
}
/* Distributes the code from LOOP in such a way that producer
   statements are placed before consumer statements.  When STMTS is
   NULL, performs the maximal distribution; if STMTS is not NULL,
   tries to separate only these statements from the LOOP's body.
   Returns the number of distributed loops.  */

static int
distribute_loop (struct loop *loop, VEC (gimple, heap) *stmts)
{
  int res = 0;
  struct graph *rdg;
  gimple s;
  unsigned i;
  VEC (int, heap) *vertices;
  VEC (ddr_p, heap) *dependence_relations;
  VEC (data_reference_p, heap) *datarefs;
  VEC (loop_p, heap) *loop_nest;

  datarefs = VEC_alloc (data_reference_p, heap, 10);
  dependence_relations = VEC_alloc (ddr_p, heap, 100);
  loop_nest = VEC_alloc (loop_p, heap, 3);
  rdg = build_rdg (loop, &loop_nest, &dependence_relations, &datarefs);

  if (!rdg)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "FIXME: Loop %d not distributed: failed to build the RDG.\n",
                 loop->num);

      free_dependence_relations (dependence_relations);
      free_data_refs (datarefs);
      VEC_free (loop_p, heap, loop_nest);
      return res;
    }

  vertices = VEC_alloc (int, heap, 3);

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg (dump_file, rdg);

  FOR_EACH_VEC_ELT (gimple, stmts, i, s)
    {
      int v = rdg_vertex_for_stmt (rdg, s);

      if (v >= 0)
        {
          VEC_safe_push (int, heap, vertices, v);

          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file,
                     "ldist asked to generate code for vertex %d\n", v);
        }
    }

  res = ldist_gen (loop, rdg, vertices);
  VEC_free (int, heap, vertices);
  free_rdg (rdg);
  free_dependence_relations (dependence_relations);
  free_data_refs (datarefs);
  VEC_free (loop_p, heap, loop_nest);
  return res;
}
/* Distribute all loops in the current function.  */

static unsigned int
tree_loop_distribution (void)
{
  struct loop *loop;
  loop_iterator li;
  bool changed = false;
  basic_block bb;

  FOR_ALL_BB (bb)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        gimple_set_uid (gsi_stmt (gsi), -1);
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        gimple_set_uid (gsi_stmt (gsi), -1);
    }

  /* We can at the moment only distribute non-nested loops, thus restrict
     walking to innermost loops.  */
  FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
    {
      VEC (gimple, heap) *work_list = NULL;
      basic_block *bbs;
      int num = loop->num;
      int nb_generated_loops = 0;
      unsigned int i;

      /* If the loop doesn't have a single exit we will fail anyway,
         so do that early.  */
      if (!single_exit (loop))
        continue;

      /* Only distribute loops with a header and latch for now.  */
      if (loop->num_nodes > 2)
        continue;

      /* Initialize the worklist with stmts we seed the partitions with.  */
      bbs = get_loop_body_in_dom_order (loop);
      for (i = 0; i < loop->num_nodes; ++i)
        {
          gimple_stmt_iterator gsi;
          for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
            {
              gimple stmt = gsi_stmt (gsi);
              /* Only distribute stores for now.
                 ??? We should also try to distribute scalar reductions,
                 thus SSA defs that have scalar uses outside of the loop.  */
              if (!gimple_assign_single_p (stmt)
                  || is_gimple_reg (gimple_assign_lhs (stmt)))
                continue;

              VEC_safe_push (gimple, heap, work_list, stmt);
            }
        }
      free (bbs);

      if (VEC_length (gimple, work_list) > 0)
        nb_generated_loops = distribute_loop (loop, work_list);

      if (nb_generated_loops > 0)
        changed = true;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          if (nb_generated_loops > 1)
            fprintf (dump_file, "Loop %d distributed: split to %d loops.\n",
                     num, nb_generated_loops);
          else
            fprintf (dump_file, "Loop %d is the same.\n", num);
        }

      VEC_free (gimple, heap, work_list);
    }

  if (changed)
    {
      mark_virtual_operands_for_renaming (cfun);
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }

#ifdef ENABLE_CHECKING
  verify_loop_structure ();
#endif

  return 0;
}
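/* Entry gate: the pass runs when either full distribution
   (-ftree-loop-distribution) or pattern replacement only
   (-ftree-loop-distribute-patterns) is requested.  */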
static bool
gate_tree_loop_distribution (void)
{
  return flag_tree_loop_distribution
         || flag_tree_loop_distribute_patterns;
}

struct gimple_opt_pass pass_loop_distribution =
{
 {
  GIMPLE_PASS,
  "ldist",                      /* name */
  gate_tree_loop_distribution,  /* gate */
  tree_loop_distribution,       /* execute */
  NULL,                         /* sub */
  NULL,                         /* next */
  0,                            /* static_pass_number */
  TV_TREE_LOOP_DISTRIBUTION,    /* tv_id */
  PROP_cfg | PROP_ssa,          /* properties_required */
  0,                            /* properties_provided */
  0,                            /* properties_destroyed */
  0,                            /* todo_flags_start */
  TODO_ggc_collect
  | TODO_verify_ssa             /* todo_flags_finish */
 }
};