Merge trunk version 193672 into gupc branch.
[official-gcc.git] / gcc / tree-loop-distribution.c
bloba0a766b810070228b65e4aa2ff8a7aa26fa2b430
1 /* Loop distribution.
2 Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
5 and Sebastian Pop <sebastian.pop@amd.com>.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 3, or (at your option) any
12 later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 /* This pass performs loop distribution: for example, the loop
25 |DO I = 2, N
26 | A(I) = B(I) + C
27 | D(I) = A(I-1)*E
28 |ENDDO
30 is transformed to
32 |DOALL I = 2, N
33 | A(I) = B(I) + C
34 |ENDDO
36 |DOALL I = 2, N
37 | D(I) = A(I-1)*E
38 |ENDDO
40 This pass uses an RDG, Reduced Dependence Graph built on top of the
41 data dependence relations. The RDG is then topologically sorted to
42 obtain a map of information producers/consumers based on which it
43 generates the new loops. */
45 #include "config.h"
46 #include "system.h"
47 #include "coretypes.h"
48 #include "tree-flow.h"
49 #include "cfgloop.h"
50 #include "tree-chrec.h"
51 #include "tree-data-ref.h"
52 #include "tree-scalar-evolution.h"
53 #include "tree-pass.h"
/* The kind of a partition: either a regular loop that has to be kept
   as a loop (PKIND_NORMAL), or a partition whose stores match a
   pattern that can be replaced by a call to a builtin (memset, or
   memcpy/memmove).  */
enum partition_kind { PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY };
/* A partition of the statements of a loop, as produced by the
   distribution machinery below.  */
typedef struct partition_s
{
  /* Bitmap of the statements in the partition, indexed in the order
     given by stmts_from_loop.  */
  bitmap stmts;
  /* True when the partition contains a store that cannot be
     recomputed in another partition.  */
  bool has_writes;
  /* Whether the partition stays a loop or becomes a builtin call.  */
  enum partition_kind kind;
  /* data-references a kind != PKIND_NORMAL partition is about.  */
  data_reference_p main_dr;
  data_reference_p secondary_dr;
} *partition_t;
68 /* Allocate and initialize a partition from BITMAP. */
70 static partition_t
71 partition_alloc (bitmap stmts)
73 partition_t partition = XCNEW (struct partition_s);
74 partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
75 partition->has_writes = false;
76 partition->kind = PKIND_NORMAL;
77 return partition;
80 /* Free PARTITION. */
82 static void
83 partition_free (partition_t partition)
85 BITMAP_FREE (partition->stmts);
86 free (partition);
89 /* Returns true if the partition can be generated as a builtin. */
91 static bool
92 partition_builtin_p (partition_t partition)
94 return partition->kind != PKIND_NORMAL;
97 /* Returns true if the partition has an writes. */
99 static bool
100 partition_has_writes (partition_t partition)
102 return partition->has_writes;
/* If bit I is not set, node I represents an operation that has
   already been generated in some partition and that should not be
   generated again.  This is the subgraph of remaining important
   computations that is passed to the DFS algorithm to avoid
   including the same stores several times in different loops.  */
static bitmap remaining_stmts;

/* A node of the RDG is marked in this bitmap when it has as a
   predecessor a node that writes to memory.  */
static bitmap upstream_mem_writes;
116 /* Returns true when DEF is an SSA_NAME defined in LOOP and used after
117 the LOOP. */
119 static bool
120 ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
122 imm_use_iterator imm_iter;
123 use_operand_p use_p;
125 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
127 gimple use_stmt = USE_STMT (use_p);
128 if (!is_gimple_debug (use_stmt)
129 && loop != loop_containing_stmt (use_stmt))
130 return true;
133 return false;
136 /* Returns true when STMT defines a scalar variable used after the
137 loop LOOP. */
139 static bool
140 stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple stmt)
142 def_operand_p def_p;
143 ssa_op_iter op_iter;
145 if (gimple_code (stmt) == GIMPLE_PHI)
146 return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);
148 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
149 if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
150 return true;
152 return false;
155 /* Update the PHI nodes of NEW_LOOP. NEW_LOOP is a duplicate of
156 ORIG_LOOP. */
158 static void
159 update_phis_for_loop_copy (struct loop *orig_loop, struct loop *new_loop)
161 tree new_ssa_name;
162 gimple_stmt_iterator si_new, si_orig;
163 edge orig_loop_latch = loop_latch_edge (orig_loop);
164 edge orig_entry_e = loop_preheader_edge (orig_loop);
165 edge new_loop_entry_e = loop_preheader_edge (new_loop);
167 /* Scan the phis in the headers of the old and new loops
168 (they are organized in exactly the same order). */
169 for (si_new = gsi_start_phis (new_loop->header),
170 si_orig = gsi_start_phis (orig_loop->header);
171 !gsi_end_p (si_new) && !gsi_end_p (si_orig);
172 gsi_next (&si_new), gsi_next (&si_orig))
174 tree def;
175 source_location locus;
176 gimple phi_new = gsi_stmt (si_new);
177 gimple phi_orig = gsi_stmt (si_orig);
179 /* Add the first phi argument for the phi in NEW_LOOP (the one
180 associated with the entry of NEW_LOOP) */
181 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_entry_e);
182 locus = gimple_phi_arg_location_from_edge (phi_orig, orig_entry_e);
183 add_phi_arg (phi_new, def, new_loop_entry_e, locus);
185 /* Add the second phi argument for the phi in NEW_LOOP (the one
186 associated with the latch of NEW_LOOP) */
187 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
188 locus = gimple_phi_arg_location_from_edge (phi_orig, orig_loop_latch);
190 if (TREE_CODE (def) == SSA_NAME)
192 new_ssa_name = get_current_def (def);
194 if (!new_ssa_name)
195 /* This only happens if there are no definitions inside the
196 loop. Use the the invariant in the new loop as is. */
197 new_ssa_name = def;
199 else
200 /* Could be an integer. */
201 new_ssa_name = def;
203 add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop), locus);
207 /* Return a copy of LOOP placed before LOOP. */
209 static struct loop *
210 copy_loop_before (struct loop *loop)
212 struct loop *res;
213 edge preheader = loop_preheader_edge (loop);
215 initialize_original_copy_tables ();
216 res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, preheader);
217 gcc_assert (res != NULL);
218 free_original_copy_tables ();
220 update_phis_for_loop_copy (loop, res);
221 rename_variables_in_loop (res);
223 return res;
226 /* Creates an empty basic block after LOOP. */
228 static void
229 create_bb_after_loop (struct loop *loop)
231 edge exit = single_exit (loop);
233 if (!exit)
234 return;
236 split_edge (exit);
/* Generate code for PARTITION from the code in LOOP.  The loop is
   copied when COPY_P is true.  All the statements not flagged in the
   PARTITION bitmap are removed from the loop or from its copy.  The
   statements are indexed in sequence inside a basic block, and the
   basic blocks of a loop are taken in dom order.  */

static void
generate_loops_for_partition (struct loop *loop, partition_t partition,
			      bool copy_p)
{
  unsigned i, x;
  gimple_stmt_iterator bsi;
  basic_block *bbs;

  if (copy_p)
    {
      /* Work on a fresh duplicate placed before LOOP, with a simple
	 preheader and a landing block after it.  */
      loop = copy_loop_before (loop);
      gcc_assert (loop != NULL);
      create_preheader (loop, CP_SIMPLE_PREHEADERS);
      create_bb_after_loop (loop);
    }

  /* Remove stmts not in the PARTITION bitmap.  The order in which we
     visit the phi nodes and the statements is exactly as in
     stmts_from_loop, so the running index X matches the bit indices
     stored in PARTITION->stmts.  */
  bbs = get_loop_body_in_dom_order (loop);

  /* First pass: reset debug uses of the statements about to be
     removed, so later debug statements do not reference them.  */
  if (MAY_HAVE_DEBUG_STMTS)
    for (x = 0, i = 0; i < loop->num_nodes; i++)
      {
	basic_block bb = bbs[i];

	for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	  if (!bitmap_bit_p (partition->stmts, x++))
	    reset_debug_uses (gsi_stmt (bsi));

	for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	  {
	    gimple stmt = gsi_stmt (bsi);
	    /* Labels and debug stmts do not consume an index; they are
	       not counted by stmts_from_loop either.  */
	    if (gimple_code (stmt) != GIMPLE_LABEL
		&& !is_gimple_debug (stmt)
		&& !bitmap_bit_p (partition->stmts, x++))
	      reset_debug_uses (stmt);
	  }
      }

  /* Second pass: actually remove the statements that are not part of
     the partition, using the same indexing scheme.  */
  for (x = 0, i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
	if (!bitmap_bit_p (partition->stmts, x++))
	  {
	    gimple phi = gsi_stmt (bsi);
	    if (virtual_operand_p (gimple_phi_result (phi)))
	      mark_virtual_phi_result_for_renaming (phi);
	    remove_phi_node (&bsi, true);
	  }
	else
	  gsi_next (&bsi);

      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
	{
	  gimple stmt = gsi_stmt (bsi);
	  if (gimple_code (stmt) != GIMPLE_LABEL
	      && !is_gimple_debug (stmt)
	      && !bitmap_bit_p (partition->stmts, x++))
	    {
	      unlink_stmt_vdef (stmt);
	      gsi_remove (&bsi, true);
	      release_defs (stmt);
	    }
	  else
	    gsi_next (&bsi);
	}
    }

  free (bbs);
}
319 /* Build the size argument for a memory operation call. */
321 static tree
322 build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter)
324 tree size;
325 size = fold_build2_loc (loc, MULT_EXPR, sizetype,
326 fold_convert_loc (loc, sizetype, nb_iter),
327 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
328 return fold_convert_loc (loc, size_type_node, size);
331 /* Build an address argument for a memory operation call. */
333 static tree
334 build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
336 tree addr_base;
338 addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
339 addr_base = fold_convert_loc (loc, sizetype, addr_base);
341 /* Test for a negative stride, iterating over every element. */
342 if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
344 addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
345 fold_convert_loc (loc, sizetype, nb_bytes));
346 addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
347 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
350 return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
/* Generate a call to memset for PARTITION in LOOP.  The new call is
   emitted in the preheader of LOOP; the loop itself is destroyed by
   the caller.  */

static void
generate_memset_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree nb_iter, mem, fn, nb_bytes;
  location_t loc;
  tree val;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);
  /* The number of stores executed: one per latch execution when the
     store sits in the latch, otherwise one per exit-condition
     execution.  */
  if (gimple_bb (stmt) == loop->latch)
    nb_iter = number_of_latch_executions (loop);
  else
    nb_iter = number_of_exit_cond_executions (loop);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  /* Gimplify the size and address arguments in order.  */
  nb_bytes = build_size_arg_loc (loc, partition->main_dr, nb_iter);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
				       false, GSI_CONTINUE_LINKING);
  mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);

  /* This exactly matches the pattern recognition in classify_partition.  */
  val = gimple_assign_rhs1 (stmt);
  if (integer_zerop (val)
      || real_zerop (val)
      || TREE_CODE (val) == CONSTRUCTOR)
    val = integer_zero_node;
  else if (integer_all_onesp (val))
    val = build_int_cst (integer_type_node, -1);
  else
    {
      /* memset takes an int value argument; convert VAL as needed,
	 inserting a NOP conversion statement when the conversion is
	 not useless.  */
      if (TREE_CODE (val) == INTEGER_CST)
	val = fold_convert (integer_type_node, val);
      else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
	{
	  gimple cstmt;
	  tree tem = make_ssa_name (integer_type_node, NULL);
	  cstmt = gimple_build_assign_with_ops (NOP_EXPR, tem, val, NULL_TREE);
	  gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
	  val = tem;
	}
    }

  fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
  fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "generated memset");
      if (integer_zerop (val))
	fprintf (dump_file, " zero\n");
      else if (integer_all_onesp (val))
	fprintf (dump_file, " minus one\n");
      else
	fprintf (dump_file, "\n");
    }
}
/* Generate a call to memcpy for PARTITION in LOOP.  Falls back to
   memmove when the source and destination pointers may alias.  The
   new call is emitted in the preheader of LOOP.  */

static void
generate_memcpy_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree nb_iter, dest, src, fn, nb_bytes;
  location_t loc;
  enum built_in_function kind;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);
  /* Number of copied elements: see generate_memset_builtin.  */
  if (gimple_bb (stmt) == loop->latch)
    nb_iter = number_of_latch_executions (loop);
  else
    nb_iter = number_of_exit_cond_executions (loop);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, nb_iter);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
				       false, GSI_CONTINUE_LINKING);
  dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
  /* memcpy requires non-overlapping regions; use memmove when we
     cannot prove that.  */
  if (ptr_derefs_may_alias_p (dest, src))
    kind = BUILT_IN_MEMMOVE;
  else
    kind = BUILT_IN_MEMCPY;

  dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
				   false, GSI_CONTINUE_LINKING);
  src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);
  fn = build_fold_addr_expr (builtin_decl_implicit (kind));
  fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (kind == BUILT_IN_MEMCPY)
	fprintf (dump_file, "generated memcpy\n");
      else
	fprintf (dump_file, "generated memmove\n");
    }
}
/* Remove and destroy the loop LOOP.  LOOP must have a single exit;
   its body is deleted and the preheader is wired directly to the
   exit destination.  */

static void
destroy_loop (struct loop *loop)
{
  unsigned nbbs = loop->num_nodes;
  edge exit = single_exit (loop);
  basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
  basic_block *bbs;
  unsigned i;

  bbs = get_loop_body_in_dom_order (loop);

  /* Bypass the loop body: make the exit edge start at the preheader's
     source and turn it into a plain fallthru edge.  */
  redirect_edge_pred (exit, src);
  exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
  exit->flags |= EDGE_FALLTHRU;
  cancel_loop_tree (loop);
  rescan_loop_exit (exit, false, true);

  for (i = 0; i < nbbs; i++)
    {
      /* We have made sure to not leave any dangling uses of SSA
	 names defined in the loop.  With the exception of virtuals.
	 Make sure we replace all uses of virtual defs that will remain
	 outside of the loop with the bare symbol as delete_basic_block
	 will release them.  */
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple phi = gsi_stmt (gsi);
	  if (virtual_operand_p (gimple_phi_result (phi)))
	    mark_virtual_phi_result_for_renaming (phi);
	}
      for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple stmt = gsi_stmt (gsi);
	  tree vdef = gimple_vdef (stmt);
	  if (vdef && TREE_CODE (vdef) == SSA_NAME)
	    mark_virtual_operand_for_renaming (vdef);
	}
      delete_basic_block (bbs[i]);
    }
  free (bbs);

  /* Deleting the loop body may have changed the dominators of the
     exit destination; recompute them.  */
  set_immediate_dominator (CDI_DOMINATORS, dest,
			   recompute_dominator (CDI_DOMINATORS, dest));
}
515 /* Generates code for PARTITION. */
517 static void
518 generate_code_for_partition (struct loop *loop,
519 partition_t partition, bool copy_p)
521 switch (partition->kind)
523 case PKIND_MEMSET:
524 generate_memset_builtin (loop, partition);
525 /* If this is the last partition for which we generate code, we have
526 to destroy the loop. */
527 if (!copy_p)
528 destroy_loop (loop);
529 break;
531 case PKIND_MEMCPY:
532 generate_memcpy_builtin (loop, partition);
533 /* If this is the last partition for which we generate code, we have
534 to destroy the loop. */
535 if (!copy_p)
536 destroy_loop (loop);
537 break;
539 case PKIND_NORMAL:
540 generate_loops_for_partition (loop, partition, copy_p);
541 break;
543 default:
544 gcc_unreachable ();
549 /* Returns true if the node V of RDG cannot be recomputed. */
551 static bool
552 rdg_cannot_recompute_vertex_p (struct graph *rdg, int v)
554 if (RDG_MEM_WRITE_STMT (rdg, v))
555 return true;
557 return false;
560 /* Returns true when the vertex V has already been generated in the
561 current partition (V is in PROCESSED), or when V belongs to another
562 partition and cannot be recomputed (V is not in REMAINING_STMTS). */
564 static inline bool
565 already_processed_vertex_p (bitmap processed, int v)
567 return (bitmap_bit_p (processed, v)
568 || !bitmap_bit_p (remaining_stmts, v));
571 /* Returns NULL when there is no anti-dependence among the successors
572 of vertex V, otherwise returns the edge with the anti-dep. */
574 static struct graph_edge *
575 has_anti_dependence (struct vertex *v)
577 struct graph_edge *e;
579 if (v->succ)
580 for (e = v->succ; e; e = e->succ_next)
581 if (RDGE_TYPE (e) == anti_dd)
582 return e;
584 return NULL;
587 /* Returns true when V has an anti-dependence edge among its successors. */
589 static bool
590 predecessor_has_mem_write (struct graph *rdg, struct vertex *v)
592 struct graph_edge *e;
594 if (v->pred)
595 for (e = v->pred; e; e = e->pred_next)
596 if (bitmap_bit_p (upstream_mem_writes, e->src)
597 /* Don't consider flow channels: a write to memory followed
598 by a read from memory. These channels allow the split of
599 the RDG in different partitions. */
600 && !RDG_MEM_WRITE_STMT (rdg, e->src))
601 return true;
603 return false;
606 /* Initializes the upstream_mem_writes bitmap following the
607 information from RDG. */
609 static void
610 mark_nodes_having_upstream_mem_writes (struct graph *rdg)
612 int v, x;
613 bitmap seen = BITMAP_ALLOC (NULL);
615 for (v = rdg->n_vertices - 1; v >= 0; v--)
616 if (!bitmap_bit_p (seen, v))
618 unsigned i;
619 vec<int> nodes;
620 nodes.create (3);
622 graphds_dfs (rdg, &v, 1, &nodes, false, NULL);
624 FOR_EACH_VEC_ELT (nodes, i, x)
626 if (!bitmap_set_bit (seen, x))
627 continue;
629 if (RDG_MEM_WRITE_STMT (rdg, x)
630 || predecessor_has_mem_write (rdg, &(rdg->vertices[x]))
631 /* In anti dependences the read should occur before
632 the write, this is why both the read and the write
633 should be placed in the same partition. */
634 || has_anti_dependence (&(rdg->vertices[x])))
636 bitmap_set_bit (upstream_mem_writes, x);
640 nodes.release ();
644 /* Returns true when vertex u has a memory write node as a predecessor
645 in RDG. */
647 static bool
648 has_upstream_mem_writes (int u)
650 return bitmap_bit_p (upstream_mem_writes, u);
653 static void rdg_flag_vertex_and_dependent (struct graph *, int, partition_t,
654 bitmap, bitmap);
/* Flag the uses of U stopping following the information from
   upstream_mem_writes.  Pulls into PARTITION the vertices that must
   stay with U: anti-dependence destinations, the defining statement
   of U's virtual use, and (when U has upstream memory writes) the
   users of U's scalar result.  */

static void
rdg_flag_uses (struct graph *rdg, int u, partition_t partition, bitmap loops,
	       bitmap processed)
{
  use_operand_p use_p;
  struct vertex *x = &(rdg->vertices[u]);
  gimple stmt = RDGV_STMT (x);
  struct graph_edge *anti_dep = has_anti_dependence (x);

  /* Keep in the same partition the destination of an antidependence,
     because this is a store to the exact same location.  Putting this
     in another partition is bad for cache locality.  */
  if (anti_dep)
    {
      int v = anti_dep->dest;

      if (!already_processed_vertex_p (processed, v))
	rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
				       processed);
    }

  if (gimple_code (stmt) != GIMPLE_PHI)
    {
      /* Follow the virtual use of STMT back to its defining statement
	 and keep that statement in the partition too.  */
      if ((use_p = gimple_vuse_op (stmt)) != NULL_USE_OPERAND_P)
	{
	  tree use = USE_FROM_PTR (use_p);

	  if (TREE_CODE (use) == SSA_NAME)
	    {
	      gimple def_stmt = SSA_NAME_DEF_STMT (use);
	      int v = rdg_vertex_for_stmt (rdg, def_stmt);

	      /* V is negative when DEF_STMT is not part of the RDG.  */
	      if (v >= 0
		  && !already_processed_vertex_p (processed, v))
		rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
					       processed);
	    }
	}
    }

  if (is_gimple_assign (stmt) && has_upstream_mem_writes (u))
    {
      tree op0 = gimple_assign_lhs (stmt);

      /* Scalar channels don't have enough space for transmitting data
	 between tasks, unless we add more storage by privatizing.  */
      if (is_gimple_reg (op0))
	{
	  use_operand_p use_p;
	  imm_use_iterator iter;

	  /* Pull every user of the scalar result into the partition.  */
	  FOR_EACH_IMM_USE_FAST (use_p, iter, op0)
	    {
	      int v = rdg_vertex_for_stmt (rdg, USE_STMT (use_p));

	      if (!already_processed_vertex_p (processed, v))
		rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
					       processed);
	    }
	}
    }
}
722 /* Flag V from RDG as part of PARTITION, and also flag its loop number
723 in LOOPS. */
725 static void
726 rdg_flag_vertex (struct graph *rdg, int v, partition_t partition, bitmap loops)
728 struct loop *loop;
730 if (!bitmap_set_bit (partition->stmts, v))
731 return;
733 loop = loop_containing_stmt (RDG_STMT (rdg, v));
734 bitmap_set_bit (loops, loop->num);
736 if (rdg_cannot_recompute_vertex_p (rdg, v))
738 partition->has_writes = true;
739 bitmap_clear_bit (remaining_stmts, v);
743 /* Flag in the bitmap PARTITION the vertex V and all its predecessors.
744 Also flag their loop number in LOOPS. */
746 static void
747 rdg_flag_vertex_and_dependent (struct graph *rdg, int v, partition_t partition,
748 bitmap loops, bitmap processed)
750 unsigned i;
751 vec<int> nodes;
752 nodes.create (3);
753 int x;
755 bitmap_set_bit (processed, v);
756 rdg_flag_uses (rdg, v, partition, loops, processed);
757 graphds_dfs (rdg, &v, 1, &nodes, false, remaining_stmts);
758 rdg_flag_vertex (rdg, v, partition, loops);
760 FOR_EACH_VEC_ELT (nodes, i, x)
761 if (!already_processed_vertex_p (processed, x))
762 rdg_flag_vertex_and_dependent (rdg, x, partition, loops, processed);
764 nodes.release ();
767 /* Initialize CONDS with all the condition statements from the basic
768 blocks of LOOP. */
770 static void
771 collect_condition_stmts (struct loop *loop, vec<gimple> *conds)
773 unsigned i;
774 edge e;
775 vec<edge> exits = get_loop_exit_edges (loop);
777 FOR_EACH_VEC_ELT (exits, i, e)
779 gimple cond = last_stmt (e->src);
781 if (cond)
782 conds->safe_push (cond);
785 exits.release ();
/* Add to PARTITION all the exit condition statements for LOOPS
   together with all their dependent statements determined from
   RDG.  Flagging a condition may pull statements from further loops
   into the partition; their exit conditions are processed in turn
   via the CONDS worklist.  */

static void
rdg_flag_loop_exits (struct graph *rdg, bitmap loops, partition_t partition,
		     bitmap processed)
{
  unsigned i;
  bitmap_iterator bi;
  vec<gimple> conds;
  conds.create (3);

  /* Seed the worklist with the conditions of the initial loops.  */
  EXECUTE_IF_SET_IN_BITMAP (loops, 0, i, bi)
    collect_condition_stmts (get_loop (i), &conds);

  while (!conds.is_empty ())
    {
      gimple cond = conds.pop ();
      int v = rdg_vertex_for_stmt (rdg, cond);
      /* Loops newly touched while flagging this condition.  */
      bitmap new_loops = BITMAP_ALLOC (NULL);

      if (!already_processed_vertex_p (processed, v))
	rdg_flag_vertex_and_dependent (rdg, v, partition, new_loops, processed);

      /* Conditions of loops seen for the first time go on the
	 worklist.  */
      EXECUTE_IF_SET_IN_BITMAP (new_loops, 0, i, bi)
	if (bitmap_set_bit (loops, i))
	  collect_condition_stmts (get_loop (i), &conds);

      BITMAP_FREE (new_loops);
    }

  conds.release ();
}
823 /* Returns a bitmap in which all the statements needed for computing
824 the strongly connected component C of the RDG are flagged, also
825 including the loop exit conditions. */
827 static partition_t
828 build_rdg_partition_for_component (struct graph *rdg, rdgc c)
830 int i, v;
831 partition_t partition = partition_alloc (NULL);
832 bitmap loops = BITMAP_ALLOC (NULL);
833 bitmap processed = BITMAP_ALLOC (NULL);
835 FOR_EACH_VEC_ELT (c->vertices, i, v)
836 if (!already_processed_vertex_p (processed, v))
837 rdg_flag_vertex_and_dependent (rdg, v, partition, loops, processed);
839 rdg_flag_loop_exits (rdg, loops, partition, processed);
841 BITMAP_FREE (processed);
842 BITMAP_FREE (loops);
843 return partition;
846 /* Free memory for COMPONENTS. */
848 static void
849 free_rdg_components (vec<rdgc> components)
851 int i;
852 rdgc x;
854 FOR_EACH_VEC_ELT (components, i, x)
856 x->vertices.release ();
857 free (x);
860 components.release ();
/* Build the COMPONENTS vector with the strongly connected components
   of RDG in which the STARTING_VERTICES occur.  Ownership of the
   vertex vector of each selected component is transferred to the
   rdg_component pushed into COMPONENTS; unselected vectors are
   released here.  */

static void
rdg_build_components (struct graph *rdg, vec<int> starting_vertices,
		      vec<rdgc> *components)
{
  int i, v;
  bitmap saved_components = BITMAP_ALLOC (NULL);
  int n_components = graphds_scc (rdg, NULL);
  /* ??? Macros cannot process template types with more than one
     argument, so we need this typedef.  */
  typedef vec<int> vec_int_heap;
  vec<int> *all_components = XNEWVEC (vec_int_heap, n_components);

  for (i = 0; i < n_components; i++)
    all_components[i].create (3);

  /* Bucket every vertex into its component's vector.  */
  for (i = 0; i < rdg->n_vertices; i++)
    all_components[rdg->vertices[i].component].safe_push (i);

  FOR_EACH_VEC_ELT (starting_vertices, i, v)
    {
      int c = rdg->vertices[v].component;

      /* Record each component at most once; the rdg_component takes
	 over the vertex vector.  */
      if (bitmap_set_bit (saved_components, c))
	{
	  rdgc x = XCNEW (struct rdg_component);
	  x->num = c;
	  x->vertices = all_components[c];

	  components->safe_push (x);
	}
    }

  /* Release the vectors of components nobody asked for.  */
  for (i = 0; i < n_components; i++)
    if (!bitmap_bit_p (saved_components, i))
      all_components[i].release ();

  free (all_components);
  BITMAP_FREE (saved_components);
}
/* Classifies the builtin kind we can generate for PARTITION of RDG
   and LOOP: a memset when the partition is a single store of a
   constant-like value, a memcpy/memmove when it is a single
   load/store pair with matching strides and a suitable dependence.
   Otherwise the partition stays PKIND_NORMAL.  */

static void
classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
{
  bitmap_iterator bi;
  unsigned i;
  tree nb_iter;
  data_reference_p single_load, single_store;

  partition->kind = PKIND_NORMAL;
  partition->main_dr = NULL;
  partition->secondary_dr = NULL;

  if (!flag_tree_loop_distribute_patterns)
    return;

  /* Perform general partition disqualification for builtins.  */
  nb_iter = number_of_exit_cond_executions (loop);
  if (!nb_iter || nb_iter == chrec_dont_know)
    return;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);

      if (gimple_has_volatile_ops (stmt))
	return;

      /* If the stmt has uses outside of the loop fail.
	 ??? If the stmt is generated in another partition that
	 is not created as builtin we can ignore this.  */
      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "not generating builtin, partition has "
		     "scalar uses outside of the loop\n");
	  return;
	}
    }

  /* Detect memset and memcpy.  Require that the partition contains at
     most one load and one store over all its statements.  */
  single_load = NULL;
  single_store = NULL;
  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);
      data_reference_p dr;
      unsigned j;

      if (gimple_code (stmt) == GIMPLE_PHI)
	continue;

      /* Any scalar stmts are ok.  */
      if (!gimple_vuse (stmt))
	continue;

      /* Otherwise just regular loads/stores.  */
      if (!gimple_assign_single_p (stmt))
	return;

      /* But exactly one store and/or load.  */
      for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
	{
	  if (DR_IS_READ (dr))
	    {
	      if (single_load != NULL)
		return;
	      single_load = dr;
	    }
	  else
	    {
	      if (single_store != NULL)
		return;
	      single_store = dr;
	    }
	}
    }

  if (single_store && !single_load)
    {
      /* Candidate memset: the stored value must be one memset can
	 produce (all-zero, all-ones, or any value stored into a
	 byte-sized location), loop-invariant, and the store must
	 touch adjacent memory locations.  */
      gimple stmt = DR_STMT (single_store);
      tree rhs = gimple_assign_rhs1 (stmt);
      if (!(integer_zerop (rhs)
	    || integer_all_onesp (rhs)
	    || real_zerop (rhs)
	    || (TREE_CODE (rhs) == CONSTRUCTOR
		&& !TREE_CLOBBER_P (rhs))
	    || (INTEGRAL_TYPE_P (TREE_TYPE (rhs))
		&& (TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt)))
		    == TYPE_MODE (unsigned_char_type_node)))))
	return;
      if (TREE_CODE (rhs) == SSA_NAME
	  && !SSA_NAME_IS_DEFAULT_DEF (rhs)
	  && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
	return;
      if (!adjacent_dr_p (single_store))
	return;
      partition->kind = PKIND_MEMSET;
      partition->main_dr = single_store;
    }
  else if (single_store && single_load)
    {
      gimple store = DR_STMT (single_store);
      gimple load = DR_STMT (single_load);
      /* Direct aggregate copy or via an SSA name temporary.  */
      if (load != store
	  && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
	return;
      if (!adjacent_dr_p (single_store)
	  || !adjacent_dr_p (single_load)
	  || !operand_equal_p (DR_STEP (single_store),
			       DR_STEP (single_load), 0))
	return;
      /* Now check that if there is a dependence this dependence is
	 of a suitable form for memmove.  */
      vec<loop_p> loops = vec<loop_p>();
      ddr_p ddr;
      loops.safe_push (loop);
      ddr = initialize_data_dependence_relation (single_load, single_store,
						 loops);
      compute_affine_dependence (ddr, loop);
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	{
	  free_dependence_relation (ddr);
	  loops.release ();
	  return;
	}
      if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
	{
	  /* A dependence without distance vectors cannot be checked.  */
	  if (DDR_NUM_DIST_VECTS (ddr) == 0)
	    {
	      free_dependence_relation (ddr);
	      loops.release ();
	      return;
	    }
	  lambda_vector dist_v;
	  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
	    {
	      int dist = dist_v[index_in_loop_nest (loop->num,
						    DDR_LOOP_NEST (ddr))];
	      /* A forward (non-reversed) positive-distance dependence
		 means the store clobbers bytes the load still needs:
		 not even memmove can handle that.  */
	      if (dist > 0 && !DDR_REVERSED_P (ddr))
		{
		  free_dependence_relation (ddr);
		  loops.release ();
		  return;
		}
	    }
	}
      free_dependence_relation (ddr);
      loops.release ();
      partition->kind = PKIND_MEMCPY;
      partition->main_dr = single_store;
      partition->secondary_dr = single_load;
    }
}
1064 /* For a data reference REF, return the declaration of its base
1065 address or NULL_TREE if the base is not determined. */
1067 static tree
1068 ref_base_address (data_reference_p dr)
1070 tree base_address = DR_BASE_ADDRESS (dr);
1071 if (base_address
1072 && TREE_CODE (base_address) == ADDR_EXPR)
1073 return TREE_OPERAND (base_address, 0);
1075 return base_address;
/* Returns true when PARTITION1 and PARTITION2 have similar memory
   accesses in RDG: either they share a memory-accessing statement, or
   two of their data references have the same base address.  */

static bool
similar_memory_accesses (struct graph *rdg, partition_t partition1,
			 partition_t partition2)
{
  unsigned i, j, k, l;
  bitmap_iterator bi, bj;
  data_reference_p ref1, ref2;

  /* First check whether in the intersection of the two partitions are
     any loads or stores.  Common loads are the situation that happens
     most often.  */
  EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
	|| RDG_MEM_READS_STMT (rdg, i))
      return true;

  /* Then check all data-references against each other.  */
  EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
	|| RDG_MEM_READS_STMT (rdg, i))
      EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
	if (RDG_MEM_WRITE_STMT (rdg, j)
	    || RDG_MEM_READS_STMT (rdg, j))
	  {
	    /* Compare every data reference of statement I against
	       every data reference of statement J by base address.  */
	    FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
	      {
		tree base1 = ref_base_address (ref1);
		if (base1)
		  FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
		    if (base1 == ref_base_address (ref2))
		      return true;
	      }
	  }

  return false;
}
/* Aggregate several components into a useful partition that is
   registered in the PARTITIONS vector.  Partitions will be
   distributed in different loops.  Components are accumulated into
   the current partition until it contains a write; the function then
   recurses once over the scalar statements used in other loops.  */

static void
rdg_build_partitions (struct graph *rdg, vec<rdgc> components,
		      vec<int> *other_stores,
		      vec<partition_t> *partitions, bitmap processed)
{
  int i;
  rdgc x;
  /* Accumulator for components that do not yet contain a write.  */
  partition_t partition = partition_alloc (NULL);

  FOR_EACH_VEC_ELT (components, i, x)
    {
      partition_t np;
      int v = x->vertices[0];

      if (bitmap_bit_p (processed, v))
	continue;

      np = build_rdg_partition_for_component (rdg, x);
      bitmap_ior_into (partition->stmts, np->stmts);
      partition->has_writes = partition_has_writes (np);
      bitmap_ior_into (processed, np->stmts);
      partition_free (np);

      /* A partition becomes useful as soon as it contains a write;
	 push it and start a fresh accumulator.  */
      if (partition_has_writes (partition))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "ldist useful partition:\n");
	      dump_bitmap (dump_file, partition->stmts);
	    }

	  partitions->safe_push (partition);
	  partition = partition_alloc (NULL);
	}
    }

  /* Add the nodes from the RDG that were not marked as processed, and
     that are used outside the current loop.  These are scalar
     computations that are not yet part of previous partitions.  */
  for (i = 0; i < rdg->n_vertices; i++)
    if (!bitmap_bit_p (processed, i)
	&& rdg_defs_used_in_other_loops_p (rdg, i))
      other_stores->safe_push (i);

  /* If there are still statements left in the OTHER_STORES array,
     create other components and partitions with these stores and
     their dependences.  */
  if (other_stores->length () > 0)
    {
      vec<rdgc> comps;
      comps.create (3);
      vec<int> foo;
      foo.create (3);

      rdg_build_components (rdg, *other_stores, &comps);
      rdg_build_partitions (rdg, comps, &foo, partitions, processed);

      foo.release ();
      free_rdg_components (comps);
    }

  /* If there is something left in the last partition, save it.  */
  if (bitmap_count_bits (partition->stmts) > 0)
    partitions->safe_push (partition);
  else
    partition_free (partition);
}
1190 /* Dump to FILE the PARTITIONS. */
1192 static void
1193 dump_rdg_partitions (FILE *file, vec<partition_t> partitions)
1195 int i;
1196 partition_t partition;
1198 FOR_EACH_VEC_ELT (partitions, i, partition)
1199 debug_bitmap_file (file, partition->stmts);
/* Debug PARTITIONS: dump each partition's statement bitmap to stderr.
   Intended to be called from the debugger, hence the extern
   declaration and DEBUG_FUNCTION marking.  */
extern void debug_rdg_partitions (vec<partition_t> );

DEBUG_FUNCTION void
debug_rdg_partitions (vec<partition_t> partitions)
{
  dump_rdg_partitions (stderr, partitions);
}
1211 /* Returns the number of read and write operations in the RDG. */
1213 static int
1214 number_of_rw_in_rdg (struct graph *rdg)
1216 int i, res = 0;
1218 for (i = 0; i < rdg->n_vertices; i++)
1220 if (RDG_MEM_WRITE_STMT (rdg, i))
1221 ++res;
1223 if (RDG_MEM_READS_STMT (rdg, i))
1224 ++res;
1227 return res;
1230 /* Returns the number of read and write operations in a PARTITION of
1231 the RDG. */
1233 static int
1234 number_of_rw_in_partition (struct graph *rdg, partition_t partition)
1236 int res = 0;
1237 unsigned i;
1238 bitmap_iterator ii;
1240 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
1242 if (RDG_MEM_WRITE_STMT (rdg, i))
1243 ++res;
1245 if (RDG_MEM_READS_STMT (rdg, i))
1246 ++res;
1249 return res;
1252 /* Returns true when one of the PARTITIONS contains all the read or
1253 write operations of RDG. */
1255 static bool
1256 partition_contains_all_rw (struct graph *rdg,
1257 vec<partition_t> partitions)
1259 int i;
1260 partition_t partition;
1261 int nrw = number_of_rw_in_rdg (rdg);
1263 FOR_EACH_VEC_ELT (partitions, i, partition)
1264 if (nrw == number_of_rw_in_partition (rdg, partition))
1265 return true;
1267 return false;
/* Generate code from STARTING_VERTICES in RDG.  Returns the number of
   distributed loops.

   Builds components and partitions from the RDG, classifies each
   partition (possibly as a memset/memcpy builtin), fuses partitions
   according to the active flags, and finally emits one loop (or
   builtin call) per surviving partition.  Uses the file-scope bitmaps
   REMAINING_STMTS and UPSTREAM_MEM_WRITES as scratch state.  */

static int
ldist_gen (struct loop *loop, struct graph *rdg,
	   vec<int> starting_vertices)
{
  int i, nbp;
  vec<rdgc> components;
  components.create (3);
  vec<partition_t> partitions;
  partitions.create (3);
  vec<int> other_stores;
  other_stores.create (3);
  partition_t partition;
  bitmap processed = BITMAP_ALLOC (NULL);
  bool any_builtin;

  remaining_stmts = BITMAP_ALLOC (NULL);
  upstream_mem_writes = BITMAP_ALLOC (NULL);

  for (i = 0; i < rdg->n_vertices; i++)
    {
      bitmap_set_bit (remaining_stmts, i);

      /* Save in OTHER_STORES all the memory writes that are not in
	 STARTING_VERTICES.  */
      if (RDG_MEM_WRITE_STMT (rdg, i))
	{
	  int v;
	  unsigned j;
	  bool found = false;

	  FOR_EACH_VEC_ELT (starting_vertices, j, v)
	    if (i == v)
	      {
		found = true;
		break;
	      }

	  if (!found)
	    other_stores.safe_push (i);
	}
    }

  mark_nodes_having_upstream_mem_writes (rdg);
  rdg_build_components (rdg, starting_vertices, &components);
  rdg_build_partitions (rdg, components, &other_stores, &partitions,
			processed);
  BITMAP_FREE (processed);

  /* Classify each partition; remember whether any became a builtin
     (memset/memcpy) candidate.  */
  any_builtin = false;
  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      classify_partition (loop, rdg, partition);
      any_builtin |= partition_builtin_p (partition);
    }

  /* If we are only distributing patterns fuse all partitions that
     were not properly classified as builtins.  Else fuse partitions
     with similar memory accesses.  */
  if (!flag_tree_loop_distribution)
    {
      partition_t into;
      /* If we did not detect any builtin simply bail out.  */
      if (!any_builtin)
	{
	  nbp = 0;
	  goto ldist_done;
	}
      /* Only fuse adjacent non-builtin partitions, see PR53616.
	 ??? Use dependence information to improve partition ordering.  */
      i = 0;
      do
	{
	  /* Find the next non-builtin partition to fuse into ...  */
	  for (; partitions.iterate (i, &into); ++i)
	    if (!partition_builtin_p (into))
	      break;
	  /* ... and merge each immediately following non-builtin
	     partition into it.  ordered_remove shifts the vector, so
	     I is decremented to re-examine the same index.  */
	  for (++i; partitions.iterate (i, &partition); ++i)
	    if (!partition_builtin_p (partition))
	      {
		bitmap_ior_into (into->stmts, partition->stmts);
		partitions.ordered_remove (i);
		i--;
	      }
	    else
	      break;
	}
      while ((unsigned) i < partitions.length ());
    }
  else
    {
      partition_t into;
      int j;
      for (i = 0; partitions.iterate (i, &into); ++i)
	{
	  if (partition_builtin_p (into))
	    continue;
	  for (j = i + 1;
	       partitions.iterate (j, &partition); ++j)
	    {
	      if (!partition_builtin_p (partition)
		  /* ??? The following is horribly inefficient,
		     we are re-computing and analyzing data-references
		     of the stmts in the partitions all the time.  */
		  && similar_memory_accesses (rdg, into, partition))
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    {
		      fprintf (dump_file, "fusing partitions\n");
		      dump_bitmap (dump_file, into->stmts);
		      dump_bitmap (dump_file, partition->stmts);
		      fprintf (dump_file, "because they have similar "
			       "memory accesses\n");
		    }
		  bitmap_ior_into (into->stmts, partition->stmts);
		  partitions.ordered_remove (j);
		  j--;
		}
	    }
	}
    }

  /* Distributing is worthless when nothing remains, when the single
     remaining partition is not a builtin, or when one partition still
     holds every memory operation.  */
  nbp = partitions.length ();
  if (nbp == 0
      || (nbp == 1 && !partition_builtin_p (partitions[0]))
      || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
    {
      nbp = 0;
      goto ldist_done;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg_partitions (dump_file, partitions);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    generate_code_for_partition (loop, partition, i < nbp - 1);

 ldist_done:

  BITMAP_FREE (remaining_stmts);
  BITMAP_FREE (upstream_mem_writes);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    partition_free (partition);

  other_stores.release ();
  partitions.release ();
  free_rdg_components (components);
  return nbp;
}
/* Distributes the code from LOOP in such a way that producer
   statements are placed before consumer statements.  When STMTS is
   NULL, performs the maximal distribution, if STMTS is not NULL,
   tries to separate only these statements from the LOOP's body.
   Returns the number of distributed loops.  Returns 0 (no
   distribution) when the RDG cannot be built.  */

static int
distribute_loop (struct loop *loop, vec<gimple> stmts)
{
  int res = 0;
  struct graph *rdg;
  gimple s;
  unsigned i;
  vec<int> vertices;
  vec<ddr_p> dependence_relations;
  vec<data_reference_p> datarefs;
  vec<loop_p> loop_nest;

  datarefs.create (10);
  dependence_relations.create (100);
  loop_nest.create (3);
  rdg = build_rdg (loop, &loop_nest, &dependence_relations, &datarefs);

  if (!rdg)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "FIXME: Loop %d not distributed: failed to build the RDG.\n",
		 loop->num);

      /* Release everything allocated so far; VERTICES was not yet
	 created on this path.  */
      free_dependence_relations (dependence_relations);
      free_data_refs (datarefs);
      loop_nest.release ();
      return res;
    }

  vertices.create (3);

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg (dump_file, rdg);

  /* Map the seed statements to RDG vertex indices; statements with no
     vertex (rdg_vertex_for_stmt returns negative) are ignored.  */
  FOR_EACH_VEC_ELT (stmts, i, s)
    {
      int v = rdg_vertex_for_stmt (rdg, s);

      if (v >= 0)
	{
	  vertices.safe_push (v);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "ldist asked to generate code for vertex %d\n", v);
	}
    }

  res = ldist_gen (loop, rdg, vertices);
  vertices.release ();
  free_rdg (rdg);
  free_dependence_relations (dependence_relations);
  free_data_refs (datarefs);
  loop_nest.release ();
  return res;
}
/* Distribute all loops in the current function.  Pass entry point:
   walks every innermost loop, seeds partitions with its stores, and
   calls distribute_loop.  Returns 0 (no extra TODO flags).  */

static unsigned int
tree_loop_distribution (void)
{
  struct loop *loop;
  loop_iterator li;
  bool changed = false;
  basic_block bb;

  /* Reset all statement UIDs; other code in this pass presumably uses
     them as scratch markers -- TODO confirm against the callees.  */
  FOR_ALL_BB (bb)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
    }

  /* We can at the moment only distribute non-nested loops, thus restrict
     walking to innermost loops.  */
  FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
    {
      vec<gimple> work_list = vec<gimple>();
      basic_block *bbs;
      int num = loop->num;
      int nb_generated_loops = 0;
      unsigned int i;

      /* If the loop doesn't have a single exit we will fail anyway,
	 so do that early.  */
      if (!single_exit (loop))
	continue;

      /* Only optimize hot loops.  */
      if (!optimize_loop_for_speed_p (loop))
	continue;

      /* Only distribute loops with a header and latch for now.  */
      if (loop->num_nodes > 2)
	continue;

      /* Initialize the worklist with stmts we seed the partitions with.  */
      bbs = get_loop_body_in_dom_order (loop);
      for (i = 0; i < loop->num_nodes; ++i)
	{
	  gimple_stmt_iterator gsi;
	  for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      gimple stmt = gsi_stmt (gsi);
	      /* Only distribute stores for now.
		 ??? We should also try to distribute scalar reductions,
		 thus SSA defs that have scalar uses outside of the loop.  */
	      if (!gimple_assign_single_p (stmt)
		  || is_gimple_reg (gimple_assign_lhs (stmt)))
		continue;

	      work_list.safe_push (stmt);
	    }
	}
      free (bbs);

      if (work_list.length () > 0)
	nb_generated_loops = distribute_loop (loop, work_list);

      if (nb_generated_loops > 0)
	changed = true;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  if (nb_generated_loops > 1)
	    fprintf (dump_file, "Loop %d distributed: split to %d loops.\n",
		     num, nb_generated_loops);
	  else
	    fprintf (dump_file, "Loop %d is the same.\n", num);
	}

      work_list.release ();
    }

  /* Distribution rewrote memory accesses, so virtual operands and
     loop-closed SSA form must be refreshed.  */
  if (changed)
    {
      mark_virtual_operands_for_renaming (cfun);
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }

#ifdef ENABLE_CHECKING
  verify_loop_structure ();
#endif

  return 0;
}
1579 static bool
1580 gate_tree_loop_distribution (void)
1582 return flag_tree_loop_distribution
1583 || flag_tree_loop_distribute_patterns;
1586 struct gimple_opt_pass pass_loop_distribution =
1589 GIMPLE_PASS,
1590 "ldist", /* name */
1591 OPTGROUP_LOOP, /* optinfo_flags */
1592 gate_tree_loop_distribution, /* gate */
1593 tree_loop_distribution, /* execute */
1594 NULL, /* sub */
1595 NULL, /* next */
1596 0, /* static_pass_number */
1597 TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
1598 PROP_cfg | PROP_ssa, /* properties_required */
1599 0, /* properties_provided */
1600 0, /* properties_destroyed */
1601 0, /* todo_flags_start */
1602 TODO_ggc_collect
1603 | TODO_verify_ssa /* todo_flags_finish */