gcc/tree-loop-distribution.c
/* Loop distribution.
   Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.
   Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
   and Sebastian Pop <sebastian.pop@amd.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass performs loop distribution: for example, the loop

   |DO I = 2, N
   |    A(I) = B(I) + C
   |    D(I) = A(I-1)*E
   |ENDDO

   is transformed to

   |DOALL I = 2, N
   |   A(I) = B(I) + C
   |ENDDO
   |
   |DOALL I = 2, N
   |   D(I) = A(I-1)*E
   |ENDDO

   This pass uses an RDG, Reduced Dependence Graph built on top of the
   data dependence relations.  The RDG is then topologically sorted to
   obtain a map of information producers/consumers based on which it
   generates the new loops.  */

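/* As an illustration of the builtin-pattern case handled further
   down (see can_generate_builtin and generate_memset_zero), a
   partition that only stores zeros, such as

   |DO I = 1, N
   |    A(I) = 0
   |ENDDO

   can be replaced by a single call to memset when
   -ftree-loop-distribute-patterns is enabled.  */
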
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree-flow.h"
#include "cfgloop.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-pass.h"

/* If bit I is not set, it means that this node represents an
   operation that has already been performed, and that should not be
   performed again.  This is the subgraph of remaining important
   computations that is passed to the DFS algorithm to avoid
   including the same stores several times in different loops.  */
static bitmap remaining_stmts;

/* A node of the RDG is marked in this bitmap when it has as a
   predecessor a node that writes to memory.  */
static bitmap upstream_mem_writes;

/* Returns true when DEF is an SSA_NAME defined in LOOP and used after
   the LOOP.  */

static bool
ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
{
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
    if (loop != loop_containing_stmt (USE_STMT (use_p)))
      return true;

  return false;
}

/* Returns true when STMT defines a scalar variable used after the
   loop.  */

static bool
stmt_has_scalar_dependences_outside_loop (gimple stmt)
{
  tree name;

  switch (gimple_code (stmt))
    {
    case GIMPLE_ASSIGN:
      name = gimple_assign_lhs (stmt);
      break;

    case GIMPLE_PHI:
      name = gimple_phi_result (stmt);
      break;

    default:
      return false;
    }

  return TREE_CODE (name) == SSA_NAME
    && ssa_name_has_uses_outside_loop_p (name, loop_containing_stmt (stmt));
}

/* Update the PHI nodes of NEW_LOOP.  NEW_LOOP is a duplicate of
   ORIG_LOOP.  */

static void
update_phis_for_loop_copy (struct loop *orig_loop, struct loop *new_loop)
{
  tree new_ssa_name;
  gimple_stmt_iterator si_new, si_orig;
  edge orig_loop_latch = loop_latch_edge (orig_loop);
  edge orig_entry_e = loop_preheader_edge (orig_loop);
  edge new_loop_entry_e = loop_preheader_edge (new_loop);

  /* Scan the phis in the headers of the old and new loops
     (they are organized in exactly the same order).  */
  for (si_new = gsi_start_phis (new_loop->header),
       si_orig = gsi_start_phis (orig_loop->header);
       !gsi_end_p (si_new) && !gsi_end_p (si_orig);
       gsi_next (&si_new), gsi_next (&si_orig))
    {
      tree def;
      source_location locus;
      gimple phi_new = gsi_stmt (si_new);
      gimple phi_orig = gsi_stmt (si_orig);

      /* Add the first phi argument for the phi in NEW_LOOP (the one
	 associated with the entry of NEW_LOOP).  */
      def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_entry_e);
      locus = gimple_phi_arg_location_from_edge (phi_orig, orig_entry_e);
      add_phi_arg (phi_new, def, new_loop_entry_e, locus);

      /* Add the second phi argument for the phi in NEW_LOOP (the one
	 associated with the latch of NEW_LOOP).  */
      def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
      locus = gimple_phi_arg_location_from_edge (phi_orig, orig_loop_latch);

      if (TREE_CODE (def) == SSA_NAME)
	{
	  new_ssa_name = get_current_def (def);

	  if (!new_ssa_name)
	    /* This only happens if there are no definitions inside the
	       loop.  Use the invariant in the new loop as is.  */
	    new_ssa_name = def;
	}
      else
	/* Could be an integer.  */
	new_ssa_name = def;

      add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop), locus);
    }
}

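/* For instance (names are hypothetical), if ORIG_LOOP's header
   contains

   |  x_1 = PHI <x_0 (preheader), x_2 (latch)>

   the copied phi in NEW_LOOP receives x_0 on the new preheader edge,
   and on the new latch edge the current definition recorded for x_2
   by the loop copy (or x_2 itself when nothing was redefined).  */
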
/* Return a copy of LOOP placed before LOOP.  */

static struct loop *
copy_loop_before (struct loop *loop)
{
  struct loop *res;
  edge preheader = loop_preheader_edge (loop);

  if (!single_exit (loop))
    return NULL;

  initialize_original_copy_tables ();
  res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, preheader);
  free_original_copy_tables ();

  if (!res)
    return NULL;

  update_phis_for_loop_copy (loop, res);
  rename_variables_in_loop (res);

  return res;
}

/* Creates an empty basic block after LOOP.  */

static void
create_bb_after_loop (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return;

  split_edge (exit);
}

/* Generate code for PARTITION from the code in LOOP.  The loop is
   copied when COPY_P is true.  All the statements not flagged in the
   PARTITION bitmap are removed from the loop or from its copy.  The
   statements are indexed in sequence inside a basic block, and the
   basic blocks of a loop are taken in dom order.  Returns true when
   the code gen succeeded.  */

static bool
generate_loops_for_partition (struct loop *loop, bitmap partition, bool copy_p)
{
  unsigned i, x;
  gimple_stmt_iterator bsi;
  basic_block *bbs;

  if (copy_p)
    {
      loop = copy_loop_before (loop);
      create_preheader (loop, CP_SIMPLE_PREHEADERS);
      create_bb_after_loop (loop);
    }

  if (loop == NULL)
    return false;

  /* Remove stmts not in the PARTITION bitmap.  The order in which we
     visit the phi nodes and the statements is exactly as in
     stmts_from_loop.  */
  bbs = get_loop_body_in_dom_order (loop);

  if (MAY_HAVE_DEBUG_STMTS)
    for (x = 0, i = 0; i < loop->num_nodes; i++)
      {
	basic_block bb = bbs[i];

	for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	  if (!bitmap_bit_p (partition, x++))
	    reset_debug_uses (gsi_stmt (bsi));

	for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	  {
	    gimple stmt = gsi_stmt (bsi);
	    if (gimple_code (stmt) != GIMPLE_LABEL
		&& !is_gimple_debug (stmt)
		&& !bitmap_bit_p (partition, x++))
	      reset_debug_uses (stmt);
	  }
      }

  for (x = 0, i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
	if (!bitmap_bit_p (partition, x++))
	  {
	    gimple phi = gsi_stmt (bsi);
	    if (!is_gimple_reg (gimple_phi_result (phi)))
	      mark_virtual_phi_result_for_renaming (phi);
	    remove_phi_node (&bsi, true);
	  }
	else
	  gsi_next (&bsi);

      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
	{
	  gimple stmt = gsi_stmt (bsi);
	  if (gimple_code (stmt) != GIMPLE_LABEL
	      && !is_gimple_debug (stmt)
	      && !bitmap_bit_p (partition, x++))
	    {
	      unlink_stmt_vdef (stmt);
	      gsi_remove (&bsi, true);
	      release_defs (stmt);
	    }
	  else
	    gsi_next (&bsi);
	}
    }

  free (bbs);
  return true;
}

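/* A sketch of the indexing scheme used above (hypothetical loop
   body): for a block with one phi node and two statements,

   |  # a_1 = PHI <a_0, a_2>   <- bit 0 of PARTITION
   |  a_2 = a_1 + 1;           <- bit 1
   |  A[i] = a_2;              <- bit 2

   labels and debug statements do not consume a bit, so the bits line
   up with the numbering used by stmts_from_loop.  */
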
/* Build the size argument for a memset call.  */

static inline tree
build_size_arg_loc (location_t loc, tree nb_iter, tree op,
		    gimple_seq *stmt_list)
{
  gimple_seq stmts;
  tree x = fold_build2_loc (loc, MULT_EXPR, size_type_node,
			    fold_convert_loc (loc, size_type_node, nb_iter),
			    fold_convert_loc (loc, size_type_node,
					      TYPE_SIZE_UNIT (TREE_TYPE (op))));
  x = force_gimple_operand (x, &stmts, true, NULL);
  gimple_seq_add_seq (stmt_list, stmts);

  return x;
}

/* Generate a call to memset that zeroes the memory written by STMT
   through OP0.  The generated statements are inserted after BSI.  */

static void
generate_memset_zero (gimple stmt, tree op0, tree nb_iter,
		      gimple_stmt_iterator bsi)
{
  tree addr_base, nb_bytes;
  bool res = false;
  gimple_seq stmt_list = NULL, stmts;
  gimple fn_call;
  tree mem, fn;
  struct data_reference *dr = XCNEW (struct data_reference);
  location_t loc = gimple_location (stmt);

  DR_STMT (dr) = stmt;
  DR_REF (dr) = op0;
  res = dr_analyze_innermost (dr, loop_containing_stmt (stmt));
  gcc_assert (res && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0)));

  nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list);
  addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
  addr_base = fold_convert_loc (loc, sizetype, addr_base);

  /* Test for a negative stride, iterating over every element.  */
  if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
    {
      addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
				  fold_convert_loc (loc, sizetype, nb_bytes));
      addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
				  TYPE_SIZE_UNIT (TREE_TYPE (op0)));
    }

  addr_base = fold_build_pointer_plus_loc (loc,
					   DR_BASE_ADDRESS (dr), addr_base);
  mem = force_gimple_operand (addr_base, &stmts, true, NULL);
  gimple_seq_add_seq (&stmt_list, stmts);

  fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
  fn_call = gimple_build_call (fn, 3, mem, integer_zero_node, nb_bytes);
  gimple_seq_add_stmt (&stmt_list, fn_call);
  gsi_insert_seq_after (&bsi, stmt_list, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "generated memset zero\n");

  free_data_ref (dr);
}

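/* Sketch of the generated code (array and bound names are made up):

   |for (i = 0; i < n; i++)
   |  A[i] = 0;

   becomes

   |memset (&A[0], 0, n * sizeof (A[0]));

   For a negative unit stride, the computed base address is moved back
   by nb_bytes minus one element, so that memset still receives the
   lowest address of the region that the loop stores to.  */
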
/* Tries to generate a builtin function for the instructions of LOOP
   pointed to by the bits set in PARTITION.  Returns true when the
   operation succeeded.  */

static bool
generate_builtin (struct loop *loop, bitmap partition, bool copy_p)
{
  bool res = false;
  unsigned i, x = 0;
  basic_block *bbs;
  gimple write = NULL;
  gimple_stmt_iterator bsi;
  tree nb_iter = number_of_exit_cond_executions (loop);

  if (!nb_iter || nb_iter == chrec_dont_know)
    return false;

  bbs = get_loop_body_in_dom_order (loop);

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	x++;

      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	{
	  gimple stmt = gsi_stmt (bsi);

	  if (gimple_code (stmt) == GIMPLE_LABEL
	      || is_gimple_debug (stmt))
	    continue;

	  if (!bitmap_bit_p (partition, x++))
	    continue;

	  /* If the stmt has uses outside of the loop, fail.  */
	  if (stmt_has_scalar_dependences_outside_loop (stmt))
	    goto end;

	  if (is_gimple_assign (stmt)
	      && !is_gimple_reg (gimple_assign_lhs (stmt)))
	    {
	      /* Don't generate the builtins when there is more than
		 one memory write.  */
	      if (write != NULL)
		goto end;

	      write = stmt;
	      if (bb == loop->latch)
		nb_iter = number_of_latch_executions (loop);
	    }
	}
    }

  if (!stmt_with_adjacent_zero_store_dr_p (write))
    goto end;

  /* The new statements will be placed before LOOP.  */
  bsi = gsi_last_bb (loop_preheader_edge (loop)->src);
  generate_memset_zero (write, gimple_assign_lhs (write), nb_iter, bsi);
  res = true;

  /* If this is the last partition for which we generate code, we have
     to destroy the loop.  */
  if (!copy_p)
    {
      unsigned nbbs = loop->num_nodes;
      edge exit = single_exit (loop);
      basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
      redirect_edge_pred (exit, src);
      exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
      exit->flags |= EDGE_FALLTHRU;
      cancel_loop_tree (loop);
      rescan_loop_exit (exit, false, true);

      for (i = 0; i < nbbs; i++)
	delete_basic_block (bbs[i]);

      set_immediate_dominator (CDI_DOMINATORS, dest,
			       recompute_dominator (CDI_DOMINATORS, dest));
    }

 end:
  free (bbs);
  return res;
}

/* Generates code for PARTITION.  For simple loops, this function can
   generate a built-in.  */

static bool
generate_code_for_partition (struct loop *loop, bitmap partition, bool copy_p)
{
  if (generate_builtin (loop, partition, copy_p))
    return true;

  return generate_loops_for_partition (loop, partition, copy_p);
}

/* Returns true if the node V of RDG cannot be recomputed.  */

static bool
rdg_cannot_recompute_vertex_p (struct graph *rdg, int v)
{
  if (RDG_MEM_WRITE_STMT (rdg, v))
    return true;

  return false;
}

/* Returns true when the vertex V has already been generated in the
   current partition (V is in PROCESSED), or when V belongs to another
   partition and cannot be recomputed (V is not in REMAINING_STMTS).  */

static inline bool
already_processed_vertex_p (bitmap processed, int v)
{
  return (bitmap_bit_p (processed, v)
	  || !bitmap_bit_p (remaining_stmts, v));
}

/* Returns NULL when there is no anti-dependence among the successors
   of vertex V, otherwise returns the edge with the anti-dep.  */

static struct graph_edge *
has_anti_dependence (struct vertex *v)
{
  struct graph_edge *e;

  if (v->succ)
    for (e = v->succ; e; e = e->succ_next)
      if (RDGE_TYPE (e) == anti_dd)
	return e;

  return NULL;
}

/* Returns true when one of the predecessors of V is marked as having
   upstream memory writes, ignoring predecessors that are themselves
   stores.  */

static bool
predecessor_has_mem_write (struct graph *rdg, struct vertex *v)
{
  struct graph_edge *e;

  if (v->pred)
    for (e = v->pred; e; e = e->pred_next)
      if (bitmap_bit_p (upstream_mem_writes, e->src)
	  /* Don't consider flow channels: a write to memory followed
	     by a read from memory.  These channels allow the split of
	     the RDG in different partitions.  */
	  && !RDG_MEM_WRITE_STMT (rdg, e->src))
	return true;

  return false;
}

/* Initializes the upstream_mem_writes bitmap following the
   information from RDG.  */

static void
mark_nodes_having_upstream_mem_writes (struct graph *rdg)
{
  int v, x;
  bitmap seen = BITMAP_ALLOC (NULL);

  for (v = rdg->n_vertices - 1; v >= 0; v--)
    if (!bitmap_bit_p (seen, v))
      {
	unsigned i;
	VEC (int, heap) *nodes = VEC_alloc (int, heap, 3);

	graphds_dfs (rdg, &v, 1, &nodes, false, NULL);

	FOR_EACH_VEC_ELT (int, nodes, i, x)
	  {
	    if (!bitmap_set_bit (seen, x))
	      continue;

	    if (RDG_MEM_WRITE_STMT (rdg, x)
		|| predecessor_has_mem_write (rdg, &(rdg->vertices[x]))
		/* In anti dependences the read should occur before
		   the write, this is why both the read and the write
		   should be placed in the same partition.  */
		|| has_anti_dependence (&(rdg->vertices[x])))
	      {
		bitmap_set_bit (upstream_mem_writes, x);
	      }
	  }

	VEC_free (int, heap, nodes);
      }

  /* SEEN is only used locally: release it before returning.  */
  BITMAP_FREE (seen);
}

/* Returns true when vertex U has a memory write node as a predecessor
   in RDG.  */

static bool
has_upstream_mem_writes (int u)
{
  return bitmap_bit_p (upstream_mem_writes, u);
}

static void rdg_flag_vertex_and_dependent (struct graph *, int, bitmap, bitmap,
					   bitmap, bool *);

/* Flag the uses of U, propagating to the scalar uses of U only when
   U has upstream memory writes.  */

static void
rdg_flag_uses (struct graph *rdg, int u, bitmap partition, bitmap loops,
	       bitmap processed, bool *part_has_writes)
{
  use_operand_p use_p;
  struct vertex *x = &(rdg->vertices[u]);
  gimple stmt = RDGV_STMT (x);
  struct graph_edge *anti_dep = has_anti_dependence (x);

  /* Keep in the same partition the destination of an antidependence,
     because this is a store to the exact same location.  Putting this
     in another partition is bad for cache locality.  */
  if (anti_dep)
    {
      int v = anti_dep->dest;

      if (!already_processed_vertex_p (processed, v))
	rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
				       processed, part_has_writes);
    }

  if (gimple_code (stmt) != GIMPLE_PHI)
    {
      if ((use_p = gimple_vuse_op (stmt)) != NULL_USE_OPERAND_P)
	{
	  tree use = USE_FROM_PTR (use_p);

	  if (TREE_CODE (use) == SSA_NAME)
	    {
	      gimple def_stmt = SSA_NAME_DEF_STMT (use);
	      int v = rdg_vertex_for_stmt (rdg, def_stmt);

	      if (v >= 0
		  && !already_processed_vertex_p (processed, v))
		rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
					       processed, part_has_writes);
	    }
	}
    }

  if (is_gimple_assign (stmt) && has_upstream_mem_writes (u))
    {
      tree op0 = gimple_assign_lhs (stmt);

      /* Scalar channels don't have enough space for transmitting data
	 between tasks, unless we add more storage by privatizing.  */
      if (is_gimple_reg (op0))
	{
	  use_operand_p use_p;
	  imm_use_iterator iter;

	  FOR_EACH_IMM_USE_FAST (use_p, iter, op0)
	    {
	      int v = rdg_vertex_for_stmt (rdg, USE_STMT (use_p));

	      if (!already_processed_vertex_p (processed, v))
		rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
					       processed, part_has_writes);
	    }
	}
    }
}

/* Flag V from RDG as part of PARTITION, and also flag its loop number
   in LOOPS.  */

static void
rdg_flag_vertex (struct graph *rdg, int v, bitmap partition, bitmap loops,
		 bool *part_has_writes)
{
  struct loop *loop;

  if (!bitmap_set_bit (partition, v))
    return;

  loop = loop_containing_stmt (RDG_STMT (rdg, v));
  bitmap_set_bit (loops, loop->num);

  if (rdg_cannot_recompute_vertex_p (rdg, v))
    {
      *part_has_writes = true;
      bitmap_clear_bit (remaining_stmts, v);
    }
}

/* Flag in the bitmap PARTITION the vertex V and all its predecessors.
   Also flag their loop number in LOOPS.  */

static void
rdg_flag_vertex_and_dependent (struct graph *rdg, int v, bitmap partition,
			       bitmap loops, bitmap processed,
			       bool *part_has_writes)
{
  unsigned i;
  VEC (int, heap) *nodes = VEC_alloc (int, heap, 3);
  int x;

  bitmap_set_bit (processed, v);
  rdg_flag_uses (rdg, v, partition, loops, processed, part_has_writes);
  graphds_dfs (rdg, &v, 1, &nodes, false, remaining_stmts);
  rdg_flag_vertex (rdg, v, partition, loops, part_has_writes);

  FOR_EACH_VEC_ELT (int, nodes, i, x)
    if (!already_processed_vertex_p (processed, x))
      rdg_flag_vertex_and_dependent (rdg, x, partition, loops, processed,
				     part_has_writes);

  VEC_free (int, heap, nodes);
}

/* Initialize CONDS with all the condition statements from the basic
   blocks of LOOP.  */

static void
collect_condition_stmts (struct loop *loop, VEC (gimple, heap) **conds)
{
  unsigned i;
  edge e;
  VEC (edge, heap) *exits = get_loop_exit_edges (loop);

  FOR_EACH_VEC_ELT (edge, exits, i, e)
    {
      gimple cond = last_stmt (e->src);

      if (cond)
	VEC_safe_push (gimple, heap, *conds, cond);
    }

  VEC_free (edge, heap, exits);
}

/* Add to PARTITION all the exit condition statements for LOOPS
   together with all their dependent statements determined from
   RDG.  */

static void
rdg_flag_loop_exits (struct graph *rdg, bitmap loops, bitmap partition,
		     bitmap processed, bool *part_has_writes)
{
  unsigned i;
  bitmap_iterator bi;
  VEC (gimple, heap) *conds = VEC_alloc (gimple, heap, 3);

  EXECUTE_IF_SET_IN_BITMAP (loops, 0, i, bi)
    collect_condition_stmts (get_loop (i), &conds);

  while (!VEC_empty (gimple, conds))
    {
      gimple cond = VEC_pop (gimple, conds);
      int v = rdg_vertex_for_stmt (rdg, cond);
      bitmap new_loops = BITMAP_ALLOC (NULL);

      if (!already_processed_vertex_p (processed, v))
	rdg_flag_vertex_and_dependent (rdg, v, partition, new_loops, processed,
				       part_has_writes);

      EXECUTE_IF_SET_IN_BITMAP (new_loops, 0, i, bi)
	if (bitmap_set_bit (loops, i))
	  collect_condition_stmts (get_loop (i), &conds);

      BITMAP_FREE (new_loops);
    }

  VEC_free (gimple, heap, conds);
}

/* Returns a bitmap in which all the statements needed for computing
   the strongly connected component C of the RDG are flagged, also
   including the loop exit conditions.  */

static bitmap
build_rdg_partition_for_component (struct graph *rdg, rdgc c,
				   bool *part_has_writes)
{
  int i, v;
  bitmap partition = BITMAP_ALLOC (NULL);
  bitmap loops = BITMAP_ALLOC (NULL);
  bitmap processed = BITMAP_ALLOC (NULL);

  FOR_EACH_VEC_ELT (int, c->vertices, i, v)
    if (!already_processed_vertex_p (processed, v))
      rdg_flag_vertex_and_dependent (rdg, v, partition, loops, processed,
				     part_has_writes);

  rdg_flag_loop_exits (rdg, loops, partition, processed, part_has_writes);

  BITMAP_FREE (processed);
  BITMAP_FREE (loops);
  return partition;
}

/* Free memory for COMPONENTS.  */

static void
free_rdg_components (VEC (rdgc, heap) *components)
{
  int i;
  rdgc x;

  FOR_EACH_VEC_ELT (rdgc, components, i, x)
    {
      VEC_free (int, heap, x->vertices);
      free (x);
    }

  VEC_free (rdgc, heap, components);
}

/* Build the COMPONENTS vector with the strongly connected components
   of RDG in which the STARTING_VERTICES occur.  */

static void
rdg_build_components (struct graph *rdg, VEC (int, heap) *starting_vertices,
		      VEC (rdgc, heap) **components)
{
  int i, v;
  bitmap saved_components = BITMAP_ALLOC (NULL);
  int n_components = graphds_scc (rdg, NULL);
  VEC (int, heap) **all_components = XNEWVEC (VEC (int, heap) *, n_components);

  for (i = 0; i < n_components; i++)
    all_components[i] = VEC_alloc (int, heap, 3);

  for (i = 0; i < rdg->n_vertices; i++)
    VEC_safe_push (int, heap, all_components[rdg->vertices[i].component], i);

  FOR_EACH_VEC_ELT (int, starting_vertices, i, v)
    {
      int c = rdg->vertices[v].component;

      if (bitmap_set_bit (saved_components, c))
	{
	  rdgc x = XCNEW (struct rdg_component);
	  x->num = c;
	  x->vertices = all_components[c];

	  VEC_safe_push (rdgc, heap, *components, x);
	}
    }

  for (i = 0; i < n_components; i++)
    if (!bitmap_bit_p (saved_components, i))
      VEC_free (int, heap, all_components[i]);

  free (all_components);
  BITMAP_FREE (saved_components);
}

/* Returns true when it is possible to generate a builtin pattern for
   the PARTITION of RDG.  For the moment we detect only the memset
   zero pattern.  */

static bool
can_generate_builtin (struct graph *rdg, bitmap partition)
{
  unsigned i;
  bitmap_iterator bi;
  int nb_reads = 0;
  int nb_writes = 0;
  int stores_zero = 0;

  EXECUTE_IF_SET_IN_BITMAP (partition, 0, i, bi)
    if (RDG_MEM_READS_STMT (rdg, i))
      nb_reads++;
    else if (RDG_MEM_WRITE_STMT (rdg, i))
      {
	nb_writes++;
	if (stmt_with_adjacent_zero_store_dr_p (RDG_STMT (rdg, i)))
	  stores_zero++;
      }

  return stores_zero == 1 && nb_writes == 1 && nb_reads == 0;
}

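/* For example (illustrative statements): a partition whose only
   memory access is the store "A[i] = 0" qualifies (one write that
   stores zero, no reads), while a partition containing "A[i] = B[i]"
   does not, because of the read from B.  */
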
/* Returns true when PARTITION1 and PARTITION2 have similar memory
   accesses in RDG.  */

static bool
similar_memory_accesses (struct graph *rdg, bitmap partition1,
			 bitmap partition2)
{
  unsigned i, j;
  bitmap_iterator bi, bj;

  EXECUTE_IF_SET_IN_BITMAP (partition1, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
	|| RDG_MEM_READS_STMT (rdg, i))
      EXECUTE_IF_SET_IN_BITMAP (partition2, 0, j, bj)
	if (RDG_MEM_WRITE_STMT (rdg, j)
	    || RDG_MEM_READS_STMT (rdg, j))
	  if (rdg_has_similar_memory_accesses (rdg, i, j))
	    return true;

  return false;
}

/* Fuse all the partitions from PARTITIONS that contain similar memory
   references, i.e., we're taking care of cache locality.  This
   function does not fuse those partitions that contain patterns that
   can be code generated with builtins.  */

static void
fuse_partitions_with_similar_memory_accesses (struct graph *rdg,
					      VEC (bitmap, heap) **partitions)
{
  int p1, p2;
  bitmap partition1, partition2;

  FOR_EACH_VEC_ELT (bitmap, *partitions, p1, partition1)
    if (!can_generate_builtin (rdg, partition1))
      FOR_EACH_VEC_ELT (bitmap, *partitions, p2, partition2)
	if (p1 != p2
	    && !can_generate_builtin (rdg, partition2)
	    && similar_memory_accesses (rdg, partition1, partition2))
	  {
	    bitmap_ior_into (partition1, partition2);
	    VEC_ordered_remove (bitmap, *partitions, p2);
	    /* PARTITION2's bits have been merged into PARTITION1 and
	       its bitmap is no longer referenced: release it.  */
	    BITMAP_FREE (partition2);
	    p2--;
	  }
}

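/* Illustration (made-up example): an unrolled loop distributed into
   the two partitions {A[2*i] = ...} and {A[2*i+1] = ...} accesses
   the same array in both, so the partitions are fused back into one
   loop for cache locality, unless one of them can be emitted as a
   builtin.  */
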
/* Returns true when STMT will be code generated in a partition of RDG
   different than PART and that will not be code generated as a
   builtin.  */

static bool
stmt_generated_in_another_partition (struct graph *rdg, gimple stmt, int part,
				     VEC (bitmap, heap) *partitions)
{
  int p;
  bitmap pp;
  unsigned i;
  bitmap_iterator bi;

  FOR_EACH_VEC_ELT (bitmap, partitions, p, pp)
    if (p != part
	&& !can_generate_builtin (rdg, pp))
      EXECUTE_IF_SET_IN_BITMAP (pp, 0, i, bi)
	if (stmt == RDG_STMT (rdg, i))
	  return true;

  return false;
}

/* For each partition in PARTITIONS that will be code generated using
   a builtin, add its scalar computations used after the loop to
   PARTITION.  */

static void
add_scalar_computations_to_partition (struct graph *rdg,
				      VEC (bitmap, heap) *partitions,
				      bitmap partition)
{
  int p;
  bitmap pp;
  unsigned i;
  bitmap_iterator bi;
  bitmap l = BITMAP_ALLOC (NULL);
  bitmap pr = BITMAP_ALLOC (NULL);
  bool f = false;

  FOR_EACH_VEC_ELT (bitmap, partitions, p, pp)
    if (can_generate_builtin (rdg, pp))
      EXECUTE_IF_SET_IN_BITMAP (pp, 0, i, bi)
	if (stmt_has_scalar_dependences_outside_loop (RDG_STMT (rdg, i))
	    && !stmt_generated_in_another_partition (rdg, RDG_STMT (rdg, i), p,
						     partitions))
	  rdg_flag_vertex_and_dependent (rdg, i, partition, l, pr, &f);

  rdg_flag_loop_exits (rdg, l, partition, pr, &f);

  BITMAP_FREE (pr);
  BITMAP_FREE (l);
}

/* Aggregate several components into a useful partition that is
   registered in the PARTITIONS vector.  Partitions will be
   distributed in different loops.  */

static void
rdg_build_partitions (struct graph *rdg, VEC (rdgc, heap) *components,
		      VEC (int, heap) **other_stores,
		      VEC (bitmap, heap) **partitions, bitmap processed)
{
  int i;
  rdgc x;
  bitmap partition = BITMAP_ALLOC (NULL);

  FOR_EACH_VEC_ELT (rdgc, components, i, x)
    {
      bitmap np;
      bool part_has_writes = false;
      int v = VEC_index (int, x->vertices, 0);

      if (bitmap_bit_p (processed, v))
	continue;

      np = build_rdg_partition_for_component (rdg, x, &part_has_writes);
      bitmap_ior_into (partition, np);
      bitmap_ior_into (processed, np);
      BITMAP_FREE (np);

      if (part_has_writes)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "ldist useful partition:\n");
	      dump_bitmap (dump_file, partition);
	    }

	  VEC_safe_push (bitmap, heap, *partitions, partition);
	  partition = BITMAP_ALLOC (NULL);
	}
    }

  /* Add the nodes from the RDG that were not marked as processed, and
     that are used outside the current loop.  These are scalar
     computations that are not yet part of previous partitions.  */
  for (i = 0; i < rdg->n_vertices; i++)
    if (!bitmap_bit_p (processed, i)
	&& rdg_defs_used_in_other_loops_p (rdg, i))
      VEC_safe_push (int, heap, *other_stores, i);

  /* If there are still statements left in the OTHER_STORES array,
     create other components and partitions with these stores and
     their dependences.  */
  if (VEC_length (int, *other_stores) > 0)
    {
      VEC (rdgc, heap) *comps = VEC_alloc (rdgc, heap, 3);
      VEC (int, heap) *foo = VEC_alloc (int, heap, 3);

      rdg_build_components (rdg, *other_stores, &comps);
      rdg_build_partitions (rdg, comps, &foo, partitions, processed);

      VEC_free (int, heap, foo);
      free_rdg_components (comps);
    }

  add_scalar_computations_to_partition (rdg, *partitions, partition);

  /* If there is something left in the last partition, save it.  */
  if (bitmap_count_bits (partition) > 0)
    VEC_safe_push (bitmap, heap, *partitions, partition);
  else
    BITMAP_FREE (partition);

  fuse_partitions_with_similar_memory_accesses (rdg, partitions);
}

/* Dump to FILE the PARTITIONS.  */

static void
dump_rdg_partitions (FILE *file, VEC (bitmap, heap) *partitions)
{
  int i;
  bitmap partition;

  FOR_EACH_VEC_ELT (bitmap, partitions, i, partition)
    debug_bitmap_file (file, partition);
}

/* Debug PARTITIONS.  */
extern void debug_rdg_partitions (VEC (bitmap, heap) *);

DEBUG_FUNCTION void
debug_rdg_partitions (VEC (bitmap, heap) *partitions)
{
  dump_rdg_partitions (stderr, partitions);
}

/* Returns the number of read and write operations in the RDG.  */

static int
number_of_rw_in_rdg (struct graph *rdg)
{
  int i, res = 0;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
	++res;

      if (RDG_MEM_READS_STMT (rdg, i))
	++res;
    }

  return res;
}

/* Returns the number of read and write operations in a PARTITION of
   the RDG.  */

static int
number_of_rw_in_partition (struct graph *rdg, bitmap partition)
{
  int res = 0;
  unsigned i;
  bitmap_iterator ii;

  EXECUTE_IF_SET_IN_BITMAP (partition, 0, i, ii)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
	++res;

      if (RDG_MEM_READS_STMT (rdg, i))
	++res;
    }

  return res;
}

/* Returns true when one of the PARTITIONS contains all the read or
   write operations of RDG.  */

static bool
partition_contains_all_rw (struct graph *rdg, VEC (bitmap, heap) *partitions)
{
  int i;
  bitmap partition;
  int nrw = number_of_rw_in_rdg (rdg);

  FOR_EACH_VEC_ELT (bitmap, partitions, i, partition)
    if (nrw == number_of_rw_in_partition (rdg, partition))
      return true;

  return false;
}

/* Generate code from STARTING_VERTICES in RDG.  Returns the number of
   distributed loops.  */

static int
ldist_gen (struct loop *loop, struct graph *rdg,
	   VEC (int, heap) *starting_vertices)
{
  int i, nbp;
  VEC (rdgc, heap) *components = VEC_alloc (rdgc, heap, 3);
  VEC (bitmap, heap) *partitions = VEC_alloc (bitmap, heap, 3);
  VEC (int, heap) *other_stores = VEC_alloc (int, heap, 3);
  bitmap partition, processed = BITMAP_ALLOC (NULL);

  remaining_stmts = BITMAP_ALLOC (NULL);
  upstream_mem_writes = BITMAP_ALLOC (NULL);

  for (i = 0; i < rdg->n_vertices; i++)
    {
      bitmap_set_bit (remaining_stmts, i);

      /* Save in OTHER_STORES all the memory writes that are not in
	 STARTING_VERTICES.  */
      if (RDG_MEM_WRITE_STMT (rdg, i))
	{
	  int v;
	  unsigned j;
	  bool found = false;

	  FOR_EACH_VEC_ELT (int, starting_vertices, j, v)
	    if (i == v)
	      {
		found = true;
		break;
	      }

	  if (!found)
	    VEC_safe_push (int, heap, other_stores, i);
	}
    }

  mark_nodes_having_upstream_mem_writes (rdg);
  rdg_build_components (rdg, starting_vertices, &components);
  rdg_build_partitions (rdg, components, &other_stores, &partitions,
			processed);
  BITMAP_FREE (processed);
  nbp = VEC_length (bitmap, partitions);

  if (nbp <= 1
      || partition_contains_all_rw (rdg, partitions))
    goto ldist_done;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg_partitions (dump_file, partitions);

  FOR_EACH_VEC_ELT (bitmap, partitions, i, partition)
    if (!generate_code_for_partition (loop, partition, i < nbp - 1))
      goto ldist_done;

  rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
  update_ssa (TODO_update_ssa_only_virtuals | TODO_update_ssa);

 ldist_done:

  BITMAP_FREE (remaining_stmts);
  BITMAP_FREE (upstream_mem_writes);

  FOR_EACH_VEC_ELT (bitmap, partitions, i, partition)
    BITMAP_FREE (partition);

  VEC_free (int, heap, other_stores);
  VEC_free (bitmap, heap, partitions);
  free_rdg_components (components);
  return nbp;
}

/* Distributes the code from LOOP in such a way that producer
   statements are placed before consumer statements.  When STMTS is
   NULL, performs the maximal distribution; if STMTS is not NULL,
   tries to separate only these statements from the LOOP's body.
   Returns the number of distributed loops.  */

static int
distribute_loop (struct loop *loop, VEC (gimple, heap) *stmts)
{
  int res = 0;
  struct graph *rdg;
  gimple s;
  unsigned i;
  VEC (int, heap) *vertices;
  VEC (ddr_p, heap) *dependence_relations;
  VEC (data_reference_p, heap) *datarefs;
  VEC (loop_p, heap) *loop_nest;

  if (loop->num_nodes > 2)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "FIXME: Loop %d not distributed: it has more than two basic blocks.\n",
		 loop->num);

      return res;
    }

  datarefs = VEC_alloc (data_reference_p, heap, 10);
  dependence_relations = VEC_alloc (ddr_p, heap, 100);
  loop_nest = VEC_alloc (loop_p, heap, 3);
  rdg = build_rdg (loop, &loop_nest, &dependence_relations, &datarefs);

  if (!rdg)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "FIXME: Loop %d not distributed: failed to build the RDG.\n",
		 loop->num);

      free_dependence_relations (dependence_relations);
      free_data_refs (datarefs);
      VEC_free (loop_p, heap, loop_nest);
      return res;
    }

  vertices = VEC_alloc (int, heap, 3);

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg (dump_file, rdg);

  FOR_EACH_VEC_ELT (gimple, stmts, i, s)
    {
      int v = rdg_vertex_for_stmt (rdg, s);

      if (v >= 0)
	{
	  VEC_safe_push (int, heap, vertices, v);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "ldist asked to generate code for vertex %d\n", v);
	}
    }

  res = ldist_gen (loop, rdg, vertices);
  VEC_free (int, heap, vertices);
  free_rdg (rdg);
  free_dependence_relations (dependence_relations);
  free_data_refs (datarefs);
  VEC_free (loop_p, heap, loop_nest);
  return res;
}

/* Distribute all loops in the current function.  */

static unsigned int
tree_loop_distribution (void)
{
  struct loop *loop;
  loop_iterator li;
  int nb_generated_loops = 0;

  FOR_EACH_LOOP (li, loop, 0)
    {
      VEC (gimple, heap) *work_list = NULL;
      int num = loop->num;

      /* If the loop doesn't have a single exit we will fail anyway,
	 so do that early.  */
      if (!single_exit (loop))
	continue;

      /* If both flag_tree_loop_distribute_patterns and
	 flag_tree_loop_distribution are set, then only
	 distribute_patterns is executed.  */
      if (flag_tree_loop_distribute_patterns)
	{
	  /* With the following working list, we're asking
	     distribute_loop to separate from the rest of the loop the
	     stores of the form "A[i] = 0".  */
	  stores_zero_from_loop (loop, &work_list);

	  /* Do nothing if there are no patterns to be distributed.  */
	  if (VEC_length (gimple, work_list) > 0)
	    nb_generated_loops = distribute_loop (loop, work_list);
	}
      else if (flag_tree_loop_distribution)
	{
	  /* With the following working list, we're asking
	     distribute_loop to separate the stores of the loop: when
	     dependences allow, it will end up with one store per
	     loop.  */
	  stores_from_loop (loop, &work_list);

	  /* A simple heuristic for cache locality is to not split
	     stores to the same array.  Without this call, an unrolled
	     loop would be split into as many loops as unroll factor,
	     each loop storing in the same array.  */
	  remove_similar_memory_refs (&work_list);

	  nb_generated_loops = distribute_loop (loop, work_list);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  if (nb_generated_loops > 1)
	    fprintf (dump_file, "Loop %d distributed: split to %d loops.\n",
		     num, nb_generated_loops);
	  else
	    fprintf (dump_file, "Loop %d is the same.\n", num);
	}

      verify_loop_structure ();

      VEC_free (gimple, heap, work_list);
    }

  return 0;
}

static bool
gate_tree_loop_distribution (void)
{
  return flag_tree_loop_distribution
    || flag_tree_loop_distribute_patterns;
}

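/* Usage sketch (assumed standard GCC option spelling): the pass runs
   when either of the gating flags above is given on the command
   line, and its dump can be requested through the pass name declared
   below, e.g.

   |gcc -O2 -ftree-loop-distribution -fdump-tree-ldist-details f.c  */
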
struct gimple_opt_pass pass_loop_distribution =
{
 {
  GIMPLE_PASS,
  "ldist",			/* name */
  gate_tree_loop_distribution,	/* gate */
  tree_loop_distribution,	/* execute */
  NULL,				/* sub */
  NULL,				/* next */
  0,				/* static_pass_number */
  TV_TREE_LOOP_DISTRIBUTION,	/* tv_id */
  PROP_cfg | PROP_ssa,		/* properties_required */
  0,				/* properties_provided */
  0,				/* properties_destroyed */
  0,				/* todo_flags_start */
  TODO_ggc_collect
  | TODO_verify_ssa		/* todo_flags_finish */
 }
};