2011-08-31 Tom de Vries <tom@codesourcery.com>
[official-gcc.git] / gcc / tree-loop-distribution.c
blobf1db27a007d50388ac4c2aef7c43ea3a31402e5f
1 /* Loop distribution.
2 Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
5 and Sebastian Pop <sebastian.pop@amd.com>.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 3, or (at your option) any
12 later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 /* This pass performs loop distribution: for example, the loop
25 |DO I = 2, N
26 | A(I) = B(I) + C
27 | D(I) = A(I-1)*E
28 |ENDDO
30 is transformed to
32 |DOALL I = 2, N
33 | A(I) = B(I) + C
34 |ENDDO
36 |DOALL I = 2, N
37 | D(I) = A(I-1)*E
38 |ENDDO
40 This pass uses an RDG, Reduced Dependence Graph built on top of the
41 data dependence relations. The RDG is then topologically sorted to
42 obtain a map of information producers/consumers based on which it
43 generates the new loops. */
45 #include "config.h"
46 #include "system.h"
47 #include "coretypes.h"
48 #include "tree-flow.h"
49 #include "cfgloop.h"
50 #include "tree-chrec.h"
51 #include "tree-data-ref.h"
52 #include "tree-scalar-evolution.h"
53 #include "tree-pass.h"
55 /* If bit I is not set, it means that this node represents an
56 operation that has already been performed, and that should not be
57 performed again. This is the subgraph of remaining important
58 computations that is passed to the DFS algorithm for avoiding to
59 include several times the same stores in different loops. */
60 static bitmap remaining_stmts;
62 /* A node of the RDG is marked in this bitmap when it has as a
63 predecessor a node that writes to memory. */
64 static bitmap upstream_mem_writes;
66 /* Update the PHI nodes of NEW_LOOP. NEW_LOOP is a duplicate of
67 ORIG_LOOP. */
69 static void
70 update_phis_for_loop_copy (struct loop *orig_loop, struct loop *new_loop)
72 tree new_ssa_name;
73 gimple_stmt_iterator si_new, si_orig;
74 edge orig_loop_latch = loop_latch_edge (orig_loop);
75 edge orig_entry_e = loop_preheader_edge (orig_loop);
76 edge new_loop_entry_e = loop_preheader_edge (new_loop);
78 /* Scan the phis in the headers of the old and new loops
79 (they are organized in exactly the same order). */
80 for (si_new = gsi_start_phis (new_loop->header),
81 si_orig = gsi_start_phis (orig_loop->header);
82 !gsi_end_p (si_new) && !gsi_end_p (si_orig);
83 gsi_next (&si_new), gsi_next (&si_orig))
85 tree def;
86 source_location locus;
87 gimple phi_new = gsi_stmt (si_new);
88 gimple phi_orig = gsi_stmt (si_orig);
90 /* Add the first phi argument for the phi in NEW_LOOP (the one
91 associated with the entry of NEW_LOOP) */
92 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_entry_e);
93 locus = gimple_phi_arg_location_from_edge (phi_orig, orig_entry_e);
94 add_phi_arg (phi_new, def, new_loop_entry_e, locus);
96 /* Add the second phi argument for the phi in NEW_LOOP (the one
97 associated with the latch of NEW_LOOP) */
98 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
99 locus = gimple_phi_arg_location_from_edge (phi_orig, orig_loop_latch);
101 if (TREE_CODE (def) == SSA_NAME)
103 new_ssa_name = get_current_def (def);
105 if (!new_ssa_name)
106 /* This only happens if there are no definitions inside the
107 loop. Use the the invariant in the new loop as is. */
108 new_ssa_name = def;
110 else
111 /* Could be an integer. */
112 new_ssa_name = def;
114 add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop), locus);
118 /* Return a copy of LOOP placed before LOOP. */
120 static struct loop *
121 copy_loop_before (struct loop *loop)
123 struct loop *res;
124 edge preheader = loop_preheader_edge (loop);
126 if (!single_exit (loop))
127 return NULL;
129 initialize_original_copy_tables ();
130 res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, preheader);
131 free_original_copy_tables ();
133 if (!res)
134 return NULL;
136 update_phis_for_loop_copy (loop, res);
137 rename_variables_in_loop (res);
139 return res;
142 /* Creates an empty basic block after LOOP. */
144 static void
145 create_bb_after_loop (struct loop *loop)
147 edge exit = single_exit (loop);
149 if (!exit)
150 return;
152 split_edge (exit);
155 /* Generate code for PARTITION from the code in LOOP. The loop is
156 copied when COPY_P is true. All the statements not flagged in the
157 PARTITION bitmap are removed from the loop or from its copy. The
158 statements are indexed in sequence inside a basic block, and the
159 basic blocks of a loop are taken in dom order. Returns true when
160 the code gen succeeded. */
162 static bool
163 generate_loops_for_partition (struct loop *loop, bitmap partition, bool copy_p)
165 unsigned i, x;
166 gimple_stmt_iterator bsi;
167 basic_block *bbs;
169 if (copy_p)
171 loop = copy_loop_before (loop);
172 create_preheader (loop, CP_SIMPLE_PREHEADERS);
173 create_bb_after_loop (loop);
176 if (loop == NULL)
177 return false;
179 /* Remove stmts not in the PARTITION bitmap. The order in which we
180 visit the phi nodes and the statements is exactly as in
181 stmts_from_loop. */
182 bbs = get_loop_body_in_dom_order (loop);
184 if (MAY_HAVE_DEBUG_STMTS)
185 for (x = 0, i = 0; i < loop->num_nodes; i++)
187 basic_block bb = bbs[i];
189 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
190 if (!bitmap_bit_p (partition, x++))
191 reset_debug_uses (gsi_stmt (bsi));
193 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
195 gimple stmt = gsi_stmt (bsi);
196 if (gimple_code (stmt) != GIMPLE_LABEL
197 && !is_gimple_debug (stmt)
198 && !bitmap_bit_p (partition, x++))
199 reset_debug_uses (stmt);
203 for (x = 0, i = 0; i < loop->num_nodes; i++)
205 basic_block bb = bbs[i];
207 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
208 if (!bitmap_bit_p (partition, x++))
210 gimple phi = gsi_stmt (bsi);
211 if (!is_gimple_reg (gimple_phi_result (phi)))
212 mark_virtual_phi_result_for_renaming (phi);
213 remove_phi_node (&bsi, true);
215 else
216 gsi_next (&bsi);
218 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
220 gimple stmt = gsi_stmt (bsi);
221 if (gimple_code (stmt) != GIMPLE_LABEL
222 && !is_gimple_debug (stmt)
223 && !bitmap_bit_p (partition, x++))
225 unlink_stmt_vdef (stmt);
226 gsi_remove (&bsi, true);
227 release_defs (stmt);
229 else
230 gsi_next (&bsi);
234 free (bbs);
235 return true;
238 /* Build the size argument for a memset call. */
240 static inline tree
241 build_size_arg_loc (location_t loc, tree nb_iter, tree op,
242 gimple_seq *stmt_list)
244 gimple_seq stmts;
245 tree x = fold_build2_loc (loc, MULT_EXPR, size_type_node,
246 fold_convert_loc (loc, size_type_node, nb_iter),
247 fold_convert_loc (loc, size_type_node,
248 TYPE_SIZE_UNIT (TREE_TYPE (op))));
249 x = force_gimple_operand (x, &stmts, true, NULL);
250 gimple_seq_add_seq (stmt_list, stmts);
252 return x;
255 /* Generate a call to memset. Return true when the operation succeeded. */
257 static void
258 generate_memset_zero (gimple stmt, tree op0, tree nb_iter,
259 gimple_stmt_iterator bsi)
261 tree addr_base, nb_bytes;
262 bool res = false;
263 gimple_seq stmt_list = NULL, stmts;
264 gimple fn_call;
265 tree mem, fn;
266 struct data_reference *dr = XCNEW (struct data_reference);
267 location_t loc = gimple_location (stmt);
269 DR_STMT (dr) = stmt;
270 DR_REF (dr) = op0;
271 res = dr_analyze_innermost (dr);
272 gcc_assert (res && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0)));
274 nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list);
275 addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
276 addr_base = fold_convert_loc (loc, sizetype, addr_base);
278 /* Test for a negative stride, iterating over every element. */
279 if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
281 addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
282 fold_convert_loc (loc, sizetype, nb_bytes));
283 addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
284 TYPE_SIZE_UNIT (TREE_TYPE (op0)));
287 addr_base = fold_build_pointer_plus_loc (loc,
288 DR_BASE_ADDRESS (dr), addr_base);
289 mem = force_gimple_operand (addr_base, &stmts, true, NULL);
290 gimple_seq_add_seq (&stmt_list, stmts);
292 fn = build_fold_addr_expr (implicit_built_in_decls [BUILT_IN_MEMSET]);
293 fn_call = gimple_build_call (fn, 3, mem, integer_zero_node, nb_bytes);
294 gimple_seq_add_stmt (&stmt_list, fn_call);
295 gsi_insert_seq_after (&bsi, stmt_list, GSI_CONTINUE_LINKING);
297 if (dump_file && (dump_flags & TDF_DETAILS))
298 fprintf (dump_file, "generated memset zero\n");
300 free_data_ref (dr);
303 /* Tries to generate a builtin function for the instructions of LOOP
304 pointed to by the bits set in PARTITION. Returns true when the
305 operation succeeded. */
307 static bool
308 generate_builtin (struct loop *loop, bitmap partition, bool copy_p)
310 bool res = false;
311 unsigned i, x = 0;
312 basic_block *bbs;
313 gimple write = NULL;
314 gimple_stmt_iterator bsi;
315 tree nb_iter = number_of_exit_cond_executions (loop);
317 if (!nb_iter || nb_iter == chrec_dont_know)
318 return false;
320 bbs = get_loop_body_in_dom_order (loop);
322 for (i = 0; i < loop->num_nodes; i++)
324 basic_block bb = bbs[i];
326 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
327 x++;
329 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
331 gimple stmt = gsi_stmt (bsi);
333 if (gimple_code (stmt) != GIMPLE_LABEL
334 && !is_gimple_debug (stmt)
335 && bitmap_bit_p (partition, x++)
336 && is_gimple_assign (stmt)
337 && !is_gimple_reg (gimple_assign_lhs (stmt)))
339 /* Don't generate the builtins when there are more than
340 one memory write. */
341 if (write != NULL)
342 goto end;
344 write = stmt;
345 if (bb == loop->latch)
346 nb_iter = number_of_latch_executions (loop);
351 if (!stmt_with_adjacent_zero_store_dr_p (write))
352 goto end;
354 /* The new statements will be placed before LOOP. */
355 bsi = gsi_last_bb (loop_preheader_edge (loop)->src);
356 generate_memset_zero (write, gimple_assign_lhs (write), nb_iter, bsi);
357 res = true;
359 /* If this is the last partition for which we generate code, we have
360 to destroy the loop. */
361 if (!copy_p)
363 unsigned nbbs = loop->num_nodes;
364 edge exit = single_exit (loop);
365 basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
366 redirect_edge_pred (exit, src);
367 exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
368 exit->flags |= EDGE_FALLTHRU;
369 cancel_loop_tree (loop);
370 rescan_loop_exit (exit, false, true);
372 for (i = 0; i < nbbs; i++)
373 delete_basic_block (bbs[i]);
375 set_immediate_dominator (CDI_DOMINATORS, dest,
376 recompute_dominator (CDI_DOMINATORS, dest));
379 end:
380 free (bbs);
381 return res;
384 /* Generates code for PARTITION. For simple loops, this function can
385 generate a built-in. */
387 static bool
388 generate_code_for_partition (struct loop *loop, bitmap partition, bool copy_p)
390 if (generate_builtin (loop, partition, copy_p))
391 return true;
393 return generate_loops_for_partition (loop, partition, copy_p);
397 /* Returns true if the node V of RDG cannot be recomputed. */
399 static bool
400 rdg_cannot_recompute_vertex_p (struct graph *rdg, int v)
402 if (RDG_MEM_WRITE_STMT (rdg, v))
403 return true;
405 return false;
408 /* Returns true when the vertex V has already been generated in the
409 current partition (V is in PROCESSED), or when V belongs to another
410 partition and cannot be recomputed (V is not in REMAINING_STMTS). */
412 static inline bool
413 already_processed_vertex_p (bitmap processed, int v)
415 return (bitmap_bit_p (processed, v)
416 || !bitmap_bit_p (remaining_stmts, v));
419 /* Returns NULL when there is no anti-dependence among the successors
420 of vertex V, otherwise returns the edge with the anti-dep. */
422 static struct graph_edge *
423 has_anti_dependence (struct vertex *v)
425 struct graph_edge *e;
427 if (v->succ)
428 for (e = v->succ; e; e = e->succ_next)
429 if (RDGE_TYPE (e) == anti_dd)
430 return e;
432 return NULL;
435 /* Returns true when V has an anti-dependence edge among its successors. */
437 static bool
438 predecessor_has_mem_write (struct graph *rdg, struct vertex *v)
440 struct graph_edge *e;
442 if (v->pred)
443 for (e = v->pred; e; e = e->pred_next)
444 if (bitmap_bit_p (upstream_mem_writes, e->src)
445 /* Don't consider flow channels: a write to memory followed
446 by a read from memory. These channels allow the split of
447 the RDG in different partitions. */
448 && !RDG_MEM_WRITE_STMT (rdg, e->src))
449 return true;
451 return false;
454 /* Initializes the upstream_mem_writes bitmap following the
455 information from RDG. */
457 static void
458 mark_nodes_having_upstream_mem_writes (struct graph *rdg)
460 int v, x;
461 bitmap seen = BITMAP_ALLOC (NULL);
463 for (v = rdg->n_vertices - 1; v >= 0; v--)
464 if (!bitmap_bit_p (seen, v))
466 unsigned i;
467 VEC (int, heap) *nodes = VEC_alloc (int, heap, 3);
469 graphds_dfs (rdg, &v, 1, &nodes, false, NULL);
471 FOR_EACH_VEC_ELT (int, nodes, i, x)
473 if (!bitmap_set_bit (seen, x))
474 continue;
476 if (RDG_MEM_WRITE_STMT (rdg, x)
477 || predecessor_has_mem_write (rdg, &(rdg->vertices[x]))
478 /* In anti dependences the read should occur before
479 the write, this is why both the read and the write
480 should be placed in the same partition. */
481 || has_anti_dependence (&(rdg->vertices[x])))
483 bitmap_set_bit (upstream_mem_writes, x);
487 VEC_free (int, heap, nodes);
491 /* Returns true when vertex u has a memory write node as a predecessor
492 in RDG. */
494 static bool
495 has_upstream_mem_writes (int u)
497 return bitmap_bit_p (upstream_mem_writes, u);
500 static void rdg_flag_vertex_and_dependent (struct graph *, int, bitmap, bitmap,
501 bitmap, bool *);
503 /* Flag the uses of U stopping following the information from
504 upstream_mem_writes. */
506 static void
507 rdg_flag_uses (struct graph *rdg, int u, bitmap partition, bitmap loops,
508 bitmap processed, bool *part_has_writes)
510 use_operand_p use_p;
511 struct vertex *x = &(rdg->vertices[u]);
512 gimple stmt = RDGV_STMT (x);
513 struct graph_edge *anti_dep = has_anti_dependence (x);
515 /* Keep in the same partition the destination of an antidependence,
516 because this is a store to the exact same location. Putting this
517 in another partition is bad for cache locality. */
518 if (anti_dep)
520 int v = anti_dep->dest;
522 if (!already_processed_vertex_p (processed, v))
523 rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
524 processed, part_has_writes);
527 if (gimple_code (stmt) != GIMPLE_PHI)
529 if ((use_p = gimple_vuse_op (stmt)) != NULL_USE_OPERAND_P)
531 tree use = USE_FROM_PTR (use_p);
533 if (TREE_CODE (use) == SSA_NAME)
535 gimple def_stmt = SSA_NAME_DEF_STMT (use);
536 int v = rdg_vertex_for_stmt (rdg, def_stmt);
538 if (v >= 0
539 && !already_processed_vertex_p (processed, v))
540 rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
541 processed, part_has_writes);
546 if (is_gimple_assign (stmt) && has_upstream_mem_writes (u))
548 tree op0 = gimple_assign_lhs (stmt);
550 /* Scalar channels don't have enough space for transmitting data
551 between tasks, unless we add more storage by privatizing. */
552 if (is_gimple_reg (op0))
554 use_operand_p use_p;
555 imm_use_iterator iter;
557 FOR_EACH_IMM_USE_FAST (use_p, iter, op0)
559 int v = rdg_vertex_for_stmt (rdg, USE_STMT (use_p));
561 if (!already_processed_vertex_p (processed, v))
562 rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
563 processed, part_has_writes);
569 /* Flag V from RDG as part of PARTITION, and also flag its loop number
570 in LOOPS. */
572 static void
573 rdg_flag_vertex (struct graph *rdg, int v, bitmap partition, bitmap loops,
574 bool *part_has_writes)
576 struct loop *loop;
578 if (!bitmap_set_bit (partition, v))
579 return;
581 loop = loop_containing_stmt (RDG_STMT (rdg, v));
582 bitmap_set_bit (loops, loop->num);
584 if (rdg_cannot_recompute_vertex_p (rdg, v))
586 *part_has_writes = true;
587 bitmap_clear_bit (remaining_stmts, v);
591 /* Flag in the bitmap PARTITION the vertex V and all its predecessors.
592 Also flag their loop number in LOOPS. */
594 static void
595 rdg_flag_vertex_and_dependent (struct graph *rdg, int v, bitmap partition,
596 bitmap loops, bitmap processed,
597 bool *part_has_writes)
599 unsigned i;
600 VEC (int, heap) *nodes = VEC_alloc (int, heap, 3);
601 int x;
603 bitmap_set_bit (processed, v);
604 rdg_flag_uses (rdg, v, partition, loops, processed, part_has_writes);
605 graphds_dfs (rdg, &v, 1, &nodes, false, remaining_stmts);
606 rdg_flag_vertex (rdg, v, partition, loops, part_has_writes);
608 FOR_EACH_VEC_ELT (int, nodes, i, x)
609 if (!already_processed_vertex_p (processed, x))
610 rdg_flag_vertex_and_dependent (rdg, x, partition, loops, processed,
611 part_has_writes);
613 VEC_free (int, heap, nodes);
616 /* Initialize CONDS with all the condition statements from the basic
617 blocks of LOOP. */
619 static void
620 collect_condition_stmts (struct loop *loop, VEC (gimple, heap) **conds)
622 unsigned i;
623 edge e;
624 VEC (edge, heap) *exits = get_loop_exit_edges (loop);
626 FOR_EACH_VEC_ELT (edge, exits, i, e)
628 gimple cond = last_stmt (e->src);
630 if (cond)
631 VEC_safe_push (gimple, heap, *conds, cond);
634 VEC_free (edge, heap, exits);
637 /* Add to PARTITION all the exit condition statements for LOOPS
638 together with all their dependent statements determined from
639 RDG. */
641 static void
642 rdg_flag_loop_exits (struct graph *rdg, bitmap loops, bitmap partition,
643 bitmap processed, bool *part_has_writes)
645 unsigned i;
646 bitmap_iterator bi;
647 VEC (gimple, heap) *conds = VEC_alloc (gimple, heap, 3);
649 EXECUTE_IF_SET_IN_BITMAP (loops, 0, i, bi)
650 collect_condition_stmts (get_loop (i), &conds);
652 while (!VEC_empty (gimple, conds))
654 gimple cond = VEC_pop (gimple, conds);
655 int v = rdg_vertex_for_stmt (rdg, cond);
656 bitmap new_loops = BITMAP_ALLOC (NULL);
658 if (!already_processed_vertex_p (processed, v))
659 rdg_flag_vertex_and_dependent (rdg, v, partition, new_loops, processed,
660 part_has_writes);
662 EXECUTE_IF_SET_IN_BITMAP (new_loops, 0, i, bi)
663 if (bitmap_set_bit (loops, i))
664 collect_condition_stmts (get_loop (i), &conds);
666 BITMAP_FREE (new_loops);
669 VEC_free (gimple, heap, conds);
672 /* Returns a bitmap in which all the statements needed for computing
673 the strongly connected component C of the RDG are flagged, also
674 including the loop exit conditions. */
676 static bitmap
677 build_rdg_partition_for_component (struct graph *rdg, rdgc c,
678 bool *part_has_writes)
680 int i, v;
681 bitmap partition = BITMAP_ALLOC (NULL);
682 bitmap loops = BITMAP_ALLOC (NULL);
683 bitmap processed = BITMAP_ALLOC (NULL);
685 FOR_EACH_VEC_ELT (int, c->vertices, i, v)
686 if (!already_processed_vertex_p (processed, v))
687 rdg_flag_vertex_and_dependent (rdg, v, partition, loops, processed,
688 part_has_writes);
690 rdg_flag_loop_exits (rdg, loops, partition, processed, part_has_writes);
692 BITMAP_FREE (processed);
693 BITMAP_FREE (loops);
694 return partition;
697 /* Free memory for COMPONENTS. */
699 static void
700 free_rdg_components (VEC (rdgc, heap) *components)
702 int i;
703 rdgc x;
705 FOR_EACH_VEC_ELT (rdgc, components, i, x)
707 VEC_free (int, heap, x->vertices);
708 free (x);
711 VEC_free (rdgc, heap, components);
714 /* Build the COMPONENTS vector with the strongly connected components
715 of RDG in which the STARTING_VERTICES occur. */
717 static void
718 rdg_build_components (struct graph *rdg, VEC (int, heap) *starting_vertices,
719 VEC (rdgc, heap) **components)
721 int i, v;
722 bitmap saved_components = BITMAP_ALLOC (NULL);
723 int n_components = graphds_scc (rdg, NULL);
724 VEC (int, heap) **all_components = XNEWVEC (VEC (int, heap) *, n_components);
726 for (i = 0; i < n_components; i++)
727 all_components[i] = VEC_alloc (int, heap, 3);
729 for (i = 0; i < rdg->n_vertices; i++)
730 VEC_safe_push (int, heap, all_components[rdg->vertices[i].component], i);
732 FOR_EACH_VEC_ELT (int, starting_vertices, i, v)
734 int c = rdg->vertices[v].component;
736 if (bitmap_set_bit (saved_components, c))
738 rdgc x = XCNEW (struct rdg_component);
739 x->num = c;
740 x->vertices = all_components[c];
742 VEC_safe_push (rdgc, heap, *components, x);
746 for (i = 0; i < n_components; i++)
747 if (!bitmap_bit_p (saved_components, i))
748 VEC_free (int, heap, all_components[i]);
750 free (all_components);
751 BITMAP_FREE (saved_components);
754 /* Returns true when it is possible to generate a builtin pattern for
755 the PARTITION of RDG. For the moment we detect only the memset
756 zero pattern. */
758 static bool
759 can_generate_builtin (struct graph *rdg, bitmap partition)
761 unsigned i;
762 bitmap_iterator bi;
763 int nb_reads = 0;
764 int nb_writes = 0;
765 int stores_zero = 0;
767 EXECUTE_IF_SET_IN_BITMAP (partition, 0, i, bi)
768 if (RDG_MEM_READS_STMT (rdg, i))
769 nb_reads++;
770 else if (RDG_MEM_WRITE_STMT (rdg, i))
772 nb_writes++;
773 if (stmt_with_adjacent_zero_store_dr_p (RDG_STMT (rdg, i)))
774 stores_zero++;
777 return stores_zero == 1 && nb_writes == 1 && nb_reads == 0;
780 /* Returns true when PARTITION1 and PARTITION2 have similar memory
781 accesses in RDG. */
783 static bool
784 similar_memory_accesses (struct graph *rdg, bitmap partition1,
785 bitmap partition2)
787 unsigned i, j;
788 bitmap_iterator bi, bj;
790 EXECUTE_IF_SET_IN_BITMAP (partition1, 0, i, bi)
791 if (RDG_MEM_WRITE_STMT (rdg, i)
792 || RDG_MEM_READS_STMT (rdg, i))
793 EXECUTE_IF_SET_IN_BITMAP (partition2, 0, j, bj)
794 if (RDG_MEM_WRITE_STMT (rdg, j)
795 || RDG_MEM_READS_STMT (rdg, j))
796 if (rdg_has_similar_memory_accesses (rdg, i, j))
797 return true;
799 return false;
802 /* Fuse all the partitions from PARTITIONS that contain similar memory
803 references, i.e., we're taking care of cache locality. This
804 function does not fuse those partitions that contain patterns that
805 can be code generated with builtins. */
807 static void
808 fuse_partitions_with_similar_memory_accesses (struct graph *rdg,
809 VEC (bitmap, heap) **partitions)
811 int p1, p2;
812 bitmap partition1, partition2;
814 FOR_EACH_VEC_ELT (bitmap, *partitions, p1, partition1)
815 if (!can_generate_builtin (rdg, partition1))
816 FOR_EACH_VEC_ELT (bitmap, *partitions, p2, partition2)
817 if (p1 != p2
818 && !can_generate_builtin (rdg, partition2)
819 && similar_memory_accesses (rdg, partition1, partition2))
821 bitmap_ior_into (partition1, partition2);
822 VEC_ordered_remove (bitmap, *partitions, p2);
823 p2--;
827 /* Returns true when DEF is an SSA_NAME defined in LOOP and used after
828 the LOOP. */
830 static bool
831 ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
833 imm_use_iterator imm_iter;
834 use_operand_p use_p;
836 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
837 if (loop != loop_containing_stmt (USE_STMT (use_p)))
838 return true;
840 return false;
843 /* Returns true when STMT defines a scalar variable used after the
844 loop. */
846 static bool
847 stmt_has_scalar_dependences_outside_loop (gimple stmt)
849 tree name;
851 switch (gimple_code (stmt))
853 case GIMPLE_ASSIGN:
854 name = gimple_assign_lhs (stmt);
855 break;
857 case GIMPLE_PHI:
858 name = gimple_phi_result (stmt);
859 break;
861 default:
862 return false;
865 return TREE_CODE (name) == SSA_NAME
866 && ssa_name_has_uses_outside_loop_p (name, loop_containing_stmt (stmt));
869 /* Returns true when STMT will be code generated in a partition of RDG
870 different than PART and that will not be code generated as a
871 builtin. */
873 static bool
874 stmt_generated_in_another_partition (struct graph *rdg, gimple stmt, int part,
875 VEC (bitmap, heap) *partitions)
877 int p;
878 bitmap pp;
879 unsigned i;
880 bitmap_iterator bi;
882 FOR_EACH_VEC_ELT (bitmap, partitions, p, pp)
883 if (p != part
884 && !can_generate_builtin (rdg, pp))
885 EXECUTE_IF_SET_IN_BITMAP (pp, 0, i, bi)
886 if (stmt == RDG_STMT (rdg, i))
887 return true;
889 return false;
892 /* For each partition in PARTITIONS that will be code generated using
893 a builtin, add its scalar computations used after the loop to
894 PARTITION. */
896 static void
897 add_scalar_computations_to_partition (struct graph *rdg,
898 VEC (bitmap, heap) *partitions,
899 bitmap partition)
901 int p;
902 bitmap pp;
903 unsigned i;
904 bitmap_iterator bi;
905 bitmap l = BITMAP_ALLOC (NULL);
906 bitmap pr = BITMAP_ALLOC (NULL);
907 bool f = false;
909 FOR_EACH_VEC_ELT (bitmap, partitions, p, pp)
910 if (can_generate_builtin (rdg, pp))
911 EXECUTE_IF_SET_IN_BITMAP (pp, 0, i, bi)
912 if (stmt_has_scalar_dependences_outside_loop (RDG_STMT (rdg, i))
913 && !stmt_generated_in_another_partition (rdg, RDG_STMT (rdg, i), p,
914 partitions))
915 rdg_flag_vertex_and_dependent (rdg, i, partition, l, pr, &f);
917 rdg_flag_loop_exits (rdg, l, partition, pr, &f);
919 BITMAP_FREE (pr);
920 BITMAP_FREE (l);
923 /* Aggregate several components into a useful partition that is
924 registered in the PARTITIONS vector. Partitions will be
925 distributed in different loops. */
927 static void
928 rdg_build_partitions (struct graph *rdg, VEC (rdgc, heap) *components,
929 VEC (int, heap) **other_stores,
930 VEC (bitmap, heap) **partitions, bitmap processed)
932 int i;
933 rdgc x;
934 bitmap partition = BITMAP_ALLOC (NULL);
936 FOR_EACH_VEC_ELT (rdgc, components, i, x)
938 bitmap np;
939 bool part_has_writes = false;
940 int v = VEC_index (int, x->vertices, 0);
942 if (bitmap_bit_p (processed, v))
943 continue;
945 np = build_rdg_partition_for_component (rdg, x, &part_has_writes);
946 bitmap_ior_into (partition, np);
947 bitmap_ior_into (processed, np);
948 BITMAP_FREE (np);
950 if (part_has_writes)
952 if (dump_file && (dump_flags & TDF_DETAILS))
954 fprintf (dump_file, "ldist useful partition:\n");
955 dump_bitmap (dump_file, partition);
958 VEC_safe_push (bitmap, heap, *partitions, partition);
959 partition = BITMAP_ALLOC (NULL);
963 /* Add the nodes from the RDG that were not marked as processed, and
964 that are used outside the current loop. These are scalar
965 computations that are not yet part of previous partitions. */
966 for (i = 0; i < rdg->n_vertices; i++)
967 if (!bitmap_bit_p (processed, i)
968 && rdg_defs_used_in_other_loops_p (rdg, i))
969 VEC_safe_push (int, heap, *other_stores, i);
971 /* If there are still statements left in the OTHER_STORES array,
972 create other components and partitions with these stores and
973 their dependences. */
974 if (VEC_length (int, *other_stores) > 0)
976 VEC (rdgc, heap) *comps = VEC_alloc (rdgc, heap, 3);
977 VEC (int, heap) *foo = VEC_alloc (int, heap, 3);
979 rdg_build_components (rdg, *other_stores, &comps);
980 rdg_build_partitions (rdg, comps, &foo, partitions, processed);
982 VEC_free (int, heap, foo);
983 free_rdg_components (comps);
986 add_scalar_computations_to_partition (rdg, *partitions, partition);
988 /* If there is something left in the last partition, save it. */
989 if (bitmap_count_bits (partition) > 0)
990 VEC_safe_push (bitmap, heap, *partitions, partition);
991 else
992 BITMAP_FREE (partition);
994 fuse_partitions_with_similar_memory_accesses (rdg, partitions);
997 /* Dump to FILE the PARTITIONS. */
999 static void
1000 dump_rdg_partitions (FILE *file, VEC (bitmap, heap) *partitions)
1002 int i;
1003 bitmap partition;
1005 FOR_EACH_VEC_ELT (bitmap, partitions, i, partition)
1006 debug_bitmap_file (file, partition);
1009 /* Debug PARTITIONS. */
1010 extern void debug_rdg_partitions (VEC (bitmap, heap) *);
1012 DEBUG_FUNCTION void
1013 debug_rdg_partitions (VEC (bitmap, heap) *partitions)
1015 dump_rdg_partitions (stderr, partitions);
1018 /* Returns the number of read and write operations in the RDG. */
1020 static int
1021 number_of_rw_in_rdg (struct graph *rdg)
1023 int i, res = 0;
1025 for (i = 0; i < rdg->n_vertices; i++)
1027 if (RDG_MEM_WRITE_STMT (rdg, i))
1028 ++res;
1030 if (RDG_MEM_READS_STMT (rdg, i))
1031 ++res;
1034 return res;
1037 /* Returns the number of read and write operations in a PARTITION of
1038 the RDG. */
1040 static int
1041 number_of_rw_in_partition (struct graph *rdg, bitmap partition)
1043 int res = 0;
1044 unsigned i;
1045 bitmap_iterator ii;
1047 EXECUTE_IF_SET_IN_BITMAP (partition, 0, i, ii)
1049 if (RDG_MEM_WRITE_STMT (rdg, i))
1050 ++res;
1052 if (RDG_MEM_READS_STMT (rdg, i))
1053 ++res;
1056 return res;
1059 /* Returns true when one of the PARTITIONS contains all the read or
1060 write operations of RDG. */
1062 static bool
1063 partition_contains_all_rw (struct graph *rdg, VEC (bitmap, heap) *partitions)
1065 int i;
1066 bitmap partition;
1067 int nrw = number_of_rw_in_rdg (rdg);
1069 FOR_EACH_VEC_ELT (bitmap, partitions, i, partition)
1070 if (nrw == number_of_rw_in_partition (rdg, partition))
1071 return true;
1073 return false;
1076 /* Generate code from STARTING_VERTICES in RDG. Returns the number of
1077 distributed loops. */
1079 static int
1080 ldist_gen (struct loop *loop, struct graph *rdg,
1081 VEC (int, heap) *starting_vertices)
1083 int i, nbp;
1084 VEC (rdgc, heap) *components = VEC_alloc (rdgc, heap, 3);
1085 VEC (bitmap, heap) *partitions = VEC_alloc (bitmap, heap, 3);
1086 VEC (int, heap) *other_stores = VEC_alloc (int, heap, 3);
1087 bitmap partition, processed = BITMAP_ALLOC (NULL);
1089 remaining_stmts = BITMAP_ALLOC (NULL);
1090 upstream_mem_writes = BITMAP_ALLOC (NULL);
1092 for (i = 0; i < rdg->n_vertices; i++)
1094 bitmap_set_bit (remaining_stmts, i);
1096 /* Save in OTHER_STORES all the memory writes that are not in
1097 STARTING_VERTICES. */
1098 if (RDG_MEM_WRITE_STMT (rdg, i))
1100 int v;
1101 unsigned j;
1102 bool found = false;
1104 FOR_EACH_VEC_ELT (int, starting_vertices, j, v)
1105 if (i == v)
1107 found = true;
1108 break;
1111 if (!found)
1112 VEC_safe_push (int, heap, other_stores, i);
1116 mark_nodes_having_upstream_mem_writes (rdg);
1117 rdg_build_components (rdg, starting_vertices, &components);
1118 rdg_build_partitions (rdg, components, &other_stores, &partitions,
1119 processed);
1120 BITMAP_FREE (processed);
1121 nbp = VEC_length (bitmap, partitions);
1123 if (nbp <= 1
1124 || partition_contains_all_rw (rdg, partitions))
1125 goto ldist_done;
1127 if (dump_file && (dump_flags & TDF_DETAILS))
1128 dump_rdg_partitions (dump_file, partitions);
1130 FOR_EACH_VEC_ELT (bitmap, partitions, i, partition)
1131 if (!generate_code_for_partition (loop, partition, i < nbp - 1))
1132 goto ldist_done;
1134 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1135 update_ssa (TODO_update_ssa_only_virtuals | TODO_update_ssa);
1137 ldist_done:
1139 BITMAP_FREE (remaining_stmts);
1140 BITMAP_FREE (upstream_mem_writes);
1142 FOR_EACH_VEC_ELT (bitmap, partitions, i, partition)
1143 BITMAP_FREE (partition);
1145 VEC_free (int, heap, other_stores);
1146 VEC_free (bitmap, heap, partitions);
1147 free_rdg_components (components);
1148 return nbp;
1151 /* Distributes the code from LOOP in such a way that producer
1152 statements are placed before consumer statements. When STMTS is
1153 NULL, performs the maximal distribution, if STMTS is not NULL,
1154 tries to separate only these statements from the LOOP's body.
1155 Returns the number of distributed loops. */
1157 static int
1158 distribute_loop (struct loop *loop, VEC (gimple, heap) *stmts)
1160 int res = 0;
1161 struct graph *rdg;
1162 gimple s;
1163 unsigned i;
1164 VEC (int, heap) *vertices;
1165 VEC (ddr_p, heap) *dependence_relations;
1166 VEC (data_reference_p, heap) *datarefs;
1167 VEC (loop_p, heap) *loop_nest;
1169 if (loop->num_nodes > 2)
1171 if (dump_file && (dump_flags & TDF_DETAILS))
1172 fprintf (dump_file,
1173 "FIXME: Loop %d not distributed: it has more than two basic blocks.\n",
1174 loop->num);
1176 return res;
1179 datarefs = VEC_alloc (data_reference_p, heap, 10);
1180 dependence_relations = VEC_alloc (ddr_p, heap, 100);
1181 loop_nest = VEC_alloc (loop_p, heap, 3);
1182 rdg = build_rdg (loop, &loop_nest, &dependence_relations, &datarefs);
1184 if (!rdg)
1186 if (dump_file && (dump_flags & TDF_DETAILS))
1187 fprintf (dump_file,
1188 "FIXME: Loop %d not distributed: failed to build the RDG.\n",
1189 loop->num);
1191 free_dependence_relations (dependence_relations);
1192 free_data_refs (datarefs);
1193 VEC_free (loop_p, heap, loop_nest);
1194 return res;
1197 vertices = VEC_alloc (int, heap, 3);
1199 if (dump_file && (dump_flags & TDF_DETAILS))
1200 dump_rdg (dump_file, rdg);
1202 FOR_EACH_VEC_ELT (gimple, stmts, i, s)
1204 int v = rdg_vertex_for_stmt (rdg, s);
1206 if (v >= 0)
1208 VEC_safe_push (int, heap, vertices, v);
1210 if (dump_file && (dump_flags & TDF_DETAILS))
1211 fprintf (dump_file,
1212 "ldist asked to generate code for vertex %d\n", v);
1216 res = ldist_gen (loop, rdg, vertices);
1217 VEC_free (int, heap, vertices);
1218 free_rdg (rdg);
1219 free_dependence_relations (dependence_relations);
1220 free_data_refs (datarefs);
1221 VEC_free (loop_p, heap, loop_nest);
1222 return res;
1225 /* Distribute all loops in the current function. */
1227 static unsigned int
1228 tree_loop_distribution (void)
1230 struct loop *loop;
1231 loop_iterator li;
1232 int nb_generated_loops = 0;
1234 FOR_EACH_LOOP (li, loop, 0)
1236 VEC (gimple, heap) *work_list = NULL;
1237 int num = loop->num;
1239 /* If the loop doesn't have a single exit we will fail anyway,
1240 so do that early. */
1241 if (!single_exit (loop))
1242 continue;
1244 /* If both flag_tree_loop_distribute_patterns and
1245 flag_tree_loop_distribution are set, then only
1246 distribute_patterns is executed. */
1247 if (flag_tree_loop_distribute_patterns)
1249 /* With the following working list, we're asking
1250 distribute_loop to separate from the rest of the loop the
1251 stores of the form "A[i] = 0". */
1252 stores_zero_from_loop (loop, &work_list);
1254 /* Do nothing if there are no patterns to be distributed. */
1255 if (VEC_length (gimple, work_list) > 0)
1256 nb_generated_loops = distribute_loop (loop, work_list);
1258 else if (flag_tree_loop_distribution)
1260 /* With the following working list, we're asking
1261 distribute_loop to separate the stores of the loop: when
1262 dependences allow, it will end on having one store per
1263 loop. */
1264 stores_from_loop (loop, &work_list);
1266 /* A simple heuristic for cache locality is to not split
1267 stores to the same array. Without this call, an unrolled
1268 loop would be split into as many loops as unroll factor,
1269 each loop storing in the same array. */
1270 remove_similar_memory_refs (&work_list);
1272 nb_generated_loops = distribute_loop (loop, work_list);
1275 if (dump_file && (dump_flags & TDF_DETAILS))
1277 if (nb_generated_loops > 1)
1278 fprintf (dump_file, "Loop %d distributed: split to %d loops.\n",
1279 num, nb_generated_loops);
1280 else
1281 fprintf (dump_file, "Loop %d is the same.\n", num);
1284 verify_loop_structure ();
1286 VEC_free (gimple, heap, work_list);
1289 return 0;
1292 static bool
1293 gate_tree_loop_distribution (void)
1295 return flag_tree_loop_distribution
1296 || flag_tree_loop_distribute_patterns;
1299 struct gimple_opt_pass pass_loop_distribution =
1302 GIMPLE_PASS,
1303 "ldist", /* name */
1304 gate_tree_loop_distribution, /* gate */
1305 tree_loop_distribution, /* execute */
1306 NULL, /* sub */
1307 NULL, /* next */
1308 0, /* static_pass_number */
1309 TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
1310 PROP_cfg | PROP_ssa, /* properties_required */
1311 0, /* properties_provided */
1312 0, /* properties_destroyed */
1313 0, /* todo_flags_start */
1314 0 /* todo_flags_finish */