/* Loop distribution.
   Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.
   Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
   and Sebastian Pop <sebastian.pop@amd.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass performs loop distribution: for example, the loop

   |DO I = 2, N
   |    A(I) = B(I) + C
   |    D(I) = A(I-1)*E
   |ENDDO

   is transformed to

   |DOALL I = 2, N
   |   A(I) = B(I) + C
   |ENDDO
   |
   |DOALL I = 2, N
   |   D(I) = A(I-1)*E
   |ENDDO

   This pass uses an RDG, Reduced Dependence Graph built on top of the
   data dependence relations.  The RDG is then topologically sorted to
   obtain a map of information producers/consumers based on which it
   generates the new loops.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree-flow.h"
#include "cfgloop.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-pass.h"

/* If bit I is not set, it means that this node represents an
   operation that has already been performed, and that should not be
   performed again.  This is the subgraph of remaining important
   computations that is passed to the DFS algorithm to avoid including
   the same stores several times in different loops.  */
static bitmap remaining_stmts;

/* A node of the RDG is marked in this bitmap when it has as a
   predecessor a node that writes to memory.  */
static bitmap upstream_mem_writes;

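/* For illustration: REMAINING_STMTS and UPSTREAM_MEM_WRITES are
   pass-global state, (re)allocated for each distributed loop in
   ldist_gen.  E.g., once a store has been flagged in some partition,
   rdg_flag_vertex clears its bit in REMAINING_STMTS, so that later
   partitions treat it as already generated instead of duplicating it.  */
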
/* Update the PHI nodes of NEW_LOOP.  NEW_LOOP is a duplicate of
   ORIG_LOOP.  */

static void
update_phis_for_loop_copy (struct loop *orig_loop, struct loop *new_loop)
{
  tree new_ssa_name;
  gimple_stmt_iterator si_new, si_orig;
  edge orig_loop_latch = loop_latch_edge (orig_loop);
  edge orig_entry_e = loop_preheader_edge (orig_loop);
  edge new_loop_entry_e = loop_preheader_edge (new_loop);

  /* Scan the phis in the headers of the old and new loops
     (they are organized in exactly the same order).  */
  for (si_new = gsi_start_phis (new_loop->header),
       si_orig = gsi_start_phis (orig_loop->header);
       !gsi_end_p (si_new) && !gsi_end_p (si_orig);
       gsi_next (&si_new), gsi_next (&si_orig))
    {
      tree def;
      source_location locus;
      gimple phi_new = gsi_stmt (si_new);
      gimple phi_orig = gsi_stmt (si_orig);

      /* Add the first phi argument for the phi in NEW_LOOP (the one
         associated with the entry of NEW_LOOP).  */
      def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_entry_e);
      locus = gimple_phi_arg_location_from_edge (phi_orig, orig_entry_e);
      add_phi_arg (phi_new, def, new_loop_entry_e, locus);

      /* Add the second phi argument for the phi in NEW_LOOP (the one
         associated with the latch of NEW_LOOP).  */
      def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
      locus = gimple_phi_arg_location_from_edge (phi_orig, orig_loop_latch);

      if (TREE_CODE (def) == SSA_NAME)
        {
          new_ssa_name = get_current_def (def);

          if (!new_ssa_name)
            /* This only happens if there are no definitions inside the
               loop.  Use the invariant in the new loop as is.  */
            new_ssa_name = def;
        }
      else
        /* Could be an integer.  */
        new_ssa_name = def;

      add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop), locus);
    }
}

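/* Illustration of the update above: for a header phi like

   |i_1 = PHI <0 (preheader), i_2 (latch)>

   the duplicated loop's phi receives the same entry value 0 on its
   own preheader edge, while the latch argument i_2 is remapped to its
   current definition in the copied body when it is an SSA name.  */
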
/* Return a copy of LOOP placed before LOOP.  */

static struct loop *
copy_loop_before (struct loop *loop)
{
  struct loop *res;
  edge preheader = loop_preheader_edge (loop);

  if (!single_exit (loop))
    return NULL;

  initialize_original_copy_tables ();
  res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, preheader);
  free_original_copy_tables ();

  if (!res)
    return NULL;

  update_phis_for_loop_copy (loop, res);
  rename_variables_in_loop (res);

  return res;
}

/* Creates an empty basic block after LOOP.  */

static void
create_bb_after_loop (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return;

  split_edge (exit);
}

/* Generate code for PARTITION from the code in LOOP.  The loop is
   copied when COPY_P is true.  All the statements not flagged in the
   PARTITION bitmap are removed from the loop or from its copy.  The
   statements are indexed in sequence inside a basic block, and the
   basic blocks of a loop are taken in dom order.  Returns true when
   the code gen succeeded.  */

static bool
generate_loops_for_partition (struct loop *loop, bitmap partition, bool copy_p)
{
  unsigned i, x;
  gimple_stmt_iterator bsi;
  basic_block *bbs;

  if (copy_p)
    {
      loop = copy_loop_before (loop);
      create_preheader (loop, CP_SIMPLE_PREHEADERS);
      create_bb_after_loop (loop);
    }

  if (loop == NULL)
    return false;

  /* Remove stmts not in the PARTITION bitmap.  The order in which we
     visit the phi nodes and the statements is exactly as in
     stmts_from_loop.  */
  bbs = get_loop_body_in_dom_order (loop);

  if (MAY_HAVE_DEBUG_STMTS)
    for (x = 0, i = 0; i < loop->num_nodes; i++)
      {
        basic_block bb = bbs[i];

        for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
          if (!bitmap_bit_p (partition, x++))
            reset_debug_uses (gsi_stmt (bsi));

        for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
          {
            gimple stmt = gsi_stmt (bsi);
            if (gimple_code (stmt) != GIMPLE_LABEL
                && !is_gimple_debug (stmt)
                && !bitmap_bit_p (partition, x++))
              reset_debug_uses (stmt);
          }
      }

  for (x = 0, i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
        if (!bitmap_bit_p (partition, x++))
          {
            gimple phi = gsi_stmt (bsi);
            if (!is_gimple_reg (gimple_phi_result (phi)))
              mark_virtual_phi_result_for_renaming (phi);
            remove_phi_node (&bsi, true);
          }
        else
          gsi_next (&bsi);

      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
        {
          gimple stmt = gsi_stmt (bsi);
          if (gimple_code (stmt) != GIMPLE_LABEL
              && !is_gimple_debug (stmt)
              && !bitmap_bit_p (partition, x++))
            {
              unlink_stmt_vdef (stmt);
              gsi_remove (&bsi, true);
              release_defs (stmt);
            }
          else
            gsi_next (&bsi);
        }
    }

  free (bbs);
  return true;
}

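/* Illustration of the indexing scheme used above, assuming a loop
   body with a single basic block:

   |x == 0:  i_1 = PHI <...>     (each phi consumes one index)
   |x == 1:  A[i_1] = ...
   |x == 2:  B[i_1] = ...

   a PARTITION with bits 0 and 1 set keeps the induction phi and the
   store to A in this copy of the loop, and removes the store to B
   (labels and debug statements do not consume indices).  */
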
/* Build the size argument for a memset call.  */

static inline tree
build_size_arg_loc (location_t loc, tree nb_iter, tree op,
                    gimple_seq *stmt_list)
{
  gimple_seq stmts;
  tree x = fold_build2_loc (loc, MULT_EXPR, size_type_node,
                            fold_convert_loc (loc, size_type_node, nb_iter),
                            fold_convert_loc (loc, size_type_node,
                                              TYPE_SIZE_UNIT (TREE_TYPE (op))));
  x = force_gimple_operand (x, &stmts, true, NULL);
  gimple_seq_add_seq (stmt_list, stmts);

  return x;
}

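/* E.g., when NB_ITER is n and OP has type int, the size argument
   built above is the gimplified form of

   |(size_t) n * sizeof (int)
*/
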
/* Generate a call to memset that zeroes the memory written by STMT.  */

static void
generate_memset_zero (gimple stmt, tree op0, tree nb_iter,
                      gimple_stmt_iterator bsi)
{
  tree addr_base, nb_bytes;
  bool res = false;
  gimple_seq stmt_list = NULL, stmts;
  gimple fn_call;
  tree mem, fn;
  struct data_reference *dr = XCNEW (struct data_reference);
  location_t loc = gimple_location (stmt);

  DR_STMT (dr) = stmt;
  DR_REF (dr) = op0;

  res = dr_analyze_innermost (dr, loop_containing_stmt (stmt));
  gcc_assert (res && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0)));

  nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list);
  addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
  addr_base = fold_convert_loc (loc, sizetype, addr_base);

  /* Test for a negative stride, iterating over every element.  */
  if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
    {
      addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
                                  fold_convert_loc (loc, sizetype, nb_bytes));
      addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
                                  TYPE_SIZE_UNIT (TREE_TYPE (op0)));
    }

  addr_base = fold_build_pointer_plus_loc (loc,
                                           DR_BASE_ADDRESS (dr), addr_base);
  mem = force_gimple_operand (addr_base, &stmts, true, NULL);
  gimple_seq_add_seq (&stmt_list, stmts);

  fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
  fn_call = gimple_build_call (fn, 3, mem, integer_zero_node, nb_bytes);
  gimple_seq_add_stmt (&stmt_list, fn_call);
  gsi_insert_seq_after (&bsi, stmt_list, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "generated memset zero\n");

  free_data_ref (dr);
}

/* Tries to generate a builtin function for the instructions of LOOP
   pointed to by the bits set in PARTITION.  Returns true when the
   operation succeeded.  */

static bool
generate_builtin (struct loop *loop, bitmap partition, bool copy_p)
{
  bool res = false;
  unsigned i, x = 0;
  basic_block *bbs;
  gimple write = NULL;
  gimple_stmt_iterator bsi;
  tree nb_iter = number_of_exit_cond_executions (loop);

  if (!nb_iter || nb_iter == chrec_dont_know)
    return false;

  bbs = get_loop_body_in_dom_order (loop);

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        x++;

      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        {
          gimple stmt = gsi_stmt (bsi);

          if (gimple_code (stmt) != GIMPLE_LABEL
              && !is_gimple_debug (stmt)
              && bitmap_bit_p (partition, x++)
              && is_gimple_assign (stmt)
              && !is_gimple_reg (gimple_assign_lhs (stmt)))
            {
              /* Don't generate the builtins when there are more than
                 one memory write.  */
              if (write != NULL)
                goto end;

              write = stmt;
              if (bb == loop->latch)
                nb_iter = number_of_latch_executions (loop);
            }
        }
    }

  if (!stmt_with_adjacent_zero_store_dr_p (write))
    goto end;

  /* The new statements will be placed before LOOP.  */
  bsi = gsi_last_bb (loop_preheader_edge (loop)->src);
  generate_memset_zero (write, gimple_assign_lhs (write), nb_iter, bsi);
  res = true;

  /* If this is the last partition for which we generate code, we have
     to destroy the loop.  */
  if (!copy_p)
    {
      unsigned nbbs = loop->num_nodes;
      edge exit = single_exit (loop);
      basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
      redirect_edge_pred (exit, src);
      exit->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
      exit->flags |= EDGE_FALLTHRU;
      cancel_loop_tree (loop);
      rescan_loop_exit (exit, false, true);

      for (i = 0; i < nbbs; i++)
        delete_basic_block (bbs[i]);

      set_immediate_dominator (CDI_DOMINATORS, dest,
                               recompute_dominator (CDI_DOMINATORS, dest));
    }

 end:
  free (bbs);
  return res;
}

/* Generates code for PARTITION.  For simple loops, this function can
   generate a built-in.  */

static bool
generate_code_for_partition (struct loop *loop, bitmap partition, bool copy_p)
{
  if (generate_builtin (loop, partition, copy_p))
    return true;

  return generate_loops_for_partition (loop, partition, copy_p);
}

/* Returns true if the node V of RDG cannot be recomputed.  */

static bool
rdg_cannot_recompute_vertex_p (struct graph *rdg, int v)
{
  if (RDG_MEM_WRITE_STMT (rdg, v))
    return true;

  return false;
}

/* Returns true when the vertex V has already been generated in the
   current partition (V is in PROCESSED), or when V belongs to another
   partition and cannot be recomputed (V is not in REMAINING_STMTS).  */

static inline bool
already_processed_vertex_p (bitmap processed, int v)
{
  return (bitmap_bit_p (processed, v)
          || !bitmap_bit_p (remaining_stmts, v));
}

/* Returns NULL when there is no anti-dependence among the successors
   of vertex V, otherwise returns the edge with the anti-dep.  */

static struct graph_edge *
has_anti_dependence (struct vertex *v)
{
  struct graph_edge *e;

  for (e = v->succ; e; e = e->succ_next)
    if (RDGE_TYPE (e) == anti_dd)
      return e;

  return NULL;
}

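/* Example of an anti-dependence between successors: in

   |... = A[i];
   |A[i] = ...;

   the read has to occur before the write, so both statements must end
   up in the same partition (see also the comment in
   mark_nodes_having_upstream_mem_writes).  */
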
/* Returns true when a predecessor of V carries an upstream memory
   write, ignoring flow channels.  */

static bool
predecessor_has_mem_write (struct graph *rdg, struct vertex *v)
{
  struct graph_edge *e;

  for (e = v->pred; e; e = e->pred_next)
    if (bitmap_bit_p (upstream_mem_writes, e->src)
        /* Don't consider flow channels: a write to memory followed
           by a read from memory.  These channels allow the split of
           the RDG in different partitions.  */
        && !RDG_MEM_WRITE_STMT (rdg, e->src))
      return true;

  return false;
}

/* Initializes the upstream_mem_writes bitmap following the
   information from RDG.  */

static void
mark_nodes_having_upstream_mem_writes (struct graph *rdg)
{
  int v, x;
  bitmap seen = BITMAP_ALLOC (NULL);

  for (v = rdg->n_vertices - 1; v >= 0; v--)
    if (!bitmap_bit_p (seen, v))
      {
        unsigned i;
        VEC (int, heap) *nodes = VEC_alloc (int, heap, 3);

        graphds_dfs (rdg, &v, 1, &nodes, false, NULL);

        FOR_EACH_VEC_ELT (int, nodes, i, x)
          {
            if (!bitmap_set_bit (seen, x))
              continue;

            if (RDG_MEM_WRITE_STMT (rdg, x)
                || predecessor_has_mem_write (rdg, &(rdg->vertices[x]))
                /* In anti dependences the read should occur before
                   the write, this is why both the read and the write
                   should be placed in the same partition.  */
                || has_anti_dependence (&(rdg->vertices[x])))
              bitmap_set_bit (upstream_mem_writes, x);
          }

        VEC_free (int, heap, nodes);
      }

  BITMAP_FREE (seen);
}

/* Returns true when vertex u has a memory write node as a predecessor
   in RDG.  */

static bool
has_upstream_mem_writes (int u)
{
  return bitmap_bit_p (upstream_mem_writes, u);
}

static void rdg_flag_vertex_and_dependent (struct graph *, int, bitmap, bitmap,
                                           bitmap, bool *);

/* Flag the uses of U, stopping the walk according to the information
   from upstream_mem_writes.  */

static void
rdg_flag_uses (struct graph *rdg, int u, bitmap partition, bitmap loops,
               bitmap processed, bool *part_has_writes)
{
  use_operand_p use_p;
  struct vertex *x = &(rdg->vertices[u]);
  gimple stmt = RDGV_STMT (x);
  struct graph_edge *anti_dep = has_anti_dependence (x);

  /* Keep in the same partition the destination of an antidependence,
     because this is a store to the exact same location.  Putting this
     in another partition is bad for cache locality.  */
  if (anti_dep)
    {
      int v = anti_dep->dest;

      if (!already_processed_vertex_p (processed, v))
        rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
                                       processed, part_has_writes);
    }

  if (gimple_code (stmt) != GIMPLE_PHI)
    {
      if ((use_p = gimple_vuse_op (stmt)) != NULL_USE_OPERAND_P)
        {
          tree use = USE_FROM_PTR (use_p);

          if (TREE_CODE (use) == SSA_NAME)
            {
              gimple def_stmt = SSA_NAME_DEF_STMT (use);
              int v = rdg_vertex_for_stmt (rdg, def_stmt);

              if (v >= 0
                  && !already_processed_vertex_p (processed, v))
                rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
                                               processed, part_has_writes);
            }
        }
    }

  if (is_gimple_assign (stmt) && has_upstream_mem_writes (u))
    {
      tree op0 = gimple_assign_lhs (stmt);

      /* Scalar channels don't have enough space for transmitting data
         between tasks, unless we add more storage by privatizing.  */
      if (is_gimple_reg (op0))
        {
          use_operand_p use_p;
          imm_use_iterator iter;

          FOR_EACH_IMM_USE_FAST (use_p, iter, op0)
            {
              int v = rdg_vertex_for_stmt (rdg, USE_STMT (use_p));

              if (!already_processed_vertex_p (processed, v))
                rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
                                               processed, part_has_writes);
            }
        }
    }
}

/* Flag V from RDG as part of PARTITION, and also flag its loop number
   in LOOPS.  */

static void
rdg_flag_vertex (struct graph *rdg, int v, bitmap partition, bitmap loops,
                 bool *part_has_writes)
{
  struct loop *loop;

  if (!bitmap_set_bit (partition, v))
    return;

  loop = loop_containing_stmt (RDG_STMT (rdg, v));
  bitmap_set_bit (loops, loop->num);

  if (rdg_cannot_recompute_vertex_p (rdg, v))
    {
      *part_has_writes = true;
      bitmap_clear_bit (remaining_stmts, v);
    }
}

/* Flag in the bitmap PARTITION the vertex V and all its predecessors.
   Also flag their loop number in LOOPS.  */

static void
rdg_flag_vertex_and_dependent (struct graph *rdg, int v, bitmap partition,
                               bitmap loops, bitmap processed,
                               bool *part_has_writes)
{
  unsigned i;
  VEC (int, heap) *nodes = VEC_alloc (int, heap, 3);
  int x;

  bitmap_set_bit (processed, v);
  rdg_flag_uses (rdg, v, partition, loops, processed, part_has_writes);
  graphds_dfs (rdg, &v, 1, &nodes, false, remaining_stmts);
  rdg_flag_vertex (rdg, v, partition, loops, part_has_writes);

  FOR_EACH_VEC_ELT (int, nodes, i, x)
    if (!already_processed_vertex_p (processed, x))
      rdg_flag_vertex_and_dependent (rdg, x, partition, loops, processed,
                                     part_has_writes);

  VEC_free (int, heap, nodes);
}

/* Initialize CONDS with all the condition statements from the basic
   blocks of LOOP.  */

static void
collect_condition_stmts (struct loop *loop, VEC (gimple, heap) **conds)
{
  unsigned i;
  edge e;
  VEC (edge, heap) *exits = get_loop_exit_edges (loop);

  FOR_EACH_VEC_ELT (edge, exits, i, e)
    {
      gimple cond = last_stmt (e->src);

      if (cond)
        VEC_safe_push (gimple, heap, *conds, cond);
    }

  VEC_free (edge, heap, exits);
}

/* Add to PARTITION all the exit condition statements for LOOPS
   together with all their dependent statements determined from
   the RDG.  */

static void
rdg_flag_loop_exits (struct graph *rdg, bitmap loops, bitmap partition,
                     bitmap processed, bool *part_has_writes)
{
  unsigned i;
  bitmap_iterator bi;
  VEC (gimple, heap) *conds = VEC_alloc (gimple, heap, 3);

  EXECUTE_IF_SET_IN_BITMAP (loops, 0, i, bi)
    collect_condition_stmts (get_loop (i), &conds);

  while (!VEC_empty (gimple, conds))
    {
      gimple cond = VEC_pop (gimple, conds);
      int v = rdg_vertex_for_stmt (rdg, cond);
      bitmap new_loops = BITMAP_ALLOC (NULL);

      if (!already_processed_vertex_p (processed, v))
        rdg_flag_vertex_and_dependent (rdg, v, partition, new_loops,
                                       processed, part_has_writes);

      EXECUTE_IF_SET_IN_BITMAP (new_loops, 0, i, bi)
        if (bitmap_set_bit (loops, i))
          collect_condition_stmts (get_loop (i), &conds);

      BITMAP_FREE (new_loops);
    }

  VEC_free (gimple, heap, conds);
}

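/* Consequence of the above: every partition contains the exit
   condition of each loop it touches, so, e.g., the "i < n" compare
   and its dependent statements are replicated in each generated
   loop.  */
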
/* Returns a bitmap in which all the statements needed for computing
   the strongly connected component C of the RDG are flagged, also
   including the loop exit conditions.  */

static bitmap
build_rdg_partition_for_component (struct graph *rdg, rdgc c,
                                   bool *part_has_writes)
{
  int i, v;
  bitmap partition = BITMAP_ALLOC (NULL);
  bitmap loops = BITMAP_ALLOC (NULL);
  bitmap processed = BITMAP_ALLOC (NULL);

  FOR_EACH_VEC_ELT (int, c->vertices, i, v)
    if (!already_processed_vertex_p (processed, v))
      rdg_flag_vertex_and_dependent (rdg, v, partition, loops, processed,
                                     part_has_writes);

  rdg_flag_loop_exits (rdg, loops, partition, processed, part_has_writes);

  BITMAP_FREE (processed);
  BITMAP_FREE (loops);
  return partition;
}

/* Free memory for COMPONENTS.  */

static void
free_rdg_components (VEC (rdgc, heap) *components)
{
  int i;
  rdgc x;

  FOR_EACH_VEC_ELT (rdgc, components, i, x)
    {
      VEC_free (int, heap, x->vertices);
      free (x);
    }

  VEC_free (rdgc, heap, components);
}

/* Build the COMPONENTS vector with the strongly connected components
   of RDG in which the STARTING_VERTICES occur.  */

static void
rdg_build_components (struct graph *rdg, VEC (int, heap) *starting_vertices,
                      VEC (rdgc, heap) **components)
{
  int i, v;
  bitmap saved_components = BITMAP_ALLOC (NULL);
  int n_components = graphds_scc (rdg, NULL);
  VEC (int, heap) **all_components = XNEWVEC (VEC (int, heap) *, n_components);

  for (i = 0; i < n_components; i++)
    all_components[i] = VEC_alloc (int, heap, 3);

  for (i = 0; i < rdg->n_vertices; i++)
    VEC_safe_push (int, heap, all_components[rdg->vertices[i].component], i);

  FOR_EACH_VEC_ELT (int, starting_vertices, i, v)
    {
      int c = rdg->vertices[v].component;

      if (bitmap_set_bit (saved_components, c))
        {
          rdgc x = XCNEW (struct rdg_component);
          x->num = c;
          x->vertices = all_components[c];

          VEC_safe_push (rdgc, heap, *components, x);
        }
    }

  for (i = 0; i < n_components; i++)
    if (!bitmap_bit_p (saved_components, i))
      VEC_free (int, heap, all_components[i]);

  free (all_components);
  BITMAP_FREE (saved_components);
}

/* Returns true when it is possible to generate a builtin pattern for
   the PARTITION of RDG.  For the moment we detect only the memset
   zero pattern.  */

static bool
can_generate_builtin (struct graph *rdg, bitmap partition)
{
  unsigned i;
  bitmap_iterator bi;
  int nb_reads = 0;
  int nb_writes = 0;
  int stores_zero = 0;

  EXECUTE_IF_SET_IN_BITMAP (partition, 0, i, bi)
    if (RDG_MEM_READS_STMT (rdg, i))
      nb_reads++;
    else if (RDG_MEM_WRITE_STMT (rdg, i))
      {
        nb_writes++;
        if (stmt_with_adjacent_zero_store_dr_p (RDG_STMT (rdg, i)))
          stores_zero++;
      }

  return stores_zero == 1 && nb_writes == 1 && nb_reads == 0;
}

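/* For instance, a partition whose only memory access is the store

   |A[i] = 0;

   has nb_writes == 1, nb_reads == 0 and stores_zero == 1, so it
   qualifies; any load added to the partition disqualifies it.  */
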
/* Returns true when PARTITION1 and PARTITION2 have similar memory
   accesses in RDG.  */

static bool
similar_memory_accesses (struct graph *rdg, bitmap partition1,
                         bitmap partition2)
{
  unsigned i, j;
  bitmap_iterator bi, bj;

  EXECUTE_IF_SET_IN_BITMAP (partition1, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
        || RDG_MEM_READS_STMT (rdg, i))
      EXECUTE_IF_SET_IN_BITMAP (partition2, 0, j, bj)
        if (RDG_MEM_WRITE_STMT (rdg, j)
            || RDG_MEM_READS_STMT (rdg, j))
          if (rdg_has_similar_memory_accesses (rdg, i, j))
            return true;

  return false;
}

/* Fuse all the partitions from PARTITIONS that contain similar memory
   references, i.e., we're taking care of cache locality.  This
   function does not fuse those partitions that contain patterns that
   can be code generated with builtins.  */

static void
fuse_partitions_with_similar_memory_accesses (struct graph *rdg,
                                              VEC (bitmap, heap) **partitions)
{
  int p1, p2;
  bitmap partition1, partition2;

  FOR_EACH_VEC_ELT (bitmap, *partitions, p1, partition1)
    if (!can_generate_builtin (rdg, partition1))
      FOR_EACH_VEC_ELT (bitmap, *partitions, p2, partition2)
        if (p1 != p2
            && !can_generate_builtin (rdg, partition2)
            && similar_memory_accesses (rdg, partition1, partition2))
          {
            bitmap_ior_into (partition1, partition2);
            VEC_ordered_remove (bitmap, *partitions, p2);
            p2--;
          }
}

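/* E.g., two partitions that both read or write A[] are fused here, so
   that an unrolled loop with several stores into A is not distributed
   into that many loops all traversing the same array.  */
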
/* Returns true when DEF is an SSA_NAME defined in LOOP and used after
   the LOOP.  */

static bool
ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
{
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
    if (loop != loop_containing_stmt (USE_STMT (use_p)))
      return true;

  return false;
}

/* Returns true when STMT defines a scalar variable used after the
   loop.  */

static bool
stmt_has_scalar_dependences_outside_loop (gimple stmt)
{
  tree name;

  switch (gimple_code (stmt))
    {
    case GIMPLE_ASSIGN:
      name = gimple_assign_lhs (stmt);
      break;

    case GIMPLE_PHI:
      name = gimple_phi_result (stmt);
      break;

    default:
      return false;
    }

  return TREE_CODE (name) == SSA_NAME
    && ssa_name_has_uses_outside_loop_p (name, loop_containing_stmt (stmt));
}

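/* Illustration: for

   |loop:
   |  sum_1 = sum_0 + A[i];
   |  ...
   |endloop
   |... = sum_1;   <- use after the loop

   the statement defining sum_1 has scalar dependences outside the
   loop, so its computation must be kept in a generated loop instead
   of being dropped when the rest of its partition becomes a builtin.  */
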
/* Returns true when STMT will be code generated in a partition of RDG
   different than PART and that will not be code generated as a
   builtin.  */

static bool
stmt_generated_in_another_partition (struct graph *rdg, gimple stmt, int part,
                                     VEC (bitmap, heap) *partitions)
{
  int p;
  bitmap pp;
  unsigned i;
  bitmap_iterator bi;

  FOR_EACH_VEC_ELT (bitmap, partitions, p, pp)
    if (p != part
        && !can_generate_builtin (rdg, pp))
      EXECUTE_IF_SET_IN_BITMAP (pp, 0, i, bi)
        if (stmt == RDG_STMT (rdg, i))
          return true;

  return false;
}

/* For each partition in PARTITIONS that will be code generated using
   a builtin, add its scalar computations used after the loop to
   PARTITION.  */

static void
add_scalar_computations_to_partition (struct graph *rdg,
                                      VEC (bitmap, heap) *partitions,
                                      bitmap partition)
{
  int p;
  bitmap pp;
  unsigned i;
  bitmap_iterator bi;
  bitmap l = BITMAP_ALLOC (NULL);
  bitmap pr = BITMAP_ALLOC (NULL);
  bool f = false;

  FOR_EACH_VEC_ELT (bitmap, partitions, p, pp)
    if (can_generate_builtin (rdg, pp))
      EXECUTE_IF_SET_IN_BITMAP (pp, 0, i, bi)
        if (stmt_has_scalar_dependences_outside_loop (RDG_STMT (rdg, i))
            && !stmt_generated_in_another_partition (rdg, RDG_STMT (rdg, i), p,
                                                     partitions))
          rdg_flag_vertex_and_dependent (rdg, i, partition, l, pr, &f);

  rdg_flag_loop_exits (rdg, l, partition, pr, &f);

  BITMAP_FREE (pr);
  BITMAP_FREE (l);
}

/* Aggregate several components into a useful partition that is
   registered in the PARTITIONS vector.  Partitions will be
   distributed in different loops.  */

static void
rdg_build_partitions (struct graph *rdg, VEC (rdgc, heap) *components,
                      VEC (int, heap) **other_stores,
                      VEC (bitmap, heap) **partitions, bitmap processed)
{
  int i;
  rdgc x;
  bitmap partition = BITMAP_ALLOC (NULL);

  FOR_EACH_VEC_ELT (rdgc, components, i, x)
    {
      bitmap np;
      bool part_has_writes = false;
      int v = VEC_index (int, x->vertices, 0);

      if (bitmap_bit_p (processed, v))
        continue;

      np = build_rdg_partition_for_component (rdg, x, &part_has_writes);
      bitmap_ior_into (partition, np);
      bitmap_ior_into (processed, np);
      BITMAP_FREE (np);

      if (part_has_writes)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "ldist useful partition:\n");
              dump_bitmap (dump_file, partition);
            }

          VEC_safe_push (bitmap, heap, *partitions, partition);
          partition = BITMAP_ALLOC (NULL);
        }
    }

  /* Add the nodes from the RDG that were not marked as processed, and
     that are used outside the current loop.  These are scalar
     computations that are not yet part of previous partitions.  */
  for (i = 0; i < rdg->n_vertices; i++)
    if (!bitmap_bit_p (processed, i)
        && rdg_defs_used_in_other_loops_p (rdg, i))
      VEC_safe_push (int, heap, *other_stores, i);

  /* If there are still statements left in the OTHER_STORES array,
     create other components and partitions with these stores and
     their dependences.  */
  if (VEC_length (int, *other_stores) > 0)
    {
      VEC (rdgc, heap) *comps = VEC_alloc (rdgc, heap, 3);
      VEC (int, heap) *foo = VEC_alloc (int, heap, 3);

      rdg_build_components (rdg, *other_stores, &comps);
      rdg_build_partitions (rdg, comps, &foo, partitions, processed);

      VEC_free (int, heap, foo);
      free_rdg_components (comps);
    }

  add_scalar_computations_to_partition (rdg, *partitions, partition);

  /* If there is something left in the last partition, save it.  */
  if (bitmap_count_bits (partition) > 0)
    VEC_safe_push (bitmap, heap, *partitions, partition);
  else
    BITMAP_FREE (partition);

  fuse_partitions_with_similar_memory_accesses (rdg, partitions);
}

/* Dump to FILE the PARTITIONS.  */

static void
dump_rdg_partitions (FILE *file, VEC (bitmap, heap) *partitions)
{
  int i;
  bitmap partition;

  FOR_EACH_VEC_ELT (bitmap, partitions, i, partition)
    debug_bitmap_file (file, partition);
}

/* Debug PARTITIONS.  */
extern void debug_rdg_partitions (VEC (bitmap, heap) *);

DEBUG_FUNCTION void
debug_rdg_partitions (VEC (bitmap, heap) *partitions)
{
  dump_rdg_partitions (stderr, partitions);
}

/* Returns the number of read and write operations in the RDG.  */

static int
number_of_rw_in_rdg (struct graph *rdg)
{
  int i, res = 0;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
        ++res;

      if (RDG_MEM_READS_STMT (rdg, i))
        ++res;
    }

  return res;
}

/* Returns the number of read and write operations in a PARTITION of
   the RDG.  */

static int
number_of_rw_in_partition (struct graph *rdg, bitmap partition)
{
  int res = 0;
  unsigned i;
  bitmap_iterator ii;

  EXECUTE_IF_SET_IN_BITMAP (partition, 0, i, ii)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
        ++res;

      if (RDG_MEM_READS_STMT (rdg, i))
        ++res;
    }

  return res;
}

/* Returns true when one of the PARTITIONS contains all the read or
   write operations of RDG.  */

static bool
partition_contains_all_rw (struct graph *rdg, VEC (bitmap, heap) *partitions)
{
  int i;
  bitmap partition;
  int nrw = number_of_rw_in_rdg (rdg);

  FOR_EACH_VEC_ELT (bitmap, partitions, i, partition)
    if (nrw == number_of_rw_in_partition (rdg, partition))
      return true;

  return false;
}

/* Generate code from STARTING_VERTICES in RDG.  Returns the number of
   distributed loops.  */

static int
ldist_gen (struct loop *loop, struct graph *rdg,
           VEC (int, heap) *starting_vertices)
{
  int i, nbp;
  VEC (rdgc, heap) *components = VEC_alloc (rdgc, heap, 3);
  VEC (bitmap, heap) *partitions = VEC_alloc (bitmap, heap, 3);
  VEC (int, heap) *other_stores = VEC_alloc (int, heap, 3);
  bitmap partition, processed = BITMAP_ALLOC (NULL);

  remaining_stmts = BITMAP_ALLOC (NULL);
  upstream_mem_writes = BITMAP_ALLOC (NULL);

  for (i = 0; i < rdg->n_vertices; i++)
    {
      bitmap_set_bit (remaining_stmts, i);

      /* Save in OTHER_STORES all the memory writes that are not in
         STARTING_VERTICES.  */
      if (RDG_MEM_WRITE_STMT (rdg, i))
        {
          int v;
          unsigned j;
          bool found = false;

          FOR_EACH_VEC_ELT (int, starting_vertices, j, v)
            if (i == v)
              {
                found = true;
                break;
              }

          if (!found)
            VEC_safe_push (int, heap, other_stores, i);
        }
    }

  mark_nodes_having_upstream_mem_writes (rdg);
  rdg_build_components (rdg, starting_vertices, &components);
  rdg_build_partitions (rdg, components, &other_stores, &partitions,
                        processed);
  BITMAP_FREE (processed);
  nbp = VEC_length (bitmap, partitions);

  if (nbp <= 1
      || partition_contains_all_rw (rdg, partitions))
    goto ldist_done;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg_partitions (dump_file, partitions);

  FOR_EACH_VEC_ELT (bitmap, partitions, i, partition)
    if (!generate_code_for_partition (loop, partition, i < nbp - 1))
      goto ldist_done;

  rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
  update_ssa (TODO_update_ssa_only_virtuals | TODO_update_ssa);

 ldist_done:

  BITMAP_FREE (remaining_stmts);
  BITMAP_FREE (upstream_mem_writes);

  FOR_EACH_VEC_ELT (bitmap, partitions, i, partition)
    BITMAP_FREE (partition);

  VEC_free (int, heap, other_stores);
  VEC_free (bitmap, heap, partitions);
  free_rdg_components (components);
  return nbp;
}

/* Distributes the code from LOOP in such a way that producer
   statements are placed before consumer statements.  When STMTS is
   NULL, performs the maximal distribution; if STMTS is not NULL,
   tries to separate only these statements from the LOOP's body.
   Returns the number of distributed loops.  */

static int
distribute_loop (struct loop *loop, VEC (gimple, heap) *stmts)
{
  int res = 0;
  struct graph *rdg;
  gimple s;
  unsigned i;
  VEC (int, heap) *vertices;
  VEC (ddr_p, heap) *dependence_relations;
  VEC (data_reference_p, heap) *datarefs;
  VEC (loop_p, heap) *loop_nest;

  if (loop->num_nodes > 2)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "FIXME: Loop %d not distributed: it has more than two basic blocks.\n",
                 loop->num);

      return res;
    }

  datarefs = VEC_alloc (data_reference_p, heap, 10);
  dependence_relations = VEC_alloc (ddr_p, heap, 100);
  loop_nest = VEC_alloc (loop_p, heap, 3);
  rdg = build_rdg (loop, &loop_nest, &dependence_relations, &datarefs);

  if (!rdg)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "FIXME: Loop %d not distributed: failed to build the RDG.\n",
                 loop->num);

      free_dependence_relations (dependence_relations);
      free_data_refs (datarefs);
      VEC_free (loop_p, heap, loop_nest);
      return res;
    }

  vertices = VEC_alloc (int, heap, 3);

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg (dump_file, rdg);

  FOR_EACH_VEC_ELT (gimple, stmts, i, s)
    {
      int v = rdg_vertex_for_stmt (rdg, s);

      if (v >= 0)
        {
          VEC_safe_push (int, heap, vertices, v);

          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file,
                     "ldist asked to generate code for vertex %d\n", v);
        }
    }

  res = ldist_gen (loop, rdg, vertices);
  VEC_free (int, heap, vertices);
  free_rdg (rdg);
  free_dependence_relations (dependence_relations);
  free_data_refs (datarefs);
  VEC_free (loop_p, heap, loop_nest);
  return res;
}

/* Distribute all loops in the current function.  */

static unsigned int
tree_loop_distribution (void)
{
  struct loop *loop;
  loop_iterator li;
  int nb_generated_loops = 0;

  FOR_EACH_LOOP (li, loop, 0)
    {
      VEC (gimple, heap) *work_list = NULL;
      int num = loop->num;

      /* If the loop doesn't have a single exit we will fail anyway,
         so do that early.  */
      if (!single_exit (loop))
        continue;

      /* If both flag_tree_loop_distribute_patterns and
         flag_tree_loop_distribution are set, then only
         distribute_patterns is executed.  */
      if (flag_tree_loop_distribute_patterns)
        {
          /* With the following working list, we're asking
             distribute_loop to separate from the rest of the loop the
             stores of the form "A[i] = 0".  */
          stores_zero_from_loop (loop, &work_list);

          /* Do nothing if there are no patterns to be distributed.  */
          if (VEC_length (gimple, work_list) > 0)
            nb_generated_loops = distribute_loop (loop, work_list);
        }
      else if (flag_tree_loop_distribution)
        {
          /* With the following working list, we're asking
             distribute_loop to separate the stores of the loop: when
             dependences allow, it will end up with one store per
             loop.  */
          stores_from_loop (loop, &work_list);

          /* A simple heuristic for cache locality is to not split
             stores to the same array.  Without this call, an unrolled
             loop would be split into as many loops as unroll factor,
             each loop storing in the same array.  */
          remove_similar_memory_refs (&work_list);

          nb_generated_loops = distribute_loop (loop, work_list);
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          if (nb_generated_loops > 1)
            fprintf (dump_file, "Loop %d distributed: split to %d loops.\n",
                     num, nb_generated_loops);
          else
            fprintf (dump_file, "Loop %d is the same.\n", num);
        }

#ifdef ENABLE_CHECKING
      verify_loop_structure ();
#endif

      VEC_free (gimple, heap, work_list);
    }

  return 0;
}

static bool
gate_tree_loop_distribution (void)
{
  return flag_tree_loop_distribution
    || flag_tree_loop_distribute_patterns;
}

struct gimple_opt_pass pass_loop_distribution =
{
 {
  GIMPLE_PASS,
  "ldist",                      /* name */
  gate_tree_loop_distribution,  /* gate */
  tree_loop_distribution,       /* execute */
  NULL,                         /* sub */
  NULL,                         /* next */
  0,                            /* static_pass_number */
  TV_TREE_LOOP_DISTRIBUTION,    /* tv_id */
  PROP_cfg | PROP_ssa,          /* properties_required */
  0,                            /* properties_provided */
  0,                            /* properties_destroyed */
  0,                            /* todo_flags_start */
  0                             /* todo_flags_finish */
 }
};