2 Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
5 and Sebastian Pop <sebastian.pop@amd.com>.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 3, or (at your option) any
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 /* This pass performs loop distribution: for example, the loop
40 This pass uses an RDG, Reduced Dependence Graph built on top of the
41 data dependence relations. The RDG is then topologically sorted to
42 obtain a map of information producers/consumers based on which it
43 generates the new loops. */
47 #include "coretypes.h"
48 #include "tree-flow.h"
50 #include "tree-chrec.h"
51 #include "tree-data-ref.h"
52 #include "tree-scalar-evolution.h"
53 #include "tree-pass.h"
55 enum partition_kind
{ PKIND_NORMAL
, PKIND_MEMSET
, PKIND_MEMCPY
};
57 typedef struct partition_s
61 enum partition_kind kind
;
62 /* data-references a kind != PKIND_NORMAL partition is about. */
63 data_reference_p main_dr
;
64 data_reference_p secondary_dr
;
68 /* Allocate and initialize a partition from BITMAP. */
71 partition_alloc (bitmap stmts
)
73 partition_t partition
= XCNEW (struct partition_s
);
74 partition
->stmts
= stmts
? stmts
: BITMAP_ALLOC (NULL
);
75 partition
->has_writes
= false;
76 partition
->kind
= PKIND_NORMAL
;
83 partition_free (partition_t partition
)
85 BITMAP_FREE (partition
->stmts
);
89 /* Returns true if the partition can be generated as a builtin. */
92 partition_builtin_p (partition_t partition
)
94 return partition
->kind
!= PKIND_NORMAL
;
97 /* Returns true if the partition has an writes. */
100 partition_has_writes (partition_t partition
)
102 return partition
->has_writes
;
105 /* If bit I is not set, it means that this node represents an
106 operation that has already been performed, and that should not be
107 performed again. This is the subgraph of remaining important
108 computations that is passed to the DFS algorithm for avoiding to
109 include several times the same stores in different loops. */
110 static bitmap remaining_stmts
;
112 /* A node of the RDG is marked in this bitmap when it has as a
113 predecessor a node that writes to memory. */
114 static bitmap upstream_mem_writes
;
116 /* Returns true when DEF is an SSA_NAME defined in LOOP and used after
120 ssa_name_has_uses_outside_loop_p (tree def
, loop_p loop
)
122 imm_use_iterator imm_iter
;
125 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, def
)
127 gimple use_stmt
= USE_STMT (use_p
);
128 if (!is_gimple_debug (use_stmt
)
129 && loop
!= loop_containing_stmt (use_stmt
))
136 /* Returns true when STMT defines a scalar variable used after the
140 stmt_has_scalar_dependences_outside_loop (loop_p loop
, gimple stmt
)
145 if (gimple_code (stmt
) == GIMPLE_PHI
)
146 return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt
), loop
);
148 FOR_EACH_SSA_DEF_OPERAND (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
149 if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p
), loop
))
155 /* Update the PHI nodes of NEW_LOOP. NEW_LOOP is a duplicate of
159 update_phis_for_loop_copy (struct loop
*orig_loop
, struct loop
*new_loop
)
162 gimple_stmt_iterator si_new
, si_orig
;
163 edge orig_loop_latch
= loop_latch_edge (orig_loop
);
164 edge orig_entry_e
= loop_preheader_edge (orig_loop
);
165 edge new_loop_entry_e
= loop_preheader_edge (new_loop
);
167 /* Scan the phis in the headers of the old and new loops
168 (they are organized in exactly the same order). */
169 for (si_new
= gsi_start_phis (new_loop
->header
),
170 si_orig
= gsi_start_phis (orig_loop
->header
);
171 !gsi_end_p (si_new
) && !gsi_end_p (si_orig
);
172 gsi_next (&si_new
), gsi_next (&si_orig
))
175 source_location locus
;
176 gimple phi_new
= gsi_stmt (si_new
);
177 gimple phi_orig
= gsi_stmt (si_orig
);
179 /* Add the first phi argument for the phi in NEW_LOOP (the one
180 associated with the entry of NEW_LOOP) */
181 def
= PHI_ARG_DEF_FROM_EDGE (phi_orig
, orig_entry_e
);
182 locus
= gimple_phi_arg_location_from_edge (phi_orig
, orig_entry_e
);
183 add_phi_arg (phi_new
, def
, new_loop_entry_e
, locus
);
185 /* Add the second phi argument for the phi in NEW_LOOP (the one
186 associated with the latch of NEW_LOOP) */
187 def
= PHI_ARG_DEF_FROM_EDGE (phi_orig
, orig_loop_latch
);
188 locus
= gimple_phi_arg_location_from_edge (phi_orig
, orig_loop_latch
);
190 if (TREE_CODE (def
) == SSA_NAME
)
192 new_ssa_name
= get_current_def (def
);
195 /* This only happens if there are no definitions inside the
196 loop. Use the the invariant in the new loop as is. */
200 /* Could be an integer. */
203 add_phi_arg (phi_new
, new_ssa_name
, loop_latch_edge (new_loop
), locus
);
207 /* Return a copy of LOOP placed before LOOP. */
210 copy_loop_before (struct loop
*loop
)
213 edge preheader
= loop_preheader_edge (loop
);
215 initialize_original_copy_tables ();
216 res
= slpeel_tree_duplicate_loop_to_edge_cfg (loop
, preheader
);
217 gcc_assert (res
!= NULL
);
218 free_original_copy_tables ();
220 update_phis_for_loop_copy (loop
, res
);
221 rename_variables_in_loop (res
);
226 /* Creates an empty basic block after LOOP. */
229 create_bb_after_loop (struct loop
*loop
)
231 edge exit
= single_exit (loop
);
239 /* Generate code for PARTITION from the code in LOOP. The loop is
240 copied when COPY_P is true. All the statements not flagged in the
241 PARTITION bitmap are removed from the loop or from its copy. The
242 statements are indexed in sequence inside a basic block, and the
243 basic blocks of a loop are taken in dom order. */
246 generate_loops_for_partition (struct loop
*loop
, partition_t partition
,
250 gimple_stmt_iterator bsi
;
255 loop
= copy_loop_before (loop
);
256 gcc_assert (loop
!= NULL
);
257 create_preheader (loop
, CP_SIMPLE_PREHEADERS
);
258 create_bb_after_loop (loop
);
261 /* Remove stmts not in the PARTITION bitmap. The order in which we
262 visit the phi nodes and the statements is exactly as in
264 bbs
= get_loop_body_in_dom_order (loop
);
266 if (MAY_HAVE_DEBUG_STMTS
)
267 for (x
= 0, i
= 0; i
< loop
->num_nodes
; i
++)
269 basic_block bb
= bbs
[i
];
271 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
272 if (!bitmap_bit_p (partition
->stmts
, x
++))
273 reset_debug_uses (gsi_stmt (bsi
));
275 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
277 gimple stmt
= gsi_stmt (bsi
);
278 if (gimple_code (stmt
) != GIMPLE_LABEL
279 && !is_gimple_debug (stmt
)
280 && !bitmap_bit_p (partition
->stmts
, x
++))
281 reset_debug_uses (stmt
);
285 for (x
= 0, i
= 0; i
< loop
->num_nodes
; i
++)
287 basic_block bb
= bbs
[i
];
289 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
);)
290 if (!bitmap_bit_p (partition
->stmts
, x
++))
292 gimple phi
= gsi_stmt (bsi
);
293 if (virtual_operand_p (gimple_phi_result (phi
)))
294 mark_virtual_phi_result_for_renaming (phi
);
295 remove_phi_node (&bsi
, true);
300 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
);)
302 gimple stmt
= gsi_stmt (bsi
);
303 if (gimple_code (stmt
) != GIMPLE_LABEL
304 && !is_gimple_debug (stmt
)
305 && !bitmap_bit_p (partition
->stmts
, x
++))
307 unlink_stmt_vdef (stmt
);
308 gsi_remove (&bsi
, true);
319 /* Build the size argument for a memory operation call. */
322 build_size_arg_loc (location_t loc
, data_reference_p dr
, tree nb_iter
)
325 size
= fold_build2_loc (loc
, MULT_EXPR
, sizetype
,
326 fold_convert_loc (loc
, sizetype
, nb_iter
),
327 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr
))));
328 return fold_convert_loc (loc
, size_type_node
, size
);
331 /* Build an address argument for a memory operation call. */
334 build_addr_arg_loc (location_t loc
, data_reference_p dr
, tree nb_bytes
)
338 addr_base
= size_binop_loc (loc
, PLUS_EXPR
, DR_OFFSET (dr
), DR_INIT (dr
));
339 addr_base
= fold_convert_loc (loc
, sizetype
, addr_base
);
341 /* Test for a negative stride, iterating over every element. */
342 if (tree_int_cst_sgn (DR_STEP (dr
)) == -1)
344 addr_base
= size_binop_loc (loc
, MINUS_EXPR
, addr_base
,
345 fold_convert_loc (loc
, sizetype
, nb_bytes
));
346 addr_base
= size_binop_loc (loc
, PLUS_EXPR
, addr_base
,
347 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr
))));
350 return fold_build_pointer_plus_loc (loc
, DR_BASE_ADDRESS (dr
), addr_base
);
353 /* Generate a call to memset for PARTITION in LOOP. */
356 generate_memset_builtin (struct loop
*loop
, partition_t partition
)
358 gimple_stmt_iterator gsi
;
359 gimple stmt
, fn_call
;
360 tree nb_iter
, mem
, fn
, nb_bytes
;
364 stmt
= DR_STMT (partition
->main_dr
);
365 loc
= gimple_location (stmt
);
366 if (gimple_bb (stmt
) == loop
->latch
)
367 nb_iter
= number_of_latch_executions (loop
);
369 nb_iter
= number_of_exit_cond_executions (loop
);
371 /* The new statements will be placed before LOOP. */
372 gsi
= gsi_last_bb (loop_preheader_edge (loop
)->src
);
374 nb_bytes
= build_size_arg_loc (loc
, partition
->main_dr
, nb_iter
);
375 nb_bytes
= force_gimple_operand_gsi (&gsi
, nb_bytes
, true, NULL_TREE
,
376 false, GSI_CONTINUE_LINKING
);
377 mem
= build_addr_arg_loc (loc
, partition
->main_dr
, nb_bytes
);
378 mem
= force_gimple_operand_gsi (&gsi
, mem
, true, NULL_TREE
,
379 false, GSI_CONTINUE_LINKING
);
381 /* This exactly matches the pattern recognition in classify_partition. */
382 val
= gimple_assign_rhs1 (stmt
);
383 if (integer_zerop (val
)
385 || TREE_CODE (val
) == CONSTRUCTOR
)
386 val
= integer_zero_node
;
387 else if (integer_all_onesp (val
))
388 val
= build_int_cst (integer_type_node
, -1);
391 if (TREE_CODE (val
) == INTEGER_CST
)
392 val
= fold_convert (integer_type_node
, val
);
393 else if (!useless_type_conversion_p (integer_type_node
, TREE_TYPE (val
)))
396 tree tem
= make_ssa_name (integer_type_node
, NULL
);
397 cstmt
= gimple_build_assign_with_ops (NOP_EXPR
, tem
, val
, NULL_TREE
);
398 gsi_insert_after (&gsi
, cstmt
, GSI_CONTINUE_LINKING
);
403 fn
= build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET
));
404 fn_call
= gimple_build_call (fn
, 3, mem
, val
, nb_bytes
);
405 gsi_insert_after (&gsi
, fn_call
, GSI_CONTINUE_LINKING
);
407 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
409 fprintf (dump_file
, "generated memset");
410 if (integer_zerop (val
))
411 fprintf (dump_file
, " zero\n");
412 else if (integer_all_onesp (val
))
413 fprintf (dump_file
, " minus one\n");
415 fprintf (dump_file
, "\n");
419 /* Generate a call to memcpy for PARTITION in LOOP. */
422 generate_memcpy_builtin (struct loop
*loop
, partition_t partition
)
424 gimple_stmt_iterator gsi
;
425 gimple stmt
, fn_call
;
426 tree nb_iter
, dest
, src
, fn
, nb_bytes
;
428 enum built_in_function kind
;
430 stmt
= DR_STMT (partition
->main_dr
);
431 loc
= gimple_location (stmt
);
432 if (gimple_bb (stmt
) == loop
->latch
)
433 nb_iter
= number_of_latch_executions (loop
);
435 nb_iter
= number_of_exit_cond_executions (loop
);
437 /* The new statements will be placed before LOOP. */
438 gsi
= gsi_last_bb (loop_preheader_edge (loop
)->src
);
440 nb_bytes
= build_size_arg_loc (loc
, partition
->main_dr
, nb_iter
);
441 nb_bytes
= force_gimple_operand_gsi (&gsi
, nb_bytes
, true, NULL_TREE
,
442 false, GSI_CONTINUE_LINKING
);
443 dest
= build_addr_arg_loc (loc
, partition
->main_dr
, nb_bytes
);
444 src
= build_addr_arg_loc (loc
, partition
->secondary_dr
, nb_bytes
);
445 if (ptr_derefs_may_alias_p (dest
, src
))
446 kind
= BUILT_IN_MEMMOVE
;
448 kind
= BUILT_IN_MEMCPY
;
450 dest
= force_gimple_operand_gsi (&gsi
, dest
, true, NULL_TREE
,
451 false, GSI_CONTINUE_LINKING
);
452 src
= force_gimple_operand_gsi (&gsi
, src
, true, NULL_TREE
,
453 false, GSI_CONTINUE_LINKING
);
454 fn
= build_fold_addr_expr (builtin_decl_implicit (kind
));
455 fn_call
= gimple_build_call (fn
, 3, dest
, src
, nb_bytes
);
456 gsi_insert_after (&gsi
, fn_call
, GSI_CONTINUE_LINKING
);
458 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
460 if (kind
== BUILT_IN_MEMCPY
)
461 fprintf (dump_file
, "generated memcpy\n");
463 fprintf (dump_file
, "generated memmove\n");
467 /* Remove and destroy the loop LOOP. */
470 destroy_loop (struct loop
*loop
)
472 unsigned nbbs
= loop
->num_nodes
;
473 edge exit
= single_exit (loop
);
474 basic_block src
= loop_preheader_edge (loop
)->src
, dest
= exit
->dest
;
478 bbs
= get_loop_body_in_dom_order (loop
);
480 redirect_edge_pred (exit
, src
);
481 exit
->flags
&= ~(EDGE_TRUE_VALUE
|EDGE_FALSE_VALUE
);
482 exit
->flags
|= EDGE_FALLTHRU
;
483 cancel_loop_tree (loop
);
484 rescan_loop_exit (exit
, false, true);
486 for (i
= 0; i
< nbbs
; i
++)
488 /* We have made sure to not leave any dangling uses of SSA
489 names defined in the loop. With the exception of virtuals.
490 Make sure we replace all uses of virtual defs that will remain
491 outside of the loop with the bare symbol as delete_basic_block
492 will release them. */
493 gimple_stmt_iterator gsi
;
494 for (gsi
= gsi_start_phis (bbs
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
496 gimple phi
= gsi_stmt (gsi
);
497 if (virtual_operand_p (gimple_phi_result (phi
)))
498 mark_virtual_phi_result_for_renaming (phi
);
500 for (gsi
= gsi_start_bb (bbs
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
502 gimple stmt
= gsi_stmt (gsi
);
503 tree vdef
= gimple_vdef (stmt
);
504 if (vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
505 mark_virtual_operand_for_renaming (vdef
);
507 delete_basic_block (bbs
[i
]);
511 set_immediate_dominator (CDI_DOMINATORS
, dest
,
512 recompute_dominator (CDI_DOMINATORS
, dest
));
515 /* Generates code for PARTITION. */
518 generate_code_for_partition (struct loop
*loop
,
519 partition_t partition
, bool copy_p
)
521 switch (partition
->kind
)
524 generate_memset_builtin (loop
, partition
);
525 /* If this is the last partition for which we generate code, we have
526 to destroy the loop. */
532 generate_memcpy_builtin (loop
, partition
);
533 /* If this is the last partition for which we generate code, we have
534 to destroy the loop. */
540 generate_loops_for_partition (loop
, partition
, copy_p
);
549 /* Returns true if the node V of RDG cannot be recomputed. */
552 rdg_cannot_recompute_vertex_p (struct graph
*rdg
, int v
)
554 if (RDG_MEM_WRITE_STMT (rdg
, v
))
560 /* Returns true when the vertex V has already been generated in the
561 current partition (V is in PROCESSED), or when V belongs to another
562 partition and cannot be recomputed (V is not in REMAINING_STMTS). */
565 already_processed_vertex_p (bitmap processed
, int v
)
567 return (bitmap_bit_p (processed
, v
)
568 || !bitmap_bit_p (remaining_stmts
, v
));
571 /* Returns NULL when there is no anti-dependence among the successors
572 of vertex V, otherwise returns the edge with the anti-dep. */
574 static struct graph_edge
*
575 has_anti_dependence (struct vertex
*v
)
577 struct graph_edge
*e
;
580 for (e
= v
->succ
; e
; e
= e
->succ_next
)
581 if (RDGE_TYPE (e
) == anti_dd
)
587 /* Returns true when V has an anti-dependence edge among its successors. */
590 predecessor_has_mem_write (struct graph
*rdg
, struct vertex
*v
)
592 struct graph_edge
*e
;
595 for (e
= v
->pred
; e
; e
= e
->pred_next
)
596 if (bitmap_bit_p (upstream_mem_writes
, e
->src
)
597 /* Don't consider flow channels: a write to memory followed
598 by a read from memory. These channels allow the split of
599 the RDG in different partitions. */
600 && !RDG_MEM_WRITE_STMT (rdg
, e
->src
))
606 /* Initializes the upstream_mem_writes bitmap following the
607 information from RDG. */
610 mark_nodes_having_upstream_mem_writes (struct graph
*rdg
)
613 bitmap seen
= BITMAP_ALLOC (NULL
);
615 for (v
= rdg
->n_vertices
- 1; v
>= 0; v
--)
616 if (!bitmap_bit_p (seen
, v
))
622 graphds_dfs (rdg
, &v
, 1, &nodes
, false, NULL
);
624 FOR_EACH_VEC_ELT (nodes
, i
, x
)
626 if (!bitmap_set_bit (seen
, x
))
629 if (RDG_MEM_WRITE_STMT (rdg
, x
)
630 || predecessor_has_mem_write (rdg
, &(rdg
->vertices
[x
]))
631 /* In anti dependences the read should occur before
632 the write, this is why both the read and the write
633 should be placed in the same partition. */
634 || has_anti_dependence (&(rdg
->vertices
[x
])))
636 bitmap_set_bit (upstream_mem_writes
, x
);
644 /* Returns true when vertex u has a memory write node as a predecessor
648 has_upstream_mem_writes (int u
)
650 return bitmap_bit_p (upstream_mem_writes
, u
);
653 static void rdg_flag_vertex_and_dependent (struct graph
*, int, partition_t
,
656 /* Flag the uses of U stopping following the information from
657 upstream_mem_writes. */
660 rdg_flag_uses (struct graph
*rdg
, int u
, partition_t partition
, bitmap loops
,
664 struct vertex
*x
= &(rdg
->vertices
[u
]);
665 gimple stmt
= RDGV_STMT (x
);
666 struct graph_edge
*anti_dep
= has_anti_dependence (x
);
668 /* Keep in the same partition the destination of an antidependence,
669 because this is a store to the exact same location. Putting this
670 in another partition is bad for cache locality. */
673 int v
= anti_dep
->dest
;
675 if (!already_processed_vertex_p (processed
, v
))
676 rdg_flag_vertex_and_dependent (rdg
, v
, partition
, loops
,
680 if (gimple_code (stmt
) != GIMPLE_PHI
)
682 if ((use_p
= gimple_vuse_op (stmt
)) != NULL_USE_OPERAND_P
)
684 tree use
= USE_FROM_PTR (use_p
);
686 if (TREE_CODE (use
) == SSA_NAME
)
688 gimple def_stmt
= SSA_NAME_DEF_STMT (use
);
689 int v
= rdg_vertex_for_stmt (rdg
, def_stmt
);
692 && !already_processed_vertex_p (processed
, v
))
693 rdg_flag_vertex_and_dependent (rdg
, v
, partition
, loops
,
699 if (is_gimple_assign (stmt
) && has_upstream_mem_writes (u
))
701 tree op0
= gimple_assign_lhs (stmt
);
703 /* Scalar channels don't have enough space for transmitting data
704 between tasks, unless we add more storage by privatizing. */
705 if (is_gimple_reg (op0
))
708 imm_use_iterator iter
;
710 FOR_EACH_IMM_USE_FAST (use_p
, iter
, op0
)
712 int v
= rdg_vertex_for_stmt (rdg
, USE_STMT (use_p
));
714 if (!already_processed_vertex_p (processed
, v
))
715 rdg_flag_vertex_and_dependent (rdg
, v
, partition
, loops
,
722 /* Flag V from RDG as part of PARTITION, and also flag its loop number
726 rdg_flag_vertex (struct graph
*rdg
, int v
, partition_t partition
, bitmap loops
)
730 if (!bitmap_set_bit (partition
->stmts
, v
))
733 loop
= loop_containing_stmt (RDG_STMT (rdg
, v
));
734 bitmap_set_bit (loops
, loop
->num
);
736 if (rdg_cannot_recompute_vertex_p (rdg
, v
))
738 partition
->has_writes
= true;
739 bitmap_clear_bit (remaining_stmts
, v
);
743 /* Flag in the bitmap PARTITION the vertex V and all its predecessors.
744 Also flag their loop number in LOOPS. */
747 rdg_flag_vertex_and_dependent (struct graph
*rdg
, int v
, partition_t partition
,
748 bitmap loops
, bitmap processed
)
755 bitmap_set_bit (processed
, v
);
756 rdg_flag_uses (rdg
, v
, partition
, loops
, processed
);
757 graphds_dfs (rdg
, &v
, 1, &nodes
, false, remaining_stmts
);
758 rdg_flag_vertex (rdg
, v
, partition
, loops
);
760 FOR_EACH_VEC_ELT (nodes
, i
, x
)
761 if (!already_processed_vertex_p (processed
, x
))
762 rdg_flag_vertex_and_dependent (rdg
, x
, partition
, loops
, processed
);
767 /* Initialize CONDS with all the condition statements from the basic
771 collect_condition_stmts (struct loop
*loop
, vec
<gimple
> *conds
)
775 vec
<edge
> exits
= get_loop_exit_edges (loop
);
777 FOR_EACH_VEC_ELT (exits
, i
, e
)
779 gimple cond
= last_stmt (e
->src
);
782 conds
->safe_push (cond
);
788 /* Add to PARTITION all the exit condition statements for LOOPS
789 together with all their dependent statements determined from
793 rdg_flag_loop_exits (struct graph
*rdg
, bitmap loops
, partition_t partition
,
801 EXECUTE_IF_SET_IN_BITMAP (loops
, 0, i
, bi
)
802 collect_condition_stmts (get_loop (i
), &conds
);
804 while (!conds
.is_empty ())
806 gimple cond
= conds
.pop ();
807 int v
= rdg_vertex_for_stmt (rdg
, cond
);
808 bitmap new_loops
= BITMAP_ALLOC (NULL
);
810 if (!already_processed_vertex_p (processed
, v
))
811 rdg_flag_vertex_and_dependent (rdg
, v
, partition
, new_loops
, processed
);
813 EXECUTE_IF_SET_IN_BITMAP (new_loops
, 0, i
, bi
)
814 if (bitmap_set_bit (loops
, i
))
815 collect_condition_stmts (get_loop (i
), &conds
);
817 BITMAP_FREE (new_loops
);
823 /* Returns a bitmap in which all the statements needed for computing
824 the strongly connected component C of the RDG are flagged, also
825 including the loop exit conditions. */
828 build_rdg_partition_for_component (struct graph
*rdg
, rdgc c
)
831 partition_t partition
= partition_alloc (NULL
);
832 bitmap loops
= BITMAP_ALLOC (NULL
);
833 bitmap processed
= BITMAP_ALLOC (NULL
);
835 FOR_EACH_VEC_ELT (c
->vertices
, i
, v
)
836 if (!already_processed_vertex_p (processed
, v
))
837 rdg_flag_vertex_and_dependent (rdg
, v
, partition
, loops
, processed
);
839 rdg_flag_loop_exits (rdg
, loops
, partition
, processed
);
841 BITMAP_FREE (processed
);
846 /* Free memory for COMPONENTS. */
849 free_rdg_components (vec
<rdgc
> components
)
854 FOR_EACH_VEC_ELT (components
, i
, x
)
856 x
->vertices
.release ();
860 components
.release ();
863 /* Build the COMPONENTS vector with the strongly connected components
864 of RDG in which the STARTING_VERTICES occur. */
867 rdg_build_components (struct graph
*rdg
, vec
<int> starting_vertices
,
868 vec
<rdgc
> *components
)
871 bitmap saved_components
= BITMAP_ALLOC (NULL
);
872 int n_components
= graphds_scc (rdg
, NULL
);
873 /* ??? Macros cannot process template types with more than one
874 argument, so we need this typedef. */
875 typedef vec
<int> vec_int_heap
;
876 vec
<int> *all_components
= XNEWVEC (vec_int_heap
, n_components
);
878 for (i
= 0; i
< n_components
; i
++)
879 all_components
[i
].create (3);
881 for (i
= 0; i
< rdg
->n_vertices
; i
++)
882 all_components
[rdg
->vertices
[i
].component
].safe_push (i
);
884 FOR_EACH_VEC_ELT (starting_vertices
, i
, v
)
886 int c
= rdg
->vertices
[v
].component
;
888 if (bitmap_set_bit (saved_components
, c
))
890 rdgc x
= XCNEW (struct rdg_component
);
892 x
->vertices
= all_components
[c
];
894 components
->safe_push (x
);
898 for (i
= 0; i
< n_components
; i
++)
899 if (!bitmap_bit_p (saved_components
, i
))
900 all_components
[i
].release ();
902 free (all_components
);
903 BITMAP_FREE (saved_components
);
906 /* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
907 For the moment we detect only the memset zero pattern. */
910 classify_partition (loop_p loop
, struct graph
*rdg
, partition_t partition
)
915 data_reference_p single_load
, single_store
;
917 partition
->kind
= PKIND_NORMAL
;
918 partition
->main_dr
= NULL
;
919 partition
->secondary_dr
= NULL
;
921 if (!flag_tree_loop_distribute_patterns
)
924 /* Perform general partition disqualification for builtins. */
925 nb_iter
= number_of_exit_cond_executions (loop
);
926 if (!nb_iter
|| nb_iter
== chrec_dont_know
)
929 EXECUTE_IF_SET_IN_BITMAP (partition
->stmts
, 0, i
, bi
)
931 gimple stmt
= RDG_STMT (rdg
, i
);
933 if (gimple_has_volatile_ops (stmt
))
936 /* If the stmt has uses outside of the loop fail.
937 ??? If the stmt is generated in another partition that
938 is not created as builtin we can ignore this. */
939 if (stmt_has_scalar_dependences_outside_loop (loop
, stmt
))
941 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
942 fprintf (dump_file
, "not generating builtin, partition has "
943 "scalar uses outside of the loop\n");
948 /* Detect memset and memcpy. */
951 EXECUTE_IF_SET_IN_BITMAP (partition
->stmts
, 0, i
, bi
)
953 gimple stmt
= RDG_STMT (rdg
, i
);
957 if (gimple_code (stmt
) == GIMPLE_PHI
)
960 /* Any scalar stmts are ok. */
961 if (!gimple_vuse (stmt
))
964 /* Otherwise just regular loads/stores. */
965 if (!gimple_assign_single_p (stmt
))
968 /* But exactly one store and/or load. */
969 for (j
= 0; RDG_DATAREFS (rdg
, i
).iterate (j
, &dr
); ++j
)
973 if (single_load
!= NULL
)
979 if (single_store
!= NULL
)
986 if (single_store
&& !single_load
)
988 gimple stmt
= DR_STMT (single_store
);
989 tree rhs
= gimple_assign_rhs1 (stmt
);
990 if (!(integer_zerop (rhs
)
991 || integer_all_onesp (rhs
)
993 || (TREE_CODE (rhs
) == CONSTRUCTOR
994 && !TREE_CLOBBER_P (rhs
))
995 || (INTEGRAL_TYPE_P (TREE_TYPE (rhs
))
996 && (TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt
)))
997 == TYPE_MODE (unsigned_char_type_node
)))))
999 if (TREE_CODE (rhs
) == SSA_NAME
1000 && !SSA_NAME_IS_DEFAULT_DEF (rhs
)
1001 && flow_bb_inside_loop_p (loop
, gimple_bb (SSA_NAME_DEF_STMT (rhs
))))
1003 if (!adjacent_dr_p (single_store
))
1005 partition
->kind
= PKIND_MEMSET
;
1006 partition
->main_dr
= single_store
;
1008 else if (single_store
&& single_load
)
1010 gimple store
= DR_STMT (single_store
);
1011 gimple load
= DR_STMT (single_load
);
1012 /* Direct aggregate copy or via an SSA name temporary. */
1014 && gimple_assign_lhs (load
) != gimple_assign_rhs1 (store
))
1016 if (!adjacent_dr_p (single_store
)
1017 || !adjacent_dr_p (single_load
)
1018 || !operand_equal_p (DR_STEP (single_store
),
1019 DR_STEP (single_load
), 0))
1021 /* Now check that if there is a dependence this dependence is
1022 of a suitable form for memmove. */
1023 vec
<loop_p
> loops
= vec
<loop_p
>();
1025 loops
.safe_push (loop
);
1026 ddr
= initialize_data_dependence_relation (single_load
, single_store
,
1028 compute_affine_dependence (ddr
, loop
);
1029 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
1031 free_dependence_relation (ddr
);
1035 if (DDR_ARE_DEPENDENT (ddr
) != chrec_known
)
1037 if (DDR_NUM_DIST_VECTS (ddr
) == 0)
1039 free_dependence_relation (ddr
);
1043 lambda_vector dist_v
;
1044 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr
), i
, dist_v
)
1046 int dist
= dist_v
[index_in_loop_nest (loop
->num
,
1047 DDR_LOOP_NEST (ddr
))];
1048 if (dist
> 0 && !DDR_REVERSED_P (ddr
))
1050 free_dependence_relation (ddr
);
1056 free_dependence_relation (ddr
);
1058 partition
->kind
= PKIND_MEMCPY
;
1059 partition
->main_dr
= single_store
;
1060 partition
->secondary_dr
= single_load
;
1064 /* For a data reference REF, return the declaration of its base
1065 address or NULL_TREE if the base is not determined. */
1068 ref_base_address (data_reference_p dr
)
1070 tree base_address
= DR_BASE_ADDRESS (dr
);
1072 && TREE_CODE (base_address
) == ADDR_EXPR
)
1073 return TREE_OPERAND (base_address
, 0);
1075 return base_address
;
1078 /* Returns true when PARTITION1 and PARTITION2 have similar memory
1082 similar_memory_accesses (struct graph
*rdg
, partition_t partition1
,
1083 partition_t partition2
)
1085 unsigned i
, j
, k
, l
;
1086 bitmap_iterator bi
, bj
;
1087 data_reference_p ref1
, ref2
;
1089 /* First check whether in the intersection of the two partitions are
1090 any loads or stores. Common loads are the situation that happens
1092 EXECUTE_IF_AND_IN_BITMAP (partition1
->stmts
, partition2
->stmts
, 0, i
, bi
)
1093 if (RDG_MEM_WRITE_STMT (rdg
, i
)
1094 || RDG_MEM_READS_STMT (rdg
, i
))
1097 /* Then check all data-references against each other. */
1098 EXECUTE_IF_SET_IN_BITMAP (partition1
->stmts
, 0, i
, bi
)
1099 if (RDG_MEM_WRITE_STMT (rdg
, i
)
1100 || RDG_MEM_READS_STMT (rdg
, i
))
1101 EXECUTE_IF_SET_IN_BITMAP (partition2
->stmts
, 0, j
, bj
)
1102 if (RDG_MEM_WRITE_STMT (rdg
, j
)
1103 || RDG_MEM_READS_STMT (rdg
, j
))
1105 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg
, i
), k
, ref1
)
1107 tree base1
= ref_base_address (ref1
);
1109 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg
, j
), l
, ref2
)
1110 if (base1
== ref_base_address (ref2
))
1118 /* Aggregate several components into a useful partition that is
1119 registered in the PARTITIONS vector. Partitions will be
1120 distributed in different loops. */
1123 rdg_build_partitions (struct graph
*rdg
, vec
<rdgc
> components
,
1124 vec
<int> *other_stores
,
1125 vec
<partition_t
> *partitions
, bitmap processed
)
1129 partition_t partition
= partition_alloc (NULL
);
1131 FOR_EACH_VEC_ELT (components
, i
, x
)
1134 int v
= x
->vertices
[0];
1136 if (bitmap_bit_p (processed
, v
))
1139 np
= build_rdg_partition_for_component (rdg
, x
);
1140 bitmap_ior_into (partition
->stmts
, np
->stmts
);
1141 partition
->has_writes
= partition_has_writes (np
);
1142 bitmap_ior_into (processed
, np
->stmts
);
1143 partition_free (np
);
1145 if (partition_has_writes (partition
))
1147 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1149 fprintf (dump_file
, "ldist useful partition:\n");
1150 dump_bitmap (dump_file
, partition
->stmts
);
1153 partitions
->safe_push (partition
);
1154 partition
= partition_alloc (NULL
);
1158 /* Add the nodes from the RDG that were not marked as processed, and
1159 that are used outside the current loop. These are scalar
1160 computations that are not yet part of previous partitions. */
1161 for (i
= 0; i
< rdg
->n_vertices
; i
++)
1162 if (!bitmap_bit_p (processed
, i
)
1163 && rdg_defs_used_in_other_loops_p (rdg
, i
))
1164 other_stores
->safe_push (i
);
1166 /* If there are still statements left in the OTHER_STORES array,
1167 create other components and partitions with these stores and
1168 their dependences. */
1169 if (other_stores
->length () > 0)
1176 rdg_build_components (rdg
, *other_stores
, &comps
);
1177 rdg_build_partitions (rdg
, comps
, &foo
, partitions
, processed
);
1180 free_rdg_components (comps
);
1183 /* If there is something left in the last partition, save it. */
1184 if (bitmap_count_bits (partition
->stmts
) > 0)
1185 partitions
->safe_push (partition
);
1187 partition_free (partition
);
1190 /* Dump to FILE the PARTITIONS. */
1193 dump_rdg_partitions (FILE *file
, vec
<partition_t
> partitions
)
1196 partition_t partition
;
1198 FOR_EACH_VEC_ELT (partitions
, i
, partition
)
1199 debug_bitmap_file (file
, partition
->stmts
);
1202 /* Debug PARTITIONS. */
1203 extern void debug_rdg_partitions (vec
<partition_t
> );
1206 debug_rdg_partitions (vec
<partition_t
> partitions
)
1208 dump_rdg_partitions (stderr
, partitions
);
1211 /* Returns the number of read and write operations in the RDG. */
1214 number_of_rw_in_rdg (struct graph
*rdg
)
1218 for (i
= 0; i
< rdg
->n_vertices
; i
++)
1220 if (RDG_MEM_WRITE_STMT (rdg
, i
))
1223 if (RDG_MEM_READS_STMT (rdg
, i
))
1230 /* Returns the number of read and write operations in a PARTITION of
1234 number_of_rw_in_partition (struct graph
*rdg
, partition_t partition
)
1240 EXECUTE_IF_SET_IN_BITMAP (partition
->stmts
, 0, i
, ii
)
1242 if (RDG_MEM_WRITE_STMT (rdg
, i
))
1245 if (RDG_MEM_READS_STMT (rdg
, i
))
1252 /* Returns true when one of the PARTITIONS contains all the read or
1253 write operations of RDG. */
1256 partition_contains_all_rw (struct graph
*rdg
,
1257 vec
<partition_t
> partitions
)
1260 partition_t partition
;
1261 int nrw
= number_of_rw_in_rdg (rdg
);
1263 FOR_EACH_VEC_ELT (partitions
, i
, partition
)
1264 if (nrw
== number_of_rw_in_partition (rdg
, partition
))
1270 /* Generate code from STARTING_VERTICES in RDG. Returns the number of
1271 distributed loops. */
1274 ldist_gen (struct loop
*loop
, struct graph
*rdg
,
1275 vec
<int> starting_vertices
)
1278 vec
<rdgc
> components
;
1279 components
.create (3);
1280 vec
<partition_t
> partitions
;
1281 partitions
.create (3);
1282 vec
<int> other_stores
;
1283 other_stores
.create (3);
1284 partition_t partition
;
1285 bitmap processed
= BITMAP_ALLOC (NULL
);
1288 remaining_stmts
= BITMAP_ALLOC (NULL
);
1289 upstream_mem_writes
= BITMAP_ALLOC (NULL
);
1291 for (i
= 0; i
< rdg
->n_vertices
; i
++)
1293 bitmap_set_bit (remaining_stmts
, i
);
1295 /* Save in OTHER_STORES all the memory writes that are not in
1296 STARTING_VERTICES. */
1297 if (RDG_MEM_WRITE_STMT (rdg
, i
))
1303 FOR_EACH_VEC_ELT (starting_vertices
, j
, v
)
1311 other_stores
.safe_push (i
);
1315 mark_nodes_having_upstream_mem_writes (rdg
);
1316 rdg_build_components (rdg
, starting_vertices
, &components
);
1317 rdg_build_partitions (rdg
, components
, &other_stores
, &partitions
,
1319 BITMAP_FREE (processed
);
1321 any_builtin
= false;
1322 FOR_EACH_VEC_ELT (partitions
, i
, partition
)
1324 classify_partition (loop
, rdg
, partition
);
1325 any_builtin
|= partition_builtin_p (partition
);
1328 /* If we are only distributing patterns fuse all partitions that
1329 were not properly classified as builtins. Else fuse partitions
1330 with similar memory accesses. */
1331 if (!flag_tree_loop_distribution
)
1334 /* If we did not detect any builtin simply bail out. */
1340 /* Only fuse adjacent non-builtin partitions, see PR53616.
1341 ??? Use dependence information to improve partition ordering. */
1345 for (; partitions
.iterate (i
, &into
); ++i
)
1346 if (!partition_builtin_p (into
))
1348 for (++i
; partitions
.iterate (i
, &partition
); ++i
)
1349 if (!partition_builtin_p (partition
))
1351 bitmap_ior_into (into
->stmts
, partition
->stmts
);
1352 partitions
.ordered_remove (i
);
1358 while ((unsigned) i
< partitions
.length ());
1364 for (i
= 0; partitions
.iterate (i
, &into
); ++i
)
1366 if (partition_builtin_p (into
))
1369 partitions
.iterate (j
, &partition
); ++j
)
1371 if (!partition_builtin_p (partition
)
1372 /* ??? The following is horribly inefficient,
1373 we are re-computing and analyzing data-references
1374 of the stmts in the partitions all the time. */
1375 && similar_memory_accesses (rdg
, into
, partition
))
1377 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1379 fprintf (dump_file
, "fusing partitions\n");
1380 dump_bitmap (dump_file
, into
->stmts
);
1381 dump_bitmap (dump_file
, partition
->stmts
);
1382 fprintf (dump_file
, "because they have similar "
1383 "memory accesses\n");
1385 bitmap_ior_into (into
->stmts
, partition
->stmts
);
1386 partitions
.ordered_remove (j
);
1393 nbp
= partitions
.length ();
1395 || (nbp
== 1 && !partition_builtin_p (partitions
[0]))
1396 || (nbp
> 1 && partition_contains_all_rw (rdg
, partitions
)))
1402 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1403 dump_rdg_partitions (dump_file
, partitions
);
1405 FOR_EACH_VEC_ELT (partitions
, i
, partition
)
1406 generate_code_for_partition (loop
, partition
, i
< nbp
- 1);
1410 BITMAP_FREE (remaining_stmts
);
1411 BITMAP_FREE (upstream_mem_writes
);
1413 FOR_EACH_VEC_ELT (partitions
, i
, partition
)
1414 partition_free (partition
);
1416 other_stores
.release ();
1417 partitions
.release ();
1418 free_rdg_components (components
);
1422 /* Distributes the code from LOOP in such a way that producer
1423 statements are placed before consumer statements. When STMTS is
1424 NULL, performs the maximal distribution, if STMTS is not NULL,
1425 tries to separate only these statements from the LOOP's body.
1426 Returns the number of distributed loops. */
1429 distribute_loop (struct loop
*loop
, vec
<gimple
> stmts
)
1436 vec
<ddr_p
> dependence_relations
;
1437 vec
<data_reference_p
> datarefs
;
1438 vec
<loop_p
> loop_nest
;
1440 datarefs
.create (10);
1441 dependence_relations
.create (100);
1442 loop_nest
.create (3);
1443 rdg
= build_rdg (loop
, &loop_nest
, &dependence_relations
, &datarefs
);
1447 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1449 "FIXME: Loop %d not distributed: failed to build the RDG.\n",
1452 free_dependence_relations (dependence_relations
);
1453 free_data_refs (datarefs
);
1454 loop_nest
.release ();
1458 vertices
.create (3);
1460 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1461 dump_rdg (dump_file
, rdg
);
1463 FOR_EACH_VEC_ELT (stmts
, i
, s
)
1465 int v
= rdg_vertex_for_stmt (rdg
, s
);
1469 vertices
.safe_push (v
);
1471 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1473 "ldist asked to generate code for vertex %d\n", v
);
1477 res
= ldist_gen (loop
, rdg
, vertices
);
1478 vertices
.release ();
1480 free_dependence_relations (dependence_relations
);
1481 free_data_refs (datarefs
);
1482 loop_nest
.release ();
1486 /* Distribute all loops in the current function. */
1489 tree_loop_distribution (void)
1493 bool changed
= false;
1498 gimple_stmt_iterator gsi
;
1499 for (gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1500 gimple_set_uid (gsi_stmt (gsi
), -1);
1501 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1502 gimple_set_uid (gsi_stmt (gsi
), -1);
1505 /* We can at the moment only distribute non-nested loops, thus restrict
1506 walking to innermost loops. */
1507 FOR_EACH_LOOP (li
, loop
, LI_ONLY_INNERMOST
)
1509 vec
<gimple
> work_list
= vec
<gimple
>();
1511 int num
= loop
->num
;
1512 int nb_generated_loops
= 0;
1515 /* If the loop doesn't have a single exit we will fail anyway,
1516 so do that early. */
1517 if (!single_exit (loop
))
1520 /* Only optimize hot loops. */
1521 if (!optimize_loop_for_speed_p (loop
))
1524 /* Only distribute loops with a header and latch for now. */
1525 if (loop
->num_nodes
> 2)
1528 /* Initialize the worklist with stmts we seed the partitions with. */
1529 bbs
= get_loop_body_in_dom_order (loop
);
1530 for (i
= 0; i
< loop
->num_nodes
; ++i
)
1532 gimple_stmt_iterator gsi
;
1533 for (gsi
= gsi_start_bb (bbs
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
1535 gimple stmt
= gsi_stmt (gsi
);
1536 /* Only distribute stores for now.
1537 ??? We should also try to distribute scalar reductions,
1538 thus SSA defs that have scalar uses outside of the loop. */
1539 if (!gimple_assign_single_p (stmt
)
1540 || is_gimple_reg (gimple_assign_lhs (stmt
)))
1543 work_list
.safe_push (stmt
);
1548 if (work_list
.length () > 0)
1549 nb_generated_loops
= distribute_loop (loop
, work_list
);
1551 if (nb_generated_loops
> 0)
1554 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1556 if (nb_generated_loops
> 1)
1557 fprintf (dump_file
, "Loop %d distributed: split to %d loops.\n",
1558 num
, nb_generated_loops
);
1560 fprintf (dump_file
, "Loop %d is the same.\n", num
);
1563 work_list
.release ();
1568 mark_virtual_operands_for_renaming (cfun
);
1569 rewrite_into_loop_closed_ssa (NULL
, TODO_update_ssa
);
1572 #ifdef ENABLE_CHECKING
1573 verify_loop_structure ();
1580 gate_tree_loop_distribution (void)
1582 return flag_tree_loop_distribution
1583 || flag_tree_loop_distribute_patterns
;
1586 struct gimple_opt_pass pass_loop_distribution
=
1591 OPTGROUP_LOOP
, /* optinfo_flags */
1592 gate_tree_loop_distribution
, /* gate */
1593 tree_loop_distribution
, /* execute */
1596 0, /* static_pass_number */
1597 TV_TREE_LOOP_DISTRIBUTION
, /* tv_id */
1598 PROP_cfg
| PROP_ssa
, /* properties_required */
1599 0, /* properties_provided */
1600 0, /* properties_destroyed */
1601 0, /* todo_flags_start */
1603 | TODO_verify_ssa
/* todo_flags_finish */