/* Loop distribution.
   Copyright (C) 2006-2013 Free Software Foundation, Inc.
   Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
   and Sebastian Pop <sebastian.pop@amd.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass performs loop distribution: for example, the loop

   |DO I = 2, N
   |    A(I) = B(I) + C
   |    D(I) = A(I-1)*E
   |ENDDO

   is transformed to

   |DOALL I = 2, N
   |   A(I) = B(I) + C
   |ENDDO
   |
   |DOALL I = 2, N
   |   D(I) = A(I-1)*E
   |ENDDO

   This pass uses an RDG, Reduced Dependence Graph built on top of the
   data dependence relations.  The RDG is then topologically sorted to
   obtain a map of information producers/consumers based on which it
   generates the new loops.  */
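/* A rough C rendering of the same transformation (illustrative only;
   the arrays and scalars are placeholders):

     for (i = 1; i < n; i++)            for (i = 1; i < n; i++)
       {                                  a[i] = b[i] + c;
         a[i] = b[i] + c;        ==>
         d[i] = a[i-1] * e;             for (i = 1; i < n; i++)
       }                                  d[i] = a[i-1] * e;

   The second loop only reads values of A that the first loop has
   already computed in full, so executing the loops one after the
   other preserves the original semantics.  */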
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree-flow.h"
#include "cfgloop.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-pass.h"
enum partition_kind {
    PKIND_NORMAL, PKIND_REDUCTION, PKIND_MEMSET, PKIND_MEMCPY
};

typedef struct partition_s
{
  bitmap stmts;
  bool has_writes;
  enum partition_kind kind;
  /* Data-references a kind != PKIND_NORMAL partition is about.  */
  data_reference_p main_dr;
  data_reference_p secondary_dr;
} *partition_t;
/* Allocate and initialize a partition from BITMAP.  */

static partition_t
partition_alloc (bitmap stmts)
{
  partition_t partition = XCNEW (struct partition_s);
  partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
  partition->has_writes = false;
  partition->kind = PKIND_NORMAL;
  return partition;
}
/* Free PARTITION.  */

static void
partition_free (partition_t partition)
{
  BITMAP_FREE (partition->stmts);
  free (partition);
}
/* Returns true if the partition can be generated as a builtin.  */

static bool
partition_builtin_p (partition_t partition)
{
  return partition->kind > PKIND_REDUCTION;
}
/* Returns true if the partition has any writes.  */

static bool
partition_has_writes (partition_t partition)
{
  return partition->has_writes;
}
/* If bit I is not set, it means that this node represents an
   operation that has already been performed, and that should not be
   performed again.  This is the subgraph of remaining important
   computations that is passed to the DFS algorithm to avoid
   including the same stores several times in different loops.  */
static bitmap remaining_stmts;

/* A node of the RDG is marked in this bitmap when it has as a
   predecessor a node that writes to memory.  */
static bitmap upstream_mem_writes;
/* Returns true when DEF is an SSA_NAME defined in LOOP and used after
   the LOOP.  */

static bool
ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
{
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
    {
      gimple use_stmt = USE_STMT (use_p);
      if (!is_gimple_debug (use_stmt)
          && loop != loop_containing_stmt (use_stmt))
        return true;
    }

  return false;
}
/* Returns true when STMT defines a scalar variable used after the
   loop LOOP.  */

static bool
stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple stmt)
{
  def_operand_p def_p;
  ssa_op_iter op_iter;

  if (gimple_code (stmt) == GIMPLE_PHI)
    return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);

  FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
    if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
      return true;

  return false;
}
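/* For illustration: in a loop like

     for (i = 0; i < n; i++)
       sum = sum + a[i];
     use (sum);

   the value of SUM computed inside the loop is needed after it, so
   the statements defining SUM have scalar dependences outside the
   loop, and classify_partition below refuses to turn the partition
   that contains them into a builtin.  */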
/* Return a copy of LOOP placed before LOOP.  */

static struct loop *
copy_loop_before (struct loop *loop)
{
  struct loop *res;
  edge preheader = loop_preheader_edge (loop);

  initialize_original_copy_tables ();
  res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, preheader);
  gcc_assert (res != NULL);
  free_original_copy_tables ();
  delete_update_ssa ();

  return res;
}
/* Creates an empty basic block after LOOP.  */

static void
create_bb_after_loop (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return;

  split_edge (exit);
}
/* Generate code for PARTITION from the code in LOOP.  The loop is
   copied when COPY_P is true.  All the statements not flagged in the
   PARTITION bitmap are removed from the loop or from its copy.  The
   statements are indexed in sequence inside a basic block, and the
   basic blocks of a loop are taken in dom order.  */

static void
generate_loops_for_partition (struct loop *loop, partition_t partition,
                              bool copy_p)
{
  unsigned i, x;
  gimple_stmt_iterator bsi;
  basic_block *bbs;

  if (copy_p)
    {
      loop = copy_loop_before (loop);
      gcc_assert (loop != NULL);
      create_preheader (loop, CP_SIMPLE_PREHEADERS);
      create_bb_after_loop (loop);
    }

  /* Remove stmts not in the PARTITION bitmap.  The order in which we
     visit the phi nodes and the statements is exactly as in
     stmts_from_loop.  */
  bbs = get_loop_body_in_dom_order (loop);

  if (MAY_HAVE_DEBUG_STMTS)
    for (x = 0, i = 0; i < loop->num_nodes; i++)
      {
        basic_block bb = bbs[i];

        for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
          if (!bitmap_bit_p (partition->stmts, x++))
            reset_debug_uses (gsi_stmt (bsi));

        for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
          {
            gimple stmt = gsi_stmt (bsi);
            if (gimple_code (stmt) != GIMPLE_LABEL
                && !is_gimple_debug (stmt)
                && !bitmap_bit_p (partition->stmts, x++))
              reset_debug_uses (stmt);
          }
      }

  for (x = 0, i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
        if (!bitmap_bit_p (partition->stmts, x++))
          {
            gimple phi = gsi_stmt (bsi);
            if (virtual_operand_p (gimple_phi_result (phi)))
              mark_virtual_phi_result_for_renaming (phi);
            remove_phi_node (&bsi, true);
          }
        else
          gsi_next (&bsi);

      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
        {
          gimple stmt = gsi_stmt (bsi);
          if (gimple_code (stmt) != GIMPLE_LABEL
              && !is_gimple_debug (stmt)
              && !bitmap_bit_p (partition->stmts, x++))
            {
              unlink_stmt_vdef (stmt);
              gsi_remove (&bsi, true);
              release_defs (stmt);
            }
          else
            gsi_next (&bsi);
        }
    }

  free (bbs);
}
/* Build the size argument for a memory operation call.  */

static tree
build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter)
{
  tree size;
  size = fold_build2_loc (loc, MULT_EXPR, sizetype,
                          fold_convert_loc (loc, sizetype, nb_iter),
                          TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
  return fold_convert_loc (loc, size_type_node, size);
}
/* Build an address argument for a memory operation call.  */

static tree
build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
{
  tree addr_base;

  addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
  addr_base = fold_convert_loc (loc, sizetype, addr_base);

  /* Test for a negative stride, iterating over every element.  */
  if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
    {
      addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
                                  fold_convert_loc (loc, sizetype, nb_bytes));
      addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
                                  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
    }

  return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
}
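/* For illustration: for a negative-stride store such as

     for (i = n - 1; i >= 0; i--)
       a[i] = 0;

   the data reference points at the last element written, so the start
   address for the library call is obtained by stepping back NB_BYTES
   and then forward by one element, which yields &a[0] again.  */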
/* Generate a call to memset for PARTITION in LOOP.  */

static void
generate_memset_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree nb_iter, mem, fn, nb_bytes;
  location_t loc;
  tree val;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);
  if (gimple_bb (stmt) == loop->latch)
    nb_iter = number_of_latch_executions (loop);
  else
    nb_iter = number_of_exit_cond_executions (loop);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, nb_iter);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
                                       false, GSI_CONTINUE_LINKING);
  mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
                                  false, GSI_CONTINUE_LINKING);

  /* This exactly matches the pattern recognition in classify_partition.  */
  val = gimple_assign_rhs1 (stmt);
  if (integer_zerop (val)
      || real_zerop (val)
      || TREE_CODE (val) == CONSTRUCTOR)
    val = integer_zero_node;
  else if (integer_all_onesp (val))
    val = build_int_cst (integer_type_node, -1);
  else
    {
      if (TREE_CODE (val) == INTEGER_CST)
        val = fold_convert (integer_type_node, val);
      else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
        {
          gimple cstmt;
          tree tem = make_ssa_name (integer_type_node, NULL);
          cstmt = gimple_build_assign_with_ops (NOP_EXPR, tem, val, NULL_TREE);
          gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
          val = tem;
        }
    }

  fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
  fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "generated memset");
      if (integer_zerop (val))
        fprintf (dump_file, " zero\n");
      else if (integer_all_onesp (val))
        fprintf (dump_file, " minus one\n");
      else
        fprintf (dump_file, "\n");
    }
}
/* Generate a call to memcpy for PARTITION in LOOP.  */

static void
generate_memcpy_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree nb_iter, dest, src, fn, nb_bytes;
  location_t loc;
  enum built_in_function kind;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);
  if (gimple_bb (stmt) == loop->latch)
    nb_iter = number_of_latch_executions (loop);
  else
    nb_iter = number_of_exit_cond_executions (loop);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, nb_iter);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
                                       false, GSI_CONTINUE_LINKING);
  dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
  if (ptr_derefs_may_alias_p (dest, src))
    kind = BUILT_IN_MEMMOVE;
  else
    kind = BUILT_IN_MEMCPY;

  dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
                                   false, GSI_CONTINUE_LINKING);
  src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
                                  false, GSI_CONTINUE_LINKING);
  fn = build_fold_addr_expr (builtin_decl_implicit (kind));
  fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (kind == BUILT_IN_MEMCPY)
        fprintf (dump_file, "generated memcpy\n");
      else
        fprintf (dump_file, "generated memmove\n");
    }
}
/* Remove and destroy the loop LOOP.  */

static void
destroy_loop (struct loop *loop)
{
  unsigned nbbs = loop->num_nodes;
  edge exit = single_exit (loop);
  basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
  basic_block *bbs;
  unsigned i;

  bbs = get_loop_body_in_dom_order (loop);

  redirect_edge_pred (exit, src);
  exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
  exit->flags |= EDGE_FALLTHRU;
  cancel_loop_tree (loop);
  rescan_loop_exit (exit, false, true);

  for (i = 0; i < nbbs; i++)
    {
      /* We have made sure to not leave any dangling uses of SSA
         names defined in the loop, with the exception of virtuals.
         Make sure we replace all uses of virtual defs that will remain
         outside of the loop with the bare symbol, as delete_basic_block
         will release them.  */
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple phi = gsi_stmt (gsi);
          if (virtual_operand_p (gimple_phi_result (phi)))
            mark_virtual_phi_result_for_renaming (phi);
        }
      for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple stmt = gsi_stmt (gsi);
          tree vdef = gimple_vdef (stmt);
          if (vdef && TREE_CODE (vdef) == SSA_NAME)
            mark_virtual_operand_for_renaming (vdef);
        }
      delete_basic_block (bbs[i]);
    }
  free (bbs);

  set_immediate_dominator (CDI_DOMINATORS, dest,
                           recompute_dominator (CDI_DOMINATORS, dest));
}
/* Generates code for PARTITION.  */

static void
generate_code_for_partition (struct loop *loop,
                             partition_t partition, bool copy_p)
{
  switch (partition->kind)
    {
    case PKIND_MEMSET:
      generate_memset_builtin (loop, partition);
      /* If this is the last partition for which we generate code, we have
         to destroy the loop.  */
      if (!copy_p)
        destroy_loop (loop);
      break;

    case PKIND_MEMCPY:
      generate_memcpy_builtin (loop, partition);
      /* If this is the last partition for which we generate code, we have
         to destroy the loop.  */
      if (!copy_p)
        destroy_loop (loop);
      break;

    case PKIND_REDUCTION:
      /* Reductions all have to be in the last partition.  */
      gcc_assert (!copy_p);
    case PKIND_NORMAL:
      generate_loops_for_partition (loop, partition, copy_p);
      break;

    default:
      gcc_unreachable ();
    }
}
/* Returns true if the node V of RDG cannot be recomputed.  */

static bool
rdg_cannot_recompute_vertex_p (struct graph *rdg, int v)
{
  if (RDG_MEM_WRITE_STMT (rdg, v))
    return true;

  return false;
}
/* Returns true when the vertex V has already been generated in the
   current partition (V is in PROCESSED), or when V belongs to another
   partition and cannot be recomputed (V is not in REMAINING_STMTS).  */

static bool
already_processed_vertex_p (bitmap processed, int v)
{
  return (bitmap_bit_p (processed, v)
          || !bitmap_bit_p (remaining_stmts, v));
}
/* Returns NULL when there is no anti-dependence among the successors
   of vertex V, otherwise returns the edge with the anti-dep.  */

static struct graph_edge *
has_anti_dependence (struct vertex *v)
{
  struct graph_edge *e;

  if (v->succ)
    for (e = v->succ; e; e = e->succ_next)
      if (RDGE_TYPE (e) == anti_dd)
        return e;

  return NULL;
}
/* Returns true when V has a predecessor marked as having upstream
   memory writes, ignoring flow channels.  */

static bool
predecessor_has_mem_write (struct graph *rdg, struct vertex *v)
{
  struct graph_edge *e;

  if (v->pred)
    for (e = v->pred; e; e = e->pred_next)
      if (bitmap_bit_p (upstream_mem_writes, e->src)
          /* Don't consider flow channels: a write to memory followed
             by a read from memory.  These channels allow the split of
             the RDG in different partitions.  */
          && !RDG_MEM_WRITE_STMT (rdg, e->src))
        return true;

  return false;
}
/* Initializes the upstream_mem_writes bitmap following the
   information from RDG.  */

static void
mark_nodes_having_upstream_mem_writes (struct graph *rdg)
{
  int v, x;
  bitmap seen = BITMAP_ALLOC (NULL);

  for (v = rdg->n_vertices - 1; v >= 0; v--)
    if (!bitmap_bit_p (seen, v))
      {
        unsigned i;
        vec<int> nodes;
        nodes.create (3);

        graphds_dfs (rdg, &v, 1, &nodes, false, NULL);

        FOR_EACH_VEC_ELT (nodes, i, x)
          {
            if (!bitmap_set_bit (seen, x))
              continue;

            if (RDG_MEM_WRITE_STMT (rdg, x)
                || predecessor_has_mem_write (rdg, &(rdg->vertices[x]))
                /* In anti dependences the read should occur before
                   the write; this is why both the read and the write
                   should be placed in the same partition.  */
                || has_anti_dependence (&(rdg->vertices[x])))
              bitmap_set_bit (upstream_mem_writes, x);
          }

        nodes.release ();
      }

  BITMAP_FREE (seen);
}
/* Returns true when vertex u has a memory write node as a
   predecessor.  */

static bool
has_upstream_mem_writes (int u)
{
  return bitmap_bit_p (upstream_mem_writes, u);
}
static void rdg_flag_vertex_and_dependent (struct graph *, int, partition_t,
                                           bitmap, bitmap);
/* Flag the uses of U, stopping the walk according to the information
   recorded in upstream_mem_writes.  */

static void
rdg_flag_uses (struct graph *rdg, int u, partition_t partition, bitmap loops,
               bitmap processed)
{
  use_operand_p use_p;
  struct vertex *x = &(rdg->vertices[u]);
  gimple stmt = RDGV_STMT (x);
  struct graph_edge *anti_dep = has_anti_dependence (x);

  /* Keep in the same partition the destination of an antidependence,
     because this is a store to the exact same location.  Putting this
     in another partition is bad for cache locality.  */
  if (anti_dep)
    {
      int v = anti_dep->dest;

      if (!already_processed_vertex_p (processed, v))
        rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
                                       processed);
    }

  if (gimple_code (stmt) != GIMPLE_PHI)
    {
      if ((use_p = gimple_vuse_op (stmt)) != NULL_USE_OPERAND_P)
        {
          tree use = USE_FROM_PTR (use_p);

          if (TREE_CODE (use) == SSA_NAME
              && !SSA_NAME_IS_DEFAULT_DEF (use))
            {
              gimple def_stmt = SSA_NAME_DEF_STMT (use);
              int v = rdg_vertex_for_stmt (rdg, def_stmt);

              if (v >= 0
                  && !already_processed_vertex_p (processed, v))
                rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
                                               processed);
            }
        }
    }

  if (is_gimple_assign (stmt) && has_upstream_mem_writes (u))
    {
      tree op0 = gimple_assign_lhs (stmt);

      /* Scalar channels don't have enough space for transmitting data
         between tasks, unless we add more storage by privatizing.  */
      if (is_gimple_reg (op0))
        {
          use_operand_p use_p;
          imm_use_iterator iter;

          FOR_EACH_IMM_USE_FAST (use_p, iter, op0)
            {
              int v = rdg_vertex_for_stmt (rdg, USE_STMT (use_p));

              if (!already_processed_vertex_p (processed, v))
                rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
                                               processed);
            }
        }
    }
}
/* Flag V from RDG as part of PARTITION, and also flag its loop number
   in LOOPS.  */

static void
rdg_flag_vertex (struct graph *rdg, int v, partition_t partition, bitmap loops)
{
  struct loop *loop;

  if (!bitmap_set_bit (partition->stmts, v))
    return;

  loop = loop_containing_stmt (RDG_STMT (rdg, v));
  bitmap_set_bit (loops, loop->num);

  if (rdg_cannot_recompute_vertex_p (rdg, v))
    {
      partition->has_writes = true;
      bitmap_clear_bit (remaining_stmts, v);
    }
}
/* Flag in the bitmap PARTITION the vertex V and all its predecessors.
   Also flag their loop number in LOOPS.  */

static void
rdg_flag_vertex_and_dependent (struct graph *rdg, int v, partition_t partition,
                               bitmap loops, bitmap processed)
{
  unsigned i;
  vec<int> nodes;
  nodes.create (3);
  int x;

  bitmap_set_bit (processed, v);
  rdg_flag_uses (rdg, v, partition, loops, processed);
  graphds_dfs (rdg, &v, 1, &nodes, false, remaining_stmts);
  rdg_flag_vertex (rdg, v, partition, loops);

  FOR_EACH_VEC_ELT (nodes, i, x)
    if (!already_processed_vertex_p (processed, x))
      rdg_flag_vertex_and_dependent (rdg, x, partition, loops, processed);

  nodes.release ();
}
/* Initialize CONDS with all the condition statements from the basic
   blocks that end in the exit edges of LOOP.  */

static void
collect_condition_stmts (struct loop *loop, vec<gimple> *conds)
{
  unsigned i;
  edge e;
  vec<edge> exits = get_loop_exit_edges (loop);

  FOR_EACH_VEC_ELT (exits, i, e)
    {
      gimple cond = last_stmt (e->src);

      if (cond)
        conds->safe_push (cond);
    }

  exits.release ();
}
/* Add to PARTITION all the exit condition statements for LOOPS
   together with all their dependent statements determined from
   RDG.  */

static void
rdg_flag_loop_exits (struct graph *rdg, bitmap loops, partition_t partition,
                     bitmap processed)
{
  unsigned i;
  bitmap_iterator bi;
  vec<gimple> conds;
  conds.create (3);

  EXECUTE_IF_SET_IN_BITMAP (loops, 0, i, bi)
    collect_condition_stmts (get_loop (i), &conds);

  while (!conds.is_empty ())
    {
      gimple cond = conds.pop ();
      int v = rdg_vertex_for_stmt (rdg, cond);
      bitmap new_loops = BITMAP_ALLOC (NULL);

      if (!already_processed_vertex_p (processed, v))
        rdg_flag_vertex_and_dependent (rdg, v, partition, new_loops, processed);

      EXECUTE_IF_SET_IN_BITMAP (new_loops, 0, i, bi)
        if (bitmap_set_bit (loops, i))
          collect_condition_stmts (get_loop (i), &conds);

      BITMAP_FREE (new_loops);
    }

  conds.release ();
}
/* Returns a bitmap in which all the statements needed for computing
   the strongly connected component C of the RDG are flagged, also
   including the loop exit conditions.  */

static partition_t
build_rdg_partition_for_component (struct graph *rdg, rdgc c)
{
  int i, v;
  partition_t partition = partition_alloc (NULL);
  bitmap loops = BITMAP_ALLOC (NULL);
  bitmap processed = BITMAP_ALLOC (NULL);

  FOR_EACH_VEC_ELT (c->vertices, i, v)
    if (!already_processed_vertex_p (processed, v))
      rdg_flag_vertex_and_dependent (rdg, v, partition, loops, processed);

  rdg_flag_loop_exits (rdg, loops, partition, processed);

  BITMAP_FREE (processed);
  BITMAP_FREE (loops);
  return partition;
}
/* Free memory for COMPONENTS.  */

static void
free_rdg_components (vec<rdgc> components)
{
  int i;
  rdgc x;

  FOR_EACH_VEC_ELT (components, i, x)
    {
      x->vertices.release ();
      free (x);
    }

  components.release ();
}
/* Build the COMPONENTS vector with the strongly connected components
   of RDG in which the STARTING_VERTICES occur.  */

static void
rdg_build_components (struct graph *rdg, vec<int> starting_vertices,
                      vec<rdgc> *components)
{
  int i, v;
  bitmap saved_components = BITMAP_ALLOC (NULL);
  int n_components = graphds_scc (rdg, NULL);
  /* ??? Macros cannot process template types with more than one
     argument, so we need this typedef.  */
  typedef vec<int> vec_int_heap;
  vec<int> *all_components = XNEWVEC (vec_int_heap, n_components);

  for (i = 0; i < n_components; i++)
    all_components[i].create (3);

  for (i = 0; i < rdg->n_vertices; i++)
    all_components[rdg->vertices[i].component].safe_push (i);

  FOR_EACH_VEC_ELT (starting_vertices, i, v)
    {
      int c = rdg->vertices[v].component;

      if (bitmap_set_bit (saved_components, c))
        {
          rdgc x = XCNEW (struct rdg_component);
          x->num = c;
          x->vertices = all_components[c];

          components->safe_push (x);
        }
    }

  for (i = 0; i < n_components; i++)
    if (!bitmap_bit_p (saved_components, i))
      all_components[i].release ();

  free (all_components);
  BITMAP_FREE (saved_components);
}
/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
   For the moment we detect the memset and memcpy patterns.  */

static void
classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
{
  bitmap_iterator bi;
  unsigned i;
  tree nb_iter;
  data_reference_p single_load, single_store;
  bool volatiles_p = false;

  partition->kind = PKIND_NORMAL;
  partition->main_dr = NULL;
  partition->secondary_dr = NULL;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);

      if (gimple_has_volatile_ops (stmt))
        volatiles_p = true;

      /* If the stmt has uses outside of the loop, fail.
         ??? If the stmt is generated in another partition that
         is not created as builtin we can ignore this.  */
      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "not generating builtin, partition has "
                     "scalar uses outside of the loop\n");
          partition->kind = PKIND_REDUCTION;
          return;
        }
    }

  /* Perform general partition disqualification for builtins.  */
  if (volatiles_p
      || !flag_tree_loop_distribute_patterns)
    return;

  nb_iter = number_of_exit_cond_executions (loop);
  if (!nb_iter || nb_iter == chrec_dont_know)
    return;

  /* Detect memset and memcpy.  */
  single_load = NULL;
  single_store = NULL;
  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);
      data_reference_p dr;
      unsigned j;

      if (gimple_code (stmt) == GIMPLE_PHI)
        continue;

      /* Any scalar stmts are ok.  */
      if (!gimple_vuse (stmt))
        continue;

      /* Otherwise just regular loads/stores.  */
      if (!gimple_assign_single_p (stmt))
        return;

      /* But exactly one store and/or load.  */
      for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
        {
          if (DR_IS_READ (dr))
            {
              if (single_load != NULL)
                return;
              single_load = dr;
            }
          else
            {
              if (single_store != NULL)
                return;
              single_store = dr;
            }
        }
    }

  if (single_store && !single_load)
    {
      gimple stmt = DR_STMT (single_store);
      tree rhs = gimple_assign_rhs1 (stmt);
      if (!(integer_zerop (rhs)
            || integer_all_onesp (rhs)
            || real_zerop (rhs)
            || (TREE_CODE (rhs) == CONSTRUCTOR
                && !TREE_CLOBBER_P (rhs))
            || (INTEGRAL_TYPE_P (TREE_TYPE (rhs))
                && (TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt)))
                    == TYPE_MODE (unsigned_char_type_node)))))
        return;
      if (TREE_CODE (rhs) == SSA_NAME
          && !SSA_NAME_IS_DEFAULT_DEF (rhs)
          && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
        return;
      if (!adjacent_dr_p (single_store))
        return;
      partition->kind = PKIND_MEMSET;
      partition->main_dr = single_store;
    }
  else if (single_store && single_load)
    {
      gimple store = DR_STMT (single_store);
      gimple load = DR_STMT (single_load);
      /* Direct aggregate copy or via an SSA name temporary.  */
      if (load != store
          && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
        return;
      if (!adjacent_dr_p (single_store)
          || !adjacent_dr_p (single_load)
          || !operand_equal_p (DR_STEP (single_store),
                               DR_STEP (single_load), 0))
        return;
      /* Now check that if there is a dependence this dependence is
         of a suitable form for memmove.  */
      vec<loop_p> loops = vNULL;
      ddr_p ddr;
      loops.safe_push (loop);
      ddr = initialize_data_dependence_relation (single_load, single_store,
                                                 loops);
      compute_affine_dependence (ddr, loop);
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
        {
          free_dependence_relation (ddr);
          loops.release ();
          return;
        }
      if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
        {
          if (DDR_NUM_DIST_VECTS (ddr) == 0)
            {
              free_dependence_relation (ddr);
              loops.release ();
              return;
            }
          lambda_vector dist_v;
          FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
            {
              int dist = dist_v[index_in_loop_nest (loop->num,
                                                    DDR_LOOP_NEST (ddr))];
              if (dist > 0 && !DDR_REVERSED_P (ddr))
                {
                  free_dependence_relation (ddr);
                  loops.release ();
                  return;
                }
            }
        }
      free_dependence_relation (ddr);
      loops.release ();
      partition->kind = PKIND_MEMCPY;
      partition->main_dr = single_store;
      partition->secondary_dr = single_load;
    }
}
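/* For illustration, the two builtin shapes recognized above:

     for (i = 0; i < n; i++)          for (i = 0; i < n; i++)
       a[i] = 0;                        a[i] = b[i];

   classify as PKIND_MEMSET and PKIND_MEMCPY respectively.  The stored
   value of a memset must be zero, all ones, or an invariant of byte
   size; a copy must use the same step for the load and the store, and
   a dependence with positive distance (the loop reads elements it has
   already written) disqualifies the memcpy/memmove replacement.  */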
/* For a data reference REF, return the declaration of its base
   address or NULL_TREE if the base is not determined.  */

static tree
ref_base_address (data_reference_p dr)
{
  tree base_address = DR_BASE_ADDRESS (dr);

  if (base_address
      && TREE_CODE (base_address) == ADDR_EXPR)
    return TREE_OPERAND (base_address, 0);

  return base_address;
}
/* Returns true when PARTITION1 and PARTITION2 have similar memory
   accesses in RDG.  */

static bool
similar_memory_accesses (struct graph *rdg, partition_t partition1,
                         partition_t partition2)
{
  unsigned i, j, k, l;
  bitmap_iterator bi, bj;
  data_reference_p ref1, ref2;

  /* First check whether the intersection of the two partitions
     contains any loads or stores.  Common loads are the most
     frequent situation.  */
  EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
        || RDG_MEM_READS_STMT (rdg, i))
      return true;

  /* Then check all data-references against each other.  */
  EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
        || RDG_MEM_READS_STMT (rdg, i))
      EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
        if (RDG_MEM_WRITE_STMT (rdg, j)
            || RDG_MEM_READS_STMT (rdg, j))
          {
            FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
              {
                tree base1 = ref_base_address (ref1);
                if (base1)
                  FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
                    if (base1 == ref_base_address (ref2))
                      return true;
              }
          }

  return false;
}
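/* For illustration: two partitions containing

     a[i] = f (i);        and        b[i] = a[i] + 1;

   access the same base object A, so similar_memory_accesses returns
   true and ldist_gen fuses them; keeping both statements in one loop
   avoids traversing the array A twice.  */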
/* Aggregate several components into a useful partition that is
   registered in the PARTITIONS vector.  Partitions will be
   distributed in different loops.  */

static void
rdg_build_partitions (struct graph *rdg, vec<rdgc> components,
                      vec<int> *other_stores,
                      vec<partition_t> *partitions, bitmap processed)
{
  int i;
  rdgc x;
  partition_t partition = partition_alloc (NULL);

  FOR_EACH_VEC_ELT (components, i, x)
    {
      partition_t np;
      int v = x->vertices[0];

      if (bitmap_bit_p (processed, v))
        continue;

      np = build_rdg_partition_for_component (rdg, x);
      bitmap_ior_into (partition->stmts, np->stmts);
      partition->has_writes = partition_has_writes (np);
      bitmap_ior_into (processed, np->stmts);
      partition_free (np);

      if (partition_has_writes (partition))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "ldist useful partition:\n");
              dump_bitmap (dump_file, partition->stmts);
            }

          partitions->safe_push (partition);
          partition = partition_alloc (NULL);
        }
    }

  /* Add the nodes from the RDG that were not marked as processed, and
     that are used outside the current loop.  These are scalar
     computations that are not yet part of previous partitions.  */
  for (i = 0; i < rdg->n_vertices; i++)
    if (!bitmap_bit_p (processed, i)
        && rdg_defs_used_in_other_loops_p (rdg, i))
      other_stores->safe_push (i);

  /* If there are still statements left in the OTHER_STORES array,
     create other components and partitions with these stores and
     their dependences.  */
  if (other_stores->length () > 0)
    {
      vec<rdgc> comps;
      comps.create (3);
      vec<int> foo;
      foo.create (3);

      rdg_build_components (rdg, *other_stores, &comps);
      rdg_build_partitions (rdg, comps, &foo, partitions, processed);

      foo.release ();
      free_rdg_components (comps);
    }

  /* If there is something left in the last partition, save it.  */
  if (bitmap_count_bits (partition->stmts) > 0)
    partitions->safe_push (partition);
  else
    partition_free (partition);
}
/* Dump to FILE the PARTITIONS.  */

static void
dump_rdg_partitions (FILE *file, vec<partition_t> partitions)
{
  int i;
  partition_t partition;

  FOR_EACH_VEC_ELT (partitions, i, partition)
    debug_bitmap_file (file, partition->stmts);
}
/* Debug PARTITIONS.  */
extern void debug_rdg_partitions (vec<partition_t> );

DEBUG_FUNCTION void
debug_rdg_partitions (vec<partition_t> partitions)
{
  dump_rdg_partitions (stderr, partitions);
}
/* Returns the number of read and write operations in the RDG.  */

static int
number_of_rw_in_rdg (struct graph *rdg)
{
  int i, res = 0;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
        ++res;

      if (RDG_MEM_READS_STMT (rdg, i))
        ++res;
    }

  return res;
}
/* Returns the number of read and write operations in a PARTITION of
   the RDG.  */

static int
number_of_rw_in_partition (struct graph *rdg, partition_t partition)
{
  int res = 0;
  unsigned i;
  bitmap_iterator ii;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
        ++res;

      if (RDG_MEM_READS_STMT (rdg, i))
        ++res;
    }

  return res;
}
/* Returns true when one of the PARTITIONS contains all the read or
   write operations of RDG.  */

static bool
partition_contains_all_rw (struct graph *rdg,
                           vec<partition_t> partitions)
{
  int i;
  partition_t partition;
  int nrw = number_of_rw_in_rdg (rdg);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    if (nrw == number_of_rw_in_partition (rdg, partition))
      return true;

  return false;
}
/* Generate code from STARTING_VERTICES in RDG.  Returns the number of
   distributed loops.  */

static int
ldist_gen (struct loop *loop, struct graph *rdg,
           vec<int> starting_vertices)
{
  int i, nbp;
  vec<rdgc> components;
  components.create (3);
  vec<partition_t> partitions;
  partitions.create (3);
  vec<int> other_stores;
  other_stores.create (3);
  partition_t partition;
  bitmap processed = BITMAP_ALLOC (NULL);
  bool any_builtin;

  remaining_stmts = BITMAP_ALLOC (NULL);
  upstream_mem_writes = BITMAP_ALLOC (NULL);

  for (i = 0; i < rdg->n_vertices; i++)
    {
      bitmap_set_bit (remaining_stmts, i);

      /* Save in OTHER_STORES all the memory writes that are not in
         STARTING_VERTICES.  */
      if (RDG_MEM_WRITE_STMT (rdg, i))
        {
          int v;
          unsigned j;
          bool found = false;

          FOR_EACH_VEC_ELT (starting_vertices, j, v)
            if (i == v)
              {
                found = true;
                break;
              }

          if (!found)
            other_stores.safe_push (i);
        }
    }

  mark_nodes_having_upstream_mem_writes (rdg);
  rdg_build_components (rdg, starting_vertices, &components);
  rdg_build_partitions (rdg, components, &other_stores, &partitions,
                        processed);
  BITMAP_FREE (processed);

  any_builtin = false;
  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      classify_partition (loop, rdg, partition);
      any_builtin |= partition_builtin_p (partition);
    }

  /* If we are only distributing patterns fuse all partitions that
     were not properly classified as builtins.  Else fuse partitions
     with similar memory accesses.  */
  if (!flag_tree_loop_distribution)
    {
      partition_t into;
      /* If we did not detect any builtin simply bail out.  */
      if (!any_builtin)
        {
          nbp = 0;
          goto ldist_done;
        }
      /* Only fuse adjacent non-builtin partitions, see PR53616.
         ??? Use dependence information to improve partition ordering.  */
      i = 0;
      do
        {
          for (; partitions.iterate (i, &into); ++i)
            if (!partition_builtin_p (into))
              break;
          for (++i; partitions.iterate (i, &partition); ++i)
            if (!partition_builtin_p (partition))
              {
                bitmap_ior_into (into->stmts, partition->stmts);
                if (partition->kind == PKIND_REDUCTION)
                  into->kind = PKIND_REDUCTION;
                partitions.ordered_remove (i);
                partition_free (partition);
                i--;
              }
            else
              break;
        }
      while ((unsigned) i < partitions.length ());
    }
  else
    {
      partition_t into;
      int j;
      for (i = 0; partitions.iterate (i, &into); ++i)
        {
          if (partition_builtin_p (into))
            continue;
          for (j = i + 1;
               partitions.iterate (j, &partition); ++j)
            {
              if (!partition_builtin_p (partition)
                  /* ??? The following is horribly inefficient,
                     we are re-computing and analyzing data-references
                     of the stmts in the partitions all the time.  */
                  && similar_memory_accesses (rdg, into, partition))
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    {
                      fprintf (dump_file, "fusing partitions\n");
                      dump_bitmap (dump_file, into->stmts);
                      dump_bitmap (dump_file, partition->stmts);
                      fprintf (dump_file, "because they have similar "
                               "memory accesses\n");
                    }
                  bitmap_ior_into (into->stmts, partition->stmts);
                  if (partition->kind == PKIND_REDUCTION)
                    into->kind = PKIND_REDUCTION;
                  partitions.ordered_remove (j);
                  partition_free (partition);
                  j--;
                }
            }
        }
    }

  /* Fuse all reduction partitions into the last.  */
  if (partitions.length () > 1)
    {
      partition_t into = partitions.last ();
      for (i = partitions.length () - 2; i >= 0; --i)
        {
          partition_t what = partitions[i];
          if (what->kind == PKIND_REDUCTION)
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                {
                  fprintf (dump_file, "fusing partitions\n");
                  dump_bitmap (dump_file, into->stmts);
                  dump_bitmap (dump_file, what->stmts);
                  fprintf (dump_file, "because the latter has reductions\n");
                }
              bitmap_ior_into (into->stmts, what->stmts);
              into->kind = PKIND_REDUCTION;
              partitions.ordered_remove (i);
              partition_free (what);
            }
        }
    }

  nbp = partitions.length ();
  if (nbp == 0
      || (nbp == 1 && !partition_builtin_p (partitions[0]))
      || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
    {
      nbp = 0;
      goto ldist_done;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg_partitions (dump_file, partitions);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    generate_code_for_partition (loop, partition, i < nbp - 1);

 ldist_done:

  BITMAP_FREE (remaining_stmts);
  BITMAP_FREE (upstream_mem_writes);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    partition_free (partition);

  other_stores.release ();
  partitions.release ();
  free_rdg_components (components);
  return nbp;
}
/* Distributes the code from LOOP in such a way that producer
   statements are placed before consumer statements.  When STMTS is
   NULL, performs the maximal distribution; if STMTS is not NULL,
   tries to separate only these statements from the LOOP's body.
   Returns the number of distributed loops.  */

static int
distribute_loop (struct loop *loop, vec<gimple> stmts)
{
  int res = 0;
  struct graph *rdg;
  gimple s;
  int i;
  vec<int> vertices;
  vec<ddr_p> dependence_relations;
  vec<data_reference_p> datarefs;
  vec<loop_p> loop_nest;

  datarefs.create (10);
  dependence_relations.create (100);
  loop_nest.create (3);
  rdg = build_rdg (loop, &loop_nest, &dependence_relations, &datarefs);

  if (rdg == NULL)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "FIXME: Loop %d not distributed: failed to build the RDG.\n",
                 loop->num);

      free_dependence_relations (dependence_relations);
      free_data_refs (datarefs);
      loop_nest.release ();
      return res;
    }

  vertices.create (3);

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg (dump_file, rdg);

  FOR_EACH_VEC_ELT (stmts, i, s)
    {
      int v = rdg_vertex_for_stmt (rdg, s);

      if (v >= 0)
        {
          vertices.safe_push (v);

          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file,
                     "ldist asked to generate code for vertex %d\n", v);
        }
    }

  res = ldist_gen (loop, rdg, vertices);
  vertices.release ();
  free_rdg (rdg);

  free_dependence_relations (dependence_relations);
  free_data_refs (datarefs);
  loop_nest.release ();
  return res;
}
/* Distribute all loops in the current function.  */

static unsigned int
tree_loop_distribution (void)
{
  struct loop *loop;
  loop_iterator li;
  bool changed = false;
  basic_block bb;

  FOR_ALL_BB (bb)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        gimple_set_uid (gsi_stmt (gsi), -1);
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        gimple_set_uid (gsi_stmt (gsi), -1);
    }

  /* We can at the moment only distribute non-nested loops, thus restrict
     walking to innermost loops.  */
  FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
    {
      vec<gimple> work_list = vNULL;
      basic_block *bbs;
      int num = loop->num;
      int nb_generated_loops = 0;
      unsigned int i;

      /* If the loop doesn't have a single exit we will fail anyway,
         so do that early.  */
      if (!single_exit (loop))
        continue;

      /* Only optimize hot loops.  */
      if (!optimize_loop_for_speed_p (loop))
        continue;

      /* Only distribute loops with a header and latch for now.  */
      if (loop->num_nodes > 2)
        continue;

      /* Initialize the worklist with stmts we seed the partitions with.  */
      bbs = get_loop_body_in_dom_order (loop);
      for (i = 0; i < loop->num_nodes; ++i)
        {
          gimple_stmt_iterator gsi;
          for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
            {
              gimple stmt = gsi_stmt (gsi);
              /* Distribute stmts which have defs that are used outside of
                 the loop.  */
              if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
                ;
              /* Otherwise only distribute stores for now.  */
              else if (!gimple_assign_single_p (stmt)
                       || is_gimple_reg (gimple_assign_lhs (stmt)))
                continue;

              work_list.safe_push (stmt);
            }
        }
      free (bbs);

      if (work_list.length () > 0)
        nb_generated_loops = distribute_loop (loop, work_list);

      if (nb_generated_loops > 0)
        changed = true;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          if (nb_generated_loops > 1)
            fprintf (dump_file, "Loop %d distributed: split to %d loops.\n",
                     num, nb_generated_loops);
          else
            fprintf (dump_file, "Loop %d is the same.\n", num);
        }

      work_list.release ();
    }

  if (changed)
    {
      mark_virtual_operands_for_renaming (cfun);
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }

#ifdef ENABLE_CHECKING
  verify_loop_structure ();
#endif

  return 0;
}

static bool
gate_tree_loop_distribution (void)
{
  return flag_tree_loop_distribution
    || flag_tree_loop_distribute_patterns;
}
struct gimple_opt_pass pass_loop_distribution =
{
 {
  GIMPLE_PASS,
  "ldist",                      /* name */
  OPTGROUP_LOOP,                /* optinfo_flags */
  gate_tree_loop_distribution,  /* gate */
  tree_loop_distribution,       /* execute */
  NULL,                         /* sub */
  NULL,                         /* next */
  0,                            /* static_pass_number */
  TV_TREE_LOOP_DISTRIBUTION,    /* tv_id */
  PROP_cfg | PROP_ssa,          /* properties_required */
  0,                            /* properties_provided */
  0,                            /* properties_destroyed */
  0,                            /* todo_flags_start */
  TODO_ggc_collect
    | TODO_verify_ssa           /* todo_flags_finish */
 }
};