/* Loop distribution.
   Copyright (C) 2006-2017 Free Software Foundation, Inc.
   Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
   and Sebastian Pop <sebastian.pop@amd.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass performs loop distribution: for example, the loop

   |DO I = 2, N
   |    A(I) = B(I) + C
   |    D(I) = A(I-1)*E
   |ENDDO

   is transformed to

   |DOALL I = 2, N
   |   A(I) = B(I) + C
   |ENDDO
   |
   |DOALL I = 2, N
   |   D(I) = A(I-1)*E
   |ENDDO

   This pass uses an RDG, Reduced Dependence Graph built on top of the
   data dependence relations.  The RDG is then topologically sorted to
   obtain a map of information producers/consumers based on which it
   generates the new loops.  */
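/* An equivalent C-level illustration (added for exposition; not part of
   the original sources): the loop

     for (i = 1; i < n; i++)
       {
         a[i] = b[i] + c;
         d[i] = a[i - 1] * e;
       }

   is distributed into

     for (i = 1; i < n; i++)
       a[i] = b[i] + c;

     for (i = 1; i < n; i++)
       d[i] = a[i - 1] * e;

   which is valid because the only cross-statement dependence, the read
   of a[i - 1], is still satisfied once the first loop has run to
   completion before the second one starts.  */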
46 #include "coretypes.h"
51 #include "tree-pass.h"
53 #include "gimple-pretty-print.h"
54 #include "fold-const.h"
56 #include "gimple-iterator.h"
57 #include "gimplify-me.h"
58 #include "stor-layout.h"
60 #include "tree-ssa-loop-manip.h"
61 #include "tree-ssa-loop.h"
62 #include "tree-into-ssa.h"
65 #include "tree-scalar-evolution.h"
66 #include "tree-vectorizer.h"
/* A Reduced Dependence Graph (RDG) vertex representing a statement.  */
struct rdg_vertex
{
  /* The statement represented by this vertex.  */
  gimple *stmt;

  /* Vector of data-references in this statement.  */
  vec<data_reference_p> datarefs;

  /* True when the statement contains a write to memory.  */
  bool has_mem_write;

  /* True when the statement contains a read from memory.  */
  bool has_mem_reads;
};

#define RDGV_STMT(V)     ((struct rdg_vertex *) ((V)->data))->stmt
#define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
#define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
#define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
#define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
#define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
#define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
#define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
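/* Illustrative sketch (not from the original sources): for a loop body

     a[i] = b[i];    vertex 0: has_mem_reads and has_mem_write set
     t_1 = a[i];     vertex 1: has_mem_reads set
     c[i] = t_1;     vertex 2: has_mem_write set

   create_rdg_edges_for_scalar below adds a flow_dd edge 1 -> 2 for the
   scalar use of t_1, while the memory references recorded in each
   vertex's DATAREFS vector are what the data dependence machinery
   consults to order the accesses to A.  */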
/* Data dependence type.  */

enum rdg_dep_type
{
  /* Read After Write (RAW).  */
  flow_dd = 'f',

  /* Control dependence (execute conditional on).  */
  control_dd = 'c'
};

/* Dependence information attached to an edge of the RDG.  */

struct rdg_edge
{
  /* Type of the dependence.  */
  enum rdg_dep_type type;
};

#define RDGE_TYPE(E)        ((struct rdg_edge *) ((E)->data))->type
/* Dump vertex I in RDG to FILE.  */

static void
dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
{
  struct vertex *v = &(rdg->vertices[i]);
  struct graph_edge *e;

  fprintf (file, "(vertex %d: (%s%s) (in:", i,
	   RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
	   RDG_MEM_READS_STMT (rdg, i) ? "r" : "");

  if (v->pred)
    for (e = v->pred; e; e = e->pred_next)
      fprintf (file, " %d", e->src);

  fprintf (file, ") (out:");

  if (v->succ)
    for (e = v->succ; e; e = e->succ_next)
      fprintf (file, " %d", e->dest);

  fprintf (file, ")\n");
  print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
  fprintf (file, ")\n");
}
/* Call dump_rdg_vertex on stderr.  */

DEBUG_FUNCTION void
debug_rdg_vertex (struct graph *rdg, int i)
{
  dump_rdg_vertex (stderr, rdg, i);
}
/* Dump the reduced dependence graph RDG to FILE.  */

static void
dump_rdg (FILE *file, struct graph *rdg)
{
  fprintf (file, "(rdg\n");
  for (int i = 0; i < rdg->n_vertices; i++)
    dump_rdg_vertex (file, rdg, i);
  fprintf (file, ")\n");
}
/* Call dump_rdg on stderr.  */

DEBUG_FUNCTION void
debug_rdg (struct graph *rdg)
{
  dump_rdg (stderr, rdg);
}
static void
dot_rdg_1 (FILE *file, struct graph *rdg)
{
  int i;
  pretty_printer buffer;
  pp_needs_newline (&buffer) = false;
  buffer.buffer->stream = file;

  fprintf (file, "digraph RDG {\n");

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      fprintf (file, "%d [label=\"[%d] ", i, i);
      pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
      pp_write_text_to_stream (&buffer);
      fprintf (file, "\"]\n");

      /* Highlight reads from memory.  */
      if (RDG_MEM_READS_STMT (rdg, i))
	fprintf (file, "%d [style=filled, fillcolor=green]\n", i);

      /* Highlight stores to memory.  */
      if (RDG_MEM_WRITE_STMT (rdg, i))
	fprintf (file, "%d [style=filled, fillcolor=red]\n", i);

      if (v->succ)
	for (e = v->succ; e; e = e->succ_next)
	  switch (RDGE_TYPE (e))
	    {
	    case flow_dd:
	      /* These are the most common dependences: don't print these.  */
	      fprintf (file, "%d -> %d \n", i, e->dest);
	      break;

	    case control_dd:
	      fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
	      break;

	    default:
	      gcc_unreachable ();
	    }
    }

  fprintf (file, "}\n\n");
}
/* Display the Reduced Dependence Graph using dotty.  */

DEBUG_FUNCTION void
dot_rdg (struct graph *rdg)
{
  /* When debugging, you may want to enable the following code.  */
#ifdef HAVE_POPEN
  FILE *file = popen ("dot -Tx11", "w");
  if (!file)
    return;
  dot_rdg_1 (file, rdg);
  fflush (file);
  close (fileno (file));
  pclose (file);
#else
  dot_rdg_1 (stderr, rdg);
#endif
}
/* Returns the index of STMT in RDG.  */

static int
rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple *stmt)
{
  int index = gimple_uid (stmt);
  gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
  return index;
}
/* Creates dependence edges in RDG for all the uses of DEF.  IDEF is
   the index of DEF in RDG.  */

static void
create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
{
  use_operand_p imm_use_p;
  imm_use_iterator iterator;

  FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
    {
      struct graph_edge *e;
      int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));

      if (use < 0)
	continue;

      e = add_edge (rdg, idef, use);
      e->data = XNEW (struct rdg_edge);
      RDGE_TYPE (e) = flow_dd;
    }
}
/* Creates an edge for the control dependences of BB to the vertex V.  */

static void
create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
				    int v, control_dependences *cd)
{
  bitmap_iterator bi;
  unsigned edge_n;
  EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
			    0, edge_n, bi)
    {
      basic_block cond_bb = cd->get_edge_src (edge_n);
      gimple *stmt = last_stmt (cond_bb);
      if (stmt && is_ctrl_stmt (stmt))
	{
	  struct graph_edge *e;
	  int c = rdg_vertex_for_stmt (rdg, stmt);
	  if (c < 0)
	    continue;

	  e = add_edge (rdg, c, v);
	  e->data = XNEW (struct rdg_edge);
	  RDGE_TYPE (e) = control_dd;
	}
    }
}
/* Creates the flow (scalar def-use) edges of the reduced dependence
   graph RDG.  */

static void
create_rdg_flow_edges (struct graph *rdg)
{
  int i;
  def_operand_p def_p;
  ssa_op_iter iter;

  for (i = 0; i < rdg->n_vertices; i++)
    FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
			      iter, SSA_OP_DEF)
      create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
}
/* Creates the control dependence edges of the reduced dependence
   graph RDG.  */

static void
create_rdg_cd_edges (struct graph *rdg, control_dependences *cd, loop_p loop)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      gimple *stmt = RDG_STMT (rdg, i);
      if (gimple_code (stmt) == GIMPLE_PHI)
	{
	  edge_iterator ei;
	  edge e;
	  FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
	    if (flow_bb_inside_loop_p (loop, e->src))
	      create_edge_for_control_dependence (rdg, e->src, i, cd);
	}
      else
	create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
    }
}
/* Build the vertices of the reduced dependence graph RDG.  Return false
   if that failed.  */

static bool
create_rdg_vertices (struct graph *rdg, vec<gimple *> stmts, loop_p loop,
		     vec<data_reference_p> *datarefs)
{
  int i;
  gimple *stmt;

  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      struct vertex *v = &(rdg->vertices[i]);

      /* Record statement to vertex mapping.  */
      gimple_set_uid (stmt, i);

      v->data = XNEW (struct rdg_vertex);
      RDGV_STMT (v) = stmt;
      RDGV_DATAREFS (v).create (0);
      RDGV_HAS_MEM_WRITE (v) = false;
      RDGV_HAS_MEM_READS (v) = false;
      if (gimple_code (stmt) == GIMPLE_PHI)
	continue;

      unsigned drp = datarefs->length ();
      if (!find_data_references_in_stmt (loop, stmt, datarefs))
	return false;
      for (unsigned j = drp; j < datarefs->length (); ++j)
	{
	  data_reference_p dr = (*datarefs)[j];
	  if (DR_IS_READ (dr))
	    RDGV_HAS_MEM_READS (v) = true;
	  else
	    RDGV_HAS_MEM_WRITE (v) = true;
	  RDGV_DATAREFS (v).safe_push (dr);
	}
    }
  return true;
}
/* Initialize STMTS with all the statements of LOOP.  The order in
   which we discover statements is important as
   generate_loops_for_partition is using the same traversal for
   identifying statements in loop copies.  */

static void
stmts_from_loop (struct loop *loop, vec<gimple *> *stmts)
{
  unsigned int i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
	   gsi_next (&bsi))
	if (!virtual_operand_p (gimple_phi_result (bsi.phi ())))
	  stmts->safe_push (bsi.phi ());

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
	   gsi_next (&bsi))
	{
	  gimple *stmt = gsi_stmt (bsi);
	  if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
	    stmts->safe_push (stmt);
	}
    }

  free (bbs);
}
/* Free the reduced dependence graph RDG.  */

static void
free_rdg (struct graph *rdg)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      for (e = v->succ; e; e = e->succ_next)
	free (e->data);

      if (v->data)
	{
	  gimple_set_uid (RDGV_STMT (v), -1);
	  free_data_refs (RDGV_DATAREFS (v));
	  free (v->data);
	}
    }

  free_graph (rdg);
}
/* Build the Reduced Dependence Graph (RDG) with one vertex per
   statement of the loop nest LOOP_NEST, and one edge per data dependence or
   scalar dependence.  */

static struct graph *
build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
{
  struct graph *rdg;
  vec<data_reference_p> datarefs;

  /* Create the RDG vertices from the stmts of the loop nest.  */
  auto_vec<gimple *, 10> stmts;
  stmts_from_loop (loop_nest[0], &stmts);
  rdg = new_graph (stmts.length ());
  datarefs.create (10);
  if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs))
    {
      datarefs.release ();
      free_rdg (rdg);
      return NULL;
    }
  stmts.release ();

  create_rdg_flow_edges (rdg);
  if (cd)
    create_rdg_cd_edges (rdg, cd, loop_nest[0]);

  datarefs.release ();

  return rdg;
}
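/* Minimal usage sketch for the RDG API above (illustrative only; the
   real driver is distribute_loop below, which also owns LOOP_NEST and
   the control_dependences instance; handle_store is a hypothetical
   callback):

     struct graph *rdg = build_rdg (loop_nest, cd);
     if (rdg)
       {
	 for (int i = 0; i < rdg->n_vertices; ++i)
	   if (RDG_MEM_WRITE_STMT (rdg, i))
	     handle_store (RDG_STMT (rdg, i));
	 free_rdg (rdg);
       }
*/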
enum partition_kind {
    PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY, PKIND_MEMMOVE
};

struct partition
{
  bitmap stmts;
  bitmap loops;
  bool reduction_p;
  enum partition_kind kind;
  /* data-references a kind != PKIND_NORMAL partition is about.  */
  data_reference_p main_dr;
  data_reference_p secondary_dr;
  tree niter;
  bool plus_one;
};
/* Allocate and initialize a partition from BITMAP.  */

static partition *
partition_alloc (bitmap stmts, bitmap loops)
{
  partition *partition = XCNEW (struct partition);
  partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
  partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
  partition->reduction_p = false;
  partition->kind = PKIND_NORMAL;
  return partition;
}
/* Free PARTITION.  */

static void
partition_free (partition *partition)
{
  BITMAP_FREE (partition->stmts);
  BITMAP_FREE (partition->loops);
  free (partition);
}
/* Returns true if the partition can be generated as a builtin.  */

static bool
partition_builtin_p (partition *partition)
{
  return partition->kind != PKIND_NORMAL;
}
/* Returns true if the partition contains a reduction.  */

static bool
partition_reduction_p (partition *partition)
{
  return partition->reduction_p;
}
/* Merge PARTITION into the partition DEST.  */

static void
partition_merge_into (partition *dest, partition *partition)
{
  dest->kind = PKIND_NORMAL;
  bitmap_ior_into (dest->stmts, partition->stmts);
  if (partition_reduction_p (partition))
    dest->reduction_p = true;
}
/* Returns true when DEF is an SSA_NAME defined in LOOP and used after
   the LOOP.  */

static bool
ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
{
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
    {
      gimple *use_stmt = USE_STMT (use_p);
      if (!is_gimple_debug (use_stmt)
	  && loop != loop_containing_stmt (use_stmt))
	return true;
    }

  return false;
}
/* Returns true when STMT defines a scalar variable used after the
   loop LOOP.  */

static bool
stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple *stmt)
{
  def_operand_p def_p;
  ssa_op_iter op_iter;

  if (gimple_code (stmt) == GIMPLE_PHI)
    return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);

  FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
    if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
      return true;

  return false;
}
/* Return a copy of LOOP placed before LOOP.  */

static struct loop *
copy_loop_before (struct loop *loop)
{
  struct loop *res;
  edge preheader = loop_preheader_edge (loop);

  initialize_original_copy_tables ();
  res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, NULL, preheader);
  gcc_assert (res != NULL);
  free_original_copy_tables ();
  delete_update_ssa ();

  return res;
}
/* Creates an empty basic block after LOOP.  */

static void
create_bb_after_loop (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return;

  split_edge (exit);
}
/* Generate code for PARTITION from the code in LOOP.  The loop is
   copied when COPY_P is true.  All the statements not flagged in the
   PARTITION bitmap are removed from the loop or from its copy.  The
   statements are indexed in sequence inside a basic block, and the
   basic blocks of a loop are taken in dom order.  */

static void
generate_loops_for_partition (struct loop *loop, partition *partition,
			      bool copy_p)
{
  unsigned i;
  basic_block *bbs;

  if (copy_p)
    {
      loop = copy_loop_before (loop);
      gcc_assert (loop != NULL);
      create_preheader (loop, CP_SIMPLE_PREHEADERS);
      create_bb_after_loop (loop);
    }

  /* Remove stmts not in the PARTITION bitmap.  */
  bbs = get_loop_body_in_dom_order (loop);

  if (MAY_HAVE_DEBUG_STMTS)
    for (i = 0; i < loop->num_nodes; i++)
      {
	basic_block bb = bbs[i];

	for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
	     gsi_next (&bsi))
	  {
	    gphi *phi = bsi.phi ();
	    if (!virtual_operand_p (gimple_phi_result (phi))
		&& !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
	      reset_debug_uses (phi);
	  }

	for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	  {
	    gimple *stmt = gsi_stmt (bsi);
	    if (gimple_code (stmt) != GIMPLE_LABEL
		&& !is_gimple_debug (stmt)
		&& !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
	      reset_debug_uses (stmt);
	  }
      }

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
	{
	  gphi *phi = bsi.phi ();
	  if (!virtual_operand_p (gimple_phi_result (phi))
	      && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
	    remove_phi_node (&bsi, true);
	  else
	    gsi_next (&bsi);
	}

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
	{
	  gimple *stmt = gsi_stmt (bsi);
	  if (gimple_code (stmt) != GIMPLE_LABEL
	      && !is_gimple_debug (stmt)
	      && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
	    {
	      /* Choose an arbitrary path through the empty CFG part
		 that this unnecessary control stmt controls.  */
	      if (gcond *cond_stmt = dyn_cast <gcond *> (stmt))
		{
		  gimple_cond_make_false (cond_stmt);
		  update_stmt (stmt);
		}
	      else if (gimple_code (stmt) == GIMPLE_SWITCH)
		{
		  gswitch *switch_stmt = as_a <gswitch *> (stmt);
		  gimple_switch_set_index
		    (switch_stmt, CASE_LOW (gimple_switch_label (switch_stmt, 1)));
		  update_stmt (stmt);
		}
	      else
		{
		  unlink_stmt_vdef (stmt);
		  gsi_remove (&bsi, true);
		  release_defs (stmt);
		  continue;
		}
	    }
	  gsi_next (&bsi);
	}
    }

  free (bbs);
}
/* Build the size argument for a memory operation call.  */

static tree
build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter,
		    bool plus_one)
{
  tree size = fold_convert_loc (loc, sizetype, nb_iter);
  if (plus_one)
    size = size_binop (PLUS_EXPR, size, size_one_node);
  size = fold_build2_loc (loc, MULT_EXPR, sizetype, size,
			  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
  size = fold_convert_loc (loc, size_type_node, size);
  return size;
}
/* Build an address argument for a memory operation call.  */

static tree
build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
{
  tree addr_base;

  addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
  addr_base = fold_convert_loc (loc, sizetype, addr_base);

  /* Test for a negative stride, iterating over every element.  */
  if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
    {
      addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
				  fold_convert_loc (loc, sizetype, nb_bytes));
      addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
				  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
    }

  return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
}
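/* Worked example for the negative-stride case above (illustrative;
   not from the original sources): for

     for (i = n - 1; i >= 0; i--)
       a[i] = 0;

   DR_OFFSET/DR_INIT point at the first element touched, &a[n-1].  The
   lowest address the loop writes is

     &a[n-1] - nb_bytes + sizeof (a[0]) == &a[0]

   which is exactly what the MINUS_EXPR/PLUS_EXPR pair computes, so the
   generated builtin call still receives the start of the region.  */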
/* If VAL memory representation contains the same value in all bytes,
   return that value, otherwise return -1.
   E.g. for 0x24242424 return 0x24, for IEEE double
   747708026454360457216.0 return 0x44, etc.  */

static int
const_with_all_bytes_same (tree val)
{
  unsigned char buf[64];
  int i, len;

  if (integer_zerop (val)
      || (TREE_CODE (val) == CONSTRUCTOR
	  && !TREE_CLOBBER_P (val)
	  && CONSTRUCTOR_NELTS (val) == 0))
    return 0;

  if (real_zerop (val))
    {
      /* Only return 0 for +0.0, not for -0.0, which doesn't have
	 an all bytes same memory representation.  Don't transform
	 -0.0 stores into +0.0 even for !HONOR_SIGNED_ZEROS.  */
      switch (TREE_CODE (val))
	{
	case REAL_CST:
	  if (!real_isneg (TREE_REAL_CST_PTR (val)))
	    return 0;
	  break;
	case COMPLEX_CST:
	  if (!const_with_all_bytes_same (TREE_REALPART (val))
	      && !const_with_all_bytes_same (TREE_IMAGPART (val)))
	    return 0;
	  break;
	case VECTOR_CST:
	  unsigned int j;
	  for (j = 0; j < VECTOR_CST_NELTS (val); ++j)
	    if (const_with_all_bytes_same (VECTOR_CST_ELT (val, j)))
	      break;
	  if (j == VECTOR_CST_NELTS (val))
	    return 0;
	  break;
	default:
	  break;
	}
    }

  if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
    return -1;

  len = native_encode_expr (val, buf, sizeof (buf));
  if (len == 0)
    return -1;
  for (i = 1; i < len; i++)
    if (buf[i] != buf[0])
      return -1;
  return buf[0];
}
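/* Illustrative results, derived from the rules above (assuming 8-bit
   bytes and IEEE doubles; not an exhaustive test):

     const_with_all_bytes_same (0)           == 0x00
     const_with_all_bytes_same (0x24242424)  == 0x24
     const_with_all_bytes_same (0x12345678)  == -1
     const_with_all_bytes_same (+0.0)        == 0x00
     const_with_all_bytes_same (-0.0)        == -1  (sign bit differs)

   A -1 result only means "not a splat constant"; the store may still
   become a memset when the stored type is byte-sized, see
   classify_partition below.  */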
/* Generate a call to memset for PARTITION in LOOP.  */

static void
generate_memset_builtin (struct loop *loop, partition *partition)
{
  gimple_stmt_iterator gsi;
  gimple *stmt, *fn_call;
  tree mem, fn, nb_bytes;
  location_t loc;
  tree val;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
				 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
				       false, GSI_CONTINUE_LINKING);
  mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);

  /* This exactly matches the pattern recognition in classify_partition.  */
  val = gimple_assign_rhs1 (stmt);
  /* Handle constants like 0x15151515 and similarly
     floating point constants etc. where all bytes are the same.  */
  int bytev = const_with_all_bytes_same (val);
  if (bytev != -1)
    val = build_int_cst (integer_type_node, bytev);
  else if (TREE_CODE (val) == INTEGER_CST)
    val = fold_convert (integer_type_node, val);
  else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
    {
      tree tem = make_ssa_name (integer_type_node);
      gimple *cstmt = gimple_build_assign (tem, NOP_EXPR, val);
      gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
      val = tem;
    }

  fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
  fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "generated memset");
      if (bytev == 0)
	fprintf (dump_file, " zero\n");
      else
	fprintf (dump_file, "\n");
    }
}
/* Generate a call to memcpy for PARTITION in LOOP.  */

static void
generate_memcpy_builtin (struct loop *loop, partition *partition)
{
  gimple_stmt_iterator gsi;
  gimple *stmt, *fn_call;
  tree dest, src, fn, nb_bytes;
  location_t loc;
  enum built_in_function kind;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
				 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
				       false, GSI_CONTINUE_LINKING);
  dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
  if (partition->kind == PKIND_MEMCPY
      || ! ptr_derefs_may_alias_p (dest, src))
    kind = BUILT_IN_MEMCPY;
  else
    kind = BUILT_IN_MEMMOVE;

  dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
				   false, GSI_CONTINUE_LINKING);
  src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);
  fn = build_fold_addr_expr (builtin_decl_implicit (kind));
  fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (kind == BUILT_IN_MEMCPY)
	fprintf (dump_file, "generated memcpy\n");
      else
	fprintf (dump_file, "generated memmove\n");
    }
}
/* Remove and destroy the loop LOOP.  */

static void
destroy_loop (struct loop *loop)
{
  unsigned nbbs = loop->num_nodes;
  edge exit = single_exit (loop);
  basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
  basic_block *bbs;
  unsigned i;

  bbs = get_loop_body_in_dom_order (loop);

  redirect_edge_pred (exit, src);
  exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
  exit->flags |= EDGE_FALLTHRU;
  cancel_loop_tree (loop);
  rescan_loop_exit (exit, false, true);

  i = nbbs;
  do
    {
      /* We have made sure to not leave any dangling uses of SSA
	 names defined in the loop.  With the exception of virtuals.
	 Make sure we replace all uses of virtual defs that will remain
	 outside of the loop with the bare symbol as delete_basic_block
	 will release them.  */
      --i;
      for (gphi_iterator gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gphi *phi = gsi.phi ();
	  if (virtual_operand_p (gimple_phi_result (phi)))
	    mark_virtual_phi_result_for_renaming (phi);
	}
      for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  tree vdef = gimple_vdef (stmt);
	  if (vdef && TREE_CODE (vdef) == SSA_NAME)
	    mark_virtual_operand_for_renaming (vdef);
	}
      delete_basic_block (bbs[i]);
    }
  while (i != 0);

  free (bbs);

  set_immediate_dominator (CDI_DOMINATORS, dest,
			   recompute_dominator (CDI_DOMINATORS, dest));
}
/* Generates code for PARTITION.  Return whether LOOP needs to be destroyed.  */

static bool
generate_code_for_partition (struct loop *loop,
			     partition *partition, bool copy_p)
{
  switch (partition->kind)
    {
    case PKIND_NORMAL:
      /* Reductions all have to be in the last partition.  */
      gcc_assert (!partition_reduction_p (partition)
		  || !copy_p);
      generate_loops_for_partition (loop, partition, copy_p);
      return false;

    case PKIND_MEMSET:
      generate_memset_builtin (loop, partition);
      break;

    case PKIND_MEMCPY:
    case PKIND_MEMMOVE:
      generate_memcpy_builtin (loop, partition);
      break;

    default:
      gcc_unreachable ();
    }

  /* Common tail for partitions we turn into a call.  If this was the last
     partition for which we generate code, we have to destroy the loop.  */
  return !copy_p;
}
/* Returns a partition with all the statements needed for computing
   the vertex V of the RDG, also including the loop exit conditions.  */

static partition *
build_rdg_partition_for_vertex (struct graph *rdg, int v)
{
  partition *partition = partition_alloc (NULL, NULL);
  auto_vec<int, 3> nodes;
  unsigned i;
  int x;

  graphds_dfs (rdg, &v, 1, &nodes, false, NULL);

  FOR_EACH_VEC_ELT (nodes, i, x)
    {
      bitmap_set_bit (partition->stmts, x);
      bitmap_set_bit (partition->loops,
		      loop_containing_stmt (RDG_STMT (rdg, x))->num);
    }

  return partition;
}
/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
   For the moment we detect memset, memcpy and memmove patterns.  */

static void
classify_partition (loop_p loop, struct graph *rdg, partition *partition)
{
  bitmap_iterator bi;
  unsigned i;
  tree nb_iter;
  data_reference_p single_load, single_store;
  bool volatiles_p = false;
  bool plus_one = false;

  partition->kind = PKIND_NORMAL;
  partition->main_dr = NULL;
  partition->secondary_dr = NULL;
  partition->niter = NULL_TREE;
  partition->plus_one = false;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple *stmt = RDG_STMT (rdg, i);

      if (gimple_has_volatile_ops (stmt))
	volatiles_p = true;

      /* If the stmt has uses outside of the loop mark it as reduction.  */
      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
	{
	  partition->reduction_p = true;
	  return;
	}
    }

  /* Perform general partition disqualification for builtins.  */
  if (volatiles_p
      || !flag_tree_loop_distribute_patterns)
    return;

  /* Detect memset and memcpy.  */
  single_load = NULL;
  single_store = NULL;
  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple *stmt = RDG_STMT (rdg, i);
      data_reference_p dr;
      unsigned j;

      if (gimple_code (stmt) == GIMPLE_PHI)
	continue;

      /* Any scalar stmts are ok.  */
      if (!gimple_vuse (stmt))
	continue;

      /* Otherwise just regular loads/stores.  */
      if (!gimple_assign_single_p (stmt))
	return;

      /* But exactly one store and/or load.  */
      for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
	{
	  if (DR_IS_READ (dr))
	    {
	      if (single_load != NULL)
		return;
	      single_load = dr;
	    }
	  else
	    {
	      if (single_store != NULL)
		return;
	      single_store = dr;
	    }
	}
    }

  if (!single_store)
    return;

  nb_iter = number_of_latch_executions (loop);
  if (!nb_iter || nb_iter == chrec_dont_know)
    return;
  if (dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
		      gimple_bb (DR_STMT (single_store))))
    plus_one = true;

  if (single_store && !single_load)
    {
      gimple *stmt = DR_STMT (single_store);
      tree rhs = gimple_assign_rhs1 (stmt);
      if (const_with_all_bytes_same (rhs) == -1
	  && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
	      || (TYPE_MODE (TREE_TYPE (rhs))
		  != TYPE_MODE (unsigned_char_type_node))))
	return;
      if (TREE_CODE (rhs) == SSA_NAME
	  && !SSA_NAME_IS_DEFAULT_DEF (rhs)
	  && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
	return;
      if (!adjacent_dr_p (single_store)
	  || !dominated_by_p (CDI_DOMINATORS,
			      loop->latch, gimple_bb (stmt)))
	return;
      partition->kind = PKIND_MEMSET;
      partition->main_dr = single_store;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
  else if (single_store && single_load)
    {
      gimple *store = DR_STMT (single_store);
      gimple *load = DR_STMT (single_load);
      /* Direct aggregate copy or via an SSA name temporary.  */
      if (load != store
	  && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
	return;
      if (!adjacent_dr_p (single_store)
	  || !adjacent_dr_p (single_load)
	  || !operand_equal_p (DR_STEP (single_store),
			       DR_STEP (single_load), 0)
	  || !dominated_by_p (CDI_DOMINATORS,
			      loop->latch, gimple_bb (store)))
	return;
      /* Now check that if there is a dependence this dependence is
         of a suitable form for memmove.  */
      vec<loop_p> loops = vNULL;
      ddr_p ddr;
      loops.safe_push (loop);
      ddr = initialize_data_dependence_relation (single_load, single_store,
						 loops);
      compute_affine_dependence (ddr, loop);
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	{
	  free_dependence_relation (ddr);
	  loops.release ();
	  return;
	}
      if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
	{
	  if (DDR_NUM_DIST_VECTS (ddr) == 0)
	    {
	      free_dependence_relation (ddr);
	      loops.release ();
	      return;
	    }
	  lambda_vector dist_v;
	  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
	    {
	      int dist = dist_v[index_in_loop_nest (loop->num,
						    DDR_LOOP_NEST (ddr))];
	      if (dist > 0 && !DDR_REVERSED_P (ddr))
		{
		  free_dependence_relation (ddr);
		  loops.release ();
		  return;
		}
	    }
	  partition->kind = PKIND_MEMMOVE;
	}
      else
	partition->kind = PKIND_MEMCPY;
      free_dependence_relation (ddr);
      loops.release ();
      partition->main_dr = single_store;
      partition->secondary_dr = single_load;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
}
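/* Illustrative classification outcomes (assuming a countable loop with
   unit-stride, adjacent accesses; not from the original sources):

     for (i = 0; i < n; i++) a[i] = 0;          -> PKIND_MEMSET
     for (i = 0; i < n; i++) a[i] = b[i];       -> PKIND_MEMCPY
     for (i = 0; i < n; i++) a[i] = a[i + 4];   -> PKIND_MEMMOVE
     for (i = 1; i < n; i++) a[i] = a[i - 1];   -> stays PKIND_NORMAL,
       because this overlap propagates one element through the whole
       region, which memmove cannot emulate.

   Everything else keeps PKIND_NORMAL and is code-generated as a real
   loop by generate_loops_for_partition.  */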
/* For a data reference REF, return the declaration of its base
   address or NULL_TREE if the base is not determined.  */

static tree
ref_base_address (data_reference_p dr)
{
  tree base_address = DR_BASE_ADDRESS (dr);
  if (base_address
      && TREE_CODE (base_address) == ADDR_EXPR)
    return TREE_OPERAND (base_address, 0);

  return base_address;
}
/* Returns true when PARTITION1 and PARTITION2 have similar memory
   accesses in RDG.  */

static bool
similar_memory_accesses (struct graph *rdg, partition *partition1,
			 partition *partition2)
{
  unsigned i, j, k, l;
  bitmap_iterator bi, bj;
  data_reference_p ref1, ref2;

  /* First check whether in the intersection of the two partitions are
     any loads or stores.  Common loads are the situation that happens
     most often.  */
  EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
	|| RDG_MEM_READS_STMT (rdg, i))
      return true;

  /* Then check all data-references against each other.  */
  EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
	|| RDG_MEM_READS_STMT (rdg, i))
      EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
	if (RDG_MEM_WRITE_STMT (rdg, j)
	    || RDG_MEM_READS_STMT (rdg, j))
	  {
	    FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
	      {
		tree base1 = ref_base_address (ref1);
		if (base1)
		  FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
		    if (base1 == ref_base_address (ref2))
		      return true;
	      }
	  }

  return false;
}
/* Aggregate several components into a useful partition that is
   registered in the PARTITIONS vector.  Partitions will be
   distributed in different loops.  */

static void
rdg_build_partitions (struct graph *rdg,
		      vec<gimple *> starting_stmts,
		      vec<partition *> *partitions)
{
  bitmap processed = BITMAP_ALLOC (NULL);
  int i;
  gimple *stmt;

  FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
    {
      int v = rdg_vertex_for_stmt (rdg, stmt);

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "ldist asked to generate code for vertex %d\n", v);

      /* If the vertex is already contained in another partition so
         is the partition rooted at it.  */
      if (bitmap_bit_p (processed, v))
	continue;

      partition *partition = build_rdg_partition_for_vertex (rdg, v);
      bitmap_ior_into (processed, partition->stmts);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "ldist useful partition:\n");
	  dump_bitmap (dump_file, partition->stmts);
	}

      partitions->safe_push (partition);
    }

  /* All vertices should have been assigned to at least one partition now,
     other than vertices belonging to dead code.  */

  BITMAP_FREE (processed);
}
/* Dump to FILE the PARTITIONS.  */

static void
dump_rdg_partitions (FILE *file, vec<partition *> partitions)
{
  int i;
  partition *partition;

  FOR_EACH_VEC_ELT (partitions, i, partition)
    debug_bitmap_file (file, partition->stmts);
}

/* Debug PARTITIONS.  */
extern void debug_rdg_partitions (vec<partition *> );

DEBUG_FUNCTION void
debug_rdg_partitions (vec<partition *> partitions)
{
  dump_rdg_partitions (stderr, partitions);
}
/* Returns the number of read and write operations in the RDG.  */

static int
number_of_rw_in_rdg (struct graph *rdg)
{
  int i, res = 0;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
	++res;

      if (RDG_MEM_READS_STMT (rdg, i))
	++res;
    }

  return res;
}
/* Returns the number of read and write operations in a PARTITION of
   the RDG.  */

static int
number_of_rw_in_partition (struct graph *rdg, partition *partition)
{
  int res = 0;
  unsigned i;
  bitmap_iterator ii;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
	++res;

      if (RDG_MEM_READS_STMT (rdg, i))
	++res;
    }

  return res;
}
/* Returns true when one of the PARTITIONS contains all the read or
   write operations of RDG.  */

static bool
partition_contains_all_rw (struct graph *rdg,
			   vec<partition *> partitions)
{
  int i;
  partition *partition;
  int nrw = number_of_rw_in_rdg (rdg);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    if (nrw == number_of_rw_in_partition (rdg, partition))
      return true;

  return false;
}
/* Compute partition dependence created by the data references in DRS1
   and DRS2 and modify and return DIR according to that.  */

static int
pg_add_dependence_edges (struct graph *rdg, vec<loop_p> loops, int dir,
			 vec<data_reference_p> drs1,
			 vec<data_reference_p> drs2)
{
  data_reference_p dr1, dr2;

  /* dependence direction - 0 is no dependence, -1 is back,
     1 is forth, 2 is both (we can stop then, merging will occur).  */
  for (int ii = 0; drs1.iterate (ii, &dr1); ++ii)
    for (int jj = 0; drs2.iterate (jj, &dr2); ++jj)
      {
	data_reference_p saved_dr1 = dr1;
	int this_dir = 1;
	ddr_p ddr;
	/* Re-shuffle data-refs to be in dominator order.  */
	if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
	    > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
	  {
	    std::swap (dr1, dr2);
	    this_dir = -this_dir;
	  }
	ddr = initialize_data_dependence_relation (dr1, dr2, loops);
	compute_affine_dependence (ddr, loops[0]);
	if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	  this_dir = 2;
	else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
	  {
	    if (DDR_REVERSED_P (ddr))
	      {
		std::swap (dr1, dr2);
		this_dir = -this_dir;
	      }
	    /* Known dependences can still be unordered throughout the
	       iteration space, see gcc.dg/tree-ssa/ldist-16.c.  */
	    if (DDR_NUM_DIST_VECTS (ddr) != 1)
	      this_dir = 2;
	    /* If the overlap is exact preserve stmt order.  */
	    else if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1))
	      ;
	    else
	      {
		/* Else as the distance vector is lexicographic positive
		   swap the dependence direction.  */
		this_dir = -this_dir;
	      }
	  }
	else
	  this_dir = 0;
	free_dependence_relation (ddr);
	if (this_dir == 2)
	  return 2;
	else if (dir == 0)
	  dir = this_dir;
	else if (this_dir != 0 && dir != this_dir)
	  return 2;
	/* Shuffle "back" dr1.  */
	dr1 = saved_dr1;
      }
  return dir;
}
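/* Illustrative reading of the encoding above (not from the original
   sources): with partition P1 containing "a[i] = b[i];" and partition
   P2 containing "c[i] = a[i];", checking P1's writes against P2's
   reads yields dir == 1, i.e. P1 must stay before P2; the mirrored
   query would yield -1; and a pair that is unordered or unanalyzable
   yields 2, which tells distribute_loop to fuse the two partitions.  */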
/* Compare postorder number of the partition graph vertices V1 and V2.  */

static int
pgcmp (const void *v1_, const void *v2_)
{
  const vertex *v1 = (const vertex *)v1_;
  const vertex *v2 = (const vertex *)v2_;
  return v2->post - v1->post;
}
/* Distributes the code from LOOP in such a way that producer
   statements are placed before consumer statements.  Tries to separate
   only the statements from STMTS into separate loops.
   Returns the number of distributed loops.  Set *DESTROY_P to whether
   LOOP needs to be destroyed.  */

static int
distribute_loop (struct loop *loop, vec<gimple *> stmts,
		 control_dependences *cd, int *nb_calls, bool *destroy_p)
{
  struct graph *rdg;
  partition *partition;
  bool any_builtin;
  int i, nbp;
  graph *pg = NULL;
  int num_sccs = 1;

  *destroy_p = false;
  *nb_calls = 0;
  auto_vec<loop_p, 3> loop_nest;
  if (!find_loop_nest (loop, &loop_nest))
    return 0;

  rdg = build_rdg (loop_nest, cd);
  if (!rdg)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Loop %d not distributed: failed to build the RDG.\n",
		 loop->num);

      return 0;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg (dump_file, rdg);

  auto_vec<struct partition *, 3> partitions;
  rdg_build_partitions (rdg, stmts, &partitions);

  any_builtin = false;
  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      classify_partition (loop, rdg, partition);
      any_builtin |= partition_builtin_p (partition);
    }

  /* If we are only distributing patterns but did not detect any,
     simply bail out.  */
  if (!flag_tree_loop_distribution
      && !any_builtin)
    {
      nbp = 0;
      goto ldist_done;
    }

  /* If we are only distributing patterns fuse all partitions that
     were not classified as builtins.  This also avoids chopping
     a loop into pieces, separated by builtin calls.  That is, we
     only want no or a single loop body remaining.  */
  struct partition *into;
  if (!flag_tree_loop_distribution)
    {
      for (i = 0; partitions.iterate (i, &into); ++i)
	if (!partition_builtin_p (into))
	  break;
      for (++i; partitions.iterate (i, &partition); ++i)
	if (!partition_builtin_p (partition))
	  {
	    if (dump_file && (dump_flags & TDF_DETAILS))
	      {
		fprintf (dump_file, "fusing non-builtin partitions\n");
		dump_bitmap (dump_file, into->stmts);
		dump_bitmap (dump_file, partition->stmts);
	      }
	    partition_merge_into (into, partition);
	    partitions.unordered_remove (i);
	    partition_free (partition);
	    i--;
	  }
    }

  /* Due to limitations in the transform phase we have to fuse all
     reduction partitions into the last partition so the existing
     loop will contain all loop-closed PHI nodes.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    if (partition_reduction_p (into))
      break;
  for (i = i + 1; partitions.iterate (i, &partition); ++i)
    if (partition_reduction_p (partition))
      {
	if (dump_file && (dump_flags & TDF_DETAILS))
	  {
	    fprintf (dump_file, "fusing partitions\n");
	    dump_bitmap (dump_file, into->stmts);
	    dump_bitmap (dump_file, partition->stmts);
	    fprintf (dump_file, "because they have reductions\n");
	  }
	partition_merge_into (into, partition);
	partitions.unordered_remove (i);
	partition_free (partition);
	i--;
      }

  /* Apply our simple cost model - fuse partitions with similar
     memory accesses.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    {
      bool changed = false;
      if (partition_builtin_p (into))
	continue;
      for (int j = i + 1;
	   partitions.iterate (j, &partition); ++j)
	{
	  if (!partition_builtin_p (partition)
	      && similar_memory_accesses (rdg, into, partition))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, "fusing partitions\n");
		  dump_bitmap (dump_file, into->stmts);
		  dump_bitmap (dump_file, partition->stmts);
		  fprintf (dump_file, "because they have similar "
			   "memory accesses\n");
		}
	      partition_merge_into (into, partition);
	      partitions.unordered_remove (j);
	      partition_free (partition);
	      changed = true;
	      j--;
	    }
	}
      /* If we fused 0 1 2 in step 1 to 0,2 1 as 0 and 2 have similar
         accesses when 1 and 2 have similar accesses but not 0 and 1
	 then in the next iteration we will fail to consider merging
	 1 into 0,2.  So try again if we did any merging into 0.  */
      if (changed)
	i--;
    }

  /* Build the partition dependency graph.  */
  if (partitions.length () > 1)
    {
      pg = new_graph (partitions.length ());
      struct pgdata {
	  struct partition *partition;
	  vec<data_reference_p> writes;
	  vec<data_reference_p> reads;
      };
#define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
      for (i = 0; partitions.iterate (i, &partition); ++i)
	{
	  vertex *v = &pg->vertices[i];
	  pgdata *data = new pgdata;
	  data_reference_p dr;
	  /* FIXME - leaks.  */
	  v->data = data;
	  bitmap_iterator bi;
	  unsigned j;
	  data->partition = partition;
	  data->reads = vNULL;
	  data->writes = vNULL;
	  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, j, bi)
	    for (int k = 0; RDG_DATAREFS (rdg, j).iterate (k, &dr); ++k)
	      if (DR_IS_READ (dr))
		data->reads.safe_push (dr);
	      else
		data->writes.safe_push (dr);
	}
      struct partition *partition1, *partition2;
      for (i = 0; partitions.iterate (i, &partition1); ++i)
	for (int j = i + 1; partitions.iterate (j, &partition2); ++j)
	  {
	    /* dependence direction - 0 is no dependence, -1 is back,
	       1 is forth, 2 is both (we can stop then, merging will occur).  */
	    int dir = 0;
	    dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					   PGDATA(i)->writes,
					   PGDATA(j)->reads);
	    if (dir != 2)
	      dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					     PGDATA(i)->reads,
					     PGDATA(j)->writes);
	    if (dir != 2)
	      dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					     PGDATA(i)->writes,
					     PGDATA(j)->writes);
	    if (dir == 1 || dir == 2)
	      add_edge (pg, i, j);
	    if (dir == -1 || dir == 2)
	      add_edge (pg, j, i);
	  }

      /* Add edges to the reduction partition (if any) to force it last.  */
      unsigned j;
      for (j = 0; partitions.iterate (j, &partition); ++j)
	if (partition_reduction_p (partition))
	  break;
      if (j < partitions.length ())
	{
	  for (unsigned i = 0; partitions.iterate (i, &partition); ++i)
	    if (i != j)
	      add_edge (pg, i, j);
	}

      /* Compute partitions we cannot separate and fuse them.  */
      num_sccs = graphds_scc (pg, NULL);
      for (i = 0; i < num_sccs; ++i)
	{
	  struct partition *first;
	  int j;
	  for (j = 0; partitions.iterate (j, &first); ++j)
	    if (pg->vertices[j].component == i)
	      break;
	  for (j = j + 1; partitions.iterate (j, &partition); ++j)
	    if (pg->vertices[j].component == i)
	      {
		if (dump_file && (dump_flags & TDF_DETAILS))
		  {
		    fprintf (dump_file, "fusing partitions\n");
		    dump_bitmap (dump_file, first->stmts);
		    dump_bitmap (dump_file, partition->stmts);
		    fprintf (dump_file, "because they are in the same "
			     "dependence SCC\n");
		  }
		partition_merge_into (first, partition);
		partitions[j] = NULL;
		partition_free (partition);
		PGDATA (j)->partition = NULL;
	      }
	}

      /* Now order the remaining nodes in postorder.  */
      qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
      partitions.truncate (0);
      for (i = 0; i < pg->n_vertices; ++i)
	{
	  pgdata *data = PGDATA (i);
	  if (data->partition)
	    partitions.safe_push (data->partition);
	  data->reads.release ();
	  data->writes.release ();
	  delete data;
	}
      gcc_assert (partitions.length () == (unsigned)num_sccs);
      free_graph (pg);
    }

  nbp = partitions.length ();
  if (nbp == 0
      || (nbp == 1 && !partition_builtin_p (partitions[0]))
      || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
    {
      nbp = 0;
      goto ldist_done;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg_partitions (dump_file, partitions);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      if (partition_builtin_p (partition))
	(*nb_calls)++;
      *destroy_p |= generate_code_for_partition (loop, partition, i < nbp - 1);
    }

 ldist_done:

  FOR_EACH_VEC_ELT (partitions, i, partition)
    partition_free (partition);

  free_rdg (rdg);
  return nbp - *nb_calls;
}
/* Distribute all loops in the current function.  */

namespace {

const pass_data pass_data_loop_distribution =
{
  GIMPLE_PASS, /* type */
  "ldist", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_loop_distribution : public gimple_opt_pass
{
public:
  pass_loop_distribution (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_loop_distribution, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return flag_tree_loop_distribution
	|| flag_tree_loop_distribute_patterns;
    }

  virtual unsigned int execute (function *);

}; // class pass_loop_distribution
unsigned int
pass_loop_distribution::execute (function *fun)
{
  struct loop *loop;
  bool changed = false;
  basic_block bb;
  control_dependences *cd = NULL;
  auto_vec<loop_p> loops_to_be_destroyed;

  FOR_ALL_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
    }

  /* We can at the moment only distribute non-nested loops, thus restrict
     walking to innermost loops.  */
  FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
    {
      auto_vec<gimple *> work_list;
      basic_block *bbs;
      int num = loop->num;
      unsigned int i;

      /* If the loop doesn't have a single exit we will fail anyway,
	 so do that early.  */
      if (!single_exit (loop))
	continue;

      /* Only optimize hot loops.  */
      if (!optimize_loop_for_speed_p (loop))
	continue;

      /* Initialize the worklist with stmts we seed the partitions with.  */
      bbs = get_loop_body_in_dom_order (loop);
      for (i = 0; i < loop->num_nodes; ++i)
	{
	  for (gphi_iterator gsi = gsi_start_phis (bbs[i]);
	       !gsi_end_p (gsi);
	       gsi_next (&gsi))
	    {
	      gphi *phi = gsi.phi ();
	      if (virtual_operand_p (gimple_phi_result (phi)))
		continue;
	      /* Distribute stmts which have defs that are used outside of
		 the loop.  */
	      if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
		continue;
	      work_list.safe_push (phi);
	    }
	  for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
	       !gsi_end_p (gsi);
	       gsi_next (&gsi))
	    {
	      gimple *stmt = gsi_stmt (gsi);

	      /* If there is a stmt with side-effects bail out - we
		 cannot and should not distribute this loop.  */
	      if (gimple_has_side_effects (stmt))
		{
		  work_list.truncate (0);
		  goto out;
		}

	      /* Distribute stmts which have defs that are used outside of
		 the loop.  */
	      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
		;
	      /* Otherwise only distribute stores for now.  */
	      else if (!gimple_vdef (stmt))
		continue;

	      work_list.safe_push (stmt);
	    }
	}
out:
      free (bbs);

      int nb_generated_loops = 0;
      int nb_generated_calls = 0;
      location_t loc = find_loop_location (loop);
      if (work_list.length () > 0)
	{
	  if (!cd)
	    {
	      calculate_dominance_info (CDI_DOMINATORS);
	      calculate_dominance_info (CDI_POST_DOMINATORS);
	      cd = new control_dependences ();
	      free_dominance_info (CDI_POST_DOMINATORS);
	    }
	  bool destroy_p;
	  nb_generated_loops = distribute_loop (loop, work_list, cd,
						&nb_generated_calls,
						&destroy_p);
	  if (destroy_p)
	    loops_to_be_destroyed.safe_push (loop);
	}

      if (nb_generated_loops + nb_generated_calls > 0)
	{
	  changed = true;
	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
			   loc, "Loop %d distributed: split to %d loops "
			   "and %d library calls.\n",
			   num, nb_generated_loops, nb_generated_calls);
	}
      else if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Loop %d is the same.\n", num);
    }

  if (cd)
    delete cd;

  if (changed)
    {
      /* Destroy loop bodies that could not be reused.  Do this late as we
	 otherwise can end up referring to stale data in control dependences.  */
      unsigned i;
      FOR_EACH_VEC_ELT (loops_to_be_destroyed, i, loop)
	destroy_loop (loop);

      /* Cached scalar evolutions now may refer to wrong or non-existing
	 loops.  */
      scev_reset_htab ();
      mark_virtual_operands_for_renaming (fun);
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }

  checking_verify_loop_structure ();

  return 0;
}

} // anon namespace
gimple_opt_pass *
make_pass_loop_distribution (gcc::context *ctxt)
{
  return new pass_loop_distribution (ctxt);
}