/* Loop distribution.
   Copyright (C) 2006-2015 Free Software Foundation, Inc.
   Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
   and Sebastian Pop <sebastian.pop@amd.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass performs loop distribution: for example, the loop

   |DO I = 2, N
   |    A(I) = B(I) + C
   |    D(I) = A(I-1)*E
   |ENDDO

   is transformed to

   |DOALL I = 2, N
   |   A(I) = B(I) + C
   |ENDDO
   |
   |DOALL I = 2, N
   |   D(I) = A(I-1)*E
   |ENDDO

   This pass uses an RDG, Reduced Dependence Graph built on top of the
   data dependence relations.  The RDG is then topologically sorted to
   obtain a map of information producers/consumers based on which it
   generates the new loops.  */
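/* As a concrete C-level illustration (an editorial sketch, not code
   taken from the GCC testsuite): with -ftree-loop-distribute-patterns
   enabled, a loop such as

     void
     clear (char *p, int n)
     {
       for (int i = 0; i < n; i++)
	 p[i] = 0;
     }

   forms a partition that classify_partition below recognizes as
   PKIND_MEMSET, and the whole loop is replaced by a single call
   memset (p, 0, n).  */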
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "double-int.h"
#include "tree.h"
#include "fold-const.h"
#include "predict.h"
#include "hard-reg-set.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "cfganal.h"
#include "basic-block.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "stor-layout.h"
#include "gimple-ssa.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-pass.h"
#include "gimple-pretty-print.h"
#include "tree-vectorizer.h"
/* A Reduced Dependence Graph (RDG) vertex representing a statement.  */
typedef struct rdg_vertex
{
  /* The statement represented by this vertex.  */
  gimple stmt;

  /* Vector of data-references in this statement.  */
  vec<data_reference_p> datarefs;

  /* True when the statement contains a write to memory.  */
  bool has_mem_write;

  /* True when the statement contains a read from memory.  */
  bool has_mem_reads;
} *rdg_vertex_p;
#define RDGV_STMT(V)     ((struct rdg_vertex *) ((V)->data))->stmt
#define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
#define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
#define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
#define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
#define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
#define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
#define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
/* Data dependence type.  */

enum rdg_dep_type
{
  /* Read After Write (RAW).  */
  flow_dd = 'f',

  /* Control dependence (execute conditional on).  */
  control_dd = 'c'
};

/* Dependence information attached to an edge of the RDG.  */

typedef struct rdg_edge
{
  /* Type of the dependence.  */
  enum rdg_dep_type type;
} *rdg_edge_p;

#define RDGE_TYPE(E)        ((struct rdg_edge *) ((E)->data))->type
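/* Illustrative sketch (editorial, assuming a typical GIMPLE form): in

     for (i = 0; i < n; i++)
       if (c[i])
	 {
	   t_1 = b[i];
	   a[i] = t_1 + 1;
	 }

   the store using t_1 gets a flow_dd edge from the statement defining
   t_1, while the two statements guarded by the GIMPLE_COND get
   control_dd edges from the condition statement.  */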
/* Dump vertex I in RDG to FILE.  */

static void
dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
{
  struct vertex *v = &(rdg->vertices[i]);
  struct graph_edge *e;

  fprintf (file, "(vertex %d: (%s%s) (in:", i,
	   RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
	   RDG_MEM_READS_STMT (rdg, i) ? "r" : "");

  if (v->pred)
    for (e = v->pred; e; e = e->pred_next)
      fprintf (file, " %d", e->src);

  fprintf (file, ") (out:");

  if (v->succ)
    for (e = v->succ; e; e = e->succ_next)
      fprintf (file, " %d", e->dest);

  fprintf (file, ")\n");
  print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
  fprintf (file, ")\n");
}
/* Call dump_rdg_vertex on stderr.  */

DEBUG_FUNCTION void
debug_rdg_vertex (struct graph *rdg, int i)
{
  dump_rdg_vertex (stderr, rdg, i);
}
/* Dump the reduced dependence graph RDG to FILE.  */

static void
dump_rdg (FILE *file, struct graph *rdg)
{
  fprintf (file, "(rdg\n");
  for (int i = 0; i < rdg->n_vertices; i++)
    dump_rdg_vertex (file, rdg, i);
  fprintf (file, ")\n");
}
/* Call dump_rdg on stderr.  */

DEBUG_FUNCTION void
debug_rdg (struct graph *rdg)
{
  dump_rdg (stderr, rdg);
}
/* Dump the RDG to FILE in dot (graphviz) format.  */

static void
dot_rdg_1 (FILE *file, struct graph *rdg)
{
  int i;
  pretty_printer buffer;
  pp_needs_newline (&buffer) = false;
  buffer.buffer->stream = file;

  fprintf (file, "digraph RDG {\n");

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      fprintf (file, "%d [label=\"[%d] ", i, i);
      pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
      pp_flush (&buffer);
      fprintf (file, "\"]\n");

      /* Highlight reads from memory.  */
      if (RDG_MEM_READS_STMT (rdg, i))
	fprintf (file, "%d [style=filled, fillcolor=green]\n", i);

      /* Highlight stores to memory.  */
      if (RDG_MEM_WRITE_STMT (rdg, i))
	fprintf (file, "%d [style=filled, fillcolor=red]\n", i);

      if (v->succ)
	for (e = v->succ; e; e = e->succ_next)
	  switch (RDGE_TYPE (e))
	    {
	    case flow_dd:
	      /* Flow dependences are the most common: print them
		 without a label.  */
	      fprintf (file, "%d -> %d \n", i, e->dest);
	      break;

	    case control_dd:
	      fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
	      break;

	    default:
	      gcc_unreachable ();
	    }
    }

  fprintf (file, "}\n\n");
}
/* Display the Reduced Dependence Graph using dotty.  */

DEBUG_FUNCTION void
dot_rdg (struct graph *rdg)
{
  /* When debugging, you may want to enable the following code.  */
#ifdef HAVE_POPEN
  FILE *file = popen ("dot -Tx11", "w");
  if (!file)
    return;
  dot_rdg_1 (file, rdg);
  fflush (file);
  close (fileno (file));
  pclose (file);
#else
  dot_rdg_1 (stderr, rdg);
#endif
}
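/* Usage sketch (editorial, assuming Graphviz is installed): besides
   piping to "dot -Tx11" as above, one can keep the graph in a file
   from a debugger session:

     FILE *f = fopen ("/tmp/rdg.dot", "w");
     dot_rdg_1 (f, rdg);
     fclose (f);

   and render it offline with "dot -Tpdf /tmp/rdg.dot -o rdg.pdf".  */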
/* Returns the index of STMT in RDG.  */

static int
rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple stmt)
{
  int index = gimple_uid (stmt);
  gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
  return index;
}
/* Creates dependence edges in RDG for all the uses of DEF.  IDEF is
   the index of DEF in RDG.  */

static void
create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
{
  use_operand_p imm_use_p;
  imm_use_iterator iterator;

  FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
    {
      struct graph_edge *e;
      int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));

      if (use < 0)
	continue;

      e = add_edge (rdg, idef, use);
      e->data = XNEW (struct rdg_edge);
      RDGE_TYPE (e) = flow_dd;
    }
}
/* Creates an edge for the control dependences of BB to the vertex V.  */

static void
create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
				    int v, control_dependences *cd)
{
  bitmap_iterator bi;
  unsigned edge_n;
  EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
			    0, edge_n, bi)
    {
      basic_block cond_bb = cd->get_edge (edge_n)->src;
      gimple stmt = last_stmt (cond_bb);
      if (stmt && is_ctrl_stmt (stmt))
	{
	  struct graph_edge *e;
	  int c = rdg_vertex_for_stmt (rdg, stmt);
	  if (c < 0)
	    continue;

	  e = add_edge (rdg, c, v);
	  e->data = XNEW (struct rdg_edge);
	  RDGE_TYPE (e) = control_dd;
	}
    }
}
/* Creates the scalar flow dependence edges of the reduced dependence
   graph RDG.  */

static void
create_rdg_flow_edges (struct graph *rdg)
{
  int i;
  def_operand_p def_p;
  ssa_op_iter iter;

  for (i = 0; i < rdg->n_vertices; i++)
    FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
			      iter, SSA_OP_DEF)
      create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
}
/* Creates the control dependence edges of the reduced dependence
   graph RDG.  */

static void
create_rdg_cd_edges (struct graph *rdg, control_dependences *cd)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      gimple stmt = RDG_STMT (rdg, i);
      if (gimple_code (stmt) == GIMPLE_PHI)
	{
	  edge_iterator ei;
	  edge e;
	  FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
	    create_edge_for_control_dependence (rdg, e->src, i, cd);
	}
      else
	create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
    }
}
/* Build the vertices of the reduced dependence graph RDG.  Return false
   if that failed.  */

static bool
create_rdg_vertices (struct graph *rdg, vec<gimple> stmts, loop_p loop,
		     vec<data_reference_p> *datarefs)
{
  int i;
  gimple stmt;

  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      struct vertex *v = &(rdg->vertices[i]);

      /* Record statement to vertex mapping.  */
      gimple_set_uid (stmt, i);

      v->data = XNEW (struct rdg_vertex);
      RDGV_STMT (v) = stmt;
      RDGV_DATAREFS (v).create (0);
      RDGV_HAS_MEM_WRITE (v) = false;
      RDGV_HAS_MEM_READS (v) = false;
      if (gimple_code (stmt) == GIMPLE_PHI)
	continue;

      unsigned drp = datarefs->length ();
      if (!find_data_references_in_stmt (loop, stmt, datarefs))
	return false;
      for (unsigned j = drp; j < datarefs->length (); ++j)
	{
	  data_reference_p dr = (*datarefs)[j];
	  if (DR_IS_READ (dr))
	    RDGV_HAS_MEM_READS (v) = true;
	  else
	    RDGV_HAS_MEM_WRITE (v) = true;
	  RDGV_DATAREFS (v).safe_push (dr);
	}
    }
  return true;
}
/* Initialize STMTS with all the statements of LOOP.  The order in
   which we discover statements is important as
   generate_loops_for_partition is using the same traversal for
   identifying statements in loop copies.  */

static void
stmts_from_loop (struct loop *loop, vec<gimple> *stmts)
{
  unsigned int i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
	   gsi_next (&bsi))
	if (!virtual_operand_p (gimple_phi_result (bsi.phi ())))
	  stmts->safe_push (bsi.phi ());

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
	   gsi_next (&bsi))
	{
	  gimple stmt = gsi_stmt (bsi);
	  if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
	    stmts->safe_push (stmt);
	}
    }

  free (bbs);
}
/* Free the reduced dependence graph RDG.  */

static void
free_rdg (struct graph *rdg)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      for (e = v->succ; e; e = e->succ_next)
	free (e->data);

      if (v->data)
	{
	  gimple_set_uid (RDGV_STMT (v), -1);
	  free_data_refs (RDGV_DATAREFS (v));
	  free (v->data);
	}
    }

  free_graph (rdg);
}
/* Build the Reduced Dependence Graph (RDG) with one vertex per
   statement of the loop nest LOOP_NEST, and one edge per data dependence or
   scalar dependence.  */

static struct graph *
build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
{
  struct graph *rdg;
  vec<data_reference_p> datarefs;

  /* Create the RDG vertices from the stmts of the loop nest.  */
  auto_vec<gimple, 10> stmts;
  stmts_from_loop (loop_nest[0], &stmts);
  rdg = new_graph (stmts.length ());
  datarefs.create (10);
  if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs))
    {
      datarefs.release ();
      free_rdg (rdg);
      return NULL;
    }
  stmts.release ();

  create_rdg_flow_edges (rdg);
  if (cd)
    create_rdg_cd_edges (rdg, cd);

  datarefs.release ();

  return rdg;
}
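/* Illustrative sketch (editorial): for the loop

     for (i = 0; i < n; i++)
       {
	 t_2 = b[i];	// vertex with has_mem_reads set
	 a[i] = t_2;	// vertex with has_mem_write set
       }

   build_rdg creates one vertex per statement (including the induction
   variable updates and the exit condition) and a flow_dd edge from
   the load to the store, following the SSA def-use chain of t_2.  */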
enum partition_kind {
    PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY
};

typedef struct partition_s
{
  bitmap stmts;
  bitmap loops;
  bool reduction_p;
  enum partition_kind kind;
  /* data-references a kind != PKIND_NORMAL partition is about.  */
  data_reference_p main_dr;
  data_reference_p secondary_dr;
  tree niter;
  bool plus_one;
} *partition_t;
/* Allocate and initialize a partition from BITMAP.  */

static partition_t
partition_alloc (bitmap stmts, bitmap loops)
{
  partition_t partition = XCNEW (struct partition_s);
  partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
  partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
  partition->reduction_p = false;
  partition->kind = PKIND_NORMAL;
  return partition;
}
/* Free PARTITION.  */

static void
partition_free (partition_t partition)
{
  BITMAP_FREE (partition->stmts);
  BITMAP_FREE (partition->loops);
  free (partition);
}
/* Returns true if the partition can be generated as a builtin.  */

static bool
partition_builtin_p (partition_t partition)
{
  return partition->kind != PKIND_NORMAL;
}
/* Returns true if the partition contains a reduction.  */

static bool
partition_reduction_p (partition_t partition)
{
  return partition->reduction_p;
}
/* Merge PARTITION into the partition DEST.  */

static void
partition_merge_into (partition_t dest, partition_t partition)
{
  dest->kind = PKIND_NORMAL;
  bitmap_ior_into (dest->stmts, partition->stmts);
  if (partition_reduction_p (partition))
    dest->reduction_p = true;
}
/* Returns true when DEF is an SSA_NAME defined in LOOP and used after
   the LOOP.  */

static bool
ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
{
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
    {
      gimple use_stmt = USE_STMT (use_p);
      if (!is_gimple_debug (use_stmt)
	  && loop != loop_containing_stmt (use_stmt))
	return true;
    }

  return false;
}
/* Returns true when STMT defines a scalar variable used after the
   loop LOOP.  */

static bool
stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple stmt)
{
  def_operand_p def_p;
  ssa_op_iter op_iter;

  if (gimple_code (stmt) == GIMPLE_PHI)
    return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);

  FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
    if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
      return true;

  return false;
}
/* Return a copy of LOOP placed before LOOP.  */

static struct loop *
copy_loop_before (struct loop *loop)
{
  struct loop *res;
  edge preheader = loop_preheader_edge (loop);

  initialize_original_copy_tables ();
  res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, NULL, preheader);
  gcc_assert (res != NULL);
  free_original_copy_tables ();
  delete_update_ssa ();

  return res;
}
/* Creates an empty basic block after LOOP.  */

static void
create_bb_after_loop (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return;

  split_edge (exit);
}
/* Generate code for PARTITION from the code in LOOP.  The loop is
   copied when COPY_P is true.  All the statements not flagged in the
   PARTITION bitmap are removed from the loop or from its copy.  The
   statements are indexed in sequence inside a basic block, and the
   basic blocks of a loop are taken in dom order.  */

static void
generate_loops_for_partition (struct loop *loop, partition_t partition,
			      bool copy_p)
{
  unsigned i;
  basic_block *bbs;

  if (copy_p)
    {
      loop = copy_loop_before (loop);
      gcc_assert (loop != NULL);
      create_preheader (loop, CP_SIMPLE_PREHEADERS);
      create_bb_after_loop (loop);
    }

  /* Remove stmts not in the PARTITION bitmap.  */
  bbs = get_loop_body_in_dom_order (loop);

  if (MAY_HAVE_DEBUG_STMTS)
    for (i = 0; i < loop->num_nodes; i++)
      {
	basic_block bb = bbs[i];

	for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
	     gsi_next (&bsi))
	  {
	    gphi *phi = bsi.phi ();
	    if (!virtual_operand_p (gimple_phi_result (phi))
		&& !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
	      reset_debug_uses (phi);
	  }

	for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	  {
	    gimple stmt = gsi_stmt (bsi);
	    if (gimple_code (stmt) != GIMPLE_LABEL
		&& !is_gimple_debug (stmt)
		&& !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
	      reset_debug_uses (stmt);
	  }
      }

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
	{
	  gphi *phi = bsi.phi ();
	  if (!virtual_operand_p (gimple_phi_result (phi))
	      && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
	    remove_phi_node (&bsi, true);
	  else
	    gsi_next (&bsi);
	}

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
	{
	  gimple stmt = gsi_stmt (bsi);
	  if (gimple_code (stmt) != GIMPLE_LABEL
	      && !is_gimple_debug (stmt)
	      && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
	    {
	      /* Choose an arbitrary path through the empty CFG part
		 that this unnecessary control stmt controls.  */
	      if (gcond *cond_stmt = dyn_cast <gcond *> (stmt))
		{
		  gimple_cond_make_false (cond_stmt);
		  update_stmt (stmt);
		}
	      else if (gimple_code (stmt) == GIMPLE_SWITCH)
		{
		  gswitch *switch_stmt = as_a <gswitch *> (stmt);
		  gimple_switch_set_index
		      (switch_stmt, CASE_LOW (gimple_switch_label (switch_stmt, 1)));
		  update_stmt (stmt);
		}
	      else
		{
		  unlink_stmt_vdef (stmt);
		  gsi_remove (&bsi, true);
		  release_defs (stmt);
		  continue;
		}
	    }
	  gsi_next (&bsi);
	}
    }

  free (bbs);
}
/* Build the size argument for a memory operation call.  */

static tree
build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter,
		    bool plus_one)
{
  tree size = fold_convert_loc (loc, sizetype, nb_iter);
  if (plus_one)
    size = size_binop (PLUS_EXPR, size, size_one_node);
  size = fold_build2_loc (loc, MULT_EXPR, sizetype, size,
			  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
  size = fold_convert_loc (loc, size_type_node, size);
  return size;
}
/* Build an address argument for a memory operation call.  */

static tree
build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
{
  tree addr_base;

  addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
  addr_base = fold_convert_loc (loc, sizetype, addr_base);

  /* Test for a negative stride, iterating over every element.  */
  if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
    {
      addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
				  fold_convert_loc (loc, sizetype, nb_bytes));
      addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
				  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
    }

  return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
}
/* If VAL memory representation contains the same value in all bytes,
   return that value, otherwise return -1.
   E.g. for 0x24242424 return 0x24, for IEEE double
   747708026454360457216.0 return 0x44, etc.  */

static int
const_with_all_bytes_same (tree val)
{
  unsigned char buf[64];
  int i, len;

  if (integer_zerop (val)
      || real_zerop (val)
      || (TREE_CODE (val) == CONSTRUCTOR
	  && !TREE_CLOBBER_P (val)
	  && CONSTRUCTOR_NELTS (val) == 0))
    return 0;

  if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
    return -1;

  len = native_encode_expr (val, buf, sizeof (buf));
  if (len == 0)
    return -1;
  for (i = 1; i < len; i++)
    if (buf[i] != buf[0])
      return -1;
  return buf[0];
}
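/* Worked example (editorial): for the INTEGER_CST 0x24242424,
   native_encode_expr fills BUF with { 0x24, 0x24, 0x24, 0x24 }; every
   byte equals buf[0], so the function returns 0x24.  For 0x12345678
   the bytes differ and it returns -1, which disqualifies the store
   from the memset transformation unless its type is already
   byte-sized.  */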
/* Generate a call to memset for PARTITION in LOOP.  */

static void
generate_memset_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree mem, fn, nb_bytes;
  location_t loc;
  tree val;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
				 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
				       false, GSI_CONTINUE_LINKING);
  mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);

  /* This exactly matches the pattern recognition in classify_partition.  */
  val = gimple_assign_rhs1 (stmt);
  /* Handle constants like 0x15151515 and similarly
     floating point constants etc. where all bytes are the same.  */
  int bytev = const_with_all_bytes_same (val);
  if (bytev != -1)
    val = build_int_cst (integer_type_node, bytev);
  else if (TREE_CODE (val) == INTEGER_CST)
    val = fold_convert (integer_type_node, val);
  else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
    {
      tree tem = make_ssa_name (integer_type_node);
      gimple cstmt = gimple_build_assign (tem, NOP_EXPR, val);
      gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
      val = tem;
    }

  fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
  fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "generated memset");
      if (bytev == 0)
	fprintf (dump_file, " zero\n");
      else
	fprintf (dump_file, "\n");
    }
}
/* Generate a call to memcpy for PARTITION in LOOP.  */

static void
generate_memcpy_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree dest, src, fn, nb_bytes;
  location_t loc;
  enum built_in_function kind;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
				 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
				       false, GSI_CONTINUE_LINKING);
  dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
  if (ptr_derefs_may_alias_p (dest, src))
    kind = BUILT_IN_MEMMOVE;
  else
    kind = BUILT_IN_MEMCPY;

  dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
				   false, GSI_CONTINUE_LINKING);
  src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);
  fn = build_fold_addr_expr (builtin_decl_implicit (kind));
  fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (kind == BUILT_IN_MEMCPY)
	fprintf (dump_file, "generated memcpy\n");
      else
	fprintf (dump_file, "generated memmove\n");
    }
}
/* Remove and destroy the loop LOOP.  */

static void
destroy_loop (struct loop *loop)
{
  unsigned nbbs = loop->num_nodes;
  edge exit = single_exit (loop);
  basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
  basic_block *bbs;
  unsigned i;

  bbs = get_loop_body_in_dom_order (loop);

  redirect_edge_pred (exit, src);
  exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
  exit->flags |= EDGE_FALLTHRU;
  cancel_loop_tree (loop);
  rescan_loop_exit (exit, false, true);

  for (i = 0; i < nbbs; i++)
    {
      /* We have made sure to not leave any dangling uses of SSA
	 names defined in the loop.  With the exception of virtuals.
	 Make sure we replace all uses of virtual defs that will remain
	 outside of the loop with the bare symbol as delete_basic_block
	 will release them.  */
      for (gphi_iterator gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gphi *phi = gsi.phi ();
	  if (virtual_operand_p (gimple_phi_result (phi)))
	    mark_virtual_phi_result_for_renaming (phi);
	}
      for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple stmt = gsi_stmt (gsi);
	  tree vdef = gimple_vdef (stmt);
	  if (vdef && TREE_CODE (vdef) == SSA_NAME)
	    mark_virtual_operand_for_renaming (vdef);
	}
      delete_basic_block (bbs[i]);
    }
  free (bbs);

  set_immediate_dominator (CDI_DOMINATORS, dest,
			   recompute_dominator (CDI_DOMINATORS, dest));
}
/* Generates code for PARTITION.  */

static void
generate_code_for_partition (struct loop *loop,
			     partition_t partition, bool copy_p)
{
  switch (partition->kind)
    {
    case PKIND_NORMAL:
      /* Reductions all have to be in the last partition.  */
      gcc_assert (!partition_reduction_p (partition)
		  || !copy_p);
      generate_loops_for_partition (loop, partition, copy_p);
      return;

    case PKIND_MEMSET:
      generate_memset_builtin (loop, partition);
      break;

    case PKIND_MEMCPY:
      generate_memcpy_builtin (loop, partition);
      break;

    default:
      gcc_unreachable ();
    }

  /* Common tail for partitions we turn into a call.  If this was the last
     partition for which we generate code, we have to destroy the loop.  */
  if (!copy_p)
    destroy_loop (loop);
}
/* Returns a partition with all the statements needed for computing
   the vertex V of the RDG, also including the loop exit conditions.  */

static partition_t
build_rdg_partition_for_vertex (struct graph *rdg, int v)
{
  partition_t partition = partition_alloc (NULL, NULL);
  auto_vec<int, 3> nodes;
  unsigned i;
  int x;

  graphds_dfs (rdg, &v, 1, &nodes, false, NULL);

  FOR_EACH_VEC_ELT (nodes, i, x)
    {
      bitmap_set_bit (partition->stmts, x);
      bitmap_set_bit (partition->loops,
		      loop_containing_stmt (RDG_STMT (rdg, x))->num);
    }

  return partition;
}
/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
   We detect the single-store memset pattern and the single-load,
   single-store memcpy/memmove pattern.  */

static void
classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
{
  bitmap_iterator bi;
  unsigned i;
  tree nb_iter;
  data_reference_p single_load, single_store;
  bool volatiles_p = false;
  bool plus_one = false;

  partition->kind = PKIND_NORMAL;
  partition->main_dr = NULL;
  partition->secondary_dr = NULL;
  partition->niter = NULL_TREE;
  partition->plus_one = false;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);

      if (gimple_has_volatile_ops (stmt))
	volatiles_p = true;

      /* If the stmt has uses outside of the loop mark it as reduction.  */
      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
	{
	  partition->reduction_p = true;
	  return;
	}
    }

  /* Perform general partition disqualification for builtins.  */
  if (volatiles_p
      || !flag_tree_loop_distribute_patterns)
    return;

  /* Detect memset and memcpy.  */
  single_load = NULL;
  single_store = NULL;
  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);
      data_reference_p dr;
      unsigned j;

      if (gimple_code (stmt) == GIMPLE_PHI)
	continue;

      /* Any scalar stmts are ok.  */
      if (!gimple_vuse (stmt))
	continue;

      /* Otherwise just regular loads/stores.  */
      if (!gimple_assign_single_p (stmt))
	return;

      /* But exactly one store and/or load.  */
      for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
	{
	  if (DR_IS_READ (dr))
	    {
	      if (single_load != NULL)
		return;
	      single_load = dr;
	    }
	  else
	    {
	      if (single_store != NULL)
		return;
	      single_store = dr;
	    }
	}
    }

  if (!single_store)
    return;

  nb_iter = number_of_latch_executions (loop);
  if (!nb_iter || nb_iter == chrec_dont_know)
    return;
  if (dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
		      gimple_bb (DR_STMT (single_store))))
    plus_one = true;

  if (single_store && !single_load)
    {
      gimple stmt = DR_STMT (single_store);
      tree rhs = gimple_assign_rhs1 (stmt);
      if (const_with_all_bytes_same (rhs) == -1
	  && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
	      || (TYPE_MODE (TREE_TYPE (rhs))
		  != TYPE_MODE (unsigned_char_type_node))))
	return;
      if (TREE_CODE (rhs) == SSA_NAME
	  && !SSA_NAME_IS_DEFAULT_DEF (rhs)
	  && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
	return;
      if (!adjacent_dr_p (single_store)
	  || !dominated_by_p (CDI_DOMINATORS,
			      loop->latch, gimple_bb (stmt)))
	return;
      partition->kind = PKIND_MEMSET;
      partition->main_dr = single_store;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
  else if (single_store && single_load)
    {
      gimple store = DR_STMT (single_store);
      gimple load = DR_STMT (single_load);
      /* Direct aggregate copy or via an SSA name temporary.  */
      if (load != store
	  && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
	return;
      if (!adjacent_dr_p (single_store)
	  || !adjacent_dr_p (single_load)
	  || !operand_equal_p (DR_STEP (single_store),
			       DR_STEP (single_load), 0)
	  || !dominated_by_p (CDI_DOMINATORS,
			      loop->latch, gimple_bb (store)))
	return;
      /* Now check that if there is a dependence this dependence is
         of a suitable form for memmove.  */
      vec<loop_p> loops = vNULL;
      ddr_p ddr;
      loops.safe_push (loop);
      ddr = initialize_data_dependence_relation (single_load, single_store,
						 loops);
      compute_affine_dependence (ddr, loop);
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	{
	  free_dependence_relation (ddr);
	  loops.release ();
	  return;
	}

      if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
	{
	  if (DDR_NUM_DIST_VECTS (ddr) == 0)
	    {
	      free_dependence_relation (ddr);
	      loops.release ();
	      return;
	    }

	  lambda_vector dist_v;
	  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
	    {
	      int dist = dist_v[index_in_loop_nest (loop->num,
						    DDR_LOOP_NEST (ddr))];
	      if (dist > 0 && !DDR_REVERSED_P (ddr))
		{
		  free_dependence_relation (ddr);
		  loops.release ();
		  return;
		}
	    }
	}
      free_dependence_relation (ddr);
      loops.release ();
      partition->kind = PKIND_MEMCPY;
      partition->main_dr = single_store;
      partition->secondary_dr = single_load;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
}
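/* Summary sketch of the classification (editorial):

     for (i = 0; i < n; i++) a[i] = 0;	      -> PKIND_MEMSET
     for (i = 0; i < n; i++) a[i] = b[i];     -> PKIND_MEMCPY
     for (i = 1; i < n; i++) a[i] = a[i-1];   -> PKIND_NORMAL, because
	 the load/store dependence has positive distance and the copy
	 is therefore not expressible as memmove.  */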
/* For a data reference REF, return the declaration of its base
   address or NULL_TREE if the base is not determined.  */

static tree
ref_base_address (data_reference_p dr)
{
  tree base_address = DR_BASE_ADDRESS (dr);
  if (base_address
      && TREE_CODE (base_address) == ADDR_EXPR)
    return TREE_OPERAND (base_address, 0);

  return base_address;
}
/* Returns true when PARTITION1 and PARTITION2 have similar memory
   accesses in RDG.  */

static bool
similar_memory_accesses (struct graph *rdg, partition_t partition1,
			 partition_t partition2)
{
  unsigned i, j, k, l;
  bitmap_iterator bi, bj;
  data_reference_p ref1, ref2;

  /* First check whether in the intersection of the two partitions are
     any loads or stores.  Common loads are the situation that happens
     most often.  */
  EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
	|| RDG_MEM_READS_STMT (rdg, i))
      return true;

  /* Then check all data-references against each other.  */
  EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
	|| RDG_MEM_READS_STMT (rdg, i))
      EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
	if (RDG_MEM_WRITE_STMT (rdg, j)
	    || RDG_MEM_READS_STMT (rdg, j))
	  {
	    FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
	      {
		tree base1 = ref_base_address (ref1);
		if (base1)
		  FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
		    if (base1 == ref_base_address (ref2))
		      return true;
	      }
	  }

  return false;
}
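/* Illustrative sketch (editorial): two partitions that both touch the
   array A,

     for (i = 0; i < n; i++) a[i] = f (i);	(partition 1)
     for (i = 0; i < n; i++) sum += a[i];	(partition 2)

   share the base address A, so this predicate returns true and the
   cost model in distribute_loop fuses them to preserve locality.  */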
/* Aggregate several components into a useful partition that is
   registered in the PARTITIONS vector.  Partitions will be
   distributed in different loops.  */

static void
rdg_build_partitions (struct graph *rdg,
		      vec<gimple> starting_stmts,
		      vec<partition_t> *partitions)
{
  bitmap processed = BITMAP_ALLOC (NULL);
  int i;
  gimple stmt;

  FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
    {
      int v = rdg_vertex_for_stmt (rdg, stmt);

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "ldist asked to generate code for vertex %d\n", v);

      /* If the vertex is already contained in another partition so
         is the partition rooted at it.  */
      if (bitmap_bit_p (processed, v))
	continue;

      partition_t partition = build_rdg_partition_for_vertex (rdg, v);
      bitmap_ior_into (processed, partition->stmts);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "ldist useful partition:\n");
	  dump_bitmap (dump_file, partition->stmts);
	}

      partitions->safe_push (partition);
    }

  /* All vertices should have been assigned to at least one partition now,
     other than vertices belonging to dead code.  */

  BITMAP_FREE (processed);
}
/* Dump to FILE the PARTITIONS.  */

static void
dump_rdg_partitions (FILE *file, vec<partition_t> partitions)
{
  int i;
  partition_t partition;

  FOR_EACH_VEC_ELT (partitions, i, partition)
    debug_bitmap_file (file, partition->stmts);
}
/* Debug PARTITIONS.  */
extern void debug_rdg_partitions (vec<partition_t> );

DEBUG_FUNCTION void
debug_rdg_partitions (vec<partition_t> partitions)
{
  dump_rdg_partitions (stderr, partitions);
}
/* Returns the number of read and write operations in the RDG.  */

static int
number_of_rw_in_rdg (struct graph *rdg)
{
  int i, res = 0;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
	++res;

      if (RDG_MEM_READS_STMT (rdg, i))
	++res;
    }

  return res;
}
/* Returns the number of read and write operations in a PARTITION of
   the RDG.  */

static int
number_of_rw_in_partition (struct graph *rdg, partition_t partition)
{
  int res = 0;
  unsigned i;
  bitmap_iterator ii;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
	++res;

      if (RDG_MEM_READS_STMT (rdg, i))
	++res;
    }

  return res;
}
/* Returns true when one of the PARTITIONS contains all the read or
   write operations of RDG.  */

static bool
partition_contains_all_rw (struct graph *rdg,
			   vec<partition_t> partitions)
{
  int i;
  partition_t partition;
  int nrw = number_of_rw_in_rdg (rdg);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    if (nrw == number_of_rw_in_partition (rdg, partition))
      return true;

  return false;
}
/* Compute partition dependence created by the data references in DRS1
   and DRS2 and modify and return DIR according to that.  */

static int
pg_add_dependence_edges (struct graph *rdg, vec<loop_p> loops, int dir,
			 vec<data_reference_p> drs1,
			 vec<data_reference_p> drs2)
{
  data_reference_p dr1, dr2;

  /* dependence direction - 0 is no dependence, -1 is back,
     1 is forth, 2 is both (we can stop then, merging will occur).  */
  for (int ii = 0; drs1.iterate (ii, &dr1); ++ii)
    for (int jj = 0; drs2.iterate (jj, &dr2); ++jj)
      {
	data_reference_p saved_dr1 = dr1;
	int this_dir = 1;
	ddr_p ddr;
	/* Re-shuffle data-refs to be in dominator order.  */
	if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
	    > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
	  {
	    data_reference_p tem = dr1;
	    dr1 = dr2;
	    dr2 = tem;
	    this_dir = -this_dir;
	  }
	ddr = initialize_data_dependence_relation (dr1, dr2, loops);
	compute_affine_dependence (ddr, loops[0]);
	if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	  this_dir = 2;
	else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
	  {
	    if (DDR_REVERSED_P (ddr))
	      {
		data_reference_p tem = dr1;
		dr1 = dr2;
		dr2 = tem;
		this_dir = -this_dir;
	      }
	    /* Known dependences can still be unordered throughout the
	       iteration space, see gcc.dg/tree-ssa/ldist-16.c.  */
	    if (DDR_NUM_DIST_VECTS (ddr) != 1)
	      this_dir = 2;
	    /* If the overlap is exact preserve stmt order.  */
	    else if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1))
	      ;
	    else
	      {
		/* Else as the distance vector is lexicographic positive
		   swap the dependence direction.  */
		this_dir = -this_dir;
	      }
	  }
	else
	  this_dir = 0;
	free_dependence_relation (ddr);
	if (dir == 0)
	  dir = this_dir;
	else if (dir != this_dir)
	  dir = 2;
	/* Shuffle "back" dr1.  */
	dr1 = saved_dr1;
      }
  return dir;
}
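/* Illustrative sketch (editorial): if partition P1 contains the write
   a[i] and partition P2 the read a[i-1], the single distance vector
   is positive and the function returns -1 or 1 depending on the
   dominator order of the statements, ordering one partition before
   the other.  A result of 2 records edges in both directions, which
   later forms an SCC in the partition graph and forces the two
   partitions to be fused.  */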
/* Compare postorder number of the partition graph vertices V1 and V2.  */

static int
pgcmp (const void *v1_, const void *v2_)
{
  const vertex *v1 = (const vertex *)v1_;
  const vertex *v2 = (const vertex *)v2_;
  return v2->post - v1->post;
}
/* Distributes the code from LOOP in such a way that producer
   statements are placed before consumer statements.  Tries to separate
   only the statements from STMTS into separate loops.
   Returns the number of distributed loops.  */

static int
distribute_loop (struct loop *loop, vec<gimple> stmts,
		 control_dependences *cd, int *nb_calls)
{
  struct graph *rdg;
  partition_t partition;
  bool any_builtin;
  int i, nbp;
  graph *pg = NULL;
  int num_sccs = 1;

  *nb_calls = 0;
  auto_vec<loop_p, 3> loop_nest;
  if (!find_loop_nest (loop, &loop_nest))
    return 0;

  rdg = build_rdg (loop_nest, cd);
  if (!rdg)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Loop %d not distributed: failed to build the RDG.\n",
		 loop->num);

      return 0;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg (dump_file, rdg);

  auto_vec<partition_t, 3> partitions;
  rdg_build_partitions (rdg, stmts, &partitions);

  any_builtin = false;
  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      classify_partition (loop, rdg, partition);
      any_builtin |= partition_builtin_p (partition);
    }

  /* If we are only distributing patterns but did not detect any,
     simply bail out.  */
  if (!flag_tree_loop_distribution
      && !any_builtin)
    {
      nbp = 0;
      goto ldist_done;
    }

  /* If we are only distributing patterns fuse all partitions that
     were not classified as builtins.  This also avoids chopping
     a loop into pieces, separated by builtin calls.  That is, we
     only want no or a single loop body remaining.  */
  partition_t into;
  if (!flag_tree_loop_distribution)
    {
      for (i = 0; partitions.iterate (i, &into); ++i)
	if (!partition_builtin_p (into))
	  break;
      for (++i; partitions.iterate (i, &partition); ++i)
	if (!partition_builtin_p (partition))
	  {
	    if (dump_file && (dump_flags & TDF_DETAILS))
	      {
		fprintf (dump_file, "fusing non-builtin partitions\n");
		dump_bitmap (dump_file, into->stmts);
		dump_bitmap (dump_file, partition->stmts);
	      }
	    partition_merge_into (into, partition);
	    partitions.unordered_remove (i);
	    partition_free (partition);
	    i--;
	  }
    }

  /* Due to limitations in the transform phase we have to fuse all
     reduction partitions into the last partition so the existing
     loop will contain all loop-closed PHI nodes.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    if (partition_reduction_p (into))
      break;
  for (i = i + 1; partitions.iterate (i, &partition); ++i)
    if (partition_reduction_p (partition))
      {
	if (dump_file && (dump_flags & TDF_DETAILS))
	  {
	    fprintf (dump_file, "fusing partitions\n");
	    dump_bitmap (dump_file, into->stmts);
	    dump_bitmap (dump_file, partition->stmts);
	    fprintf (dump_file, "because they have reductions\n");
	  }
	partition_merge_into (into, partition);
	partitions.unordered_remove (i);
	partition_free (partition);
	i--;
      }

  /* Apply our simple cost model - fuse partitions with similar
     memory accesses.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    {
      if (partition_builtin_p (into))
	continue;
      for (int j = i + 1;
	   partitions.iterate (j, &partition); ++j)
	{
	  if (!partition_builtin_p (partition)
	      && similar_memory_accesses (rdg, into, partition))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, "fusing partitions\n");
		  dump_bitmap (dump_file, into->stmts);
		  dump_bitmap (dump_file, partition->stmts);
		  fprintf (dump_file, "because they have similar "
			   "memory accesses\n");
		}
	      partition_merge_into (into, partition);
	      partitions.unordered_remove (j);
	      partition_free (partition);
	      j--;
	    }
	}
    }

  /* Build the partition dependency graph.  */
  if (partitions.length () > 1)
    {
      pg = new_graph (partitions.length ());
      struct pgdata {
	  partition_t partition;
	  vec<data_reference_p> writes;
	  vec<data_reference_p> reads;
      };
#define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
      for (i = 0; partitions.iterate (i, &partition); ++i)
	{
	  vertex *v = &pg->vertices[i];
	  pgdata *data = new pgdata;
	  data_reference_p dr;
	  /* FIXME - leaks.  */
	  v->data = data;
	  bitmap_iterator bi;
	  unsigned j;
	  data->partition = partition;
	  data->reads = vNULL;
	  data->writes = vNULL;
	  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, j, bi)
	    for (int k = 0; RDG_DATAREFS (rdg, j).iterate (k, &dr); ++k)
	      if (DR_IS_READ (dr))
		data->reads.safe_push (dr);
	      else
		data->writes.safe_push (dr);
	}
      partition_t partition1, partition2;
      for (i = 0; partitions.iterate (i, &partition1); ++i)
	for (int j = i + 1; partitions.iterate (j, &partition2); ++j)
	  {
	    /* dependence direction - 0 is no dependence, -1 is back,
	       1 is forth, 2 is both (we can stop then, merging will occur).  */
	    int dir = 0;
	    dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					   PGDATA(i)->writes,
					   PGDATA(j)->reads);
	    if (dir != 2)
	      dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					     PGDATA(i)->reads,
					     PGDATA(j)->writes);
	    if (dir != 2)
	      dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					     PGDATA(i)->writes,
					     PGDATA(j)->writes);
	    if (dir == 1 || dir == 2)
	      add_edge (pg, i, j);
	    if (dir == -1 || dir == 2)
	      add_edge (pg, j, i);
	  }

      /* Add edges to the reduction partition (if any) to force it last.  */
      unsigned j;
      for (j = 0; partitions.iterate (j, &partition); ++j)
	if (partition_reduction_p (partition))
	  break;
      if (j < partitions.length ())
	{
	  for (unsigned i = 0; partitions.iterate (i, &partition); ++i)
	    if (i != j)
	      add_edge (pg, i, j);
	}

      /* Compute partitions we cannot separate and fuse them.  */
      num_sccs = graphds_scc (pg, NULL);
      for (i = 0; i < num_sccs; ++i)
	{
	  partition_t first;
	  int j;
	  for (j = 0; partitions.iterate (j, &first); ++j)
	    if (pg->vertices[j].component == i)
	      break;
	  for (j = j + 1; partitions.iterate (j, &partition); ++j)
	    if (pg->vertices[j].component == i)
	      {
		if (dump_file && (dump_flags & TDF_DETAILS))
		  {
		    fprintf (dump_file, "fusing partitions\n");
		    dump_bitmap (dump_file, first->stmts);
		    dump_bitmap (dump_file, partition->stmts);
		    fprintf (dump_file, "because they are in the same "
			     "dependence SCC\n");
		  }
		partition_merge_into (first, partition);
		partitions[j] = NULL;
		partition_free (partition);
		PGDATA (j)->partition = NULL;
	      }
	}

      /* Now order the remaining nodes in postorder.  */
      qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
      partitions.truncate (0);
      for (i = 0; i < pg->n_vertices; ++i)
	{
	  pgdata *data = PGDATA (i);
	  if (data->partition)
	    partitions.safe_push (data->partition);
	  data->reads.release ();
	  data->writes.release ();
	  delete data;
	}
      gcc_assert (partitions.length () == (unsigned)num_sccs);
      free_graph (pg);
    }

  nbp = partitions.length ();
  if (nbp == 0
      || (nbp == 1 && !partition_builtin_p (partitions[0]))
      || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
    {
      nbp = 0;
      goto ldist_done;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg_partitions (dump_file, partitions);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      if (partition_builtin_p (partition))
	(*nb_calls)++;
      generate_code_for_partition (loop, partition, i < nbp - 1);
    }

 ldist_done:

  FOR_EACH_VEC_ELT (partitions, i, partition)
    partition_free (partition);

  free_rdg (rdg);
  return nbp - *nb_calls;
}
/* Distribute all loops in the current function.  */

namespace {

const pass_data pass_data_loop_distribution =
{
  GIMPLE_PASS, /* type */
  "ldist", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_loop_distribution : public gimple_opt_pass
{
public:
  pass_loop_distribution (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_loop_distribution, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return flag_tree_loop_distribution
	|| flag_tree_loop_distribute_patterns;
    }

  virtual unsigned int execute (function *);

}; // class pass_loop_distribution
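/* Usage note (editorial sketch): the gate above enables the pass for
   -ftree-loop-distribution (full distribution) or
   -ftree-loop-distribute-patterns (builtin replacement only, enabled
   by default at -O3 in this release series).  For example,

     gcc -O2 -ftree-loop-distribution -fdump-tree-ldist-details x.c

   writes the RDG and the partition fusion decisions made in
   distribute_loop to the ldist dump file.  */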
unsigned int
pass_loop_distribution::execute (function *fun)
{
  struct loop *loop;
  bool changed = false;
  basic_block bb;
  control_dependences *cd = NULL;

  FOR_ALL_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
    }

  /* We can at the moment only distribute non-nested loops, thus restrict
     walking to innermost loops.  */
  FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
    {
      auto_vec<gimple> work_list;
      basic_block *bbs;
      int num = loop->num;
      unsigned int i;

      /* If the loop doesn't have a single exit we will fail anyway,
	 so do that early.  */
      if (!single_exit (loop))
	continue;

      /* Only optimize hot loops.  */
      if (!optimize_loop_for_speed_p (loop))
	continue;

      /* Initialize the worklist with stmts we seed the partitions with.  */
      bbs = get_loop_body_in_dom_order (loop);
      for (i = 0; i < loop->num_nodes; ++i)
	{
	  for (gphi_iterator gsi = gsi_start_phis (bbs[i]);
	       !gsi_end_p (gsi);
	       gsi_next (&gsi))
	    {
	      gphi *phi = gsi.phi ();
	      if (virtual_operand_p (gimple_phi_result (phi)))
		continue;
	      /* Distribute stmts which have defs that are used outside of
		 the loop.  */
	      if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
		continue;
	      work_list.safe_push (phi);
	    }
	  for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
	       !gsi_end_p (gsi);
	       gsi_next (&gsi))
	    {
	      gimple stmt = gsi_stmt (gsi);

	      /* If there is a stmt with side-effects bail out - we
		 cannot and should not distribute this loop.  */
	      if (gimple_has_side_effects (stmt))
		{
		  work_list.truncate (0);
		  goto out;
		}

	      /* Distribute stmts which have defs that are used outside of
		 the loop.  */
	      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
		;
	      /* Otherwise only distribute stores for now.  */
	      else if (!gimple_vdef (stmt))
		continue;

	      work_list.safe_push (stmt);
	    }
	}
out:
      free (bbs);

      int nb_generated_loops = 0;
      int nb_generated_calls = 0;
      location_t loc = find_loop_location (loop);
      if (work_list.length () > 0)
	{
	  if (!cd)
	    {
	      calculate_dominance_info (CDI_DOMINATORS);
	      calculate_dominance_info (CDI_POST_DOMINATORS);
	      cd = new control_dependences (create_edge_list ());
	      free_dominance_info (CDI_POST_DOMINATORS);
	    }
	  nb_generated_loops = distribute_loop (loop, work_list, cd,
						&nb_generated_calls);
	}

      if (nb_generated_loops + nb_generated_calls > 0)
	{
	  changed = true;
	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
			   loc, "Loop %d distributed: split to %d loops "
			   "and %d library calls.\n",
			   num, nb_generated_loops, nb_generated_calls);
	}
      else if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Loop %d is the same.\n", num);
    }

  if (cd)
    delete cd;

  if (changed)
    {
      /* Cached scalar evolutions now may refer to wrong or non-existing
	 loops.  */
      scev_reset_htab ();
      mark_virtual_operands_for_renaming (fun);
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }

#ifdef ENABLE_CHECKING
  if (changed)
    verify_loop_structure ();
#endif

  return 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_loop_distribution (gcc::context *ctxt)
{
  return new pass_loop_distribution (ctxt);
}