/* Loop distribution.
   Copyright (C) 2006-2015 Free Software Foundation, Inc.
   Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
   and Sebastian Pop <sebastian.pop@amd.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass performs loop distribution: for example, the loop

   |DO I = 2, N
   |    A(I) = B(I) + C
   |    D(I) = A(I-1)*E
   |ENDDO

   is transformed to

   |DOA I = 2, N
   |   A(I) = B(I) + C
   |ENDDOA
   |DOB I = 2, N
   |   D(I) = A(I-1)*E
   |ENDDOB

   This pass uses an RDG, Reduced Dependence Graph built on top of the
   data dependence relations.  The RDG is then topologically sorted to
   obtain a map of information producers/consumers based on which it
   generates the new loops.  */
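/* Illustrative note (added here, not part of the original sources):
   with -ftree-loop-distribute-patterns the pass additionally replaces
   distributed partitions that match a known pattern by library calls.
   For example, a sketch in C:

     for (i = 0; i < n; i++)
       {
         a[i] = 0;
         b[i] = a[i] + 1;
       }

   may be distributed into a partition that becomes a single
   memset (a, 0, n * sizeof (*a)) and a remaining loop computing only
   b[i], assuming the dependence analysis below permits it.  */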
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree.h"
#include "fold-const.h"
#include "predict.h"
#include "tm.h"
#include "hard-reg-set.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "cfganal.h"
#include "basic-block.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "stor-layout.h"
#include "gimple-ssa.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-pass.h"
#include "gimple-pretty-print.h"
#include "tree-vectorizer.h"
/* A Reduced Dependence Graph (RDG) vertex representing a statement.  */
typedef struct rdg_vertex
{
  /* The statement represented by this vertex.  */
  gimple stmt;

  /* Vector of data-references in this statement.  */
  vec<data_reference_p> datarefs;

  /* True when the statement contains a write to memory.  */
  bool has_mem_write;

  /* True when the statement contains a read from memory.  */
  bool has_mem_reads;
} *rdg_vertex_p;

#define RDGV_STMT(V)     ((struct rdg_vertex *) ((V)->data))->stmt
#define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
#define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
#define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
#define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
#define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
#define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
#define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
/* Data dependence type.  */

enum rdg_dep_type
{
  /* Read After Write (RAW).  */
  flow_dd = 'f',

  /* Control dependence (execute conditional on).  */
  control_dd = 'c'
};

/* Dependence information attached to an edge of the RDG.  */

typedef struct rdg_edge
{
  /* Type of the dependence.  */
  enum rdg_dep_type type;
} *rdg_edge_p;

#define RDGE_TYPE(E)        ((struct rdg_edge *) ((E)->data))->type
/* Dump vertex I in RDG to FILE.  */

static void
dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
{
  struct vertex *v = &(rdg->vertices[i]);
  struct graph_edge *e;

  fprintf (file, "(vertex %d: (%s%s) (in:", i,
           RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
           RDG_MEM_READS_STMT (rdg, i) ? "r" : "");

  if (v->pred)
    for (e = v->pred; e; e = e->pred_next)
      fprintf (file, " %d", e->src);

  fprintf (file, ") (out:");

  if (v->succ)
    for (e = v->succ; e; e = e->succ_next)
      fprintf (file, " %d", e->dest);

  fprintf (file, ")\n");
  print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
  fprintf (file, ")\n");
}
/* Call dump_rdg_vertex on stderr.  */

DEBUG_FUNCTION void
debug_rdg_vertex (struct graph *rdg, int i)
{
  dump_rdg_vertex (stderr, rdg, i);
}
/* Dump the reduced dependence graph RDG to FILE.  */

static void
dump_rdg (FILE *file, struct graph *rdg)
{
  fprintf (file, "(rdg\n");
  for (int i = 0; i < rdg->n_vertices; i++)
    dump_rdg_vertex (file, rdg, i);
  fprintf (file, ")\n");
}
/* Call dump_rdg on stderr.  */

DEBUG_FUNCTION void
debug_rdg (struct graph *rdg)
{
  dump_rdg (stderr, rdg);
}
static void
dot_rdg_1 (FILE *file, struct graph *rdg)
{
  int i;
  pretty_printer buffer;
  pp_needs_newline (&buffer) = false;
  buffer.buffer->stream = file;

  fprintf (file, "digraph RDG {\n");

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      fprintf (file, "%d [label=\"[%d] ", i, i);
      pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
      pp_flush (&buffer);
      fprintf (file, "\"]\n");

      /* Highlight reads from memory.  */
      if (RDG_MEM_READS_STMT (rdg, i))
        fprintf (file, "%d [style=filled, fillcolor=green]\n", i);

      /* Highlight stores to memory.  */
      if (RDG_MEM_WRITE_STMT (rdg, i))
        fprintf (file, "%d [style=filled, fillcolor=red]\n", i);

      if (v->succ)
        for (e = v->succ; e; e = e->succ_next)
          switch (RDGE_TYPE (e))
            {
            case flow_dd:
              /* These are the most common dependences: don't print these.  */
              fprintf (file, "%d -> %d \n", i, e->dest);
              break;

            case control_dd:
              fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
              break;

            default:
              gcc_unreachable ();
            }
    }

  fprintf (file, "}\n\n");
}
/* Display the Reduced Dependence Graph using dotty.  */

DEBUG_FUNCTION void
dot_rdg (struct graph *rdg)
{
  /* When debugging, you may want to enable the following code.  */
#ifdef HAVE_POPEN
  FILE *file = popen ("dot -Tx11", "w");
  if (!file)
    return;
  dot_rdg_1 (file, rdg);
  fflush (file);
  close (fileno (file));
  pclose (file);
#else
  dot_rdg_1 (stderr, rdg);
#endif
}
/* Returns the index of STMT in RDG.  */

static int
rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple stmt)
{
  int index = gimple_uid (stmt);
  gcc_checking_assert (index == -1
                       || RDG_STMT (rdg, index) == stmt);
  return index;
}
/* Creates dependence edges in RDG for all the uses of DEF.  IDEF is
   the index of DEF in RDG.  */

static void
create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
{
  use_operand_p imm_use_p;
  imm_use_iterator iterator;

  FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
    {
      struct graph_edge *e;
      int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));

      if (use < 0)
        continue;

      e = add_edge (rdg, idef, use);
      e->data = XNEW (struct rdg_edge);
      RDGE_TYPE (e) = flow_dd;
    }
}
/* Creates an edge for the control dependences of BB to the vertex V.  */

static void
create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
                                    int v, control_dependences *cd)
{
  bitmap_iterator bi;
  unsigned edge_n;
  EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
                            0, edge_n, bi)
    {
      basic_block cond_bb = cd->get_edge (edge_n)->src;
      gimple stmt = last_stmt (cond_bb);
      if (stmt && is_ctrl_stmt (stmt))
        {
          struct graph_edge *e;
          int c = rdg_vertex_for_stmt (rdg, stmt);
          if (c < 0)
            continue;

          e = add_edge (rdg, c, v);
          e->data = XNEW (struct rdg_edge);
          RDGE_TYPE (e) = control_dd;
        }
    }
}
/* Creates the edges of the reduced dependence graph RDG.  */

static void
create_rdg_flow_edges (struct graph *rdg)
{
  int i;
  def_operand_p def_p;
  ssa_op_iter iter;

  for (i = 0; i < rdg->n_vertices; i++)
    FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
                              iter, SSA_OP_DEF)
      create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
}
/* Creates the edges of the reduced dependence graph RDG.  */

static void
create_rdg_cd_edges (struct graph *rdg, control_dependences *cd)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      gimple stmt = RDG_STMT (rdg, i);
      if (gimple_code (stmt) == GIMPLE_PHI)
        {
          edge_iterator ei;
          edge e;
          FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
            create_edge_for_control_dependence (rdg, e->src, i, cd);
        }
      else
        create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
    }
}
/* Build the vertices of the reduced dependence graph RDG.  Return false
   if that failed.  */

static bool
create_rdg_vertices (struct graph *rdg, vec<gimple> stmts, loop_p loop,
                     vec<data_reference_p> *datarefs)
{
  int i;
  gimple stmt;

  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      struct vertex *v = &(rdg->vertices[i]);

      /* Record statement to vertex mapping.  */
      gimple_set_uid (stmt, i);

      v->data = XNEW (struct rdg_vertex);
      RDGV_STMT (v) = stmt;
      RDGV_DATAREFS (v).create (0);
      RDGV_HAS_MEM_WRITE (v) = false;
      RDGV_HAS_MEM_READS (v) = false;
      if (gimple_code (stmt) == GIMPLE_PHI)
        continue;

      unsigned drp = datarefs->length ();
      if (!find_data_references_in_stmt (loop, stmt, datarefs))
        return false;
      for (unsigned j = drp; j < datarefs->length (); ++j)
        {
          data_reference_p dr = (*datarefs)[j];
          if (DR_IS_READ (dr))
            RDGV_HAS_MEM_READS (v) = true;
          else
            RDGV_HAS_MEM_WRITE (v) = true;
          RDGV_DATAREFS (v).safe_push (dr);
        }
    }
  return true;
}
/* Initialize STMTS with all the statements of LOOP.  The order in
   which we discover statements is important as
   generate_loops_for_partition is using the same traversal for
   identifying statements in loop copies.  */

static void
stmts_from_loop (struct loop *loop, vec<gimple> *stmts)
{
  unsigned int i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
           gsi_next (&bsi))
        if (!virtual_operand_p (gimple_phi_result (bsi.phi ())))
          stmts->safe_push (bsi.phi ());

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
           gsi_next (&bsi))
        {
          gimple stmt = gsi_stmt (bsi);
          if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
            stmts->safe_push (stmt);
        }
    }

  free (bbs);
}
/* Free the reduced dependence graph RDG.  */

static void
free_rdg (struct graph *rdg)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      for (e = v->succ; e; e = e->succ_next)
        free (e->data);

      if (v->data)
        {
          gimple_set_uid (RDGV_STMT (v), -1);
          free_data_refs (RDGV_DATAREFS (v));
          free (v->data);
        }
    }

  free_graph (rdg);
}
/* Build the Reduced Dependence Graph (RDG) with one vertex per
   statement of the loop nest LOOP_NEST, and one edge per data dependence or
   scalar dependence.  */

static struct graph *
build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
{
  struct graph *rdg;
  vec<data_reference_p> datarefs;

  /* Create the RDG vertices from the stmts of the loop nest.  */
  auto_vec<gimple, 10> stmts;
  stmts_from_loop (loop_nest[0], &stmts);
  rdg = new_graph (stmts.length ());
  datarefs.create (10);
  if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs))
    {
      datarefs.release ();
      free_rdg (rdg);
      return NULL;
    }
  stmts.release ();

  create_rdg_flow_edges (rdg);
  if (cd)
    create_rdg_cd_edges (rdg, cd);

  datarefs.release ();

  return rdg;
}
enum partition_kind {
    PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY
};

typedef struct partition_s
{
  bitmap stmts;
  bitmap loops;
  bool reduction_p;
  bool plus_one;
  enum partition_kind kind;
  /* data-references a kind != PKIND_NORMAL partition is about.  */
  data_reference_p main_dr;
  data_reference_p secondary_dr;
  tree niter;
} *partition_t;
/* Allocate and initialize a partition from BITMAP.  */

static partition_t
partition_alloc (bitmap stmts, bitmap loops)
{
  partition_t partition = XCNEW (struct partition_s);
  partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
  partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
  partition->reduction_p = false;
  partition->kind = PKIND_NORMAL;
  return partition;
}
/* Free PARTITION.  */

static void
partition_free (partition_t partition)
{
  BITMAP_FREE (partition->stmts);
  BITMAP_FREE (partition->loops);
  free (partition);
}
/* Returns true if the partition can be generated as a builtin.  */

static bool
partition_builtin_p (partition_t partition)
{
  return partition->kind != PKIND_NORMAL;
}
/* Returns true if the partition contains a reduction.  */

static bool
partition_reduction_p (partition_t partition)
{
  return partition->reduction_p;
}
/* Merge PARTITION into the partition DEST.  */

static void
partition_merge_into (partition_t dest, partition_t partition)
{
  dest->kind = PKIND_NORMAL;
  bitmap_ior_into (dest->stmts, partition->stmts);
  if (partition_reduction_p (partition))
    dest->reduction_p = true;
}
/* Returns true when DEF is an SSA_NAME defined in LOOP and used after
   the LOOP.  */

static bool
ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
{
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
    {
      gimple use_stmt = USE_STMT (use_p);
      if (!is_gimple_debug (use_stmt)
          && loop != loop_containing_stmt (use_stmt))
        return true;
    }

  return false;
}
/* Returns true when STMT defines a scalar variable used after the
   loop LOOP.  */

static bool
stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple stmt)
{
  def_operand_p def_p;
  ssa_op_iter op_iter;

  if (gimple_code (stmt) == GIMPLE_PHI)
    return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);

  FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
    if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
      return true;

  return false;
}
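/* Illustrative sketch (added, not in the original sources): in

     for (i = 0; i < n; i++)
       sum += a[i];
     use (sum);

   the final value of SUM is consumed after the loop, so the statement
   computing it has scalar dependences outside the loop.  Partitions
   containing such statements are marked reduction_p in
   classify_partition below and are kept in the last generated loop.  */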
/* Return a copy of LOOP placed before LOOP.  */

static struct loop *
copy_loop_before (struct loop *loop)
{
  struct loop *res;
  edge preheader = loop_preheader_edge (loop);

  initialize_original_copy_tables ();
  res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, NULL, preheader);
  gcc_assert (res != NULL);
  free_original_copy_tables ();
  delete_update_ssa ();

  return res;
}
/* Creates an empty basic block after LOOP.  */

static void
create_bb_after_loop (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return;

  split_edge (exit);
}
/* Generate code for PARTITION from the code in LOOP.  The loop is
   copied when COPY_P is true.  All the statements not flagged in the
   PARTITION bitmap are removed from the loop or from its copy.  The
   statements are indexed in sequence inside a basic block, and the
   basic blocks of a loop are taken in dom order.  */

static void
generate_loops_for_partition (struct loop *loop, partition_t partition,
                              bool copy_p)
{
  unsigned i;
  basic_block *bbs;

  if (copy_p)
    {
      loop = copy_loop_before (loop);
      gcc_assert (loop != NULL);
      create_preheader (loop, CP_SIMPLE_PREHEADERS);
      create_bb_after_loop (loop);
    }

  /* Remove stmts not in the PARTITION bitmap.  */
  bbs = get_loop_body_in_dom_order (loop);

  if (MAY_HAVE_DEBUG_STMTS)
    for (i = 0; i < loop->num_nodes; i++)
      {
        basic_block bb = bbs[i];

        for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
             gsi_next (&bsi))
          {
            gphi *phi = bsi.phi ();
            if (!virtual_operand_p (gimple_phi_result (phi))
                && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
              reset_debug_uses (phi);
          }

        for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
          {
            gimple stmt = gsi_stmt (bsi);
            if (gimple_code (stmt) != GIMPLE_LABEL
                && !is_gimple_debug (stmt)
                && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
              reset_debug_uses (stmt);
          }
      }

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
        {
          gphi *phi = bsi.phi ();
          if (!virtual_operand_p (gimple_phi_result (phi))
              && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
            remove_phi_node (&bsi, true);
          else
            gsi_next (&bsi);
        }

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
        {
          gimple stmt = gsi_stmt (bsi);
          if (gimple_code (stmt) != GIMPLE_LABEL
              && !is_gimple_debug (stmt)
              && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
            {
              /* Choose an arbitrary path through the empty CFG part
                 that this unnecessary control stmt controls.  */
              if (gcond *cond_stmt = dyn_cast <gcond *> (stmt))
                {
                  gimple_cond_make_false (cond_stmt);
                  update_stmt (stmt);
                }
              else if (gimple_code (stmt) == GIMPLE_SWITCH)
                {
                  gswitch *switch_stmt = as_a <gswitch *> (stmt);
                  gimple_switch_set_index
                      (switch_stmt, CASE_LOW (gimple_switch_label (switch_stmt, 1)));
                  update_stmt (stmt);
                }
              else
                {
                  unlink_stmt_vdef (stmt);
                  gsi_remove (&bsi, true);
                  release_defs (stmt);
                  continue;
                }
            }
          gsi_next (&bsi);
        }
    }

  free (bbs);
}
/* Build the size argument for a memory operation call.  */

static tree
build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter,
                    bool plus_one)
{
  tree size = fold_convert_loc (loc, sizetype, nb_iter);
  if (plus_one)
    size = size_binop (PLUS_EXPR, size, size_one_node);
  size = fold_build2_loc (loc, MULT_EXPR, sizetype, size,
                          TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
  size = fold_convert_loc (loc, size_type_node, size);
  return size;
}
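/* Worked example (added for illustration): for a store to an array of
   4-byte ints with NB_ITER latch executions and PLUS_ONE set, the
   size built above is equivalent to

     size_t nb_bytes = ((size_t) nb_iter + 1) * 4;

   i.e. the number of executed iterations times the element size.  */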
/* Build an address argument for a memory operation call.  */

static tree
build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
{
  tree addr_base;

  addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
  addr_base = fold_convert_loc (loc, sizetype, addr_base);

  /* Test for a negative stride, iterating over every element.  */
  if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
    {
      addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
                                  fold_convert_loc (loc, sizetype, nb_bytes));
      addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
                                  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
    }

  return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
}
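/* Illustration added for clarity (not in the original sources): with a
   negative stride such as

     for (i = n - 1; i >= 0; i--)
       a[i] = 0;

   the data reference starts at the highest written address, so the
   code above rewinds the base by NB_BYTES and adds back one element
   size to yield the lowest written address, which is what memset and
   memcpy expect as their destination.  */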
/* If VAL memory representation contains the same value in all bytes,
   return that value, otherwise return -1.
   E.g. for 0x24242424 return 0x24, for IEEE double
   747708026454360457216.0 return 0x44, etc.  */

static int
const_with_all_bytes_same (tree val)
{
  unsigned char buf[64];
  int i, len;

  if (integer_zerop (val)
      || real_zerop (val)
      || (TREE_CODE (val) == CONSTRUCTOR
          && !TREE_CLOBBER_P (val)
          && CONSTRUCTOR_NELTS (val) == 0))
    return 0;

  if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
    return -1;

  len = native_encode_expr (val, buf, sizeof (buf));
  if (len == 0)
    return -1;
  for (i = 1; i < len; i++)
    if (buf[i] != buf[0])
      return -1;
  return buf[0];
}
/* Generate a call to memset for PARTITION in LOOP.  */

static void
generate_memset_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree mem, fn, nb_bytes;
  location_t loc;
  tree val;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
                                 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
                                       false, GSI_CONTINUE_LINKING);
  mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
                                  false, GSI_CONTINUE_LINKING);

  /* This exactly matches the pattern recognition in classify_partition.  */
  val = gimple_assign_rhs1 (stmt);
  /* Handle constants like 0x15151515 and similarly
     floating point constants etc. where all bytes are the same.  */
  int bytev = const_with_all_bytes_same (val);
  if (bytev != -1)
    val = build_int_cst (integer_type_node, bytev);
  else if (TREE_CODE (val) == INTEGER_CST)
    val = fold_convert (integer_type_node, val);
  else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
    {
      tree tem = make_ssa_name (integer_type_node);
      gimple cstmt = gimple_build_assign (tem, NOP_EXPR, val);
      gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
      val = tem;
    }

  fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
  fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "generated memset");
      if (bytev == 0)
        fprintf (dump_file, " zero\n");
      else
        fprintf (dump_file, "\n");
    }
}
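/* Example transform performed by the function above (an illustrative
   sketch only):

     for (i = 0; i < n; ++i)
       a[i] = 0;

   is replaced on the preheader edge by

     __builtin_memset (&a[0], 0, n * sizeof (*a));

   with the now-dead loop body removed by the caller via destroy_loop.  */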
/* Generate a call to memcpy for PARTITION in LOOP.  */

static void
generate_memcpy_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree dest, src, fn, nb_bytes;
  location_t loc;
  enum built_in_function kind;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
                                 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
                                       false, GSI_CONTINUE_LINKING);
  dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
  if (ptr_derefs_may_alias_p (dest, src))
    kind = BUILT_IN_MEMMOVE;
  else
    kind = BUILT_IN_MEMCPY;

  dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
                                   false, GSI_CONTINUE_LINKING);
  src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
                                  false, GSI_CONTINUE_LINKING);
  fn = build_fold_addr_expr (builtin_decl_implicit (kind));
  fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (kind == BUILT_IN_MEMCPY)
        fprintf (dump_file, "generated memcpy\n");
      else
        fprintf (dump_file, "generated memmove\n");
    }
}
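/* Example transform (illustrative sketch): a copy loop

     for (i = 0; i < n; ++i)
       a[i] = b[i];

   becomes __builtin_memcpy (&a[0], &b[0], n * sizeof (*a)), or
   __builtin_memmove when the compiler cannot prove that the two
   references never alias.  */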
/* Remove and destroy the loop LOOP.  */

static void
destroy_loop (struct loop *loop)
{
  unsigned nbbs = loop->num_nodes;
  edge exit = single_exit (loop);
  basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
  basic_block *bbs;
  unsigned i;

  bbs = get_loop_body_in_dom_order (loop);

  redirect_edge_pred (exit, src);
  exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
  exit->flags |= EDGE_FALLTHRU;
  cancel_loop_tree (loop);
  rescan_loop_exit (exit, false, true);

  for (i = 0; i < nbbs; i++)
    {
      /* We have made sure to not leave any dangling uses of SSA
         names defined in the loop.  With the exception of virtuals.
         Make sure we replace all uses of virtual defs that will remain
         outside of the loop with the bare symbol as delete_basic_block
         will release them.  */
      for (gphi_iterator gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi);
           gsi_next (&gsi))
        {
          gphi *phi = gsi.phi ();
          if (virtual_operand_p (gimple_phi_result (phi)))
            mark_virtual_phi_result_for_renaming (phi);
        }
      for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);
           gsi_next (&gsi))
        {
          gimple stmt = gsi_stmt (gsi);
          tree vdef = gimple_vdef (stmt);
          if (vdef && TREE_CODE (vdef) == SSA_NAME)
            mark_virtual_operand_for_renaming (vdef);
        }
      delete_basic_block (bbs[i]);
    }
  free (bbs);

  set_immediate_dominator (CDI_DOMINATORS, dest,
                           recompute_dominator (CDI_DOMINATORS, dest));
}
/* Generates code for PARTITION.  */

static void
generate_code_for_partition (struct loop *loop,
                             partition_t partition, bool copy_p)
{
  switch (partition->kind)
    {
    case PKIND_NORMAL:
      /* Reductions all have to be in the last partition.  */
      gcc_assert (!partition_reduction_p (partition)
                  || !copy_p);
      generate_loops_for_partition (loop, partition, copy_p);
      return;

    case PKIND_MEMSET:
      generate_memset_builtin (loop, partition);
      break;

    case PKIND_MEMCPY:
      generate_memcpy_builtin (loop, partition);
      break;

    default:
      gcc_unreachable ();
    }

  /* Common tail for partitions we turn into a call.  If this was the last
     partition for which we generate code, we have to destroy the loop.  */
  if (!copy_p)
    destroy_loop (loop);
}
/* Returns a partition with all the statements needed for computing
   the vertex V of the RDG, also including the loop exit conditions.  */

static partition_t
build_rdg_partition_for_vertex (struct graph *rdg, int v)
{
  partition_t partition = partition_alloc (NULL, NULL);
  auto_vec<int, 3> nodes;
  unsigned i;
  int x;

  graphds_dfs (rdg, &v, 1, &nodes, false, NULL);

  FOR_EACH_VEC_ELT (nodes, i, x)
    {
      bitmap_set_bit (partition->stmts, x);
      bitmap_set_bit (partition->loops,
                      loop_containing_stmt (RDG_STMT (rdg, x))->num);
    }

  return partition;
}
/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
   For the moment we detect only the memset zero pattern.  */

static void
classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
{
  bitmap_iterator bi;
  unsigned i;
  tree nb_iter;
  data_reference_p single_load, single_store;
  bool volatiles_p = false;
  bool plus_one = false;

  partition->kind = PKIND_NORMAL;
  partition->main_dr = NULL;
  partition->secondary_dr = NULL;
  partition->niter = NULL_TREE;
  partition->plus_one = false;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);

      if (gimple_has_volatile_ops (stmt))
        volatiles_p = true;

      /* If the stmt has uses outside of the loop mark it as reduction.  */
      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
        {
          partition->reduction_p = true;
          return;
        }
    }

  /* Perform general partition disqualification for builtins.  */
  if (volatiles_p
      || !flag_tree_loop_distribute_patterns)
    return;

  /* Detect memset and memcpy.  */
  single_load = NULL;
  single_store = NULL;
  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);
      data_reference_p dr;
      unsigned j;

      if (gimple_code (stmt) == GIMPLE_PHI)
        continue;

      /* Any scalar stmts are ok.  */
      if (!gimple_vuse (stmt))
        continue;

      /* Otherwise just regular loads/stores.  */
      if (!gimple_assign_single_p (stmt))
        return;

      /* But exactly one store and/or load.  */
      for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
        {
          if (DR_IS_READ (dr))
            {
              if (single_load != NULL)
                return;
              single_load = dr;
            }
          else
            {
              if (single_store != NULL)
                return;
              single_store = dr;
            }
        }
    }

  if (!single_store)
    return;

  nb_iter = number_of_latch_executions (loop);
  if (!nb_iter || nb_iter == chrec_dont_know)
    return;
  if (dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
                      gimple_bb (DR_STMT (single_store))))
    plus_one = true;

  if (single_store && !single_load)
    {
      gimple stmt = DR_STMT (single_store);
      tree rhs = gimple_assign_rhs1 (stmt);
      if (const_with_all_bytes_same (rhs) == -1
          && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
              || (TYPE_MODE (TREE_TYPE (rhs))
                  != TYPE_MODE (unsigned_char_type_node))))
        return;
      if (TREE_CODE (rhs) == SSA_NAME
          && !SSA_NAME_IS_DEFAULT_DEF (rhs)
          && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
        return;
      if (!adjacent_dr_p (single_store)
          || !dominated_by_p (CDI_DOMINATORS,
                              loop->latch, gimple_bb (stmt)))
        return;
      partition->kind = PKIND_MEMSET;
      partition->main_dr = single_store;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
  else if (single_store && single_load)
    {
      gimple store = DR_STMT (single_store);
      gimple load = DR_STMT (single_load);
      /* Direct aggregate copy or via an SSA name temporary.  */
      if (load != store
          && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
        return;
      if (!adjacent_dr_p (single_store)
          || !adjacent_dr_p (single_load)
          || !operand_equal_p (DR_STEP (single_store),
                               DR_STEP (single_load), 0)
          || !dominated_by_p (CDI_DOMINATORS,
                              loop->latch, gimple_bb (store)))
        return;
      /* Now check that if there is a dependence this dependence is
         of a suitable form for memmove.  */
      vec<loop_p> loops = vNULL;
      ddr_p ddr;
      loops.safe_push (loop);
      ddr = initialize_data_dependence_relation (single_load, single_store,
                                                 loops);
      compute_affine_dependence (ddr, loop);
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
        {
          free_dependence_relation (ddr);
          loops.release ();
          return;
        }
      if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
        {
          if (DDR_NUM_DIST_VECTS (ddr) == 0)
            {
              free_dependence_relation (ddr);
              loops.release ();
              return;
            }
          lambda_vector dist_v;
          FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
            {
              int dist = dist_v[index_in_loop_nest (loop->num,
                                                    DDR_LOOP_NEST (ddr))];
              if (dist > 0 && !DDR_REVERSED_P (ddr))
                {
                  free_dependence_relation (ddr);
                  loops.release ();
                  return;
                }
            }
        }
      free_dependence_relation (ddr);
      loops.release ();
      partition->kind = PKIND_MEMCPY;
      partition->main_dr = single_store;
      partition->secondary_dr = single_load;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
}
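/* Note on the dependence check above (added for clarity): a positive,
   non-reversed distance means the load reads an element written by
   the store in an earlier iteration, e.g.

     for (i = 1; i < n; ++i)
       a[i] = a[i - 1];

   has distance 1 and must not become memcpy or memmove.  An overlap
   where the read runs ahead of the write is exactly what memmove
   guarantees to handle, so such partitions are still classified as
   PKIND_MEMCPY and materialized as memmove when the references may
   alias.  */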
/* For a data reference REF, return the declaration of its base
   address or NULL_TREE if the base is not determined.  */

static tree
ref_base_address (data_reference_p dr)
{
  tree base_address = DR_BASE_ADDRESS (dr);
  if (base_address
      && TREE_CODE (base_address) == ADDR_EXPR)
    return TREE_OPERAND (base_address, 0);
  return base_address;
}
/* Returns true when PARTITION1 and PARTITION2 have similar memory
   accesses in RDG.  */

static bool
similar_memory_accesses (struct graph *rdg, partition_t partition1,
                         partition_t partition2)
{
  unsigned i, j, k, l;
  bitmap_iterator bi, bj;
  data_reference_p ref1, ref2;

  /* First check whether in the intersection of the two partitions are
     any loads or stores.  Common loads are the situation that happens
     most often.  */
  EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
        || RDG_MEM_READS_STMT (rdg, i))
      return true;

  /* Then check all data-references against each other.  */
  EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
        || RDG_MEM_READS_STMT (rdg, i))
      EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
        if (RDG_MEM_WRITE_STMT (rdg, j)
            || RDG_MEM_READS_STMT (rdg, j))
          {
            FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
              {
                tree base1 = ref_base_address (ref1);
                if (base1)
                  FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
                    if (base1 == ref_base_address (ref2))
                      return true;
              }
          }

  return false;
}
/* Aggregate several components into a useful partition that is
   registered in the PARTITIONS vector.  Partitions will be
   distributed in different loops.  */

static void
rdg_build_partitions (struct graph *rdg,
                      vec<gimple> starting_stmts,
                      vec<partition_t> *partitions)
{
  bitmap processed = BITMAP_ALLOC (NULL);
  int i;
  gimple stmt;

  FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
    {
      int v = rdg_vertex_for_stmt (rdg, stmt);

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "ldist asked to generate code for vertex %d\n", v);

      /* If the vertex is already contained in another partition so
         is the partition rooted at it.  */
      if (bitmap_bit_p (processed, v))
        continue;

      partition_t partition = build_rdg_partition_for_vertex (rdg, v);
      bitmap_ior_into (processed, partition->stmts);

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "ldist useful partition:\n");
          dump_bitmap (dump_file, partition->stmts);
        }

      partitions->safe_push (partition);
    }

  /* All vertices should have been assigned to at least one partition now,
     other than vertices belonging to dead code.  */

  BITMAP_FREE (processed);
}
/* Dump to FILE the PARTITIONS.  */

static void
dump_rdg_partitions (FILE *file, vec<partition_t> partitions)
{
  int i;
  partition_t partition;

  FOR_EACH_VEC_ELT (partitions, i, partition)
    debug_bitmap_file (file, partition->stmts);
}
/* Debug PARTITIONS.  */
extern void debug_rdg_partitions (vec<partition_t> );

DEBUG_FUNCTION void
debug_rdg_partitions (vec<partition_t> partitions)
{
  dump_rdg_partitions (stderr, partitions);
}
/* Returns the number of read and write operations in the RDG.  */

static int
number_of_rw_in_rdg (struct graph *rdg)
{
  int i, res = 0;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
        ++res;

      if (RDG_MEM_READS_STMT (rdg, i))
        ++res;
    }

  return res;
}
/* Returns the number of read and write operations in a PARTITION of
   the RDG.  */

static int
number_of_rw_in_partition (struct graph *rdg, partition_t partition)
{
  int res = 0;
  unsigned i;
  bitmap_iterator ii;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
        ++res;

      if (RDG_MEM_READS_STMT (rdg, i))
        ++res;
    }

  return res;
}
/* Returns true when one of the PARTITIONS contains all the read or
   write operations of RDG.  */

static bool
partition_contains_all_rw (struct graph *rdg,
                           vec<partition_t> partitions)
{
  int i;
  partition_t partition;
  int nrw = number_of_rw_in_rdg (rdg);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    if (nrw == number_of_rw_in_partition (rdg, partition))
      return true;

  return false;
}
/* Compute partition dependence created by the data references in DRS1
   and DRS2 and modify and return DIR according to that.  */

static int
pg_add_dependence_edges (struct graph *rdg, vec<loop_p> loops, int dir,
                         vec<data_reference_p> drs1,
                         vec<data_reference_p> drs2)
{
  data_reference_p dr1, dr2;

  /* dependence direction - 0 is no dependence, -1 is back,
     1 is forth, 2 is both (we can stop then, merging will occur).  */
  for (int ii = 0; drs1.iterate (ii, &dr1); ++ii)
    for (int jj = 0; drs2.iterate (jj, &dr2); ++jj)
      {
        data_reference_p saved_dr1 = dr1;
        int this_dir = 1;
        ddr_p ddr;
        /* Re-shuffle data-refs to be in dominator order.  */
        if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
            > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
          {
            data_reference_p tem = dr1;
            dr1 = dr2;
            dr2 = tem;
            this_dir = -this_dir;
          }
        ddr = initialize_data_dependence_relation (dr1, dr2, loops);
        compute_affine_dependence (ddr, loops[0]);
        if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
          this_dir = 2;
        else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
          {
            if (DDR_REVERSED_P (ddr))
              {
                data_reference_p tem = dr1;
                dr1 = dr2;
                dr2 = tem;
                this_dir = -this_dir;
              }
            /* Known dependences can still be unordered throughout the
               iteration space, see gcc.dg/tree-ssa/ldist-16.c.  */
            if (DDR_NUM_DIST_VECTS (ddr) != 1)
              this_dir = 2;
            /* If the overlap is exact preserve stmt order.  */
            else if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1))
              ;
            else
              {
                /* Else as the distance vector is lexicographic positive
                   swap the dependence direction.  */
                this_dir = -this_dir;
              }
          }
        else
          this_dir = 0;
        free_dependence_relation (ddr);
        if (dir == 0)
          dir = this_dir;
        else if (dir != this_dir)
          dir = 2;
        /* Shuffle "back" dr1.  */
        dr1 = saved_dr1;
      }
  return dir;
}
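/* Encoding used above, summarized here for clarity: the returned
   direction is one of

     0   no dependence between the two sets of references
     1   all dependences point from the first to the second partition
    -1   all dependences point backwards
     2   dependences in both directions; the partitions must be fused

   distribute_loop below turns 1 and -1 into directed edges of the
   partition graph and fuses the strongly connected components.  */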
/* Compare postorder number of the partition graph vertices V1 and V2.  */

static int
pgcmp (const void *v1_, const void *v2_)
{
  const vertex *v1 = (const vertex *)v1_;
  const vertex *v2 = (const vertex *)v2_;
  return v2->post - v1->post;
}
/* Distributes the code from LOOP in such a way that producer
   statements are placed before consumer statements.  Tries to separate
   only the statements from STMTS into separate loops.
   Returns the number of distributed loops.  */

static int
distribute_loop (struct loop *loop, vec<gimple> stmts,
                 control_dependences *cd, int *nb_calls)
{
  struct graph *rdg;
  partition_t partition;
  bool any_builtin;
  int i, nbp;
  graph *pg = NULL;
  int num_sccs = 1;

  *nb_calls = 0;
  auto_vec<loop_p, 3> loop_nest;
  if (!find_loop_nest (loop, &loop_nest))
    return 0;

  rdg = build_rdg (loop_nest, cd);
  if (!rdg)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "Loop %d not distributed: failed to build the RDG.\n",
                 loop->num);

      return 0;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg (dump_file, rdg);

  auto_vec<partition_t, 3> partitions;
  rdg_build_partitions (rdg, stmts, &partitions);
  any_builtin = false;
  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      classify_partition (loop, rdg, partition);
      any_builtin |= partition_builtin_p (partition);
    }

  /* If we are only distributing patterns but did not detect any,
     simply bail out.  */
  if (!flag_tree_loop_distribution
      && !any_builtin)
    {
      nbp = 0;
      goto ldist_done;
    }

  /* If we are only distributing patterns fuse all partitions that
     were not classified as builtins.  This also avoids chopping
     a loop into pieces, separated by builtin calls.  That is, we
     only want no or a single loop body remaining.  */
  partition_t into;
  if (!flag_tree_loop_distribution)
    {
      for (i = 0; partitions.iterate (i, &into); ++i)
        if (!partition_builtin_p (into))
          break;
      for (++i; partitions.iterate (i, &partition); ++i)
        if (!partition_builtin_p (partition))
          {
            if (dump_file && (dump_flags & TDF_DETAILS))
              {
                fprintf (dump_file, "fusing non-builtin partitions\n");
                dump_bitmap (dump_file, into->stmts);
                dump_bitmap (dump_file, partition->stmts);
              }
            partition_merge_into (into, partition);
            partitions.unordered_remove (i);
            partition_free (partition);
            i--;
          }
    }
  /* Due to limitations in the transform phase we have to fuse all
     reduction partitions into the last partition so the existing
     loop will contain all loop-closed PHI nodes.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    if (partition_reduction_p (into))
      break;
  for (i = i + 1; partitions.iterate (i, &partition); ++i)
    if (partition_reduction_p (partition))
      {
        if (dump_file && (dump_flags & TDF_DETAILS))
          {
            fprintf (dump_file, "fusing partitions\n");
            dump_bitmap (dump_file, into->stmts);
            dump_bitmap (dump_file, partition->stmts);
            fprintf (dump_file, "because they have reductions\n");
          }
        partition_merge_into (into, partition);
        partitions.unordered_remove (i);
        partition_free (partition);
        i--;
      }
  /* Apply our simple cost model - fuse partitions with similar
     memory accesses.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    {
      if (partition_builtin_p (into))
        continue;
      for (int j = i + 1;
           partitions.iterate (j, &partition); ++j)
        {
          if (!partition_builtin_p (partition)
              && similar_memory_accesses (rdg, into, partition))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                {
                  fprintf (dump_file, "fusing partitions\n");
                  dump_bitmap (dump_file, into->stmts);
                  dump_bitmap (dump_file, partition->stmts);
                  fprintf (dump_file, "because they have similar "
                           "memory accesses\n");
                }
              partition_merge_into (into, partition);
              partitions.unordered_remove (j);
              partition_free (partition);
              j--;
            }
        }
    }
  /* Build the partition dependency graph.  */
  if (partitions.length () > 1)
    {
      pg = new_graph (partitions.length ());
      struct pgdata {
          partition_t partition;
          vec<data_reference_p> writes;
          vec<data_reference_p> reads;
      };
#define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
      for (i = 0; partitions.iterate (i, &partition); ++i)
        {
          vertex *v = &pg->vertices[i];
          pgdata *data = new pgdata;
          data_reference_p dr;
          /* FIXME - leaks.  */
          v->data = data;
          bitmap_iterator bi;
          unsigned j;
          data->partition = partition;
          data->reads = vNULL;
          data->writes = vNULL;
          EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, j, bi)
            for (int k = 0; RDG_DATAREFS (rdg, j).iterate (k, &dr); ++k)
              if (DR_IS_READ (dr))
                data->reads.safe_push (dr);
              else
                data->writes.safe_push (dr);
        }
      partition_t partition1, partition2;
      for (i = 0; partitions.iterate (i, &partition1); ++i)
        for (int j = i + 1; partitions.iterate (j, &partition2); ++j)
          {
            /* dependence direction - 0 is no dependence, -1 is back,
               1 is forth, 2 is both (we can stop then, merging will occur).  */
            int dir = 0;
            dir = pg_add_dependence_edges (rdg, loop_nest, dir,
                                           PGDATA(i)->writes,
                                           PGDATA(j)->reads);
            if (dir != 2)
              dir = pg_add_dependence_edges (rdg, loop_nest, dir,
                                             PGDATA(i)->reads,
                                             PGDATA(j)->writes);
            if (dir != 2)
              dir = pg_add_dependence_edges (rdg, loop_nest, dir,
                                             PGDATA(i)->writes,
                                             PGDATA(j)->writes);
            if (dir == 1 || dir == 2)
              add_edge (pg, i, j);
            if (dir == -1 || dir == 2)
              add_edge (pg, j, i);
          }
      /* Add edges to the reduction partition (if any) to force it last.  */
      unsigned j;
      for (j = 0; partitions.iterate (j, &partition); ++j)
        if (partition_reduction_p (partition))
          break;
      if (j < partitions.length ())
        {
          for (unsigned i = 0; partitions.iterate (i, &partition); ++i)
            if (i != j)
              add_edge (pg, i, j);
        }
      /* Compute partitions we cannot separate and fuse them.  */
      num_sccs = graphds_scc (pg, NULL);
      for (i = 0; i < num_sccs; ++i)
        {
          partition_t first;
          int j;
          for (j = 0; partitions.iterate (j, &first); ++j)
            if (pg->vertices[j].component == i)
              break;
          for (j = j + 1; partitions.iterate (j, &partition); ++j)
            if (pg->vertices[j].component == i)
              {
                if (dump_file && (dump_flags & TDF_DETAILS))
                  {
                    fprintf (dump_file, "fusing partitions\n");
                    dump_bitmap (dump_file, first->stmts);
                    dump_bitmap (dump_file, partition->stmts);
                    fprintf (dump_file, "because they are in the same "
                             "dependence SCC\n");
                  }
                partition_merge_into (first, partition);
                partitions[j] = NULL;
                partition_free (partition);
                PGDATA (j)->partition = NULL;
              }
        }
      /* Now order the remaining nodes in postorder.  */
      qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
      partitions.truncate (0);
      for (i = 0; i < pg->n_vertices; ++i)
        {
          pgdata *data = PGDATA (i);
          if (data->partition)
            partitions.safe_push (data->partition);
          data->reads.release ();
          data->writes.release ();
          delete data;
        }
      gcc_assert (partitions.length () == (unsigned)num_sccs);
      free_graph (pg);
    }
  nbp = partitions.length ();
  if (nbp == 0
      || (nbp == 1 && !partition_builtin_p (partitions[0]))
      || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
    {
      nbp = 0;
      goto ldist_done;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg_partitions (dump_file, partitions);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      if (partition_builtin_p (partition))
        (*nb_calls)++;
      generate_code_for_partition (loop, partition, i < nbp - 1);
    }

 ldist_done:

  FOR_EACH_VEC_ELT (partitions, i, partition)
    partition_free (partition);

  free_rdg (rdg);
  return nbp - *nb_calls;
}
/* Distribute all loops in the current function.  */

namespace {

const pass_data pass_data_loop_distribution =
{
  GIMPLE_PASS, /* type */
  "ldist", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
class pass_loop_distribution : public gimple_opt_pass
{
public:
  pass_loop_distribution (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_loop_distribution, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return flag_tree_loop_distribution
        || flag_tree_loop_distribute_patterns;
    }

  virtual unsigned int execute (function *);

}; // class pass_loop_distribution
unsigned int
pass_loop_distribution::execute (function *fun)
{
  struct loop *loop;
  bool changed = false;
  basic_block bb;
  control_dependences *cd = NULL;

  FOR_ALL_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        gimple_set_uid (gsi_stmt (gsi), -1);
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        gimple_set_uid (gsi_stmt (gsi), -1);
    }
  /* We can at the moment only distribute non-nested loops, thus restrict
     walking to innermost loops.  */
  FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
    {
      auto_vec<gimple> work_list;
      basic_block *bbs;
      int num = loop->num;
      unsigned int i;

      /* If the loop doesn't have a single exit we will fail anyway,
         so do that early.  */
      if (!single_exit (loop))
        continue;

      /* Only optimize hot loops.  */
      if (!optimize_loop_for_speed_p (loop))
        continue;

      /* Initialize the worklist with stmts we seed the partitions with.  */
      bbs = get_loop_body_in_dom_order (loop);
      for (i = 0; i < loop->num_nodes; ++i)
        {
          for (gphi_iterator gsi = gsi_start_phis (bbs[i]);
               !gsi_end_p (gsi);
               gsi_next (&gsi))
            {
              gphi *phi = gsi.phi ();
              if (virtual_operand_p (gimple_phi_result (phi)))
                continue;
              /* Distribute stmts which have defs that are used outside of
                 the loop.  */
              if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
                continue;
              work_list.safe_push (phi);
            }
          for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
               !gsi_end_p (gsi);
               gsi_next (&gsi))
            {
              gimple stmt = gsi_stmt (gsi);

              /* If there is a stmt with side-effects bail out - we
                 cannot and should not distribute this loop.  */
              if (gimple_has_side_effects (stmt))
                {
                  work_list.truncate (0);
                  goto out;
                }

              /* Distribute stmts which have defs that are used outside of
                 the loop.  */
              if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
                ;
              /* Otherwise only distribute stores for now.  */
              else if (!gimple_vdef (stmt))
                continue;

              work_list.safe_push (stmt);
            }
        }
out:
      free (bbs);
      int nb_generated_loops = 0;
      int nb_generated_calls = 0;
      location_t loc = find_loop_location (loop);
      if (work_list.length () > 0)
        {
          if (!cd)
            {
              calculate_dominance_info (CDI_DOMINATORS);
              calculate_dominance_info (CDI_POST_DOMINATORS);
              cd = new control_dependences (create_edge_list ());
              free_dominance_info (CDI_POST_DOMINATORS);
            }
          nb_generated_loops = distribute_loop (loop, work_list, cd,
                                                &nb_generated_calls);
        }

      if (nb_generated_loops + nb_generated_calls > 0)
        {
          changed = true;
          dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
                           loc, "Loop %d distributed: split to %d loops "
                           "and %d library calls.\n",
                           num, nb_generated_loops, nb_generated_calls);
        }
      else if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Loop %d is the same.\n", num);
    }

  if (cd)
    delete cd;
  if (changed)
    {
      /* Cached scalar evolutions now may refer to wrong or non-existing
         loops.  */
      scev_reset_htab ();
      mark_virtual_operands_for_renaming (fun);
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }

#ifdef ENABLE_CHECKING
  verify_loop_structure ();
#endif

  return 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_loop_distribution (gcc::context *ctxt)
{
  return new pass_loop_distribution (ctxt);
}