2 Copyright (C) 2006-2014 Free Software Foundation, Inc.
3 Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
4 and Sebastian Pop <sebastian.pop@amd.com>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 3, or (at your option) any
13 GCC is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* This pass performs loop distribution: for example, the loop
39 This pass uses an RDG, Reduced Dependence Graph built on top of the
40 data dependence relations. The RDG is then topologically sorted to
41 obtain a map of information producers/consumers based on which it
42 generates the new loops. */
46 #include "coretypes.h"
48 #include "basic-block.h"
49 #include "tree-ssa-alias.h"
50 #include "internal-fn.h"
51 #include "gimple-expr.h"
54 #include "gimple-iterator.h"
55 #include "gimplify-me.h"
56 #include "stor-layout.h"
57 #include "gimple-ssa.h"
59 #include "tree-phinodes.h"
60 #include "ssa-iterators.h"
61 #include "stringpool.h"
62 #include "tree-ssanames.h"
63 #include "tree-ssa-loop-manip.h"
64 #include "tree-ssa-loop.h"
65 #include "tree-into-ssa.h"
68 #include "tree-chrec.h"
69 #include "tree-data-ref.h"
70 #include "tree-scalar-evolution.h"
71 #include "tree-pass.h"
72 #include "gimple-pretty-print.h"
73 #include "tree-vectorizer.h"
76 /* A Reduced Dependence Graph (RDG) vertex representing a statement. */
77 typedef struct rdg_vertex
79 /* The statement represented by this vertex. */
82 /* Vector of data-references in this statement. */
83 vec
<data_reference_p
> datarefs
;
85 /* True when the statement contains a write to memory. */
88 /* True when the statement contains a read from memory. */
92 #define RDGV_STMT(V) ((struct rdg_vertex *) ((V)->data))->stmt
93 #define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
94 #define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
95 #define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
96 #define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
97 #define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
98 #define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
99 #define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
101 /* Data dependence type. */
105 /* Read After Write (RAW). */
108 /* Control dependence (execute conditional on). */
112 /* Dependence information attached to an edge of the RDG. */
114 typedef struct rdg_edge
116 /* Type of the dependence. */
117 enum rdg_dep_type type
;
120 #define RDGE_TYPE(E) ((struct rdg_edge *) ((E)->data))->type
122 /* Dump vertex I in RDG to FILE. */
125 dump_rdg_vertex (FILE *file
, struct graph
*rdg
, int i
)
127 struct vertex
*v
= &(rdg
->vertices
[i
]);
128 struct graph_edge
*e
;
130 fprintf (file
, "(vertex %d: (%s%s) (in:", i
,
131 RDG_MEM_WRITE_STMT (rdg
, i
) ? "w" : "",
132 RDG_MEM_READS_STMT (rdg
, i
) ? "r" : "");
135 for (e
= v
->pred
; e
; e
= e
->pred_next
)
136 fprintf (file
, " %d", e
->src
);
138 fprintf (file
, ") (out:");
141 for (e
= v
->succ
; e
; e
= e
->succ_next
)
142 fprintf (file
, " %d", e
->dest
);
144 fprintf (file
, ")\n");
145 print_gimple_stmt (file
, RDGV_STMT (v
), 0, TDF_VOPS
|TDF_MEMSYMS
);
146 fprintf (file
, ")\n");
149 /* Call dump_rdg_vertex on stderr. */
152 debug_rdg_vertex (struct graph
*rdg
, int i
)
154 dump_rdg_vertex (stderr
, rdg
, i
);
157 /* Dump the reduced dependence graph RDG to FILE. */
160 dump_rdg (FILE *file
, struct graph
*rdg
)
162 fprintf (file
, "(rdg\n");
163 for (int i
= 0; i
< rdg
->n_vertices
; i
++)
164 dump_rdg_vertex (file
, rdg
, i
);
165 fprintf (file
, ")\n");
168 /* Call dump_rdg on stderr. */
171 debug_rdg (struct graph
*rdg
)
173 dump_rdg (stderr
, rdg
);
177 dot_rdg_1 (FILE *file
, struct graph
*rdg
)
180 pretty_printer buffer
;
181 pp_needs_newline (&buffer
) = false;
182 buffer
.buffer
->stream
= file
;
184 fprintf (file
, "digraph RDG {\n");
186 for (i
= 0; i
< rdg
->n_vertices
; i
++)
188 struct vertex
*v
= &(rdg
->vertices
[i
]);
189 struct graph_edge
*e
;
191 fprintf (file
, "%d [label=\"[%d] ", i
, i
);
192 pp_gimple_stmt_1 (&buffer
, RDGV_STMT (v
), 0, TDF_SLIM
);
194 fprintf (file
, "\"]\n");
196 /* Highlight reads from memory. */
197 if (RDG_MEM_READS_STMT (rdg
, i
))
198 fprintf (file
, "%d [style=filled, fillcolor=green]\n", i
);
200 /* Highlight stores to memory. */
201 if (RDG_MEM_WRITE_STMT (rdg
, i
))
202 fprintf (file
, "%d [style=filled, fillcolor=red]\n", i
);
205 for (e
= v
->succ
; e
; e
= e
->succ_next
)
206 switch (RDGE_TYPE (e
))
209 /* These are the most common dependences: don't print these. */
210 fprintf (file
, "%d -> %d \n", i
, e
->dest
);
214 fprintf (file
, "%d -> %d [label=control] \n", i
, e
->dest
);
222 fprintf (file
, "}\n\n");
225 /* Display the Reduced Dependence Graph using dotty. */
228 dot_rdg (struct graph
*rdg
)
230 /* When debugging, you may want to enable the following code. */
232 FILE *file
= popen ("dot -Tx11", "w");
235 dot_rdg_1 (file
, rdg
);
237 close (fileno (file
));
240 dot_rdg_1 (stderr
, rdg
);
244 /* Returns the index of STMT in RDG. */
247 rdg_vertex_for_stmt (struct graph
*rdg ATTRIBUTE_UNUSED
, gimple stmt
)
249 int index
= gimple_uid (stmt
);
250 gcc_checking_assert (index
== -1 || RDG_STMT (rdg
, index
) == stmt
);
254 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
255 the index of DEF in RDG. */
258 create_rdg_edges_for_scalar (struct graph
*rdg
, tree def
, int idef
)
260 use_operand_p imm_use_p
;
261 imm_use_iterator iterator
;
263 FOR_EACH_IMM_USE_FAST (imm_use_p
, iterator
, def
)
265 struct graph_edge
*e
;
266 int use
= rdg_vertex_for_stmt (rdg
, USE_STMT (imm_use_p
));
271 e
= add_edge (rdg
, idef
, use
);
272 e
->data
= XNEW (struct rdg_edge
);
273 RDGE_TYPE (e
) = flow_dd
;
277 /* Creates an edge for the control dependences of BB to the vertex V. */
280 create_edge_for_control_dependence (struct graph
*rdg
, basic_block bb
,
281 int v
, control_dependences
*cd
)
285 EXECUTE_IF_SET_IN_BITMAP (cd
->get_edges_dependent_on (bb
->index
),
288 basic_block cond_bb
= cd
->get_edge (edge_n
)->src
;
289 gimple stmt
= last_stmt (cond_bb
);
290 if (stmt
&& is_ctrl_stmt (stmt
))
292 struct graph_edge
*e
;
293 int c
= rdg_vertex_for_stmt (rdg
, stmt
);
297 e
= add_edge (rdg
, c
, v
);
298 e
->data
= XNEW (struct rdg_edge
);
299 RDGE_TYPE (e
) = control_dd
;
304 /* Creates the edges of the reduced dependence graph RDG. */
307 create_rdg_flow_edges (struct graph
*rdg
)
313 for (i
= 0; i
< rdg
->n_vertices
; i
++)
314 FOR_EACH_PHI_OR_STMT_DEF (def_p
, RDG_STMT (rdg
, i
),
316 create_rdg_edges_for_scalar (rdg
, DEF_FROM_PTR (def_p
), i
);
319 /* Creates the edges of the reduced dependence graph RDG. */
322 create_rdg_cd_edges (struct graph
*rdg
, control_dependences
*cd
)
326 for (i
= 0; i
< rdg
->n_vertices
; i
++)
328 gimple stmt
= RDG_STMT (rdg
, i
);
329 if (gimple_code (stmt
) == GIMPLE_PHI
)
333 FOR_EACH_EDGE (e
, ei
, gimple_bb (stmt
)->preds
)
334 create_edge_for_control_dependence (rdg
, e
->src
, i
, cd
);
337 create_edge_for_control_dependence (rdg
, gimple_bb (stmt
), i
, cd
);
341 /* Build the vertices of the reduced dependence graph RDG. Return false
345 create_rdg_vertices (struct graph
*rdg
, vec
<gimple
> stmts
, loop_p loop
,
346 vec
<data_reference_p
> *datarefs
)
351 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
353 struct vertex
*v
= &(rdg
->vertices
[i
]);
355 /* Record statement to vertex mapping. */
356 gimple_set_uid (stmt
, i
);
358 v
->data
= XNEW (struct rdg_vertex
);
359 RDGV_STMT (v
) = stmt
;
360 RDGV_DATAREFS (v
).create (0);
361 RDGV_HAS_MEM_WRITE (v
) = false;
362 RDGV_HAS_MEM_READS (v
) = false;
363 if (gimple_code (stmt
) == GIMPLE_PHI
)
366 unsigned drp
= datarefs
->length ();
367 if (!find_data_references_in_stmt (loop
, stmt
, datarefs
))
369 for (unsigned j
= drp
; j
< datarefs
->length (); ++j
)
371 data_reference_p dr
= (*datarefs
)[j
];
373 RDGV_HAS_MEM_READS (v
) = true;
375 RDGV_HAS_MEM_WRITE (v
) = true;
376 RDGV_DATAREFS (v
).safe_push (dr
);
382 /* Initialize STMTS with all the statements of LOOP. The order in
383 which we discover statements is important as
384 generate_loops_for_partition is using the same traversal for
385 identifying statements in loop copies. */
388 stmts_from_loop (struct loop
*loop
, vec
<gimple
> *stmts
)
391 basic_block
*bbs
= get_loop_body_in_dom_order (loop
);
393 for (i
= 0; i
< loop
->num_nodes
; i
++)
395 basic_block bb
= bbs
[i
];
396 gimple_stmt_iterator bsi
;
399 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
400 if (!virtual_operand_p (gimple_phi_result (gsi_stmt (bsi
))))
401 stmts
->safe_push (gsi_stmt (bsi
));
403 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
405 stmt
= gsi_stmt (bsi
);
406 if (gimple_code (stmt
) != GIMPLE_LABEL
&& !is_gimple_debug (stmt
))
407 stmts
->safe_push (stmt
);
414 /* Free the reduced dependence graph RDG. */
417 free_rdg (struct graph
*rdg
)
421 for (i
= 0; i
< rdg
->n_vertices
; i
++)
423 struct vertex
*v
= &(rdg
->vertices
[i
]);
424 struct graph_edge
*e
;
426 for (e
= v
->succ
; e
; e
= e
->succ_next
)
431 gimple_set_uid (RDGV_STMT (v
), -1);
432 free_data_refs (RDGV_DATAREFS (v
));
440 /* Build the Reduced Dependence Graph (RDG) with one vertex per
441 statement of the loop nest LOOP_NEST, and one edge per data dependence or
442 scalar dependence. */
444 static struct graph
*
445 build_rdg (vec
<loop_p
> loop_nest
, control_dependences
*cd
)
448 vec
<data_reference_p
> datarefs
;
450 /* Create the RDG vertices from the stmts of the loop nest. */
451 auto_vec
<gimple
, 10> stmts
;
452 stmts_from_loop (loop_nest
[0], &stmts
);
453 rdg
= new_graph (stmts
.length ());
454 datarefs
.create (10);
455 if (!create_rdg_vertices (rdg
, stmts
, loop_nest
[0], &datarefs
))
463 create_rdg_flow_edges (rdg
);
465 create_rdg_cd_edges (rdg
, cd
);
474 enum partition_kind
{
475 PKIND_NORMAL
, PKIND_MEMSET
, PKIND_MEMCPY
478 typedef struct partition_s
483 enum partition_kind kind
;
484 /* data-references a kind != PKIND_NORMAL partition is about. */
485 data_reference_p main_dr
;
486 data_reference_p secondary_dr
;
492 /* Allocate and initialize a partition from BITMAP. */
495 partition_alloc (bitmap stmts
, bitmap loops
)
497 partition_t partition
= XCNEW (struct partition_s
);
498 partition
->stmts
= stmts
? stmts
: BITMAP_ALLOC (NULL
);
499 partition
->loops
= loops
? loops
: BITMAP_ALLOC (NULL
);
500 partition
->reduction_p
= false;
501 partition
->kind
= PKIND_NORMAL
;
505 /* Free PARTITION. */
508 partition_free (partition_t partition
)
510 BITMAP_FREE (partition
->stmts
);
511 BITMAP_FREE (partition
->loops
);
515 /* Returns true if the partition can be generated as a builtin. */
518 partition_builtin_p (partition_t partition
)
520 return partition
->kind
!= PKIND_NORMAL
;
523 /* Returns true if the partition contains a reduction. */
526 partition_reduction_p (partition_t partition
)
528 return partition
->reduction_p
;
531 /* Merge PARTITION into the partition DEST. */
534 partition_merge_into (partition_t dest
, partition_t partition
)
536 dest
->kind
= PKIND_NORMAL
;
537 bitmap_ior_into (dest
->stmts
, partition
->stmts
);
538 if (partition_reduction_p (partition
))
539 dest
->reduction_p
= true;
543 /* Returns true when DEF is an SSA_NAME defined in LOOP and used after
547 ssa_name_has_uses_outside_loop_p (tree def
, loop_p loop
)
549 imm_use_iterator imm_iter
;
552 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, def
)
554 gimple use_stmt
= USE_STMT (use_p
);
555 if (!is_gimple_debug (use_stmt
)
556 && loop
!= loop_containing_stmt (use_stmt
))
563 /* Returns true when STMT defines a scalar variable used after the
567 stmt_has_scalar_dependences_outside_loop (loop_p loop
, gimple stmt
)
572 if (gimple_code (stmt
) == GIMPLE_PHI
)
573 return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt
), loop
);
575 FOR_EACH_SSA_DEF_OPERAND (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
576 if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p
), loop
))
582 /* Return a copy of LOOP placed before LOOP. */
585 copy_loop_before (struct loop
*loop
)
588 edge preheader
= loop_preheader_edge (loop
);
590 initialize_original_copy_tables ();
591 res
= slpeel_tree_duplicate_loop_to_edge_cfg (loop
, NULL
, preheader
);
592 gcc_assert (res
!= NULL
);
593 free_original_copy_tables ();
594 delete_update_ssa ();
599 /* Creates an empty basic block after LOOP. */
602 create_bb_after_loop (struct loop
*loop
)
604 edge exit
= single_exit (loop
);
612 /* Generate code for PARTITION from the code in LOOP. The loop is
613 copied when COPY_P is true. All the statements not flagged in the
614 PARTITION bitmap are removed from the loop or from its copy. The
615 statements are indexed in sequence inside a basic block, and the
616 basic blocks of a loop are taken in dom order. */
619 generate_loops_for_partition (struct loop
*loop
, partition_t partition
,
623 gimple_stmt_iterator bsi
;
628 loop
= copy_loop_before (loop
);
629 gcc_assert (loop
!= NULL
);
630 create_preheader (loop
, CP_SIMPLE_PREHEADERS
);
631 create_bb_after_loop (loop
);
634 /* Remove stmts not in the PARTITION bitmap. */
635 bbs
= get_loop_body_in_dom_order (loop
);
637 if (MAY_HAVE_DEBUG_STMTS
)
638 for (i
= 0; i
< loop
->num_nodes
; i
++)
640 basic_block bb
= bbs
[i
];
642 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
644 gimple phi
= gsi_stmt (bsi
);
645 if (!virtual_operand_p (gimple_phi_result (phi
))
646 && !bitmap_bit_p (partition
->stmts
, gimple_uid (phi
)))
647 reset_debug_uses (phi
);
650 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
652 gimple stmt
= gsi_stmt (bsi
);
653 if (gimple_code (stmt
) != GIMPLE_LABEL
654 && !is_gimple_debug (stmt
)
655 && !bitmap_bit_p (partition
->stmts
, gimple_uid (stmt
)))
656 reset_debug_uses (stmt
);
660 for (i
= 0; i
< loop
->num_nodes
; i
++)
662 basic_block bb
= bbs
[i
];
664 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
);)
666 gimple phi
= gsi_stmt (bsi
);
667 if (!virtual_operand_p (gimple_phi_result (phi
))
668 && !bitmap_bit_p (partition
->stmts
, gimple_uid (phi
)))
669 remove_phi_node (&bsi
, true);
674 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
);)
676 gimple stmt
= gsi_stmt (bsi
);
677 if (gimple_code (stmt
) != GIMPLE_LABEL
678 && !is_gimple_debug (stmt
)
679 && !bitmap_bit_p (partition
->stmts
, gimple_uid (stmt
)))
681 /* Choose an arbitrary path through the empty CFG part
682 that this unnecessary control stmt controls. */
683 if (gimple_cond cond_stmt
= dyn_cast
<gimple_cond
> (stmt
))
685 gimple_cond_make_false (cond_stmt
);
688 else if (gimple_code (stmt
) == GIMPLE_SWITCH
)
690 gimple_switch switch_stmt
= as_a
<gimple_switch
> (stmt
);
691 gimple_switch_set_index
692 (switch_stmt
, CASE_LOW (gimple_switch_label (switch_stmt
, 1)));
697 unlink_stmt_vdef (stmt
);
698 gsi_remove (&bsi
, true);
710 /* Build the size argument for a memory operation call. */
713 build_size_arg_loc (location_t loc
, data_reference_p dr
, tree nb_iter
,
716 tree size
= fold_convert_loc (loc
, sizetype
, nb_iter
);
718 size
= size_binop (PLUS_EXPR
, size
, size_one_node
);
719 size
= fold_build2_loc (loc
, MULT_EXPR
, sizetype
, size
,
720 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr
))));
721 size
= fold_convert_loc (loc
, size_type_node
, size
);
725 /* Build an address argument for a memory operation call. */
728 build_addr_arg_loc (location_t loc
, data_reference_p dr
, tree nb_bytes
)
732 addr_base
= size_binop_loc (loc
, PLUS_EXPR
, DR_OFFSET (dr
), DR_INIT (dr
));
733 addr_base
= fold_convert_loc (loc
, sizetype
, addr_base
);
735 /* Test for a negative stride, iterating over every element. */
736 if (tree_int_cst_sgn (DR_STEP (dr
)) == -1)
738 addr_base
= size_binop_loc (loc
, MINUS_EXPR
, addr_base
,
739 fold_convert_loc (loc
, sizetype
, nb_bytes
));
740 addr_base
= size_binop_loc (loc
, PLUS_EXPR
, addr_base
,
741 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr
))));
744 return fold_build_pointer_plus_loc (loc
, DR_BASE_ADDRESS (dr
), addr_base
);
747 /* If VAL memory representation contains the same value in all bytes,
748 return that value, otherwise return -1.
749 E.g. for 0x24242424 return 0x24, for IEEE double
750 747708026454360457216.0 return 0x44, etc. */
753 const_with_all_bytes_same (tree val
)
755 unsigned char buf
[64];
758 if (integer_zerop (val
)
760 || (TREE_CODE (val
) == CONSTRUCTOR
761 && !TREE_CLOBBER_P (val
)
762 && CONSTRUCTOR_NELTS (val
) == 0))
765 if (CHAR_BIT
!= 8 || BITS_PER_UNIT
!= 8)
768 len
= native_encode_expr (val
, buf
, sizeof (buf
));
771 for (i
= 1; i
< len
; i
++)
772 if (buf
[i
] != buf
[0])
777 /* Generate a call to memset for PARTITION in LOOP. */
780 generate_memset_builtin (struct loop
*loop
, partition_t partition
)
782 gimple_stmt_iterator gsi
;
783 gimple stmt
, fn_call
;
784 tree mem
, fn
, nb_bytes
;
788 stmt
= DR_STMT (partition
->main_dr
);
789 loc
= gimple_location (stmt
);
791 /* The new statements will be placed before LOOP. */
792 gsi
= gsi_last_bb (loop_preheader_edge (loop
)->src
);
794 nb_bytes
= build_size_arg_loc (loc
, partition
->main_dr
, partition
->niter
,
795 partition
->plus_one
);
796 nb_bytes
= force_gimple_operand_gsi (&gsi
, nb_bytes
, true, NULL_TREE
,
797 false, GSI_CONTINUE_LINKING
);
798 mem
= build_addr_arg_loc (loc
, partition
->main_dr
, nb_bytes
);
799 mem
= force_gimple_operand_gsi (&gsi
, mem
, true, NULL_TREE
,
800 false, GSI_CONTINUE_LINKING
);
802 /* This exactly matches the pattern recognition in classify_partition. */
803 val
= gimple_assign_rhs1 (stmt
);
804 /* Handle constants like 0x15151515 and similarly
805 floating point constants etc. where all bytes are the same. */
806 int bytev
= const_with_all_bytes_same (val
);
808 val
= build_int_cst (integer_type_node
, bytev
);
809 else if (TREE_CODE (val
) == INTEGER_CST
)
810 val
= fold_convert (integer_type_node
, val
);
811 else if (!useless_type_conversion_p (integer_type_node
, TREE_TYPE (val
)))
814 tree tem
= make_ssa_name (integer_type_node
, NULL
);
815 cstmt
= gimple_build_assign_with_ops (NOP_EXPR
, tem
, val
, NULL_TREE
);
816 gsi_insert_after (&gsi
, cstmt
, GSI_CONTINUE_LINKING
);
820 fn
= build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET
));
821 fn_call
= gimple_build_call (fn
, 3, mem
, val
, nb_bytes
);
822 gsi_insert_after (&gsi
, fn_call
, GSI_CONTINUE_LINKING
);
824 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
826 fprintf (dump_file
, "generated memset");
828 fprintf (dump_file
, " zero\n");
830 fprintf (dump_file
, "\n");
834 /* Generate a call to memcpy for PARTITION in LOOP. */
837 generate_memcpy_builtin (struct loop
*loop
, partition_t partition
)
839 gimple_stmt_iterator gsi
;
840 gimple stmt
, fn_call
;
841 tree dest
, src
, fn
, nb_bytes
;
843 enum built_in_function kind
;
845 stmt
= DR_STMT (partition
->main_dr
);
846 loc
= gimple_location (stmt
);
848 /* The new statements will be placed before LOOP. */
849 gsi
= gsi_last_bb (loop_preheader_edge (loop
)->src
);
851 nb_bytes
= build_size_arg_loc (loc
, partition
->main_dr
, partition
->niter
,
852 partition
->plus_one
);
853 nb_bytes
= force_gimple_operand_gsi (&gsi
, nb_bytes
, true, NULL_TREE
,
854 false, GSI_CONTINUE_LINKING
);
855 dest
= build_addr_arg_loc (loc
, partition
->main_dr
, nb_bytes
);
856 src
= build_addr_arg_loc (loc
, partition
->secondary_dr
, nb_bytes
);
857 if (ptr_derefs_may_alias_p (dest
, src
))
858 kind
= BUILT_IN_MEMMOVE
;
860 kind
= BUILT_IN_MEMCPY
;
862 dest
= force_gimple_operand_gsi (&gsi
, dest
, true, NULL_TREE
,
863 false, GSI_CONTINUE_LINKING
);
864 src
= force_gimple_operand_gsi (&gsi
, src
, true, NULL_TREE
,
865 false, GSI_CONTINUE_LINKING
);
866 fn
= build_fold_addr_expr (builtin_decl_implicit (kind
));
867 fn_call
= gimple_build_call (fn
, 3, dest
, src
, nb_bytes
);
868 gsi_insert_after (&gsi
, fn_call
, GSI_CONTINUE_LINKING
);
870 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
872 if (kind
== BUILT_IN_MEMCPY
)
873 fprintf (dump_file
, "generated memcpy\n");
875 fprintf (dump_file
, "generated memmove\n");
879 /* Remove and destroy the loop LOOP. */
882 destroy_loop (struct loop
*loop
)
884 unsigned nbbs
= loop
->num_nodes
;
885 edge exit
= single_exit (loop
);
886 basic_block src
= loop_preheader_edge (loop
)->src
, dest
= exit
->dest
;
890 bbs
= get_loop_body_in_dom_order (loop
);
892 redirect_edge_pred (exit
, src
);
893 exit
->flags
&= ~(EDGE_TRUE_VALUE
|EDGE_FALSE_VALUE
);
894 exit
->flags
|= EDGE_FALLTHRU
;
895 cancel_loop_tree (loop
);
896 rescan_loop_exit (exit
, false, true);
898 for (i
= 0; i
< nbbs
; i
++)
900 /* We have made sure to not leave any dangling uses of SSA
901 names defined in the loop. With the exception of virtuals.
902 Make sure we replace all uses of virtual defs that will remain
903 outside of the loop with the bare symbol as delete_basic_block
904 will release them. */
905 gimple_stmt_iterator gsi
;
906 for (gsi
= gsi_start_phis (bbs
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
908 gimple phi
= gsi_stmt (gsi
);
909 if (virtual_operand_p (gimple_phi_result (phi
)))
910 mark_virtual_phi_result_for_renaming (phi
);
912 for (gsi
= gsi_start_bb (bbs
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
914 gimple stmt
= gsi_stmt (gsi
);
915 tree vdef
= gimple_vdef (stmt
);
916 if (vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
917 mark_virtual_operand_for_renaming (vdef
);
919 delete_basic_block (bbs
[i
]);
923 set_immediate_dominator (CDI_DOMINATORS
, dest
,
924 recompute_dominator (CDI_DOMINATORS
, dest
));
927 /* Generates code for PARTITION. */
930 generate_code_for_partition (struct loop
*loop
,
931 partition_t partition
, bool copy_p
)
933 switch (partition
->kind
)
936 /* Reductions all have to be in the last partition. */
937 gcc_assert (!partition_reduction_p (partition
)
939 generate_loops_for_partition (loop
, partition
, copy_p
);
943 generate_memset_builtin (loop
, partition
);
947 generate_memcpy_builtin (loop
, partition
);
954 /* Common tail for partitions we turn into a call. If this was the last
955 partition for which we generate code, we have to destroy the loop. */
961 /* Returns a partition with all the statements needed for computing
962 the vertex V of the RDG, also including the loop exit conditions. */
965 build_rdg_partition_for_vertex (struct graph
*rdg
, int v
)
967 partition_t partition
= partition_alloc (NULL
, NULL
);
968 auto_vec
<int, 3> nodes
;
972 graphds_dfs (rdg
, &v
, 1, &nodes
, false, NULL
);
974 FOR_EACH_VEC_ELT (nodes
, i
, x
)
976 bitmap_set_bit (partition
->stmts
, x
);
977 bitmap_set_bit (partition
->loops
,
978 loop_containing_stmt (RDG_STMT (rdg
, x
))->num
);
984 /* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
985 For the moment we detect only the memset zero pattern. */
988 classify_partition (loop_p loop
, struct graph
*rdg
, partition_t partition
)
993 data_reference_p single_load
, single_store
;
994 bool volatiles_p
= false;
995 bool plus_one
= false;
997 partition
->kind
= PKIND_NORMAL
;
998 partition
->main_dr
= NULL
;
999 partition
->secondary_dr
= NULL
;
1000 partition
->niter
= NULL_TREE
;
1001 partition
->plus_one
= false;
1003 EXECUTE_IF_SET_IN_BITMAP (partition
->stmts
, 0, i
, bi
)
1005 gimple stmt
= RDG_STMT (rdg
, i
);
1007 if (gimple_has_volatile_ops (stmt
))
1010 /* If the stmt has uses outside of the loop mark it as reduction. */
1011 if (stmt_has_scalar_dependences_outside_loop (loop
, stmt
))
1013 partition
->reduction_p
= true;
1018 /* Perform general partition disqualification for builtins. */
1020 || !flag_tree_loop_distribute_patterns
)
1023 /* Detect memset and memcpy. */
1025 single_store
= NULL
;
1026 EXECUTE_IF_SET_IN_BITMAP (partition
->stmts
, 0, i
, bi
)
1028 gimple stmt
= RDG_STMT (rdg
, i
);
1029 data_reference_p dr
;
1032 if (gimple_code (stmt
) == GIMPLE_PHI
)
1035 /* Any scalar stmts are ok. */
1036 if (!gimple_vuse (stmt
))
1039 /* Otherwise just regular loads/stores. */
1040 if (!gimple_assign_single_p (stmt
))
1043 /* But exactly one store and/or load. */
1044 for (j
= 0; RDG_DATAREFS (rdg
, i
).iterate (j
, &dr
); ++j
)
1046 if (DR_IS_READ (dr
))
1048 if (single_load
!= NULL
)
1054 if (single_store
!= NULL
)
1064 nb_iter
= number_of_latch_executions (loop
);
1065 if (!nb_iter
|| nb_iter
== chrec_dont_know
)
1067 if (dominated_by_p (CDI_DOMINATORS
, single_exit (loop
)->src
,
1068 gimple_bb (DR_STMT (single_store
))))
1071 if (single_store
&& !single_load
)
1073 gimple stmt
= DR_STMT (single_store
);
1074 tree rhs
= gimple_assign_rhs1 (stmt
);
1075 if (const_with_all_bytes_same (rhs
) == -1
1076 && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs
))
1077 || (TYPE_MODE (TREE_TYPE (rhs
))
1078 != TYPE_MODE (unsigned_char_type_node
))))
1080 if (TREE_CODE (rhs
) == SSA_NAME
1081 && !SSA_NAME_IS_DEFAULT_DEF (rhs
)
1082 && flow_bb_inside_loop_p (loop
, gimple_bb (SSA_NAME_DEF_STMT (rhs
))))
1084 if (!adjacent_dr_p (single_store
)
1085 || !dominated_by_p (CDI_DOMINATORS
,
1086 loop
->latch
, gimple_bb (stmt
)))
1088 partition
->kind
= PKIND_MEMSET
;
1089 partition
->main_dr
= single_store
;
1090 partition
->niter
= nb_iter
;
1091 partition
->plus_one
= plus_one
;
1093 else if (single_store
&& single_load
)
1095 gimple store
= DR_STMT (single_store
);
1096 gimple load
= DR_STMT (single_load
);
1097 /* Direct aggregate copy or via an SSA name temporary. */
1099 && gimple_assign_lhs (load
) != gimple_assign_rhs1 (store
))
1101 if (!adjacent_dr_p (single_store
)
1102 || !adjacent_dr_p (single_load
)
1103 || !operand_equal_p (DR_STEP (single_store
),
1104 DR_STEP (single_load
), 0)
1105 || !dominated_by_p (CDI_DOMINATORS
,
1106 loop
->latch
, gimple_bb (store
)))
1108 /* Now check that if there is a dependence this dependence is
1109 of a suitable form for memmove. */
1110 vec
<loop_p
> loops
= vNULL
;
1112 loops
.safe_push (loop
);
1113 ddr
= initialize_data_dependence_relation (single_load
, single_store
,
1115 compute_affine_dependence (ddr
, loop
);
1116 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
1118 free_dependence_relation (ddr
);
1122 if (DDR_ARE_DEPENDENT (ddr
) != chrec_known
)
1124 if (DDR_NUM_DIST_VECTS (ddr
) == 0)
1126 free_dependence_relation (ddr
);
1130 lambda_vector dist_v
;
1131 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr
), i
, dist_v
)
1133 int dist
= dist_v
[index_in_loop_nest (loop
->num
,
1134 DDR_LOOP_NEST (ddr
))];
1135 if (dist
> 0 && !DDR_REVERSED_P (ddr
))
1137 free_dependence_relation (ddr
);
1143 free_dependence_relation (ddr
);
1145 partition
->kind
= PKIND_MEMCPY
;
1146 partition
->main_dr
= single_store
;
1147 partition
->secondary_dr
= single_load
;
1148 partition
->niter
= nb_iter
;
1149 partition
->plus_one
= plus_one
;
1153 /* For a data reference REF, return the declaration of its base
1154 address or NULL_TREE if the base is not determined. */
1157 ref_base_address (data_reference_p dr
)
1159 tree base_address
= DR_BASE_ADDRESS (dr
);
1161 && TREE_CODE (base_address
) == ADDR_EXPR
)
1162 return TREE_OPERAND (base_address
, 0);
1164 return base_address
;
1167 /* Returns true when PARTITION1 and PARTITION2 have similar memory
1171 similar_memory_accesses (struct graph
*rdg
, partition_t partition1
,
1172 partition_t partition2
)
1174 unsigned i
, j
, k
, l
;
1175 bitmap_iterator bi
, bj
;
1176 data_reference_p ref1
, ref2
;
1178 /* First check whether in the intersection of the two partitions are
1179 any loads or stores. Common loads are the situation that happens
1181 EXECUTE_IF_AND_IN_BITMAP (partition1
->stmts
, partition2
->stmts
, 0, i
, bi
)
1182 if (RDG_MEM_WRITE_STMT (rdg
, i
)
1183 || RDG_MEM_READS_STMT (rdg
, i
))
1186 /* Then check all data-references against each other. */
1187 EXECUTE_IF_SET_IN_BITMAP (partition1
->stmts
, 0, i
, bi
)
1188 if (RDG_MEM_WRITE_STMT (rdg
, i
)
1189 || RDG_MEM_READS_STMT (rdg
, i
))
1190 EXECUTE_IF_SET_IN_BITMAP (partition2
->stmts
, 0, j
, bj
)
1191 if (RDG_MEM_WRITE_STMT (rdg
, j
)
1192 || RDG_MEM_READS_STMT (rdg
, j
))
1194 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg
, i
), k
, ref1
)
1196 tree base1
= ref_base_address (ref1
);
1198 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg
, j
), l
, ref2
)
1199 if (base1
== ref_base_address (ref2
))
1207 /* Aggregate several components into a useful partition that is
1208 registered in the PARTITIONS vector. Partitions will be
1209 distributed in different loops. */
1212 rdg_build_partitions (struct graph
*rdg
,
1213 vec
<gimple
> starting_stmts
,
1214 vec
<partition_t
> *partitions
)
1216 bitmap processed
= BITMAP_ALLOC (NULL
);
1220 FOR_EACH_VEC_ELT (starting_stmts
, i
, stmt
)
1222 int v
= rdg_vertex_for_stmt (rdg
, stmt
);
1224 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1226 "ldist asked to generate code for vertex %d\n", v
);
1228 /* If the vertex is already contained in another partition so
1229 is the partition rooted at it. */
1230 if (bitmap_bit_p (processed
, v
))
1233 partition_t partition
= build_rdg_partition_for_vertex (rdg
, v
);
1234 bitmap_ior_into (processed
, partition
->stmts
);
1236 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1238 fprintf (dump_file
, "ldist useful partition:\n");
1239 dump_bitmap (dump_file
, partition
->stmts
);
1242 partitions
->safe_push (partition
);
1245 /* All vertices should have been assigned to at least one partition now,
1246 other than vertices belonging to dead code. */
1248 BITMAP_FREE (processed
);
1251 /* Dump to FILE the PARTITIONS. */
1254 dump_rdg_partitions (FILE *file
, vec
<partition_t
> partitions
)
1257 partition_t partition
;
1259 FOR_EACH_VEC_ELT (partitions
, i
, partition
)
1260 debug_bitmap_file (file
, partition
->stmts
);
1263 /* Debug PARTITIONS. */
1264 extern void debug_rdg_partitions (vec
<partition_t
> );
1267 debug_rdg_partitions (vec
<partition_t
> partitions
)
1269 dump_rdg_partitions (stderr
, partitions
);
1272 /* Returns the number of read and write operations in the RDG. */
1275 number_of_rw_in_rdg (struct graph
*rdg
)
1279 for (i
= 0; i
< rdg
->n_vertices
; i
++)
1281 if (RDG_MEM_WRITE_STMT (rdg
, i
))
1284 if (RDG_MEM_READS_STMT (rdg
, i
))
1291 /* Returns the number of read and write operations in a PARTITION of
1295 number_of_rw_in_partition (struct graph
*rdg
, partition_t partition
)
1301 EXECUTE_IF_SET_IN_BITMAP (partition
->stmts
, 0, i
, ii
)
1303 if (RDG_MEM_WRITE_STMT (rdg
, i
))
1306 if (RDG_MEM_READS_STMT (rdg
, i
))
1313 /* Returns true when one of the PARTITIONS contains all the read or
1314 write operations of RDG. */
1317 partition_contains_all_rw (struct graph
*rdg
,
1318 vec
<partition_t
> partitions
)
1321 partition_t partition
;
1322 int nrw
= number_of_rw_in_rdg (rdg
);
1324 FOR_EACH_VEC_ELT (partitions
, i
, partition
)
1325 if (nrw
== number_of_rw_in_partition (rdg
, partition
))
1331 /* Compute partition dependence created by the data references in DRS1
1332 and DRS2 and modify and return DIR according to that. */
1335 pg_add_dependence_edges (struct graph
*rdg
, vec
<loop_p
> loops
, int dir
,
1336 vec
<data_reference_p
> drs1
,
1337 vec
<data_reference_p
> drs2
)
1339 data_reference_p dr1
, dr2
;
1341 /* dependence direction - 0 is no dependence, -1 is back,
1342 1 is forth, 2 is both (we can stop then, merging will occur). */
1343 for (int ii
= 0; drs1
.iterate (ii
, &dr1
); ++ii
)
1344 for (int jj
= 0; drs2
.iterate (jj
, &dr2
); ++jj
)
1348 /* Re-shuffle data-refs to be in dominator order. */
1349 if (rdg_vertex_for_stmt (rdg
, DR_STMT (dr1
))
1350 > rdg_vertex_for_stmt (rdg
, DR_STMT (dr2
)))
1352 data_reference_p tem
= dr1
;
1355 this_dir
= -this_dir
;
1357 ddr
= initialize_data_dependence_relation (dr1
, dr2
, loops
);
1358 compute_affine_dependence (ddr
, loops
[0]);
1359 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
1361 else if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
1363 if (DDR_REVERSED_P (ddr
))
1365 data_reference_p tem
= dr1
;
1368 this_dir
= -this_dir
;
1370 /* Known dependences can still be unordered througout the
1371 iteration space, see gcc.dg/tree-ssa/ldist-16.c. */
1372 if (DDR_NUM_DIST_VECTS (ddr
) != 1)
1374 /* If the overlap is exact preserve stmt order. */
1375 else if (lambda_vector_zerop (DDR_DIST_VECT (ddr
, 0), 1))
1379 /* Else as the distance vector is lexicographic positive
1380 swap the dependence direction. */
1381 this_dir
= -this_dir
;
1386 free_dependence_relation (ddr
);
1389 else if (dir
!= this_dir
)
1395 /* Compare postorder number of the partition graph vertices V1 and V2. */
1398 pgcmp (const void *v1_
, const void *v2_
)
1400 const vertex
*v1
= (const vertex
*)v1_
;
1401 const vertex
*v2
= (const vertex
*)v2_
;
1402 return v2
->post
- v1
->post
;
1405 /* Distributes the code from LOOP in such a way that producer
1406 statements are placed before consumer statements. Tries to separate
1407 only the statements from STMTS into separate loops.
1408 Returns the number of distributed loops. */
1411 distribute_loop (struct loop
*loop
, vec
<gimple
> stmts
,
1412 control_dependences
*cd
, int *nb_calls
)
1415 partition_t partition
;
1422 auto_vec
<loop_p
, 3> loop_nest
;
1423 if (!find_loop_nest (loop
, &loop_nest
))
1426 rdg
= build_rdg (loop_nest
, cd
);
1429 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1431 "Loop %d not distributed: failed to build the RDG.\n",
1437 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1438 dump_rdg (dump_file
, rdg
);
1440 auto_vec
<partition_t
, 3> partitions
;
1441 rdg_build_partitions (rdg
, stmts
, &partitions
);
1443 any_builtin
= false;
1444 FOR_EACH_VEC_ELT (partitions
, i
, partition
)
1446 classify_partition (loop
, rdg
, partition
);
1447 any_builtin
|= partition_builtin_p (partition
);
1450 /* If we are only distributing patterns but did not detect any,
1452 if (!flag_tree_loop_distribution
1459 /* If we are only distributing patterns fuse all partitions that
1460 were not classified as builtins. This also avoids chopping
1461 a loop into pieces, separated by builtin calls. That is, we
1462 only want no or a single loop body remaining. */
1464 if (!flag_tree_loop_distribution
)
1466 for (i
= 0; partitions
.iterate (i
, &into
); ++i
)
1467 if (!partition_builtin_p (into
))
1469 for (++i
; partitions
.iterate (i
, &partition
); ++i
)
1470 if (!partition_builtin_p (partition
))
1472 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1474 fprintf (dump_file
, "fusing non-builtin partitions\n");
1475 dump_bitmap (dump_file
, into
->stmts
);
1476 dump_bitmap (dump_file
, partition
->stmts
);
1478 partition_merge_into (into
, partition
);
1479 partitions
.unordered_remove (i
);
1480 partition_free (partition
);
1485 /* Due to limitations in the transform phase we have to fuse all
1486 reduction partitions into the last partition so the existing
1487 loop will contain all loop-closed PHI nodes. */
1488 for (i
= 0; partitions
.iterate (i
, &into
); ++i
)
1489 if (partition_reduction_p (into
))
1491 for (i
= i
+ 1; partitions
.iterate (i
, &partition
); ++i
)
1492 if (partition_reduction_p (partition
))
1494 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1496 fprintf (dump_file
, "fusing partitions\n");
1497 dump_bitmap (dump_file
, into
->stmts
);
1498 dump_bitmap (dump_file
, partition
->stmts
);
1499 fprintf (dump_file
, "because they have reductions\n");
1501 partition_merge_into (into
, partition
);
1502 partitions
.unordered_remove (i
);
1503 partition_free (partition
);
1507 /* Apply our simple cost model - fuse partitions with similar
1509 for (i
= 0; partitions
.iterate (i
, &into
); ++i
)
1511 if (partition_builtin_p (into
))
1514 partitions
.iterate (j
, &partition
); ++j
)
1516 if (!partition_builtin_p (partition
)
1517 && similar_memory_accesses (rdg
, into
, partition
))
1519 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1521 fprintf (dump_file
, "fusing partitions\n");
1522 dump_bitmap (dump_file
, into
->stmts
);
1523 dump_bitmap (dump_file
, partition
->stmts
);
1524 fprintf (dump_file
, "because they have similar "
1525 "memory accesses\n");
1527 partition_merge_into (into
, partition
);
1528 partitions
.unordered_remove (j
);
1529 partition_free (partition
);
1535 /* Build the partition dependency graph. */
1536 if (partitions
.length () > 1)
1538 pg
= new_graph (partitions
.length ());
1540 partition_t partition
;
1541 vec
<data_reference_p
> writes
;
1542 vec
<data_reference_p
> reads
;
1544 #define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
1545 for (i
= 0; partitions
.iterate (i
, &partition
); ++i
)
1547 vertex
*v
= &pg
->vertices
[i
];
1548 pgdata
*data
= new pgdata
;
1549 data_reference_p dr
;
1550 /* FIXME - leaks. */
1554 data
->partition
= partition
;
1555 data
->reads
= vNULL
;
1556 data
->writes
= vNULL
;
1557 EXECUTE_IF_SET_IN_BITMAP (partition
->stmts
, 0, j
, bi
)
1558 for (int k
= 0; RDG_DATAREFS (rdg
, j
).iterate (k
, &dr
); ++k
)
1559 if (DR_IS_READ (dr
))
1560 data
->reads
.safe_push (dr
);
1562 data
->writes
.safe_push (dr
);
1564 partition_t partition1
, partition2
;
1565 for (i
= 0; partitions
.iterate (i
, &partition1
); ++i
)
1566 for (int j
= i
+ 1; partitions
.iterate (j
, &partition2
); ++j
)
1568 /* dependence direction - 0 is no dependence, -1 is back,
1569 1 is forth, 2 is both (we can stop then, merging will occur). */
1571 dir
= pg_add_dependence_edges (rdg
, loop_nest
, dir
,
1575 dir
= pg_add_dependence_edges (rdg
, loop_nest
, dir
,
1579 dir
= pg_add_dependence_edges (rdg
, loop_nest
, dir
,
1582 if (dir
== 1 || dir
== 2)
1583 add_edge (pg
, i
, j
);
1584 if (dir
== -1 || dir
== 2)
1585 add_edge (pg
, j
, i
);
1588 /* Add edges to the reduction partition (if any) to force it last. */
1590 for (j
= 0; partitions
.iterate (j
, &partition
); ++j
)
1591 if (partition_reduction_p (partition
))
1593 if (j
< partitions
.length ())
1595 for (unsigned i
= 0; partitions
.iterate (i
, &partition
); ++i
)
1597 add_edge (pg
, i
, j
);
1600 /* Compute partitions we cannot separate and fuse them. */
1601 num_sccs
= graphds_scc (pg
, NULL
);
1602 for (i
= 0; i
< num_sccs
; ++i
)
1606 for (j
= 0; partitions
.iterate (j
, &first
); ++j
)
1607 if (pg
->vertices
[j
].component
== i
)
1609 for (j
= j
+ 1; partitions
.iterate (j
, &partition
); ++j
)
1610 if (pg
->vertices
[j
].component
== i
)
1612 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1614 fprintf (dump_file
, "fusing partitions\n");
1615 dump_bitmap (dump_file
, first
->stmts
);
1616 dump_bitmap (dump_file
, partition
->stmts
);
1617 fprintf (dump_file
, "because they are in the same "
1618 "dependence SCC\n");
1620 partition_merge_into (first
, partition
);
1621 partitions
[j
] = NULL
;
1622 partition_free (partition
);
1623 PGDATA (j
)->partition
= NULL
;
1627 /* Now order the remaining nodes in postorder. */
1628 qsort (pg
->vertices
, pg
->n_vertices
, sizeof (vertex
), pgcmp
);
1629 partitions
.truncate (0);
1630 for (i
= 0; i
< pg
->n_vertices
; ++i
)
1632 pgdata
*data
= PGDATA (i
);
1633 if (data
->partition
)
1634 partitions
.safe_push (data
->partition
);
1635 data
->reads
.release ();
1636 data
->writes
.release ();
1639 gcc_assert (partitions
.length () == (unsigned)num_sccs
);
1643 nbp
= partitions
.length ();
1645 || (nbp
== 1 && !partition_builtin_p (partitions
[0]))
1646 || (nbp
> 1 && partition_contains_all_rw (rdg
, partitions
)))
1652 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1653 dump_rdg_partitions (dump_file
, partitions
);
1655 FOR_EACH_VEC_ELT (partitions
, i
, partition
)
1657 if (partition_builtin_p (partition
))
1659 generate_code_for_partition (loop
, partition
, i
< nbp
- 1);
1664 FOR_EACH_VEC_ELT (partitions
, i
, partition
)
1665 partition_free (partition
);
1668 return nbp
- *nb_calls
;
1671 /* Distribute all loops in the current function. */
1675 const pass_data pass_data_loop_distribution
=
1677 GIMPLE_PASS
, /* type */
1679 OPTGROUP_LOOP
, /* optinfo_flags */
1680 TV_TREE_LOOP_DISTRIBUTION
, /* tv_id */
1681 ( PROP_cfg
| PROP_ssa
), /* properties_required */
1682 0, /* properties_provided */
1683 0, /* properties_destroyed */
1684 0, /* todo_flags_start */
1685 0, /* todo_flags_finish */
1688 class pass_loop_distribution
: public gimple_opt_pass
1691 pass_loop_distribution (gcc::context
*ctxt
)
1692 : gimple_opt_pass (pass_data_loop_distribution
, ctxt
)
1695 /* opt_pass methods: */
1696 virtual bool gate (function
*)
1698 return flag_tree_loop_distribution
1699 || flag_tree_loop_distribute_patterns
;
1702 virtual unsigned int execute (function
*);
1704 }; // class pass_loop_distribution
1707 pass_loop_distribution::execute (function
*fun
)
1710 bool changed
= false;
1712 control_dependences
*cd
= NULL
;
1714 FOR_ALL_BB_FN (bb
, fun
)
1716 gimple_stmt_iterator gsi
;
1717 for (gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1718 gimple_set_uid (gsi_stmt (gsi
), -1);
1719 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1720 gimple_set_uid (gsi_stmt (gsi
), -1);
1723 /* We can at the moment only distribute non-nested loops, thus restrict
1724 walking to innermost loops. */
1725 FOR_EACH_LOOP (loop
, LI_ONLY_INNERMOST
)
1727 auto_vec
<gimple
> work_list
;
1729 int num
= loop
->num
;
1732 /* If the loop doesn't have a single exit we will fail anyway,
1733 so do that early. */
1734 if (!single_exit (loop
))
1737 /* Only optimize hot loops. */
1738 if (!optimize_loop_for_speed_p (loop
))
1741 /* Initialize the worklist with stmts we seed the partitions with. */
1742 bbs
= get_loop_body_in_dom_order (loop
);
1743 for (i
= 0; i
< loop
->num_nodes
; ++i
)
1745 gimple_stmt_iterator gsi
;
1746 for (gsi
= gsi_start_phis (bbs
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
1748 gimple phi
= gsi_stmt (gsi
);
1749 if (virtual_operand_p (gimple_phi_result (phi
)))
1751 /* Distribute stmts which have defs that are used outside of
1753 if (!stmt_has_scalar_dependences_outside_loop (loop
, phi
))
1755 work_list
.safe_push (phi
);
1757 for (gsi
= gsi_start_bb (bbs
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
1759 gimple stmt
= gsi_stmt (gsi
);
1761 /* If there is a stmt with side-effects bail out - we
1762 cannot and should not distribute this loop. */
1763 if (gimple_has_side_effects (stmt
))
1765 work_list
.truncate (0);
1769 /* Distribute stmts which have defs that are used outside of
1771 if (stmt_has_scalar_dependences_outside_loop (loop
, stmt
))
1773 /* Otherwise only distribute stores for now. */
1774 else if (!gimple_vdef (stmt
))
1777 work_list
.safe_push (stmt
);
1783 int nb_generated_loops
= 0;
1784 int nb_generated_calls
= 0;
1785 location_t loc
= find_loop_location (loop
);
1786 if (work_list
.length () > 0)
1790 calculate_dominance_info (CDI_DOMINATORS
);
1791 calculate_dominance_info (CDI_POST_DOMINATORS
);
1792 cd
= new control_dependences (create_edge_list ());
1793 free_dominance_info (CDI_POST_DOMINATORS
);
1795 nb_generated_loops
= distribute_loop (loop
, work_list
, cd
,
1796 &nb_generated_calls
);
1799 if (nb_generated_loops
+ nb_generated_calls
> 0)
1802 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
,
1803 loc
, "Loop %d distributed: split to %d loops "
1804 "and %d library calls.\n",
1805 num
, nb_generated_loops
, nb_generated_calls
);
1807 else if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1808 fprintf (dump_file
, "Loop %d is the same.\n", num
);
1816 mark_virtual_operands_for_renaming (fun
);
1817 rewrite_into_loop_closed_ssa (NULL
, TODO_update_ssa
);
1820 #ifdef ENABLE_CHECKING
1821 verify_loop_structure ();
1830 make_pass_loop_distribution (gcc::context
*ctxt
)
1832 return new pass_loop_distribution (ctxt
);