2 Copyright (C) 2006-2015 Free Software Foundation, Inc.
3 Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
4 and Sebastian Pop <sebastian.pop@amd.com>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 3, or (at your option) any
13 GCC is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* This pass performs loop distribution: for example, the loop
39 This pass uses an RDG, Reduced Dependence Graph built on top of the
40 data dependence relations. The RDG is then topologically sorted to
41 obtain a map of information producers/consumers based on which it
42 generates the new loops. */
46 #include "coretypes.h"
51 #include "fold-const.h"
54 #include "hard-reg-set.h"
56 #include "dominance.h"
59 #include "basic-block.h"
60 #include "tree-ssa-alias.h"
61 #include "internal-fn.h"
62 #include "gimple-expr.h"
64 #include "gimple-iterator.h"
65 #include "gimplify-me.h"
66 #include "stor-layout.h"
67 #include "gimple-ssa.h"
69 #include "tree-phinodes.h"
70 #include "ssa-iterators.h"
71 #include "stringpool.h"
72 #include "tree-ssanames.h"
73 #include "tree-ssa-loop-manip.h"
74 #include "tree-ssa-loop.h"
75 #include "tree-into-ssa.h"
78 #include "tree-chrec.h"
79 #include "tree-data-ref.h"
80 #include "tree-scalar-evolution.h"
81 #include "tree-pass.h"
82 #include "gimple-pretty-print.h"
83 #include "tree-vectorizer.h"
86 /* A Reduced Dependence Graph (RDG) vertex representing a statement. */
87 typedef struct rdg_vertex
89 /* The statement represented by this vertex. */
92 /* Vector of data-references in this statement. */
93 vec
<data_reference_p
> datarefs
;
95 /* True when the statement contains a write to memory. */
98 /* True when the statement contains a read from memory. */
102 #define RDGV_STMT(V) ((struct rdg_vertex *) ((V)->data))->stmt
103 #define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
104 #define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
105 #define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
106 #define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
107 #define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
108 #define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
109 #define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
/* Data dependence type.  */

enum rdg_dep_type
{
  /* Read After Write (RAW).  */
  flow_dd = 'f',

  /* Control dependence (execute conditional on).  */
  control_dd = 'c'
};

/* Dependence information attached to an edge of the RDG.  */

typedef struct rdg_edge
{
  /* Type of the dependence.  */
  enum rdg_dep_type type;
} *rdg_edge_p;

#define RDGE_TYPE(E)        ((struct rdg_edge *) ((E)->data))->type
132 /* Dump vertex I in RDG to FILE. */
135 dump_rdg_vertex (FILE *file
, struct graph
*rdg
, int i
)
137 struct vertex
*v
= &(rdg
->vertices
[i
]);
138 struct graph_edge
*e
;
140 fprintf (file
, "(vertex %d: (%s%s) (in:", i
,
141 RDG_MEM_WRITE_STMT (rdg
, i
) ? "w" : "",
142 RDG_MEM_READS_STMT (rdg
, i
) ? "r" : "");
145 for (e
= v
->pred
; e
; e
= e
->pred_next
)
146 fprintf (file
, " %d", e
->src
);
148 fprintf (file
, ") (out:");
151 for (e
= v
->succ
; e
; e
= e
->succ_next
)
152 fprintf (file
, " %d", e
->dest
);
154 fprintf (file
, ")\n");
155 print_gimple_stmt (file
, RDGV_STMT (v
), 0, TDF_VOPS
|TDF_MEMSYMS
);
156 fprintf (file
, ")\n");
159 /* Call dump_rdg_vertex on stderr. */
162 debug_rdg_vertex (struct graph
*rdg
, int i
)
164 dump_rdg_vertex (stderr
, rdg
, i
);
167 /* Dump the reduced dependence graph RDG to FILE. */
170 dump_rdg (FILE *file
, struct graph
*rdg
)
172 fprintf (file
, "(rdg\n");
173 for (int i
= 0; i
< rdg
->n_vertices
; i
++)
174 dump_rdg_vertex (file
, rdg
, i
);
175 fprintf (file
, ")\n");
178 /* Call dump_rdg on stderr. */
181 debug_rdg (struct graph
*rdg
)
183 dump_rdg (stderr
, rdg
);
187 dot_rdg_1 (FILE *file
, struct graph
*rdg
)
190 pretty_printer buffer
;
191 pp_needs_newline (&buffer
) = false;
192 buffer
.buffer
->stream
= file
;
194 fprintf (file
, "digraph RDG {\n");
196 for (i
= 0; i
< rdg
->n_vertices
; i
++)
198 struct vertex
*v
= &(rdg
->vertices
[i
]);
199 struct graph_edge
*e
;
201 fprintf (file
, "%d [label=\"[%d] ", i
, i
);
202 pp_gimple_stmt_1 (&buffer
, RDGV_STMT (v
), 0, TDF_SLIM
);
204 fprintf (file
, "\"]\n");
206 /* Highlight reads from memory. */
207 if (RDG_MEM_READS_STMT (rdg
, i
))
208 fprintf (file
, "%d [style=filled, fillcolor=green]\n", i
);
210 /* Highlight stores to memory. */
211 if (RDG_MEM_WRITE_STMT (rdg
, i
))
212 fprintf (file
, "%d [style=filled, fillcolor=red]\n", i
);
215 for (e
= v
->succ
; e
; e
= e
->succ_next
)
216 switch (RDGE_TYPE (e
))
219 /* These are the most common dependences: don't print these. */
220 fprintf (file
, "%d -> %d \n", i
, e
->dest
);
224 fprintf (file
, "%d -> %d [label=control] \n", i
, e
->dest
);
232 fprintf (file
, "}\n\n");
235 /* Display the Reduced Dependence Graph using dotty. */
238 dot_rdg (struct graph
*rdg
)
240 /* When debugging, you may want to enable the following code. */
242 FILE *file
= popen ("dot -Tx11", "w");
245 dot_rdg_1 (file
, rdg
);
247 close (fileno (file
));
250 dot_rdg_1 (stderr
, rdg
);
254 /* Returns the index of STMT in RDG. */
257 rdg_vertex_for_stmt (struct graph
*rdg ATTRIBUTE_UNUSED
, gimple stmt
)
259 int index
= gimple_uid (stmt
);
260 gcc_checking_assert (index
== -1 || RDG_STMT (rdg
, index
) == stmt
);
264 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
265 the index of DEF in RDG. */
268 create_rdg_edges_for_scalar (struct graph
*rdg
, tree def
, int idef
)
270 use_operand_p imm_use_p
;
271 imm_use_iterator iterator
;
273 FOR_EACH_IMM_USE_FAST (imm_use_p
, iterator
, def
)
275 struct graph_edge
*e
;
276 int use
= rdg_vertex_for_stmt (rdg
, USE_STMT (imm_use_p
));
281 e
= add_edge (rdg
, idef
, use
);
282 e
->data
= XNEW (struct rdg_edge
);
283 RDGE_TYPE (e
) = flow_dd
;
287 /* Creates an edge for the control dependences of BB to the vertex V. */
290 create_edge_for_control_dependence (struct graph
*rdg
, basic_block bb
,
291 int v
, control_dependences
*cd
)
295 EXECUTE_IF_SET_IN_BITMAP (cd
->get_edges_dependent_on (bb
->index
),
298 basic_block cond_bb
= cd
->get_edge (edge_n
)->src
;
299 gimple stmt
= last_stmt (cond_bb
);
300 if (stmt
&& is_ctrl_stmt (stmt
))
302 struct graph_edge
*e
;
303 int c
= rdg_vertex_for_stmt (rdg
, stmt
);
307 e
= add_edge (rdg
, c
, v
);
308 e
->data
= XNEW (struct rdg_edge
);
309 RDGE_TYPE (e
) = control_dd
;
314 /* Creates the edges of the reduced dependence graph RDG. */
317 create_rdg_flow_edges (struct graph
*rdg
)
323 for (i
= 0; i
< rdg
->n_vertices
; i
++)
324 FOR_EACH_PHI_OR_STMT_DEF (def_p
, RDG_STMT (rdg
, i
),
326 create_rdg_edges_for_scalar (rdg
, DEF_FROM_PTR (def_p
), i
);
329 /* Creates the edges of the reduced dependence graph RDG. */
332 create_rdg_cd_edges (struct graph
*rdg
, control_dependences
*cd
)
336 for (i
= 0; i
< rdg
->n_vertices
; i
++)
338 gimple stmt
= RDG_STMT (rdg
, i
);
339 if (gimple_code (stmt
) == GIMPLE_PHI
)
343 FOR_EACH_EDGE (e
, ei
, gimple_bb (stmt
)->preds
)
344 create_edge_for_control_dependence (rdg
, e
->src
, i
, cd
);
347 create_edge_for_control_dependence (rdg
, gimple_bb (stmt
), i
, cd
);
351 /* Build the vertices of the reduced dependence graph RDG. Return false
355 create_rdg_vertices (struct graph
*rdg
, vec
<gimple
> stmts
, loop_p loop
,
356 vec
<data_reference_p
> *datarefs
)
361 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
363 struct vertex
*v
= &(rdg
->vertices
[i
]);
365 /* Record statement to vertex mapping. */
366 gimple_set_uid (stmt
, i
);
368 v
->data
= XNEW (struct rdg_vertex
);
369 RDGV_STMT (v
) = stmt
;
370 RDGV_DATAREFS (v
).create (0);
371 RDGV_HAS_MEM_WRITE (v
) = false;
372 RDGV_HAS_MEM_READS (v
) = false;
373 if (gimple_code (stmt
) == GIMPLE_PHI
)
376 unsigned drp
= datarefs
->length ();
377 if (!find_data_references_in_stmt (loop
, stmt
, datarefs
))
379 for (unsigned j
= drp
; j
< datarefs
->length (); ++j
)
381 data_reference_p dr
= (*datarefs
)[j
];
383 RDGV_HAS_MEM_READS (v
) = true;
385 RDGV_HAS_MEM_WRITE (v
) = true;
386 RDGV_DATAREFS (v
).safe_push (dr
);
392 /* Initialize STMTS with all the statements of LOOP. The order in
393 which we discover statements is important as
394 generate_loops_for_partition is using the same traversal for
395 identifying statements in loop copies. */
398 stmts_from_loop (struct loop
*loop
, vec
<gimple
> *stmts
)
401 basic_block
*bbs
= get_loop_body_in_dom_order (loop
);
403 for (i
= 0; i
< loop
->num_nodes
; i
++)
405 basic_block bb
= bbs
[i
];
407 for (gphi_iterator bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
);
409 if (!virtual_operand_p (gimple_phi_result (bsi
.phi ())))
410 stmts
->safe_push (bsi
.phi ());
412 for (gimple_stmt_iterator bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
);
415 gimple stmt
= gsi_stmt (bsi
);
416 if (gimple_code (stmt
) != GIMPLE_LABEL
&& !is_gimple_debug (stmt
))
417 stmts
->safe_push (stmt
);
424 /* Free the reduced dependence graph RDG. */
427 free_rdg (struct graph
*rdg
)
431 for (i
= 0; i
< rdg
->n_vertices
; i
++)
433 struct vertex
*v
= &(rdg
->vertices
[i
]);
434 struct graph_edge
*e
;
436 for (e
= v
->succ
; e
; e
= e
->succ_next
)
441 gimple_set_uid (RDGV_STMT (v
), -1);
442 free_data_refs (RDGV_DATAREFS (v
));
450 /* Build the Reduced Dependence Graph (RDG) with one vertex per
451 statement of the loop nest LOOP_NEST, and one edge per data dependence or
452 scalar dependence. */
454 static struct graph
*
455 build_rdg (vec
<loop_p
> loop_nest
, control_dependences
*cd
)
458 vec
<data_reference_p
> datarefs
;
460 /* Create the RDG vertices from the stmts of the loop nest. */
461 auto_vec
<gimple
, 10> stmts
;
462 stmts_from_loop (loop_nest
[0], &stmts
);
463 rdg
= new_graph (stmts
.length ());
464 datarefs
.create (10);
465 if (!create_rdg_vertices (rdg
, stmts
, loop_nest
[0], &datarefs
))
473 create_rdg_flow_edges (rdg
);
475 create_rdg_cd_edges (rdg
, cd
);
484 enum partition_kind
{
485 PKIND_NORMAL
, PKIND_MEMSET
, PKIND_MEMCPY
488 typedef struct partition_s
493 enum partition_kind kind
;
494 /* data-references a kind != PKIND_NORMAL partition is about. */
495 data_reference_p main_dr
;
496 data_reference_p secondary_dr
;
502 /* Allocate and initialize a partition from BITMAP. */
505 partition_alloc (bitmap stmts
, bitmap loops
)
507 partition_t partition
= XCNEW (struct partition_s
);
508 partition
->stmts
= stmts
? stmts
: BITMAP_ALLOC (NULL
);
509 partition
->loops
= loops
? loops
: BITMAP_ALLOC (NULL
);
510 partition
->reduction_p
= false;
511 partition
->kind
= PKIND_NORMAL
;
515 /* Free PARTITION. */
518 partition_free (partition_t partition
)
520 BITMAP_FREE (partition
->stmts
);
521 BITMAP_FREE (partition
->loops
);
525 /* Returns true if the partition can be generated as a builtin. */
528 partition_builtin_p (partition_t partition
)
530 return partition
->kind
!= PKIND_NORMAL
;
533 /* Returns true if the partition contains a reduction. */
536 partition_reduction_p (partition_t partition
)
538 return partition
->reduction_p
;
541 /* Merge PARTITION into the partition DEST. */
544 partition_merge_into (partition_t dest
, partition_t partition
)
546 dest
->kind
= PKIND_NORMAL
;
547 bitmap_ior_into (dest
->stmts
, partition
->stmts
);
548 if (partition_reduction_p (partition
))
549 dest
->reduction_p
= true;
553 /* Returns true when DEF is an SSA_NAME defined in LOOP and used after
557 ssa_name_has_uses_outside_loop_p (tree def
, loop_p loop
)
559 imm_use_iterator imm_iter
;
562 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, def
)
564 gimple use_stmt
= USE_STMT (use_p
);
565 if (!is_gimple_debug (use_stmt
)
566 && loop
!= loop_containing_stmt (use_stmt
))
573 /* Returns true when STMT defines a scalar variable used after the
577 stmt_has_scalar_dependences_outside_loop (loop_p loop
, gimple stmt
)
582 if (gimple_code (stmt
) == GIMPLE_PHI
)
583 return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt
), loop
);
585 FOR_EACH_SSA_DEF_OPERAND (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
586 if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p
), loop
))
592 /* Return a copy of LOOP placed before LOOP. */
595 copy_loop_before (struct loop
*loop
)
598 edge preheader
= loop_preheader_edge (loop
);
600 initialize_original_copy_tables ();
601 res
= slpeel_tree_duplicate_loop_to_edge_cfg (loop
, NULL
, preheader
);
602 gcc_assert (res
!= NULL
);
603 free_original_copy_tables ();
604 delete_update_ssa ();
609 /* Creates an empty basic block after LOOP. */
612 create_bb_after_loop (struct loop
*loop
)
614 edge exit
= single_exit (loop
);
622 /* Generate code for PARTITION from the code in LOOP. The loop is
623 copied when COPY_P is true. All the statements not flagged in the
624 PARTITION bitmap are removed from the loop or from its copy. The
625 statements are indexed in sequence inside a basic block, and the
626 basic blocks of a loop are taken in dom order. */
629 generate_loops_for_partition (struct loop
*loop
, partition_t partition
,
637 loop
= copy_loop_before (loop
);
638 gcc_assert (loop
!= NULL
);
639 create_preheader (loop
, CP_SIMPLE_PREHEADERS
);
640 create_bb_after_loop (loop
);
643 /* Remove stmts not in the PARTITION bitmap. */
644 bbs
= get_loop_body_in_dom_order (loop
);
646 if (MAY_HAVE_DEBUG_STMTS
)
647 for (i
= 0; i
< loop
->num_nodes
; i
++)
649 basic_block bb
= bbs
[i
];
651 for (gphi_iterator bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
);
654 gphi
*phi
= bsi
.phi ();
655 if (!virtual_operand_p (gimple_phi_result (phi
))
656 && !bitmap_bit_p (partition
->stmts
, gimple_uid (phi
)))
657 reset_debug_uses (phi
);
660 for (gimple_stmt_iterator bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
662 gimple stmt
= gsi_stmt (bsi
);
663 if (gimple_code (stmt
) != GIMPLE_LABEL
664 && !is_gimple_debug (stmt
)
665 && !bitmap_bit_p (partition
->stmts
, gimple_uid (stmt
)))
666 reset_debug_uses (stmt
);
670 for (i
= 0; i
< loop
->num_nodes
; i
++)
672 basic_block bb
= bbs
[i
];
674 for (gphi_iterator bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
);)
676 gphi
*phi
= bsi
.phi ();
677 if (!virtual_operand_p (gimple_phi_result (phi
))
678 && !bitmap_bit_p (partition
->stmts
, gimple_uid (phi
)))
679 remove_phi_node (&bsi
, true);
684 for (gimple_stmt_iterator bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
);)
686 gimple stmt
= gsi_stmt (bsi
);
687 if (gimple_code (stmt
) != GIMPLE_LABEL
688 && !is_gimple_debug (stmt
)
689 && !bitmap_bit_p (partition
->stmts
, gimple_uid (stmt
)))
691 /* Choose an arbitrary path through the empty CFG part
692 that this unnecessary control stmt controls. */
693 if (gcond
*cond_stmt
= dyn_cast
<gcond
*> (stmt
))
695 gimple_cond_make_false (cond_stmt
);
698 else if (gimple_code (stmt
) == GIMPLE_SWITCH
)
700 gswitch
*switch_stmt
= as_a
<gswitch
*> (stmt
);
701 gimple_switch_set_index
702 (switch_stmt
, CASE_LOW (gimple_switch_label (switch_stmt
, 1)));
707 unlink_stmt_vdef (stmt
);
708 gsi_remove (&bsi
, true);
720 /* Build the size argument for a memory operation call. */
723 build_size_arg_loc (location_t loc
, data_reference_p dr
, tree nb_iter
,
726 tree size
= fold_convert_loc (loc
, sizetype
, nb_iter
);
728 size
= size_binop (PLUS_EXPR
, size
, size_one_node
);
729 size
= fold_build2_loc (loc
, MULT_EXPR
, sizetype
, size
,
730 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr
))));
731 size
= fold_convert_loc (loc
, size_type_node
, size
);
735 /* Build an address argument for a memory operation call. */
738 build_addr_arg_loc (location_t loc
, data_reference_p dr
, tree nb_bytes
)
742 addr_base
= size_binop_loc (loc
, PLUS_EXPR
, DR_OFFSET (dr
), DR_INIT (dr
));
743 addr_base
= fold_convert_loc (loc
, sizetype
, addr_base
);
745 /* Test for a negative stride, iterating over every element. */
746 if (tree_int_cst_sgn (DR_STEP (dr
)) == -1)
748 addr_base
= size_binop_loc (loc
, MINUS_EXPR
, addr_base
,
749 fold_convert_loc (loc
, sizetype
, nb_bytes
));
750 addr_base
= size_binop_loc (loc
, PLUS_EXPR
, addr_base
,
751 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr
))));
754 return fold_build_pointer_plus_loc (loc
, DR_BASE_ADDRESS (dr
), addr_base
);
757 /* If VAL memory representation contains the same value in all bytes,
758 return that value, otherwise return -1.
759 E.g. for 0x24242424 return 0x24, for IEEE double
760 747708026454360457216.0 return 0x44, etc. */
763 const_with_all_bytes_same (tree val
)
765 unsigned char buf
[64];
768 if (integer_zerop (val
)
770 || (TREE_CODE (val
) == CONSTRUCTOR
771 && !TREE_CLOBBER_P (val
)
772 && CONSTRUCTOR_NELTS (val
) == 0))
775 if (CHAR_BIT
!= 8 || BITS_PER_UNIT
!= 8)
778 len
= native_encode_expr (val
, buf
, sizeof (buf
));
781 for (i
= 1; i
< len
; i
++)
782 if (buf
[i
] != buf
[0])
787 /* Generate a call to memset for PARTITION in LOOP. */
790 generate_memset_builtin (struct loop
*loop
, partition_t partition
)
792 gimple_stmt_iterator gsi
;
793 gimple stmt
, fn_call
;
794 tree mem
, fn
, nb_bytes
;
798 stmt
= DR_STMT (partition
->main_dr
);
799 loc
= gimple_location (stmt
);
801 /* The new statements will be placed before LOOP. */
802 gsi
= gsi_last_bb (loop_preheader_edge (loop
)->src
);
804 nb_bytes
= build_size_arg_loc (loc
, partition
->main_dr
, partition
->niter
,
805 partition
->plus_one
);
806 nb_bytes
= force_gimple_operand_gsi (&gsi
, nb_bytes
, true, NULL_TREE
,
807 false, GSI_CONTINUE_LINKING
);
808 mem
= build_addr_arg_loc (loc
, partition
->main_dr
, nb_bytes
);
809 mem
= force_gimple_operand_gsi (&gsi
, mem
, true, NULL_TREE
,
810 false, GSI_CONTINUE_LINKING
);
812 /* This exactly matches the pattern recognition in classify_partition. */
813 val
= gimple_assign_rhs1 (stmt
);
814 /* Handle constants like 0x15151515 and similarly
815 floating point constants etc. where all bytes are the same. */
816 int bytev
= const_with_all_bytes_same (val
);
818 val
= build_int_cst (integer_type_node
, bytev
);
819 else if (TREE_CODE (val
) == INTEGER_CST
)
820 val
= fold_convert (integer_type_node
, val
);
821 else if (!useless_type_conversion_p (integer_type_node
, TREE_TYPE (val
)))
823 tree tem
= make_ssa_name (integer_type_node
);
824 gimple cstmt
= gimple_build_assign (tem
, NOP_EXPR
, val
);
825 gsi_insert_after (&gsi
, cstmt
, GSI_CONTINUE_LINKING
);
829 fn
= build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET
));
830 fn_call
= gimple_build_call (fn
, 3, mem
, val
, nb_bytes
);
831 gsi_insert_after (&gsi
, fn_call
, GSI_CONTINUE_LINKING
);
833 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
835 fprintf (dump_file
, "generated memset");
837 fprintf (dump_file
, " zero\n");
839 fprintf (dump_file
, "\n");
843 /* Generate a call to memcpy for PARTITION in LOOP. */
846 generate_memcpy_builtin (struct loop
*loop
, partition_t partition
)
848 gimple_stmt_iterator gsi
;
849 gimple stmt
, fn_call
;
850 tree dest
, src
, fn
, nb_bytes
;
852 enum built_in_function kind
;
854 stmt
= DR_STMT (partition
->main_dr
);
855 loc
= gimple_location (stmt
);
857 /* The new statements will be placed before LOOP. */
858 gsi
= gsi_last_bb (loop_preheader_edge (loop
)->src
);
860 nb_bytes
= build_size_arg_loc (loc
, partition
->main_dr
, partition
->niter
,
861 partition
->plus_one
);
862 nb_bytes
= force_gimple_operand_gsi (&gsi
, nb_bytes
, true, NULL_TREE
,
863 false, GSI_CONTINUE_LINKING
);
864 dest
= build_addr_arg_loc (loc
, partition
->main_dr
, nb_bytes
);
865 src
= build_addr_arg_loc (loc
, partition
->secondary_dr
, nb_bytes
);
866 if (ptr_derefs_may_alias_p (dest
, src
))
867 kind
= BUILT_IN_MEMMOVE
;
869 kind
= BUILT_IN_MEMCPY
;
871 dest
= force_gimple_operand_gsi (&gsi
, dest
, true, NULL_TREE
,
872 false, GSI_CONTINUE_LINKING
);
873 src
= force_gimple_operand_gsi (&gsi
, src
, true, NULL_TREE
,
874 false, GSI_CONTINUE_LINKING
);
875 fn
= build_fold_addr_expr (builtin_decl_implicit (kind
));
876 fn_call
= gimple_build_call (fn
, 3, dest
, src
, nb_bytes
);
877 gsi_insert_after (&gsi
, fn_call
, GSI_CONTINUE_LINKING
);
879 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
881 if (kind
== BUILT_IN_MEMCPY
)
882 fprintf (dump_file
, "generated memcpy\n");
884 fprintf (dump_file
, "generated memmove\n");
888 /* Remove and destroy the loop LOOP. */
891 destroy_loop (struct loop
*loop
)
893 unsigned nbbs
= loop
->num_nodes
;
894 edge exit
= single_exit (loop
);
895 basic_block src
= loop_preheader_edge (loop
)->src
, dest
= exit
->dest
;
899 bbs
= get_loop_body_in_dom_order (loop
);
901 redirect_edge_pred (exit
, src
);
902 exit
->flags
&= ~(EDGE_TRUE_VALUE
|EDGE_FALSE_VALUE
);
903 exit
->flags
|= EDGE_FALLTHRU
;
904 cancel_loop_tree (loop
);
905 rescan_loop_exit (exit
, false, true);
907 for (i
= 0; i
< nbbs
; i
++)
909 /* We have made sure to not leave any dangling uses of SSA
910 names defined in the loop. With the exception of virtuals.
911 Make sure we replace all uses of virtual defs that will remain
912 outside of the loop with the bare symbol as delete_basic_block
913 will release them. */
914 for (gphi_iterator gsi
= gsi_start_phis (bbs
[i
]); !gsi_end_p (gsi
);
917 gphi
*phi
= gsi
.phi ();
918 if (virtual_operand_p (gimple_phi_result (phi
)))
919 mark_virtual_phi_result_for_renaming (phi
);
921 for (gimple_stmt_iterator gsi
= gsi_start_bb (bbs
[i
]); !gsi_end_p (gsi
);
924 gimple stmt
= gsi_stmt (gsi
);
925 tree vdef
= gimple_vdef (stmt
);
926 if (vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
927 mark_virtual_operand_for_renaming (vdef
);
929 delete_basic_block (bbs
[i
]);
933 set_immediate_dominator (CDI_DOMINATORS
, dest
,
934 recompute_dominator (CDI_DOMINATORS
, dest
));
937 /* Generates code for PARTITION. */
940 generate_code_for_partition (struct loop
*loop
,
941 partition_t partition
, bool copy_p
)
943 switch (partition
->kind
)
946 /* Reductions all have to be in the last partition. */
947 gcc_assert (!partition_reduction_p (partition
)
949 generate_loops_for_partition (loop
, partition
, copy_p
);
953 generate_memset_builtin (loop
, partition
);
957 generate_memcpy_builtin (loop
, partition
);
964 /* Common tail for partitions we turn into a call. If this was the last
965 partition for which we generate code, we have to destroy the loop. */
971 /* Returns a partition with all the statements needed for computing
972 the vertex V of the RDG, also including the loop exit conditions. */
975 build_rdg_partition_for_vertex (struct graph
*rdg
, int v
)
977 partition_t partition
= partition_alloc (NULL
, NULL
);
978 auto_vec
<int, 3> nodes
;
982 graphds_dfs (rdg
, &v
, 1, &nodes
, false, NULL
);
984 FOR_EACH_VEC_ELT (nodes
, i
, x
)
986 bitmap_set_bit (partition
->stmts
, x
);
987 bitmap_set_bit (partition
->loops
,
988 loop_containing_stmt (RDG_STMT (rdg
, x
))->num
);
994 /* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
995 For the moment we detect only the memset zero pattern. */
998 classify_partition (loop_p loop
, struct graph
*rdg
, partition_t partition
)
1003 data_reference_p single_load
, single_store
;
1004 bool volatiles_p
= false;
1005 bool plus_one
= false;
1007 partition
->kind
= PKIND_NORMAL
;
1008 partition
->main_dr
= NULL
;
1009 partition
->secondary_dr
= NULL
;
1010 partition
->niter
= NULL_TREE
;
1011 partition
->plus_one
= false;
1013 EXECUTE_IF_SET_IN_BITMAP (partition
->stmts
, 0, i
, bi
)
1015 gimple stmt
= RDG_STMT (rdg
, i
);
1017 if (gimple_has_volatile_ops (stmt
))
1020 /* If the stmt has uses outside of the loop mark it as reduction. */
1021 if (stmt_has_scalar_dependences_outside_loop (loop
, stmt
))
1023 partition
->reduction_p
= true;
1028 /* Perform general partition disqualification for builtins. */
1030 || !flag_tree_loop_distribute_patterns
)
1033 /* Detect memset and memcpy. */
1035 single_store
= NULL
;
1036 EXECUTE_IF_SET_IN_BITMAP (partition
->stmts
, 0, i
, bi
)
1038 gimple stmt
= RDG_STMT (rdg
, i
);
1039 data_reference_p dr
;
1042 if (gimple_code (stmt
) == GIMPLE_PHI
)
1045 /* Any scalar stmts are ok. */
1046 if (!gimple_vuse (stmt
))
1049 /* Otherwise just regular loads/stores. */
1050 if (!gimple_assign_single_p (stmt
))
1053 /* But exactly one store and/or load. */
1054 for (j
= 0; RDG_DATAREFS (rdg
, i
).iterate (j
, &dr
); ++j
)
1056 if (DR_IS_READ (dr
))
1058 if (single_load
!= NULL
)
1064 if (single_store
!= NULL
)
1074 nb_iter
= number_of_latch_executions (loop
);
1075 if (!nb_iter
|| nb_iter
== chrec_dont_know
)
1077 if (dominated_by_p (CDI_DOMINATORS
, single_exit (loop
)->src
,
1078 gimple_bb (DR_STMT (single_store
))))
1081 if (single_store
&& !single_load
)
1083 gimple stmt
= DR_STMT (single_store
);
1084 tree rhs
= gimple_assign_rhs1 (stmt
);
1085 if (const_with_all_bytes_same (rhs
) == -1
1086 && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs
))
1087 || (TYPE_MODE (TREE_TYPE (rhs
))
1088 != TYPE_MODE (unsigned_char_type_node
))))
1090 if (TREE_CODE (rhs
) == SSA_NAME
1091 && !SSA_NAME_IS_DEFAULT_DEF (rhs
)
1092 && flow_bb_inside_loop_p (loop
, gimple_bb (SSA_NAME_DEF_STMT (rhs
))))
1094 if (!adjacent_dr_p (single_store
)
1095 || !dominated_by_p (CDI_DOMINATORS
,
1096 loop
->latch
, gimple_bb (stmt
)))
1098 partition
->kind
= PKIND_MEMSET
;
1099 partition
->main_dr
= single_store
;
1100 partition
->niter
= nb_iter
;
1101 partition
->plus_one
= plus_one
;
1103 else if (single_store
&& single_load
)
1105 gimple store
= DR_STMT (single_store
);
1106 gimple load
= DR_STMT (single_load
);
1107 /* Direct aggregate copy or via an SSA name temporary. */
1109 && gimple_assign_lhs (load
) != gimple_assign_rhs1 (store
))
1111 if (!adjacent_dr_p (single_store
)
1112 || !adjacent_dr_p (single_load
)
1113 || !operand_equal_p (DR_STEP (single_store
),
1114 DR_STEP (single_load
), 0)
1115 || !dominated_by_p (CDI_DOMINATORS
,
1116 loop
->latch
, gimple_bb (store
)))
1118 /* Now check that if there is a dependence this dependence is
1119 of a suitable form for memmove. */
1120 vec
<loop_p
> loops
= vNULL
;
1122 loops
.safe_push (loop
);
1123 ddr
= initialize_data_dependence_relation (single_load
, single_store
,
1125 compute_affine_dependence (ddr
, loop
);
1126 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
1128 free_dependence_relation (ddr
);
1132 if (DDR_ARE_DEPENDENT (ddr
) != chrec_known
)
1134 if (DDR_NUM_DIST_VECTS (ddr
) == 0)
1136 free_dependence_relation (ddr
);
1140 lambda_vector dist_v
;
1141 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr
), i
, dist_v
)
1143 int dist
= dist_v
[index_in_loop_nest (loop
->num
,
1144 DDR_LOOP_NEST (ddr
))];
1145 if (dist
> 0 && !DDR_REVERSED_P (ddr
))
1147 free_dependence_relation (ddr
);
1153 free_dependence_relation (ddr
);
1155 partition
->kind
= PKIND_MEMCPY
;
1156 partition
->main_dr
= single_store
;
1157 partition
->secondary_dr
= single_load
;
1158 partition
->niter
= nb_iter
;
1159 partition
->plus_one
= plus_one
;
1163 /* For a data reference REF, return the declaration of its base
1164 address or NULL_TREE if the base is not determined. */
1167 ref_base_address (data_reference_p dr
)
1169 tree base_address
= DR_BASE_ADDRESS (dr
);
1171 && TREE_CODE (base_address
) == ADDR_EXPR
)
1172 return TREE_OPERAND (base_address
, 0);
1174 return base_address
;
1177 /* Returns true when PARTITION1 and PARTITION2 have similar memory
1181 similar_memory_accesses (struct graph
*rdg
, partition_t partition1
,
1182 partition_t partition2
)
1184 unsigned i
, j
, k
, l
;
1185 bitmap_iterator bi
, bj
;
1186 data_reference_p ref1
, ref2
;
1188 /* First check whether in the intersection of the two partitions are
1189 any loads or stores. Common loads are the situation that happens
1191 EXECUTE_IF_AND_IN_BITMAP (partition1
->stmts
, partition2
->stmts
, 0, i
, bi
)
1192 if (RDG_MEM_WRITE_STMT (rdg
, i
)
1193 || RDG_MEM_READS_STMT (rdg
, i
))
1196 /* Then check all data-references against each other. */
1197 EXECUTE_IF_SET_IN_BITMAP (partition1
->stmts
, 0, i
, bi
)
1198 if (RDG_MEM_WRITE_STMT (rdg
, i
)
1199 || RDG_MEM_READS_STMT (rdg
, i
))
1200 EXECUTE_IF_SET_IN_BITMAP (partition2
->stmts
, 0, j
, bj
)
1201 if (RDG_MEM_WRITE_STMT (rdg
, j
)
1202 || RDG_MEM_READS_STMT (rdg
, j
))
1204 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg
, i
), k
, ref1
)
1206 tree base1
= ref_base_address (ref1
);
1208 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg
, j
), l
, ref2
)
1209 if (base1
== ref_base_address (ref2
))
1217 /* Aggregate several components into a useful partition that is
1218 registered in the PARTITIONS vector. Partitions will be
1219 distributed in different loops. */
1222 rdg_build_partitions (struct graph
*rdg
,
1223 vec
<gimple
> starting_stmts
,
1224 vec
<partition_t
> *partitions
)
1226 bitmap processed
= BITMAP_ALLOC (NULL
);
1230 FOR_EACH_VEC_ELT (starting_stmts
, i
, stmt
)
1232 int v
= rdg_vertex_for_stmt (rdg
, stmt
);
1234 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1236 "ldist asked to generate code for vertex %d\n", v
);
1238 /* If the vertex is already contained in another partition so
1239 is the partition rooted at it. */
1240 if (bitmap_bit_p (processed
, v
))
1243 partition_t partition
= build_rdg_partition_for_vertex (rdg
, v
);
1244 bitmap_ior_into (processed
, partition
->stmts
);
1246 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1248 fprintf (dump_file
, "ldist useful partition:\n");
1249 dump_bitmap (dump_file
, partition
->stmts
);
1252 partitions
->safe_push (partition
);
1255 /* All vertices should have been assigned to at least one partition now,
1256 other than vertices belonging to dead code. */
1258 BITMAP_FREE (processed
);
1261 /* Dump to FILE the PARTITIONS. */
1264 dump_rdg_partitions (FILE *file
, vec
<partition_t
> partitions
)
1267 partition_t partition
;
1269 FOR_EACH_VEC_ELT (partitions
, i
, partition
)
1270 debug_bitmap_file (file
, partition
->stmts
);
1273 /* Debug PARTITIONS. */
1274 extern void debug_rdg_partitions (vec
<partition_t
> );
1277 debug_rdg_partitions (vec
<partition_t
> partitions
)
1279 dump_rdg_partitions (stderr
, partitions
);
1282 /* Returns the number of read and write operations in the RDG. */
1285 number_of_rw_in_rdg (struct graph
*rdg
)
1289 for (i
= 0; i
< rdg
->n_vertices
; i
++)
1291 if (RDG_MEM_WRITE_STMT (rdg
, i
))
1294 if (RDG_MEM_READS_STMT (rdg
, i
))
1301 /* Returns the number of read and write operations in a PARTITION of
1305 number_of_rw_in_partition (struct graph
*rdg
, partition_t partition
)
1311 EXECUTE_IF_SET_IN_BITMAP (partition
->stmts
, 0, i
, ii
)
1313 if (RDG_MEM_WRITE_STMT (rdg
, i
))
1316 if (RDG_MEM_READS_STMT (rdg
, i
))
1323 /* Returns true when one of the PARTITIONS contains all the read or
1324 write operations of RDG. */
1327 partition_contains_all_rw (struct graph
*rdg
,
1328 vec
<partition_t
> partitions
)
1331 partition_t partition
;
1332 int nrw
= number_of_rw_in_rdg (rdg
);
1334 FOR_EACH_VEC_ELT (partitions
, i
, partition
)
1335 if (nrw
== number_of_rw_in_partition (rdg
, partition
))
1341 /* Compute partition dependence created by the data references in DRS1
1342 and DRS2 and modify and return DIR according to that. */
1345 pg_add_dependence_edges (struct graph
*rdg
, vec
<loop_p
> loops
, int dir
,
1346 vec
<data_reference_p
> drs1
,
1347 vec
<data_reference_p
> drs2
)
1349 data_reference_p dr1
, dr2
;
1351 /* dependence direction - 0 is no dependence, -1 is back,
1352 1 is forth, 2 is both (we can stop then, merging will occur). */
1353 for (int ii
= 0; drs1
.iterate (ii
, &dr1
); ++ii
)
1354 for (int jj
= 0; drs2
.iterate (jj
, &dr2
); ++jj
)
1356 data_reference_p saved_dr1
= dr1
;
1359 /* Re-shuffle data-refs to be in dominator order. */
1360 if (rdg_vertex_for_stmt (rdg
, DR_STMT (dr1
))
1361 > rdg_vertex_for_stmt (rdg
, DR_STMT (dr2
)))
1363 std::swap (dr1
, dr2
);
1364 this_dir
= -this_dir
;
1366 ddr
= initialize_data_dependence_relation (dr1
, dr2
, loops
);
1367 compute_affine_dependence (ddr
, loops
[0]);
1368 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
1370 else if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
1372 if (DDR_REVERSED_P (ddr
))
1374 std::swap (dr1
, dr2
);
1375 this_dir
= -this_dir
;
1377 /* Known dependences can still be unordered througout the
1378 iteration space, see gcc.dg/tree-ssa/ldist-16.c. */
1379 if (DDR_NUM_DIST_VECTS (ddr
) != 1)
1381 /* If the overlap is exact preserve stmt order. */
1382 else if (lambda_vector_zerop (DDR_DIST_VECT (ddr
, 0), 1))
1386 /* Else as the distance vector is lexicographic positive
1387 swap the dependence direction. */
1388 this_dir
= -this_dir
;
1393 free_dependence_relation (ddr
);
1396 else if (dir
!= this_dir
)
1398 /* Shuffle "back" dr1. */
1404 /* Compare postorder number of the partition graph vertices V1 and V2. */
1407 pgcmp (const void *v1_
, const void *v2_
)
1409 const vertex
*v1
= (const vertex
*)v1_
;
1410 const vertex
*v2
= (const vertex
*)v2_
;
1411 return v2
->post
- v1
->post
;
1414 /* Distributes the code from LOOP in such a way that producer
1415 statements are placed before consumer statements. Tries to separate
1416 only the statements from STMTS into separate loops.
1417 Returns the number of distributed loops. */
1420 distribute_loop (struct loop
*loop
, vec
<gimple
> stmts
,
1421 control_dependences
*cd
, int *nb_calls
)
1424 partition_t partition
;
1431 auto_vec
<loop_p
, 3> loop_nest
;
1432 if (!find_loop_nest (loop
, &loop_nest
))
1435 rdg
= build_rdg (loop_nest
, cd
);
1438 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1440 "Loop %d not distributed: failed to build the RDG.\n",
1446 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1447 dump_rdg (dump_file
, rdg
);
1449 auto_vec
<partition_t
, 3> partitions
;
1450 rdg_build_partitions (rdg
, stmts
, &partitions
);
1452 any_builtin
= false;
1453 FOR_EACH_VEC_ELT (partitions
, i
, partition
)
1455 classify_partition (loop
, rdg
, partition
);
1456 any_builtin
|= partition_builtin_p (partition
);
1459 /* If we are only distributing patterns but did not detect any,
1461 if (!flag_tree_loop_distribution
1468 /* If we are only distributing patterns fuse all partitions that
1469 were not classified as builtins. This also avoids chopping
1470 a loop into pieces, separated by builtin calls. That is, we
1471 only want no or a single loop body remaining. */
1473 if (!flag_tree_loop_distribution
)
1475 for (i
= 0; partitions
.iterate (i
, &into
); ++i
)
1476 if (!partition_builtin_p (into
))
1478 for (++i
; partitions
.iterate (i
, &partition
); ++i
)
1479 if (!partition_builtin_p (partition
))
1481 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1483 fprintf (dump_file
, "fusing non-builtin partitions\n");
1484 dump_bitmap (dump_file
, into
->stmts
);
1485 dump_bitmap (dump_file
, partition
->stmts
);
1487 partition_merge_into (into
, partition
);
1488 partitions
.unordered_remove (i
);
1489 partition_free (partition
);
1494 /* Due to limitations in the transform phase we have to fuse all
1495 reduction partitions into the last partition so the existing
1496 loop will contain all loop-closed PHI nodes. */
1497 for (i
= 0; partitions
.iterate (i
, &into
); ++i
)
1498 if (partition_reduction_p (into
))
1500 for (i
= i
+ 1; partitions
.iterate (i
, &partition
); ++i
)
1501 if (partition_reduction_p (partition
))
1503 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1505 fprintf (dump_file
, "fusing partitions\n");
1506 dump_bitmap (dump_file
, into
->stmts
);
1507 dump_bitmap (dump_file
, partition
->stmts
);
1508 fprintf (dump_file
, "because they have reductions\n");
1510 partition_merge_into (into
, partition
);
1511 partitions
.unordered_remove (i
);
1512 partition_free (partition
);
1516 /* Apply our simple cost model - fuse partitions with similar
1518 for (i
= 0; partitions
.iterate (i
, &into
); ++i
)
1520 if (partition_builtin_p (into
))
1523 partitions
.iterate (j
, &partition
); ++j
)
1525 if (!partition_builtin_p (partition
)
1526 && similar_memory_accesses (rdg
, into
, partition
))
1528 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1530 fprintf (dump_file
, "fusing partitions\n");
1531 dump_bitmap (dump_file
, into
->stmts
);
1532 dump_bitmap (dump_file
, partition
->stmts
);
1533 fprintf (dump_file
, "because they have similar "
1534 "memory accesses\n");
1536 partition_merge_into (into
, partition
);
1537 partitions
.unordered_remove (j
);
1538 partition_free (partition
);
1544 /* Build the partition dependency graph. */
1545 if (partitions
.length () > 1)
1547 pg
= new_graph (partitions
.length ());
1549 partition_t partition
;
1550 vec
<data_reference_p
> writes
;
1551 vec
<data_reference_p
> reads
;
1553 #define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
1554 for (i
= 0; partitions
.iterate (i
, &partition
); ++i
)
1556 vertex
*v
= &pg
->vertices
[i
];
1557 pgdata
*data
= new pgdata
;
1558 data_reference_p dr
;
1559 /* FIXME - leaks. */
1563 data
->partition
= partition
;
1564 data
->reads
= vNULL
;
1565 data
->writes
= vNULL
;
1566 EXECUTE_IF_SET_IN_BITMAP (partition
->stmts
, 0, j
, bi
)
1567 for (int k
= 0; RDG_DATAREFS (rdg
, j
).iterate (k
, &dr
); ++k
)
1568 if (DR_IS_READ (dr
))
1569 data
->reads
.safe_push (dr
);
1571 data
->writes
.safe_push (dr
);
1573 partition_t partition1
, partition2
;
1574 for (i
= 0; partitions
.iterate (i
, &partition1
); ++i
)
1575 for (int j
= i
+ 1; partitions
.iterate (j
, &partition2
); ++j
)
1577 /* dependence direction - 0 is no dependence, -1 is back,
1578 1 is forth, 2 is both (we can stop then, merging will occur). */
1580 dir
= pg_add_dependence_edges (rdg
, loop_nest
, dir
,
1584 dir
= pg_add_dependence_edges (rdg
, loop_nest
, dir
,
1588 dir
= pg_add_dependence_edges (rdg
, loop_nest
, dir
,
1591 if (dir
== 1 || dir
== 2)
1592 add_edge (pg
, i
, j
);
1593 if (dir
== -1 || dir
== 2)
1594 add_edge (pg
, j
, i
);
1597 /* Add edges to the reduction partition (if any) to force it last. */
1599 for (j
= 0; partitions
.iterate (j
, &partition
); ++j
)
1600 if (partition_reduction_p (partition
))
1602 if (j
< partitions
.length ())
1604 for (unsigned i
= 0; partitions
.iterate (i
, &partition
); ++i
)
1606 add_edge (pg
, i
, j
);
1609 /* Compute partitions we cannot separate and fuse them. */
1610 num_sccs
= graphds_scc (pg
, NULL
);
1611 for (i
= 0; i
< num_sccs
; ++i
)
1615 for (j
= 0; partitions
.iterate (j
, &first
); ++j
)
1616 if (pg
->vertices
[j
].component
== i
)
1618 for (j
= j
+ 1; partitions
.iterate (j
, &partition
); ++j
)
1619 if (pg
->vertices
[j
].component
== i
)
1621 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1623 fprintf (dump_file
, "fusing partitions\n");
1624 dump_bitmap (dump_file
, first
->stmts
);
1625 dump_bitmap (dump_file
, partition
->stmts
);
1626 fprintf (dump_file
, "because they are in the same "
1627 "dependence SCC\n");
1629 partition_merge_into (first
, partition
);
1630 partitions
[j
] = NULL
;
1631 partition_free (partition
);
1632 PGDATA (j
)->partition
= NULL
;
1636 /* Now order the remaining nodes in postorder. */
1637 qsort (pg
->vertices
, pg
->n_vertices
, sizeof (vertex
), pgcmp
);
1638 partitions
.truncate (0);
1639 for (i
= 0; i
< pg
->n_vertices
; ++i
)
1641 pgdata
*data
= PGDATA (i
);
1642 if (data
->partition
)
1643 partitions
.safe_push (data
->partition
);
1644 data
->reads
.release ();
1645 data
->writes
.release ();
1648 gcc_assert (partitions
.length () == (unsigned)num_sccs
);
1652 nbp
= partitions
.length ();
1654 || (nbp
== 1 && !partition_builtin_p (partitions
[0]))
1655 || (nbp
> 1 && partition_contains_all_rw (rdg
, partitions
)))
1661 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1662 dump_rdg_partitions (dump_file
, partitions
);
1664 FOR_EACH_VEC_ELT (partitions
, i
, partition
)
1666 if (partition_builtin_p (partition
))
1668 generate_code_for_partition (loop
, partition
, i
< nbp
- 1);
1673 FOR_EACH_VEC_ELT (partitions
, i
, partition
)
1674 partition_free (partition
);
1677 return nbp
- *nb_calls
;
1680 /* Distribute all loops in the current function. */
1684 const pass_data pass_data_loop_distribution
=
1686 GIMPLE_PASS
, /* type */
1688 OPTGROUP_LOOP
, /* optinfo_flags */
1689 TV_TREE_LOOP_DISTRIBUTION
, /* tv_id */
1690 ( PROP_cfg
| PROP_ssa
), /* properties_required */
1691 0, /* properties_provided */
1692 0, /* properties_destroyed */
1693 0, /* todo_flags_start */
1694 0, /* todo_flags_finish */
1697 class pass_loop_distribution
: public gimple_opt_pass
1700 pass_loop_distribution (gcc::context
*ctxt
)
1701 : gimple_opt_pass (pass_data_loop_distribution
, ctxt
)
1704 /* opt_pass methods: */
1705 virtual bool gate (function
*)
1707 return flag_tree_loop_distribution
1708 || flag_tree_loop_distribute_patterns
;
1711 virtual unsigned int execute (function
*);
1713 }; // class pass_loop_distribution
1716 pass_loop_distribution::execute (function
*fun
)
1719 bool changed
= false;
1721 control_dependences
*cd
= NULL
;
1723 FOR_ALL_BB_FN (bb
, fun
)
1725 gimple_stmt_iterator gsi
;
1726 for (gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1727 gimple_set_uid (gsi_stmt (gsi
), -1);
1728 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1729 gimple_set_uid (gsi_stmt (gsi
), -1);
1732 /* We can at the moment only distribute non-nested loops, thus restrict
1733 walking to innermost loops. */
1734 FOR_EACH_LOOP (loop
, LI_ONLY_INNERMOST
)
1736 auto_vec
<gimple
> work_list
;
1738 int num
= loop
->num
;
1741 /* If the loop doesn't have a single exit we will fail anyway,
1742 so do that early. */
1743 if (!single_exit (loop
))
1746 /* Only optimize hot loops. */
1747 if (!optimize_loop_for_speed_p (loop
))
1750 /* Initialize the worklist with stmts we seed the partitions with. */
1751 bbs
= get_loop_body_in_dom_order (loop
);
1752 for (i
= 0; i
< loop
->num_nodes
; ++i
)
1754 for (gphi_iterator gsi
= gsi_start_phis (bbs
[i
]);
1758 gphi
*phi
= gsi
.phi ();
1759 if (virtual_operand_p (gimple_phi_result (phi
)))
1761 /* Distribute stmts which have defs that are used outside of
1763 if (!stmt_has_scalar_dependences_outside_loop (loop
, phi
))
1765 work_list
.safe_push (phi
);
1767 for (gimple_stmt_iterator gsi
= gsi_start_bb (bbs
[i
]);
1771 gimple stmt
= gsi_stmt (gsi
);
1773 /* If there is a stmt with side-effects bail out - we
1774 cannot and should not distribute this loop. */
1775 if (gimple_has_side_effects (stmt
))
1777 work_list
.truncate (0);
1781 /* Distribute stmts which have defs that are used outside of
1783 if (stmt_has_scalar_dependences_outside_loop (loop
, stmt
))
1785 /* Otherwise only distribute stores for now. */
1786 else if (!gimple_vdef (stmt
))
1789 work_list
.safe_push (stmt
);
1795 int nb_generated_loops
= 0;
1796 int nb_generated_calls
= 0;
1797 location_t loc
= find_loop_location (loop
);
1798 if (work_list
.length () > 0)
1802 calculate_dominance_info (CDI_DOMINATORS
);
1803 calculate_dominance_info (CDI_POST_DOMINATORS
);
1804 cd
= new control_dependences (create_edge_list ());
1805 free_dominance_info (CDI_POST_DOMINATORS
);
1807 nb_generated_loops
= distribute_loop (loop
, work_list
, cd
,
1808 &nb_generated_calls
);
1811 if (nb_generated_loops
+ nb_generated_calls
> 0)
1814 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
,
1815 loc
, "Loop %d distributed: split to %d loops "
1816 "and %d library calls.\n",
1817 num
, nb_generated_loops
, nb_generated_calls
);
1819 else if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1820 fprintf (dump_file
, "Loop %d is the same.\n", num
);
1828 /* Cached scalar evolutions now may refer to wrong or non-existing
1831 mark_virtual_operands_for_renaming (fun
);
1832 rewrite_into_loop_closed_ssa (NULL
, TODO_update_ssa
);
1835 #ifdef ENABLE_CHECKING
1836 verify_loop_structure ();
1845 make_pass_loop_distribution (gcc::context
*ctxt
)
1847 return new pass_loop_distribution (ctxt
);