2 Copyright (C) 2006-2013 Free Software Foundation, Inc.
3 Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
4 and Sebastian Pop <sebastian.pop@amd.com>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 3, or (at your option) any
13 GCC is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* This pass performs loop distribution: for example, the loop
39 This pass uses an RDG, Reduced Dependence Graph built on top of the
40 data dependence relations. The RDG is then topologically sorted to
41 obtain a map of information producers/consumers based on which it
42 generates the new loops. */
46 #include "coretypes.h"
49 #include "gimple-iterator.h"
50 #include "gimplify-me.h"
51 #include "stor-layout.h"
52 #include "gimple-ssa.h"
54 #include "tree-phinodes.h"
55 #include "ssa-iterators.h"
56 #include "stringpool.h"
57 #include "tree-ssanames.h"
58 #include "tree-ssa-loop-manip.h"
59 #include "tree-ssa-loop.h"
60 #include "tree-into-ssa.h"
63 #include "tree-chrec.h"
64 #include "tree-data-ref.h"
65 #include "tree-scalar-evolution.h"
66 #include "tree-pass.h"
67 #include "gimple-pretty-print.h"
68 #include "tree-vectorizer.h"
71 /* A Reduced Dependence Graph (RDG) vertex representing a statement. */
72 typedef struct rdg_vertex
74 /* The statement represented by this vertex. */
77 /* Vector of data-references in this statement. */
78 vec
<data_reference_p
> datarefs
;
80 /* True when the statement contains a write to memory. */
83 /* True when the statement contains a read from memory. */
87 #define RDGV_STMT(V) ((struct rdg_vertex *) ((V)->data))->stmt
88 #define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
89 #define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
90 #define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
91 #define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
92 #define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
93 #define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
94 #define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
/* Data dependence type.  */

enum rdg_dep_type
{
  /* Read After Write (RAW).  */
  flow_dd = 'f',

  /* Control dependence (execute conditional on).  */
  control_dd = 'c'
};

/* Dependence information attached to an edge of the RDG.  */

typedef struct rdg_edge
{
  /* Type of the dependence.  */
  enum rdg_dep_type type;
} *rdg_edge_p;

#define RDGE_TYPE(E) ((struct rdg_edge *) ((E)->data))->type
117 /* Dump vertex I in RDG to FILE. */
120 dump_rdg_vertex (FILE *file
, struct graph
*rdg
, int i
)
122 struct vertex
*v
= &(rdg
->vertices
[i
]);
123 struct graph_edge
*e
;
125 fprintf (file
, "(vertex %d: (%s%s) (in:", i
,
126 RDG_MEM_WRITE_STMT (rdg
, i
) ? "w" : "",
127 RDG_MEM_READS_STMT (rdg
, i
) ? "r" : "");
130 for (e
= v
->pred
; e
; e
= e
->pred_next
)
131 fprintf (file
, " %d", e
->src
);
133 fprintf (file
, ") (out:");
136 for (e
= v
->succ
; e
; e
= e
->succ_next
)
137 fprintf (file
, " %d", e
->dest
);
139 fprintf (file
, ")\n");
140 print_gimple_stmt (file
, RDGV_STMT (v
), 0, TDF_VOPS
|TDF_MEMSYMS
);
141 fprintf (file
, ")\n");
144 /* Call dump_rdg_vertex on stderr. */
147 debug_rdg_vertex (struct graph
*rdg
, int i
)
149 dump_rdg_vertex (stderr
, rdg
, i
);
152 /* Dump the reduced dependence graph RDG to FILE. */
155 dump_rdg (FILE *file
, struct graph
*rdg
)
157 fprintf (file
, "(rdg\n");
158 for (int i
= 0; i
< rdg
->n_vertices
; i
++)
159 dump_rdg_vertex (file
, rdg
, i
);
160 fprintf (file
, ")\n");
163 /* Call dump_rdg on stderr. */
166 debug_rdg (struct graph
*rdg
)
168 dump_rdg (stderr
, rdg
);
172 dot_rdg_1 (FILE *file
, struct graph
*rdg
)
175 pretty_printer buffer
;
176 pp_needs_newline (&buffer
) = false;
177 buffer
.buffer
->stream
= file
;
179 fprintf (file
, "digraph RDG {\n");
181 for (i
= 0; i
< rdg
->n_vertices
; i
++)
183 struct vertex
*v
= &(rdg
->vertices
[i
]);
184 struct graph_edge
*e
;
186 fprintf (file
, "%d [label=\"[%d] ", i
, i
);
187 pp_gimple_stmt_1 (&buffer
, RDGV_STMT (v
), 0, TDF_SLIM
);
189 fprintf (file
, "\"]\n");
191 /* Highlight reads from memory. */
192 if (RDG_MEM_READS_STMT (rdg
, i
))
193 fprintf (file
, "%d [style=filled, fillcolor=green]\n", i
);
195 /* Highlight stores to memory. */
196 if (RDG_MEM_WRITE_STMT (rdg
, i
))
197 fprintf (file
, "%d [style=filled, fillcolor=red]\n", i
);
200 for (e
= v
->succ
; e
; e
= e
->succ_next
)
201 switch (RDGE_TYPE (e
))
204 /* These are the most common dependences: don't print these. */
205 fprintf (file
, "%d -> %d \n", i
, e
->dest
);
209 fprintf (file
, "%d -> %d [label=control] \n", i
, e
->dest
);
217 fprintf (file
, "}\n\n");
220 /* Display the Reduced Dependence Graph using dotty. */
223 dot_rdg (struct graph
*rdg
)
225 /* When debugging, you may want to enable the following code. */
227 FILE *file
= popen ("dot -Tx11", "w");
230 dot_rdg_1 (file
, rdg
);
232 close (fileno (file
));
235 dot_rdg_1 (stderr
, rdg
);
239 /* Returns the index of STMT in RDG. */
242 rdg_vertex_for_stmt (struct graph
*rdg ATTRIBUTE_UNUSED
, gimple stmt
)
244 int index
= gimple_uid (stmt
);
245 gcc_checking_assert (index
== -1 || RDG_STMT (rdg
, index
) == stmt
);
249 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
250 the index of DEF in RDG. */
253 create_rdg_edges_for_scalar (struct graph
*rdg
, tree def
, int idef
)
255 use_operand_p imm_use_p
;
256 imm_use_iterator iterator
;
258 FOR_EACH_IMM_USE_FAST (imm_use_p
, iterator
, def
)
260 struct graph_edge
*e
;
261 int use
= rdg_vertex_for_stmt (rdg
, USE_STMT (imm_use_p
));
266 e
= add_edge (rdg
, idef
, use
);
267 e
->data
= XNEW (struct rdg_edge
);
268 RDGE_TYPE (e
) = flow_dd
;
272 /* Creates an edge for the control dependences of BB to the vertex V. */
275 create_edge_for_control_dependence (struct graph
*rdg
, basic_block bb
,
276 int v
, control_dependences
*cd
)
280 EXECUTE_IF_SET_IN_BITMAP (cd
->get_edges_dependent_on (bb
->index
),
283 basic_block cond_bb
= cd
->get_edge (edge_n
)->src
;
284 gimple stmt
= last_stmt (cond_bb
);
285 if (stmt
&& is_ctrl_stmt (stmt
))
287 struct graph_edge
*e
;
288 int c
= rdg_vertex_for_stmt (rdg
, stmt
);
292 e
= add_edge (rdg
, c
, v
);
293 e
->data
= XNEW (struct rdg_edge
);
294 RDGE_TYPE (e
) = control_dd
;
299 /* Creates the edges of the reduced dependence graph RDG. */
302 create_rdg_flow_edges (struct graph
*rdg
)
308 for (i
= 0; i
< rdg
->n_vertices
; i
++)
309 FOR_EACH_PHI_OR_STMT_DEF (def_p
, RDG_STMT (rdg
, i
),
311 create_rdg_edges_for_scalar (rdg
, DEF_FROM_PTR (def_p
), i
);
314 /* Creates the edges of the reduced dependence graph RDG. */
317 create_rdg_cd_edges (struct graph
*rdg
, control_dependences
*cd
)
321 for (i
= 0; i
< rdg
->n_vertices
; i
++)
323 gimple stmt
= RDG_STMT (rdg
, i
);
324 if (gimple_code (stmt
) == GIMPLE_PHI
)
328 FOR_EACH_EDGE (e
, ei
, gimple_bb (stmt
)->preds
)
329 create_edge_for_control_dependence (rdg
, e
->src
, i
, cd
);
332 create_edge_for_control_dependence (rdg
, gimple_bb (stmt
), i
, cd
);
336 /* Build the vertices of the reduced dependence graph RDG. Return false
340 create_rdg_vertices (struct graph
*rdg
, vec
<gimple
> stmts
, loop_p loop
,
341 vec
<data_reference_p
> *datarefs
)
346 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
348 struct vertex
*v
= &(rdg
->vertices
[i
]);
350 /* Record statement to vertex mapping. */
351 gimple_set_uid (stmt
, i
);
353 v
->data
= XNEW (struct rdg_vertex
);
354 RDGV_STMT (v
) = stmt
;
355 RDGV_DATAREFS (v
).create (0);
356 RDGV_HAS_MEM_WRITE (v
) = false;
357 RDGV_HAS_MEM_READS (v
) = false;
358 if (gimple_code (stmt
) == GIMPLE_PHI
)
361 unsigned drp
= datarefs
->length ();
362 if (!find_data_references_in_stmt (loop
, stmt
, datarefs
))
364 for (unsigned j
= drp
; j
< datarefs
->length (); ++j
)
366 data_reference_p dr
= (*datarefs
)[j
];
368 RDGV_HAS_MEM_READS (v
) = true;
370 RDGV_HAS_MEM_WRITE (v
) = true;
371 RDGV_DATAREFS (v
).safe_push (dr
);
377 /* Initialize STMTS with all the statements of LOOP. The order in
378 which we discover statements is important as
379 generate_loops_for_partition is using the same traversal for
380 identifying statements in loop copies. */
383 stmts_from_loop (struct loop
*loop
, vec
<gimple
> *stmts
)
386 basic_block
*bbs
= get_loop_body_in_dom_order (loop
);
388 for (i
= 0; i
< loop
->num_nodes
; i
++)
390 basic_block bb
= bbs
[i
];
391 gimple_stmt_iterator bsi
;
394 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
395 if (!virtual_operand_p (gimple_phi_result (gsi_stmt (bsi
))))
396 stmts
->safe_push (gsi_stmt (bsi
));
398 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
400 stmt
= gsi_stmt (bsi
);
401 if (gimple_code (stmt
) != GIMPLE_LABEL
&& !is_gimple_debug (stmt
))
402 stmts
->safe_push (stmt
);
409 /* Free the reduced dependence graph RDG. */
412 free_rdg (struct graph
*rdg
)
416 for (i
= 0; i
< rdg
->n_vertices
; i
++)
418 struct vertex
*v
= &(rdg
->vertices
[i
]);
419 struct graph_edge
*e
;
421 for (e
= v
->succ
; e
; e
= e
->succ_next
)
426 gimple_set_uid (RDGV_STMT (v
), -1);
427 free_data_refs (RDGV_DATAREFS (v
));
435 /* Build the Reduced Dependence Graph (RDG) with one vertex per
436 statement of the loop nest LOOP_NEST, and one edge per data dependence or
437 scalar dependence. */
439 static struct graph
*
440 build_rdg (vec
<loop_p
> loop_nest
, control_dependences
*cd
)
443 vec
<data_reference_p
> datarefs
;
445 /* Create the RDG vertices from the stmts of the loop nest. */
446 stack_vec
<gimple
, 10> stmts
;
447 stmts_from_loop (loop_nest
[0], &stmts
);
448 rdg
= new_graph (stmts
.length ());
449 datarefs
.create (10);
450 if (!create_rdg_vertices (rdg
, stmts
, loop_nest
[0], &datarefs
))
458 create_rdg_flow_edges (rdg
);
460 create_rdg_cd_edges (rdg
, cd
);
469 enum partition_kind
{
470 PKIND_NORMAL
, PKIND_MEMSET
, PKIND_MEMCPY
473 typedef struct partition_s
478 enum partition_kind kind
;
479 /* data-references a kind != PKIND_NORMAL partition is about. */
480 data_reference_p main_dr
;
481 data_reference_p secondary_dr
;
487 /* Allocate and initialize a partition from BITMAP. */
490 partition_alloc (bitmap stmts
, bitmap loops
)
492 partition_t partition
= XCNEW (struct partition_s
);
493 partition
->stmts
= stmts
? stmts
: BITMAP_ALLOC (NULL
);
494 partition
->loops
= loops
? loops
: BITMAP_ALLOC (NULL
);
495 partition
->reduction_p
= false;
496 partition
->kind
= PKIND_NORMAL
;
500 /* Free PARTITION. */
503 partition_free (partition_t partition
)
505 BITMAP_FREE (partition
->stmts
);
506 BITMAP_FREE (partition
->loops
);
510 /* Returns true if the partition can be generated as a builtin. */
513 partition_builtin_p (partition_t partition
)
515 return partition
->kind
!= PKIND_NORMAL
;
518 /* Returns true if the partition contains a reduction. */
521 partition_reduction_p (partition_t partition
)
523 return partition
->reduction_p
;
526 /* Merge PARTITION into the partition DEST. */
529 partition_merge_into (partition_t dest
, partition_t partition
)
531 dest
->kind
= PKIND_NORMAL
;
532 bitmap_ior_into (dest
->stmts
, partition
->stmts
);
533 if (partition_reduction_p (partition
))
534 dest
->reduction_p
= true;
538 /* Returns true when DEF is an SSA_NAME defined in LOOP and used after
542 ssa_name_has_uses_outside_loop_p (tree def
, loop_p loop
)
544 imm_use_iterator imm_iter
;
547 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, def
)
549 gimple use_stmt
= USE_STMT (use_p
);
550 if (!is_gimple_debug (use_stmt
)
551 && loop
!= loop_containing_stmt (use_stmt
))
558 /* Returns true when STMT defines a scalar variable used after the
562 stmt_has_scalar_dependences_outside_loop (loop_p loop
, gimple stmt
)
567 if (gimple_code (stmt
) == GIMPLE_PHI
)
568 return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt
), loop
);
570 FOR_EACH_SSA_DEF_OPERAND (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
571 if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p
), loop
))
577 /* Return a copy of LOOP placed before LOOP. */
580 copy_loop_before (struct loop
*loop
)
583 edge preheader
= loop_preheader_edge (loop
);
585 initialize_original_copy_tables ();
586 res
= slpeel_tree_duplicate_loop_to_edge_cfg (loop
, preheader
);
587 gcc_assert (res
!= NULL
);
588 free_original_copy_tables ();
589 delete_update_ssa ();
594 /* Creates an empty basic block after LOOP. */
597 create_bb_after_loop (struct loop
*loop
)
599 edge exit
= single_exit (loop
);
607 /* Generate code for PARTITION from the code in LOOP. The loop is
608 copied when COPY_P is true. All the statements not flagged in the
609 PARTITION bitmap are removed from the loop or from its copy. The
610 statements are indexed in sequence inside a basic block, and the
611 basic blocks of a loop are taken in dom order. */
614 generate_loops_for_partition (struct loop
*loop
, partition_t partition
,
618 gimple_stmt_iterator bsi
;
623 loop
= copy_loop_before (loop
);
624 gcc_assert (loop
!= NULL
);
625 create_preheader (loop
, CP_SIMPLE_PREHEADERS
);
626 create_bb_after_loop (loop
);
629 /* Remove stmts not in the PARTITION bitmap. */
630 bbs
= get_loop_body_in_dom_order (loop
);
632 if (MAY_HAVE_DEBUG_STMTS
)
633 for (i
= 0; i
< loop
->num_nodes
; i
++)
635 basic_block bb
= bbs
[i
];
637 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
639 gimple phi
= gsi_stmt (bsi
);
640 if (!virtual_operand_p (gimple_phi_result (phi
))
641 && !bitmap_bit_p (partition
->stmts
, gimple_uid (phi
)))
642 reset_debug_uses (phi
);
645 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
647 gimple stmt
= gsi_stmt (bsi
);
648 if (gimple_code (stmt
) != GIMPLE_LABEL
649 && !is_gimple_debug (stmt
)
650 && !bitmap_bit_p (partition
->stmts
, gimple_uid (stmt
)))
651 reset_debug_uses (stmt
);
655 for (i
= 0; i
< loop
->num_nodes
; i
++)
657 basic_block bb
= bbs
[i
];
659 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
);)
661 gimple phi
= gsi_stmt (bsi
);
662 if (!virtual_operand_p (gimple_phi_result (phi
))
663 && !bitmap_bit_p (partition
->stmts
, gimple_uid (phi
)))
664 remove_phi_node (&bsi
, true);
669 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
);)
671 gimple stmt
= gsi_stmt (bsi
);
672 if (gimple_code (stmt
) != GIMPLE_LABEL
673 && !is_gimple_debug (stmt
)
674 && !bitmap_bit_p (partition
->stmts
, gimple_uid (stmt
)))
676 /* Choose an arbitrary path through the empty CFG part
677 that this unnecessary control stmt controls. */
678 if (gimple_code (stmt
) == GIMPLE_COND
)
680 gimple_cond_make_false (stmt
);
683 else if (gimple_code (stmt
) == GIMPLE_SWITCH
)
685 gimple_switch_set_index
686 (stmt
, CASE_LOW (gimple_switch_label (stmt
, 1)));
691 unlink_stmt_vdef (stmt
);
692 gsi_remove (&bsi
, true);
704 /* Build the size argument for a memory operation call. */
707 build_size_arg_loc (location_t loc
, data_reference_p dr
, tree nb_iter
,
710 tree size
= fold_convert_loc (loc
, sizetype
, nb_iter
);
712 size
= size_binop (PLUS_EXPR
, size
, size_one_node
);
713 size
= fold_build2_loc (loc
, MULT_EXPR
, sizetype
, size
,
714 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr
))));
715 size
= fold_convert_loc (loc
, size_type_node
, size
);
719 /* Build an address argument for a memory operation call. */
722 build_addr_arg_loc (location_t loc
, data_reference_p dr
, tree nb_bytes
)
726 addr_base
= size_binop_loc (loc
, PLUS_EXPR
, DR_OFFSET (dr
), DR_INIT (dr
));
727 addr_base
= fold_convert_loc (loc
, sizetype
, addr_base
);
729 /* Test for a negative stride, iterating over every element. */
730 if (tree_int_cst_sgn (DR_STEP (dr
)) == -1)
732 addr_base
= size_binop_loc (loc
, MINUS_EXPR
, addr_base
,
733 fold_convert_loc (loc
, sizetype
, nb_bytes
));
734 addr_base
= size_binop_loc (loc
, PLUS_EXPR
, addr_base
,
735 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr
))));
738 return fold_build_pointer_plus_loc (loc
, DR_BASE_ADDRESS (dr
), addr_base
);
741 /* If VAL memory representation contains the same value in all bytes,
742 return that value, otherwise return -1.
743 E.g. for 0x24242424 return 0x24, for IEEE double
744 747708026454360457216.0 return 0x44, etc. */
747 const_with_all_bytes_same (tree val
)
749 unsigned char buf
[64];
752 if (integer_zerop (val
)
754 || (TREE_CODE (val
) == CONSTRUCTOR
755 && !TREE_CLOBBER_P (val
)
756 && CONSTRUCTOR_NELTS (val
) == 0))
759 if (CHAR_BIT
!= 8 || BITS_PER_UNIT
!= 8)
762 len
= native_encode_expr (val
, buf
, sizeof (buf
));
765 for (i
= 1; i
< len
; i
++)
766 if (buf
[i
] != buf
[0])
771 /* Generate a call to memset for PARTITION in LOOP. */
774 generate_memset_builtin (struct loop
*loop
, partition_t partition
)
776 gimple_stmt_iterator gsi
;
777 gimple stmt
, fn_call
;
778 tree mem
, fn
, nb_bytes
;
782 stmt
= DR_STMT (partition
->main_dr
);
783 loc
= gimple_location (stmt
);
785 /* The new statements will be placed before LOOP. */
786 gsi
= gsi_last_bb (loop_preheader_edge (loop
)->src
);
788 nb_bytes
= build_size_arg_loc (loc
, partition
->main_dr
, partition
->niter
,
789 partition
->plus_one
);
790 nb_bytes
= force_gimple_operand_gsi (&gsi
, nb_bytes
, true, NULL_TREE
,
791 false, GSI_CONTINUE_LINKING
);
792 mem
= build_addr_arg_loc (loc
, partition
->main_dr
, nb_bytes
);
793 mem
= force_gimple_operand_gsi (&gsi
, mem
, true, NULL_TREE
,
794 false, GSI_CONTINUE_LINKING
);
796 /* This exactly matches the pattern recognition in classify_partition. */
797 val
= gimple_assign_rhs1 (stmt
);
798 /* Handle constants like 0x15151515 and similarly
799 floating point constants etc. where all bytes are the same. */
800 int bytev
= const_with_all_bytes_same (val
);
802 val
= build_int_cst (integer_type_node
, bytev
);
803 else if (TREE_CODE (val
) == INTEGER_CST
)
804 val
= fold_convert (integer_type_node
, val
);
805 else if (!useless_type_conversion_p (integer_type_node
, TREE_TYPE (val
)))
808 tree tem
= make_ssa_name (integer_type_node
, NULL
);
809 cstmt
= gimple_build_assign_with_ops (NOP_EXPR
, tem
, val
, NULL_TREE
);
810 gsi_insert_after (&gsi
, cstmt
, GSI_CONTINUE_LINKING
);
814 fn
= build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET
));
815 fn_call
= gimple_build_call (fn
, 3, mem
, val
, nb_bytes
);
816 gsi_insert_after (&gsi
, fn_call
, GSI_CONTINUE_LINKING
);
818 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
820 fprintf (dump_file
, "generated memset");
822 fprintf (dump_file
, " zero\n");
824 fprintf (dump_file
, "\n");
828 /* Generate a call to memcpy for PARTITION in LOOP. */
831 generate_memcpy_builtin (struct loop
*loop
, partition_t partition
)
833 gimple_stmt_iterator gsi
;
834 gimple stmt
, fn_call
;
835 tree dest
, src
, fn
, nb_bytes
;
837 enum built_in_function kind
;
839 stmt
= DR_STMT (partition
->main_dr
);
840 loc
= gimple_location (stmt
);
842 /* The new statements will be placed before LOOP. */
843 gsi
= gsi_last_bb (loop_preheader_edge (loop
)->src
);
845 nb_bytes
= build_size_arg_loc (loc
, partition
->main_dr
, partition
->niter
,
846 partition
->plus_one
);
847 nb_bytes
= force_gimple_operand_gsi (&gsi
, nb_bytes
, true, NULL_TREE
,
848 false, GSI_CONTINUE_LINKING
);
849 dest
= build_addr_arg_loc (loc
, partition
->main_dr
, nb_bytes
);
850 src
= build_addr_arg_loc (loc
, partition
->secondary_dr
, nb_bytes
);
851 if (ptr_derefs_may_alias_p (dest
, src
))
852 kind
= BUILT_IN_MEMMOVE
;
854 kind
= BUILT_IN_MEMCPY
;
856 dest
= force_gimple_operand_gsi (&gsi
, dest
, true, NULL_TREE
,
857 false, GSI_CONTINUE_LINKING
);
858 src
= force_gimple_operand_gsi (&gsi
, src
, true, NULL_TREE
,
859 false, GSI_CONTINUE_LINKING
);
860 fn
= build_fold_addr_expr (builtin_decl_implicit (kind
));
861 fn_call
= gimple_build_call (fn
, 3, dest
, src
, nb_bytes
);
862 gsi_insert_after (&gsi
, fn_call
, GSI_CONTINUE_LINKING
);
864 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
866 if (kind
== BUILT_IN_MEMCPY
)
867 fprintf (dump_file
, "generated memcpy\n");
869 fprintf (dump_file
, "generated memmove\n");
873 /* Remove and destroy the loop LOOP. */
876 destroy_loop (struct loop
*loop
)
878 unsigned nbbs
= loop
->num_nodes
;
879 edge exit
= single_exit (loop
);
880 basic_block src
= loop_preheader_edge (loop
)->src
, dest
= exit
->dest
;
884 bbs
= get_loop_body_in_dom_order (loop
);
886 redirect_edge_pred (exit
, src
);
887 exit
->flags
&= ~(EDGE_TRUE_VALUE
|EDGE_FALSE_VALUE
);
888 exit
->flags
|= EDGE_FALLTHRU
;
889 cancel_loop_tree (loop
);
890 rescan_loop_exit (exit
, false, true);
892 for (i
= 0; i
< nbbs
; i
++)
894 /* We have made sure to not leave any dangling uses of SSA
895 names defined in the loop. With the exception of virtuals.
896 Make sure we replace all uses of virtual defs that will remain
897 outside of the loop with the bare symbol as delete_basic_block
898 will release them. */
899 gimple_stmt_iterator gsi
;
900 for (gsi
= gsi_start_phis (bbs
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
902 gimple phi
= gsi_stmt (gsi
);
903 if (virtual_operand_p (gimple_phi_result (phi
)))
904 mark_virtual_phi_result_for_renaming (phi
);
906 for (gsi
= gsi_start_bb (bbs
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
908 gimple stmt
= gsi_stmt (gsi
);
909 tree vdef
= gimple_vdef (stmt
);
910 if (vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
911 mark_virtual_operand_for_renaming (vdef
);
913 delete_basic_block (bbs
[i
]);
917 set_immediate_dominator (CDI_DOMINATORS
, dest
,
918 recompute_dominator (CDI_DOMINATORS
, dest
));
921 /* Generates code for PARTITION. */
924 generate_code_for_partition (struct loop
*loop
,
925 partition_t partition
, bool copy_p
)
927 switch (partition
->kind
)
930 /* Reductions all have to be in the last partition. */
931 gcc_assert (!partition_reduction_p (partition
)
933 generate_loops_for_partition (loop
, partition
, copy_p
);
937 generate_memset_builtin (loop
, partition
);
941 generate_memcpy_builtin (loop
, partition
);
948 /* Common tail for partitions we turn into a call. If this was the last
949 partition for which we generate code, we have to destroy the loop. */
955 /* Returns a partition with all the statements needed for computing
956 the vertex V of the RDG, also including the loop exit conditions. */
959 build_rdg_partition_for_vertex (struct graph
*rdg
, int v
)
961 partition_t partition
= partition_alloc (NULL
, NULL
);
962 stack_vec
<int, 3> nodes
;
966 graphds_dfs (rdg
, &v
, 1, &nodes
, false, NULL
);
968 FOR_EACH_VEC_ELT (nodes
, i
, x
)
970 bitmap_set_bit (partition
->stmts
, x
);
971 bitmap_set_bit (partition
->loops
,
972 loop_containing_stmt (RDG_STMT (rdg
, x
))->num
);
978 /* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
979 For the moment we detect only the memset zero pattern. */
982 classify_partition (loop_p loop
, struct graph
*rdg
, partition_t partition
)
987 data_reference_p single_load
, single_store
;
988 bool volatiles_p
= false;
989 bool plus_one
= false;
991 partition
->kind
= PKIND_NORMAL
;
992 partition
->main_dr
= NULL
;
993 partition
->secondary_dr
= NULL
;
994 partition
->niter
= NULL_TREE
;
995 partition
->plus_one
= false;
997 EXECUTE_IF_SET_IN_BITMAP (partition
->stmts
, 0, i
, bi
)
999 gimple stmt
= RDG_STMT (rdg
, i
);
1001 if (gimple_has_volatile_ops (stmt
))
1004 /* If the stmt has uses outside of the loop mark it as reduction. */
1005 if (stmt_has_scalar_dependences_outside_loop (loop
, stmt
))
1007 partition
->reduction_p
= true;
1012 /* Perform general partition disqualification for builtins. */
1014 || !flag_tree_loop_distribute_patterns
)
1017 /* Detect memset and memcpy. */
1019 single_store
= NULL
;
1020 EXECUTE_IF_SET_IN_BITMAP (partition
->stmts
, 0, i
, bi
)
1022 gimple stmt
= RDG_STMT (rdg
, i
);
1023 data_reference_p dr
;
1026 if (gimple_code (stmt
) == GIMPLE_PHI
)
1029 /* Any scalar stmts are ok. */
1030 if (!gimple_vuse (stmt
))
1033 /* Otherwise just regular loads/stores. */
1034 if (!gimple_assign_single_p (stmt
))
1037 /* But exactly one store and/or load. */
1038 for (j
= 0; RDG_DATAREFS (rdg
, i
).iterate (j
, &dr
); ++j
)
1040 if (DR_IS_READ (dr
))
1042 if (single_load
!= NULL
)
1048 if (single_store
!= NULL
)
1058 nb_iter
= number_of_latch_executions (loop
);
1059 if (!nb_iter
|| nb_iter
== chrec_dont_know
)
1061 if (dominated_by_p (CDI_DOMINATORS
, single_exit (loop
)->src
,
1062 gimple_bb (DR_STMT (single_store
))))
1065 if (single_store
&& !single_load
)
1067 gimple stmt
= DR_STMT (single_store
);
1068 tree rhs
= gimple_assign_rhs1 (stmt
);
1069 if (const_with_all_bytes_same (rhs
) == -1
1070 && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs
))
1071 || (TYPE_MODE (TREE_TYPE (rhs
))
1072 != TYPE_MODE (unsigned_char_type_node
))))
1074 if (TREE_CODE (rhs
) == SSA_NAME
1075 && !SSA_NAME_IS_DEFAULT_DEF (rhs
)
1076 && flow_bb_inside_loop_p (loop
, gimple_bb (SSA_NAME_DEF_STMT (rhs
))))
1078 if (!adjacent_dr_p (single_store
)
1079 || !dominated_by_p (CDI_DOMINATORS
,
1080 loop
->latch
, gimple_bb (stmt
)))
1082 partition
->kind
= PKIND_MEMSET
;
1083 partition
->main_dr
= single_store
;
1084 partition
->niter
= nb_iter
;
1085 partition
->plus_one
= plus_one
;
1087 else if (single_store
&& single_load
)
1089 gimple store
= DR_STMT (single_store
);
1090 gimple load
= DR_STMT (single_load
);
1091 /* Direct aggregate copy or via an SSA name temporary. */
1093 && gimple_assign_lhs (load
) != gimple_assign_rhs1 (store
))
1095 if (!adjacent_dr_p (single_store
)
1096 || !adjacent_dr_p (single_load
)
1097 || !operand_equal_p (DR_STEP (single_store
),
1098 DR_STEP (single_load
), 0)
1099 || !dominated_by_p (CDI_DOMINATORS
,
1100 loop
->latch
, gimple_bb (store
)))
1102 /* Now check that if there is a dependence this dependence is
1103 of a suitable form for memmove. */
1104 vec
<loop_p
> loops
= vNULL
;
1106 loops
.safe_push (loop
);
1107 ddr
= initialize_data_dependence_relation (single_load
, single_store
,
1109 compute_affine_dependence (ddr
, loop
);
1110 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
1112 free_dependence_relation (ddr
);
1116 if (DDR_ARE_DEPENDENT (ddr
) != chrec_known
)
1118 if (DDR_NUM_DIST_VECTS (ddr
) == 0)
1120 free_dependence_relation (ddr
);
1124 lambda_vector dist_v
;
1125 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr
), i
, dist_v
)
1127 int dist
= dist_v
[index_in_loop_nest (loop
->num
,
1128 DDR_LOOP_NEST (ddr
))];
1129 if (dist
> 0 && !DDR_REVERSED_P (ddr
))
1131 free_dependence_relation (ddr
);
1137 free_dependence_relation (ddr
);
1139 partition
->kind
= PKIND_MEMCPY
;
1140 partition
->main_dr
= single_store
;
1141 partition
->secondary_dr
= single_load
;
1142 partition
->niter
= nb_iter
;
1143 partition
->plus_one
= plus_one
;
1147 /* For a data reference REF, return the declaration of its base
1148 address or NULL_TREE if the base is not determined. */
1151 ref_base_address (data_reference_p dr
)
1153 tree base_address
= DR_BASE_ADDRESS (dr
);
1155 && TREE_CODE (base_address
) == ADDR_EXPR
)
1156 return TREE_OPERAND (base_address
, 0);
1158 return base_address
;
1161 /* Returns true when PARTITION1 and PARTITION2 have similar memory
1165 similar_memory_accesses (struct graph
*rdg
, partition_t partition1
,
1166 partition_t partition2
)
1168 unsigned i
, j
, k
, l
;
1169 bitmap_iterator bi
, bj
;
1170 data_reference_p ref1
, ref2
;
1172 /* First check whether in the intersection of the two partitions are
1173 any loads or stores. Common loads are the situation that happens
1175 EXECUTE_IF_AND_IN_BITMAP (partition1
->stmts
, partition2
->stmts
, 0, i
, bi
)
1176 if (RDG_MEM_WRITE_STMT (rdg
, i
)
1177 || RDG_MEM_READS_STMT (rdg
, i
))
1180 /* Then check all data-references against each other. */
1181 EXECUTE_IF_SET_IN_BITMAP (partition1
->stmts
, 0, i
, bi
)
1182 if (RDG_MEM_WRITE_STMT (rdg
, i
)
1183 || RDG_MEM_READS_STMT (rdg
, i
))
1184 EXECUTE_IF_SET_IN_BITMAP (partition2
->stmts
, 0, j
, bj
)
1185 if (RDG_MEM_WRITE_STMT (rdg
, j
)
1186 || RDG_MEM_READS_STMT (rdg
, j
))
1188 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg
, i
), k
, ref1
)
1190 tree base1
= ref_base_address (ref1
);
1192 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg
, j
), l
, ref2
)
1193 if (base1
== ref_base_address (ref2
))
1201 /* Aggregate several components into a useful partition that is
1202 registered in the PARTITIONS vector. Partitions will be
1203 distributed in different loops. */
1206 rdg_build_partitions (struct graph
*rdg
,
1207 vec
<gimple
> starting_stmts
,
1208 vec
<partition_t
> *partitions
)
1210 bitmap processed
= BITMAP_ALLOC (NULL
);
1214 FOR_EACH_VEC_ELT (starting_stmts
, i
, stmt
)
1216 int v
= rdg_vertex_for_stmt (rdg
, stmt
);
1218 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1220 "ldist asked to generate code for vertex %d\n", v
);
1222 /* If the vertex is already contained in another partition so
1223 is the partition rooted at it. */
1224 if (bitmap_bit_p (processed
, v
))
1227 partition_t partition
= build_rdg_partition_for_vertex (rdg
, v
);
1228 bitmap_ior_into (processed
, partition
->stmts
);
1230 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1232 fprintf (dump_file
, "ldist useful partition:\n");
1233 dump_bitmap (dump_file
, partition
->stmts
);
1236 partitions
->safe_push (partition
);
1239 /* All vertices should have been assigned to at least one partition now,
1240 other than vertices belonging to dead code. */
1242 BITMAP_FREE (processed
);
1245 /* Dump to FILE the PARTITIONS. */
1248 dump_rdg_partitions (FILE *file
, vec
<partition_t
> partitions
)
1251 partition_t partition
;
1253 FOR_EACH_VEC_ELT (partitions
, i
, partition
)
1254 debug_bitmap_file (file
, partition
->stmts
);
1257 /* Debug PARTITIONS. */
1258 extern void debug_rdg_partitions (vec
<partition_t
> );
1261 debug_rdg_partitions (vec
<partition_t
> partitions
)
1263 dump_rdg_partitions (stderr
, partitions
);
1266 /* Returns the number of read and write operations in the RDG. */
1269 number_of_rw_in_rdg (struct graph
*rdg
)
1273 for (i
= 0; i
< rdg
->n_vertices
; i
++)
1275 if (RDG_MEM_WRITE_STMT (rdg
, i
))
1278 if (RDG_MEM_READS_STMT (rdg
, i
))
1285 /* Returns the number of read and write operations in a PARTITION of
1289 number_of_rw_in_partition (struct graph
*rdg
, partition_t partition
)
1295 EXECUTE_IF_SET_IN_BITMAP (partition
->stmts
, 0, i
, ii
)
1297 if (RDG_MEM_WRITE_STMT (rdg
, i
))
1300 if (RDG_MEM_READS_STMT (rdg
, i
))
1307 /* Returns true when one of the PARTITIONS contains all the read or
1308 write operations of RDG. */
1311 partition_contains_all_rw (struct graph
*rdg
,
1312 vec
<partition_t
> partitions
)
1315 partition_t partition
;
1316 int nrw
= number_of_rw_in_rdg (rdg
);
1318 FOR_EACH_VEC_ELT (partitions
, i
, partition
)
1319 if (nrw
== number_of_rw_in_partition (rdg
, partition
))
1325 /* Compute partition dependence created by the data references in DRS1
1326 and DRS2 and modify and return DIR according to that. */
1329 pg_add_dependence_edges (struct graph
*rdg
, vec
<loop_p
> loops
, int dir
,
1330 vec
<data_reference_p
> drs1
,
1331 vec
<data_reference_p
> drs2
)
1333 data_reference_p dr1
, dr2
;
1335 /* dependence direction - 0 is no dependence, -1 is back,
1336 1 is forth, 2 is both (we can stop then, merging will occur). */
1337 for (int ii
= 0; drs1
.iterate (ii
, &dr1
); ++ii
)
1338 for (int jj
= 0; drs2
.iterate (jj
, &dr2
); ++jj
)
1342 /* Re-shuffle data-refs to be in dominator order. */
1343 if (rdg_vertex_for_stmt (rdg
, DR_STMT (dr1
))
1344 > rdg_vertex_for_stmt (rdg
, DR_STMT (dr2
)))
1346 data_reference_p tem
= dr1
;
1349 this_dir
= -this_dir
;
1351 ddr
= initialize_data_dependence_relation (dr1
, dr2
, loops
);
1352 compute_affine_dependence (ddr
, loops
[0]);
1353 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
1355 else if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
1357 if (DDR_REVERSED_P (ddr
))
1359 data_reference_p tem
= dr1
;
1362 this_dir
= -this_dir
;
1364 /* Known dependences can still be unordered througout the
1365 iteration space, see gcc.dg/tree-ssa/ldist-16.c. */
1366 if (DDR_NUM_DIST_VECTS (ddr
) != 1)
1368 /* If the overlap is exact preserve stmt order. */
1369 else if (lambda_vector_zerop (DDR_DIST_VECT (ddr
, 0), 1))
1373 /* Else as the distance vector is lexicographic positive
1374 swap the dependence direction. */
1375 this_dir
= -this_dir
;
1380 free_dependence_relation (ddr
);
1383 else if (dir
!= this_dir
)
1389 /* Compare postorder number of the partition graph vertices V1 and V2. */
1392 pgcmp (const void *v1_
, const void *v2_
)
1394 const vertex
*v1
= (const vertex
*)v1_
;
1395 const vertex
*v2
= (const vertex
*)v2_
;
1396 return v2
->post
- v1
->post
;
1399 /* Distributes the code from LOOP in such a way that producer
1400 statements are placed before consumer statements. Tries to separate
1401 only the statements from STMTS into separate loops.
1402 Returns the number of distributed loops. */
1405 distribute_loop (struct loop
*loop
, vec
<gimple
> stmts
,
1406 control_dependences
*cd
, int *nb_calls
)
1409 vec
<partition_t
> partitions
;
1410 partition_t partition
;
1417 stack_vec
<loop_p
, 3> loop_nest
;
1418 if (!find_loop_nest (loop
, &loop_nest
))
1421 rdg
= build_rdg (loop_nest
, cd
);
1424 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1426 "Loop %d not distributed: failed to build the RDG.\n",
1432 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1433 dump_rdg (dump_file
, rdg
);
1435 partitions
.create (3);
1436 rdg_build_partitions (rdg
, stmts
, &partitions
);
1438 any_builtin
= false;
1439 FOR_EACH_VEC_ELT (partitions
, i
, partition
)
1441 classify_partition (loop
, rdg
, partition
);
1442 any_builtin
|= partition_builtin_p (partition
);
1445 /* If we are only distributing patterns but did not detect any,
1447 if (!flag_tree_loop_distribution
1454 /* If we are only distributing patterns fuse all partitions that
1455 were not classified as builtins. This also avoids chopping
1456 a loop into pieces, separated by builtin calls. That is, we
1457 only want no or a single loop body remaining. */
1459 if (!flag_tree_loop_distribution
)
1461 for (i
= 0; partitions
.iterate (i
, &into
); ++i
)
1462 if (!partition_builtin_p (into
))
1464 for (++i
; partitions
.iterate (i
, &partition
); ++i
)
1465 if (!partition_builtin_p (partition
))
1467 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1469 fprintf (dump_file
, "fusing non-builtin partitions\n");
1470 dump_bitmap (dump_file
, into
->stmts
);
1471 dump_bitmap (dump_file
, partition
->stmts
);
1473 partition_merge_into (into
, partition
);
1474 partitions
.unordered_remove (i
);
1475 partition_free (partition
);
1480 /* Due to limitations in the transform phase we have to fuse all
1481 reduction partitions into the last partition so the existing
1482 loop will contain all loop-closed PHI nodes. */
1483 for (i
= 0; partitions
.iterate (i
, &into
); ++i
)
1484 if (partition_reduction_p (into
))
1486 for (i
= i
+ 1; partitions
.iterate (i
, &partition
); ++i
)
1487 if (partition_reduction_p (partition
))
1489 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1491 fprintf (dump_file
, "fusing partitions\n");
1492 dump_bitmap (dump_file
, into
->stmts
);
1493 dump_bitmap (dump_file
, partition
->stmts
);
1494 fprintf (dump_file
, "because they have reductions\n");
1496 partition_merge_into (into
, partition
);
1497 partitions
.unordered_remove (i
);
1498 partition_free (partition
);
1502 /* Apply our simple cost model - fuse partitions with similar
1504 for (i
= 0; partitions
.iterate (i
, &into
); ++i
)
1506 if (partition_builtin_p (into
))
1509 partitions
.iterate (j
, &partition
); ++j
)
1511 if (!partition_builtin_p (partition
)
1512 && similar_memory_accesses (rdg
, into
, partition
))
1514 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1516 fprintf (dump_file
, "fusing partitions\n");
1517 dump_bitmap (dump_file
, into
->stmts
);
1518 dump_bitmap (dump_file
, partition
->stmts
);
1519 fprintf (dump_file
, "because they have similar "
1520 "memory accesses\n");
1522 partition_merge_into (into
, partition
);
1523 partitions
.unordered_remove (j
);
1524 partition_free (partition
);
1530 /* Build the partition dependency graph. */
1531 if (partitions
.length () > 1)
1533 pg
= new_graph (partitions
.length ());
1535 partition_t partition
;
1536 vec
<data_reference_p
> writes
;
1537 vec
<data_reference_p
> reads
;
1539 #define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
1540 for (i
= 0; partitions
.iterate (i
, &partition
); ++i
)
1542 vertex
*v
= &pg
->vertices
[i
];
1543 pgdata
*data
= new pgdata
;
1544 data_reference_p dr
;
1545 /* FIXME - leaks. */
1549 data
->partition
= partition
;
1550 data
->reads
= vNULL
;
1551 data
->writes
= vNULL
;
1552 EXECUTE_IF_SET_IN_BITMAP (partition
->stmts
, 0, j
, bi
)
1553 for (int k
= 0; RDG_DATAREFS (rdg
, j
).iterate (k
, &dr
); ++k
)
1554 if (DR_IS_READ (dr
))
1555 data
->reads
.safe_push (dr
);
1557 data
->writes
.safe_push (dr
);
1559 partition_t partition1
, partition2
;
1560 for (i
= 0; partitions
.iterate (i
, &partition1
); ++i
)
1561 for (int j
= i
+ 1; partitions
.iterate (j
, &partition2
); ++j
)
1563 /* dependence direction - 0 is no dependence, -1 is back,
1564 1 is forth, 2 is both (we can stop then, merging will occur). */
1566 dir
= pg_add_dependence_edges (rdg
, loop_nest
, dir
,
1570 dir
= pg_add_dependence_edges (rdg
, loop_nest
, dir
,
1574 dir
= pg_add_dependence_edges (rdg
, loop_nest
, dir
,
1577 if (dir
== 1 || dir
== 2)
1578 add_edge (pg
, i
, j
);
1579 if (dir
== -1 || dir
== 2)
1580 add_edge (pg
, j
, i
);
1583 /* Add edges to the reduction partition (if any) to force it last. */
1585 for (j
= 0; partitions
.iterate (j
, &partition
); ++j
)
1586 if (partition_reduction_p (partition
))
1588 if (j
< partitions
.length ())
1590 for (unsigned i
= 0; partitions
.iterate (i
, &partition
); ++i
)
1592 add_edge (pg
, i
, j
);
1595 /* Compute partitions we cannot separate and fuse them. */
1596 num_sccs
= graphds_scc (pg
, NULL
);
1597 for (i
= 0; i
< num_sccs
; ++i
)
1601 for (j
= 0; partitions
.iterate (j
, &first
); ++j
)
1602 if (pg
->vertices
[j
].component
== i
)
1604 for (j
= j
+ 1; partitions
.iterate (j
, &partition
); ++j
)
1605 if (pg
->vertices
[j
].component
== i
)
1607 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1609 fprintf (dump_file
, "fusing partitions\n");
1610 dump_bitmap (dump_file
, first
->stmts
);
1611 dump_bitmap (dump_file
, partition
->stmts
);
1612 fprintf (dump_file
, "because they are in the same "
1613 "dependence SCC\n");
1615 partition_merge_into (first
, partition
);
1616 partitions
[j
] = NULL
;
1617 partition_free (partition
);
1618 PGDATA (j
)->partition
= NULL
;
1622 /* Now order the remaining nodes in postorder. */
1623 qsort (pg
->vertices
, pg
->n_vertices
, sizeof (vertex
), pgcmp
);
1624 partitions
.truncate (0);
1625 for (i
= 0; i
< pg
->n_vertices
; ++i
)
1627 pgdata
*data
= PGDATA (i
);
1628 if (data
->partition
)
1629 partitions
.safe_push (data
->partition
);
1630 data
->reads
.release ();
1631 data
->writes
.release ();
1634 gcc_assert (partitions
.length () == (unsigned)num_sccs
);
1638 nbp
= partitions
.length ();
1640 || (nbp
== 1 && !partition_builtin_p (partitions
[0]))
1641 || (nbp
> 1 && partition_contains_all_rw (rdg
, partitions
)))
1647 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1648 dump_rdg_partitions (dump_file
, partitions
);
1650 FOR_EACH_VEC_ELT (partitions
, i
, partition
)
1652 if (partition_builtin_p (partition
))
1654 generate_code_for_partition (loop
, partition
, i
< nbp
- 1);
1659 FOR_EACH_VEC_ELT (partitions
, i
, partition
)
1660 partition_free (partition
);
1661 partitions
.release ();
1664 return nbp
- *nb_calls
;
1667 /* Distribute all loops in the current function. */
1670 tree_loop_distribution (void)
1673 bool changed
= false;
1675 control_dependences
*cd
= NULL
;
1679 gimple_stmt_iterator gsi
;
1680 for (gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1681 gimple_set_uid (gsi_stmt (gsi
), -1);
1682 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1683 gimple_set_uid (gsi_stmt (gsi
), -1);
1686 /* We can at the moment only distribute non-nested loops, thus restrict
1687 walking to innermost loops. */
1688 FOR_EACH_LOOP (loop
, LI_ONLY_INNERMOST
)
1690 vec
<gimple
> work_list
= vNULL
;
1692 int num
= loop
->num
;
1695 /* If the loop doesn't have a single exit we will fail anyway,
1696 so do that early. */
1697 if (!single_exit (loop
))
1700 /* Only optimize hot loops. */
1701 if (!optimize_loop_for_speed_p (loop
))
1704 /* Initialize the worklist with stmts we seed the partitions with. */
1705 bbs
= get_loop_body_in_dom_order (loop
);
1706 for (i
= 0; i
< loop
->num_nodes
; ++i
)
1708 gimple_stmt_iterator gsi
;
1709 for (gsi
= gsi_start_phis (bbs
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
1711 gimple phi
= gsi_stmt (gsi
);
1712 if (virtual_operand_p (gimple_phi_result (phi
)))
1714 /* Distribute stmts which have defs that are used outside of
1716 if (!stmt_has_scalar_dependences_outside_loop (loop
, phi
))
1718 work_list
.safe_push (phi
);
1720 for (gsi
= gsi_start_bb (bbs
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
1722 gimple stmt
= gsi_stmt (gsi
);
1724 /* If there is a stmt with side-effects bail out - we
1725 cannot and should not distribute this loop. */
1726 if (gimple_has_side_effects (stmt
))
1728 work_list
.truncate (0);
1732 /* Distribute stmts which have defs that are used outside of
1734 if (stmt_has_scalar_dependences_outside_loop (loop
, stmt
))
1736 /* Otherwise only distribute stores for now. */
1737 else if (!gimple_vdef (stmt
))
1740 work_list
.safe_push (stmt
);
1746 int nb_generated_loops
= 0;
1747 int nb_generated_calls
= 0;
1748 location_t loc
= find_loop_location (loop
);
1749 if (work_list
.length () > 0)
1753 calculate_dominance_info (CDI_DOMINATORS
);
1754 calculate_dominance_info (CDI_POST_DOMINATORS
);
1755 cd
= new control_dependences (create_edge_list ());
1756 free_dominance_info (CDI_POST_DOMINATORS
);
1758 nb_generated_loops
= distribute_loop (loop
, work_list
, cd
,
1759 &nb_generated_calls
);
1762 if (nb_generated_loops
+ nb_generated_calls
> 0)
1765 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
,
1766 loc
, "Loop %d distributed: split to %d loops "
1767 "and %d library calls.\n",
1768 num
, nb_generated_loops
, nb_generated_calls
);
1770 else if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1771 fprintf (dump_file
, "Loop %d is the same.\n", num
);
1773 work_list
.release ();
1781 mark_virtual_operands_for_renaming (cfun
);
1782 rewrite_into_loop_closed_ssa (NULL
, TODO_update_ssa
);
1785 #ifdef ENABLE_CHECKING
1786 verify_loop_structure ();
1793 gate_tree_loop_distribution (void)
1795 return flag_tree_loop_distribution
1796 || flag_tree_loop_distribute_patterns
;
1801 const pass_data pass_data_loop_distribution
=
1803 GIMPLE_PASS
, /* type */
1805 OPTGROUP_LOOP
, /* optinfo_flags */
1806 true, /* has_gate */
1807 true, /* has_execute */
1808 TV_TREE_LOOP_DISTRIBUTION
, /* tv_id */
1809 ( PROP_cfg
| PROP_ssa
), /* properties_required */
1810 0, /* properties_provided */
1811 0, /* properties_destroyed */
1812 0, /* todo_flags_start */
1813 TODO_verify_ssa
, /* todo_flags_finish */
1816 class pass_loop_distribution
: public gimple_opt_pass
1819 pass_loop_distribution (gcc::context
*ctxt
)
1820 : gimple_opt_pass (pass_data_loop_distribution
, ctxt
)
1823 /* opt_pass methods: */
1824 bool gate () { return gate_tree_loop_distribution (); }
1825 unsigned int execute () { return tree_loop_distribution (); }
1827 }; // class pass_loop_distribution
1832 make_pass_loop_distribution (gcc::context
*ctxt
)
1834 return new pass_loop_distribution (ctxt
);