   Copyright (C) 2006-2013 Free Software Foundation, Inc.
   Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
   and Sebastian Pop <sebastian.pop@amd.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass performs loop distribution: for example, the loop

   |DO I = 2, N
   |    A(I) = B(I) + C
   |    D(I) = A(I-1)*E
   |ENDDO

   is transformed to

   |DOALL I = 2, N
   |   A(I) = B(I) + C
   |ENDDO
   |
   |DOALL I = 2, N
   |   D(I) = A(I-1)*E
   |ENDDO

   This pass uses an RDG, Reduced Dependence Graph built on top of the
   data dependence relations.  The RDG is then topologically sorted to
   obtain a map of information producers/consumers based on which it
   generates the new loops.  */
46 #include "coretypes.h"
49 #include "gimple-iterator.h"
50 #include "gimplify-me.h"
51 #include "stor-layout.h"
52 #include "gimple-ssa.h"
54 #include "tree-phinodes.h"
55 #include "ssa-iterators.h"
56 #include "stringpool.h"
57 #include "tree-ssanames.h"
58 #include "tree-ssa-loop-manip.h"
59 #include "tree-ssa-loop.h"
60 #include "tree-into-ssa.h"
63 #include "tree-chrec.h"
64 #include "tree-data-ref.h"
65 #include "tree-scalar-evolution.h"
66 #include "tree-pass.h"
67 #include "gimple-pretty-print.h"
68 #include "tree-vectorizer.h"
71 /* A Reduced Dependence Graph (RDG) vertex representing a statement. */
72 typedef struct rdg_vertex
74 /* The statement represented by this vertex. */
77 /* Vector of data-references in this statement. */
78 vec
<data_reference_p
> datarefs
;
80 /* True when the statement contains a write to memory. */
83 /* True when the statement contains a read from memory. */
/* Accessors for the rdg_vertex payload hanging off a graph vertex V,
   and convenience forms indexing vertex I of RDG directly.  */
#define RDGV_STMT(V)                ((struct rdg_vertex *) ((V)->data))->stmt
#define RDGV_DATAREFS(V)            ((struct rdg_vertex *) ((V)->data))->datarefs
#define RDGV_HAS_MEM_WRITE(V)       ((struct rdg_vertex *) ((V)->data))->has_mem_write
#define RDGV_HAS_MEM_READS(V)       ((struct rdg_vertex *) ((V)->data))->has_mem_reads
#define RDG_STMT(RDG, I)            RDGV_STMT (&(RDG->vertices[I]))
#define RDG_DATAREFS(RDG, I)        RDGV_DATAREFS (&(RDG->vertices[I]))
#define RDG_MEM_WRITE_STMT(RDG, I)  RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
#define RDG_MEM_READS_STMT(RDG, I)  RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
/* Data dependence type.  */

enum rdg_dep_type
{
  /* Read After Write (RAW).  */
  flow_dd = 'f',

  /* Control dependence (execute conditional on).  */
  control_dd = 'c'
};

/* Dependence information attached to an edge of the RDG.  */

typedef struct rdg_edge
{
  /* Type of the dependence.  */
  enum rdg_dep_type type;
} *rdg_edge_p;

#define RDGE_TYPE(E)                ((struct rdg_edge *) ((E)->data))->type
117 /* Dump vertex I in RDG to FILE. */
120 dump_rdg_vertex (FILE *file
, struct graph
*rdg
, int i
)
122 struct vertex
*v
= &(rdg
->vertices
[i
]);
123 struct graph_edge
*e
;
125 fprintf (file
, "(vertex %d: (%s%s) (in:", i
,
126 RDG_MEM_WRITE_STMT (rdg
, i
) ? "w" : "",
127 RDG_MEM_READS_STMT (rdg
, i
) ? "r" : "");
130 for (e
= v
->pred
; e
; e
= e
->pred_next
)
131 fprintf (file
, " %d", e
->src
);
133 fprintf (file
, ") (out:");
136 for (e
= v
->succ
; e
; e
= e
->succ_next
)
137 fprintf (file
, " %d", e
->dest
);
139 fprintf (file
, ")\n");
140 print_gimple_stmt (file
, RDGV_STMT (v
), 0, TDF_VOPS
|TDF_MEMSYMS
);
141 fprintf (file
, ")\n");
144 /* Call dump_rdg_vertex on stderr. */
147 debug_rdg_vertex (struct graph
*rdg
, int i
)
149 dump_rdg_vertex (stderr
, rdg
, i
);
152 /* Dump the reduced dependence graph RDG to FILE. */
155 dump_rdg (FILE *file
, struct graph
*rdg
)
157 fprintf (file
, "(rdg\n");
158 for (int i
= 0; i
< rdg
->n_vertices
; i
++)
159 dump_rdg_vertex (file
, rdg
, i
);
160 fprintf (file
, ")\n");
163 /* Call dump_rdg on stderr. */
166 debug_rdg (struct graph
*rdg
)
168 dump_rdg (stderr
, rdg
);
172 dot_rdg_1 (FILE *file
, struct graph
*rdg
)
175 pretty_printer buffer
;
176 pp_needs_newline (&buffer
) = false;
177 buffer
.buffer
->stream
= file
;
179 fprintf (file
, "digraph RDG {\n");
181 for (i
= 0; i
< rdg
->n_vertices
; i
++)
183 struct vertex
*v
= &(rdg
->vertices
[i
]);
184 struct graph_edge
*e
;
186 fprintf (file
, "%d [label=\"[%d] ", i
, i
);
187 pp_gimple_stmt_1 (&buffer
, RDGV_STMT (v
), 0, TDF_SLIM
);
189 fprintf (file
, "\"]\n");
191 /* Highlight reads from memory. */
192 if (RDG_MEM_READS_STMT (rdg
, i
))
193 fprintf (file
, "%d [style=filled, fillcolor=green]\n", i
);
195 /* Highlight stores to memory. */
196 if (RDG_MEM_WRITE_STMT (rdg
, i
))
197 fprintf (file
, "%d [style=filled, fillcolor=red]\n", i
);
200 for (e
= v
->succ
; e
; e
= e
->succ_next
)
201 switch (RDGE_TYPE (e
))
204 /* These are the most common dependences: don't print these. */
205 fprintf (file
, "%d -> %d \n", i
, e
->dest
);
209 fprintf (file
, "%d -> %d [label=control] \n", i
, e
->dest
);
217 fprintf (file
, "}\n\n");
220 /* Display the Reduced Dependence Graph using dotty. */
223 dot_rdg (struct graph
*rdg
)
225 /* When debugging, you may want to enable the following code. */
227 FILE *file
= popen ("dot -Tx11", "w");
230 dot_rdg_1 (file
, rdg
);
232 close (fileno (file
));
235 dot_rdg_1 (stderr
, rdg
);
239 /* Returns the index of STMT in RDG. */
242 rdg_vertex_for_stmt (struct graph
*rdg ATTRIBUTE_UNUSED
, gimple stmt
)
244 int index
= gimple_uid (stmt
);
245 gcc_checking_assert (index
== -1 || RDG_STMT (rdg
, index
) == stmt
);
249 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
250 the index of DEF in RDG. */
253 create_rdg_edges_for_scalar (struct graph
*rdg
, tree def
, int idef
)
255 use_operand_p imm_use_p
;
256 imm_use_iterator iterator
;
258 FOR_EACH_IMM_USE_FAST (imm_use_p
, iterator
, def
)
260 struct graph_edge
*e
;
261 int use
= rdg_vertex_for_stmt (rdg
, USE_STMT (imm_use_p
));
266 e
= add_edge (rdg
, idef
, use
);
267 e
->data
= XNEW (struct rdg_edge
);
268 RDGE_TYPE (e
) = flow_dd
;
272 /* Creates an edge for the control dependences of BB to the vertex V. */
275 create_edge_for_control_dependence (struct graph
*rdg
, basic_block bb
,
276 int v
, control_dependences
*cd
)
280 EXECUTE_IF_SET_IN_BITMAP (cd
->get_edges_dependent_on (bb
->index
),
283 basic_block cond_bb
= cd
->get_edge (edge_n
)->src
;
284 gimple stmt
= last_stmt (cond_bb
);
285 if (stmt
&& is_ctrl_stmt (stmt
))
287 struct graph_edge
*e
;
288 int c
= rdg_vertex_for_stmt (rdg
, stmt
);
292 e
= add_edge (rdg
, c
, v
);
293 e
->data
= XNEW (struct rdg_edge
);
294 RDGE_TYPE (e
) = control_dd
;
299 /* Creates the edges of the reduced dependence graph RDG. */
302 create_rdg_flow_edges (struct graph
*rdg
)
308 for (i
= 0; i
< rdg
->n_vertices
; i
++)
309 FOR_EACH_PHI_OR_STMT_DEF (def_p
, RDG_STMT (rdg
, i
),
311 create_rdg_edges_for_scalar (rdg
, DEF_FROM_PTR (def_p
), i
);
314 /* Creates the edges of the reduced dependence graph RDG. */
317 create_rdg_cd_edges (struct graph
*rdg
, control_dependences
*cd
)
321 for (i
= 0; i
< rdg
->n_vertices
; i
++)
323 gimple stmt
= RDG_STMT (rdg
, i
);
324 if (gimple_code (stmt
) == GIMPLE_PHI
)
328 FOR_EACH_EDGE (e
, ei
, gimple_bb (stmt
)->preds
)
329 create_edge_for_control_dependence (rdg
, e
->src
, i
, cd
);
332 create_edge_for_control_dependence (rdg
, gimple_bb (stmt
), i
, cd
);
336 /* Build the vertices of the reduced dependence graph RDG. Return false
340 create_rdg_vertices (struct graph
*rdg
, vec
<gimple
> stmts
, loop_p loop
,
341 vec
<data_reference_p
> *datarefs
)
346 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
348 struct vertex
*v
= &(rdg
->vertices
[i
]);
350 /* Record statement to vertex mapping. */
351 gimple_set_uid (stmt
, i
);
353 v
->data
= XNEW (struct rdg_vertex
);
354 RDGV_STMT (v
) = stmt
;
355 RDGV_DATAREFS (v
).create (0);
356 RDGV_HAS_MEM_WRITE (v
) = false;
357 RDGV_HAS_MEM_READS (v
) = false;
358 if (gimple_code (stmt
) == GIMPLE_PHI
)
361 unsigned drp
= datarefs
->length ();
362 if (!find_data_references_in_stmt (loop
, stmt
, datarefs
))
364 for (unsigned j
= drp
; j
< datarefs
->length (); ++j
)
366 data_reference_p dr
= (*datarefs
)[j
];
368 RDGV_HAS_MEM_READS (v
) = true;
370 RDGV_HAS_MEM_WRITE (v
) = true;
371 RDGV_DATAREFS (v
).safe_push (dr
);
377 /* Initialize STMTS with all the statements of LOOP. The order in
378 which we discover statements is important as
379 generate_loops_for_partition is using the same traversal for
380 identifying statements in loop copies. */
383 stmts_from_loop (struct loop
*loop
, vec
<gimple
> *stmts
)
386 basic_block
*bbs
= get_loop_body_in_dom_order (loop
);
388 for (i
= 0; i
< loop
->num_nodes
; i
++)
390 basic_block bb
= bbs
[i
];
391 gimple_stmt_iterator bsi
;
394 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
395 if (!virtual_operand_p (gimple_phi_result (gsi_stmt (bsi
))))
396 stmts
->safe_push (gsi_stmt (bsi
));
398 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
400 stmt
= gsi_stmt (bsi
);
401 if (gimple_code (stmt
) != GIMPLE_LABEL
&& !is_gimple_debug (stmt
))
402 stmts
->safe_push (stmt
);
409 /* Free the reduced dependence graph RDG. */
412 free_rdg (struct graph
*rdg
)
416 for (i
= 0; i
< rdg
->n_vertices
; i
++)
418 struct vertex
*v
= &(rdg
->vertices
[i
]);
419 struct graph_edge
*e
;
421 for (e
= v
->succ
; e
; e
= e
->succ_next
)
426 gimple_set_uid (RDGV_STMT (v
), -1);
427 free_data_refs (RDGV_DATAREFS (v
));
435 /* Build the Reduced Dependence Graph (RDG) with one vertex per
436 statement of the loop nest LOOP_NEST, and one edge per data dependence or
437 scalar dependence. */
439 static struct graph
*
440 build_rdg (vec
<loop_p
> loop_nest
, control_dependences
*cd
)
443 vec
<data_reference_p
> datarefs
;
445 /* Create the RDG vertices from the stmts of the loop nest. */
446 stack_vec
<gimple
, 10> stmts
;
447 stmts_from_loop (loop_nest
[0], &stmts
);
448 rdg
= new_graph (stmts
.length ());
449 datarefs
.create (10);
450 if (!create_rdg_vertices (rdg
, stmts
, loop_nest
[0], &datarefs
))
458 create_rdg_flow_edges (rdg
);
460 create_rdg_cd_edges (rdg
, cd
);
469 enum partition_kind
{
470 PKIND_NORMAL
, PKIND_MEMSET
, PKIND_MEMCPY
473 typedef struct partition_s
478 enum partition_kind kind
;
479 /* data-references a kind != PKIND_NORMAL partition is about. */
480 data_reference_p main_dr
;
481 data_reference_p secondary_dr
;
486 /* Allocate and initialize a partition from BITMAP. */
489 partition_alloc (bitmap stmts
, bitmap loops
)
491 partition_t partition
= XCNEW (struct partition_s
);
492 partition
->stmts
= stmts
? stmts
: BITMAP_ALLOC (NULL
);
493 partition
->loops
= loops
? loops
: BITMAP_ALLOC (NULL
);
494 partition
->reduction_p
= false;
495 partition
->kind
= PKIND_NORMAL
;
499 /* Free PARTITION. */
502 partition_free (partition_t partition
)
504 BITMAP_FREE (partition
->stmts
);
505 BITMAP_FREE (partition
->loops
);
509 /* Returns true if the partition can be generated as a builtin. */
512 partition_builtin_p (partition_t partition
)
514 return partition
->kind
!= PKIND_NORMAL
;
517 /* Returns true if the partition contains a reduction. */
520 partition_reduction_p (partition_t partition
)
522 return partition
->reduction_p
;
525 /* Merge PARTITION into the partition DEST. */
528 partition_merge_into (partition_t dest
, partition_t partition
)
530 dest
->kind
= PKIND_NORMAL
;
531 bitmap_ior_into (dest
->stmts
, partition
->stmts
);
532 if (partition_reduction_p (partition
))
533 dest
->reduction_p
= true;
537 /* Returns true when DEF is an SSA_NAME defined in LOOP and used after
541 ssa_name_has_uses_outside_loop_p (tree def
, loop_p loop
)
543 imm_use_iterator imm_iter
;
546 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, def
)
548 gimple use_stmt
= USE_STMT (use_p
);
549 if (!is_gimple_debug (use_stmt
)
550 && loop
!= loop_containing_stmt (use_stmt
))
557 /* Returns true when STMT defines a scalar variable used after the
561 stmt_has_scalar_dependences_outside_loop (loop_p loop
, gimple stmt
)
566 if (gimple_code (stmt
) == GIMPLE_PHI
)
567 return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt
), loop
);
569 FOR_EACH_SSA_DEF_OPERAND (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
570 if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p
), loop
))
576 /* Return a copy of LOOP placed before LOOP. */
579 copy_loop_before (struct loop
*loop
)
582 edge preheader
= loop_preheader_edge (loop
);
584 initialize_original_copy_tables ();
585 res
= slpeel_tree_duplicate_loop_to_edge_cfg (loop
, preheader
);
586 gcc_assert (res
!= NULL
);
587 free_original_copy_tables ();
588 delete_update_ssa ();
593 /* Creates an empty basic block after LOOP. */
596 create_bb_after_loop (struct loop
*loop
)
598 edge exit
= single_exit (loop
);
606 /* Generate code for PARTITION from the code in LOOP. The loop is
607 copied when COPY_P is true. All the statements not flagged in the
608 PARTITION bitmap are removed from the loop or from its copy. The
609 statements are indexed in sequence inside a basic block, and the
610 basic blocks of a loop are taken in dom order. */
613 generate_loops_for_partition (struct loop
*loop
, partition_t partition
,
617 gimple_stmt_iterator bsi
;
622 loop
= copy_loop_before (loop
);
623 gcc_assert (loop
!= NULL
);
624 create_preheader (loop
, CP_SIMPLE_PREHEADERS
);
625 create_bb_after_loop (loop
);
628 /* Remove stmts not in the PARTITION bitmap. */
629 bbs
= get_loop_body_in_dom_order (loop
);
631 if (MAY_HAVE_DEBUG_STMTS
)
632 for (i
= 0; i
< loop
->num_nodes
; i
++)
634 basic_block bb
= bbs
[i
];
636 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
638 gimple phi
= gsi_stmt (bsi
);
639 if (!virtual_operand_p (gimple_phi_result (phi
))
640 && !bitmap_bit_p (partition
->stmts
, gimple_uid (phi
)))
641 reset_debug_uses (phi
);
644 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
646 gimple stmt
= gsi_stmt (bsi
);
647 if (gimple_code (stmt
) != GIMPLE_LABEL
648 && !is_gimple_debug (stmt
)
649 && !bitmap_bit_p (partition
->stmts
, gimple_uid (stmt
)))
650 reset_debug_uses (stmt
);
654 for (i
= 0; i
< loop
->num_nodes
; i
++)
656 basic_block bb
= bbs
[i
];
658 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
);)
660 gimple phi
= gsi_stmt (bsi
);
661 if (!virtual_operand_p (gimple_phi_result (phi
))
662 && !bitmap_bit_p (partition
->stmts
, gimple_uid (phi
)))
663 remove_phi_node (&bsi
, true);
668 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
);)
670 gimple stmt
= gsi_stmt (bsi
);
671 if (gimple_code (stmt
) != GIMPLE_LABEL
672 && !is_gimple_debug (stmt
)
673 && !bitmap_bit_p (partition
->stmts
, gimple_uid (stmt
)))
675 /* Choose an arbitrary path through the empty CFG part
676 that this unnecessary control stmt controls. */
677 if (gimple_code (stmt
) == GIMPLE_COND
)
679 gimple_cond_make_false (stmt
);
682 else if (gimple_code (stmt
) == GIMPLE_SWITCH
)
684 gimple_switch_set_index
685 (stmt
, CASE_LOW (gimple_switch_label (stmt
, 1)));
690 unlink_stmt_vdef (stmt
);
691 gsi_remove (&bsi
, true);
703 /* Build the size argument for a memory operation call. */
706 build_size_arg_loc (location_t loc
, data_reference_p dr
, tree nb_iter
)
709 size
= fold_build2_loc (loc
, MULT_EXPR
, sizetype
,
710 fold_convert_loc (loc
, sizetype
, nb_iter
),
711 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr
))));
712 return fold_convert_loc (loc
, size_type_node
, size
);
715 /* Build an address argument for a memory operation call. */
718 build_addr_arg_loc (location_t loc
, data_reference_p dr
, tree nb_bytes
)
722 addr_base
= size_binop_loc (loc
, PLUS_EXPR
, DR_OFFSET (dr
), DR_INIT (dr
));
723 addr_base
= fold_convert_loc (loc
, sizetype
, addr_base
);
725 /* Test for a negative stride, iterating over every element. */
726 if (tree_int_cst_sgn (DR_STEP (dr
)) == -1)
728 addr_base
= size_binop_loc (loc
, MINUS_EXPR
, addr_base
,
729 fold_convert_loc (loc
, sizetype
, nb_bytes
));
730 addr_base
= size_binop_loc (loc
, PLUS_EXPR
, addr_base
,
731 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr
))));
734 return fold_build_pointer_plus_loc (loc
, DR_BASE_ADDRESS (dr
), addr_base
);
737 /* If VAL memory representation contains the same value in all bytes,
738 return that value, otherwise return -1.
739 E.g. for 0x24242424 return 0x24, for IEEE double
740 747708026454360457216.0 return 0x44, etc. */
743 const_with_all_bytes_same (tree val
)
745 unsigned char buf
[64];
748 if (integer_zerop (val
)
750 || (TREE_CODE (val
) == CONSTRUCTOR
751 && !TREE_CLOBBER_P (val
)
752 && CONSTRUCTOR_NELTS (val
) == 0))
755 if (CHAR_BIT
!= 8 || BITS_PER_UNIT
!= 8)
758 len
= native_encode_expr (val
, buf
, sizeof (buf
));
761 for (i
= 1; i
< len
; i
++)
762 if (buf
[i
] != buf
[0])
767 /* Generate a call to memset for PARTITION in LOOP. */
770 generate_memset_builtin (struct loop
*loop
, partition_t partition
)
772 gimple_stmt_iterator gsi
;
773 gimple stmt
, fn_call
;
774 tree mem
, fn
, nb_bytes
;
778 stmt
= DR_STMT (partition
->main_dr
);
779 loc
= gimple_location (stmt
);
781 /* The new statements will be placed before LOOP. */
782 gsi
= gsi_last_bb (loop_preheader_edge (loop
)->src
);
784 nb_bytes
= build_size_arg_loc (loc
, partition
->main_dr
, partition
->niter
);
785 nb_bytes
= force_gimple_operand_gsi (&gsi
, nb_bytes
, true, NULL_TREE
,
786 false, GSI_CONTINUE_LINKING
);
787 mem
= build_addr_arg_loc (loc
, partition
->main_dr
, nb_bytes
);
788 mem
= force_gimple_operand_gsi (&gsi
, mem
, true, NULL_TREE
,
789 false, GSI_CONTINUE_LINKING
);
791 /* This exactly matches the pattern recognition in classify_partition. */
792 val
= gimple_assign_rhs1 (stmt
);
793 /* Handle constants like 0x15151515 and similarly
794 floating point constants etc. where all bytes are the same. */
795 int bytev
= const_with_all_bytes_same (val
);
797 val
= build_int_cst (integer_type_node
, bytev
);
798 else if (TREE_CODE (val
) == INTEGER_CST
)
799 val
= fold_convert (integer_type_node
, val
);
800 else if (!useless_type_conversion_p (integer_type_node
, TREE_TYPE (val
)))
803 tree tem
= make_ssa_name (integer_type_node
, NULL
);
804 cstmt
= gimple_build_assign_with_ops (NOP_EXPR
, tem
, val
, NULL_TREE
);
805 gsi_insert_after (&gsi
, cstmt
, GSI_CONTINUE_LINKING
);
809 fn
= build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET
));
810 fn_call
= gimple_build_call (fn
, 3, mem
, val
, nb_bytes
);
811 gsi_insert_after (&gsi
, fn_call
, GSI_CONTINUE_LINKING
);
813 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
815 fprintf (dump_file
, "generated memset");
817 fprintf (dump_file
, " zero\n");
819 fprintf (dump_file
, "\n");
823 /* Generate a call to memcpy for PARTITION in LOOP. */
826 generate_memcpy_builtin (struct loop
*loop
, partition_t partition
)
828 gimple_stmt_iterator gsi
;
829 gimple stmt
, fn_call
;
830 tree dest
, src
, fn
, nb_bytes
;
832 enum built_in_function kind
;
834 stmt
= DR_STMT (partition
->main_dr
);
835 loc
= gimple_location (stmt
);
837 /* The new statements will be placed before LOOP. */
838 gsi
= gsi_last_bb (loop_preheader_edge (loop
)->src
);
840 nb_bytes
= build_size_arg_loc (loc
, partition
->main_dr
, partition
->niter
);
841 nb_bytes
= force_gimple_operand_gsi (&gsi
, nb_bytes
, true, NULL_TREE
,
842 false, GSI_CONTINUE_LINKING
);
843 dest
= build_addr_arg_loc (loc
, partition
->main_dr
, nb_bytes
);
844 src
= build_addr_arg_loc (loc
, partition
->secondary_dr
, nb_bytes
);
845 if (ptr_derefs_may_alias_p (dest
, src
))
846 kind
= BUILT_IN_MEMMOVE
;
848 kind
= BUILT_IN_MEMCPY
;
850 dest
= force_gimple_operand_gsi (&gsi
, dest
, true, NULL_TREE
,
851 false, GSI_CONTINUE_LINKING
);
852 src
= force_gimple_operand_gsi (&gsi
, src
, true, NULL_TREE
,
853 false, GSI_CONTINUE_LINKING
);
854 fn
= build_fold_addr_expr (builtin_decl_implicit (kind
));
855 fn_call
= gimple_build_call (fn
, 3, dest
, src
, nb_bytes
);
856 gsi_insert_after (&gsi
, fn_call
, GSI_CONTINUE_LINKING
);
858 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
860 if (kind
== BUILT_IN_MEMCPY
)
861 fprintf (dump_file
, "generated memcpy\n");
863 fprintf (dump_file
, "generated memmove\n");
867 /* Remove and destroy the loop LOOP. */
870 destroy_loop (struct loop
*loop
)
872 unsigned nbbs
= loop
->num_nodes
;
873 edge exit
= single_exit (loop
);
874 basic_block src
= loop_preheader_edge (loop
)->src
, dest
= exit
->dest
;
878 bbs
= get_loop_body_in_dom_order (loop
);
880 redirect_edge_pred (exit
, src
);
881 exit
->flags
&= ~(EDGE_TRUE_VALUE
|EDGE_FALSE_VALUE
);
882 exit
->flags
|= EDGE_FALLTHRU
;
883 cancel_loop_tree (loop
);
884 rescan_loop_exit (exit
, false, true);
886 for (i
= 0; i
< nbbs
; i
++)
888 /* We have made sure to not leave any dangling uses of SSA
889 names defined in the loop. With the exception of virtuals.
890 Make sure we replace all uses of virtual defs that will remain
891 outside of the loop with the bare symbol as delete_basic_block
892 will release them. */
893 gimple_stmt_iterator gsi
;
894 for (gsi
= gsi_start_phis (bbs
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
896 gimple phi
= gsi_stmt (gsi
);
897 if (virtual_operand_p (gimple_phi_result (phi
)))
898 mark_virtual_phi_result_for_renaming (phi
);
900 for (gsi
= gsi_start_bb (bbs
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
902 gimple stmt
= gsi_stmt (gsi
);
903 tree vdef
= gimple_vdef (stmt
);
904 if (vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
905 mark_virtual_operand_for_renaming (vdef
);
907 delete_basic_block (bbs
[i
]);
911 set_immediate_dominator (CDI_DOMINATORS
, dest
,
912 recompute_dominator (CDI_DOMINATORS
, dest
));
915 /* Generates code for PARTITION. */
918 generate_code_for_partition (struct loop
*loop
,
919 partition_t partition
, bool copy_p
)
921 switch (partition
->kind
)
924 /* Reductions all have to be in the last partition. */
925 gcc_assert (!partition_reduction_p (partition
)
927 generate_loops_for_partition (loop
, partition
, copy_p
);
931 generate_memset_builtin (loop
, partition
);
935 generate_memcpy_builtin (loop
, partition
);
942 /* Common tail for partitions we turn into a call. If this was the last
943 partition for which we generate code, we have to destroy the loop. */
949 /* Returns a partition with all the statements needed for computing
950 the vertex V of the RDG, also including the loop exit conditions. */
953 build_rdg_partition_for_vertex (struct graph
*rdg
, int v
)
955 partition_t partition
= partition_alloc (NULL
, NULL
);
956 stack_vec
<int, 3> nodes
;
960 graphds_dfs (rdg
, &v
, 1, &nodes
, false, NULL
);
962 FOR_EACH_VEC_ELT (nodes
, i
, x
)
964 bitmap_set_bit (partition
->stmts
, x
);
965 bitmap_set_bit (partition
->loops
,
966 loop_containing_stmt (RDG_STMT (rdg
, x
))->num
);
972 /* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
973 For the moment we detect only the memset zero pattern. */
976 classify_partition (loop_p loop
, struct graph
*rdg
, partition_t partition
)
981 data_reference_p single_load
, single_store
;
982 bool volatiles_p
= false;
984 partition
->kind
= PKIND_NORMAL
;
985 partition
->main_dr
= NULL
;
986 partition
->secondary_dr
= NULL
;
987 partition
->niter
= NULL_TREE
;
989 EXECUTE_IF_SET_IN_BITMAP (partition
->stmts
, 0, i
, bi
)
991 gimple stmt
= RDG_STMT (rdg
, i
);
993 if (gimple_has_volatile_ops (stmt
))
996 /* If the stmt has uses outside of the loop mark it as reduction. */
997 if (stmt_has_scalar_dependences_outside_loop (loop
, stmt
))
999 partition
->reduction_p
= true;
1004 /* Perform general partition disqualification for builtins. */
1006 || !flag_tree_loop_distribute_patterns
)
1009 /* Detect memset and memcpy. */
1011 single_store
= NULL
;
1012 EXECUTE_IF_SET_IN_BITMAP (partition
->stmts
, 0, i
, bi
)
1014 gimple stmt
= RDG_STMT (rdg
, i
);
1015 data_reference_p dr
;
1018 if (gimple_code (stmt
) == GIMPLE_PHI
)
1021 /* Any scalar stmts are ok. */
1022 if (!gimple_vuse (stmt
))
1025 /* Otherwise just regular loads/stores. */
1026 if (!gimple_assign_single_p (stmt
))
1029 /* But exactly one store and/or load. */
1030 for (j
= 0; RDG_DATAREFS (rdg
, i
).iterate (j
, &dr
); ++j
)
1032 if (DR_IS_READ (dr
))
1034 if (single_load
!= NULL
)
1040 if (single_store
!= NULL
)
1050 if (!dominated_by_p (CDI_DOMINATORS
, single_exit (loop
)->src
,
1051 gimple_bb (DR_STMT (single_store
))))
1052 nb_iter
= number_of_latch_executions (loop
);
1054 nb_iter
= number_of_exit_cond_executions (loop
);
1055 if (!nb_iter
|| nb_iter
== chrec_dont_know
)
1058 if (single_store
&& !single_load
)
1060 gimple stmt
= DR_STMT (single_store
);
1061 tree rhs
= gimple_assign_rhs1 (stmt
);
1062 if (const_with_all_bytes_same (rhs
) == -1
1063 && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs
))
1064 || (TYPE_MODE (TREE_TYPE (rhs
))
1065 != TYPE_MODE (unsigned_char_type_node
))))
1067 if (TREE_CODE (rhs
) == SSA_NAME
1068 && !SSA_NAME_IS_DEFAULT_DEF (rhs
)
1069 && flow_bb_inside_loop_p (loop
, gimple_bb (SSA_NAME_DEF_STMT (rhs
))))
1071 if (!adjacent_dr_p (single_store
)
1072 || !dominated_by_p (CDI_DOMINATORS
,
1073 loop
->latch
, gimple_bb (stmt
)))
1075 partition
->kind
= PKIND_MEMSET
;
1076 partition
->main_dr
= single_store
;
1077 partition
->niter
= nb_iter
;
1079 else if (single_store
&& single_load
)
1081 gimple store
= DR_STMT (single_store
);
1082 gimple load
= DR_STMT (single_load
);
1083 /* Direct aggregate copy or via an SSA name temporary. */
1085 && gimple_assign_lhs (load
) != gimple_assign_rhs1 (store
))
1087 if (!adjacent_dr_p (single_store
)
1088 || !adjacent_dr_p (single_load
)
1089 || !operand_equal_p (DR_STEP (single_store
),
1090 DR_STEP (single_load
), 0)
1091 || !dominated_by_p (CDI_DOMINATORS
,
1092 loop
->latch
, gimple_bb (store
)))
1094 /* Now check that if there is a dependence this dependence is
1095 of a suitable form for memmove. */
1096 vec
<loop_p
> loops
= vNULL
;
1098 loops
.safe_push (loop
);
1099 ddr
= initialize_data_dependence_relation (single_load
, single_store
,
1101 compute_affine_dependence (ddr
, loop
);
1102 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
1104 free_dependence_relation (ddr
);
1108 if (DDR_ARE_DEPENDENT (ddr
) != chrec_known
)
1110 if (DDR_NUM_DIST_VECTS (ddr
) == 0)
1112 free_dependence_relation (ddr
);
1116 lambda_vector dist_v
;
1117 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr
), i
, dist_v
)
1119 int dist
= dist_v
[index_in_loop_nest (loop
->num
,
1120 DDR_LOOP_NEST (ddr
))];
1121 if (dist
> 0 && !DDR_REVERSED_P (ddr
))
1123 free_dependence_relation (ddr
);
1129 free_dependence_relation (ddr
);
1131 partition
->kind
= PKIND_MEMCPY
;
1132 partition
->main_dr
= single_store
;
1133 partition
->secondary_dr
= single_load
;
1134 partition
->niter
= nb_iter
;
1138 /* For a data reference REF, return the declaration of its base
1139 address or NULL_TREE if the base is not determined. */
1142 ref_base_address (data_reference_p dr
)
1144 tree base_address
= DR_BASE_ADDRESS (dr
);
1146 && TREE_CODE (base_address
) == ADDR_EXPR
)
1147 return TREE_OPERAND (base_address
, 0);
1149 return base_address
;
1152 /* Returns true when PARTITION1 and PARTITION2 have similar memory
1156 similar_memory_accesses (struct graph
*rdg
, partition_t partition1
,
1157 partition_t partition2
)
1159 unsigned i
, j
, k
, l
;
1160 bitmap_iterator bi
, bj
;
1161 data_reference_p ref1
, ref2
;
1163 /* First check whether in the intersection of the two partitions are
1164 any loads or stores. Common loads are the situation that happens
1166 EXECUTE_IF_AND_IN_BITMAP (partition1
->stmts
, partition2
->stmts
, 0, i
, bi
)
1167 if (RDG_MEM_WRITE_STMT (rdg
, i
)
1168 || RDG_MEM_READS_STMT (rdg
, i
))
1171 /* Then check all data-references against each other. */
1172 EXECUTE_IF_SET_IN_BITMAP (partition1
->stmts
, 0, i
, bi
)
1173 if (RDG_MEM_WRITE_STMT (rdg
, i
)
1174 || RDG_MEM_READS_STMT (rdg
, i
))
1175 EXECUTE_IF_SET_IN_BITMAP (partition2
->stmts
, 0, j
, bj
)
1176 if (RDG_MEM_WRITE_STMT (rdg
, j
)
1177 || RDG_MEM_READS_STMT (rdg
, j
))
1179 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg
, i
), k
, ref1
)
1181 tree base1
= ref_base_address (ref1
);
1183 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg
, j
), l
, ref2
)
1184 if (base1
== ref_base_address (ref2
))
1192 /* Aggregate several components into a useful partition that is
1193 registered in the PARTITIONS vector. Partitions will be
1194 distributed in different loops. */
1197 rdg_build_partitions (struct graph
*rdg
,
1198 vec
<gimple
> starting_stmts
,
1199 vec
<partition_t
> *partitions
)
1201 bitmap processed
= BITMAP_ALLOC (NULL
);
1205 FOR_EACH_VEC_ELT (starting_stmts
, i
, stmt
)
1207 int v
= rdg_vertex_for_stmt (rdg
, stmt
);
1209 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1211 "ldist asked to generate code for vertex %d\n", v
);
1213 /* If the vertex is already contained in another partition so
1214 is the partition rooted at it. */
1215 if (bitmap_bit_p (processed
, v
))
1218 partition_t partition
= build_rdg_partition_for_vertex (rdg
, v
);
1219 bitmap_ior_into (processed
, partition
->stmts
);
1221 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1223 fprintf (dump_file
, "ldist useful partition:\n");
1224 dump_bitmap (dump_file
, partition
->stmts
);
1227 partitions
->safe_push (partition
);
1230 /* All vertices should have been assigned to at least one partition now,
1231 other than vertices belonging to dead code. */
1233 BITMAP_FREE (processed
);
1236 /* Dump to FILE the PARTITIONS. */
1239 dump_rdg_partitions (FILE *file
, vec
<partition_t
> partitions
)
1242 partition_t partition
;
1244 FOR_EACH_VEC_ELT (partitions
, i
, partition
)
1245 debug_bitmap_file (file
, partition
->stmts
);
1248 /* Debug PARTITIONS. */
1249 extern void debug_rdg_partitions (vec
<partition_t
> );
1252 debug_rdg_partitions (vec
<partition_t
> partitions
)
1254 dump_rdg_partitions (stderr
, partitions
);
1257 /* Returns the number of read and write operations in the RDG. */
1260 number_of_rw_in_rdg (struct graph
*rdg
)
1264 for (i
= 0; i
< rdg
->n_vertices
; i
++)
1266 if (RDG_MEM_WRITE_STMT (rdg
, i
))
1269 if (RDG_MEM_READS_STMT (rdg
, i
))
1276 /* Returns the number of read and write operations in a PARTITION of
1280 number_of_rw_in_partition (struct graph
*rdg
, partition_t partition
)
1286 EXECUTE_IF_SET_IN_BITMAP (partition
->stmts
, 0, i
, ii
)
1288 if (RDG_MEM_WRITE_STMT (rdg
, i
))
1291 if (RDG_MEM_READS_STMT (rdg
, i
))
1298 /* Returns true when one of the PARTITIONS contains all the read or
1299 write operations of RDG. */
1302 partition_contains_all_rw (struct graph
*rdg
,
1303 vec
<partition_t
> partitions
)
1306 partition_t partition
;
1307 int nrw
= number_of_rw_in_rdg (rdg
);
1309 FOR_EACH_VEC_ELT (partitions
, i
, partition
)
1310 if (nrw
== number_of_rw_in_partition (rdg
, partition
))
1316 /* Compute partition dependence created by the data references in DRS1
1317 and DRS2 and modify and return DIR according to that. */
1320 pg_add_dependence_edges (struct graph
*rdg
, vec
<loop_p
> loops
, int dir
,
1321 vec
<data_reference_p
> drs1
,
1322 vec
<data_reference_p
> drs2
)
1324 data_reference_p dr1
, dr2
;
1326 /* dependence direction - 0 is no dependence, -1 is back,
1327 1 is forth, 2 is both (we can stop then, merging will occur). */
1328 for (int ii
= 0; drs1
.iterate (ii
, &dr1
); ++ii
)
1329 for (int jj
= 0; drs2
.iterate (jj
, &dr2
); ++jj
)
1333 /* Re-shuffle data-refs to be in dominator order. */
1334 if (rdg_vertex_for_stmt (rdg
, DR_STMT (dr1
))
1335 > rdg_vertex_for_stmt (rdg
, DR_STMT (dr2
)))
1337 data_reference_p tem
= dr1
;
1340 this_dir
= -this_dir
;
1342 ddr
= initialize_data_dependence_relation (dr1
, dr2
, loops
);
1343 compute_affine_dependence (ddr
, loops
[0]);
1344 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
1346 else if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
1348 if (DDR_REVERSED_P (ddr
))
1350 data_reference_p tem
= dr1
;
1353 this_dir
= -this_dir
;
1355 /* Known dependences can still be unordered througout the
1356 iteration space, see gcc.dg/tree-ssa/ldist-16.c. */
1357 if (DDR_NUM_DIST_VECTS (ddr
) != 1)
1359 /* If the overlap is exact preserve stmt order. */
1360 else if (lambda_vector_zerop (DDR_DIST_VECT (ddr
, 0), 1))
1364 /* Else as the distance vector is lexicographic positive
1365 swap the dependence direction. */
1366 this_dir
= -this_dir
;
1371 free_dependence_relation (ddr
);
1374 else if (dir
!= this_dir
)
1380 /* Compare postorder number of the partition graph vertices V1 and V2. */
1383 pgcmp (const void *v1_
, const void *v2_
)
1385 const vertex
*v1
= (const vertex
*)v1_
;
1386 const vertex
*v2
= (const vertex
*)v2_
;
1387 return v2
->post
- v1
->post
;
1390 /* Distributes the code from LOOP in such a way that producer
1391 statements are placed before consumer statements. Tries to separate
1392 only the statements from STMTS into separate loops.
1393 Returns the number of distributed loops. */
1396 distribute_loop (struct loop
*loop
, vec
<gimple
> stmts
,
1397 control_dependences
*cd
, int *nb_calls
)
1400 vec
<partition_t
> partitions
;
1401 partition_t partition
;
1408 stack_vec
<loop_p
, 3> loop_nest
;
1409 if (!find_loop_nest (loop
, &loop_nest
))
1412 rdg
= build_rdg (loop_nest
, cd
);
1415 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1417 "Loop %d not distributed: failed to build the RDG.\n",
1423 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1424 dump_rdg (dump_file
, rdg
);
1426 partitions
.create (3);
1427 rdg_build_partitions (rdg
, stmts
, &partitions
);
1429 any_builtin
= false;
1430 FOR_EACH_VEC_ELT (partitions
, i
, partition
)
1432 classify_partition (loop
, rdg
, partition
);
1433 any_builtin
|= partition_builtin_p (partition
);
1436 /* If we are only distributing patterns but did not detect any,
1438 if (!flag_tree_loop_distribution
1445 /* If we are only distributing patterns fuse all partitions that
1446 were not classified as builtins. This also avoids chopping
1447 a loop into pieces, separated by builtin calls. That is, we
1448 only want no or a single loop body remaining. */
1450 if (!flag_tree_loop_distribution
)
1452 for (i
= 0; partitions
.iterate (i
, &into
); ++i
)
1453 if (!partition_builtin_p (into
))
1455 for (++i
; partitions
.iterate (i
, &partition
); ++i
)
1456 if (!partition_builtin_p (partition
))
1458 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1460 fprintf (dump_file
, "fusing non-builtin partitions\n");
1461 dump_bitmap (dump_file
, into
->stmts
);
1462 dump_bitmap (dump_file
, partition
->stmts
);
1464 partition_merge_into (into
, partition
);
1465 partitions
.unordered_remove (i
);
1466 partition_free (partition
);
1471 /* Due to limitations in the transform phase we have to fuse all
1472 reduction partitions into the last partition so the existing
1473 loop will contain all loop-closed PHI nodes. */
1474 for (i
= 0; partitions
.iterate (i
, &into
); ++i
)
1475 if (partition_reduction_p (into
))
1477 for (i
= i
+ 1; partitions
.iterate (i
, &partition
); ++i
)
1478 if (partition_reduction_p (partition
))
1480 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1482 fprintf (dump_file
, "fusing partitions\n");
1483 dump_bitmap (dump_file
, into
->stmts
);
1484 dump_bitmap (dump_file
, partition
->stmts
);
1485 fprintf (dump_file
, "because they have reductions\n");
1487 partition_merge_into (into
, partition
);
1488 partitions
.unordered_remove (i
);
1489 partition_free (partition
);
1493 /* Apply our simple cost model - fuse partitions with similar
1495 for (i
= 0; partitions
.iterate (i
, &into
); ++i
)
1497 if (partition_builtin_p (into
))
1500 partitions
.iterate (j
, &partition
); ++j
)
1502 if (!partition_builtin_p (partition
)
1503 && similar_memory_accesses (rdg
, into
, partition
))
1505 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1507 fprintf (dump_file
, "fusing partitions\n");
1508 dump_bitmap (dump_file
, into
->stmts
);
1509 dump_bitmap (dump_file
, partition
->stmts
);
1510 fprintf (dump_file
, "because they have similar "
1511 "memory accesses\n");
1513 partition_merge_into (into
, partition
);
1514 partitions
.unordered_remove (j
);
1515 partition_free (partition
);
1521 /* Build the partition dependency graph. */
1522 if (partitions
.length () > 1)
1524 pg
= new_graph (partitions
.length ());
1526 partition_t partition
;
1527 vec
<data_reference_p
> writes
;
1528 vec
<data_reference_p
> reads
;
1530 #define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
1531 for (i
= 0; partitions
.iterate (i
, &partition
); ++i
)
1533 vertex
*v
= &pg
->vertices
[i
];
1534 pgdata
*data
= new pgdata
;
1535 data_reference_p dr
;
1536 /* FIXME - leaks. */
1540 data
->partition
= partition
;
1541 data
->reads
= vNULL
;
1542 data
->writes
= vNULL
;
1543 EXECUTE_IF_SET_IN_BITMAP (partition
->stmts
, 0, j
, bi
)
1544 for (int k
= 0; RDG_DATAREFS (rdg
, j
).iterate (k
, &dr
); ++k
)
1545 if (DR_IS_READ (dr
))
1546 data
->reads
.safe_push (dr
);
1548 data
->writes
.safe_push (dr
);
1550 partition_t partition1
, partition2
;
1551 for (i
= 0; partitions
.iterate (i
, &partition1
); ++i
)
1552 for (int j
= i
+ 1; partitions
.iterate (j
, &partition2
); ++j
)
1554 /* dependence direction - 0 is no dependence, -1 is back,
1555 1 is forth, 2 is both (we can stop then, merging will occur). */
1557 dir
= pg_add_dependence_edges (rdg
, loop_nest
, dir
,
1561 dir
= pg_add_dependence_edges (rdg
, loop_nest
, dir
,
1565 dir
= pg_add_dependence_edges (rdg
, loop_nest
, dir
,
1568 if (dir
== 1 || dir
== 2)
1569 add_edge (pg
, i
, j
);
1570 if (dir
== -1 || dir
== 2)
1571 add_edge (pg
, j
, i
);
1574 /* Add edges to the reduction partition (if any) to force it last. */
1576 for (j
= 0; partitions
.iterate (j
, &partition
); ++j
)
1577 if (partition_reduction_p (partition
))
1579 if (j
< partitions
.length ())
1581 for (unsigned i
= 0; partitions
.iterate (i
, &partition
); ++i
)
1583 add_edge (pg
, i
, j
);
1586 /* Compute partitions we cannot separate and fuse them. */
1587 num_sccs
= graphds_scc (pg
, NULL
);
1588 for (i
= 0; i
< num_sccs
; ++i
)
1592 for (j
= 0; partitions
.iterate (j
, &first
); ++j
)
1593 if (pg
->vertices
[j
].component
== i
)
1595 for (j
= j
+ 1; partitions
.iterate (j
, &partition
); ++j
)
1596 if (pg
->vertices
[j
].component
== i
)
1598 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1600 fprintf (dump_file
, "fusing partitions\n");
1601 dump_bitmap (dump_file
, first
->stmts
);
1602 dump_bitmap (dump_file
, partition
->stmts
);
1603 fprintf (dump_file
, "because they are in the same "
1604 "dependence SCC\n");
1606 partition_merge_into (first
, partition
);
1607 partitions
[j
] = NULL
;
1608 partition_free (partition
);
1609 PGDATA (j
)->partition
= NULL
;
1613 /* Now order the remaining nodes in postorder. */
1614 qsort (pg
->vertices
, pg
->n_vertices
, sizeof (vertex
), pgcmp
);
1615 partitions
.truncate (0);
1616 for (i
= 0; i
< pg
->n_vertices
; ++i
)
1618 pgdata
*data
= PGDATA (i
);
1619 if (data
->partition
)
1620 partitions
.safe_push (data
->partition
);
1621 data
->reads
.release ();
1622 data
->writes
.release ();
1625 gcc_assert (partitions
.length () == (unsigned)num_sccs
);
1629 nbp
= partitions
.length ();
1631 || (nbp
== 1 && !partition_builtin_p (partitions
[0]))
1632 || (nbp
> 1 && partition_contains_all_rw (rdg
, partitions
)))
1638 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1639 dump_rdg_partitions (dump_file
, partitions
);
1641 FOR_EACH_VEC_ELT (partitions
, i
, partition
)
1643 if (partition_builtin_p (partition
))
1645 generate_code_for_partition (loop
, partition
, i
< nbp
- 1);
1650 FOR_EACH_VEC_ELT (partitions
, i
, partition
)
1651 partition_free (partition
);
1652 partitions
.release ();
1655 return nbp
- *nb_calls
;
1658 /* Distribute all loops in the current function. */
1661 tree_loop_distribution (void)
1664 bool changed
= false;
1666 control_dependences
*cd
= NULL
;
1670 gimple_stmt_iterator gsi
;
1671 for (gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1672 gimple_set_uid (gsi_stmt (gsi
), -1);
1673 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1674 gimple_set_uid (gsi_stmt (gsi
), -1);
1677 /* We can at the moment only distribute non-nested loops, thus restrict
1678 walking to innermost loops. */
1679 FOR_EACH_LOOP (loop
, LI_ONLY_INNERMOST
)
1681 vec
<gimple
> work_list
= vNULL
;
1683 int num
= loop
->num
;
1686 /* If the loop doesn't have a single exit we will fail anyway,
1687 so do that early. */
1688 if (!single_exit (loop
))
1691 /* Only optimize hot loops. */
1692 if (!optimize_loop_for_speed_p (loop
))
1695 /* Initialize the worklist with stmts we seed the partitions with. */
1696 bbs
= get_loop_body_in_dom_order (loop
);
1697 for (i
= 0; i
< loop
->num_nodes
; ++i
)
1699 gimple_stmt_iterator gsi
;
1700 for (gsi
= gsi_start_phis (bbs
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
1702 gimple phi
= gsi_stmt (gsi
);
1703 if (virtual_operand_p (gimple_phi_result (phi
)))
1705 /* Distribute stmts which have defs that are used outside of
1707 if (!stmt_has_scalar_dependences_outside_loop (loop
, phi
))
1709 work_list
.safe_push (phi
);
1711 for (gsi
= gsi_start_bb (bbs
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
1713 gimple stmt
= gsi_stmt (gsi
);
1715 /* If there is a stmt with side-effects bail out - we
1716 cannot and should not distribute this loop. */
1717 if (gimple_has_side_effects (stmt
))
1719 work_list
.truncate (0);
1723 /* Distribute stmts which have defs that are used outside of
1725 if (stmt_has_scalar_dependences_outside_loop (loop
, stmt
))
1727 /* Otherwise only distribute stores for now. */
1728 else if (!gimple_vdef (stmt
))
1731 work_list
.safe_push (stmt
);
1737 int nb_generated_loops
= 0;
1738 int nb_generated_calls
= 0;
1739 location_t loc
= find_loop_location (loop
);
1740 if (work_list
.length () > 0)
1744 calculate_dominance_info (CDI_DOMINATORS
);
1745 calculate_dominance_info (CDI_POST_DOMINATORS
);
1746 cd
= new control_dependences (create_edge_list ());
1747 free_dominance_info (CDI_POST_DOMINATORS
);
1749 nb_generated_loops
= distribute_loop (loop
, work_list
, cd
,
1750 &nb_generated_calls
);
1753 if (nb_generated_loops
+ nb_generated_calls
> 0)
1756 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
,
1757 loc
, "Loop %d distributed: split to %d loops "
1758 "and %d library calls.\n",
1759 num
, nb_generated_loops
, nb_generated_calls
);
1761 else if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1762 fprintf (dump_file
, "Loop %d is the same.\n", num
);
1764 work_list
.release ();
1772 mark_virtual_operands_for_renaming (cfun
);
1773 rewrite_into_loop_closed_ssa (NULL
, TODO_update_ssa
);
1776 #ifdef ENABLE_CHECKING
1777 verify_loop_structure ();
1784 gate_tree_loop_distribution (void)
1786 return flag_tree_loop_distribution
1787 || flag_tree_loop_distribute_patterns
;
1792 const pass_data pass_data_loop_distribution
=
1794 GIMPLE_PASS
, /* type */
1796 OPTGROUP_LOOP
, /* optinfo_flags */
1797 true, /* has_gate */
1798 true, /* has_execute */
1799 TV_TREE_LOOP_DISTRIBUTION
, /* tv_id */
1800 ( PROP_cfg
| PROP_ssa
), /* properties_required */
1801 0, /* properties_provided */
1802 0, /* properties_destroyed */
1803 0, /* todo_flags_start */
1804 TODO_verify_ssa
, /* todo_flags_finish */
1807 class pass_loop_distribution
: public gimple_opt_pass
1810 pass_loop_distribution (gcc::context
*ctxt
)
1811 : gimple_opt_pass (pass_data_loop_distribution
, ctxt
)
1814 /* opt_pass methods: */
1815 bool gate () { return gate_tree_loop_distribution (); }
1816 unsigned int execute () { return tree_loop_distribution (); }
1818 }; // class pass_loop_distribution
1823 make_pass_loop_distribution (gcc::context
*ctxt
)
1825 return new pass_loop_distribution (ctxt
);