   Copyright (C) 2006-2013 Free Software Foundation, Inc.
   Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
   and Sebastian Pop <sebastian.pop@amd.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
22 /* This pass performs loop distribution: for example, the loop
39 This pass uses an RDG, Reduced Dependence Graph built on top of the
40 data dependence relations. The RDG is then topologically sorted to
41 obtain a map of information producers/consumers based on which it
42 generates the new loops. */
46 #include "coretypes.h"
49 #include "tree-chrec.h"
50 #include "tree-data-ref.h"
51 #include "tree-scalar-evolution.h"
52 #include "tree-pass.h"
53 #include "gimple-pretty-print.h"
56 /* A Reduced Dependence Graph (RDG) vertex representing a statement. */
57 typedef struct rdg_vertex
59 /* The statement represented by this vertex. */
62 /* Vector of data-references in this statement. */
63 vec
<data_reference_p
> datarefs
;
65 /* True when the statement contains a write to memory. */
68 /* True when the statement contains a read from memory. */
72 #define RDGV_STMT(V) ((struct rdg_vertex *) ((V)->data))->stmt
73 #define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
74 #define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
75 #define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
76 #define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
77 #define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
78 #define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
79 #define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
81 /* Data dependence type. */
85 /* Read After Write (RAW). */
88 /* Write After Read (WAR). */
91 /* Write After Write (WAW). */
94 /* Read After Read (RAR). */
98 /* Dependence information attached to an edge of the RDG. */
100 typedef struct rdg_edge
102 /* Type of the dependence. */
103 enum rdg_dep_type type
;
105 /* Levels of the dependence: the depth of the loops that carry the
109 /* Dependence relation between data dependences, NULL when one of
110 the vertices is a scalar. */
114 #define RDGE_TYPE(E) ((struct rdg_edge *) ((E)->data))->type
115 #define RDGE_LEVEL(E) ((struct rdg_edge *) ((E)->data))->level
116 #define RDGE_RELATION(E) ((struct rdg_edge *) ((E)->data))->relation
118 /* Strongly connected components of the reduced data dependence graph. */
120 typedef struct rdg_component
126 /* Dump vertex I in RDG to FILE. */
129 dump_rdg_vertex (FILE *file
, struct graph
*rdg
, int i
)
131 struct vertex
*v
= &(rdg
->vertices
[i
]);
132 struct graph_edge
*e
;
134 fprintf (file
, "(vertex %d: (%s%s) (in:", i
,
135 RDG_MEM_WRITE_STMT (rdg
, i
) ? "w" : "",
136 RDG_MEM_READS_STMT (rdg
, i
) ? "r" : "");
139 for (e
= v
->pred
; e
; e
= e
->pred_next
)
140 fprintf (file
, " %d", e
->src
);
142 fprintf (file
, ") (out:");
145 for (e
= v
->succ
; e
; e
= e
->succ_next
)
146 fprintf (file
, " %d", e
->dest
);
148 fprintf (file
, ")\n");
149 print_gimple_stmt (file
, RDGV_STMT (v
), 0, TDF_VOPS
|TDF_MEMSYMS
);
150 fprintf (file
, ")\n");
153 /* Call dump_rdg_vertex on stderr. */
156 debug_rdg_vertex (struct graph
*rdg
, int i
)
158 dump_rdg_vertex (stderr
, rdg
, i
);
161 /* Dump component C of RDG to FILE. If DUMPED is non-null, set the
162 dumped vertices to that bitmap. */
165 dump_rdg_component (FILE *file
, struct graph
*rdg
, int c
, bitmap dumped
)
169 fprintf (file
, "(%d\n", c
);
171 for (i
= 0; i
< rdg
->n_vertices
; i
++)
172 if (rdg
->vertices
[i
].component
== c
)
175 bitmap_set_bit (dumped
, i
);
177 dump_rdg_vertex (file
, rdg
, i
);
180 fprintf (file
, ")\n");
183 /* Call dump_rdg_vertex on stderr. */
186 debug_rdg_component (struct graph
*rdg
, int c
)
188 dump_rdg_component (stderr
, rdg
, c
, NULL
);
191 /* Dump the reduced dependence graph RDG to FILE. */
194 dump_rdg (FILE *file
, struct graph
*rdg
)
197 bitmap dumped
= BITMAP_ALLOC (NULL
);
199 fprintf (file
, "(rdg\n");
201 for (i
= 0; i
< rdg
->n_vertices
; i
++)
202 if (!bitmap_bit_p (dumped
, i
))
203 dump_rdg_component (file
, rdg
, rdg
->vertices
[i
].component
, dumped
);
205 fprintf (file
, ")\n");
206 BITMAP_FREE (dumped
);
209 /* Call dump_rdg on stderr. */
212 debug_rdg (struct graph
*rdg
)
214 dump_rdg (stderr
, rdg
);
218 dot_rdg_1 (FILE *file
, struct graph
*rdg
)
221 pretty_printer buffer
;
222 pp_needs_newline (&buffer
) = false;
223 buffer
.buffer
->stream
= file
;
225 fprintf (file
, "digraph RDG {\n");
227 for (i
= 0; i
< rdg
->n_vertices
; i
++)
229 struct vertex
*v
= &(rdg
->vertices
[i
]);
230 struct graph_edge
*e
;
232 fprintf (file
, "%d [label=\"[%d] ", i
, i
);
233 pp_gimple_stmt_1 (&buffer
, RDGV_STMT (v
), 0, TDF_SLIM
);
235 fprintf (file
, "\"]\n");
237 /* Highlight reads from memory. */
238 if (RDG_MEM_READS_STMT (rdg
, i
))
239 fprintf (file
, "%d [style=filled, fillcolor=green]\n", i
);
241 /* Highlight stores to memory. */
242 if (RDG_MEM_WRITE_STMT (rdg
, i
))
243 fprintf (file
, "%d [style=filled, fillcolor=red]\n", i
);
246 for (e
= v
->succ
; e
; e
= e
->succ_next
)
247 switch (RDGE_TYPE (e
))
250 fprintf (file
, "%d -> %d [label=input] \n", i
, e
->dest
);
254 fprintf (file
, "%d -> %d [label=output] \n", i
, e
->dest
);
258 /* These are the most common dependences: don't print these. */
259 fprintf (file
, "%d -> %d \n", i
, e
->dest
);
263 fprintf (file
, "%d -> %d [label=anti] \n", i
, e
->dest
);
271 fprintf (file
, "}\n\n");
274 /* Display the Reduced Dependence Graph using dotty. */
277 dot_rdg (struct graph
*rdg
)
279 /* When debugging, you may want to enable the following code. */
281 FILE *file
= popen("dot -Tx11", "w");
284 dot_rdg_1 (file
, rdg
);
286 close (fileno (file
));
289 dot_rdg_1 (stderr
, rdg
);
293 /* Returns the index of STMT in RDG. */
296 rdg_vertex_for_stmt (struct graph
*rdg ATTRIBUTE_UNUSED
, gimple stmt
)
298 int index
= gimple_uid (stmt
);
299 gcc_checking_assert (index
== -1 || RDG_STMT (rdg
, index
) == stmt
);
303 /* Creates an edge in RDG for each distance vector from DDR. The
304 order that we keep track of in the RDG is the order in which
305 statements have to be executed. */
308 create_rdg_edge_for_ddr (struct graph
*rdg
, ddr_p ddr
)
310 struct graph_edge
*e
;
312 data_reference_p dra
= DDR_A (ddr
);
313 data_reference_p drb
= DDR_B (ddr
);
314 unsigned level
= ddr_dependence_level (ddr
);
316 /* For non scalar dependences, when the dependence is REVERSED,
317 statement B has to be executed before statement A. */
319 && !DDR_REVERSED_P (ddr
))
321 data_reference_p tmp
= dra
;
326 va
= rdg_vertex_for_stmt (rdg
, DR_STMT (dra
));
327 vb
= rdg_vertex_for_stmt (rdg
, DR_STMT (drb
));
329 if (va
< 0 || vb
< 0)
332 e
= add_edge (rdg
, va
, vb
);
333 e
->data
= XNEW (struct rdg_edge
);
335 RDGE_LEVEL (e
) = level
;
336 RDGE_RELATION (e
) = ddr
;
338 /* Determines the type of the data dependence. */
339 if (DR_IS_READ (dra
) && DR_IS_READ (drb
))
340 RDGE_TYPE (e
) = input_dd
;
341 else if (DR_IS_WRITE (dra
) && DR_IS_WRITE (drb
))
342 RDGE_TYPE (e
) = output_dd
;
343 else if (DR_IS_WRITE (dra
) && DR_IS_READ (drb
))
344 RDGE_TYPE (e
) = flow_dd
;
345 else if (DR_IS_READ (dra
) && DR_IS_WRITE (drb
))
346 RDGE_TYPE (e
) = anti_dd
;
349 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
350 the index of DEF in RDG. */
353 create_rdg_edges_for_scalar (struct graph
*rdg
, tree def
, int idef
)
355 use_operand_p imm_use_p
;
356 imm_use_iterator iterator
;
358 FOR_EACH_IMM_USE_FAST (imm_use_p
, iterator
, def
)
360 struct graph_edge
*e
;
361 int use
= rdg_vertex_for_stmt (rdg
, USE_STMT (imm_use_p
));
366 e
= add_edge (rdg
, idef
, use
);
367 e
->data
= XNEW (struct rdg_edge
);
368 RDGE_TYPE (e
) = flow_dd
;
369 RDGE_RELATION (e
) = NULL
;
373 /* Creates the edges of the reduced dependence graph RDG. */
376 create_rdg_edges (struct graph
*rdg
, vec
<ddr_p
> ddrs
)
379 struct data_dependence_relation
*ddr
;
383 FOR_EACH_VEC_ELT (ddrs
, i
, ddr
)
384 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
385 create_rdg_edge_for_ddr (rdg
, ddr
);
387 free_dependence_relation (ddr
);
389 for (i
= 0; i
< rdg
->n_vertices
; i
++)
390 FOR_EACH_PHI_OR_STMT_DEF (def_p
, RDG_STMT (rdg
, i
),
392 create_rdg_edges_for_scalar (rdg
, DEF_FROM_PTR (def_p
), i
);
395 /* Build the vertices of the reduced dependence graph RDG. Return false
399 create_rdg_vertices (struct graph
*rdg
, vec
<gimple
> stmts
, loop_p loop
,
400 vec
<data_reference_p
> *datarefs
)
405 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
407 struct vertex
*v
= &(rdg
->vertices
[i
]);
409 /* Record statement to vertex mapping. */
410 gimple_set_uid (stmt
, i
);
412 v
->data
= XNEW (struct rdg_vertex
);
413 RDGV_STMT (v
) = stmt
;
414 RDGV_DATAREFS (v
).create (0);
415 RDGV_HAS_MEM_WRITE (v
) = false;
416 RDGV_HAS_MEM_READS (v
) = false;
417 if (gimple_code (stmt
) == GIMPLE_PHI
)
420 unsigned drp
= datarefs
->length ();
421 if (!find_data_references_in_stmt (loop
, stmt
, datarefs
))
423 for (unsigned j
= drp
; j
< datarefs
->length (); ++j
)
425 data_reference_p dr
= (*datarefs
)[j
];
427 RDGV_HAS_MEM_READS (v
) = true;
429 RDGV_HAS_MEM_WRITE (v
) = true;
430 RDGV_DATAREFS (v
).safe_push (dr
);
436 /* Initialize STMTS with all the statements of LOOP. When
437 INCLUDE_PHIS is true, include also the PHI nodes. The order in
438 which we discover statements is important as
439 generate_loops_for_partition is using the same traversal for
440 identifying statements. */
443 stmts_from_loop (struct loop
*loop
, vec
<gimple
> *stmts
)
446 basic_block
*bbs
= get_loop_body_in_dom_order (loop
);
448 for (i
= 0; i
< loop
->num_nodes
; i
++)
450 basic_block bb
= bbs
[i
];
451 gimple_stmt_iterator bsi
;
454 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
455 stmts
->safe_push (gsi_stmt (bsi
));
457 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
459 stmt
= gsi_stmt (bsi
);
460 if (gimple_code (stmt
) != GIMPLE_LABEL
&& !is_gimple_debug (stmt
))
461 stmts
->safe_push (stmt
);
468 /* Returns true when all the dependences are computable. */
471 known_dependences_p (vec
<ddr_p
> dependence_relations
)
476 FOR_EACH_VEC_ELT (dependence_relations
, i
, ddr
)
477 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
483 /* Build the Reduced Dependence Graph (RDG) with one vertex per
484 statement of the loop nest, and one edge per data dependence or
485 scalar dependence. */
488 build_empty_rdg (int n_stmts
)
490 struct graph
*rdg
= new_graph (n_stmts
);
494 /* Free the reduced dependence graph RDG. */
497 free_rdg (struct graph
*rdg
)
501 for (i
= 0; i
< rdg
->n_vertices
; i
++)
503 struct vertex
*v
= &(rdg
->vertices
[i
]);
504 struct graph_edge
*e
;
506 for (e
= v
->succ
; e
; e
= e
->succ_next
)
508 free_dependence_relation (RDGE_RELATION (e
));
514 gimple_set_uid (RDGV_STMT (v
), -1);
515 free_data_refs (RDGV_DATAREFS (v
));
523 /* Build the Reduced Dependence Graph (RDG) with one vertex per
524 statement of the loop nest LOOP_NEST, and one edge per data dependence or
525 scalar dependence. */
527 static struct graph
*
528 build_rdg (vec
<loop_p
> loop_nest
)
532 vec
<data_reference_p
> datarefs
;
533 vec
<ddr_p
> dependence_relations
;
535 /* Create the RDG vertices from the stmts of the loop nest. */
537 stmts_from_loop (loop_nest
[0], &stmts
);
538 rdg
= build_empty_rdg (stmts
.length ());
539 datarefs
.create (10);
540 if (!create_rdg_vertices (rdg
, stmts
, loop_nest
[0], &datarefs
))
549 /* Create the RDG edges from the data dependences in the loop nest. */
550 dependence_relations
.create (100);
551 if (!compute_all_dependences (datarefs
, &dependence_relations
, loop_nest
,
553 || !known_dependences_p (dependence_relations
))
555 free_dependence_relations (dependence_relations
);
560 create_rdg_edges (rdg
, dependence_relations
);
561 dependence_relations
.release ();
567 /* Determines whether the statement from vertex V of the RDG has a
568 definition used outside the loop that contains this statement. */
571 rdg_defs_used_in_other_loops_p (struct graph
*rdg
, int v
)
573 gimple stmt
= RDG_STMT (rdg
, v
);
574 struct loop
*loop
= loop_containing_stmt (stmt
);
575 use_operand_p imm_use_p
;
576 imm_use_iterator iterator
;
583 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt
, it
, SSA_OP_DEF
)
585 FOR_EACH_IMM_USE_FAST (imm_use_p
, iterator
, DEF_FROM_PTR (def_p
))
587 if (loop_containing_stmt (USE_STMT (imm_use_p
)) != loop
)
597 enum partition_kind
{
598 PKIND_NORMAL
, PKIND_REDUCTION
, PKIND_MEMSET
, PKIND_MEMCPY
601 typedef struct partition_s
606 enum partition_kind kind
;
607 /* data-references a kind != PKIND_NORMAL partition is about. */
608 data_reference_p main_dr
;
609 data_reference_p secondary_dr
;
613 /* Allocate and initialize a partition from BITMAP. */
616 partition_alloc (bitmap stmts
, bitmap loops
)
618 partition_t partition
= XCNEW (struct partition_s
);
619 partition
->stmts
= stmts
? stmts
: BITMAP_ALLOC (NULL
);
620 partition
->loops
= loops
? loops
: BITMAP_ALLOC (NULL
);
621 partition
->has_writes
= false;
622 partition
->kind
= PKIND_NORMAL
;
626 /* Free PARTITION. */
629 partition_free (partition_t partition
)
631 BITMAP_FREE (partition
->stmts
);
632 BITMAP_FREE (partition
->loops
);
636 /* Returns true if the partition can be generated as a builtin. */
639 partition_builtin_p (partition_t partition
)
641 return partition
->kind
> PKIND_REDUCTION
;
644 /* Returns true if the partition has an writes. */
647 partition_has_writes (partition_t partition
)
649 return partition
->has_writes
;
652 /* Returns true when DEF is an SSA_NAME defined in LOOP and used after
656 ssa_name_has_uses_outside_loop_p (tree def
, loop_p loop
)
658 imm_use_iterator imm_iter
;
661 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, def
)
663 gimple use_stmt
= USE_STMT (use_p
);
664 if (!is_gimple_debug (use_stmt
)
665 && loop
!= loop_containing_stmt (use_stmt
))
672 /* Returns true when STMT defines a scalar variable used after the
676 stmt_has_scalar_dependences_outside_loop (loop_p loop
, gimple stmt
)
681 if (gimple_code (stmt
) == GIMPLE_PHI
)
682 return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt
), loop
);
684 FOR_EACH_SSA_DEF_OPERAND (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
685 if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p
), loop
))
691 /* Return a copy of LOOP placed before LOOP. */
694 copy_loop_before (struct loop
*loop
)
697 edge preheader
= loop_preheader_edge (loop
);
699 initialize_original_copy_tables ();
700 res
= slpeel_tree_duplicate_loop_to_edge_cfg (loop
, preheader
);
701 gcc_assert (res
!= NULL
);
702 free_original_copy_tables ();
703 delete_update_ssa ();
708 /* Creates an empty basic block after LOOP. */
711 create_bb_after_loop (struct loop
*loop
)
713 edge exit
= single_exit (loop
);
721 /* Generate code for PARTITION from the code in LOOP. The loop is
722 copied when COPY_P is true. All the statements not flagged in the
723 PARTITION bitmap are removed from the loop or from its copy. The
724 statements are indexed in sequence inside a basic block, and the
725 basic blocks of a loop are taken in dom order. */
728 generate_loops_for_partition (struct loop
*loop
, partition_t partition
,
732 gimple_stmt_iterator bsi
;
737 loop
= copy_loop_before (loop
);
738 gcc_assert (loop
!= NULL
);
739 create_preheader (loop
, CP_SIMPLE_PREHEADERS
);
740 create_bb_after_loop (loop
);
743 /* Remove stmts not in the PARTITION bitmap. The order in which we
744 visit the phi nodes and the statements is exactly as in
746 bbs
= get_loop_body_in_dom_order (loop
);
748 if (MAY_HAVE_DEBUG_STMTS
)
749 for (x
= 0, i
= 0; i
< loop
->num_nodes
; i
++)
751 basic_block bb
= bbs
[i
];
753 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
754 if (!bitmap_bit_p (partition
->stmts
, x
++))
755 reset_debug_uses (gsi_stmt (bsi
));
757 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
759 gimple stmt
= gsi_stmt (bsi
);
760 if (gimple_code (stmt
) != GIMPLE_LABEL
761 && !is_gimple_debug (stmt
)
762 && !bitmap_bit_p (partition
->stmts
, x
++))
763 reset_debug_uses (stmt
);
767 for (x
= 0, i
= 0; i
< loop
->num_nodes
; i
++)
769 basic_block bb
= bbs
[i
];
771 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
);)
772 if (!bitmap_bit_p (partition
->stmts
, x
++))
774 gimple phi
= gsi_stmt (bsi
);
775 if (virtual_operand_p (gimple_phi_result (phi
)))
776 mark_virtual_phi_result_for_renaming (phi
);
777 remove_phi_node (&bsi
, true);
782 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
);)
784 gimple stmt
= gsi_stmt (bsi
);
785 if (gimple_code (stmt
) != GIMPLE_LABEL
786 && !is_gimple_debug (stmt
)
787 && !bitmap_bit_p (partition
->stmts
, x
++))
789 unlink_stmt_vdef (stmt
);
790 gsi_remove (&bsi
, true);
801 /* Build the size argument for a memory operation call. */
804 build_size_arg_loc (location_t loc
, data_reference_p dr
, tree nb_iter
)
807 size
= fold_build2_loc (loc
, MULT_EXPR
, sizetype
,
808 fold_convert_loc (loc
, sizetype
, nb_iter
),
809 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr
))));
810 return fold_convert_loc (loc
, size_type_node
, size
);
813 /* Build an address argument for a memory operation call. */
816 build_addr_arg_loc (location_t loc
, data_reference_p dr
, tree nb_bytes
)
820 addr_base
= size_binop_loc (loc
, PLUS_EXPR
, DR_OFFSET (dr
), DR_INIT (dr
));
821 addr_base
= fold_convert_loc (loc
, sizetype
, addr_base
);
823 /* Test for a negative stride, iterating over every element. */
824 if (tree_int_cst_sgn (DR_STEP (dr
)) == -1)
826 addr_base
= size_binop_loc (loc
, MINUS_EXPR
, addr_base
,
827 fold_convert_loc (loc
, sizetype
, nb_bytes
));
828 addr_base
= size_binop_loc (loc
, PLUS_EXPR
, addr_base
,
829 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr
))));
832 return fold_build_pointer_plus_loc (loc
, DR_BASE_ADDRESS (dr
), addr_base
);
835 /* If VAL memory representation contains the same value in all bytes,
836 return that value, otherwise return -1.
837 E.g. for 0x24242424 return 0x24, for IEEE double
838 747708026454360457216.0 return 0x44, etc. */
841 const_with_all_bytes_same (tree val
)
843 unsigned char buf
[64];
846 if (integer_zerop (val
)
848 || (TREE_CODE (val
) == CONSTRUCTOR
849 && !TREE_CLOBBER_P (val
)
850 && CONSTRUCTOR_NELTS (val
) == 0))
853 if (CHAR_BIT
!= 8 || BITS_PER_UNIT
!= 8)
856 len
= native_encode_expr (val
, buf
, sizeof (buf
));
859 for (i
= 1; i
< len
; i
++)
860 if (buf
[i
] != buf
[0])
865 /* Generate a call to memset for PARTITION in LOOP. */
868 generate_memset_builtin (struct loop
*loop
, partition_t partition
)
870 gimple_stmt_iterator gsi
;
871 gimple stmt
, fn_call
;
872 tree nb_iter
, mem
, fn
, nb_bytes
;
876 stmt
= DR_STMT (partition
->main_dr
);
877 loc
= gimple_location (stmt
);
878 if (gimple_bb (stmt
) == loop
->latch
)
879 nb_iter
= number_of_latch_executions (loop
);
881 nb_iter
= number_of_exit_cond_executions (loop
);
883 /* The new statements will be placed before LOOP. */
884 gsi
= gsi_last_bb (loop_preheader_edge (loop
)->src
);
886 nb_bytes
= build_size_arg_loc (loc
, partition
->main_dr
, nb_iter
);
887 nb_bytes
= force_gimple_operand_gsi (&gsi
, nb_bytes
, true, NULL_TREE
,
888 false, GSI_CONTINUE_LINKING
);
889 mem
= build_addr_arg_loc (loc
, partition
->main_dr
, nb_bytes
);
890 mem
= force_gimple_operand_gsi (&gsi
, mem
, true, NULL_TREE
,
891 false, GSI_CONTINUE_LINKING
);
893 /* This exactly matches the pattern recognition in classify_partition. */
894 val
= gimple_assign_rhs1 (stmt
);
895 /* Handle constants like 0x15151515 and similarly
896 floating point constants etc. where all bytes are the same. */
897 int bytev
= const_with_all_bytes_same (val
);
899 val
= build_int_cst (integer_type_node
, bytev
);
900 else if (TREE_CODE (val
) == INTEGER_CST
)
901 val
= fold_convert (integer_type_node
, val
);
902 else if (!useless_type_conversion_p (integer_type_node
, TREE_TYPE (val
)))
905 tree tem
= make_ssa_name (integer_type_node
, NULL
);
906 cstmt
= gimple_build_assign_with_ops (NOP_EXPR
, tem
, val
, NULL_TREE
);
907 gsi_insert_after (&gsi
, cstmt
, GSI_CONTINUE_LINKING
);
911 fn
= build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET
));
912 fn_call
= gimple_build_call (fn
, 3, mem
, val
, nb_bytes
);
913 gsi_insert_after (&gsi
, fn_call
, GSI_CONTINUE_LINKING
);
915 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
917 fprintf (dump_file
, "generated memset");
919 fprintf (dump_file
, " zero\n");
921 fprintf (dump_file
, "\n");
925 /* Generate a call to memcpy for PARTITION in LOOP. */
928 generate_memcpy_builtin (struct loop
*loop
, partition_t partition
)
930 gimple_stmt_iterator gsi
;
931 gimple stmt
, fn_call
;
932 tree nb_iter
, dest
, src
, fn
, nb_bytes
;
934 enum built_in_function kind
;
936 stmt
= DR_STMT (partition
->main_dr
);
937 loc
= gimple_location (stmt
);
938 if (gimple_bb (stmt
) == loop
->latch
)
939 nb_iter
= number_of_latch_executions (loop
);
941 nb_iter
= number_of_exit_cond_executions (loop
);
943 /* The new statements will be placed before LOOP. */
944 gsi
= gsi_last_bb (loop_preheader_edge (loop
)->src
);
946 nb_bytes
= build_size_arg_loc (loc
, partition
->main_dr
, nb_iter
);
947 nb_bytes
= force_gimple_operand_gsi (&gsi
, nb_bytes
, true, NULL_TREE
,
948 false, GSI_CONTINUE_LINKING
);
949 dest
= build_addr_arg_loc (loc
, partition
->main_dr
, nb_bytes
);
950 src
= build_addr_arg_loc (loc
, partition
->secondary_dr
, nb_bytes
);
951 if (ptr_derefs_may_alias_p (dest
, src
))
952 kind
= BUILT_IN_MEMMOVE
;
954 kind
= BUILT_IN_MEMCPY
;
956 dest
= force_gimple_operand_gsi (&gsi
, dest
, true, NULL_TREE
,
957 false, GSI_CONTINUE_LINKING
);
958 src
= force_gimple_operand_gsi (&gsi
, src
, true, NULL_TREE
,
959 false, GSI_CONTINUE_LINKING
);
960 fn
= build_fold_addr_expr (builtin_decl_implicit (kind
));
961 fn_call
= gimple_build_call (fn
, 3, dest
, src
, nb_bytes
);
962 gsi_insert_after (&gsi
, fn_call
, GSI_CONTINUE_LINKING
);
964 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
966 if (kind
== BUILT_IN_MEMCPY
)
967 fprintf (dump_file
, "generated memcpy\n");
969 fprintf (dump_file
, "generated memmove\n");
973 /* Remove and destroy the loop LOOP. */
976 destroy_loop (struct loop
*loop
)
978 unsigned nbbs
= loop
->num_nodes
;
979 edge exit
= single_exit (loop
);
980 basic_block src
= loop_preheader_edge (loop
)->src
, dest
= exit
->dest
;
984 bbs
= get_loop_body_in_dom_order (loop
);
986 redirect_edge_pred (exit
, src
);
987 exit
->flags
&= ~(EDGE_TRUE_VALUE
|EDGE_FALSE_VALUE
);
988 exit
->flags
|= EDGE_FALLTHRU
;
989 cancel_loop_tree (loop
);
990 rescan_loop_exit (exit
, false, true);
992 for (i
= 0; i
< nbbs
; i
++)
994 /* We have made sure to not leave any dangling uses of SSA
995 names defined in the loop. With the exception of virtuals.
996 Make sure we replace all uses of virtual defs that will remain
997 outside of the loop with the bare symbol as delete_basic_block
998 will release them. */
999 gimple_stmt_iterator gsi
;
1000 for (gsi
= gsi_start_phis (bbs
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
1002 gimple phi
= gsi_stmt (gsi
);
1003 if (virtual_operand_p (gimple_phi_result (phi
)))
1004 mark_virtual_phi_result_for_renaming (phi
);
1006 for (gsi
= gsi_start_bb (bbs
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
1008 gimple stmt
= gsi_stmt (gsi
);
1009 tree vdef
= gimple_vdef (stmt
);
1010 if (vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
1011 mark_virtual_operand_for_renaming (vdef
);
1013 delete_basic_block (bbs
[i
]);
1017 set_immediate_dominator (CDI_DOMINATORS
, dest
,
1018 recompute_dominator (CDI_DOMINATORS
, dest
));
1021 /* Generates code for PARTITION. */
1024 generate_code_for_partition (struct loop
*loop
,
1025 partition_t partition
, bool copy_p
)
1027 switch (partition
->kind
)
1030 generate_memset_builtin (loop
, partition
);
1031 /* If this is the last partition for which we generate code, we have
1032 to destroy the loop. */
1034 destroy_loop (loop
);
1038 generate_memcpy_builtin (loop
, partition
);
1039 /* If this is the last partition for which we generate code, we have
1040 to destroy the loop. */
1042 destroy_loop (loop
);
1045 case PKIND_REDUCTION
:
1046 /* Reductions all have to be in the last partition. */
1047 gcc_assert (!copy_p
);
1049 generate_loops_for_partition (loop
, partition
, copy_p
);
1058 /* Returns true if the node V of RDG cannot be recomputed. */
1061 rdg_cannot_recompute_vertex_p (struct graph
*rdg
, int v
)
1063 if (RDG_MEM_WRITE_STMT (rdg
, v
))
1069 /* Returns true when the vertex V has already been generated in the
1070 current partition (V is in PROCESSED), or when V belongs to another
1071 partition and cannot be recomputed (V is not in REMAINING_STMTS). */
1074 already_processed_vertex_p (bitmap processed
, int v
)
1076 return bitmap_bit_p (processed
, v
);
1079 static void rdg_flag_vertex_and_dependent (struct graph
*, int, partition_t
,
1082 /* Flag V from RDG as part of PARTITION, and also flag its loop number
1086 rdg_flag_vertex (struct graph
*rdg
, int v
, partition_t partition
)
1090 if (!bitmap_set_bit (partition
->stmts
, v
))
1093 loop
= loop_containing_stmt (RDG_STMT (rdg
, v
));
1094 bitmap_set_bit (partition
->loops
, loop
->num
);
1096 if (rdg_cannot_recompute_vertex_p (rdg
, v
))
1097 partition
->has_writes
= true;
1100 /* Flag in the bitmap PARTITION the vertex V and all its predecessors.
1101 Also flag their loop number in LOOPS. */
1104 rdg_flag_vertex_and_dependent (struct graph
*rdg
, int v
, partition_t partition
,
1112 graphds_dfs (rdg
, &v
, 1, &nodes
, false, NULL
);
1114 FOR_EACH_VEC_ELT (nodes
, i
, x
)
1115 if (bitmap_set_bit (processed
, x
))
1116 rdg_flag_vertex (rdg
, x
, partition
);
1121 /* Initialize CONDS with all the condition statements from the basic
1125 collect_condition_stmts (struct loop
*loop
, vec
<gimple
> *conds
)
1129 vec
<edge
> exits
= get_loop_exit_edges (loop
);
1131 FOR_EACH_VEC_ELT (exits
, i
, e
)
1133 gimple cond
= last_stmt (e
->src
);
1136 conds
->safe_push (cond
);
1142 /* Add to PARTITION all the exit condition statements for LOOPS
1143 together with all their dependent statements determined from
1147 rdg_flag_loop_exits (struct graph
*rdg
, partition_t partition
,
1155 EXECUTE_IF_SET_IN_BITMAP (partition
->loops
, 0, i
, bi
)
1156 collect_condition_stmts (get_loop (cfun
, i
), &conds
);
1158 while (!conds
.is_empty ())
1160 gimple cond
= conds
.pop ();
1161 int v
= rdg_vertex_for_stmt (rdg
, cond
);
1162 if (!already_processed_vertex_p (processed
, v
))
1164 bitmap saved_loops
= BITMAP_ALLOC (NULL
);
1165 bitmap_copy (saved_loops
, partition
->loops
);
1166 rdg_flag_vertex_and_dependent (rdg
, v
, partition
, processed
);
1167 EXECUTE_IF_AND_COMPL_IN_BITMAP (partition
->loops
, saved_loops
,
1169 collect_condition_stmts (get_loop (cfun
, i
), &conds
);
1170 BITMAP_FREE (saved_loops
);
1177 /* Returns a bitmap in which all the statements needed for computing
1178 the strongly connected component C of the RDG are flagged, also
1179 including the loop exit conditions. */
1182 build_rdg_partition_for_component (struct graph
*rdg
, rdgc c
)
1184 partition_t partition
= partition_alloc (NULL
, NULL
);
1185 bitmap processed
= BITMAP_ALLOC (NULL
);
1187 /* Flag the first vertex of the component and its dependent nodes.
1188 Other members of the component are included in its dependencies.
1189 ??? What do we need components for again? To early merge initial
1190 vertices that are in a SCC of the RDG? */
1191 rdg_flag_vertex_and_dependent (rdg
, c
->vertices
[0], partition
, processed
);
1193 rdg_flag_loop_exits (rdg
, partition
, processed
);
1195 BITMAP_FREE (processed
);
1199 /* Free memory for COMPONENTS. */
1202 free_rdg_components (vec
<rdgc
> components
)
1207 FOR_EACH_VEC_ELT (components
, i
, x
)
1209 x
->vertices
.release ();
1213 components
.release ();
1216 /* Build the COMPONENTS vector with the strongly connected components
1217 of RDG in which the STARTING_VERTICES occur. */
1220 rdg_build_components (struct graph
*rdg
, vec
<int> starting_vertices
,
1221 vec
<rdgc
> *components
)
1224 bitmap saved_components
= BITMAP_ALLOC (NULL
);
1225 int n_components
= graphds_scc (rdg
, NULL
);
1226 /* ??? Macros cannot process template types with more than one
1227 argument, so we need this typedef. */
1228 typedef vec
<int> vec_int_heap
;
1229 vec
<int> *all_components
= XNEWVEC (vec_int_heap
, n_components
);
1231 for (i
= 0; i
< n_components
; i
++)
1232 all_components
[i
].create (3);
1234 for (i
= 0; i
< rdg
->n_vertices
; i
++)
1235 all_components
[rdg
->vertices
[i
].component
].safe_push (i
);
1237 FOR_EACH_VEC_ELT (starting_vertices
, i
, v
)
1239 int c
= rdg
->vertices
[v
].component
;
1241 if (bitmap_set_bit (saved_components
, c
))
1243 rdgc x
= XCNEW (struct rdg_component
);
1245 x
->vertices
= all_components
[c
];
1247 components
->safe_push (x
);
1251 for (i
= 0; i
< n_components
; i
++)
1252 if (!bitmap_bit_p (saved_components
, i
))
1253 all_components
[i
].release ();
1255 free (all_components
);
1256 BITMAP_FREE (saved_components
);
1259 /* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
1260 For the moment we detect only the memset zero pattern. */
1263 classify_partition (loop_p loop
, struct graph
*rdg
, partition_t partition
)
1268 data_reference_p single_load
, single_store
;
1269 bool volatiles_p
= false;
1271 partition
->kind
= PKIND_NORMAL
;
1272 partition
->main_dr
= NULL
;
1273 partition
->secondary_dr
= NULL
;
1275 EXECUTE_IF_SET_IN_BITMAP (partition
->stmts
, 0, i
, bi
)
1277 gimple stmt
= RDG_STMT (rdg
, i
);
1279 if (gimple_has_volatile_ops (stmt
))
1282 /* If the stmt has uses outside of the loop fail.
1283 ??? If the stmt is generated in another partition that
1284 is not created as builtin we can ignore this. */
1285 if (stmt_has_scalar_dependences_outside_loop (loop
, stmt
))
1287 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1288 fprintf (dump_file
, "not generating builtin, partition has "
1289 "scalar uses outside of the loop\n");
1290 partition
->kind
= PKIND_REDUCTION
;
1295 /* Perform general partition disqualification for builtins. */
1297 || !flag_tree_loop_distribute_patterns
)
1300 nb_iter
= number_of_exit_cond_executions (loop
);
1301 if (!nb_iter
|| nb_iter
== chrec_dont_know
)
1304 /* Detect memset and memcpy. */
1306 single_store
= NULL
;
1307 EXECUTE_IF_SET_IN_BITMAP (partition
->stmts
, 0, i
, bi
)
1309 gimple stmt
= RDG_STMT (rdg
, i
);
1310 data_reference_p dr
;
1313 if (gimple_code (stmt
) == GIMPLE_PHI
)
1316 /* Any scalar stmts are ok. */
1317 if (!gimple_vuse (stmt
))
1320 /* Otherwise just regular loads/stores. */
1321 if (!gimple_assign_single_p (stmt
))
1324 /* But exactly one store and/or load. */
1325 for (j
= 0; RDG_DATAREFS (rdg
, i
).iterate (j
, &dr
); ++j
)
1327 if (DR_IS_READ (dr
))
1329 if (single_load
!= NULL
)
1335 if (single_store
!= NULL
)
1342 if (single_store
&& !single_load
)
1344 gimple stmt
= DR_STMT (single_store
);
1345 tree rhs
= gimple_assign_rhs1 (stmt
);
1346 if (const_with_all_bytes_same (rhs
) == -1
1347 && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs
))
1348 || (TYPE_MODE (TREE_TYPE (rhs
))
1349 != TYPE_MODE (unsigned_char_type_node
))))
1351 if (TREE_CODE (rhs
) == SSA_NAME
1352 && !SSA_NAME_IS_DEFAULT_DEF (rhs
)
1353 && flow_bb_inside_loop_p (loop
, gimple_bb (SSA_NAME_DEF_STMT (rhs
))))
1355 if (!adjacent_dr_p (single_store
))
1357 partition
->kind
= PKIND_MEMSET
;
1358 partition
->main_dr
= single_store
;
1360 else if (single_store
&& single_load
)
1362 gimple store
= DR_STMT (single_store
);
1363 gimple load
= DR_STMT (single_load
);
1364 /* Direct aggregate copy or via an SSA name temporary. */
1366 && gimple_assign_lhs (load
) != gimple_assign_rhs1 (store
))
1368 if (!adjacent_dr_p (single_store
)
1369 || !adjacent_dr_p (single_load
)
1370 || !operand_equal_p (DR_STEP (single_store
),
1371 DR_STEP (single_load
), 0))
1373 /* Now check that if there is a dependence this dependence is
1374 of a suitable form for memmove. */
1375 vec
<loop_p
> loops
= vNULL
;
1377 loops
.safe_push (loop
);
1378 ddr
= initialize_data_dependence_relation (single_load
, single_store
,
1380 compute_affine_dependence (ddr
, loop
);
1381 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
1383 free_dependence_relation (ddr
);
1387 if (DDR_ARE_DEPENDENT (ddr
) != chrec_known
)
1389 if (DDR_NUM_DIST_VECTS (ddr
) == 0)
1391 free_dependence_relation (ddr
);
1395 lambda_vector dist_v
;
1396 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr
), i
, dist_v
)
1398 int dist
= dist_v
[index_in_loop_nest (loop
->num
,
1399 DDR_LOOP_NEST (ddr
))];
1400 if (dist
> 0 && !DDR_REVERSED_P (ddr
))
1402 free_dependence_relation (ddr
);
1408 free_dependence_relation (ddr
);
1410 partition
->kind
= PKIND_MEMCPY
;
1411 partition
->main_dr
= single_store
;
1412 partition
->secondary_dr
= single_load
;
1416 /* For a data reference REF, return the declaration of its base
1417 address or NULL_TREE if the base is not determined. */
1420 ref_base_address (data_reference_p dr
)
1422 tree base_address
= DR_BASE_ADDRESS (dr
);
1424 && TREE_CODE (base_address
) == ADDR_EXPR
)
1425 return TREE_OPERAND (base_address
, 0);
1427 return base_address
;
1430 /* Returns true when PARTITION1 and PARTITION2 have similar memory
1434 similar_memory_accesses (struct graph
*rdg
, partition_t partition1
,
1435 partition_t partition2
)
1437 unsigned i
, j
, k
, l
;
1438 bitmap_iterator bi
, bj
;
1439 data_reference_p ref1
, ref2
;
1441 /* First check whether in the intersection of the two partitions are
1442 any loads or stores. Common loads are the situation that happens
1444 EXECUTE_IF_AND_IN_BITMAP (partition1
->stmts
, partition2
->stmts
, 0, i
, bi
)
1445 if (RDG_MEM_WRITE_STMT (rdg
, i
)
1446 || RDG_MEM_READS_STMT (rdg
, i
))
1449 /* Then check all data-references against each other. */
1450 EXECUTE_IF_SET_IN_BITMAP (partition1
->stmts
, 0, i
, bi
)
1451 if (RDG_MEM_WRITE_STMT (rdg
, i
)
1452 || RDG_MEM_READS_STMT (rdg
, i
))
1453 EXECUTE_IF_SET_IN_BITMAP (partition2
->stmts
, 0, j
, bj
)
1454 if (RDG_MEM_WRITE_STMT (rdg
, j
)
1455 || RDG_MEM_READS_STMT (rdg
, j
))
1457 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg
, i
), k
, ref1
)
1459 tree base1
= ref_base_address (ref1
);
1461 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg
, j
), l
, ref2
)
1462 if (base1
== ref_base_address (ref2
))
1470 /* Aggregate several components into a useful partition that is
1471 registered in the PARTITIONS vector. Partitions will be
1472 distributed in different loops. */
1475 rdg_build_partitions (struct graph
*rdg
, vec
<rdgc
> components
,
1476 vec
<int> *other_stores
,
1477 vec
<partition_t
> *partitions
, bitmap processed
)
1481 partition_t partition
= partition_alloc (NULL
, NULL
);
1483 FOR_EACH_VEC_ELT (components
, i
, x
)
1486 int v
= x
->vertices
[0];
1488 if (bitmap_bit_p (processed
, v
))
1491 np
= build_rdg_partition_for_component (rdg
, x
);
1492 bitmap_ior_into (partition
->stmts
, np
->stmts
);
1493 partition
->has_writes
= partition_has_writes (np
);
1494 bitmap_ior_into (processed
, np
->stmts
);
1495 partition_free (np
);
1497 if (partition_has_writes (partition
))
1499 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1501 fprintf (dump_file
, "ldist useful partition:\n");
1502 dump_bitmap (dump_file
, partition
->stmts
);
1505 partitions
->safe_push (partition
);
1506 partition
= partition_alloc (NULL
, NULL
);
1510 /* Add the nodes from the RDG that were not marked as processed, and
1511 that are used outside the current loop. These are scalar
1512 computations that are not yet part of previous partitions. */
1513 for (i
= 0; i
< rdg
->n_vertices
; i
++)
1514 if (!bitmap_bit_p (processed
, i
)
1515 && rdg_defs_used_in_other_loops_p (rdg
, i
))
1516 other_stores
->safe_push (i
);
1518 /* If there are still statements left in the OTHER_STORES array,
1519 create other components and partitions with these stores and
1520 their dependences. */
1521 if (other_stores
->length () > 0)
1528 rdg_build_components (rdg
, *other_stores
, &comps
);
1529 rdg_build_partitions (rdg
, comps
, &foo
, partitions
, processed
);
1532 free_rdg_components (comps
);
1535 /* If there is something left in the last partition, save it. */
1536 if (bitmap_count_bits (partition
->stmts
) > 0)
1537 partitions
->safe_push (partition
);
1539 partition_free (partition
);
1542 /* Dump to FILE the PARTITIONS. */
1545 dump_rdg_partitions (FILE *file
, vec
<partition_t
> partitions
)
1548 partition_t partition
;
1550 FOR_EACH_VEC_ELT (partitions
, i
, partition
)
1551 debug_bitmap_file (file
, partition
->stmts
);
1554 /* Debug PARTITIONS. */
1555 extern void debug_rdg_partitions (vec
<partition_t
> );
1558 debug_rdg_partitions (vec
<partition_t
> partitions
)
1560 dump_rdg_partitions (stderr
, partitions
);
1563 /* Returns the number of read and write operations in the RDG. */
1566 number_of_rw_in_rdg (struct graph
*rdg
)
1570 for (i
= 0; i
< rdg
->n_vertices
; i
++)
1572 if (RDG_MEM_WRITE_STMT (rdg
, i
))
1575 if (RDG_MEM_READS_STMT (rdg
, i
))
1582 /* Returns the number of read and write operations in a PARTITION of
1586 number_of_rw_in_partition (struct graph
*rdg
, partition_t partition
)
1592 EXECUTE_IF_SET_IN_BITMAP (partition
->stmts
, 0, i
, ii
)
1594 if (RDG_MEM_WRITE_STMT (rdg
, i
))
1597 if (RDG_MEM_READS_STMT (rdg
, i
))
1604 /* Returns true when one of the PARTITIONS contains all the read or
1605 write operations of RDG. */
1608 partition_contains_all_rw (struct graph
*rdg
,
1609 vec
<partition_t
> partitions
)
1612 partition_t partition
;
1613 int nrw
= number_of_rw_in_rdg (rdg
);
1615 FOR_EACH_VEC_ELT (partitions
, i
, partition
)
1616 if (nrw
== number_of_rw_in_partition (rdg
, partition
))
1622 /* Generate code from STARTING_VERTICES in RDG. Returns the number of
1623 distributed loops. */
1626 ldist_gen (struct loop
*loop
, struct graph
*rdg
,
1627 vec
<int> starting_vertices
)
1630 vec
<rdgc
> components
;
1631 components
.create (3);
1632 vec
<partition_t
> partitions
;
1633 partitions
.create (3);
1634 vec
<int> other_stores
;
1635 other_stores
.create (3);
1636 partition_t partition
;
1637 bitmap processed
= BITMAP_ALLOC (NULL
);
1640 for (i
= 0; i
< rdg
->n_vertices
; i
++)
1642 /* Save in OTHER_STORES all the memory writes that are not in
1643 STARTING_VERTICES. */
1644 if (RDG_MEM_WRITE_STMT (rdg
, i
))
1650 FOR_EACH_VEC_ELT (starting_vertices
, j
, v
)
1658 other_stores
.safe_push (i
);
1662 rdg_build_components (rdg
, starting_vertices
, &components
);
1663 rdg_build_partitions (rdg
, components
, &other_stores
, &partitions
,
1665 BITMAP_FREE (processed
);
1667 any_builtin
= false;
1668 FOR_EACH_VEC_ELT (partitions
, i
, partition
)
1670 classify_partition (loop
, rdg
, partition
);
1671 any_builtin
|= partition_builtin_p (partition
);
1674 /* If we are only distributing patterns fuse all partitions that
1675 were not properly classified as builtins. Else fuse partitions
1676 with similar memory accesses. */
1677 if (!flag_tree_loop_distribution
)
1680 /* If we did not detect any builtin simply bail out. */
1686 /* Only fuse adjacent non-builtin partitions, see PR53616.
1687 ??? Use dependence information to improve partition ordering. */
1691 for (; partitions
.iterate (i
, &into
); ++i
)
1692 if (!partition_builtin_p (into
))
1694 for (++i
; partitions
.iterate (i
, &partition
); ++i
)
1695 if (!partition_builtin_p (partition
))
1697 bitmap_ior_into (into
->stmts
, partition
->stmts
);
1698 if (partition
->kind
== PKIND_REDUCTION
)
1699 into
->kind
= PKIND_REDUCTION
;
1700 partitions
.ordered_remove (i
);
1701 partition_free (partition
);
1707 while ((unsigned) i
< partitions
.length ());
1713 for (i
= 0; partitions
.iterate (i
, &into
); ++i
)
1715 if (partition_builtin_p (into
))
1718 partitions
.iterate (j
, &partition
); ++j
)
1720 if (!partition_builtin_p (partition
)
1721 /* ??? The following is horribly inefficient,
1722 we are re-computing and analyzing data-references
1723 of the stmts in the partitions all the time. */
1724 && similar_memory_accesses (rdg
, into
, partition
))
1726 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1728 fprintf (dump_file
, "fusing partitions\n");
1729 dump_bitmap (dump_file
, into
->stmts
);
1730 dump_bitmap (dump_file
, partition
->stmts
);
1731 fprintf (dump_file
, "because they have similar "
1732 "memory accesses\n");
1734 bitmap_ior_into (into
->stmts
, partition
->stmts
);
1735 if (partition
->kind
== PKIND_REDUCTION
)
1736 into
->kind
= PKIND_REDUCTION
;
1737 partitions
.ordered_remove (j
);
1738 partition_free (partition
);
1745 /* Fuse all reduction partitions into the last. */
1746 if (partitions
.length () > 1)
1748 partition_t into
= partitions
.last ();
1749 for (i
= partitions
.length () - 2; i
>= 0; --i
)
1751 partition_t what
= partitions
[i
];
1752 if (what
->kind
== PKIND_REDUCTION
)
1754 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1756 fprintf (dump_file
, "fusing partitions\n");
1757 dump_bitmap (dump_file
, into
->stmts
);
1758 dump_bitmap (dump_file
, what
->stmts
);
1759 fprintf (dump_file
, "because the latter has reductions\n");
1761 bitmap_ior_into (into
->stmts
, what
->stmts
);
1762 into
->kind
= PKIND_REDUCTION
;
1763 partitions
.ordered_remove (i
);
1764 partition_free (what
);
1769 nbp
= partitions
.length ();
1771 || (nbp
== 1 && !partition_builtin_p (partitions
[0]))
1772 || (nbp
> 1 && partition_contains_all_rw (rdg
, partitions
)))
1778 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1779 dump_rdg_partitions (dump_file
, partitions
);
1781 FOR_EACH_VEC_ELT (partitions
, i
, partition
)
1782 generate_code_for_partition (loop
, partition
, i
< nbp
- 1);
1786 FOR_EACH_VEC_ELT (partitions
, i
, partition
)
1787 partition_free (partition
);
1789 other_stores
.release ();
1790 partitions
.release ();
1791 free_rdg_components (components
);
1795 /* Distributes the code from LOOP in such a way that producer
1796 statements are placed before consumer statements. When STMTS is
1797 NULL, performs the maximal distribution, if STMTS is not NULL,
1798 tries to separate only these statements from the LOOP's body.
1799 Returns the number of distributed loops. */
1802 distribute_loop (struct loop
*loop
, vec
<gimple
> stmts
)
1809 vec
<loop_p
> loop_nest
;
1811 loop_nest
.create (3);
1812 if (!find_loop_nest (loop
, &loop_nest
))
1814 loop_nest
.release ();
1818 rdg
= build_rdg (loop_nest
);
1821 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1823 "FIXME: Loop %d not distributed: failed to build the RDG.\n",
1826 loop_nest
.release ();
1830 vertices
.create (3);
1832 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1833 dump_rdg (dump_file
, rdg
);
1835 FOR_EACH_VEC_ELT (stmts
, i
, s
)
1837 int v
= rdg_vertex_for_stmt (rdg
, s
);
1841 vertices
.safe_push (v
);
1843 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1845 "ldist asked to generate code for vertex %d\n", v
);
1849 res
= ldist_gen (loop
, rdg
, vertices
);
1850 vertices
.release ();
1852 loop_nest
.release ();
1856 /* Distribute all loops in the current function. */
1859 tree_loop_distribution (void)
1863 bool changed
= false;
1868 gimple_stmt_iterator gsi
;
1869 for (gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1870 gimple_set_uid (gsi_stmt (gsi
), -1);
1871 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1872 gimple_set_uid (gsi_stmt (gsi
), -1);
1875 /* We can at the moment only distribute non-nested loops, thus restrict
1876 walking to innermost loops. */
1877 FOR_EACH_LOOP (li
, loop
, LI_ONLY_INNERMOST
)
1879 vec
<gimple
> work_list
= vNULL
;
1881 int num
= loop
->num
;
1882 int nb_generated_loops
= 0;
1885 /* If the loop doesn't have a single exit we will fail anyway,
1886 so do that early. */
1887 if (!single_exit (loop
))
1890 /* Only optimize hot loops. */
1891 if (!optimize_loop_for_speed_p (loop
))
1894 /* Only distribute loops with a header and latch for now. */
1895 if (loop
->num_nodes
> 2)
1898 /* Initialize the worklist with stmts we seed the partitions with. */
1899 bbs
= get_loop_body_in_dom_order (loop
);
1900 for (i
= 0; i
< loop
->num_nodes
; ++i
)
1902 gimple_stmt_iterator gsi
;
1903 for (gsi
= gsi_start_bb (bbs
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
1905 gimple stmt
= gsi_stmt (gsi
);
1906 /* Distribute stmts which have defs that are used outside of
1908 if (stmt_has_scalar_dependences_outside_loop (loop
, stmt
))
1910 /* Otherwise only distribute stores for now. */
1911 else if (!gimple_assign_single_p (stmt
)
1912 || is_gimple_reg (gimple_assign_lhs (stmt
)))
1915 work_list
.safe_push (stmt
);
1920 if (work_list
.length () > 0)
1921 nb_generated_loops
= distribute_loop (loop
, work_list
);
1923 if (nb_generated_loops
> 0)
1926 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1928 if (nb_generated_loops
> 1)
1929 fprintf (dump_file
, "Loop %d distributed: split to %d loops.\n",
1930 num
, nb_generated_loops
);
1932 fprintf (dump_file
, "Loop %d is the same.\n", num
);
1935 work_list
.release ();
1940 mark_virtual_operands_for_renaming (cfun
);
1941 rewrite_into_loop_closed_ssa (NULL
, TODO_update_ssa
);
1944 #ifdef ENABLE_CHECKING
1945 verify_loop_structure ();
1952 gate_tree_loop_distribution (void)
1954 return flag_tree_loop_distribution
1955 || flag_tree_loop_distribute_patterns
;
1960 const pass_data pass_data_loop_distribution
=
1962 GIMPLE_PASS
, /* type */
1964 OPTGROUP_LOOP
, /* optinfo_flags */
1965 true, /* has_gate */
1966 true, /* has_execute */
1967 TV_TREE_LOOP_DISTRIBUTION
, /* tv_id */
1968 ( PROP_cfg
| PROP_ssa
), /* properties_required */
1969 0, /* properties_provided */
1970 0, /* properties_destroyed */
1971 0, /* todo_flags_start */
1972 TODO_verify_ssa
, /* todo_flags_finish */
1975 class pass_loop_distribution
: public gimple_opt_pass
1978 pass_loop_distribution(gcc::context
*ctxt
)
1979 : gimple_opt_pass(pass_data_loop_distribution
, ctxt
)
1982 /* opt_pass methods: */
1983 bool gate () { return gate_tree_loop_distribution (); }
1984 unsigned int execute () { return tree_loop_distribution (); }
1986 }; // class pass_loop_distribution
1991 make_pass_loop_distribution (gcc::context
*ctxt
)
1993 return new pass_loop_distribution (ctxt
);