2 Copyright (C) 2006-2017 Free Software Foundation, Inc.
3 Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
4 and Sebastian Pop <sebastian.pop@amd.com>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 3, or (at your option) any
13 GCC is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* This pass performs loop distribution: for example, the loop
39 Loop distribution is the dual of loop fusion. It separates statements
40 of a loop (or loop nest) into multiple loops (or loop nests) with the
41 same loop header. The major goal is to separate statements which may
42 be vectorized from those that can't. This pass implements distribution
43 in the following steps:
45 1) Seed partitions with specific type statements. For now we support
46 two types seed statements: statement defining variable used outside
47 of loop; statement storing to memory.
48 2) Build reduced dependence graph (RDG) for loop to be distributed.
49 The vertices (RDG:V) model all statements in the loop and the edges
50 (RDG:E) model flow and control dependencies between statements.
51 3) Apart from RDG, compute data dependencies between memory references.
52 4) Starting from seed statement, build up partition by adding depended
53 statements according to RDG's dependence information. Partition is
54 classified as parallel type if it can be executed paralleled; or as
55 sequential type if it can't. Parallel type partition is further
56 classified as different builtin kinds if it can be implemented as
57 builtin function calls.
58 5) Build partition dependence graph (PG) based on data dependencies.
59 The vertices (PG:V) model all partitions and the edges (PG:E) model
60 all data dependencies between every partitions pair. In general,
61 data dependence is either compilation time known or unknown. In C
62 family languages, there exists quite amount compilation time unknown
63 dependencies because of possible alias relation of data references.
64 We categorize PG's edge to two types: "true" edge that represents
65 compilation time known data dependencies; "alias" edge for all other cases.
67 6) Traverse subgraph of PG as if all "alias" edges don't exist. Merge
68 partitions in each strong connected component (SCC) correspondingly.
69 Build new PG for merged partitions.
70 7) Traverse PG again and this time with both "true" and "alias" edges
71 included. We try to break SCCs by removing some edges. Because
72 SCCs by "true" edges are all fused in step 6), we can break SCCs
73 by removing some "alias" edges. It's NP-hard to choose optimal
74 edge set, fortunately simple approximation is good enough for us
75 given the small problem scale.
76 8) Collect all data dependencies of the removed "alias" edges. Create
77 runtime alias checks for collected data dependencies.
78 9) Version loop under the condition of runtime alias checks. Given
79 loop distribution generally introduces additional overhead, it is
80 only useful if vectorization is achieved in distributed loop. We
81 version loop with internal function call IFN_LOOP_DIST_ALIAS. If
82 no distributed loop can be vectorized, we simply remove distributed
83 loops and recover to the original one.
86 1) We only distribute innermost two-level loop nest now. We should
87 extend it for arbitrary loop nests in the future.
88 2) We only fuse partitions in SCC now. A better fusion algorithm is
89 desired to minimize loop overhead, maximize parallelism and maximize
94 #include "coretypes.h"
99 #include "tree-pass.h"
101 #include "gimple-pretty-print.h"
102 #include "fold-const.h"
104 #include "gimple-iterator.h"
105 #include "gimplify-me.h"
106 #include "stor-layout.h"
107 #include "tree-cfg.h"
108 #include "tree-ssa-loop-manip.h"
109 #include "tree-ssa-loop.h"
110 #include "tree-into-ssa.h"
111 #include "tree-ssa.h"
113 #include "tree-scalar-evolution.h"
115 #include "tree-vectorizer.h"
118 #define MAX_DATAREFS_NUM \
119 ((unsigned) PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
121 /* Threshold controlling number of distributed partitions. Given it may
122 be unnecessary if a memory stream cost model is invented in the future,
123 we define it as a temporary macro, rather than a parameter. */
124 #define NUM_PARTITION_THRESHOLD (4)
126 /* Hashtable helpers. */
128 struct ddr_hasher
: nofree_ptr_hash
<struct data_dependence_relation
>
130 static inline hashval_t
hash (const data_dependence_relation
*);
131 static inline bool equal (const data_dependence_relation
*,
132 const data_dependence_relation
*);
135 /* Hash function for data dependence. */
138 ddr_hasher::hash (const data_dependence_relation
*ddr
)
141 h
.add_ptr (DDR_A (ddr
));
142 h
.add_ptr (DDR_B (ddr
));
146 /* Hash table equality function for data dependence. */
149 ddr_hasher::equal (const data_dependence_relation
*ddr1
,
150 const data_dependence_relation
*ddr2
)
152 return (DDR_A (ddr1
) == DDR_A (ddr2
) && DDR_B (ddr1
) == DDR_B (ddr2
));
155 /* The loop (nest) to be distributed. */
156 static vec
<loop_p
> loop_nest
;
158 /* Vector of data references in the loop to be distributed. */
159 static vec
<data_reference_p
> datarefs_vec
;
161 /* Store index of data reference in aux field. */
162 #define DR_INDEX(dr) ((uintptr_t) (dr)->aux)
164 /* Hash table for data dependence relation in the loop to be distributed. */
165 static hash_table
<ddr_hasher
> *ddrs_table
;
167 /* A Reduced Dependence Graph (RDG) vertex representing a statement. */
170 /* The statement represented by this vertex. */
173 /* Vector of data-references in this statement. */
174 vec
<data_reference_p
> datarefs
;
176 /* True when the statement contains a write to memory. */
179 /* True when the statement contains a read from memory. */
183 #define RDGV_STMT(V) ((struct rdg_vertex *) ((V)->data))->stmt
184 #define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
185 #define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
186 #define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
187 #define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
188 #define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
189 #define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
190 #define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
/* Data dependence type.  */

enum rdg_dep_type
{
  /* Read After Write (RAW).  */
  flow_dd = 'f',

  /* Control dependence (execute conditional on).  */
  control_dd = 'c'
};

/* Dependence information attached to an edge of the RDG.  */

struct rdg_edge
{
  /* Type of the dependence.  */
  enum rdg_dep_type type;
};

#define RDGE_TYPE(E)        ((struct rdg_edge *) ((E)->data))->type
213 /* Dump vertex I in RDG to FILE. */
216 dump_rdg_vertex (FILE *file
, struct graph
*rdg
, int i
)
218 struct vertex
*v
= &(rdg
->vertices
[i
]);
219 struct graph_edge
*e
;
221 fprintf (file
, "(vertex %d: (%s%s) (in:", i
,
222 RDG_MEM_WRITE_STMT (rdg
, i
) ? "w" : "",
223 RDG_MEM_READS_STMT (rdg
, i
) ? "r" : "");
226 for (e
= v
->pred
; e
; e
= e
->pred_next
)
227 fprintf (file
, " %d", e
->src
);
229 fprintf (file
, ") (out:");
232 for (e
= v
->succ
; e
; e
= e
->succ_next
)
233 fprintf (file
, " %d", e
->dest
);
235 fprintf (file
, ")\n");
236 print_gimple_stmt (file
, RDGV_STMT (v
), 0, TDF_VOPS
|TDF_MEMSYMS
);
237 fprintf (file
, ")\n");
240 /* Call dump_rdg_vertex on stderr. */
243 debug_rdg_vertex (struct graph
*rdg
, int i
)
245 dump_rdg_vertex (stderr
, rdg
, i
);
248 /* Dump the reduced dependence graph RDG to FILE. */
251 dump_rdg (FILE *file
, struct graph
*rdg
)
253 fprintf (file
, "(rdg\n");
254 for (int i
= 0; i
< rdg
->n_vertices
; i
++)
255 dump_rdg_vertex (file
, rdg
, i
);
256 fprintf (file
, ")\n");
259 /* Call dump_rdg on stderr. */
262 debug_rdg (struct graph
*rdg
)
264 dump_rdg (stderr
, rdg
);
268 dot_rdg_1 (FILE *file
, struct graph
*rdg
)
271 pretty_printer buffer
;
272 pp_needs_newline (&buffer
) = false;
273 buffer
.buffer
->stream
= file
;
275 fprintf (file
, "digraph RDG {\n");
277 for (i
= 0; i
< rdg
->n_vertices
; i
++)
279 struct vertex
*v
= &(rdg
->vertices
[i
]);
280 struct graph_edge
*e
;
282 fprintf (file
, "%d [label=\"[%d] ", i
, i
);
283 pp_gimple_stmt_1 (&buffer
, RDGV_STMT (v
), 0, TDF_SLIM
);
285 fprintf (file
, "\"]\n");
287 /* Highlight reads from memory. */
288 if (RDG_MEM_READS_STMT (rdg
, i
))
289 fprintf (file
, "%d [style=filled, fillcolor=green]\n", i
);
291 /* Highlight stores to memory. */
292 if (RDG_MEM_WRITE_STMT (rdg
, i
))
293 fprintf (file
, "%d [style=filled, fillcolor=red]\n", i
);
296 for (e
= v
->succ
; e
; e
= e
->succ_next
)
297 switch (RDGE_TYPE (e
))
300 /* These are the most common dependences: don't print these. */
301 fprintf (file
, "%d -> %d \n", i
, e
->dest
);
305 fprintf (file
, "%d -> %d [label=control] \n", i
, e
->dest
);
313 fprintf (file
, "}\n\n");
316 /* Display the Reduced Dependence Graph using dotty. */
319 dot_rdg (struct graph
*rdg
)
321 /* When debugging, you may want to enable the following code. */
323 FILE *file
= popen ("dot -Tx11", "w");
326 dot_rdg_1 (file
, rdg
);
328 close (fileno (file
));
331 dot_rdg_1 (stderr
, rdg
);
335 /* Returns the index of STMT in RDG. */
338 rdg_vertex_for_stmt (struct graph
*rdg ATTRIBUTE_UNUSED
, gimple
*stmt
)
340 int index
= gimple_uid (stmt
);
341 gcc_checking_assert (index
== -1 || RDG_STMT (rdg
, index
) == stmt
);
345 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
346 the index of DEF in RDG. */
349 create_rdg_edges_for_scalar (struct graph
*rdg
, tree def
, int idef
)
351 use_operand_p imm_use_p
;
352 imm_use_iterator iterator
;
354 FOR_EACH_IMM_USE_FAST (imm_use_p
, iterator
, def
)
356 struct graph_edge
*e
;
357 int use
= rdg_vertex_for_stmt (rdg
, USE_STMT (imm_use_p
));
362 e
= add_edge (rdg
, idef
, use
);
363 e
->data
= XNEW (struct rdg_edge
);
364 RDGE_TYPE (e
) = flow_dd
;
368 /* Creates an edge for the control dependences of BB to the vertex V. */
371 create_edge_for_control_dependence (struct graph
*rdg
, basic_block bb
,
372 int v
, control_dependences
*cd
)
376 EXECUTE_IF_SET_IN_BITMAP (cd
->get_edges_dependent_on (bb
->index
),
379 basic_block cond_bb
= cd
->get_edge_src (edge_n
);
380 gimple
*stmt
= last_stmt (cond_bb
);
381 if (stmt
&& is_ctrl_stmt (stmt
))
383 struct graph_edge
*e
;
384 int c
= rdg_vertex_for_stmt (rdg
, stmt
);
388 e
= add_edge (rdg
, c
, v
);
389 e
->data
= XNEW (struct rdg_edge
);
390 RDGE_TYPE (e
) = control_dd
;
395 /* Creates the edges of the reduced dependence graph RDG. */
398 create_rdg_flow_edges (struct graph
*rdg
)
404 for (i
= 0; i
< rdg
->n_vertices
; i
++)
405 FOR_EACH_PHI_OR_STMT_DEF (def_p
, RDG_STMT (rdg
, i
),
407 create_rdg_edges_for_scalar (rdg
, DEF_FROM_PTR (def_p
), i
);
410 /* Creates the edges of the reduced dependence graph RDG. */
413 create_rdg_cd_edges (struct graph
*rdg
, control_dependences
*cd
, loop_p loop
)
417 for (i
= 0; i
< rdg
->n_vertices
; i
++)
419 gimple
*stmt
= RDG_STMT (rdg
, i
);
420 if (gimple_code (stmt
) == GIMPLE_PHI
)
424 FOR_EACH_EDGE (e
, ei
, gimple_bb (stmt
)->preds
)
425 if (flow_bb_inside_loop_p (loop
, e
->src
))
426 create_edge_for_control_dependence (rdg
, e
->src
, i
, cd
);
429 create_edge_for_control_dependence (rdg
, gimple_bb (stmt
), i
, cd
);
433 /* Build the vertices of the reduced dependence graph RDG. Return false
437 create_rdg_vertices (struct graph
*rdg
, vec
<gimple
*> stmts
, loop_p loop
)
442 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
444 struct vertex
*v
= &(rdg
->vertices
[i
]);
446 /* Record statement to vertex mapping. */
447 gimple_set_uid (stmt
, i
);
449 v
->data
= XNEW (struct rdg_vertex
);
450 RDGV_STMT (v
) = stmt
;
451 RDGV_DATAREFS (v
).create (0);
452 RDGV_HAS_MEM_WRITE (v
) = false;
453 RDGV_HAS_MEM_READS (v
) = false;
454 if (gimple_code (stmt
) == GIMPLE_PHI
)
457 unsigned drp
= datarefs_vec
.length ();
458 if (!find_data_references_in_stmt (loop
, stmt
, &datarefs_vec
))
460 for (unsigned j
= drp
; j
< datarefs_vec
.length (); ++j
)
462 data_reference_p dr
= datarefs_vec
[j
];
464 RDGV_HAS_MEM_READS (v
) = true;
466 RDGV_HAS_MEM_WRITE (v
) = true;
467 RDGV_DATAREFS (v
).safe_push (dr
);
473 /* Array mapping basic block's index to its topological order. */
474 static int *bb_top_order_index
;
475 /* And size of the array. */
476 static int bb_top_order_index_size
;
478 /* If X has a smaller topological sort number than Y, returns -1;
479 if greater, returns 1. */
482 bb_top_order_cmp (const void *x
, const void *y
)
484 basic_block bb1
= *(const basic_block
*) x
;
485 basic_block bb2
= *(const basic_block
*) y
;
487 gcc_assert (bb1
->index
< bb_top_order_index_size
488 && bb2
->index
< bb_top_order_index_size
);
489 gcc_assert (bb1
== bb2
490 || bb_top_order_index
[bb1
->index
]
491 != bb_top_order_index
[bb2
->index
]);
493 return (bb_top_order_index
[bb1
->index
] - bb_top_order_index
[bb2
->index
]);
496 /* Initialize STMTS with all the statements of LOOP. We use topological
497 order to discover all statements. The order is important because
498 generate_loops_for_partition is using the same traversal for identifying
499 statements in loop copies. */
502 stmts_from_loop (struct loop
*loop
, vec
<gimple
*> *stmts
)
505 basic_block
*bbs
= get_loop_body_in_custom_order (loop
, bb_top_order_cmp
);
507 for (i
= 0; i
< loop
->num_nodes
; i
++)
509 basic_block bb
= bbs
[i
];
511 for (gphi_iterator bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
);
513 if (!virtual_operand_p (gimple_phi_result (bsi
.phi ())))
514 stmts
->safe_push (bsi
.phi ());
516 for (gimple_stmt_iterator bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
);
519 gimple
*stmt
= gsi_stmt (bsi
);
520 if (gimple_code (stmt
) != GIMPLE_LABEL
&& !is_gimple_debug (stmt
))
521 stmts
->safe_push (stmt
);
528 /* Free the reduced dependence graph RDG. */
531 free_rdg (struct graph
*rdg
)
535 for (i
= 0; i
< rdg
->n_vertices
; i
++)
537 struct vertex
*v
= &(rdg
->vertices
[i
]);
538 struct graph_edge
*e
;
540 for (e
= v
->succ
; e
; e
= e
->succ_next
)
545 gimple_set_uid (RDGV_STMT (v
), -1);
546 (RDGV_DATAREFS (v
)).release ();
554 /* Build the Reduced Dependence Graph (RDG) with one vertex per statement of
555 LOOP, and one edge per flow dependence or control dependence from control
556 dependence CD. During visiting each statement, data references are also
557 collected and recorded in global data DATAREFS_VEC. */
559 static struct graph
*
560 build_rdg (struct loop
*loop
, control_dependences
*cd
)
564 /* Create the RDG vertices from the stmts of the loop nest. */
565 auto_vec
<gimple
*, 10> stmts
;
566 stmts_from_loop (loop
, &stmts
);
567 rdg
= new_graph (stmts
.length ());
568 if (!create_rdg_vertices (rdg
, stmts
, loop
))
575 create_rdg_flow_edges (rdg
);
577 create_rdg_cd_edges (rdg
, cd
, loop
);
583 /* Kind of distributed loop. */
584 enum partition_kind
{
585 PKIND_NORMAL
, PKIND_MEMSET
, PKIND_MEMCPY
, PKIND_MEMMOVE
588 /* Type of distributed loop. */
589 enum partition_type
{
590 /* The distributed loop can be executed parallelly. */
592 /* The distributed loop has to be executed sequentially. */
596 /* Builtin info for loop distribution. */
599 /* data-references a kind != PKIND_NORMAL partition is about. */
600 data_reference_p dst_dr
;
601 data_reference_p src_dr
;
602 /* Base address and size of memory objects operated by the builtin. Note
603 both dest and source memory objects must have the same size. */
609 /* Partition for loop distribution. */
612 /* Statements of the partition. */
614 /* True if the partition defines variable which is used outside of loop. */
616 enum partition_kind kind
;
617 enum partition_type type
;
618 /* Data references in the partition. */
620 /* Information of builtin parition. */
621 struct builtin_info
*builtin
;
625 /* Allocate and initialize a partition from BITMAP. */
628 partition_alloc (void)
630 partition
*partition
= XCNEW (struct partition
);
631 partition
->stmts
= BITMAP_ALLOC (NULL
);
632 partition
->reduction_p
= false;
633 partition
->kind
= PKIND_NORMAL
;
634 partition
->datarefs
= BITMAP_ALLOC (NULL
);
638 /* Free PARTITION. */
641 partition_free (partition
*partition
)
643 BITMAP_FREE (partition
->stmts
);
644 BITMAP_FREE (partition
->datarefs
);
645 if (partition
->builtin
)
646 free (partition
->builtin
);
651 /* Returns true if the partition can be generated as a builtin. */
654 partition_builtin_p (partition
*partition
)
656 return partition
->kind
!= PKIND_NORMAL
;
659 /* Returns true if the partition contains a reduction. */
662 partition_reduction_p (partition
*partition
)
664 return partition
->reduction_p
;
667 /* Partitions are fused because of different reasons. */
670 FUSE_NON_BUILTIN
= 0,
677 /* Description on different fusing reason. */
678 static const char *fuse_message
[] = {
679 "they are non-builtins",
680 "they have reductions",
681 "they have shared memory refs",
682 "they are in the same dependence scc",
683 "there is no point to distribute loop"};
686 update_type_for_merge (struct graph
*, partition
*, partition
*);
688 /* Merge PARTITION into the partition DEST. RDG is the reduced dependence
689 graph and we update type for result partition if it is non-NULL. */
692 partition_merge_into (struct graph
*rdg
, partition
*dest
,
693 partition
*partition
, enum fuse_type ft
)
695 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
697 fprintf (dump_file
, "Fuse partitions because %s:\n", fuse_message
[ft
]);
698 fprintf (dump_file
, " Part 1: ");
699 dump_bitmap (dump_file
, dest
->stmts
);
700 fprintf (dump_file
, " Part 2: ");
701 dump_bitmap (dump_file
, partition
->stmts
);
704 dest
->kind
= PKIND_NORMAL
;
705 if (dest
->type
== PTYPE_PARALLEL
)
706 dest
->type
= partition
->type
;
708 bitmap_ior_into (dest
->stmts
, partition
->stmts
);
709 if (partition_reduction_p (partition
))
710 dest
->reduction_p
= true;
712 /* Further check if any data dependence prevents us from executing the
713 new partition parallelly. */
714 if (dest
->type
== PTYPE_PARALLEL
&& rdg
!= NULL
)
715 update_type_for_merge (rdg
, dest
, partition
);
717 bitmap_ior_into (dest
->datarefs
, partition
->datarefs
);
721 /* Returns true when DEF is an SSA_NAME defined in LOOP and used after
725 ssa_name_has_uses_outside_loop_p (tree def
, loop_p loop
)
727 imm_use_iterator imm_iter
;
730 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, def
)
732 if (is_gimple_debug (USE_STMT (use_p
)))
735 basic_block use_bb
= gimple_bb (USE_STMT (use_p
));
736 if (!flow_bb_inside_loop_p (loop
, use_bb
))
743 /* Returns true when STMT defines a scalar variable used after the
747 stmt_has_scalar_dependences_outside_loop (loop_p loop
, gimple
*stmt
)
752 if (gimple_code (stmt
) == GIMPLE_PHI
)
753 return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt
), loop
);
755 FOR_EACH_SSA_DEF_OPERAND (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
756 if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p
), loop
))
762 /* Return a copy of LOOP placed before LOOP. */
765 copy_loop_before (struct loop
*loop
)
768 edge preheader
= loop_preheader_edge (loop
);
770 initialize_original_copy_tables ();
771 res
= slpeel_tree_duplicate_loop_to_edge_cfg (loop
, NULL
, preheader
);
772 gcc_assert (res
!= NULL
);
773 free_original_copy_tables ();
774 delete_update_ssa ();
779 /* Creates an empty basic block after LOOP. */
782 create_bb_after_loop (struct loop
*loop
)
784 edge exit
= single_exit (loop
);
792 /* Generate code for PARTITION from the code in LOOP. The loop is
793 copied when COPY_P is true. All the statements not flagged in the
794 PARTITION bitmap are removed from the loop or from its copy. The
795 statements are indexed in sequence inside a basic block, and the
796 basic blocks of a loop are taken in dom order. */
799 generate_loops_for_partition (struct loop
*loop
, partition
*partition
,
807 int orig_loop_num
= loop
->orig_loop_num
;
808 loop
= copy_loop_before (loop
);
809 gcc_assert (loop
!= NULL
);
810 loop
->orig_loop_num
= orig_loop_num
;
811 create_preheader (loop
, CP_SIMPLE_PREHEADERS
);
812 create_bb_after_loop (loop
);
816 /* Origin number is set to the new versioned loop's num. */
817 gcc_assert (loop
->orig_loop_num
!= loop
->num
);
820 /* Remove stmts not in the PARTITION bitmap. */
821 bbs
= get_loop_body_in_dom_order (loop
);
823 if (MAY_HAVE_DEBUG_STMTS
)
824 for (i
= 0; i
< loop
->num_nodes
; i
++)
826 basic_block bb
= bbs
[i
];
828 for (gphi_iterator bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
);
831 gphi
*phi
= bsi
.phi ();
832 if (!virtual_operand_p (gimple_phi_result (phi
))
833 && !bitmap_bit_p (partition
->stmts
, gimple_uid (phi
)))
834 reset_debug_uses (phi
);
837 for (gimple_stmt_iterator bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
839 gimple
*stmt
= gsi_stmt (bsi
);
840 if (gimple_code (stmt
) != GIMPLE_LABEL
841 && !is_gimple_debug (stmt
)
842 && !bitmap_bit_p (partition
->stmts
, gimple_uid (stmt
)))
843 reset_debug_uses (stmt
);
847 for (i
= 0; i
< loop
->num_nodes
; i
++)
849 basic_block bb
= bbs
[i
];
850 edge inner_exit
= NULL
;
852 if (loop
!= bb
->loop_father
)
853 inner_exit
= single_exit (bb
->loop_father
);
855 for (gphi_iterator bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
);)
857 gphi
*phi
= bsi
.phi ();
858 if (!virtual_operand_p (gimple_phi_result (phi
))
859 && !bitmap_bit_p (partition
->stmts
, gimple_uid (phi
)))
860 remove_phi_node (&bsi
, true);
865 for (gimple_stmt_iterator bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
);)
867 gimple
*stmt
= gsi_stmt (bsi
);
868 if (gimple_code (stmt
) != GIMPLE_LABEL
869 && !is_gimple_debug (stmt
)
870 && !bitmap_bit_p (partition
->stmts
, gimple_uid (stmt
)))
872 /* In distribution of loop nest, if bb is inner loop's exit_bb,
873 we choose its exit edge/path in order to avoid generating
874 infinite loop. For all other cases, we choose an arbitrary
875 path through the empty CFG part that this unnecessary
876 control stmt controls. */
877 if (gcond
*cond_stmt
= dyn_cast
<gcond
*> (stmt
))
879 if (inner_exit
&& inner_exit
->flags
& EDGE_TRUE_VALUE
)
880 gimple_cond_make_true (cond_stmt
);
882 gimple_cond_make_false (cond_stmt
);
885 else if (gimple_code (stmt
) == GIMPLE_SWITCH
)
887 gswitch
*switch_stmt
= as_a
<gswitch
*> (stmt
);
888 gimple_switch_set_index
889 (switch_stmt
, CASE_LOW (gimple_switch_label (switch_stmt
, 1)));
894 unlink_stmt_vdef (stmt
);
895 gsi_remove (&bsi
, true);
907 /* If VAL memory representation contains the same value in all bytes,
908 return that value, otherwise return -1.
909 E.g. for 0x24242424 return 0x24, for IEEE double
910 747708026454360457216.0 return 0x44, etc. */
913 const_with_all_bytes_same (tree val
)
915 unsigned char buf
[64];
918 if (integer_zerop (val
)
919 || (TREE_CODE (val
) == CONSTRUCTOR
920 && !TREE_CLOBBER_P (val
)
921 && CONSTRUCTOR_NELTS (val
) == 0))
924 if (real_zerop (val
))
926 /* Only return 0 for +0.0, not for -0.0, which doesn't have
927 an all bytes same memory representation. Don't transform
928 -0.0 stores into +0.0 even for !HONOR_SIGNED_ZEROS. */
929 switch (TREE_CODE (val
))
932 if (!real_isneg (TREE_REAL_CST_PTR (val
)))
936 if (!const_with_all_bytes_same (TREE_REALPART (val
))
937 && !const_with_all_bytes_same (TREE_IMAGPART (val
)))
942 for (j
= 0; j
< VECTOR_CST_NELTS (val
); ++j
)
943 if (const_with_all_bytes_same (VECTOR_CST_ELT (val
, j
)))
945 if (j
== VECTOR_CST_NELTS (val
))
953 if (CHAR_BIT
!= 8 || BITS_PER_UNIT
!= 8)
956 len
= native_encode_expr (val
, buf
, sizeof (buf
));
959 for (i
= 1; i
< len
; i
++)
960 if (buf
[i
] != buf
[0])
965 /* Generate a call to memset for PARTITION in LOOP. */
968 generate_memset_builtin (struct loop
*loop
, partition
*partition
)
970 gimple_stmt_iterator gsi
;
971 tree mem
, fn
, nb_bytes
;
973 struct builtin_info
*builtin
= partition
->builtin
;
976 /* The new statements will be placed before LOOP. */
977 gsi
= gsi_last_bb (loop_preheader_edge (loop
)->src
);
979 nb_bytes
= builtin
->size
;
980 nb_bytes
= force_gimple_operand_gsi (&gsi
, nb_bytes
, true, NULL_TREE
,
981 false, GSI_CONTINUE_LINKING
);
982 mem
= builtin
->dst_base
;
983 mem
= force_gimple_operand_gsi (&gsi
, mem
, true, NULL_TREE
,
984 false, GSI_CONTINUE_LINKING
);
986 /* This exactly matches the pattern recognition in classify_partition. */
987 val
= gimple_assign_rhs1 (DR_STMT (builtin
->dst_dr
));
988 /* Handle constants like 0x15151515 and similarly
989 floating point constants etc. where all bytes are the same. */
990 int bytev
= const_with_all_bytes_same (val
);
992 val
= build_int_cst (integer_type_node
, bytev
);
993 else if (TREE_CODE (val
) == INTEGER_CST
)
994 val
= fold_convert (integer_type_node
, val
);
995 else if (!useless_type_conversion_p (integer_type_node
, TREE_TYPE (val
)))
997 tree tem
= make_ssa_name (integer_type_node
);
998 gimple
*cstmt
= gimple_build_assign (tem
, NOP_EXPR
, val
);
999 gsi_insert_after (&gsi
, cstmt
, GSI_CONTINUE_LINKING
);
1003 fn
= build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET
));
1004 fn_call
= gimple_build_call (fn
, 3, mem
, val
, nb_bytes
);
1005 gsi_insert_after (&gsi
, fn_call
, GSI_CONTINUE_LINKING
);
1007 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1009 fprintf (dump_file
, "generated memset");
1011 fprintf (dump_file
, " zero\n");
1013 fprintf (dump_file
, "\n");
1017 /* Generate a call to memcpy for PARTITION in LOOP. */
1020 generate_memcpy_builtin (struct loop
*loop
, partition
*partition
)
1022 gimple_stmt_iterator gsi
;
1024 tree dest
, src
, fn
, nb_bytes
;
1025 enum built_in_function kind
;
1026 struct builtin_info
*builtin
= partition
->builtin
;
1028 /* The new statements will be placed before LOOP. */
1029 gsi
= gsi_last_bb (loop_preheader_edge (loop
)->src
);
1031 nb_bytes
= builtin
->size
;
1032 nb_bytes
= force_gimple_operand_gsi (&gsi
, nb_bytes
, true, NULL_TREE
,
1033 false, GSI_CONTINUE_LINKING
);
1034 dest
= builtin
->dst_base
;
1035 src
= builtin
->src_base
;
1036 if (partition
->kind
== PKIND_MEMCPY
1037 || ! ptr_derefs_may_alias_p (dest
, src
))
1038 kind
= BUILT_IN_MEMCPY
;
1040 kind
= BUILT_IN_MEMMOVE
;
1042 dest
= force_gimple_operand_gsi (&gsi
, dest
, true, NULL_TREE
,
1043 false, GSI_CONTINUE_LINKING
);
1044 src
= force_gimple_operand_gsi (&gsi
, src
, true, NULL_TREE
,
1045 false, GSI_CONTINUE_LINKING
);
1046 fn
= build_fold_addr_expr (builtin_decl_implicit (kind
));
1047 fn_call
= gimple_build_call (fn
, 3, dest
, src
, nb_bytes
);
1048 gsi_insert_after (&gsi
, fn_call
, GSI_CONTINUE_LINKING
);
1050 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1052 if (kind
== BUILT_IN_MEMCPY
)
1053 fprintf (dump_file
, "generated memcpy\n");
1055 fprintf (dump_file
, "generated memmove\n");
1059 /* Remove and destroy the loop LOOP. */
1062 destroy_loop (struct loop
*loop
)
1064 unsigned nbbs
= loop
->num_nodes
;
1065 edge exit
= single_exit (loop
);
1066 basic_block src
= loop_preheader_edge (loop
)->src
, dest
= exit
->dest
;
1070 bbs
= get_loop_body_in_dom_order (loop
);
1072 redirect_edge_pred (exit
, src
);
1073 exit
->flags
&= ~(EDGE_TRUE_VALUE
|EDGE_FALSE_VALUE
);
1074 exit
->flags
|= EDGE_FALLTHRU
;
1075 cancel_loop_tree (loop
);
1076 rescan_loop_exit (exit
, false, true);
1081 /* We have made sure to not leave any dangling uses of SSA
1082 names defined in the loop. With the exception of virtuals.
1083 Make sure we replace all uses of virtual defs that will remain
1084 outside of the loop with the bare symbol as delete_basic_block
1085 will release them. */
1087 for (gphi_iterator gsi
= gsi_start_phis (bbs
[i
]); !gsi_end_p (gsi
);
1090 gphi
*phi
= gsi
.phi ();
1091 if (virtual_operand_p (gimple_phi_result (phi
)))
1092 mark_virtual_phi_result_for_renaming (phi
);
1094 for (gimple_stmt_iterator gsi
= gsi_start_bb (bbs
[i
]); !gsi_end_p (gsi
);
1097 gimple
*stmt
= gsi_stmt (gsi
);
1098 tree vdef
= gimple_vdef (stmt
);
1099 if (vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
1100 mark_virtual_operand_for_renaming (vdef
);
1102 delete_basic_block (bbs
[i
]);
1108 set_immediate_dominator (CDI_DOMINATORS
, dest
,
1109 recompute_dominator (CDI_DOMINATORS
, dest
));
1112 /* Generates code for PARTITION. Return whether LOOP needs to be destroyed. */
1115 generate_code_for_partition (struct loop
*loop
,
1116 partition
*partition
, bool copy_p
)
1118 switch (partition
->kind
)
1121 /* Reductions all have to be in the last partition. */
1122 gcc_assert (!partition_reduction_p (partition
)
1124 generate_loops_for_partition (loop
, partition
, copy_p
);
1128 generate_memset_builtin (loop
, partition
);
1133 generate_memcpy_builtin (loop
, partition
);
1140 /* Common tail for partitions we turn into a call. If this was the last
1141 partition for which we generate code, we have to destroy the loop. */
1147 /* Return data dependence relation for data references A and B. The two
1148 data references must be in lexicographic order wrto reduced dependence
1149 graph RDG. We firstly try to find ddr from global ddr hash table. If
1150 it doesn't exist, compute the ddr and cache it. */
1152 static data_dependence_relation
*
1153 get_data_dependence (struct graph
*rdg
, data_reference_p a
, data_reference_p b
)
1155 struct data_dependence_relation ent
, **slot
;
1156 struct data_dependence_relation
*ddr
;
1158 gcc_assert (DR_IS_WRITE (a
) || DR_IS_WRITE (b
));
1159 gcc_assert (rdg_vertex_for_stmt (rdg
, DR_STMT (a
))
1160 <= rdg_vertex_for_stmt (rdg
, DR_STMT (b
)));
1163 slot
= ddrs_table
->find_slot (&ent
, INSERT
);
1166 ddr
= initialize_data_dependence_relation (a
, b
, loop_nest
);
1167 compute_affine_dependence (ddr
, loop_nest
[0]);
1174 /* In reduced dependence graph RDG for loop distribution, return true if
1175 dependence between references DR1 and DR2 leads to a dependence cycle
1176 and such dependence cycle can't be resolved by runtime alias check. */
1179 data_dep_in_cycle_p (struct graph
*rdg
,
1180 data_reference_p dr1
, data_reference_p dr2
)
1182 struct data_dependence_relation
*ddr
;
1184 /* Re-shuffle data-refs to be in topological order. */
1185 if (rdg_vertex_for_stmt (rdg
, DR_STMT (dr1
))
1186 > rdg_vertex_for_stmt (rdg
, DR_STMT (dr2
)))
1187 std::swap (dr1
, dr2
);
1189 ddr
= get_data_dependence (rdg
, dr1
, dr2
);
1191 /* In case of no data dependence. */
1192 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
1194 /* For unknown data dependence or known data dependence which can't be
1195 expressed in classic distance vector, we check if it can be resolved
1196 by runtime alias check. If yes, we still consider data dependence
1197 as won't introduce data dependence cycle. */
1198 else if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
1199 || DDR_NUM_DIST_VECTS (ddr
) == 0)
1200 return !runtime_alias_check_p (ddr
, NULL
, true);
1201 else if (DDR_NUM_DIST_VECTS (ddr
) > 1)
1203 else if (DDR_REVERSED_P (ddr
)
1204 || lambda_vector_zerop (DDR_DIST_VECT (ddr
, 0), 1))
1210 /* Given reduced dependence graph RDG, PARTITION1 and PARTITION2, update
1211 PARTITION1's type after merging PARTITION2 into PARTITION1. */
1214 update_type_for_merge (struct graph
*rdg
,
1215 partition
*partition1
, partition
*partition2
)
1218 bitmap_iterator bi
, bj
;
1219 data_reference_p dr1
, dr2
;
1221 EXECUTE_IF_SET_IN_BITMAP (partition1
->datarefs
, 0, i
, bi
)
1223 unsigned start
= (partition1
== partition2
) ? i
+ 1 : 0;
1225 dr1
= datarefs_vec
[i
];
1226 EXECUTE_IF_SET_IN_BITMAP (partition2
->datarefs
, start
, j
, bj
)
1228 dr2
= datarefs_vec
[j
];
1229 if (DR_IS_READ (dr1
) && DR_IS_READ (dr2
))
1232 /* Partition can only be executed sequentially if there is any
1233 data dependence cycle. */
1234 if (data_dep_in_cycle_p (rdg
, dr1
, dr2
))
1236 partition1
->type
= PTYPE_SEQUENTIAL
;
1243 /* Returns a partition with all the statements needed for computing
1244 the vertex V of the RDG, also including the loop exit conditions. */
1247 build_rdg_partition_for_vertex (struct graph
*rdg
, int v
)
1249 partition
*partition
= partition_alloc ();
1250 auto_vec
<int, 3> nodes
;
1253 data_reference_p dr
;
1255 graphds_dfs (rdg
, &v
, 1, &nodes
, false, NULL
);
1257 FOR_EACH_VEC_ELT (nodes
, i
, x
)
1259 bitmap_set_bit (partition
->stmts
, x
);
1261 for (j
= 0; RDG_DATAREFS (rdg
, x
).iterate (j
, &dr
); ++j
)
1263 unsigned idx
= (unsigned) DR_INDEX (dr
);
1264 gcc_assert (idx
< datarefs_vec
.length ());
1266 /* Partition can only be executed sequentially if there is any
1267 unknown data reference. */
1268 if (!DR_BASE_ADDRESS (dr
) || !DR_OFFSET (dr
)
1269 || !DR_INIT (dr
) || !DR_STEP (dr
))
1270 partition
->type
= PTYPE_SEQUENTIAL
;
1272 bitmap_set_bit (partition
->datarefs
, idx
);
1276 if (partition
->type
== PTYPE_SEQUENTIAL
)
1279 /* Further check if any data dependence prevents us from executing the
1280 partition parallelly. */
1281 update_type_for_merge (rdg
, partition
, partition
);
1286 /* Given PARTITION of RDG, record single load/store data references for
1287 builtin partition in SRC_DR/DST_DR, return false if there is no such
1291 find_single_drs (struct graph
*rdg
, partition
*partition
,
1292 data_reference_p
*dst_dr
, data_reference_p
*src_dr
)
1295 data_reference_p single_ld
= NULL
, single_st
= NULL
;
1298 EXECUTE_IF_SET_IN_BITMAP (partition
->stmts
, 0, i
, bi
)
1300 gimple
*stmt
= RDG_STMT (rdg
, i
);
1301 data_reference_p dr
;
1303 if (gimple_code (stmt
) == GIMPLE_PHI
)
1306 /* Any scalar stmts are ok. */
1307 if (!gimple_vuse (stmt
))
1310 /* Otherwise just regular loads/stores. */
1311 if (!gimple_assign_single_p (stmt
))
1314 /* But exactly one store and/or load. */
1315 for (unsigned j
= 0; RDG_DATAREFS (rdg
, i
).iterate (j
, &dr
); ++j
)
1317 tree type
= TREE_TYPE (DR_REF (dr
));
1319 /* The memset, memcpy and memmove library calls are only
1320 able to deal with generic address space. */
1321 if (!ADDR_SPACE_GENERIC_P (TYPE_ADDR_SPACE (type
)))
1324 if (DR_IS_READ (dr
))
1326 if (single_ld
!= NULL
)
1332 if (single_st
!= NULL
)
1342 /* Bail out if this is a bitfield memory reference. */
1343 if (TREE_CODE (DR_REF (single_st
)) == COMPONENT_REF
1344 && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (single_st
), 1)))
1347 /* Data reference must be executed exactly once per iteration. */
1348 basic_block bb_st
= gimple_bb (DR_STMT (single_st
));
1349 struct loop
*inner
= bb_st
->loop_father
;
1350 if (!dominated_by_p (CDI_DOMINATORS
, inner
->latch
, bb_st
))
1355 gimple
*store
= DR_STMT (single_st
), *load
= DR_STMT (single_ld
);
1356 /* Direct aggregate copy or via an SSA name temporary. */
1358 && gimple_assign_lhs (load
) != gimple_assign_rhs1 (store
))
1361 /* Bail out if this is a bitfield memory reference. */
1362 if (TREE_CODE (DR_REF (single_ld
)) == COMPONENT_REF
1363 && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (single_ld
), 1)))
1366 /* Load and store must be in the same loop nest. */
1367 basic_block bb_ld
= gimple_bb (DR_STMT (single_ld
));
1368 if (inner
!= bb_ld
->loop_father
)
1371 /* Data reference must be executed exactly once per iteration. */
1372 if (!dominated_by_p (CDI_DOMINATORS
, inner
->latch
, bb_ld
))
1375 edge e
= single_exit (inner
);
1376 bool dom_ld
= dominated_by_p (CDI_DOMINATORS
, e
->src
, bb_ld
);
1377 bool dom_st
= dominated_by_p (CDI_DOMINATORS
, e
->src
, bb_st
);
1378 if (dom_ld
!= dom_st
)
1382 *src_dr
= single_ld
;
1383 *dst_dr
= single_st
;
1387 /* Given data reference DR in LOOP_NEST, this function checks the enclosing
1388 loops from inner to outer to see if loop's step equals to access size at
1389 each level of loop. Return true if yes; record access base and size in
1390 BASE and SIZE; save loop's step at each level of loop in STEPS if it is
1391 not null. For example:
1393 int arr[100][100][100];
1394 for (i = 0; i < 100; i++) ;steps[2] = 40000
1395 for (j = 100; j > 0; j--) ;steps[1] = -400
1396 for (k = 0; k < 100; k++) ;steps[0] = 4
1397 arr[i][j - 1][k] = 0; ;base = &arr, size = 4000000. */
1400 compute_access_range (loop_p loop_nest
, data_reference_p dr
, tree
*base
,
1401 tree
*size
, vec
<tree
> *steps
= NULL
)
1403 location_t loc
= gimple_location (DR_STMT (dr
));
1404 basic_block bb
= gimple_bb (DR_STMT (dr
));
1405 struct loop
*loop
= bb
->loop_father
;
1406 tree ref
= DR_REF (dr
);
1407 tree access_base
= build_fold_addr_expr (ref
);
1408 tree access_size
= TYPE_SIZE_UNIT (TREE_TYPE (ref
));
1411 tree scev_fn
= analyze_scalar_evolution (loop
, access_base
);
1412 if (TREE_CODE (scev_fn
) != POLYNOMIAL_CHREC
)
1415 access_base
= CHREC_LEFT (scev_fn
);
1416 if (tree_contains_chrecs (access_base
, NULL
))
1419 tree scev_step
= CHREC_RIGHT (scev_fn
);
1420 /* Only support constant steps. */
1421 if (TREE_CODE (scev_step
) != INTEGER_CST
)
1424 enum ev_direction access_dir
= scev_direction (scev_fn
);
1425 if (access_dir
== EV_DIR_UNKNOWN
)
1429 steps
->safe_push (scev_step
);
1431 scev_step
= fold_convert_loc (loc
, sizetype
, scev_step
);
1432 /* Compute absolute value of scev step. */
1433 if (access_dir
== EV_DIR_DECREASES
)
1434 scev_step
= fold_build1_loc (loc
, NEGATE_EXPR
, sizetype
, scev_step
);
1436 /* At each level of loop, scev step must equal to access size. In other
1437 words, DR must access consecutive memory between loop iterations. */
1438 if (!operand_equal_p (scev_step
, access_size
, 0))
1441 /* Compute DR's execution times in loop. */
1442 tree niters
= number_of_latch_executions (loop
);
1443 niters
= fold_convert_loc (loc
, sizetype
, niters
);
1444 if (dominated_by_p (CDI_DOMINATORS
, single_exit (loop
)->src
, bb
))
1445 niters
= size_binop_loc (loc
, PLUS_EXPR
, niters
, size_one_node
);
1447 /* Compute DR's overall access size in loop. */
1448 access_size
= fold_build2_loc (loc
, MULT_EXPR
, sizetype
,
1450 /* Adjust base address in case of negative step. */
1451 if (access_dir
== EV_DIR_DECREASES
)
1453 tree adj
= fold_build2_loc (loc
, MINUS_EXPR
, sizetype
,
1454 scev_step
, access_size
);
1455 access_base
= fold_build_pointer_plus_loc (loc
, access_base
, adj
);
1457 } while (loop
!= loop_nest
&& (loop
= loop_outer (loop
)) != NULL
);
1459 *base
= access_base
;
1460 *size
= access_size
;
1464 /* Allocate and return builtin struct. Record information like DST_DR,
1465 SRC_DR, DST_BASE, SRC_BASE and SIZE in the allocated struct. */
1467 static struct builtin_info
*
1468 alloc_builtin (data_reference_p dst_dr
, data_reference_p src_dr
,
1469 tree dst_base
, tree src_base
, tree size
)
1471 struct builtin_info
*builtin
= XNEW (struct builtin_info
);
1472 builtin
->dst_dr
= dst_dr
;
1473 builtin
->src_dr
= src_dr
;
1474 builtin
->dst_base
= dst_base
;
1475 builtin
->src_base
= src_base
;
1476 builtin
->size
= size
;
1480 /* Given data reference DR in loop nest LOOP, classify if it forms builtin
1484 classify_builtin_st (loop_p loop
, partition
*partition
, data_reference_p dr
)
1486 gimple
*stmt
= DR_STMT (dr
);
1487 tree base
, size
, rhs
= gimple_assign_rhs1 (stmt
);
1489 if (const_with_all_bytes_same (rhs
) == -1
1490 && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs
))
1491 || (TYPE_MODE (TREE_TYPE (rhs
))
1492 != TYPE_MODE (unsigned_char_type_node
))))
1495 if (TREE_CODE (rhs
) == SSA_NAME
1496 && !SSA_NAME_IS_DEFAULT_DEF (rhs
)
1497 && flow_bb_inside_loop_p (loop
, gimple_bb (SSA_NAME_DEF_STMT (rhs
))))
1500 if (!compute_access_range (loop
, dr
, &base
, &size
))
1503 partition
->builtin
= alloc_builtin (dr
, NULL
, base
, NULL_TREE
, size
);
1504 partition
->kind
= PKIND_MEMSET
;
1507 /* Given data references DST_DR and SRC_DR in loop nest LOOP and RDG, classify
1508 if it forms builtin memcpy or memmove call. */
1511 classify_builtin_ldst (loop_p loop
, struct graph
*rdg
, partition
*partition
,
1512 data_reference_p dst_dr
, data_reference_p src_dr
)
1514 tree base
, size
, src_base
, src_size
;
1515 auto_vec
<tree
> dst_steps
, src_steps
;
1517 /* Compute access range of both load and store. They much have the same
1519 if (!compute_access_range (loop
, dst_dr
, &base
, &size
, &dst_steps
)
1520 || !compute_access_range (loop
, src_dr
, &src_base
, &src_size
, &src_steps
)
1521 || !operand_equal_p (size
, src_size
, 0))
1524 /* Load and store in loop nest must access memory in the same way, i.e,
1525 their must have the same steps in each loop of the nest. */
1526 if (dst_steps
.length () != src_steps
.length ())
1528 for (unsigned i
= 0; i
< dst_steps
.length (); ++i
)
1529 if (!operand_equal_p (dst_steps
[i
], src_steps
[i
], 0))
1532 /* Now check that if there is a dependence. */
1533 ddr_p ddr
= get_data_dependence (rdg
, src_dr
, dst_dr
);
1535 /* Classify as memcpy if no dependence between load and store. */
1536 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
1538 partition
->builtin
= alloc_builtin (dst_dr
, src_dr
, base
, src_base
, size
);
1539 partition
->kind
= PKIND_MEMCPY
;
1543 /* Can't do memmove in case of unknown dependence or dependence without
1544 classical distance vector. */
1545 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
1546 || DDR_NUM_DIST_VECTS (ddr
) == 0)
1550 lambda_vector dist_v
;
1551 int num_lev
= (DDR_LOOP_NEST (ddr
)).length ();
1552 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr
), i
, dist_v
)
1554 unsigned dep_lev
= dependence_level (dist_v
, num_lev
);
1555 /* Can't do memmove if load depends on store. */
1556 if (dep_lev
> 0 && dist_v
[dep_lev
- 1] > 0 && !DDR_REVERSED_P (ddr
))
1560 partition
->builtin
= alloc_builtin (dst_dr
, src_dr
, base
, src_base
, size
);
1561 partition
->kind
= PKIND_MEMMOVE
;
1565 /* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
1566 For the moment we detect memset, memcpy and memmove patterns. Bitmap
1567 STMT_IN_ALL_PARTITIONS contains statements belonging to all partitions. */
1570 classify_partition (loop_p loop
, struct graph
*rdg
, partition
*partition
,
1571 bitmap stmt_in_all_partitions
)
1575 data_reference_p single_ld
= NULL
, single_st
= NULL
;
1576 bool volatiles_p
= false, has_reduction
= false;
1578 EXECUTE_IF_SET_IN_BITMAP (partition
->stmts
, 0, i
, bi
)
1580 gimple
*stmt
= RDG_STMT (rdg
, i
);
1582 if (gimple_has_volatile_ops (stmt
))
1585 /* If the stmt is not included by all partitions and there is uses
1586 outside of the loop, then mark the partition as reduction. */
1587 if (stmt_has_scalar_dependences_outside_loop (loop
, stmt
))
1589 /* Due to limitation in the transform phase we have to fuse all
1590 reduction partitions. As a result, this could cancel valid
1591 loop distribution especially for loop that induction variable
1592 is used outside of loop. To workaround this issue, we skip
1593 marking partition as reudction if the reduction stmt belongs
1594 to all partitions. In such case, reduction will be computed
1595 correctly no matter how partitions are fused/distributed. */
1596 if (!bitmap_bit_p (stmt_in_all_partitions
, i
))
1598 partition
->reduction_p
= true;
1601 has_reduction
= true;
1605 /* Perform general partition disqualification for builtins. */
1607 /* Simple workaround to prevent classifying the partition as builtin
1608 if it contains any use outside of loop. */
1610 || !flag_tree_loop_distribute_patterns
)
1613 /* Find single load/store data references for builtin partition. */
1614 if (!find_single_drs (rdg
, partition
, &single_st
, &single_ld
))
1617 /* Classify the builtin kind. */
1618 if (single_ld
== NULL
)
1619 classify_builtin_st (loop
, partition
, single_st
);
1621 classify_builtin_ldst (loop
, rdg
, partition
, single_st
, single_ld
);
1624 /* Returns true when PARTITION1 and PARTITION2 access the same memory
1628 share_memory_accesses (struct graph
*rdg
,
1629 partition
*partition1
, partition
*partition2
)
1632 bitmap_iterator bi
, bj
;
1633 data_reference_p dr1
, dr2
;
1635 /* First check whether in the intersection of the two partitions are
1636 any loads or stores. Common loads are the situation that happens
1638 EXECUTE_IF_AND_IN_BITMAP (partition1
->stmts
, partition2
->stmts
, 0, i
, bi
)
1639 if (RDG_MEM_WRITE_STMT (rdg
, i
)
1640 || RDG_MEM_READS_STMT (rdg
, i
))
1643 /* Then check whether the two partitions access the same memory object. */
1644 EXECUTE_IF_SET_IN_BITMAP (partition1
->datarefs
, 0, i
, bi
)
1646 dr1
= datarefs_vec
[i
];
1648 if (!DR_BASE_ADDRESS (dr1
)
1649 || !DR_OFFSET (dr1
) || !DR_INIT (dr1
) || !DR_STEP (dr1
))
1652 EXECUTE_IF_SET_IN_BITMAP (partition2
->datarefs
, 0, j
, bj
)
1654 dr2
= datarefs_vec
[j
];
1656 if (!DR_BASE_ADDRESS (dr2
)
1657 || !DR_OFFSET (dr2
) || !DR_INIT (dr2
) || !DR_STEP (dr2
))
1660 if (operand_equal_p (DR_BASE_ADDRESS (dr1
), DR_BASE_ADDRESS (dr2
), 0)
1661 && operand_equal_p (DR_OFFSET (dr1
), DR_OFFSET (dr2
), 0)
1662 && operand_equal_p (DR_INIT (dr1
), DR_INIT (dr2
), 0)
1663 && operand_equal_p (DR_STEP (dr1
), DR_STEP (dr2
), 0))
1671 /* For each seed statement in STARTING_STMTS, this function builds
1672 partition for it by adding depended statements according to RDG.
1673 All partitions are recorded in PARTITIONS. */
1676 rdg_build_partitions (struct graph
*rdg
,
1677 vec
<gimple
*> starting_stmts
,
1678 vec
<partition
*> *partitions
)
1680 auto_bitmap processed
;
1684 FOR_EACH_VEC_ELT (starting_stmts
, i
, stmt
)
1686 int v
= rdg_vertex_for_stmt (rdg
, stmt
);
1688 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1690 "ldist asked to generate code for vertex %d\n", v
);
1692 /* If the vertex is already contained in another partition so
1693 is the partition rooted at it. */
1694 if (bitmap_bit_p (processed
, v
))
1697 partition
*partition
= build_rdg_partition_for_vertex (rdg
, v
);
1698 bitmap_ior_into (processed
, partition
->stmts
);
1700 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1702 fprintf (dump_file
, "ldist creates useful %s partition:\n",
1703 partition
->type
== PTYPE_PARALLEL
? "parallel" : "sequent");
1704 bitmap_print (dump_file
, partition
->stmts
, " ", "\n");
1707 partitions
->safe_push (partition
);
1710 /* All vertices should have been assigned to at least one partition now,
1711 other than vertices belonging to dead code. */
1714 /* Dump to FILE the PARTITIONS. */
1717 dump_rdg_partitions (FILE *file
, vec
<partition
*> partitions
)
1720 partition
*partition
;
1722 FOR_EACH_VEC_ELT (partitions
, i
, partition
)
1723 debug_bitmap_file (file
, partition
->stmts
);
1726 /* Debug PARTITIONS. */
1727 extern void debug_rdg_partitions (vec
<partition
*> );
1730 debug_rdg_partitions (vec
<partition
*> partitions
)
1732 dump_rdg_partitions (stderr
, partitions
);
1735 /* Returns the number of read and write operations in the RDG. */
1738 number_of_rw_in_rdg (struct graph
*rdg
)
1742 for (i
= 0; i
< rdg
->n_vertices
; i
++)
1744 if (RDG_MEM_WRITE_STMT (rdg
, i
))
1747 if (RDG_MEM_READS_STMT (rdg
, i
))
1754 /* Returns the number of read and write operations in a PARTITION of
1758 number_of_rw_in_partition (struct graph
*rdg
, partition
*partition
)
1764 EXECUTE_IF_SET_IN_BITMAP (partition
->stmts
, 0, i
, ii
)
1766 if (RDG_MEM_WRITE_STMT (rdg
, i
))
1769 if (RDG_MEM_READS_STMT (rdg
, i
))
1776 /* Returns true when one of the PARTITIONS contains all the read or
1777 write operations of RDG. */
1780 partition_contains_all_rw (struct graph
*rdg
,
1781 vec
<partition
*> partitions
)
1784 partition
*partition
;
1785 int nrw
= number_of_rw_in_rdg (rdg
);
1787 FOR_EACH_VEC_ELT (partitions
, i
, partition
)
1788 if (nrw
== number_of_rw_in_partition (rdg
, partition
))
1794 /* Compute partition dependence created by the data references in DRS1
1795 and DRS2, modify and return DIR according to that. IF ALIAS_DDR is
1796 not NULL, we record dependence introduced by possible alias between
1797 two data references in ALIAS_DDRS; otherwise, we simply ignore such
1798 dependence as if it doesn't exist at all. */
1801 pg_add_dependence_edges (struct graph
*rdg
, int dir
,
1802 bitmap drs1
, bitmap drs2
, vec
<ddr_p
> *alias_ddrs
)
1805 bitmap_iterator bi
, bj
;
1806 data_reference_p dr1
, dr2
, saved_dr1
;
1808 /* dependence direction - 0 is no dependence, -1 is back,
1809 1 is forth, 2 is both (we can stop then, merging will occur). */
1810 EXECUTE_IF_SET_IN_BITMAP (drs1
, 0, i
, bi
)
1812 dr1
= datarefs_vec
[i
];
1814 EXECUTE_IF_SET_IN_BITMAP (drs2
, 0, j
, bj
)
1816 int res
, this_dir
= 1;
1819 dr2
= datarefs_vec
[j
];
1821 /* Skip all <read, read> data dependence. */
1822 if (DR_IS_READ (dr1
) && DR_IS_READ (dr2
))
1826 /* Re-shuffle data-refs to be in topological order. */
1827 if (rdg_vertex_for_stmt (rdg
, DR_STMT (dr1
))
1828 > rdg_vertex_for_stmt (rdg
, DR_STMT (dr2
)))
1830 std::swap (dr1
, dr2
);
1831 this_dir
= -this_dir
;
1833 ddr
= get_data_dependence (rdg
, dr1
, dr2
);
1834 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
1837 res
= data_ref_compare_tree (DR_BASE_ADDRESS (dr1
),
1838 DR_BASE_ADDRESS (dr2
));
1839 /* Be conservative. If data references are not well analyzed,
1840 or the two data references have the same base address and
1841 offset, add dependence and consider it alias to each other.
1842 In other words, the dependence can not be resolved by
1843 runtime alias check. */
1844 if (!DR_BASE_ADDRESS (dr1
) || !DR_BASE_ADDRESS (dr2
)
1845 || !DR_OFFSET (dr1
) || !DR_OFFSET (dr2
)
1846 || !DR_INIT (dr1
) || !DR_INIT (dr2
)
1847 || !DR_STEP (dr1
) || !tree_fits_uhwi_p (DR_STEP (dr1
))
1848 || !DR_STEP (dr2
) || !tree_fits_uhwi_p (DR_STEP (dr2
))
1851 /* Data dependence could be resolved by runtime alias check,
1852 record it in ALIAS_DDRS. */
1853 else if (alias_ddrs
!= NULL
)
1854 alias_ddrs
->safe_push (ddr
);
1855 /* Or simply ignore it. */
1857 else if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
1859 if (DDR_REVERSED_P (ddr
))
1860 this_dir
= -this_dir
;
1862 /* Known dependences can still be unordered througout the
1863 iteration space, see gcc.dg/tree-ssa/ldist-16.c. */
1864 if (DDR_NUM_DIST_VECTS (ddr
) != 1)
1866 /* If the overlap is exact preserve stmt order. */
1867 else if (lambda_vector_zerop (DDR_DIST_VECT (ddr
, 0), 1))
1869 /* Else as the distance vector is lexicographic positive swap
1870 the dependence direction. */
1872 this_dir
= -this_dir
;
1880 else if (this_dir
!= 0 && dir
!= this_dir
)
1882 /* Shuffle "back" dr1. */
1889 /* Compare postorder number of the partition graph vertices V1 and V2. */
1892 pgcmp (const void *v1_
, const void *v2_
)
1894 const vertex
*v1
= (const vertex
*)v1_
;
1895 const vertex
*v2
= (const vertex
*)v2_
;
1896 return v2
->post
- v1
->post
;
/* Data attached to vertices of partition dependence graph.  */
struct pg_vdata
{
  /* ID of the corresponding partition.  */
  int id;
  /* The partition.  */
  struct partition *partition;
};
1908 /* Data attached to edges of partition dependence graph. */
1911 /* If the dependence edge can be resolved by runtime alias check,
1912 this vector contains data dependence relations for runtime alias
1913 check. On the other hand, if the dependence edge is introduced
1914 because of compilation time known data dependence, this vector
1915 contains nothing. */
1916 vec
<ddr_p
> alias_ddrs
;
1919 /* Callback data for traversing edges in graph. */
1920 struct pg_edge_callback_data
1922 /* Bitmap contains strong connected components should be merged. */
1923 bitmap sccs_to_merge
;
1924 /* Array constains component information for all vertices. */
1925 int *vertices_component
;
1926 /* Vector to record all data dependence relations which are needed
1927 to break strong connected components by runtime alias checks. */
1928 vec
<ddr_p
> *alias_ddrs
;
1931 /* Initialize vertice's data for partition dependence graph PG with
1935 init_partition_graph_vertices (struct graph
*pg
,
1936 vec
<struct partition
*> *partitions
)
1939 partition
*partition
;
1940 struct pg_vdata
*data
;
1942 for (i
= 0; partitions
->iterate (i
, &partition
); ++i
)
1944 data
= new pg_vdata
;
1945 pg
->vertices
[i
].data
= data
;
1947 data
->partition
= partition
;
1951 /* Add edge <I, J> to partition dependence graph PG. Attach vector of data
1952 dependence relations to the EDGE if DDRS isn't NULL. */
1955 add_partition_graph_edge (struct graph
*pg
, int i
, int j
, vec
<ddr_p
> *ddrs
)
1957 struct graph_edge
*e
= add_edge (pg
, i
, j
);
1959 /* If the edge is attached with data dependence relations, it means this
1960 dependence edge can be resolved by runtime alias checks. */
1963 struct pg_edata
*data
= new pg_edata
;
1965 gcc_assert (ddrs
->length () > 0);
1967 data
->alias_ddrs
= vNULL
;
1968 data
->alias_ddrs
.safe_splice (*ddrs
);
1972 /* Callback function for graph travesal algorithm. It returns true
1973 if edge E should skipped when traversing the graph. */
1976 pg_skip_alias_edge (struct graph_edge
*e
)
1978 struct pg_edata
*data
= (struct pg_edata
*)e
->data
;
1979 return (data
!= NULL
&& data
->alias_ddrs
.length () > 0);
1982 /* Callback function freeing data attached to edge E of graph. */
1985 free_partition_graph_edata_cb (struct graph
*, struct graph_edge
*e
, void *)
1987 if (e
->data
!= NULL
)
1989 struct pg_edata
*data
= (struct pg_edata
*)e
->data
;
1990 data
->alias_ddrs
.release ();
1995 /* Free data attached to vertice of partition dependence graph PG. */
1998 free_partition_graph_vdata (struct graph
*pg
)
2001 struct pg_vdata
*data
;
2003 for (i
= 0; i
< pg
->n_vertices
; ++i
)
2005 data
= (struct pg_vdata
*)pg
->vertices
[i
].data
;
2010 /* Build and return partition dependence graph for PARTITIONS. RDG is
2011 reduced dependence graph for the loop to be distributed. If IGNORE_ALIAS_P
2012 is true, data dependence caused by possible alias between references
2013 is ignored, as if it doesn't exist at all; otherwise all depdendences
2016 static struct graph
*
2017 build_partition_graph (struct graph
*rdg
,
2018 vec
<struct partition
*> *partitions
,
2019 bool ignore_alias_p
)
2022 struct partition
*partition1
, *partition2
;
2023 graph
*pg
= new_graph (partitions
->length ());
2024 auto_vec
<ddr_p
> alias_ddrs
, *alias_ddrs_p
;
2026 alias_ddrs_p
= ignore_alias_p
? NULL
: &alias_ddrs
;
2028 init_partition_graph_vertices (pg
, partitions
);
2030 for (i
= 0; partitions
->iterate (i
, &partition1
); ++i
)
2032 for (j
= i
+ 1; partitions
->iterate (j
, &partition2
); ++j
)
2034 /* dependence direction - 0 is no dependence, -1 is back,
2035 1 is forth, 2 is both (we can stop then, merging will occur). */
2038 /* If the first partition has reduction, add back edge; if the
2039 second partition has reduction, add forth edge. This makes
2040 sure that reduction partition will be sorted as the last one. */
2041 if (partition_reduction_p (partition1
))
2043 else if (partition_reduction_p (partition2
))
2046 /* Cleanup the temporary vector. */
2047 alias_ddrs
.truncate (0);
2049 dir
= pg_add_dependence_edges (rdg
, dir
, partition1
->datarefs
,
2050 partition2
->datarefs
, alias_ddrs_p
);
2052 /* Add edge to partition graph if there exists dependence. There
2053 are two types of edges. One type edge is caused by compilation
2054 time known dependence, this type can not be resolved by runtime
2055 alias check. The other type can be resolved by runtime alias
2057 if (dir
== 1 || dir
== 2
2058 || alias_ddrs
.length () > 0)
2060 /* Attach data dependence relations to edge that can be resolved
2061 by runtime alias check. */
2062 bool alias_edge_p
= (dir
!= 1 && dir
!= 2);
2063 add_partition_graph_edge (pg
, i
, j
,
2064 (alias_edge_p
) ? &alias_ddrs
: NULL
);
2066 if (dir
== -1 || dir
== 2
2067 || alias_ddrs
.length () > 0)
2069 /* Attach data dependence relations to edge that can be resolved
2070 by runtime alias check. */
2071 bool alias_edge_p
= (dir
!= -1 && dir
!= 2);
2072 add_partition_graph_edge (pg
, j
, i
,
2073 (alias_edge_p
) ? &alias_ddrs
: NULL
);
2080 /* Sort partitions in PG in descending post order and store them in
2084 sort_partitions_by_post_order (struct graph
*pg
,
2085 vec
<struct partition
*> *partitions
)
2088 struct pg_vdata
*data
;
2090 /* Now order the remaining nodes in descending postorder. */
2091 qsort (pg
->vertices
, pg
->n_vertices
, sizeof (vertex
), pgcmp
);
2092 partitions
->truncate (0);
2093 for (i
= 0; i
< pg
->n_vertices
; ++i
)
2095 data
= (struct pg_vdata
*)pg
->vertices
[i
].data
;
2096 if (data
->partition
)
2097 partitions
->safe_push (data
->partition
);
2101 /* Given reduced dependence graph RDG merge strong connected components
2102 of PARTITIONS. If IGNORE_ALIAS_P is true, data dependence caused by
2103 possible alias between references is ignored, as if it doesn't exist
2104 at all; otherwise all depdendences are considered. */
2107 merge_dep_scc_partitions (struct graph
*rdg
,
2108 vec
<struct partition
*> *partitions
,
2109 bool ignore_alias_p
)
2111 struct partition
*partition1
, *partition2
;
2112 struct pg_vdata
*data
;
2113 graph
*pg
= build_partition_graph (rdg
, partitions
, ignore_alias_p
);
2114 int i
, j
, num_sccs
= graphds_scc (pg
, NULL
);
2116 /* Strong connected compoenent means dependence cycle, we cannot distribute
2117 them. So fuse them together. */
2118 if ((unsigned) num_sccs
< partitions
->length ())
2120 for (i
= 0; i
< num_sccs
; ++i
)
2122 for (j
= 0; partitions
->iterate (j
, &partition1
); ++j
)
2123 if (pg
->vertices
[j
].component
== i
)
2125 for (j
= j
+ 1; partitions
->iterate (j
, &partition2
); ++j
)
2126 if (pg
->vertices
[j
].component
== i
)
2128 partition_merge_into (NULL
, partition1
,
2129 partition2
, FUSE_SAME_SCC
);
2130 partition1
->type
= PTYPE_SEQUENTIAL
;
2131 (*partitions
)[j
] = NULL
;
2132 partition_free (partition2
);
2133 data
= (struct pg_vdata
*)pg
->vertices
[j
].data
;
2134 data
->partition
= NULL
;
2139 sort_partitions_by_post_order (pg
, partitions
);
2140 gcc_assert (partitions
->length () == (unsigned)num_sccs
);
2141 free_partition_graph_vdata (pg
);
2145 /* Callback function for traversing edge E in graph G. DATA is private
2149 pg_collect_alias_ddrs (struct graph
*g
, struct graph_edge
*e
, void *data
)
2151 int i
, j
, component
;
2152 struct pg_edge_callback_data
*cbdata
;
2153 struct pg_edata
*edata
= (struct pg_edata
*) e
->data
;
2155 /* If the edge doesn't have attached data dependence, it represents
2156 compilation time known dependences. This type dependence cannot
2157 be resolved by runtime alias check. */
2158 if (edata
== NULL
|| edata
->alias_ddrs
.length () == 0)
2161 cbdata
= (struct pg_edge_callback_data
*) data
;
2164 component
= cbdata
->vertices_component
[i
];
2165 /* Vertices are topologically sorted according to compilation time
2166 known dependences, so we can break strong connected components
2167 by removing edges of the opposite direction, i.e, edges pointing
2168 from vertice with smaller post number to vertice with bigger post
2170 if (g
->vertices
[i
].post
< g
->vertices
[j
].post
2171 /* We only need to remove edges connecting vertices in the same
2172 strong connected component to break it. */
2173 && component
== cbdata
->vertices_component
[j
]
2174 /* Check if we want to break the strong connected component or not. */
2175 && !bitmap_bit_p (cbdata
->sccs_to_merge
, component
))
2176 cbdata
->alias_ddrs
->safe_splice (edata
->alias_ddrs
);
2179 /* This is the main function breaking strong conected components in
2180 PARTITIONS giving reduced depdendence graph RDG. Store data dependence
2181 relations for runtime alias check in ALIAS_DDRS. */
2184 break_alias_scc_partitions (struct graph
*rdg
,
2185 vec
<struct partition
*> *partitions
,
2186 vec
<ddr_p
> *alias_ddrs
)
2188 int i
, j
, k
, num_sccs
, num_sccs_no_alias
;
2189 /* Build partition dependence graph. */
2190 graph
*pg
= build_partition_graph (rdg
, partitions
, false);
2192 alias_ddrs
->truncate (0);
2193 /* Find strong connected components in the graph, with all dependence edges
2195 num_sccs
= graphds_scc (pg
, NULL
);
2196 /* All SCCs now can be broken by runtime alias checks because SCCs caused by
2197 compilation time known dependences are merged before this function. */
2198 if ((unsigned) num_sccs
< partitions
->length ())
2200 struct pg_edge_callback_data cbdata
;
2201 auto_bitmap sccs_to_merge
;
2202 auto_vec
<enum partition_type
> scc_types
;
2203 struct partition
*partition
, *first
;
2205 /* If all partitions in a SCC have the same type, we can simply merge the
2206 SCC. This loop finds out such SCCS and record them in bitmap. */
2207 bitmap_set_range (sccs_to_merge
, 0, (unsigned) num_sccs
);
2208 for (i
= 0; i
< num_sccs
; ++i
)
2210 for (j
= 0; partitions
->iterate (j
, &first
); ++j
)
2211 if (pg
->vertices
[j
].component
== i
)
2213 for (++j
; partitions
->iterate (j
, &partition
); ++j
)
2215 if (pg
->vertices
[j
].component
!= i
)
2218 /* Note we Merge partitions of parallel type on purpose, though
2219 the result partition is sequential. The reason is vectorizer
2220 can do more accurate runtime alias check in this case. Also
2221 it results in more conservative distribution. */
2222 if (first
->type
!= partition
->type
)
2224 bitmap_clear_bit (sccs_to_merge
, i
);
2230 /* Initialize callback data for traversing. */
2231 cbdata
.sccs_to_merge
= sccs_to_merge
;
2232 cbdata
.alias_ddrs
= alias_ddrs
;
2233 cbdata
.vertices_component
= XNEWVEC (int, pg
->n_vertices
);
2234 /* Record the component information which will be corrupted by next
2235 graph scc finding call. */
2236 for (i
= 0; i
< pg
->n_vertices
; ++i
)
2237 cbdata
.vertices_component
[i
] = pg
->vertices
[i
].component
;
2239 /* Collect data dependences for runtime alias checks to break SCCs. */
2240 if (bitmap_count_bits (sccs_to_merge
) != (unsigned) num_sccs
)
2242 /* Run SCC finding algorithm again, with alias dependence edges
2243 skipped. This is to topologically sort partitions according to
2244 compilation time known dependence. Note the topological order
2245 is stored in the form of pg's post order number. */
2246 num_sccs_no_alias
= graphds_scc (pg
, NULL
, pg_skip_alias_edge
);
2247 gcc_assert (partitions
->length () == (unsigned) num_sccs_no_alias
);
2248 /* With topological order, we can construct two subgraphs L and R.
2249 L contains edge <x, y> where x < y in terms of post order, while
2250 R contains edge <x, y> where x > y. Edges for compilation time
2251 known dependence all fall in R, so we break SCCs by removing all
2252 (alias) edges of in subgraph L. */
2253 for_each_edge (pg
, pg_collect_alias_ddrs
, &cbdata
);
2256 /* For SCC that doesn't need to be broken, merge it. */
2257 for (i
= 0; i
< num_sccs
; ++i
)
2259 if (!bitmap_bit_p (sccs_to_merge
, i
))
2262 for (j
= 0; partitions
->iterate (j
, &first
); ++j
)
2263 if (cbdata
.vertices_component
[j
] == i
)
2265 for (k
= j
+ 1; partitions
->iterate (k
, &partition
); ++k
)
2267 struct pg_vdata
*data
;
2269 if (cbdata
.vertices_component
[k
] != i
)
2272 /* Update postorder number so that merged reduction partition is
2273 sorted after other partitions. */
2274 if (!partition_reduction_p (first
)
2275 && partition_reduction_p (partition
))
2277 gcc_assert (pg
->vertices
[k
].post
< pg
->vertices
[j
].post
);
2278 pg
->vertices
[j
].post
= pg
->vertices
[k
].post
;
2280 partition_merge_into (NULL
, first
, partition
, FUSE_SAME_SCC
);
2281 (*partitions
)[k
] = NULL
;
2282 partition_free (partition
);
2283 data
= (struct pg_vdata
*)pg
->vertices
[k
].data
;
2284 gcc_assert (data
->id
== k
);
2285 data
->partition
= NULL
;
2286 /* The result partition of merged SCC must be sequential. */
2287 first
->type
= PTYPE_SEQUENTIAL
;
2292 sort_partitions_by_post_order (pg
, partitions
);
2293 free_partition_graph_vdata (pg
);
2294 for_each_edge (pg
, free_partition_graph_edata_cb
, NULL
);
2297 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2299 fprintf (dump_file
, "Possible alias data dependence to break:\n");
2300 dump_data_dependence_relations (dump_file
, *alias_ddrs
);
2304 /* Compute and return an expression whose value is the segment length which
2305 will be accessed by DR in NITERS iterations. */
2308 data_ref_segment_size (struct data_reference
*dr
, tree niters
)
2310 tree segment_length
;
2312 if (integer_zerop (DR_STEP (dr
)))
2313 segment_length
= TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr
)));
2315 segment_length
= size_binop (MULT_EXPR
,
2316 fold_convert (sizetype
, DR_STEP (dr
)),
2317 fold_convert (sizetype
, niters
));
2319 return segment_length
;
2322 /* Return true if LOOP's latch is dominated by statement for data reference
2326 latch_dominated_by_data_ref (struct loop
*loop
, data_reference
*dr
)
2328 return dominated_by_p (CDI_DOMINATORS
, single_exit (loop
)->src
,
2329 gimple_bb (DR_STMT (dr
)));
/* Compute alias check pairs and store them in COMP_ALIAS_PAIRS for LOOP's
   data dependence relations ALIAS_DDRS.  Each pair carries the two data
   references of a ddr together with the segment lengths they access, as
   needed by create_runtime_alias_checks.  */

static void
compute_alias_check_pairs (struct loop *loop, vec<ddr_p> *alias_ddrs,
			   vec<dr_with_seg_len_pair_t> *comp_alias_pairs)
{
  unsigned int i;
  unsigned HOST_WIDE_INT factor = 1;
  tree niters_plus_one, niters = number_of_latch_executions (loop);

  /* Callers only version loops with known iteration counts.  */
  gcc_assert (niters != NULL_TREE && niters != chrec_dont_know);
  niters = fold_convert (sizetype, niters);
  niters_plus_one = size_binop (PLUS_EXPR, niters, size_one_node);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Creating alias check pairs:\n");

  /* Iterate all data dependence relations and compute alias check pairs.  */
  for (i = 0; i < alias_ddrs->length (); i++)
    {
      ddr_p ddr = (*alias_ddrs)[i];
      struct data_reference *dr_a = DDR_A (ddr);
      struct data_reference *dr_b = DDR_B (ddr);
      tree seg_length_a, seg_length_b;
      /* Order the two refs by base address, falling back to offset; the
	 comparison result drives canonicalization below.  */
      int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
					    DR_BASE_ADDRESS (dr_b));

      if (comp_res == 0)
	comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
      gcc_assert (comp_res != 0);

      /* A ref dominating the latch executes on every iteration, i.e.
	 NITERS + 1 times (NITERS counts latch executions); otherwise it
	 may be skipped on the last iteration and NITERS is enough.  */
      if (latch_dominated_by_data_ref (loop, dr_a))
	seg_length_a = data_ref_segment_size (dr_a, niters_plus_one);
      else
	seg_length_a = data_ref_segment_size (dr_a, niters);

      if (latch_dominated_by_data_ref (loop, dr_b))
	seg_length_b = data_ref_segment_size (dr_b, niters_plus_one);
      else
	seg_length_b = data_ref_segment_size (dr_b, niters);

      dr_with_seg_len_pair_t dr_with_seg_len_pair
	  (dr_with_seg_len (dr_a, seg_length_a),
	   dr_with_seg_len (dr_b, seg_length_b));

      /* Canonicalize pairs by sorting the two DR members.  */
      if (comp_res > 0)
	std::swap (dr_with_seg_len_pair.first, dr_with_seg_len_pair.second);

      comp_alias_pairs->safe_push (dr_with_seg_len_pair);
    }

  if (tree_fits_uhwi_p (niters))
    factor = tree_to_uhwi (niters);

  /* Prune alias check pairs.  */
  prune_runtime_alias_test_list (comp_alias_pairs, factor);
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file,
	     "Improved number of alias checks from %d to %d\n",
	     alias_ddrs->length (), comp_alias_pairs->length ());
}
/* Given data dependence relations in ALIAS_DDRS, generate runtime alias
   checks and version LOOP under condition of these runtime alias checks.
   The versioned (new) loop is the fall-back copy that runs when the alias
   check fails; it is marked dont_vectorize.  */

static void
version_loop_by_alias_check (struct loop *loop, vec<ddr_p> *alias_ddrs)
{
  profile_probability prob;
  basic_block cond_bb;
  struct loop *nloop;
  tree lhs, arg0, cond_expr = NULL_TREE;
  gimple_seq cond_stmts = NULL;
  gimple *call_stmt = NULL;
  auto_vec<dr_with_seg_len_pair_t> comp_alias_pairs;

  /* Generate code for runtime alias checks if necessary.  */
  gcc_assert (alias_ddrs->length () > 0);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file,
	     "Version loop <%d> with runtime alias check\n", loop->num);

  compute_alias_check_pairs (loop, alias_ddrs, &comp_alias_pairs);
  create_runtime_alias_checks (loop, &comp_alias_pairs, &cond_expr);
  /* Gimplify the check into a single value; statements needed to compute
     it are collected in COND_STMTS and inserted below.  */
  cond_expr = force_gimple_operand_1 (cond_expr, &cond_stmts,
				      is_gimple_val, NULL_TREE);

  /* Depend on vectorizer to fold IFN_LOOP_DIST_ALIAS.  */
  if (flag_tree_loop_vectorize)
    {
      /* Generate internal function call for loop distribution alias check.
	 The first argument (loop num) is filled in after versioning, once
	 the number is known.  */
      call_stmt = gimple_build_call_internal (IFN_LOOP_DIST_ALIAS,
					      2, NULL_TREE, cond_expr);
      lhs = make_ssa_name (boolean_type_node);
      gimple_call_set_lhs (call_stmt, lhs);
    }
  else
    lhs = cond_expr;

  /* Assume the runtime alias check passes 90% of the time.  */
  prob = profile_probability::guessed_always ().apply_scale (9, 10);
  initialize_original_copy_tables ();
  nloop = loop_version (loop, lhs, &cond_bb, prob, prob.invert (),
			prob, prob.invert (), true);
  free_original_copy_tables ();
  /* Record the original loop number in newly generated loops.  In case of
     distribution, the original loop will be distributed and the new loop
     is kept.  */
  loop->orig_loop_num = nloop->num;
  nloop->orig_loop_num = nloop->num;
  nloop->dont_vectorize = true;
  nloop->force_vectorize = false;

  if (call_stmt)
    {
      /* Record new loop's num in IFN_LOOP_DIST_ALIAS because the original
	 loop could be destroyed.  */
      arg0 = build_int_cst (integer_type_node, loop->orig_loop_num);
      gimple_call_set_arg (call_stmt, 0, arg0);
      gimple_seq_add_stmt_without_update (&cond_stmts, call_stmt);
    }

  if (cond_stmts)
    {
      gimple_stmt_iterator cond_gsi = gsi_last_bb (cond_bb);
      gsi_insert_seq_before (&cond_gsi, cond_stmts, GSI_SAME_STMT);
    }
  update_ssa (TODO_update_ssa);
}
2464 /* Return true if loop versioning is needed to distrubute PARTITIONS.
2465 ALIAS_DDRS are data dependence relations for runtime alias check. */
2468 version_for_distribution_p (vec
<struct partition
*> *partitions
,
2469 vec
<ddr_p
> *alias_ddrs
)
2471 /* No need to version loop if we have only one partition. */
2472 if (partitions
->length () == 1)
2475 /* Need to version loop if runtime alias check is necessary. */
2476 return (alias_ddrs
->length () > 0);
/* Fuse PARTITIONS of LOOP if necessary before finalizing distribution.
   ALIAS_DDRS contains ddrs which need runtime alias check.  This is a
   simple cost model: when distribution does not expose builtins and all
   partitions have the same type, or when there are too many partitions,
   everything is fused back into a single partition.  */

static void
finalize_partitions (struct loop *loop, vec<struct partition *> *partitions,
		     vec<ddr_p> *alias_ddrs)
{
  unsigned i;
  struct partition *partition, *a;

  /* Nothing to fuse with one partition; when runtime alias checks are
     pending, keep the partitioning so versioning can pay off.  */
  if (partitions->length () == 1
      || alias_ddrs->length () > 0)
    return;

  unsigned num_builtin = 0, num_normal = 0;
  bool same_type_p = true;
  enum partition_type type = ((*partitions)[0])->type;
  for (i = 0; partitions->iterate (i, &partition); ++i)
    {
      same_type_p &= (type == partition->type);
      if (partition->kind != PKIND_NORMAL)
	num_builtin++;
      else
	num_normal++;
    }

  /* Don't distribute current loop into too many loops given we don't have
     memory stream cost model.  Be even more conservative in case of loop
     nest distribution.  */
  if ((same_type_p && num_builtin == 0)
      || (loop->inner != NULL
	  && i >= NUM_PARTITION_THRESHOLD && num_normal > 1)
      || (loop->inner == NULL
	  && i >= NUM_PARTITION_THRESHOLD && num_normal > num_builtin))
    {
      /* Fuse all partitions into the first one and drop the rest.  */
      a = (*partitions)[0];
      for (i = 1; partitions->iterate (i, &partition); ++i)
	{
	  partition_merge_into (NULL, a, partition, FUSE_FINALIZE);
	  partition_free (partition);
	}
      partitions->truncate (1);
    }
}
/* Distributes the code from LOOP in such a way that producer statements
   are placed before consumer statements.  Tries to separate only the
   statements from STMTS into separate loops.  Returns the number of
   distributed loops.  Set NB_CALLS to number of generated builtin calls.
   Set *DESTROY_P to whether LOOP needs to be destroyed.  */

static int
distribute_loop (struct loop *loop, vec<gimple *> stmts,
		 control_dependences *cd, int *nb_calls, bool *destroy_p)
{
  /* Hash table caching computed data dependence relations for this loop;
     torn down at ldist_done.  */
  ddrs_table = new hash_table<ddr_hasher> (389);
  struct graph *rdg;
  partition *partition;
  bool any_builtin;
  int i, nbp;

  *destroy_p = false;
  *nb_calls = 0;
  loop_nest.create (0);
  if (!find_loop_nest (loop, &loop_nest))
    {
      loop_nest.release ();
      delete ddrs_table;
      return 0;
    }

  datarefs_vec.create (20);
  rdg = build_rdg (loop, cd);
  if (!rdg)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Loop %d not distributed: failed to build the RDG.\n",
		 loop->num);

      loop_nest.release ();
      free_data_refs (datarefs_vec);
      delete ddrs_table;
      return 0;
    }

  if (datarefs_vec.length () > MAX_DATAREFS_NUM)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Loop %d not distributed: too many memory references.\n",
		 loop->num);

      free_rdg (rdg);
      loop_nest.release ();
      free_data_refs (datarefs_vec);
      delete ddrs_table;
      return 0;
    }

  /* Number the data references so ddr lookups can index by position.  */
  data_reference_p dref;
  for (i = 0; datarefs_vec.iterate (i, &dref); ++i)
    dref->aux = (void *) (uintptr_t) i;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg (dump_file, rdg);

  auto_vec<struct partition *, 3> partitions;
  rdg_build_partitions (rdg, stmts, &partitions);

  auto_vec<ddr_p> alias_ddrs;

  /* Statements common to every partition are used by classify_partition
     to tell which memory accesses a partition exclusively owns.  */
  auto_bitmap stmt_in_all_partitions;
  bitmap_copy (stmt_in_all_partitions, partitions[0]->stmts);
  for (i = 1; partitions.iterate (i, &partition); ++i)
    bitmap_and_into (stmt_in_all_partitions, partitions[i]->stmts);

  any_builtin = false;
  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      classify_partition (loop, rdg, partition, stmt_in_all_partitions);
      any_builtin |= partition_builtin_p (partition);
    }

  /* If we are only distributing patterns but did not detect any,
     simply bail out.  */
  if (!flag_tree_loop_distribution
      && !any_builtin)
    {
      nbp = 0;
      goto ldist_done;
    }

  /* If we are only distributing patterns fuse all partitions that
     were not classified as builtins.  This also avoids chopping
     a loop into pieces, separated by builtin calls.  That is, we
     only want no or a single loop body remaining.  */
  struct partition *into;
  if (!flag_tree_loop_distribution)
    {
      for (i = 0; partitions.iterate (i, &into); ++i)
	if (!partition_builtin_p (into))
	  break;
      for (++i; partitions.iterate (i, &partition); ++i)
	if (!partition_builtin_p (partition))
	  {
	    partition_merge_into (NULL, into, partition, FUSE_NON_BUILTIN);
	    partitions.unordered_remove (i);
	    partition_free (partition);
	    i--;
	  }
    }

  /* Due to limitations in the transform phase we have to fuse all
     reduction partitions into the last partition so the existing
     loop will contain all loop-closed PHI nodes.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    if (partition_reduction_p (into))
      break;
  for (i = i + 1; partitions.iterate (i, &partition); ++i)
    if (partition_reduction_p (partition))
      {
	partition_merge_into (rdg, into, partition, FUSE_REDUCTION);
	partitions.unordered_remove (i);
	partition_free (partition);
	i--;
      }

  /* Apply our simple cost model - fuse partitions with similar
     memory accesses.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    {
      bool changed = false;
      if (partition_builtin_p (into))
	continue;
      for (int j = i + 1;
	   partitions.iterate (j, &partition); ++j)
	{
	  if (share_memory_accesses (rdg, into, partition))
	    {
	      partition_merge_into (rdg, into, partition, FUSE_SHARE_REF);
	      partitions.unordered_remove (j);
	      partition_free (partition);
	      j--;
	      changed = true;
	    }
	}
      /* If we fused 0 1 2 in step 1 to 0,2 1 as 0 and 2 have similar
	 accesses when 1 and 2 have similar accesses but not 0 and 1
	 then in the next iteration we will fail to consider merging
	 1 into 0,2.  So try again if we did any merging into 0.  */
      if (changed)
	i--;
    }

  /* Build the partition dependency graph and fuse partitions in strong
     connected component.  */
  if (partitions.length () > 1)
    {
      /* Don't support loop nest distribution under runtime alias check
	 since it's not likely to enable many vectorization opportunities.  */
      if (loop->inner)
	merge_dep_scc_partitions (rdg, &partitions, false);
      else
	{
	  merge_dep_scc_partitions (rdg, &partitions, true);
	  if (partitions.length () > 1)
	    break_alias_scc_partitions (rdg, &partitions, &alias_ddrs);
	}
    }

  finalize_partitions (loop, &partitions, &alias_ddrs);

  nbp = partitions.length ();
  /* Distribution is useless with no partition, a single non-builtin
     partition, or when some partition still references all memory.  */
  if (nbp == 0
      || (nbp == 1 && !partition_builtin_p (partitions[0]))
      || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
    {
      nbp = 0;
      goto ldist_done;
    }

  if (version_for_distribution_p (&partitions, &alias_ddrs))
    version_loop_by_alias_check (loop, &alias_ddrs);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file,
	       "distribute loop <%d> into partitions:\n", loop->num);
      dump_rdg_partitions (dump_file, partitions);
    }

  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      if (partition_builtin_p (partition))
	(*nb_calls)++;
      *destroy_p |= generate_code_for_partition (loop, partition, i < nbp - 1);
    }

 ldist_done:
  /* Common cleanup for both success and bail-out paths.  */
  loop_nest.release ();
  free_data_refs (datarefs_vec);
  for (hash_table<ddr_hasher>::iterator iter = ddrs_table->begin ();
       iter != ddrs_table->end (); ++iter)
    {
      free_dependence_relation (*iter);
      *iter = NULL;
    }
  delete ddrs_table;

  FOR_EACH_VEC_ELT (partitions, i, partition)
    partition_free (partition);

  free_rdg (rdg);
  /* Builtin partitions become library calls, not loops.  */
  return nbp - *nb_calls;
}
/* Distribute all loops in the current function.  */

namespace {

/* Pass descriptor for the loop distribution pass ("ldist").  */
const pass_data pass_data_loop_distribution =
{
  GIMPLE_PASS, /* type */
  "ldist", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
/* GIMPLE optimization pass wrapping loop distribution; gated on either of
   the two distribution flags (-ftree-loop-distribution or
   -ftree-loop-distribute-patterns).  */
class pass_loop_distribution : public gimple_opt_pass
{
public:
  pass_loop_distribution (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_loop_distribution, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return flag_tree_loop_distribution
	|| flag_tree_loop_distribute_patterns;
    }

  virtual unsigned int execute (function *);

}; // class pass_loop_distribution
/* Given LOOP, this function records seed statements for distribution in
   WORK_LIST.  Return false if there is nothing for distribution.  Seeds
   are PHIs/stmts with scalar uses outside the loop, plus stores.  */

static bool
find_seed_stmts_for_distribution (struct loop *loop, vec<gimple *> *work_list)
{
  basic_block *bbs = get_loop_body_in_dom_order (loop);

  /* Initialize the worklist with stmts we seed the partitions with.  */
  for (unsigned i = 0; i < loop->num_nodes; ++i)
    {
      for (gphi_iterator gsi = gsi_start_phis (bbs[i]);
	   !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gphi *phi = gsi.phi ();
	  /* Virtual PHIs carry memory state, not values to distribute.  */
	  if (virtual_operand_p (gimple_phi_result (phi)))
	    continue;
	  /* Distribute stmts which have defs that are used outside of
	     the loop.  */
	  if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
	    continue;
	  work_list->safe_push (phi);
	}
      for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
	   !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);

	  /* If there is a stmt with side-effects bail out - we
	     cannot and should not distribute this loop.  */
	  if (gimple_has_side_effects (stmt))
	    {
	      free (bbs);
	      return false;
	    }

	  /* Distribute stmts which have defs that are used outside of
	     the loop.  */
	  if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
	    ;
	  /* Otherwise only distribute stores for now.  Note the empty
	     statement above: stmts with outside uses fall through to the
	     push below regardless of whether they write memory.  */
	  else if (!gimple_vdef (stmt))
	    continue;

	  work_list->safe_push (stmt);
	}
    }
  free (bbs);
  return work_list->length () > 0;
}
/* Given innermost LOOP, return the outermost enclosing loop that forms a
   perfect loop nest.  */

static struct loop *
prepare_perfect_loop_nest (struct loop *loop)
{
  struct loop *outer = loop_outer (loop);
  tree niters = number_of_latch_executions (loop);

  /* TODO: We only support the innermost 2-level loop nest distribution
     because of compilation time issue for now.  This should be relaxed
     in the future.  */
  /* Climb one level at most: the first test (loop->inner == NULL)
     restricts the walk to starting from an innermost loop, and the
     perfect-nest conditions (OUTER's only child is LOOP, LOOP has no
     sibling, OUTER has a single exit, is hot, and has a computable
     iteration count independent of OUTER) must all hold.  Note the
     embedded assignment: NITERS is updated to OUTER's count for the
     chrec_contains_symbols check of the next iteration.  */
  while (loop->inner == NULL
	 && loop_outer (outer)
	 && outer->inner == loop && loop->next == NULL
	 && single_exit (outer)
	 && optimize_loop_for_speed_p (outer)
	 && !chrec_contains_symbols_defined_in_loop (niters, outer->num)
	 && (niters = number_of_latch_executions (outer)) != NULL_TREE
	 && niters != chrec_dont_know)
    {
      loop = outer;
      outer = loop_outer (loop);
    }

  return loop;
}
/* Main driver: walk all innermost loops of FUN, extend each to a perfect
   nest when possible, and try distribution from the outermost level of
   the nest inwards.  Returns TODO_cleanup_cfg if anything changed.  */
unsigned int
pass_loop_distribution::execute (function *fun)
{
  struct loop *loop;
  bool changed = false;
  basic_block bb;
  control_dependences *cd = NULL;
  auto_vec<loop_p> loops_to_be_destroyed;

  if (number_of_loops (fun) <= 1)
    return 0;

  /* Compute topological order for basic blocks.  Topological order is
     needed because data dependence is computed for data references in
     lexicographical order.  */
  if (bb_top_order_index == NULL)
    {
      int rpo_num;
      int *rpo = XNEWVEC (int, last_basic_block_for_fn (cfun));

      bb_top_order_index = XNEWVEC (int, last_basic_block_for_fn (cfun));
      bb_top_order_index_size = last_basic_block_for_fn (cfun);
      rpo_num = pre_and_rev_post_order_compute_fn (cfun, NULL, rpo, true);
      for (int i = 0; i < rpo_num; i++)
	bb_top_order_index[rpo[i]] = i;

      free (rpo);
    }

  /* Reset all stmt uids; they are (re)assigned lazily during analysis.  */
  FOR_ALL_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
    }

  /* We can at the moment only distribute non-nested loops, thus restrict
     walking to innermost loops.  */
  FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
    {
      /* Don't distribute multiple exit edges loop, or cold loop.  */
      if (!single_exit (loop)
	  || !optimize_loop_for_speed_p (loop))
	continue;

      /* Don't distribute loop if niters is unknown.  */
      tree niters = number_of_latch_executions (loop);
      if (niters == NULL_TREE || niters == chrec_dont_know)
	continue;

      /* Get the perfect loop nest for distribution.  */
      loop = prepare_perfect_loop_nest (loop);
      /* Try outer levels first; stop at the first level that distributes
	 or that yields no seed statements.  */
      for (; loop; loop = loop->inner)
	{
	  auto_vec<gimple *> work_list;
	  if (!find_seed_stmts_for_distribution (loop, &work_list))
	    break;

	  const char *str = loop->inner ? " nest" : "";
	  location_t loc = find_loop_location (loop);
	  /* Control dependences are computed once, on first use, and
	     shared across all loops.  */
	  if (!cd)
	    {
	      calculate_dominance_info (CDI_DOMINATORS);
	      calculate_dominance_info (CDI_POST_DOMINATORS);
	      cd = new control_dependences ();
	      free_dominance_info (CDI_POST_DOMINATORS);
	    }

	  bool destroy_p;
	  int nb_generated_loops, nb_generated_calls;
	  nb_generated_loops = distribute_loop (loop, work_list, cd,
						&nb_generated_calls,
						&destroy_p);
	  if (destroy_p)
	    loops_to_be_destroyed.safe_push (loop);

	  if (nb_generated_loops + nb_generated_calls > 0)
	    {
	      changed = true;
	      dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
			       loc, "Loop%s %d distributed: split to %d loops "
			       "and %d library calls.\n", str, loop->num,
			       nb_generated_loops, nb_generated_calls);

	      break;
	    }

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Loop%s %d not distributed.\n", str, loop->num);
	}
    }

  if (cd)
    delete cd;

  if (bb_top_order_index != NULL)
    {
      free (bb_top_order_index);
      bb_top_order_index = NULL;
      bb_top_order_index_size = 0;
    }

  if (changed)
    {
      /* Destroy loop bodies that could not be reused.  Do this late as we
	 otherwise can end up refering to stale data in control dependences.  */
      unsigned i;
      FOR_EACH_VEC_ELT (loops_to_be_destroyed, i, loop)
	destroy_loop (loop);

      /* Cached scalar evolutions now may refer to wrong or non-existing
	 loops.  */
      scev_reset_htab ();
      mark_virtual_operands_for_renaming (fun);
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }

  checking_verify_loop_structure ();

  return changed ? TODO_cleanup_cfg : 0;
}
2978 make_pass_loop_distribution (gcc::context
*ctxt
)
2980 return new pass_loop_distribution (ctxt
);