Re-factor inclusion of tree.h.
[official-gcc.git] / gcc / tree-loop-distribution.c
blob9b6936d321467b4a0b1bd50314984a7569b3e130
1 /* Loop distribution.
2 Copyright (C) 2006-2013 Free Software Foundation, Inc.
3 Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
4 and Sebastian Pop <sebastian.pop@amd.com>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 3, or (at your option) any
11 later version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* This pass performs loop distribution: for example, the loop
24 |DO I = 2, N
25 | A(I) = B(I) + C
26 | D(I) = A(I-1)*E
27 |ENDDO
29 is transformed to
31 |DOALL I = 2, N
32 | A(I) = B(I) + C
33 |ENDDO
35 |DOALL I = 2, N
36 | D(I) = A(I-1)*E
37 |ENDDO
39 This pass uses an RDG, Reduced Dependence Graph built on top of the
40 data dependence relations. The RDG is then topologically sorted to
41 obtain a map of information producers/consumers based on which it
42 generates the new loops. */
44 #include "config.h"
45 #include "system.h"
46 #include "coretypes.h"
47 #include "tree.h"
48 #include "tree-ssa.h"
49 #include "cfgloop.h"
50 #include "tree-chrec.h"
51 #include "tree-data-ref.h"
52 #include "tree-scalar-evolution.h"
53 #include "tree-pass.h"
54 #include "gimple-pretty-print.h"
55 #include "tree-vectorizer.h"
58 /* A Reduced Dependence Graph (RDG) vertex representing a statement. */
59 typedef struct rdg_vertex
61 /* The statement represented by this vertex. */
62 gimple stmt;
64 /* Vector of data-references in this statement. */
65 vec<data_reference_p> datarefs;
67 /* True when the statement contains a write to memory. */
68 bool has_mem_write;
70 /* True when the statement contains a read from memory. */
71 bool has_mem_reads;
72 } *rdg_vertex_p;
74 #define RDGV_STMT(V) ((struct rdg_vertex *) ((V)->data))->stmt
75 #define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
76 #define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
77 #define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
78 #define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
79 #define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
80 #define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
81 #define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
/* Data dependence type.  */

enum rdg_dep_type
{
  /* Read After Write (RAW).  */
  flow_dd = 'f',

  /* Write After Read (WAR).  */
  anti_dd = 'a',

  /* Write After Write (WAW).  */
  output_dd = 'o',

  /* Read After Read (RAR).  */
  input_dd = 'i',

  /* Control dependence (execute conditional on).  */
  control_dd = 'c'
};
103 /* Dependence information attached to an edge of the RDG. */
105 typedef struct rdg_edge
107 /* Type of the dependence. */
108 enum rdg_dep_type type;
110 /* Levels of the dependence: the depth of the loops that carry the
111 dependence. */
112 unsigned level;
114 /* Dependence relation between data dependences, NULL when one of
115 the vertices is a scalar. */
116 ddr_p relation;
117 } *rdg_edge_p;
119 #define RDGE_TYPE(E) ((struct rdg_edge *) ((E)->data))->type
120 #define RDGE_LEVEL(E) ((struct rdg_edge *) ((E)->data))->level
121 #define RDGE_RELATION(E) ((struct rdg_edge *) ((E)->data))->relation
123 /* Dump vertex I in RDG to FILE. */
125 static void
126 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
128 struct vertex *v = &(rdg->vertices[i]);
129 struct graph_edge *e;
131 fprintf (file, "(vertex %d: (%s%s) (in:", i,
132 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
133 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
135 if (v->pred)
136 for (e = v->pred; e; e = e->pred_next)
137 fprintf (file, " %d", e->src);
139 fprintf (file, ") (out:");
141 if (v->succ)
142 for (e = v->succ; e; e = e->succ_next)
143 fprintf (file, " %d", e->dest);
145 fprintf (file, ")\n");
146 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
147 fprintf (file, ")\n");
150 /* Call dump_rdg_vertex on stderr. */
152 DEBUG_FUNCTION void
153 debug_rdg_vertex (struct graph *rdg, int i)
155 dump_rdg_vertex (stderr, rdg, i);
158 /* Dump the reduced dependence graph RDG to FILE. */
160 static void
161 dump_rdg (FILE *file, struct graph *rdg)
163 fprintf (file, "(rdg\n");
164 for (int i = 0; i < rdg->n_vertices; i++)
165 dump_rdg_vertex (file, rdg, i);
166 fprintf (file, ")\n");
169 /* Call dump_rdg on stderr. */
171 DEBUG_FUNCTION void
172 debug_rdg (struct graph *rdg)
174 dump_rdg (stderr, rdg);
177 static void
178 dot_rdg_1 (FILE *file, struct graph *rdg)
180 int i;
181 pretty_printer buffer;
182 pp_needs_newline (&buffer) = false;
183 buffer.buffer->stream = file;
185 fprintf (file, "digraph RDG {\n");
187 for (i = 0; i < rdg->n_vertices; i++)
189 struct vertex *v = &(rdg->vertices[i]);
190 struct graph_edge *e;
192 fprintf (file, "%d [label=\"[%d] ", i, i);
193 pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
194 pp_flush (&buffer);
195 fprintf (file, "\"]\n");
197 /* Highlight reads from memory. */
198 if (RDG_MEM_READS_STMT (rdg, i))
199 fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
201 /* Highlight stores to memory. */
202 if (RDG_MEM_WRITE_STMT (rdg, i))
203 fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
205 if (v->succ)
206 for (e = v->succ; e; e = e->succ_next)
207 switch (RDGE_TYPE (e))
209 case input_dd:
210 fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
211 break;
213 case output_dd:
214 fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
215 break;
217 case flow_dd:
218 /* These are the most common dependences: don't print these. */
219 fprintf (file, "%d -> %d \n", i, e->dest);
220 break;
222 case anti_dd:
223 fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
224 break;
226 case control_dd:
227 fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
228 break;
230 default:
231 gcc_unreachable ();
235 fprintf (file, "}\n\n");
238 /* Display the Reduced Dependence Graph using dotty. */
240 DEBUG_FUNCTION void
241 dot_rdg (struct graph *rdg)
243 /* When debugging, you may want to enable the following code. */
244 #if 1
245 FILE *file = popen ("dot -Tx11", "w");
246 if (!file)
247 return;
248 dot_rdg_1 (file, rdg);
249 fflush (file);
250 close (fileno (file));
251 pclose (file);
252 #else
253 dot_rdg_1 (stderr, rdg);
254 #endif
257 /* Returns the index of STMT in RDG. */
259 static int
260 rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple stmt)
262 int index = gimple_uid (stmt);
263 gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
264 return index;
267 /* Creates an edge in RDG for each distance vector from DDR. The
268 order that we keep track of in the RDG is the order in which
269 statements have to be executed. */
271 static void
272 create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
274 struct graph_edge *e;
275 int va, vb;
276 data_reference_p dra = DDR_A (ddr);
277 data_reference_p drb = DDR_B (ddr);
278 unsigned level = ddr_dependence_level (ddr);
280 /* For non scalar dependences, when the dependence is REVERSED,
281 statement B has to be executed before statement A. */
282 if (level > 0
283 && !DDR_REVERSED_P (ddr))
285 data_reference_p tmp = dra;
286 dra = drb;
287 drb = tmp;
290 va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
291 vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
293 if (va < 0 || vb < 0)
294 return;
296 e = add_edge (rdg, va, vb);
297 e->data = XNEW (struct rdg_edge);
299 RDGE_LEVEL (e) = level;
300 RDGE_RELATION (e) = ddr;
302 /* Determines the type of the data dependence. */
303 if (DR_IS_READ (dra) && DR_IS_READ (drb))
304 RDGE_TYPE (e) = input_dd;
305 else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
306 RDGE_TYPE (e) = output_dd;
307 else if (DR_IS_WRITE (dra) && DR_IS_READ (drb))
308 RDGE_TYPE (e) = flow_dd;
309 else if (DR_IS_READ (dra) && DR_IS_WRITE (drb))
310 RDGE_TYPE (e) = anti_dd;
313 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
314 the index of DEF in RDG. */
316 static void
317 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
319 use_operand_p imm_use_p;
320 imm_use_iterator iterator;
322 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
324 struct graph_edge *e;
325 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
327 if (use < 0)
328 continue;
330 e = add_edge (rdg, idef, use);
331 e->data = XNEW (struct rdg_edge);
332 RDGE_TYPE (e) = flow_dd;
333 RDGE_RELATION (e) = NULL;
337 /* Creates an edge for the control dependences of BB to the vertex V. */
339 static void
340 create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
341 int v, control_dependences *cd)
343 bitmap_iterator bi;
344 unsigned edge_n;
345 EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
346 0, edge_n, bi)
348 basic_block cond_bb = cd->get_edge (edge_n)->src;
349 gimple stmt = last_stmt (cond_bb);
350 if (stmt && is_ctrl_stmt (stmt))
352 struct graph_edge *e;
353 int c = rdg_vertex_for_stmt (rdg, stmt);
354 if (c < 0)
355 continue;
357 e = add_edge (rdg, c, v);
358 e->data = XNEW (struct rdg_edge);
359 RDGE_TYPE (e) = control_dd;
360 RDGE_RELATION (e) = NULL;
365 /* Creates the edges of the reduced dependence graph RDG. */
367 static void
368 create_rdg_edges (struct graph *rdg, vec<ddr_p> ddrs, control_dependences *cd)
370 int i;
371 struct data_dependence_relation *ddr;
372 def_operand_p def_p;
373 ssa_op_iter iter;
375 FOR_EACH_VEC_ELT (ddrs, i, ddr)
376 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
377 create_rdg_edge_for_ddr (rdg, ddr);
378 else
379 free_dependence_relation (ddr);
381 for (i = 0; i < rdg->n_vertices; i++)
382 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
383 iter, SSA_OP_DEF)
384 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
386 if (cd)
387 for (i = 0; i < rdg->n_vertices; i++)
389 gimple stmt = RDG_STMT (rdg, i);
390 if (gimple_code (stmt) == GIMPLE_PHI)
392 edge_iterator ei;
393 edge e;
394 FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
395 create_edge_for_control_dependence (rdg, e->src, i, cd);
397 else
398 create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
402 /* Build the vertices of the reduced dependence graph RDG. Return false
403 if that failed. */
405 static bool
406 create_rdg_vertices (struct graph *rdg, vec<gimple> stmts, loop_p loop,
407 vec<data_reference_p> *datarefs)
409 int i;
410 gimple stmt;
412 FOR_EACH_VEC_ELT (stmts, i, stmt)
414 struct vertex *v = &(rdg->vertices[i]);
416 /* Record statement to vertex mapping. */
417 gimple_set_uid (stmt, i);
419 v->data = XNEW (struct rdg_vertex);
420 RDGV_STMT (v) = stmt;
421 RDGV_DATAREFS (v).create (0);
422 RDGV_HAS_MEM_WRITE (v) = false;
423 RDGV_HAS_MEM_READS (v) = false;
424 if (gimple_code (stmt) == GIMPLE_PHI)
425 continue;
427 unsigned drp = datarefs->length ();
428 if (!find_data_references_in_stmt (loop, stmt, datarefs))
429 return false;
430 for (unsigned j = drp; j < datarefs->length (); ++j)
432 data_reference_p dr = (*datarefs)[j];
433 if (DR_IS_READ (dr))
434 RDGV_HAS_MEM_READS (v) = true;
435 else
436 RDGV_HAS_MEM_WRITE (v) = true;
437 RDGV_DATAREFS (v).safe_push (dr);
440 return true;
443 /* Initialize STMTS with all the statements of LOOP. The order in
444 which we discover statements is important as
445 generate_loops_for_partition is using the same traversal for
446 identifying statements in loop copies. */
448 static void
449 stmts_from_loop (struct loop *loop, vec<gimple> *stmts)
451 unsigned int i;
452 basic_block *bbs = get_loop_body_in_dom_order (loop);
454 for (i = 0; i < loop->num_nodes; i++)
456 basic_block bb = bbs[i];
457 gimple_stmt_iterator bsi;
458 gimple stmt;
460 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
461 if (!virtual_operand_p (gimple_phi_result (gsi_stmt (bsi))))
462 stmts->safe_push (gsi_stmt (bsi));
464 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
466 stmt = gsi_stmt (bsi);
467 if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
468 stmts->safe_push (stmt);
472 free (bbs);
475 /* Free the reduced dependence graph RDG. */
477 static void
478 free_rdg (struct graph *rdg)
480 int i;
482 for (i = 0; i < rdg->n_vertices; i++)
484 struct vertex *v = &(rdg->vertices[i]);
485 struct graph_edge *e;
487 for (e = v->succ; e; e = e->succ_next)
489 free_dependence_relation (RDGE_RELATION (e));
490 free (e->data);
493 if (v->data)
495 gimple_set_uid (RDGV_STMT (v), -1);
496 free_data_refs (RDGV_DATAREFS (v));
497 free (v->data);
501 free_graph (rdg);
504 /* Build the Reduced Dependence Graph (RDG) with one vertex per
505 statement of the loop nest LOOP_NEST, and one edge per data dependence or
506 scalar dependence. */
508 static struct graph *
509 build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
511 struct graph *rdg;
512 vec<gimple> stmts;
513 vec<data_reference_p> datarefs;
514 vec<ddr_p> dependence_relations;
516 /* Create the RDG vertices from the stmts of the loop nest. */
517 stmts.create (10);
518 stmts_from_loop (loop_nest[0], &stmts);
519 rdg = new_graph (stmts.length ());
520 datarefs.create (10);
521 if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs))
523 stmts.release ();
524 datarefs.release ();
525 free_rdg (rdg);
526 return NULL;
528 stmts.release ();
530 /* Create the RDG edges from the data dependences in the loop nest. */
531 dependence_relations.create (100);
532 if (!compute_all_dependences (datarefs, &dependence_relations, loop_nest,
533 false)
534 || !known_dependences_p (dependence_relations))
536 free_dependence_relations (dependence_relations);
537 datarefs.release ();
538 free_rdg (rdg);
539 return NULL;
541 create_rdg_edges (rdg, dependence_relations, cd);
542 dependence_relations.release ();
543 datarefs.release ();
545 return rdg;
550 enum partition_kind {
551 PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY
554 typedef struct partition_s
556 bitmap stmts;
557 bitmap loops;
558 bool reduction_p;
559 enum partition_kind kind;
560 /* data-references a kind != PKIND_NORMAL partition is about. */
561 data_reference_p main_dr;
562 data_reference_p secondary_dr;
563 tree niter;
564 } *partition_t;
567 /* Allocate and initialize a partition from BITMAP. */
569 static partition_t
570 partition_alloc (bitmap stmts, bitmap loops)
572 partition_t partition = XCNEW (struct partition_s);
573 partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
574 partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
575 partition->reduction_p = false;
576 partition->kind = PKIND_NORMAL;
577 return partition;
580 /* Free PARTITION. */
582 static void
583 partition_free (partition_t partition)
585 BITMAP_FREE (partition->stmts);
586 BITMAP_FREE (partition->loops);
587 free (partition);
590 /* Returns true if the partition can be generated as a builtin. */
592 static bool
593 partition_builtin_p (partition_t partition)
595 return partition->kind != PKIND_NORMAL;
598 /* Returns true if the partition contains a reduction. */
600 static bool
601 partition_reduction_p (partition_t partition)
603 return partition->reduction_p;
606 /* Merge PARTITION into the partition DEST. */
608 static void
609 partition_merge_into (partition_t dest, partition_t partition)
611 dest->kind = PKIND_NORMAL;
612 bitmap_ior_into (dest->stmts, partition->stmts);
613 if (partition_reduction_p (partition))
614 dest->reduction_p = true;
618 /* Returns true when DEF is an SSA_NAME defined in LOOP and used after
619 the LOOP. */
621 static bool
622 ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
624 imm_use_iterator imm_iter;
625 use_operand_p use_p;
627 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
629 gimple use_stmt = USE_STMT (use_p);
630 if (!is_gimple_debug (use_stmt)
631 && loop != loop_containing_stmt (use_stmt))
632 return true;
635 return false;
638 /* Returns true when STMT defines a scalar variable used after the
639 loop LOOP. */
641 static bool
642 stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple stmt)
644 def_operand_p def_p;
645 ssa_op_iter op_iter;
647 if (gimple_code (stmt) == GIMPLE_PHI)
648 return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);
650 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
651 if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
652 return true;
654 return false;
657 /* Return a copy of LOOP placed before LOOP. */
659 static struct loop *
660 copy_loop_before (struct loop *loop)
662 struct loop *res;
663 edge preheader = loop_preheader_edge (loop);
665 initialize_original_copy_tables ();
666 res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, preheader);
667 gcc_assert (res != NULL);
668 free_original_copy_tables ();
669 delete_update_ssa ();
671 return res;
674 /* Creates an empty basic block after LOOP. */
676 static void
677 create_bb_after_loop (struct loop *loop)
679 edge exit = single_exit (loop);
681 if (!exit)
682 return;
684 split_edge (exit);
687 /* Generate code for PARTITION from the code in LOOP. The loop is
688 copied when COPY_P is true. All the statements not flagged in the
689 PARTITION bitmap are removed from the loop or from its copy. The
690 statements are indexed in sequence inside a basic block, and the
691 basic blocks of a loop are taken in dom order. */
693 static void
694 generate_loops_for_partition (struct loop *loop, partition_t partition,
695 bool copy_p)
697 unsigned i;
698 gimple_stmt_iterator bsi;
699 basic_block *bbs;
701 if (copy_p)
703 loop = copy_loop_before (loop);
704 gcc_assert (loop != NULL);
705 create_preheader (loop, CP_SIMPLE_PREHEADERS);
706 create_bb_after_loop (loop);
709 /* Remove stmts not in the PARTITION bitmap. */
710 bbs = get_loop_body_in_dom_order (loop);
712 if (MAY_HAVE_DEBUG_STMTS)
713 for (i = 0; i < loop->num_nodes; i++)
715 basic_block bb = bbs[i];
717 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
719 gimple phi = gsi_stmt (bsi);
720 if (!virtual_operand_p (gimple_phi_result (phi))
721 && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
722 reset_debug_uses (phi);
725 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
727 gimple stmt = gsi_stmt (bsi);
728 if (gimple_code (stmt) != GIMPLE_LABEL
729 && !is_gimple_debug (stmt)
730 && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
731 reset_debug_uses (stmt);
735 for (i = 0; i < loop->num_nodes; i++)
737 basic_block bb = bbs[i];
739 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
741 gimple phi = gsi_stmt (bsi);
742 if (!virtual_operand_p (gimple_phi_result (phi))
743 && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
744 remove_phi_node (&bsi, true);
745 else
746 gsi_next (&bsi);
749 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
751 gimple stmt = gsi_stmt (bsi);
752 if (gimple_code (stmt) != GIMPLE_LABEL
753 && !is_gimple_debug (stmt)
754 && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
756 /* Choose an arbitrary path through the empty CFG part
757 that this unnecessary control stmt controls. */
758 if (gimple_code (stmt) == GIMPLE_COND)
760 gimple_cond_make_false (stmt);
761 update_stmt (stmt);
763 else if (gimple_code (stmt) == GIMPLE_SWITCH)
765 gimple_switch_set_index
766 (stmt, CASE_LOW (gimple_switch_label (stmt, 1)));
767 update_stmt (stmt);
769 else
771 unlink_stmt_vdef (stmt);
772 gsi_remove (&bsi, true);
773 release_defs (stmt);
774 continue;
777 gsi_next (&bsi);
781 free (bbs);
784 /* Build the size argument for a memory operation call. */
786 static tree
787 build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter)
789 tree size;
790 size = fold_build2_loc (loc, MULT_EXPR, sizetype,
791 fold_convert_loc (loc, sizetype, nb_iter),
792 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
793 return fold_convert_loc (loc, size_type_node, size);
796 /* Build an address argument for a memory operation call. */
798 static tree
799 build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
801 tree addr_base;
803 addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
804 addr_base = fold_convert_loc (loc, sizetype, addr_base);
806 /* Test for a negative stride, iterating over every element. */
807 if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
809 addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
810 fold_convert_loc (loc, sizetype, nb_bytes));
811 addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
812 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
815 return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
818 /* If VAL memory representation contains the same value in all bytes,
819 return that value, otherwise return -1.
820 E.g. for 0x24242424 return 0x24, for IEEE double
821 747708026454360457216.0 return 0x44, etc. */
823 static int
824 const_with_all_bytes_same (tree val)
826 unsigned char buf[64];
827 int i, len;
829 if (integer_zerop (val)
830 || real_zerop (val)
831 || (TREE_CODE (val) == CONSTRUCTOR
832 && !TREE_CLOBBER_P (val)
833 && CONSTRUCTOR_NELTS (val) == 0))
834 return 0;
836 if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
837 return -1;
839 len = native_encode_expr (val, buf, sizeof (buf));
840 if (len == 0)
841 return -1;
842 for (i = 1; i < len; i++)
843 if (buf[i] != buf[0])
844 return -1;
845 return buf[0];
848 /* Generate a call to memset for PARTITION in LOOP. */
850 static void
851 generate_memset_builtin (struct loop *loop, partition_t partition)
853 gimple_stmt_iterator gsi;
854 gimple stmt, fn_call;
855 tree mem, fn, nb_bytes;
856 location_t loc;
857 tree val;
859 stmt = DR_STMT (partition->main_dr);
860 loc = gimple_location (stmt);
862 /* The new statements will be placed before LOOP. */
863 gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
865 nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter);
866 nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
867 false, GSI_CONTINUE_LINKING);
868 mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
869 mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
870 false, GSI_CONTINUE_LINKING);
872 /* This exactly matches the pattern recognition in classify_partition. */
873 val = gimple_assign_rhs1 (stmt);
874 /* Handle constants like 0x15151515 and similarly
875 floating point constants etc. where all bytes are the same. */
876 int bytev = const_with_all_bytes_same (val);
877 if (bytev != -1)
878 val = build_int_cst (integer_type_node, bytev);
879 else if (TREE_CODE (val) == INTEGER_CST)
880 val = fold_convert (integer_type_node, val);
881 else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
883 gimple cstmt;
884 tree tem = make_ssa_name (integer_type_node, NULL);
885 cstmt = gimple_build_assign_with_ops (NOP_EXPR, tem, val, NULL_TREE);
886 gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
887 val = tem;
890 fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
891 fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
892 gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
894 if (dump_file && (dump_flags & TDF_DETAILS))
896 fprintf (dump_file, "generated memset");
897 if (bytev == 0)
898 fprintf (dump_file, " zero\n");
899 else
900 fprintf (dump_file, "\n");
904 /* Generate a call to memcpy for PARTITION in LOOP. */
906 static void
907 generate_memcpy_builtin (struct loop *loop, partition_t partition)
909 gimple_stmt_iterator gsi;
910 gimple stmt, fn_call;
911 tree dest, src, fn, nb_bytes;
912 location_t loc;
913 enum built_in_function kind;
915 stmt = DR_STMT (partition->main_dr);
916 loc = gimple_location (stmt);
918 /* The new statements will be placed before LOOP. */
919 gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
921 nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter);
922 nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
923 false, GSI_CONTINUE_LINKING);
924 dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
925 src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
926 if (ptr_derefs_may_alias_p (dest, src))
927 kind = BUILT_IN_MEMMOVE;
928 else
929 kind = BUILT_IN_MEMCPY;
931 dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
932 false, GSI_CONTINUE_LINKING);
933 src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
934 false, GSI_CONTINUE_LINKING);
935 fn = build_fold_addr_expr (builtin_decl_implicit (kind));
936 fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
937 gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
939 if (dump_file && (dump_flags & TDF_DETAILS))
941 if (kind == BUILT_IN_MEMCPY)
942 fprintf (dump_file, "generated memcpy\n");
943 else
944 fprintf (dump_file, "generated memmove\n");
948 /* Remove and destroy the loop LOOP. */
950 static void
951 destroy_loop (struct loop *loop)
953 unsigned nbbs = loop->num_nodes;
954 edge exit = single_exit (loop);
955 basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
956 basic_block *bbs;
957 unsigned i;
959 bbs = get_loop_body_in_dom_order (loop);
961 redirect_edge_pred (exit, src);
962 exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
963 exit->flags |= EDGE_FALLTHRU;
964 cancel_loop_tree (loop);
965 rescan_loop_exit (exit, false, true);
967 for (i = 0; i < nbbs; i++)
969 /* We have made sure to not leave any dangling uses of SSA
970 names defined in the loop. With the exception of virtuals.
971 Make sure we replace all uses of virtual defs that will remain
972 outside of the loop with the bare symbol as delete_basic_block
973 will release them. */
974 gimple_stmt_iterator gsi;
975 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
977 gimple phi = gsi_stmt (gsi);
978 if (virtual_operand_p (gimple_phi_result (phi)))
979 mark_virtual_phi_result_for_renaming (phi);
981 for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
983 gimple stmt = gsi_stmt (gsi);
984 tree vdef = gimple_vdef (stmt);
985 if (vdef && TREE_CODE (vdef) == SSA_NAME)
986 mark_virtual_operand_for_renaming (vdef);
988 delete_basic_block (bbs[i]);
990 free (bbs);
992 set_immediate_dominator (CDI_DOMINATORS, dest,
993 recompute_dominator (CDI_DOMINATORS, dest));
996 /* Generates code for PARTITION. */
998 static void
999 generate_code_for_partition (struct loop *loop,
1000 partition_t partition, bool copy_p)
1002 switch (partition->kind)
1004 case PKIND_NORMAL:
1005 /* Reductions all have to be in the last partition. */
1006 gcc_assert (!partition_reduction_p (partition)
1007 || !copy_p);
1008 generate_loops_for_partition (loop, partition, copy_p);
1009 return;
1011 case PKIND_MEMSET:
1012 generate_memset_builtin (loop, partition);
1013 break;
1015 case PKIND_MEMCPY:
1016 generate_memcpy_builtin (loop, partition);
1017 break;
1019 default:
1020 gcc_unreachable ();
1023 /* Common tail for partitions we turn into a call. If this was the last
1024 partition for which we generate code, we have to destroy the loop. */
1025 if (!copy_p)
1026 destroy_loop (loop);
1030 /* Returns a partition with all the statements needed for computing
1031 the vertex V of the RDG, also including the loop exit conditions. */
1033 static partition_t
1034 build_rdg_partition_for_vertex (struct graph *rdg, int v)
1036 partition_t partition = partition_alloc (NULL, NULL);
1037 vec<int> nodes;
1038 unsigned i;
1039 int x;
1041 nodes.create (3);
1042 graphds_dfs (rdg, &v, 1, &nodes, false, NULL);
1044 FOR_EACH_VEC_ELT (nodes, i, x)
1046 bitmap_set_bit (partition->stmts, x);
1047 bitmap_set_bit (partition->loops,
1048 loop_containing_stmt (RDG_STMT (rdg, x))->num);
1051 nodes.release ();
1052 return partition;
1055 /* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
1056 For the moment we detect only the memset zero pattern. */
1058 static void
1059 classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
1061 bitmap_iterator bi;
1062 unsigned i;
1063 tree nb_iter;
1064 data_reference_p single_load, single_store;
1065 bool volatiles_p = false;
1067 partition->kind = PKIND_NORMAL;
1068 partition->main_dr = NULL;
1069 partition->secondary_dr = NULL;
1070 partition->niter = NULL_TREE;
1072 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
1074 gimple stmt = RDG_STMT (rdg, i);
1076 if (gimple_has_volatile_ops (stmt))
1077 volatiles_p = true;
1079 /* If the stmt has uses outside of the loop mark it as reduction. */
1080 if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
1082 partition->reduction_p = true;
1083 return;
1087 /* Perform general partition disqualification for builtins. */
1088 if (volatiles_p
1089 || !flag_tree_loop_distribute_patterns)
1090 return;
1092 /* Detect memset and memcpy. */
1093 single_load = NULL;
1094 single_store = NULL;
1095 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
1097 gimple stmt = RDG_STMT (rdg, i);
1098 data_reference_p dr;
1099 unsigned j;
1101 if (gimple_code (stmt) == GIMPLE_PHI)
1102 continue;
1104 /* Any scalar stmts are ok. */
1105 if (!gimple_vuse (stmt))
1106 continue;
1108 /* Otherwise just regular loads/stores. */
1109 if (!gimple_assign_single_p (stmt))
1110 return;
1112 /* But exactly one store and/or load. */
1113 for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
1115 if (DR_IS_READ (dr))
1117 if (single_load != NULL)
1118 return;
1119 single_load = dr;
1121 else
1123 if (single_store != NULL)
1124 return;
1125 single_store = dr;
1130 if (!single_store)
1131 return;
1133 if (!dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
1134 gimple_bb (DR_STMT (single_store))))
1135 nb_iter = number_of_latch_executions (loop);
1136 else
1137 nb_iter = number_of_exit_cond_executions (loop);
1138 if (!nb_iter || nb_iter == chrec_dont_know)
1139 return;
1141 if (single_store && !single_load)
1143 gimple stmt = DR_STMT (single_store);
1144 tree rhs = gimple_assign_rhs1 (stmt);
1145 if (const_with_all_bytes_same (rhs) == -1
1146 && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
1147 || (TYPE_MODE (TREE_TYPE (rhs))
1148 != TYPE_MODE (unsigned_char_type_node))))
1149 return;
1150 if (TREE_CODE (rhs) == SSA_NAME
1151 && !SSA_NAME_IS_DEFAULT_DEF (rhs)
1152 && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
1153 return;
1154 if (!adjacent_dr_p (single_store)
1155 || !dominated_by_p (CDI_DOMINATORS,
1156 loop->latch, gimple_bb (stmt)))
1157 return;
1158 partition->kind = PKIND_MEMSET;
1159 partition->main_dr = single_store;
1160 partition->niter = nb_iter;
1162 else if (single_store && single_load)
1164 gimple store = DR_STMT (single_store);
1165 gimple load = DR_STMT (single_load);
1166 /* Direct aggregate copy or via an SSA name temporary. */
1167 if (load != store
1168 && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
1169 return;
1170 if (!adjacent_dr_p (single_store)
1171 || !adjacent_dr_p (single_load)
1172 || !operand_equal_p (DR_STEP (single_store),
1173 DR_STEP (single_load), 0)
1174 || !dominated_by_p (CDI_DOMINATORS,
1175 loop->latch, gimple_bb (store)))
1176 return;
1177 /* Now check that if there is a dependence this dependence is
1178 of a suitable form for memmove. */
1179 vec<loop_p> loops = vNULL;
1180 ddr_p ddr;
1181 loops.safe_push (loop);
1182 ddr = initialize_data_dependence_relation (single_load, single_store,
1183 loops);
1184 compute_affine_dependence (ddr, loop);
1185 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1187 free_dependence_relation (ddr);
1188 loops.release ();
1189 return;
1191 if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
1193 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1195 free_dependence_relation (ddr);
1196 loops.release ();
1197 return;
1199 lambda_vector dist_v;
1200 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
1202 int dist = dist_v[index_in_loop_nest (loop->num,
1203 DDR_LOOP_NEST (ddr))];
1204 if (dist > 0 && !DDR_REVERSED_P (ddr))
1206 free_dependence_relation (ddr);
1207 loops.release ();
1208 return;
1212 free_dependence_relation (ddr);
1213 loops.release ();
1214 partition->kind = PKIND_MEMCPY;
1215 partition->main_dr = single_store;
1216 partition->secondary_dr = single_load;
1217 partition->niter = nb_iter;
1221 /* For a data reference REF, return the declaration of its base
1222 address or NULL_TREE if the base is not determined. */
1224 static tree
1225 ref_base_address (data_reference_p dr)
1227 tree base_address = DR_BASE_ADDRESS (dr);
1228 if (base_address
1229 && TREE_CODE (base_address) == ADDR_EXPR)
1230 return TREE_OPERAND (base_address, 0);
1232 return base_address;
1235 /* Returns true when PARTITION1 and PARTITION2 have similar memory
1236 accesses in RDG. */
1238 static bool
1239 similar_memory_accesses (struct graph *rdg, partition_t partition1,
1240 partition_t partition2)
1242 unsigned i, j, k, l;
1243 bitmap_iterator bi, bj;
1244 data_reference_p ref1, ref2;
1246 /* First check whether in the intersection of the two partitions are
1247 any loads or stores. Common loads are the situation that happens
1248 most often. */
1249 EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
1250 if (RDG_MEM_WRITE_STMT (rdg, i)
1251 || RDG_MEM_READS_STMT (rdg, i))
1252 return true;
1254 /* Then check all data-references against each other. */
1255 EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
1256 if (RDG_MEM_WRITE_STMT (rdg, i)
1257 || RDG_MEM_READS_STMT (rdg, i))
1258 EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
1259 if (RDG_MEM_WRITE_STMT (rdg, j)
1260 || RDG_MEM_READS_STMT (rdg, j))
1262 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
1264 tree base1 = ref_base_address (ref1);
1265 if (base1)
1266 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
1267 if (base1 == ref_base_address (ref2))
1268 return true;
1272 return false;
1275 /* Aggregate several components into a useful partition that is
1276 registered in the PARTITIONS vector. Partitions will be
1277 distributed in different loops. */
1279 static void
1280 rdg_build_partitions (struct graph *rdg,
1281 vec<gimple> starting_stmts,
1282 vec<partition_t> *partitions)
1284 bitmap processed = BITMAP_ALLOC (NULL);
1285 int i;
1286 gimple stmt;
1288 FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
1290 int v = rdg_vertex_for_stmt (rdg, stmt);
1292 if (dump_file && (dump_flags & TDF_DETAILS))
1293 fprintf (dump_file,
1294 "ldist asked to generate code for vertex %d\n", v);
1296 /* If the vertex is already contained in another partition so
1297 is the partition rooted at it. */
1298 if (bitmap_bit_p (processed, v))
1299 continue;
1301 partition_t partition = build_rdg_partition_for_vertex (rdg, v);
1302 bitmap_ior_into (processed, partition->stmts);
1304 if (dump_file && (dump_flags & TDF_DETAILS))
1306 fprintf (dump_file, "ldist useful partition:\n");
1307 dump_bitmap (dump_file, partition->stmts);
1310 partitions->safe_push (partition);
1313 /* All vertices should have been assigned to at least one partition now,
1314 other than vertices belonging to dead code. */
1316 BITMAP_FREE (processed);
1319 /* Dump to FILE the PARTITIONS. */
1321 static void
1322 dump_rdg_partitions (FILE *file, vec<partition_t> partitions)
1324 int i;
1325 partition_t partition;
1327 FOR_EACH_VEC_ELT (partitions, i, partition)
1328 debug_bitmap_file (file, partition->stmts);
1331 /* Debug PARTITIONS. */
1332 extern void debug_rdg_partitions (vec<partition_t> );
1334 DEBUG_FUNCTION void
1335 debug_rdg_partitions (vec<partition_t> partitions)
1337 dump_rdg_partitions (stderr, partitions);
1340 /* Returns the number of read and write operations in the RDG. */
1342 static int
1343 number_of_rw_in_rdg (struct graph *rdg)
1345 int i, res = 0;
1347 for (i = 0; i < rdg->n_vertices; i++)
1349 if (RDG_MEM_WRITE_STMT (rdg, i))
1350 ++res;
1352 if (RDG_MEM_READS_STMT (rdg, i))
1353 ++res;
1356 return res;
1359 /* Returns the number of read and write operations in a PARTITION of
1360 the RDG. */
1362 static int
1363 number_of_rw_in_partition (struct graph *rdg, partition_t partition)
1365 int res = 0;
1366 unsigned i;
1367 bitmap_iterator ii;
1369 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
1371 if (RDG_MEM_WRITE_STMT (rdg, i))
1372 ++res;
1374 if (RDG_MEM_READS_STMT (rdg, i))
1375 ++res;
1378 return res;
1381 /* Returns true when one of the PARTITIONS contains all the read or
1382 write operations of RDG. */
1384 static bool
1385 partition_contains_all_rw (struct graph *rdg,
1386 vec<partition_t> partitions)
1388 int i;
1389 partition_t partition;
1390 int nrw = number_of_rw_in_rdg (rdg);
1392 FOR_EACH_VEC_ELT (partitions, i, partition)
1393 if (nrw == number_of_rw_in_partition (rdg, partition))
1394 return true;
1396 return false;
1400 /* Distributes the code from LOOP in such a way that producer
1401 statements are placed before consumer statements. Tries to separate
1402 only the statements from STMTS into separate loops.
1403 Returns the number of distributed loops. */
/* NOTE(review): this blob view has gitweb line numbers baked into each
   line and brace-only lines stripped by extraction; code tokens below
   are kept byte-identical to the view.  */
1405 static int
1406 distribute_loop (struct loop *loop, vec<gimple> stmts,
1407 control_dependences *cd, int *nb_calls)
/* Locals: the RDG, the loop nest used to build it, and the partition
   vector populated below.  */
1409 struct graph *rdg;
1410 vec<loop_p> loop_nest;
1411 vec<partition_t> partitions;
1412 partition_t partition;
1413 bool any_builtin;
1414 int i, nbp;
/* Bail out early when LOOP is not part of an analyzable loop nest.  */
1416 *nb_calls = 0;
1417 loop_nest.create (3);
1418 if (!find_loop_nest (loop, &loop_nest))
1420 loop_nest.release ();
1421 return 0;
/* Build the reduced dependence graph; give up if that fails.  */
1424 rdg = build_rdg (loop_nest, cd);
1425 if (!rdg)
1427 if (dump_file && (dump_flags & TDF_DETAILS))
1428 fprintf (dump_file,
1429 "Loop %d not distributed: failed to build the RDG.\n",
1430 loop->num);
1432 loop_nest.release ();
1433 return 0;
1436 if (dump_file && (dump_flags & TDF_DETAILS))
1437 dump_rdg (dump_file, rdg);
/* Seed partitions from STMTS, then classify each one (builtin
   memset/memcpy recognition happens in classify_partition).  */
1439 partitions.create (3);
1440 rdg_build_partitions (rdg, stmts, &partitions);
1442 any_builtin = false;
1443 FOR_EACH_VEC_ELT (partitions, i, partition)
1445 classify_partition (loop, rdg, partition);
1446 any_builtin |= partition_builtin_p (partition);
1449 /* If we did not detect any builtin but are not asked to apply
1450 regular loop distribution simply bail out. */
1451 if (!flag_tree_loop_distribution
1452 && !any_builtin)
1454 nbp = 0;
1455 goto ldist_done;
1458 /* Apply our simple cost model - fuse partitions with similar
1459 memory accesses. */
1460 partition_t into;
1461 for (i = 0; partitions.iterate (i, &into); ++i)
1463 if (partition_builtin_p (into))
1464 continue;
1465 for (int j = i + 1;
1466 partitions.iterate (j, &partition); ++j)
1468 if (!partition_builtin_p (partition)
1469 && similar_memory_accesses (rdg, into, partition))
1471 if (dump_file && (dump_flags & TDF_DETAILS))
1473 fprintf (dump_file, "fusing partitions\n");
1474 dump_bitmap (dump_file, into->stmts);
1475 dump_bitmap (dump_file, partition->stmts);
1476 fprintf (dump_file, "because they have similar "
1477 "memory accesses\n");
/* Merge PARTITION into INTO; decrement J so the element shifted into
   slot J by ordered_remove is re-examined.  */
1479 partition_merge_into (into, partition);
1480 partitions.ordered_remove (j);
1481 partition_free (partition);
1482 j--;
1487 /* If we are only distributing patterns fuse all partitions that
1488 were not properly classified as builtins. */
1489 if (!flag_tree_loop_distribution)
1491 partition_t into;
1492 /* Only fuse adjacent non-builtin partitions, see PR53616.
1493 ??? Use dependence information to improve partition ordering. */
1494 i = 0;
/* NOTE(review): the `do' keyword opening the do/while matching line
   1511 below sits on a line dropped by extraction.  */
1497 for (; partitions.iterate (i, &into); ++i)
1498 if (!partition_builtin_p (into))
1499 break;
1500 for (++i; partitions.iterate (i, &partition); ++i)
1501 if (!partition_builtin_p (partition))
1503 partition_merge_into (into, partition);
1504 partitions.ordered_remove (i);
1505 partition_free (partition);
1506 i--;
1508 else
1509 break;
1511 while ((unsigned) i < partitions.length ());
1514 /* Fuse all reduction partitions into the last. */
1515 if (partitions.length () > 1)
1517 partition_t into = partitions.last ();
1518 for (i = partitions.length () - 2; i >= 0; --i)
1520 partition_t what = partitions[i];
1521 if (partition_reduction_p (what))
1523 if (dump_file && (dump_flags & TDF_DETAILS))
1525 fprintf (dump_file, "fusing partitions\n");
1526 dump_bitmap (dump_file, into->stmts);
1527 dump_bitmap (dump_file, what->stmts);
1528 fprintf (dump_file, "because the latter has reductions\n");
1530 partition_merge_into (into, what);
1531 partitions.ordered_remove (i);
1532 partition_free (what);
/* Nothing worth doing when no partitions remain, a single
   non-builtin partition remains, or one partition covers all memory
   operations of the RDG.  */
1537 nbp = partitions.length ();
1538 if (nbp == 0
1539 || (nbp == 1 && !partition_builtin_p (partitions[0]))
1540 || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
1542 nbp = 0;
1543 goto ldist_done;
1546 if (dump_file && (dump_flags & TDF_DETAILS))
1547 dump_rdg_partitions (dump_file, partitions);
/* Generate code: builtin partitions count toward *NB_CALLS; the
   third argument tells codegen whether to copy the loop (true for
   all but the last partition).  */
1549 FOR_EACH_VEC_ELT (partitions, i, partition)
1551 if (partition_builtin_p (partition))
1552 (*nb_calls)++;
1553 generate_code_for_partition (loop, partition, i < nbp - 1);
/* Common cleanup path for all exits after RDG construction.  */
1556 ldist_done:
1558 FOR_EACH_VEC_ELT (partitions, i, partition)
1559 partition_free (partition);
1560 partitions.release ();
1562 free_rdg (rdg);
1563 loop_nest.release ();
1564 return nbp - *nb_calls;
1567 /* Distribute all loops in the current function. */
/* NOTE(review): this blob view has gitweb line numbers baked into each
   line and brace-only lines stripped by extraction; code tokens below
   are kept byte-identical to the view.  */
1569 static unsigned int
1570 tree_loop_distribution (void)
1572 struct loop *loop;
1573 loop_iterator li;
1574 bool changed = false;
1575 basic_block bb;
1576 control_dependences *cd = NULL;
/* Reset statement uids in the whole function up front — presumably
   so later RDG construction can assign its own; verify against
   build_rdg.  */
1578 FOR_ALL_BB (bb)
1580 gimple_stmt_iterator gsi;
1581 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1582 gimple_set_uid (gsi_stmt (gsi), -1);
1583 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1584 gimple_set_uid (gsi_stmt (gsi), -1);
1587 /* We can at the moment only distribute non-nested loops, thus restrict
1588 walking to innermost loops. */
1589 FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
1591 vec<gimple> work_list = vNULL;
1592 basic_block *bbs;
1593 int num = loop->num;
1594 unsigned int i;
1596 /* If the loop doesn't have a single exit we will fail anyway,
1597 so do that early. */
1598 if (!single_exit (loop))
1599 continue;
1601 /* Only optimize hot loops. */
1602 if (!optimize_loop_for_speed_p (loop))
1603 continue;
1605 /* Initialize the worklist with stmts we seed the partitions with. */
1606 bbs = get_loop_body_in_dom_order (loop);
1607 for (i = 0; i < loop->num_nodes; ++i)
1609 gimple_stmt_iterator gsi;
/* Seed from PHIs whose result is live outside the loop (virtual
   PHIs are skipped).  */
1610 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
1612 gimple phi = gsi_stmt (gsi);
1613 if (virtual_operand_p (gimple_phi_result (phi)))
1614 continue;
1615 /* Distribute stmts which have defs that are used outside of
1616 the loop. */
1617 if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
1618 continue;
1619 work_list.safe_push (phi);
1621 for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
1623 gimple stmt = gsi_stmt (gsi);
1625 /* If there is a stmt with side-effects bail out - we
1626 cannot and should not distribute this loop. */
1627 if (gimple_has_side_effects (stmt))
1629 work_list.truncate (0);
1630 goto out;
1633 /* Distribute stmts which have defs that are used outside of
1634 the loop. */
1635 if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
1637 /* Otherwise only distribute stores for now. */
1638 else if (!gimple_assign_single_p (stmt)
1639 || is_gimple_reg (gimple_assign_lhs (stmt)))
1640 continue;
1642 work_list.safe_push (stmt);
1645 out:
1646 free (bbs);
/* Lazily build control dependences the first time a non-empty
   worklist needs them; post-dominators are only required during
   construction and freed immediately.  */
1648 int nb_generated_loops = 0;
1649 int nb_generated_calls = 0;
1650 location_t loc = find_loop_location (loop);
1651 if (work_list.length () > 0)
1653 if (!cd)
1655 calculate_dominance_info (CDI_DOMINATORS);
1656 calculate_dominance_info (CDI_POST_DOMINATORS);
1657 cd = new control_dependences (create_edge_list ());
1658 free_dominance_info (CDI_POST_DOMINATORS);
1660 nb_generated_loops = distribute_loop (loop, work_list, cd,
1661 &nb_generated_calls);
1664 if (nb_generated_loops + nb_generated_calls > 0)
1666 changed = true;
1667 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
1668 loc, "Loop %d distributed: split to %d loops "
1669 "and %d library calls.\n",
1670 num, nb_generated_loops, nb_generated_calls);
1672 else if (dump_file && (dump_flags & TDF_DETAILS))
1673 fprintf (dump_file, "Loop %d is the same.\n", num);
1675 work_list.release ();
1678 if (cd)
1679 delete cd;
/* Cached scalar evolutions reference the SSA names in the loops,
   so update the SSA form after distribution changed anything.  */
1681 if (changed)
1683 mark_virtual_operands_for_renaming (cfun);
1684 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1687 #ifdef ENABLE_CHECKING
1688 verify_loop_structure ();
1689 #endif
1691 return 0;
1694 static bool
1695 gate_tree_loop_distribution (void)
1697 return flag_tree_loop_distribution
1698 || flag_tree_loop_distribute_patterns;
1701 namespace {
/* Static metadata describing the loop-distribution pass to the pass
   manager.  NOTE(review): the braces of this aggregate initializer
   and of the class body below sit on lines dropped by extraction.  */
1703 const pass_data pass_data_loop_distribution =
1705 GIMPLE_PASS, /* type */
1706 "ldist", /* name */
1707 OPTGROUP_LOOP, /* optinfo_flags */
1708 true, /* has_gate */
1709 true, /* has_execute */
1710 TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
1711 ( PROP_cfg | PROP_ssa ), /* properties_required */
1712 0, /* properties_provided */
1713 0, /* properties_destroyed */
1714 0, /* todo_flags_start */
1715 TODO_verify_ssa, /* todo_flags_finish */
/* Pass class: thin wrapper dispatching gate/execute to the static
   functions above.  */
1718 class pass_loop_distribution : public gimple_opt_pass
1720 public:
1721 pass_loop_distribution (gcc::context *ctxt)
1722 : gimple_opt_pass (pass_data_loop_distribution, ctxt)
1725 /* opt_pass methods: */
1726 bool gate () { return gate_tree_loop_distribution (); }
1727 unsigned int execute () { return tree_loop_distribution (); }
1729 }; // class pass_loop_distribution
1731 } // anon namespace
1733 gimple_opt_pass *
1734 make_pass_loop_distribution (gcc::context *ctxt)
1736 return new pass_loop_distribution (ctxt);