vectorizer cost model enhancement
[official-gcc.git] / gcc / tree-loop-distribution.c
blob51b6ef03b4e7d831266e75e606723c21f4e084ee
1 /* Loop distribution.
2 Copyright (C) 2006-2013 Free Software Foundation, Inc.
3 Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
4 and Sebastian Pop <sebastian.pop@amd.com>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 3, or (at your option) any
11 later version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* This pass performs loop distribution: for example, the loop
24 |DO I = 2, N
25 | A(I) = B(I) + C
26 | D(I) = A(I-1)*E
27 |ENDDO
29 is transformed to
31 |DOALL I = 2, N
32 | A(I) = B(I) + C
33 |ENDDO
35 |DOALL I = 2, N
36 | D(I) = A(I-1)*E
37 |ENDDO
39 This pass uses an RDG, Reduced Dependence Graph built on top of the
40 data dependence relations. The RDG is then topologically sorted to
41 obtain a map of information producers/consumers based on which it
42 generates the new loops. */
44 #include "config.h"
45 #include "system.h"
46 #include "coretypes.h"
47 #include "tree-ssa.h"
48 #include "cfgloop.h"
49 #include "tree-chrec.h"
50 #include "tree-data-ref.h"
51 #include "tree-scalar-evolution.h"
52 #include "tree-pass.h"
53 #include "gimple-pretty-print.h"
/* A Reduced Dependence Graph (RDG) vertex representing a statement.  */
typedef struct rdg_vertex
{
  /* The statement represented by this vertex.  */
  gimple stmt;

  /* Vector of data-references in this statement.  */
  vec<data_reference_p> datarefs;

  /* True when the statement contains a write to memory.  */
  bool has_mem_write;

  /* True when the statement contains a read from memory.  */
  bool has_mem_reads;
} *rdg_vertex_p;

/* Accessors for the rdg_vertex payload hung off a generic graph
   vertex (V->data) or off vertex number I of RDG.  */
#define RDGV_STMT(V)     ((struct rdg_vertex *) ((V)->data))->stmt
#define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
#define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
#define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
#define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
#define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
#define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
#define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
/* Data dependence type.  The character values are used when dumping
   edges (see dot_rdg_1).  */

enum rdg_dep_type
{
  /* Read After Write (RAW).  */
  flow_dd = 'f',

  /* Write After Read (WAR).  */
  anti_dd = 'a',

  /* Write After Write (WAW).  */
  output_dd = 'o',

  /* Read After Read (RAR).  */
  input_dd = 'i',

  /* Control dependence (execute conditional on).  */
  control_dd = 'c'
};
/* Dependence information attached to an edge of the RDG.  */

typedef struct rdg_edge
{
  /* Type of the dependence.  */
  enum rdg_dep_type type;

  /* Levels of the dependence: the depth of the loops that carry the
     dependence.  */
  unsigned level;

  /* Dependence relation between data dependences, NULL when one of
     the vertices is a scalar.  */
  ddr_p relation;
} *rdg_edge_p;

/* Accessors for the rdg_edge payload hung off a generic graph edge
   (E->data).  */
#define RDGE_TYPE(E)        ((struct rdg_edge *) ((E)->data))->type
#define RDGE_LEVEL(E)       ((struct rdg_edge *) ((E)->data))->level
#define RDGE_RELATION(E)    ((struct rdg_edge *) ((E)->data))->relation
/* Dump vertex I in RDG to FILE.  Prints the memory read/write flags,
   the predecessor and successor vertex numbers, and the statement.  */

static void
dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
{
  struct vertex *v = &(rdg->vertices[i]);
  struct graph_edge *e;

  fprintf (file, "(vertex %d: (%s%s) (in:", i,
	   RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
	   RDG_MEM_READS_STMT (rdg, i) ? "r" : "");

  if (v->pred)
    for (e = v->pred; e; e = e->pred_next)
      fprintf (file, " %d", e->src);

  fprintf (file, ") (out:");

  if (v->succ)
    for (e = v->succ; e; e = e->succ_next)
      fprintf (file, " %d", e->dest);

  fprintf (file, ")\n");
  print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
  fprintf (file, ")\n");
}
/* Call dump_rdg_vertex on stderr.  Convenience entry point for use
   from the debugger.  */

DEBUG_FUNCTION void
debug_rdg_vertex (struct graph *rdg, int i)
{
  dump_rdg_vertex (stderr, rdg, i);
}
/* Dump the reduced dependence graph RDG to FILE, one vertex per line.  */

static void
dump_rdg (FILE *file, struct graph *rdg)
{
  fprintf (file, "(rdg\n");
  for (int i = 0; i < rdg->n_vertices; i++)
    dump_rdg_vertex (file, rdg, i);
  fprintf (file, ")\n");
}
/* Call dump_rdg on stderr.  Convenience entry point for use from the
   debugger.  */

DEBUG_FUNCTION void
debug_rdg (struct graph *rdg)
{
  dump_rdg (stderr, rdg);
}
/* Write RDG to FILE in graphviz "dot" syntax: one node per vertex
   (labelled with its statement), colored by memory access kind, and
   one edge per dependence, labelled by dependence type.  */

static void
dot_rdg_1 (FILE *file, struct graph *rdg)
{
  int i;
  pretty_printer buffer;
  pp_needs_newline (&buffer) = false;
  /* Make the pretty-printer flush straight to FILE.  */
  buffer.buffer->stream = file;

  fprintf (file, "digraph RDG {\n");

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      fprintf (file, "%d [label=\"[%d] ", i, i);
      pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
      pp_flush (&buffer);
      fprintf (file, "\"]\n");

      /* Highlight reads from memory.  */
      if (RDG_MEM_READS_STMT (rdg, i))
	fprintf (file, "%d [style=filled, fillcolor=green]\n", i);

      /* Highlight stores to memory.  */
      if (RDG_MEM_WRITE_STMT (rdg, i))
	fprintf (file, "%d [style=filled, fillcolor=red]\n", i);

      if (v->succ)
	for (e = v->succ; e; e = e->succ_next)
	  switch (RDGE_TYPE (e))
	    {
	    case input_dd:
	      fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
	      break;

	    case output_dd:
	      fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
	      break;

	    case flow_dd:
	      /* These are the most common dependences: don't print these.  */
	      fprintf (file, "%d -> %d \n", i, e->dest);
	      break;

	    case anti_dd:
	      fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
	      break;

	    case control_dd:
	      fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
	      break;

	    default:
	      gcc_unreachable ();
	    }
    }

  fprintf (file, "}\n\n");
}
/* Display the Reduced Dependence Graph using dotty.  */

DEBUG_FUNCTION void
dot_rdg (struct graph *rdg)
{
  /* When debugging, you may want to enable the following code.
     NOTE(review): the comment says "enable", yet the branch is
     currently compiled in (#if 1) — confirm which default is
     intended.  */
#if 1
  FILE *file = popen("dot -Tx11", "w");
  if (!file)
    return;
  dot_rdg_1 (file, rdg);
  fflush (file);
  /* Close the pipe's write end first so dot sees EOF and renders;
     pclose then reaps the child.  */
  close (fileno (file));
  pclose (file);
#else
  dot_rdg_1 (stderr, rdg);
#endif
}
/* Returns the index of STMT in RDG, or -1 when STMT is not part of
   the graph.  The mapping is stored in the statement's uid (set by
   create_rdg_vertices); the assert cross-checks it against RDG.  */

static int
rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple stmt)
{
  int index = gimple_uid (stmt);
  gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
  return index;
}
/* Creates an edge in RDG for each distance vector from DDR.  The
   order that we keep track of in the RDG is the order in which
   statements have to be executed.  */

static void
create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
{
  struct graph_edge *e;
  int va, vb;
  data_reference_p dra = DDR_A (ddr);
  data_reference_p drb = DDR_B (ddr);
  unsigned level = ddr_dependence_level (ddr);

  /* For non scalar dependences, when the dependence is REVERSED,
     statement B has to be executed before statement A.  */
  if (level > 0
      && !DDR_REVERSED_P (ddr))
    {
      /* Swap so the edge below goes from the must-execute-first
	 statement to the later one.  */
      data_reference_p tmp = dra;
      dra = drb;
      drb = tmp;
    }

  va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
  vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));

  /* A statement not in the RDG (uid -1) contributes no edge.  */
  if (va < 0 || vb < 0)
    return;

  e = add_edge (rdg, va, vb);
  e->data = XNEW (struct rdg_edge);

  RDGE_LEVEL (e) = level;
  /* The edge takes ownership of DDR; it is released in free_rdg.  */
  RDGE_RELATION (e) = ddr;

  /* Determines the type of the data dependence.  */
  if (DR_IS_READ (dra) && DR_IS_READ (drb))
    RDGE_TYPE (e) = input_dd;
  else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
    RDGE_TYPE (e) = output_dd;
  else if (DR_IS_WRITE (dra) && DR_IS_READ (drb))
    RDGE_TYPE (e) = flow_dd;
  else if (DR_IS_READ (dra) && DR_IS_WRITE (drb))
    RDGE_TYPE (e) = anti_dd;
}
/* Creates dependence edges in RDG for all the uses of DEF.  IDEF is
   the index of DEF in RDG.  Scalar SSA dependences are always flow
   (def-before-use) edges and carry no data dependence relation.  */

static void
create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
{
  use_operand_p imm_use_p;
  imm_use_iterator iterator;

  FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
    {
      struct graph_edge *e;
      int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));

      /* Uses outside the RDG (e.g. outside the loop) are ignored.  */
      if (use < 0)
	continue;

      e = add_edge (rdg, idef, use);
      e->data = XNEW (struct rdg_edge);
      RDGE_TYPE (e) = flow_dd;
      RDGE_RELATION (e) = NULL;
    }
}
/* Creates an edge for the control dependences of BB to the vertex V.
   For every edge BB is control dependent on, an RDG edge is added
   from the vertex of the controlling statement to V.  */

static void
create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
				    int v, control_dependences *cd)
{
  bitmap_iterator bi;
  unsigned edge_n;
  EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
			    0, edge_n, bi)
    {
      basic_block cond_bb = cd->get_edge (edge_n)->src;
      gimple stmt = last_stmt (cond_bb);
      if (stmt && is_ctrl_stmt (stmt))
	{
	  struct graph_edge *e;
	  int c = rdg_vertex_for_stmt (rdg, stmt);
	  /* The controlling statement may not be part of the RDG.  */
	  if (c < 0)
	    continue;

	  e = add_edge (rdg, c, v);
	  e->data = XNEW (struct rdg_edge);
	  RDGE_TYPE (e) = control_dd;
	  RDGE_RELATION (e) = NULL;
	}
    }
}
/* Creates the edges of the reduced dependence graph RDG: one edge per
   dependent DDR in DDRS, one flow edge per scalar SSA def-use pair,
   and (when CD is non-NULL) control dependence edges.  Non-dependent
   DDRs are freed here; dependent ones become owned by their edge.  */

static void
create_rdg_edges (struct graph *rdg, vec<ddr_p> ddrs, control_dependences *cd)
{
  int i;
  struct data_dependence_relation *ddr;
  def_operand_p def_p;
  ssa_op_iter iter;

  FOR_EACH_VEC_ELT (ddrs, i, ddr)
    if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
      create_rdg_edge_for_ddr (rdg, ddr);
    else
      free_dependence_relation (ddr);

  for (i = 0; i < rdg->n_vertices; i++)
    FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
			      iter, SSA_OP_DEF)
      create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);

  if (cd)
    for (i = 0; i < rdg->n_vertices; i++)
      {
	gimple stmt = RDG_STMT (rdg, i);
	if (gimple_code (stmt) == GIMPLE_PHI)
	  {
	    /* A PHI is control dependent on each incoming edge's
	       source block rather than on its own block.  */
	    edge_iterator ei;
	    edge e;
	    FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
	      create_edge_for_control_dependence (rdg, e->src, i, cd);
	  }
	else
	  create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
      }
}
/* Build the vertices of the reduced dependence graph RDG.  Return false
   if that failed (data-reference analysis of some statement failed).
   On success, each statement's uid holds its vertex index, and each
   vertex records its data references (appended to *DATAREFS too).  */

static bool
create_rdg_vertices (struct graph *rdg, vec<gimple> stmts, loop_p loop,
		     vec<data_reference_p> *datarefs)
{
  int i;
  gimple stmt;

  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      struct vertex *v = &(rdg->vertices[i]);

      /* Record statement to vertex mapping.  */
      gimple_set_uid (stmt, i);

      v->data = XNEW (struct rdg_vertex);
      RDGV_STMT (v) = stmt;
      RDGV_DATAREFS (v).create (0);
      RDGV_HAS_MEM_WRITE (v) = false;
      RDGV_HAS_MEM_READS (v) = false;
      /* PHIs carry no data references.  */
      if (gimple_code (stmt) == GIMPLE_PHI)
	continue;

      /* Remember where this statement's datarefs start in *DATAREFS
	 so we can attach exactly the new ones to the vertex.  */
      unsigned drp = datarefs->length ();
      if (!find_data_references_in_stmt (loop, stmt, datarefs))
	return false;
      for (unsigned j = drp; j < datarefs->length (); ++j)
	{
	  data_reference_p dr = (*datarefs)[j];
	  if (DR_IS_READ (dr))
	    RDGV_HAS_MEM_READS (v) = true;
	  else
	    RDGV_HAS_MEM_WRITE (v) = true;
	  RDGV_DATAREFS (v).safe_push (dr);
	}
    }
  return true;
}
/* Initialize STMTS with all the statements of LOOP.  The order in
   which we discover statements is important as
   generate_loops_for_partition is using the same traversal for
   identifying statements in loop copies.  Labels, debug statements
   and virtual PHIs are skipped.  */

static void
stmts_from_loop (struct loop *loop, vec<gimple> *stmts)
{
  unsigned int i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];
      gimple_stmt_iterator bsi;
      gimple stmt;

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	if (!virtual_operand_p (gimple_phi_result (gsi_stmt (bsi))))
	  stmts->safe_push (gsi_stmt (bsi));

      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	{
	  stmt = gsi_stmt (bsi);
	  if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
	    stmts->safe_push (stmt);
	}
    }

  free (bbs);
}
/* Allocate an empty Reduced Dependence Graph (RDG) with room for
   N_STMTS vertices, one per statement of the loop nest.  */

struct graph *
build_empty_rdg (int n_stmts)
{
  return new_graph (n_stmts);
}
/* Free the reduced dependence graph RDG: releases each edge's
   dependence relation and payload, clears the statement uids that
   were used as the stmt-to-vertex map, frees each vertex's data
   references and payload, then the graph itself.  */

static void
free_rdg (struct graph *rdg)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      for (e = v->succ; e; e = e->succ_next)
	{
	  free_dependence_relation (RDGE_RELATION (e));
	  free (e->data);
	}

      if (v->data)
	{
	  /* Restore the "not in an RDG" uid sentinel.  */
	  gimple_set_uid (RDGV_STMT (v), -1);
	  free_data_refs (RDGV_DATAREFS (v));
	  free (v->data);
	}
    }

  free_graph (rdg);
}
/* Build the Reduced Dependence Graph (RDG) with one vertex per
   statement of the loop nest LOOP_NEST, and one edge per data dependence or
   scalar dependence.  Returns NULL when vertex creation or dependence
   analysis fails; all intermediate resources are released on both
   paths.  */

static struct graph *
build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
{
  struct graph *rdg;
  vec<gimple> stmts;
  vec<data_reference_p> datarefs;
  vec<ddr_p> dependence_relations;

  /* Create the RDG vertices from the stmts of the loop nest.  */
  stmts.create (10);
  stmts_from_loop (loop_nest[0], &stmts);
  rdg = build_empty_rdg (stmts.length ());
  datarefs.create (10);
  if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs))
    {
      stmts.release ();
      datarefs.release ();
      free_rdg (rdg);
      return NULL;
    }
  stmts.release ();

  /* Create the RDG edges from the data dependences in the loop nest.
     Give up when some dependence could not be computed.  */
  dependence_relations.create (100);
  if (!compute_all_dependences (datarefs, &dependence_relations, loop_nest,
				false)
      || !known_dependences_p (dependence_relations))
    {
      free_dependence_relations (dependence_relations);
      datarefs.release ();
      free_rdg (rdg);
      return NULL;
    }
  create_rdg_edges (rdg, dependence_relations, cd);
  dependence_relations.release ();
  datarefs.release ();

  return rdg;
}
/* Kind of a partition: a regular loop, a reduction-carrying loop, or
   a loop replaceable by a library builtin.  Keep PKIND_MEMSET and
   PKIND_MEMCPY last: partition_builtin_p relies on the ordering.  */
enum partition_kind {
    PKIND_NORMAL, PKIND_REDUCTION, PKIND_MEMSET, PKIND_MEMCPY
};

/* A partition of the RDG: the set of statements (by RDG vertex
   number) and loops (by loop number) that will form one distributed
   loop.  */
typedef struct partition_s
{
  bitmap stmts;
  bitmap loops;
  /* True when some statement cannot be recomputed (writes memory).  */
  bool has_writes;
  enum partition_kind kind;
  /* data-references a kind != PKIND_NORMAL partition is about.  */
  data_reference_p main_dr;
  data_reference_p secondary_dr;
} *partition_t;
575 /* Allocate and initialize a partition from BITMAP. */
577 static partition_t
578 partition_alloc (bitmap stmts, bitmap loops)
580 partition_t partition = XCNEW (struct partition_s);
581 partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
582 partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
583 partition->has_writes = false;
584 partition->kind = PKIND_NORMAL;
585 return partition;
588 /* Free PARTITION. */
590 static void
591 partition_free (partition_t partition)
593 BITMAP_FREE (partition->stmts);
594 BITMAP_FREE (partition->loops);
595 free (partition);
598 /* Returns true if the partition can be generated as a builtin. */
600 static bool
601 partition_builtin_p (partition_t partition)
603 return partition->kind > PKIND_REDUCTION;
/* Returns true if the partition contains a statement that writes to
   memory (and hence cannot be recomputed elsewhere).  */

static bool
partition_has_writes (partition_t partition)
{
  return partition->has_writes;
}
614 /* Returns true when DEF is an SSA_NAME defined in LOOP and used after
615 the LOOP. */
617 static bool
618 ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
620 imm_use_iterator imm_iter;
621 use_operand_p use_p;
623 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
625 gimple use_stmt = USE_STMT (use_p);
626 if (!is_gimple_debug (use_stmt)
627 && loop != loop_containing_stmt (use_stmt))
628 return true;
631 return false;
/* Returns true when STMT defines a scalar variable used after the
   loop LOOP.  For a PHI only its result is checked; for other
   statements every SSA definition is checked.  */

static bool
stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple stmt)
{
  def_operand_p def_p;
  ssa_op_iter op_iter;

  if (gimple_code (stmt) == GIMPLE_PHI)
    return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);

  FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
    if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
      return true;

  return false;
}
/* Return a copy of LOOP placed before LOOP, on its preheader edge.
   SSA update is deferred (delete_update_ssa); callers are expected
   to rewrite SSA form afterwards.  */

static struct loop *
copy_loop_before (struct loop *loop)
{
  struct loop *res;
  edge preheader = loop_preheader_edge (loop);

  initialize_original_copy_tables ();
  res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, preheader);
  gcc_assert (res != NULL);
  free_original_copy_tables ();
  delete_update_ssa ();

  return res;
}
670 /* Creates an empty basic block after LOOP. */
672 static void
673 create_bb_after_loop (struct loop *loop)
675 edge exit = single_exit (loop);
677 if (!exit)
678 return;
680 split_edge (exit);
/* Generate code for PARTITION from the code in LOOP.  The loop is
   copied when COPY_P is true.  All the statements not flagged in the
   PARTITION bitmap are removed from the loop or from its copy.  The
   statements are indexed in sequence inside a basic block, and the
   basic blocks of a loop are taken in dom order.  */

static void
generate_loops_for_partition (struct loop *loop, partition_t partition,
			      bool copy_p)
{
  unsigned i;
  gimple_stmt_iterator bsi;
  basic_block *bbs;

  if (copy_p)
    {
      loop = copy_loop_before (loop);
      gcc_assert (loop != NULL);
      create_preheader (loop, CP_SIMPLE_PREHEADERS);
      create_bb_after_loop (loop);
    }

  /* Remove stmts not in the PARTITION bitmap.  */
  bbs = get_loop_body_in_dom_order (loop);

  /* First pass: detach debug uses of everything we are about to
     remove, so debug info does not keep dead SSA names alive.  */
  if (MAY_HAVE_DEBUG_STMTS)
    for (i = 0; i < loop->num_nodes; i++)
      {
	basic_block bb = bbs[i];

	for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	  {
	    gimple phi = gsi_stmt (bsi);
	    if (!virtual_operand_p (gimple_phi_result (phi))
		&& !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
	      reset_debug_uses (phi);
	  }

	for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	  {
	    gimple stmt = gsi_stmt (bsi);
	    if (gimple_code (stmt) != GIMPLE_LABEL
		&& !is_gimple_debug (stmt)
		&& !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
	      reset_debug_uses (stmt);
	  }
      }

  /* Second pass: remove the statements.  Iterators are only advanced
     when nothing was removed, because removal invalidates them.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
	{
	  gimple phi = gsi_stmt (bsi);
	  if (!virtual_operand_p (gimple_phi_result (phi))
	      && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
	    remove_phi_node (&bsi, true);
	  else
	    gsi_next (&bsi);
	}

      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
	{
	  gimple stmt = gsi_stmt (bsi);
	  if (gimple_code (stmt) != GIMPLE_LABEL
	      && !is_gimple_debug (stmt)
	      && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
	    {
	      /* Choose an arbitrary path through the empty CFG part
		 that this unnecessary control stmt controls.  */
	      if (gimple_code (stmt) == GIMPLE_COND)
		{
		  gimple_cond_make_false (stmt);
		  update_stmt (stmt);
		}
	      else if (gimple_code (stmt) == GIMPLE_SWITCH)
		{
		  gimple_switch_set_index
		      (stmt, CASE_LOW (gimple_switch_label (stmt, 1)));
		  update_stmt (stmt);
		}
	      else
		{
		  unlink_stmt_vdef (stmt);
		  gsi_remove (&bsi, true);
		  release_defs (stmt);
		  continue;
		}
	    }
	  gsi_next (&bsi);
	}
    }

  free (bbs);
}
780 /* Build the size argument for a memory operation call. */
782 static tree
783 build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter)
785 tree size;
786 size = fold_build2_loc (loc, MULT_EXPR, sizetype,
787 fold_convert_loc (loc, sizetype, nb_iter),
788 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
789 return fold_convert_loc (loc, size_type_node, size);
/* Build an address argument for a memory operation call: the base
   address of DR plus its constant and variable offsets.  For a
   negative stride the loop touches [end - NB_BYTES, end), so the
   offset is adjusted to point at the lowest accessed byte.  */

static tree
build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
{
  tree addr_base;

  addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
  addr_base = fold_convert_loc (loc, sizetype, addr_base);

  /* Test for a negative stride, iterating over every element.  */
  if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
    {
      addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
				  fold_convert_loc (loc, sizetype, nb_bytes));
      /* DR points at the last element; step back by NB_BYTES then
	 forward one element to land on the first byte written.  */
      addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
				  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
    }

  return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
}
/* If VAL memory representation contains the same value in all bytes,
   return that value, otherwise return -1.
   E.g. for 0x24242424 return 0x24, for IEEE double
   747708026454360457216.0 return 0x44, etc.  */

static int
const_with_all_bytes_same (tree val)
{
  unsigned char buf[64];
  int i, len;

  /* Zero constants (including empty, non-clobber constructors) are
     all-zero-bytes without needing to encode them.  */
  if (integer_zerop (val)
      || real_zerop (val)
      || (TREE_CODE (val) == CONSTRUCTOR
	  && !TREE_CLOBBER_P (val)
	  && CONSTRUCTOR_NELTS (val) == 0))
    return 0;

  /* The byte-equality check below assumes 8-bit bytes.  */
  if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
    return -1;

  len = native_encode_expr (val, buf, sizeof (buf));
  if (len == 0)
    return -1;
  for (i = 1; i < len; i++)
    if (buf[i] != buf[0])
      return -1;
  return buf[0];
}
/* Generate a call to memset for PARTITION in LOOP.  The call is
   emitted in the loop preheader; the stored value is reduced to a
   single byte (or converted to int) as memset requires.  */

static void
generate_memset_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree nb_iter, mem, fn, nb_bytes;
  location_t loc;
  tree val;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);
  if (gimple_bb (stmt) == loop->latch)
    nb_iter = number_of_latch_executions (loop);
  else
    nb_iter = number_of_exit_cond_executions (loop);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, nb_iter);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
				       false, GSI_CONTINUE_LINKING);
  mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);

  /* This exactly matches the pattern recognition in classify_partition.  */
  val = gimple_assign_rhs1 (stmt);
  /* Handle constants like 0x15151515 and similarly
     floating point constants etc. where all bytes are the same.  */
  int bytev = const_with_all_bytes_same (val);
  if (bytev != -1)
    val = build_int_cst (integer_type_node, bytev);
  else if (TREE_CODE (val) == INTEGER_CST)
    val = fold_convert (integer_type_node, val);
  else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
    {
      /* Widen the non-constant byte value to int for the memset
	 argument.  */
      gimple cstmt;
      tree tem = make_ssa_name (integer_type_node, NULL);
      cstmt = gimple_build_assign_with_ops (NOP_EXPR, tem, val, NULL_TREE);
      gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
      val = tem;
    }

  fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
  fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "generated memset");
      if (bytev == 0)
	fprintf (dump_file, " zero\n");
      else
	fprintf (dump_file, "\n");
    }
}
/* Generate a call to memcpy for PARTITION in LOOP.  Falls back to
   memmove when the source and destination may overlap.  The call is
   emitted in the loop preheader.  */

static void
generate_memcpy_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree nb_iter, dest, src, fn, nb_bytes;
  location_t loc;
  enum built_in_function kind;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);
  if (gimple_bb (stmt) == loop->latch)
    nb_iter = number_of_latch_executions (loop);
  else
    nb_iter = number_of_exit_cond_executions (loop);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, nb_iter);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
				       false, GSI_CONTINUE_LINKING);
  dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
  /* memcpy requires non-overlapping operands; use memmove when we
     cannot prove that.  */
  if (ptr_derefs_may_alias_p (dest, src))
    kind = BUILT_IN_MEMMOVE;
  else
    kind = BUILT_IN_MEMCPY;

  dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
				   false, GSI_CONTINUE_LINKING);
  src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);
  fn = build_fold_addr_expr (builtin_decl_implicit (kind));
  fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (kind == BUILT_IN_MEMCPY)
	fprintf (dump_file, "generated memcpy\n");
      else
	fprintf (dump_file, "generated memmove\n");
    }
}
/* Remove and destroy the loop LOOP: redirect its single exit to the
   preheader's source, delete all loop blocks, and repair dominators.
   Assumes LOOP has a single exit and no remaining non-virtual SSA
   uses outside the loop.  */

static void
destroy_loop (struct loop *loop)
{
  unsigned nbbs = loop->num_nodes;
  edge exit = single_exit (loop);
  basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
  basic_block *bbs;
  unsigned i;

  bbs = get_loop_body_in_dom_order (loop);

  redirect_edge_pred (exit, src);
  exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
  exit->flags |= EDGE_FALLTHRU;
  cancel_loop_tree (loop);
  rescan_loop_exit (exit, false, true);

  for (i = 0; i < nbbs; i++)
    {
      /* We have made sure to not leave any dangling uses of SSA
	 names defined in the loop.  With the exception of virtuals.
	 Make sure we replace all uses of virtual defs that will remain
	 outside of the loop with the bare symbol as delete_basic_block
	 will release them.  */
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple phi = gsi_stmt (gsi);
	  if (virtual_operand_p (gimple_phi_result (phi)))
	    mark_virtual_phi_result_for_renaming (phi);
	}
      for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple stmt = gsi_stmt (gsi);
	  tree vdef = gimple_vdef (stmt);
	  if (vdef && TREE_CODE (vdef) == SSA_NAME)
	    mark_virtual_operand_for_renaming (vdef);
	}
      delete_basic_block (bbs[i]);
    }
  free (bbs);

  set_immediate_dominator (CDI_DOMINATORS, dest,
			   recompute_dominator (CDI_DOMINATORS, dest));
}
/* Generates code for PARTITION: either a builtin call or a copy of
   the loop restricted to the partition's statements.  COPY_P is true
   for every partition except the last; when false the original loop
   is consumed (or destroyed, for builtins).  */

static void
generate_code_for_partition (struct loop *loop,
			     partition_t partition, bool copy_p)
{
  switch (partition->kind)
    {
    case PKIND_MEMSET:
      generate_memset_builtin (loop, partition);
      /* If this is the last partition for which we generate code, we have
	 to destroy the loop.  */
      if (!copy_p)
	destroy_loop (loop);
      break;

    case PKIND_MEMCPY:
      generate_memcpy_builtin (loop, partition);
      /* If this is the last partition for which we generate code, we have
	 to destroy the loop.  */
      if (!copy_p)
	destroy_loop (loop);
      break;

    case PKIND_REDUCTION:
      /* Reductions all have to be in the last partition.  */
      gcc_assert (!copy_p);
      /* FALLTHRU: reductions are generated as a normal loop.  */
    case PKIND_NORMAL:
      generate_loops_for_partition (loop, partition, copy_p);
      break;

    default:
      gcc_unreachable ();
    }
}
1037 /* Returns true if the node V of RDG cannot be recomputed. */
1039 static bool
1040 rdg_cannot_recompute_vertex_p (struct graph *rdg, int v)
1042 if (RDG_MEM_WRITE_STMT (rdg, v))
1043 return true;
1045 return false;
/* Returns true when the vertex V has already been generated in the
   current partition (V is in PROCESSED).  NOTE(review): the historic
   comment also mentioned a REMAINING_STMTS bitmap, but this version
   only consults PROCESSED.  */

static inline bool
already_processed_vertex_p (bitmap processed, int v)
{
  return bitmap_bit_p (processed, v);
}
static void rdg_flag_vertex_and_dependent (struct graph *, int, partition_t,
					   bitmap);

/* Flag V from RDG as part of PARTITION, and also flag its loop number
   in LOOPS.  No-op when V is already in the partition.  */

static void
rdg_flag_vertex (struct graph *rdg, int v, partition_t partition)
{
  struct loop *loop;

  /* bitmap_set_bit returns false when the bit was already set.  */
  if (!bitmap_set_bit (partition->stmts, v))
    return;

  loop = loop_containing_stmt (RDG_STMT (rdg, v));
  bitmap_set_bit (partition->loops, loop->num);

  if (rdg_cannot_recompute_vertex_p (rdg, v))
    partition->has_writes = true;
}
/* Flag in the bitmap PARTITION the vertex V and all its predecessors.
   Also flag their loop number in LOOPS.  Uses a reverse DFS from V;
   PROCESSED guards against flagging a vertex twice.  */

static void
rdg_flag_vertex_and_dependent (struct graph *rdg, int v, partition_t partition,
			       bitmap processed)
{
  unsigned i;
  vec<int> nodes;
  nodes.create (3);
  int x;

  graphds_dfs (rdg, &v, 1, &nodes, false, NULL);

  FOR_EACH_VEC_ELT (nodes, i, x)
    if (bitmap_set_bit (processed, x))
      rdg_flag_vertex (rdg, x, partition);

  nodes.release ();
}
1100 /* Returns a partition with all the statements needed for computing
1101 the vertex V of the RDG, also including the loop exit conditions. */
1103 static partition_t
1104 build_rdg_partition_for_vertex (struct graph *rdg, int v)
1106 partition_t partition = partition_alloc (NULL, NULL);
1107 bitmap processed = BITMAP_ALLOC (NULL);
1108 rdg_flag_vertex_and_dependent (rdg, v, partition, processed);
1109 BITMAP_FREE (processed);
1110 return partition;
/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
   Sets partition->kind (and main_dr/secondary_dr for builtins):
   PKIND_REDUCTION when scalar values escape the loop, PKIND_MEMSET
   for a single store of an all-bytes-equal value, PKIND_MEMCPY for a
   single load feeding a single store, PKIND_NORMAL otherwise.  */

static void
classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
{
  bitmap_iterator bi;
  unsigned i;
  tree nb_iter;
  data_reference_p single_load, single_store;
  bool volatiles_p = false;

  partition->kind = PKIND_NORMAL;
  partition->main_dr = NULL;
  partition->secondary_dr = NULL;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);

      if (gimple_has_volatile_ops (stmt))
	volatiles_p = true;

      /* If the stmt has uses outside of the loop fail.
	 ??? If the stmt is generated in another partition that
	 is not created as builtin we can ignore this.  */
      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "not generating builtin, partition has "
		     "scalar uses outside of the loop\n");
	  partition->kind = PKIND_REDUCTION;
	  return;
	}
    }

  /* Perform general partition disqualification for builtins.  */
  if (volatiles_p
      || !flag_tree_loop_distribute_patterns)
    return;

  /* A builtin needs a known iteration count.  */
  nb_iter = number_of_exit_cond_executions (loop);
  if (!nb_iter || nb_iter == chrec_dont_know)
    return;

  /* Detect memset and memcpy.  */
  single_load = NULL;
  single_store = NULL;
  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);
      data_reference_p dr;
      unsigned j;

      if (gimple_code (stmt) == GIMPLE_PHI)
	continue;

      /* Any scalar stmts are ok.  */
      if (!gimple_vuse (stmt))
	continue;

      /* Otherwise just regular loads/stores.  */
      if (!gimple_assign_single_p (stmt))
	return;

      /* But exactly one store and/or load.  */
      for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
	{
	  if (DR_IS_READ (dr))
	    {
	      if (single_load != NULL)
		return;
	      single_load = dr;
	    }
	  else
	    {
	      if (single_store != NULL)
		return;
	      single_store = dr;
	    }
	}
    }

  if (single_store && !single_load)
    {
      /* memset candidate: the stored value must be expressible as a
	 repeated byte, and must not be computed inside the loop.  */
      gimple stmt = DR_STMT (single_store);
      tree rhs = gimple_assign_rhs1 (stmt);
      if (const_with_all_bytes_same (rhs) == -1
	  && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
	      || (TYPE_MODE (TREE_TYPE (rhs))
		  != TYPE_MODE (unsigned_char_type_node))))
	return;
      if (TREE_CODE (rhs) == SSA_NAME
	  && !SSA_NAME_IS_DEFAULT_DEF (rhs)
	  && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
	return;
      if (!adjacent_dr_p (single_store))
	return;
      partition->kind = PKIND_MEMSET;
      partition->main_dr = single_store;
    }
  else if (single_store && single_load)
    {
      gimple store = DR_STMT (single_store);
      gimple load = DR_STMT (single_load);
      /* Direct aggregate copy or via an SSA name temporary.  */
      if (load != store
	  && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
	return;
      /* Both references must advance contiguously with the same step.  */
      if (!adjacent_dr_p (single_store)
	  || !adjacent_dr_p (single_load)
	  || !operand_equal_p (DR_STEP (single_store),
			       DR_STEP (single_load), 0))
	return;
      /* Now check that if there is a dependence this dependence is
	 of a suitable form for memmove.  */
      vec<loop_p> loops = vNULL;
      ddr_p ddr;
      loops.safe_push (loop);
      ddr = initialize_data_dependence_relation (single_load, single_store,
						 loops);
      compute_affine_dependence (ddr, loop);
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	{
	  free_dependence_relation (ddr);
	  loops.release ();
	  return;
	}
      if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
	{
	  if (DDR_NUM_DIST_VECTS (ddr) == 0)
	    {
	      free_dependence_relation (ddr);
	      loops.release ();
	      return;
	    }
	  lambda_vector dist_v;
	  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
	    {
	      int dist = dist_v[index_in_loop_nest (loop->num,
						    DDR_LOOP_NEST (ddr))];
	      /* A positive, non-reversed distance means the store
		 would overwrite bytes the load still needs: not even
		 memmove can handle that.  */
	      if (dist > 0 && !DDR_REVERSED_P (ddr))
		{
		  free_dependence_relation (ddr);
		  loops.release ();
		  return;
		}
	    }
	}
      free_dependence_relation (ddr);
      loops.release ();
      partition->kind = PKIND_MEMCPY;
      partition->main_dr = single_store;
      partition->secondary_dr = single_load;
    }
}
1270 /* For a data reference REF, return the declaration of its base
1271 address or NULL_TREE if the base is not determined. */
1273 static tree
1274 ref_base_address (data_reference_p dr)
1276 tree base_address = DR_BASE_ADDRESS (dr);
1277 if (base_address
1278 && TREE_CODE (base_address) == ADDR_EXPR)
1279 return TREE_OPERAND (base_address, 0);
1281 return base_address;
1284 /* Returns true when PARTITION1 and PARTITION2 have similar memory
1285 accesses in RDG. */
1287 static bool
1288 similar_memory_accesses (struct graph *rdg, partition_t partition1,
1289 partition_t partition2)
1291 unsigned i, j, k, l;
1292 bitmap_iterator bi, bj;
1293 data_reference_p ref1, ref2;
1295 /* First check whether in the intersection of the two partitions are
1296 any loads or stores. Common loads are the situation that happens
1297 most often. */
1298 EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
1299 if (RDG_MEM_WRITE_STMT (rdg, i)
1300 || RDG_MEM_READS_STMT (rdg, i))
1301 return true;
1303 /* Then check all data-references against each other. */
1304 EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
1305 if (RDG_MEM_WRITE_STMT (rdg, i)
1306 || RDG_MEM_READS_STMT (rdg, i))
1307 EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
1308 if (RDG_MEM_WRITE_STMT (rdg, j)
1309 || RDG_MEM_READS_STMT (rdg, j))
1311 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
1313 tree base1 = ref_base_address (ref1);
1314 if (base1)
1315 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
1316 if (base1 == ref_base_address (ref2))
1317 return true;
1321 return false;
1324 /* Aggregate several components into a useful partition that is
1325 registered in the PARTITIONS vector. Partitions will be
1326 distributed in different loops. */
1328 static void
1329 rdg_build_partitions (struct graph *rdg,
1330 vec<gimple> starting_stmts,
1331 vec<partition_t> *partitions)
1333 bitmap processed = BITMAP_ALLOC (NULL);
1334 int i;
1335 gimple stmt;
1336 partition_t partition = partition_alloc (NULL, NULL);
1338 FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
1340 partition_t np;
1341 int v = rdg_vertex_for_stmt (rdg, stmt);
1343 if (dump_file && (dump_flags & TDF_DETAILS))
1344 fprintf (dump_file,
1345 "ldist asked to generate code for vertex %d\n", v);
1347 if (bitmap_bit_p (processed, v))
1348 continue;
1350 np = build_rdg_partition_for_vertex (rdg, v);
1351 bitmap_ior_into (partition->stmts, np->stmts);
1352 partition->has_writes = partition_has_writes (np);
1353 bitmap_ior_into (processed, np->stmts);
1354 partition_free (np);
1356 if (partition_has_writes (partition))
1358 if (dump_file && (dump_flags & TDF_DETAILS))
1360 fprintf (dump_file, "ldist useful partition:\n");
1361 dump_bitmap (dump_file, partition->stmts);
1364 partitions->safe_push (partition);
1365 partition = partition_alloc (NULL, NULL);
1369 /* All vertices should have been assigned to at least one partition now,
1370 other than vertices belonging to dead code. */
1372 if (!bitmap_empty_p (partition->stmts))
1374 if (dump_file && (dump_flags & TDF_DETAILS))
1376 fprintf (dump_file, "remaining partition:\n");
1377 dump_bitmap (dump_file, partition->stmts);
1380 partitions->safe_push (partition);
1382 else
1383 partition_free (partition);
1385 BITMAP_FREE (processed);
1388 /* Dump to FILE the PARTITIONS. */
1390 static void
1391 dump_rdg_partitions (FILE *file, vec<partition_t> partitions)
1393 int i;
1394 partition_t partition;
1396 FOR_EACH_VEC_ELT (partitions, i, partition)
1397 debug_bitmap_file (file, partition->stmts);
1400 /* Debug PARTITIONS. */
1401 extern void debug_rdg_partitions (vec<partition_t> );
1403 DEBUG_FUNCTION void
1404 debug_rdg_partitions (vec<partition_t> partitions)
1406 dump_rdg_partitions (stderr, partitions);
1409 /* Returns the number of read and write operations in the RDG. */
1411 static int
1412 number_of_rw_in_rdg (struct graph *rdg)
1414 int i, res = 0;
1416 for (i = 0; i < rdg->n_vertices; i++)
1418 if (RDG_MEM_WRITE_STMT (rdg, i))
1419 ++res;
1421 if (RDG_MEM_READS_STMT (rdg, i))
1422 ++res;
1425 return res;
1428 /* Returns the number of read and write operations in a PARTITION of
1429 the RDG. */
1431 static int
1432 number_of_rw_in_partition (struct graph *rdg, partition_t partition)
1434 int res = 0;
1435 unsigned i;
1436 bitmap_iterator ii;
1438 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
1440 if (RDG_MEM_WRITE_STMT (rdg, i))
1441 ++res;
1443 if (RDG_MEM_READS_STMT (rdg, i))
1444 ++res;
1447 return res;
1450 /* Returns true when one of the PARTITIONS contains all the read or
1451 write operations of RDG. */
1453 static bool
1454 partition_contains_all_rw (struct graph *rdg,
1455 vec<partition_t> partitions)
1457 int i;
1458 partition_t partition;
1459 int nrw = number_of_rw_in_rdg (rdg);
1461 FOR_EACH_VEC_ELT (partitions, i, partition)
1462 if (nrw == number_of_rw_in_partition (rdg, partition))
1463 return true;
1465 return false;
1469 /* Distributes the code from LOOP in such a way that producer
1470 statements are placed before consumer statements. Tries to separate
1471 only the statements from STMTS into separate loops.
1472 Returns the number of distributed loops. */
1474 static int
1475 distribute_loop (struct loop *loop, vec<gimple> stmts,
1476 control_dependences *cd)
1478 struct graph *rdg;
1479 vec<loop_p> loop_nest;
1480 vec<partition_t> partitions;
1481 partition_t partition;
1482 bool any_builtin;
1483 int i, nbp;
1485 loop_nest.create (3);
1486 if (!find_loop_nest (loop, &loop_nest))
1488 loop_nest.release ();
1489 return 0;
1492 rdg = build_rdg (loop_nest, cd);
1493 if (!rdg)
1495 if (dump_file && (dump_flags & TDF_DETAILS))
1496 fprintf (dump_file,
1497 "Loop %d not distributed: failed to build the RDG.\n",
1498 loop->num);
1500 loop_nest.release ();
1501 return 0;
1504 if (dump_file && (dump_flags & TDF_DETAILS))
1505 dump_rdg (dump_file, rdg);
1507 partitions.create (3);
1508 rdg_build_partitions (rdg, stmts, &partitions);
1510 any_builtin = false;
1511 FOR_EACH_VEC_ELT (partitions, i, partition)
1513 classify_partition (loop, rdg, partition);
1514 any_builtin |= partition_builtin_p (partition);
1517 /* If we did not detect any builtin but are not asked to apply
1518 regular loop distribution simply bail out. */
1519 if (!flag_tree_loop_distribution
1520 && !any_builtin)
1522 nbp = 0;
1523 goto ldist_done;
1526 /* Apply our simple cost model - fuse partitions with similar
1527 memory accesses. */
1528 partition_t into;
1529 for (i = 0; partitions.iterate (i, &into); ++i)
1531 if (partition_builtin_p (into))
1532 continue;
1533 for (int j = i + 1;
1534 partitions.iterate (j, &partition); ++j)
1536 if (!partition_builtin_p (partition)
1537 && similar_memory_accesses (rdg, into, partition))
1539 if (dump_file && (dump_flags & TDF_DETAILS))
1541 fprintf (dump_file, "fusing partitions\n");
1542 dump_bitmap (dump_file, into->stmts);
1543 dump_bitmap (dump_file, partition->stmts);
1544 fprintf (dump_file, "because they have similar "
1545 "memory accesses\n");
1547 bitmap_ior_into (into->stmts, partition->stmts);
1548 if (partition->kind == PKIND_REDUCTION)
1549 into->kind = PKIND_REDUCTION;
1550 partitions.ordered_remove (j);
1551 partition_free (partition);
1552 j--;
1557 /* If we are only distributing patterns fuse all partitions that
1558 were not properly classified as builtins. */
1559 if (!flag_tree_loop_distribution)
1561 partition_t into;
1562 /* Only fuse adjacent non-builtin partitions, see PR53616.
1563 ??? Use dependence information to improve partition ordering. */
1564 i = 0;
1567 for (; partitions.iterate (i, &into); ++i)
1568 if (!partition_builtin_p (into))
1569 break;
1570 for (++i; partitions.iterate (i, &partition); ++i)
1571 if (!partition_builtin_p (partition))
1573 bitmap_ior_into (into->stmts, partition->stmts);
1574 if (partition->kind == PKIND_REDUCTION)
1575 into->kind = PKIND_REDUCTION;
1576 partitions.ordered_remove (i);
1577 partition_free (partition);
1578 i--;
1580 else
1581 break;
1583 while ((unsigned) i < partitions.length ());
1586 /* Fuse all reduction partitions into the last. */
1587 if (partitions.length () > 1)
1589 partition_t into = partitions.last ();
1590 for (i = partitions.length () - 2; i >= 0; --i)
1592 partition_t what = partitions[i];
1593 if (what->kind == PKIND_REDUCTION)
1595 if (dump_file && (dump_flags & TDF_DETAILS))
1597 fprintf (dump_file, "fusing partitions\n");
1598 dump_bitmap (dump_file, into->stmts);
1599 dump_bitmap (dump_file, what->stmts);
1600 fprintf (dump_file, "because the latter has reductions\n");
1602 bitmap_ior_into (into->stmts, what->stmts);
1603 into->kind = PKIND_REDUCTION;
1604 partitions.ordered_remove (i);
1605 partition_free (what);
1610 nbp = partitions.length ();
1611 if (nbp == 0
1612 || (nbp == 1 && !partition_builtin_p (partitions[0]))
1613 || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
1615 nbp = 0;
1616 goto ldist_done;
1619 if (dump_file && (dump_flags & TDF_DETAILS))
1620 dump_rdg_partitions (dump_file, partitions);
1622 FOR_EACH_VEC_ELT (partitions, i, partition)
1623 generate_code_for_partition (loop, partition, i < nbp - 1);
1625 ldist_done:
1627 FOR_EACH_VEC_ELT (partitions, i, partition)
1628 partition_free (partition);
1629 partitions.release ();
1631 free_rdg (rdg);
1632 loop_nest.release ();
1633 return nbp;
1636 /* Distribute all loops in the current function. */
1638 static unsigned int
1639 tree_loop_distribution (void)
1641 struct loop *loop;
1642 loop_iterator li;
1643 bool changed = false;
1644 basic_block bb;
1645 control_dependences *cd = NULL;
1647 FOR_ALL_BB (bb)
1649 gimple_stmt_iterator gsi;
1650 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1651 gimple_set_uid (gsi_stmt (gsi), -1);
1652 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1653 gimple_set_uid (gsi_stmt (gsi), -1);
1656 /* We can at the moment only distribute non-nested loops, thus restrict
1657 walking to innermost loops. */
1658 FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
1660 vec<gimple> work_list = vNULL;
1661 basic_block *bbs;
1662 int num = loop->num;
1663 int nb_generated_loops = 0;
1664 unsigned int i;
1666 /* If the loop doesn't have a single exit we will fail anyway,
1667 so do that early. */
1668 if (!single_exit (loop))
1669 continue;
1671 /* Only optimize hot loops. */
1672 if (!optimize_loop_for_speed_p (loop))
1673 continue;
1675 /* Initialize the worklist with stmts we seed the partitions with. */
1676 bbs = get_loop_body_in_dom_order (loop);
1677 for (i = 0; i < loop->num_nodes; ++i)
1679 gimple_stmt_iterator gsi;
1680 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
1682 gimple phi = gsi_stmt (gsi);
1683 if (virtual_operand_p (gimple_phi_result (phi)))
1684 continue;
1685 /* Distribute stmts which have defs that are used outside of
1686 the loop. */
1687 if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
1688 continue;
1689 work_list.safe_push (phi);
1691 for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
1693 gimple stmt = gsi_stmt (gsi);
1695 /* If there is a stmt with side-effects bail out - we
1696 cannot and should not distribute this loop. */
1697 if (gimple_has_side_effects (stmt))
1699 work_list.truncate (0);
1700 goto out;
1703 /* Distribute stmts which have defs that are used outside of
1704 the loop. */
1705 if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
1707 /* Otherwise only distribute stores for now. */
1708 else if (!gimple_assign_single_p (stmt)
1709 || is_gimple_reg (gimple_assign_lhs (stmt)))
1710 continue;
1712 work_list.safe_push (stmt);
1715 out:
1716 free (bbs);
1718 if (work_list.length () > 0)
1720 if (!cd)
1722 calculate_dominance_info (CDI_POST_DOMINATORS);
1723 cd = new control_dependences (create_edge_list ());
1724 free_dominance_info (CDI_POST_DOMINATORS);
1726 nb_generated_loops = distribute_loop (loop, work_list, cd);
1729 if (nb_generated_loops > 0)
1730 changed = true;
1732 if (dump_file && (dump_flags & TDF_DETAILS))
1734 if (nb_generated_loops > 1)
1735 fprintf (dump_file, "Loop %d distributed: split to %d loops.\n",
1736 num, nb_generated_loops);
1737 else
1738 fprintf (dump_file, "Loop %d is the same.\n", num);
1741 work_list.release ();
1744 if (cd)
1745 delete cd;
1747 if (changed)
1749 mark_virtual_operands_for_renaming (cfun);
1750 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1753 #ifdef ENABLE_CHECKING
1754 verify_loop_structure ();
1755 #endif
1757 return 0;
1760 static bool
1761 gate_tree_loop_distribution (void)
1763 return flag_tree_loop_distribution
1764 || flag_tree_loop_distribute_patterns;
1767 namespace {
1769 const pass_data pass_data_loop_distribution =
1771 GIMPLE_PASS, /* type */
1772 "ldist", /* name */
1773 OPTGROUP_LOOP, /* optinfo_flags */
1774 true, /* has_gate */
1775 true, /* has_execute */
1776 TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
1777 ( PROP_cfg | PROP_ssa ), /* properties_required */
1778 0, /* properties_provided */
1779 0, /* properties_destroyed */
1780 0, /* todo_flags_start */
1781 TODO_verify_ssa, /* todo_flags_finish */
1784 class pass_loop_distribution : public gimple_opt_pass
1786 public:
1787 pass_loop_distribution(gcc::context *ctxt)
1788 : gimple_opt_pass(pass_data_loop_distribution, ctxt)
1791 /* opt_pass methods: */
1792 bool gate () { return gate_tree_loop_distribution (); }
1793 unsigned int execute () { return tree_loop_distribution (); }
1795 }; // class pass_loop_distribution
1797 } // anon namespace
1799 gimple_opt_pass *
1800 make_pass_loop_distribution (gcc::context *ctxt)
1802 return new pass_loop_distribution (ctxt);