Daily bump.
[official-gcc.git] / gcc / tree-loop-distribution.c
blob353ce247de0550fe44e15fad44986e44e261e11b
1 /* Loop distribution.
2 Copyright (C) 2006-2013 Free Software Foundation, Inc.
3 Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
4 and Sebastian Pop <sebastian.pop@amd.com>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 3, or (at your option) any
11 later version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* This pass performs loop distribution: for example, the loop
24 |DO I = 2, N
25 | A(I) = B(I) + C
26 | D(I) = A(I-1)*E
27 |ENDDO
29 is transformed to
31 |DOALL I = 2, N
32 | A(I) = B(I) + C
33 |ENDDO
35 |DOALL I = 2, N
36 | D(I) = A(I-1)*E
37 |ENDDO
39 This pass uses an RDG, Reduced Dependence Graph built on top of the
40 data dependence relations. The RDG is then topologically sorted to
41 obtain a map of information producers/consumers based on which it
42 generates the new loops. */
44 #include "config.h"
45 #include "system.h"
46 #include "coretypes.h"
47 #include "tree.h"
48 #include "gimple.h"
49 #include "gimple-ssa.h"
50 #include "tree-cfg.h"
51 #include "tree-phinodes.h"
52 #include "ssa-iterators.h"
53 #include "tree-ssanames.h"
54 #include "tree-ssa-loop-manip.h"
55 #include "tree-ssa-loop.h"
56 #include "tree-into-ssa.h"
57 #include "tree-ssa.h"
58 #include "cfgloop.h"
59 #include "tree-chrec.h"
60 #include "tree-data-ref.h"
61 #include "tree-scalar-evolution.h"
62 #include "tree-pass.h"
63 #include "gimple-pretty-print.h"
64 #include "tree-vectorizer.h"
67 /* A Reduced Dependence Graph (RDG) vertex representing a statement. */
68 typedef struct rdg_vertex
70 /* The statement represented by this vertex. */
71 gimple stmt;
73 /* Vector of data-references in this statement. */
74 vec<data_reference_p> datarefs;
76 /* True when the statement contains a write to memory. */
77 bool has_mem_write;
79 /* True when the statement contains a read from memory. */
80 bool has_mem_reads;
81 } *rdg_vertex_p;
83 #define RDGV_STMT(V) ((struct rdg_vertex *) ((V)->data))->stmt
84 #define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
85 #define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
86 #define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
87 #define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
88 #define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
89 #define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
90 #define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
/* Data dependence type.  */

enum rdg_dep_type
{
  /* Read After Write (RAW).  */
  flow_dd = 'f',

  /* Control dependence (execute conditional on).  */
  control_dd = 'c'
};

/* Dependence information attached to an edge of the RDG.  */

typedef struct rdg_edge
{
  /* Type of the dependence.  */
  enum rdg_dep_type type;
} *rdg_edge_p;

/* Accessor casting the generic edge payload to the rdg_edge type.  */
#define RDGE_TYPE(E) ((struct rdg_edge *) ((E)->data))->type
113 /* Dump vertex I in RDG to FILE. */
115 static void
116 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
118 struct vertex *v = &(rdg->vertices[i]);
119 struct graph_edge *e;
121 fprintf (file, "(vertex %d: (%s%s) (in:", i,
122 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
123 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
125 if (v->pred)
126 for (e = v->pred; e; e = e->pred_next)
127 fprintf (file, " %d", e->src);
129 fprintf (file, ") (out:");
131 if (v->succ)
132 for (e = v->succ; e; e = e->succ_next)
133 fprintf (file, " %d", e->dest);
135 fprintf (file, ")\n");
136 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
137 fprintf (file, ")\n");
140 /* Call dump_rdg_vertex on stderr. */
142 DEBUG_FUNCTION void
143 debug_rdg_vertex (struct graph *rdg, int i)
145 dump_rdg_vertex (stderr, rdg, i);
148 /* Dump the reduced dependence graph RDG to FILE. */
150 static void
151 dump_rdg (FILE *file, struct graph *rdg)
153 fprintf (file, "(rdg\n");
154 for (int i = 0; i < rdg->n_vertices; i++)
155 dump_rdg_vertex (file, rdg, i);
156 fprintf (file, ")\n");
159 /* Call dump_rdg on stderr. */
161 DEBUG_FUNCTION void
162 debug_rdg (struct graph *rdg)
164 dump_rdg (stderr, rdg);
167 static void
168 dot_rdg_1 (FILE *file, struct graph *rdg)
170 int i;
171 pretty_printer buffer;
172 pp_needs_newline (&buffer) = false;
173 buffer.buffer->stream = file;
175 fprintf (file, "digraph RDG {\n");
177 for (i = 0; i < rdg->n_vertices; i++)
179 struct vertex *v = &(rdg->vertices[i]);
180 struct graph_edge *e;
182 fprintf (file, "%d [label=\"[%d] ", i, i);
183 pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
184 pp_flush (&buffer);
185 fprintf (file, "\"]\n");
187 /* Highlight reads from memory. */
188 if (RDG_MEM_READS_STMT (rdg, i))
189 fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
191 /* Highlight stores to memory. */
192 if (RDG_MEM_WRITE_STMT (rdg, i))
193 fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
195 if (v->succ)
196 for (e = v->succ; e; e = e->succ_next)
197 switch (RDGE_TYPE (e))
199 case flow_dd:
200 /* These are the most common dependences: don't print these. */
201 fprintf (file, "%d -> %d \n", i, e->dest);
202 break;
204 case control_dd:
205 fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
206 break;
208 default:
209 gcc_unreachable ();
213 fprintf (file, "}\n\n");
216 /* Display the Reduced Dependence Graph using dotty. */
218 DEBUG_FUNCTION void
219 dot_rdg (struct graph *rdg)
221 /* When debugging, you may want to enable the following code. */
222 #if 1
223 FILE *file = popen ("dot -Tx11", "w");
224 if (!file)
225 return;
226 dot_rdg_1 (file, rdg);
227 fflush (file);
228 close (fileno (file));
229 pclose (file);
230 #else
231 dot_rdg_1 (stderr, rdg);
232 #endif
235 /* Returns the index of STMT in RDG. */
237 static int
238 rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple stmt)
240 int index = gimple_uid (stmt);
241 gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
242 return index;
245 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
246 the index of DEF in RDG. */
248 static void
249 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
251 use_operand_p imm_use_p;
252 imm_use_iterator iterator;
254 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
256 struct graph_edge *e;
257 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
259 if (use < 0)
260 continue;
262 e = add_edge (rdg, idef, use);
263 e->data = XNEW (struct rdg_edge);
264 RDGE_TYPE (e) = flow_dd;
268 /* Creates an edge for the control dependences of BB to the vertex V. */
270 static void
271 create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
272 int v, control_dependences *cd)
274 bitmap_iterator bi;
275 unsigned edge_n;
276 EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
277 0, edge_n, bi)
279 basic_block cond_bb = cd->get_edge (edge_n)->src;
280 gimple stmt = last_stmt (cond_bb);
281 if (stmt && is_ctrl_stmt (stmt))
283 struct graph_edge *e;
284 int c = rdg_vertex_for_stmt (rdg, stmt);
285 if (c < 0)
286 continue;
288 e = add_edge (rdg, c, v);
289 e->data = XNEW (struct rdg_edge);
290 RDGE_TYPE (e) = control_dd;
295 /* Creates the edges of the reduced dependence graph RDG. */
297 static void
298 create_rdg_flow_edges (struct graph *rdg)
300 int i;
301 def_operand_p def_p;
302 ssa_op_iter iter;
304 for (i = 0; i < rdg->n_vertices; i++)
305 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
306 iter, SSA_OP_DEF)
307 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
310 /* Creates the edges of the reduced dependence graph RDG. */
312 static void
313 create_rdg_cd_edges (struct graph *rdg, control_dependences *cd)
315 int i;
317 for (i = 0; i < rdg->n_vertices; i++)
319 gimple stmt = RDG_STMT (rdg, i);
320 if (gimple_code (stmt) == GIMPLE_PHI)
322 edge_iterator ei;
323 edge e;
324 FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
325 create_edge_for_control_dependence (rdg, e->src, i, cd);
327 else
328 create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
332 /* Build the vertices of the reduced dependence graph RDG. Return false
333 if that failed. */
335 static bool
336 create_rdg_vertices (struct graph *rdg, vec<gimple> stmts, loop_p loop,
337 vec<data_reference_p> *datarefs)
339 int i;
340 gimple stmt;
342 FOR_EACH_VEC_ELT (stmts, i, stmt)
344 struct vertex *v = &(rdg->vertices[i]);
346 /* Record statement to vertex mapping. */
347 gimple_set_uid (stmt, i);
349 v->data = XNEW (struct rdg_vertex);
350 RDGV_STMT (v) = stmt;
351 RDGV_DATAREFS (v).create (0);
352 RDGV_HAS_MEM_WRITE (v) = false;
353 RDGV_HAS_MEM_READS (v) = false;
354 if (gimple_code (stmt) == GIMPLE_PHI)
355 continue;
357 unsigned drp = datarefs->length ();
358 if (!find_data_references_in_stmt (loop, stmt, datarefs))
359 return false;
360 for (unsigned j = drp; j < datarefs->length (); ++j)
362 data_reference_p dr = (*datarefs)[j];
363 if (DR_IS_READ (dr))
364 RDGV_HAS_MEM_READS (v) = true;
365 else
366 RDGV_HAS_MEM_WRITE (v) = true;
367 RDGV_DATAREFS (v).safe_push (dr);
370 return true;
373 /* Initialize STMTS with all the statements of LOOP. The order in
374 which we discover statements is important as
375 generate_loops_for_partition is using the same traversal for
376 identifying statements in loop copies. */
378 static void
379 stmts_from_loop (struct loop *loop, vec<gimple> *stmts)
381 unsigned int i;
382 basic_block *bbs = get_loop_body_in_dom_order (loop);
384 for (i = 0; i < loop->num_nodes; i++)
386 basic_block bb = bbs[i];
387 gimple_stmt_iterator bsi;
388 gimple stmt;
390 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
391 if (!virtual_operand_p (gimple_phi_result (gsi_stmt (bsi))))
392 stmts->safe_push (gsi_stmt (bsi));
394 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
396 stmt = gsi_stmt (bsi);
397 if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
398 stmts->safe_push (stmt);
402 free (bbs);
405 /* Free the reduced dependence graph RDG. */
407 static void
408 free_rdg (struct graph *rdg)
410 int i;
412 for (i = 0; i < rdg->n_vertices; i++)
414 struct vertex *v = &(rdg->vertices[i]);
415 struct graph_edge *e;
417 for (e = v->succ; e; e = e->succ_next)
418 free (e->data);
420 if (v->data)
422 gimple_set_uid (RDGV_STMT (v), -1);
423 free_data_refs (RDGV_DATAREFS (v));
424 free (v->data);
428 free_graph (rdg);
431 /* Build the Reduced Dependence Graph (RDG) with one vertex per
432 statement of the loop nest LOOP_NEST, and one edge per data dependence or
433 scalar dependence. */
435 static struct graph *
436 build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
438 struct graph *rdg;
439 vec<gimple> stmts;
440 vec<data_reference_p> datarefs;
442 /* Create the RDG vertices from the stmts of the loop nest. */
443 stmts.create (10);
444 stmts_from_loop (loop_nest[0], &stmts);
445 rdg = new_graph (stmts.length ());
446 datarefs.create (10);
447 if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs))
449 stmts.release ();
450 datarefs.release ();
451 free_rdg (rdg);
452 return NULL;
454 stmts.release ();
456 create_rdg_flow_edges (rdg);
457 if (cd)
458 create_rdg_cd_edges (rdg, cd);
460 datarefs.release ();
462 return rdg;
467 enum partition_kind {
468 PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY
471 typedef struct partition_s
473 bitmap stmts;
474 bitmap loops;
475 bool reduction_p;
476 enum partition_kind kind;
477 /* data-references a kind != PKIND_NORMAL partition is about. */
478 data_reference_p main_dr;
479 data_reference_p secondary_dr;
480 tree niter;
481 } *partition_t;
484 /* Allocate and initialize a partition from BITMAP. */
486 static partition_t
487 partition_alloc (bitmap stmts, bitmap loops)
489 partition_t partition = XCNEW (struct partition_s);
490 partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
491 partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
492 partition->reduction_p = false;
493 partition->kind = PKIND_NORMAL;
494 return partition;
497 /* Free PARTITION. */
499 static void
500 partition_free (partition_t partition)
502 BITMAP_FREE (partition->stmts);
503 BITMAP_FREE (partition->loops);
504 free (partition);
507 /* Returns true if the partition can be generated as a builtin. */
509 static bool
510 partition_builtin_p (partition_t partition)
512 return partition->kind != PKIND_NORMAL;
515 /* Returns true if the partition contains a reduction. */
517 static bool
518 partition_reduction_p (partition_t partition)
520 return partition->reduction_p;
523 /* Merge PARTITION into the partition DEST. */
525 static void
526 partition_merge_into (partition_t dest, partition_t partition)
528 dest->kind = PKIND_NORMAL;
529 bitmap_ior_into (dest->stmts, partition->stmts);
530 if (partition_reduction_p (partition))
531 dest->reduction_p = true;
535 /* Returns true when DEF is an SSA_NAME defined in LOOP and used after
536 the LOOP. */
538 static bool
539 ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
541 imm_use_iterator imm_iter;
542 use_operand_p use_p;
544 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
546 gimple use_stmt = USE_STMT (use_p);
547 if (!is_gimple_debug (use_stmt)
548 && loop != loop_containing_stmt (use_stmt))
549 return true;
552 return false;
555 /* Returns true when STMT defines a scalar variable used after the
556 loop LOOP. */
558 static bool
559 stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple stmt)
561 def_operand_p def_p;
562 ssa_op_iter op_iter;
564 if (gimple_code (stmt) == GIMPLE_PHI)
565 return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);
567 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
568 if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
569 return true;
571 return false;
574 /* Return a copy of LOOP placed before LOOP. */
576 static struct loop *
577 copy_loop_before (struct loop *loop)
579 struct loop *res;
580 edge preheader = loop_preheader_edge (loop);
582 initialize_original_copy_tables ();
583 res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, preheader);
584 gcc_assert (res != NULL);
585 free_original_copy_tables ();
586 delete_update_ssa ();
588 return res;
591 /* Creates an empty basic block after LOOP. */
593 static void
594 create_bb_after_loop (struct loop *loop)
596 edge exit = single_exit (loop);
598 if (!exit)
599 return;
601 split_edge (exit);
604 /* Generate code for PARTITION from the code in LOOP. The loop is
605 copied when COPY_P is true. All the statements not flagged in the
606 PARTITION bitmap are removed from the loop or from its copy. The
607 statements are indexed in sequence inside a basic block, and the
608 basic blocks of a loop are taken in dom order. */
610 static void
611 generate_loops_for_partition (struct loop *loop, partition_t partition,
612 bool copy_p)
614 unsigned i;
615 gimple_stmt_iterator bsi;
616 basic_block *bbs;
618 if (copy_p)
620 loop = copy_loop_before (loop);
621 gcc_assert (loop != NULL);
622 create_preheader (loop, CP_SIMPLE_PREHEADERS);
623 create_bb_after_loop (loop);
626 /* Remove stmts not in the PARTITION bitmap. */
627 bbs = get_loop_body_in_dom_order (loop);
629 if (MAY_HAVE_DEBUG_STMTS)
630 for (i = 0; i < loop->num_nodes; i++)
632 basic_block bb = bbs[i];
634 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
636 gimple phi = gsi_stmt (bsi);
637 if (!virtual_operand_p (gimple_phi_result (phi))
638 && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
639 reset_debug_uses (phi);
642 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
644 gimple stmt = gsi_stmt (bsi);
645 if (gimple_code (stmt) != GIMPLE_LABEL
646 && !is_gimple_debug (stmt)
647 && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
648 reset_debug_uses (stmt);
652 for (i = 0; i < loop->num_nodes; i++)
654 basic_block bb = bbs[i];
656 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
658 gimple phi = gsi_stmt (bsi);
659 if (!virtual_operand_p (gimple_phi_result (phi))
660 && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
661 remove_phi_node (&bsi, true);
662 else
663 gsi_next (&bsi);
666 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
668 gimple stmt = gsi_stmt (bsi);
669 if (gimple_code (stmt) != GIMPLE_LABEL
670 && !is_gimple_debug (stmt)
671 && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
673 /* Choose an arbitrary path through the empty CFG part
674 that this unnecessary control stmt controls. */
675 if (gimple_code (stmt) == GIMPLE_COND)
677 gimple_cond_make_false (stmt);
678 update_stmt (stmt);
680 else if (gimple_code (stmt) == GIMPLE_SWITCH)
682 gimple_switch_set_index
683 (stmt, CASE_LOW (gimple_switch_label (stmt, 1)));
684 update_stmt (stmt);
686 else
688 unlink_stmt_vdef (stmt);
689 gsi_remove (&bsi, true);
690 release_defs (stmt);
691 continue;
694 gsi_next (&bsi);
698 free (bbs);
701 /* Build the size argument for a memory operation call. */
703 static tree
704 build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter)
706 tree size;
707 size = fold_build2_loc (loc, MULT_EXPR, sizetype,
708 fold_convert_loc (loc, sizetype, nb_iter),
709 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
710 return fold_convert_loc (loc, size_type_node, size);
713 /* Build an address argument for a memory operation call. */
715 static tree
716 build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
718 tree addr_base;
720 addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
721 addr_base = fold_convert_loc (loc, sizetype, addr_base);
723 /* Test for a negative stride, iterating over every element. */
724 if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
726 addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
727 fold_convert_loc (loc, sizetype, nb_bytes));
728 addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
729 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
732 return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
735 /* If VAL memory representation contains the same value in all bytes,
736 return that value, otherwise return -1.
737 E.g. for 0x24242424 return 0x24, for IEEE double
738 747708026454360457216.0 return 0x44, etc. */
740 static int
741 const_with_all_bytes_same (tree val)
743 unsigned char buf[64];
744 int i, len;
746 if (integer_zerop (val)
747 || real_zerop (val)
748 || (TREE_CODE (val) == CONSTRUCTOR
749 && !TREE_CLOBBER_P (val)
750 && CONSTRUCTOR_NELTS (val) == 0))
751 return 0;
753 if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
754 return -1;
756 len = native_encode_expr (val, buf, sizeof (buf));
757 if (len == 0)
758 return -1;
759 for (i = 1; i < len; i++)
760 if (buf[i] != buf[0])
761 return -1;
762 return buf[0];
765 /* Generate a call to memset for PARTITION in LOOP. */
767 static void
768 generate_memset_builtin (struct loop *loop, partition_t partition)
770 gimple_stmt_iterator gsi;
771 gimple stmt, fn_call;
772 tree mem, fn, nb_bytes;
773 location_t loc;
774 tree val;
776 stmt = DR_STMT (partition->main_dr);
777 loc = gimple_location (stmt);
779 /* The new statements will be placed before LOOP. */
780 gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
782 nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter);
783 nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
784 false, GSI_CONTINUE_LINKING);
785 mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
786 mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
787 false, GSI_CONTINUE_LINKING);
789 /* This exactly matches the pattern recognition in classify_partition. */
790 val = gimple_assign_rhs1 (stmt);
791 /* Handle constants like 0x15151515 and similarly
792 floating point constants etc. where all bytes are the same. */
793 int bytev = const_with_all_bytes_same (val);
794 if (bytev != -1)
795 val = build_int_cst (integer_type_node, bytev);
796 else if (TREE_CODE (val) == INTEGER_CST)
797 val = fold_convert (integer_type_node, val);
798 else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
800 gimple cstmt;
801 tree tem = make_ssa_name (integer_type_node, NULL);
802 cstmt = gimple_build_assign_with_ops (NOP_EXPR, tem, val, NULL_TREE);
803 gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
804 val = tem;
807 fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
808 fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
809 gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
811 if (dump_file && (dump_flags & TDF_DETAILS))
813 fprintf (dump_file, "generated memset");
814 if (bytev == 0)
815 fprintf (dump_file, " zero\n");
816 else
817 fprintf (dump_file, "\n");
821 /* Generate a call to memcpy for PARTITION in LOOP. */
823 static void
824 generate_memcpy_builtin (struct loop *loop, partition_t partition)
826 gimple_stmt_iterator gsi;
827 gimple stmt, fn_call;
828 tree dest, src, fn, nb_bytes;
829 location_t loc;
830 enum built_in_function kind;
832 stmt = DR_STMT (partition->main_dr);
833 loc = gimple_location (stmt);
835 /* The new statements will be placed before LOOP. */
836 gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
838 nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter);
839 nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
840 false, GSI_CONTINUE_LINKING);
841 dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
842 src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
843 if (ptr_derefs_may_alias_p (dest, src))
844 kind = BUILT_IN_MEMMOVE;
845 else
846 kind = BUILT_IN_MEMCPY;
848 dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
849 false, GSI_CONTINUE_LINKING);
850 src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
851 false, GSI_CONTINUE_LINKING);
852 fn = build_fold_addr_expr (builtin_decl_implicit (kind));
853 fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
854 gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
856 if (dump_file && (dump_flags & TDF_DETAILS))
858 if (kind == BUILT_IN_MEMCPY)
859 fprintf (dump_file, "generated memcpy\n");
860 else
861 fprintf (dump_file, "generated memmove\n");
865 /* Remove and destroy the loop LOOP. */
867 static void
868 destroy_loop (struct loop *loop)
870 unsigned nbbs = loop->num_nodes;
871 edge exit = single_exit (loop);
872 basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
873 basic_block *bbs;
874 unsigned i;
876 bbs = get_loop_body_in_dom_order (loop);
878 redirect_edge_pred (exit, src);
879 exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
880 exit->flags |= EDGE_FALLTHRU;
881 cancel_loop_tree (loop);
882 rescan_loop_exit (exit, false, true);
884 for (i = 0; i < nbbs; i++)
886 /* We have made sure to not leave any dangling uses of SSA
887 names defined in the loop. With the exception of virtuals.
888 Make sure we replace all uses of virtual defs that will remain
889 outside of the loop with the bare symbol as delete_basic_block
890 will release them. */
891 gimple_stmt_iterator gsi;
892 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
894 gimple phi = gsi_stmt (gsi);
895 if (virtual_operand_p (gimple_phi_result (phi)))
896 mark_virtual_phi_result_for_renaming (phi);
898 for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
900 gimple stmt = gsi_stmt (gsi);
901 tree vdef = gimple_vdef (stmt);
902 if (vdef && TREE_CODE (vdef) == SSA_NAME)
903 mark_virtual_operand_for_renaming (vdef);
905 delete_basic_block (bbs[i]);
907 free (bbs);
909 set_immediate_dominator (CDI_DOMINATORS, dest,
910 recompute_dominator (CDI_DOMINATORS, dest));
913 /* Generates code for PARTITION. */
915 static void
916 generate_code_for_partition (struct loop *loop,
917 partition_t partition, bool copy_p)
919 switch (partition->kind)
921 case PKIND_NORMAL:
922 /* Reductions all have to be in the last partition. */
923 gcc_assert (!partition_reduction_p (partition)
924 || !copy_p);
925 generate_loops_for_partition (loop, partition, copy_p);
926 return;
928 case PKIND_MEMSET:
929 generate_memset_builtin (loop, partition);
930 break;
932 case PKIND_MEMCPY:
933 generate_memcpy_builtin (loop, partition);
934 break;
936 default:
937 gcc_unreachable ();
940 /* Common tail for partitions we turn into a call. If this was the last
941 partition for which we generate code, we have to destroy the loop. */
942 if (!copy_p)
943 destroy_loop (loop);
947 /* Returns a partition with all the statements needed for computing
948 the vertex V of the RDG, also including the loop exit conditions. */
950 static partition_t
951 build_rdg_partition_for_vertex (struct graph *rdg, int v)
953 partition_t partition = partition_alloc (NULL, NULL);
954 vec<int> nodes;
955 unsigned i;
956 int x;
958 nodes.create (3);
959 graphds_dfs (rdg, &v, 1, &nodes, false, NULL);
961 FOR_EACH_VEC_ELT (nodes, i, x)
963 bitmap_set_bit (partition->stmts, x);
964 bitmap_set_bit (partition->loops,
965 loop_containing_stmt (RDG_STMT (rdg, x))->num);
968 nodes.release ();
969 return partition;
972 /* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
973 For the moment we detect only the memset zero pattern. */
975 static void
976 classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
978 bitmap_iterator bi;
979 unsigned i;
980 tree nb_iter;
981 data_reference_p single_load, single_store;
982 bool volatiles_p = false;
984 partition->kind = PKIND_NORMAL;
985 partition->main_dr = NULL;
986 partition->secondary_dr = NULL;
987 partition->niter = NULL_TREE;
989 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
991 gimple stmt = RDG_STMT (rdg, i);
993 if (gimple_has_volatile_ops (stmt))
994 volatiles_p = true;
996 /* If the stmt has uses outside of the loop mark it as reduction. */
997 if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
999 partition->reduction_p = true;
1000 return;
1004 /* Perform general partition disqualification for builtins. */
1005 if (volatiles_p
1006 || !flag_tree_loop_distribute_patterns)
1007 return;
1009 /* Detect memset and memcpy. */
1010 single_load = NULL;
1011 single_store = NULL;
1012 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
1014 gimple stmt = RDG_STMT (rdg, i);
1015 data_reference_p dr;
1016 unsigned j;
1018 if (gimple_code (stmt) == GIMPLE_PHI)
1019 continue;
1021 /* Any scalar stmts are ok. */
1022 if (!gimple_vuse (stmt))
1023 continue;
1025 /* Otherwise just regular loads/stores. */
1026 if (!gimple_assign_single_p (stmt))
1027 return;
1029 /* But exactly one store and/or load. */
1030 for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
1032 if (DR_IS_READ (dr))
1034 if (single_load != NULL)
1035 return;
1036 single_load = dr;
1038 else
1040 if (single_store != NULL)
1041 return;
1042 single_store = dr;
1047 if (!single_store)
1048 return;
1050 if (!dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
1051 gimple_bb (DR_STMT (single_store))))
1052 nb_iter = number_of_latch_executions (loop);
1053 else
1054 nb_iter = number_of_exit_cond_executions (loop);
1055 if (!nb_iter || nb_iter == chrec_dont_know)
1056 return;
1058 if (single_store && !single_load)
1060 gimple stmt = DR_STMT (single_store);
1061 tree rhs = gimple_assign_rhs1 (stmt);
1062 if (const_with_all_bytes_same (rhs) == -1
1063 && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
1064 || (TYPE_MODE (TREE_TYPE (rhs))
1065 != TYPE_MODE (unsigned_char_type_node))))
1066 return;
1067 if (TREE_CODE (rhs) == SSA_NAME
1068 && !SSA_NAME_IS_DEFAULT_DEF (rhs)
1069 && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
1070 return;
1071 if (!adjacent_dr_p (single_store)
1072 || !dominated_by_p (CDI_DOMINATORS,
1073 loop->latch, gimple_bb (stmt)))
1074 return;
1075 partition->kind = PKIND_MEMSET;
1076 partition->main_dr = single_store;
1077 partition->niter = nb_iter;
1079 else if (single_store && single_load)
1081 gimple store = DR_STMT (single_store);
1082 gimple load = DR_STMT (single_load);
1083 /* Direct aggregate copy or via an SSA name temporary. */
1084 if (load != store
1085 && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
1086 return;
1087 if (!adjacent_dr_p (single_store)
1088 || !adjacent_dr_p (single_load)
1089 || !operand_equal_p (DR_STEP (single_store),
1090 DR_STEP (single_load), 0)
1091 || !dominated_by_p (CDI_DOMINATORS,
1092 loop->latch, gimple_bb (store)))
1093 return;
1094 /* Now check that if there is a dependence this dependence is
1095 of a suitable form for memmove. */
1096 vec<loop_p> loops = vNULL;
1097 ddr_p ddr;
1098 loops.safe_push (loop);
1099 ddr = initialize_data_dependence_relation (single_load, single_store,
1100 loops);
1101 compute_affine_dependence (ddr, loop);
1102 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1104 free_dependence_relation (ddr);
1105 loops.release ();
1106 return;
1108 if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
1110 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1112 free_dependence_relation (ddr);
1113 loops.release ();
1114 return;
1116 lambda_vector dist_v;
1117 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
1119 int dist = dist_v[index_in_loop_nest (loop->num,
1120 DDR_LOOP_NEST (ddr))];
1121 if (dist > 0 && !DDR_REVERSED_P (ddr))
1123 free_dependence_relation (ddr);
1124 loops.release ();
1125 return;
1129 free_dependence_relation (ddr);
1130 loops.release ();
1131 partition->kind = PKIND_MEMCPY;
1132 partition->main_dr = single_store;
1133 partition->secondary_dr = single_load;
1134 partition->niter = nb_iter;
1138 /* For a data reference REF, return the declaration of its base
1139 address or NULL_TREE if the base is not determined. */
1141 static tree
1142 ref_base_address (data_reference_p dr)
1144 tree base_address = DR_BASE_ADDRESS (dr);
1145 if (base_address
1146 && TREE_CODE (base_address) == ADDR_EXPR)
1147 return TREE_OPERAND (base_address, 0);
1149 return base_address;
1152 /* Returns true when PARTITION1 and PARTITION2 have similar memory
1153 accesses in RDG. */
1155 static bool
1156 similar_memory_accesses (struct graph *rdg, partition_t partition1,
1157 partition_t partition2)
1159 unsigned i, j, k, l;
1160 bitmap_iterator bi, bj;
1161 data_reference_p ref1, ref2;
1163 /* First check whether in the intersection of the two partitions are
1164 any loads or stores. Common loads are the situation that happens
1165 most often. */
1166 EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
1167 if (RDG_MEM_WRITE_STMT (rdg, i)
1168 || RDG_MEM_READS_STMT (rdg, i))
1169 return true;
1171 /* Then check all data-references against each other. */
1172 EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
1173 if (RDG_MEM_WRITE_STMT (rdg, i)
1174 || RDG_MEM_READS_STMT (rdg, i))
1175 EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
1176 if (RDG_MEM_WRITE_STMT (rdg, j)
1177 || RDG_MEM_READS_STMT (rdg, j))
1179 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
1181 tree base1 = ref_base_address (ref1);
1182 if (base1)
1183 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
1184 if (base1 == ref_base_address (ref2))
1185 return true;
1189 return false;
1192 /* Aggregate several components into a useful partition that is
1193 registered in the PARTITIONS vector. Partitions will be
1194 distributed in different loops. */
1196 static void
1197 rdg_build_partitions (struct graph *rdg,
1198 vec<gimple> starting_stmts,
1199 vec<partition_t> *partitions)
1201 bitmap processed = BITMAP_ALLOC (NULL);
1202 int i;
1203 gimple stmt;
1205 FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
1207 int v = rdg_vertex_for_stmt (rdg, stmt);
1209 if (dump_file && (dump_flags & TDF_DETAILS))
1210 fprintf (dump_file,
1211 "ldist asked to generate code for vertex %d\n", v);
1213 /* If the vertex is already contained in another partition so
1214 is the partition rooted at it. */
1215 if (bitmap_bit_p (processed, v))
1216 continue;
1218 partition_t partition = build_rdg_partition_for_vertex (rdg, v);
1219 bitmap_ior_into (processed, partition->stmts);
1221 if (dump_file && (dump_flags & TDF_DETAILS))
1223 fprintf (dump_file, "ldist useful partition:\n");
1224 dump_bitmap (dump_file, partition->stmts);
1227 partitions->safe_push (partition);
1230 /* All vertices should have been assigned to at least one partition now,
1231 other than vertices belonging to dead code. */
1233 BITMAP_FREE (processed);
1236 /* Dump to FILE the PARTITIONS. */
1238 static void
1239 dump_rdg_partitions (FILE *file, vec<partition_t> partitions)
1241 int i;
1242 partition_t partition;
1244 FOR_EACH_VEC_ELT (partitions, i, partition)
1245 debug_bitmap_file (file, partition->stmts);
1248 /* Debug PARTITIONS. */
1249 extern void debug_rdg_partitions (vec<partition_t> );
1251 DEBUG_FUNCTION void
1252 debug_rdg_partitions (vec<partition_t> partitions)
1254 dump_rdg_partitions (stderr, partitions);
1257 /* Returns the number of read and write operations in the RDG. */
1259 static int
1260 number_of_rw_in_rdg (struct graph *rdg)
1262 int i, res = 0;
1264 for (i = 0; i < rdg->n_vertices; i++)
1266 if (RDG_MEM_WRITE_STMT (rdg, i))
1267 ++res;
1269 if (RDG_MEM_READS_STMT (rdg, i))
1270 ++res;
1273 return res;
1276 /* Returns the number of read and write operations in a PARTITION of
1277 the RDG. */
1279 static int
1280 number_of_rw_in_partition (struct graph *rdg, partition_t partition)
1282 int res = 0;
1283 unsigned i;
1284 bitmap_iterator ii;
1286 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
1288 if (RDG_MEM_WRITE_STMT (rdg, i))
1289 ++res;
1291 if (RDG_MEM_READS_STMT (rdg, i))
1292 ++res;
1295 return res;
1298 /* Returns true when one of the PARTITIONS contains all the read or
1299 write operations of RDG. */
1301 static bool
1302 partition_contains_all_rw (struct graph *rdg,
1303 vec<partition_t> partitions)
1305 int i;
1306 partition_t partition;
1307 int nrw = number_of_rw_in_rdg (rdg);
1309 FOR_EACH_VEC_ELT (partitions, i, partition)
1310 if (nrw == number_of_rw_in_partition (rdg, partition))
1311 return true;
1313 return false;
1316 /* Compute partition dependence created by the data references in DRS1
1317 and DRS2 and modify and return DIR according to that. */
1319 static int
1320 pg_add_dependence_edges (struct graph *rdg, vec<loop_p> loops, int dir,
1321 vec<data_reference_p> drs1,
1322 vec<data_reference_p> drs2)
1324 data_reference_p dr1, dr2;
1326 /* dependence direction - 0 is no dependence, -1 is back,
1327 1 is forth, 2 is both (we can stop then, merging will occur). */
1328 for (int ii = 0; drs1.iterate (ii, &dr1); ++ii)
1329 for (int jj = 0; drs2.iterate (jj, &dr2); ++jj)
1331 int this_dir = 1;
1332 ddr_p ddr;
1333 /* Re-shuffle data-refs to be in dominator order. */
1334 if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
1335 > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
1337 data_reference_p tem = dr1;
1338 dr1 = dr2;
1339 dr2 = tem;
1340 this_dir = -this_dir;
1342 ddr = initialize_data_dependence_relation (dr1, dr2, loops);
1343 compute_affine_dependence (ddr, loops[0]);
1344 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1345 this_dir = 2;
1346 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1348 if (DDR_REVERSED_P (ddr))
1350 data_reference_p tem = dr1;
1351 dr1 = dr2;
1352 dr2 = tem;
1353 this_dir = -this_dir;
1355 /* Known dependences can still be unordered througout the
1356 iteration space, see gcc.dg/tree-ssa/ldist-16.c. */
1357 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1358 this_dir = 2;
1360 else
1361 this_dir = 0;
1362 free_dependence_relation (ddr);
1363 if (dir == 0)
1364 dir = this_dir;
1365 else if (dir != this_dir)
1366 return 2;
1368 return dir;
1371 /* Compare postorder number of the partition graph vertices V1 and V2. */
1373 static int
1374 pgcmp (const void *v1_, const void *v2_)
1376 const vertex *v1 = (const vertex *)v1_;
1377 const vertex *v2 = (const vertex *)v2_;
1378 return v2->post - v1->post;
1381 /* Distributes the code from LOOP in such a way that producer
1382 statements are placed before consumer statements. Tries to separate
1383 only the statements from STMTS into separate loops.
1384 Returns the number of distributed loops. */
1386 static int
1387 distribute_loop (struct loop *loop, vec<gimple> stmts,
1388 control_dependences *cd, int *nb_calls)
1390 struct graph *rdg;
1391 vec<loop_p> loop_nest;
1392 vec<partition_t> partitions;
1393 partition_t partition;
1394 bool any_builtin;
1395 int i, nbp;
1396 graph *pg = NULL;
1397 int num_sccs = 1;
1399 *nb_calls = 0;
1400 loop_nest.create (3);
1401 if (!find_loop_nest (loop, &loop_nest))
1403 loop_nest.release ();
1404 return 0;
1407 rdg = build_rdg (loop_nest, cd);
1408 if (!rdg)
1410 if (dump_file && (dump_flags & TDF_DETAILS))
1411 fprintf (dump_file,
1412 "Loop %d not distributed: failed to build the RDG.\n",
1413 loop->num);
1415 loop_nest.release ();
1416 return 0;
1419 if (dump_file && (dump_flags & TDF_DETAILS))
1420 dump_rdg (dump_file, rdg);
1422 partitions.create (3);
1423 rdg_build_partitions (rdg, stmts, &partitions);
1425 any_builtin = false;
1426 FOR_EACH_VEC_ELT (partitions, i, partition)
1428 classify_partition (loop, rdg, partition);
1429 any_builtin |= partition_builtin_p (partition);
1432 /* If we are only distributing patterns but did not detect any,
1433 simply bail out. */
1434 if (!flag_tree_loop_distribution
1435 && !any_builtin)
1437 nbp = 0;
1438 goto ldist_done;
1441 /* If we are only distributing patterns fuse all partitions that
1442 were not classified as builtins. This also avoids chopping
1443 a loop into pieces, separated by builtin calls. That is, we
1444 only want no or a single loop body remaining. */
1445 partition_t into;
1446 if (!flag_tree_loop_distribution)
1448 for (i = 0; partitions.iterate (i, &into); ++i)
1449 if (!partition_builtin_p (into))
1450 break;
1451 for (++i; partitions.iterate (i, &partition); ++i)
1452 if (!partition_builtin_p (partition))
1454 if (dump_file && (dump_flags & TDF_DETAILS))
1456 fprintf (dump_file, "fusing non-builtin partitions\n");
1457 dump_bitmap (dump_file, into->stmts);
1458 dump_bitmap (dump_file, partition->stmts);
1460 partition_merge_into (into, partition);
1461 partitions.unordered_remove (i);
1462 partition_free (partition);
1463 i--;
1467 /* Due to limitations in the transform phase we have to fuse all
1468 reduction partitions into the last partition so the existing
1469 loop will contain all loop-closed PHI nodes. */
1470 for (i = 0; partitions.iterate (i, &into); ++i)
1471 if (partition_reduction_p (into))
1472 break;
1473 for (i = i + 1; partitions.iterate (i, &partition); ++i)
1474 if (partition_reduction_p (partition))
1476 if (dump_file && (dump_flags & TDF_DETAILS))
1478 fprintf (dump_file, "fusing partitions\n");
1479 dump_bitmap (dump_file, into->stmts);
1480 dump_bitmap (dump_file, partition->stmts);
1481 fprintf (dump_file, "because they have reductions\n");
1483 partition_merge_into (into, partition);
1484 partitions.unordered_remove (i);
1485 partition_free (partition);
1486 i--;
1489 /* Apply our simple cost model - fuse partitions with similar
1490 memory accesses. */
1491 for (i = 0; partitions.iterate (i, &into); ++i)
1493 if (partition_builtin_p (into))
1494 continue;
1495 for (int j = i + 1;
1496 partitions.iterate (j, &partition); ++j)
1498 if (!partition_builtin_p (partition)
1499 && similar_memory_accesses (rdg, into, partition))
1501 if (dump_file && (dump_flags & TDF_DETAILS))
1503 fprintf (dump_file, "fusing partitions\n");
1504 dump_bitmap (dump_file, into->stmts);
1505 dump_bitmap (dump_file, partition->stmts);
1506 fprintf (dump_file, "because they have similar "
1507 "memory accesses\n");
1509 partition_merge_into (into, partition);
1510 partitions.unordered_remove (j);
1511 partition_free (partition);
1512 j--;
1517 /* Build the partition dependency graph. */
1518 if (partitions.length () > 1)
1520 pg = new_graph (partitions.length ());
1521 struct pgdata {
1522 partition_t partition;
1523 vec<data_reference_p> writes;
1524 vec<data_reference_p> reads;
1526 #define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
1527 for (i = 0; partitions.iterate (i, &partition); ++i)
1529 vertex *v = &pg->vertices[i];
1530 pgdata *data = new pgdata;
1531 data_reference_p dr;
1532 /* FIXME - leaks. */
1533 v->data = data;
1534 bitmap_iterator bi;
1535 unsigned j;
1536 data->partition = partition;
1537 data->reads = vNULL;
1538 data->writes = vNULL;
1539 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, j, bi)
1540 for (int k = 0; RDG_DATAREFS (rdg, j).iterate (k, &dr); ++k)
1541 if (DR_IS_READ (dr))
1542 data->reads.safe_push (dr);
1543 else
1544 data->writes.safe_push (dr);
1546 partition_t partition1, partition2;
1547 for (i = 0; partitions.iterate (i, &partition1); ++i)
1548 for (int j = i + 1; partitions.iterate (j, &partition2); ++j)
1550 /* dependence direction - 0 is no dependence, -1 is back,
1551 1 is forth, 2 is both (we can stop then, merging will occur). */
1552 int dir = 0;
1553 dir = pg_add_dependence_edges (rdg, loop_nest, dir,
1554 PGDATA(i)->writes,
1555 PGDATA(j)->reads);
1556 if (dir != 2)
1557 dir = pg_add_dependence_edges (rdg, loop_nest, dir,
1558 PGDATA(i)->reads,
1559 PGDATA(j)->writes);
1560 if (dir != 2)
1561 dir = pg_add_dependence_edges (rdg, loop_nest, dir,
1562 PGDATA(i)->writes,
1563 PGDATA(j)->writes);
1564 if (dir == 1 || dir == 2)
1565 add_edge (pg, i, j);
1566 if (dir == -1 || dir == 2)
1567 add_edge (pg, j, i);
1570 /* Add edges to the reduction partition (if any) to force it last. */
1571 unsigned j;
1572 for (j = 0; partitions.iterate (j, &partition); ++j)
1573 if (partition_reduction_p (partition))
1574 break;
1575 if (j < partitions.length ())
1577 for (unsigned i = 0; partitions.iterate (i, &partition); ++i)
1578 if (i != j)
1579 add_edge (pg, i, j);
1582 /* Compute partitions we cannot separate and fuse them. */
1583 num_sccs = graphds_scc (pg, NULL);
1584 for (i = 0; i < num_sccs; ++i)
1586 partition_t first;
1587 int j;
1588 for (j = 0; partitions.iterate (j, &first); ++j)
1589 if (pg->vertices[j].component == i)
1590 break;
1591 for (j = j + 1; partitions.iterate (j, &partition); ++j)
1592 if (pg->vertices[j].component == i)
1594 if (dump_file && (dump_flags & TDF_DETAILS))
1596 fprintf (dump_file, "fusing partitions\n");
1597 dump_bitmap (dump_file, first->stmts);
1598 dump_bitmap (dump_file, partition->stmts);
1599 fprintf (dump_file, "because they are in the same "
1600 "dependence SCC\n");
1602 partition_merge_into (first, partition);
1603 partitions[j] = NULL;
1604 partition_free (partition);
1605 PGDATA (j)->partition = NULL;
1609 /* Now order the remaining nodes in postorder. */
1610 qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
1611 partitions.truncate (0);
1612 for (i = 0; i < pg->n_vertices; ++i)
1614 pgdata *data = PGDATA (i);
1615 if (data->partition)
1616 partitions.safe_push (data->partition);
1617 data->reads.release ();
1618 data->writes.release ();
1619 delete data;
1621 gcc_assert (partitions.length () == (unsigned)num_sccs);
1622 free_graph (pg);
1625 nbp = partitions.length ();
1626 if (nbp == 0
1627 || (nbp == 1 && !partition_builtin_p (partitions[0]))
1628 || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
1630 nbp = 0;
1631 goto ldist_done;
1634 if (dump_file && (dump_flags & TDF_DETAILS))
1635 dump_rdg_partitions (dump_file, partitions);
1637 FOR_EACH_VEC_ELT (partitions, i, partition)
1639 if (partition_builtin_p (partition))
1640 (*nb_calls)++;
1641 generate_code_for_partition (loop, partition, i < nbp - 1);
1644 ldist_done:
1646 FOR_EACH_VEC_ELT (partitions, i, partition)
1647 partition_free (partition);
1648 partitions.release ();
1650 free_rdg (rdg);
1651 loop_nest.release ();
1652 return nbp - *nb_calls;
1655 /* Distribute all loops in the current function. */
1657 static unsigned int
1658 tree_loop_distribution (void)
1660 struct loop *loop;
1661 loop_iterator li;
1662 bool changed = false;
1663 basic_block bb;
1664 control_dependences *cd = NULL;
1666 FOR_ALL_BB (bb)
1668 gimple_stmt_iterator gsi;
1669 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1670 gimple_set_uid (gsi_stmt (gsi), -1);
1671 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1672 gimple_set_uid (gsi_stmt (gsi), -1);
1675 /* We can at the moment only distribute non-nested loops, thus restrict
1676 walking to innermost loops. */
1677 FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
1679 vec<gimple> work_list = vNULL;
1680 basic_block *bbs;
1681 int num = loop->num;
1682 unsigned int i;
1684 /* If the loop doesn't have a single exit we will fail anyway,
1685 so do that early. */
1686 if (!single_exit (loop))
1687 continue;
1689 /* Only optimize hot loops. */
1690 if (!optimize_loop_for_speed_p (loop))
1691 continue;
1693 /* Initialize the worklist with stmts we seed the partitions with. */
1694 bbs = get_loop_body_in_dom_order (loop);
1695 for (i = 0; i < loop->num_nodes; ++i)
1697 gimple_stmt_iterator gsi;
1698 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
1700 gimple phi = gsi_stmt (gsi);
1701 if (virtual_operand_p (gimple_phi_result (phi)))
1702 continue;
1703 /* Distribute stmts which have defs that are used outside of
1704 the loop. */
1705 if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
1706 continue;
1707 work_list.safe_push (phi);
1709 for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
1711 gimple stmt = gsi_stmt (gsi);
1713 /* If there is a stmt with side-effects bail out - we
1714 cannot and should not distribute this loop. */
1715 if (gimple_has_side_effects (stmt))
1717 work_list.truncate (0);
1718 goto out;
1721 /* Distribute stmts which have defs that are used outside of
1722 the loop. */
1723 if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
1725 /* Otherwise only distribute stores for now. */
1726 else if (!gimple_assign_single_p (stmt)
1727 || is_gimple_reg (gimple_assign_lhs (stmt)))
1728 continue;
1730 work_list.safe_push (stmt);
1733 out:
1734 free (bbs);
1736 int nb_generated_loops = 0;
1737 int nb_generated_calls = 0;
1738 location_t loc = find_loop_location (loop);
1739 if (work_list.length () > 0)
1741 if (!cd)
1743 calculate_dominance_info (CDI_DOMINATORS);
1744 calculate_dominance_info (CDI_POST_DOMINATORS);
1745 cd = new control_dependences (create_edge_list ());
1746 free_dominance_info (CDI_POST_DOMINATORS);
1748 nb_generated_loops = distribute_loop (loop, work_list, cd,
1749 &nb_generated_calls);
1752 if (nb_generated_loops + nb_generated_calls > 0)
1754 changed = true;
1755 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
1756 loc, "Loop %d distributed: split to %d loops "
1757 "and %d library calls.\n",
1758 num, nb_generated_loops, nb_generated_calls);
1760 else if (dump_file && (dump_flags & TDF_DETAILS))
1761 fprintf (dump_file, "Loop %d is the same.\n", num);
1763 work_list.release ();
1766 if (cd)
1767 delete cd;
1769 if (changed)
1771 mark_virtual_operands_for_renaming (cfun);
1772 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1775 #ifdef ENABLE_CHECKING
1776 verify_loop_structure ();
1777 #endif
1779 return 0;
1782 static bool
1783 gate_tree_loop_distribution (void)
1785 return flag_tree_loop_distribution
1786 || flag_tree_loop_distribute_patterns;
1789 namespace {
1791 const pass_data pass_data_loop_distribution =
1793 GIMPLE_PASS, /* type */
1794 "ldist", /* name */
1795 OPTGROUP_LOOP, /* optinfo_flags */
1796 true, /* has_gate */
1797 true, /* has_execute */
1798 TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
1799 ( PROP_cfg | PROP_ssa ), /* properties_required */
1800 0, /* properties_provided */
1801 0, /* properties_destroyed */
1802 0, /* todo_flags_start */
1803 TODO_verify_ssa, /* todo_flags_finish */
1806 class pass_loop_distribution : public gimple_opt_pass
1808 public:
1809 pass_loop_distribution (gcc::context *ctxt)
1810 : gimple_opt_pass (pass_data_loop_distribution, ctxt)
1813 /* opt_pass methods: */
1814 bool gate () { return gate_tree_loop_distribution (); }
1815 unsigned int execute () { return tree_loop_distribution (); }
1817 }; // class pass_loop_distribution
1819 } // anon namespace
1821 gimple_opt_pass *
1822 make_pass_loop_distribution (gcc::context *ctxt)
1824 return new pass_loop_distribution (ctxt);