PR middle-end/59175
[official-gcc.git] / gcc / tree-loop-distribution.c
blob075487726f47a033f783e9b653e36aaa34c9db44
1 /* Loop distribution.
2 Copyright (C) 2006-2013 Free Software Foundation, Inc.
3 Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
4 and Sebastian Pop <sebastian.pop@amd.com>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 3, or (at your option) any
11 later version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* This pass performs loop distribution: for example, the loop
24 |DO I = 2, N
25 | A(I) = B(I) + C
26 | D(I) = A(I-1)*E
27 |ENDDO
29 is transformed to
31 |DOALL I = 2, N
32 | A(I) = B(I) + C
33 |ENDDO
35 |DOALL I = 2, N
36 | D(I) = A(I-1)*E
37 |ENDDO
39 This pass uses an RDG, Reduced Dependence Graph built on top of the
40 data dependence relations. The RDG is then topologically sorted to
41 obtain a map of information producers/consumers based on which it
42 generates the new loops. */
44 #include "config.h"
45 #include "system.h"
46 #include "coretypes.h"
47 #include "tree.h"
48 #include "gimple.h"
49 #include "gimple-iterator.h"
50 #include "gimplify-me.h"
51 #include "gimple-ssa.h"
52 #include "tree-cfg.h"
53 #include "tree-phinodes.h"
54 #include "ssa-iterators.h"
55 #include "tree-ssanames.h"
56 #include "tree-ssa-loop-manip.h"
57 #include "tree-ssa-loop.h"
58 #include "tree-into-ssa.h"
59 #include "tree-ssa.h"
60 #include "cfgloop.h"
61 #include "tree-chrec.h"
62 #include "tree-data-ref.h"
63 #include "tree-scalar-evolution.h"
64 #include "tree-pass.h"
65 #include "gimple-pretty-print.h"
66 #include "tree-vectorizer.h"
69 /* A Reduced Dependence Graph (RDG) vertex representing a statement. */
70 typedef struct rdg_vertex
72 /* The statement represented by this vertex. */
73 gimple stmt;
75 /* Vector of data-references in this statement. */
76 vec<data_reference_p> datarefs;
78 /* True when the statement contains a write to memory. */
79 bool has_mem_write;
81 /* True when the statement contains a read from memory. */
82 bool has_mem_reads;
83 } *rdg_vertex_p;
85 #define RDGV_STMT(V) ((struct rdg_vertex *) ((V)->data))->stmt
86 #define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
87 #define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
88 #define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
89 #define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
90 #define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
91 #define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
92 #define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
94 /* Data dependence type. */
96 enum rdg_dep_type
98 /* Read After Write (RAW). */
99 flow_dd = 'f',
101 /* Control dependence (execute conditional on). */
102 control_dd = 'c'
105 /* Dependence information attached to an edge of the RDG. */
107 typedef struct rdg_edge
109 /* Type of the dependence. */
110 enum rdg_dep_type type;
111 } *rdg_edge_p;
113 #define RDGE_TYPE(E) ((struct rdg_edge *) ((E)->data))->type
115 /* Dump vertex I in RDG to FILE. */
117 static void
118 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
120 struct vertex *v = &(rdg->vertices[i]);
121 struct graph_edge *e;
123 fprintf (file, "(vertex %d: (%s%s) (in:", i,
124 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
125 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
127 if (v->pred)
128 for (e = v->pred; e; e = e->pred_next)
129 fprintf (file, " %d", e->src);
131 fprintf (file, ") (out:");
133 if (v->succ)
134 for (e = v->succ; e; e = e->succ_next)
135 fprintf (file, " %d", e->dest);
137 fprintf (file, ")\n");
138 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
139 fprintf (file, ")\n");
142 /* Call dump_rdg_vertex on stderr. */
144 DEBUG_FUNCTION void
145 debug_rdg_vertex (struct graph *rdg, int i)
147 dump_rdg_vertex (stderr, rdg, i);
150 /* Dump the reduced dependence graph RDG to FILE. */
152 static void
153 dump_rdg (FILE *file, struct graph *rdg)
155 fprintf (file, "(rdg\n");
156 for (int i = 0; i < rdg->n_vertices; i++)
157 dump_rdg_vertex (file, rdg, i);
158 fprintf (file, ")\n");
161 /* Call dump_rdg on stderr. */
163 DEBUG_FUNCTION void
164 debug_rdg (struct graph *rdg)
166 dump_rdg (stderr, rdg);
169 static void
170 dot_rdg_1 (FILE *file, struct graph *rdg)
172 int i;
173 pretty_printer buffer;
174 pp_needs_newline (&buffer) = false;
175 buffer.buffer->stream = file;
177 fprintf (file, "digraph RDG {\n");
179 for (i = 0; i < rdg->n_vertices; i++)
181 struct vertex *v = &(rdg->vertices[i]);
182 struct graph_edge *e;
184 fprintf (file, "%d [label=\"[%d] ", i, i);
185 pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
186 pp_flush (&buffer);
187 fprintf (file, "\"]\n");
189 /* Highlight reads from memory. */
190 if (RDG_MEM_READS_STMT (rdg, i))
191 fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
193 /* Highlight stores to memory. */
194 if (RDG_MEM_WRITE_STMT (rdg, i))
195 fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
197 if (v->succ)
198 for (e = v->succ; e; e = e->succ_next)
199 switch (RDGE_TYPE (e))
201 case flow_dd:
202 /* These are the most common dependences: don't print these. */
203 fprintf (file, "%d -> %d \n", i, e->dest);
204 break;
206 case control_dd:
207 fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
208 break;
210 default:
211 gcc_unreachable ();
215 fprintf (file, "}\n\n");
218 /* Display the Reduced Dependence Graph using dotty. */
220 DEBUG_FUNCTION void
221 dot_rdg (struct graph *rdg)
223 /* When debugging, you may want to enable the following code. */
224 #if 1
225 FILE *file = popen ("dot -Tx11", "w");
226 if (!file)
227 return;
228 dot_rdg_1 (file, rdg);
229 fflush (file);
230 close (fileno (file));
231 pclose (file);
232 #else
233 dot_rdg_1 (stderr, rdg);
234 #endif
237 /* Returns the index of STMT in RDG. */
239 static int
240 rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple stmt)
242 int index = gimple_uid (stmt);
243 gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
244 return index;
247 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
248 the index of DEF in RDG. */
250 static void
251 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
253 use_operand_p imm_use_p;
254 imm_use_iterator iterator;
256 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
258 struct graph_edge *e;
259 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
261 if (use < 0)
262 continue;
264 e = add_edge (rdg, idef, use);
265 e->data = XNEW (struct rdg_edge);
266 RDGE_TYPE (e) = flow_dd;
270 /* Creates an edge for the control dependences of BB to the vertex V. */
272 static void
273 create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
274 int v, control_dependences *cd)
276 bitmap_iterator bi;
277 unsigned edge_n;
278 EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
279 0, edge_n, bi)
281 basic_block cond_bb = cd->get_edge (edge_n)->src;
282 gimple stmt = last_stmt (cond_bb);
283 if (stmt && is_ctrl_stmt (stmt))
285 struct graph_edge *e;
286 int c = rdg_vertex_for_stmt (rdg, stmt);
287 if (c < 0)
288 continue;
290 e = add_edge (rdg, c, v);
291 e->data = XNEW (struct rdg_edge);
292 RDGE_TYPE (e) = control_dd;
297 /* Creates the edges of the reduced dependence graph RDG. */
299 static void
300 create_rdg_flow_edges (struct graph *rdg)
302 int i;
303 def_operand_p def_p;
304 ssa_op_iter iter;
306 for (i = 0; i < rdg->n_vertices; i++)
307 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
308 iter, SSA_OP_DEF)
309 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
312 /* Creates the edges of the reduced dependence graph RDG. */
314 static void
315 create_rdg_cd_edges (struct graph *rdg, control_dependences *cd)
317 int i;
319 for (i = 0; i < rdg->n_vertices; i++)
321 gimple stmt = RDG_STMT (rdg, i);
322 if (gimple_code (stmt) == GIMPLE_PHI)
324 edge_iterator ei;
325 edge e;
326 FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
327 create_edge_for_control_dependence (rdg, e->src, i, cd);
329 else
330 create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
334 /* Build the vertices of the reduced dependence graph RDG. Return false
335 if that failed. */
337 static bool
338 create_rdg_vertices (struct graph *rdg, vec<gimple> stmts, loop_p loop,
339 vec<data_reference_p> *datarefs)
341 int i;
342 gimple stmt;
344 FOR_EACH_VEC_ELT (stmts, i, stmt)
346 struct vertex *v = &(rdg->vertices[i]);
348 /* Record statement to vertex mapping. */
349 gimple_set_uid (stmt, i);
351 v->data = XNEW (struct rdg_vertex);
352 RDGV_STMT (v) = stmt;
353 RDGV_DATAREFS (v).create (0);
354 RDGV_HAS_MEM_WRITE (v) = false;
355 RDGV_HAS_MEM_READS (v) = false;
356 if (gimple_code (stmt) == GIMPLE_PHI)
357 continue;
359 unsigned drp = datarefs->length ();
360 if (!find_data_references_in_stmt (loop, stmt, datarefs))
361 return false;
362 for (unsigned j = drp; j < datarefs->length (); ++j)
364 data_reference_p dr = (*datarefs)[j];
365 if (DR_IS_READ (dr))
366 RDGV_HAS_MEM_READS (v) = true;
367 else
368 RDGV_HAS_MEM_WRITE (v) = true;
369 RDGV_DATAREFS (v).safe_push (dr);
372 return true;
375 /* Initialize STMTS with all the statements of LOOP. The order in
376 which we discover statements is important as
377 generate_loops_for_partition is using the same traversal for
378 identifying statements in loop copies. */
380 static void
381 stmts_from_loop (struct loop *loop, vec<gimple> *stmts)
383 unsigned int i;
384 basic_block *bbs = get_loop_body_in_dom_order (loop);
386 for (i = 0; i < loop->num_nodes; i++)
388 basic_block bb = bbs[i];
389 gimple_stmt_iterator bsi;
390 gimple stmt;
392 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
393 if (!virtual_operand_p (gimple_phi_result (gsi_stmt (bsi))))
394 stmts->safe_push (gsi_stmt (bsi));
396 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
398 stmt = gsi_stmt (bsi);
399 if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
400 stmts->safe_push (stmt);
404 free (bbs);
407 /* Free the reduced dependence graph RDG. */
409 static void
410 free_rdg (struct graph *rdg)
412 int i;
414 for (i = 0; i < rdg->n_vertices; i++)
416 struct vertex *v = &(rdg->vertices[i]);
417 struct graph_edge *e;
419 for (e = v->succ; e; e = e->succ_next)
420 free (e->data);
422 if (v->data)
424 gimple_set_uid (RDGV_STMT (v), -1);
425 free_data_refs (RDGV_DATAREFS (v));
426 free (v->data);
430 free_graph (rdg);
433 /* Build the Reduced Dependence Graph (RDG) with one vertex per
434 statement of the loop nest LOOP_NEST, and one edge per data dependence or
435 scalar dependence. */
437 static struct graph *
438 build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
440 struct graph *rdg;
441 vec<data_reference_p> datarefs;
443 /* Create the RDG vertices from the stmts of the loop nest. */
444 stack_vec<gimple, 10> stmts;
445 stmts_from_loop (loop_nest[0], &stmts);
446 rdg = new_graph (stmts.length ());
447 datarefs.create (10);
448 if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs))
450 datarefs.release ();
451 free_rdg (rdg);
452 return NULL;
454 stmts.release ();
456 create_rdg_flow_edges (rdg);
457 if (cd)
458 create_rdg_cd_edges (rdg, cd);
460 datarefs.release ();
462 return rdg;
467 enum partition_kind {
468 PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY
471 typedef struct partition_s
473 bitmap stmts;
474 bitmap loops;
475 bool reduction_p;
476 enum partition_kind kind;
477 /* data-references a kind != PKIND_NORMAL partition is about. */
478 data_reference_p main_dr;
479 data_reference_p secondary_dr;
480 tree niter;
481 } *partition_t;
484 /* Allocate and initialize a partition from BITMAP. */
486 static partition_t
487 partition_alloc (bitmap stmts, bitmap loops)
489 partition_t partition = XCNEW (struct partition_s);
490 partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
491 partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
492 partition->reduction_p = false;
493 partition->kind = PKIND_NORMAL;
494 return partition;
497 /* Free PARTITION. */
499 static void
500 partition_free (partition_t partition)
502 BITMAP_FREE (partition->stmts);
503 BITMAP_FREE (partition->loops);
504 free (partition);
507 /* Returns true if the partition can be generated as a builtin. */
509 static bool
510 partition_builtin_p (partition_t partition)
512 return partition->kind != PKIND_NORMAL;
515 /* Returns true if the partition contains a reduction. */
517 static bool
518 partition_reduction_p (partition_t partition)
520 return partition->reduction_p;
523 /* Merge PARTITION into the partition DEST. */
525 static void
526 partition_merge_into (partition_t dest, partition_t partition)
528 dest->kind = PKIND_NORMAL;
529 bitmap_ior_into (dest->stmts, partition->stmts);
530 if (partition_reduction_p (partition))
531 dest->reduction_p = true;
535 /* Returns true when DEF is an SSA_NAME defined in LOOP and used after
536 the LOOP. */
538 static bool
539 ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
541 imm_use_iterator imm_iter;
542 use_operand_p use_p;
544 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
546 gimple use_stmt = USE_STMT (use_p);
547 if (!is_gimple_debug (use_stmt)
548 && loop != loop_containing_stmt (use_stmt))
549 return true;
552 return false;
555 /* Returns true when STMT defines a scalar variable used after the
556 loop LOOP. */
558 static bool
559 stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple stmt)
561 def_operand_p def_p;
562 ssa_op_iter op_iter;
564 if (gimple_code (stmt) == GIMPLE_PHI)
565 return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);
567 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
568 if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
569 return true;
571 return false;
574 /* Return a copy of LOOP placed before LOOP. */
576 static struct loop *
577 copy_loop_before (struct loop *loop)
579 struct loop *res;
580 edge preheader = loop_preheader_edge (loop);
582 initialize_original_copy_tables ();
583 res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, preheader);
584 gcc_assert (res != NULL);
585 free_original_copy_tables ();
586 delete_update_ssa ();
588 return res;
591 /* Creates an empty basic block after LOOP. */
593 static void
594 create_bb_after_loop (struct loop *loop)
596 edge exit = single_exit (loop);
598 if (!exit)
599 return;
601 split_edge (exit);
604 /* Generate code for PARTITION from the code in LOOP. The loop is
605 copied when COPY_P is true. All the statements not flagged in the
606 PARTITION bitmap are removed from the loop or from its copy. The
607 statements are indexed in sequence inside a basic block, and the
608 basic blocks of a loop are taken in dom order. */
610 static void
611 generate_loops_for_partition (struct loop *loop, partition_t partition,
612 bool copy_p)
614 unsigned i;
615 gimple_stmt_iterator bsi;
616 basic_block *bbs;
618 if (copy_p)
620 loop = copy_loop_before (loop);
621 gcc_assert (loop != NULL);
622 create_preheader (loop, CP_SIMPLE_PREHEADERS);
623 create_bb_after_loop (loop);
626 /* Remove stmts not in the PARTITION bitmap. */
627 bbs = get_loop_body_in_dom_order (loop);
629 if (MAY_HAVE_DEBUG_STMTS)
630 for (i = 0; i < loop->num_nodes; i++)
632 basic_block bb = bbs[i];
634 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
636 gimple phi = gsi_stmt (bsi);
637 if (!virtual_operand_p (gimple_phi_result (phi))
638 && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
639 reset_debug_uses (phi);
642 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
644 gimple stmt = gsi_stmt (bsi);
645 if (gimple_code (stmt) != GIMPLE_LABEL
646 && !is_gimple_debug (stmt)
647 && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
648 reset_debug_uses (stmt);
652 for (i = 0; i < loop->num_nodes; i++)
654 basic_block bb = bbs[i];
656 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
658 gimple phi = gsi_stmt (bsi);
659 if (!virtual_operand_p (gimple_phi_result (phi))
660 && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
661 remove_phi_node (&bsi, true);
662 else
663 gsi_next (&bsi);
666 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
668 gimple stmt = gsi_stmt (bsi);
669 if (gimple_code (stmt) != GIMPLE_LABEL
670 && !is_gimple_debug (stmt)
671 && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
673 /* Choose an arbitrary path through the empty CFG part
674 that this unnecessary control stmt controls. */
675 if (gimple_code (stmt) == GIMPLE_COND)
677 gimple_cond_make_false (stmt);
678 update_stmt (stmt);
680 else if (gimple_code (stmt) == GIMPLE_SWITCH)
682 gimple_switch_set_index
683 (stmt, CASE_LOW (gimple_switch_label (stmt, 1)));
684 update_stmt (stmt);
686 else
688 unlink_stmt_vdef (stmt);
689 gsi_remove (&bsi, true);
690 release_defs (stmt);
691 continue;
694 gsi_next (&bsi);
698 free (bbs);
701 /* Build the size argument for a memory operation call. */
703 static tree
704 build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter)
706 tree size;
707 size = fold_build2_loc (loc, MULT_EXPR, sizetype,
708 fold_convert_loc (loc, sizetype, nb_iter),
709 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
710 return fold_convert_loc (loc, size_type_node, size);
713 /* Build an address argument for a memory operation call. */
715 static tree
716 build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
718 tree addr_base;
720 addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
721 addr_base = fold_convert_loc (loc, sizetype, addr_base);
723 /* Test for a negative stride, iterating over every element. */
724 if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
726 addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
727 fold_convert_loc (loc, sizetype, nb_bytes));
728 addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
729 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
732 return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
735 /* If VAL memory representation contains the same value in all bytes,
736 return that value, otherwise return -1.
737 E.g. for 0x24242424 return 0x24, for IEEE double
738 747708026454360457216.0 return 0x44, etc. */
740 static int
741 const_with_all_bytes_same (tree val)
743 unsigned char buf[64];
744 int i, len;
746 if (integer_zerop (val)
747 || real_zerop (val)
748 || (TREE_CODE (val) == CONSTRUCTOR
749 && !TREE_CLOBBER_P (val)
750 && CONSTRUCTOR_NELTS (val) == 0))
751 return 0;
753 if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
754 return -1;
756 len = native_encode_expr (val, buf, sizeof (buf));
757 if (len == 0)
758 return -1;
759 for (i = 1; i < len; i++)
760 if (buf[i] != buf[0])
761 return -1;
762 return buf[0];
765 /* Generate a call to memset for PARTITION in LOOP. */
767 static void
768 generate_memset_builtin (struct loop *loop, partition_t partition)
770 gimple_stmt_iterator gsi;
771 gimple stmt, fn_call;
772 tree mem, fn, nb_bytes;
773 location_t loc;
774 tree val;
776 stmt = DR_STMT (partition->main_dr);
777 loc = gimple_location (stmt);
779 /* The new statements will be placed before LOOP. */
780 gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
782 nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter);
783 nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
784 false, GSI_CONTINUE_LINKING);
785 mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
786 mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
787 false, GSI_CONTINUE_LINKING);
789 /* This exactly matches the pattern recognition in classify_partition. */
790 val = gimple_assign_rhs1 (stmt);
791 /* Handle constants like 0x15151515 and similarly
792 floating point constants etc. where all bytes are the same. */
793 int bytev = const_with_all_bytes_same (val);
794 if (bytev != -1)
795 val = build_int_cst (integer_type_node, bytev);
796 else if (TREE_CODE (val) == INTEGER_CST)
797 val = fold_convert (integer_type_node, val);
798 else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
800 gimple cstmt;
801 tree tem = make_ssa_name (integer_type_node, NULL);
802 cstmt = gimple_build_assign_with_ops (NOP_EXPR, tem, val, NULL_TREE);
803 gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
804 val = tem;
807 fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
808 fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
809 gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
811 if (dump_file && (dump_flags & TDF_DETAILS))
813 fprintf (dump_file, "generated memset");
814 if (bytev == 0)
815 fprintf (dump_file, " zero\n");
816 else
817 fprintf (dump_file, "\n");
821 /* Generate a call to memcpy for PARTITION in LOOP. */
823 static void
824 generate_memcpy_builtin (struct loop *loop, partition_t partition)
826 gimple_stmt_iterator gsi;
827 gimple stmt, fn_call;
828 tree dest, src, fn, nb_bytes;
829 location_t loc;
830 enum built_in_function kind;
832 stmt = DR_STMT (partition->main_dr);
833 loc = gimple_location (stmt);
835 /* The new statements will be placed before LOOP. */
836 gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
838 nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter);
839 nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
840 false, GSI_CONTINUE_LINKING);
841 dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
842 src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
843 if (ptr_derefs_may_alias_p (dest, src))
844 kind = BUILT_IN_MEMMOVE;
845 else
846 kind = BUILT_IN_MEMCPY;
848 dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
849 false, GSI_CONTINUE_LINKING);
850 src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
851 false, GSI_CONTINUE_LINKING);
852 fn = build_fold_addr_expr (builtin_decl_implicit (kind));
853 fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
854 gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
856 if (dump_file && (dump_flags & TDF_DETAILS))
858 if (kind == BUILT_IN_MEMCPY)
859 fprintf (dump_file, "generated memcpy\n");
860 else
861 fprintf (dump_file, "generated memmove\n");
865 /* Remove and destroy the loop LOOP. */
867 static void
868 destroy_loop (struct loop *loop)
870 unsigned nbbs = loop->num_nodes;
871 edge exit = single_exit (loop);
872 basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
873 basic_block *bbs;
874 unsigned i;
876 bbs = get_loop_body_in_dom_order (loop);
878 redirect_edge_pred (exit, src);
879 exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
880 exit->flags |= EDGE_FALLTHRU;
881 cancel_loop_tree (loop);
882 rescan_loop_exit (exit, false, true);
884 for (i = 0; i < nbbs; i++)
886 /* We have made sure to not leave any dangling uses of SSA
887 names defined in the loop. With the exception of virtuals.
888 Make sure we replace all uses of virtual defs that will remain
889 outside of the loop with the bare symbol as delete_basic_block
890 will release them. */
891 gimple_stmt_iterator gsi;
892 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
894 gimple phi = gsi_stmt (gsi);
895 if (virtual_operand_p (gimple_phi_result (phi)))
896 mark_virtual_phi_result_for_renaming (phi);
898 for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
900 gimple stmt = gsi_stmt (gsi);
901 tree vdef = gimple_vdef (stmt);
902 if (vdef && TREE_CODE (vdef) == SSA_NAME)
903 mark_virtual_operand_for_renaming (vdef);
905 delete_basic_block (bbs[i]);
907 free (bbs);
909 set_immediate_dominator (CDI_DOMINATORS, dest,
910 recompute_dominator (CDI_DOMINATORS, dest));
913 /* Generates code for PARTITION. */
915 static void
916 generate_code_for_partition (struct loop *loop,
917 partition_t partition, bool copy_p)
919 switch (partition->kind)
921 case PKIND_NORMAL:
922 /* Reductions all have to be in the last partition. */
923 gcc_assert (!partition_reduction_p (partition)
924 || !copy_p);
925 generate_loops_for_partition (loop, partition, copy_p);
926 return;
928 case PKIND_MEMSET:
929 generate_memset_builtin (loop, partition);
930 break;
932 case PKIND_MEMCPY:
933 generate_memcpy_builtin (loop, partition);
934 break;
936 default:
937 gcc_unreachable ();
940 /* Common tail for partitions we turn into a call. If this was the last
941 partition for which we generate code, we have to destroy the loop. */
942 if (!copy_p)
943 destroy_loop (loop);
947 /* Returns a partition with all the statements needed for computing
948 the vertex V of the RDG, also including the loop exit conditions. */
950 static partition_t
951 build_rdg_partition_for_vertex (struct graph *rdg, int v)
953 partition_t partition = partition_alloc (NULL, NULL);
954 stack_vec<int, 3> nodes;
955 unsigned i;
956 int x;
958 graphds_dfs (rdg, &v, 1, &nodes, false, NULL);
960 FOR_EACH_VEC_ELT (nodes, i, x)
962 bitmap_set_bit (partition->stmts, x);
963 bitmap_set_bit (partition->loops,
964 loop_containing_stmt (RDG_STMT (rdg, x))->num);
967 return partition;
970 /* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
971 For the moment we detect only the memset zero pattern. */
973 static void
974 classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
976 bitmap_iterator bi;
977 unsigned i;
978 tree nb_iter;
979 data_reference_p single_load, single_store;
980 bool volatiles_p = false;
982 partition->kind = PKIND_NORMAL;
983 partition->main_dr = NULL;
984 partition->secondary_dr = NULL;
985 partition->niter = NULL_TREE;
987 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
989 gimple stmt = RDG_STMT (rdg, i);
991 if (gimple_has_volatile_ops (stmt))
992 volatiles_p = true;
994 /* If the stmt has uses outside of the loop mark it as reduction. */
995 if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
997 partition->reduction_p = true;
998 return;
1002 /* Perform general partition disqualification for builtins. */
1003 if (volatiles_p
1004 || !flag_tree_loop_distribute_patterns)
1005 return;
1007 /* Detect memset and memcpy. */
1008 single_load = NULL;
1009 single_store = NULL;
1010 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
1012 gimple stmt = RDG_STMT (rdg, i);
1013 data_reference_p dr;
1014 unsigned j;
1016 if (gimple_code (stmt) == GIMPLE_PHI)
1017 continue;
1019 /* Any scalar stmts are ok. */
1020 if (!gimple_vuse (stmt))
1021 continue;
1023 /* Otherwise just regular loads/stores. */
1024 if (!gimple_assign_single_p (stmt))
1025 return;
1027 /* But exactly one store and/or load. */
1028 for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
1030 if (DR_IS_READ (dr))
1032 if (single_load != NULL)
1033 return;
1034 single_load = dr;
1036 else
1038 if (single_store != NULL)
1039 return;
1040 single_store = dr;
1045 if (!single_store)
1046 return;
1048 if (!dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
1049 gimple_bb (DR_STMT (single_store))))
1050 nb_iter = number_of_latch_executions (loop);
1051 else
1052 nb_iter = number_of_exit_cond_executions (loop);
1053 if (!nb_iter || nb_iter == chrec_dont_know)
1054 return;
1056 if (single_store && !single_load)
1058 gimple stmt = DR_STMT (single_store);
1059 tree rhs = gimple_assign_rhs1 (stmt);
1060 if (const_with_all_bytes_same (rhs) == -1
1061 && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
1062 || (TYPE_MODE (TREE_TYPE (rhs))
1063 != TYPE_MODE (unsigned_char_type_node))))
1064 return;
1065 if (TREE_CODE (rhs) == SSA_NAME
1066 && !SSA_NAME_IS_DEFAULT_DEF (rhs)
1067 && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
1068 return;
1069 if (!adjacent_dr_p (single_store)
1070 || !dominated_by_p (CDI_DOMINATORS,
1071 loop->latch, gimple_bb (stmt)))
1072 return;
1073 partition->kind = PKIND_MEMSET;
1074 partition->main_dr = single_store;
1075 partition->niter = nb_iter;
1077 else if (single_store && single_load)
1079 gimple store = DR_STMT (single_store);
1080 gimple load = DR_STMT (single_load);
1081 /* Direct aggregate copy or via an SSA name temporary. */
1082 if (load != store
1083 && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
1084 return;
1085 if (!adjacent_dr_p (single_store)
1086 || !adjacent_dr_p (single_load)
1087 || !operand_equal_p (DR_STEP (single_store),
1088 DR_STEP (single_load), 0)
1089 || !dominated_by_p (CDI_DOMINATORS,
1090 loop->latch, gimple_bb (store)))
1091 return;
1092 /* Now check that if there is a dependence this dependence is
1093 of a suitable form for memmove. */
1094 vec<loop_p> loops = vNULL;
1095 ddr_p ddr;
1096 loops.safe_push (loop);
1097 ddr = initialize_data_dependence_relation (single_load, single_store,
1098 loops);
1099 compute_affine_dependence (ddr, loop);
1100 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1102 free_dependence_relation (ddr);
1103 loops.release ();
1104 return;
1106 if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
1108 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1110 free_dependence_relation (ddr);
1111 loops.release ();
1112 return;
1114 lambda_vector dist_v;
1115 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
1117 int dist = dist_v[index_in_loop_nest (loop->num,
1118 DDR_LOOP_NEST (ddr))];
1119 if (dist > 0 && !DDR_REVERSED_P (ddr))
1121 free_dependence_relation (ddr);
1122 loops.release ();
1123 return;
1127 free_dependence_relation (ddr);
1128 loops.release ();
1129 partition->kind = PKIND_MEMCPY;
1130 partition->main_dr = single_store;
1131 partition->secondary_dr = single_load;
1132 partition->niter = nb_iter;
1136 /* For a data reference REF, return the declaration of its base
1137 address or NULL_TREE if the base is not determined. */
1139 static tree
1140 ref_base_address (data_reference_p dr)
1142 tree base_address = DR_BASE_ADDRESS (dr);
1143 if (base_address
1144 && TREE_CODE (base_address) == ADDR_EXPR)
1145 return TREE_OPERAND (base_address, 0);
1147 return base_address;
1150 /* Returns true when PARTITION1 and PARTITION2 have similar memory
1151 accesses in RDG. */
1153 static bool
1154 similar_memory_accesses (struct graph *rdg, partition_t partition1,
1155 partition_t partition2)
1157 unsigned i, j, k, l;
1158 bitmap_iterator bi, bj;
1159 data_reference_p ref1, ref2;
1161 /* First check whether in the intersection of the two partitions are
1162 any loads or stores. Common loads are the situation that happens
1163 most often. */
1164 EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
1165 if (RDG_MEM_WRITE_STMT (rdg, i)
1166 || RDG_MEM_READS_STMT (rdg, i))
1167 return true;
1169 /* Then check all data-references against each other. */
1170 EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
1171 if (RDG_MEM_WRITE_STMT (rdg, i)
1172 || RDG_MEM_READS_STMT (rdg, i))
1173 EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
1174 if (RDG_MEM_WRITE_STMT (rdg, j)
1175 || RDG_MEM_READS_STMT (rdg, j))
1177 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
1179 tree base1 = ref_base_address (ref1);
1180 if (base1)
1181 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
1182 if (base1 == ref_base_address (ref2))
1183 return true;
1187 return false;
1190 /* Aggregate several components into a useful partition that is
1191 registered in the PARTITIONS vector. Partitions will be
1192 distributed in different loops. */
1194 static void
1195 rdg_build_partitions (struct graph *rdg,
1196 vec<gimple> starting_stmts,
1197 vec<partition_t> *partitions)
1199 bitmap processed = BITMAP_ALLOC (NULL);
1200 int i;
1201 gimple stmt;
1203 FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
1205 int v = rdg_vertex_for_stmt (rdg, stmt);
1207 if (dump_file && (dump_flags & TDF_DETAILS))
1208 fprintf (dump_file,
1209 "ldist asked to generate code for vertex %d\n", v);
1211 /* If the vertex is already contained in another partition so
1212 is the partition rooted at it. */
1213 if (bitmap_bit_p (processed, v))
1214 continue;
1216 partition_t partition = build_rdg_partition_for_vertex (rdg, v);
1217 bitmap_ior_into (processed, partition->stmts);
1219 if (dump_file && (dump_flags & TDF_DETAILS))
1221 fprintf (dump_file, "ldist useful partition:\n");
1222 dump_bitmap (dump_file, partition->stmts);
1225 partitions->safe_push (partition);
1228 /* All vertices should have been assigned to at least one partition now,
1229 other than vertices belonging to dead code. */
1231 BITMAP_FREE (processed);
1234 /* Dump to FILE the PARTITIONS. */
1236 static void
1237 dump_rdg_partitions (FILE *file, vec<partition_t> partitions)
1239 int i;
1240 partition_t partition;
1242 FOR_EACH_VEC_ELT (partitions, i, partition)
1243 debug_bitmap_file (file, partition->stmts);
1246 /* Debug PARTITIONS. */
1247 extern void debug_rdg_partitions (vec<partition_t> );
1249 DEBUG_FUNCTION void
1250 debug_rdg_partitions (vec<partition_t> partitions)
1252 dump_rdg_partitions (stderr, partitions);
1255 /* Returns the number of read and write operations in the RDG. */
1257 static int
1258 number_of_rw_in_rdg (struct graph *rdg)
1260 int i, res = 0;
1262 for (i = 0; i < rdg->n_vertices; i++)
1264 if (RDG_MEM_WRITE_STMT (rdg, i))
1265 ++res;
1267 if (RDG_MEM_READS_STMT (rdg, i))
1268 ++res;
1271 return res;
1274 /* Returns the number of read and write operations in a PARTITION of
1275 the RDG. */
1277 static int
1278 number_of_rw_in_partition (struct graph *rdg, partition_t partition)
1280 int res = 0;
1281 unsigned i;
1282 bitmap_iterator ii;
1284 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
1286 if (RDG_MEM_WRITE_STMT (rdg, i))
1287 ++res;
1289 if (RDG_MEM_READS_STMT (rdg, i))
1290 ++res;
1293 return res;
1296 /* Returns true when one of the PARTITIONS contains all the read or
1297 write operations of RDG. */
1299 static bool
1300 partition_contains_all_rw (struct graph *rdg,
1301 vec<partition_t> partitions)
1303 int i;
1304 partition_t partition;
1305 int nrw = number_of_rw_in_rdg (rdg);
1307 FOR_EACH_VEC_ELT (partitions, i, partition)
1308 if (nrw == number_of_rw_in_partition (rdg, partition))
1309 return true;
1311 return false;
1314 /* Compute partition dependence created by the data references in DRS1
1315 and DRS2 and modify and return DIR according to that. */
1317 static int
1318 pg_add_dependence_edges (struct graph *rdg, vec<loop_p> loops, int dir,
1319 vec<data_reference_p> drs1,
1320 vec<data_reference_p> drs2)
1322 data_reference_p dr1, dr2;
1324 /* dependence direction - 0 is no dependence, -1 is back,
1325 1 is forth, 2 is both (we can stop then, merging will occur). */
1326 for (int ii = 0; drs1.iterate (ii, &dr1); ++ii)
1327 for (int jj = 0; drs2.iterate (jj, &dr2); ++jj)
1329 int this_dir = 1;
1330 ddr_p ddr;
1331 /* Re-shuffle data-refs to be in dominator order. */
1332 if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
1333 > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
1335 data_reference_p tem = dr1;
1336 dr1 = dr2;
1337 dr2 = tem;
1338 this_dir = -this_dir;
1340 ddr = initialize_data_dependence_relation (dr1, dr2, loops);
1341 compute_affine_dependence (ddr, loops[0]);
1342 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1343 this_dir = 2;
1344 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1346 if (DDR_REVERSED_P (ddr))
1348 data_reference_p tem = dr1;
1349 dr1 = dr2;
1350 dr2 = tem;
1351 this_dir = -this_dir;
1353 /* Known dependences can still be unordered througout the
1354 iteration space, see gcc.dg/tree-ssa/ldist-16.c. */
1355 if (DDR_NUM_DIST_VECTS (ddr) != 1)
1356 this_dir = 2;
1357 /* If the overlap is exact preserve stmt order. */
1358 else if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1))
1360 else
1362 /* Else as the distance vector is lexicographic positive
1363 swap the dependence direction. */
1364 this_dir = -this_dir;
1367 else
1368 this_dir = 0;
1369 free_dependence_relation (ddr);
1370 if (dir == 0)
1371 dir = this_dir;
1372 else if (dir != this_dir)
1373 return 2;
1375 return dir;
1378 /* Compare postorder number of the partition graph vertices V1 and V2. */
1380 static int
1381 pgcmp (const void *v1_, const void *v2_)
1383 const vertex *v1 = (const vertex *)v1_;
1384 const vertex *v2 = (const vertex *)v2_;
1385 return v2->post - v1->post;
1388 /* Distributes the code from LOOP in such a way that producer
1389 statements are placed before consumer statements. Tries to separate
1390 only the statements from STMTS into separate loops.
1391 Returns the number of distributed loops. */
1393 static int
1394 distribute_loop (struct loop *loop, vec<gimple> stmts,
1395 control_dependences *cd, int *nb_calls)
1397 struct graph *rdg;
1398 vec<partition_t> partitions;
1399 partition_t partition;
1400 bool any_builtin;
1401 int i, nbp;
1402 graph *pg = NULL;
1403 int num_sccs = 1;
1405 *nb_calls = 0;
1406 stack_vec<loop_p, 3> loop_nest;
1407 if (!find_loop_nest (loop, &loop_nest))
1408 return 0;
1410 rdg = build_rdg (loop_nest, cd);
1411 if (!rdg)
1413 if (dump_file && (dump_flags & TDF_DETAILS))
1414 fprintf (dump_file,
1415 "Loop %d not distributed: failed to build the RDG.\n",
1416 loop->num);
1418 return 0;
1421 if (dump_file && (dump_flags & TDF_DETAILS))
1422 dump_rdg (dump_file, rdg);
1424 partitions.create (3);
1425 rdg_build_partitions (rdg, stmts, &partitions);
1427 any_builtin = false;
1428 FOR_EACH_VEC_ELT (partitions, i, partition)
1430 classify_partition (loop, rdg, partition);
1431 any_builtin |= partition_builtin_p (partition);
1434 /* If we are only distributing patterns but did not detect any,
1435 simply bail out. */
1436 if (!flag_tree_loop_distribution
1437 && !any_builtin)
1439 nbp = 0;
1440 goto ldist_done;
1443 /* If we are only distributing patterns fuse all partitions that
1444 were not classified as builtins. This also avoids chopping
1445 a loop into pieces, separated by builtin calls. That is, we
1446 only want no or a single loop body remaining. */
1447 partition_t into;
1448 if (!flag_tree_loop_distribution)
1450 for (i = 0; partitions.iterate (i, &into); ++i)
1451 if (!partition_builtin_p (into))
1452 break;
1453 for (++i; partitions.iterate (i, &partition); ++i)
1454 if (!partition_builtin_p (partition))
1456 if (dump_file && (dump_flags & TDF_DETAILS))
1458 fprintf (dump_file, "fusing non-builtin partitions\n");
1459 dump_bitmap (dump_file, into->stmts);
1460 dump_bitmap (dump_file, partition->stmts);
1462 partition_merge_into (into, partition);
1463 partitions.unordered_remove (i);
1464 partition_free (partition);
1465 i--;
1469 /* Due to limitations in the transform phase we have to fuse all
1470 reduction partitions into the last partition so the existing
1471 loop will contain all loop-closed PHI nodes. */
1472 for (i = 0; partitions.iterate (i, &into); ++i)
1473 if (partition_reduction_p (into))
1474 break;
1475 for (i = i + 1; partitions.iterate (i, &partition); ++i)
1476 if (partition_reduction_p (partition))
1478 if (dump_file && (dump_flags & TDF_DETAILS))
1480 fprintf (dump_file, "fusing partitions\n");
1481 dump_bitmap (dump_file, into->stmts);
1482 dump_bitmap (dump_file, partition->stmts);
1483 fprintf (dump_file, "because they have reductions\n");
1485 partition_merge_into (into, partition);
1486 partitions.unordered_remove (i);
1487 partition_free (partition);
1488 i--;
1491 /* Apply our simple cost model - fuse partitions with similar
1492 memory accesses. */
1493 for (i = 0; partitions.iterate (i, &into); ++i)
1495 if (partition_builtin_p (into))
1496 continue;
1497 for (int j = i + 1;
1498 partitions.iterate (j, &partition); ++j)
1500 if (!partition_builtin_p (partition)
1501 && similar_memory_accesses (rdg, into, partition))
1503 if (dump_file && (dump_flags & TDF_DETAILS))
1505 fprintf (dump_file, "fusing partitions\n");
1506 dump_bitmap (dump_file, into->stmts);
1507 dump_bitmap (dump_file, partition->stmts);
1508 fprintf (dump_file, "because they have similar "
1509 "memory accesses\n");
1511 partition_merge_into (into, partition);
1512 partitions.unordered_remove (j);
1513 partition_free (partition);
1514 j--;
1519 /* Build the partition dependency graph. */
1520 if (partitions.length () > 1)
1522 pg = new_graph (partitions.length ());
1523 struct pgdata {
1524 partition_t partition;
1525 vec<data_reference_p> writes;
1526 vec<data_reference_p> reads;
1528 #define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
1529 for (i = 0; partitions.iterate (i, &partition); ++i)
1531 vertex *v = &pg->vertices[i];
1532 pgdata *data = new pgdata;
1533 data_reference_p dr;
1534 /* FIXME - leaks. */
1535 v->data = data;
1536 bitmap_iterator bi;
1537 unsigned j;
1538 data->partition = partition;
1539 data->reads = vNULL;
1540 data->writes = vNULL;
1541 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, j, bi)
1542 for (int k = 0; RDG_DATAREFS (rdg, j).iterate (k, &dr); ++k)
1543 if (DR_IS_READ (dr))
1544 data->reads.safe_push (dr);
1545 else
1546 data->writes.safe_push (dr);
1548 partition_t partition1, partition2;
1549 for (i = 0; partitions.iterate (i, &partition1); ++i)
1550 for (int j = i + 1; partitions.iterate (j, &partition2); ++j)
1552 /* dependence direction - 0 is no dependence, -1 is back,
1553 1 is forth, 2 is both (we can stop then, merging will occur). */
1554 int dir = 0;
1555 dir = pg_add_dependence_edges (rdg, loop_nest, dir,
1556 PGDATA(i)->writes,
1557 PGDATA(j)->reads);
1558 if (dir != 2)
1559 dir = pg_add_dependence_edges (rdg, loop_nest, dir,
1560 PGDATA(i)->reads,
1561 PGDATA(j)->writes);
1562 if (dir != 2)
1563 dir = pg_add_dependence_edges (rdg, loop_nest, dir,
1564 PGDATA(i)->writes,
1565 PGDATA(j)->writes);
1566 if (dir == 1 || dir == 2)
1567 add_edge (pg, i, j);
1568 if (dir == -1 || dir == 2)
1569 add_edge (pg, j, i);
1572 /* Add edges to the reduction partition (if any) to force it last. */
1573 unsigned j;
1574 for (j = 0; partitions.iterate (j, &partition); ++j)
1575 if (partition_reduction_p (partition))
1576 break;
1577 if (j < partitions.length ())
1579 for (unsigned i = 0; partitions.iterate (i, &partition); ++i)
1580 if (i != j)
1581 add_edge (pg, i, j);
1584 /* Compute partitions we cannot separate and fuse them. */
1585 num_sccs = graphds_scc (pg, NULL);
1586 for (i = 0; i < num_sccs; ++i)
1588 partition_t first;
1589 int j;
1590 for (j = 0; partitions.iterate (j, &first); ++j)
1591 if (pg->vertices[j].component == i)
1592 break;
1593 for (j = j + 1; partitions.iterate (j, &partition); ++j)
1594 if (pg->vertices[j].component == i)
1596 if (dump_file && (dump_flags & TDF_DETAILS))
1598 fprintf (dump_file, "fusing partitions\n");
1599 dump_bitmap (dump_file, first->stmts);
1600 dump_bitmap (dump_file, partition->stmts);
1601 fprintf (dump_file, "because they are in the same "
1602 "dependence SCC\n");
1604 partition_merge_into (first, partition);
1605 partitions[j] = NULL;
1606 partition_free (partition);
1607 PGDATA (j)->partition = NULL;
1611 /* Now order the remaining nodes in postorder. */
1612 qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
1613 partitions.truncate (0);
1614 for (i = 0; i < pg->n_vertices; ++i)
1616 pgdata *data = PGDATA (i);
1617 if (data->partition)
1618 partitions.safe_push (data->partition);
1619 data->reads.release ();
1620 data->writes.release ();
1621 delete data;
1623 gcc_assert (partitions.length () == (unsigned)num_sccs);
1624 free_graph (pg);
1627 nbp = partitions.length ();
1628 if (nbp == 0
1629 || (nbp == 1 && !partition_builtin_p (partitions[0]))
1630 || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
1632 nbp = 0;
1633 goto ldist_done;
1636 if (dump_file && (dump_flags & TDF_DETAILS))
1637 dump_rdg_partitions (dump_file, partitions);
1639 FOR_EACH_VEC_ELT (partitions, i, partition)
1641 if (partition_builtin_p (partition))
1642 (*nb_calls)++;
1643 generate_code_for_partition (loop, partition, i < nbp - 1);
1646 ldist_done:
1648 FOR_EACH_VEC_ELT (partitions, i, partition)
1649 partition_free (partition);
1650 partitions.release ();
1652 free_rdg (rdg);
1653 return nbp - *nb_calls;
1656 /* Distribute all loops in the current function. */
1658 static unsigned int
1659 tree_loop_distribution (void)
1661 struct loop *loop;
1662 loop_iterator li;
1663 bool changed = false;
1664 basic_block bb;
1665 control_dependences *cd = NULL;
1667 FOR_ALL_BB (bb)
1669 gimple_stmt_iterator gsi;
1670 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1671 gimple_set_uid (gsi_stmt (gsi), -1);
1672 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1673 gimple_set_uid (gsi_stmt (gsi), -1);
1676 /* We can at the moment only distribute non-nested loops, thus restrict
1677 walking to innermost loops. */
1678 FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
1680 vec<gimple> work_list = vNULL;
1681 basic_block *bbs;
1682 int num = loop->num;
1683 unsigned int i;
1685 /* If the loop doesn't have a single exit we will fail anyway,
1686 so do that early. */
1687 if (!single_exit (loop))
1688 continue;
1690 /* Only optimize hot loops. */
1691 if (!optimize_loop_for_speed_p (loop))
1692 continue;
1694 /* Initialize the worklist with stmts we seed the partitions with. */
1695 bbs = get_loop_body_in_dom_order (loop);
1696 for (i = 0; i < loop->num_nodes; ++i)
1698 gimple_stmt_iterator gsi;
1699 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
1701 gimple phi = gsi_stmt (gsi);
1702 if (virtual_operand_p (gimple_phi_result (phi)))
1703 continue;
1704 /* Distribute stmts which have defs that are used outside of
1705 the loop. */
1706 if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
1707 continue;
1708 work_list.safe_push (phi);
1710 for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
1712 gimple stmt = gsi_stmt (gsi);
1714 /* If there is a stmt with side-effects bail out - we
1715 cannot and should not distribute this loop. */
1716 if (gimple_has_side_effects (stmt))
1718 work_list.truncate (0);
1719 goto out;
1722 /* Distribute stmts which have defs that are used outside of
1723 the loop. */
1724 if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
1726 /* Otherwise only distribute stores for now. */
1727 else if (!gimple_vdef (stmt))
1728 continue;
1730 work_list.safe_push (stmt);
1733 out:
1734 free (bbs);
1736 int nb_generated_loops = 0;
1737 int nb_generated_calls = 0;
1738 location_t loc = find_loop_location (loop);
1739 if (work_list.length () > 0)
1741 if (!cd)
1743 calculate_dominance_info (CDI_DOMINATORS);
1744 calculate_dominance_info (CDI_POST_DOMINATORS);
1745 cd = new control_dependences (create_edge_list ());
1746 free_dominance_info (CDI_POST_DOMINATORS);
1748 nb_generated_loops = distribute_loop (loop, work_list, cd,
1749 &nb_generated_calls);
1752 if (nb_generated_loops + nb_generated_calls > 0)
1754 changed = true;
1755 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
1756 loc, "Loop %d distributed: split to %d loops "
1757 "and %d library calls.\n",
1758 num, nb_generated_loops, nb_generated_calls);
1760 else if (dump_file && (dump_flags & TDF_DETAILS))
1761 fprintf (dump_file, "Loop %d is the same.\n", num);
1763 work_list.release ();
1766 if (cd)
1767 delete cd;
1769 if (changed)
1771 mark_virtual_operands_for_renaming (cfun);
1772 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1775 #ifdef ENABLE_CHECKING
1776 verify_loop_structure ();
1777 #endif
1779 return 0;
1782 static bool
1783 gate_tree_loop_distribution (void)
1785 return flag_tree_loop_distribution
1786 || flag_tree_loop_distribute_patterns;
1789 namespace {
1791 const pass_data pass_data_loop_distribution =
1793 GIMPLE_PASS, /* type */
1794 "ldist", /* name */
1795 OPTGROUP_LOOP, /* optinfo_flags */
1796 true, /* has_gate */
1797 true, /* has_execute */
1798 TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
1799 ( PROP_cfg | PROP_ssa ), /* properties_required */
1800 0, /* properties_provided */
1801 0, /* properties_destroyed */
1802 0, /* todo_flags_start */
1803 TODO_verify_ssa, /* todo_flags_finish */
1806 class pass_loop_distribution : public gimple_opt_pass
1808 public:
1809 pass_loop_distribution (gcc::context *ctxt)
1810 : gimple_opt_pass (pass_data_loop_distribution, ctxt)
1813 /* opt_pass methods: */
1814 bool gate () { return gate_tree_loop_distribution (); }
1815 unsigned int execute () { return tree_loop_distribution (); }
1817 }; // class pass_loop_distribution
1819 } // anon namespace
1821 gimple_opt_pass *
1822 make_pass_loop_distribution (gcc::context *ctxt)
1824 return new pass_loop_distribution (ctxt);