* gcc.dg/atomic-compare-exchange-1.c,
[official-gcc.git] / gcc / tree-loop-distribution.c
blob4f9b848847a9718c3b6bfac07d7eab34aa209b0a
1 /* Loop distribution.
2 Copyright (C) 2006-2013 Free Software Foundation, Inc.
3 Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
4 and Sebastian Pop <sebastian.pop@amd.com>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 3, or (at your option) any
11 later version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* This pass performs loop distribution: for example, the loop
24 |DO I = 2, N
25 | A(I) = B(I) + C
26 | D(I) = A(I-1)*E
27 |ENDDO
29 is transformed to
31 |DOALL I = 2, N
32 | A(I) = B(I) + C
33 |ENDDO
35 |DOALL I = 2, N
36 | D(I) = A(I-1)*E
37 |ENDDO
39 This pass uses an RDG, Reduced Dependence Graph built on top of the
40 data dependence relations. The RDG is then topologically sorted to
41 obtain a map of information producers/consumers based on which it
42 generates the new loops. */
44 #include "config.h"
45 #include "system.h"
46 #include "coretypes.h"
47 #include "tree.h"
48 #include "gimple.h"
49 #include "gimple-ssa.h"
50 #include "tree-cfg.h"
51 #include "tree-phinodes.h"
52 #include "ssa-iterators.h"
53 #include "tree-ssanames.h"
54 #include "tree-ssa-loop-manip.h"
55 #include "tree-ssa-loop.h"
56 #include "tree-into-ssa.h"
57 #include "tree-ssa.h"
58 #include "cfgloop.h"
59 #include "tree-chrec.h"
60 #include "tree-data-ref.h"
61 #include "tree-scalar-evolution.h"
62 #include "tree-pass.h"
63 #include "gimple-pretty-print.h"
64 #include "tree-vectorizer.h"
67 /* A Reduced Dependence Graph (RDG) vertex representing a statement. */
68 typedef struct rdg_vertex
70 /* The statement represented by this vertex. */
71 gimple stmt;
73 /* Vector of data-references in this statement. */
74 vec<data_reference_p> datarefs;
76 /* True when the statement contains a write to memory. */
77 bool has_mem_write;
79 /* True when the statement contains a read from memory. */
80 bool has_mem_reads;
81 } *rdg_vertex_p;
83 #define RDGV_STMT(V) ((struct rdg_vertex *) ((V)->data))->stmt
84 #define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
85 #define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
86 #define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
87 #define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
88 #define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
89 #define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
90 #define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
92 /* Data dependence type. */
94 enum rdg_dep_type
96 /* Read After Write (RAW). */
97 flow_dd = 'f',
99 /* Control dependence (execute conditional on). */
100 control_dd = 'c'
103 /* Dependence information attached to an edge of the RDG. */
105 typedef struct rdg_edge
107 /* Type of the dependence. */
108 enum rdg_dep_type type;
109 } *rdg_edge_p;
111 #define RDGE_TYPE(E) ((struct rdg_edge *) ((E)->data))->type
113 /* Dump vertex I in RDG to FILE. */
115 static void
116 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
118 struct vertex *v = &(rdg->vertices[i]);
119 struct graph_edge *e;
121 fprintf (file, "(vertex %d: (%s%s) (in:", i,
122 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
123 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
125 if (v->pred)
126 for (e = v->pred; e; e = e->pred_next)
127 fprintf (file, " %d", e->src);
129 fprintf (file, ") (out:");
131 if (v->succ)
132 for (e = v->succ; e; e = e->succ_next)
133 fprintf (file, " %d", e->dest);
135 fprintf (file, ")\n");
136 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
137 fprintf (file, ")\n");
140 /* Call dump_rdg_vertex on stderr. */
142 DEBUG_FUNCTION void
143 debug_rdg_vertex (struct graph *rdg, int i)
145 dump_rdg_vertex (stderr, rdg, i);
148 /* Dump the reduced dependence graph RDG to FILE. */
150 static void
151 dump_rdg (FILE *file, struct graph *rdg)
153 fprintf (file, "(rdg\n");
154 for (int i = 0; i < rdg->n_vertices; i++)
155 dump_rdg_vertex (file, rdg, i);
156 fprintf (file, ")\n");
159 /* Call dump_rdg on stderr. */
161 DEBUG_FUNCTION void
162 debug_rdg (struct graph *rdg)
164 dump_rdg (stderr, rdg);
167 static void
168 dot_rdg_1 (FILE *file, struct graph *rdg)
170 int i;
171 pretty_printer buffer;
172 pp_needs_newline (&buffer) = false;
173 buffer.buffer->stream = file;
175 fprintf (file, "digraph RDG {\n");
177 for (i = 0; i < rdg->n_vertices; i++)
179 struct vertex *v = &(rdg->vertices[i]);
180 struct graph_edge *e;
182 fprintf (file, "%d [label=\"[%d] ", i, i);
183 pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
184 pp_flush (&buffer);
185 fprintf (file, "\"]\n");
187 /* Highlight reads from memory. */
188 if (RDG_MEM_READS_STMT (rdg, i))
189 fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
191 /* Highlight stores to memory. */
192 if (RDG_MEM_WRITE_STMT (rdg, i))
193 fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
195 if (v->succ)
196 for (e = v->succ; e; e = e->succ_next)
197 switch (RDGE_TYPE (e))
199 case flow_dd:
200 /* These are the most common dependences: don't print these. */
201 fprintf (file, "%d -> %d \n", i, e->dest);
202 break;
204 case control_dd:
205 fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
206 break;
208 default:
209 gcc_unreachable ();
213 fprintf (file, "}\n\n");
216 /* Display the Reduced Dependence Graph using dotty. */
218 DEBUG_FUNCTION void
219 dot_rdg (struct graph *rdg)
221 /* When debugging, you may want to enable the following code. */
222 #if 1
223 FILE *file = popen ("dot -Tx11", "w");
224 if (!file)
225 return;
226 dot_rdg_1 (file, rdg);
227 fflush (file);
228 close (fileno (file));
229 pclose (file);
230 #else
231 dot_rdg_1 (stderr, rdg);
232 #endif
235 /* Returns the index of STMT in RDG. */
237 static int
238 rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple stmt)
240 int index = gimple_uid (stmt);
241 gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
242 return index;
245 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
246 the index of DEF in RDG. */
248 static void
249 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
251 use_operand_p imm_use_p;
252 imm_use_iterator iterator;
254 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
256 struct graph_edge *e;
257 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
259 if (use < 0)
260 continue;
262 e = add_edge (rdg, idef, use);
263 e->data = XNEW (struct rdg_edge);
264 RDGE_TYPE (e) = flow_dd;
268 /* Creates an edge for the control dependences of BB to the vertex V. */
270 static void
271 create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
272 int v, control_dependences *cd)
274 bitmap_iterator bi;
275 unsigned edge_n;
276 EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
277 0, edge_n, bi)
279 basic_block cond_bb = cd->get_edge (edge_n)->src;
280 gimple stmt = last_stmt (cond_bb);
281 if (stmt && is_ctrl_stmt (stmt))
283 struct graph_edge *e;
284 int c = rdg_vertex_for_stmt (rdg, stmt);
285 if (c < 0)
286 continue;
288 e = add_edge (rdg, c, v);
289 e->data = XNEW (struct rdg_edge);
290 RDGE_TYPE (e) = control_dd;
295 /* Creates the edges of the reduced dependence graph RDG. */
297 static void
298 create_rdg_flow_edges (struct graph *rdg)
300 int i;
301 def_operand_p def_p;
302 ssa_op_iter iter;
304 for (i = 0; i < rdg->n_vertices; i++)
305 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
306 iter, SSA_OP_DEF)
307 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
310 /* Creates the edges of the reduced dependence graph RDG. */
312 static void
313 create_rdg_cd_edges (struct graph *rdg, control_dependences *cd)
315 int i;
317 for (i = 0; i < rdg->n_vertices; i++)
319 gimple stmt = RDG_STMT (rdg, i);
320 if (gimple_code (stmt) == GIMPLE_PHI)
322 edge_iterator ei;
323 edge e;
324 FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
325 create_edge_for_control_dependence (rdg, e->src, i, cd);
327 else
328 create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
332 /* Build the vertices of the reduced dependence graph RDG. Return false
333 if that failed. */
335 static bool
336 create_rdg_vertices (struct graph *rdg, vec<gimple> stmts, loop_p loop,
337 vec<data_reference_p> *datarefs)
339 int i;
340 gimple stmt;
342 FOR_EACH_VEC_ELT (stmts, i, stmt)
344 struct vertex *v = &(rdg->vertices[i]);
346 /* Record statement to vertex mapping. */
347 gimple_set_uid (stmt, i);
349 v->data = XNEW (struct rdg_vertex);
350 RDGV_STMT (v) = stmt;
351 RDGV_DATAREFS (v).create (0);
352 RDGV_HAS_MEM_WRITE (v) = false;
353 RDGV_HAS_MEM_READS (v) = false;
354 if (gimple_code (stmt) == GIMPLE_PHI)
355 continue;
357 unsigned drp = datarefs->length ();
358 if (!find_data_references_in_stmt (loop, stmt, datarefs))
359 return false;
360 for (unsigned j = drp; j < datarefs->length (); ++j)
362 data_reference_p dr = (*datarefs)[j];
363 if (DR_IS_READ (dr))
364 RDGV_HAS_MEM_READS (v) = true;
365 else
366 RDGV_HAS_MEM_WRITE (v) = true;
367 RDGV_DATAREFS (v).safe_push (dr);
370 return true;
373 /* Initialize STMTS with all the statements of LOOP. The order in
374 which we discover statements is important as
375 generate_loops_for_partition is using the same traversal for
376 identifying statements in loop copies. */
378 static void
379 stmts_from_loop (struct loop *loop, vec<gimple> *stmts)
381 unsigned int i;
382 basic_block *bbs = get_loop_body_in_dom_order (loop);
384 for (i = 0; i < loop->num_nodes; i++)
386 basic_block bb = bbs[i];
387 gimple_stmt_iterator bsi;
388 gimple stmt;
390 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
391 if (!virtual_operand_p (gimple_phi_result (gsi_stmt (bsi))))
392 stmts->safe_push (gsi_stmt (bsi));
394 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
396 stmt = gsi_stmt (bsi);
397 if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
398 stmts->safe_push (stmt);
402 free (bbs);
405 /* Free the reduced dependence graph RDG. */
407 static void
408 free_rdg (struct graph *rdg)
410 int i;
412 for (i = 0; i < rdg->n_vertices; i++)
414 struct vertex *v = &(rdg->vertices[i]);
415 struct graph_edge *e;
417 for (e = v->succ; e; e = e->succ_next)
418 free (e->data);
420 if (v->data)
422 gimple_set_uid (RDGV_STMT (v), -1);
423 free_data_refs (RDGV_DATAREFS (v));
424 free (v->data);
428 free_graph (rdg);
431 /* Build the Reduced Dependence Graph (RDG) with one vertex per
432 statement of the loop nest LOOP_NEST, and one edge per data dependence or
433 scalar dependence. */
435 static struct graph *
436 build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
438 struct graph *rdg;
439 vec<data_reference_p> datarefs;
441 /* Create the RDG vertices from the stmts of the loop nest. */
442 stack_vec<gimple, 10> stmts;
443 stmts_from_loop (loop_nest[0], &stmts);
444 rdg = new_graph (stmts.length ());
445 datarefs.create (10);
446 if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs))
448 datarefs.release ();
449 free_rdg (rdg);
450 return NULL;
452 stmts.release ();
454 create_rdg_flow_edges (rdg);
455 if (cd)
456 create_rdg_cd_edges (rdg, cd);
458 datarefs.release ();
460 return rdg;
465 enum partition_kind {
466 PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY
469 typedef struct partition_s
471 bitmap stmts;
472 bitmap loops;
473 bool reduction_p;
474 enum partition_kind kind;
475 /* data-references a kind != PKIND_NORMAL partition is about. */
476 data_reference_p main_dr;
477 data_reference_p secondary_dr;
478 tree niter;
479 } *partition_t;
482 /* Allocate and initialize a partition from BITMAP. */
484 static partition_t
485 partition_alloc (bitmap stmts, bitmap loops)
487 partition_t partition = XCNEW (struct partition_s);
488 partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
489 partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
490 partition->reduction_p = false;
491 partition->kind = PKIND_NORMAL;
492 return partition;
495 /* Free PARTITION. */
497 static void
498 partition_free (partition_t partition)
500 BITMAP_FREE (partition->stmts);
501 BITMAP_FREE (partition->loops);
502 free (partition);
505 /* Returns true if the partition can be generated as a builtin. */
507 static bool
508 partition_builtin_p (partition_t partition)
510 return partition->kind != PKIND_NORMAL;
513 /* Returns true if the partition contains a reduction. */
515 static bool
516 partition_reduction_p (partition_t partition)
518 return partition->reduction_p;
521 /* Merge PARTITION into the partition DEST. */
523 static void
524 partition_merge_into (partition_t dest, partition_t partition)
526 dest->kind = PKIND_NORMAL;
527 bitmap_ior_into (dest->stmts, partition->stmts);
528 if (partition_reduction_p (partition))
529 dest->reduction_p = true;
533 /* Returns true when DEF is an SSA_NAME defined in LOOP and used after
534 the LOOP. */
536 static bool
537 ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
539 imm_use_iterator imm_iter;
540 use_operand_p use_p;
542 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
544 gimple use_stmt = USE_STMT (use_p);
545 if (!is_gimple_debug (use_stmt)
546 && loop != loop_containing_stmt (use_stmt))
547 return true;
550 return false;
553 /* Returns true when STMT defines a scalar variable used after the
554 loop LOOP. */
556 static bool
557 stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple stmt)
559 def_operand_p def_p;
560 ssa_op_iter op_iter;
562 if (gimple_code (stmt) == GIMPLE_PHI)
563 return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);
565 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
566 if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
567 return true;
569 return false;
572 /* Return a copy of LOOP placed before LOOP. */
574 static struct loop *
575 copy_loop_before (struct loop *loop)
577 struct loop *res;
578 edge preheader = loop_preheader_edge (loop);
580 initialize_original_copy_tables ();
581 res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, preheader);
582 gcc_assert (res != NULL);
583 free_original_copy_tables ();
584 delete_update_ssa ();
586 return res;
589 /* Creates an empty basic block after LOOP. */
591 static void
592 create_bb_after_loop (struct loop *loop)
594 edge exit = single_exit (loop);
596 if (!exit)
597 return;
599 split_edge (exit);
602 /* Generate code for PARTITION from the code in LOOP. The loop is
603 copied when COPY_P is true. All the statements not flagged in the
604 PARTITION bitmap are removed from the loop or from its copy. The
605 statements are indexed in sequence inside a basic block, and the
606 basic blocks of a loop are taken in dom order. */
608 static void
609 generate_loops_for_partition (struct loop *loop, partition_t partition,
610 bool copy_p)
612 unsigned i;
613 gimple_stmt_iterator bsi;
614 basic_block *bbs;
616 if (copy_p)
618 loop = copy_loop_before (loop);
619 gcc_assert (loop != NULL);
620 create_preheader (loop, CP_SIMPLE_PREHEADERS);
621 create_bb_after_loop (loop);
624 /* Remove stmts not in the PARTITION bitmap. */
625 bbs = get_loop_body_in_dom_order (loop);
627 if (MAY_HAVE_DEBUG_STMTS)
628 for (i = 0; i < loop->num_nodes; i++)
630 basic_block bb = bbs[i];
632 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
634 gimple phi = gsi_stmt (bsi);
635 if (!virtual_operand_p (gimple_phi_result (phi))
636 && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
637 reset_debug_uses (phi);
640 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
642 gimple stmt = gsi_stmt (bsi);
643 if (gimple_code (stmt) != GIMPLE_LABEL
644 && !is_gimple_debug (stmt)
645 && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
646 reset_debug_uses (stmt);
650 for (i = 0; i < loop->num_nodes; i++)
652 basic_block bb = bbs[i];
654 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
656 gimple phi = gsi_stmt (bsi);
657 if (!virtual_operand_p (gimple_phi_result (phi))
658 && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
659 remove_phi_node (&bsi, true);
660 else
661 gsi_next (&bsi);
664 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
666 gimple stmt = gsi_stmt (bsi);
667 if (gimple_code (stmt) != GIMPLE_LABEL
668 && !is_gimple_debug (stmt)
669 && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
671 /* Choose an arbitrary path through the empty CFG part
672 that this unnecessary control stmt controls. */
673 if (gimple_code (stmt) == GIMPLE_COND)
675 gimple_cond_make_false (stmt);
676 update_stmt (stmt);
678 else if (gimple_code (stmt) == GIMPLE_SWITCH)
680 gimple_switch_set_index
681 (stmt, CASE_LOW (gimple_switch_label (stmt, 1)));
682 update_stmt (stmt);
684 else
686 unlink_stmt_vdef (stmt);
687 gsi_remove (&bsi, true);
688 release_defs (stmt);
689 continue;
692 gsi_next (&bsi);
696 free (bbs);
699 /* Build the size argument for a memory operation call. */
701 static tree
702 build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter)
704 tree size;
705 size = fold_build2_loc (loc, MULT_EXPR, sizetype,
706 fold_convert_loc (loc, sizetype, nb_iter),
707 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
708 return fold_convert_loc (loc, size_type_node, size);
711 /* Build an address argument for a memory operation call. */
713 static tree
714 build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
716 tree addr_base;
718 addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
719 addr_base = fold_convert_loc (loc, sizetype, addr_base);
721 /* Test for a negative stride, iterating over every element. */
722 if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
724 addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
725 fold_convert_loc (loc, sizetype, nb_bytes));
726 addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
727 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
730 return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
733 /* If VAL memory representation contains the same value in all bytes,
734 return that value, otherwise return -1.
735 E.g. for 0x24242424 return 0x24, for IEEE double
736 747708026454360457216.0 return 0x44, etc. */
738 static int
739 const_with_all_bytes_same (tree val)
741 unsigned char buf[64];
742 int i, len;
744 if (integer_zerop (val)
745 || real_zerop (val)
746 || (TREE_CODE (val) == CONSTRUCTOR
747 && !TREE_CLOBBER_P (val)
748 && CONSTRUCTOR_NELTS (val) == 0))
749 return 0;
751 if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
752 return -1;
754 len = native_encode_expr (val, buf, sizeof (buf));
755 if (len == 0)
756 return -1;
757 for (i = 1; i < len; i++)
758 if (buf[i] != buf[0])
759 return -1;
760 return buf[0];
763 /* Generate a call to memset for PARTITION in LOOP. */
765 static void
766 generate_memset_builtin (struct loop *loop, partition_t partition)
768 gimple_stmt_iterator gsi;
769 gimple stmt, fn_call;
770 tree mem, fn, nb_bytes;
771 location_t loc;
772 tree val;
774 stmt = DR_STMT (partition->main_dr);
775 loc = gimple_location (stmt);
777 /* The new statements will be placed before LOOP. */
778 gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
780 nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter);
781 nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
782 false, GSI_CONTINUE_LINKING);
783 mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
784 mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
785 false, GSI_CONTINUE_LINKING);
787 /* This exactly matches the pattern recognition in classify_partition. */
788 val = gimple_assign_rhs1 (stmt);
789 /* Handle constants like 0x15151515 and similarly
790 floating point constants etc. where all bytes are the same. */
791 int bytev = const_with_all_bytes_same (val);
792 if (bytev != -1)
793 val = build_int_cst (integer_type_node, bytev);
794 else if (TREE_CODE (val) == INTEGER_CST)
795 val = fold_convert (integer_type_node, val);
796 else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
798 gimple cstmt;
799 tree tem = make_ssa_name (integer_type_node, NULL);
800 cstmt = gimple_build_assign_with_ops (NOP_EXPR, tem, val, NULL_TREE);
801 gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
802 val = tem;
805 fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
806 fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
807 gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
809 if (dump_file && (dump_flags & TDF_DETAILS))
811 fprintf (dump_file, "generated memset");
812 if (bytev == 0)
813 fprintf (dump_file, " zero\n");
814 else
815 fprintf (dump_file, "\n");
819 /* Generate a call to memcpy for PARTITION in LOOP. */
821 static void
822 generate_memcpy_builtin (struct loop *loop, partition_t partition)
824 gimple_stmt_iterator gsi;
825 gimple stmt, fn_call;
826 tree dest, src, fn, nb_bytes;
827 location_t loc;
828 enum built_in_function kind;
830 stmt = DR_STMT (partition->main_dr);
831 loc = gimple_location (stmt);
833 /* The new statements will be placed before LOOP. */
834 gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
836 nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter);
837 nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
838 false, GSI_CONTINUE_LINKING);
839 dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
840 src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
841 if (ptr_derefs_may_alias_p (dest, src))
842 kind = BUILT_IN_MEMMOVE;
843 else
844 kind = BUILT_IN_MEMCPY;
846 dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
847 false, GSI_CONTINUE_LINKING);
848 src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
849 false, GSI_CONTINUE_LINKING);
850 fn = build_fold_addr_expr (builtin_decl_implicit (kind));
851 fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
852 gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
854 if (dump_file && (dump_flags & TDF_DETAILS))
856 if (kind == BUILT_IN_MEMCPY)
857 fprintf (dump_file, "generated memcpy\n");
858 else
859 fprintf (dump_file, "generated memmove\n");
863 /* Remove and destroy the loop LOOP. */
865 static void
866 destroy_loop (struct loop *loop)
868 unsigned nbbs = loop->num_nodes;
869 edge exit = single_exit (loop);
870 basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
871 basic_block *bbs;
872 unsigned i;
874 bbs = get_loop_body_in_dom_order (loop);
876 redirect_edge_pred (exit, src);
877 exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
878 exit->flags |= EDGE_FALLTHRU;
879 cancel_loop_tree (loop);
880 rescan_loop_exit (exit, false, true);
882 for (i = 0; i < nbbs; i++)
884 /* We have made sure to not leave any dangling uses of SSA
885 names defined in the loop. With the exception of virtuals.
886 Make sure we replace all uses of virtual defs that will remain
887 outside of the loop with the bare symbol as delete_basic_block
888 will release them. */
889 gimple_stmt_iterator gsi;
890 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
892 gimple phi = gsi_stmt (gsi);
893 if (virtual_operand_p (gimple_phi_result (phi)))
894 mark_virtual_phi_result_for_renaming (phi);
896 for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
898 gimple stmt = gsi_stmt (gsi);
899 tree vdef = gimple_vdef (stmt);
900 if (vdef && TREE_CODE (vdef) == SSA_NAME)
901 mark_virtual_operand_for_renaming (vdef);
903 delete_basic_block (bbs[i]);
905 free (bbs);
907 set_immediate_dominator (CDI_DOMINATORS, dest,
908 recompute_dominator (CDI_DOMINATORS, dest));
911 /* Generates code for PARTITION. */
913 static void
914 generate_code_for_partition (struct loop *loop,
915 partition_t partition, bool copy_p)
917 switch (partition->kind)
919 case PKIND_NORMAL:
920 /* Reductions all have to be in the last partition. */
921 gcc_assert (!partition_reduction_p (partition)
922 || !copy_p);
923 generate_loops_for_partition (loop, partition, copy_p);
924 return;
926 case PKIND_MEMSET:
927 generate_memset_builtin (loop, partition);
928 break;
930 case PKIND_MEMCPY:
931 generate_memcpy_builtin (loop, partition);
932 break;
934 default:
935 gcc_unreachable ();
938 /* Common tail for partitions we turn into a call. If this was the last
939 partition for which we generate code, we have to destroy the loop. */
940 if (!copy_p)
941 destroy_loop (loop);
945 /* Returns a partition with all the statements needed for computing
946 the vertex V of the RDG, also including the loop exit conditions. */
948 static partition_t
949 build_rdg_partition_for_vertex (struct graph *rdg, int v)
951 partition_t partition = partition_alloc (NULL, NULL);
952 stack_vec<int, 3> nodes;
953 unsigned i;
954 int x;
956 graphds_dfs (rdg, &v, 1, &nodes, false, NULL);
958 FOR_EACH_VEC_ELT (nodes, i, x)
960 bitmap_set_bit (partition->stmts, x);
961 bitmap_set_bit (partition->loops,
962 loop_containing_stmt (RDG_STMT (rdg, x))->num);
965 return partition;
968 /* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
969 For the moment we detect only the memset zero pattern. */
971 static void
972 classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
974 bitmap_iterator bi;
975 unsigned i;
976 tree nb_iter;
977 data_reference_p single_load, single_store;
978 bool volatiles_p = false;
980 partition->kind = PKIND_NORMAL;
981 partition->main_dr = NULL;
982 partition->secondary_dr = NULL;
983 partition->niter = NULL_TREE;
985 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
987 gimple stmt = RDG_STMT (rdg, i);
989 if (gimple_has_volatile_ops (stmt))
990 volatiles_p = true;
992 /* If the stmt has uses outside of the loop mark it as reduction. */
993 if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
995 partition->reduction_p = true;
996 return;
1000 /* Perform general partition disqualification for builtins. */
1001 if (volatiles_p
1002 || !flag_tree_loop_distribute_patterns)
1003 return;
1005 /* Detect memset and memcpy. */
1006 single_load = NULL;
1007 single_store = NULL;
1008 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
1010 gimple stmt = RDG_STMT (rdg, i);
1011 data_reference_p dr;
1012 unsigned j;
1014 if (gimple_code (stmt) == GIMPLE_PHI)
1015 continue;
1017 /* Any scalar stmts are ok. */
1018 if (!gimple_vuse (stmt))
1019 continue;
1021 /* Otherwise just regular loads/stores. */
1022 if (!gimple_assign_single_p (stmt))
1023 return;
1025 /* But exactly one store and/or load. */
1026 for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
1028 if (DR_IS_READ (dr))
1030 if (single_load != NULL)
1031 return;
1032 single_load = dr;
1034 else
1036 if (single_store != NULL)
1037 return;
1038 single_store = dr;
1043 if (!single_store)
1044 return;
1046 if (!dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
1047 gimple_bb (DR_STMT (single_store))))
1048 nb_iter = number_of_latch_executions (loop);
1049 else
1050 nb_iter = number_of_exit_cond_executions (loop);
1051 if (!nb_iter || nb_iter == chrec_dont_know)
1052 return;
1054 if (single_store && !single_load)
1056 gimple stmt = DR_STMT (single_store);
1057 tree rhs = gimple_assign_rhs1 (stmt);
1058 if (const_with_all_bytes_same (rhs) == -1
1059 && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
1060 || (TYPE_MODE (TREE_TYPE (rhs))
1061 != TYPE_MODE (unsigned_char_type_node))))
1062 return;
1063 if (TREE_CODE (rhs) == SSA_NAME
1064 && !SSA_NAME_IS_DEFAULT_DEF (rhs)
1065 && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
1066 return;
1067 if (!adjacent_dr_p (single_store)
1068 || !dominated_by_p (CDI_DOMINATORS,
1069 loop->latch, gimple_bb (stmt)))
1070 return;
1071 partition->kind = PKIND_MEMSET;
1072 partition->main_dr = single_store;
1073 partition->niter = nb_iter;
1075 else if (single_store && single_load)
1077 gimple store = DR_STMT (single_store);
1078 gimple load = DR_STMT (single_load);
1079 /* Direct aggregate copy or via an SSA name temporary. */
1080 if (load != store
1081 && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
1082 return;
1083 if (!adjacent_dr_p (single_store)
1084 || !adjacent_dr_p (single_load)
1085 || !operand_equal_p (DR_STEP (single_store),
1086 DR_STEP (single_load), 0)
1087 || !dominated_by_p (CDI_DOMINATORS,
1088 loop->latch, gimple_bb (store)))
1089 return;
1090 /* Now check that if there is a dependence this dependence is
1091 of a suitable form for memmove. */
1092 vec<loop_p> loops = vNULL;
1093 ddr_p ddr;
1094 loops.safe_push (loop);
1095 ddr = initialize_data_dependence_relation (single_load, single_store,
1096 loops);
1097 compute_affine_dependence (ddr, loop);
1098 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1100 free_dependence_relation (ddr);
1101 loops.release ();
1102 return;
1104 if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
1106 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1108 free_dependence_relation (ddr);
1109 loops.release ();
1110 return;
1112 lambda_vector dist_v;
1113 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
1115 int dist = dist_v[index_in_loop_nest (loop->num,
1116 DDR_LOOP_NEST (ddr))];
1117 if (dist > 0 && !DDR_REVERSED_P (ddr))
1119 free_dependence_relation (ddr);
1120 loops.release ();
1121 return;
1125 free_dependence_relation (ddr);
1126 loops.release ();
1127 partition->kind = PKIND_MEMCPY;
1128 partition->main_dr = single_store;
1129 partition->secondary_dr = single_load;
1130 partition->niter = nb_iter;
1134 /* For a data reference REF, return the declaration of its base
1135 address or NULL_TREE if the base is not determined. */
1137 static tree
1138 ref_base_address (data_reference_p dr)
1140 tree base_address = DR_BASE_ADDRESS (dr);
1141 if (base_address
1142 && TREE_CODE (base_address) == ADDR_EXPR)
1143 return TREE_OPERAND (base_address, 0);
1145 return base_address;
1148 /* Returns true when PARTITION1 and PARTITION2 have similar memory
1149 accesses in RDG. */
1151 static bool
1152 similar_memory_accesses (struct graph *rdg, partition_t partition1,
1153 partition_t partition2)
1155 unsigned i, j, k, l;
1156 bitmap_iterator bi, bj;
1157 data_reference_p ref1, ref2;
1159 /* First check whether in the intersection of the two partitions are
1160 any loads or stores. Common loads are the situation that happens
1161 most often. */
1162 EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
1163 if (RDG_MEM_WRITE_STMT (rdg, i)
1164 || RDG_MEM_READS_STMT (rdg, i))
1165 return true;
1167 /* Then check all data-references against each other. */
1168 EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
1169 if (RDG_MEM_WRITE_STMT (rdg, i)
1170 || RDG_MEM_READS_STMT (rdg, i))
1171 EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
1172 if (RDG_MEM_WRITE_STMT (rdg, j)
1173 || RDG_MEM_READS_STMT (rdg, j))
1175 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
1177 tree base1 = ref_base_address (ref1);
1178 if (base1)
1179 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
1180 if (base1 == ref_base_address (ref2))
1181 return true;
1185 return false;
1188 /* Aggregate several components into a useful partition that is
1189 registered in the PARTITIONS vector. Partitions will be
1190 distributed in different loops. */
     /* Ownership of the created partitions transfers to the caller via
        *PARTITIONS; distribute_loop frees them at ldist_done.  */
1192 static void
1193 rdg_build_partitions (struct graph *rdg,
1194 vec<gimple> starting_stmts,
1195 vec<partition_t> *partitions)
     /* PROCESSED accumulates the union of all statements already placed
        in some partition, so a later seed statement that falls into an
        existing partition does not create a duplicate.  */
1197 bitmap processed = BITMAP_ALLOC (NULL);
1198 int i;
1199 gimple stmt;
1201 FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
1203 int v = rdg_vertex_for_stmt (rdg, stmt);
1205 if (dump_file && (dump_flags & TDF_DETAILS))
1206 fprintf (dump_file,
1207 "ldist asked to generate code for vertex %d\n", v);
1209 /* If the vertex is already contained in another partition so
1210 is the partition rooted at it. */
1211 if (bitmap_bit_p (processed, v))
1212 continue;
1214 partition_t partition = build_rdg_partition_for_vertex (rdg, v);
     /* Mark every statement of the new partition as processed.  */
1215 bitmap_ior_into (processed, partition->stmts);
1217 if (dump_file && (dump_flags & TDF_DETAILS))
1219 fprintf (dump_file, "ldist useful partition:\n");
1220 dump_bitmap (dump_file, partition->stmts);
1223 partitions->safe_push (partition);
1226 /* All vertices should have been assigned to at least one partition now,
1227 other than vertices belonging to dead code. */
1229 BITMAP_FREE (processed);
1232 /* Dump to FILE the PARTITIONS. */
1234 static void
1235 dump_rdg_partitions (FILE *file, vec<partition_t> partitions)
1237 int i;
1238 partition_t partition;
1240 FOR_EACH_VEC_ELT (partitions, i, partition)
1241 debug_bitmap_file (file, partition->stmts);
1244 /* Debug PARTITIONS. */
1245 extern void debug_rdg_partitions (vec<partition_t> );
     /* Debugger entry point: dumps the statement bitmaps of PARTITIONS
        to stderr.  DEBUG_FUNCTION keeps it available even when it has
        no callers in the compiler itself.  */
1247 DEBUG_FUNCTION void
1248 debug_rdg_partitions (vec<partition_t> partitions)
1250 dump_rdg_partitions (stderr, partitions);
1253 /* Returns the number of read and write operations in the RDG. */
1255 static int
1256 number_of_rw_in_rdg (struct graph *rdg)
1258 int i, res = 0;
1260 for (i = 0; i < rdg->n_vertices; i++)
1262 if (RDG_MEM_WRITE_STMT (rdg, i))
1263 ++res;
1265 if (RDG_MEM_READS_STMT (rdg, i))
1266 ++res;
1269 return res;
1272 /* Returns the number of read and write operations in a PARTITION of
1273 the RDG. */
1275 static int
1276 number_of_rw_in_partition (struct graph *rdg, partition_t partition)
1278 int res = 0;
1279 unsigned i;
1280 bitmap_iterator ii;
1282 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
1284 if (RDG_MEM_WRITE_STMT (rdg, i))
1285 ++res;
1287 if (RDG_MEM_READS_STMT (rdg, i))
1288 ++res;
1291 return res;
1294 /* Returns true when one of the PARTITIONS contains all the read or
1295 write operations of RDG. */
1297 static bool
1298 partition_contains_all_rw (struct graph *rdg,
1299 vec<partition_t> partitions)
1301 int i;
1302 partition_t partition;
1303 int nrw = number_of_rw_in_rdg (rdg);
1305 FOR_EACH_VEC_ELT (partitions, i, partition)
1306 if (nrw == number_of_rw_in_partition (rdg, partition))
1307 return true;
1309 return false;
1312 /* Compute partition dependence created by the data references in DRS1
1313 and DRS2 and modify and return DIR according to that. */
1315 static int
1316 pg_add_dependence_edges (struct graph *rdg, vec<loop_p> loops, int dir,
1317 vec<data_reference_p> drs1,
1318 vec<data_reference_p> drs2)
1320 data_reference_p dr1, dr2;
1322 /* dependence direction - 0 is no dependence, -1 is back,
1323 1 is forth, 2 is both (we can stop then, merging will occur). */
1324 for (int ii = 0; drs1.iterate (ii, &dr1); ++ii)
1325 for (int jj = 0; drs2.iterate (jj, &dr2); ++jj)
1327 int this_dir = -1;
1328 ddr_p ddr;
1329 /* Re-shuffle data-refs to be in dominator order. */
1330 if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
1331 > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
1333 data_reference_p tem = dr1;
1334 dr1 = dr2;
1335 dr2 = tem;
1336 this_dir = -this_dir;
1338 ddr = initialize_data_dependence_relation (dr1, dr2, loops);
1339 compute_affine_dependence (ddr, loops[0]);
1340 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1341 this_dir = 2;
1342 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1344 if (DDR_REVERSED_P (ddr))
1346 data_reference_p tem = dr1;
1347 dr1 = dr2;
1348 dr2 = tem;
1349 this_dir = -this_dir;
1351 /* Known dependences can still be unordered througout the
1352 iteration space, see gcc.dg/tree-ssa/ldist-16.c. */
1353 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1354 this_dir = 2;
1356 else
1357 this_dir = 0;
1358 free_dependence_relation (ddr);
1359 if (dir == 0)
1360 dir = this_dir;
1361 else if (dir != this_dir)
1362 return 2;
1364 return dir;
1367 /* Compare postorder number of the partition graph vertices V1 and V2. */
1369 static int
1370 pgcmp (const void *v1_, const void *v2_)
1372 const vertex *v1 = (const vertex *)v1_;
1373 const vertex *v2 = (const vertex *)v2_;
1374 return v2->post - v1->post;
1377 /* Distributes the code from LOOP in such a way that producer
1378 statements are placed before consumer statements. Tries to separate
1379 only the statements from STMTS into separate loops.
1380 Returns the number of distributed loops. */
     /* On return *NB_CALLS holds the number of partitions that were
        emitted as builtin library calls; the return value is the number
        of generated partitions minus *NB_CALLS.  */
1382 static int
1383 distribute_loop (struct loop *loop, vec<gimple> stmts,
1384 control_dependences *cd, int *nb_calls)
1386 struct graph *rdg;
1387 vec<partition_t> partitions;
1388 partition_t partition;
1389 bool any_builtin;
1390 int i, nbp;
1391 graph *pg = NULL;
1392 int num_sccs = 1;
1394 *nb_calls = 0;
1395 stack_vec<loop_p, 3> loop_nest;
1396 if (!find_loop_nest (loop, &loop_nest))
1397 return 0;
1399 rdg = build_rdg (loop_nest, cd);
1400 if (!rdg)
1402 if (dump_file && (dump_flags & TDF_DETAILS))
1403 fprintf (dump_file,
1404 "Loop %d not distributed: failed to build the RDG.\n",
1405 loop->num);
1407 return 0;
1410 if (dump_file && (dump_flags & TDF_DETAILS))
1411 dump_rdg (dump_file, rdg);
1413 partitions.create (3);
1414 rdg_build_partitions (rdg, stmts, &partitions);
     /* Classify each partition (e.g. as a builtin pattern) and remember
        whether any builtin was detected at all.  */
1416 any_builtin = false;
1417 FOR_EACH_VEC_ELT (partitions, i, partition)
1419 classify_partition (loop, rdg, partition);
1420 any_builtin |= partition_builtin_p (partition);
1423 /* If we are only distributing patterns but did not detect any,
1424 simply bail out. */
1425 if (!flag_tree_loop_distribution
1426 && !any_builtin)
1428 nbp = 0;
1429 goto ldist_done;
1432 /* If we are only distributing patterns fuse all partitions that
1433 were not classified as builtins. This also avoids chopping
1434 a loop into pieces, separated by builtin calls. That is, we
1435 only want no or a single loop body remaining. */
1436 partition_t into;
1437 if (!flag_tree_loop_distribution)
1439 for (i = 0; partitions.iterate (i, &into); ++i)
1440 if (!partition_builtin_p (into))
1441 break;
1442 for (++i; partitions.iterate (i, &partition); ++i)
1443 if (!partition_builtin_p (partition))
1445 if (dump_file && (dump_flags & TDF_DETAILS))
1447 fprintf (dump_file, "fusing non-builtin partitions\n");
1448 dump_bitmap (dump_file, into->stmts);
1449 dump_bitmap (dump_file, partition->stmts);
1451 partition_merge_into (into, partition);
1452 partitions.unordered_remove (i);
1453 partition_free (partition);
1454 i--;
1458 /* Due to limitations in the transform phase we have to fuse all
1459 reduction partitions into the last partition so the existing
1460 loop will contain all loop-closed PHI nodes. */
1461 for (i = 0; partitions.iterate (i, &into); ++i)
1462 if (partition_reduction_p (into))
1463 break;
1464 for (i = i + 1; partitions.iterate (i, &partition); ++i)
1465 if (partition_reduction_p (partition))
1467 if (dump_file && (dump_flags & TDF_DETAILS))
1469 fprintf (dump_file, "fusing partitions\n");
1470 dump_bitmap (dump_file, into->stmts);
1471 dump_bitmap (dump_file, partition->stmts);
1472 fprintf (dump_file, "because they have reductions\n");
1474 partition_merge_into (into, partition);
1475 partitions.unordered_remove (i);
1476 partition_free (partition);
1477 i--;
1480 /* Apply our simple cost model - fuse partitions with similar
1481 memory accesses. */
1482 for (i = 0; partitions.iterate (i, &into); ++i)
1484 if (partition_builtin_p (into))
1485 continue;
1486 for (int j = i + 1;
1487 partitions.iterate (j, &partition); ++j)
1489 if (!partition_builtin_p (partition)
1490 && similar_memory_accesses (rdg, into, partition))
1492 if (dump_file && (dump_flags & TDF_DETAILS))
1494 fprintf (dump_file, "fusing partitions\n");
1495 dump_bitmap (dump_file, into->stmts);
1496 dump_bitmap (dump_file, partition->stmts);
1497 fprintf (dump_file, "because they have similar "
1498 "memory accesses\n");
1500 partition_merge_into (into, partition);
1501 partitions.unordered_remove (j);
1502 partition_free (partition);
1503 j--;
1508 /* Build the partition dependency graph. */
1509 if (partitions.length () > 1)
1511 pg = new_graph (partitions.length ());
     /* Per-vertex payload: the partition itself plus its read and write
        data references, used below to compute the dependence direction
        between every pair of partitions.  */
1512 struct pgdata {
1513 partition_t partition;
1514 vec<data_reference_p> writes;
1515 vec<data_reference_p> reads;
1517 #define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
1518 for (i = 0; partitions.iterate (i, &partition); ++i)
1520 vertex *v = &pg->vertices[i];
1521 pgdata *data = new pgdata;
1522 data_reference_p dr;
1523 /* FIXME - leaks. */
1524 v->data = data;
1525 bitmap_iterator bi;
1526 unsigned j;
1527 data->partition = partition;
1528 data->reads = vNULL;
1529 data->writes = vNULL;
1530 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, j, bi)
1531 for (int k = 0; RDG_DATAREFS (rdg, j).iterate (k, &dr); ++k)
1532 if (DR_IS_READ (dr))
1533 data->reads.safe_push (dr)
1534 else
1535 data->writes.safe_push (dr);
1537 partition_t partition1, partition2;
1538 for (i = 0; partitions.iterate (i, &partition1); ++i)
1539 for (int j = i + 1; partitions.iterate (j, &partition2); ++j)
1541 /* dependence direction - 0 is no dependence, -1 is back,
1542 1 is forth, 2 is both (we can stop then, merging will occur). */
1543 int dir = 0;
     /* Check write-read, read-write, and write-write dependences;
        read-read pairs cannot create an ordering constraint.  */
1544 dir = pg_add_dependence_edges (rdg, loop_nest, dir,
1545 PGDATA(i)->writes,
1546 PGDATA(j)->reads);
1547 if (dir != 2)
1548 dir = pg_add_dependence_edges (rdg, loop_nest, dir,
1549 PGDATA(i)->reads,
1550 PGDATA(j)->writes);
1551 if (dir != 2)
1552 dir = pg_add_dependence_edges (rdg, loop_nest, dir,
1553 PGDATA(i)->writes,
1554 PGDATA(j)->writes);
1555 if (dir == 1 || dir == 2)
1556 add_edge (pg, i, j);
1557 if (dir == -1 || dir == 2)
1558 add_edge (pg, j, i);
1561 /* Add edges to the reduction partition (if any) to force it last. */
1562 unsigned j;
1563 for (j = 0; partitions.iterate (j, &partition); ++j)
1564 if (partition_reduction_p (partition))
1565 break;
1566 if (j < partitions.length ())
1568 for (unsigned i = 0; partitions.iterate (i, &partition); ++i)
1569 if (i != j)
1570 add_edge (pg, i, j);
1573 /* Compute partitions we cannot separate and fuse them. */
1574 num_sccs = graphds_scc (pg, NULL);
1575 for (i = 0; i < num_sccs; ++i)
1577 partition_t first;
1578 int j;
1579 for (j = 0; partitions.iterate (j, &first); ++j)
1580 if (pg->vertices[j].component == i)
1581 break;
     /* Merge every later member of SCC I into FIRST; the merged slots
        are NULLed out and skipped when partitions are re-collected.  */
1582 for (j = j + 1; partitions.iterate (j, &partition); ++j)
1583 if (pg->vertices[j].component == i)
1585 if (dump_file && (dump_flags & TDF_DETAILS))
1587 fprintf (dump_file, "fusing partitions\n");
1588 dump_bitmap (dump_file, first->stmts);
1589 dump_bitmap (dump_file, partition->stmts);
1590 fprintf (dump_file, "because they are in the same "
1591 "dependence SCC\n");
1593 partition_merge_into (first, partition);
1594 partitions[j] = NULL;
1595 partition_free (partition);
1596 PGDATA (j)->partition = NULL;
1600 /* Now order the remaining nodes in postorder. */
1601 qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
1602 partitions.truncate (0);
1603 for (i = 0; i < pg->n_vertices; ++i)
1605 pgdata *data = PGDATA (i);
1606 if (data->partition)
1607 partitions.safe_push (data->partition);
1608 data->reads.release ();
1609 data->writes.release ();
1610 delete data;
1612 gcc_assert (partitions.length () == (unsigned)num_sccs);
1613 free_graph (pg);
1616 nbp = partitions.length ();
     /* Give up when distribution achieved nothing: no partitions, a
        single non-builtin partition (the original loop), or some
        partition still contains every memory access of the RDG.  */
1617 if (nbp == 0
1618 || (nbp == 1 && !partition_builtin_p (partitions[0]))
1619 || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
1621 nbp = 0;
1622 goto ldist_done;
1625 if (dump_file && (dump_flags & TDF_DETAILS))
1626 dump_rdg_partitions (dump_file, partitions);
1628 FOR_EACH_VEC_ELT (partitions, i, partition)
1630 if (partition_builtin_p (partition))
1631 (*nb_calls)++;
1632 generate_code_for_partition (loop, partition, i < nbp - 1);
1635 ldist_done:
1637 FOR_EACH_VEC_ELT (partitions, i, partition)
1638 partition_free (partition);
1639 partitions.release ();
1641 free_rdg (rdg);
1642 return nbp - *nb_calls;
1645 /* Distribute all loops in the current function. */
     /* Pass entry point.  Walks all innermost loops, seeds a worklist of
        interesting statements per loop, and calls distribute_loop on it.
        When any loop changed, virtual operands are marked for renaming
        and the function is rewritten into loop-closed SSA.  Always
        returns 0 (no extra TODO flags).  */
1647 static unsigned int
1648 tree_loop_distribution (void)
1650 struct loop *loop;
1651 loop_iterator li;
1652 bool changed = false;
1653 basic_block bb;
1654 control_dependences *cd = NULL;
     /* Invalidate the uid of every statement in the function.
        NOTE(review): presumably uids are (re)assigned during RDG
        construction for the loops we process — confirm in build_rdg.  */
1656 FOR_ALL_BB (bb)
1658 gimple_stmt_iterator gsi;
1659 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1660 gimple_set_uid (gsi_stmt (gsi), -1);
1661 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1662 gimple_set_uid (gsi_stmt (gsi), -1);
1665 /* We can at the moment only distribute non-nested loops, thus restrict
1666 walking to innermost loops. */
1667 FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
1669 vec<gimple> work_list = vNULL;
1670 basic_block *bbs;
1671 int num = loop->num;
1672 unsigned int i;
1674 /* If the loop doesn't have a single exit we will fail anyway,
1675 so do that early. */
1676 if (!single_exit (loop))
1677 continue;
1679 /* Only optimize hot loops. */
1680 if (!optimize_loop_for_speed_p (loop))
1681 continue;
1683 /* Initialize the worklist with stmts we seed the partitions with. */
1684 bbs = get_loop_body_in_dom_order (loop);
1685 for (i = 0; i < loop->num_nodes; ++i)
1687 gimple_stmt_iterator gsi;
1688 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
1690 gimple phi = gsi_stmt (gsi);
1691 if (virtual_operand_p (gimple_phi_result (phi)))
1692 continue;
1693 /* Distribute stmts which have defs that are used outside of
1694 the loop. */
1695 if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
1696 continue;
1697 work_list.safe_push (phi);
1699 for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
1701 gimple stmt = gsi_stmt (gsi);
1703 /* If there is a stmt with side-effects bail out - we
1704 cannot and should not distribute this loop. */
1705 if (gimple_has_side_effects (stmt))
1707 work_list.truncate (0);
1708 goto out;
1711 /* Distribute stmts which have defs that are used outside of
1712 the loop. */
1713 if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
1715 /* Otherwise only distribute stores for now. */
1716 else if (!gimple_assign_single_p (stmt)
1717 || is_gimple_reg (gimple_assign_lhs (stmt)))
1718 continue;
1720 work_list.safe_push (stmt);
1723 out:
1724 free (bbs);
1726 int nb_generated_loops = 0;
1727 int nb_generated_calls = 0;
1728 location_t loc = find_loop_location (loop);
1729 if (work_list.length () > 0)
     /* Compute control dependences lazily, only once for the whole
        function and only if some loop actually has work.  The edge
        list is captured by the control_dependences object; the
        post-dominator info is only needed during its construction.  */
1731 if (!cd)
1733 calculate_dominance_info (CDI_DOMINATORS);
1734 calculate_dominance_info (CDI_POST_DOMINATORS);
1735 cd = new control_dependences (create_edge_list ());
1736 free_dominance_info (CDI_POST_DOMINATORS);
1738 nb_generated_loops = distribute_loop (loop, work_list, cd,
1739 &nb_generated_calls);
1742 if (nb_generated_loops + nb_generated_calls > 0)
1744 changed = true;
1745 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
1746 loc, "Loop %d distributed: split to %d loops "
1747 "and %d library calls.\n",
1748 num, nb_generated_loops, nb_generated_calls);
1750 else if (dump_file && (dump_flags & TDF_DETAILS))
1751 fprintf (dump_file, "Loop %d is the same.\n", num);
1753 work_list.release ();
1756 if (cd)
1757 delete cd;
1759 if (changed)
1761 mark_virtual_operands_for_renaming (cfun);
1762 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1765 #ifdef ENABLE_CHECKING
1766 verify_loop_structure ();
1767 #endif
1769 return 0;
1772 static bool
1773 gate_tree_loop_distribution (void)
1775 return flag_tree_loop_distribution
1776 || flag_tree_loop_distribute_patterns;
1779 namespace {
     /* Static descriptor for the loop distribution pass ("ldist");
        consumed by the gimple_opt_pass constructor below and by the
        pass manager.  */
1781 const pass_data pass_data_loop_distribution =
1783 GIMPLE_PASS, /* type */
1784 "ldist", /* name */
1785 OPTGROUP_LOOP, /* optinfo_flags */
1786 true, /* has_gate */
1787 true, /* has_execute */
1788 TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
1789 ( PROP_cfg | PROP_ssa ), /* properties_required */
1790 0, /* properties_provided */
1791 0, /* properties_destroyed */
1792 0, /* todo_flags_start */
1793 TODO_verify_ssa, /* todo_flags_finish */
     /* Pass object wrapping the gate and execute functions defined
        above; instantiated by make_pass_loop_distribution.  */
1796 class pass_loop_distribution : public gimple_opt_pass
1798 public:
1799 pass_loop_distribution (gcc::context *ctxt)
1800 : gimple_opt_pass (pass_data_loop_distribution, ctxt)
1803 /* opt_pass methods: */
1804 bool gate () { return gate_tree_loop_distribution (); }
1805 unsigned int execute () { return tree_loop_distribution (); }
1807 }; // class pass_loop_distribution
1809 } // anon namespace
     /* Factory called by the pass manager to create an instance of the
        loop distribution pass in context CTXT.  */
1811 gimple_opt_pass *
1812 make_pass_loop_distribution (gcc::context *ctxt)
1814 return new pass_loop_distribution (ctxt);