/* Loop distribution.
   Copyright (C) 2006-2013 Free Software Foundation, Inc.
   Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
   and Sebastian Pop <sebastian.pop@amd.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass performs loop distribution: for example, the loop

   |DO I = 2, N
   |    A(I) = B(I) + C
   |    D(I) = A(I-1)*E
   |ENDDO

   is transformed to

   |DOALL I = 2, N
   |   A(I) = B(I) + C
   |ENDDO
   |
   |DOALL I = 2, N
   |   D(I) = A(I-1)*E
   |ENDDO

   This pass uses an RDG, Reduced Dependence Graph built on top of the
   data dependence relations.  The RDG is then topologically sorted to
   obtain a map of information producers/consumers based on which it
   generates the new loops.  */
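/* When -ftree-loop-distribute-patterns is enabled the pass additionally
   replaces suitable partitions by builtin calls: classify_partition
   below detects partitions that behave like memset or memcpy/memmove,
   so that for example

   |for (i = 0; i < n; i++)
   |  a[i] = 0;

   is emitted as a single memset call; see generate_memset_builtin and
   generate_memcpy_builtin.  */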
44 #include "config.h"
45 #include "system.h"
46 #include "coretypes.h"
47 #include "tree.h"
48 #include "gimple.h"
49 #include "gimple-iterator.h"
50 #include "gimplify-me.h"
51 #include "stor-layout.h"
52 #include "gimple-ssa.h"
53 #include "tree-cfg.h"
54 #include "tree-phinodes.h"
55 #include "ssa-iterators.h"
56 #include "stringpool.h"
57 #include "tree-ssanames.h"
58 #include "tree-ssa-loop-manip.h"
59 #include "tree-ssa-loop.h"
60 #include "tree-into-ssa.h"
61 #include "tree-ssa.h"
62 #include "cfgloop.h"
63 #include "tree-chrec.h"
64 #include "tree-data-ref.h"
65 #include "tree-scalar-evolution.h"
66 #include "tree-pass.h"
67 #include "gimple-pretty-print.h"
68 #include "tree-vectorizer.h"
/* A Reduced Dependence Graph (RDG) vertex representing a statement.  */
typedef struct rdg_vertex
{
  /* The statement represented by this vertex.  */
  gimple stmt;

  /* Vector of data-references in this statement.  */
  vec<data_reference_p> datarefs;

  /* True when the statement contains a write to memory.  */
  bool has_mem_write;

  /* True when the statement contains a read from memory.  */
  bool has_mem_reads;
} *rdg_vertex_p;

#define RDGV_STMT(V)     ((struct rdg_vertex *) ((V)->data))->stmt
#define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
#define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
#define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
#define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
#define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
#define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
#define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
/* Data dependence type.  */

enum rdg_dep_type
{
  /* Read After Write (RAW).  */
  flow_dd = 'f',

  /* Control dependence (execute conditional on).  */
  control_dd = 'c'
};

/* Dependence information attached to an edge of the RDG.  */

typedef struct rdg_edge
{
  /* Type of the dependence.  */
  enum rdg_dep_type type;
} *rdg_edge_p;

#define RDGE_TYPE(E)        ((struct rdg_edge *) ((E)->data))->type
/* Dump vertex I in RDG to FILE.  */

static void
dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
{
  struct vertex *v = &(rdg->vertices[i]);
  struct graph_edge *e;

  fprintf (file, "(vertex %d: (%s%s) (in:", i,
           RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
           RDG_MEM_READS_STMT (rdg, i) ? "r" : "");

  if (v->pred)
    for (e = v->pred; e; e = e->pred_next)
      fprintf (file, " %d", e->src);

  fprintf (file, ") (out:");

  if (v->succ)
    for (e = v->succ; e; e = e->succ_next)
      fprintf (file, " %d", e->dest);

  fprintf (file, ")\n");
  print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
  fprintf (file, ")\n");
}
/* Call dump_rdg_vertex on stderr.  */

DEBUG_FUNCTION void
debug_rdg_vertex (struct graph *rdg, int i)
{
  dump_rdg_vertex (stderr, rdg, i);
}
/* Dump the reduced dependence graph RDG to FILE.  */

static void
dump_rdg (FILE *file, struct graph *rdg)
{
  fprintf (file, "(rdg\n");
  for (int i = 0; i < rdg->n_vertices; i++)
    dump_rdg_vertex (file, rdg, i);
  fprintf (file, ")\n");
}
/* Call dump_rdg on stderr.  */

DEBUG_FUNCTION void
debug_rdg (struct graph *rdg)
{
  dump_rdg (stderr, rdg);
}
static void
dot_rdg_1 (FILE *file, struct graph *rdg)
{
  int i;
  pretty_printer buffer;
  pp_needs_newline (&buffer) = false;
  buffer.buffer->stream = file;

  fprintf (file, "digraph RDG {\n");

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      fprintf (file, "%d [label=\"[%d] ", i, i);
      pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
      pp_flush (&buffer);
      fprintf (file, "\"]\n");

      /* Highlight reads from memory.  */
      if (RDG_MEM_READS_STMT (rdg, i))
        fprintf (file, "%d [style=filled, fillcolor=green]\n", i);

      /* Highlight stores to memory.  */
      if (RDG_MEM_WRITE_STMT (rdg, i))
        fprintf (file, "%d [style=filled, fillcolor=red]\n", i);

      if (v->succ)
        for (e = v->succ; e; e = e->succ_next)
          switch (RDGE_TYPE (e))
            {
            case flow_dd:
              /* These are the most common dependences: don't print these.  */
              fprintf (file, "%d -> %d \n", i, e->dest);
              break;

            case control_dd:
              fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
              break;

            default:
              gcc_unreachable ();
            }
    }

  fprintf (file, "}\n\n");
}
/* Display the Reduced Dependence Graph using dotty.  */

DEBUG_FUNCTION void
dot_rdg (struct graph *rdg)
{
  /* When debugging, you may want to enable the following code.  */
#if 1
  FILE *file = popen ("dot -Tx11", "w");
  if (!file)
    return;
  dot_rdg_1 (file, rdg);
  fflush (file);
  close (fileno (file));
  pclose (file);
#else
  dot_rdg_1 (stderr, rdg);
#endif
}
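/* Statement-to-vertex lookup below relies on gimple uids:
   create_rdg_vertices records the vertex index in the uid of every
   statement it adds, and tree_loop_distribution initializes all uids
   to -1 beforehand, so an index of -1 means the statement is not part
   of the RDG.  */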
/* Returns the index of STMT in RDG.  */

static int
rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple stmt)
{
  int index = gimple_uid (stmt);
  gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
  return index;
}
/* Creates dependence edges in RDG for all the uses of DEF.  IDEF is
   the index of DEF in RDG.  */

static void
create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
{
  use_operand_p imm_use_p;
  imm_use_iterator iterator;

  FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
    {
      struct graph_edge *e;
      int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));

      if (use < 0)
        continue;

      e = add_edge (rdg, idef, use);
      e->data = XNEW (struct rdg_edge);
      RDGE_TYPE (e) = flow_dd;
    }
}
/* Creates an edge for the control dependences of BB to the vertex V.  */

static void
create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
                                    int v, control_dependences *cd)
{
  bitmap_iterator bi;
  unsigned edge_n;
  EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
                            0, edge_n, bi)
    {
      basic_block cond_bb = cd->get_edge (edge_n)->src;
      gimple stmt = last_stmt (cond_bb);
      if (stmt && is_ctrl_stmt (stmt))
        {
          struct graph_edge *e;
          int c = rdg_vertex_for_stmt (rdg, stmt);
          if (c < 0)
            continue;

          e = add_edge (rdg, c, v);
          e->data = XNEW (struct rdg_edge);
          RDGE_TYPE (e) = control_dd;
        }
    }
}
/* Creates the edges of the reduced dependence graph RDG.  */

static void
create_rdg_flow_edges (struct graph *rdg)
{
  int i;
  def_operand_p def_p;
  ssa_op_iter iter;

  for (i = 0; i < rdg->n_vertices; i++)
    FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
                              iter, SSA_OP_DEF)
      create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
}
/* Creates the control dependence edges of the reduced dependence
   graph RDG.  */

static void
create_rdg_cd_edges (struct graph *rdg, control_dependences *cd)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      gimple stmt = RDG_STMT (rdg, i);
      if (gimple_code (stmt) == GIMPLE_PHI)
        {
          edge_iterator ei;
          edge e;
          FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
            create_edge_for_control_dependence (rdg, e->src, i, cd);
        }
      else
        create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
    }
}
/* Build the vertices of the reduced dependence graph RDG.  Return false
   if that failed.  */

static bool
create_rdg_vertices (struct graph *rdg, vec<gimple> stmts, loop_p loop,
                     vec<data_reference_p> *datarefs)
{
  int i;
  gimple stmt;

  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      struct vertex *v = &(rdg->vertices[i]);

      /* Record statement to vertex mapping.  */
      gimple_set_uid (stmt, i);

      v->data = XNEW (struct rdg_vertex);
      RDGV_STMT (v) = stmt;
      RDGV_DATAREFS (v).create (0);
      RDGV_HAS_MEM_WRITE (v) = false;
      RDGV_HAS_MEM_READS (v) = false;
      if (gimple_code (stmt) == GIMPLE_PHI)
        continue;

      unsigned drp = datarefs->length ();
      if (!find_data_references_in_stmt (loop, stmt, datarefs))
        return false;
      for (unsigned j = drp; j < datarefs->length (); ++j)
        {
          data_reference_p dr = (*datarefs)[j];
          if (DR_IS_READ (dr))
            RDGV_HAS_MEM_READS (v) = true;
          else
            RDGV_HAS_MEM_WRITE (v) = true;
          RDGV_DATAREFS (v).safe_push (dr);
        }
    }
  return true;
}
/* Initialize STMTS with all the statements of LOOP.  The order in
   which we discover statements is important as
   generate_loops_for_partition is using the same traversal for
   identifying statements in loop copies.  */

static void
stmts_from_loop (struct loop *loop, vec<gimple> *stmts)
{
  unsigned int i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];
      gimple_stmt_iterator bsi;
      gimple stmt;

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        if (!virtual_operand_p (gimple_phi_result (gsi_stmt (bsi))))
          stmts->safe_push (gsi_stmt (bsi));

      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        {
          stmt = gsi_stmt (bsi);
          if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
            stmts->safe_push (stmt);
        }
    }

  free (bbs);
}
/* Free the reduced dependence graph RDG.  */

static void
free_rdg (struct graph *rdg)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      for (e = v->succ; e; e = e->succ_next)
        free (e->data);

      if (v->data)
        {
          gimple_set_uid (RDGV_STMT (v), -1);
          free_data_refs (RDGV_DATAREFS (v));
          free (v->data);
        }
    }

  free_graph (rdg);
}
/* Build the Reduced Dependence Graph (RDG) with one vertex per
   statement of the loop nest LOOP_NEST, and one edge per data dependence or
   scalar dependence.  */

static struct graph *
build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
{
  struct graph *rdg;
  vec<data_reference_p> datarefs;

  /* Create the RDG vertices from the stmts of the loop nest.  */
  stack_vec<gimple, 10> stmts;
  stmts_from_loop (loop_nest[0], &stmts);
  rdg = new_graph (stmts.length ());
  datarefs.create (10);
  if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs))
    {
      datarefs.release ();
      free_rdg (rdg);
      return NULL;
    }
  stmts.release ();

  create_rdg_flow_edges (rdg);
  if (cd)
    create_rdg_cd_edges (rdg, cd);

  datarefs.release ();

  return rdg;
}
enum partition_kind {
    PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY
};

typedef struct partition_s
{
  bitmap stmts;
  bitmap loops;
  bool reduction_p;
  enum partition_kind kind;
  /* Data-references a kind != PKIND_NORMAL partition is about.  */
  data_reference_p main_dr;
  data_reference_p secondary_dr;
  tree niter;
  bool plus_one;
} *partition_t;
/* Allocate and initialize a partition from BITMAP.  */

static partition_t
partition_alloc (bitmap stmts, bitmap loops)
{
  partition_t partition = XCNEW (struct partition_s);
  partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
  partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
  partition->reduction_p = false;
  partition->kind = PKIND_NORMAL;
  return partition;
}
/* Free PARTITION.  */

static void
partition_free (partition_t partition)
{
  BITMAP_FREE (partition->stmts);
  BITMAP_FREE (partition->loops);
  free (partition);
}
/* Returns true if the partition can be generated as a builtin.  */

static bool
partition_builtin_p (partition_t partition)
{
  return partition->kind != PKIND_NORMAL;
}

/* Returns true if the partition contains a reduction.  */

static bool
partition_reduction_p (partition_t partition)
{
  return partition->reduction_p;
}
/* Merge PARTITION into the partition DEST.  */

static void
partition_merge_into (partition_t dest, partition_t partition)
{
  dest->kind = PKIND_NORMAL;
  bitmap_ior_into (dest->stmts, partition->stmts);
  if (partition_reduction_p (partition))
    dest->reduction_p = true;
}
/* Returns true when DEF is an SSA_NAME defined in LOOP and used after
   the LOOP.  */

static bool
ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
{
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
    {
      gimple use_stmt = USE_STMT (use_p);
      if (!is_gimple_debug (use_stmt)
          && loop != loop_containing_stmt (use_stmt))
        return true;
    }

  return false;
}
/* Returns true when STMT defines a scalar variable used after the
   loop LOOP.  */

static bool
stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple stmt)
{
  def_operand_p def_p;
  ssa_op_iter op_iter;

  if (gimple_code (stmt) == GIMPLE_PHI)
    return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);

  FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
    if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
      return true;

  return false;
}
/* Return a copy of LOOP placed before LOOP.  */

static struct loop *
copy_loop_before (struct loop *loop)
{
  struct loop *res;
  edge preheader = loop_preheader_edge (loop);

  initialize_original_copy_tables ();
  res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, preheader);
  gcc_assert (res != NULL);
  free_original_copy_tables ();
  delete_update_ssa ();

  return res;
}
/* Creates an empty basic block after LOOP.  */

static void
create_bb_after_loop (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return;

  split_edge (exit);
}
/* Generate code for PARTITION from the code in LOOP.  The loop is
   copied when COPY_P is true.  All the statements not flagged in the
   PARTITION bitmap are removed from the loop or from its copy.  The
   statements are indexed in sequence inside a basic block, and the
   basic blocks of a loop are taken in dom order.  */

static void
generate_loops_for_partition (struct loop *loop, partition_t partition,
                              bool copy_p)
{
  unsigned i;
  gimple_stmt_iterator bsi;
  basic_block *bbs;

  if (copy_p)
    {
      loop = copy_loop_before (loop);
      gcc_assert (loop != NULL);
      create_preheader (loop, CP_SIMPLE_PREHEADERS);
      create_bb_after_loop (loop);
    }

  /* Remove stmts not in the PARTITION bitmap.  */
  bbs = get_loop_body_in_dom_order (loop);

  if (MAY_HAVE_DEBUG_STMTS)
    for (i = 0; i < loop->num_nodes; i++)
      {
        basic_block bb = bbs[i];

        for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
          {
            gimple phi = gsi_stmt (bsi);
            if (!virtual_operand_p (gimple_phi_result (phi))
                && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
              reset_debug_uses (phi);
          }

        for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
          {
            gimple stmt = gsi_stmt (bsi);
            if (gimple_code (stmt) != GIMPLE_LABEL
                && !is_gimple_debug (stmt)
                && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
              reset_debug_uses (stmt);
          }
      }

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
        {
          gimple phi = gsi_stmt (bsi);
          if (!virtual_operand_p (gimple_phi_result (phi))
              && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
            remove_phi_node (&bsi, true);
          else
            gsi_next (&bsi);
        }

      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
        {
          gimple stmt = gsi_stmt (bsi);
          if (gimple_code (stmt) != GIMPLE_LABEL
              && !is_gimple_debug (stmt)
              && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
            {
              /* Choose an arbitrary path through the empty CFG part
                 that this unnecessary control stmt controls.  */
              if (gimple_code (stmt) == GIMPLE_COND)
                {
                  gimple_cond_make_false (stmt);
                  update_stmt (stmt);
                }
              else if (gimple_code (stmt) == GIMPLE_SWITCH)
                {
                  gimple_switch_set_index
                      (stmt, CASE_LOW (gimple_switch_label (stmt, 1)));
                  update_stmt (stmt);
                }
              else
                {
                  unlink_stmt_vdef (stmt);
                  gsi_remove (&bsi, true);
                  release_defs (stmt);
                  continue;
                }
            }
          gsi_next (&bsi);
        }
    }

  free (bbs);
}
/* Build the size argument for a memory operation call.  */

static tree
build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter,
                    bool plus_one)
{
  tree size = fold_convert_loc (loc, sizetype, nb_iter);
  if (plus_one)
    size = size_binop (PLUS_EXPR, size, size_one_node);
  size = fold_build2_loc (loc, MULT_EXPR, sizetype, size,
                          TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
  size = fold_convert_loc (loc, size_type_node, size);
  return size;
}
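/* The size computed above is (NB_ITER + PLUS_ONE) * element-size:
   NB_ITER is the number of latch executions, and PLUS_ONE accounts
   for the reference also executing on the exit iteration when its
   block dominates the loop exit (see classify_partition).  */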
/* Build an address argument for a memory operation call.  */

static tree
build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
{
  tree addr_base;

  addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
  addr_base = fold_convert_loc (loc, sizetype, addr_base);

  /* Test for a negative stride, iterating over every element.  */
  if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
    {
      addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
                                  fold_convert_loc (loc, sizetype, nb_bytes));
      addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
                                  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
    }

  return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
}
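/* For a negative stride the first iteration touches the highest
   address, so the adjustment above rewinds ADDR_BASE by NB_BYTES minus
   one element to obtain the lowest address of the accessed region,
   which is what the memory builtins expect.  */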
/* If VAL memory representation contains the same value in all bytes,
   return that value, otherwise return -1.
   E.g. for 0x24242424 return 0x24, for IEEE double
   747708026454360457216.0 return 0x44, etc.  */

static int
const_with_all_bytes_same (tree val)
{
  unsigned char buf[64];
  int i, len;

  if (integer_zerop (val)
      || real_zerop (val)
      || (TREE_CODE (val) == CONSTRUCTOR
          && !TREE_CLOBBER_P (val)
          && CONSTRUCTOR_NELTS (val) == 0))
    return 0;

  if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
    return -1;

  len = native_encode_expr (val, buf, sizeof (buf));
  if (len == 0)
    return -1;
  for (i = 1; i < len; i++)
    if (buf[i] != buf[0])
      return -1;
  return buf[0];
}
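/* As an illustration of the pattern handled below: with
   -ftree-loop-distribute-patterns a partition consisting of

   |for (i = 0; i < n; i++)
   |  a[i] = c;

   where all bytes of C's representation are equal is replaced by a
   single call memset (&a[0], <byte value>, n * sizeof (a[0])).  */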
/* Generate a call to memset for PARTITION in LOOP.  */

static void
generate_memset_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree mem, fn, nb_bytes;
  location_t loc;
  tree val;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
                                 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
                                       false, GSI_CONTINUE_LINKING);
  mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
                                  false, GSI_CONTINUE_LINKING);

  /* This exactly matches the pattern recognition in classify_partition.  */
  val = gimple_assign_rhs1 (stmt);
  /* Handle constants like 0x15151515 and similarly
     floating point constants etc. where all bytes are the same.  */
  int bytev = const_with_all_bytes_same (val);
  if (bytev != -1)
    val = build_int_cst (integer_type_node, bytev);
  else if (TREE_CODE (val) == INTEGER_CST)
    val = fold_convert (integer_type_node, val);
  else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
    {
      gimple cstmt;
      tree tem = make_ssa_name (integer_type_node, NULL);
      cstmt = gimple_build_assign_with_ops (NOP_EXPR, tem, val, NULL_TREE);
      gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
      val = tem;
    }

  fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
  fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "generated memset");
      if (bytev == 0)
        fprintf (dump_file, " zero\n");
      else
        fprintf (dump_file, "\n");
    }
}
/* Generate a call to memcpy for PARTITION in LOOP.  */

static void
generate_memcpy_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree dest, src, fn, nb_bytes;
  location_t loc;
  enum built_in_function kind;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
                                 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
                                       false, GSI_CONTINUE_LINKING);
  dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
  if (ptr_derefs_may_alias_p (dest, src))
    kind = BUILT_IN_MEMMOVE;
  else
    kind = BUILT_IN_MEMCPY;

  dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
                                   false, GSI_CONTINUE_LINKING);
  src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
                                  false, GSI_CONTINUE_LINKING);
  fn = build_fold_addr_expr (builtin_decl_implicit (kind));
  fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (kind == BUILT_IN_MEMCPY)
        fprintf (dump_file, "generated memcpy\n");
      else
        fprintf (dump_file, "generated memmove\n");
    }
}
/* Remove and destroy the loop LOOP.  */

static void
destroy_loop (struct loop *loop)
{
  unsigned nbbs = loop->num_nodes;
  edge exit = single_exit (loop);
  basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
  basic_block *bbs;
  unsigned i;

  bbs = get_loop_body_in_dom_order (loop);

  redirect_edge_pred (exit, src);
  exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
  exit->flags |= EDGE_FALLTHRU;
  cancel_loop_tree (loop);
  rescan_loop_exit (exit, false, true);

  for (i = 0; i < nbbs; i++)
    {
      /* We have made sure to not leave any dangling uses of SSA
         names defined in the loop, with the exception of virtuals.
         Make sure we replace all uses of virtual defs that will remain
         outside of the loop with the bare symbol as delete_basic_block
         will release them.  */
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple phi = gsi_stmt (gsi);
          if (virtual_operand_p (gimple_phi_result (phi)))
            mark_virtual_phi_result_for_renaming (phi);
        }
      for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple stmt = gsi_stmt (gsi);
          tree vdef = gimple_vdef (stmt);
          if (vdef && TREE_CODE (vdef) == SSA_NAME)
            mark_virtual_operand_for_renaming (vdef);
        }
      delete_basic_block (bbs[i]);
    }
  free (bbs);

  set_immediate_dominator (CDI_DOMINATORS, dest,
                           recompute_dominator (CDI_DOMINATORS, dest));
}
/* Generates code for PARTITION.  */

static void
generate_code_for_partition (struct loop *loop,
                             partition_t partition, bool copy_p)
{
  switch (partition->kind)
    {
    case PKIND_NORMAL:
      /* Reductions all have to be in the last partition.  */
      gcc_assert (!partition_reduction_p (partition)
                  || !copy_p);
      generate_loops_for_partition (loop, partition, copy_p);
      return;

    case PKIND_MEMSET:
      generate_memset_builtin (loop, partition);
      break;

    case PKIND_MEMCPY:
      generate_memcpy_builtin (loop, partition);
      break;

    default:
      gcc_unreachable ();
    }

  /* Common tail for partitions we turn into a call.  If this was the last
     partition for which we generate code, we have to destroy the loop.  */
  if (!copy_p)
    destroy_loop (loop);
}
/* Returns a partition with all the statements needed for computing
   the vertex V of the RDG, also including the loop exit conditions.  */

static partition_t
build_rdg_partition_for_vertex (struct graph *rdg, int v)
{
  partition_t partition = partition_alloc (NULL, NULL);
  stack_vec<int, 3> nodes;
  unsigned i;
  int x;

  graphds_dfs (rdg, &v, 1, &nodes, false, NULL);

  FOR_EACH_VEC_ELT (nodes, i, x)
    {
      bitmap_set_bit (partition->stmts, x);
      bitmap_set_bit (partition->loops,
                      loop_containing_stmt (RDG_STMT (rdg, x))->num);
    }

  return partition;
}
/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
   For the moment we detect memset and memcpy/memmove patterns.  */

static void
classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
{
  bitmap_iterator bi;
  unsigned i;
  tree nb_iter;
  data_reference_p single_load, single_store;
  bool volatiles_p = false;
  bool plus_one = false;

  partition->kind = PKIND_NORMAL;
  partition->main_dr = NULL;
  partition->secondary_dr = NULL;
  partition->niter = NULL_TREE;
  partition->plus_one = false;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);

      if (gimple_has_volatile_ops (stmt))
        volatiles_p = true;

      /* If the stmt has uses outside of the loop mark it as reduction.  */
      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
        {
          partition->reduction_p = true;
          return;
        }
    }

  /* Perform general partition disqualification for builtins.  */
  if (volatiles_p
      || !flag_tree_loop_distribute_patterns)
    return;

  /* Detect memset and memcpy.  */
  single_load = NULL;
  single_store = NULL;
  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);
      data_reference_p dr;
      unsigned j;

      if (gimple_code (stmt) == GIMPLE_PHI)
        continue;

      /* Any scalar stmts are ok.  */
      if (!gimple_vuse (stmt))
        continue;

      /* Otherwise just regular loads/stores.  */
      if (!gimple_assign_single_p (stmt))
        return;

      /* But exactly one store and/or load.  */
      for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
        {
          if (DR_IS_READ (dr))
            {
              if (single_load != NULL)
                return;
              single_load = dr;
            }
          else
            {
              if (single_store != NULL)
                return;
              single_store = dr;
            }
        }
    }

  if (!single_store)
    return;

  nb_iter = number_of_latch_executions (loop);
  if (!nb_iter || nb_iter == chrec_dont_know)
    return;
  if (dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
                      gimple_bb (DR_STMT (single_store))))
    plus_one = true;
  if (single_store && !single_load)
    {
      gimple stmt = DR_STMT (single_store);
      tree rhs = gimple_assign_rhs1 (stmt);
      if (const_with_all_bytes_same (rhs) == -1
          && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
              || (TYPE_MODE (TREE_TYPE (rhs))
                  != TYPE_MODE (unsigned_char_type_node))))
        return;
      if (TREE_CODE (rhs) == SSA_NAME
          && !SSA_NAME_IS_DEFAULT_DEF (rhs)
          && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
        return;
      if (!adjacent_dr_p (single_store)
          || !dominated_by_p (CDI_DOMINATORS,
                              loop->latch, gimple_bb (stmt)))
        return;
      partition->kind = PKIND_MEMSET;
      partition->main_dr = single_store;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
  else if (single_store && single_load)
    {
      gimple store = DR_STMT (single_store);
      gimple load = DR_STMT (single_load);
      /* Direct aggregate copy or via an SSA name temporary.  */
      if (load != store
          && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
        return;
      if (!adjacent_dr_p (single_store)
          || !adjacent_dr_p (single_load)
          || !operand_equal_p (DR_STEP (single_store),
                               DR_STEP (single_load), 0)
          || !dominated_by_p (CDI_DOMINATORS,
                              loop->latch, gimple_bb (store)))
        return;
      /* Now check that if there is a dependence this dependence is
         of a suitable form for memmove.  */
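      /* For instance a copy with a positive loop-carried distance like

         |for (i = 1; i < n; i++)
         |  a[i] = a[i-1];

         replicates a single value and is not equivalent to memmove,
         which acts as if the whole source were read before any byte
         of the destination is written; such dependences are rejected
         below.  */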
      vec<loop_p> loops = vNULL;
      ddr_p ddr;
      loops.safe_push (loop);
      ddr = initialize_data_dependence_relation (single_load, single_store,
                                                 loops);
      compute_affine_dependence (ddr, loop);
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
        {
          free_dependence_relation (ddr);
          loops.release ();
          return;
        }
      if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
        {
          if (DDR_NUM_DIST_VECTS (ddr) == 0)
            {
              free_dependence_relation (ddr);
              loops.release ();
              return;
            }
          lambda_vector dist_v;
          FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
            {
              int dist = dist_v[index_in_loop_nest (loop->num,
                                                    DDR_LOOP_NEST (ddr))];
              if (dist > 0 && !DDR_REVERSED_P (ddr))
                {
                  free_dependence_relation (ddr);
                  loops.release ();
                  return;
                }
            }
        }
      free_dependence_relation (ddr);
      loops.release ();
      partition->kind = PKIND_MEMCPY;
      partition->main_dr = single_store;
      partition->secondary_dr = single_load;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
}
/* For a data reference DR, return the declaration of its base
   address or NULL_TREE if the base is not determined.  */

static tree
ref_base_address (data_reference_p dr)
{
  tree base_address = DR_BASE_ADDRESS (dr);
  if (base_address
      && TREE_CODE (base_address) == ADDR_EXPR)
    return TREE_OPERAND (base_address, 0);

  return base_address;
}
/* Returns true when PARTITION1 and PARTITION2 have similar memory
   accesses in RDG.  */

static bool
similar_memory_accesses (struct graph *rdg, partition_t partition1,
                         partition_t partition2)
{
  unsigned i, j, k, l;
  bitmap_iterator bi, bj;
  data_reference_p ref1, ref2;

  /* First check whether in the intersection of the two partitions are
     any loads or stores.  Common loads are the situation that happens
     most often.  */
  EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
        || RDG_MEM_READS_STMT (rdg, i))
      return true;

  /* Then check all data-references against each other.  */
  EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
        || RDG_MEM_READS_STMT (rdg, i))
      EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
        if (RDG_MEM_WRITE_STMT (rdg, j)
            || RDG_MEM_READS_STMT (rdg, j))
          {
            FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
              {
                tree base1 = ref_base_address (ref1);
                if (base1)
                  FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
                    if (base1 == ref_base_address (ref2))
                      return true;
              }
          }

  return false;
}
/* Aggregate several components into a useful partition that is
   registered in the PARTITIONS vector.  Partitions will be
   distributed in different loops.  */

static void
rdg_build_partitions (struct graph *rdg,
                      vec<gimple> starting_stmts,
                      vec<partition_t> *partitions)
{
  bitmap processed = BITMAP_ALLOC (NULL);
  int i;
  gimple stmt;

  FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
    {
      int v = rdg_vertex_for_stmt (rdg, stmt);

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "ldist asked to generate code for vertex %d\n", v);

      /* If the vertex is already contained in another partition so
         is the partition rooted at it.  */
      if (bitmap_bit_p (processed, v))
        continue;

      partition_t partition = build_rdg_partition_for_vertex (rdg, v);
      bitmap_ior_into (processed, partition->stmts);

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "ldist useful partition:\n");
          dump_bitmap (dump_file, partition->stmts);
        }

      partitions->safe_push (partition);
    }

  /* All vertices should have been assigned to at least one partition now,
     other than vertices belonging to dead code.  */

  BITMAP_FREE (processed);
}
/* Dump to FILE the PARTITIONS.  */

static void
dump_rdg_partitions (FILE *file, vec<partition_t> partitions)
{
  int i;
  partition_t partition;

  FOR_EACH_VEC_ELT (partitions, i, partition)
    debug_bitmap_file (file, partition->stmts);
}

/* Debug PARTITIONS.  */
extern void debug_rdg_partitions (vec<partition_t> );

DEBUG_FUNCTION void
debug_rdg_partitions (vec<partition_t> partitions)
{
  dump_rdg_partitions (stderr, partitions);
}
/* Returns the number of read and write operations in the RDG.  */

static int
number_of_rw_in_rdg (struct graph *rdg)
{
  int i, res = 0;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
        ++res;

      if (RDG_MEM_READS_STMT (rdg, i))
        ++res;
    }

  return res;
}
/* Returns the number of read and write operations in a PARTITION of
   the RDG.  */

static int
number_of_rw_in_partition (struct graph *rdg, partition_t partition)
{
  int res = 0;
  unsigned i;
  bitmap_iterator ii;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
        ++res;

      if (RDG_MEM_READS_STMT (rdg, i))
        ++res;
    }

  return res;
}
/* Returns true when one of the PARTITIONS contains all the read or
   write operations of RDG.  */

static bool
partition_contains_all_rw (struct graph *rdg,
                           vec<partition_t> partitions)
{
  int i;
  partition_t partition;
  int nrw = number_of_rw_in_rdg (rdg);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    if (nrw == number_of_rw_in_partition (rdg, partition))
      return true;

  return false;
}
/* Compute partition dependence created by the data references in DRS1
   and DRS2 and modify and return DIR according to that.  */

static int
pg_add_dependence_edges (struct graph *rdg, vec<loop_p> loops, int dir,
                         vec<data_reference_p> drs1,
                         vec<data_reference_p> drs2)
{
  data_reference_p dr1, dr2;

  /* dependence direction - 0 is no dependence, -1 is back,
     1 is forth, 2 is both (we can stop then, merging will occur).  */
  for (int ii = 0; drs1.iterate (ii, &dr1); ++ii)
    for (int jj = 0; drs2.iterate (jj, &dr2); ++jj)
      {
        int this_dir = 1;
        ddr_p ddr;
        /* Re-shuffle data-refs to be in dominator order.  */
        if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
            > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
          {
            data_reference_p tem = dr1;
            dr1 = dr2;
            dr2 = tem;
            this_dir = -this_dir;
          }
        ddr = initialize_data_dependence_relation (dr1, dr2, loops);
        compute_affine_dependence (ddr, loops[0]);
        if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
          this_dir = 2;
        else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
          {
            if (DDR_REVERSED_P (ddr))
              {
                data_reference_p tem = dr1;
                dr1 = dr2;
                dr2 = tem;
                this_dir = -this_dir;
              }
            /* Known dependences can still be unordered throughout the
               iteration space, see gcc.dg/tree-ssa/ldist-16.c.  */
            if (DDR_NUM_DIST_VECTS (ddr) != 1)
              this_dir = 2;
            /* If the overlap is exact preserve stmt order.  */
            else if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1))
              ;
            else
              {
                /* Else as the distance vector is lexicographic positive
                   swap the dependence direction.  */
                this_dir = -this_dir;
              }
          }
        else
          this_dir = 0;
        free_dependence_relation (ddr);
        if (dir == 0)
          dir = this_dir;
        else if (dir != this_dir)
          return 2;
      }
  return dir;
}
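/* E.g. if one pair of data references yields direction 1 while another
   pair yields -1, the accumulated directions disagree and the function
   returns 2; the caller then adds partition graph edges both ways so
   the subsequent SCC computation fuses the two partitions.  */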
/* Compare postorder number of the partition graph vertices V1 and V2.  */

static int
pgcmp (const void *v1_, const void *v2_)
{
  const vertex *v1 = (const vertex *)v1_;
  const vertex *v2 = (const vertex *)v2_;
  return v2->post - v1->post;
}
/* Distributes the code from LOOP in such a way that producer
   statements are placed before consumer statements.  Tries to separate
   only the statements from STMTS into separate loops.
   Returns the number of distributed loops.  */

static int
distribute_loop (struct loop *loop, vec<gimple> stmts,
                 control_dependences *cd, int *nb_calls)
{
  struct graph *rdg;
  vec<partition_t> partitions;
  partition_t partition;
  bool any_builtin;
  int i, nbp;
  graph *pg = NULL;
  int num_sccs = 1;

  *nb_calls = 0;
  stack_vec<loop_p, 3> loop_nest;
  if (!find_loop_nest (loop, &loop_nest))
    return 0;

  rdg = build_rdg (loop_nest, cd);
  if (!rdg)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "Loop %d not distributed: failed to build the RDG.\n",
                 loop->num);

      return 0;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg (dump_file, rdg);

  partitions.create (3);
  rdg_build_partitions (rdg, stmts, &partitions);

  any_builtin = false;
  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      classify_partition (loop, rdg, partition);
      any_builtin |= partition_builtin_p (partition);
    }

  /* If we are only distributing patterns but did not detect any,
     simply bail out.  */
  if (!flag_tree_loop_distribution
      && !any_builtin)
    {
      nbp = 0;
      goto ldist_done;
    }

  /* If we are only distributing patterns fuse all partitions that
     were not classified as builtins.  This also avoids chopping
     a loop into pieces, separated by builtin calls.  That is, we
     only want no or a single loop body remaining.  */
  partition_t into;
  if (!flag_tree_loop_distribution)
    {
      for (i = 0; partitions.iterate (i, &into); ++i)
        if (!partition_builtin_p (into))
          break;
      for (++i; partitions.iterate (i, &partition); ++i)
        if (!partition_builtin_p (partition))
          {
            if (dump_file && (dump_flags & TDF_DETAILS))
              {
                fprintf (dump_file, "fusing non-builtin partitions\n");
                dump_bitmap (dump_file, into->stmts);
                dump_bitmap (dump_file, partition->stmts);
              }
            partition_merge_into (into, partition);
            partitions.unordered_remove (i);
            partition_free (partition);
            i--;
          }
    }

  /* Due to limitations in the transform phase we have to fuse all
     reduction partitions into the last partition so the existing
     loop will contain all loop-closed PHI nodes.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    if (partition_reduction_p (into))
      break;
  for (i = i + 1; partitions.iterate (i, &partition); ++i)
    if (partition_reduction_p (partition))
      {
        if (dump_file && (dump_flags & TDF_DETAILS))
          {
            fprintf (dump_file, "fusing partitions\n");
            dump_bitmap (dump_file, into->stmts);
            dump_bitmap (dump_file, partition->stmts);
            fprintf (dump_file, "because they have reductions\n");
          }
        partition_merge_into (into, partition);
        partitions.unordered_remove (i);
        partition_free (partition);
        i--;
      }

  /* Apply our simple cost model - fuse partitions with similar
     memory accesses.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    {
      if (partition_builtin_p (into))
        continue;
      for (int j = i + 1;
           partitions.iterate (j, &partition); ++j)
        {
          if (!partition_builtin_p (partition)
              && similar_memory_accesses (rdg, into, partition))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                {
                  fprintf (dump_file, "fusing partitions\n");
                  dump_bitmap (dump_file, into->stmts);
                  dump_bitmap (dump_file, partition->stmts);
                  fprintf (dump_file, "because they have similar "
                           "memory accesses\n");
                }
              partition_merge_into (into, partition);
              partitions.unordered_remove (j);
              partition_free (partition);
              j--;
            }
        }
    }

  /* Build the partition dependency graph.  */
  if (partitions.length () > 1)
    {
      pg = new_graph (partitions.length ());
      struct pgdata
        {
          partition_t partition;
          vec<data_reference_p> writes;
          vec<data_reference_p> reads;
        };
#define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
      for (i = 0; partitions.iterate (i, &partition); ++i)
        {
          vertex *v = &pg->vertices[i];
          pgdata *data = new pgdata;
          data_reference_p dr;
          /* FIXME - leaks.  */
          v->data = data;
          bitmap_iterator bi;
          unsigned j;
          data->partition = partition;
          data->reads = vNULL;
          data->writes = vNULL;
          EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, j, bi)
            for (int k = 0; RDG_DATAREFS (rdg, j).iterate (k, &dr); ++k)
              if (DR_IS_READ (dr))
                data->reads.safe_push (dr);
              else
                data->writes.safe_push (dr);
        }
      partition_t partition1, partition2;
      for (i = 0; partitions.iterate (i, &partition1); ++i)
        for (int j = i + 1; partitions.iterate (j, &partition2); ++j)
          {
            /* dependence direction - 0 is no dependence, -1 is back,
               1 is forth, 2 is both (we can stop then, merging will
               occur).  */
            int dir = 0;
            dir = pg_add_dependence_edges (rdg, loop_nest, dir,
                                           PGDATA(i)->writes,
                                           PGDATA(j)->reads);
            if (dir != 2)
              dir = pg_add_dependence_edges (rdg, loop_nest, dir,
                                             PGDATA(i)->reads,
                                             PGDATA(j)->writes);
            if (dir != 2)
              dir = pg_add_dependence_edges (rdg, loop_nest, dir,
                                             PGDATA(i)->writes,
                                             PGDATA(j)->writes);
            if (dir == 1 || dir == 2)
              add_edge (pg, i, j);
            if (dir == -1 || dir == 2)
              add_edge (pg, j, i);
          }

      /* Add edges to the reduction partition (if any) to force it last.  */
      unsigned j;
      for (j = 0; partitions.iterate (j, &partition); ++j)
        if (partition_reduction_p (partition))
          break;
      if (j < partitions.length ())
        {
          for (unsigned i = 0; partitions.iterate (i, &partition); ++i)
            if (i != j)
              add_edge (pg, i, j);
        }

      /* Compute partitions we cannot separate and fuse them.  */
      num_sccs = graphds_scc (pg, NULL);
      for (i = 0; i < num_sccs; ++i)
        {
          partition_t first;
          int j;
          for (j = 0; partitions.iterate (j, &first); ++j)
            if (pg->vertices[j].component == i)
              break;
          for (j = j + 1; partitions.iterate (j, &partition); ++j)
            if (pg->vertices[j].component == i)
              {
                if (dump_file && (dump_flags & TDF_DETAILS))
                  {
                    fprintf (dump_file, "fusing partitions\n");
                    dump_bitmap (dump_file, first->stmts);
                    dump_bitmap (dump_file, partition->stmts);
                    fprintf (dump_file, "because they are in the same "
                             "dependence SCC\n");
                  }
                partition_merge_into (first, partition);
                partitions[j] = NULL;
                partition_free (partition);
                PGDATA (j)->partition = NULL;
              }
        }

      /* Now order the remaining nodes in postorder.  */
      qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
      partitions.truncate (0);
      for (i = 0; i < pg->n_vertices; ++i)
        {
          pgdata *data = PGDATA (i);
          if (data->partition)
            partitions.safe_push (data->partition);
          data->reads.release ();
          data->writes.release ();
          delete data;
        }
      gcc_assert (partitions.length () == (unsigned)num_sccs);
      free_graph (pg);
    }

  nbp = partitions.length ();
  if (nbp == 0
      || (nbp == 1 && !partition_builtin_p (partitions[0]))
      || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
    {
      nbp = 0;
      goto ldist_done;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg_partitions (dump_file, partitions);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      if (partition_builtin_p (partition))
        (*nb_calls)++;
      generate_code_for_partition (loop, partition, i < nbp - 1);
    }

 ldist_done:

  FOR_EACH_VEC_ELT (partitions, i, partition)
    partition_free (partition);
  partitions.release ();

  free_rdg (rdg);
  return nbp - *nb_calls;
}
/* Distribute all loops in the current function.  */

static unsigned int
tree_loop_distribution (void)
{
  struct loop *loop;
  bool changed = false;
  basic_block bb;
  control_dependences *cd = NULL;

  FOR_ALL_BB (bb)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        gimple_set_uid (gsi_stmt (gsi), -1);
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        gimple_set_uid (gsi_stmt (gsi), -1);
    }

  /* We can at the moment only distribute non-nested loops, thus restrict
     walking to innermost loops.  */
  FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
    {
      vec<gimple> work_list = vNULL;
      basic_block *bbs;
      int num = loop->num;
      unsigned int i;

      /* If the loop doesn't have a single exit we will fail anyway,
         so do that early.  */
      if (!single_exit (loop))
        continue;

      /* Only optimize hot loops.  */
      if (!optimize_loop_for_speed_p (loop))
        continue;

      /* Initialize the worklist with stmts we seed the partitions with.  */
      bbs = get_loop_body_in_dom_order (loop);
      for (i = 0; i < loop->num_nodes; ++i)
        {
          gimple_stmt_iterator gsi;
          for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi);
               gsi_next (&gsi))
            {
              gimple phi = gsi_stmt (gsi);
              if (virtual_operand_p (gimple_phi_result (phi)))
                continue;
              /* Distribute stmts which have defs that are used outside of
                 the loop.  */
              if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
                continue;
              work_list.safe_push (phi);
            }
          for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);
               gsi_next (&gsi))
            {
              gimple stmt = gsi_stmt (gsi);

              /* If there is a stmt with side-effects bail out - we
                 cannot and should not distribute this loop.  */
              if (gimple_has_side_effects (stmt))
                {
                  work_list.truncate (0);
                  goto out;
                }

              /* Distribute stmts which have defs that are used outside of
                 the loop.  */
              if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
                ;
              /* Otherwise only distribute stores for now.  */
              else if (!gimple_vdef (stmt))
                continue;

              work_list.safe_push (stmt);
            }
        }
out:
      free (bbs);

      int nb_generated_loops = 0;
      int nb_generated_calls = 0;
      location_t loc = find_loop_location (loop);
      if (work_list.length () > 0)
        {
          if (!cd)
            {
              calculate_dominance_info (CDI_DOMINATORS);
              calculate_dominance_info (CDI_POST_DOMINATORS);
              cd = new control_dependences (create_edge_list ());
              free_dominance_info (CDI_POST_DOMINATORS);
            }
          nb_generated_loops = distribute_loop (loop, work_list, cd,
                                                &nb_generated_calls);
        }

      if (nb_generated_loops + nb_generated_calls > 0)
        {
          changed = true;
          dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
                           loc, "Loop %d distributed: split to %d loops "
                           "and %d library calls.\n",
                           num, nb_generated_loops, nb_generated_calls);
        }
      else if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Loop %d is the same.\n", num);

      work_list.release ();
    }

  if (cd)
    delete cd;

  if (changed)
    {
      mark_virtual_operands_for_renaming (cfun);
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }

#ifdef ENABLE_CHECKING
  verify_loop_structure ();
#endif

  return 0;
}
static bool
gate_tree_loop_distribution (void)
{
  return flag_tree_loop_distribution
    || flag_tree_loop_distribute_patterns;
}
namespace {

const pass_data pass_data_loop_distribution =
{
  GIMPLE_PASS, /* type */
  "ldist", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  true, /* has_gate */
  true, /* has_execute */
  TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_verify_ssa, /* todo_flags_finish */
};

class pass_loop_distribution : public gimple_opt_pass
{
public:
  pass_loop_distribution (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_loop_distribution, ctxt)
  {}

  /* opt_pass methods: */
  bool gate () { return gate_tree_loop_distribution (); }
  unsigned int execute () { return tree_loop_distribution (); }

}; // class pass_loop_distribution

} // anon namespace

gimple_opt_pass *
make_pass_loop_distribution (gcc::context *ctxt)
{
  return new pass_loop_distribution (ctxt);
}