Merge branch 'master' r216746-r217593 into gimple-classes-v2-option-3
[official-gcc.git] / gcc / tree-loop-distribution.c
blob31ea04c3a0bc6148ce6fb3f74306c134fb85ee08
1 /* Loop distribution.
2 Copyright (C) 2006-2014 Free Software Foundation, Inc.
3 Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
4 and Sebastian Pop <sebastian.pop@amd.com>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 3, or (at your option) any
11 later version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* This pass performs loop distribution: for example, the loop
24 |DO I = 2, N
25 | A(I) = B(I) + C
26 | D(I) = A(I-1)*E
27 |ENDDO
29 is transformed to
31 |DOALL I = 2, N
32 | A(I) = B(I) + C
33 |ENDDO
35 |DOALL I = 2, N
36 | D(I) = A(I-1)*E
37 |ENDDO
39 This pass uses an RDG, Reduced Dependence Graph built on top of the
40 data dependence relations. The RDG is then topologically sorted to
41 obtain a map of information producers/consumers based on which it
42 generates the new loops. */
44 #include "config.h"
45 #include "system.h"
46 #include "coretypes.h"
47 #include "tree.h"
48 #include "predict.h"
49 #include "vec.h"
50 #include "hashtab.h"
51 #include "hash-set.h"
52 #include "machmode.h"
53 #include "tm.h"
54 #include "hard-reg-set.h"
55 #include "input.h"
56 #include "function.h"
57 #include "dominance.h"
58 #include "cfg.h"
59 #include "cfganal.h"
60 #include "basic-block.h"
61 #include "tree-ssa-alias.h"
62 #include "internal-fn.h"
63 #include "gimple-expr.h"
64 #include "is-a.h"
65 #include "gimple.h"
66 #include "gimple-iterator.h"
67 #include "gimplify-me.h"
68 #include "stor-layout.h"
69 #include "gimple-ssa.h"
70 #include "tree-cfg.h"
71 #include "tree-phinodes.h"
72 #include "ssa-iterators.h"
73 #include "stringpool.h"
74 #include "tree-ssanames.h"
75 #include "tree-ssa-loop-manip.h"
76 #include "tree-ssa-loop.h"
77 #include "tree-into-ssa.h"
78 #include "tree-ssa.h"
79 #include "cfgloop.h"
80 #include "tree-chrec.h"
81 #include "tree-data-ref.h"
82 #include "tree-scalar-evolution.h"
83 #include "tree-pass.h"
84 #include "gimple-pretty-print.h"
85 #include "tree-vectorizer.h"
/* NOTE(review): this listing appears to have lost all brace-only lines
   during extraction; code text below is preserved byte-identically.  */
88 /* A Reduced Dependence Graph (RDG) vertex representing a statement.  */
89 typedef struct rdg_vertex
91   /* The statement represented by this vertex.  */
92   gimple stmt;
94   /* Vector of data-references in this statement.  */
95   vec<data_reference_p> datarefs;
97   /* True when the statement contains a write to memory.  */
98   bool has_mem_write;
100   /* True when the statement contains a read from memory.  */
101   bool has_mem_reads;
102 } *rdg_vertex_p;
/* The vertex payload is stored in the generic void *data slot of the
   graph's struct vertex; these accessors cast it back.  */
104 #define RDGV_STMT(V)     ((struct rdg_vertex *) ((V)->data))->stmt
105 #define RDGV_DATAREFS(V)     ((struct rdg_vertex *) ((V)->data))->datarefs
106 #define RDGV_HAS_MEM_WRITE(V)     ((struct rdg_vertex *) ((V)->data))->has_mem_write
107 #define RDGV_HAS_MEM_READS(V)     ((struct rdg_vertex *) ((V)->data))->has_mem_reads
/* Convenience forms indexed by vertex number I in RDG.  */
108 #define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
109 #define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
110 #define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
111 #define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
113 /* Data dependence type.  */
115 enum rdg_dep_type
117   /* Read After Write (RAW).  */
118   flow_dd = 'f',
120   /* Control dependence (execute conditional on).  */
121   control_dd = 'c'
124 /* Dependence information attached to an edge of the RDG.  */
126 typedef struct rdg_edge
128   /* Type of the dependence.  */
129   enum rdg_dep_type type;
130 } *rdg_edge_p;
/* Edge payload lives in the edge's void *data slot, same scheme as
   vertices above.  */
132 #define RDGE_TYPE(E)        ((struct rdg_edge *) ((E)->data))->type
134 /* Dump vertex I in RDG to FILE.  Output format is a lisp-like
   "(vertex I: (rw) (in: ...) (out: ...)" record followed by the
   pretty-printed GIMPLE statement.  */
136 static void
137 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
139   struct vertex *v = &(rdg->vertices[i]);
140   struct graph_edge *e;
/* "w"/"r" flags reflect whether the statement writes/reads memory.  */
142   fprintf (file, "(vertex %d: (%s%s) (in:", i,
143        RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
144        RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
146   if (v->pred)
147     for (e = v->pred; e; e = e->pred_next)
148       fprintf (file, " %d", e->src);
150   fprintf (file, ") (out:");
152   if (v->succ)
153     for (e = v->succ; e; e = e->succ_next)
154       fprintf (file, " %d", e->dest);
156   fprintf (file, ")\n");
157   print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
158   fprintf (file, ")\n");
161 /* Call dump_rdg_vertex on stderr.  */
163 DEBUG_FUNCTION void
164 debug_rdg_vertex (struct graph *rdg, int i)
166   dump_rdg_vertex (stderr, rdg, i);
169 /* Dump the reduced dependence graph RDG to FILE.  */
171 static void
172 dump_rdg (FILE *file, struct graph *rdg)
174   fprintf (file, "(rdg\n");
175   for (int i = 0; i < rdg->n_vertices; i++)
176     dump_rdg_vertex (file, rdg, i);
177   fprintf (file, ")\n");
180 /* Call dump_rdg on stderr.  */
182 DEBUG_FUNCTION void
183 debug_rdg (struct graph *rdg)
185   dump_rdg (stderr, rdg);
/* Emit RDG to FILE in graphviz "dot" syntax: one node per vertex
   (labeled with the slim-printed statement), memory reads filled
   green, memory writes filled red, control edges labeled.  */
188 static void
189 dot_rdg_1 (FILE *file, struct graph *rdg)
191   int i;
192   pretty_printer buffer;
193   pp_needs_newline (&buffer) = false;
/* Route the pretty-printer straight at FILE so pp_flush writes there.  */
194   buffer.buffer->stream = file;
196   fprintf (file, "digraph RDG {\n");
198   for (i = 0; i < rdg->n_vertices; i++)
200       struct vertex *v = &(rdg->vertices[i]);
201       struct graph_edge *e;
203       fprintf (file, "%d [label=\"[%d] ", i, i);
204       pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
205       pp_flush (&buffer);
206       fprintf (file, "\"]\n");
208       /* Highlight reads from memory.  */
209       if (RDG_MEM_READS_STMT (rdg, i))
210        fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
212       /* Highlight stores to memory.  */
213       if (RDG_MEM_WRITE_STMT (rdg, i))
214        fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
216       if (v->succ)
217        for (e = v->succ; e; e = e->succ_next)
218          switch (RDGE_TYPE (e))
220            case flow_dd:
221              /* These are the most common dependences: don't print these.  */
222              fprintf (file, "%d -> %d \n", i, e->dest);
223              break;
225            case control_dd:
226              fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
227              break;
229            default:
230              gcc_unreachable ();
234   fprintf (file, "}\n\n");
237 /* Display the Reduced Dependence Graph using dotty.  */
239 DEBUG_FUNCTION void
240 dot_rdg (struct graph *rdg)
242 /* When debugging, you may want to enable the following code.  */
243 #ifdef HAVE_POPEN
244   FILE *file = popen ("dot -Tx11", "w");
245   if (!file)
246     return;
247   dot_rdg_1 (file, rdg);
248   fflush (file);
/* NOTE(review): closing the underlying descriptor before pclose looks
   like a double close of the same fd — confirm this is intentional
   (perhaps to signal EOF to dot while it keeps rendering).  */
249   close (fileno (file));
250   pclose (file);
251 #else
252   dot_rdg_1 (stderr, rdg);
253 #endif
256 /* Returns the index of STMT in RDG.  The mapping is kept in the
   statement's uid (set by create_rdg_vertices); -1 means STMT is not
   part of the RDG.  */
258 static int
259 rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple stmt)
261   int index = gimple_uid (stmt);
/* Sanity-check that the uid still maps back to STMT.  */
262   gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
263   return index;
266 /* Creates dependence edges in RDG for all the uses of DEF.  IDEF is
267    the index of DEF in RDG.  Each edge is a flow (RAW) dependence from
   the definition vertex to the using vertex; uses outside the RDG
   (vertex index < 0) are skipped.  */
269 static void
270 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
272   use_operand_p imm_use_p;
273   imm_use_iterator iterator;
275   FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
277       struct graph_edge *e;
278       int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
280       if (use < 0)
281        continue;
283       e = add_edge (rdg, idef, use);
/* Edge payload is heap-allocated; freed in free_rdg.  */
284       e->data = XNEW (struct rdg_edge);
285       RDGE_TYPE (e) = flow_dd;
289 /* Creates an edge for the control dependences of BB to the vertex V.  */
291 static void
292 create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
293                                int v, control_dependences *cd)
295   bitmap_iterator bi;
296   unsigned edge_n;
297   EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
298                        0, edge_n, bi)
/* The controlling statement is the last stmt of the edge's source
   block, provided it really is a control statement.  */
300       basic_block cond_bb = cd->get_edge (edge_n)->src;
301       gimple stmt = last_stmt (cond_bb);
302       if (stmt && is_ctrl_stmt (stmt))
304          struct graph_edge *e;
305          int c = rdg_vertex_for_stmt (rdg, stmt);
306          if (c < 0)
307            continue;
309          e = add_edge (rdg, c, v);
310          e->data = XNEW (struct rdg_edge);
311          RDGE_TYPE (e) = control_dd;
316 /* Creates the edges of the reduced dependence graph RDG.  Adds a flow
   edge for every SSA def of every statement to each of its uses.  */
318 static void
319 create_rdg_flow_edges (struct graph *rdg)
321   int i;
322   def_operand_p def_p;
323   ssa_op_iter iter;
325   for (i = 0; i < rdg->n_vertices; i++)
326     FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
327                          iter, SSA_OP_DEF)
328       create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
331 /* Creates the edges of the reduced dependence graph RDG.  Adds a
   control-dependence edge for every statement; a PHI is control
   dependent on each of its incoming edges' source blocks, any other
   statement on its own block.  */
333 static void
334 create_rdg_cd_edges (struct graph *rdg, control_dependences *cd)
336   int i;
338   for (i = 0; i < rdg->n_vertices; i++)
340       gimple stmt = RDG_STMT (rdg, i);
341       if (gimple_code (stmt) == GIMPLE_PHI)
343          edge_iterator ei;
344          edge e;
345          FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
346            create_edge_for_control_dependence (rdg, e->src, i, cd);
348       else
349        create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
353 /* Build the vertices of the reduced dependence graph RDG.  Return false
354    if that failed.  STMTS holds the statements in RDG vertex order;
   DATAREFS accumulates all data references found (owned by the caller,
   who is responsible for releasing it on failure too).  */
356 static bool
357 create_rdg_vertices (struct graph *rdg, vec<gimple> stmts, loop_p loop,
358                 vec<data_reference_p> *datarefs)
360   int i;
361   gimple stmt;
363   FOR_EACH_VEC_ELT (stmts, i, stmt)
365       struct vertex *v = &(rdg->vertices[i]);
367       /* Record statement to vertex mapping.  */
368       gimple_set_uid (stmt, i);
370       v->data = XNEW (struct rdg_vertex);
371       RDGV_STMT (v) = stmt;
372       RDGV_DATAREFS (v).create (0);
373       RDGV_HAS_MEM_WRITE (v) = false;
374       RDGV_HAS_MEM_READS (v) = false;
/* PHIs carry no memory references.  */
375       if (gimple_code (stmt) == GIMPLE_PHI)
376        continue;
/* Remember where this statement's data refs start in DATAREFS so we
   can attach exactly the new ones to the vertex below.  */
378       unsigned drp = datarefs->length ();
379       if (!find_data_references_in_stmt (loop, stmt, datarefs))
380        return false;
381       for (unsigned j = drp; j < datarefs->length (); ++j)
383          data_reference_p dr = (*datarefs)[j];
384          if (DR_IS_READ (dr))
385            RDGV_HAS_MEM_READS (v) = true;
386          else
387            RDGV_HAS_MEM_WRITE (v) = true;
388          RDGV_DATAREFS (v).safe_push (dr);
391   return true;
394 /* Initialize STMTS with all the statements of LOOP.  The order in
395    which we discover statements is important as
396    generate_loops_for_partition is using the same traversal for
397    identifying statements in loop copies.  Labels and debug stmts are
   skipped; so are virtual PHIs.  */
399 static void
400 stmts_from_loop (struct loop *loop, vec<gimple> *stmts)
402   unsigned int i;
/* Dominator order makes the traversal deterministic across copies.  */
403   basic_block *bbs = get_loop_body_in_dom_order (loop);
405   for (i = 0; i < loop->num_nodes; i++)
407       basic_block bb = bbs[i];
409       for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
410       gsi_next (&bsi))
411    if (!virtual_operand_p (gimple_phi_result (bsi.phi ())))
412      stmts->safe_push (bsi.phi ());
414       for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
415       gsi_next (&bsi))
417          gimple stmt = gsi_stmt (bsi);
418          if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
419            stmts->safe_push (stmt);
423   free (bbs);
426 /* Free the reduced dependence graph RDG: edge payloads, vertex
   payloads (including their data-ref vectors), and the graph itself.
   Also resets each statement's uid back to -1 so rdg_vertex_for_stmt
   no longer maps it.  */
428 static void
429 free_rdg (struct graph *rdg)
431   int i;
433   for (i = 0; i < rdg->n_vertices; i++)
435       struct vertex *v = &(rdg->vertices[i]);
436       struct graph_edge *e;
438       for (e = v->succ; e; e = e->succ_next)
439        free (e->data);
/* v->data may be NULL when create_rdg_vertices bailed out early.  */
441       if (v->data)
443          gimple_set_uid (RDGV_STMT (v), -1);
444          free_data_refs (RDGV_DATAREFS (v));
445          free (v->data);
449   free_graph (rdg);
452 /* Build the Reduced Dependence Graph (RDG) with one vertex per
453    statement of the loop nest LOOP_NEST, and one edge per data dependence or
454    scalar dependence.  CD may be NULL, in which case no control-dependence
   edges are added.  Returns NULL when vertex creation failed.  */
456 static struct graph *
457 build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
459   struct graph *rdg;
460   vec<data_reference_p> datarefs;
462   /* Create the RDG vertices from the stmts of the loop nest.  */
463   auto_vec<gimple, 10> stmts;
464   stmts_from_loop (loop_nest[0], &stmts);
465   rdg = new_graph (stmts.length ());
466   datarefs.create (10);
467   if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs))
/* Data refs already attached to vertices are freed by free_rdg;
   DATAREFS only needs its vector storage released here.  */
469       datarefs.release ();
470       free_rdg (rdg);
471       return NULL;
473   stmts.release ();
475   create_rdg_flow_edges (rdg);
476   if (cd)
477     create_rdg_cd_edges (rdg, cd);
479   datarefs.release ();
481   return rdg;
/* How a partition will be code-generated: as a plain loop, or as a
   call to a memset/memcpy(memmove) builtin.  */
486 enum partition_kind {
487     PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY
/* A partition: a set of RDG statements (by vertex index) that will be
   emitted together, plus the builtin-classification data filled in by
   classify_partition.  */
490 typedef struct partition_s
492   bitmap stmts;
493   bitmap loops;
494   bool reduction_p;
495   enum partition_kind kind;
496   /* data-references a kind != PKIND_NORMAL partition is about.  */
497   data_reference_p main_dr;
498   data_reference_p secondary_dr;
499   tree niter;
500   bool plus_one;
501 } *partition_t;
504 /* Allocate and initialize a partition from BITMAP.  Either bitmap
   argument may be NULL, in which case a fresh empty bitmap is
   allocated (and owned by the partition).  */
506 static partition_t
507 partition_alloc (bitmap stmts, bitmap loops)
509   partition_t partition = XCNEW (struct partition_s);
510   partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
511   partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
512   partition->reduction_p = false;
513   partition->kind = PKIND_NORMAL;
514   return partition;
517 /* Free PARTITION.  */
519 static void
520 partition_free (partition_t partition)
522   BITMAP_FREE (partition->stmts);
523   BITMAP_FREE (partition->loops);
524   free (partition);
527 /* Returns true if the partition can be generated as a builtin.  */
529 static bool
530 partition_builtin_p (partition_t partition)
532   return partition->kind != PKIND_NORMAL;
535 /* Returns true if the partition contains a reduction.  */
537 static bool
538 partition_reduction_p (partition_t partition)
540   return partition->reduction_p;
543 /* Merge PARTITION into the partition DEST.  The merged result is
   always PKIND_NORMAL (a builtin classification does not survive a
   merge); reduction-ness is sticky.  PARTITION itself is not freed.  */
545 static void
546 partition_merge_into (partition_t dest, partition_t partition)
548   dest->kind = PKIND_NORMAL;
549   bitmap_ior_into (dest->stmts, partition->stmts);
550   if (partition_reduction_p (partition))
551     dest->reduction_p = true;
555 /* Returns true when DEF is an SSA_NAME defined in LOOP and used after
556    the LOOP.  Debug uses do not count.  */
558 static bool
559 ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
561   imm_use_iterator imm_iter;
562   use_operand_p use_p;
564   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
566       gimple use_stmt = USE_STMT (use_p);
567       if (!is_gimple_debug (use_stmt)
568       && loop != loop_containing_stmt (use_stmt))
569    return true;
572   return false;
575 /* Returns true when STMT defines a scalar variable used after the
576    loop LOOP.  For a PHI only its result is checked; otherwise every
   SSA def operand of STMT is checked.  */
578 static bool
579 stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple stmt)
581   def_operand_p def_p;
582   ssa_op_iter op_iter;
584   if (gimple_code (stmt) == GIMPLE_PHI)
585     return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);
587   FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
588     if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
589       return true;
591   return false;
594 /* Return a copy of LOOP placed before LOOP, on its preheader edge.  */
596 static struct loop *
597 copy_loop_before (struct loop *loop)
599   struct loop *res;
600   edge preheader = loop_preheader_edge (loop);
602   initialize_original_copy_tables ();
603   res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, NULL, preheader);
604   gcc_assert (res != NULL);
605   free_original_copy_tables ();
/* Discard the pending SSA update the duplication queued; callers
   handle SSA form themselves.  */
606   delete_update_ssa ();
608   return res;
611 /* Creates an empty basic block after LOOP by splitting its single
   exit edge.  No-op when the loop has no single exit.  */
613 static void
614 create_bb_after_loop (struct loop *loop)
616   edge exit = single_exit (loop);
618   if (!exit)
619     return;
621   split_edge (exit);
624 /* Generate code for PARTITION from the code in LOOP.  The loop is
625    copied when COPY_P is true.  All the statements not flagged in the
626    PARTITION bitmap are removed from the loop or from its copy.  The
627    statements are indexed in sequence inside a basic block, and the
628    basic blocks of a loop are taken in dom order.  */
630 static void
631 generate_loops_for_partition (struct loop *loop, partition_t partition,
632                          bool copy_p)
634   unsigned i;
635   basic_block *bbs;
637   if (copy_p)
639       loop = copy_loop_before (loop);
640       gcc_assert (loop != NULL);
641       create_preheader (loop, CP_SIMPLE_PREHEADERS);
642       create_bb_after_loop (loop);
645   /* Remove stmts not in the PARTITION bitmap.  */
646   bbs = get_loop_body_in_dom_order (loop);
/* First pass: before deleting anything, detach debug uses of the
   stmts that are about to go, so debug info stays consistent.  */
648   if (MAY_HAVE_DEBUG_STMTS)
649     for (i = 0; i < loop->num_nodes; i++)
651        basic_block bb = bbs[i];
653        for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
654        gsi_next (&bsi))
656            gphi *phi = bsi.phi ();
657            if (!virtual_operand_p (gimple_phi_result (phi))
658           && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
659         reset_debug_uses (phi);
662        for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
664            gimple stmt = gsi_stmt (bsi);
665            if (gimple_code (stmt) != GIMPLE_LABEL
666           && !is_gimple_debug (stmt)
667           && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
668         reset_debug_uses (stmt);
/* Second pass: actually remove the statements not in the partition.
   Control statements are neutralized rather than removed so the CFG
   stays well-formed; cleanup happens later.  */
672   for (i = 0; i < loop->num_nodes; i++)
674       basic_block bb = bbs[i];
676       for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
678          gphi *phi = bsi.phi ();
679          if (!virtual_operand_p (gimple_phi_result (phi))
680         && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
681            remove_phi_node (&bsi, true);
682          else
683            gsi_next (&bsi);
686       for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
688          gimple stmt = gsi_stmt (bsi);
689          if (gimple_code (stmt) != GIMPLE_LABEL
690         && !is_gimple_debug (stmt)
691         && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
693         /* Choose an arbitrary path through the empty CFG part
694            that this unnecessary control stmt controls.  */
695         if (gcond *cond_stmt = dyn_cast <gcond *> (stmt))
697             gimple_cond_make_false (cond_stmt);
698             update_stmt (stmt);
700         else if (gimple_code (stmt) == GIMPLE_SWITCH)
702             gswitch *switch_stmt = as_a <gswitch *> (stmt);
703             gimple_switch_set_index
704            (switch_stmt, CASE_LOW (gimple_switch_label (switch_stmt, 1)));
705             update_stmt (stmt);
707         else
709             unlink_stmt_vdef (stmt);
710             gsi_remove (&bsi, true);
711             release_defs (stmt);
712             continue;
715          gsi_next (&bsi);
719   free (bbs);
722 /* Build the size argument for a memory operation call: the byte count
   (NB_ITER, plus one iteration when PLUS_ONE) times the element size
   of DR's reference, as a size_type_node expression.  */
724 static tree
725 build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter,
726               bool plus_one)
728   tree size = fold_convert_loc (loc, sizetype, nb_iter);
729   if (plus_one)
730     size = size_binop (PLUS_EXPR, size, size_one_node);
731   size = fold_build2_loc (loc, MULT_EXPR, sizetype, size,
732                      TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
733   size = fold_convert_loc (loc, size_type_node, size);
734   return size;
737 /* Build an address argument for a memory operation call: DR's base
   address plus offset/init, adjusted so a negative-stride reference
   yields the lowest byte touched (NB_BYTES is the total byte count
   from build_size_arg_loc).  */
739 static tree
740 build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
742   tree addr_base;
744   addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
745   addr_base = fold_convert_loc (loc, sizetype, addr_base);
747   /* Test for a negative stride, iterating over every element.  */
748   if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
/* Back up by the full extent, then forward one element so the
   address points at the first byte actually accessed.  */
750       addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
751                             fold_convert_loc (loc, sizetype, nb_bytes));
752       addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
753                             TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
756   return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
759 /* If VAL memory representation contains the same value in all bytes,
760    return that value, otherwise return -1.
761    E.g. for 0x24242424 return 0x24, for IEEE double
762    747708026454360457216.0 return 0x44, etc.  */
764 static int
765 const_with_all_bytes_same (tree val)
767   unsigned char buf[64];
768   int i, len;
/* Fast paths for the all-zero cases: integer/real zero and an empty
   non-clobber CONSTRUCTOR.  */
770   if (integer_zerop (val)
771       || real_zerop (val)
772       || (TREE_CODE (val) == CONSTRUCTOR
773           && !TREE_CLOBBER_P (val)
774           && CONSTRUCTOR_NELTS (val) == 0))
775     return 0;
/* The byte-wise comparison below only makes sense on 8-bit hosts
   and targets.  */
777   if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
778     return -1;
780   len = native_encode_expr (val, buf, sizeof (buf));
781   if (len == 0)
782     return -1;
783   for (i = 1; i < len; i++)
784     if (buf[i] != buf[0])
785       return -1;
786   return buf[0];
789 /* Generate a call to memset for PARTITION in LOOP.  The partition must
   have been classified PKIND_MEMSET, with main_dr/niter/plus_one set
   by classify_partition.  */
791 static void
792 generate_memset_builtin (struct loop *loop, partition_t partition)
794   gimple_stmt_iterator gsi;
795   gimple stmt, fn_call;
796   tree mem, fn, nb_bytes;
797   location_t loc;
798   tree val;
800   stmt = DR_STMT (partition->main_dr);
801   loc = gimple_location (stmt);
803   /* The new statements will be placed before LOOP.  */
804   gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
806   nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
807                            partition->plus_one);
808   nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
809                                  false, GSI_CONTINUE_LINKING);
810   mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
811   mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
812                             false, GSI_CONTINUE_LINKING);
814   /* This exactly matches the pattern recognition in classify_partition.  */
815   val = gimple_assign_rhs1 (stmt);
816   /* Handle constants like 0x15151515 and similarly
817      floating point constants etc. where all bytes are the same.  */
818   int bytev = const_with_all_bytes_same (val);
819   if (bytev != -1)
820     val = build_int_cst (integer_type_node, bytev);
821   else if (TREE_CODE (val) == INTEGER_CST)
822     val = fold_convert (integer_type_node, val);
/* Otherwise VAL is a one-byte value of some other type (guaranteed by
   classify_partition); widen it to int, the type memset expects.  */
823   else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
825       gimple cstmt;
826       tree tem = make_ssa_name (integer_type_node, NULL);
827       cstmt = gimple_build_assign_with_ops (NOP_EXPR, tem, val, NULL_TREE);
828       gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
829       val = tem;
832   fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
833   fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
834   gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
836   if (dump_file && (dump_flags & TDF_DETAILS))
838       fprintf (dump_file, "generated memset");
839       if (bytev == 0)
840        fprintf (dump_file, " zero\n");
841       else
842        fprintf (dump_file, "\n");
846 /* Generate a call to memcpy for PARTITION in LOOP.  Falls back to
   memmove when source and destination may alias.  The partition must
   have been classified PKIND_MEMCPY.  */
848 static void
849 generate_memcpy_builtin (struct loop *loop, partition_t partition)
851   gimple_stmt_iterator gsi;
852   gimple stmt, fn_call;
853   tree dest, src, fn, nb_bytes;
854   location_t loc;
855   enum built_in_function kind;
857   stmt = DR_STMT (partition->main_dr);
858   loc = gimple_location (stmt);
860   /* The new statements will be placed before LOOP.  */
861   gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
863   nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
864                            partition->plus_one);
865   nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
866                                  false, GSI_CONTINUE_LINKING);
867   dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
868   src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
/* memcpy requires non-overlapping operands; use memmove otherwise.  */
869   if (ptr_derefs_may_alias_p (dest, src))
870     kind = BUILT_IN_MEMMOVE;
871   else
872     kind = BUILT_IN_MEMCPY;
874   dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
875                              false, GSI_CONTINUE_LINKING);
876   src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
877                             false, GSI_CONTINUE_LINKING);
878   fn = build_fold_addr_expr (builtin_decl_implicit (kind));
879   fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
880   gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
882   if (dump_file && (dump_flags & TDF_DETAILS))
884       if (kind == BUILT_IN_MEMCPY)
885        fprintf (dump_file, "generated memcpy\n");
886       else
887        fprintf (dump_file, "generated memmove\n");
891 /* Remove and destroy the loop LOOP.  Requires a single exit edge;
   redirects it past the loop, deletes every loop block, and repairs
   dominators for the exit destination.  */
893 static void
894 destroy_loop (struct loop *loop)
896   unsigned nbbs = loop->num_nodes;
897   edge exit = single_exit (loop);
898   basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
899   basic_block *bbs;
900   unsigned i;
902   bbs = get_loop_body_in_dom_order (loop);
/* Make the preheader's predecessor fall through directly to the exit
   destination, bypassing the doomed loop body.  */
904   redirect_edge_pred (exit, src);
905   exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
906   exit->flags |= EDGE_FALLTHRU;
907   cancel_loop_tree (loop);
908   rescan_loop_exit (exit, false, true);
910   for (i = 0; i < nbbs; i++)
912       /* We have made sure to not leave any dangling uses of SSA
913     names defined in the loop.  With the exception of virtuals.
914     Make sure we replace all uses of virtual defs that will remain
915     outside of the loop with the bare symbol as delete_basic_block
916     will release them.  */
917       for (gphi_iterator gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi);
918       gsi_next (&gsi))
920          gphi *phi = gsi.phi ();
921          if (virtual_operand_p (gimple_phi_result (phi)))
922            mark_virtual_phi_result_for_renaming (phi);
924       for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);
925       gsi_next (&gsi))
927          gimple stmt = gsi_stmt (gsi);
928          tree vdef = gimple_vdef (stmt);
929          if (vdef && TREE_CODE (vdef) == SSA_NAME)
930            mark_virtual_operand_for_renaming (vdef);
932       delete_basic_block (bbs[i]);
934   free (bbs);
936   set_immediate_dominator (CDI_DOMINATORS, dest,
937                      recompute_dominator (CDI_DOMINATORS, dest));
940 /* Generates code for PARTITION.  Dispatches on the partition kind:
   a normal partition becomes a (possibly copied) loop; builtin
   partitions become a memset or memcpy/memmove call, after which the
   original loop is destroyed if this was the last partition
   (!COPY_P).  */
942 static void
943 generate_code_for_partition (struct loop *loop,
944                         partition_t partition, bool copy_p)
946   switch (partition->kind)
948     case PKIND_NORMAL:
949       /* Reductions all have to be in the last partition.  */
950       gcc_assert (!partition_reduction_p (partition)
951              || !copy_p);
952       generate_loops_for_partition (loop, partition, copy_p);
953       return;
955     case PKIND_MEMSET:
956       generate_memset_builtin (loop, partition);
957       break;
959     case PKIND_MEMCPY:
960       generate_memcpy_builtin (loop, partition);
961       break;
963     default:
964       gcc_unreachable ();
967   /* Common tail for partitions we turn into a call.  If this was the last
968      partition for which we generate code, we have to destroy the loop.  */
969   if (!copy_p)
970     destroy_loop (loop);
974 /* Returns a partition with all the statements needed for computing
975    the vertex V of the RDG, also including the loop exit conditions.
   Computed as the set of vertices reachable from V via a DFS over
   the RDG's (reversed-dependence) edges.  */
977 static partition_t
978 build_rdg_partition_for_vertex (struct graph *rdg, int v)
980   partition_t partition = partition_alloc (NULL, NULL);
981   auto_vec<int, 3> nodes;
982   unsigned i;
983   int x;
985   graphds_dfs (rdg, &v, 1, &nodes, false, NULL);
987   FOR_EACH_VEC_ELT (nodes, i, x)
989       bitmap_set_bit (partition->stmts, x);
/* Also record which loops of the nest the partition touches.  */
990       bitmap_set_bit (partition->loops,
991                  loop_containing_stmt (RDG_STMT (rdg, x))->num);
994   return partition;
997 /* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
998    For the moment we detect only the memset zero pattern.  Also flags
   the partition as a reduction when any statement has scalar uses
   outside LOOP.  On success fills in partition->kind, main_dr,
   secondary_dr (memcpy only), niter and plus_one.  */
1000 static void
1001 classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
1003   bitmap_iterator bi;
1004   unsigned i;
1005   tree nb_iter;
1006   data_reference_p single_load, single_store;
1007   bool volatiles_p = false;
1008   bool plus_one = false;
1010   partition->kind = PKIND_NORMAL;
1011   partition->main_dr = NULL;
1012   partition->secondary_dr = NULL;
1013   partition->niter = NULL_TREE;
1014   partition->plus_one = false;
1016   EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
1018       gimple stmt = RDG_STMT (rdg, i);
1020       if (gimple_has_volatile_ops (stmt))
1021        volatiles_p = true;
1023       /* If the stmt has uses outside of the loop mark it as reduction.  */
1024       if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
1026          partition->reduction_p = true;
1027          return;
1031   /* Perform general partition disqualification for builtins.  */
1032   if (volatiles_p
1033       || !flag_tree_loop_distribute_patterns)
1034     return;
1036   /* Detect memset and memcpy.  */
1037   single_load = NULL;
1038   single_store = NULL;
1039   EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
1041       gimple stmt = RDG_STMT (rdg, i);
1042       data_reference_p dr;
1043       unsigned j;
1045       if (gimple_code (stmt) == GIMPLE_PHI)
1046        continue;
1048       /* Any scalar stmts are ok.  */
1049       if (!gimple_vuse (stmt))
1050        continue;
1052       /* Otherwise just regular loads/stores.  */
1053       if (!gimple_assign_single_p (stmt))
1054        return;
1056       /* But exactly one store and/or load.  */
1057       for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
1059          if (DR_IS_READ (dr))
1061              if (single_load != NULL)
1062                return;
1063              single_load = dr;
1065          else
1067              if (single_store != NULL)
1068                return;
1069              single_store = dr;
1074   if (!single_store)
1075     return;
/* Need a symbolic latch-execution count to size the builtin call.  */
1077   nb_iter = number_of_latch_executions (loop);
1078   if (!nb_iter || nb_iter == chrec_dont_know)
1079     return;
/* If the store executes on the final (exit) iteration too, the byte
   count is one iteration larger than the latch count.  */
1080   if (dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
1081                  gimple_bb (DR_STMT (single_store))))
1082     plus_one = true;
1084   if (single_store && !single_load)
/* Memset candidate: stored value must have the same byte repeated
   (or be a single byte), be loop-invariant, and the access must be
   adjacent (unit stride) and executed every iteration.  */
1086       gimple stmt = DR_STMT (single_store);
1087       tree rhs = gimple_assign_rhs1 (stmt);
1088       if (const_with_all_bytes_same (rhs) == -1
1089      && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
1090          || (TYPE_MODE (TREE_TYPE (rhs))
1091              != TYPE_MODE (unsigned_char_type_node))))
1092    return;
1093       if (TREE_CODE (rhs) == SSA_NAME
1094      && !SSA_NAME_IS_DEFAULT_DEF (rhs)
1095      && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
1096    return;
1097       if (!adjacent_dr_p (single_store)
1098      || !dominated_by_p (CDI_DOMINATORS,
1099                          loop->latch, gimple_bb (stmt)))
1100    return;
1101       partition->kind = PKIND_MEMSET;
1102       partition->main_dr = single_store;
1103       partition->niter = nb_iter;
1104       partition->plus_one = plus_one;
1106   else if (single_store && single_load)
1108       gimple store = DR_STMT (single_store);
1109       gimple load = DR_STMT (single_load);
1110       /* Direct aggregate copy or via an SSA name temporary.  */
1111       if (load != store
1112      && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
1113    return;
1114       if (!adjacent_dr_p (single_store)
1115      || !adjacent_dr_p (single_load)
1116      || !operand_equal_p (DR_STEP (single_store),
1117                           DR_STEP (single_load), 0)
1118      || !dominated_by_p (CDI_DOMINATORS,
1119                          loop->latch, gimple_bb (store)))
1120    return;
1121       /* Now check that if there is a dependence this dependence is
1122          of a suitable form for memmove.  */
1123       vec<loop_p> loops = vNULL;
1124       ddr_p ddr;
1125       loops.safe_push (loop);
1126       ddr = initialize_data_dependence_relation (single_load, single_store,
1127                                            loops);
1128       compute_affine_dependence (ddr, loop);
/* Every early return below frees DDR and LOOPS first — keep that
   invariant when editing.  */
1129       if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1131          free_dependence_relation (ddr);
1132          loops.release ();
1133          return;
1135       if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
1137          if (DDR_NUM_DIST_VECTS (ddr) == 0)
1139              free_dependence_relation (ddr);
1140              loops.release ();
1141              return;
1143          lambda_vector dist_v;
1144          FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
1146              int dist = dist_v[index_in_loop_nest (loop->num,
1147                                              DDR_LOOP_NEST (ddr))];
/* A positive, non-reversed distance means the load reads bytes the
   store has already written this "copy" — not expressible even as
   memmove.  */
1148              if (dist > 0 && !DDR_REVERSED_P (ddr))
1150                  free_dependence_relation (ddr);
1151                  loops.release ();
1152                  return;
1156       free_dependence_relation (ddr);
1157       loops.release ();
1158       partition->kind = PKIND_MEMCPY;
1159       partition->main_dr = single_store;
1160       partition->secondary_dr = single_load;
1161       partition->niter = nb_iter;
1162       partition->plus_one = plus_one;
1166 /* For a data reference REF, return the declaration of its base
1167    address or NULL_TREE if the base is not determined.  Strips a
   top-level ADDR_EXPR so `&decl` yields `decl`.  */
1169 static tree
1170 ref_base_address (data_reference_p dr)
1172   tree base_address = DR_BASE_ADDRESS (dr);
1173   if (base_address
1174       && TREE_CODE (base_address) == ADDR_EXPR)
1175     return TREE_OPERAND (base_address, 0);
1177   return base_address;
1180 /* Returns true when PARTITION1 and PARTITION2 have similar memory
1181    accesses in RDG: either they share a memory-touching statement, or
   some data reference in one has the same base address as one in the
   other.  */
1183 static bool
1184 similar_memory_accesses (struct graph *rdg, partition_t partition1,
1185                     partition_t partition2)
1187   unsigned i, j, k, l;
1188   bitmap_iterator bi, bj;
1189   data_reference_p ref1, ref2;
1191   /* First check whether in the intersection of the two partitions are
1192      any loads or stores.  Common loads are the situation that happens
1193      most often.  */
1194   EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
1195     if (RDG_MEM_WRITE_STMT (rdg, i)
1196    || RDG_MEM_READS_STMT (rdg, i))
1197       return true;
1199   /* Then check all data-references against each other.  */
1200   EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
1201     if (RDG_MEM_WRITE_STMT (rdg, i)
1202    || RDG_MEM_READS_STMT (rdg, i))
1203       EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
1204    if (RDG_MEM_WRITE_STMT (rdg, j)
1205        || RDG_MEM_READS_STMT (rdg, j))
/* Quadratic in the number of refs, but partitions are small.  */
1207        FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
1209            tree base1 = ref_base_address (ref1);
1210            if (base1)
1211              FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
1212                if (base1 == ref_base_address (ref2))
1213                  return true;
1217   return false;
/* Aggregate several components into a useful partition that is
   registered in the PARTITIONS vector.  Partitions will be
   distributed in different loops.  Seeds one partition per statement
   in STARTING_STMTS whose RDG vertex is not yet covered by a
   previously built partition.  */

static void
rdg_build_partitions (struct graph *rdg,
		      vec<gimple> starting_stmts,
		      vec<partition_t> *partitions)
{
  /* Union of the statement bitmaps of all partitions built so far;
     used to avoid building a partition rooted at an already covered
     vertex.  */
  bitmap processed = BITMAP_ALLOC (NULL);
  int i;
  gimple stmt;

  FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
    {
      int v = rdg_vertex_for_stmt (rdg, stmt);

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "ldist asked to generate code for vertex %d\n", v);

      /* If the vertex is already contained in another partition so
         is the partition rooted at it.  */
      if (bitmap_bit_p (processed, v))
	continue;

      partition_t partition = build_rdg_partition_for_vertex (rdg, v);
      bitmap_ior_into (processed, partition->stmts);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "ldist useful partition:\n");
	  dump_bitmap (dump_file, partition->stmts);
	}

      /* Ownership of PARTITION transfers to the caller via
	 PARTITIONS.  */
      partitions->safe_push (partition);
    }

  /* All vertices should have been assigned to at least one partition now,
     other than vertices belonging to dead code.  */

  BITMAP_FREE (processed);
}
1264 /* Dump to FILE the PARTITIONS. */
1266 static void
1267 dump_rdg_partitions (FILE *file, vec<partition_t> partitions)
1269 int i;
1270 partition_t partition;
1272 FOR_EACH_VEC_ELT (partitions, i, partition)
1273 debug_bitmap_file (file, partition->stmts);
/* Debug PARTITIONS.  Declared extern so it is callable from a
   debugger even though nothing in the compiler references it.  */
extern void debug_rdg_partitions (vec<partition_t> );

DEBUG_FUNCTION void
debug_rdg_partitions (vec<partition_t> partitions)
{
  dump_rdg_partitions (stderr, partitions);
}
1285 /* Returns the number of read and write operations in the RDG. */
1287 static int
1288 number_of_rw_in_rdg (struct graph *rdg)
1290 int i, res = 0;
1292 for (i = 0; i < rdg->n_vertices; i++)
1294 if (RDG_MEM_WRITE_STMT (rdg, i))
1295 ++res;
1297 if (RDG_MEM_READS_STMT (rdg, i))
1298 ++res;
1301 return res;
1304 /* Returns the number of read and write operations in a PARTITION of
1305 the RDG. */
1307 static int
1308 number_of_rw_in_partition (struct graph *rdg, partition_t partition)
1310 int res = 0;
1311 unsigned i;
1312 bitmap_iterator ii;
1314 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
1316 if (RDG_MEM_WRITE_STMT (rdg, i))
1317 ++res;
1319 if (RDG_MEM_READS_STMT (rdg, i))
1320 ++res;
1323 return res;
1326 /* Returns true when one of the PARTITIONS contains all the read or
1327 write operations of RDG. */
1329 static bool
1330 partition_contains_all_rw (struct graph *rdg,
1331 vec<partition_t> partitions)
1333 int i;
1334 partition_t partition;
1335 int nrw = number_of_rw_in_rdg (rdg);
1337 FOR_EACH_VEC_ELT (partitions, i, partition)
1338 if (nrw == number_of_rw_in_partition (rdg, partition))
1339 return true;
1341 return false;
/* Compute partition dependence created by the data references in DRS1
   and DRS2 and modify and return DIR according to that.  DIR encodes
   the dependence direction between the two partitions: 0 is no
   dependence, -1 is back, 1 is forth, 2 is both directions (callers
   then have to merge the partitions).  */

static int
pg_add_dependence_edges (struct graph *rdg, vec<loop_p> loops, int dir,
			 vec<data_reference_p> drs1,
			 vec<data_reference_p> drs2)
{
  data_reference_p dr1, dr2;

  /* dependence direction - 0 is no dependence, -1 is back,
     1 is forth, 2 is both (we can stop then, merging will occur).  */
  for (int ii = 0; drs1.iterate (ii, &dr1); ++ii)
    for (int jj = 0; drs2.iterate (jj, &dr2); ++jj)
      {
	int this_dir = 1;
	ddr_p ddr;
	/* Re-shuffle data-refs to be in dominator order.  Swapping the
	   refs flips the direction of the dependence we record.  */
	if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
	    > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
	  {
	    data_reference_p tem = dr1;
	    dr1 = dr2;
	    dr2 = tem;
	    this_dir = -this_dir;
	  }
	ddr = initialize_data_dependence_relation (dr1, dr2, loops);
	compute_affine_dependence (ddr, loops[0]);
	if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	  /* An unanalyzable dependence is treated conservatively as
	     going both ways.  */
	  this_dir = 2;
	else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
	  {
	    if (DDR_REVERSED_P (ddr))
	      {
		data_reference_p tem = dr1;
		dr1 = dr2;
		dr2 = tem;
		this_dir = -this_dir;
	      }
	    /* Known dependences can still be unordered throughout the
	       iteration space, see gcc.dg/tree-ssa/ldist-16.c.  */
	    if (DDR_NUM_DIST_VECTS (ddr) != 1)
	      this_dir = 2;
	    /* If the overlap is exact preserve stmt order.  */
	    else if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1))
	      ;
	    else
	      {
		/* Else as the distance vector is lexicographic positive
		   swap the dependence direction.  */
		this_dir = -this_dir;
	      }
	  }
	else
	  /* chrec_known: proven independent, no edge needed.  */
	  this_dir = 0;
	free_dependence_relation (ddr);
	if (dir == 0)
	  dir = this_dir;
	else if (dir != this_dir)
	  /* Conflicting directions across ref pairs mean the
	     partitions depend on each other both ways.  */
	  return 2;
      }
  return dir;
}
1408 /* Compare postorder number of the partition graph vertices V1 and V2. */
1410 static int
1411 pgcmp (const void *v1_, const void *v2_)
1413 const vertex *v1 = (const vertex *)v1_;
1414 const vertex *v2 = (const vertex *)v2_;
1415 return v2->post - v1->post;
/* Distributes the code from LOOP in such a way that producer
   statements are placed before consumer statements.  Tries to separate
   only the statements from STMTS into separate loops.
   Returns the number of distributed loops.  CD carries the control
   dependences used to build the RDG; the number of generated builtin
   calls is returned through *NB_CALLS.  */

static int
distribute_loop (struct loop *loop, vec<gimple> stmts,
		 control_dependences *cd, int *nb_calls)
{
  struct graph *rdg;
  partition_t partition;
  bool any_builtin;
  int i, nbp;
  graph *pg = NULL;
  int num_sccs = 1;

  *nb_calls = 0;
  auto_vec<loop_p, 3> loop_nest;
  if (!find_loop_nest (loop, &loop_nest))
    return 0;

  rdg = build_rdg (loop_nest, cd);
  if (!rdg)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Loop %d not distributed: failed to build the RDG.\n",
		 loop->num);

      return 0;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg (dump_file, rdg);

  auto_vec<partition_t, 3> partitions;
  rdg_build_partitions (rdg, stmts, &partitions);

  /* Classify each partition (memset/memcpy patterns, reductions, ...)
     and remember whether any builtin replacement was found.  */
  any_builtin = false;
  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      classify_partition (loop, rdg, partition);
      any_builtin |= partition_builtin_p (partition);
    }

  /* If we are only distributing patterns but did not detect any,
     simply bail out.  */
  if (!flag_tree_loop_distribution
      && !any_builtin)
    {
      nbp = 0;
      goto ldist_done;
    }

  /* If we are only distributing patterns fuse all partitions that
     were not classified as builtins.  This also avoids chopping
     a loop into pieces, separated by builtin calls.  That is, we
     only want no or a single loop body remaining.  */
  partition_t into;
  if (!flag_tree_loop_distribution)
    {
      for (i = 0; partitions.iterate (i, &into); ++i)
	if (!partition_builtin_p (into))
	  break;
      for (++i; partitions.iterate (i, &partition); ++i)
	if (!partition_builtin_p (partition))
	  {
	    if (dump_file && (dump_flags & TDF_DETAILS))
	      {
		fprintf (dump_file, "fusing non-builtin partitions\n");
		dump_bitmap (dump_file, into->stmts);
		dump_bitmap (dump_file, partition->stmts);
	      }

	    partition_merge_into (into, partition);
	    partitions.unordered_remove (i);
	    partition_free (partition);
	    i--;
	  }
    }

  /* Due to limitations in the transform phase we have to fuse all
     reduction partitions into the last partition so the existing
     loop will contain all loop-closed PHI nodes.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    if (partition_reduction_p (into))
      break;
  for (i = i + 1; partitions.iterate (i, &partition); ++i)
    if (partition_reduction_p (partition))
      {
	if (dump_file && (dump_flags & TDF_DETAILS))
	  {
	    fprintf (dump_file, "fusing partitions\n");
	    dump_bitmap (dump_file, into->stmts);
	    dump_bitmap (dump_file, partition->stmts);
	    fprintf (dump_file, "because they have reductions\n");
	  }

	partition_merge_into (into, partition);
	partitions.unordered_remove (i);
	partition_free (partition);
	i--;
      }

  /* Apply our simple cost model - fuse partitions with similar
     memory accesses.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    {
      if (partition_builtin_p (into))
	continue;
      for (int j = i + 1;
	   partitions.iterate (j, &partition); ++j)
	{
	  if (!partition_builtin_p (partition)
	      && similar_memory_accesses (rdg, into, partition))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, "fusing partitions\n");
		  dump_bitmap (dump_file, into->stmts);
		  dump_bitmap (dump_file, partition->stmts);
		  fprintf (dump_file, "because they have similar "
			   "memory accesses\n");
		}

	      partition_merge_into (into, partition);
	      partitions.unordered_remove (j);
	      partition_free (partition);
	      j--;
	    }
	}
    }

  /* Build the partition dependency graph.  Each vertex carries its
     partition plus the read and write data references of the
     partition's statements.  */
  if (partitions.length () > 1)
    {
      pg = new_graph (partitions.length ());
      struct pgdata {
	  partition_t partition;
	  vec<data_reference_p> writes;
	  vec<data_reference_p> reads;
      };
#define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
      for (i = 0; partitions.iterate (i, &partition); ++i)
	{
	  vertex *v = &pg->vertices[i];
	  pgdata *data = new pgdata;
	  data_reference_p dr;
	  /* FIXME - leaks.  */
	  v->data = data;
	  bitmap_iterator bi;
	  unsigned j;
	  data->partition = partition;
	  data->reads = vNULL;
	  data->writes = vNULL;
	  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, j, bi)
	    for (int k = 0; RDG_DATAREFS (rdg, j).iterate (k, &dr); ++k)
	      if (DR_IS_READ (dr))
		data->reads.safe_push (dr);
	      else
		data->writes.safe_push (dr);
	}
      /* Add edges according to write-read, read-write and write-write
	 dependences between each pair of partitions.  */
      partition_t partition1, partition2;
      for (i = 0; partitions.iterate (i, &partition1); ++i)
	for (int j = i + 1; partitions.iterate (j, &partition2); ++j)
	  {
	    /* dependence direction - 0 is no dependence, -1 is back,
	       1 is forth, 2 is both (we can stop then, merging will occur).  */
	    int dir = 0;
	    dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					   PGDATA(i)->writes,
					   PGDATA(j)->reads);
	    if (dir != 2)
	      dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					     PGDATA(i)->reads,
					     PGDATA(j)->writes);
	    if (dir != 2)
	      dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					     PGDATA(i)->writes,
					     PGDATA(j)->writes);
	    if (dir == 1 || dir == 2)
	      add_edge (pg, i, j);
	    if (dir == -1 || dir == 2)
	      add_edge (pg, j, i);
	  }

      /* Add edges to the reduction partition (if any) to force it last.  */
      unsigned j;
      for (j = 0; partitions.iterate (j, &partition); ++j)
	if (partition_reduction_p (partition))
	  break;
      if (j < partitions.length ())
	{
	  for (unsigned i = 0; partitions.iterate (i, &partition); ++i)
	    if (i != j)
	      add_edge (pg, i, j);
	}

      /* Compute partitions we cannot separate and fuse them.  Every
	 strongly connected component collapses into its first member;
	 fused slots are marked NULL and compacted below.  */
      num_sccs = graphds_scc (pg, NULL);
      for (i = 0; i < num_sccs; ++i)
	{
	  partition_t first;
	  int j;
	  for (j = 0; partitions.iterate (j, &first); ++j)
	    if (pg->vertices[j].component == i)
	      break;
	  for (j = j + 1; partitions.iterate (j, &partition); ++j)
	    if (pg->vertices[j].component == i)
	      {
		if (dump_file && (dump_flags & TDF_DETAILS))
		  {
		    fprintf (dump_file, "fusing partitions\n");
		    dump_bitmap (dump_file, first->stmts);
		    dump_bitmap (dump_file, partition->stmts);
		    fprintf (dump_file, "because they are in the same "
			     "dependence SCC\n");
		  }
		partition_merge_into (first, partition);
		partitions[j] = NULL;
		partition_free (partition);
		PGDATA (j)->partition = NULL;
	      }
	}

      /* Now order the remaining nodes in postorder.  */
      qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
      partitions.truncate (0);
      for (i = 0; i < pg->n_vertices; ++i)
	{
	  pgdata *data = PGDATA (i);
	  if (data->partition)
	    partitions.safe_push (data->partition);
	  data->reads.release ();
	  data->writes.release ();
	  delete data;
	}
      gcc_assert (partitions.length () == (unsigned)num_sccs);
      free_graph (pg);
    }

  /* Give up when nothing was distributed: either no partition is
     left, a single non-builtin partition remains, or one partition
     still covers all memory operations of the loop.  */
  nbp = partitions.length ();
  if (nbp == 0
      || (nbp == 1 && !partition_builtin_p (partitions[0]))
      || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
    {
      nbp = 0;
      goto ldist_done;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg_partitions (dump_file, partitions);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      if (partition_builtin_p (partition))
	(*nb_calls)++;
      generate_code_for_partition (loop, partition, i < nbp - 1);
    }

 ldist_done:

  FOR_EACH_VEC_ELT (partitions, i, partition)
    partition_free (partition);

  free_rdg (rdg);
  return nbp - *nb_calls;
}
/* Distribute all loops in the current function.  */

namespace {

/* Pass descriptor for the GIMPLE loop distribution pass ("ldist").  */
const pass_data pass_data_loop_distribution =
{
  GIMPLE_PASS, /* type */
  "ldist", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
/* The loop distribution opt_pass.  Enabled by either of the two loop
   distribution flags (see the gate method).  */
class pass_loop_distribution : public gimple_opt_pass
{
public:
  pass_loop_distribution (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_loop_distribution, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return flag_tree_loop_distribution
	|| flag_tree_loop_distribute_patterns;
    }

  virtual unsigned int execute (function *);

}; // class pass_loop_distribution
/* Main entry point of the pass: walk all innermost loops of FUN,
   seed a statement work list for each candidate loop, and invoke
   distribute_loop on it.  Returns 0 (no TODO flags).  */

unsigned int
pass_loop_distribution::execute (function *fun)
{
  struct loop *loop;
  bool changed = false;
  basic_block bb;
  control_dependences *cd = NULL;

  /* Reset the UIDs of all statements; the RDG machinery uses them.  */
  FOR_ALL_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
    }

  /* We can at the moment only distribute non-nested loops, thus restrict
     walking to innermost loops.  */
  FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
    {
      auto_vec<gimple> work_list;
      basic_block *bbs;
      int num = loop->num;
      unsigned int i;

      /* If the loop doesn't have a single exit we will fail anyway,
	 so do that early.  */
      if (!single_exit (loop))
	continue;

      /* Only optimize hot loops.  */
      if (!optimize_loop_for_speed_p (loop))
	continue;

      /* Initialize the worklist with stmts we seed the partitions with.  */
      bbs = get_loop_body_in_dom_order (loop);
      for (i = 0; i < loop->num_nodes; ++i)
	{
	  for (gphi_iterator gsi = gsi_start_phis (bbs[i]);
	       !gsi_end_p (gsi);
	       gsi_next (&gsi))
	    {
	      gphi *phi = gsi.phi ();
	      if (virtual_operand_p (gimple_phi_result (phi)))
		continue;
	      /* Distribute stmts which have defs that are used outside of
		 the loop.  */
	      if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
		continue;
	      work_list.safe_push (phi);
	    }
	  for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
	       !gsi_end_p (gsi);
	       gsi_next (&gsi))
	    {
	      gimple stmt = gsi_stmt (gsi);

	      /* If there is a stmt with side-effects bail out - we
		 cannot and should not distribute this loop.  */
	      if (gimple_has_side_effects (stmt))
		{
		  work_list.truncate (0);
		  goto out;
		}

	      /* Distribute stmts which have defs that are used outside of
		 the loop.  */
	      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
		;
	      /* Otherwise only distribute stores for now.  */
	      else if (!gimple_vdef (stmt))
		continue;

	      work_list.safe_push (stmt);
	    }
	}
out:
      free (bbs);

      int nb_generated_loops = 0;
      int nb_generated_calls = 0;
      location_t loc = find_loop_location (loop);
      if (work_list.length () > 0)
	{
	  /* Control dependences are computed lazily, once for the
	     whole function, on the first loop that needs them.  */
	  if (!cd)
	    {
	      calculate_dominance_info (CDI_DOMINATORS);
	      calculate_dominance_info (CDI_POST_DOMINATORS);
	      cd = new control_dependences (create_edge_list ());
	      free_dominance_info (CDI_POST_DOMINATORS);
	    }
	  nb_generated_loops = distribute_loop (loop, work_list, cd,
						&nb_generated_calls);
	}

      if (nb_generated_loops + nb_generated_calls > 0)
	{
	  changed = true;
	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
			   loc, "Loop %d distributed: split to %d loops "
			   "and %d library calls.\n",
			   num, nb_generated_loops, nb_generated_calls);
	}
      else if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Loop %d is the same.\n", num);
    }

  if (cd)
    delete cd;

  if (changed)
    {
      /* Cached scalar evolutions and virtual SSA form are stale after
	 code generation; force renaming and LC-SSA rewriting.  */
      mark_virtual_operands_for_renaming (fun);
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }

#ifdef ENABLE_CHECKING
  verify_loop_structure ();
#endif

  return 0;
}
1843 } // anon namespace
/* Factory function used by the pass manager to instantiate the loop
   distribution pass in context CTXT.  */
gimple_opt_pass *
make_pass_loop_distribution (gcc::context *ctxt)
{
  return new pass_loop_distribution (ctxt);
}