2015-06-11 Paul Thomas <pault@gcc.gnu.org>
[official-gcc.git] / gcc / tree-loop-distribution.c
blob5c203dbb13442e2fb93908f28b39d923756f98c3
1 /* Loop distribution.
2 Copyright (C) 2006-2015 Free Software Foundation, Inc.
3 Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
4 and Sebastian Pop <sebastian.pop@amd.com>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 3, or (at your option) any
11 later version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* This pass performs loop distribution: for example, the loop
24 |DO I = 2, N
25 | A(I) = B(I) + C
26 | D(I) = A(I-1)*E
27 |ENDDO
29 is transformed to
31 |DOALL I = 2, N
32 | A(I) = B(I) + C
33 |ENDDO
35 |DOALL I = 2, N
36 | D(I) = A(I-1)*E
37 |ENDDO
39 This pass uses an RDG, Reduced Dependence Graph built on top of the
40 data dependence relations. The RDG is then topologically sorted to
41 obtain a map of information producers/consumers based on which it
42 generates the new loops. */
44 #include "config.h"
45 #include "system.h"
46 #include "coretypes.h"
47 #include "input.h"
48 #include "alias.h"
49 #include "symtab.h"
50 #include "options.h"
51 #include "tree.h"
52 #include "fold-const.h"
53 #include "predict.h"
54 #include "tm.h"
55 #include "hard-reg-set.h"
56 #include "input.h"
57 #include "function.h"
58 #include "dominance.h"
59 #include "cfg.h"
60 #include "cfganal.h"
61 #include "basic-block.h"
62 #include "tree-ssa-alias.h"
63 #include "internal-fn.h"
64 #include "gimple-expr.h"
65 #include "is-a.h"
66 #include "gimple.h"
67 #include "gimple-iterator.h"
68 #include "gimplify-me.h"
69 #include "stor-layout.h"
70 #include "gimple-ssa.h"
71 #include "tree-cfg.h"
72 #include "tree-phinodes.h"
73 #include "ssa-iterators.h"
74 #include "stringpool.h"
75 #include "tree-ssanames.h"
76 #include "tree-ssa-loop-manip.h"
77 #include "tree-ssa-loop.h"
78 #include "tree-into-ssa.h"
79 #include "tree-ssa.h"
80 #include "cfgloop.h"
81 #include "tree-chrec.h"
82 #include "tree-data-ref.h"
83 #include "tree-scalar-evolution.h"
84 #include "tree-pass.h"
85 #include "gimple-pretty-print.h"
86 #include "tree-vectorizer.h"
89 /* A Reduced Dependence Graph (RDG) vertex representing a statement. */
90 typedef struct rdg_vertex
92 /* The statement represented by this vertex. */
93 gimple stmt;
95 /* Vector of data-references in this statement. */
96 vec<data_reference_p> datarefs;
98 /* True when the statement contains a write to memory. */
99 bool has_mem_write;
101 /* True when the statement contains a read from memory. */
102 bool has_mem_reads;
103 } *rdg_vertex_p;
/* Accessors for the per-vertex payload (struct rdg_vertex stored in
   vertex->data) and convenience wrappers indexed by vertex number.  */
#define RDGV_STMT(V)                 ((struct rdg_vertex *) ((V)->data))->stmt
#define RDGV_DATAREFS(V)             ((struct rdg_vertex *) ((V)->data))->datarefs
#define RDGV_HAS_MEM_WRITE(V)        ((struct rdg_vertex *) ((V)->data))->has_mem_write
#define RDGV_HAS_MEM_READS(V)        ((struct rdg_vertex *) ((V)->data))->has_mem_reads
#define RDG_STMT(RDG, I)             RDGV_STMT (&(RDG->vertices[I]))
#define RDG_DATAREFS(RDG, I)         RDGV_DATAREFS (&(RDG->vertices[I]))
#define RDG_MEM_WRITE_STMT(RDG, I)   RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
#define RDG_MEM_READS_STMT(RDG, I)   RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
/* Data dependence type.  */

enum rdg_dep_type
{
  /* Read After Write (RAW).  */
  flow_dd = 'f',

  /* Control dependence (execute conditional on).  */
  control_dd = 'c'
};
125 /* Dependence information attached to an edge of the RDG. */
127 typedef struct rdg_edge
129 /* Type of the dependence. */
130 enum rdg_dep_type type;
131 } *rdg_edge_p;
133 #define RDGE_TYPE(E) ((struct rdg_edge *) ((E)->data))->type
135 /* Dump vertex I in RDG to FILE. */
137 static void
138 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
140 struct vertex *v = &(rdg->vertices[i]);
141 struct graph_edge *e;
143 fprintf (file, "(vertex %d: (%s%s) (in:", i,
144 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
145 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
147 if (v->pred)
148 for (e = v->pred; e; e = e->pred_next)
149 fprintf (file, " %d", e->src);
151 fprintf (file, ") (out:");
153 if (v->succ)
154 for (e = v->succ; e; e = e->succ_next)
155 fprintf (file, " %d", e->dest);
157 fprintf (file, ")\n");
158 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
159 fprintf (file, ")\n");
162 /* Call dump_rdg_vertex on stderr. */
164 DEBUG_FUNCTION void
165 debug_rdg_vertex (struct graph *rdg, int i)
167 dump_rdg_vertex (stderr, rdg, i);
170 /* Dump the reduced dependence graph RDG to FILE. */
172 static void
173 dump_rdg (FILE *file, struct graph *rdg)
175 fprintf (file, "(rdg\n");
176 for (int i = 0; i < rdg->n_vertices; i++)
177 dump_rdg_vertex (file, rdg, i);
178 fprintf (file, ")\n");
181 /* Call dump_rdg on stderr. */
183 DEBUG_FUNCTION void
184 debug_rdg (struct graph *rdg)
186 dump_rdg (stderr, rdg);
189 static void
190 dot_rdg_1 (FILE *file, struct graph *rdg)
192 int i;
193 pretty_printer buffer;
194 pp_needs_newline (&buffer) = false;
195 buffer.buffer->stream = file;
197 fprintf (file, "digraph RDG {\n");
199 for (i = 0; i < rdg->n_vertices; i++)
201 struct vertex *v = &(rdg->vertices[i]);
202 struct graph_edge *e;
204 fprintf (file, "%d [label=\"[%d] ", i, i);
205 pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
206 pp_flush (&buffer);
207 fprintf (file, "\"]\n");
209 /* Highlight reads from memory. */
210 if (RDG_MEM_READS_STMT (rdg, i))
211 fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
213 /* Highlight stores to memory. */
214 if (RDG_MEM_WRITE_STMT (rdg, i))
215 fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
217 if (v->succ)
218 for (e = v->succ; e; e = e->succ_next)
219 switch (RDGE_TYPE (e))
221 case flow_dd:
222 /* These are the most common dependences: don't print these. */
223 fprintf (file, "%d -> %d \n", i, e->dest);
224 break;
226 case control_dd:
227 fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
228 break;
230 default:
231 gcc_unreachable ();
235 fprintf (file, "}\n\n");
238 /* Display the Reduced Dependence Graph using dotty. */
240 DEBUG_FUNCTION void
241 dot_rdg (struct graph *rdg)
243 /* When debugging, you may want to enable the following code. */
244 #ifdef HAVE_POPEN
245 FILE *file = popen ("dot -Tx11", "w");
246 if (!file)
247 return;
248 dot_rdg_1 (file, rdg);
249 fflush (file);
250 close (fileno (file));
251 pclose (file);
252 #else
253 dot_rdg_1 (stderr, rdg);
254 #endif
257 /* Returns the index of STMT in RDG. */
259 static int
260 rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple stmt)
262 int index = gimple_uid (stmt);
263 gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
264 return index;
267 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
268 the index of DEF in RDG. */
270 static void
271 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
273 use_operand_p imm_use_p;
274 imm_use_iterator iterator;
276 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
278 struct graph_edge *e;
279 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
281 if (use < 0)
282 continue;
284 e = add_edge (rdg, idef, use);
285 e->data = XNEW (struct rdg_edge);
286 RDGE_TYPE (e) = flow_dd;
290 /* Creates an edge for the control dependences of BB to the vertex V. */
292 static void
293 create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
294 int v, control_dependences *cd)
296 bitmap_iterator bi;
297 unsigned edge_n;
298 EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
299 0, edge_n, bi)
301 basic_block cond_bb = cd->get_edge (edge_n)->src;
302 gimple stmt = last_stmt (cond_bb);
303 if (stmt && is_ctrl_stmt (stmt))
305 struct graph_edge *e;
306 int c = rdg_vertex_for_stmt (rdg, stmt);
307 if (c < 0)
308 continue;
310 e = add_edge (rdg, c, v);
311 e->data = XNEW (struct rdg_edge);
312 RDGE_TYPE (e) = control_dd;
317 /* Creates the edges of the reduced dependence graph RDG. */
319 static void
320 create_rdg_flow_edges (struct graph *rdg)
322 int i;
323 def_operand_p def_p;
324 ssa_op_iter iter;
326 for (i = 0; i < rdg->n_vertices; i++)
327 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
328 iter, SSA_OP_DEF)
329 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
332 /* Creates the edges of the reduced dependence graph RDG. */
334 static void
335 create_rdg_cd_edges (struct graph *rdg, control_dependences *cd)
337 int i;
339 for (i = 0; i < rdg->n_vertices; i++)
341 gimple stmt = RDG_STMT (rdg, i);
342 if (gimple_code (stmt) == GIMPLE_PHI)
344 edge_iterator ei;
345 edge e;
346 FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
347 create_edge_for_control_dependence (rdg, e->src, i, cd);
349 else
350 create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
354 /* Build the vertices of the reduced dependence graph RDG. Return false
355 if that failed. */
357 static bool
358 create_rdg_vertices (struct graph *rdg, vec<gimple> stmts, loop_p loop,
359 vec<data_reference_p> *datarefs)
361 int i;
362 gimple stmt;
364 FOR_EACH_VEC_ELT (stmts, i, stmt)
366 struct vertex *v = &(rdg->vertices[i]);
368 /* Record statement to vertex mapping. */
369 gimple_set_uid (stmt, i);
371 v->data = XNEW (struct rdg_vertex);
372 RDGV_STMT (v) = stmt;
373 RDGV_DATAREFS (v).create (0);
374 RDGV_HAS_MEM_WRITE (v) = false;
375 RDGV_HAS_MEM_READS (v) = false;
376 if (gimple_code (stmt) == GIMPLE_PHI)
377 continue;
379 unsigned drp = datarefs->length ();
380 if (!find_data_references_in_stmt (loop, stmt, datarefs))
381 return false;
382 for (unsigned j = drp; j < datarefs->length (); ++j)
384 data_reference_p dr = (*datarefs)[j];
385 if (DR_IS_READ (dr))
386 RDGV_HAS_MEM_READS (v) = true;
387 else
388 RDGV_HAS_MEM_WRITE (v) = true;
389 RDGV_DATAREFS (v).safe_push (dr);
392 return true;
395 /* Initialize STMTS with all the statements of LOOP. The order in
396 which we discover statements is important as
397 generate_loops_for_partition is using the same traversal for
398 identifying statements in loop copies. */
400 static void
401 stmts_from_loop (struct loop *loop, vec<gimple> *stmts)
403 unsigned int i;
404 basic_block *bbs = get_loop_body_in_dom_order (loop);
406 for (i = 0; i < loop->num_nodes; i++)
408 basic_block bb = bbs[i];
410 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
411 gsi_next (&bsi))
412 if (!virtual_operand_p (gimple_phi_result (bsi.phi ())))
413 stmts->safe_push (bsi.phi ());
415 for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
416 gsi_next (&bsi))
418 gimple stmt = gsi_stmt (bsi);
419 if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
420 stmts->safe_push (stmt);
424 free (bbs);
427 /* Free the reduced dependence graph RDG. */
429 static void
430 free_rdg (struct graph *rdg)
432 int i;
434 for (i = 0; i < rdg->n_vertices; i++)
436 struct vertex *v = &(rdg->vertices[i]);
437 struct graph_edge *e;
439 for (e = v->succ; e; e = e->succ_next)
440 free (e->data);
442 if (v->data)
444 gimple_set_uid (RDGV_STMT (v), -1);
445 free_data_refs (RDGV_DATAREFS (v));
446 free (v->data);
450 free_graph (rdg);
453 /* Build the Reduced Dependence Graph (RDG) with one vertex per
454 statement of the loop nest LOOP_NEST, and one edge per data dependence or
455 scalar dependence. */
457 static struct graph *
458 build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
460 struct graph *rdg;
461 vec<data_reference_p> datarefs;
463 /* Create the RDG vertices from the stmts of the loop nest. */
464 auto_vec<gimple, 10> stmts;
465 stmts_from_loop (loop_nest[0], &stmts);
466 rdg = new_graph (stmts.length ());
467 datarefs.create (10);
468 if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs))
470 datarefs.release ();
471 free_rdg (rdg);
472 return NULL;
474 stmts.release ();
476 create_rdg_flow_edges (rdg);
477 if (cd)
478 create_rdg_cd_edges (rdg, cd);
480 datarefs.release ();
482 return rdg;
487 enum partition_kind {
488 PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY
491 typedef struct partition_s
493 bitmap stmts;
494 bitmap loops;
495 bool reduction_p;
496 enum partition_kind kind;
497 /* data-references a kind != PKIND_NORMAL partition is about. */
498 data_reference_p main_dr;
499 data_reference_p secondary_dr;
500 tree niter;
501 bool plus_one;
502 } *partition_t;
505 /* Allocate and initialize a partition from BITMAP. */
507 static partition_t
508 partition_alloc (bitmap stmts, bitmap loops)
510 partition_t partition = XCNEW (struct partition_s);
511 partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
512 partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
513 partition->reduction_p = false;
514 partition->kind = PKIND_NORMAL;
515 return partition;
518 /* Free PARTITION. */
520 static void
521 partition_free (partition_t partition)
523 BITMAP_FREE (partition->stmts);
524 BITMAP_FREE (partition->loops);
525 free (partition);
528 /* Returns true if the partition can be generated as a builtin. */
530 static bool
531 partition_builtin_p (partition_t partition)
533 return partition->kind != PKIND_NORMAL;
536 /* Returns true if the partition contains a reduction. */
538 static bool
539 partition_reduction_p (partition_t partition)
541 return partition->reduction_p;
544 /* Merge PARTITION into the partition DEST. */
546 static void
547 partition_merge_into (partition_t dest, partition_t partition)
549 dest->kind = PKIND_NORMAL;
550 bitmap_ior_into (dest->stmts, partition->stmts);
551 if (partition_reduction_p (partition))
552 dest->reduction_p = true;
556 /* Returns true when DEF is an SSA_NAME defined in LOOP and used after
557 the LOOP. */
559 static bool
560 ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
562 imm_use_iterator imm_iter;
563 use_operand_p use_p;
565 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
567 gimple use_stmt = USE_STMT (use_p);
568 if (!is_gimple_debug (use_stmt)
569 && loop != loop_containing_stmt (use_stmt))
570 return true;
573 return false;
576 /* Returns true when STMT defines a scalar variable used after the
577 loop LOOP. */
579 static bool
580 stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple stmt)
582 def_operand_p def_p;
583 ssa_op_iter op_iter;
585 if (gimple_code (stmt) == GIMPLE_PHI)
586 return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);
588 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
589 if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
590 return true;
592 return false;
595 /* Return a copy of LOOP placed before LOOP. */
597 static struct loop *
598 copy_loop_before (struct loop *loop)
600 struct loop *res;
601 edge preheader = loop_preheader_edge (loop);
603 initialize_original_copy_tables ();
604 res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, NULL, preheader);
605 gcc_assert (res != NULL);
606 free_original_copy_tables ();
607 delete_update_ssa ();
609 return res;
612 /* Creates an empty basic block after LOOP. */
614 static void
615 create_bb_after_loop (struct loop *loop)
617 edge exit = single_exit (loop);
619 if (!exit)
620 return;
622 split_edge (exit);
625 /* Generate code for PARTITION from the code in LOOP. The loop is
626 copied when COPY_P is true. All the statements not flagged in the
627 PARTITION bitmap are removed from the loop or from its copy. The
628 statements are indexed in sequence inside a basic block, and the
629 basic blocks of a loop are taken in dom order. */
631 static void
632 generate_loops_for_partition (struct loop *loop, partition_t partition,
633 bool copy_p)
635 unsigned i;
636 basic_block *bbs;
638 if (copy_p)
640 loop = copy_loop_before (loop);
641 gcc_assert (loop != NULL);
642 create_preheader (loop, CP_SIMPLE_PREHEADERS);
643 create_bb_after_loop (loop);
646 /* Remove stmts not in the PARTITION bitmap. */
647 bbs = get_loop_body_in_dom_order (loop);
649 if (MAY_HAVE_DEBUG_STMTS)
650 for (i = 0; i < loop->num_nodes; i++)
652 basic_block bb = bbs[i];
654 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
655 gsi_next (&bsi))
657 gphi *phi = bsi.phi ();
658 if (!virtual_operand_p (gimple_phi_result (phi))
659 && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
660 reset_debug_uses (phi);
663 for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
665 gimple stmt = gsi_stmt (bsi);
666 if (gimple_code (stmt) != GIMPLE_LABEL
667 && !is_gimple_debug (stmt)
668 && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
669 reset_debug_uses (stmt);
673 for (i = 0; i < loop->num_nodes; i++)
675 basic_block bb = bbs[i];
677 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
679 gphi *phi = bsi.phi ();
680 if (!virtual_operand_p (gimple_phi_result (phi))
681 && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
682 remove_phi_node (&bsi, true);
683 else
684 gsi_next (&bsi);
687 for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
689 gimple stmt = gsi_stmt (bsi);
690 if (gimple_code (stmt) != GIMPLE_LABEL
691 && !is_gimple_debug (stmt)
692 && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
694 /* Choose an arbitrary path through the empty CFG part
695 that this unnecessary control stmt controls. */
696 if (gcond *cond_stmt = dyn_cast <gcond *> (stmt))
698 gimple_cond_make_false (cond_stmt);
699 update_stmt (stmt);
701 else if (gimple_code (stmt) == GIMPLE_SWITCH)
703 gswitch *switch_stmt = as_a <gswitch *> (stmt);
704 gimple_switch_set_index
705 (switch_stmt, CASE_LOW (gimple_switch_label (switch_stmt, 1)));
706 update_stmt (stmt);
708 else
710 unlink_stmt_vdef (stmt);
711 gsi_remove (&bsi, true);
712 release_defs (stmt);
713 continue;
716 gsi_next (&bsi);
720 free (bbs);
723 /* Build the size argument for a memory operation call. */
725 static tree
726 build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter,
727 bool plus_one)
729 tree size = fold_convert_loc (loc, sizetype, nb_iter);
730 if (plus_one)
731 size = size_binop (PLUS_EXPR, size, size_one_node);
732 size = fold_build2_loc (loc, MULT_EXPR, sizetype, size,
733 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
734 size = fold_convert_loc (loc, size_type_node, size);
735 return size;
738 /* Build an address argument for a memory operation call. */
740 static tree
741 build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
743 tree addr_base;
745 addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
746 addr_base = fold_convert_loc (loc, sizetype, addr_base);
748 /* Test for a negative stride, iterating over every element. */
749 if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
751 addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
752 fold_convert_loc (loc, sizetype, nb_bytes));
753 addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
754 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
757 return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
760 /* If VAL memory representation contains the same value in all bytes,
761 return that value, otherwise return -1.
762 E.g. for 0x24242424 return 0x24, for IEEE double
763 747708026454360457216.0 return 0x44, etc. */
765 static int
766 const_with_all_bytes_same (tree val)
768 unsigned char buf[64];
769 int i, len;
771 if (integer_zerop (val)
772 || real_zerop (val)
773 || (TREE_CODE (val) == CONSTRUCTOR
774 && !TREE_CLOBBER_P (val)
775 && CONSTRUCTOR_NELTS (val) == 0))
776 return 0;
778 if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
779 return -1;
781 len = native_encode_expr (val, buf, sizeof (buf));
782 if (len == 0)
783 return -1;
784 for (i = 1; i < len; i++)
785 if (buf[i] != buf[0])
786 return -1;
787 return buf[0];
790 /* Generate a call to memset for PARTITION in LOOP. */
792 static void
793 generate_memset_builtin (struct loop *loop, partition_t partition)
795 gimple_stmt_iterator gsi;
796 gimple stmt, fn_call;
797 tree mem, fn, nb_bytes;
798 location_t loc;
799 tree val;
801 stmt = DR_STMT (partition->main_dr);
802 loc = gimple_location (stmt);
804 /* The new statements will be placed before LOOP. */
805 gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
807 nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
808 partition->plus_one);
809 nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
810 false, GSI_CONTINUE_LINKING);
811 mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
812 mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
813 false, GSI_CONTINUE_LINKING);
815 /* This exactly matches the pattern recognition in classify_partition. */
816 val = gimple_assign_rhs1 (stmt);
817 /* Handle constants like 0x15151515 and similarly
818 floating point constants etc. where all bytes are the same. */
819 int bytev = const_with_all_bytes_same (val);
820 if (bytev != -1)
821 val = build_int_cst (integer_type_node, bytev);
822 else if (TREE_CODE (val) == INTEGER_CST)
823 val = fold_convert (integer_type_node, val);
824 else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
826 tree tem = make_ssa_name (integer_type_node);
827 gimple cstmt = gimple_build_assign (tem, NOP_EXPR, val);
828 gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
829 val = tem;
832 fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
833 fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
834 gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
836 if (dump_file && (dump_flags & TDF_DETAILS))
838 fprintf (dump_file, "generated memset");
839 if (bytev == 0)
840 fprintf (dump_file, " zero\n");
841 else
842 fprintf (dump_file, "\n");
846 /* Generate a call to memcpy for PARTITION in LOOP. */
848 static void
849 generate_memcpy_builtin (struct loop *loop, partition_t partition)
851 gimple_stmt_iterator gsi;
852 gimple stmt, fn_call;
853 tree dest, src, fn, nb_bytes;
854 location_t loc;
855 enum built_in_function kind;
857 stmt = DR_STMT (partition->main_dr);
858 loc = gimple_location (stmt);
860 /* The new statements will be placed before LOOP. */
861 gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
863 nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
864 partition->plus_one);
865 nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
866 false, GSI_CONTINUE_LINKING);
867 dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
868 src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
869 if (ptr_derefs_may_alias_p (dest, src))
870 kind = BUILT_IN_MEMMOVE;
871 else
872 kind = BUILT_IN_MEMCPY;
874 dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
875 false, GSI_CONTINUE_LINKING);
876 src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
877 false, GSI_CONTINUE_LINKING);
878 fn = build_fold_addr_expr (builtin_decl_implicit (kind));
879 fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
880 gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
882 if (dump_file && (dump_flags & TDF_DETAILS))
884 if (kind == BUILT_IN_MEMCPY)
885 fprintf (dump_file, "generated memcpy\n");
886 else
887 fprintf (dump_file, "generated memmove\n");
891 /* Remove and destroy the loop LOOP. */
893 static void
894 destroy_loop (struct loop *loop)
896 unsigned nbbs = loop->num_nodes;
897 edge exit = single_exit (loop);
898 basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
899 basic_block *bbs;
900 unsigned i;
902 bbs = get_loop_body_in_dom_order (loop);
904 redirect_edge_pred (exit, src);
905 exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
906 exit->flags |= EDGE_FALLTHRU;
907 cancel_loop_tree (loop);
908 rescan_loop_exit (exit, false, true);
910 for (i = 0; i < nbbs; i++)
912 /* We have made sure to not leave any dangling uses of SSA
913 names defined in the loop. With the exception of virtuals.
914 Make sure we replace all uses of virtual defs that will remain
915 outside of the loop with the bare symbol as delete_basic_block
916 will release them. */
917 for (gphi_iterator gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi);
918 gsi_next (&gsi))
920 gphi *phi = gsi.phi ();
921 if (virtual_operand_p (gimple_phi_result (phi)))
922 mark_virtual_phi_result_for_renaming (phi);
924 for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);
925 gsi_next (&gsi))
927 gimple stmt = gsi_stmt (gsi);
928 tree vdef = gimple_vdef (stmt);
929 if (vdef && TREE_CODE (vdef) == SSA_NAME)
930 mark_virtual_operand_for_renaming (vdef);
932 delete_basic_block (bbs[i]);
934 free (bbs);
936 set_immediate_dominator (CDI_DOMINATORS, dest,
937 recompute_dominator (CDI_DOMINATORS, dest));
940 /* Generates code for PARTITION. */
942 static void
943 generate_code_for_partition (struct loop *loop,
944 partition_t partition, bool copy_p)
946 switch (partition->kind)
948 case PKIND_NORMAL:
949 /* Reductions all have to be in the last partition. */
950 gcc_assert (!partition_reduction_p (partition)
951 || !copy_p);
952 generate_loops_for_partition (loop, partition, copy_p);
953 return;
955 case PKIND_MEMSET:
956 generate_memset_builtin (loop, partition);
957 break;
959 case PKIND_MEMCPY:
960 generate_memcpy_builtin (loop, partition);
961 break;
963 default:
964 gcc_unreachable ();
967 /* Common tail for partitions we turn into a call. If this was the last
968 partition for which we generate code, we have to destroy the loop. */
969 if (!copy_p)
970 destroy_loop (loop);
974 /* Returns a partition with all the statements needed for computing
975 the vertex V of the RDG, also including the loop exit conditions. */
977 static partition_t
978 build_rdg_partition_for_vertex (struct graph *rdg, int v)
980 partition_t partition = partition_alloc (NULL, NULL);
981 auto_vec<int, 3> nodes;
982 unsigned i;
983 int x;
985 graphds_dfs (rdg, &v, 1, &nodes, false, NULL);
987 FOR_EACH_VEC_ELT (nodes, i, x)
989 bitmap_set_bit (partition->stmts, x);
990 bitmap_set_bit (partition->loops,
991 loop_containing_stmt (RDG_STMT (rdg, x))->num);
994 return partition;
997 /* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
998 For the moment we detect only the memset zero pattern. */
1000 static void
1001 classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
1003 bitmap_iterator bi;
1004 unsigned i;
1005 tree nb_iter;
1006 data_reference_p single_load, single_store;
1007 bool volatiles_p = false;
1008 bool plus_one = false;
1010 partition->kind = PKIND_NORMAL;
1011 partition->main_dr = NULL;
1012 partition->secondary_dr = NULL;
1013 partition->niter = NULL_TREE;
1014 partition->plus_one = false;
1016 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
1018 gimple stmt = RDG_STMT (rdg, i);
1020 if (gimple_has_volatile_ops (stmt))
1021 volatiles_p = true;
1023 /* If the stmt has uses outside of the loop mark it as reduction. */
1024 if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
1026 partition->reduction_p = true;
1027 return;
1031 /* Perform general partition disqualification for builtins. */
1032 if (volatiles_p
1033 || !flag_tree_loop_distribute_patterns)
1034 return;
1036 /* Detect memset and memcpy. */
1037 single_load = NULL;
1038 single_store = NULL;
1039 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
1041 gimple stmt = RDG_STMT (rdg, i);
1042 data_reference_p dr;
1043 unsigned j;
1045 if (gimple_code (stmt) == GIMPLE_PHI)
1046 continue;
1048 /* Any scalar stmts are ok. */
1049 if (!gimple_vuse (stmt))
1050 continue;
1052 /* Otherwise just regular loads/stores. */
1053 if (!gimple_assign_single_p (stmt))
1054 return;
1056 /* But exactly one store and/or load. */
1057 for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
1059 if (DR_IS_READ (dr))
1061 if (single_load != NULL)
1062 return;
1063 single_load = dr;
1065 else
1067 if (single_store != NULL)
1068 return;
1069 single_store = dr;
1074 if (!single_store)
1075 return;
1077 nb_iter = number_of_latch_executions (loop);
1078 if (!nb_iter || nb_iter == chrec_dont_know)
1079 return;
1080 if (dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
1081 gimple_bb (DR_STMT (single_store))))
1082 plus_one = true;
1084 if (single_store && !single_load)
1086 gimple stmt = DR_STMT (single_store);
1087 tree rhs = gimple_assign_rhs1 (stmt);
1088 if (const_with_all_bytes_same (rhs) == -1
1089 && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
1090 || (TYPE_MODE (TREE_TYPE (rhs))
1091 != TYPE_MODE (unsigned_char_type_node))))
1092 return;
1093 if (TREE_CODE (rhs) == SSA_NAME
1094 && !SSA_NAME_IS_DEFAULT_DEF (rhs)
1095 && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
1096 return;
1097 if (!adjacent_dr_p (single_store)
1098 || !dominated_by_p (CDI_DOMINATORS,
1099 loop->latch, gimple_bb (stmt)))
1100 return;
1101 partition->kind = PKIND_MEMSET;
1102 partition->main_dr = single_store;
1103 partition->niter = nb_iter;
1104 partition->plus_one = plus_one;
1106 else if (single_store && single_load)
1108 gimple store = DR_STMT (single_store);
1109 gimple load = DR_STMT (single_load);
1110 /* Direct aggregate copy or via an SSA name temporary. */
1111 if (load != store
1112 && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
1113 return;
1114 if (!adjacent_dr_p (single_store)
1115 || !adjacent_dr_p (single_load)
1116 || !operand_equal_p (DR_STEP (single_store),
1117 DR_STEP (single_load), 0)
1118 || !dominated_by_p (CDI_DOMINATORS,
1119 loop->latch, gimple_bb (store)))
1120 return;
1121 /* Now check that if there is a dependence this dependence is
1122 of a suitable form for memmove. */
1123 vec<loop_p> loops = vNULL;
1124 ddr_p ddr;
1125 loops.safe_push (loop);
1126 ddr = initialize_data_dependence_relation (single_load, single_store,
1127 loops);
1128 compute_affine_dependence (ddr, loop);
1129 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1131 free_dependence_relation (ddr);
1132 loops.release ();
1133 return;
1135 if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
1137 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1139 free_dependence_relation (ddr);
1140 loops.release ();
1141 return;
1143 lambda_vector dist_v;
1144 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
1146 int dist = dist_v[index_in_loop_nest (loop->num,
1147 DDR_LOOP_NEST (ddr))];
1148 if (dist > 0 && !DDR_REVERSED_P (ddr))
1150 free_dependence_relation (ddr);
1151 loops.release ();
1152 return;
1156 free_dependence_relation (ddr);
1157 loops.release ();
1158 partition->kind = PKIND_MEMCPY;
1159 partition->main_dr = single_store;
1160 partition->secondary_dr = single_load;
1161 partition->niter = nb_iter;
1162 partition->plus_one = plus_one;
1166 /* For a data reference REF, return the declaration of its base
1167 address or NULL_TREE if the base is not determined. */
1169 static tree
1170 ref_base_address (data_reference_p dr)
1172 tree base_address = DR_BASE_ADDRESS (dr);
1173 if (base_address
1174 && TREE_CODE (base_address) == ADDR_EXPR)
1175 return TREE_OPERAND (base_address, 0);
1177 return base_address;
1180 /* Returns true when PARTITION1 and PARTITION2 have similar memory
1181 accesses in RDG. */
1183 static bool
1184 similar_memory_accesses (struct graph *rdg, partition_t partition1,
1185 partition_t partition2)
1187 unsigned i, j, k, l;
1188 bitmap_iterator bi, bj;
1189 data_reference_p ref1, ref2;
1191 /* First check whether in the intersection of the two partitions are
1192 any loads or stores. Common loads are the situation that happens
1193 most often. */
1194 EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
1195 if (RDG_MEM_WRITE_STMT (rdg, i)
1196 || RDG_MEM_READS_STMT (rdg, i))
1197 return true;
1199 /* Then check all data-references against each other. */
1200 EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
1201 if (RDG_MEM_WRITE_STMT (rdg, i)
1202 || RDG_MEM_READS_STMT (rdg, i))
1203 EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
1204 if (RDG_MEM_WRITE_STMT (rdg, j)
1205 || RDG_MEM_READS_STMT (rdg, j))
1207 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
1209 tree base1 = ref_base_address (ref1);
1210 if (base1)
1211 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
1212 if (base1 == ref_base_address (ref2))
1213 return true;
1217 return false;
1220 /* Aggregate several components into a useful partition that is
1221 registered in the PARTITIONS vector. Partitions will be
1222 distributed in different loops. */
1224 static void
1225 rdg_build_partitions (struct graph *rdg,
1226 vec<gimple> starting_stmts,
1227 vec<partition_t> *partitions)
1229 bitmap processed = BITMAP_ALLOC (NULL);
1230 int i;
1231 gimple stmt;
1233 FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
1235 int v = rdg_vertex_for_stmt (rdg, stmt);
1237 if (dump_file && (dump_flags & TDF_DETAILS))
1238 fprintf (dump_file,
1239 "ldist asked to generate code for vertex %d\n", v);
1241 /* If the vertex is already contained in another partition so
1242 is the partition rooted at it. */
1243 if (bitmap_bit_p (processed, v))
1244 continue;
1246 partition_t partition = build_rdg_partition_for_vertex (rdg, v);
1247 bitmap_ior_into (processed, partition->stmts);
1249 if (dump_file && (dump_flags & TDF_DETAILS))
1251 fprintf (dump_file, "ldist useful partition:\n");
1252 dump_bitmap (dump_file, partition->stmts);
1255 partitions->safe_push (partition);
1258 /* All vertices should have been assigned to at least one partition now,
1259 other than vertices belonging to dead code. */
1261 BITMAP_FREE (processed);
1264 /* Dump to FILE the PARTITIONS. */
1266 static void
1267 dump_rdg_partitions (FILE *file, vec<partition_t> partitions)
1269 int i;
1270 partition_t partition;
1272 FOR_EACH_VEC_ELT (partitions, i, partition)
1273 debug_bitmap_file (file, partition->stmts);
/* Debug helper: dump the statement bitmaps of PARTITIONS to stderr.
   Intended to be called from a debugger, hence the extern
   declaration and DEBUG_FUNCTION marker.  */
extern void debug_rdg_partitions (vec<partition_t> );

DEBUG_FUNCTION void
debug_rdg_partitions (vec<partition_t> partitions)
{
  dump_rdg_partitions (stderr, partitions);
}
1285 /* Returns the number of read and write operations in the RDG. */
1287 static int
1288 number_of_rw_in_rdg (struct graph *rdg)
1290 int i, res = 0;
1292 for (i = 0; i < rdg->n_vertices; i++)
1294 if (RDG_MEM_WRITE_STMT (rdg, i))
1295 ++res;
1297 if (RDG_MEM_READS_STMT (rdg, i))
1298 ++res;
1301 return res;
1304 /* Returns the number of read and write operations in a PARTITION of
1305 the RDG. */
1307 static int
1308 number_of_rw_in_partition (struct graph *rdg, partition_t partition)
1310 int res = 0;
1311 unsigned i;
1312 bitmap_iterator ii;
1314 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
1316 if (RDG_MEM_WRITE_STMT (rdg, i))
1317 ++res;
1319 if (RDG_MEM_READS_STMT (rdg, i))
1320 ++res;
1323 return res;
1326 /* Returns true when one of the PARTITIONS contains all the read or
1327 write operations of RDG. */
1329 static bool
1330 partition_contains_all_rw (struct graph *rdg,
1331 vec<partition_t> partitions)
1333 int i;
1334 partition_t partition;
1335 int nrw = number_of_rw_in_rdg (rdg);
1337 FOR_EACH_VEC_ELT (partitions, i, partition)
1338 if (nrw == number_of_rw_in_partition (rdg, partition))
1339 return true;
1341 return false;
/* Compute partition dependence created by the data references in DRS1
   and DRS2 and modify and return DIR according to that.  DIR is the
   direction accumulated so far: 0 - no dependence, -1 - backward,
   1 - forward, 2 - both directions (the caller can stop querying
   then, the partitions will have to be merged).  */

static int
pg_add_dependence_edges (struct graph *rdg, vec<loop_p> loops, int dir,
			 vec<data_reference_p> drs1,
			 vec<data_reference_p> drs2)
{
  data_reference_p dr1, dr2;

  /* dependence direction - 0 is no dependence, -1 is back,
     1 is forth, 2 is both (we can stop then, merging will occur).  */
  for (int ii = 0; drs1.iterate (ii, &dr1); ++ii)
    for (int jj = 0; drs2.iterate (jj, &dr2); ++jj)
      {
	data_reference_p saved_dr1 = dr1;
	int this_dir = 1;
	ddr_p ddr;
	/* Re-shuffle data-refs to be in dominator order.  */
	if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
	    > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
	  {
	    data_reference_p tem = dr1;
	    dr1 = dr2;
	    dr2 = tem;
	    this_dir = -this_dir;
	  }
	ddr = initialize_data_dependence_relation (dr1, dr2, loops);
	compute_affine_dependence (ddr, loops[0]);
	if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	  /* Analysis failed - be conservative and assume a dependence
	     in both directions.  */
	  this_dir = 2;
	else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
	  {
	    /* The relation exists; normalize so dr1 dominates dr2
	       again after the analysis possibly reversed them.  */
	    if (DDR_REVERSED_P (ddr))
	      {
		data_reference_p tem = dr1;
		dr1 = dr2;
		dr2 = tem;
		this_dir = -this_dir;
	      }
	    /* Known dependences can still be unordered throughout the
	       iteration space, see gcc.dg/tree-ssa/ldist-16.c.  */
	    if (DDR_NUM_DIST_VECTS (ddr) != 1)
	      this_dir = 2;
	    /* If the overlap is exact preserve stmt order.  */
	    else if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1))
	      ;
	    else
	      {
		/* Else as the distance vector is lexicographic positive
		   swap the dependence direction.  */
		this_dir = -this_dir;
	      }
	  }
	else
	  /* chrec_known: the refs are provably independent.  */
	  this_dir = 0;
	free_dependence_relation (ddr);
	if (dir == 0)
	  dir = this_dir;
	else if (dir != this_dir)
	  /* Conflicting directions between pairs - report both.  */
	  return 2;
	/* Shuffle "back" dr1.  */
	dr1 = saved_dr1;
      }
  return dir;
}
1411 /* Compare postorder number of the partition graph vertices V1 and V2. */
1413 static int
1414 pgcmp (const void *v1_, const void *v2_)
1416 const vertex *v1 = (const vertex *)v1_;
1417 const vertex *v2 = (const vertex *)v2_;
1418 return v2->post - v1->post;
/* Distributes the code from LOOP in such a way that producer
   statements are placed before consumer statements.  Tries to separate
   only the statements from STMTS into separate loops.
   Returns the number of distributed loops; *NB_CALLS is set to the
   number of partitions that were emitted as builtin calls instead of
   loops.  */

static int
distribute_loop (struct loop *loop, vec<gimple> stmts,
		 control_dependences *cd, int *nb_calls)
{
  struct graph *rdg;
  partition_t partition;
  bool any_builtin;
  int i, nbp;
  graph *pg = NULL;
  int num_sccs = 1;

  *nb_calls = 0;
  auto_vec<loop_p, 3> loop_nest;
  if (!find_loop_nest (loop, &loop_nest))
    return 0;

  rdg = build_rdg (loop_nest, cd);
  if (!rdg)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Loop %d not distributed: failed to build the RDG.\n",
		 loop->num);

      return 0;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg (dump_file, rdg);

  auto_vec<partition_t, 3> partitions;
  rdg_build_partitions (rdg, stmts, &partitions);

  /* Classify each partition (normal / reduction / builtin) and record
     whether any builtin (e.g. memcpy-like) partition was found.  */
  any_builtin = false;
  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      classify_partition (loop, rdg, partition);
      any_builtin |= partition_builtin_p (partition);
    }

  /* If we are only distributing patterns but did not detect any,
     simply bail out.  */
  if (!flag_tree_loop_distribution
      && !any_builtin)
    {
      nbp = 0;
      goto ldist_done;
    }

  /* If we are only distributing patterns fuse all partitions that
     were not classified as builtins.  This also avoids chopping
     a loop into pieces, separated by builtin calls.  That is, we
     only want no or a single loop body remaining.  */
  partition_t into;
  if (!flag_tree_loop_distribution)
    {
      for (i = 0; partitions.iterate (i, &into); ++i)
	if (!partition_builtin_p (into))
	  break;
      for (++i; partitions.iterate (i, &partition); ++i)
	if (!partition_builtin_p (partition))
	  {
	    if (dump_file && (dump_flags & TDF_DETAILS))
	      {
		fprintf (dump_file, "fusing non-builtin partitions\n");
		dump_bitmap (dump_file, into->stmts);
		dump_bitmap (dump_file, partition->stmts);
	      }
	    partition_merge_into (into, partition);
	    partitions.unordered_remove (i);
	    partition_free (partition);
	    i--;
	  }
    }

  /* Due to limitations in the transform phase we have to fuse all
     reduction partitions into the last partition so the existing
     loop will contain all loop-closed PHI nodes.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    if (partition_reduction_p (into))
      break;
  for (i = i + 1; partitions.iterate (i, &partition); ++i)
    if (partition_reduction_p (partition))
      {
	if (dump_file && (dump_flags & TDF_DETAILS))
	  {
	    fprintf (dump_file, "fusing partitions\n");
	    dump_bitmap (dump_file, into->stmts);
	    dump_bitmap (dump_file, partition->stmts);
	    fprintf (dump_file, "because they have reductions\n");
	  }
	partition_merge_into (into, partition);
	partitions.unordered_remove (i);
	partition_free (partition);
	i--;
      }

  /* Apply our simple cost model - fuse partitions with similar
     memory accesses.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    {
      if (partition_builtin_p (into))
	continue;
      for (int j = i + 1;
	   partitions.iterate (j, &partition); ++j)
	{
	  if (!partition_builtin_p (partition)
	      && similar_memory_accesses (rdg, into, partition))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, "fusing partitions\n");
		  dump_bitmap (dump_file, into->stmts);
		  dump_bitmap (dump_file, partition->stmts);
		  fprintf (dump_file, "because they have similar "
			   "memory accesses\n");
		}
	      partition_merge_into (into, partition);
	      partitions.unordered_remove (j);
	      partition_free (partition);
	      j--;
	    }
	}
    }

  /* Build the partition dependency graph.  */
  if (partitions.length () > 1)
    {
      pg = new_graph (partitions.length ());
      /* Per-vertex payload: the partition and its reads/writes split
	 out for the dependence queries below.  */
      struct pgdata
	{
	  partition_t partition;
	  vec<data_reference_p> writes;
	  vec<data_reference_p> reads;
	};
#define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
      for (i = 0; partitions.iterate (i, &partition); ++i)
	{
	  vertex *v = &pg->vertices[i];
	  pgdata *data = new pgdata;
	  data_reference_p dr;
	  /* FIXME - leaks.  */
	  v->data = data;
	  bitmap_iterator bi;
	  unsigned j;
	  data->partition = partition;
	  data->reads = vNULL;
	  data->writes = vNULL;
	  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, j, bi)
	    for (int k = 0; RDG_DATAREFS (rdg, j).iterate (k, &dr); ++k)
	      if (DR_IS_READ (dr))
		data->reads.safe_push (dr);
	      else
		data->writes.safe_push (dr);
	}
      /* Add a dependence edge for every ordered pair of partitions
	 with a write-read, read-write or write-write dependence.  */
      partition_t partition1, partition2;
      for (i = 0; partitions.iterate (i, &partition1); ++i)
	for (int j = i + 1; partitions.iterate (j, &partition2); ++j)
	  {
	    /* dependence direction - 0 is no dependence, -1 is back,
	       1 is forth, 2 is both (we can stop then, merging will occur).  */
	    int dir = 0;
	    dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					   PGDATA(i)->writes,
					   PGDATA(j)->reads);
	    if (dir != 2)
	      dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					     PGDATA(i)->reads,
					     PGDATA(j)->writes);
	    if (dir != 2)
	      dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					     PGDATA(i)->writes,
					     PGDATA(j)->writes);
	    if (dir == 1 || dir == 2)
	      add_edge (pg, i, j);
	    if (dir == -1 || dir == 2)
	      add_edge (pg, j, i);
	  }

      /* Add edges to the reduction partition (if any) to force it last.  */
      unsigned j;
      for (j = 0; partitions.iterate (j, &partition); ++j)
	if (partition_reduction_p (partition))
	  break;
      if (j < partitions.length ())
	{
	  for (unsigned i = 0; partitions.iterate (i, &partition); ++i)
	    if (i != j)
	      add_edge (pg, i, j);
	}

      /* Compute partitions we cannot separate and fuse them.  */
      num_sccs = graphds_scc (pg, NULL);
      for (i = 0; i < num_sccs; ++i)
	{
	  partition_t first;
	  int j;
	  for (j = 0; partitions.iterate (j, &first); ++j)
	    if (pg->vertices[j].component == i)
	      break;
	  for (j = j + 1; partitions.iterate (j, &partition); ++j)
	    if (pg->vertices[j].component == i)
	      {
		if (dump_file && (dump_flags & TDF_DETAILS))
		  {
		    fprintf (dump_file, "fusing partitions\n");
		    dump_bitmap (dump_file, first->stmts);
		    dump_bitmap (dump_file, partition->stmts);
		    fprintf (dump_file, "because they are in the same "
			     "dependence SCC\n");
		  }
		partition_merge_into (first, partition);
		partitions[j] = NULL;
		partition_free (partition);
		PGDATA (j)->partition = NULL;
	      }
	}

      /* Now order the remaining nodes in postorder.  */
      qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
      partitions.truncate (0);
      for (i = 0; i < pg->n_vertices; ++i)
	{
	  pgdata *data = PGDATA (i);
	  if (data->partition)
	    partitions.safe_push (data->partition);
	  data->reads.release ();
	  data->writes.release ();
	  delete data;
	}
      /* After fusion every SCC contributes exactly one partition.  */
      gcc_assert (partitions.length () == (unsigned)num_sccs);
      free_graph (pg);
    }

  /* Give up when nothing was split, or when the split would be
     pointless (a single non-builtin partition, or one partition
     containing all memory operations anyway).  */
  nbp = partitions.length ();
  if (nbp == 0
      || (nbp == 1 && !partition_builtin_p (partitions[0]))
      || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
    {
      nbp = 0;
      goto ldist_done;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg_partitions (dump_file, partitions);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      if (partition_builtin_p (partition))
	(*nb_calls)++;
      /* NOTE(review): the third argument is true for all but the last
	 partition - presumably whether to copy the loop, the last one
	 reusing the original loop body; confirm against
	 generate_code_for_partition.  */
      generate_code_for_partition (loop, partition, i < nbp - 1);
    }

 ldist_done:

  FOR_EACH_VEC_ELT (partitions, i, partition)
    partition_free (partition);

  free_rdg (rdg);
  return nbp - *nb_calls;
}
1687 /* Distribute all loops in the current function. */
1689 namespace {
/* Static descriptor for the loop distribution pass; field meanings
   are defined by struct pass_data in tree-pass.h.  */
const pass_data pass_data_loop_distribution =
{
  GIMPLE_PASS, /* type */
  "ldist", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
/* The loop distribution pass object.  */
class pass_loop_distribution : public gimple_opt_pass
{
public:
  pass_loop_distribution (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_loop_distribution, ctxt)
  {}

  /* opt_pass methods: */
  /* Run whenever full distribution or pattern-only distribution
     (builtin recognition) is enabled.  */
  virtual bool gate (function *)
    {
      return flag_tree_loop_distribution
	|| flag_tree_loop_distribute_patterns;
    }

  virtual unsigned int execute (function *);

}; // class pass_loop_distribution
/* Main entry of the pass: walk all innermost loops of FUN, seed a
   statement worklist per loop, and invoke distribute_loop on it.
   Returns 0 (no extra TODO flags).  */

unsigned int
pass_loop_distribution::execute (function *fun)
{
  struct loop *loop;
  bool changed = false;
  basic_block bb;
  control_dependences *cd = NULL;

  /* Reset all statement uids; NOTE(review): they appear to be
     (re)assigned during RDG construction - confirm in build_rdg.  */
  FOR_ALL_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
    }

  /* We can at the moment only distribute non-nested loops, thus restrict
     walking to innermost loops.  */
  FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
    {
      auto_vec<gimple> work_list;
      basic_block *bbs;
      int num = loop->num;
      unsigned int i;

      /* If the loop doesn't have a single exit we will fail anyway,
	 so do that early.  */
      if (!single_exit (loop))
	continue;

      /* Only optimize hot loops.  */
      if (!optimize_loop_for_speed_p (loop))
	continue;

      /* Initialize the worklist with stmts we seed the partitions with.  */
      bbs = get_loop_body_in_dom_order (loop);
      for (i = 0; i < loop->num_nodes; ++i)
	{
	  for (gphi_iterator gsi = gsi_start_phis (bbs[i]);
	       !gsi_end_p (gsi);
	       gsi_next (&gsi))
	    {
	      gphi *phi = gsi.phi ();
	      if (virtual_operand_p (gimple_phi_result (phi)))
		continue;
	      /* Distribute stmts which have defs that are used outside of
		 the loop.  */
	      if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
		continue;
	      work_list.safe_push (phi);
	    }
	  for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
	       !gsi_end_p (gsi);
	       gsi_next (&gsi))
	    {
	      gimple stmt = gsi_stmt (gsi);

	      /* If there is a stmt with side-effects bail out - we
		 cannot and should not distribute this loop.  */
	      if (gimple_has_side_effects (stmt))
		{
		  work_list.truncate (0);
		  goto out;
		}

	      /* Distribute stmts which have defs that are used outside of
		 the loop.  */
	      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
		/* Deliberately empty: such stmts are seeds too.  */
		;
	      /* Otherwise only distribute stores for now.  */
	      else if (!gimple_vdef (stmt))
		continue;

	      work_list.safe_push (stmt);
	    }
	}
out:
      free (bbs);

      int nb_generated_loops = 0;
      int nb_generated_calls = 0;
      location_t loc = find_loop_location (loop);
      if (work_list.length () > 0)
	{
	  /* Control dependences are computed lazily, once for the whole
	     function, on the first loop that needs them.  */
	  if (!cd)
	    {
	      calculate_dominance_info (CDI_DOMINATORS);
	      calculate_dominance_info (CDI_POST_DOMINATORS);
	      cd = new control_dependences (create_edge_list ());
	      free_dominance_info (CDI_POST_DOMINATORS);
	    }
	  nb_generated_loops = distribute_loop (loop, work_list, cd,
						&nb_generated_calls);
	}

      if (nb_generated_loops + nb_generated_calls > 0)
	{
	  changed = true;
	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
			   loc, "Loop %d distributed: split to %d loops "
			   "and %d library calls.\n",
			   num, nb_generated_loops, nb_generated_calls);
	}
      else if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Loop %d is the same.\n", num);
    }

  if (cd)
    delete cd;

  if (changed)
    {
      /* Cached scalar evolutions now may refer to wrong or non-existing
	 loops.  */
      scev_reset_htab ();
      mark_virtual_operands_for_renaming (fun);
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }

#ifdef ENABLE_CHECKING
  verify_loop_structure ();
#endif

  return 0;
}
1849 } // anon namespace
/* Factory function used by the pass manager (see passes.def) to
   create an instance of the loop distribution pass.  */

gimple_opt_pass *
make_pass_loop_distribution (gcc::context *ctxt)
{
  return new pass_loop_distribution (ctxt);
}