2015-06-24 François Dumont <fdumont@gcc.gnu.org>
[official-gcc.git] / gcc / tree-loop-distribution.c
blob19523b941d2a52de888d61103cf33f0e5af5d7d0
1 /* Loop distribution.
2 Copyright (C) 2006-2015 Free Software Foundation, Inc.
3 Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
4 and Sebastian Pop <sebastian.pop@amd.com>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 3, or (at your option) any
11 later version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* This pass performs loop distribution: for example, the loop
24 |DO I = 2, N
25 | A(I) = B(I) + C
26 | D(I) = A(I-1)*E
27 |ENDDO
29 is transformed to
31 |DOALL I = 2, N
32 | A(I) = B(I) + C
33 |ENDDO
35 |DOALL I = 2, N
36 | D(I) = A(I-1)*E
37 |ENDDO
39 This pass uses an RDG, Reduced Dependence Graph built on top of the
40 data dependence relations. The RDG is then topologically sorted to
41 obtain a map of information producers/consumers based on which it
42 generates the new loops. */
44 #include "config.h"
45 #include "system.h"
46 #include "coretypes.h"
47 #include "alias.h"
48 #include "symtab.h"
49 #include "options.h"
50 #include "tree.h"
51 #include "fold-const.h"
52 #include "predict.h"
53 #include "tm.h"
54 #include "hard-reg-set.h"
55 #include "function.h"
56 #include "dominance.h"
57 #include "cfg.h"
58 #include "cfganal.h"
59 #include "basic-block.h"
60 #include "tree-ssa-alias.h"
61 #include "internal-fn.h"
62 #include "gimple-expr.h"
63 #include "gimple.h"
64 #include "gimple-iterator.h"
65 #include "gimplify-me.h"
66 #include "stor-layout.h"
67 #include "gimple-ssa.h"
68 #include "tree-cfg.h"
69 #include "tree-phinodes.h"
70 #include "ssa-iterators.h"
71 #include "stringpool.h"
72 #include "tree-ssanames.h"
73 #include "tree-ssa-loop-manip.h"
74 #include "tree-ssa-loop.h"
75 #include "tree-into-ssa.h"
76 #include "tree-ssa.h"
77 #include "cfgloop.h"
78 #include "tree-chrec.h"
79 #include "tree-data-ref.h"
80 #include "tree-scalar-evolution.h"
81 #include "tree-pass.h"
82 #include "gimple-pretty-print.h"
83 #include "tree-vectorizer.h"
86 /* A Reduced Dependence Graph (RDG) vertex representing a statement. */
87 typedef struct rdg_vertex
89 /* The statement represented by this vertex. */
90 gimple stmt;
92 /* Vector of data-references in this statement. */
93 vec<data_reference_p> datarefs;
95 /* True when the statement contains a write to memory. */
96 bool has_mem_write;
98 /* True when the statement contains a read from memory. */
99 bool has_mem_reads;
100 } *rdg_vertex_p;
102 #define RDGV_STMT(V) ((struct rdg_vertex *) ((V)->data))->stmt
103 #define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
104 #define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
105 #define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
106 #define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
107 #define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
108 #define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
109 #define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
111 /* Data dependence type. */
113 enum rdg_dep_type
115 /* Read After Write (RAW). */
116 flow_dd = 'f',
118 /* Control dependence (execute conditional on). */
119 control_dd = 'c'
122 /* Dependence information attached to an edge of the RDG. */
124 typedef struct rdg_edge
126 /* Type of the dependence. */
127 enum rdg_dep_type type;
128 } *rdg_edge_p;
130 #define RDGE_TYPE(E) ((struct rdg_edge *) ((E)->data))->type
132 /* Dump vertex I in RDG to FILE. */
134 static void
135 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
137 struct vertex *v = &(rdg->vertices[i]);
138 struct graph_edge *e;
140 fprintf (file, "(vertex %d: (%s%s) (in:", i,
141 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
142 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
144 if (v->pred)
145 for (e = v->pred; e; e = e->pred_next)
146 fprintf (file, " %d", e->src);
148 fprintf (file, ") (out:");
150 if (v->succ)
151 for (e = v->succ; e; e = e->succ_next)
152 fprintf (file, " %d", e->dest);
154 fprintf (file, ")\n");
155 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
156 fprintf (file, ")\n");
159 /* Call dump_rdg_vertex on stderr. */
161 DEBUG_FUNCTION void
162 debug_rdg_vertex (struct graph *rdg, int i)
164 dump_rdg_vertex (stderr, rdg, i);
167 /* Dump the reduced dependence graph RDG to FILE. */
169 static void
170 dump_rdg (FILE *file, struct graph *rdg)
172 fprintf (file, "(rdg\n");
173 for (int i = 0; i < rdg->n_vertices; i++)
174 dump_rdg_vertex (file, rdg, i);
175 fprintf (file, ")\n");
178 /* Call dump_rdg on stderr. */
180 DEBUG_FUNCTION void
181 debug_rdg (struct graph *rdg)
183 dump_rdg (stderr, rdg);
186 static void
187 dot_rdg_1 (FILE *file, struct graph *rdg)
189 int i;
190 pretty_printer buffer;
191 pp_needs_newline (&buffer) = false;
192 buffer.buffer->stream = file;
194 fprintf (file, "digraph RDG {\n");
196 for (i = 0; i < rdg->n_vertices; i++)
198 struct vertex *v = &(rdg->vertices[i]);
199 struct graph_edge *e;
201 fprintf (file, "%d [label=\"[%d] ", i, i);
202 pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
203 pp_flush (&buffer);
204 fprintf (file, "\"]\n");
206 /* Highlight reads from memory. */
207 if (RDG_MEM_READS_STMT (rdg, i))
208 fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
210 /* Highlight stores to memory. */
211 if (RDG_MEM_WRITE_STMT (rdg, i))
212 fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
214 if (v->succ)
215 for (e = v->succ; e; e = e->succ_next)
216 switch (RDGE_TYPE (e))
218 case flow_dd:
219 /* These are the most common dependences: don't print these. */
220 fprintf (file, "%d -> %d \n", i, e->dest);
221 break;
223 case control_dd:
224 fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
225 break;
227 default:
228 gcc_unreachable ();
232 fprintf (file, "}\n\n");
235 /* Display the Reduced Dependence Graph using dotty. */
237 DEBUG_FUNCTION void
238 dot_rdg (struct graph *rdg)
240 /* When debugging, you may want to enable the following code. */
241 #ifdef HAVE_POPEN
242 FILE *file = popen ("dot -Tx11", "w");
243 if (!file)
244 return;
245 dot_rdg_1 (file, rdg);
246 fflush (file);
247 close (fileno (file));
248 pclose (file);
249 #else
250 dot_rdg_1 (stderr, rdg);
251 #endif
254 /* Returns the index of STMT in RDG. */
256 static int
257 rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple stmt)
259 int index = gimple_uid (stmt);
260 gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
261 return index;
264 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
265 the index of DEF in RDG. */
267 static void
268 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
270 use_operand_p imm_use_p;
271 imm_use_iterator iterator;
273 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
275 struct graph_edge *e;
276 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
278 if (use < 0)
279 continue;
281 e = add_edge (rdg, idef, use);
282 e->data = XNEW (struct rdg_edge);
283 RDGE_TYPE (e) = flow_dd;
287 /* Creates an edge for the control dependences of BB to the vertex V. */
289 static void
290 create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
291 int v, control_dependences *cd)
293 bitmap_iterator bi;
294 unsigned edge_n;
295 EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
296 0, edge_n, bi)
298 basic_block cond_bb = cd->get_edge (edge_n)->src;
299 gimple stmt = last_stmt (cond_bb);
300 if (stmt && is_ctrl_stmt (stmt))
302 struct graph_edge *e;
303 int c = rdg_vertex_for_stmt (rdg, stmt);
304 if (c < 0)
305 continue;
307 e = add_edge (rdg, c, v);
308 e->data = XNEW (struct rdg_edge);
309 RDGE_TYPE (e) = control_dd;
314 /* Creates the edges of the reduced dependence graph RDG. */
316 static void
317 create_rdg_flow_edges (struct graph *rdg)
319 int i;
320 def_operand_p def_p;
321 ssa_op_iter iter;
323 for (i = 0; i < rdg->n_vertices; i++)
324 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
325 iter, SSA_OP_DEF)
326 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
329 /* Creates the edges of the reduced dependence graph RDG. */
331 static void
332 create_rdg_cd_edges (struct graph *rdg, control_dependences *cd)
334 int i;
336 for (i = 0; i < rdg->n_vertices; i++)
338 gimple stmt = RDG_STMT (rdg, i);
339 if (gimple_code (stmt) == GIMPLE_PHI)
341 edge_iterator ei;
342 edge e;
343 FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
344 create_edge_for_control_dependence (rdg, e->src, i, cd);
346 else
347 create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
351 /* Build the vertices of the reduced dependence graph RDG. Return false
352 if that failed. */
354 static bool
355 create_rdg_vertices (struct graph *rdg, vec<gimple> stmts, loop_p loop,
356 vec<data_reference_p> *datarefs)
358 int i;
359 gimple stmt;
361 FOR_EACH_VEC_ELT (stmts, i, stmt)
363 struct vertex *v = &(rdg->vertices[i]);
365 /* Record statement to vertex mapping. */
366 gimple_set_uid (stmt, i);
368 v->data = XNEW (struct rdg_vertex);
369 RDGV_STMT (v) = stmt;
370 RDGV_DATAREFS (v).create (0);
371 RDGV_HAS_MEM_WRITE (v) = false;
372 RDGV_HAS_MEM_READS (v) = false;
373 if (gimple_code (stmt) == GIMPLE_PHI)
374 continue;
376 unsigned drp = datarefs->length ();
377 if (!find_data_references_in_stmt (loop, stmt, datarefs))
378 return false;
379 for (unsigned j = drp; j < datarefs->length (); ++j)
381 data_reference_p dr = (*datarefs)[j];
382 if (DR_IS_READ (dr))
383 RDGV_HAS_MEM_READS (v) = true;
384 else
385 RDGV_HAS_MEM_WRITE (v) = true;
386 RDGV_DATAREFS (v).safe_push (dr);
389 return true;
392 /* Initialize STMTS with all the statements of LOOP. The order in
393 which we discover statements is important as
394 generate_loops_for_partition is using the same traversal for
395 identifying statements in loop copies. */
397 static void
398 stmts_from_loop (struct loop *loop, vec<gimple> *stmts)
400 unsigned int i;
401 basic_block *bbs = get_loop_body_in_dom_order (loop);
403 for (i = 0; i < loop->num_nodes; i++)
405 basic_block bb = bbs[i];
407 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
408 gsi_next (&bsi))
409 if (!virtual_operand_p (gimple_phi_result (bsi.phi ())))
410 stmts->safe_push (bsi.phi ());
412 for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
413 gsi_next (&bsi))
415 gimple stmt = gsi_stmt (bsi);
416 if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
417 stmts->safe_push (stmt);
421 free (bbs);
424 /* Free the reduced dependence graph RDG. */
426 static void
427 free_rdg (struct graph *rdg)
429 int i;
431 for (i = 0; i < rdg->n_vertices; i++)
433 struct vertex *v = &(rdg->vertices[i]);
434 struct graph_edge *e;
436 for (e = v->succ; e; e = e->succ_next)
437 free (e->data);
439 if (v->data)
441 gimple_set_uid (RDGV_STMT (v), -1);
442 free_data_refs (RDGV_DATAREFS (v));
443 free (v->data);
447 free_graph (rdg);
450 /* Build the Reduced Dependence Graph (RDG) with one vertex per
451 statement of the loop nest LOOP_NEST, and one edge per data dependence or
452 scalar dependence. */
454 static struct graph *
455 build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
457 struct graph *rdg;
458 vec<data_reference_p> datarefs;
460 /* Create the RDG vertices from the stmts of the loop nest. */
461 auto_vec<gimple, 10> stmts;
462 stmts_from_loop (loop_nest[0], &stmts);
463 rdg = new_graph (stmts.length ());
464 datarefs.create (10);
465 if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs))
467 datarefs.release ();
468 free_rdg (rdg);
469 return NULL;
471 stmts.release ();
473 create_rdg_flow_edges (rdg);
474 if (cd)
475 create_rdg_cd_edges (rdg, cd);
477 datarefs.release ();
479 return rdg;
484 enum partition_kind {
485 PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY
488 typedef struct partition_s
490 bitmap stmts;
491 bitmap loops;
492 bool reduction_p;
493 enum partition_kind kind;
494 /* data-references a kind != PKIND_NORMAL partition is about. */
495 data_reference_p main_dr;
496 data_reference_p secondary_dr;
497 tree niter;
498 bool plus_one;
499 } *partition_t;
502 /* Allocate and initialize a partition from BITMAP. */
504 static partition_t
505 partition_alloc (bitmap stmts, bitmap loops)
507 partition_t partition = XCNEW (struct partition_s);
508 partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
509 partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
510 partition->reduction_p = false;
511 partition->kind = PKIND_NORMAL;
512 return partition;
515 /* Free PARTITION. */
517 static void
518 partition_free (partition_t partition)
520 BITMAP_FREE (partition->stmts);
521 BITMAP_FREE (partition->loops);
522 free (partition);
525 /* Returns true if the partition can be generated as a builtin. */
527 static bool
528 partition_builtin_p (partition_t partition)
530 return partition->kind != PKIND_NORMAL;
533 /* Returns true if the partition contains a reduction. */
535 static bool
536 partition_reduction_p (partition_t partition)
538 return partition->reduction_p;
541 /* Merge PARTITION into the partition DEST. */
543 static void
544 partition_merge_into (partition_t dest, partition_t partition)
546 dest->kind = PKIND_NORMAL;
547 bitmap_ior_into (dest->stmts, partition->stmts);
548 if (partition_reduction_p (partition))
549 dest->reduction_p = true;
553 /* Returns true when DEF is an SSA_NAME defined in LOOP and used after
554 the LOOP. */
556 static bool
557 ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
559 imm_use_iterator imm_iter;
560 use_operand_p use_p;
562 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
564 gimple use_stmt = USE_STMT (use_p);
565 if (!is_gimple_debug (use_stmt)
566 && loop != loop_containing_stmt (use_stmt))
567 return true;
570 return false;
573 /* Returns true when STMT defines a scalar variable used after the
574 loop LOOP. */
576 static bool
577 stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple stmt)
579 def_operand_p def_p;
580 ssa_op_iter op_iter;
582 if (gimple_code (stmt) == GIMPLE_PHI)
583 return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);
585 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
586 if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
587 return true;
589 return false;
592 /* Return a copy of LOOP placed before LOOP. */
594 static struct loop *
595 copy_loop_before (struct loop *loop)
597 struct loop *res;
598 edge preheader = loop_preheader_edge (loop);
600 initialize_original_copy_tables ();
601 res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, NULL, preheader);
602 gcc_assert (res != NULL);
603 free_original_copy_tables ();
604 delete_update_ssa ();
606 return res;
609 /* Creates an empty basic block after LOOP. */
611 static void
612 create_bb_after_loop (struct loop *loop)
614 edge exit = single_exit (loop);
616 if (!exit)
617 return;
619 split_edge (exit);
622 /* Generate code for PARTITION from the code in LOOP. The loop is
623 copied when COPY_P is true. All the statements not flagged in the
624 PARTITION bitmap are removed from the loop or from its copy. The
625 statements are indexed in sequence inside a basic block, and the
626 basic blocks of a loop are taken in dom order. */
628 static void
629 generate_loops_for_partition (struct loop *loop, partition_t partition,
630 bool copy_p)
632 unsigned i;
633 basic_block *bbs;
635 if (copy_p)
637 loop = copy_loop_before (loop);
638 gcc_assert (loop != NULL);
639 create_preheader (loop, CP_SIMPLE_PREHEADERS);
640 create_bb_after_loop (loop);
643 /* Remove stmts not in the PARTITION bitmap. */
644 bbs = get_loop_body_in_dom_order (loop);
646 if (MAY_HAVE_DEBUG_STMTS)
647 for (i = 0; i < loop->num_nodes; i++)
649 basic_block bb = bbs[i];
651 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
652 gsi_next (&bsi))
654 gphi *phi = bsi.phi ();
655 if (!virtual_operand_p (gimple_phi_result (phi))
656 && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
657 reset_debug_uses (phi);
660 for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
662 gimple stmt = gsi_stmt (bsi);
663 if (gimple_code (stmt) != GIMPLE_LABEL
664 && !is_gimple_debug (stmt)
665 && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
666 reset_debug_uses (stmt);
670 for (i = 0; i < loop->num_nodes; i++)
672 basic_block bb = bbs[i];
674 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
676 gphi *phi = bsi.phi ();
677 if (!virtual_operand_p (gimple_phi_result (phi))
678 && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
679 remove_phi_node (&bsi, true);
680 else
681 gsi_next (&bsi);
684 for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
686 gimple stmt = gsi_stmt (bsi);
687 if (gimple_code (stmt) != GIMPLE_LABEL
688 && !is_gimple_debug (stmt)
689 && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
691 /* Choose an arbitrary path through the empty CFG part
692 that this unnecessary control stmt controls. */
693 if (gcond *cond_stmt = dyn_cast <gcond *> (stmt))
695 gimple_cond_make_false (cond_stmt);
696 update_stmt (stmt);
698 else if (gimple_code (stmt) == GIMPLE_SWITCH)
700 gswitch *switch_stmt = as_a <gswitch *> (stmt);
701 gimple_switch_set_index
702 (switch_stmt, CASE_LOW (gimple_switch_label (switch_stmt, 1)));
703 update_stmt (stmt);
705 else
707 unlink_stmt_vdef (stmt);
708 gsi_remove (&bsi, true);
709 release_defs (stmt);
710 continue;
713 gsi_next (&bsi);
717 free (bbs);
720 /* Build the size argument for a memory operation call. */
722 static tree
723 build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter,
724 bool plus_one)
726 tree size = fold_convert_loc (loc, sizetype, nb_iter);
727 if (plus_one)
728 size = size_binop (PLUS_EXPR, size, size_one_node);
729 size = fold_build2_loc (loc, MULT_EXPR, sizetype, size,
730 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
731 size = fold_convert_loc (loc, size_type_node, size);
732 return size;
735 /* Build an address argument for a memory operation call. */
737 static tree
738 build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
740 tree addr_base;
742 addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
743 addr_base = fold_convert_loc (loc, sizetype, addr_base);
745 /* Test for a negative stride, iterating over every element. */
746 if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
748 addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
749 fold_convert_loc (loc, sizetype, nb_bytes));
750 addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
751 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
754 return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
757 /* If VAL memory representation contains the same value in all bytes,
758 return that value, otherwise return -1.
759 E.g. for 0x24242424 return 0x24, for IEEE double
760 747708026454360457216.0 return 0x44, etc. */
762 static int
763 const_with_all_bytes_same (tree val)
765 unsigned char buf[64];
766 int i, len;
768 if (integer_zerop (val)
769 || real_zerop (val)
770 || (TREE_CODE (val) == CONSTRUCTOR
771 && !TREE_CLOBBER_P (val)
772 && CONSTRUCTOR_NELTS (val) == 0))
773 return 0;
775 if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
776 return -1;
778 len = native_encode_expr (val, buf, sizeof (buf));
779 if (len == 0)
780 return -1;
781 for (i = 1; i < len; i++)
782 if (buf[i] != buf[0])
783 return -1;
784 return buf[0];
787 /* Generate a call to memset for PARTITION in LOOP. */
789 static void
790 generate_memset_builtin (struct loop *loop, partition_t partition)
792 gimple_stmt_iterator gsi;
793 gimple stmt, fn_call;
794 tree mem, fn, nb_bytes;
795 location_t loc;
796 tree val;
798 stmt = DR_STMT (partition->main_dr);
799 loc = gimple_location (stmt);
801 /* The new statements will be placed before LOOP. */
802 gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
804 nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
805 partition->plus_one);
806 nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
807 false, GSI_CONTINUE_LINKING);
808 mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
809 mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
810 false, GSI_CONTINUE_LINKING);
812 /* This exactly matches the pattern recognition in classify_partition. */
813 val = gimple_assign_rhs1 (stmt);
814 /* Handle constants like 0x15151515 and similarly
815 floating point constants etc. where all bytes are the same. */
816 int bytev = const_with_all_bytes_same (val);
817 if (bytev != -1)
818 val = build_int_cst (integer_type_node, bytev);
819 else if (TREE_CODE (val) == INTEGER_CST)
820 val = fold_convert (integer_type_node, val);
821 else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
823 tree tem = make_ssa_name (integer_type_node);
824 gimple cstmt = gimple_build_assign (tem, NOP_EXPR, val);
825 gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
826 val = tem;
829 fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
830 fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
831 gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
833 if (dump_file && (dump_flags & TDF_DETAILS))
835 fprintf (dump_file, "generated memset");
836 if (bytev == 0)
837 fprintf (dump_file, " zero\n");
838 else
839 fprintf (dump_file, "\n");
843 /* Generate a call to memcpy for PARTITION in LOOP. */
845 static void
846 generate_memcpy_builtin (struct loop *loop, partition_t partition)
848 gimple_stmt_iterator gsi;
849 gimple stmt, fn_call;
850 tree dest, src, fn, nb_bytes;
851 location_t loc;
852 enum built_in_function kind;
854 stmt = DR_STMT (partition->main_dr);
855 loc = gimple_location (stmt);
857 /* The new statements will be placed before LOOP. */
858 gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
860 nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
861 partition->plus_one);
862 nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
863 false, GSI_CONTINUE_LINKING);
864 dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
865 src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
866 if (ptr_derefs_may_alias_p (dest, src))
867 kind = BUILT_IN_MEMMOVE;
868 else
869 kind = BUILT_IN_MEMCPY;
871 dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
872 false, GSI_CONTINUE_LINKING);
873 src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
874 false, GSI_CONTINUE_LINKING);
875 fn = build_fold_addr_expr (builtin_decl_implicit (kind));
876 fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
877 gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
879 if (dump_file && (dump_flags & TDF_DETAILS))
881 if (kind == BUILT_IN_MEMCPY)
882 fprintf (dump_file, "generated memcpy\n");
883 else
884 fprintf (dump_file, "generated memmove\n");
888 /* Remove and destroy the loop LOOP. */
890 static void
891 destroy_loop (struct loop *loop)
893 unsigned nbbs = loop->num_nodes;
894 edge exit = single_exit (loop);
895 basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
896 basic_block *bbs;
897 unsigned i;
899 bbs = get_loop_body_in_dom_order (loop);
901 redirect_edge_pred (exit, src);
902 exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
903 exit->flags |= EDGE_FALLTHRU;
904 cancel_loop_tree (loop);
905 rescan_loop_exit (exit, false, true);
907 for (i = 0; i < nbbs; i++)
909 /* We have made sure to not leave any dangling uses of SSA
910 names defined in the loop. With the exception of virtuals.
911 Make sure we replace all uses of virtual defs that will remain
912 outside of the loop with the bare symbol as delete_basic_block
913 will release them. */
914 for (gphi_iterator gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi);
915 gsi_next (&gsi))
917 gphi *phi = gsi.phi ();
918 if (virtual_operand_p (gimple_phi_result (phi)))
919 mark_virtual_phi_result_for_renaming (phi);
921 for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);
922 gsi_next (&gsi))
924 gimple stmt = gsi_stmt (gsi);
925 tree vdef = gimple_vdef (stmt);
926 if (vdef && TREE_CODE (vdef) == SSA_NAME)
927 mark_virtual_operand_for_renaming (vdef);
929 delete_basic_block (bbs[i]);
931 free (bbs);
933 set_immediate_dominator (CDI_DOMINATORS, dest,
934 recompute_dominator (CDI_DOMINATORS, dest));
937 /* Generates code for PARTITION. */
939 static void
940 generate_code_for_partition (struct loop *loop,
941 partition_t partition, bool copy_p)
943 switch (partition->kind)
945 case PKIND_NORMAL:
946 /* Reductions all have to be in the last partition. */
947 gcc_assert (!partition_reduction_p (partition)
948 || !copy_p);
949 generate_loops_for_partition (loop, partition, copy_p);
950 return;
952 case PKIND_MEMSET:
953 generate_memset_builtin (loop, partition);
954 break;
956 case PKIND_MEMCPY:
957 generate_memcpy_builtin (loop, partition);
958 break;
960 default:
961 gcc_unreachable ();
964 /* Common tail for partitions we turn into a call. If this was the last
965 partition for which we generate code, we have to destroy the loop. */
966 if (!copy_p)
967 destroy_loop (loop);
971 /* Returns a partition with all the statements needed for computing
972 the vertex V of the RDG, also including the loop exit conditions. */
974 static partition_t
975 build_rdg_partition_for_vertex (struct graph *rdg, int v)
977 partition_t partition = partition_alloc (NULL, NULL);
978 auto_vec<int, 3> nodes;
979 unsigned i;
980 int x;
982 graphds_dfs (rdg, &v, 1, &nodes, false, NULL);
984 FOR_EACH_VEC_ELT (nodes, i, x)
986 bitmap_set_bit (partition->stmts, x);
987 bitmap_set_bit (partition->loops,
988 loop_containing_stmt (RDG_STMT (rdg, x))->num);
991 return partition;
994 /* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
995 For the moment we detect only the memset zero pattern. */
997 static void
998 classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
1000 bitmap_iterator bi;
1001 unsigned i;
1002 tree nb_iter;
1003 data_reference_p single_load, single_store;
1004 bool volatiles_p = false;
1005 bool plus_one = false;
1007 partition->kind = PKIND_NORMAL;
1008 partition->main_dr = NULL;
1009 partition->secondary_dr = NULL;
1010 partition->niter = NULL_TREE;
1011 partition->plus_one = false;
1013 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
1015 gimple stmt = RDG_STMT (rdg, i);
1017 if (gimple_has_volatile_ops (stmt))
1018 volatiles_p = true;
1020 /* If the stmt has uses outside of the loop mark it as reduction. */
1021 if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
1023 partition->reduction_p = true;
1024 return;
1028 /* Perform general partition disqualification for builtins. */
1029 if (volatiles_p
1030 || !flag_tree_loop_distribute_patterns)
1031 return;
1033 /* Detect memset and memcpy. */
1034 single_load = NULL;
1035 single_store = NULL;
1036 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
1038 gimple stmt = RDG_STMT (rdg, i);
1039 data_reference_p dr;
1040 unsigned j;
1042 if (gimple_code (stmt) == GIMPLE_PHI)
1043 continue;
1045 /* Any scalar stmts are ok. */
1046 if (!gimple_vuse (stmt))
1047 continue;
1049 /* Otherwise just regular loads/stores. */
1050 if (!gimple_assign_single_p (stmt))
1051 return;
1053 /* But exactly one store and/or load. */
1054 for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
1056 if (DR_IS_READ (dr))
1058 if (single_load != NULL)
1059 return;
1060 single_load = dr;
1062 else
1064 if (single_store != NULL)
1065 return;
1066 single_store = dr;
1071 if (!single_store)
1072 return;
1074 nb_iter = number_of_latch_executions (loop);
1075 if (!nb_iter || nb_iter == chrec_dont_know)
1076 return;
1077 if (dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
1078 gimple_bb (DR_STMT (single_store))))
1079 plus_one = true;
1081 if (single_store && !single_load)
1083 gimple stmt = DR_STMT (single_store);
1084 tree rhs = gimple_assign_rhs1 (stmt);
1085 if (const_with_all_bytes_same (rhs) == -1
1086 && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
1087 || (TYPE_MODE (TREE_TYPE (rhs))
1088 != TYPE_MODE (unsigned_char_type_node))))
1089 return;
1090 if (TREE_CODE (rhs) == SSA_NAME
1091 && !SSA_NAME_IS_DEFAULT_DEF (rhs)
1092 && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
1093 return;
1094 if (!adjacent_dr_p (single_store)
1095 || !dominated_by_p (CDI_DOMINATORS,
1096 loop->latch, gimple_bb (stmt)))
1097 return;
1098 partition->kind = PKIND_MEMSET;
1099 partition->main_dr = single_store;
1100 partition->niter = nb_iter;
1101 partition->plus_one = plus_one;
1103 else if (single_store && single_load)
1105 gimple store = DR_STMT (single_store);
1106 gimple load = DR_STMT (single_load);
1107 /* Direct aggregate copy or via an SSA name temporary. */
1108 if (load != store
1109 && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
1110 return;
1111 if (!adjacent_dr_p (single_store)
1112 || !adjacent_dr_p (single_load)
1113 || !operand_equal_p (DR_STEP (single_store),
1114 DR_STEP (single_load), 0)
1115 || !dominated_by_p (CDI_DOMINATORS,
1116 loop->latch, gimple_bb (store)))
1117 return;
1118 /* Now check that if there is a dependence this dependence is
1119 of a suitable form for memmove. */
1120 vec<loop_p> loops = vNULL;
1121 ddr_p ddr;
1122 loops.safe_push (loop);
1123 ddr = initialize_data_dependence_relation (single_load, single_store,
1124 loops);
1125 compute_affine_dependence (ddr, loop);
1126 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1128 free_dependence_relation (ddr);
1129 loops.release ();
1130 return;
1132 if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
1134 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1136 free_dependence_relation (ddr);
1137 loops.release ();
1138 return;
1140 lambda_vector dist_v;
1141 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
1143 int dist = dist_v[index_in_loop_nest (loop->num,
1144 DDR_LOOP_NEST (ddr))];
1145 if (dist > 0 && !DDR_REVERSED_P (ddr))
1147 free_dependence_relation (ddr);
1148 loops.release ();
1149 return;
1153 free_dependence_relation (ddr);
1154 loops.release ();
1155 partition->kind = PKIND_MEMCPY;
1156 partition->main_dr = single_store;
1157 partition->secondary_dr = single_load;
1158 partition->niter = nb_iter;
1159 partition->plus_one = plus_one;
1163 /* For a data reference REF, return the declaration of its base
1164 address or NULL_TREE if the base is not determined. */
1166 static tree
1167 ref_base_address (data_reference_p dr)
1169 tree base_address = DR_BASE_ADDRESS (dr);
1170 if (base_address
1171 && TREE_CODE (base_address) == ADDR_EXPR)
1172 return TREE_OPERAND (base_address, 0);
1174 return base_address;
1177 /* Returns true when PARTITION1 and PARTITION2 have similar memory
1178 accesses in RDG. */
1180 static bool
1181 similar_memory_accesses (struct graph *rdg, partition_t partition1,
1182 partition_t partition2)
1184 unsigned i, j, k, l;
1185 bitmap_iterator bi, bj;
1186 data_reference_p ref1, ref2;
1188 /* First check whether in the intersection of the two partitions are
1189 any loads or stores. Common loads are the situation that happens
1190 most often. */
1191 EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
1192 if (RDG_MEM_WRITE_STMT (rdg, i)
1193 || RDG_MEM_READS_STMT (rdg, i))
1194 return true;
1196 /* Then check all data-references against each other. */
1197 EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
1198 if (RDG_MEM_WRITE_STMT (rdg, i)
1199 || RDG_MEM_READS_STMT (rdg, i))
1200 EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
1201 if (RDG_MEM_WRITE_STMT (rdg, j)
1202 || RDG_MEM_READS_STMT (rdg, j))
1204 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
1206 tree base1 = ref_base_address (ref1);
1207 if (base1)
1208 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
1209 if (base1 == ref_base_address (ref2))
1210 return true;
1214 return false;
1217 /* Aggregate several components into a useful partition that is
1218 registered in the PARTITIONS vector. Partitions will be
1219 distributed in different loops. */
1221 static void
1222 rdg_build_partitions (struct graph *rdg,
1223 vec<gimple> starting_stmts,
1224 vec<partition_t> *partitions)
1226 bitmap processed = BITMAP_ALLOC (NULL);
1227 int i;
1228 gimple stmt;
1230 FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
1232 int v = rdg_vertex_for_stmt (rdg, stmt);
1234 if (dump_file && (dump_flags & TDF_DETAILS))
1235 fprintf (dump_file,
1236 "ldist asked to generate code for vertex %d\n", v);
1238 /* If the vertex is already contained in another partition so
1239 is the partition rooted at it. */
1240 if (bitmap_bit_p (processed, v))
1241 continue;
1243 partition_t partition = build_rdg_partition_for_vertex (rdg, v);
1244 bitmap_ior_into (processed, partition->stmts);
1246 if (dump_file && (dump_flags & TDF_DETAILS))
1248 fprintf (dump_file, "ldist useful partition:\n");
1249 dump_bitmap (dump_file, partition->stmts);
1252 partitions->safe_push (partition);
1255 /* All vertices should have been assigned to at least one partition now,
1256 other than vertices belonging to dead code. */
1258 BITMAP_FREE (processed);
1261 /* Dump to FILE the PARTITIONS. */
1263 static void
1264 dump_rdg_partitions (FILE *file, vec<partition_t> partitions)
1266 int i;
1267 partition_t partition;
1269 FOR_EACH_VEC_ELT (partitions, i, partition)
1270 debug_bitmap_file (file, partition->stmts);
1273 /* Debug PARTITIONS. */
1274 extern void debug_rdg_partitions (vec<partition_t> );
1276 DEBUG_FUNCTION void
1277 debug_rdg_partitions (vec<partition_t> partitions)
1279 dump_rdg_partitions (stderr, partitions);
1282 /* Returns the number of read and write operations in the RDG. */
1284 static int
1285 number_of_rw_in_rdg (struct graph *rdg)
1287 int i, res = 0;
1289 for (i = 0; i < rdg->n_vertices; i++)
1291 if (RDG_MEM_WRITE_STMT (rdg, i))
1292 ++res;
1294 if (RDG_MEM_READS_STMT (rdg, i))
1295 ++res;
1298 return res;
1301 /* Returns the number of read and write operations in a PARTITION of
1302 the RDG. */
1304 static int
1305 number_of_rw_in_partition (struct graph *rdg, partition_t partition)
1307 int res = 0;
1308 unsigned i;
1309 bitmap_iterator ii;
1311 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
1313 if (RDG_MEM_WRITE_STMT (rdg, i))
1314 ++res;
1316 if (RDG_MEM_READS_STMT (rdg, i))
1317 ++res;
1320 return res;
1323 /* Returns true when one of the PARTITIONS contains all the read or
1324 write operations of RDG. */
1326 static bool
1327 partition_contains_all_rw (struct graph *rdg,
1328 vec<partition_t> partitions)
1330 int i;
1331 partition_t partition;
1332 int nrw = number_of_rw_in_rdg (rdg);
1334 FOR_EACH_VEC_ELT (partitions, i, partition)
1335 if (nrw == number_of_rw_in_partition (rdg, partition))
1336 return true;
1338 return false;
1341 /* Compute partition dependence created by the data references in DRS1
1342 and DRS2 and modify and return DIR according to that. */
1344 static int
1345 pg_add_dependence_edges (struct graph *rdg, vec<loop_p> loops, int dir,
1346 vec<data_reference_p> drs1,
1347 vec<data_reference_p> drs2)
1349 data_reference_p dr1, dr2;
1351 /* dependence direction - 0 is no dependence, -1 is back,
1352 1 is forth, 2 is both (we can stop then, merging will occur). */
1353 for (int ii = 0; drs1.iterate (ii, &dr1); ++ii)
1354 for (int jj = 0; drs2.iterate (jj, &dr2); ++jj)
1356 data_reference_p saved_dr1 = dr1;
1357 int this_dir = 1;
1358 ddr_p ddr;
1359 /* Re-shuffle data-refs to be in dominator order. */
1360 if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
1361 > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
1363 std::swap (dr1, dr2);
1364 this_dir = -this_dir;
1366 ddr = initialize_data_dependence_relation (dr1, dr2, loops);
1367 compute_affine_dependence (ddr, loops[0]);
1368 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1369 this_dir = 2;
1370 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1372 if (DDR_REVERSED_P (ddr))
1374 std::swap (dr1, dr2);
1375 this_dir = -this_dir;
1377 /* Known dependences can still be unordered througout the
1378 iteration space, see gcc.dg/tree-ssa/ldist-16.c. */
1379 if (DDR_NUM_DIST_VECTS (ddr) != 1)
1380 this_dir = 2;
1381 /* If the overlap is exact preserve stmt order. */
1382 else if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1))
1384 else
1386 /* Else as the distance vector is lexicographic positive
1387 swap the dependence direction. */
1388 this_dir = -this_dir;
1391 else
1392 this_dir = 0;
1393 free_dependence_relation (ddr);
1394 if (dir == 0)
1395 dir = this_dir;
1396 else if (dir != this_dir)
1397 return 2;
1398 /* Shuffle "back" dr1. */
1399 dr1 = saved_dr1;
1401 return dir;
1404 /* Compare postorder number of the partition graph vertices V1 and V2. */
1406 static int
1407 pgcmp (const void *v1_, const void *v2_)
1409 const vertex *v1 = (const vertex *)v1_;
1410 const vertex *v2 = (const vertex *)v2_;
1411 return v2->post - v1->post;
1414 /* Distributes the code from LOOP in such a way that producer
1415 statements are placed before consumer statements. Tries to separate
1416 only the statements from STMTS into separate loops.
1417 Returns the number of distributed loops. */
1419 static int
1420 distribute_loop (struct loop *loop, vec<gimple> stmts,
1421 control_dependences *cd, int *nb_calls)
1423 struct graph *rdg;
1424 partition_t partition;
1425 bool any_builtin;
1426 int i, nbp;
1427 graph *pg = NULL;
1428 int num_sccs = 1;
1430 *nb_calls = 0;
1431 auto_vec<loop_p, 3> loop_nest;
1432 if (!find_loop_nest (loop, &loop_nest))
1433 return 0;
1435 rdg = build_rdg (loop_nest, cd);
1436 if (!rdg)
1438 if (dump_file && (dump_flags & TDF_DETAILS))
1439 fprintf (dump_file,
1440 "Loop %d not distributed: failed to build the RDG.\n",
1441 loop->num);
1443 return 0;
1446 if (dump_file && (dump_flags & TDF_DETAILS))
1447 dump_rdg (dump_file, rdg);
1449 auto_vec<partition_t, 3> partitions;
1450 rdg_build_partitions (rdg, stmts, &partitions);
1452 any_builtin = false;
1453 FOR_EACH_VEC_ELT (partitions, i, partition)
1455 classify_partition (loop, rdg, partition);
1456 any_builtin |= partition_builtin_p (partition);
1459 /* If we are only distributing patterns but did not detect any,
1460 simply bail out. */
1461 if (!flag_tree_loop_distribution
1462 && !any_builtin)
1464 nbp = 0;
1465 goto ldist_done;
1468 /* If we are only distributing patterns fuse all partitions that
1469 were not classified as builtins. This also avoids chopping
1470 a loop into pieces, separated by builtin calls. That is, we
1471 only want no or a single loop body remaining. */
1472 partition_t into;
1473 if (!flag_tree_loop_distribution)
1475 for (i = 0; partitions.iterate (i, &into); ++i)
1476 if (!partition_builtin_p (into))
1477 break;
1478 for (++i; partitions.iterate (i, &partition); ++i)
1479 if (!partition_builtin_p (partition))
1481 if (dump_file && (dump_flags & TDF_DETAILS))
1483 fprintf (dump_file, "fusing non-builtin partitions\n");
1484 dump_bitmap (dump_file, into->stmts);
1485 dump_bitmap (dump_file, partition->stmts);
1487 partition_merge_into (into, partition);
1488 partitions.unordered_remove (i);
1489 partition_free (partition);
1490 i--;
1494 /* Due to limitations in the transform phase we have to fuse all
1495 reduction partitions into the last partition so the existing
1496 loop will contain all loop-closed PHI nodes. */
1497 for (i = 0; partitions.iterate (i, &into); ++i)
1498 if (partition_reduction_p (into))
1499 break;
1500 for (i = i + 1; partitions.iterate (i, &partition); ++i)
1501 if (partition_reduction_p (partition))
1503 if (dump_file && (dump_flags & TDF_DETAILS))
1505 fprintf (dump_file, "fusing partitions\n");
1506 dump_bitmap (dump_file, into->stmts);
1507 dump_bitmap (dump_file, partition->stmts);
1508 fprintf (dump_file, "because they have reductions\n");
1510 partition_merge_into (into, partition);
1511 partitions.unordered_remove (i);
1512 partition_free (partition);
1513 i--;
1516 /* Apply our simple cost model - fuse partitions with similar
1517 memory accesses. */
1518 for (i = 0; partitions.iterate (i, &into); ++i)
1520 if (partition_builtin_p (into))
1521 continue;
1522 for (int j = i + 1;
1523 partitions.iterate (j, &partition); ++j)
1525 if (!partition_builtin_p (partition)
1526 && similar_memory_accesses (rdg, into, partition))
1528 if (dump_file && (dump_flags & TDF_DETAILS))
1530 fprintf (dump_file, "fusing partitions\n");
1531 dump_bitmap (dump_file, into->stmts);
1532 dump_bitmap (dump_file, partition->stmts);
1533 fprintf (dump_file, "because they have similar "
1534 "memory accesses\n");
1536 partition_merge_into (into, partition);
1537 partitions.unordered_remove (j);
1538 partition_free (partition);
1539 j--;
1544 /* Build the partition dependency graph. */
1545 if (partitions.length () > 1)
1547 pg = new_graph (partitions.length ());
1548 struct pgdata {
1549 partition_t partition;
1550 vec<data_reference_p> writes;
1551 vec<data_reference_p> reads;
1553 #define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
1554 for (i = 0; partitions.iterate (i, &partition); ++i)
1556 vertex *v = &pg->vertices[i];
1557 pgdata *data = new pgdata;
1558 data_reference_p dr;
1559 /* FIXME - leaks. */
1560 v->data = data;
1561 bitmap_iterator bi;
1562 unsigned j;
1563 data->partition = partition;
1564 data->reads = vNULL;
1565 data->writes = vNULL;
1566 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, j, bi)
1567 for (int k = 0; RDG_DATAREFS (rdg, j).iterate (k, &dr); ++k)
1568 if (DR_IS_READ (dr))
1569 data->reads.safe_push (dr);
1570 else
1571 data->writes.safe_push (dr);
1573 partition_t partition1, partition2;
1574 for (i = 0; partitions.iterate (i, &partition1); ++i)
1575 for (int j = i + 1; partitions.iterate (j, &partition2); ++j)
1577 /* dependence direction - 0 is no dependence, -1 is back,
1578 1 is forth, 2 is both (we can stop then, merging will occur). */
1579 int dir = 0;
1580 dir = pg_add_dependence_edges (rdg, loop_nest, dir,
1581 PGDATA(i)->writes,
1582 PGDATA(j)->reads);
1583 if (dir != 2)
1584 dir = pg_add_dependence_edges (rdg, loop_nest, dir,
1585 PGDATA(i)->reads,
1586 PGDATA(j)->writes);
1587 if (dir != 2)
1588 dir = pg_add_dependence_edges (rdg, loop_nest, dir,
1589 PGDATA(i)->writes,
1590 PGDATA(j)->writes);
1591 if (dir == 1 || dir == 2)
1592 add_edge (pg, i, j);
1593 if (dir == -1 || dir == 2)
1594 add_edge (pg, j, i);
1597 /* Add edges to the reduction partition (if any) to force it last. */
1598 unsigned j;
1599 for (j = 0; partitions.iterate (j, &partition); ++j)
1600 if (partition_reduction_p (partition))
1601 break;
1602 if (j < partitions.length ())
1604 for (unsigned i = 0; partitions.iterate (i, &partition); ++i)
1605 if (i != j)
1606 add_edge (pg, i, j);
1609 /* Compute partitions we cannot separate and fuse them. */
1610 num_sccs = graphds_scc (pg, NULL);
1611 for (i = 0; i < num_sccs; ++i)
1613 partition_t first;
1614 int j;
1615 for (j = 0; partitions.iterate (j, &first); ++j)
1616 if (pg->vertices[j].component == i)
1617 break;
1618 for (j = j + 1; partitions.iterate (j, &partition); ++j)
1619 if (pg->vertices[j].component == i)
1621 if (dump_file && (dump_flags & TDF_DETAILS))
1623 fprintf (dump_file, "fusing partitions\n");
1624 dump_bitmap (dump_file, first->stmts);
1625 dump_bitmap (dump_file, partition->stmts);
1626 fprintf (dump_file, "because they are in the same "
1627 "dependence SCC\n");
1629 partition_merge_into (first, partition);
1630 partitions[j] = NULL;
1631 partition_free (partition);
1632 PGDATA (j)->partition = NULL;
1636 /* Now order the remaining nodes in postorder. */
1637 qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
1638 partitions.truncate (0);
1639 for (i = 0; i < pg->n_vertices; ++i)
1641 pgdata *data = PGDATA (i);
1642 if (data->partition)
1643 partitions.safe_push (data->partition);
1644 data->reads.release ();
1645 data->writes.release ();
1646 delete data;
1648 gcc_assert (partitions.length () == (unsigned)num_sccs);
1649 free_graph (pg);
1652 nbp = partitions.length ();
1653 if (nbp == 0
1654 || (nbp == 1 && !partition_builtin_p (partitions[0]))
1655 || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
1657 nbp = 0;
1658 goto ldist_done;
1661 if (dump_file && (dump_flags & TDF_DETAILS))
1662 dump_rdg_partitions (dump_file, partitions);
1664 FOR_EACH_VEC_ELT (partitions, i, partition)
1666 if (partition_builtin_p (partition))
1667 (*nb_calls)++;
1668 generate_code_for_partition (loop, partition, i < nbp - 1);
1671 ldist_done:
1673 FOR_EACH_VEC_ELT (partitions, i, partition)
1674 partition_free (partition);
1676 free_rdg (rdg);
1677 return nbp - *nb_calls;
1680 /* Distribute all loops in the current function. */
1682 namespace {
1684 const pass_data pass_data_loop_distribution =
1686 GIMPLE_PASS, /* type */
1687 "ldist", /* name */
1688 OPTGROUP_LOOP, /* optinfo_flags */
1689 TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
1690 ( PROP_cfg | PROP_ssa ), /* properties_required */
1691 0, /* properties_provided */
1692 0, /* properties_destroyed */
1693 0, /* todo_flags_start */
1694 0, /* todo_flags_finish */
1697 class pass_loop_distribution : public gimple_opt_pass
1699 public:
1700 pass_loop_distribution (gcc::context *ctxt)
1701 : gimple_opt_pass (pass_data_loop_distribution, ctxt)
1704 /* opt_pass methods: */
1705 virtual bool gate (function *)
1707 return flag_tree_loop_distribution
1708 || flag_tree_loop_distribute_patterns;
1711 virtual unsigned int execute (function *);
1713 }; // class pass_loop_distribution
1715 unsigned int
1716 pass_loop_distribution::execute (function *fun)
1718 struct loop *loop;
1719 bool changed = false;
1720 basic_block bb;
1721 control_dependences *cd = NULL;
1723 FOR_ALL_BB_FN (bb, fun)
1725 gimple_stmt_iterator gsi;
1726 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1727 gimple_set_uid (gsi_stmt (gsi), -1);
1728 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1729 gimple_set_uid (gsi_stmt (gsi), -1);
1732 /* We can at the moment only distribute non-nested loops, thus restrict
1733 walking to innermost loops. */
1734 FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
1736 auto_vec<gimple> work_list;
1737 basic_block *bbs;
1738 int num = loop->num;
1739 unsigned int i;
1741 /* If the loop doesn't have a single exit we will fail anyway,
1742 so do that early. */
1743 if (!single_exit (loop))
1744 continue;
1746 /* Only optimize hot loops. */
1747 if (!optimize_loop_for_speed_p (loop))
1748 continue;
1750 /* Initialize the worklist with stmts we seed the partitions with. */
1751 bbs = get_loop_body_in_dom_order (loop);
1752 for (i = 0; i < loop->num_nodes; ++i)
1754 for (gphi_iterator gsi = gsi_start_phis (bbs[i]);
1755 !gsi_end_p (gsi);
1756 gsi_next (&gsi))
1758 gphi *phi = gsi.phi ();
1759 if (virtual_operand_p (gimple_phi_result (phi)))
1760 continue;
1761 /* Distribute stmts which have defs that are used outside of
1762 the loop. */
1763 if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
1764 continue;
1765 work_list.safe_push (phi);
1767 for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
1768 !gsi_end_p (gsi);
1769 gsi_next (&gsi))
1771 gimple stmt = gsi_stmt (gsi);
1773 /* If there is a stmt with side-effects bail out - we
1774 cannot and should not distribute this loop. */
1775 if (gimple_has_side_effects (stmt))
1777 work_list.truncate (0);
1778 goto out;
1781 /* Distribute stmts which have defs that are used outside of
1782 the loop. */
1783 if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
1785 /* Otherwise only distribute stores for now. */
1786 else if (!gimple_vdef (stmt))
1787 continue;
1789 work_list.safe_push (stmt);
1792 out:
1793 free (bbs);
1795 int nb_generated_loops = 0;
1796 int nb_generated_calls = 0;
1797 location_t loc = find_loop_location (loop);
1798 if (work_list.length () > 0)
1800 if (!cd)
1802 calculate_dominance_info (CDI_DOMINATORS);
1803 calculate_dominance_info (CDI_POST_DOMINATORS);
1804 cd = new control_dependences (create_edge_list ());
1805 free_dominance_info (CDI_POST_DOMINATORS);
1807 nb_generated_loops = distribute_loop (loop, work_list, cd,
1808 &nb_generated_calls);
1811 if (nb_generated_loops + nb_generated_calls > 0)
1813 changed = true;
1814 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
1815 loc, "Loop %d distributed: split to %d loops "
1816 "and %d library calls.\n",
1817 num, nb_generated_loops, nb_generated_calls);
1819 else if (dump_file && (dump_flags & TDF_DETAILS))
1820 fprintf (dump_file, "Loop %d is the same.\n", num);
1823 if (cd)
1824 delete cd;
1826 if (changed)
1828 /* Cached scalar evolutions now may refer to wrong or non-existing
1829 loops. */
1830 scev_reset_htab ();
1831 mark_virtual_operands_for_renaming (fun);
1832 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1835 #ifdef ENABLE_CHECKING
1836 verify_loop_structure ();
1837 #endif
1839 return 0;
1842 } // anon namespace
1844 gimple_opt_pass *
1845 make_pass_loop_distribution (gcc::context *ctxt)
1847 return new pass_loop_distribution (ctxt);