* omp-low.c (lower_omp_target): Remove unreachable code & merge
[official-gcc.git] / gcc / tree-loop-distribution.c
blob4003584e6d99510e282d84886713764df90d4ba5
1 /* Loop distribution.
2 Copyright (C) 2006-2015 Free Software Foundation, Inc.
3 Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
4 and Sebastian Pop <sebastian.pop@amd.com>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 3, or (at your option) any
11 later version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* This pass performs loop distribution: for example, the loop
24 |DO I = 2, N
25 | A(I) = B(I) + C
26 | D(I) = A(I-1)*E
27 |ENDDO
29 is transformed to
31 |DOALL I = 2, N
32 | A(I) = B(I) + C
33 |ENDDO
35 |DOALL I = 2, N
36 | D(I) = A(I-1)*E
37 |ENDDO
39 This pass uses an RDG, Reduced Dependence Graph built on top of the
40 data dependence relations. The RDG is then topologically sorted to
41 obtain a map of information producers/consumers based on which it
42 generates the new loops. */
44 #include "config.h"
45 #include "system.h"
46 #include "coretypes.h"
47 #include "backend.h"
48 #include "hard-reg-set.h"
49 #include "tree.h"
50 #include "gimple.h"
51 #include "cfghooks.h"
52 #include "tree-pass.h"
53 #include "ssa.h"
54 #include "gimple-pretty-print.h"
55 #include "alias.h"
56 #include "fold-const.h"
57 #include "cfganal.h"
58 #include "internal-fn.h"
59 #include "gimple-iterator.h"
60 #include "gimplify-me.h"
61 #include "stor-layout.h"
62 #include "tree-cfg.h"
63 #include "tree-ssa-loop-manip.h"
64 #include "tree-ssa-loop.h"
65 #include "tree-into-ssa.h"
66 #include "tree-ssa.h"
67 #include "cfgloop.h"
68 #include "tree-chrec.h"
69 #include "tree-data-ref.h"
70 #include "tree-scalar-evolution.h"
71 #include "tree-vectorizer.h"
/* A Reduced Dependence Graph (RDG) vertex representing a statement.  */
struct rdg_vertex
{
  /* The statement represented by this vertex.  */
  gimple *stmt;

  /* Vector of data-references in this statement.  */
  vec<data_reference_p> datarefs;

  /* True when the statement contains a write to memory.  */
  bool has_mem_write;

  /* True when the statement contains a read from memory.  */
  bool has_mem_reads;
};

/* Accessors for the rdg_vertex payload hanging off a graph vertex V,
   and convenience forms indexing vertex I of graph RDG directly.  */
#define RDGV_STMT(V)     ((struct rdg_vertex *) ((V)->data))->stmt
#define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
#define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
#define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
#define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
#define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
#define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
#define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
/* Data dependence type.  */

enum rdg_dep_type
{
  /* Read After Write (RAW).  */
  flow_dd = 'f',

  /* Control dependence (execute conditional on).  */
  control_dd = 'c'
};

/* Dependence information attached to an edge of the RDG.  */

struct rdg_edge
{
  /* Type of the dependence.  */
  enum rdg_dep_type type;
};

/* Accessor for the rdg_edge payload hanging off a graph edge E.  */
#define RDGE_TYPE(E) ((struct rdg_edge *) ((E)->data))->type
120 /* Dump vertex I in RDG to FILE. */
122 static void
123 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
125 struct vertex *v = &(rdg->vertices[i]);
126 struct graph_edge *e;
128 fprintf (file, "(vertex %d: (%s%s) (in:", i,
129 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
130 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
132 if (v->pred)
133 for (e = v->pred; e; e = e->pred_next)
134 fprintf (file, " %d", e->src);
136 fprintf (file, ") (out:");
138 if (v->succ)
139 for (e = v->succ; e; e = e->succ_next)
140 fprintf (file, " %d", e->dest);
142 fprintf (file, ")\n");
143 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
144 fprintf (file, ")\n");
147 /* Call dump_rdg_vertex on stderr. */
149 DEBUG_FUNCTION void
150 debug_rdg_vertex (struct graph *rdg, int i)
152 dump_rdg_vertex (stderr, rdg, i);
155 /* Dump the reduced dependence graph RDG to FILE. */
157 static void
158 dump_rdg (FILE *file, struct graph *rdg)
160 fprintf (file, "(rdg\n");
161 for (int i = 0; i < rdg->n_vertices; i++)
162 dump_rdg_vertex (file, rdg, i);
163 fprintf (file, ")\n");
166 /* Call dump_rdg on stderr. */
168 DEBUG_FUNCTION void
169 debug_rdg (struct graph *rdg)
171 dump_rdg (stderr, rdg);
/* Write RDG to FILE in graphviz "dot" syntax: one node per vertex
   labeled with its statement, colored by memory access kind, and one
   arrow per dependence edge.  */

static void
dot_rdg_1 (FILE *file, struct graph *rdg)
{
  int i;
  pretty_printer buffer;
  pp_needs_newline (&buffer) = false;
  /* Route the pretty-printer output straight into FILE.  */
  buffer.buffer->stream = file;

  fprintf (file, "digraph RDG {\n");

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      fprintf (file, "%d [label=\"[%d] ", i, i);
      pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
      pp_flush (&buffer);
      fprintf (file, "\"]\n");

      /* Highlight reads from memory.  */
      if (RDG_MEM_READS_STMT (rdg, i))
	fprintf (file, "%d [style=filled, fillcolor=green]\n", i);

      /* Highlight stores to memory.  */
      if (RDG_MEM_WRITE_STMT (rdg, i))
	fprintf (file, "%d [style=filled, fillcolor=red]\n", i);

      if (v->succ)
	for (e = v->succ; e; e = e->succ_next)
	  switch (RDGE_TYPE (e))
	    {
	    case flow_dd:
	      /* Flow dependences are the most common: print them
		 without a label.  */
	      fprintf (file, "%d -> %d \n", i, e->dest);
	      break;

	    case control_dd:
	      fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
	      break;

	    default:
	      gcc_unreachable ();
	    }
    }

  fprintf (file, "}\n\n");
}
/* Display the Reduced Dependence Graph using dotty.  */

DEBUG_FUNCTION void
dot_rdg (struct graph *rdg)
{
  /* When debugging, you may want to enable the following code.  */
#ifdef HAVE_POPEN
  FILE *file = popen ("dot -Tx11", "w");
  if (!file)
    return;
  dot_rdg_1 (file, rdg);
  fflush (file);
  /* NOTE(review): closing the underlying descriptor before pclose
     presumably signals EOF to "dot" so it renders while we wait;
     pclose's own fclose will then fail harmlessly.  Confirm this is
     the intent before changing it.  */
  close (fileno (file));
  pclose (file);
#else
  dot_rdg_1 (stderr, rdg);
#endif
}
/* Returns the index of STMT in RDG, or -1 if STMT is not part of the
   graph.  The mapping is stored in the statement's uid (set by
   create_rdg_vertices); RDG is only read by the checking assert.  */

static int
rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple *stmt)
{
  int index = gimple_uid (stmt);
  gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
  return index;
}
252 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
253 the index of DEF in RDG. */
255 static void
256 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
258 use_operand_p imm_use_p;
259 imm_use_iterator iterator;
261 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
263 struct graph_edge *e;
264 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
266 if (use < 0)
267 continue;
269 e = add_edge (rdg, idef, use);
270 e->data = XNEW (struct rdg_edge);
271 RDGE_TYPE (e) = flow_dd;
/* Creates an edge for the control dependences of BB to the vertex V.
   CD provides, for each basic block, the set of CFG edges it is
   control dependent on.  */

static void
create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
				    int v, control_dependences *cd)
{
  bitmap_iterator bi;
  unsigned edge_n;
  EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
			    0, edge_n, bi)
    {
      /* The source block of the controlling edge ends in the control
	 statement (if any) that V depends on.  */
      basic_block cond_bb = cd->get_edge (edge_n)->src;
      gimple *stmt = last_stmt (cond_bb);
      if (stmt && is_ctrl_stmt (stmt))
	{
	  struct graph_edge *e;
	  int c = rdg_vertex_for_stmt (rdg, stmt);
	  /* Control statements outside the region have no vertex.  */
	  if (c < 0)
	    continue;

	  e = add_edge (rdg, c, v);
	  e->data = XNEW (struct rdg_edge);
	  RDGE_TYPE (e) = control_dd;
	}
    }
}
302 /* Creates the edges of the reduced dependence graph RDG. */
304 static void
305 create_rdg_flow_edges (struct graph *rdg)
307 int i;
308 def_operand_p def_p;
309 ssa_op_iter iter;
311 for (i = 0; i < rdg->n_vertices; i++)
312 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
313 iter, SSA_OP_DEF)
314 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
/* Creates the control dependence edges of the reduced dependence
   graph RDG using the control dependence info CD.  */

static void
create_rdg_cd_edges (struct graph *rdg, control_dependences *cd)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      gimple *stmt = RDG_STMT (rdg, i);
      if (gimple_code (stmt) == GIMPLE_PHI)
	{
	  /* A PHI depends on the control of all incoming edges'
	     source blocks, not just the block holding the PHI.  */
	  edge_iterator ei;
	  edge e;
	  FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
	    create_edge_for_control_dependence (rdg, e->src, i, cd);
	}
      else
	create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
    }
}
/* Build the vertices of the reduced dependence graph RDG from the
   statements STMTS of LOOP, appending their data references to
   *DATAREFS.  Return false if data reference analysis failed for some
   statement; the caller is then expected to free RDG (the partially
   filled vertex data is reachable through it).  */

static bool
create_rdg_vertices (struct graph *rdg, vec<gimple *> stmts, loop_p loop,
		     vec<data_reference_p> *datarefs)
{
  int i;
  gimple *stmt;

  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      struct vertex *v = &(rdg->vertices[i]);

      /* Record statement to vertex mapping.  */
      gimple_set_uid (stmt, i);

      v->data = XNEW (struct rdg_vertex);
      RDGV_STMT (v) = stmt;
      RDGV_DATAREFS (v).create (0);
      RDGV_HAS_MEM_WRITE (v) = false;
      RDGV_HAS_MEM_READS (v) = false;
      /* PHIs never access memory; nothing more to record.  */
      if (gimple_code (stmt) == GIMPLE_PHI)
	continue;

      /* Remember where this stmt's datarefs start in *DATAREFS so we
	 can attach exactly the newly found ones to the vertex.  */
      unsigned drp = datarefs->length ();
      if (!find_data_references_in_stmt (loop, stmt, datarefs))
	return false;
      for (unsigned j = drp; j < datarefs->length (); ++j)
	{
	  data_reference_p dr = (*datarefs)[j];
	  if (DR_IS_READ (dr))
	    RDGV_HAS_MEM_READS (v) = true;
	  else
	    RDGV_HAS_MEM_WRITE (v) = true;
	  RDGV_DATAREFS (v).safe_push (dr);
	}
    }
  return true;
}
380 /* Initialize STMTS with all the statements of LOOP. The order in
381 which we discover statements is important as
382 generate_loops_for_partition is using the same traversal for
383 identifying statements in loop copies. */
385 static void
386 stmts_from_loop (struct loop *loop, vec<gimple *> *stmts)
388 unsigned int i;
389 basic_block *bbs = get_loop_body_in_dom_order (loop);
391 for (i = 0; i < loop->num_nodes; i++)
393 basic_block bb = bbs[i];
395 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
396 gsi_next (&bsi))
397 if (!virtual_operand_p (gimple_phi_result (bsi.phi ())))
398 stmts->safe_push (bsi.phi ());
400 for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
401 gsi_next (&bsi))
403 gimple *stmt = gsi_stmt (bsi);
404 if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
405 stmts->safe_push (stmt);
409 free (bbs);
/* Free the reduced dependence graph RDG: per-edge and per-vertex
   payloads, the statement-uid mapping, and the graph itself.  */

static void
free_rdg (struct graph *rdg)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      for (e = v->succ; e; e = e->succ_next)
	free (e->data);

      /* Vertex data may be absent when vertex creation failed midway
	 (see create_rdg_vertices).  */
      if (v->data)
	{
	  /* Restore the "not in an RDG" uid sentinel.  */
	  gimple_set_uid (RDGV_STMT (v), -1);
	  free_data_refs (RDGV_DATAREFS (v));
	  free (v->data);
	}
    }

  free_graph (rdg);
}
/* Build the Reduced Dependence Graph (RDG) with one vertex per
   statement of the loop nest LOOP_NEST, and one edge per data dependence or
   scalar dependence.  CD, when non-NULL, supplies control dependence
   info from which control edges are added as well.  Returns NULL when
   data reference analysis fails.  */

static struct graph *
build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
{
  struct graph *rdg;
  vec<data_reference_p> datarefs;

  /* Create the RDG vertices from the stmts of the loop nest.  */
  auto_vec<gimple *, 10> stmts;
  stmts_from_loop (loop_nest[0], &stmts);
  rdg = new_graph (stmts.length ());
  datarefs.create (10);
  if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs))
    {
      /* Partial vertex data is reachable from RDG and freed there.  */
      datarefs.release ();
      free_rdg (rdg);
      return NULL;
    }
  stmts.release ();

  create_rdg_flow_edges (rdg);
  if (cd)
    create_rdg_cd_edges (rdg, cd);

  datarefs.release ();

  return rdg;
}
/* Kind of code a partition will be emitted as: a plain distributed
   loop, or a call to a memory builtin.  */
enum partition_kind {
  PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY
};

/* A partition of the loop's statements, candidate for becoming one
   distributed loop (or builtin call).  */
struct partition
{
  /* Statements in the partition, indexed by RDG vertex number.  */
  bitmap stmts;
  /* Loops (by loop->num) containing those statements.  */
  bitmap loops;
  /* True if the partition defines scalars used after the loop.  */
  bool reduction_p;
  enum partition_kind kind;
  /* data-references a kind != PKIND_NORMAL partition is about.  */
  data_reference_p main_dr;
  data_reference_p secondary_dr;
  /* Latch execution count and whether the store also executes on the
     exit iteration (see classify_partition).  */
  tree niter;
  bool plus_one;
};
490 /* Allocate and initialize a partition from BITMAP. */
492 static partition *
493 partition_alloc (bitmap stmts, bitmap loops)
495 partition *partition = XCNEW (struct partition);
496 partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
497 partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
498 partition->reduction_p = false;
499 partition->kind = PKIND_NORMAL;
500 return partition;
503 /* Free PARTITION. */
505 static void
506 partition_free (partition *partition)
508 BITMAP_FREE (partition->stmts);
509 BITMAP_FREE (partition->loops);
510 free (partition);
513 /* Returns true if the partition can be generated as a builtin. */
515 static bool
516 partition_builtin_p (partition *partition)
518 return partition->kind != PKIND_NORMAL;
521 /* Returns true if the partition contains a reduction. */
523 static bool
524 partition_reduction_p (partition *partition)
526 return partition->reduction_p;
529 /* Merge PARTITION into the partition DEST. */
531 static void
532 partition_merge_into (partition *dest, partition *partition)
534 dest->kind = PKIND_NORMAL;
535 bitmap_ior_into (dest->stmts, partition->stmts);
536 if (partition_reduction_p (partition))
537 dest->reduction_p = true;
541 /* Returns true when DEF is an SSA_NAME defined in LOOP and used after
542 the LOOP. */
544 static bool
545 ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
547 imm_use_iterator imm_iter;
548 use_operand_p use_p;
550 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
552 gimple *use_stmt = USE_STMT (use_p);
553 if (!is_gimple_debug (use_stmt)
554 && loop != loop_containing_stmt (use_stmt))
555 return true;
558 return false;
561 /* Returns true when STMT defines a scalar variable used after the
562 loop LOOP. */
564 static bool
565 stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple *stmt)
567 def_operand_p def_p;
568 ssa_op_iter op_iter;
570 if (gimple_code (stmt) == GIMPLE_PHI)
571 return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);
573 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
574 if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
575 return true;
577 return false;
/* Return a copy of LOOP placed before LOOP, on its preheader edge.
   SSA form is deliberately left not-updated here; the caller is
   responsible for the eventual update.  */

static struct loop *
copy_loop_before (struct loop *loop)
{
  struct loop *res;
  edge preheader = loop_preheader_edge (loop);

  initialize_original_copy_tables ();
  res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, NULL, preheader);
  gcc_assert (res != NULL);
  free_original_copy_tables ();
  /* Drop the pending SSA update: the copy gets its SSA form fixed up
     later, together with the partition rewriting.  */
  delete_update_ssa ();

  return res;
}
597 /* Creates an empty basic block after LOOP. */
599 static void
600 create_bb_after_loop (struct loop *loop)
602 edge exit = single_exit (loop);
604 if (!exit)
605 return;
607 split_edge (exit);
/* Generate code for PARTITION from the code in LOOP.  The loop is
   copied when COPY_P is true.  All the statements not flagged in the
   PARTITION bitmap are removed from the loop or from its copy.  The
   statements are indexed in sequence inside a basic block, and the
   basic blocks of a loop are taken in dom order.  */

static void
generate_loops_for_partition (struct loop *loop, partition *partition,
			      bool copy_p)
{
  unsigned i;
  basic_block *bbs;

  if (copy_p)
    {
      loop = copy_loop_before (loop);
      gcc_assert (loop != NULL);
      create_preheader (loop, CP_SIMPLE_PREHEADERS);
      create_bb_after_loop (loop);
    }

  /* Remove stmts not in the PARTITION bitmap.  */
  bbs = get_loop_body_in_dom_order (loop);

  /* First pass: sever debug-info references to the statements we are
     about to delete, so debug stmts do not end up with dangling
     operands.  */
  if (MAY_HAVE_DEBUG_STMTS)
    for (i = 0; i < loop->num_nodes; i++)
      {
	basic_block bb = bbs[i];

	for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
	     gsi_next (&bsi))
	  {
	    gphi *phi = bsi.phi ();
	    if (!virtual_operand_p (gimple_phi_result (phi))
		&& !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
	      reset_debug_uses (phi);
	  }

	for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	  {
	    gimple *stmt = gsi_stmt (bsi);
	    if (gimple_code (stmt) != GIMPLE_LABEL
		&& !is_gimple_debug (stmt)
		&& !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
	      reset_debug_uses (stmt);
	  }
      }

  /* Second pass: actually remove (or neutralize) statements outside
     the partition.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
	{
	  gphi *phi = bsi.phi ();
	  if (!virtual_operand_p (gimple_phi_result (phi))
	      && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
	    remove_phi_node (&bsi, true);
	  else
	    gsi_next (&bsi);
	}

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
	{
	  gimple *stmt = gsi_stmt (bsi);
	  if (gimple_code (stmt) != GIMPLE_LABEL
	      && !is_gimple_debug (stmt)
	      && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
	    {
	      /* Choose an arbitrary path through the empty CFG part
		 that this unnecessary control stmt controls.  */
	      if (gcond *cond_stmt = dyn_cast <gcond *> (stmt))
		{
		  gimple_cond_make_false (cond_stmt);
		  update_stmt (stmt);
		}
	      else if (gimple_code (stmt) == GIMPLE_SWITCH)
		{
		  gswitch *switch_stmt = as_a <gswitch *> (stmt);
		  gimple_switch_set_index
		    (switch_stmt, CASE_LOW (gimple_switch_label (switch_stmt, 1)));
		  update_stmt (stmt);
		}
	      else
		{
		  unlink_stmt_vdef (stmt);
		  gsi_remove (&bsi, true);
		  release_defs (stmt);
		  continue;
		}
	    }
	  gsi_next (&bsi);
	}
    }

  free (bbs);
}
708 /* Build the size argument for a memory operation call. */
710 static tree
711 build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter,
712 bool plus_one)
714 tree size = fold_convert_loc (loc, sizetype, nb_iter);
715 if (plus_one)
716 size = size_binop (PLUS_EXPR, size, size_one_node);
717 size = fold_build2_loc (loc, MULT_EXPR, sizetype, size,
718 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
719 size = fold_convert_loc (loc, size_type_node, size);
720 return size;
/* Build an address argument for a memory operation call located at
   LOC: the start address of the NB_BYTES-byte region accessed through
   DR.  For a negative-stride access the data reference's address is
   the highest accessed element, so step back by NB_BYTES (minus one
   element) to reach the region start.  */

static tree
build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
{
  tree addr_base;

  addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
  addr_base = fold_convert_loc (loc, sizetype, addr_base);

  /* Test for a negative stride, iterating over every element.  */
  if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
    {
      addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
				  fold_convert_loc (loc, sizetype, nb_bytes));
      addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
				  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
    }

  return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
}
/* If VAL memory representation contains the same value in all bytes,
   return that value, otherwise return -1.
   E.g. for 0x24242424 return 0x24, for IEEE double
   747708026454360457216.0 return 0x44, etc.  */

static int
const_with_all_bytes_same (tree val)
{
  unsigned char buf[64];
  int i, len;

  /* Fast path for the common all-zero cases (integer and real zero,
     and empty non-clobber constructors).  */
  if (integer_zerop (val)
      || real_zerop (val)
      || (TREE_CODE (val) == CONSTRUCTOR
	  && !TREE_CLOBBER_P (val)
	  && CONSTRUCTOR_NELTS (val) == 0))
    return 0;

  /* The byte-wise comparison below only makes sense with 8-bit
     bytes.  */
  if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
    return -1;

  len = native_encode_expr (val, buf, sizeof (buf));
  if (len == 0)
    return -1;
  for (i = 1; i < len; i++)
    if (buf[i] != buf[0])
      return -1;
  return buf[0];
}
/* Generate a call to memset for PARTITION in LOOP, inserted at the
   end of LOOP's preheader predecessor.  Uses the size, address and
   value information recorded on PARTITION by classify_partition.  */

static void
generate_memset_builtin (struct loop *loop, partition *partition)
{
  gimple_stmt_iterator gsi;
  gimple *stmt, *fn_call;
  tree mem, fn, nb_bytes;
  location_t loc;
  tree val;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
				 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
				       false, GSI_CONTINUE_LINKING);
  mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);

  /* This exactly matches the pattern recognition in classify_partition.  */
  val = gimple_assign_rhs1 (stmt);
  /* Handle constants like 0x15151515 and similarly
     floating point constants etc. where all bytes are the same.  */
  int bytev = const_with_all_bytes_same (val);
  if (bytev != -1)
    val = build_int_cst (integer_type_node, bytev);
  else if (TREE_CODE (val) == INTEGER_CST)
    val = fold_convert (integer_type_node, val);
  else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
    {
      /* memset takes an int value argument; widen/convert VAL.  */
      tree tem = make_ssa_name (integer_type_node);
      gimple *cstmt = gimple_build_assign (tem, NOP_EXPR, val);
      gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
      val = tem;
    }

  fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
  fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "generated memset");
      if (bytev == 0)
	fprintf (dump_file, " zero\n");
      else
	fprintf (dump_file, "\n");
    }
}
/* Generate a call to memcpy for PARTITION in LOOP, inserted at the
   end of LOOP's preheader predecessor.  Falls back to memmove when
   source and destination may alias.  */

static void
generate_memcpy_builtin (struct loop *loop, partition *partition)
{
  gimple_stmt_iterator gsi;
  gimple *stmt, *fn_call;
  tree dest, src, fn, nb_bytes;
  location_t loc;
  enum built_in_function kind;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
				 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
				       false, GSI_CONTINUE_LINKING);
  dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
  /* memcpy requires non-overlapping operands; use memmove when the
     regions may alias.  */
  if (ptr_derefs_may_alias_p (dest, src))
    kind = BUILT_IN_MEMMOVE;
  else
    kind = BUILT_IN_MEMCPY;

  dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
				   false, GSI_CONTINUE_LINKING);
  src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);
  fn = build_fold_addr_expr (builtin_decl_implicit (kind));
  fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (kind == BUILT_IN_MEMCPY)
	fprintf (dump_file, "generated memcpy\n");
      else
	fprintf (dump_file, "generated memmove\n");
    }
}
/* Remove and destroy the loop LOOP: redirect its single exit past the
   loop, delete all its basic blocks, and restore dominator info for
   the exit destination.  LOOP must have a single exit.  */

static void
destroy_loop (struct loop *loop)
{
  unsigned nbbs = loop->num_nodes;
  edge exit = single_exit (loop);
  basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
  basic_block *bbs;
  unsigned i;

  bbs = get_loop_body_in_dom_order (loop);

  /* Bypass the loop: the preheader's predecessor now falls through
     directly to the exit destination.  */
  redirect_edge_pred (exit, src);
  exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
  exit->flags |= EDGE_FALLTHRU;
  cancel_loop_tree (loop);
  rescan_loop_exit (exit, false, true);

  for (i = 0; i < nbbs; i++)
    {
      /* We have made sure to not leave any dangling uses of SSA
	 names defined in the loop.  With the exception of virtuals.
	 Make sure we replace all uses of virtual defs that will remain
	 outside of the loop with the bare symbol as delete_basic_block
	 will release them.  */
      for (gphi_iterator gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gphi *phi = gsi.phi ();
	  if (virtual_operand_p (gimple_phi_result (phi)))
	    mark_virtual_phi_result_for_renaming (phi);
	}
      for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  tree vdef = gimple_vdef (stmt);
	  if (vdef && TREE_CODE (vdef) == SSA_NAME)
	    mark_virtual_operand_for_renaming (vdef);
	}
      delete_basic_block (bbs[i]);
    }
  free (bbs);

  /* DEST's old immediate dominator may have been inside the loop.  */
  set_immediate_dominator (CDI_DOMINATORS, dest,
			   recompute_dominator (CDI_DOMINATORS, dest));
}
925 /* Generates code for PARTITION. */
927 static void
928 generate_code_for_partition (struct loop *loop,
929 partition *partition, bool copy_p)
931 switch (partition->kind)
933 case PKIND_NORMAL:
934 /* Reductions all have to be in the last partition. */
935 gcc_assert (!partition_reduction_p (partition)
936 || !copy_p);
937 generate_loops_for_partition (loop, partition, copy_p);
938 return;
940 case PKIND_MEMSET:
941 generate_memset_builtin (loop, partition);
942 break;
944 case PKIND_MEMCPY:
945 generate_memcpy_builtin (loop, partition);
946 break;
948 default:
949 gcc_unreachable ();
952 /* Common tail for partitions we turn into a call. If this was the last
953 partition for which we generate code, we have to destroy the loop. */
954 if (!copy_p)
955 destroy_loop (loop);
959 /* Returns a partition with all the statements needed for computing
960 the vertex V of the RDG, also including the loop exit conditions. */
962 static partition *
963 build_rdg_partition_for_vertex (struct graph *rdg, int v)
965 partition *partition = partition_alloc (NULL, NULL);
966 auto_vec<int, 3> nodes;
967 unsigned i;
968 int x;
970 graphds_dfs (rdg, &v, 1, &nodes, false, NULL);
972 FOR_EACH_VEC_ELT (nodes, i, x)
974 bitmap_set_bit (partition->stmts, x);
975 bitmap_set_bit (partition->loops,
976 loop_containing_stmt (RDG_STMT (rdg, x))->num);
979 return partition;
982 /* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
983 For the moment we detect only the memset zero pattern. */
985 static void
986 classify_partition (loop_p loop, struct graph *rdg, partition *partition)
988 bitmap_iterator bi;
989 unsigned i;
990 tree nb_iter;
991 data_reference_p single_load, single_store;
992 bool volatiles_p = false;
993 bool plus_one = false;
995 partition->kind = PKIND_NORMAL;
996 partition->main_dr = NULL;
997 partition->secondary_dr = NULL;
998 partition->niter = NULL_TREE;
999 partition->plus_one = false;
1001 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
1003 gimple *stmt = RDG_STMT (rdg, i);
1005 if (gimple_has_volatile_ops (stmt))
1006 volatiles_p = true;
1008 /* If the stmt has uses outside of the loop mark it as reduction. */
1009 if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
1011 partition->reduction_p = true;
1012 return;
1016 /* Perform general partition disqualification for builtins. */
1017 if (volatiles_p
1018 || !flag_tree_loop_distribute_patterns)
1019 return;
1021 /* Detect memset and memcpy. */
1022 single_load = NULL;
1023 single_store = NULL;
1024 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
1026 gimple *stmt = RDG_STMT (rdg, i);
1027 data_reference_p dr;
1028 unsigned j;
1030 if (gimple_code (stmt) == GIMPLE_PHI)
1031 continue;
1033 /* Any scalar stmts are ok. */
1034 if (!gimple_vuse (stmt))
1035 continue;
1037 /* Otherwise just regular loads/stores. */
1038 if (!gimple_assign_single_p (stmt))
1039 return;
1041 /* But exactly one store and/or load. */
1042 for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
1044 if (DR_IS_READ (dr))
1046 if (single_load != NULL)
1047 return;
1048 single_load = dr;
1050 else
1052 if (single_store != NULL)
1053 return;
1054 single_store = dr;
1059 if (!single_store)
1060 return;
1062 nb_iter = number_of_latch_executions (loop);
1063 if (!nb_iter || nb_iter == chrec_dont_know)
1064 return;
1065 if (dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
1066 gimple_bb (DR_STMT (single_store))))
1067 plus_one = true;
1069 if (single_store && !single_load)
1071 gimple *stmt = DR_STMT (single_store);
1072 tree rhs = gimple_assign_rhs1 (stmt);
1073 if (const_with_all_bytes_same (rhs) == -1
1074 && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
1075 || (TYPE_MODE (TREE_TYPE (rhs))
1076 != TYPE_MODE (unsigned_char_type_node))))
1077 return;
1078 if (TREE_CODE (rhs) == SSA_NAME
1079 && !SSA_NAME_IS_DEFAULT_DEF (rhs)
1080 && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
1081 return;
1082 if (!adjacent_dr_p (single_store)
1083 || !dominated_by_p (CDI_DOMINATORS,
1084 loop->latch, gimple_bb (stmt)))
1085 return;
1086 partition->kind = PKIND_MEMSET;
1087 partition->main_dr = single_store;
1088 partition->niter = nb_iter;
1089 partition->plus_one = plus_one;
1091 else if (single_store && single_load)
1093 gimple *store = DR_STMT (single_store);
1094 gimple *load = DR_STMT (single_load);
1095 /* Direct aggregate copy or via an SSA name temporary. */
1096 if (load != store
1097 && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
1098 return;
1099 if (!adjacent_dr_p (single_store)
1100 || !adjacent_dr_p (single_load)
1101 || !operand_equal_p (DR_STEP (single_store),
1102 DR_STEP (single_load), 0)
1103 || !dominated_by_p (CDI_DOMINATORS,
1104 loop->latch, gimple_bb (store)))
1105 return;
1106 /* Now check that if there is a dependence this dependence is
1107 of a suitable form for memmove. */
1108 vec<loop_p> loops = vNULL;
1109 ddr_p ddr;
1110 loops.safe_push (loop);
1111 ddr = initialize_data_dependence_relation (single_load, single_store,
1112 loops);
1113 compute_affine_dependence (ddr, loop);
1114 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1116 free_dependence_relation (ddr);
1117 loops.release ();
1118 return;
1120 if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
1122 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1124 free_dependence_relation (ddr);
1125 loops.release ();
1126 return;
1128 lambda_vector dist_v;
1129 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
1131 int dist = dist_v[index_in_loop_nest (loop->num,
1132 DDR_LOOP_NEST (ddr))];
1133 if (dist > 0 && !DDR_REVERSED_P (ddr))
1135 free_dependence_relation (ddr);
1136 loops.release ();
1137 return;
1141 free_dependence_relation (ddr);
1142 loops.release ();
1143 partition->kind = PKIND_MEMCPY;
1144 partition->main_dr = single_store;
1145 partition->secondary_dr = single_load;
1146 partition->niter = nb_iter;
1147 partition->plus_one = plus_one;
1151 /* For a data reference REF, return the declaration of its base
1152 address or NULL_TREE if the base is not determined. */
1154 static tree
1155 ref_base_address (data_reference_p dr)
1157 tree base_address = DR_BASE_ADDRESS (dr);
1158 if (base_address
1159 && TREE_CODE (base_address) == ADDR_EXPR)
1160 return TREE_OPERAND (base_address, 0);
1162 return base_address;
/* Returns true when PARTITION1 and PARTITION2 have similar memory
   accesses in RDG: either they share a memory-accessing statement, or
   some pair of their data references has the same base address.  */

static bool
similar_memory_accesses (struct graph *rdg, partition *partition1,
			 partition *partition2)
{
  unsigned i, j, k, l;
  bitmap_iterator bi, bj;
  data_reference_p ref1, ref2;

  /* First check whether in the intersection of the two partitions are
     any loads or stores.  Common loads are the situation that happens
     most often.  */
  EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
	|| RDG_MEM_READS_STMT (rdg, i))
      return true;

  /* Then check all data-references against each other.  */
  EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
	|| RDG_MEM_READS_STMT (rdg, i))
      EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
	if (RDG_MEM_WRITE_STMT (rdg, j)
	    || RDG_MEM_READS_STMT (rdg, j))
	  {
	    FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
	      {
		tree base1 = ref_base_address (ref1);
		/* Only compare when the base decl is determined.  */
		if (base1)
		  FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
		    if (base1 == ref_base_address (ref2))
		      return true;
	      }
	  }

  return false;
}
/* Aggregate several components into a useful partition that is
   registered in the PARTITIONS vector.  Partitions will be
   distributed in different loops.  One partition is built per
   starting statement in STARTING_STMTS, skipping statements already
   absorbed by an earlier partition.  */

static void
rdg_build_partitions (struct graph *rdg,
		      vec<gimple *> starting_stmts,
		      vec<partition *> *partitions)
{
  bitmap processed = BITMAP_ALLOC (NULL);
  int i;
  gimple *stmt;

  FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
    {
      int v = rdg_vertex_for_stmt (rdg, stmt);

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "ldist asked to generate code for vertex %d\n", v);

      /* If the vertex is already contained in another partition so
	 is the partition rooted at it.  */
      if (bitmap_bit_p (processed, v))
	continue;

      partition *partition = build_rdg_partition_for_vertex (rdg, v);
      bitmap_ior_into (processed, partition->stmts);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "ldist useful partition:\n");
	  dump_bitmap (dump_file, partition->stmts);
	}

      partitions->safe_push (partition);
    }

  /* All vertices should have been assigned to at least one partition now,
     other than vertices belonging to dead code.  */

  BITMAP_FREE (processed);
}
1249 /* Dump to FILE the PARTITIONS. */
1251 static void
1252 dump_rdg_partitions (FILE *file, vec<partition *> partitions)
1254 int i;
1255 partition *partition;
1257 FOR_EACH_VEC_ELT (partitions, i, partition)
1258 debug_bitmap_file (file, partition->stmts);
1261 /* Debug PARTITIONS. */
1262 extern void debug_rdg_partitions (vec<partition *> );
1264 DEBUG_FUNCTION void
1265 debug_rdg_partitions (vec<partition *> partitions)
1267 dump_rdg_partitions (stderr, partitions);
1270 /* Returns the number of read and write operations in the RDG. */
1272 static int
1273 number_of_rw_in_rdg (struct graph *rdg)
1275 int i, res = 0;
1277 for (i = 0; i < rdg->n_vertices; i++)
1279 if (RDG_MEM_WRITE_STMT (rdg, i))
1280 ++res;
1282 if (RDG_MEM_READS_STMT (rdg, i))
1283 ++res;
1286 return res;
1289 /* Returns the number of read and write operations in a PARTITION of
1290 the RDG. */
1292 static int
1293 number_of_rw_in_partition (struct graph *rdg, partition *partition)
1295 int res = 0;
1296 unsigned i;
1297 bitmap_iterator ii;
1299 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
1301 if (RDG_MEM_WRITE_STMT (rdg, i))
1302 ++res;
1304 if (RDG_MEM_READS_STMT (rdg, i))
1305 ++res;
1308 return res;
1311 /* Returns true when one of the PARTITIONS contains all the read or
1312 write operations of RDG. */
1314 static bool
1315 partition_contains_all_rw (struct graph *rdg,
1316 vec<partition *> partitions)
1318 int i;
1319 partition *partition;
1320 int nrw = number_of_rw_in_rdg (rdg);
1322 FOR_EACH_VEC_ELT (partitions, i, partition)
1323 if (nrw == number_of_rw_in_partition (rdg, partition))
1324 return true;
1326 return false;
1329 /* Compute partition dependence created by the data references in DRS1
1330 and DRS2 and modify and return DIR according to that. */
1332 static int
1333 pg_add_dependence_edges (struct graph *rdg, vec<loop_p> loops, int dir,
1334 vec<data_reference_p> drs1,
1335 vec<data_reference_p> drs2)
1337 data_reference_p dr1, dr2;
1339 /* dependence direction - 0 is no dependence, -1 is back,
1340 1 is forth, 2 is both (we can stop then, merging will occur). */
1341 for (int ii = 0; drs1.iterate (ii, &dr1); ++ii)
1342 for (int jj = 0; drs2.iterate (jj, &dr2); ++jj)
1344 data_reference_p saved_dr1 = dr1;
1345 int this_dir = 1;
1346 ddr_p ddr;
1347 /* Re-shuffle data-refs to be in dominator order. */
1348 if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
1349 > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
1351 std::swap (dr1, dr2);
1352 this_dir = -this_dir;
1354 ddr = initialize_data_dependence_relation (dr1, dr2, loops);
1355 compute_affine_dependence (ddr, loops[0]);
1356 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1357 this_dir = 2;
1358 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1360 if (DDR_REVERSED_P (ddr))
1362 std::swap (dr1, dr2);
1363 this_dir = -this_dir;
1365 /* Known dependences can still be unordered througout the
1366 iteration space, see gcc.dg/tree-ssa/ldist-16.c. */
1367 if (DDR_NUM_DIST_VECTS (ddr) != 1)
1368 this_dir = 2;
1369 /* If the overlap is exact preserve stmt order. */
1370 else if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1))
1372 else
1374 /* Else as the distance vector is lexicographic positive
1375 swap the dependence direction. */
1376 this_dir = -this_dir;
1379 else
1380 this_dir = 0;
1381 free_dependence_relation (ddr);
1382 if (dir == 0)
1383 dir = this_dir;
1384 else if (dir != this_dir)
1385 return 2;
1386 /* Shuffle "back" dr1. */
1387 dr1 = saved_dr1;
1389 return dir;
1392 /* Compare postorder number of the partition graph vertices V1 and V2. */
1394 static int
1395 pgcmp (const void *v1_, const void *v2_)
1397 const vertex *v1 = (const vertex *)v1_;
1398 const vertex *v2 = (const vertex *)v2_;
1399 return v2->post - v1->post;
1402 /* Distributes the code from LOOP in such a way that producer
1403 statements are placed before consumer statements. Tries to separate
1404 only the statements from STMTS into separate loops.
1405 Returns the number of distributed loops. */
1407 static int
1408 distribute_loop (struct loop *loop, vec<gimple *> stmts,
1409 control_dependences *cd, int *nb_calls)
1411 struct graph *rdg;
1412 partition *partition;
1413 bool any_builtin;
1414 int i, nbp;
1415 graph *pg = NULL;
1416 int num_sccs = 1;
1418 *nb_calls = 0;
1419 auto_vec<loop_p, 3> loop_nest;
1420 if (!find_loop_nest (loop, &loop_nest))
1421 return 0;
1423 rdg = build_rdg (loop_nest, cd);
1424 if (!rdg)
1426 if (dump_file && (dump_flags & TDF_DETAILS))
1427 fprintf (dump_file,
1428 "Loop %d not distributed: failed to build the RDG.\n",
1429 loop->num);
1431 return 0;
1434 if (dump_file && (dump_flags & TDF_DETAILS))
1435 dump_rdg (dump_file, rdg);
1437 auto_vec<struct partition *, 3> partitions;
1438 rdg_build_partitions (rdg, stmts, &partitions);
1440 any_builtin = false;
1441 FOR_EACH_VEC_ELT (partitions, i, partition)
1443 classify_partition (loop, rdg, partition);
1444 any_builtin |= partition_builtin_p (partition);
1447 /* If we are only distributing patterns but did not detect any,
1448 simply bail out. */
1449 if (!flag_tree_loop_distribution
1450 && !any_builtin)
1452 nbp = 0;
1453 goto ldist_done;
1456 /* If we are only distributing patterns fuse all partitions that
1457 were not classified as builtins. This also avoids chopping
1458 a loop into pieces, separated by builtin calls. That is, we
1459 only want no or a single loop body remaining. */
1460 struct partition *into;
1461 if (!flag_tree_loop_distribution)
1463 for (i = 0; partitions.iterate (i, &into); ++i)
1464 if (!partition_builtin_p (into))
1465 break;
1466 for (++i; partitions.iterate (i, &partition); ++i)
1467 if (!partition_builtin_p (partition))
1469 if (dump_file && (dump_flags & TDF_DETAILS))
1471 fprintf (dump_file, "fusing non-builtin partitions\n");
1472 dump_bitmap (dump_file, into->stmts);
1473 dump_bitmap (dump_file, partition->stmts);
1475 partition_merge_into (into, partition);
1476 partitions.unordered_remove (i);
1477 partition_free (partition);
1478 i--;
1482 /* Due to limitations in the transform phase we have to fuse all
1483 reduction partitions into the last partition so the existing
1484 loop will contain all loop-closed PHI nodes. */
1485 for (i = 0; partitions.iterate (i, &into); ++i)
1486 if (partition_reduction_p (into))
1487 break;
1488 for (i = i + 1; partitions.iterate (i, &partition); ++i)
1489 if (partition_reduction_p (partition))
1491 if (dump_file && (dump_flags & TDF_DETAILS))
1493 fprintf (dump_file, "fusing partitions\n");
1494 dump_bitmap (dump_file, into->stmts);
1495 dump_bitmap (dump_file, partition->stmts);
1496 fprintf (dump_file, "because they have reductions\n");
1498 partition_merge_into (into, partition);
1499 partitions.unordered_remove (i);
1500 partition_free (partition);
1501 i--;
1504 /* Apply our simple cost model - fuse partitions with similar
1505 memory accesses. */
1506 for (i = 0; partitions.iterate (i, &into); ++i)
1508 if (partition_builtin_p (into))
1509 continue;
1510 for (int j = i + 1;
1511 partitions.iterate (j, &partition); ++j)
1513 if (!partition_builtin_p (partition)
1514 && similar_memory_accesses (rdg, into, partition))
1516 if (dump_file && (dump_flags & TDF_DETAILS))
1518 fprintf (dump_file, "fusing partitions\n");
1519 dump_bitmap (dump_file, into->stmts);
1520 dump_bitmap (dump_file, partition->stmts);
1521 fprintf (dump_file, "because they have similar "
1522 "memory accesses\n");
1524 partition_merge_into (into, partition);
1525 partitions.unordered_remove (j);
1526 partition_free (partition);
1527 j--;
1532 /* Build the partition dependency graph. */
1533 if (partitions.length () > 1)
1535 pg = new_graph (partitions.length ());
1536 struct pgdata {
1537 struct partition *partition;
1538 vec<data_reference_p> writes;
1539 vec<data_reference_p> reads;
1541 #define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
1542 for (i = 0; partitions.iterate (i, &partition); ++i)
1544 vertex *v = &pg->vertices[i];
1545 pgdata *data = new pgdata;
1546 data_reference_p dr;
1547 /* FIXME - leaks. */
1548 v->data = data;
1549 bitmap_iterator bi;
1550 unsigned j;
1551 data->partition = partition;
1552 data->reads = vNULL;
1553 data->writes = vNULL;
1554 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, j, bi)
1555 for (int k = 0; RDG_DATAREFS (rdg, j).iterate (k, &dr); ++k)
1556 if (DR_IS_READ (dr))
1557 data->reads.safe_push (dr);
1558 else
1559 data->writes.safe_push (dr);
1561 struct partition *partition1, *partition2;
1562 for (i = 0; partitions.iterate (i, &partition1); ++i)
1563 for (int j = i + 1; partitions.iterate (j, &partition2); ++j)
1565 /* dependence direction - 0 is no dependence, -1 is back,
1566 1 is forth, 2 is both (we can stop then, merging will occur). */
1567 int dir = 0;
1568 dir = pg_add_dependence_edges (rdg, loop_nest, dir,
1569 PGDATA(i)->writes,
1570 PGDATA(j)->reads);
1571 if (dir != 2)
1572 dir = pg_add_dependence_edges (rdg, loop_nest, dir,
1573 PGDATA(i)->reads,
1574 PGDATA(j)->writes);
1575 if (dir != 2)
1576 dir = pg_add_dependence_edges (rdg, loop_nest, dir,
1577 PGDATA(i)->writes,
1578 PGDATA(j)->writes);
1579 if (dir == 1 || dir == 2)
1580 add_edge (pg, i, j);
1581 if (dir == -1 || dir == 2)
1582 add_edge (pg, j, i);
1585 /* Add edges to the reduction partition (if any) to force it last. */
1586 unsigned j;
1587 for (j = 0; partitions.iterate (j, &partition); ++j)
1588 if (partition_reduction_p (partition))
1589 break;
1590 if (j < partitions.length ())
1592 for (unsigned i = 0; partitions.iterate (i, &partition); ++i)
1593 if (i != j)
1594 add_edge (pg, i, j);
1597 /* Compute partitions we cannot separate and fuse them. */
1598 num_sccs = graphds_scc (pg, NULL);
1599 for (i = 0; i < num_sccs; ++i)
1601 struct partition *first;
1602 int j;
1603 for (j = 0; partitions.iterate (j, &first); ++j)
1604 if (pg->vertices[j].component == i)
1605 break;
1606 for (j = j + 1; partitions.iterate (j, &partition); ++j)
1607 if (pg->vertices[j].component == i)
1609 if (dump_file && (dump_flags & TDF_DETAILS))
1611 fprintf (dump_file, "fusing partitions\n");
1612 dump_bitmap (dump_file, first->stmts);
1613 dump_bitmap (dump_file, partition->stmts);
1614 fprintf (dump_file, "because they are in the same "
1615 "dependence SCC\n");
1617 partition_merge_into (first, partition);
1618 partitions[j] = NULL;
1619 partition_free (partition);
1620 PGDATA (j)->partition = NULL;
1624 /* Now order the remaining nodes in postorder. */
1625 qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
1626 partitions.truncate (0);
1627 for (i = 0; i < pg->n_vertices; ++i)
1629 pgdata *data = PGDATA (i);
1630 if (data->partition)
1631 partitions.safe_push (data->partition);
1632 data->reads.release ();
1633 data->writes.release ();
1634 delete data;
1636 gcc_assert (partitions.length () == (unsigned)num_sccs);
1637 free_graph (pg);
1640 nbp = partitions.length ();
1641 if (nbp == 0
1642 || (nbp == 1 && !partition_builtin_p (partitions[0]))
1643 || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
1645 nbp = 0;
1646 goto ldist_done;
1649 if (dump_file && (dump_flags & TDF_DETAILS))
1650 dump_rdg_partitions (dump_file, partitions);
1652 FOR_EACH_VEC_ELT (partitions, i, partition)
1654 if (partition_builtin_p (partition))
1655 (*nb_calls)++;
1656 generate_code_for_partition (loop, partition, i < nbp - 1);
1659 ldist_done:
1661 FOR_EACH_VEC_ELT (partitions, i, partition)
1662 partition_free (partition);
1664 free_rdg (rdg);
1665 return nbp - *nb_calls;
1668 /* Distribute all loops in the current function. */
1670 namespace {
1672 const pass_data pass_data_loop_distribution =
1674 GIMPLE_PASS, /* type */
1675 "ldist", /* name */
1676 OPTGROUP_LOOP, /* optinfo_flags */
1677 TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
1678 ( PROP_cfg | PROP_ssa ), /* properties_required */
1679 0, /* properties_provided */
1680 0, /* properties_destroyed */
1681 0, /* todo_flags_start */
1682 0, /* todo_flags_finish */
1685 class pass_loop_distribution : public gimple_opt_pass
1687 public:
1688 pass_loop_distribution (gcc::context *ctxt)
1689 : gimple_opt_pass (pass_data_loop_distribution, ctxt)
1692 /* opt_pass methods: */
1693 virtual bool gate (function *)
1695 return flag_tree_loop_distribution
1696 || flag_tree_loop_distribute_patterns;
1699 virtual unsigned int execute (function *);
1701 }; // class pass_loop_distribution
1703 unsigned int
1704 pass_loop_distribution::execute (function *fun)
1706 struct loop *loop;
1707 bool changed = false;
1708 basic_block bb;
1709 control_dependences *cd = NULL;
1711 FOR_ALL_BB_FN (bb, fun)
1713 gimple_stmt_iterator gsi;
1714 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1715 gimple_set_uid (gsi_stmt (gsi), -1);
1716 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1717 gimple_set_uid (gsi_stmt (gsi), -1);
1720 /* We can at the moment only distribute non-nested loops, thus restrict
1721 walking to innermost loops. */
1722 FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
1724 auto_vec<gimple *> work_list;
1725 basic_block *bbs;
1726 int num = loop->num;
1727 unsigned int i;
1729 /* If the loop doesn't have a single exit we will fail anyway,
1730 so do that early. */
1731 if (!single_exit (loop))
1732 continue;
1734 /* Only optimize hot loops. */
1735 if (!optimize_loop_for_speed_p (loop))
1736 continue;
1738 /* Initialize the worklist with stmts we seed the partitions with. */
1739 bbs = get_loop_body_in_dom_order (loop);
1740 for (i = 0; i < loop->num_nodes; ++i)
1742 for (gphi_iterator gsi = gsi_start_phis (bbs[i]);
1743 !gsi_end_p (gsi);
1744 gsi_next (&gsi))
1746 gphi *phi = gsi.phi ();
1747 if (virtual_operand_p (gimple_phi_result (phi)))
1748 continue;
1749 /* Distribute stmts which have defs that are used outside of
1750 the loop. */
1751 if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
1752 continue;
1753 work_list.safe_push (phi);
1755 for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
1756 !gsi_end_p (gsi);
1757 gsi_next (&gsi))
1759 gimple *stmt = gsi_stmt (gsi);
1761 /* If there is a stmt with side-effects bail out - we
1762 cannot and should not distribute this loop. */
1763 if (gimple_has_side_effects (stmt))
1765 work_list.truncate (0);
1766 goto out;
1769 /* Distribute stmts which have defs that are used outside of
1770 the loop. */
1771 if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
1773 /* Otherwise only distribute stores for now. */
1774 else if (!gimple_vdef (stmt))
1775 continue;
1777 work_list.safe_push (stmt);
1780 out:
1781 free (bbs);
1783 int nb_generated_loops = 0;
1784 int nb_generated_calls = 0;
1785 location_t loc = find_loop_location (loop);
1786 if (work_list.length () > 0)
1788 if (!cd)
1790 calculate_dominance_info (CDI_DOMINATORS);
1791 calculate_dominance_info (CDI_POST_DOMINATORS);
1792 cd = new control_dependences (create_edge_list ());
1793 free_dominance_info (CDI_POST_DOMINATORS);
1795 nb_generated_loops = distribute_loop (loop, work_list, cd,
1796 &nb_generated_calls);
1799 if (nb_generated_loops + nb_generated_calls > 0)
1801 changed = true;
1802 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
1803 loc, "Loop %d distributed: split to %d loops "
1804 "and %d library calls.\n",
1805 num, nb_generated_loops, nb_generated_calls);
1807 else if (dump_file && (dump_flags & TDF_DETAILS))
1808 fprintf (dump_file, "Loop %d is the same.\n", num);
1811 if (cd)
1812 delete cd;
1814 if (changed)
1816 /* Cached scalar evolutions now may refer to wrong or non-existing
1817 loops. */
1818 scev_reset_htab ();
1819 mark_virtual_operands_for_renaming (fun);
1820 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1823 checking_verify_loop_structure ();
1825 return 0;
1828 } // anon namespace
1830 gimple_opt_pass *
1831 make_pass_loop_distribution (gcc::context *ctxt)
1833 return new pass_loop_distribution (ctxt);