2016-06-29 Jerry DeLisle <jvdelisle@gcc.gnu.org>
[official-gcc.git] / gcc / tree-loop-distribution.c
blobe4163b274ffde9b1dad255f12b552f08a014703c
1 /* Loop distribution.
2 Copyright (C) 2006-2016 Free Software Foundation, Inc.
3 Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
4 and Sebastian Pop <sebastian.pop@amd.com>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 3, or (at your option) any
11 later version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* This pass performs loop distribution: for example, the loop
24 |DO I = 2, N
25 | A(I) = B(I) + C
26 | D(I) = A(I-1)*E
27 |ENDDO
29 is transformed to
31 |DOALL I = 2, N
32 | A(I) = B(I) + C
33 |ENDDO
35 |DOALL I = 2, N
36 | D(I) = A(I-1)*E
37 |ENDDO
39 This pass uses an RDG, Reduced Dependence Graph built on top of the
40 data dependence relations. The RDG is then topologically sorted to
41 obtain a map of information producers/consumers based on which it
42 generates the new loops. */
44 #include "config.h"
45 #include "system.h"
46 #include "coretypes.h"
47 #include "backend.h"
48 #include "tree.h"
49 #include "gimple.h"
50 #include "cfghooks.h"
51 #include "tree-pass.h"
52 #include "ssa.h"
53 #include "gimple-pretty-print.h"
54 #include "fold-const.h"
55 #include "cfganal.h"
56 #include "gimple-iterator.h"
57 #include "gimplify-me.h"
58 #include "stor-layout.h"
59 #include "tree-cfg.h"
60 #include "tree-ssa-loop-manip.h"
61 #include "tree-ssa-loop.h"
62 #include "tree-into-ssa.h"
63 #include "tree-ssa.h"
64 #include "cfgloop.h"
65 #include "tree-scalar-evolution.h"
66 #include "tree-vectorizer.h"
69 /* A Reduced Dependence Graph (RDG) vertex representing a statement. */
70 struct rdg_vertex
72 /* The statement represented by this vertex. */
73 gimple *stmt;
75 /* Vector of data-references in this statement. */
76 vec<data_reference_p> datarefs;
78 /* True when the statement contains a write to memory. */
79 bool has_mem_write;
81 /* True when the statement contains a read from memory. */
82 bool has_mem_reads;
85 #define RDGV_STMT(V) ((struct rdg_vertex *) ((V)->data))->stmt
86 #define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
87 #define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
88 #define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
89 #define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
90 #define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
91 #define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
92 #define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
94 /* Data dependence type. */
96 enum rdg_dep_type
98 /* Read After Write (RAW). */
99 flow_dd = 'f',
101 /* Control dependence (execute conditional on). */
102 control_dd = 'c'
105 /* Dependence information attached to an edge of the RDG. */
107 struct rdg_edge
109 /* Type of the dependence. */
110 enum rdg_dep_type type;
113 #define RDGE_TYPE(E) ((struct rdg_edge *) ((E)->data))->type
115 /* Dump vertex I in RDG to FILE. */
117 static void
118 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
120 struct vertex *v = &(rdg->vertices[i]);
121 struct graph_edge *e;
123 fprintf (file, "(vertex %d: (%s%s) (in:", i,
124 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
125 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
127 if (v->pred)
128 for (e = v->pred; e; e = e->pred_next)
129 fprintf (file, " %d", e->src);
131 fprintf (file, ") (out:");
133 if (v->succ)
134 for (e = v->succ; e; e = e->succ_next)
135 fprintf (file, " %d", e->dest);
137 fprintf (file, ")\n");
138 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
139 fprintf (file, ")\n");
142 /* Call dump_rdg_vertex on stderr. */
144 DEBUG_FUNCTION void
145 debug_rdg_vertex (struct graph *rdg, int i)
147 dump_rdg_vertex (stderr, rdg, i);
150 /* Dump the reduced dependence graph RDG to FILE. */
152 static void
153 dump_rdg (FILE *file, struct graph *rdg)
155 fprintf (file, "(rdg\n");
156 for (int i = 0; i < rdg->n_vertices; i++)
157 dump_rdg_vertex (file, rdg, i);
158 fprintf (file, ")\n");
161 /* Call dump_rdg on stderr. */
163 DEBUG_FUNCTION void
164 debug_rdg (struct graph *rdg)
166 dump_rdg (stderr, rdg);
169 static void
170 dot_rdg_1 (FILE *file, struct graph *rdg)
172 int i;
173 pretty_printer buffer;
174 pp_needs_newline (&buffer) = false;
175 buffer.buffer->stream = file;
177 fprintf (file, "digraph RDG {\n");
179 for (i = 0; i < rdg->n_vertices; i++)
181 struct vertex *v = &(rdg->vertices[i]);
182 struct graph_edge *e;
184 fprintf (file, "%d [label=\"[%d] ", i, i);
185 pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
186 pp_flush (&buffer);
187 fprintf (file, "\"]\n");
189 /* Highlight reads from memory. */
190 if (RDG_MEM_READS_STMT (rdg, i))
191 fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
193 /* Highlight stores to memory. */
194 if (RDG_MEM_WRITE_STMT (rdg, i))
195 fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
197 if (v->succ)
198 for (e = v->succ; e; e = e->succ_next)
199 switch (RDGE_TYPE (e))
201 case flow_dd:
202 /* These are the most common dependences: don't print these. */
203 fprintf (file, "%d -> %d \n", i, e->dest);
204 break;
206 case control_dd:
207 fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
208 break;
210 default:
211 gcc_unreachable ();
215 fprintf (file, "}\n\n");
218 /* Display the Reduced Dependence Graph using dotty. */
220 DEBUG_FUNCTION void
221 dot_rdg (struct graph *rdg)
223 /* When debugging, you may want to enable the following code. */
224 #ifdef HAVE_POPEN
225 FILE *file = popen ("dot -Tx11", "w");
226 if (!file)
227 return;
228 dot_rdg_1 (file, rdg);
229 fflush (file);
230 close (fileno (file));
231 pclose (file);
232 #else
233 dot_rdg_1 (stderr, rdg);
234 #endif
237 /* Returns the index of STMT in RDG. */
239 static int
240 rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple *stmt)
242 int index = gimple_uid (stmt);
243 gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
244 return index;
247 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
248 the index of DEF in RDG. */
250 static void
251 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
253 use_operand_p imm_use_p;
254 imm_use_iterator iterator;
256 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
258 struct graph_edge *e;
259 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
261 if (use < 0)
262 continue;
264 e = add_edge (rdg, idef, use);
265 e->data = XNEW (struct rdg_edge);
266 RDGE_TYPE (e) = flow_dd;
270 /* Creates an edge for the control dependences of BB to the vertex V. */
272 static void
273 create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
274 int v, control_dependences *cd)
276 bitmap_iterator bi;
277 unsigned edge_n;
278 EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
279 0, edge_n, bi)
281 basic_block cond_bb = cd->get_edge_src (edge_n);
282 gimple *stmt = last_stmt (cond_bb);
283 if (stmt && is_ctrl_stmt (stmt))
285 struct graph_edge *e;
286 int c = rdg_vertex_for_stmt (rdg, stmt);
287 if (c < 0)
288 continue;
290 e = add_edge (rdg, c, v);
291 e->data = XNEW (struct rdg_edge);
292 RDGE_TYPE (e) = control_dd;
297 /* Creates the edges of the reduced dependence graph RDG. */
299 static void
300 create_rdg_flow_edges (struct graph *rdg)
302 int i;
303 def_operand_p def_p;
304 ssa_op_iter iter;
306 for (i = 0; i < rdg->n_vertices; i++)
307 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
308 iter, SSA_OP_DEF)
309 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
312 /* Creates the edges of the reduced dependence graph RDG. */
314 static void
315 create_rdg_cd_edges (struct graph *rdg, control_dependences *cd, loop_p loop)
317 int i;
319 for (i = 0; i < rdg->n_vertices; i++)
321 gimple *stmt = RDG_STMT (rdg, i);
322 if (gimple_code (stmt) == GIMPLE_PHI)
324 edge_iterator ei;
325 edge e;
326 FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
327 if (flow_bb_inside_loop_p (loop, e->src))
328 create_edge_for_control_dependence (rdg, e->src, i, cd);
330 else
331 create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
335 /* Build the vertices of the reduced dependence graph RDG. Return false
336 if that failed. */
338 static bool
339 create_rdg_vertices (struct graph *rdg, vec<gimple *> stmts, loop_p loop,
340 vec<data_reference_p> *datarefs)
342 int i;
343 gimple *stmt;
345 FOR_EACH_VEC_ELT (stmts, i, stmt)
347 struct vertex *v = &(rdg->vertices[i]);
349 /* Record statement to vertex mapping. */
350 gimple_set_uid (stmt, i);
352 v->data = XNEW (struct rdg_vertex);
353 RDGV_STMT (v) = stmt;
354 RDGV_DATAREFS (v).create (0);
355 RDGV_HAS_MEM_WRITE (v) = false;
356 RDGV_HAS_MEM_READS (v) = false;
357 if (gimple_code (stmt) == GIMPLE_PHI)
358 continue;
360 unsigned drp = datarefs->length ();
361 if (!find_data_references_in_stmt (loop, stmt, datarefs))
362 return false;
363 for (unsigned j = drp; j < datarefs->length (); ++j)
365 data_reference_p dr = (*datarefs)[j];
366 if (DR_IS_READ (dr))
367 RDGV_HAS_MEM_READS (v) = true;
368 else
369 RDGV_HAS_MEM_WRITE (v) = true;
370 RDGV_DATAREFS (v).safe_push (dr);
373 return true;
376 /* Initialize STMTS with all the statements of LOOP. The order in
377 which we discover statements is important as
378 generate_loops_for_partition is using the same traversal for
379 identifying statements in loop copies. */
381 static void
382 stmts_from_loop (struct loop *loop, vec<gimple *> *stmts)
384 unsigned int i;
385 basic_block *bbs = get_loop_body_in_dom_order (loop);
387 for (i = 0; i < loop->num_nodes; i++)
389 basic_block bb = bbs[i];
391 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
392 gsi_next (&bsi))
393 if (!virtual_operand_p (gimple_phi_result (bsi.phi ())))
394 stmts->safe_push (bsi.phi ());
396 for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
397 gsi_next (&bsi))
399 gimple *stmt = gsi_stmt (bsi);
400 if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
401 stmts->safe_push (stmt);
405 free (bbs);
408 /* Free the reduced dependence graph RDG. */
410 static void
411 free_rdg (struct graph *rdg)
413 int i;
415 for (i = 0; i < rdg->n_vertices; i++)
417 struct vertex *v = &(rdg->vertices[i]);
418 struct graph_edge *e;
420 for (e = v->succ; e; e = e->succ_next)
421 free (e->data);
423 if (v->data)
425 gimple_set_uid (RDGV_STMT (v), -1);
426 free_data_refs (RDGV_DATAREFS (v));
427 free (v->data);
431 free_graph (rdg);
434 /* Build the Reduced Dependence Graph (RDG) with one vertex per
435 statement of the loop nest LOOP_NEST, and one edge per data dependence or
436 scalar dependence. */
438 static struct graph *
439 build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
441 struct graph *rdg;
442 vec<data_reference_p> datarefs;
444 /* Create the RDG vertices from the stmts of the loop nest. */
445 auto_vec<gimple *, 10> stmts;
446 stmts_from_loop (loop_nest[0], &stmts);
447 rdg = new_graph (stmts.length ());
448 datarefs.create (10);
449 if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs))
451 datarefs.release ();
452 free_rdg (rdg);
453 return NULL;
455 stmts.release ();
457 create_rdg_flow_edges (rdg);
458 if (cd)
459 create_rdg_cd_edges (rdg, cd, loop_nest[0]);
461 datarefs.release ();
463 return rdg;
468 enum partition_kind {
469 PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY
472 struct partition
474 bitmap stmts;
475 bitmap loops;
476 bool reduction_p;
477 enum partition_kind kind;
478 /* data-references a kind != PKIND_NORMAL partition is about. */
479 data_reference_p main_dr;
480 data_reference_p secondary_dr;
481 tree niter;
482 bool plus_one;
486 /* Allocate and initialize a partition from BITMAP. */
488 static partition *
489 partition_alloc (bitmap stmts, bitmap loops)
491 partition *partition = XCNEW (struct partition);
492 partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
493 partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
494 partition->reduction_p = false;
495 partition->kind = PKIND_NORMAL;
496 return partition;
499 /* Free PARTITION. */
501 static void
502 partition_free (partition *partition)
504 BITMAP_FREE (partition->stmts);
505 BITMAP_FREE (partition->loops);
506 free (partition);
509 /* Returns true if the partition can be generated as a builtin. */
511 static bool
512 partition_builtin_p (partition *partition)
514 return partition->kind != PKIND_NORMAL;
517 /* Returns true if the partition contains a reduction. */
519 static bool
520 partition_reduction_p (partition *partition)
522 return partition->reduction_p;
525 /* Merge PARTITION into the partition DEST. */
527 static void
528 partition_merge_into (partition *dest, partition *partition)
530 dest->kind = PKIND_NORMAL;
531 bitmap_ior_into (dest->stmts, partition->stmts);
532 if (partition_reduction_p (partition))
533 dest->reduction_p = true;
537 /* Returns true when DEF is an SSA_NAME defined in LOOP and used after
538 the LOOP. */
540 static bool
541 ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
543 imm_use_iterator imm_iter;
544 use_operand_p use_p;
546 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
548 gimple *use_stmt = USE_STMT (use_p);
549 if (!is_gimple_debug (use_stmt)
550 && loop != loop_containing_stmt (use_stmt))
551 return true;
554 return false;
557 /* Returns true when STMT defines a scalar variable used after the
558 loop LOOP. */
560 static bool
561 stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple *stmt)
563 def_operand_p def_p;
564 ssa_op_iter op_iter;
566 if (gimple_code (stmt) == GIMPLE_PHI)
567 return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);
569 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
570 if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
571 return true;
573 return false;
576 /* Return a copy of LOOP placed before LOOP. */
578 static struct loop *
579 copy_loop_before (struct loop *loop)
581 struct loop *res;
582 edge preheader = loop_preheader_edge (loop);
584 initialize_original_copy_tables ();
585 res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, NULL, preheader);
586 gcc_assert (res != NULL);
587 free_original_copy_tables ();
588 delete_update_ssa ();
590 return res;
593 /* Creates an empty basic block after LOOP. */
595 static void
596 create_bb_after_loop (struct loop *loop)
598 edge exit = single_exit (loop);
600 if (!exit)
601 return;
603 split_edge (exit);
606 /* Generate code for PARTITION from the code in LOOP. The loop is
607 copied when COPY_P is true. All the statements not flagged in the
608 PARTITION bitmap are removed from the loop or from its copy. The
609 statements are indexed in sequence inside a basic block, and the
610 basic blocks of a loop are taken in dom order. */
612 static void
613 generate_loops_for_partition (struct loop *loop, partition *partition,
614 bool copy_p)
616 unsigned i;
617 basic_block *bbs;
619 if (copy_p)
621 loop = copy_loop_before (loop);
622 gcc_assert (loop != NULL);
623 create_preheader (loop, CP_SIMPLE_PREHEADERS);
624 create_bb_after_loop (loop);
627 /* Remove stmts not in the PARTITION bitmap. */
628 bbs = get_loop_body_in_dom_order (loop);
630 if (MAY_HAVE_DEBUG_STMTS)
631 for (i = 0; i < loop->num_nodes; i++)
633 basic_block bb = bbs[i];
635 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
636 gsi_next (&bsi))
638 gphi *phi = bsi.phi ();
639 if (!virtual_operand_p (gimple_phi_result (phi))
640 && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
641 reset_debug_uses (phi);
644 for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
646 gimple *stmt = gsi_stmt (bsi);
647 if (gimple_code (stmt) != GIMPLE_LABEL
648 && !is_gimple_debug (stmt)
649 && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
650 reset_debug_uses (stmt);
654 for (i = 0; i < loop->num_nodes; i++)
656 basic_block bb = bbs[i];
658 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
660 gphi *phi = bsi.phi ();
661 if (!virtual_operand_p (gimple_phi_result (phi))
662 && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
663 remove_phi_node (&bsi, true);
664 else
665 gsi_next (&bsi);
668 for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
670 gimple *stmt = gsi_stmt (bsi);
671 if (gimple_code (stmt) != GIMPLE_LABEL
672 && !is_gimple_debug (stmt)
673 && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
675 /* Choose an arbitrary path through the empty CFG part
676 that this unnecessary control stmt controls. */
677 if (gcond *cond_stmt = dyn_cast <gcond *> (stmt))
679 gimple_cond_make_false (cond_stmt);
680 update_stmt (stmt);
682 else if (gimple_code (stmt) == GIMPLE_SWITCH)
684 gswitch *switch_stmt = as_a <gswitch *> (stmt);
685 gimple_switch_set_index
686 (switch_stmt, CASE_LOW (gimple_switch_label (switch_stmt, 1)));
687 update_stmt (stmt);
689 else
691 unlink_stmt_vdef (stmt);
692 gsi_remove (&bsi, true);
693 release_defs (stmt);
694 continue;
697 gsi_next (&bsi);
701 free (bbs);
704 /* Build the size argument for a memory operation call. */
706 static tree
707 build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter,
708 bool plus_one)
710 tree size = fold_convert_loc (loc, sizetype, nb_iter);
711 if (plus_one)
712 size = size_binop (PLUS_EXPR, size, size_one_node);
713 size = fold_build2_loc (loc, MULT_EXPR, sizetype, size,
714 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
715 size = fold_convert_loc (loc, size_type_node, size);
716 return size;
719 /* Build an address argument for a memory operation call. */
721 static tree
722 build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
724 tree addr_base;
726 addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
727 addr_base = fold_convert_loc (loc, sizetype, addr_base);
729 /* Test for a negative stride, iterating over every element. */
730 if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
732 addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
733 fold_convert_loc (loc, sizetype, nb_bytes));
734 addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
735 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
738 return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
741 /* If VAL memory representation contains the same value in all bytes,
742 return that value, otherwise return -1.
743 E.g. for 0x24242424 return 0x24, for IEEE double
744 747708026454360457216.0 return 0x44, etc. */
746 static int
747 const_with_all_bytes_same (tree val)
749 unsigned char buf[64];
750 int i, len;
752 if (integer_zerop (val)
753 || real_zerop (val)
754 || (TREE_CODE (val) == CONSTRUCTOR
755 && !TREE_CLOBBER_P (val)
756 && CONSTRUCTOR_NELTS (val) == 0))
757 return 0;
759 if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
760 return -1;
762 len = native_encode_expr (val, buf, sizeof (buf));
763 if (len == 0)
764 return -1;
765 for (i = 1; i < len; i++)
766 if (buf[i] != buf[0])
767 return -1;
768 return buf[0];
771 /* Generate a call to memset for PARTITION in LOOP. */
773 static void
774 generate_memset_builtin (struct loop *loop, partition *partition)
776 gimple_stmt_iterator gsi;
777 gimple *stmt, *fn_call;
778 tree mem, fn, nb_bytes;
779 location_t loc;
780 tree val;
782 stmt = DR_STMT (partition->main_dr);
783 loc = gimple_location (stmt);
785 /* The new statements will be placed before LOOP. */
786 gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
788 nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
789 partition->plus_one);
790 nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
791 false, GSI_CONTINUE_LINKING);
792 mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
793 mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
794 false, GSI_CONTINUE_LINKING);
796 /* This exactly matches the pattern recognition in classify_partition. */
797 val = gimple_assign_rhs1 (stmt);
798 /* Handle constants like 0x15151515 and similarly
799 floating point constants etc. where all bytes are the same. */
800 int bytev = const_with_all_bytes_same (val);
801 if (bytev != -1)
802 val = build_int_cst (integer_type_node, bytev);
803 else if (TREE_CODE (val) == INTEGER_CST)
804 val = fold_convert (integer_type_node, val);
805 else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
807 tree tem = make_ssa_name (integer_type_node);
808 gimple *cstmt = gimple_build_assign (tem, NOP_EXPR, val);
809 gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
810 val = tem;
813 fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
814 fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
815 gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
817 if (dump_file && (dump_flags & TDF_DETAILS))
819 fprintf (dump_file, "generated memset");
820 if (bytev == 0)
821 fprintf (dump_file, " zero\n");
822 else
823 fprintf (dump_file, "\n");
827 /* Generate a call to memcpy for PARTITION in LOOP. */
829 static void
830 generate_memcpy_builtin (struct loop *loop, partition *partition)
832 gimple_stmt_iterator gsi;
833 gimple *stmt, *fn_call;
834 tree dest, src, fn, nb_bytes;
835 location_t loc;
836 enum built_in_function kind;
838 stmt = DR_STMT (partition->main_dr);
839 loc = gimple_location (stmt);
841 /* The new statements will be placed before LOOP. */
842 gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
844 nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
845 partition->plus_one);
846 nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
847 false, GSI_CONTINUE_LINKING);
848 dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
849 src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
850 if (ptr_derefs_may_alias_p (dest, src))
851 kind = BUILT_IN_MEMMOVE;
852 else
853 kind = BUILT_IN_MEMCPY;
855 dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
856 false, GSI_CONTINUE_LINKING);
857 src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
858 false, GSI_CONTINUE_LINKING);
859 fn = build_fold_addr_expr (builtin_decl_implicit (kind));
860 fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
861 gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
863 if (dump_file && (dump_flags & TDF_DETAILS))
865 if (kind == BUILT_IN_MEMCPY)
866 fprintf (dump_file, "generated memcpy\n");
867 else
868 fprintf (dump_file, "generated memmove\n");
872 /* Remove and destroy the loop LOOP. */
874 static void
875 destroy_loop (struct loop *loop)
877 unsigned nbbs = loop->num_nodes;
878 edge exit = single_exit (loop);
879 basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
880 basic_block *bbs;
881 unsigned i;
883 bbs = get_loop_body_in_dom_order (loop);
885 redirect_edge_pred (exit, src);
886 exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
887 exit->flags |= EDGE_FALLTHRU;
888 cancel_loop_tree (loop);
889 rescan_loop_exit (exit, false, true);
891 for (i = 0; i < nbbs; i++)
893 /* We have made sure to not leave any dangling uses of SSA
894 names defined in the loop. With the exception of virtuals.
895 Make sure we replace all uses of virtual defs that will remain
896 outside of the loop with the bare symbol as delete_basic_block
897 will release them. */
898 for (gphi_iterator gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi);
899 gsi_next (&gsi))
901 gphi *phi = gsi.phi ();
902 if (virtual_operand_p (gimple_phi_result (phi)))
903 mark_virtual_phi_result_for_renaming (phi);
905 for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);
906 gsi_next (&gsi))
908 gimple *stmt = gsi_stmt (gsi);
909 tree vdef = gimple_vdef (stmt);
910 if (vdef && TREE_CODE (vdef) == SSA_NAME)
911 mark_virtual_operand_for_renaming (vdef);
913 delete_basic_block (bbs[i]);
915 free (bbs);
917 set_immediate_dominator (CDI_DOMINATORS, dest,
918 recompute_dominator (CDI_DOMINATORS, dest));
921 /* Generates code for PARTITION. Return whether LOOP needs to be destroyed. */
923 static bool
924 generate_code_for_partition (struct loop *loop,
925 partition *partition, bool copy_p)
927 switch (partition->kind)
929 case PKIND_NORMAL:
930 /* Reductions all have to be in the last partition. */
931 gcc_assert (!partition_reduction_p (partition)
932 || !copy_p);
933 generate_loops_for_partition (loop, partition, copy_p);
934 return false;
936 case PKIND_MEMSET:
937 generate_memset_builtin (loop, partition);
938 break;
940 case PKIND_MEMCPY:
941 generate_memcpy_builtin (loop, partition);
942 break;
944 default:
945 gcc_unreachable ();
948 /* Common tail for partitions we turn into a call. If this was the last
949 partition for which we generate code, we have to destroy the loop. */
950 if (!copy_p)
951 return true;
952 return false;
956 /* Returns a partition with all the statements needed for computing
957 the vertex V of the RDG, also including the loop exit conditions. */
959 static partition *
960 build_rdg_partition_for_vertex (struct graph *rdg, int v)
962 partition *partition = partition_alloc (NULL, NULL);
963 auto_vec<int, 3> nodes;
964 unsigned i;
965 int x;
967 graphds_dfs (rdg, &v, 1, &nodes, false, NULL);
969 FOR_EACH_VEC_ELT (nodes, i, x)
971 bitmap_set_bit (partition->stmts, x);
972 bitmap_set_bit (partition->loops,
973 loop_containing_stmt (RDG_STMT (rdg, x))->num);
976 return partition;
979 /* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
980 For the moment we detect only the memset zero pattern. */
982 static void
983 classify_partition (loop_p loop, struct graph *rdg, partition *partition)
985 bitmap_iterator bi;
986 unsigned i;
987 tree nb_iter;
988 data_reference_p single_load, single_store;
989 bool volatiles_p = false;
990 bool plus_one = false;
992 partition->kind = PKIND_NORMAL;
993 partition->main_dr = NULL;
994 partition->secondary_dr = NULL;
995 partition->niter = NULL_TREE;
996 partition->plus_one = false;
998 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
1000 gimple *stmt = RDG_STMT (rdg, i);
1002 if (gimple_has_volatile_ops (stmt))
1003 volatiles_p = true;
1005 /* If the stmt has uses outside of the loop mark it as reduction. */
1006 if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
1008 partition->reduction_p = true;
1009 return;
1013 /* Perform general partition disqualification for builtins. */
1014 if (volatiles_p
1015 || !flag_tree_loop_distribute_patterns)
1016 return;
1018 /* Detect memset and memcpy. */
1019 single_load = NULL;
1020 single_store = NULL;
1021 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
1023 gimple *stmt = RDG_STMT (rdg, i);
1024 data_reference_p dr;
1025 unsigned j;
1027 if (gimple_code (stmt) == GIMPLE_PHI)
1028 continue;
1030 /* Any scalar stmts are ok. */
1031 if (!gimple_vuse (stmt))
1032 continue;
1034 /* Otherwise just regular loads/stores. */
1035 if (!gimple_assign_single_p (stmt))
1036 return;
1038 /* But exactly one store and/or load. */
1039 for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
1041 if (DR_IS_READ (dr))
1043 if (single_load != NULL)
1044 return;
1045 single_load = dr;
1047 else
1049 if (single_store != NULL)
1050 return;
1051 single_store = dr;
1056 if (!single_store)
1057 return;
1059 nb_iter = number_of_latch_executions (loop);
1060 if (!nb_iter || nb_iter == chrec_dont_know)
1061 return;
1062 if (dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
1063 gimple_bb (DR_STMT (single_store))))
1064 plus_one = true;
1066 if (single_store && !single_load)
1068 gimple *stmt = DR_STMT (single_store);
1069 tree rhs = gimple_assign_rhs1 (stmt);
1070 if (const_with_all_bytes_same (rhs) == -1
1071 && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
1072 || (TYPE_MODE (TREE_TYPE (rhs))
1073 != TYPE_MODE (unsigned_char_type_node))))
1074 return;
1075 if (TREE_CODE (rhs) == SSA_NAME
1076 && !SSA_NAME_IS_DEFAULT_DEF (rhs)
1077 && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
1078 return;
1079 if (!adjacent_dr_p (single_store)
1080 || !dominated_by_p (CDI_DOMINATORS,
1081 loop->latch, gimple_bb (stmt)))
1082 return;
1083 partition->kind = PKIND_MEMSET;
1084 partition->main_dr = single_store;
1085 partition->niter = nb_iter;
1086 partition->plus_one = plus_one;
1088 else if (single_store && single_load)
1090 gimple *store = DR_STMT (single_store);
1091 gimple *load = DR_STMT (single_load);
1092 /* Direct aggregate copy or via an SSA name temporary. */
1093 if (load != store
1094 && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
1095 return;
1096 if (!adjacent_dr_p (single_store)
1097 || !adjacent_dr_p (single_load)
1098 || !operand_equal_p (DR_STEP (single_store),
1099 DR_STEP (single_load), 0)
1100 || !dominated_by_p (CDI_DOMINATORS,
1101 loop->latch, gimple_bb (store)))
1102 return;
1103 /* Now check that if there is a dependence this dependence is
1104 of a suitable form for memmove. */
1105 vec<loop_p> loops = vNULL;
1106 ddr_p ddr;
1107 loops.safe_push (loop);
1108 ddr = initialize_data_dependence_relation (single_load, single_store,
1109 loops);
1110 compute_affine_dependence (ddr, loop);
1111 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1113 free_dependence_relation (ddr);
1114 loops.release ();
1115 return;
1117 if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
1119 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1121 free_dependence_relation (ddr);
1122 loops.release ();
1123 return;
1125 lambda_vector dist_v;
1126 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
1128 int dist = dist_v[index_in_loop_nest (loop->num,
1129 DDR_LOOP_NEST (ddr))];
1130 if (dist > 0 && !DDR_REVERSED_P (ddr))
1132 free_dependence_relation (ddr);
1133 loops.release ();
1134 return;
1138 free_dependence_relation (ddr);
1139 loops.release ();
1140 partition->kind = PKIND_MEMCPY;
1141 partition->main_dr = single_store;
1142 partition->secondary_dr = single_load;
1143 partition->niter = nb_iter;
1144 partition->plus_one = plus_one;
1148 /* For a data reference REF, return the declaration of its base
1149 address or NULL_TREE if the base is not determined. */
1151 static tree
1152 ref_base_address (data_reference_p dr)
1154 tree base_address = DR_BASE_ADDRESS (dr);
1155 if (base_address
1156 && TREE_CODE (base_address) == ADDR_EXPR)
1157 return TREE_OPERAND (base_address, 0);
1159 return base_address;
1162 /* Returns true when PARTITION1 and PARTITION2 have similar memory
1163 accesses in RDG. */
1165 static bool
1166 similar_memory_accesses (struct graph *rdg, partition *partition1,
1167 partition *partition2)
1169 unsigned i, j, k, l;
1170 bitmap_iterator bi, bj;
1171 data_reference_p ref1, ref2;
1173 /* First check whether in the intersection of the two partitions are
1174 any loads or stores. Common loads are the situation that happens
1175 most often. */
1176 EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
1177 if (RDG_MEM_WRITE_STMT (rdg, i)
1178 || RDG_MEM_READS_STMT (rdg, i))
1179 return true;
1181 /* Then check all data-references against each other. */
1182 EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
1183 if (RDG_MEM_WRITE_STMT (rdg, i)
1184 || RDG_MEM_READS_STMT (rdg, i))
1185 EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
1186 if (RDG_MEM_WRITE_STMT (rdg, j)
1187 || RDG_MEM_READS_STMT (rdg, j))
1189 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
1191 tree base1 = ref_base_address (ref1);
1192 if (base1)
1193 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
1194 if (base1 == ref_base_address (ref2))
1195 return true;
1199 return false;
1202 /* Aggregate several components into a useful partition that is
1203 registered in the PARTITIONS vector. Partitions will be
1204 distributed in different loops. */
1206 static void
1207 rdg_build_partitions (struct graph *rdg,
1208 vec<gimple *> starting_stmts,
1209 vec<partition *> *partitions)
1211 bitmap processed = BITMAP_ALLOC (NULL);
1212 int i;
1213 gimple *stmt;
1215 FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
1217 int v = rdg_vertex_for_stmt (rdg, stmt);
1219 if (dump_file && (dump_flags & TDF_DETAILS))
1220 fprintf (dump_file,
1221 "ldist asked to generate code for vertex %d\n", v);
1223 /* If the vertex is already contained in another partition so
1224 is the partition rooted at it. */
1225 if (bitmap_bit_p (processed, v))
1226 continue;
1228 partition *partition = build_rdg_partition_for_vertex (rdg, v);
1229 bitmap_ior_into (processed, partition->stmts);
1231 if (dump_file && (dump_flags & TDF_DETAILS))
1233 fprintf (dump_file, "ldist useful partition:\n");
1234 dump_bitmap (dump_file, partition->stmts);
1237 partitions->safe_push (partition);
1240 /* All vertices should have been assigned to at least one partition now,
1241 other than vertices belonging to dead code. */
1243 BITMAP_FREE (processed);
1246 /* Dump to FILE the PARTITIONS. */
1248 static void
1249 dump_rdg_partitions (FILE *file, vec<partition *> partitions)
1251 int i;
1252 partition *partition;
1254 FOR_EACH_VEC_ELT (partitions, i, partition)
1255 debug_bitmap_file (file, partition->stmts);
1258 /* Debug PARTITIONS. */
1259 extern void debug_rdg_partitions (vec<partition *> );
1261 DEBUG_FUNCTION void
1262 debug_rdg_partitions (vec<partition *> partitions)
1264 dump_rdg_partitions (stderr, partitions);
1267 /* Returns the number of read and write operations in the RDG. */
1269 static int
1270 number_of_rw_in_rdg (struct graph *rdg)
1272 int i, res = 0;
1274 for (i = 0; i < rdg->n_vertices; i++)
1276 if (RDG_MEM_WRITE_STMT (rdg, i))
1277 ++res;
1279 if (RDG_MEM_READS_STMT (rdg, i))
1280 ++res;
1283 return res;
1286 /* Returns the number of read and write operations in a PARTITION of
1287 the RDG. */
1289 static int
1290 number_of_rw_in_partition (struct graph *rdg, partition *partition)
1292 int res = 0;
1293 unsigned i;
1294 bitmap_iterator ii;
1296 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
1298 if (RDG_MEM_WRITE_STMT (rdg, i))
1299 ++res;
1301 if (RDG_MEM_READS_STMT (rdg, i))
1302 ++res;
1305 return res;
1308 /* Returns true when one of the PARTITIONS contains all the read or
1309 write operations of RDG. */
1311 static bool
1312 partition_contains_all_rw (struct graph *rdg,
1313 vec<partition *> partitions)
1315 int i;
1316 partition *partition;
1317 int nrw = number_of_rw_in_rdg (rdg);
1319 FOR_EACH_VEC_ELT (partitions, i, partition)
1320 if (nrw == number_of_rw_in_partition (rdg, partition))
1321 return true;
1323 return false;
1326 /* Compute partition dependence created by the data references in DRS1
1327 and DRS2 and modify and return DIR according to that. */
1329 static int
1330 pg_add_dependence_edges (struct graph *rdg, vec<loop_p> loops, int dir,
1331 vec<data_reference_p> drs1,
1332 vec<data_reference_p> drs2)
1334 data_reference_p dr1, dr2;
1336 /* dependence direction - 0 is no dependence, -1 is back,
1337 1 is forth, 2 is both (we can stop then, merging will occur). */
1338 for (int ii = 0; drs1.iterate (ii, &dr1); ++ii)
1339 for (int jj = 0; drs2.iterate (jj, &dr2); ++jj)
1341 data_reference_p saved_dr1 = dr1;
1342 int this_dir = 1;
1343 ddr_p ddr;
1344 /* Re-shuffle data-refs to be in dominator order. */
1345 if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
1346 > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
1348 std::swap (dr1, dr2);
1349 this_dir = -this_dir;
1351 ddr = initialize_data_dependence_relation (dr1, dr2, loops);
1352 compute_affine_dependence (ddr, loops[0]);
1353 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1354 this_dir = 2;
1355 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1357 if (DDR_REVERSED_P (ddr))
1359 std::swap (dr1, dr2);
1360 this_dir = -this_dir;
1362 /* Known dependences can still be unordered througout the
1363 iteration space, see gcc.dg/tree-ssa/ldist-16.c. */
1364 if (DDR_NUM_DIST_VECTS (ddr) != 1)
1365 this_dir = 2;
1366 /* If the overlap is exact preserve stmt order. */
1367 else if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1))
1369 else
1371 /* Else as the distance vector is lexicographic positive
1372 swap the dependence direction. */
1373 this_dir = -this_dir;
1376 else
1377 this_dir = 0;
1378 free_dependence_relation (ddr);
1379 if (dir == 0)
1380 dir = this_dir;
1381 else if (dir != this_dir)
1382 return 2;
1383 /* Shuffle "back" dr1. */
1384 dr1 = saved_dr1;
1386 return dir;
1389 /* Compare postorder number of the partition graph vertices V1 and V2. */
1391 static int
1392 pgcmp (const void *v1_, const void *v2_)
1394 const vertex *v1 = (const vertex *)v1_;
1395 const vertex *v2 = (const vertex *)v2_;
1396 return v2->post - v1->post;
1399 /* Distributes the code from LOOP in such a way that producer
1400 statements are placed before consumer statements. Tries to separate
1401 only the statements from STMTS into separate loops.
1402 Returns the number of distributed loops. Set *DESTROY_P to whether
1403 LOOP needs to be destroyed. */
1405 static int
1406 distribute_loop (struct loop *loop, vec<gimple *> stmts,
1407 control_dependences *cd, int *nb_calls, bool *destroy_p)
1409 struct graph *rdg;
1410 partition *partition;
1411 bool any_builtin;
1412 int i, nbp;
1413 graph *pg = NULL;
1414 int num_sccs = 1;
1416 *destroy_p = false;
1417 *nb_calls = 0;
1418 auto_vec<loop_p, 3> loop_nest;
1419 if (!find_loop_nest (loop, &loop_nest))
1420 return 0;
1422 rdg = build_rdg (loop_nest, cd);
1423 if (!rdg)
1425 if (dump_file && (dump_flags & TDF_DETAILS))
1426 fprintf (dump_file,
1427 "Loop %d not distributed: failed to build the RDG.\n",
1428 loop->num);
1430 return 0;
1433 if (dump_file && (dump_flags & TDF_DETAILS))
1434 dump_rdg (dump_file, rdg);
1436 auto_vec<struct partition *, 3> partitions;
1437 rdg_build_partitions (rdg, stmts, &partitions);
1439 any_builtin = false;
1440 FOR_EACH_VEC_ELT (partitions, i, partition)
1442 classify_partition (loop, rdg, partition);
1443 any_builtin |= partition_builtin_p (partition);
1446 /* If we are only distributing patterns but did not detect any,
1447 simply bail out. */
1448 if (!flag_tree_loop_distribution
1449 && !any_builtin)
1451 nbp = 0;
1452 goto ldist_done;
1455 /* If we are only distributing patterns fuse all partitions that
1456 were not classified as builtins. This also avoids chopping
1457 a loop into pieces, separated by builtin calls. That is, we
1458 only want no or a single loop body remaining. */
1459 struct partition *into;
1460 if (!flag_tree_loop_distribution)
1462 for (i = 0; partitions.iterate (i, &into); ++i)
1463 if (!partition_builtin_p (into))
1464 break;
1465 for (++i; partitions.iterate (i, &partition); ++i)
1466 if (!partition_builtin_p (partition))
1468 if (dump_file && (dump_flags & TDF_DETAILS))
1470 fprintf (dump_file, "fusing non-builtin partitions\n");
1471 dump_bitmap (dump_file, into->stmts);
1472 dump_bitmap (dump_file, partition->stmts);
1474 partition_merge_into (into, partition);
1475 partitions.unordered_remove (i);
1476 partition_free (partition);
1477 i--;
1481 /* Due to limitations in the transform phase we have to fuse all
1482 reduction partitions into the last partition so the existing
1483 loop will contain all loop-closed PHI nodes. */
1484 for (i = 0; partitions.iterate (i, &into); ++i)
1485 if (partition_reduction_p (into))
1486 break;
1487 for (i = i + 1; partitions.iterate (i, &partition); ++i)
1488 if (partition_reduction_p (partition))
1490 if (dump_file && (dump_flags & TDF_DETAILS))
1492 fprintf (dump_file, "fusing partitions\n");
1493 dump_bitmap (dump_file, into->stmts);
1494 dump_bitmap (dump_file, partition->stmts);
1495 fprintf (dump_file, "because they have reductions\n");
1497 partition_merge_into (into, partition);
1498 partitions.unordered_remove (i);
1499 partition_free (partition);
1500 i--;
1503 /* Apply our simple cost model - fuse partitions with similar
1504 memory accesses. */
1505 for (i = 0; partitions.iterate (i, &into); ++i)
1507 if (partition_builtin_p (into))
1508 continue;
1509 for (int j = i + 1;
1510 partitions.iterate (j, &partition); ++j)
1512 if (!partition_builtin_p (partition)
1513 && similar_memory_accesses (rdg, into, partition))
1515 if (dump_file && (dump_flags & TDF_DETAILS))
1517 fprintf (dump_file, "fusing partitions\n");
1518 dump_bitmap (dump_file, into->stmts);
1519 dump_bitmap (dump_file, partition->stmts);
1520 fprintf (dump_file, "because they have similar "
1521 "memory accesses\n");
1523 partition_merge_into (into, partition);
1524 partitions.unordered_remove (j);
1525 partition_free (partition);
1526 j--;
1531 /* Build the partition dependency graph. */
1532 if (partitions.length () > 1)
1534 pg = new_graph (partitions.length ());
1535 struct pgdata {
1536 struct partition *partition;
1537 vec<data_reference_p> writes;
1538 vec<data_reference_p> reads;
1540 #define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
1541 for (i = 0; partitions.iterate (i, &partition); ++i)
1543 vertex *v = &pg->vertices[i];
1544 pgdata *data = new pgdata;
1545 data_reference_p dr;
1546 /* FIXME - leaks. */
1547 v->data = data;
1548 bitmap_iterator bi;
1549 unsigned j;
1550 data->partition = partition;
1551 data->reads = vNULL;
1552 data->writes = vNULL;
1553 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, j, bi)
1554 for (int k = 0; RDG_DATAREFS (rdg, j).iterate (k, &dr); ++k)
1555 if (DR_IS_READ (dr))
1556 data->reads.safe_push (dr);
1557 else
1558 data->writes.safe_push (dr);
1560 struct partition *partition1, *partition2;
1561 for (i = 0; partitions.iterate (i, &partition1); ++i)
1562 for (int j = i + 1; partitions.iterate (j, &partition2); ++j)
1564 /* dependence direction - 0 is no dependence, -1 is back,
1565 1 is forth, 2 is both (we can stop then, merging will occur). */
1566 int dir = 0;
1567 dir = pg_add_dependence_edges (rdg, loop_nest, dir,
1568 PGDATA(i)->writes,
1569 PGDATA(j)->reads);
1570 if (dir != 2)
1571 dir = pg_add_dependence_edges (rdg, loop_nest, dir,
1572 PGDATA(i)->reads,
1573 PGDATA(j)->writes);
1574 if (dir != 2)
1575 dir = pg_add_dependence_edges (rdg, loop_nest, dir,
1576 PGDATA(i)->writes,
1577 PGDATA(j)->writes);
1578 if (dir == 1 || dir == 2)
1579 add_edge (pg, i, j);
1580 if (dir == -1 || dir == 2)
1581 add_edge (pg, j, i);
1584 /* Add edges to the reduction partition (if any) to force it last. */
1585 unsigned j;
1586 for (j = 0; partitions.iterate (j, &partition); ++j)
1587 if (partition_reduction_p (partition))
1588 break;
1589 if (j < partitions.length ())
1591 for (unsigned i = 0; partitions.iterate (i, &partition); ++i)
1592 if (i != j)
1593 add_edge (pg, i, j);
1596 /* Compute partitions we cannot separate and fuse them. */
1597 num_sccs = graphds_scc (pg, NULL);
1598 for (i = 0; i < num_sccs; ++i)
1600 struct partition *first;
1601 int j;
1602 for (j = 0; partitions.iterate (j, &first); ++j)
1603 if (pg->vertices[j].component == i)
1604 break;
1605 for (j = j + 1; partitions.iterate (j, &partition); ++j)
1606 if (pg->vertices[j].component == i)
1608 if (dump_file && (dump_flags & TDF_DETAILS))
1610 fprintf (dump_file, "fusing partitions\n");
1611 dump_bitmap (dump_file, first->stmts);
1612 dump_bitmap (dump_file, partition->stmts);
1613 fprintf (dump_file, "because they are in the same "
1614 "dependence SCC\n");
1616 partition_merge_into (first, partition);
1617 partitions[j] = NULL;
1618 partition_free (partition);
1619 PGDATA (j)->partition = NULL;
1623 /* Now order the remaining nodes in postorder. */
1624 qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
1625 partitions.truncate (0);
1626 for (i = 0; i < pg->n_vertices; ++i)
1628 pgdata *data = PGDATA (i);
1629 if (data->partition)
1630 partitions.safe_push (data->partition);
1631 data->reads.release ();
1632 data->writes.release ();
1633 delete data;
1635 gcc_assert (partitions.length () == (unsigned)num_sccs);
1636 free_graph (pg);
1639 nbp = partitions.length ();
1640 if (nbp == 0
1641 || (nbp == 1 && !partition_builtin_p (partitions[0]))
1642 || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
1644 nbp = 0;
1645 goto ldist_done;
1648 if (dump_file && (dump_flags & TDF_DETAILS))
1649 dump_rdg_partitions (dump_file, partitions);
1651 FOR_EACH_VEC_ELT (partitions, i, partition)
1653 if (partition_builtin_p (partition))
1654 (*nb_calls)++;
1655 *destroy_p |= generate_code_for_partition (loop, partition, i < nbp - 1);
1658 ldist_done:
1660 FOR_EACH_VEC_ELT (partitions, i, partition)
1661 partition_free (partition);
1663 free_rdg (rdg);
1664 return nbp - *nb_calls;
1667 /* Distribute all loops in the current function. */
1669 namespace {
1671 const pass_data pass_data_loop_distribution =
1673 GIMPLE_PASS, /* type */
1674 "ldist", /* name */
1675 OPTGROUP_LOOP, /* optinfo_flags */
1676 TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
1677 ( PROP_cfg | PROP_ssa ), /* properties_required */
1678 0, /* properties_provided */
1679 0, /* properties_destroyed */
1680 0, /* todo_flags_start */
1681 0, /* todo_flags_finish */
1684 class pass_loop_distribution : public gimple_opt_pass
1686 public:
1687 pass_loop_distribution (gcc::context *ctxt)
1688 : gimple_opt_pass (pass_data_loop_distribution, ctxt)
1691 /* opt_pass methods: */
1692 virtual bool gate (function *)
1694 return flag_tree_loop_distribution
1695 || flag_tree_loop_distribute_patterns;
1698 virtual unsigned int execute (function *);
1700 }; // class pass_loop_distribution
1702 unsigned int
1703 pass_loop_distribution::execute (function *fun)
1705 struct loop *loop;
1706 bool changed = false;
1707 basic_block bb;
1708 control_dependences *cd = NULL;
1709 auto_vec<loop_p> loops_to_be_destroyed;
1711 FOR_ALL_BB_FN (bb, fun)
1713 gimple_stmt_iterator gsi;
1714 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1715 gimple_set_uid (gsi_stmt (gsi), -1);
1716 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1717 gimple_set_uid (gsi_stmt (gsi), -1);
1720 /* We can at the moment only distribute non-nested loops, thus restrict
1721 walking to innermost loops. */
1722 FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
1724 auto_vec<gimple *> work_list;
1725 basic_block *bbs;
1726 int num = loop->num;
1727 unsigned int i;
1729 /* If the loop doesn't have a single exit we will fail anyway,
1730 so do that early. */
1731 if (!single_exit (loop))
1732 continue;
1734 /* Only optimize hot loops. */
1735 if (!optimize_loop_for_speed_p (loop))
1736 continue;
1738 /* Initialize the worklist with stmts we seed the partitions with. */
1739 bbs = get_loop_body_in_dom_order (loop);
1740 for (i = 0; i < loop->num_nodes; ++i)
1742 for (gphi_iterator gsi = gsi_start_phis (bbs[i]);
1743 !gsi_end_p (gsi);
1744 gsi_next (&gsi))
1746 gphi *phi = gsi.phi ();
1747 if (virtual_operand_p (gimple_phi_result (phi)))
1748 continue;
1749 /* Distribute stmts which have defs that are used outside of
1750 the loop. */
1751 if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
1752 continue;
1753 work_list.safe_push (phi);
1755 for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
1756 !gsi_end_p (gsi);
1757 gsi_next (&gsi))
1759 gimple *stmt = gsi_stmt (gsi);
1761 /* If there is a stmt with side-effects bail out - we
1762 cannot and should not distribute this loop. */
1763 if (gimple_has_side_effects (stmt))
1765 work_list.truncate (0);
1766 goto out;
1769 /* Distribute stmts which have defs that are used outside of
1770 the loop. */
1771 if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
1773 /* Otherwise only distribute stores for now. */
1774 else if (!gimple_vdef (stmt))
1775 continue;
1777 work_list.safe_push (stmt);
1780 out:
1781 free (bbs);
1783 int nb_generated_loops = 0;
1784 int nb_generated_calls = 0;
1785 location_t loc = find_loop_location (loop);
1786 if (work_list.length () > 0)
1788 if (!cd)
1790 calculate_dominance_info (CDI_DOMINATORS);
1791 calculate_dominance_info (CDI_POST_DOMINATORS);
1792 cd = new control_dependences ();
1793 free_dominance_info (CDI_POST_DOMINATORS);
1795 bool destroy_p;
1796 nb_generated_loops = distribute_loop (loop, work_list, cd,
1797 &nb_generated_calls,
1798 &destroy_p);
1799 if (destroy_p)
1800 loops_to_be_destroyed.safe_push (loop);
1803 if (nb_generated_loops + nb_generated_calls > 0)
1805 changed = true;
1806 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
1807 loc, "Loop %d distributed: split to %d loops "
1808 "and %d library calls.\n",
1809 num, nb_generated_loops, nb_generated_calls);
1811 else if (dump_file && (dump_flags & TDF_DETAILS))
1812 fprintf (dump_file, "Loop %d is the same.\n", num);
1815 if (cd)
1816 delete cd;
1818 if (changed)
1820 /* Destroy loop bodies that could not be reused. Do this late as we
1821 otherwise can end up refering to stale data in control dependences. */
1822 unsigned i;
1823 FOR_EACH_VEC_ELT (loops_to_be_destroyed, i, loop)
1824 destroy_loop (loop);
1826 /* Cached scalar evolutions now may refer to wrong or non-existing
1827 loops. */
1828 scev_reset_htab ();
1829 mark_virtual_operands_for_renaming (fun);
1830 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1833 checking_verify_loop_structure ();
1835 return 0;
1838 } // anon namespace
1840 gimple_opt_pass *
1841 make_pass_loop_distribution (gcc::context *ctxt)
1843 return new pass_loop_distribution (ctxt);